mirror of
https://github.com/gcc-mirror/gcc.git
synced 2026-05-06 14:59:39 +02:00
Add cltz_complement idiom recognition
This recognises patterns of the form:
while (n) { n >>= 1 }
This patch results in improved (but still suboptimal) codegen:
foo (unsigned int b) {
int c = 0;
while (b) {
b >>= 1;
c++;
}
return c;
}
foo:
.LFB11:
.cfi_startproc
cbz w0, .L3
clz w1, w0
tst x0, 1
mov w0, 32
sub w0, w0, w1
csel w0, w0, wzr, ne
ret
The conditional is unnecessary. phiopt could recognise a redundant csel
(using cond_removal_in_builtin_zero_pattern) when one of the inputs is a
clz call, but it cannot recognise the redunancy when the input is (e.g.)
(32 - clz).
I could perhaps extend this function to recognise this pattern in a later
patch, if this is a good place to recognise more patterns.
gcc/ChangeLog:
PR tree-optimization/94793
* tree-scalar-evolution.cc (expression_expensive_p): Add checks
for c[lt]z optabs.
* tree-ssa-loop-niter.cc (build_cltz_expr): New.
(number_of_iterations_cltz_complement): New.
(number_of_iterations_bitcount): Add call to the above.
gcc/testsuite/ChangeLog:
* lib/target-supports.exp (check_effective_target_clz)
(check_effective_target_clzl, check_effective_target_clzll)
(check_effective_target_ctz, check_effective_target_clzl)
(check_effective_target_ctzll): New.
* gcc.dg/tree-ssa/cltz-complement-max.c: New test.
* gcc.dg/tree-ssa/clz-complement-char.c: New test.
* gcc.dg/tree-ssa/clz-complement-int.c: New test.
* gcc.dg/tree-ssa/clz-complement-long-long.c: New test.
* gcc.dg/tree-ssa/clz-complement-long.c: New test.
* gcc.dg/tree-ssa/ctz-complement-char.c: New test.
* gcc.dg/tree-ssa/ctz-complement-int.c: New test.
* gcc.dg/tree-ssa/ctz-complement-long-long.c: New test.
* gcc.dg/tree-ssa/ctz-complement-long.c: New test.
This commit is contained in:
60
gcc/testsuite/gcc.dg/tree-ssa/cltz-complement-max.c
Normal file
60
gcc/testsuite/gcc.dg/tree-ssa/cltz-complement-max.c
Normal file
@@ -0,0 +1,60 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fno-tree-loop-optimize -fdump-tree-optimized" } */
|
||||
|
||||
#define PREC (__CHAR_BIT__)
|
||||
|
||||
int clz_complement_count1 (unsigned char b) {
|
||||
int c = 0;
|
||||
|
||||
while (b) {
|
||||
b >>= 1;
|
||||
c++;
|
||||
}
|
||||
if (c <= PREC)
|
||||
return 0;
|
||||
else
|
||||
return 34567;
|
||||
}
|
||||
|
||||
int clz_complement_count2 (unsigned char b) {
|
||||
int c = 0;
|
||||
|
||||
while (b) {
|
||||
b >>= 1;
|
||||
c++;
|
||||
}
|
||||
if (c <= PREC - 1)
|
||||
return 0;
|
||||
else
|
||||
return 76543;
|
||||
}
|
||||
|
||||
int ctz_complement_count1 (unsigned char b) {
|
||||
int c = 0;
|
||||
|
||||
while (b) {
|
||||
b <<= 1;
|
||||
c++;
|
||||
}
|
||||
if (c <= PREC)
|
||||
return 0;
|
||||
else
|
||||
return 23456;
|
||||
}
|
||||
|
||||
int ctz_complement_count2 (unsigned char b) {
|
||||
int c = 0;
|
||||
|
||||
while (b) {
|
||||
b <<= 1;
|
||||
c++;
|
||||
}
|
||||
if (c <= PREC - 1)
|
||||
return 0;
|
||||
else
|
||||
return 65432;
|
||||
}
|
||||
/* { dg-final { scan-tree-dump-times "34567" 0 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times "76543" 1 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times "23456" 0 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times "65432" 1 "optimized" } } */
|
||||
31
gcc/testsuite/gcc.dg/tree-ssa/clz-complement-char.c
Normal file
31
gcc/testsuite/gcc.dg/tree-ssa/clz-complement-char.c
Normal file
@@ -0,0 +1,31 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target clz } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
|
||||
#define PREC (__CHAR_BIT__)
|
||||
|
||||
int
|
||||
__attribute__ ((noinline, noclone))
|
||||
foo (unsigned char b) {
|
||||
int c = 0;
|
||||
|
||||
while (b) {
|
||||
b >>= 1;
|
||||
c++;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
if (foo(0) != 0)
|
||||
__builtin_abort ();
|
||||
if (foo(5) != 3)
|
||||
__builtin_abort ();
|
||||
if (foo(255) != 8)
|
||||
__builtin_abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 1 "optimized" } } */
|
||||
31
gcc/testsuite/gcc.dg/tree-ssa/clz-complement-int.c
Normal file
31
gcc/testsuite/gcc.dg/tree-ssa/clz-complement-int.c
Normal file
@@ -0,0 +1,31 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target clz } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
|
||||
#define PREC (__CHAR_BIT__ * __SIZEOF_INT__)
|
||||
|
||||
int
|
||||
__attribute__ ((noinline, noclone))
|
||||
foo (unsigned int b) {
|
||||
int c = 0;
|
||||
|
||||
while (b) {
|
||||
b >>= 1;
|
||||
c++;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
if (foo(0) != 0)
|
||||
__builtin_abort ();
|
||||
if (foo(5) != 3)
|
||||
__builtin_abort ();
|
||||
if (foo(1 << (PREC - 1)) != PREC)
|
||||
__builtin_abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 1 "optimized" } } */
|
||||
31
gcc/testsuite/gcc.dg/tree-ssa/clz-complement-long-long.c
Normal file
31
gcc/testsuite/gcc.dg/tree-ssa/clz-complement-long-long.c
Normal file
@@ -0,0 +1,31 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target clzll } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
|
||||
#define PREC (__CHAR_BIT__ * __SIZEOF_LONG_LONG__)
|
||||
|
||||
int
|
||||
__attribute__ ((noinline, noclone))
|
||||
foo (unsigned long long b) {
|
||||
int c = 0;
|
||||
|
||||
while (b) {
|
||||
b >>= 1;
|
||||
c++;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
if (foo(0) != 0)
|
||||
__builtin_abort ();
|
||||
if (foo(5) != 3)
|
||||
__builtin_abort ();
|
||||
if (foo(1LL << (PREC - 1)) != PREC)
|
||||
__builtin_abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 1 "optimized" } } */
|
||||
31
gcc/testsuite/gcc.dg/tree-ssa/clz-complement-long.c
Normal file
31
gcc/testsuite/gcc.dg/tree-ssa/clz-complement-long.c
Normal file
@@ -0,0 +1,31 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target clzl } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
|
||||
#define PREC (__CHAR_BIT__ * __SIZEOF_LONG__)
|
||||
|
||||
int
|
||||
__attribute__ ((noinline, noclone))
|
||||
foo (unsigned long b) {
|
||||
int c = 0;
|
||||
|
||||
while (b) {
|
||||
b >>= 1;
|
||||
c++;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
if (foo(0) != 0)
|
||||
__builtin_abort ();
|
||||
if (foo(5) != 3)
|
||||
__builtin_abort ();
|
||||
if (foo(1L << (PREC - 1)) != PREC)
|
||||
__builtin_abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 1 "optimized" } } */
|
||||
31
gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-char.c
Normal file
31
gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-char.c
Normal file
@@ -0,0 +1,31 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target ctz } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
|
||||
#define PREC (__CHAR_BIT__)
|
||||
|
||||
int
|
||||
__attribute__ ((noinline, noclone))
|
||||
foo (unsigned char b) {
|
||||
int c = 0;
|
||||
|
||||
while (b) {
|
||||
b <<= 1;
|
||||
c++;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
if (foo(0) != 0)
|
||||
__builtin_abort ();
|
||||
if (foo(96) != PREC - 5)
|
||||
__builtin_abort ();
|
||||
if (foo(35) != PREC)
|
||||
__builtin_abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } } */
|
||||
31
gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-int.c
Normal file
31
gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-int.c
Normal file
@@ -0,0 +1,31 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target ctz } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
|
||||
#define PREC (__CHAR_BIT__ * __SIZEOF_INT__)
|
||||
|
||||
int
|
||||
__attribute__ ((noinline, noclone))
|
||||
foo (unsigned int b) {
|
||||
int c = 0;
|
||||
|
||||
while (b) {
|
||||
b <<= 1;
|
||||
c++;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
if (foo(0) != 0)
|
||||
__builtin_abort ();
|
||||
if (foo(96) != PREC - 5)
|
||||
__builtin_abort ();
|
||||
if (foo(35) != PREC)
|
||||
__builtin_abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } } */
|
||||
31
gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long-long.c
Normal file
31
gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long-long.c
Normal file
@@ -0,0 +1,31 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target ctzll } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
|
||||
#define PREC (__CHAR_BIT__ * __SIZEOF_LONG_LONG__)
|
||||
|
||||
int
|
||||
__attribute__ ((noinline, noclone))
|
||||
foo (unsigned long long b) {
|
||||
int c = 0;
|
||||
|
||||
while (b) {
|
||||
b <<= 1;
|
||||
c++;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
if (foo(0) != 0)
|
||||
__builtin_abort ();
|
||||
if (foo(96) != PREC - 5)
|
||||
__builtin_abort ();
|
||||
if (foo(35) != PREC)
|
||||
__builtin_abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } } */
|
||||
31
gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long.c
Normal file
31
gcc/testsuite/gcc.dg/tree-ssa/ctz-complement-long.c
Normal file
@@ -0,0 +1,31 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target ctzl } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
|
||||
#define PREC (__CHAR_BIT__ * __SIZEOF_LONG__)
|
||||
|
||||
int
|
||||
__attribute__ ((noinline, noclone))
|
||||
foo (unsigned long b) {
|
||||
int c = 0;
|
||||
|
||||
while (b) {
|
||||
b <<= 1;
|
||||
c++;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
if (foo(0) != 0)
|
||||
__builtin_abort ();
|
||||
if (foo(96) != PREC - 5)
|
||||
__builtin_abort ();
|
||||
if (foo(35) != PREC)
|
||||
__builtin_abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 1 "optimized" } } */
|
||||
@@ -8708,6 +8708,72 @@ proc check_effective_target_popcount { } {
|
||||
} "" ]
|
||||
}
|
||||
|
||||
# Return 1 if the target supports clz on int.
|
||||
|
||||
proc check_effective_target_clz { } {
|
||||
return [check_no_messages_and_pattern clz "!\\(call" rtl-expand {
|
||||
int foo (int b)
|
||||
{
|
||||
return __builtin_clz (b);
|
||||
}
|
||||
} "" ]
|
||||
}
|
||||
|
||||
# Return 1 if the target supports clz on long.
|
||||
|
||||
proc check_effective_target_clzl { } {
|
||||
return [check_no_messages_and_pattern clzl "!\\(call" rtl-expand {
|
||||
int foo (long b)
|
||||
{
|
||||
return __builtin_clzl (b);
|
||||
}
|
||||
} "" ]
|
||||
}
|
||||
|
||||
# Return 1 if the target supports clz on long long.
|
||||
|
||||
proc check_effective_target_clzll { } {
|
||||
return [check_no_messages_and_pattern clzll "!\\(call" rtl-expand {
|
||||
int foo (long long b)
|
||||
{
|
||||
return __builtin_clzll (b);
|
||||
}
|
||||
} "" ]
|
||||
}
|
||||
|
||||
# Return 1 if the target supports ctz on int.
|
||||
|
||||
proc check_effective_target_ctz { } {
|
||||
return [check_no_messages_and_pattern ctz "!\\(call" rtl-expand {
|
||||
int foo (int b)
|
||||
{
|
||||
return __builtin_ctz (b);
|
||||
}
|
||||
} "" ]
|
||||
}
|
||||
|
||||
# Return 1 if the target supports ctz on long.
|
||||
|
||||
proc check_effective_target_ctzl { } {
|
||||
return [check_no_messages_and_pattern ctzl "!\\(call" rtl-expand {
|
||||
int foo (long b)
|
||||
{
|
||||
return __builtin_ctzl (b);
|
||||
}
|
||||
} "" ]
|
||||
}
|
||||
|
||||
# Return 1 if the target supports ctz on long long.
|
||||
|
||||
proc check_effective_target_ctzll { } {
|
||||
return [check_no_messages_and_pattern ctzll "!\\(call" rtl-expand {
|
||||
int foo (long long b)
|
||||
{
|
||||
return __builtin_ctzll (b);
|
||||
}
|
||||
} "" ]
|
||||
}
|
||||
|
||||
# Return 1 if the target supports atomic operations on "long long"
|
||||
# and can execute them.
|
||||
#
|
||||
|
||||
@@ -3397,12 +3397,21 @@ expression_expensive_p (tree expr, hash_map<tree, uint64_t> &cache,
|
||||
library call for popcount when backend does not have an instruction
|
||||
to do so. We consider this to be expensive and generate
|
||||
__builtin_popcount only when backend defines it. */
|
||||
optab optab;
|
||||
combined_fn cfn = get_call_combined_fn (expr);
|
||||
switch (cfn)
|
||||
{
|
||||
CASE_CFN_POPCOUNT:
|
||||
optab = popcount_optab;
|
||||
goto bitcount_call;
|
||||
CASE_CFN_CLZ:
|
||||
optab = clz_optab;
|
||||
goto bitcount_call;
|
||||
CASE_CFN_CTZ:
|
||||
optab = ctz_optab;
|
||||
bitcount_call:
|
||||
/* Check if opcode for popcount is available in the mode required. */
|
||||
if (optab_handler (popcount_optab,
|
||||
if (optab_handler (optab,
|
||||
TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (expr, 0))))
|
||||
== CODE_FOR_nothing)
|
||||
{
|
||||
@@ -3415,7 +3424,7 @@ expression_expensive_p (tree expr, hash_map<tree, uint64_t> &cache,
|
||||
instructions. */
|
||||
if (is_a <scalar_int_mode> (mode, &int_mode)
|
||||
&& GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
|
||||
&& (optab_handler (popcount_optab, word_mode)
|
||||
&& (optab_handler (optab, word_mode)
|
||||
!= CODE_FOR_nothing))
|
||||
break;
|
||||
return true;
|
||||
|
||||
@@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "tree-chrec.h"
|
||||
#include "tree-scalar-evolution.h"
|
||||
#include "tree-dfa.h"
|
||||
#include "internal-fn.h"
|
||||
#include "gimple-range.h"
|
||||
|
||||
|
||||
@@ -2198,6 +2199,224 @@ number_of_iterations_popcount (loop_p loop, edge exit,
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return an expression that counts the leading/trailing zeroes of src.
|
||||
|
||||
If define_at_zero is true, then the built expression will be defined to
|
||||
return the precision of src when src == 0 (using either a conditional
|
||||
expression or a suitable internal function).
|
||||
Otherwise, we can elide the conditional expression and let src = 0 invoke
|
||||
undefined behaviour. */
|
||||
|
||||
static tree
|
||||
build_cltz_expr (tree src, bool leading, bool define_at_zero)
|
||||
{
|
||||
tree fn;
|
||||
internal_fn ifn = leading ? IFN_CLZ : IFN_CTZ;
|
||||
bool use_ifn = false;
|
||||
int prec = TYPE_PRECISION (TREE_TYPE (src));
|
||||
int i_prec = TYPE_PRECISION (integer_type_node);
|
||||
int li_prec = TYPE_PRECISION (long_integer_type_node);
|
||||
int lli_prec = TYPE_PRECISION (long_long_integer_type_node);
|
||||
|
||||
tree utype = unsigned_type_for (TREE_TYPE (src));
|
||||
src = fold_convert (utype, src);
|
||||
|
||||
if (direct_internal_fn_supported_p (ifn, utype, OPTIMIZE_FOR_BOTH))
|
||||
use_ifn = true;
|
||||
else if (prec <= i_prec)
|
||||
fn = leading ? builtin_decl_implicit (BUILT_IN_CLZ)
|
||||
: builtin_decl_implicit (BUILT_IN_CTZ);
|
||||
else if (prec == li_prec)
|
||||
fn = leading ? builtin_decl_implicit (BUILT_IN_CLZL)
|
||||
: builtin_decl_implicit (BUILT_IN_CTZL);
|
||||
else if (prec == lli_prec || prec == 2 * lli_prec)
|
||||
fn = leading ? builtin_decl_implicit (BUILT_IN_CLZLL)
|
||||
: builtin_decl_implicit (BUILT_IN_CTZLL);
|
||||
else
|
||||
return NULL_TREE;
|
||||
|
||||
tree call;
|
||||
if (use_ifn)
|
||||
{
|
||||
call = build_call_expr_internal_loc (UNKNOWN_LOCATION, ifn,
|
||||
integer_type_node, 1, src);
|
||||
int val;
|
||||
scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
|
||||
int optab_defined_at_zero
|
||||
= leading ? CLZ_DEFINED_VALUE_AT_ZERO (mode, val)
|
||||
: CTZ_DEFINED_VALUE_AT_ZERO (mode, val);
|
||||
if (define_at_zero && !(optab_defined_at_zero == 2 && val == prec))
|
||||
{
|
||||
tree is_zero = fold_build2 (NE_EXPR, boolean_type_node, src,
|
||||
build_zero_cst (TREE_TYPE (src)));
|
||||
call = fold_build3(COND_EXPR, integer_type_node, is_zero, call,
|
||||
build_int_cst (integer_type_node, prec));
|
||||
}
|
||||
}
|
||||
else if (prec == 2 * lli_prec)
|
||||
{
|
||||
tree src1 = fold_convert (long_long_unsigned_type_node,
|
||||
fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
|
||||
unshare_expr (src),
|
||||
build_int_cst (integer_type_node,
|
||||
lli_prec)));
|
||||
tree src2 = fold_convert (long_long_unsigned_type_node, src);
|
||||
/* We count the zeroes in src1, and add the number in src2 when src1
|
||||
is 0. */
|
||||
if (!leading)
|
||||
std::swap(src1, src2);
|
||||
tree call1 = build_call_expr (fn, 1, src1);
|
||||
tree call2 = build_call_expr (fn, 1, src2);
|
||||
if (define_at_zero)
|
||||
{
|
||||
tree is_zero2 = fold_build2 (NE_EXPR, boolean_type_node, src2,
|
||||
build_zero_cst (TREE_TYPE (src2)));
|
||||
call2 = fold_build3(COND_EXPR, integer_type_node, is_zero2, call2,
|
||||
build_int_cst (integer_type_node, lli_prec));
|
||||
}
|
||||
tree is_zero1 = fold_build2 (NE_EXPR, boolean_type_node, src1,
|
||||
build_zero_cst (TREE_TYPE (src1)));
|
||||
call = fold_build3(COND_EXPR, integer_type_node, is_zero1, call1,
|
||||
fold_build2 (PLUS_EXPR, integer_type_node, call2,
|
||||
build_int_cst (integer_type_node,
|
||||
lli_prec)));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (prec < i_prec)
|
||||
src = fold_convert (unsigned_type_node, src);
|
||||
|
||||
call = build_call_expr (fn, 1, src);
|
||||
if (define_at_zero)
|
||||
{
|
||||
tree is_zero = fold_build2 (NE_EXPR, boolean_type_node, src,
|
||||
build_zero_cst (TREE_TYPE (src)));
|
||||
call = fold_build3(COND_EXPR, integer_type_node, is_zero, call,
|
||||
build_int_cst (integer_type_node, prec));
|
||||
}
|
||||
|
||||
if (leading && prec < i_prec)
|
||||
call = fold_build2(MINUS_EXPR, integer_type_node, call,
|
||||
build_int_cst (integer_type_node,
|
||||
i_prec - prec));
|
||||
}
|
||||
|
||||
return call;
|
||||
}
|
||||
|
||||
/* See comment below for number_of_iterations_bitcount.
|
||||
For c[lt]z complement, we have:
|
||||
|
||||
modify:
|
||||
iv_2 = iv_1 >> 1 OR iv_1 << 1
|
||||
|
||||
test:
|
||||
if (iv != 0)
|
||||
|
||||
modification count:
|
||||
src precision - c[lt]z (src)
|
||||
|
||||
*/
|
||||
|
||||
static bool
|
||||
number_of_iterations_cltz_complement (loop_p loop, edge exit,
|
||||
enum tree_code code,
|
||||
class tree_niter_desc *niter)
|
||||
{
|
||||
bool modify_before_test = true;
|
||||
HOST_WIDE_INT max;
|
||||
|
||||
/* Check that condition for staying inside the loop is like
|
||||
if (iv != 0). */
|
||||
gimple *cond_stmt = last_stmt (exit->src);
|
||||
if (!cond_stmt
|
||||
|| gimple_code (cond_stmt) != GIMPLE_COND
|
||||
|| code != NE_EXPR
|
||||
|| !integer_zerop (gimple_cond_rhs (cond_stmt))
|
||||
|| TREE_CODE (gimple_cond_lhs (cond_stmt)) != SSA_NAME)
|
||||
return false;
|
||||
|
||||
tree iv_2 = gimple_cond_lhs (cond_stmt);
|
||||
gimple *iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
|
||||
|
||||
/* If the test comes before the iv modification, then these will actually be
|
||||
iv_1 and a phi node. */
|
||||
if (gimple_code (iv_2_stmt) == GIMPLE_PHI
|
||||
&& gimple_bb (iv_2_stmt) == loop->header
|
||||
&& gimple_phi_num_args (iv_2_stmt) == 2
|
||||
&& (TREE_CODE (gimple_phi_arg_def (iv_2_stmt,
|
||||
loop_latch_edge (loop)->dest_idx))
|
||||
== SSA_NAME))
|
||||
{
|
||||
/* iv_2 is actually one of the inputs to the phi. */
|
||||
iv_2 = gimple_phi_arg_def (iv_2_stmt, loop_latch_edge (loop)->dest_idx);
|
||||
iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
|
||||
modify_before_test = false;
|
||||
}
|
||||
|
||||
/* Make sure iv_2_stmt is a logical shift by one stmt:
|
||||
iv_2 = iv_1 {>>|<<} 1 */
|
||||
if (!is_gimple_assign (iv_2_stmt)
|
||||
|| (gimple_assign_rhs_code (iv_2_stmt) != LSHIFT_EXPR
|
||||
&& (gimple_assign_rhs_code (iv_2_stmt) != RSHIFT_EXPR
|
||||
|| !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (iv_2_stmt)))))
|
||||
|| !integer_onep (gimple_assign_rhs2 (iv_2_stmt)))
|
||||
return false;
|
||||
|
||||
bool left_shift = (gimple_assign_rhs_code (iv_2_stmt) == LSHIFT_EXPR);
|
||||
|
||||
tree iv_1 = gimple_assign_rhs1 (iv_2_stmt);
|
||||
|
||||
/* Check the recurrence. */
|
||||
gimple *phi = SSA_NAME_DEF_STMT (iv_1);
|
||||
if (gimple_code (phi) != GIMPLE_PHI
|
||||
|| (gimple_bb (phi) != loop_latch_edge (loop)->dest)
|
||||
|| (iv_2 != gimple_phi_arg_def (phi, loop_latch_edge (loop)->dest_idx)))
|
||||
return false;
|
||||
|
||||
/* We found a match. */
|
||||
tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
|
||||
int src_precision = TYPE_PRECISION (TREE_TYPE (src));
|
||||
|
||||
/* Get the corresponding c[lt]z builtin. */
|
||||
tree expr = build_cltz_expr (src, !left_shift, true);
|
||||
|
||||
if (!expr)
|
||||
return false;
|
||||
|
||||
expr = fold_build2 (MINUS_EXPR, integer_type_node,
|
||||
build_int_cst (integer_type_node, src_precision),
|
||||
expr);
|
||||
|
||||
max = src_precision;
|
||||
|
||||
tree may_be_zero = boolean_false_node;
|
||||
|
||||
if (modify_before_test)
|
||||
{
|
||||
expr = fold_build2 (MINUS_EXPR, integer_type_node, expr,
|
||||
integer_one_node);
|
||||
max = max - 1;
|
||||
may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
|
||||
build_zero_cst (TREE_TYPE (src)));
|
||||
}
|
||||
|
||||
expr = fold_convert (unsigned_type_node, expr);
|
||||
|
||||
niter->assumptions = boolean_true_node;
|
||||
niter->may_be_zero = simplify_using_initial_conditions (loop, may_be_zero);
|
||||
niter->niter = simplify_using_initial_conditions (loop, expr);
|
||||
|
||||
if (TREE_CODE (niter->niter) == INTEGER_CST)
|
||||
niter->max = tree_to_uhwi (niter->niter);
|
||||
else
|
||||
niter->max = max;
|
||||
|
||||
niter->bound = NULL_TREE;
|
||||
niter->cmp = ERROR_MARK;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* See if LOOP contains a bit counting idiom. The idiom consists of two parts:
|
||||
1. A modification to the induction variabler;.
|
||||
2. A test to determine whether or not to exit the loop.
|
||||
@@ -2244,7 +2463,8 @@ number_of_iterations_bitcount (loop_p loop, edge exit,
|
||||
enum tree_code code,
|
||||
class tree_niter_desc *niter)
|
||||
{
|
||||
return number_of_iterations_popcount (loop, exit, code, niter);
|
||||
return (number_of_iterations_popcount (loop, exit, code, niter)
|
||||
|| number_of_iterations_cltz_complement (loop, exit, code, niter));
|
||||
}
|
||||
|
||||
/* Substitute NEW_TREE for OLD in EXPR and fold the result.
|
||||
|
||||
Reference in New Issue
Block a user