public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] [Middle-end] Enhance final_value_replacement_loop to handle bitwise induction.
@ 2022-05-09  5:19 liuhongt
  2022-05-11  8:45 ` Richard Biener
  0 siblings, 1 reply; 6+ messages in thread
From: liuhongt @ 2022-05-09  5:19 UTC (permalink / raw)
  To: gcc-patches

This patch will enable below optimization:

 {
-  int bit;
-  long long unsigned int _1;
-  long long unsigned int _2;
-
   <bb 2> [local count: 46707768]:
-
-  <bb 3> [local count: 1027034057]:
-  # tmp_11 = PHI <tmp_8(3), tmp_6(D)(2)>
-  # bit_13 = PHI <bit_9(3), 63(2)>
-  _1 = 1 << bit_13;
-  _2 = ~_1;
-  tmp_8 = _2 & tmp_11;
-  bit_9 = bit_13 + -3;
-  if (bit_9 != -3(OVF))
-    goto <bb 3>; [95.65%]
-  else
-    goto <bb 4>; [4.35%]
-
-  <bb 4> [local count: 46707768]:
-  return tmp_8;
+  tmp_12 = tmp_6(D) & 7905747460161236406;
+  return tmp_12;

 }


Boostrapped and regtested on x86_64-pc-linux-gnu{-m32,}
Ok for trunk?

gcc/ChangeLog:

	PR middle-end/103462
	* match.pd (bitwise_induction_p): New match.
	* tree-scalar-evolution.c (gimple_bitwise_induction_p):
	Declare.
	(analyze_and_compute_bitwise_induction_effect): New function.
	(enum bit_op_kind): New enum.
	(final_value_replacement_loop): Enhanced to handle bitwise
	induction.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr103462-1.c: New test.
	* gcc.target/i386/pr103462-2.c: New test.
	* gcc.target/i386/pr103462-3.c: New test.
	* gcc.target/i386/pr103462-4.c: New test.
	* gcc.target/i386/pr103462-5.c: New test.
	* gcc.target/i386/pr103462-6.c: New test.
---
 gcc/match.pd                               |   7 +
 gcc/testsuite/gcc.target/i386/pr103462-1.c | 111 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr103462-2.c |  45 ++++++
 gcc/testsuite/gcc.target/i386/pr103462-3.c | 111 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr103462-4.c |  46 ++++++
 gcc/testsuite/gcc.target/i386/pr103462-5.c | 111 +++++++++++++
 gcc/testsuite/gcc.target/i386/pr103462-6.c |  46 ++++++
 gcc/tree-scalar-evolution.cc               | 178 ++++++++++++++++++++-
 8 files changed, 654 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103462-6.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 6d691d302b3..24ff5f9e6a8 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -7746,3 +7746,10 @@ and,
 	       == TYPE_UNSIGNED (TREE_TYPE (@3))))
        && single_use (@4)
        && single_use (@5))))
+
+(for bit_op (bit_and bit_ior bit_xor)
+ (match (bitwise_induction_p @0 @2 @3)
+   (bit_op:c (nop_convert1? (bit_not2?@0 (convert3? (lshift integer_onep@1 @2)))) @3)))
+
+(match (bitwise_induction_p @0 @2 @3)
+  (bit_not (nop_convert1? (bit_xor@0 (convert2? (lshift integer_onep@1 @2)) @3))))
diff --git a/gcc/testsuite/gcc.target/i386/pr103462-1.c b/gcc/testsuite/gcc.target/i386/pr103462-1.c
new file mode 100644
index 00000000000..1dc4c2acad6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103462-1.c
@@ -0,0 +1,111 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-sccp-details" } */
+/* { dg-final { scan-tree-dump-times {final value replacement} 12 "sccp" } } */
+
+unsigned long long
+__attribute__((noipa))
+foo (unsigned long long tmp)
+{
+  for (int bit = 0; bit < 64; bit += 3)
+    tmp &= ~(1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo1 (unsigned long long tmp)
+{
+  for (int bit = 63; bit >= 0; bit -= 3)
+    tmp &= ~(1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo2 (unsigned long long tmp)
+{
+  for (int bit = 0; bit < 64; bit += 3)
+    tmp &= (1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo3 (unsigned long long tmp)
+{
+  for (int bit = 63; bit >= 0; bit -= 3)
+    tmp &= (1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo4 (unsigned long long tmp)
+{
+  for (int bit = 0; bit < 64; bit += 3)
+    tmp |= ~(1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo5 (unsigned long long tmp)
+{
+  for (int bit = 63; bit >= 0; bit -= 3)
+    tmp |= ~(1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo6 (unsigned long long tmp)
+{
+  for (int bit = 0; bit < 64; bit += 3)
+    tmp |= (1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo7 (unsigned long long tmp)
+{
+  for (int bit = 63; bit >= 0; bit -= 3)
+    tmp |= (1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo8 (unsigned long long tmp)
+{
+  for (int bit = 0; bit < 64; bit += 3)
+    tmp ^= ~(1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo9 (unsigned long long tmp)
+{
+  for (int bit = 63; bit >= 0; bit -= 3)
+    tmp ^= ~(1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo10 (unsigned long long tmp)
+{
+  for (int bit = 0; bit < 64; bit += 3)
+    tmp ^= (1ULL << bit);
+  return tmp;
+}
+
+unsigned long long
+__attribute__((noipa))
+foo11 (unsigned long long tmp)
+{
+  for (int bit = 63; bit >= 0; bit -= 3)
+    tmp ^= (1ULL << bit);
+  return tmp;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr103462-2.c b/gcc/testsuite/gcc.target/i386/pr103462-2.c
new file mode 100644
index 00000000000..bc375cb78d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103462-2.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-options "-O1" } */
+
+#include "pr103462-1.c"
+
+int main()
+{
+  unsigned long long tmp = 0x1111111111111111ULL;
+  if (foo (tmp) != 0x110110110110110ULL)
+    __builtin_abort ();
+
+  if (foo1 (tmp) != 0x110110110110110ULL)
+    __builtin_abort ();
+
+  if (foo2 (tmp) != 0x0ULL)
+    __builtin_abort ();
+
+  if (foo3 (tmp) != 0x0ULL)
+    __builtin_abort ();
+
+  if (foo4 (tmp) != 0xffffffffffffffffULL)
+    __builtin_abort ();
+
+  if (foo5 (tmp) != 0xffffffffffffffffULL)
+    __builtin_abort ();
+
+  if (foo6 (tmp) != 0x9359359359359359ULL)
+    __builtin_abort ();
+
+  if (foo7 (tmp) != 0x9359359359359359ULL)
+    __builtin_abort ();
+
+  if (foo8 (tmp) != 0x8358358358358358ULL)
+    __builtin_abort ();
+
+  if (foo9 (tmp) != 0x8358358358358358ULL)
+    __builtin_abort ();
+
+  if (foo10 (tmp) != 0x8358358358358358ULL)
+    __builtin_abort ();
+
+  if (foo11 (tmp) != 0x8358358358358358ULL)
+    __builtin_abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/pr103462-3.c b/gcc/testsuite/gcc.target/i386/pr103462-3.c
new file mode 100644
index 00000000000..4ba248a4872
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103462-3.c
@@ -0,0 +1,111 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-sccp-details" } */
+/* { dg-final { scan-tree-dump-times {final value replacement} 12 "sccp" } } */
+
+unsigned int
+__attribute__((noipa))
+foo (unsigned int tmp)
+{
+  for (int bit = 0; bit < 32; bit += 3)
+    tmp &= ~(1U << bit);
+  return tmp;
+}
+
+unsigned int
+__attribute__((noipa))
+foo1 (unsigned int tmp)
+{
+  for (int bit = 31; bit >= 0; bit -= 3)
+    tmp &= ~(1U << bit);
+  return tmp;
+}
+
+unsigned int
+__attribute__((noipa))
+foo2 (unsigned int tmp)
+{
+  for (int bit = 0; bit < 32; bit += 3)
+    tmp &= (1U << bit);
+  return tmp;
+}
+
+unsigned int
+__attribute__((noipa))
+foo3 (unsigned int tmp)
+{
+  for (int bit = 31; bit >= 0; bit -= 3)
+    tmp &= (1U << bit);
+  return tmp;
+}
+
+unsigned int
+__attribute__((noipa))
+foo4 (unsigned int tmp)
+{
+  for (int bit = 0; bit < 32; bit += 3)
+    tmp |= ~(1U << bit);
+  return tmp;
+}
+
+unsigned int
+__attribute__((noipa))
+foo5 (unsigned int tmp)
+{
+  for (int bit = 31; bit >= 0; bit -= 3)
+    tmp |= ~(1U << bit);
+  return tmp;
+}
+
+unsigned int
+__attribute__((noipa))
+foo6 (unsigned int tmp)
+{
+  for (int bit = 0; bit < 32; bit += 3)
+    tmp |= (1U << bit);
+  return tmp;
+}
+
+unsigned int
+__attribute__((noipa))
+foo7 (unsigned int tmp)
+{
+  for (int bit = 31; bit >= 0; bit -= 3)
+    tmp |= (1U << bit);
+  return tmp;
+}
+
+unsigned int
+__attribute__((noipa))
+foo8 (unsigned int tmp)
+{
+  for (int bit = 0; bit < 32; bit += 3)
+    tmp ^= ~(1U << bit);
+  return tmp;
+}
+
+unsigned int
+__attribute__((noipa))
+foo9 (unsigned int tmp)
+{
+  for (int bit = 31; bit >= 0; bit -= 3)
+    tmp ^= ~(1U << bit);
+  return tmp;
+}
+
+unsigned int
+__attribute__((noipa))
+foo10 (unsigned int tmp)
+{
+  for (int bit = 0; bit < 32; bit += 3)
+    tmp ^= (1U << bit);
+  return tmp;
+}
+
+unsigned int
+__attribute__((noipa))
+foo11 (unsigned int tmp)
+{
+  for (int bit = 31; bit >= 0; bit -= 3)
+    tmp ^= (1U << bit);
+  return tmp;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr103462-4.c b/gcc/testsuite/gcc.target/i386/pr103462-4.c
new file mode 100644
index 00000000000..e2f4056f525
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103462-4.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O1" } */
+
+#include "pr103462-3.c"
+
+int main()
+{
+  unsigned int tmp = 0x11111111U;
+
+  if (foo (tmp) != 0x10110110U)
+    __builtin_abort ();
+
+  if (foo1 (tmp) != 0x1101101U)
+    __builtin_abort ();
+
+  if (foo2 (tmp) != 0x0U)
+    __builtin_abort ();
+
+  if (foo3 (tmp) != 0x0U)
+    __builtin_abort ();
+
+  if (foo4 (tmp) != 0xffffffffU)
+    __builtin_abort ();
+
+  if (foo5 (tmp) != 0xffffffffU)
+    __builtin_abort ();
+
+  if (foo6 (tmp) != 0x59359359U)
+    __builtin_abort ();
+
+  if (foo7 (tmp) != 0x93593593U)
+    __builtin_abort ();
+
+  if (foo8 (tmp) != 0xa7ca7ca7U)
+    __builtin_abort ();
+
+  if (foo9 (tmp) != 0x7ca7ca7cU)
+    __builtin_abort ();
+
+  if (foo10 (tmp) != 0x58358358U)
+    __builtin_abort ();
+
+  if (foo11 (tmp) != 0x83583583U)
+    __builtin_abort ();
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/pr103462-5.c b/gcc/testsuite/gcc.target/i386/pr103462-5.c
new file mode 100644
index 00000000000..1f4ffa34b48
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103462-5.c
@@ -0,0 +1,111 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-sccp-details" } */
+/* { dg-final { scan-tree-dump-times {final value replacement} 12 "sccp" } } */
+
+unsigned short
+__attribute__((noipa))
+foo (unsigned short tmp)
+{
+  for (int bit = 0; bit < 16; bit += 3)
+    tmp &= ~(1U << bit);
+  return tmp;
+}
+
+unsigned short
+__attribute__((noipa))
+foo1 (unsigned short tmp)
+{
+  for (int bit = 15; bit >= 0; bit -= 3)
+    tmp &= ~(1U << bit);
+  return tmp;
+}
+
+unsigned short
+__attribute__((noipa))
+foo2 (unsigned short tmp)
+{
+  for (int bit = 0; bit < 16; bit += 3)
+    tmp &= (1U << bit);
+  return tmp;
+}
+
+unsigned short
+__attribute__((noipa))
+foo3 (unsigned short tmp)
+{
+  for (int bit = 15; bit >= 0; bit -= 3)
+    tmp &= (1U << bit);
+  return tmp;
+}
+
+unsigned short
+__attribute__((noipa))
+foo4 (unsigned short tmp)
+{
+  for (int bit = 0; bit < 16; bit += 3)
+    tmp |= ~(1U << bit);
+  return tmp;
+}
+
+unsigned short
+__attribute__((noipa))
+foo5 (unsigned short tmp)
+{
+  for (int bit = 15; bit >= 0; bit -= 3)
+    tmp |= ~(1U << bit);
+  return tmp;
+}
+
+unsigned short
+__attribute__((noipa))
+foo6 (unsigned short tmp)
+{
+  for (int bit = 0; bit < 16; bit += 3)
+    tmp |= (1U << bit);
+  return tmp;
+}
+
+unsigned short
+__attribute__((noipa))
+foo7 (unsigned short tmp)
+{
+  for (int bit = 15; bit >= 0; bit -= 3)
+    tmp |= (1U << bit);
+  return tmp;
+}
+
+unsigned short
+__attribute__((noipa))
+foo8 (unsigned short tmp)
+{
+  for (int bit = 0; bit < 16; bit += 3)
+    tmp ^= ~(1U << bit);
+  return tmp;
+}
+
+unsigned short
+__attribute__((noipa))
+foo9 (unsigned short tmp)
+{
+  for (int bit = 15; bit >= 0; bit -= 3)
+    tmp ^= ~(1U << bit);
+  return tmp;
+}
+
+unsigned short
+__attribute__((noipa))
+foo10 (unsigned short tmp)
+{
+  for (int bit = 0; bit < 16; bit += 3)
+    tmp ^= (1U << bit);
+  return tmp;
+}
+
+unsigned short
+__attribute__((noipa))
+foo11 (unsigned short tmp)
+{
+  for (int bit = 15; bit >= 0; bit -= 3)
+    tmp ^= (1U << bit);
+  return tmp;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr103462-6.c b/gcc/testsuite/gcc.target/i386/pr103462-6.c
new file mode 100644
index 00000000000..65426d71efe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103462-6.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-options "-O1" } */
+
+#include "pr103462-5.c"
+
+int main()
+{
+  unsigned short tmp = 0x1111U;
+
+  if (foo (tmp) != 0x110)
+    __builtin_abort ();
+
+  if (foo1 (tmp) != 0x110)
+    __builtin_abort ();
+
+  if (foo2 (tmp) != 0x0)
+    __builtin_abort ();
+
+  if (foo3 (tmp) != 0x0)
+    __builtin_abort ();
+
+  if (foo4 (tmp) != 0xffff)
+    __builtin_abort ();
+
+  if (foo5 (tmp) != 0xffff)
+    __builtin_abort ();
+
+  if (foo6 (tmp) != 0x9359)
+    __builtin_abort ();
+
+  if (foo7 (tmp) != 0x9359)
+    __builtin_abort ();
+
+  if (foo8 (tmp) != 0x8358)
+    __builtin_abort ();
+
+  if (foo9 (tmp) != 0x8358)
+    __builtin_abort ();
+
+  if (foo10 (tmp) != 0x8358)
+    __builtin_abort ();
+
+  if (foo11 (tmp) != 0x8358)
+    __builtin_abort ();
+}
+
diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc
index 72ceb4001e3..9b0aec4fd09 100644
--- a/gcc/tree-scalar-evolution.cc
+++ b/gcc/tree-scalar-evolution.cc
@@ -3487,6 +3487,164 @@ expression_expensive_p (tree expr)
 	  || expanded_size > cache.elements ());
 }
 
+/* Match.pd function to match bitwise inductive expression.
+   .i.e.
+   _2 = 1 << _1;
+   _3 = ~_2;
+   tmp_9 = _3 & tmp_12;  */
+extern bool gimple_bitwise_induction_p (tree, tree *, tree (*)(tree));
+
+/* Return the inductive expression of bitwise operation if possible,
+   otherwise returns DEF.  */
+static tree
+analyze_and_compute_bitwise_induction_effect (class loop* ex_loop,
+					      class loop* loop,
+					      tree phidef,
+					      tree def,
+					      tree niter)
+{
+  tree match_op[3],inv, bitwise_scev, bitwise_res;
+  bool folded_casts = false;
+  tree type = TREE_TYPE (phidef);
+  gimple* header_phi = NULL;
+
+  /* Match things like op2(MATCH_OP[2]), op1(MATCH_OP[1]), phidef(PHIDEF)
+
+     op2 = PHI <phidef, inv>
+     _1 = (int) bit_17;
+     _3 = 1 << _1;
+     op1 = ~_3;
+     phidef = op1 & op2;  */
+  if (!gimple_bitwise_induction_p (phidef, &match_op[0], NULL)
+      || TREE_CODE (match_op[2]) != SSA_NAME
+      || gimple_code (SSA_NAME_DEF_STMT (match_op[2])) != GIMPLE_PHI
+      || gimple_phi_num_args (SSA_NAME_DEF_STMT (match_op[2])) != 2)
+    return def;
+
+  header_phi = SSA_NAME_DEF_STMT (match_op[2]);
+  if (gimple_bb (header_phi) != loop->header)
+    return def;
+
+  if (PHI_ARG_DEF_FROM_EDGE (header_phi, loop_latch_edge (loop)) != phidef)
+    return def;
+
+  inv = gimple_phi_arg_def (header_phi, 0);
+  if (inv == phidef)
+    inv = gimple_phi_arg_def (header_phi, 1);
+
+  bitwise_scev = analyze_scalar_evolution_in_loop (ex_loop, loop,
+						   match_op[1],
+						   &folded_casts);
+
+  /* Make sure bits is in range of type precision.  */
+  if (TREE_CODE (bitwise_scev) != POLYNOMIAL_CHREC
+      || !tree_fits_uhwi_p (CHREC_LEFT (bitwise_scev))
+      || tree_to_uhwi (CHREC_LEFT (bitwise_scev)) >= TYPE_PRECISION (type)
+      || !tree_fits_shwi_p (CHREC_RIGHT (bitwise_scev)))
+    return def;
+
+  bitwise_res = compute_overall_effect_of_inner_loop (ex_loop, bitwise_scev);
+  if (!tree_fits_uhwi_p (bitwise_res)
+      || tree_to_uhwi (bitwise_res) >= TYPE_PRECISION (type))
+    return def;
+
+enum bit_op_kind
+  {
+   INDUCTION_BIT_CLEAR,
+   INDUCTION_BIT_IOR,
+   INDUCTION_BIT_XOR,
+   INDUCTION_BIT_RESET,
+   INDUCTION_ZERO,
+   INDUCTION_ALL
+  };
+
+  enum bit_op_kind induction_kind;
+  enum tree_code code1
+    = gimple_assign_rhs_code (SSA_NAME_DEF_STMT (phidef));
+  enum tree_code code2
+    = gimple_assign_rhs_code (SSA_NAME_DEF_STMT (match_op[0]));
+
+  /* BIT_CLEAR: A &= ~(1 << bit)
+     BIT_RESET: A ^= (1 << bit).
+     BIT_IOR: A |= (1 << bit)
+     BIT_ZERO: A &= (1 << bit)
+     BIT_ALL: A |= ~(1 << bit)
+     BIT_XOR: A ^= ~(1 << bit).
+     bit is induction variable.  */
+  switch (code1)
+    {
+    case BIT_AND_EXPR:
+      induction_kind = code2 == BIT_NOT_EXPR
+	? INDUCTION_BIT_CLEAR
+	: INDUCTION_ZERO;
+      break;
+    case BIT_IOR_EXPR:
+      induction_kind = code2 == BIT_NOT_EXPR
+	? INDUCTION_ALL
+	: INDUCTION_BIT_IOR;
+      break;
+    case BIT_XOR_EXPR:
+      induction_kind = code2 == BIT_NOT_EXPR
+	? INDUCTION_BIT_XOR
+	: INDUCTION_BIT_RESET;
+      break;
+      /* A ^ ~(1 << bit) is equal to ~(A ^ (1 << bit)).  */
+    case BIT_NOT_EXPR:
+      gcc_assert (code2 == BIT_XOR_EXPR);
+      induction_kind = INDUCTION_BIT_XOR;
+      break;
+    default:
+      gcc_unreachable();
+    }
+
+  if (induction_kind == INDUCTION_ZERO)
+    return build_zero_cst (type);
+  if (induction_kind == INDUCTION_ALL)
+    return build_all_ones_cst (type);
+
+  wide_int bits = wi::zero (TYPE_PRECISION (type));
+  HOST_WIDE_INT start = tree_to_shwi (CHREC_LEFT (bitwise_scev));
+  HOST_WIDE_INT step = tree_to_shwi (CHREC_RIGHT (bitwise_scev));
+  unsigned HOST_WIDE_INT num_iter = tree_to_uhwi (niter);
+
+  /* Loop tripcount should be num_iter + 1.  */
+  for (unsigned i = 0; i != num_iter + 1; i++)
+    {
+      bits = wi::bit_or (bits,
+			 wi::lshift (wi::one (TYPE_PRECISION (type)),
+				     start));
+      start += step;
+    }
+
+  bool inverted = false;
+  switch (induction_kind)
+    {
+    case INDUCTION_BIT_CLEAR:
+      code1 = BIT_AND_EXPR;
+      inverted = true;
+      break;
+    case INDUCTION_BIT_IOR:
+      code1 = BIT_IOR_EXPR;
+      break;
+    case INDUCTION_BIT_RESET:
+      code1 = BIT_XOR_EXPR;
+      break;
+    /* A ^= ~(1 << bit) is special, when loop tripcount is even,
+       it's equal to  A ^= bits, else A ^= ~bits.  */
+    case INDUCTION_BIT_XOR:
+      code1 = BIT_XOR_EXPR;
+      if (num_iter % 2 == 0)
+	inverted = true;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  if (inverted)
+    bits = wi::bit_not (bits);
+  return fold_build2 (code1, type, inv, wide_int_to_tree (type, bits));
+}
+
 /* Do final value replacement for LOOP, return true if we did anything.  */
 
 bool
@@ -3519,7 +3677,8 @@ final_value_replacement_loop (class loop *loop)
     {
       gphi *phi = psi.phi ();
       tree rslt = PHI_RESULT (phi);
-      tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
+      tree phidef = PHI_ARG_DEF_FROM_EDGE (phi, exit);
+      tree def = phidef;
       if (virtual_operand_p (def))
 	{
 	  gsi_next (&psi);
@@ -3537,6 +3696,23 @@ final_value_replacement_loop (class loop *loop)
       def = analyze_scalar_evolution_in_loop (ex_loop, loop, def,
 					      &folded_casts);
       def = compute_overall_effect_of_inner_loop (ex_loop, def);
+
+      /* Handle bitwise induction expression.
+
+	 .i.e.
+	 for (int i = 0; i != 64; i+=3)
+	   res &= ~(1UL << i);
+
+	 RES can't be analyzed out by SCEV because it is not polynomially
+	 expressible, but in fact final value of RES can be replaced by
+	 RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63}
+	 being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR.  */
+      if (tree_fits_uhwi_p (niter)
+	  && tree_to_uhwi (niter)
+	  && tree_to_uhwi (niter) != HOST_WIDE_INT_M1U)
+	def = analyze_and_compute_bitwise_induction_effect (ex_loop, loop,
+							    phidef, def, niter);
+
       if (!tree_does_not_contain_chrecs (def)
 	  || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
 	  /* Moving the computation from the loop may prolong life range
-- 
2.18.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-05-18  6:43 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-09  5:19 [PATCH] [Middle-end] Enhance final_value_replacement_loop to handle bitwise induction liuhongt
2022-05-11  8:45 ` Richard Biener
2022-05-13  3:37   ` Hongtao Liu
2022-05-13 11:16     ` Richard Biener
2022-05-18  2:45       ` Hongtao Liu
2022-05-18  6:43         ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).