public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] tree-optimization/114760 - check variants of >> and << in loop-niter
@ 2024-05-10 10:55 Di Zhao OS
  2024-05-10 12:55 ` Richard Biener
  0 siblings, 1 reply; 3+ messages in thread
From: Di Zhao OS @ 2024-05-10 10:55 UTC (permalink / raw)
  To: gcc-patches

This patch tries to fix pr114760 by checking for the
variants explicitly. When recognizing bit counting idiom,
include pattern "x * 2" for "x << 1", and "x / 2" for
"x >> 1" (given x is unsigned).

Bootstrapped and tested on x86_64-linux-gnu.

Thanks,
Di Zhao

---

gcc/ChangeLog:
        PR tree-optimization/114760
        * tree-ssa-loop-niter.cc (is_lshift_by_1): New function
        to check if STMT is equivalent to x << 1.
        (is_rshift_by_1): New function to check if STMT is
        equivalent to x >> 1.
        (number_of_iterations_cltz): Enhance the identification
        of logical shift by one.
        (number_of_iterations_cltz_complement): Enhance the
        identification of logical shift by one.

gcc/testsuite/ChangeLog:
        PR tree-optimization/114760
        * gcc.dg/tree-ssa/pr114760-1.c: New test.
        * gcc.dg/tree-ssa/pr114760-2.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c | 69 ++++++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c | 20 +++++++
 gcc/tree-ssa-loop-niter.cc                 | 56 +++++++++++++-----
 3 files changed, 131 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c
new file mode 100644
index 00000000000..9f10ccc3b51
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c
@@ -0,0 +1,69 @@
+/* PR tree-optimization/114760 */
+/* { dg-do compile } */
+/* { dg-require-effective-target clz } */
+/* { dg-require-effective-target ctz } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+unsigned
+ntz32_1 (unsigned x)
+{
+  int n = 32;
+  while (x != 0)
+    {
+      n = n - 1;
+      x = x * 2;
+    }
+  return n;
+}
+
+unsigned
+ntz32_2 (unsigned x)
+{
+  int n = 32;
+  while (x != 0)
+    {
+      n = n - 1;
+      x = x + x;
+    }
+  return n;
+}
+
+unsigned
+ntz32_3 (unsigned x)
+{
+  int n = 32;
+  while (x != 0)
+    {
+      n = n - 1;
+      x = x << 1;
+    }
+  return n;
+}
+
+#define PREC (__CHAR_BIT__ * __SIZEOF_INT__)
+int
+nlz32_1 (unsigned int b) {
+    int c = PREC;
+
+    while (b != 0) {
+	b >>= 1;
+	c --;
+    }
+
+    return c;
+}
+
+int
+nlz32_2 (unsigned int b) {
+    int c = PREC;
+
+    while (b != 0) {
+	b /= 2;
+	c --;
+    }
+
+    return c;
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 3 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 2 "optimized" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c
new file mode 100644
index 00000000000..e1b4c4b1338
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c
@@ -0,0 +1,20 @@
+/* PR tree-optimization/114760 */
+/* { dg-do compile } */
+/* { dg-require-effective-target clz } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+// Check that for signed type, there's no CLZ.
+#define PREC (__CHAR_BIT__ * __SIZEOF_INT__)
+int
+no_nlz32 (int b) {
+    int c = PREC;
+
+    while (b != 0) {
+	b /= 2;
+	c --;
+    }
+
+    return c;
+}
+
+/* { dg-final { scan-tree-dump-not "__builtin_ctz|\\.CLZ" "optimized" } } */
\ No newline at end of file
diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
index 0fde07e626f..1d99264949b 100644
--- a/gcc/tree-ssa-loop-niter.cc
+++ b/gcc/tree-ssa-loop-niter.cc
@@ -2303,6 +2303,38 @@ build_cltz_expr (tree src, bool leading, bool define_at_zero)
   return call;
 }
 
+/* Returns true if STMT is equivalent to x << 1.  */
+
+static bool
+is_lshift_by_1 (gimple *stmt)
+{
+  if (gimple_assign_rhs_code (stmt) == LSHIFT_EXPR
+      && integer_onep (gimple_assign_rhs2 (stmt)))
+    return true;
+  if (gimple_assign_rhs_code (stmt) == MULT_EXPR
+      && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST
+      && tree_to_shwi (gimple_assign_rhs2 (stmt)) == 2)
+    return true;
+  return false;
+}
+
+/* Returns true if STMT is equivalent to x >> 1.  */
+
+static bool
+is_rshift_by_1 (gimple *stmt)
+{
+  if (!TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (stmt))))
+    return false;
+  if (gimple_assign_rhs_code (stmt) == RSHIFT_EXPR
+      && integer_onep (gimple_assign_rhs2 (stmt)))
+    return true;
+  if (gimple_assign_rhs_code (stmt) == TRUNC_DIV_EXPR
+      && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST
+      && tree_to_shwi (gimple_assign_rhs2 (stmt)) == 2)
+    return true;
+  return false;
+}
+
 /* See comment below for number_of_iterations_bitcount.
    For c[lt]z, we have:
 
@@ -2400,14 +2432,12 @@ number_of_iterations_cltz (loop_p loop, edge exit,
 
   /* Make sure iv_2_stmt is a logical shift by one stmt:
      iv_2 = iv_1 {<<|>>} 1  */
-  if (!is_gimple_assign (iv_2_stmt)
-      || (gimple_assign_rhs_code (iv_2_stmt) != LSHIFT_EXPR
-	  && (gimple_assign_rhs_code (iv_2_stmt) != RSHIFT_EXPR
-	      || !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (iv_2_stmt)))))
-      || !integer_onep (gimple_assign_rhs2 (iv_2_stmt)))
+  if (!is_gimple_assign (iv_2_stmt))
+    return false;
+  bool left_shift = false;
+  if (!((left_shift = is_lshift_by_1 (iv_2_stmt))
+	|| is_rshift_by_1 (iv_2_stmt)))
     return false;
-
-  bool left_shift = (gimple_assign_rhs_code (iv_2_stmt) == LSHIFT_EXPR);
 
   tree iv_1 = gimple_assign_rhs1 (iv_2_stmt);
 
@@ -2516,14 +2546,12 @@ number_of_iterations_cltz_complement (loop_p loop, edge exit,
 
   /* Make sure iv_2_stmt is a logical shift by one stmt:
      iv_2 = iv_1 {>>|<<} 1  */
-  if (!is_gimple_assign (iv_2_stmt)
-      || (gimple_assign_rhs_code (iv_2_stmt) != LSHIFT_EXPR
-	  && (gimple_assign_rhs_code (iv_2_stmt) != RSHIFT_EXPR
-	      || !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (iv_2_stmt)))))
-      || !integer_onep (gimple_assign_rhs2 (iv_2_stmt)))
+  if (!is_gimple_assign (iv_2_stmt))
+    return false;
+  bool left_shift = false;
+  if (!((left_shift = is_lshift_by_1 (iv_2_stmt))
+	|| is_rshift_by_1 (iv_2_stmt)))
     return false;
-
-  bool left_shift = (gimple_assign_rhs_code (iv_2_stmt) == LSHIFT_EXPR);
 
   tree iv_1 = gimple_assign_rhs1 (iv_2_stmt);
 
-- 
2.25.1




^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-05-11 10:10 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-10 10:55 [PATCH] tree-optimization/114760 - check variants of >> and << in loop-niter Di Zhao OS
2024-05-10 12:55 ` Richard Biener
2024-05-11 10:10   ` Di Zhao OS

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).