public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r13-4208] Refactor number_of_iterations_popcount
@ 2022-11-21 16:01 Andrew Carlotti
  0 siblings, 0 replies; only message in thread
From: Andrew Carlotti @ 2022-11-21 16:01 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:eea52e38dd81b6632546b7cf933479986c972c69

commit r13-4208-geea52e38dd81b6632546b7cf933479986c972c69
Author: Andrew Carlotti <andrew.carlotti@arm.com>
Date:   Tue Oct 4 18:21:57 2022 +0100

    Refactor number_of_iterations_popcount
    
    This includes various changes to improve clarity, and to enable the code
    to be more similar to the clz and ctz idiom recognition added in
    subsequent patches.
    
    We create new number_of_iterations_bitcount function, which will be used
    to call the other bit-counting recognition functions added in subsequent
    patches, as well as a generic comment describing the loop structures
    that are common to each idiom. Some of the variables in
    number_of_iterations_popcount are given more descriptive names, and the
    popcount expression builder is extracted into a separate function.
    
    As part of the refactoring, we also fix a bug where the max loop count
    for modes shorter than an integer would be incorrectly computed as if
    the input mode were actually an integer.
    
    We also ensure that niter->max takes into account the final value for
    niter->niter (after any folding and simplifying), since if the latter is a
    constant, then record_estimate mandates that the two values are equivalent.
    
    gcc/ChangeLog:
    
            * tree-ssa-loop-niter.cc
            (number_of_iterations_exit_assumptions): Modify to call...
            (number_of_iterations_bitcount): ...this new function.
            (number_of_iterations_popcount): Now called by the above.
            Refactor, and extract popcount expression builder to...
            (build_popcount_expr): this new function.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.dg/tree-ssa/popcount-max.c: New test.

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/popcount-max.c |  33 +++
 gcc/tree-ssa-loop-niter.cc                   | 289 +++++++++++++++------------
 2 files changed, 191 insertions(+), 131 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount-max.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount-max.c
new file mode 100644
index 00000000000..ca7204cbc3c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount-max.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-loop-optimize -fdump-tree-optimized" } */
+
+#define PREC (__CHAR_BIT__)
+
+int count1 (unsigned char b) {
+    int c = 0;
+
+    while (b) {
+	b &= b - 1;
+	c++;
+    }
+    if (c <= PREC)
+      return 0;
+    else
+      return 34567;
+}
+
+int count2 (unsigned char b) {
+    int c = 0;
+
+    while (b) {
+	b &= b - 1;
+	c++;
+    }
+    if (c <= PREC - 1)
+      return 0;
+    else
+      return 76543;
+}
+
+/* { dg-final { scan-tree-dump-times "34567" 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "76543" 1 "optimized" } } */
diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
index 0af34e46580..fece876099c 100644
--- a/gcc/tree-ssa-loop-niter.cc
+++ b/gcc/tree-ssa-loop-niter.cc
@@ -2026,6 +2026,48 @@ number_of_iterations_cond (class loop *loop,
   return ret;
 }
 
+/* Return an expression that computes the popcount of src.  */
+
+static tree
+build_popcount_expr (tree src)
+{
+  tree fn;
+  int prec = TYPE_PRECISION (TREE_TYPE (src));
+  int i_prec = TYPE_PRECISION (integer_type_node);
+  int li_prec = TYPE_PRECISION (long_integer_type_node);
+  int lli_prec = TYPE_PRECISION (long_long_integer_type_node);
+  if (prec <= i_prec)
+    fn = builtin_decl_implicit (BUILT_IN_POPCOUNT);
+  else if (prec == li_prec)
+    fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL);
+  else if (prec == lli_prec || prec == 2 * lli_prec)
+    fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL);
+  else
+    return NULL_TREE;
+
+  tree utype = unsigned_type_for (TREE_TYPE (src));
+  src = fold_convert (utype, src);
+  if (prec < i_prec)
+    src = fold_convert (unsigned_type_node, src);
+  tree call;
+  if (prec == 2 * lli_prec)
+    {
+      tree src1 = fold_convert (long_long_unsigned_type_node,
+				fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
+					     unshare_expr (src),
+					     build_int_cst (integer_type_node,
+							    lli_prec)));
+      tree src2 = fold_convert (long_long_unsigned_type_node, src);
+      tree call1 = build_call_expr (fn, 1, src1);
+      tree call2 = build_call_expr (fn, 1, src2);
+      call = fold_build2 (PLUS_EXPR, integer_type_node, call1, call2);
+    }
+  else
+    call = build_call_expr (fn, 1, src);
+
+  return call;
+}
+
 /* Utility function to check if OP is defined by a stmt
    that is a val - 1.  */
 
@@ -2041,45 +2083,18 @@ ssa_defined_by_minus_one_stmt_p (tree op, tree val)
 	  && integer_minus_onep (gimple_assign_rhs2 (stmt)));
 }
 
-/* See if LOOP is a popcout implementation, determine NITER for the loop
+/* See comment below for number_of_iterations_bitcount.
+   For popcount, we have:
 
-   We match:
-   <bb 2>
-   goto <bb 4>
+   modify:
+   _1 = iv_1 + -1
+   iv_2 = iv_1 & _1
 
-   <bb 3>
-   _1 = b_11 + -1
-   b_6 = _1 & b_11
-
-   <bb 4>
-   b_11 = PHI <b_5(D)(2), b_6(3)>
+   test:
+   if (iv != 0)
 
-   exit block
-   if (b_11 != 0)
-	goto <bb 3>
-   else
-	goto <bb 5>
-
-   OR we match copy-header version:
-   if (b_5 != 0)
-	goto <bb 3>
-   else
-	goto <bb 4>
-
-   <bb 3>
-   b_11 = PHI <b_5(2), b_6(3)>
-   _1 = b_11 + -1
-   b_6 = _1 & b_11
-
-   exit block
-   if (b_6 != 0)
-	goto <bb 3>
-   else
-	goto <bb 4>
-
-   If popcount pattern, update NITER accordingly.
-   i.e., set NITER to  __builtin_popcount (b)
-   return true if we did, false otherwise.
+   modification count:
+   popcount (src)
 
  */
 
@@ -2088,138 +2103,150 @@ number_of_iterations_popcount (loop_p loop, edge exit,
 			       enum tree_code code,
 			       class tree_niter_desc *niter)
 {
-  bool adjust = true;
-  tree iter;
+  bool modify_before_test = true;
   HOST_WIDE_INT max;
-  adjust = true;
-  tree fn = NULL_TREE;
-
-  /* Check loop terminating branch is like
-     if (b != 0).  */
-  gimple *stmt = last_stmt (exit->src);
-  if (!stmt
-      || gimple_code (stmt) != GIMPLE_COND
+
+  /* Check that condition for staying inside the loop is like
+     if (iv != 0).  */
+  gimple *cond_stmt = last_stmt (exit->src);
+  if (!cond_stmt
+      || gimple_code (cond_stmt) != GIMPLE_COND
       || code != NE_EXPR
-      || !integer_zerop (gimple_cond_rhs (stmt))
-      || TREE_CODE (gimple_cond_lhs (stmt)) != SSA_NAME)
+      || !integer_zerop (gimple_cond_rhs (cond_stmt))
+      || TREE_CODE (gimple_cond_lhs (cond_stmt)) != SSA_NAME)
     return false;
 
-  gimple *and_stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt));
+  tree iv_2 = gimple_cond_lhs (cond_stmt);
+  gimple *iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
 
-  /* Depending on copy-header is performed, feeding PHI stmts might be in
-     the loop header or loop latch, handle this.  */
-  if (gimple_code (and_stmt) == GIMPLE_PHI
-      && gimple_bb (and_stmt) == loop->header
-      && gimple_phi_num_args (and_stmt) == 2
-      && (TREE_CODE (gimple_phi_arg_def (and_stmt,
+  /* If the test comes before the iv modification, then these will actually be
+     iv_1 and a phi node.  */
+  if (gimple_code (iv_2_stmt) == GIMPLE_PHI
+      && gimple_bb (iv_2_stmt) == loop->header
+      && gimple_phi_num_args (iv_2_stmt) == 2
+      && (TREE_CODE (gimple_phi_arg_def (iv_2_stmt,
 					 loop_latch_edge (loop)->dest_idx))
 	  == SSA_NAME))
     {
-      /* SSA used in exit condition is defined by PHI stmt
-	b_11 = PHI <b_5(D)(2), b_6(3)>
-	from the PHI stmt, get the and_stmt
-	b_6 = _1 & b_11.  */
-      tree t = gimple_phi_arg_def (and_stmt, loop_latch_edge (loop)->dest_idx);
-      and_stmt = SSA_NAME_DEF_STMT (t);
-      adjust = false;
+      /* iv_2 is actually one of the inputs to the phi.  */
+      iv_2 = gimple_phi_arg_def (iv_2_stmt, loop_latch_edge (loop)->dest_idx);
+      iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
+      modify_before_test = false;
     }
 
-  /* Make sure it is indeed an and stmt (b_6 = _1 & b_11).  */
-  if (!is_gimple_assign (and_stmt)
-      || gimple_assign_rhs_code (and_stmt) != BIT_AND_EXPR)
+  /* Make sure iv_2_stmt is an and stmt (iv_2 = _1 & iv_1).  */
+  if (!is_gimple_assign (iv_2_stmt)
+      || gimple_assign_rhs_code (iv_2_stmt) != BIT_AND_EXPR)
     return false;
 
-  tree b_11 = gimple_assign_rhs1 (and_stmt);
-  tree _1 = gimple_assign_rhs2 (and_stmt);
+  tree iv_1 = gimple_assign_rhs1 (iv_2_stmt);
+  tree _1 = gimple_assign_rhs2 (iv_2_stmt);
 
-  /* Check that _1 is defined by _b11 + -1 (_1 = b_11 + -1).
-     Also make sure that b_11 is the same in and_stmt and _1 defining stmt.
+  /* Check that _1 is defined by (_1 = iv_1 + -1).
+     Also make sure that _1 is the same in and_stmt and _1 defining stmt.
      Also canonicalize if _1 and _b11 are revrsed.  */
-  if (ssa_defined_by_minus_one_stmt_p (b_11, _1))
-    std::swap (b_11, _1);
-  else if (ssa_defined_by_minus_one_stmt_p (_1, b_11))
+  if (ssa_defined_by_minus_one_stmt_p (iv_1, _1))
+    std::swap (iv_1, _1);
+  else if (ssa_defined_by_minus_one_stmt_p (_1, iv_1))
     ;
   else
     return false;
-  /* Check the recurrence:
-   ... = PHI <b_5(2), b_6(3)>.  */
-  gimple *phi = SSA_NAME_DEF_STMT (b_11);
+
+  /* Check the recurrence.  */
+  gimple *phi = SSA_NAME_DEF_STMT (iv_1);
   if (gimple_code (phi) != GIMPLE_PHI
       || (gimple_bb (phi) != loop_latch_edge (loop)->dest)
-      || (gimple_assign_lhs (and_stmt)
-	  != gimple_phi_arg_def (phi, loop_latch_edge (loop)->dest_idx)))
+      || (iv_2 != gimple_phi_arg_def (phi, loop_latch_edge (loop)->dest_idx)))
     return false;
 
-  /* We found a match. Get the corresponding popcount builtin.  */
+  /* We found a match.  */
   tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
-  if (TYPE_PRECISION (TREE_TYPE (src)) <= TYPE_PRECISION (integer_type_node))
-    fn = builtin_decl_implicit (BUILT_IN_POPCOUNT);
-  else if (TYPE_PRECISION (TREE_TYPE (src))
-	   == TYPE_PRECISION (long_integer_type_node))
-    fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL);
-  else if (TYPE_PRECISION (TREE_TYPE (src))
-	   == TYPE_PRECISION (long_long_integer_type_node)
-	   || (TYPE_PRECISION (TREE_TYPE (src))
-	       == 2 * TYPE_PRECISION (long_long_integer_type_node)))
-    fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL);
+  int src_precision = TYPE_PRECISION (TREE_TYPE (src));
 
-  if (!fn)
+  /* Get the corresponding popcount builtin.  */
+  tree expr = build_popcount_expr (src);
+
+  if (!expr)
     return false;
 
-  /* Update NITER params accordingly  */
-  tree utype = unsigned_type_for (TREE_TYPE (src));
-  src = fold_convert (utype, src);
-  if (TYPE_PRECISION (TREE_TYPE (src)) < TYPE_PRECISION (integer_type_node))
-    src = fold_convert (unsigned_type_node, src);
-  tree call;
-  if (TYPE_PRECISION (TREE_TYPE (src))
-      == 2 * TYPE_PRECISION (long_long_integer_type_node))
+  max = src_precision;
+
+  tree may_be_zero = boolean_false_node;
+
+  if (modify_before_test)
     {
-      int prec = TYPE_PRECISION (long_long_integer_type_node);
-      tree src1 = fold_convert (long_long_unsigned_type_node,
-				fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
-					     unshare_expr (src),
-					     build_int_cst (integer_type_node,
-							    prec)));
-      tree src2 = fold_convert (long_long_unsigned_type_node, src);
-      call = build_call_expr (fn, 1, src1);
-      call = fold_build2 (PLUS_EXPR, TREE_TYPE (call), call,
-			  build_call_expr (fn, 1, src2));
-      call = fold_convert (utype, call);
+      expr = fold_build2 (MINUS_EXPR, integer_type_node, expr,
+			  integer_one_node);
+      max = max - 1;
+      may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
+				      build_zero_cst (TREE_TYPE (src)));
     }
-  else
-    call = fold_convert (utype, build_call_expr (fn, 1, src));
-  if (adjust)
-    iter = fold_build2 (MINUS_EXPR, utype, call, build_int_cst (utype, 1));
-  else
-    iter = call;
 
-  if (TREE_CODE (call) == INTEGER_CST)
-    max = tree_to_uhwi (call);
-  else
-    max = TYPE_PRECISION (TREE_TYPE (src));
-  if (adjust)
-    max = max - 1;
+  expr = fold_convert (unsigned_type_node, expr);
 
-  niter->niter = iter;
   niter->assumptions = boolean_true_node;
+  niter->may_be_zero = simplify_using_initial_conditions (loop, may_be_zero);
+  niter->niter = simplify_using_initial_conditions(loop, expr);
 
-  if (adjust)
-    {
-      tree may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
-				      build_zero_cst (TREE_TYPE (src)));
-      niter->may_be_zero
-	= simplify_using_initial_conditions (loop, may_be_zero);
-    }
+  if (TREE_CODE (niter->niter) == INTEGER_CST)
+    niter->max = tree_to_uhwi (niter->niter);
   else
-    niter->may_be_zero = boolean_false_node;
+    niter->max = max;
 
-  niter->max = max;
   niter->bound = NULL_TREE;
   niter->cmp = ERROR_MARK;
   return true;
 }
 
+/* See if LOOP contains a bit counting idiom. The idiom consists of two parts:
+   1. A modification to the induction variabler;.
+   2. A test to determine whether or not to exit the loop.
+
+   These can come in either order - i.e.:
+
+   <bb 3>
+   iv_1 = PHI <src(2), iv_2(4)>
+   if (test (iv_1))
+     goto <bb 4>
+   else
+     goto <bb 5>
+
+   <bb 4>
+   iv_2 = modify (iv_1)
+   goto <bb 3>
+
+   OR
+
+   <bb 3>
+   iv_1 = PHI <src(2), iv_2(4)>
+   iv_2 = modify (iv_1)
+
+   <bb 4>
+   if (test (iv_2))
+     goto <bb 3>
+   else
+     goto <bb 5>
+
+   The second form can be generated by copying the loop header out of the loop.
+
+   In the first case, the number of latch executions will be equal to the
+   number of induction variable modifications required before the test fails.
+
+   In the second case (modify_before_test), if we assume that the number of
+   modifications required before the test fails is nonzero, then the number of
+   latch executions will be one less than this number.
+
+   If we recognise the pattern, then we update niter accordingly, and return
+   true.  */
+
+static bool
+number_of_iterations_bitcount (loop_p loop, edge exit,
+			       enum tree_code code,
+			       class tree_niter_desc *niter)
+{
+  return number_of_iterations_popcount (loop, exit, code, niter);
+}
+
 /* Substitute NEW_TREE for OLD in EXPR and fold the result.
    If VALUEIZE is non-NULL then OLD and NEW_TREE are ignored and instead
    all SSA names are replaced with the result of calling the VALUEIZE
@@ -2758,7 +2785,7 @@ number_of_iterations_exit_assumptions (class loop *loop, edge exit,
   tree iv0_niters = NULL_TREE;
   if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
 			      op0, &iv0, safe ? &iv0_niters : NULL, false))
-    return number_of_iterations_popcount (loop, exit, code, niter);
+    return number_of_iterations_bitcount (loop, exit, code, niter);
   tree iv1_niters = NULL_TREE;
   if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
 			      op1, &iv1, safe ? &iv1_niters : NULL, false))

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-11-21 16:01 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-21 16:01 [gcc r13-4208] Refactor number_of_iterations_popcount Andrew Carlotti

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).