public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Andrew Carlotti <andrew.carlotti@arm.com>
To: gcc-patches@gcc.gnu.org
Subject: [PATCH 3/8] middle-end: Refactor number_of_iterations_popcount
Date: Fri, 11 Nov 2022 13:52:40 +0000	[thread overview]
Message-ID: <Y25TqHuEvlEQEF6Q@e124511.cambridge.arm.com> (raw)
In-Reply-To: <Y25OIx12f6lJmhtC@e124511.cambridge.arm.com>

This includes various changes to improve clarity, and to enable the code
to be more similar to the clz and ctz idiom recognition added in
subsequent patches.

We create new number_of_iterations_bitcount function, which will be used
to call the other bit-counting recognition functions added in subsequent
patches, as well as a generic comment describing the loop structures
that are common to each idiom. Some of the variables in
number_of_iterations_popcount are given more descriptive names, and the
popcount expression builder is extracted into a separate function.

As part of the refactoring, we also fix a bug where the max loop count
for modes shorter than an integer would be incorrectly computed as if
the input mode were actually an integer.

We also ensure that niter->max takes into account the final value for
niter->niter (after any folding and simplifying), since if the latter is a
constant, then record_estimate mandates that the two values are equivalent.

gcc/ChangeLog:

	* tree-ssa-loop-niter.cc
	(number_of_iterations_exit_assumptions): Modify to call...
	(number_of_iterations_bitcount): ...this new function.
	(number_of_iterations_popcount): Now called by the above.
	Refactor, and extract popcount expression builder to...
	(build_popcount_expr): this new function.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/popcount-max.c: New test.


--


diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount-max.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount-max.c
new file mode 100644
index 0000000000000000000000000000000000000000..ca7204cbc3cea636183408e24d7dd36d702ffdb2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount-max.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-loop-optimize -fdump-tree-optimized" } */
+
+#define PREC (__CHAR_BIT__)
+
+int count1 (unsigned char b) {
+    int c = 0;
+
+    while (b) {
+	b &= b - 1;
+	c++;
+    }
+    if (c <= PREC)
+      return 0;
+    else
+      return 34567;
+}
+
+int count2 (unsigned char b) {
+    int c = 0;
+
+    while (b) {
+	b &= b - 1;
+	c++;
+    }
+    if (c <= PREC - 1)
+      return 0;
+    else
+      return 76543;
+}
+
+/* { dg-final { scan-tree-dump-times "34567" 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "76543" 1 "optimized" } } */
diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
index 0af34e46580bb9a6f9b40e09c9f29b8454a4aaf6..fece876099c1687569d6351e7d2416ea6acae5b5 100644
--- a/gcc/tree-ssa-loop-niter.cc
+++ b/gcc/tree-ssa-loop-niter.cc
@@ -2026,6 +2026,48 @@ number_of_iterations_cond (class loop *loop,
   return ret;
 }
 
+/* Return an expression that computes the popcount of src.  */
+
+static tree
+build_popcount_expr (tree src)
+{
+  tree fn;
+  int prec = TYPE_PRECISION (TREE_TYPE (src));
+  int i_prec = TYPE_PRECISION (integer_type_node);
+  int li_prec = TYPE_PRECISION (long_integer_type_node);
+  int lli_prec = TYPE_PRECISION (long_long_integer_type_node);
+  if (prec <= i_prec)
+    fn = builtin_decl_implicit (BUILT_IN_POPCOUNT);
+  else if (prec == li_prec)
+    fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL);
+  else if (prec == lli_prec || prec == 2 * lli_prec)
+    fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL);
+  else
+    return NULL_TREE;
+
+  tree utype = unsigned_type_for (TREE_TYPE (src));
+  src = fold_convert (utype, src);
+  if (prec < i_prec)
+    src = fold_convert (unsigned_type_node, src);
+  tree call;
+  if (prec == 2 * lli_prec)
+    {
+      tree src1 = fold_convert (long_long_unsigned_type_node,
+				fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
+					     unshare_expr (src),
+					     build_int_cst (integer_type_node,
+							    lli_prec)));
+      tree src2 = fold_convert (long_long_unsigned_type_node, src);
+      tree call1 = build_call_expr (fn, 1, src1);
+      tree call2 = build_call_expr (fn, 1, src2);
+      call = fold_build2 (PLUS_EXPR, integer_type_node, call1, call2);
+    }
+  else
+    call = build_call_expr (fn, 1, src);
+
+  return call;
+}
+
 /* Utility function to check if OP is defined by a stmt
    that is a val - 1.  */
 
@@ -2041,45 +2083,18 @@ ssa_defined_by_minus_one_stmt_p (tree op, tree val)
 	  && integer_minus_onep (gimple_assign_rhs2 (stmt)));
 }
 
-/* See if LOOP is a popcout implementation, determine NITER for the loop
+/* See comment below for number_of_iterations_bitcount.
+   For popcount, we have:
 
-   We match:
-   <bb 2>
-   goto <bb 4>
+   modify:
+   _1 = iv_1 + -1
+   iv_2 = iv_1 & _1
 
-   <bb 3>
-   _1 = b_11 + -1
-   b_6 = _1 & b_11
-
-   <bb 4>
-   b_11 = PHI <b_5(D)(2), b_6(3)>
+   test:
+   if (iv != 0)
 
-   exit block
-   if (b_11 != 0)
-	goto <bb 3>
-   else
-	goto <bb 5>
-
-   OR we match copy-header version:
-   if (b_5 != 0)
-	goto <bb 3>
-   else
-	goto <bb 4>
-
-   <bb 3>
-   b_11 = PHI <b_5(2), b_6(3)>
-   _1 = b_11 + -1
-   b_6 = _1 & b_11
-
-   exit block
-   if (b_6 != 0)
-	goto <bb 3>
-   else
-	goto <bb 4>
-
-   If popcount pattern, update NITER accordingly.
-   i.e., set NITER to  __builtin_popcount (b)
-   return true if we did, false otherwise.
+   modification count:
+   popcount (src)
 
  */
 
@@ -2088,138 +2103,150 @@ number_of_iterations_popcount (loop_p loop, edge exit,
 			       enum tree_code code,
 			       class tree_niter_desc *niter)
 {
-  bool adjust = true;
-  tree iter;
+  bool modify_before_test = true;
   HOST_WIDE_INT max;
-  adjust = true;
-  tree fn = NULL_TREE;
-
-  /* Check loop terminating branch is like
-     if (b != 0).  */
-  gimple *stmt = last_stmt (exit->src);
-  if (!stmt
-      || gimple_code (stmt) != GIMPLE_COND
+
+  /* Check that condition for staying inside the loop is like
+     if (iv != 0).  */
+  gimple *cond_stmt = last_stmt (exit->src);
+  if (!cond_stmt
+      || gimple_code (cond_stmt) != GIMPLE_COND
       || code != NE_EXPR
-      || !integer_zerop (gimple_cond_rhs (stmt))
-      || TREE_CODE (gimple_cond_lhs (stmt)) != SSA_NAME)
+      || !integer_zerop (gimple_cond_rhs (cond_stmt))
+      || TREE_CODE (gimple_cond_lhs (cond_stmt)) != SSA_NAME)
     return false;
 
-  gimple *and_stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (stmt));
+  tree iv_2 = gimple_cond_lhs (cond_stmt);
+  gimple *iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
 
-  /* Depending on copy-header is performed, feeding PHI stmts might be in
-     the loop header or loop latch, handle this.  */
-  if (gimple_code (and_stmt) == GIMPLE_PHI
-      && gimple_bb (and_stmt) == loop->header
-      && gimple_phi_num_args (and_stmt) == 2
-      && (TREE_CODE (gimple_phi_arg_def (and_stmt,
+  /* If the test comes before the iv modification, then these will actually be
+     iv_1 and a phi node.  */
+  if (gimple_code (iv_2_stmt) == GIMPLE_PHI
+      && gimple_bb (iv_2_stmt) == loop->header
+      && gimple_phi_num_args (iv_2_stmt) == 2
+      && (TREE_CODE (gimple_phi_arg_def (iv_2_stmt,
 					 loop_latch_edge (loop)->dest_idx))
 	  == SSA_NAME))
     {
-      /* SSA used in exit condition is defined by PHI stmt
-	b_11 = PHI <b_5(D)(2), b_6(3)>
-	from the PHI stmt, get the and_stmt
-	b_6 = _1 & b_11.  */
-      tree t = gimple_phi_arg_def (and_stmt, loop_latch_edge (loop)->dest_idx);
-      and_stmt = SSA_NAME_DEF_STMT (t);
-      adjust = false;
+      /* iv_2 is actually one of the inputs to the phi.  */
+      iv_2 = gimple_phi_arg_def (iv_2_stmt, loop_latch_edge (loop)->dest_idx);
+      iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
+      modify_before_test = false;
     }
 
-  /* Make sure it is indeed an and stmt (b_6 = _1 & b_11).  */
-  if (!is_gimple_assign (and_stmt)
-      || gimple_assign_rhs_code (and_stmt) != BIT_AND_EXPR)
+  /* Make sure iv_2_stmt is an and stmt (iv_2 = _1 & iv_1).  */
+  if (!is_gimple_assign (iv_2_stmt)
+      || gimple_assign_rhs_code (iv_2_stmt) != BIT_AND_EXPR)
     return false;
 
-  tree b_11 = gimple_assign_rhs1 (and_stmt);
-  tree _1 = gimple_assign_rhs2 (and_stmt);
+  tree iv_1 = gimple_assign_rhs1 (iv_2_stmt);
+  tree _1 = gimple_assign_rhs2 (iv_2_stmt);
 
-  /* Check that _1 is defined by _b11 + -1 (_1 = b_11 + -1).
-     Also make sure that b_11 is the same in and_stmt and _1 defining stmt.
+  /* Check that _1 is defined by (_1 = iv_1 + -1).
+     Also make sure that _1 is the same in and_stmt and _1 defining stmt.
      Also canonicalize if _1 and _b11 are revrsed.  */
-  if (ssa_defined_by_minus_one_stmt_p (b_11, _1))
-    std::swap (b_11, _1);
-  else if (ssa_defined_by_minus_one_stmt_p (_1, b_11))
+  if (ssa_defined_by_minus_one_stmt_p (iv_1, _1))
+    std::swap (iv_1, _1);
+  else if (ssa_defined_by_minus_one_stmt_p (_1, iv_1))
     ;
   else
     return false;
-  /* Check the recurrence:
-   ... = PHI <b_5(2), b_6(3)>.  */
-  gimple *phi = SSA_NAME_DEF_STMT (b_11);
+
+  /* Check the recurrence.  */
+  gimple *phi = SSA_NAME_DEF_STMT (iv_1);
   if (gimple_code (phi) != GIMPLE_PHI
       || (gimple_bb (phi) != loop_latch_edge (loop)->dest)
-      || (gimple_assign_lhs (and_stmt)
-	  != gimple_phi_arg_def (phi, loop_latch_edge (loop)->dest_idx)))
+      || (iv_2 != gimple_phi_arg_def (phi, loop_latch_edge (loop)->dest_idx)))
     return false;
 
-  /* We found a match. Get the corresponding popcount builtin.  */
+  /* We found a match.  */
   tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
-  if (TYPE_PRECISION (TREE_TYPE (src)) <= TYPE_PRECISION (integer_type_node))
-    fn = builtin_decl_implicit (BUILT_IN_POPCOUNT);
-  else if (TYPE_PRECISION (TREE_TYPE (src))
-	   == TYPE_PRECISION (long_integer_type_node))
-    fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL);
-  else if (TYPE_PRECISION (TREE_TYPE (src))
-	   == TYPE_PRECISION (long_long_integer_type_node)
-	   || (TYPE_PRECISION (TREE_TYPE (src))
-	       == 2 * TYPE_PRECISION (long_long_integer_type_node)))
-    fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL);
+  int src_precision = TYPE_PRECISION (TREE_TYPE (src));
 
-  if (!fn)
+  /* Get the corresponding popcount builtin.  */
+  tree expr = build_popcount_expr (src);
+
+  if (!expr)
     return false;
 
-  /* Update NITER params accordingly  */
-  tree utype = unsigned_type_for (TREE_TYPE (src));
-  src = fold_convert (utype, src);
-  if (TYPE_PRECISION (TREE_TYPE (src)) < TYPE_PRECISION (integer_type_node))
-    src = fold_convert (unsigned_type_node, src);
-  tree call;
-  if (TYPE_PRECISION (TREE_TYPE (src))
-      == 2 * TYPE_PRECISION (long_long_integer_type_node))
+  max = src_precision;
+
+  tree may_be_zero = boolean_false_node;
+
+  if (modify_before_test)
     {
-      int prec = TYPE_PRECISION (long_long_integer_type_node);
-      tree src1 = fold_convert (long_long_unsigned_type_node,
-				fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
-					     unshare_expr (src),
-					     build_int_cst (integer_type_node,
-							    prec)));
-      tree src2 = fold_convert (long_long_unsigned_type_node, src);
-      call = build_call_expr (fn, 1, src1);
-      call = fold_build2 (PLUS_EXPR, TREE_TYPE (call), call,
-			  build_call_expr (fn, 1, src2));
-      call = fold_convert (utype, call);
+      expr = fold_build2 (MINUS_EXPR, integer_type_node, expr,
+			  integer_one_node);
+      max = max - 1;
+      may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
+				      build_zero_cst (TREE_TYPE (src)));
     }
-  else
-    call = fold_convert (utype, build_call_expr (fn, 1, src));
-  if (adjust)
-    iter = fold_build2 (MINUS_EXPR, utype, call, build_int_cst (utype, 1));
-  else
-    iter = call;
 
-  if (TREE_CODE (call) == INTEGER_CST)
-    max = tree_to_uhwi (call);
-  else
-    max = TYPE_PRECISION (TREE_TYPE (src));
-  if (adjust)
-    max = max - 1;
+  expr = fold_convert (unsigned_type_node, expr);
 
-  niter->niter = iter;
   niter->assumptions = boolean_true_node;
+  niter->may_be_zero = simplify_using_initial_conditions (loop, may_be_zero);
+  niter->niter = simplify_using_initial_conditions(loop, expr);
 
-  if (adjust)
-    {
-      tree may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
-				      build_zero_cst (TREE_TYPE (src)));
-      niter->may_be_zero
-	= simplify_using_initial_conditions (loop, may_be_zero);
-    }
+  if (TREE_CODE (niter->niter) == INTEGER_CST)
+    niter->max = tree_to_uhwi (niter->niter);
   else
-    niter->may_be_zero = boolean_false_node;
+    niter->max = max;
 
-  niter->max = max;
   niter->bound = NULL_TREE;
   niter->cmp = ERROR_MARK;
   return true;
 }
 
+/* See if LOOP contains a bit counting idiom. The idiom consists of two parts:
+   1. A modification to the induction variabler;.
+   2. A test to determine whether or not to exit the loop.
+
+   These can come in either order - i.e.:
+
+   <bb 3>
+   iv_1 = PHI <src(2), iv_2(4)>
+   if (test (iv_1))
+     goto <bb 4>
+   else
+     goto <bb 5>
+
+   <bb 4>
+   iv_2 = modify (iv_1)
+   goto <bb 3>
+
+   OR
+
+   <bb 3>
+   iv_1 = PHI <src(2), iv_2(4)>
+   iv_2 = modify (iv_1)
+
+   <bb 4>
+   if (test (iv_2))
+     goto <bb 3>
+   else
+     goto <bb 5>
+
+   The second form can be generated by copying the loop header out of the loop.
+
+   In the first case, the number of latch executions will be equal to the
+   number of induction variable modifications required before the test fails.
+
+   In the second case (modify_before_test), if we assume that the number of
+   modifications required before the test fails is nonzero, then the number of
+   latch executions will be one less than this number.
+
+   If we recognise the pattern, then we update niter accordingly, and return
+   true.  */
+
+static bool
+number_of_iterations_bitcount (loop_p loop, edge exit,
+			       enum tree_code code,
+			       class tree_niter_desc *niter)
+{
+  return number_of_iterations_popcount (loop, exit, code, niter);
+}
+
 /* Substitute NEW_TREE for OLD in EXPR and fold the result.
    If VALUEIZE is non-NULL then OLD and NEW_TREE are ignored and instead
    all SSA names are replaced with the result of calling the VALUEIZE
@@ -2758,7 +2785,7 @@ number_of_iterations_exit_assumptions (class loop *loop, edge exit,
   tree iv0_niters = NULL_TREE;
   if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
 			      op0, &iv0, safe ? &iv0_niters : NULL, false))
-    return number_of_iterations_popcount (loop, exit, code, niter);
+    return number_of_iterations_bitcount (loop, exit, code, niter);
   tree iv1_niters = NULL_TREE;
   if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
 			      op1, &iv1, safe ? &iv1_niters : NULL, false))

  parent reply	other threads:[~2022-11-11 13:57 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-11 13:29 [PATCH 0/8] middle-end: Popcount and clz/ctz idiom recognition improvements Andrew Carlotti
2022-11-11 13:39 ` [PATCH 0/8] middle-end: Ensure at_stmt is defined before an early exit Andrew Carlotti
2022-11-14 14:23   ` Jeff Law
2022-11-11 13:46 ` [PATCH 2/8] middle-end: Remove prototype for number_of_iterations_popcount Andrew Carlotti
2022-11-14 14:24   ` Jeff Law
2022-11-11 13:52 ` Andrew Carlotti [this message]
2022-11-14 14:52   ` [PATCH 3/8] middle-end: Refactor number_of_iterations_popcount Richard Biener
2022-11-11 18:43 ` [PATCH 4/8] Modify test, to prevent the next patch breaking it Andrew Carlotti
2022-11-14 10:18   ` Richard Biener
2022-11-11 18:50 ` [PATCH 5/8] middle-end: Add cltz_complement idiom recognition Andrew Carlotti
2022-11-14 15:10   ` Richard Biener
2022-11-21 15:53     ` Andrew Carlotti
2022-11-24 10:41       ` Richard Biener
2022-12-22 17:42         ` [PATCH 5/8 v2] " Andrew Carlotti
2023-01-12 13:19           ` Richard Biener
2023-01-19  9:19           ` Jan-Benedict Glaw
2023-01-19  9:43             ` Richard Biener
2022-11-11 18:54 ` [PATCH 6/8] docs: Add popcount, clz and ctz target attributes Andrew Carlotti
2022-11-14 14:52   ` Jeff Law
2022-12-22 17:42     ` [PATCH 6/8 v2] " Andrew Carlotti
2022-11-11 19:01 ` [PATCH 7/8] middle-end: Add c[lt]z idiom recognition Andrew Carlotti
2022-11-14 15:22   ` Richard Biener
2022-11-11 19:07 ` [PATCH 8/8] middle-end: Expand comment for tree_niter_desc.max Andrew Carlotti
2022-11-14 14:51   ` Jeff Law
2022-12-22 17:43 ` [PATCH 9/8] middle-end: Allow build_popcount_expr to use an IFN Andrew Carlotti
2023-01-12 13:20   ` Richard Biener
2023-01-16 14:03   ` Andrew Carlotti
2023-01-16 14:07     ` Andrew Carlotti

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Y25TqHuEvlEQEF6Q@e124511.cambridge.arm.com \
    --to=andrew.carlotti@arm.com \
    --cc=gcc-patches@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).