public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-5699] Final value replacement improvements for until-wrap loops.
@ 2021-12-01 19:59 Roger Sayle
  0 siblings, 0 replies; only message in thread
From: Roger Sayle @ 2021-12-01 19:59 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:de3e5aae6c4b540e808c822c1e878b0a3304d09c

commit r12-5699-gde3e5aae6c4b540e808c822c1e878b0a3304d09c
Author: Roger Sayle <roger@nextmovesoftware.com>
Date:   Wed Dec 1 19:58:40 2021 +0000

    Final value replacement improvements for until-wrap loops.
    
    This middle-end patch is inspired by the Richard Beiner's until-wrap
    loop example in PR tree-optimization/101145.
    
    unsigned foo(unsigned val, unsigned start)
    {
      unsigned cnt = 0;
      for (unsigned i = start; i > val; ++i)
        cnt++;
      return cnt;
    }
    
    For this loop, the tree optimizers currently generate:
    
    unsigned int foo (unsigned int val, unsigned int start)
    {
      unsigned int cnt;
      unsigned int _1;
      unsigned int _5;
    
      <bb 2> [local count: 118111600]:
      if (start_3(D) > val_4(D))
        goto <bb 3>; [89.00%]
      else
        goto <bb 4>; [11.00%]
    
      <bb 3> [local count: 105119324]:
      _1 = start_3(D) + 1;
      _5 = -start_3(D);
      cnt_2 = _1 > val_4(D) ? _5 : 1;
    
      <bb 4> [local count: 118111600]:
      # cnt_11 = PHI <cnt_2(3), 0(2)>
      return cnt_11;
    }
    
    or perhaps slightly easier to read:
    
      if (start > val) {
        cnt = (start+1) > val ? -start : 1;
      } else cnt = 0;
    
    In this snippet, if we know start > val, then (start+1) > val
    unless start+1 overflows, i.e. (start+1) == 0 and start == ~0.
    We can use this (loop header) context to simplify the ternary
    expression to "(start != -1) ? -start : 1", which with a little
    help from match.pd can be folded to -start.  Hence the optimal
    final value replacement should be:
    
      cnt = (start > val) ? -start : 0;
    
    Or as now generated by this patch:
    
    unsigned int foo (unsigned int val, unsigned int start)
    {
      unsigned int cnt;
    
      <bb 2> [local count: 118111600]:
      if (start_3(D) > val_4(D))
        goto <bb 3>; [89.00%]
      else
        goto <bb 4>; [11.00%]
    
      <bb 3> [local count: 105119324]:
      cnt_2 = -start_3(D);
    
      <bb 4> [local count: 118111600]:
      # cnt_11 = PHI <cnt_2(3), 0(2)>
      return cnt_11;
    }
    
    We can also improve until-wrap loops that don't have a (suitable) loop
    header, as determined by simplify_using_initial_conditions.
    
    unsigned bar(unsigned val, unsigned start)
    {
      unsigned cnt = 0;
      unsigned i = start;
      do {
        cnt++;
        i++;
      } while (i > val);
      return cnt;
    }
    
    which is currently optimized to:
    
    unsigned int foo (unsigned int val, unsigned int start)
    {
      unsigned int cnt;
      unsigned int _9;
      unsigned int _10;
    
      <bb 2> [local count: 118111600]:
      _9 = start_4(D) + 1;
      _10 = -start_4(D);
      cnt_3 = val_7(D) < _9 ? _10 : 1;
      return cnt_3;
    }
    
    Here we have "val < (start+1) ? -start : 1", which again with the
    help of match.pd can be slightly simplified to "val <= start ? -start : 1"
    when dealing with unsigned types, because at the complicating value where
    start == ~0, we fortunately have -start == 1, hence it doesn't matter
    whether the second or third operand of the ternary operator is returned.
    
    To summarize, this patch (in addition to tweaking may_be_zero in
    number_of_iterations_until_wrap) adds three new constant folding
    transforms to match.pd.
    
    X != C1 ? -X : C2 simplifies to -X when -C1 == C2.
    which is the generalized form of the simplification above.
    
    X != C1 ? ~X : C2 simplifies to ~X when ~C1 == C2.
    which is the BIT_NOT_EXPR analog of the NEGATE_EXPR case.
    
    and the "until-wrap final value replacement without context":
    
    (X + 1) > Y ? -X : 1 simplifies to X >= Y ? -X : 1 when
    X is unsigned, as when X + 1 overflows, X is -1, so -X == 1.
    
    2021-12-01  Roger Sayle  <roger@nextmovesoftware.com>
                Richard Biener  <rguenther@suse.de>
    
    gcc/ChangeLog
            * tree-ssa-loop-niter.c (number_of_iterations_until_wrap):
            Check if simplify_using_initial_conditions allows us to
            simplify the expression for may_be_zero.
            * match.pd (X != C ? -X : -C -> -X): New transform.
            (X != C ? ~X : ~C -> ~X): Likewise.
            ((X+1) > Y ? -X : 1 -> X >= Y ? -X : 1): Likewise.
    
    gcc/testsuite/ChangeLog
            * gcc.dg/fold-condneg-1.c: New test case.
            * gcc.dg/fold-condneg-2.c: New test case.
            * gcc.dg/fold-condnot-1.c: New test case.
            * gcc.dg/pr101145-1.c: New test case.
            * gcc.dg/pr101145-2.c: New test case.

Diff:
---
 gcc/match.pd                          | 22 +++++++++
 gcc/testsuite/gcc.dg/fold-condneg-1.c | 59 ++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/fold-condneg-2.c | 11 +++++
 gcc/testsuite/gcc.dg/fold-condnot-1.c | 84 +++++++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/pr101145-1.c     | 12 +++++
 gcc/testsuite/gcc.dg/pr101145-2.c     | 15 +++++++
 gcc/tree-ssa-loop-niter.c             | 19 +++++++-
 7 files changed, 221 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 0a00b08e2ab..0d7b8dd1334 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4403,6 +4403,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	(op (min @X { wide_int_to_tree (from_type, real_c1); })
 	    { wide_int_to_tree (from_type, c2); })))))))))
 
+/* X != C1 ? -X : C2 simplifies to -X when -C1 == C2.  */
+(simplify
+ (cond (ne @0 INTEGER_CST@1) (negate@3 @0) INTEGER_CST@2)
+ (if (!TYPE_SATURATING (type)
+      && (TYPE_OVERFLOW_WRAPS (type)
+	  || !wi::only_sign_bit_p (wi::to_wide (@1)))
+      && wi::eq_p (wi::neg (wi::to_wide (@1)), wi::to_wide (@2)))
+  @3))
+
+/* X != C1 ? ~X : C2 simplifies to ~X when ~C1 == C2.  */
+(simplify
+ (cond (ne @0 INTEGER_CST@1) (bit_not@3 @0) INTEGER_CST@2)
+ (if (wi::eq_p (wi::bit_not (wi::to_wide (@1)), wi::to_wide (@2)))
+  @3))
+
+/* (X + 1) > Y ? -X : 1 simplifies to X >= Y ? -X : 1 when
+   X is unsigned, as when X + 1 overflows, X is -1, so -X == 1.  */
+(simplify
+ (cond (gt (plus @0 integer_onep) @1) (negate @0) integer_onep@2)
+ (if (TYPE_UNSIGNED (type))
+  (cond (ge @0 @1) (negate @0) @2)))
+
 (for cnd (cond vec_cond)
  /* A ? B : (A ? X : C) -> A ? B : C.  */
  (simplify
diff --git a/gcc/testsuite/gcc.dg/fold-condneg-1.c b/gcc/testsuite/gcc.dg/fold-condneg-1.c
new file mode 100644
index 00000000000..c4edd7487e8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-condneg-1.c
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int test_i0(int x)
+{
+  return x != 0 ? -x : 0;
+}
+
+int test_i1(int x)
+{
+  return x != 1 ? -x : -1;
+}
+
+int test_im1(int x)
+{
+  return x != -1 ? -x : 1;
+}
+
+unsigned int test_u0(unsigned int x)
+{
+  return x != 0 ? -x : 0;
+}
+
+unsigned int test_u1(unsigned int x)
+{
+  return x != 1 ? -x : ~0u;
+}
+
+unsigned int test_um1(unsigned int x)
+{
+  return x != ~0u ? -x : 1;
+}
+
+unsigned char test_uc0(unsigned char x)
+{
+  return x != 0 ? -x : 0;
+}
+
+unsigned char test_uc1(unsigned char x)
+{
+  return x != 1 ? -x : 255;
+}
+
+unsigned char test_uc127(unsigned char x)
+{
+  return x != 127 ? -x : 129;
+}
+
+unsigned char test_uc128(unsigned char x)
+{
+  return x != 128 ? -x : 128;
+}
+
+unsigned char test_uc255(unsigned char x)
+{
+  return x != 255 ? -x : 1;
+}
+
+/* { dg-final { scan-tree-dump-not "goto" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/fold-condneg-2.c b/gcc/testsuite/gcc.dg/fold-condneg-2.c
new file mode 100644
index 00000000000..1af24636ec7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-condneg-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftrapv -fdump-tree-optimized" } */
+
+#define INT_MIN  (-__INT_MAX__ - 1)
+
+int test(int x)
+{
+  return x != INT_MIN ? -x : INT_MIN;
+}
+
+/* { dg-final { scan-tree-dump "goto" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/fold-condnot-1.c b/gcc/testsuite/gcc.dg/fold-condnot-1.c
new file mode 100644
index 00000000000..75d558c2686
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-condnot-1.c
@@ -0,0 +1,84 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int test_i0(int x)
+{
+  return x != 0 ? ~x : ~0;
+}
+
+int test_i1(int x)
+{
+  return x != 1 ? ~x : -2;
+}
+
+int test_im1(int x)
+{
+  return x != ~0 ? ~x : 0;
+}
+
+unsigned int test_u0(unsigned int x)
+{
+  return x != 0 ? ~x : ~0;
+}
+
+unsigned int test_u1(unsigned int x)
+{
+  return x != 1 ? ~x : ~1u;
+}
+
+unsigned int test_um1(unsigned int x)
+{
+  return x != ~0u ? ~x : 0;
+}
+
+signed char test_c0(signed char x)
+{
+  return x != 0 ? ~x : -1;
+}
+
+signed char test_c1(signed char x)
+{
+  return x != 1 ? ~x : -2;
+}
+
+signed char test_cm1(signed char x)
+{
+  return x != -1 ? ~x : 0;
+}
+
+signed char test_cm128(signed char x)
+{
+  return x != -128 ? ~x : 127;
+}
+
+signed char test_c127(signed char x)
+{
+  return x != 127 ? ~x : -128;
+}
+
+unsigned char test_uc0(unsigned char x)
+{
+  return x != 0 ? ~x : 255;
+}
+
+unsigned char test_uc1(unsigned char x)
+{
+  return x != 1 ? ~x : 254;
+}
+
+unsigned char test_uc127(unsigned char x)
+{
+  return x != 127 ? ~x : 128;
+}
+
+unsigned char test_uc128(unsigned char x)
+{
+  return x != 128 ? ~x : 127;
+}
+
+unsigned char test_ucm1(unsigned char x)
+{
+  return x != 255 ? ~x : 0;
+}
+
+/* { dg-final { scan-tree-dump-not "goto" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/pr101145-1.c b/gcc/testsuite/gcc.dg/pr101145-1.c
new file mode 100644
index 00000000000..e6f7923d78a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr101145-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+unsigned foo(unsigned val, unsigned start)
+{
+  unsigned cnt = 0;
+  for (unsigned i = start; i > val; ++i)
+    cnt++;
+  return cnt;
+}
+
+/* { dg-final { scan-tree-dump "cnt_\[0-9\] = -start_\[0-9\]\\(D\\);" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/pr101145-2.c b/gcc/testsuite/gcc.dg/pr101145-2.c
new file mode 100644
index 00000000000..6ecfeb2c0d5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr101145-2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+unsigned foo(unsigned val, unsigned start)
+{
+  unsigned cnt = 0;
+  unsigned i = start;
+  do {
+    cnt++;
+    i++;
+  } while (i > val);
+  return cnt;
+}
+
+/* { dg-final { scan-tree-dump "cnt_\[0-9\] = start_\[0-9\]\\(D\\) >= val_\[0-9\]\\(D\\) \\? _\[0-9\] : 1;" "optimized" } } */
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 75109407124..06954e437f5 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -1478,7 +1478,7 @@ assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1,
    The number of iterations is stored to NITER.  */
 
 static bool
-number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0,
+number_of_iterations_until_wrap (class loop *loop, tree type, affine_iv *iv0,
 				 affine_iv *iv1, class tree_niter_desc *niter)
 {
   tree niter_type = unsigned_type_for (type);
@@ -1506,6 +1506,23 @@ number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0,
 
       num = fold_build2 (MINUS_EXPR, niter_type, wide_int_to_tree (type, max),
 			 iv1->base);
+
+      /* When base has the form iv + 1, if we know iv >= n, then iv + 1 < n
+	 only when iv + 1 overflows, i.e. when iv == TYPE_VALUE_MAX.  */
+      if (sgn == UNSIGNED
+	  && integer_onep (step)
+	  && TREE_CODE (iv1->base) == PLUS_EXPR
+	  && integer_onep (TREE_OPERAND (iv1->base, 1)))
+	{
+	  tree cond = fold_build2 (GE_EXPR, boolean_type_node,
+				   TREE_OPERAND (iv1->base, 0), iv0->base);
+	  cond = simplify_using_initial_conditions (loop, cond);
+	  if (integer_onep (cond))
+	    may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node,
+				       TREE_OPERAND (iv1->base, 0),
+				       TYPE_MAX_VALUE (type));
+	}
+
       high = max;
       if (TREE_CODE (iv1->base) == INTEGER_CST)
 	low = wi::to_wide (iv1->base) - 1;


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-12-01 19:59 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-01 19:59 [gcc r12-5699] Final value replacement improvements for until-wrap loops Roger Sayle

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).