public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] reassoc: Optimize x > 0x1fff || y > 0x1fff into (x | y) > 0x1fff [PR56719]
@ 2020-12-30  9:18 Jakub Jelinek
  2020-12-31  8:39 ` Richard Biener
  0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2020-12-30  9:18 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

Hi!

The following patch adds an optimization mentioned in PR56719 #c8.
We already have the x != 0 && y != 0 && z != 0 into (x | y | z) != 0
and x != -1 && y != -1 && y != -1 into (x & y & z) != -1
optimizations, this patch just extends that to
x < C && y < C && z < C for power of two constants C into
(x | y | z) < C (for unsigned comparisons).

I didn't want to create too many buckets (there can be TYPE_PRECISION such
constants), so the patch instead just uses one buckets for all such
constants and loops over that bucket up to TYPE_PRECISION times.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-12-30  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/56719
	* tree-ssa-reassoc.c (optimize_range_tests_cmp_bitwise): Also optimize
	x < C && y < C && z < C when C is a power of two constant into
	(x | y | z) < C.

	* gcc.dg/tree-ssa/pr56719.c: New test.

--- gcc/tree-ssa-reassoc.c.jj	2020-12-01 13:19:12.859127403 +0100
+++ gcc/tree-ssa-reassoc.c	2020-12-29 12:42:23.432102952 +0100
@@ -3317,7 +3317,9 @@ optimize_range_tests_to_bit_test (enum t
 }
 
 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
-   and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.  */
+   and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.
+   Also, handle x < C && y < C && z < C where C is power of two as
+   (x | y | z) < C.  */
 
 static bool
 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
@@ -3333,20 +3335,44 @@ optimize_range_tests_cmp_bitwise (enum t
 
   for (i = first; i < length; i++)
     {
+      int idx;
+
       if (ranges[i].exp == NULL_TREE
 	  || TREE_CODE (ranges[i].exp) != SSA_NAME
 	  || !ranges[i].in_p
 	  || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
-	  || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
-	  || ranges[i].low == NULL_TREE
-	  || ranges[i].low != ranges[i].high)
+	  || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
 	continue;
 
-      bool zero_p = integer_zerop (ranges[i].low);
-      if (!zero_p && !integer_all_onesp (ranges[i].low))
+      if (ranges[i].low != NULL_TREE
+	  && ranges[i].high != NULL_TREE
+	  && tree_int_cst_equal (ranges[i].low, ranges[i].high))
+	{
+	  idx = !integer_zerop (ranges[i].low);
+	  if (idx && !integer_all_onesp (ranges[i].low))
+	    continue;
+	}
+      else if (ranges[i].high != NULL_TREE
+	       && TREE_CODE (ranges[i].high) == INTEGER_CST)
+	{
+	  wide_int w = wi::to_wide (ranges[i].high);
+	  int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
+	  int l = wi::clz (w);
+	  idx = 2;
+	  if (l <= 0
+	      || l >= prec
+	      || w != wi::mask (prec - l, false, prec))
+	    continue;
+	  if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
+		 && ranges[i].low == NULL_TREE)
+		|| (ranges[i].low
+		    && integer_zerop (ranges[i].low))))
+	    continue;
+	}
+      else
 	continue;
 
-      b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
+      b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 3 + idx;
       if (buckets.length () <= b)
 	buckets.safe_grow_cleared (b + 1, true);
       if (chains.length () <= (unsigned) i)
@@ -3359,6 +3385,44 @@ optimize_range_tests_cmp_bitwise (enum t
     if (i && chains[i - 1])
       {
 	int j, k = i;
+	if ((b % 3) == 2)
+	  {
+	    /* When ranges[X - 1].high + 1 is a power of two,
+	       we need to process the same bucket up to
+	       precision - 1 times, each time split the entries
+	       with the same high bound into one chain and the
+	       rest into another one to be processed later.  */
+	    int this_prev = i;
+	    int other_prev = 0;
+	    for (j = chains[i - 1]; j; j = chains[j - 1])
+	      {
+		if (tree_int_cst_equal (ranges[i - 1].high,
+					ranges[j - 1].high))
+		  {
+		    chains[this_prev - 1] = j;
+		    this_prev = j;
+		  }
+		else if (other_prev == 0)
+		  {
+		    buckets[b] = j;
+		    other_prev = j;
+		  }
+		else
+		  {
+		    chains[other_prev - 1] = j;
+		    other_prev = j;
+		  }
+	      }
+	    chains[this_prev - 1] = 0;
+	    if (other_prev)
+	      chains[other_prev - 1] = 0;
+	    if (chains[i - 1] == 0)
+	      {
+		if (other_prev)
+		  b--;
+		continue;
+	      }
+	  }
 	for (j = chains[i - 1]; j; j = chains[j - 1])
 	  {
 	    gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
@@ -3426,8 +3490,8 @@ optimize_range_tests_cmp_bitwise (enum t
 		exp = gimple_assign_lhs (g);
 	      }
 	    g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
-				     (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
-				     op, exp);
+				     (b % 3) == 1
+				     ? BIT_AND_EXPR : BIT_IOR_EXPR, op, exp);
 	    gimple_seq_add_stmt_without_update (&seq, g);
 	    op = gimple_assign_lhs (g);
 	  }
@@ -3435,10 +3499,13 @@ optimize_range_tests_cmp_bitwise (enum t
 	if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
 			       candidates.length (), opcode, ops, op,
 			       seq, true, ranges[k - 1].low,
-			       ranges[k - 1].low, strict_overflow_p))
+			       ranges[k - 1].high, strict_overflow_p))
 	  any_changes = true;
 	else
 	  gimple_seq_discard (seq);
+	if ((b % 3) == 2 && buckets[b] != i)
+	  /* There is more work to do for this bucket.  */
+	  b--;
       }
 
   return any_changes;
--- gcc/testsuite/gcc.dg/tree-ssa/pr56719.c.jj	2020-12-29 12:58:31.662269428 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr56719.c	2020-12-29 12:57:47.367768181 +0100
@@ -0,0 +1,33 @@
+/* PR tree-optimization/56719 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times " > 1023;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " > 2047;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " > 8191;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " <= 1023;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " <= 4095;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " <= 8191;" 1 "optimized" } } */
+
+int
+f1 (int x, int y)
+{
+  return x > 0x3ffU || y > 0x3ffU;
+}
+
+int
+f2 (int x, int y, int z, unsigned w)
+{
+  return x > 0x1fffU || z > 0x7ffU || w > 0x7ffU || y > 0x1fffU;
+}
+
+int
+f3 (int x, int y)
+{
+  return x <= 0x3ffU && y <= 0x3ffU;
+}
+
+int
+f4 (int x, int y, unsigned z, unsigned w)
+{
+  return x <= 0x1fffU && z <= 0xfff && w <= 0xfff && y <= 0x1fffU;
+}

	Jakub


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] reassoc: Optimize x > 0x1fff || y > 0x1fff into (x | y) > 0x1fff [PR56719]
  2020-12-30  9:18 [PATCH] reassoc: Optimize x > 0x1fff || y > 0x1fff into (x | y) > 0x1fff [PR56719] Jakub Jelinek
@ 2020-12-31  8:39 ` Richard Biener
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2020-12-31  8:39 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

On December 30, 2020 10:18:52 AM GMT+01:00, Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>The following patch adds an optimization mentioned in PR56719 #c8.
>We already have the x != 0 && y != 0 && z != 0 into (x | y | z) != 0
>and x != -1 && y != -1 && y != -1 into (x & y & z) != -1
>optimizations, this patch just extends that to
>x < C && y < C && z < C for power of two constants C into
>(x | y | z) < C (for unsigned comparisons).
>
>I didn't want to create too many buckets (there can be TYPE_PRECISION
>such
>constants), so the patch instead just uses one buckets for all such
>constants and loops over that bucket up to TYPE_PRECISION times.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok. 

Richard. 

>2020-12-30  Jakub Jelinek  <jakub@redhat.com>
>
>	PR tree-optimization/56719
>	* tree-ssa-reassoc.c (optimize_range_tests_cmp_bitwise): Also optimize
>	x < C && y < C && z < C when C is a power of two constant into
>	(x | y | z) < C.
>
>	* gcc.dg/tree-ssa/pr56719.c: New test.
>
>--- gcc/tree-ssa-reassoc.c.jj	2020-12-01 13:19:12.859127403 +0100
>+++ gcc/tree-ssa-reassoc.c	2020-12-29 12:42:23.432102952 +0100
>@@ -3317,7 +3317,9 @@ optimize_range_tests_to_bit_test (enum t
> }
> 
> /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
>-   and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.
> */
>+   and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.
>+   Also, handle x < C && y < C && z < C where C is power of two as
>+   (x | y | z) < C.  */
> 
> static bool
>optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int
>length,
>@@ -3333,20 +3335,44 @@ optimize_range_tests_cmp_bitwise (enum t
> 
>   for (i = first; i < length; i++)
>     {
>+      int idx;
>+
>       if (ranges[i].exp == NULL_TREE
> 	  || TREE_CODE (ranges[i].exp) != SSA_NAME
> 	  || !ranges[i].in_p
> 	  || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
>-	  || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
>-	  || ranges[i].low == NULL_TREE
>-	  || ranges[i].low != ranges[i].high)
>+	  || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
> 	continue;
> 
>-      bool zero_p = integer_zerop (ranges[i].low);
>-      if (!zero_p && !integer_all_onesp (ranges[i].low))
>+      if (ranges[i].low != NULL_TREE
>+	  && ranges[i].high != NULL_TREE
>+	  && tree_int_cst_equal (ranges[i].low, ranges[i].high))
>+	{
>+	  idx = !integer_zerop (ranges[i].low);
>+	  if (idx && !integer_all_onesp (ranges[i].low))
>+	    continue;
>+	}
>+      else if (ranges[i].high != NULL_TREE
>+	       && TREE_CODE (ranges[i].high) == INTEGER_CST)
>+	{
>+	  wide_int w = wi::to_wide (ranges[i].high);
>+	  int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
>+	  int l = wi::clz (w);
>+	  idx = 2;
>+	  if (l <= 0
>+	      || l >= prec
>+	      || w != wi::mask (prec - l, false, prec))
>+	    continue;
>+	  if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
>+		 && ranges[i].low == NULL_TREE)
>+		|| (ranges[i].low
>+		    && integer_zerop (ranges[i].low))))
>+	    continue;
>+	}
>+      else
> 	continue;
> 
>-      b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
>+      b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 3 + idx;
>       if (buckets.length () <= b)
> 	buckets.safe_grow_cleared (b + 1, true);
>       if (chains.length () <= (unsigned) i)
>@@ -3359,6 +3385,44 @@ optimize_range_tests_cmp_bitwise (enum t
>     if (i && chains[i - 1])
>       {
> 	int j, k = i;
>+	if ((b % 3) == 2)
>+	  {
>+	    /* When ranges[X - 1].high + 1 is a power of two,
>+	       we need to process the same bucket up to
>+	       precision - 1 times, each time split the entries
>+	       with the same high bound into one chain and the
>+	       rest into another one to be processed later.  */
>+	    int this_prev = i;
>+	    int other_prev = 0;
>+	    for (j = chains[i - 1]; j; j = chains[j - 1])
>+	      {
>+		if (tree_int_cst_equal (ranges[i - 1].high,
>+					ranges[j - 1].high))
>+		  {
>+		    chains[this_prev - 1] = j;
>+		    this_prev = j;
>+		  }
>+		else if (other_prev == 0)
>+		  {
>+		    buckets[b] = j;
>+		    other_prev = j;
>+		  }
>+		else
>+		  {
>+		    chains[other_prev - 1] = j;
>+		    other_prev = j;
>+		  }
>+	      }
>+	    chains[this_prev - 1] = 0;
>+	    if (other_prev)
>+	      chains[other_prev - 1] = 0;
>+	    if (chains[i - 1] == 0)
>+	      {
>+		if (other_prev)
>+		  b--;
>+		continue;
>+	      }
>+	  }
> 	for (j = chains[i - 1]; j; j = chains[j - 1])
> 	  {
> 	    gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
>@@ -3426,8 +3490,8 @@ optimize_range_tests_cmp_bitwise (enum t
> 		exp = gimple_assign_lhs (g);
> 	      }
> 	    g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
>-				     (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
>-				     op, exp);
>+				     (b % 3) == 1
>+				     ? BIT_AND_EXPR : BIT_IOR_EXPR, op, exp);
> 	    gimple_seq_add_stmt_without_update (&seq, g);
> 	    op = gimple_assign_lhs (g);
> 	  }
>@@ -3435,10 +3499,13 @@ optimize_range_tests_cmp_bitwise (enum t
> 	if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
> 			       candidates.length (), opcode, ops, op,
> 			       seq, true, ranges[k - 1].low,
>-			       ranges[k - 1].low, strict_overflow_p))
>+			       ranges[k - 1].high, strict_overflow_p))
> 	  any_changes = true;
> 	else
> 	  gimple_seq_discard (seq);
>+	if ((b % 3) == 2 && buckets[b] != i)
>+	  /* There is more work to do for this bucket.  */
>+	  b--;
>       }
> 
>   return any_changes;
>--- gcc/testsuite/gcc.dg/tree-ssa/pr56719.c.jj	2020-12-29
>12:58:31.662269428 +0100
>+++ gcc/testsuite/gcc.dg/tree-ssa/pr56719.c	2020-12-29
>12:57:47.367768181 +0100
>@@ -0,0 +1,33 @@
>+/* PR tree-optimization/56719 */
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fdump-tree-optimized" } */
>+/* { dg-final { scan-tree-dump-times " > 1023;" 1 "optimized" } } */
>+/* { dg-final { scan-tree-dump-times " > 2047;" 1 "optimized" } } */
>+/* { dg-final { scan-tree-dump-times " > 8191;" 1 "optimized" } } */
>+/* { dg-final { scan-tree-dump-times " <= 1023;" 1 "optimized" } } */
>+/* { dg-final { scan-tree-dump-times " <= 4095;" 1 "optimized" } } */
>+/* { dg-final { scan-tree-dump-times " <= 8191;" 1 "optimized" } } */
>+
>+int
>+f1 (int x, int y)
>+{
>+  return x > 0x3ffU || y > 0x3ffU;
>+}
>+
>+int
>+f2 (int x, int y, int z, unsigned w)
>+{
>+  return x > 0x1fffU || z > 0x7ffU || w > 0x7ffU || y > 0x1fffU;
>+}
>+
>+int
>+f3 (int x, int y)
>+{
>+  return x <= 0x3ffU && y <= 0x3ffU;
>+}
>+
>+int
>+f4 (int x, int y, unsigned z, unsigned w)
>+{
>+  return x <= 0x1fffU && z <= 0xfff && w <= 0xfff && y <= 0x1fffU;
>+}
>
>	Jakub


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2020-12-31  8:39 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-30  9:18 [PATCH] reassoc: Optimize x > 0x1fff || y > 0x1fff into (x | y) > 0x1fff [PR56719] Jakub Jelinek
2020-12-31  8:39 ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).