* [PATCH] reassoc: Optimize x > 0x1fff || y > 0x1fff into (x | y) > 0x1fff [PR56719]
@ 2020-12-30 9:18 Jakub Jelinek
2020-12-31 8:39 ` Richard Biener
0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2020-12-30 9:18 UTC (permalink / raw)
To: Richard Biener; +Cc: gcc-patches
Hi!
The following patch adds an optimization mentioned in PR56719 #c8.
We already have the x != 0 && y != 0 && z != 0 into (x | y | z) != 0
and x != -1 && y != -1 && y != -1 into (x & y & z) != -1
optimizations, this patch just extends that to
x < C && y < C && z < C for power of two constants C into
(x | y | z) < C (for unsigned comparisons).
I didn't want to create too many buckets (there can be TYPE_PRECISION such
constants), so the patch instead just uses one buckets for all such
constants and loops over that bucket up to TYPE_PRECISION times.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2020-12-30 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/56719
* tree-ssa-reassoc.c (optimize_range_tests_cmp_bitwise): Also optimize
x < C && y < C && z < C when C is a power of two constant into
(x | y | z) < C.
* gcc.dg/tree-ssa/pr56719.c: New test.
--- gcc/tree-ssa-reassoc.c.jj 2020-12-01 13:19:12.859127403 +0100
+++ gcc/tree-ssa-reassoc.c 2020-12-29 12:42:23.432102952 +0100
@@ -3317,7 +3317,9 @@ optimize_range_tests_to_bit_test (enum t
}
/* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
- and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1. */
+ and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.
+ Also, handle x < C && y < C && z < C where C is power of two as
+ (x | y | z) < C. */
static bool
optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
@@ -3333,20 +3335,44 @@ optimize_range_tests_cmp_bitwise (enum t
for (i = first; i < length; i++)
{
+ int idx;
+
if (ranges[i].exp == NULL_TREE
|| TREE_CODE (ranges[i].exp) != SSA_NAME
|| !ranges[i].in_p
|| TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
- || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
- || ranges[i].low == NULL_TREE
- || ranges[i].low != ranges[i].high)
+ || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
continue;
- bool zero_p = integer_zerop (ranges[i].low);
- if (!zero_p && !integer_all_onesp (ranges[i].low))
+ if (ranges[i].low != NULL_TREE
+ && ranges[i].high != NULL_TREE
+ && tree_int_cst_equal (ranges[i].low, ranges[i].high))
+ {
+ idx = !integer_zerop (ranges[i].low);
+ if (idx && !integer_all_onesp (ranges[i].low))
+ continue;
+ }
+ else if (ranges[i].high != NULL_TREE
+ && TREE_CODE (ranges[i].high) == INTEGER_CST)
+ {
+ wide_int w = wi::to_wide (ranges[i].high);
+ int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
+ int l = wi::clz (w);
+ idx = 2;
+ if (l <= 0
+ || l >= prec
+ || w != wi::mask (prec - l, false, prec))
+ continue;
+ if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
+ && ranges[i].low == NULL_TREE)
+ || (ranges[i].low
+ && integer_zerop (ranges[i].low))))
+ continue;
+ }
+ else
continue;
- b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
+ b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 3 + idx;
if (buckets.length () <= b)
buckets.safe_grow_cleared (b + 1, true);
if (chains.length () <= (unsigned) i)
@@ -3359,6 +3385,44 @@ optimize_range_tests_cmp_bitwise (enum t
if (i && chains[i - 1])
{
int j, k = i;
+ if ((b % 3) == 2)
+ {
+ /* When ranges[X - 1].high + 1 is a power of two,
+ we need to process the same bucket up to
+ precision - 1 times, each time split the entries
+ with the same high bound into one chain and the
+ rest into another one to be processed later. */
+ int this_prev = i;
+ int other_prev = 0;
+ for (j = chains[i - 1]; j; j = chains[j - 1])
+ {
+ if (tree_int_cst_equal (ranges[i - 1].high,
+ ranges[j - 1].high))
+ {
+ chains[this_prev - 1] = j;
+ this_prev = j;
+ }
+ else if (other_prev == 0)
+ {
+ buckets[b] = j;
+ other_prev = j;
+ }
+ else
+ {
+ chains[other_prev - 1] = j;
+ other_prev = j;
+ }
+ }
+ chains[this_prev - 1] = 0;
+ if (other_prev)
+ chains[other_prev - 1] = 0;
+ if (chains[i - 1] == 0)
+ {
+ if (other_prev)
+ b--;
+ continue;
+ }
+ }
for (j = chains[i - 1]; j; j = chains[j - 1])
{
gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
@@ -3426,8 +3490,8 @@ optimize_range_tests_cmp_bitwise (enum t
exp = gimple_assign_lhs (g);
}
g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
- (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
- op, exp);
+ (b % 3) == 1
+ ? BIT_AND_EXPR : BIT_IOR_EXPR, op, exp);
gimple_seq_add_stmt_without_update (&seq, g);
op = gimple_assign_lhs (g);
}
@@ -3435,10 +3499,13 @@ optimize_range_tests_cmp_bitwise (enum t
if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
candidates.length (), opcode, ops, op,
seq, true, ranges[k - 1].low,
- ranges[k - 1].low, strict_overflow_p))
+ ranges[k - 1].high, strict_overflow_p))
any_changes = true;
else
gimple_seq_discard (seq);
+ if ((b % 3) == 2 && buckets[b] != i)
+ /* There is more work to do for this bucket. */
+ b--;
}
return any_changes;
--- gcc/testsuite/gcc.dg/tree-ssa/pr56719.c.jj 2020-12-29 12:58:31.662269428 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr56719.c 2020-12-29 12:57:47.367768181 +0100
@@ -0,0 +1,33 @@
+/* PR tree-optimization/56719 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times " > 1023;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " > 2047;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " > 8191;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " <= 1023;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " <= 4095;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " <= 8191;" 1 "optimized" } } */
+
+int
+f1 (int x, int y)
+{
+ return x > 0x3ffU || y > 0x3ffU;
+}
+
+int
+f2 (int x, int y, int z, unsigned w)
+{
+ return x > 0x1fffU || z > 0x7ffU || w > 0x7ffU || y > 0x1fffU;
+}
+
+int
+f3 (int x, int y)
+{
+ return x <= 0x3ffU && y <= 0x3ffU;
+}
+
+int
+f4 (int x, int y, unsigned z, unsigned w)
+{
+ return x <= 0x1fffU && z <= 0xfff && w <= 0xfff && y <= 0x1fffU;
+}
Jakub
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] reassoc: Optimize x > 0x1fff || y > 0x1fff into (x | y) > 0x1fff [PR56719]
2020-12-30 9:18 [PATCH] reassoc: Optimize x > 0x1fff || y > 0x1fff into (x | y) > 0x1fff [PR56719] Jakub Jelinek
@ 2020-12-31 8:39 ` Richard Biener
0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2020-12-31 8:39 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: gcc-patches
On December 30, 2020 10:18:52 AM GMT+01:00, Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>The following patch adds an optimization mentioned in PR56719 #c8.
>We already have the x != 0 && y != 0 && z != 0 into (x | y | z) != 0
>and x != -1 && y != -1 && y != -1 into (x & y & z) != -1
>optimizations, this patch just extends that to
>x < C && y < C && z < C for power of two constants C into
>(x | y | z) < C (for unsigned comparisons).
>
>I didn't want to create too many buckets (there can be TYPE_PRECISION
>such
>constants), so the patch instead just uses one buckets for all such
>constants and loops over that bucket up to TYPE_PRECISION times.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Ok.
Richard.
>2020-12-30 Jakub Jelinek <jakub@redhat.com>
>
> PR tree-optimization/56719
> * tree-ssa-reassoc.c (optimize_range_tests_cmp_bitwise): Also optimize
> x < C && y < C && z < C when C is a power of two constant into
> (x | y | z) < C.
>
> * gcc.dg/tree-ssa/pr56719.c: New test.
>
>--- gcc/tree-ssa-reassoc.c.jj 2020-12-01 13:19:12.859127403 +0100
>+++ gcc/tree-ssa-reassoc.c 2020-12-29 12:42:23.432102952 +0100
>@@ -3317,7 +3317,9 @@ optimize_range_tests_to_bit_test (enum t
> }
>
> /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
>- and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.
> */
>+ and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.
>+ Also, handle x < C && y < C && z < C where C is power of two as
>+ (x | y | z) < C. */
>
> static bool
>optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int
>length,
>@@ -3333,20 +3335,44 @@ optimize_range_tests_cmp_bitwise (enum t
>
> for (i = first; i < length; i++)
> {
>+ int idx;
>+
> if (ranges[i].exp == NULL_TREE
> || TREE_CODE (ranges[i].exp) != SSA_NAME
> || !ranges[i].in_p
> || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
>- || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
>- || ranges[i].low == NULL_TREE
>- || ranges[i].low != ranges[i].high)
>+ || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
> continue;
>
>- bool zero_p = integer_zerop (ranges[i].low);
>- if (!zero_p && !integer_all_onesp (ranges[i].low))
>+ if (ranges[i].low != NULL_TREE
>+ && ranges[i].high != NULL_TREE
>+ && tree_int_cst_equal (ranges[i].low, ranges[i].high))
>+ {
>+ idx = !integer_zerop (ranges[i].low);
>+ if (idx && !integer_all_onesp (ranges[i].low))
>+ continue;
>+ }
>+ else if (ranges[i].high != NULL_TREE
>+ && TREE_CODE (ranges[i].high) == INTEGER_CST)
>+ {
>+ wide_int w = wi::to_wide (ranges[i].high);
>+ int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
>+ int l = wi::clz (w);
>+ idx = 2;
>+ if (l <= 0
>+ || l >= prec
>+ || w != wi::mask (prec - l, false, prec))
>+ continue;
>+ if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
>+ && ranges[i].low == NULL_TREE)
>+ || (ranges[i].low
>+ && integer_zerop (ranges[i].low))))
>+ continue;
>+ }
>+ else
> continue;
>
>- b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
>+ b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 3 + idx;
> if (buckets.length () <= b)
> buckets.safe_grow_cleared (b + 1, true);
> if (chains.length () <= (unsigned) i)
>@@ -3359,6 +3385,44 @@ optimize_range_tests_cmp_bitwise (enum t
> if (i && chains[i - 1])
> {
> int j, k = i;
>+ if ((b % 3) == 2)
>+ {
>+ /* When ranges[X - 1].high + 1 is a power of two,
>+ we need to process the same bucket up to
>+ precision - 1 times, each time split the entries
>+ with the same high bound into one chain and the
>+ rest into another one to be processed later. */
>+ int this_prev = i;
>+ int other_prev = 0;
>+ for (j = chains[i - 1]; j; j = chains[j - 1])
>+ {
>+ if (tree_int_cst_equal (ranges[i - 1].high,
>+ ranges[j - 1].high))
>+ {
>+ chains[this_prev - 1] = j;
>+ this_prev = j;
>+ }
>+ else if (other_prev == 0)
>+ {
>+ buckets[b] = j;
>+ other_prev = j;
>+ }
>+ else
>+ {
>+ chains[other_prev - 1] = j;
>+ other_prev = j;
>+ }
>+ }
>+ chains[this_prev - 1] = 0;
>+ if (other_prev)
>+ chains[other_prev - 1] = 0;
>+ if (chains[i - 1] == 0)
>+ {
>+ if (other_prev)
>+ b--;
>+ continue;
>+ }
>+ }
> for (j = chains[i - 1]; j; j = chains[j - 1])
> {
> gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
>@@ -3426,8 +3490,8 @@ optimize_range_tests_cmp_bitwise (enum t
> exp = gimple_assign_lhs (g);
> }
> g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
>- (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
>- op, exp);
>+ (b % 3) == 1
>+ ? BIT_AND_EXPR : BIT_IOR_EXPR, op, exp);
> gimple_seq_add_stmt_without_update (&seq, g);
> op = gimple_assign_lhs (g);
> }
>@@ -3435,10 +3499,13 @@ optimize_range_tests_cmp_bitwise (enum t
> if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
> candidates.length (), opcode, ops, op,
> seq, true, ranges[k - 1].low,
>- ranges[k - 1].low, strict_overflow_p))
>+ ranges[k - 1].high, strict_overflow_p))
> any_changes = true;
> else
> gimple_seq_discard (seq);
>+ if ((b % 3) == 2 && buckets[b] != i)
>+ /* There is more work to do for this bucket. */
>+ b--;
> }
>
> return any_changes;
>--- gcc/testsuite/gcc.dg/tree-ssa/pr56719.c.jj 2020-12-29
>12:58:31.662269428 +0100
>+++ gcc/testsuite/gcc.dg/tree-ssa/pr56719.c 2020-12-29
>12:57:47.367768181 +0100
>@@ -0,0 +1,33 @@
>+/* PR tree-optimization/56719 */
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fdump-tree-optimized" } */
>+/* { dg-final { scan-tree-dump-times " > 1023;" 1 "optimized" } } */
>+/* { dg-final { scan-tree-dump-times " > 2047;" 1 "optimized" } } */
>+/* { dg-final { scan-tree-dump-times " > 8191;" 1 "optimized" } } */
>+/* { dg-final { scan-tree-dump-times " <= 1023;" 1 "optimized" } } */
>+/* { dg-final { scan-tree-dump-times " <= 4095;" 1 "optimized" } } */
>+/* { dg-final { scan-tree-dump-times " <= 8191;" 1 "optimized" } } */
>+
>+int
>+f1 (int x, int y)
>+{
>+ return x > 0x3ffU || y > 0x3ffU;
>+}
>+
>+int
>+f2 (int x, int y, int z, unsigned w)
>+{
>+ return x > 0x1fffU || z > 0x7ffU || w > 0x7ffU || y > 0x1fffU;
>+}
>+
>+int
>+f3 (int x, int y)
>+{
>+ return x <= 0x3ffU && y <= 0x3ffU;
>+}
>+
>+int
>+f4 (int x, int y, unsigned z, unsigned w)
>+{
>+ return x <= 0x1fffU && z <= 0xfff && w <= 0xfff && y <= 0x1fffU;
>+}
>
> Jakub
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2020-12-31 8:39 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-30 9:18 [PATCH] reassoc: Optimize x > 0x1fff || y > 0x1fff into (x | y) > 0x1fff [PR56719] Jakub Jelinek
2020-12-31 8:39 ` Richard Biener
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).