public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] PR tree-optimization/71343: Optimize (X<<C)&(Y<<C) as (X&Y)<<C.
@ 2022-08-08  8:07 Roger Sayle
  2022-08-08 11:42 ` Richard Biener
  0 siblings, 1 reply; 6+ messages in thread
From: Roger Sayle @ 2022-08-08  8:07 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2145 bytes --]


This patch resolves PR tree-optimization/71343, a missed-optimization
enhancement request where GCC fails to see that (a<<2)+(b<<2) == a*4+b*4.
This requires two related (sets of) optimizations to be added to match.pd.

The first is that (X<<C) op (Y<<C) can be simplified to (X op Y) << C,
for many binary operators, including AND, IOR, XOR, and (if overflow
isn't an issue) PLUS and MINUS.  Likewise, the right shifts (both logical
and arithmetic) and bit-wise logical operators can be simplified in a
similar fashion.  These all reduce the number of GIMPLE binary operations
from 3 to 2, by combining/eliminating a shift operation.

The second optimization reflects that the middle-end doesn't impose a
canonical form on multiplications by powers of two, vs. left shifts,
instead leaving these operations as specified by the programmer
unless there's a good reason to change them.  Hence, GIMPLE code may
contain the expressions "X * 8" and "X << 3" even though these represent
the same value/computation.  The tweak to match.pd is that comparison
operations whose operands are equivalent non-canonical expressions can
be taught their equivalence.  Hence "(X * 8) == (X << 3)" will always
evaluate to true, and "(X<<2) > 4*X" will always evaluate to false.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32},
with no new failures.  Ok for mainline?


2022-08-08  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
        PR tree-optimization/71343
        * match.pd (op (lshift @0 @1) (lshift @2 @1)): Optimize the
        expression (X<<C) + (Y<<C) to (X+Y)<<C for multiple operators.
        (op (rshift @0 @1) (rshift @2 @1)): Likwise, simplify (X>>C)^(Y>>C)
        to (X^Y)>>C for binary logical operators, AND, IOR and XOR.
        (cmp:c (lshift @0) (mult @1)): Optimize comparisons between
        shifts by integer constants and multiplications by powers of 2.

gcc/testsuite/ChangeLog
        PR tree-optimization/71343
        * gcc.dg/pr71343-1.c: New test case.
        * gcc.dg/pr71343-2.c: Likewise.


Thanks in advance,
Roger
--


[-- Attachment #2: patchfd.txt --]
[-- Type: text/plain, Size: 4890 bytes --]

diff --git a/gcc/match.pd b/gcc/match.pd
index f82f94a..d6b9cfb 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -982,6 +982,35 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
        && tree_nop_conversion_p (type, TREE_TYPE (@1)))
    (lshift @0 @2)))
 
+/* Shifts by constants distribute over several binary operations,
+   hence (X << C) + (Y << C) can be simplified to (X + Y) << C.  */
+(for op (plus minus)
+  (simplify
+    (op (lshift:s @0 INTEGER_CST@1) (lshift:s @2 INTEGER_CST@1))
+    (if (INTEGRAL_TYPE_P (type)
+	 && TYPE_OVERFLOW_WRAPS (type)
+	 && !TYPE_SATURATING (type)
+	 && tree_fits_shwi_p (@1)
+	 && tree_to_shwi (@1) > 0
+	 && tree_to_shwi (@1) < TYPE_PRECISION (type))
+      (lshift (op @0 @2) @1))))
+
+(for op (bit_and bit_ior bit_xor)
+  (simplify
+    (op (lshift:s @0 INTEGER_CST@1) (lshift:s @2 INTEGER_CST@1))
+    (if (INTEGRAL_TYPE_P (type)
+	 && tree_fits_shwi_p (@1)
+	 && tree_to_shwi (@1) > 0
+	 && tree_to_shwi (@1) < TYPE_PRECISION (type))
+      (lshift (op @0 @2) @1)))
+  (simplify
+    (op (rshift:s @0 INTEGER_CST@1) (rshift:s @2 INTEGER_CST@1))
+    (if (INTEGRAL_TYPE_P (type)
+	 && tree_fits_shwi_p (@1)
+	 && tree_to_shwi (@1) > 0
+	 && tree_to_shwi (@1) < TYPE_PRECISION (type))
+      (rshift (op @0 @2) @1))))
+
 /* Fold (1 << (C - x)) where C = precision(type) - 1
    into ((1 << C) >> x). */
 (simplify
@@ -2241,6 +2270,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (if (TREE_INT_CST_LOW (@1) & 1)
    { constant_boolean_node (cmp == NE_EXPR, type); })))
 
+/* Some tree expressions are intentionally non-canonical.
+   We handle the comparison of the equivalent forms here.  */
+(for cmp (eq le ge)
+  (simplify
+    (cmp:c (lshift @0 INTEGER_CST@1) (mult @0 integer_pow2p@2))
+    (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+	 && tree_fits_shwi_p (@1)
+	 && tree_to_shwi (@1) > 0
+	 && tree_to_shwi (@1) < TYPE_PRECISION  (TREE_TYPE (@0))
+	 && wi::to_wide (@1) == wi::exact_log2 (wi::to_wide (@2)))
+      { constant_boolean_node (true, type); })))
+
+(for cmp (ne lt gt)
+  (simplify
+    (cmp:c (lshift @0 INTEGER_CST@1) (mult @0 integer_pow2p@2))
+    (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+	 && tree_fits_shwi_p (@1)
+	 && tree_to_shwi (@1) > 0
+	 && tree_to_shwi (@1) < TYPE_PRECISION  (TREE_TYPE (@0))
+	 && wi::to_wide (@1) == wi::exact_log2 (wi::to_wide (@2)))
+      { constant_boolean_node (false, type); })))
+
 /* Arguments on which one can call get_nonzero_bits to get the bits
    possibly set.  */
 (match with_possible_nonzero_bits
diff --git a/gcc/testsuite/gcc.dg/pr71343-1.c b/gcc/testsuite/gcc.dg/pr71343-1.c
new file mode 100644
index 0000000..146f5fc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr71343-1.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+unsigned int foo_plus(unsigned int a, unsigned int b)
+{
+  return (a << 2) + (b << 2);
+}
+
+unsigned int foo_and(unsigned int a, unsigned int b)
+{
+  return (a << 2) & (b << 2);
+}
+
+unsigned int foo_ior(unsigned int a, unsigned int b)
+{
+  return (a << 2) | (b << 2);
+}
+
+unsigned int foo_xor(unsigned int a, unsigned int b)
+{
+  return (a << 2) ^ (b << 2);
+}
+
+unsigned int bar_and(unsigned int a, unsigned int b)
+{
+  return (a >> 2) & (b >> 2);
+}
+
+unsigned int bar_ior(unsigned int a, unsigned int b)
+{
+  return (a >> 2) | (b >> 2);
+}
+
+unsigned int bar_xor(unsigned int a, unsigned int b)
+{
+  return (a >> 2) ^ (b >> 2);
+}
+
+int baz_and(int a, int b)
+{
+  return (a >> 2) & (b >> 2);
+}
+
+int baz_ior(int a, int b)
+{
+  return (a >> 2) | (b >> 2);
+}
+
+int baz_xor(int a, int b)
+{
+  return (a >> 2) ^ (b >> 2);
+}
+
+/* { dg-final { scan-tree-dump-times " << " 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " >> " 6 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.dg/pr71343-2.c b/gcc/testsuite/gcc.dg/pr71343-2.c
new file mode 100644
index 0000000..11800a9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr71343-2.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+unsigned int test1(unsigned int a , unsigned int b)
+{
+  return (a << 2) + (b << 2) == a * 4 + b * 4;
+}
+
+unsigned int test2(unsigned int a , unsigned int b)
+{
+  return (a << 2) + (b << 2) == (a + b) << 2;
+}
+
+unsigned int test3(unsigned int a , unsigned int b)
+{
+  return a * 4 + b * 4 == (a + b) * 4;
+}
+
+unsigned int test4(unsigned int a , unsigned int b)
+{
+  return (a + b) << 2 == (a + b) * 4;
+}
+
+unsigned int test5(unsigned int a , unsigned int b)
+{
+  return (a << 2) + (b << 2) ==  (a + b) * 4;
+}
+
+unsigned int test6(unsigned int a , unsigned int b)
+{
+  return (a + b) << 2 == a * 4 + b * 4;
+}
+
+/* { dg-final { scan-tree-dump-times "return 1" 6 "optimized" } } */

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-09-14  8:25 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-08  8:07 [PATCH] PR tree-optimization/71343: Optimize (X<<C)&(Y<<C) as (X&Y)<<C Roger Sayle
2022-08-08 11:42 ` Richard Biener
2022-08-12 21:45   ` [PATCH take #2] " Roger Sayle
2022-08-15  7:48     ` Richard Biener
2022-09-13 17:54   ` [PATCH] PR tree-optimization/71343: Value number X<<2 as X*4 Roger Sayle
2022-09-14  8:24     ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).