public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r13-793] Canonicalize X&-Y as X*Y in match.pd when Y is [0,1].
@ 2022-05-27  8:02 Roger Sayle
  0 siblings, 0 replies; only message in thread
From: Roger Sayle @ 2022-05-27  8:02 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:8fb94fc6097c0a934aac0d89c9c5e2038da67655

commit r13-793-g8fb94fc6097c0a934aac0d89c9c5e2038da67655
Author: Roger Sayle <roger@nextmovesoftware.com>
Date:   Fri May 27 08:57:46 2022 +0100

    Canonicalize X&-Y as X*Y in match.pd when Y is [0,1].
    
    "For every pessimization, there's an equal and opposite optimization".
    
    In the review of my original patch for PR middle-end/98865, Richard
    Biener pointed out that match.pd shouldn't be transforming X*Y into
    X&-Y as the former is considered cheaper by tree-ssa's cost model
    (operator count).  A corollary of this is that we should instead be
    transforming X&-Y into the cheaper X*Y as a preferred canonical form
    (especially as RTL expansion now intelligently selects the appropriate
    implementation based on the target's costs).
    
    With this patch we now generate identical code for:
    int foo(int x, int y) { return -(x&1) & y; }
    int bar(int x, int y) { return (x&1) * y; }
    
    specifically on x86_64-pc-linux-gnu both use and/neg/and with -O2,
    but both use and/mul with -Os.
    
    One minor wrinkle/improvement is that this patch includes three
    additional optimizations (that account for the change in canonical
    form) to continue to optimize PR92834 and PR94786.
    
    2022-05-27  Roger Sayle  <roger@nextmovesoftware.com>
    
    gcc/ChangeLog
            * match.pd (zero_one_valued_p): New predicate.
            (mult @0 @1): Use zero_one_valued_p for optimization to the
            expression "bit_and @0 @1".
            (bit_and (negate zero_one_valued_p@0) @1): Optimize to MULT_EXPR.
            (plus @0 (mult (minus @1 @0) zero_one_valued_p@2)): New transform.
            (minus @0 (mult (minus @0 @1) zero_one_valued_p@2)): Likewise.
            (bit_xor @0 (mult (bit_xor @0 @1) zero_one_valued_p@2)): Likewise.
            Remove three redundant transforms obsoleted by the three above.
    
    gcc/testsuite/ChangeLog
            * gcc.dg/pr98865.c: New test case.

Diff:
---
 gcc/match.pd                   | 86 ++++++++++++++++++++----------------------
 gcc/testsuite/gcc.dg/pr98865.c | 14 +++++++
 2 files changed, 55 insertions(+), 45 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 183a0d4123f..88c6c414881 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -285,14 +285,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
            || !COMPLEX_FLOAT_TYPE_P (type)))
    (negate @0)))
 
-/* Transform { 0 or 1 } * { 0 or 1 } into { 0 or 1 } & { 0 or 1 } */
-(simplify
- (mult SSA_NAME@1 SSA_NAME@2)
-  (if (INTEGRAL_TYPE_P (type)
-       && get_nonzero_bits (@1) == 1
-       && get_nonzero_bits (@2) == 1)
-   (bit_and @1 @2)))
-
 /* Transform x * { 0 or 1, 0 or 1, ... } into x & { 0 or -1, 0 or -1, ...},
    unless the target has native support for the former but not the latter.  */
 (simplify
@@ -1787,6 +1779,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (bit_not (bit_not @0))
   @0)
 
+(match zero_one_valued_p
+ @0
+ (if (INTEGRAL_TYPE_P (type) && tree_nonzero_bits (@0) == 1)))
+(match zero_one_valued_p
+ truth_valued_p@0)
+
+/* Transform { 0 or 1 } * { 0 or 1 } into { 0 or 1 } & { 0 or 1 }.  */
+(simplify
+ (mult zero_one_valued_p@0 zero_one_valued_p@1)
+ (if (INTEGRAL_TYPE_P (type))
+  (bit_and @0 @1)))
+
+/* Transform X & -Y into X * Y when Y is { 0 or 1 }.  */
+(simplify
+ (bit_and:c (convert? (negate zero_one_valued_p@0)) @1)
+ (if (INTEGRAL_TYPE_P (type)
+      && INTEGRAL_TYPE_P (TREE_TYPE (@0))
+      && TREE_CODE (TREE_TYPE (@0)) != BOOLEAN_TYPE
+      && !TYPE_UNSIGNED (TREE_TYPE (@0)))
+  (mult (convert @0) @1)))
+
 /* Convert ~ (-A) to A - 1.  */
 (simplify
  (bit_not (convert? (negate @0)))
@@ -3281,44 +3294,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (cmp @0 (minmax:c @0 @1))
   { constant_boolean_node (cmp == GE_EXPR || cmp == LE_EXPR, type); } ))
 
-/* Undo fancy way of writing max/min or other ?: expressions,
-   like a - ((a - b) & -(a < b)), in this case into (a < b) ? b : a.
+/* Undo fancy ways of writing max/min or other ?: expressions, like
+   a - ((a - b) & -(a < b))  and  a - (a - b) * (a < b) into (a < b) ? b : a.
    People normally use ?: and that is what we actually try to optimize.  */
-(for cmp (simple_comparison)
- (simplify
-  (minus @0 (bit_and:c (minus @0 @1)
-		       (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
-  (if (INTEGRAL_TYPE_P (type)
-       && INTEGRAL_TYPE_P (TREE_TYPE (@4))
-       && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
-       && INTEGRAL_TYPE_P (TREE_TYPE (@5))
-       && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
-	   || !TYPE_UNSIGNED (TREE_TYPE (@4)))
-       && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
-   (cond (cmp @2 @3) @1 @0)))
- (simplify
-  (plus:c @0 (bit_and:c (minus @1 @0)
-			(convert? (negate@4 (convert? (cmp@5 @2 @3))))))
-  (if (INTEGRAL_TYPE_P (type)
-       && INTEGRAL_TYPE_P (TREE_TYPE (@4))
-       && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
-       && INTEGRAL_TYPE_P (TREE_TYPE (@5))
-       && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
-	   || !TYPE_UNSIGNED (TREE_TYPE (@4)))
-       && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
-   (cond (cmp @2 @3) @1 @0)))
- /* Similarly with ^ instead of - though in that case with :c.  */
- (simplify
-  (bit_xor:c @0 (bit_and:c (bit_xor:c @0 @1)
-			   (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
-  (if (INTEGRAL_TYPE_P (type)
-       && INTEGRAL_TYPE_P (TREE_TYPE (@4))
-       && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
-       && INTEGRAL_TYPE_P (TREE_TYPE (@5))
-       && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
-	   || !TYPE_UNSIGNED (TREE_TYPE (@4)))
-       && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
-   (cond (cmp @2 @3) @1 @0))))
+/* Transform A + (B-A)*cmp into cmp ? B : A.  */
+(simplify
+ (plus:c @0 (mult:c (minus @1 @0) zero_one_valued_p@2))
+ (if (INTEGRAL_TYPE_P (type)
+      && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
+  (cond (convert:boolean_type_node @2) @1 @0)))
+/* Transform A - (A-B)*cmp into cmp ? B : A.  */
+(simplify
+ (minus @0 (mult:c (minus @0 @1) zero_one_valued_p@2))
+ (if (INTEGRAL_TYPE_P (type)
+      && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
+  (cond (convert:boolean_type_node @2) @1 @0)))
+/* Transform A ^ (A^B)*cmp into cmp ? B : A.  */
+(simplify
+ (bit_xor:c @0 (mult:c (bit_xor:c @0 @1) zero_one_valued_p@2))
+ (if (INTEGRAL_TYPE_P (type)
+      && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
+  (cond (convert:boolean_type_node @2) @1 @0)))
 
 /* Simplifications of shift and rotates.  */
 
diff --git a/gcc/testsuite/gcc.dg/pr98865.c b/gcc/testsuite/gcc.dg/pr98865.c
new file mode 100644
index 00000000000..95f72703390
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr98865.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int foo(int x, int y)
+{
+  return -(x&1) & y;
+}
+
+int bar(int x, int y)
+{
+  return (x&1) * y;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 2 "optimized" } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-05-27  8:02 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-27  8:02 [gcc r13-793] Canonicalize X&-Y as X*Y in match.pd when Y is [0,1] Roger Sayle

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).