public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Canonicalize fancy ways of expressing blend operation into COND_EXPR (PR tree-optimization/92834)
@ 2019-12-06 16:41 Jakub Jelinek
  2019-12-09  9:08 ` Richard Biener
  2019-12-09  9:54 ` Marc Glisse
  0 siblings, 2 replies; 4+ messages in thread
From: Jakub Jelinek @ 2019-12-06 16:41 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

Hi!

The following patch canonicalizes fancy ways of writing cond ? A : B
into COND_EXPR, which is what we expect people writing and thus are able to
optimize it better.  If in some case we wouldn't optimize it better,
the right way would be improve the COND_EXPR handling, as that is what
people are using in the wild most of the time.
E.g. on the testcase in the patch on x86_64-linux with -O2, the difference
is that test used to be 519 bytes long and now is 223, with -O2
-march=skylake used to be the same 519 bytes long and now is 275 bytes (in
that case it uses the SSE4.1+ min/max).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-12-06  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/92834
	* match.pd (A - ((A - B) & -(C cmp D)) -> (C cmp D) ? B : A,
	A + ((B - A) & -(C cmp D)) -> (C cmp D) ? B : A): New simplifications.

	* gcc.dg/tree-ssa/pr92834.c: New test.

--- gcc/match.pd.jj	2019-12-06 14:07:26.877749065 +0100
+++ gcc/match.pd	2019-12-06 15:06:08.042953309 +0100
@@ -2697,6 +2697,31 @@ (define_operator_list COND_TERNARY
   (cmp (minmax @0 INTEGER_CST@1) INTEGER_CST@2)
   (comb (cmp @0 @2) (cmp @1 @2))))
 
+/* Undo fancy way of writing max/min or other ?: expressions,
+   like a - ((a - b) & -(a < b)), in this case into (a < b) ? b : a.
+   People normally use ?: and that is what we actually try to optimize.  */
+(for cmp (simple_comparison)
+ (simplify
+  (minus @0 (bit_and:c (minus @0 @1)
+		       (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
+  (if (INTEGRAL_TYPE_P (type)
+       && INTEGRAL_TYPE_P (TREE_TYPE (@4))
+       && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
+       && INTEGRAL_TYPE_P (TREE_TYPE (@5))
+       && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
+	   || !TYPE_UNSIGNED (TREE_TYPE (@4))))
+   (cond (cmp @2 @3) @1 @0)))
+ (simplify
+  (plus:c @0 (bit_and:c (minus @1 @0)
+			(convert? (negate@4 (convert? (cmp@5 @2 @3))))))
+  (if (INTEGRAL_TYPE_P (type)
+       && INTEGRAL_TYPE_P (TREE_TYPE (@4))
+       && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
+       && INTEGRAL_TYPE_P (TREE_TYPE (@5))
+       && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
+	   || !TYPE_UNSIGNED (TREE_TYPE (@4))))
+   (cond (cmp @2 @3) @1 @0))))
+
 /* Simplifications of shift and rotates.  */
 
 (for rotate (lrotate rrotate)
--- gcc/testsuite/gcc.dg/tree-ssa/pr92834.c.jj	2019-12-06 15:24:56.817353747 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr92834.c	2019-12-06 15:24:08.921100518 +0100
@@ -0,0 +1,122 @@
+/* PR tree-optimization/92834 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "MIN_EXPR <" 8 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "MAX_EXPR <" 8 "optimized" } } */
+
+static inline unsigned
+umax1 (unsigned a, unsigned b)
+{
+  return a - ((a - b) & -(a < b));
+}
+
+static inline unsigned
+umin1 (unsigned a, unsigned b)
+{
+  return a - ((a - b) & -(a > b));
+}
+
+static inline int
+smax1 (int a, int b)
+{
+  return a - ((a - b) & -(a < b));
+}
+
+static inline int
+smin1 (int a, int b)
+{
+  return a - ((a - b) & -(a > b));
+}
+
+static inline unsigned long long
+umax2 (unsigned long long a, unsigned long long b)
+{
+  return a - ((a - b) & -(a <= b));
+}
+
+static inline unsigned long long
+umin2 (unsigned long long a, unsigned long long b)
+{
+  return a - ((a - b) & -(a >= b));
+}
+
+static inline long long
+smax2 (long long a, long long b)
+{
+  return a - ((a - b) & -(a <= b));
+}
+
+static inline long long
+smin2 (long long a, long long b)
+{
+  return a - ((a - b) & -(a >= b));
+}
+
+static inline unsigned
+umax3 (unsigned a, unsigned b)
+{
+  return a + ((b - a) & -(a < b));
+}
+
+static inline unsigned
+umin3 (unsigned a, unsigned b)
+{
+  return a + ((b - a) & -(a > b));
+}
+
+static inline int
+smax3 (int a, int b)
+{
+  return a + ((b - a) & -(a < b));
+}
+
+static inline int
+smin3 (int a, int b)
+{
+  return a + ((b - a) & -(a > b));
+}
+
+static inline unsigned long long
+umax4 (unsigned long long a, unsigned long long b)
+{
+  return a + ((b - a) & -(a <= b));
+}
+
+static inline unsigned long long
+umin4 (unsigned long long a, unsigned long long b)
+{
+  return a + ((b - a) & -(a >= b));
+}
+
+static inline long long
+smax4 (long long a, long long b)
+{
+  return a + ((b - a) & -(a <= b));
+}
+
+static inline long long
+smin4 (long long a, long long b)
+{
+  return a + ((b - a) & -(a >= b));
+}
+
+void
+test (unsigned *x, int *y, unsigned long long *z, long long *w)
+{
+  x[2] = umax1 (x[0], x[1]);
+  x[5] = umin1 (x[2], x[3]);
+  y[2] = smax1 (y[0], y[1]);
+  y[5] = smin1 (y[2], y[3]);
+  z[2] = umax2 (z[0], z[1]);
+  z[5] = umin2 (z[2], z[3]);
+  w[2] = smax2 (w[0], w[1]);
+  w[5] = smin2 (w[2], w[3]);
+  x[8] = umax3 (x[6], x[7]);
+  x[11] = umin3 (x[9], x[10]);
+  y[8] = smax3 (y[6], y[7]);
+  y[11] = smin3 (y[9], y[10]);
+  z[8] = umax4 (z[6], z[7]);
+  z[11] = umin4 (z[9], z[10]);
+  w[8] = smax4 (w[6], w[7]);
+  w[11] = smin4 (w[9], w[10]);
+}

	Jakub

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Canonicalize fancy ways of expressing blend operation into COND_EXPR (PR tree-optimization/92834)
  2019-12-06 16:41 [PATCH] Canonicalize fancy ways of expressing blend operation into COND_EXPR (PR tree-optimization/92834) Jakub Jelinek
@ 2019-12-09  9:08 ` Richard Biener
  2019-12-09  9:54 ` Marc Glisse
  1 sibling, 0 replies; 4+ messages in thread
From: Richard Biener @ 2019-12-09  9:08 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 5626 bytes --]

On Fri, 6 Dec 2019, Jakub Jelinek wrote:

> Hi!
> 
> The following patch canonicalizes fancy ways of writing cond ? A : B
> into COND_EXPR, which is what we expect people writing and thus are able to
> optimize it better.  If in some case we wouldn't optimize it better,
> the right way would be improve the COND_EXPR handling, as that is what
> people are using in the wild most of the time.
> E.g. on the testcase in the patch on x86_64-linux with -O2, the difference
> is that test used to be 519 bytes long and now is 223, with -O2
> -march=skylake used to be the same 519 bytes long and now is 275 bytes (in
> that case it uses the SSE4.1+ min/max).
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2019-12-06  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/92834
> 	* match.pd (A - ((A - B) & -(C cmp D)) -> (C cmp D) ? B : A,
> 	A + ((B - A) & -(C cmp D)) -> (C cmp D) ? B : A): New simplifications.
> 
> 	* gcc.dg/tree-ssa/pr92834.c: New test.
> 
> --- gcc/match.pd.jj	2019-12-06 14:07:26.877749065 +0100
> +++ gcc/match.pd	2019-12-06 15:06:08.042953309 +0100
> @@ -2697,6 +2697,31 @@ (define_operator_list COND_TERNARY
>    (cmp (minmax @0 INTEGER_CST@1) INTEGER_CST@2)
>    (comb (cmp @0 @2) (cmp @1 @2))))
>  
> +/* Undo fancy way of writing max/min or other ?: expressions,
> +   like a - ((a - b) & -(a < b)), in this case into (a < b) ? b : a.
> +   People normally use ?: and that is what we actually try to optimize.  */
> +(for cmp (simple_comparison)
> + (simplify
> +  (minus @0 (bit_and:c (minus @0 @1)
> +		       (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
> +  (if (INTEGRAL_TYPE_P (type)
> +       && INTEGRAL_TYPE_P (TREE_TYPE (@4))
> +       && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
> +       && INTEGRAL_TYPE_P (TREE_TYPE (@5))
> +       && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
> +	   || !TYPE_UNSIGNED (TREE_TYPE (@4))))
> +   (cond (cmp @2 @3) @1 @0)))
> + (simplify
> +  (plus:c @0 (bit_and:c (minus @1 @0)
> +			(convert? (negate@4 (convert? (cmp@5 @2 @3))))))
> +  (if (INTEGRAL_TYPE_P (type)
> +       && INTEGRAL_TYPE_P (TREE_TYPE (@4))
> +       && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
> +       && INTEGRAL_TYPE_P (TREE_TYPE (@5))
> +       && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
> +	   || !TYPE_UNSIGNED (TREE_TYPE (@4))))
> +   (cond (cmp @2 @3) @1 @0))))
> +
>  /* Simplifications of shift and rotates.  */
>  
>  (for rotate (lrotate rrotate)
> --- gcc/testsuite/gcc.dg/tree-ssa/pr92834.c.jj	2019-12-06 15:24:56.817353747 +0100
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr92834.c	2019-12-06 15:24:08.921100518 +0100
> @@ -0,0 +1,122 @@
> +/* PR tree-optimization/92834 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-times "MIN_EXPR <" 8 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "MAX_EXPR <" 8 "optimized" } } */
> +
> +static inline unsigned
> +umax1 (unsigned a, unsigned b)
> +{
> +  return a - ((a - b) & -(a < b));
> +}
> +
> +static inline unsigned
> +umin1 (unsigned a, unsigned b)
> +{
> +  return a - ((a - b) & -(a > b));
> +}
> +
> +static inline int
> +smax1 (int a, int b)
> +{
> +  return a - ((a - b) & -(a < b));
> +}
> +
> +static inline int
> +smin1 (int a, int b)
> +{
> +  return a - ((a - b) & -(a > b));
> +}
> +
> +static inline unsigned long long
> +umax2 (unsigned long long a, unsigned long long b)
> +{
> +  return a - ((a - b) & -(a <= b));
> +}
> +
> +static inline unsigned long long
> +umin2 (unsigned long long a, unsigned long long b)
> +{
> +  return a - ((a - b) & -(a >= b));
> +}
> +
> +static inline long long
> +smax2 (long long a, long long b)
> +{
> +  return a - ((a - b) & -(a <= b));
> +}
> +
> +static inline long long
> +smin2 (long long a, long long b)
> +{
> +  return a - ((a - b) & -(a >= b));
> +}
> +
> +static inline unsigned
> +umax3 (unsigned a, unsigned b)
> +{
> +  return a + ((b - a) & -(a < b));
> +}
> +
> +static inline unsigned
> +umin3 (unsigned a, unsigned b)
> +{
> +  return a + ((b - a) & -(a > b));
> +}
> +
> +static inline int
> +smax3 (int a, int b)
> +{
> +  return a + ((b - a) & -(a < b));
> +}
> +
> +static inline int
> +smin3 (int a, int b)
> +{
> +  return a + ((b - a) & -(a > b));
> +}
> +
> +static inline unsigned long long
> +umax4 (unsigned long long a, unsigned long long b)
> +{
> +  return a + ((b - a) & -(a <= b));
> +}
> +
> +static inline unsigned long long
> +umin4 (unsigned long long a, unsigned long long b)
> +{
> +  return a + ((b - a) & -(a >= b));
> +}
> +
> +static inline long long
> +smax4 (long long a, long long b)
> +{
> +  return a + ((b - a) & -(a <= b));
> +}
> +
> +static inline long long
> +smin4 (long long a, long long b)
> +{
> +  return a + ((b - a) & -(a >= b));
> +}
> +
> +void
> +test (unsigned *x, int *y, unsigned long long *z, long long *w)
> +{
> +  x[2] = umax1 (x[0], x[1]);
> +  x[5] = umin1 (x[2], x[3]);
> +  y[2] = smax1 (y[0], y[1]);
> +  y[5] = smin1 (y[2], y[3]);
> +  z[2] = umax2 (z[0], z[1]);
> +  z[5] = umin2 (z[2], z[3]);
> +  w[2] = smax2 (w[0], w[1]);
> +  w[5] = smin2 (w[2], w[3]);
> +  x[8] = umax3 (x[6], x[7]);
> +  x[11] = umin3 (x[9], x[10]);
> +  y[8] = smax3 (y[6], y[7]);
> +  y[11] = smin3 (y[9], y[10]);
> +  z[8] = umax4 (z[6], z[7]);
> +  z[11] = umin4 (z[9], z[10]);
> +  w[8] = smax4 (w[6], w[7]);
> +  w[11] = smin4 (w[9], w[10]);
> +}
> 
> 	Jakub
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Canonicalize fancy ways of expressing blend operation into COND_EXPR (PR tree-optimization/92834)
  2019-12-06 16:41 [PATCH] Canonicalize fancy ways of expressing blend operation into COND_EXPR (PR tree-optimization/92834) Jakub Jelinek
  2019-12-09  9:08 ` Richard Biener
@ 2019-12-09  9:54 ` Marc Glisse
  2019-12-09 10:30   ` Jakub Jelinek
  1 sibling, 1 reply; 4+ messages in thread
From: Marc Glisse @ 2019-12-09  9:54 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Biener, gcc-patches

On Fri, 6 Dec 2019, Jakub Jelinek wrote:

> --- gcc/match.pd.jj	2019-12-06 14:07:26.877749065 +0100
> +++ gcc/match.pd	2019-12-06 15:06:08.042953309 +0100
> @@ -2697,6 +2697,31 @@ (define_operator_list COND_TERNARY
>   (cmp (minmax @0 INTEGER_CST@1) INTEGER_CST@2)
>   (comb (cmp @0 @2) (cmp @1 @2))))
>
> +/* Undo fancy way of writing max/min or other ?: expressions,
> +   like a - ((a - b) & -(a < b)), in this case into (a < b) ? b : a.
> +   People normally use ?: and that is what we actually try to optimize.  */
> +(for cmp (simple_comparison)
> + (simplify
> +  (minus @0 (bit_and:c (minus @0 @1)
> +		       (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
> +  (if (INTEGRAL_TYPE_P (type)
> +       && INTEGRAL_TYPE_P (TREE_TYPE (@4))
> +       && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
> +       && INTEGRAL_TYPE_P (TREE_TYPE (@5))
> +       && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
> +	   || !TYPE_UNSIGNED (TREE_TYPE (@4))))
> +   (cond (cmp @2 @3) @1 @0)))

I was going to suggest
  (cond @5 @1 @0)

and possibly replacing (cmp@5 @2 @3) with truth_valued_p@5, before 
remembering that COND_EXPR embeds the comparison, and that not 
transforming when we don't see the comparison is likely on purpose. Plus, 
if @5 was in a signed 1-bit type, it may look more like -1 than 1 and 
break the transformation (is that forbidden as return type of a 
comparion?).

-- 
Marc Glisse

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Canonicalize fancy ways of expressing blend operation into COND_EXPR (PR tree-optimization/92834)
  2019-12-09  9:54 ` Marc Glisse
@ 2019-12-09 10:30   ` Jakub Jelinek
  0 siblings, 0 replies; 4+ messages in thread
From: Jakub Jelinek @ 2019-12-09 10:30 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Biener

On Mon, Dec 09, 2019 at 10:54:34AM +0100, Marc Glisse wrote:
> On Fri, 6 Dec 2019, Jakub Jelinek wrote:
> 
> > --- gcc/match.pd.jj	2019-12-06 14:07:26.877749065 +0100
> > +++ gcc/match.pd	2019-12-06 15:06:08.042953309 +0100
> > @@ -2697,6 +2697,31 @@ (define_operator_list COND_TERNARY
> >   (cmp (minmax @0 INTEGER_CST@1) INTEGER_CST@2)
> >   (comb (cmp @0 @2) (cmp @1 @2))))
> > 
> > +/* Undo fancy way of writing max/min or other ?: expressions,
> > +   like a - ((a - b) & -(a < b)), in this case into (a < b) ? b : a.
> > +   People normally use ?: and that is what we actually try to optimize.  */
> > +(for cmp (simple_comparison)
> > + (simplify
> > +  (minus @0 (bit_and:c (minus @0 @1)
> > +		       (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
> > +  (if (INTEGRAL_TYPE_P (type)
> > +       && INTEGRAL_TYPE_P (TREE_TYPE (@4))
> > +       && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
> > +       && INTEGRAL_TYPE_P (TREE_TYPE (@5))
> > +       && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
> > +	   || !TYPE_UNSIGNED (TREE_TYPE (@4))))
> > +   (cond (cmp @2 @3) @1 @0)))
> 
> I was going to suggest
>  (cond @5 @1 @0)
> 
> and possibly replacing (cmp@5 @2 @3) with truth_valued_p@5, before
> remembering that COND_EXPR embeds the comparison, and that not transforming
> when we don't see the comparison is likely on purpose. Plus, if @5 was in a
> signed 1-bit type, it may look more like -1 than 1 and break the
> transformation (is that forbidden as return type of a comparion?).

FYI, I've already committed the patch, so any improvement or bugfix needs to
be done incrementally.
The comparison in there was mainly an attempt to have a truth value
in there, so maybe truth_valued_p would work too, maybe even a
get_range_info checked value of [0, 1] would, though perhaps just
truth_valued_p is better because it involves some kind of setcc-like
instruction in the end.  All I'd like to see for comparisons is that they
are in the COND_EXPR's first operand if they can't throw.
I'm afraid I have no idea whether we can have signed 1-bit truth_valued_p
operations and what will happen with them, if it is possible, then I guess
an additional condition will be needed, check that it has prec > 1 or
TYPE_UNSIGNED.

	Jakub

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-12-09 10:30 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-06 16:41 [PATCH] Canonicalize fancy ways of expressing blend operation into COND_EXPR (PR tree-optimization/92834) Jakub Jelinek
2019-12-09  9:08 ` Richard Biener
2019-12-09  9:54 ` Marc Glisse
2019-12-09 10:30   ` Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).