> -----Original Message-----
> From: Richard Biener
> Sent: Monday, June 20, 2022 9:57 AM
> To: Tamar Christina
> Cc: gcc-patches@gcc.gnu.org; nd
> Subject: Re: [PATCH]middle-end simplify complex if expressions where
> comparisons are inverse of one another.
>
> On Thu, 16 Jun 2022, Tamar Christina wrote:
>
> > Hi All,
> >
> > This optimizes the following sequence
> >
> >   ((a < b) & c) | ((a >= b) & d)
> >
> > into
> >
> >   (a < b ? c : d) & 1
> >
> > for scalar. On vector we can omit the & 1.
> >
> > This changes the code generation from
> >
> > zoo2:
> > 	cmp	w0, w1
> > 	cset	w0, lt
> > 	cset	w1, ge
> > 	and	w0, w0, w2
> > 	and	w1, w1, w3
> > 	orr	w0, w0, w1
> > 	ret
> >
> > into
> >
> > 	cmp	w0, w1
> > 	csel	w0, w2, w3, lt
> > 	and	w0, w0, 1
> > 	ret
> >
> > and significantly reduces the number of selects we have to do in the
> > vector code.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
> > and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > 	* fold-const.cc (inverse_conditions_p): Traverse if SSA_NAME.
> > 	* match.pd: Add new rule.
> >
> > gcc/testsuite/ChangeLog:
> >
> > 	* gcc.target/aarch64/if-compare_1.c: New test.
> > 	* gcc.target/aarch64/if-compare_2.c: New test.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> > index 39a5a52958d87497f301826e706886b290771a2d..f180599b90150acd3ed895a64280aa3255061256 100644
> > --- a/gcc/fold-const.cc
> > +++ b/gcc/fold-const.cc
> > @@ -2833,15 +2833,38 @@ compcode_to_comparison (enum comparison_code code)
> >  bool
> >  inverse_conditions_p (const_tree cond1, const_tree cond2)
> >  {
> > -  return (COMPARISON_CLASS_P (cond1)
> > -	  && COMPARISON_CLASS_P (cond2)
> > -	  && (invert_tree_comparison
> > -	       (TREE_CODE (cond1),
> > -		HONOR_NANS (TREE_OPERAND (cond1, 0))) == TREE_CODE (cond2))
> > -	  && operand_equal_p (TREE_OPERAND (cond1, 0),
> > -			      TREE_OPERAND (cond2, 0), 0)
> > -	  && operand_equal_p (TREE_OPERAND (cond1, 1),
> > -			      TREE_OPERAND (cond2, 1), 0));
> > +  if (COMPARISON_CLASS_P (cond1)
> > +      && COMPARISON_CLASS_P (cond2)
> > +      && (invert_tree_comparison
> > +	   (TREE_CODE (cond1),
> > +	    HONOR_NANS (TREE_OPERAND (cond1, 0))) == TREE_CODE (cond2))
> > +      && operand_equal_p (TREE_OPERAND (cond1, 0),
> > +			  TREE_OPERAND (cond2, 0), 0)
> > +      && operand_equal_p (TREE_OPERAND (cond1, 1),
> > +			  TREE_OPERAND (cond2, 1), 0))
> > +    return true;
> > +
> > +  if (TREE_CODE (cond1) == SSA_NAME
> > +      && TREE_CODE (cond2) == SSA_NAME)
> > +    {
> > +      gimple *gcond1 = SSA_NAME_DEF_STMT (cond1);
> > +      gimple *gcond2 = SSA_NAME_DEF_STMT (cond2);
> > +      if (!is_gimple_assign (gcond1) || !is_gimple_assign (gcond2))
> > +	return false;
> > +
> > +      tree_code code1 = gimple_assign_rhs_code (gcond1);
> > +      tree_code code2 = gimple_assign_rhs_code (gcond2);
> > +      return TREE_CODE_CLASS (code1) == tcc_comparison
> > +	     && TREE_CODE_CLASS (code2) == tcc_comparison
> > +	     && invert_tree_comparison (code1,
> > +		  HONOR_NANS (gimple_arg (gcond1, 0))) == code2
> > +	     && operand_equal_p (gimple_arg (gcond1, 0),
> > +				 gimple_arg (gcond2, 0), 0)
> > +	     && operand_equal_p (gimple_arg (gcond1, 1),
> > +				 gimple_arg (gcond2, 1), 0);
> > +    }
> > +
> > +  return false;
>
> if we do extend inverse_condition_p please add an overload like
>
> bool
> inverse_condition_p (enum tree_code, tree op00, tree op01,
>		      enum tree_code, tree op10, tree op11)
>
> so you can avoid some code duplication here.

Done.
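As a quick aside on the scalar identity being folded (an editorial illustration, not part of the patch): when a < b the original expression evaluates to (1 & c) | (0 & d) == c & 1, and otherwise to (0 & c) | (1 & d) == d & 1, which is exactly (a < b ? c : d) & 1. A throwaway brute-force check along these lines (the ranges and layout are arbitrary) confirms the equivalence:

/* Brute-force check that ((a < b) & c) | ((a >= b) & d)
   equals (a < b ? c : d) & 1 for scalar ints.  */
#include <stdlib.h>

int main (void)
{
  for (int a = -4; a <= 4; a++)
    for (int b = -4; b <= 4; b++)
      for (int c = -4; c <= 4; c++)
        for (int d = -4; d <= 4; d++)
          {
            int lhs = ((a < b) & c) | ((a >= b) & d);
            int rhs = (a < b ? c : d) & 1;
            if (lhs != rhs)
              abort ();	/* Mismatch would indicate the fold is unsound.  */
          }
  return 0;
}

Compiling this with or without optimization should make no difference, since the fold only replaces one form of the computation with an equal-valued one.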
> > > } > > > > /* Return a tree for the comparison which is the combination of diff > > --git a/gcc/match.pd b/gcc/match.pd index > > > 6d691d302b339c0e4556b40af158b5208c12d08f..bad49dd348add751d9ec1e30 > 23e3 > > 4d9ac123194f 100644 > > --- a/gcc/match.pd > > +++ b/gcc/match.pd > > @@ -1160,6 +1160,32 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > > (convert (bit_and (negate (convert:utype { pmop[0]; })) > > (convert:utype @1))))))) > > > > +/* Fold (((a < b) & c) | ((a >= b) & d)) into (a < b ? c : d) & 1. > > +*/ (simplify (bit_ior > > + (bit_and:c (convert? @0) @2) > > + (bit_and:c (convert? @1) @3)) > > in case the comparison returns a signed bool this might turn out wrong. > Maybe simply use zero_one_valued_p@0 here instead of (convert? @0)? I think I still need the convert as the comparison gets promoted to int and the predicate doesn't handle extensions. So I left the convert but added the zero_one_valued_p@0 such that it's checking that the result of the comparison itself is at least 0 or 1. > > > + (if (inverse_conditions_p (@0, @1) > > + /* The scalar version has to be canonicalized after vectorization > > + because it makes unconditional loads conditional ones, which > > + means we lose vectorization because the loads may trap. */ > > + && canonicalize_math_after_vectorization_p ()) > > + (bit_and (cond @0 @2 @3) { build_each_one_cst (type); }))) > > I think you should restrict this to INTEGRAL_TYPE_P and use build_one_cst > (type) (also see below). > > you can do inverse_onditions_p with lock-step for over tcc_comparison and > inverted_tcc_comparison{,_with_nans} (see existing examples). I couldn't figure out how to combine this approach with the zero_one_valued_p predicate. The zero_one_valued_p would need to be on (cmp @0 @1) but don't think that is allowed. > > > +(simplify > > + (bit_ior > > + (bit_and:c (convert? (vec_cond:s @0 @4 integer_zerop)) @2) > > + (bit_and:c (convert? (vec_cond:s @1 @4 integer_zerop)) @3)) > > + (if (inverse_conditions_p (@0, @1) > > + && integer_onep (@4)) > > + (bit_and (vec_cond @0 @2 @3) @4))) > > +/* Fold (((a < b) & c) | ((a >= b) & d)) into a < b ? c : d. */ > > +(simplify (bit_ior > > + (bit_and:c (convert? (vec_cond:s @0 integer_minus_onep > > +integer_zerop)) @2) > > + (bit_and:c (convert? (vec_cond:s @1 integer_minus_onep > integer_zerop)) @3)) > > + (if (inverse_conditions_p (@0, @1)) > > + (vec_cond @0 @2 @3))) > > I think the duplication is pre-mature optimization - we should get the > (bit_and (..) integer_minus_onep) simplified. Also didn't we have (vec_cond > @0 -1 0) -> (view_convert @0) when the modes match? This wasn't in my tree at the time, I could use this representation instead but it wouldn't shorten the match tree. Combining them as you suggested below seems most optimal. > We might want to add (match zero_minus_one_valued_p) or use > truth_valued_p (with appropriate vector semantics, plus extend it). > Why do you need (convert? ...) for the vector case btw? > Since we have to defer the scalar version then the vectorizer won't > I guess the integral type and vector type cases are similar enough that the > patterns can be merged with conditionalizing only the replacement. > Merged. Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * fold-const.cc (inverse_conditions_p): Traverse if SSA_NAME. (inverse_conditions_p_1): New. * match.pd: Add new rule. gcc/testsuite/ChangeLog: * gcc.target/aarch64/if-compare_1.c: New test. 
Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* fold-const.cc (inverse_conditions_p): Traverse if SSA_NAME.
	(inverse_conditions_p_1): New.
	* match.pd: Add new rule.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/if-compare_1.c: New test.
	* gcc.target/aarch64/if-compare_2.c: New test.

--- inline copy of patch ---
diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 99021a82df4977b179b45db04e3083012c63067a..0628ffd395416d74bc031e3e4ac8246894ca564d 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -2829,20 +2829,55 @@ compcode_to_comparison (enum comparison_code code)
     }
 }
 
+
+/* Helper of inverse_condition_p.  Returns TRUE if CODE1 is the inverse of
+   CODE2 and OP00 == OP10 and OP01 == OP11.  */
+
+static bool
+inverse_conditions_p_1 (enum tree_code code1, tree op00, tree op01,
+			enum tree_code code2, tree op10, tree op11)
+{
+  return (invert_tree_comparison (code1, HONOR_NANS (op00)) == code2)
+	 && operand_equal_p (op00, op10, 0)
+	 && operand_equal_p (op01, op11, 0);
+}
+
 /* Return true if COND1 tests the opposite condition of COND2. */
 
 bool
 inverse_conditions_p (const_tree cond1, const_tree cond2)
 {
-  return (COMPARISON_CLASS_P (cond1)
-	  && COMPARISON_CLASS_P (cond2)
-	  && (invert_tree_comparison
-	       (TREE_CODE (cond1),
-		HONOR_NANS (TREE_OPERAND (cond1, 0))) == TREE_CODE (cond2))
-	  && operand_equal_p (TREE_OPERAND (cond1, 0),
-			      TREE_OPERAND (cond2, 0), 0)
-	  && operand_equal_p (TREE_OPERAND (cond1, 1),
-			      TREE_OPERAND (cond2, 1), 0));
+  if (COMPARISON_CLASS_P (cond1)
+      && COMPARISON_CLASS_P (cond2)
+      && inverse_conditions_p_1 (TREE_CODE (cond1),
+				 TREE_OPERAND (cond1, 0),
+				 TREE_OPERAND (cond1, 1),
+				 TREE_CODE (cond2),
+				 TREE_OPERAND (cond2, 0),
+				 TREE_OPERAND (cond2, 1)))
+    return true;
+
+  if (TREE_CODE (cond1) == SSA_NAME
+      && TREE_CODE (cond2) == SSA_NAME)
+    {
+      gimple *gcond1 = SSA_NAME_DEF_STMT (cond1);
+      gimple *gcond2 = SSA_NAME_DEF_STMT (cond2);
+      if (!is_gimple_assign (gcond1) || !is_gimple_assign (gcond2))
+	return false;
+
+      tree_code code1 = gimple_assign_rhs_code (gcond1);
+      tree_code code2 = gimple_assign_rhs_code (gcond2);
+      return TREE_CODE_CLASS (code1) == tcc_comparison
+	     && TREE_CODE_CLASS (code2) == tcc_comparison
+	     && inverse_conditions_p_1 (code1,
+					gimple_arg (gcond1, 0),
+					gimple_arg (gcond1, 1),
+					code2,
+					gimple_arg (gcond2, 0),
+					gimple_arg (gcond2, 1));
+    }
+
+  return false;
 }
 
 /* Return a tree for the comparison which is the combination of
diff --git a/gcc/match.pd b/gcc/match.pd
index 24cbbbb5bc1d718bd03af7712fc7255213f2a742..0a459f2804e296f4336aee70e351ddc45486c867 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1872,6 +1872,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (if (INTEGRAL_TYPE_P (type))
    (bit_and @0 @1)))
 
+/* Fold (((a < b) & c) | ((a >= b) & d)) into (a < b ? c : d) & 1.
+   and vector Fold (((a < b) & c) | ((a >= b) & d)) into a < b ? c : d.  */
+(simplify
+ (bit_ior
+  (bit_and:c (convert? zero_one_valued_p@0) @2)
+  (bit_and:c (convert? zero_one_valued_p@1) @3))
+  (if (INTEGRAL_TYPE_P (type)
+       && inverse_conditions_p (@0, @1)
+       /* The scalar version has to be canonicalized after vectorization
+	  because it makes unconditional loads conditional ones, which
+	  means we lose vectorization because the loads may trap.  */
+       && canonicalize_math_after_vectorization_p ())
+   (bit_and (cond @0 @2 @3) { build_one_cst (type); })))
+(simplify
+ (bit_ior
+  (bit_and:c (vec_cond:s @0 @4 integer_zerop) @2)
+  (bit_and:c (vec_cond:s @1 @4 integer_zerop) @3))
+   (if (inverse_conditions_p (@0, @1))
+    (switch
+     (if (integer_onep (@4))
+      (bit_and (vec_cond @0 @2 @3) @4))
+     (if (integer_minus_onep (@4))
+      (vec_cond @0 @2 @3)))))
+
 /* Transform X & -Y into X * Y when Y is { 0 or 1 }.  */
 (simplify
  (bit_and:c (convert? (negate zero_one_valued_p@0)) @1)
diff --git a/gcc/testsuite/gcc.target/aarch64/if-compare_1.c b/gcc/testsuite/gcc.target/aarch64/if-compare_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..05a1292fa90c70b14a7985122f43711f55d047ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/if-compare_1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+/*
+**zoo2:
+**	cmp	w0, w1
+**	csel	w0, w2, w3, lt
+**	and	w0, w0, 1
+**	ret
+*/
+int zoo2 (int a, int b, int c, int d)
+{
+    return ((a < b) & c) | ((a >= b) & d);
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/if-compare_2.c b/gcc/testsuite/gcc.target/aarch64/if-compare_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..34bc65f5db10eae81b8dee3316dfb7d12bf471c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/if-compare_2.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -std=c99" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+/*
+**foo:
+**	cmgt	v0.4s, v1.4s, v0.4s
+**	bsl	v0.16b, v2.16b, v3.16b
+**	ret
+*/
+v4si foo (v4si a, v4si b, v4si c, v4si d) {
+    return ((a < b) & c) | ((a >= b) & d);
+}
+
+
+/**
+**bar:
+**...
+**	cmge	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**	bsl	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**	and	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**...
+*/
+void bar (int * restrict a, int * restrict b, int * restrict c,
+	  int * restrict d, int * restrict res, int n)
+{
+  for (int i = 0; i < (n & -4); i++)
+    res[i] = ((a[i] < b[i]) & c[i]) | ((a[i] >= b[i]) & d[i]);
+}
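As a possible follow-up (only a sketch, not part of this patch), a dg-do run companion to the compile-only tests above could verify at runtime that the folded scalar form keeps the original semantics; the file name, options, and value ranges here are hypothetical:

/* { dg-do run } */
/* { dg-additional-options "-O2" } */

/* Execution counterpart (sketch): check that zoo2, which the new rule
   folds to (a < b ? c : d) & 1, still computes the original value.  */

extern void abort (void);

__attribute__ ((noipa))
int zoo2 (int a, int b, int c, int d)
{
  return ((a < b) & c) | ((a >= b) & d);
}

int main (void)
{
  for (int a = -2; a <= 2; a++)
    for (int b = -2; b <= 2; b++)
      for (int c = -2; c <= 2; c++)
        for (int d = -2; d <= 2; d++)
          {
            int expected = a < b ? (c & 1) : (d & 1);
            if (zoo2 (a, b, c, d) != expected)
              abort ();
          }
  return 0;
}

The noipa attribute only keeps zoo2 from being inlined into main, so the folded function itself is what gets exercised.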