public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... }
@ 2019-01-04 11:43 Richard Sandiford
  2019-01-04 12:04 ` Eric Botcazou
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Sandiford @ 2019-01-04 11:43 UTC (permalink / raw)
  To: gcc-patches

The PR has:

    vect__6.24_42 = vect__5.23_41 * { 0.0, 1.0e+0, 0.0, 0.0 };

which for -fno-signed-zeros -fno-signaling-nans can be simplified to:

    vect__6.24_42 = vect__5.23_41 & { 0, -1, 0, 0 };

I deliberately didn't handle COMPLEX_CST or CONSTRUCTOR in
initializer_each_zero_or_onep since there are no current use cases.

The patch also makes (un)signed_type_for handle floating-point types.
I tried to audit all callers and the few that handle null returns would
be unaffected.

Tested on aarch64-linux-gnu, aarch64_be-elf and x86_64-linux-gnu.
OK to install?

Richard


2019-01-04  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	PR tree-optimization/88598
	* tree.h (initializer_each_zero_or_onep): Declare.
	* tree.c (initializer_each_a_or_bp): New function.
	(initializer_each_zero_or_onep): Likewise.
	(signed_or_unsigned_type_for): Handle float types too.
	(unsigned_type_for, signed_type_for): Update comments accordingly.
	* match.pd: Fold x * { 0 or 1, 0 or 1, ...} to
	x & { 0 or -1, 0 or -1, ... }.

gcc/testsuite/
	PR tree-optimization/88598
	* gcc.dg/pr88598-1.c: New test.
	* gcc.dg/pr88598-2.c: Likewise.
	* gcc.dg/pr88598-3.c: Likewise.
	* gcc.dg/pr88598-4.c: Likewise.
	* gcc.dg/pr88598-5.c: Likewise.

Index: gcc/tree.h
===================================================================
--- gcc/tree.h	2019-01-04 11:39:24.810266962 +0000
+++ gcc/tree.h	2019-01-04 11:40:33.141683783 +0000
@@ -4506,6 +4506,7 @@ extern tree first_field (const_tree);
    combinations indicate definitive answers.  */
 
 extern bool initializer_zerop (const_tree, bool * = NULL);
+extern bool initializer_each_zero_or_onep (const_tree);
 
 extern wide_int vector_cst_int_elt (const_tree, unsigned int);
 extern tree vector_cst_elt (const_tree, unsigned int);
Index: gcc/tree.c
===================================================================
--- gcc/tree.c	2019-01-04 11:39:24.810266962 +0000
+++ gcc/tree.c	2019-01-04 11:40:33.141683783 +0000
@@ -11229,6 +11229,60 @@ initializer_zerop (const_tree init, bool
     }
 }
 
+/* Return true if EXPR is an initializer expression that consists only
+   of INTEGER_CSTs for which IP0 or IP1 holds and REAL_CSTs for which
+   RP0 or RP1 holds.  The choice between IP0 and IP1, and between
+   RP0 and RP1, can vary from one element to the next.  */
+
+template<bool (*IP0) (const_tree), bool (*IP1) (const_tree),
+	 bool (*RP0) (const_tree), bool (*RP1) (const_tree)>
+bool
+initializer_each_a_or_bp (const_tree expr)
+{
+#define RECURSE(X) initializer_each_a_or_bp<IP0, IP1, RP0, RP1> (X)
+
+  STRIP_ANY_LOCATION_WRAPPER (expr);
+
+  switch (TREE_CODE (expr))
+    {
+    case INTEGER_CST:
+      return IP0 (expr) || IP1 (expr);
+
+    case REAL_CST:
+      return RP0 (expr) || RP1 (expr);
+
+    case VECTOR_CST:
+      {
+	unsigned HOST_WIDE_INT nelts = vector_cst_encoded_nelts (expr);
+	if (VECTOR_CST_STEPPED_P (expr)
+	    && !TYPE_VECTOR_SUBPARTS (TREE_TYPE (expr)).is_constant (&nelts))
+	  return false;
+
+	for (unsigned int i = 0; i < nelts; ++i)
+	  if (!RECURSE (VECTOR_CST_ENCODED_ELT (expr, i)))
+	    return false;
+
+	return true;
+      }
+
+    default:
+      return false;
+    }
+
+#undef RECURSE
+}
+
+/* Return true if EXPR is an initializer expression in which every element
+   is a constant that is numerically equal to 0 or 1.  The elements do not
+   need to be equal to each other.  */
+
+bool
+initializer_each_zero_or_onep (const_tree expr)
+{
+  return initializer_each_a_or_bp<integer_zerop, integer_onep,
+				  real_zerop, real_onep> (expr);
+}
+
 /* Check if vector VEC consists of all the equal elements and
    that the number of elements corresponds to the type of VEC.
    The function returns first element of the vector
@@ -11672,7 +11726,10 @@ int_cst_value (const_tree x)
 
 /* If TYPE is an integral or pointer type, return an integer type with
    the same precision which is unsigned iff UNSIGNEDP is true, or itself
-   if TYPE is already an integer type of signedness UNSIGNEDP.  */
+   if TYPE is already an integer type of signedness UNSIGNEDP.
+   If TYPE is a floating-point type, return an integer type with the same
+   bitsize and with the signedness given by UNSIGNEDP; this is useful
+   when doing bit-level operations on a floating-point value.  */
 
 tree
 signed_or_unsigned_type_for (int unsignedp, tree type)
@@ -11702,17 +11759,23 @@ signed_or_unsigned_type_for (int unsigne
       return build_complex_type (inner2);
     }
 
-  if (!INTEGRAL_TYPE_P (type)
-      && !POINTER_TYPE_P (type)
-      && TREE_CODE (type) != OFFSET_TYPE)
+  unsigned int bits;
+  if (INTEGRAL_TYPE_P (type)
+      || POINTER_TYPE_P (type)
+      || TREE_CODE (type) == OFFSET_TYPE)
+    bits = TYPE_PRECISION (type);
+  else if (TREE_CODE (type) == REAL_TYPE)
+    bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (type));
+  else
     return NULL_TREE;
 
-  return build_nonstandard_integer_type (TYPE_PRECISION (type), unsignedp);
+  return build_nonstandard_integer_type (bits, unsignedp);
 }
 
 /* If TYPE is an integral or pointer type, return an integer type with
    the same precision which is unsigned, or itself if TYPE is already an
-   unsigned integer type.  */
+   unsigned integer type.  If TYPE is a floating-point type, return an
+   unsigned integer type with the same bitsize as TYPE.  */
 
 tree
 unsigned_type_for (tree type)
@@ -11722,7 +11785,8 @@ unsigned_type_for (tree type)
 
 /* If TYPE is an integral or pointer type, return an integer type with
    the same precision which is signed, or itself if TYPE is already a
-   signed integer type.  */
+   signed integer type.  If TYPE is a floating-point type, return a
+   signed integer type with the same bitsize as TYPE.  */
 
 tree
 signed_type_for (tree type)
Index: gcc/match.pd
===================================================================
--- gcc/match.pd	2019-01-04 11:39:25.798258529 +0000
+++ gcc/match.pd	2019-01-04 11:40:33.137683817 +0000
@@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.
    integer_each_onep integer_truep integer_nonzerop
    real_zerop real_onep real_minus_onep
    zerop
+   initializer_each_zero_or_onep
    CONSTANT_CLASS_P
    tree_expr_nonnegative_p
    tree_expr_nonzero_p
@@ -194,6 +195,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
            || !COMPLEX_FLOAT_TYPE_P (type)))
    (negate @0)))
 
+/* Transform x * { 0 or 1, 0 or 1, ... } into x & { 0 or -1, 0 or -1, ...},
+   unless the target has native support for the former but not the latter.  */
+(simplify
+ (mult @0 VECTOR_CST@1)
+ (if (initializer_each_zero_or_onep (@1)
+      && !HONOR_SNANS (type)
+      && !HONOR_SIGNED_ZEROS (type))
+  (with { tree itype = FLOAT_TYPE_P (type) ? unsigned_type_for (type) : type; }
+   (if (itype
+	&& (!VECTOR_MODE_P (TYPE_MODE (type))
+	    || (VECTOR_MODE_P (TYPE_MODE (itype))
+		&& optab_handler (and_optab,
+				  TYPE_MODE (itype)) != CODE_FOR_nothing)))
+    (view_convert (bit_and:itype (view_convert @0)
+				 (ne @1 { build_zero_cst (type); })))))))
+
 (for cmp (gt ge lt le)
      outp (convert convert negate negate)
      outn (negate negate convert convert)
Index: gcc/testsuite/gcc.dg/pr88598-1.c
===================================================================
--- /dev/null	2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-1.c	2019-01-04 11:40:33.137683817 +0000
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O -fdump-tree-ccp1" } */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+int
+main ()
+{
+  volatile v4si x1 = { 4, 5, 6, 7 };
+  volatile v4si x2 = { 10, 11, 12, 13 };
+  volatile v4si x3 = { 20, 21, 22, 23 };
+
+  x1 *= (v4si) { 0, 1, 1, 0 };
+  x2 *= (v4si) { 1, 0, 0, 1 };
+  x3 *= (v4si) { 0, 0, 1, 0 };
+
+  if (__builtin_memcmp ((void *) &x1, &(v4si) { 0, 5, 6, 0 }, sizeof (v4si))
+      || __builtin_memcmp ((void *) &x2, &(v4si) { 10, 0, 0, 13 },
+			   sizeof (v4si))
+      || __builtin_memcmp ((void *) &x3, &(v4si) { 0, 0, 22, 0 },
+			   sizeof (v4si)))
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not { \* } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-2.c
===================================================================
--- /dev/null	2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-2.c	2019-01-04 11:40:33.137683817 +0000
@@ -0,0 +1,30 @@
+/* { dg-do run { target double64 } } */
+/* { dg-options "-O -fdump-tree-ccp1" } */
+/* { dg-add-options ieee } */
+
+typedef double v4df __attribute__ ((vector_size (32)));
+
+int
+main ()
+{
+  volatile v4df x1 = { 4, 5, 6, -7 };
+  volatile v4df x2 = { 10, -11, 12, 13 };
+  volatile v4df x3 = { 20, 21, 22, 23 };
+
+  x1 *= (v4df) { 0, 1, 1, 0 };
+  x2 *= (v4df) { 1, 0, 0, 1 };
+  x3 *= (v4df) { 0.0, -0.0, 1.0, -0.0 };
+
+  if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 6, -0.0 },
+			sizeof (v4df))
+      || __builtin_memcmp ((void *) &x2, &(v4df) { 10, -0.0, 0, 13 },
+			   sizeof (v4df))
+      || __builtin_memcmp ((void *) &x3, &(v4df) { 0, -0.0, 22, -0.0 },
+			   sizeof (v4df)))
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
+/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-3.c
===================================================================
--- /dev/null	2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-3.c	2019-01-04 11:40:33.137683817 +0000
@@ -0,0 +1,29 @@
+/* { dg-do run { target double64 } } */
+/* { dg-options "-O -fno-signed-zeros -fdump-tree-ccp1" } */
+/* { dg-add-options ieee } */
+
+typedef double v4df __attribute__ ((vector_size (32)));
+
+int
+main ()
+{
+  volatile v4df x1 = { 4, 5, 6, -7 };
+  volatile v4df x2 = { 10, -11, 12, 13 };
+  volatile v4df x3 = { 20, 21, 22, 23 };
+
+  x1 *= (v4df) { 0, 1, 1, 0 };
+  x2 *= (v4df) { 1, 0, 0, 1 };
+  x3 *= (v4df) { 0.0, -0.0, 1.0, -0.0 };
+
+  if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 6, 0 },
+			sizeof (v4df))
+      || __builtin_memcmp ((void *) &x2, &(v4df) { 10, 0, 0, 13 },
+			   sizeof (v4df))
+      || __builtin_memcmp ((void *) &x3, &(v4df) { 0, 0, 22, 0 },
+			   sizeof (v4df)))
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not { \* } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-4.c
===================================================================
--- /dev/null	2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-4.c	2019-01-04 11:40:33.137683817 +0000
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-options "-O -fdump-tree-ccp1" } */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+int
+main ()
+{
+  volatile v4si x1 = { 4, 5, 6, 7 };
+  volatile v4si x2 = { 10, 11, 12, 13 };
+  volatile v4si x3 = { 20, 21, 22, 23 };
+
+  x1 *= (v4si) { 0, 1, 2, 3 };
+  x2 *= (v4si) { 1, 0, 2, 0 };
+  x3 *= (v4si) { 0, 0, -1, 0 };
+
+  if (__builtin_memcmp ((void *) &x1, &(v4si) { 0, 5, 12, 21 }, sizeof (v4si))
+      || __builtin_memcmp ((void *) &x2, &(v4si) { 10, 0, 24, 0 },
+			   sizeof (v4si))
+      || __builtin_memcmp ((void *) &x3, &(v4si) { 0, 0, -22, 0 },
+			   sizeof (v4si)))
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
+/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-5.c
===================================================================
--- /dev/null	2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-5.c	2019-01-04 11:40:33.137683817 +0000
@@ -0,0 +1,29 @@
+/* { dg-do run { target double64 } } */
+/* { dg-options "-O -fno-signed-zeros -fdump-tree-ccp1" } */
+/* { dg-add-options ieee } */
+
+typedef double v4df __attribute__ ((vector_size (32)));
+
+int
+main ()
+{
+  volatile v4df x1 = { 4, 5, 6, 7 };
+  volatile v4df x2 = { 10, 11, 12, 13 };
+  volatile v4df x3 = { 20, 21, 22, 23 };
+
+  x1 *= (v4df) { 0, 1, 2, 3 };
+  x2 *= (v4df) { 1, 0, 2, 0 };
+  x3 *= (v4df) { 0, 0, -1, 0 };
+
+  if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 12, 21 }, sizeof (v4df))
+      || __builtin_memcmp ((void *) &x2, &(v4df) { 10, 0, 24, 0 },
+			   sizeof (v4df))
+      || __builtin_memcmp ((void *) &x3, &(v4df) { 0, 0, -22, 0 },
+			   sizeof (v4df)))
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
+/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... }
  2019-01-04 11:43 [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... } Richard Sandiford
@ 2019-01-04 12:04 ` Eric Botcazou
  2019-01-04 12:13   ` Richard Sandiford
  0 siblings, 1 reply; 7+ messages in thread
From: Eric Botcazou @ 2019-01-04 12:04 UTC (permalink / raw)
  To: Richard Sandiford; +Cc: gcc-patches

> Index: gcc/tree.c
> ===================================================================
> --- gcc/tree.c	2019-01-04 11:39:24.810266962 +0000
> +++ gcc/tree.c	2019-01-04 11:40:33.141683783 +0000
> @@ -11229,6 +11229,60 @@ initializer_zerop (const_tree init, bool
>      }
>  }
> 
> +/* Return true if EXPR is an initializer expression that consists only
> +   of INTEGER_CSTs for which IP0 or IP1 holds and REAL_CSTs for which
> +   RP0 or RP1 holds.  The choice between IP0 and IP1, and between
> +   RP0 and RP1, can vary from one element to the next.  */
> +
> +template<bool (*IP0) (const_tree), bool (*IP1) (const_tree),
> +	 bool (*RP0) (const_tree), bool (*RP1) (const_tree)>
> +bool
> +initializer_each_a_or_bp (const_tree expr)
> +{
> +#define RECURSE(X) initializer_each_a_or_bp<IP0, IP1, RP0, RP1> (X)
> +
> +  STRIP_ANY_LOCATION_WRAPPER (expr);
> +
> +  switch (TREE_CODE (expr))
> +    {
> +    case INTEGER_CST:
> +      return IP0 (expr) || IP1 (expr);
> +
> +    case REAL_CST:
> +      return RP0 (expr) || RP1 (expr);
> +
> +    case VECTOR_CST:
> +      {
> +	unsigned HOST_WIDE_INT nelts = vector_cst_encoded_nelts (expr);
> +	if (VECTOR_CST_STEPPED_P (expr)
> +	    && !TYPE_VECTOR_SUBPARTS (TREE_TYPE (expr)).is_constant (&nelts))
> +	  return false;
> +
> +	for (unsigned int i = 0; i < nelts; ++i)
> +	  if (!RECURSE (VECTOR_CST_ENCODED_ELT (expr, i)))
> +	    return false;
> +
> +	return true;
> +      }
> +
> +    default:
> +      return false;
> +    }
> +
> +#undef RECURSE

Can we avoid the gratuitous use of template here?  We were told that C++ would 
be used only when it makes things more straightforward and it's the contrary 
in this case, to wit the need for the ugly RECURSE macro in the middle.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... }
  2019-01-04 12:04 ` Eric Botcazou
@ 2019-01-04 12:13   ` Richard Sandiford
  2019-01-04 12:19     ` Jakub Jelinek
  2019-01-04 12:25     ` Eric Botcazou
  0 siblings, 2 replies; 7+ messages in thread
From: Richard Sandiford @ 2019-01-04 12:13 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches

Eric Botcazou <ebotcazou@adacore.com> writes:
>> Index: gcc/tree.c
>> ===================================================================
>> --- gcc/tree.c	2019-01-04 11:39:24.810266962 +0000
>> +++ gcc/tree.c	2019-01-04 11:40:33.141683783 +0000
>> @@ -11229,6 +11229,60 @@ initializer_zerop (const_tree init, bool
>>      }
>>  }
>> 
>> +/* Return true if EXPR is an initializer expression that consists only
>> +   of INTEGER_CSTs for which IP0 or IP1 holds and REAL_CSTs for which
>> +   RP0 or RP1 holds.  The choice between IP0 and IP1, and between
>> +   RP0 and RP1, can vary from one element to the next.  */
>> +
>> +template<bool (*IP0) (const_tree), bool (*IP1) (const_tree),
>> +	 bool (*RP0) (const_tree), bool (*RP1) (const_tree)>
>> +bool
>> +initializer_each_a_or_bp (const_tree expr)
>> +{
>> +#define RECURSE(X) initializer_each_a_or_bp<IP0, IP1, RP0, RP1> (X)
>> +
>> +  STRIP_ANY_LOCATION_WRAPPER (expr);
>> +
>> +  switch (TREE_CODE (expr))
>> +    {
>> +    case INTEGER_CST:
>> +      return IP0 (expr) || IP1 (expr);
>> +
>> +    case REAL_CST:
>> +      return RP0 (expr) || RP1 (expr);
>> +
>> +    case VECTOR_CST:
>> +      {
>> +	unsigned HOST_WIDE_INT nelts = vector_cst_encoded_nelts (expr);
>> +	if (VECTOR_CST_STEPPED_P (expr)
>> +	    && !TYPE_VECTOR_SUBPARTS (TREE_TYPE (expr)).is_constant (&nelts))
>> +	  return false;
>> +
>> +	for (unsigned int i = 0; i < nelts; ++i)
>> +	  if (!RECURSE (VECTOR_CST_ENCODED_ELT (expr, i)))
>> +	    return false;
>> +
>> +	return true;
>> +      }
>> +
>> +    default:
>> +      return false;
>> +    }
>> +
>> +#undef RECURSE
>
> Can we avoid the gratuitous use of template here?  We were told that C++ would 
> be used only when it makes things more straightforward and it's the contrary 
> in this case, to wit the need for the ugly RECURSE macro in the middle.

I did it that way so that it would be easy to add things like
zero_or_minus_onep without cut-&-pasting the whole structure.
The way to do that in C would be to use a macro for the full
function, but that's even uglier due to the extra backslashes.

I can change it to:

	for (unsigned int i = 0; i < nelts; ++i)
	  {
	    tree elt = VECTOR_CST_ENCODED_ELT (expr, i);
	    if (!initializer_each_a_or_bp<IP0, IP1, RP0, RP1> (elt))
	      return false;
	  }

if we want to avoid macros.

I was actually worried that this wouldn't be C++ enough, due to not
using a function template to combine each pair of functions. :-)

Richard

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... }
  2019-01-04 12:13   ` Richard Sandiford
@ 2019-01-04 12:19     ` Jakub Jelinek
  2019-01-04 12:44       ` Richard Sandiford
  2019-01-04 12:25     ` Eric Botcazou
  1 sibling, 1 reply; 7+ messages in thread
From: Jakub Jelinek @ 2019-01-04 12:19 UTC (permalink / raw)
  To: Eric Botcazou, gcc-patches, richard.sandiford

On Fri, Jan 04, 2019 at 12:13:13PM +0000, Richard Sandiford wrote:
> > Can we avoid the gratuitous use of template here?  We were told that C++ would 
> > be used only when it makes things more straightforward and it's the contrary 
> > in this case, to wit the need for the ugly RECURSE macro in the middle.
> 
> I did it that way so that it would be easy to add things like
> zero_or_minus_onep without cut-&-pasting the whole structure.

IMHO we can make such a change only when it is needed.

> The way to do that in C would be to use a macro for the full
> function, but that's even uglier due to the extra backslashes.

Or just make the function static inline and pass the function pointers
to it as arguments?  If it is inlined, it will be the same, it could be
even always_inline if that is really needed.

	Jakub

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... }
  2019-01-04 12:13   ` Richard Sandiford
  2019-01-04 12:19     ` Jakub Jelinek
@ 2019-01-04 12:25     ` Eric Botcazou
  1 sibling, 0 replies; 7+ messages in thread
From: Eric Botcazou @ 2019-01-04 12:25 UTC (permalink / raw)
  To: Richard Sandiford; +Cc: gcc-patches

> I did it that way so that it would be easy to add things like
> zero_or_minus_onep without cut-&-pasting the whole structure.

Yes, I inferred that, but people can still templatize afterward if need be.

Following this line of reasoning, why limit yourself to this arbitrary 
number of 2 values in the template?  I'm sure one can imagine the need in some 
distant future for initializer_each_minus_one_or_zero_or_onep. ;-)

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... }
  2019-01-04 12:19     ` Jakub Jelinek
@ 2019-01-04 12:44       ` Richard Sandiford
  2019-01-07 11:35         ` Richard Biener
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Sandiford @ 2019-01-04 12:44 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Eric Botcazou, gcc-patches

Jakub Jelinek <jakub@redhat.com> writes:
> On Fri, Jan 04, 2019 at 12:13:13PM +0000, Richard Sandiford wrote:
>> > Can we avoid the gratuitous use of template here?  We were told that C++ would 
>> > be used only when it makes things more straightforward and it's the contrary 
>> > in this case, to wit the need for the ugly RECURSE macro in the middle.
>> 
>> I did it that way so that it would be easy to add things like
>> zero_or_minus_onep without cut-&-pasting the whole structure.
>
> IMHO we can make such a change only when it is needed.

The other predicates in tree.c suggest that we won't though.
E.g. there was never any attempt to unify integer_zerop vs. integer_onep
and real_zerop vs. real_onep.

>> The way to do that in C would be to use a macro for the full
>> function, but that's even uglier due to the extra backslashes.
>
> Or just make the function static inline and pass the function pointers
> to it as arguments?  If it is inlined, it will be the same, it could be
> even always_inline if that is really needed.

For that to work for recursive functions I think we'd need to pass the
caller predicate in too, which means one more function pointer overall.

Anyway, here's the patch without the template.

Thanks,
Richard


2019-01-04  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	PR tree-optimization/88598
	* tree.h (initializer_each_zero_or_onep): Declare.
	* tree.c (initializer_each_zero_or_onep): New function.
	(signed_or_unsigned_type_for): Handle float types too.
	(unsigned_type_for, signed_type_for): Update comments accordingly.
	* match.pd: Fold x * { 0 or 1, 0 or 1, ...} to
	x & { 0 or -1, 0 or -1, ... }.

gcc/testsuite/
	PR tree-optimization/88598
	* gcc.dg/pr88598-1.c: New test.
	* gcc.dg/pr88598-2.c: Likewise.
	* gcc.dg/pr88598-3.c: Likewise.
	* gcc.dg/pr88598-4.c: Likewise.
	* gcc.dg/pr88598-5.c: Likewise.

Index: gcc/tree.h
===================================================================
--- gcc/tree.h	2019-01-04 12:40:51.000000000 +0000
+++ gcc/tree.h	2019-01-04 12:40:51.990582844 +0000
@@ -4506,6 +4506,7 @@ extern tree first_field (const_tree);
    combinations indicate definitive answers.  */
 
 extern bool initializer_zerop (const_tree, bool * = NULL);
+extern bool initializer_each_zero_or_onep (const_tree);
 
 extern wide_int vector_cst_int_elt (const_tree, unsigned int);
 extern tree vector_cst_elt (const_tree, unsigned int);
Index: gcc/tree.c
===================================================================
--- gcc/tree.c	2019-01-04 12:40:51.000000000 +0000
+++ gcc/tree.c	2019-01-04 12:40:51.990582844 +0000
@@ -11229,6 +11229,45 @@ initializer_zerop (const_tree init, bool
     }
 }
 
+/* Return true if EXPR is an initializer expression in which every element
+   is a constant that is numerically equal to 0 or 1.  The elements do not
+   need to be equal to each other.  */
+
+bool
+initializer_each_zero_or_onep (const_tree expr)
+{
+  STRIP_ANY_LOCATION_WRAPPER (expr);
+
+  switch (TREE_CODE (expr))
+    {
+    case INTEGER_CST:
+      return integer_zerop (expr) || integer_onep (expr);
+
+    case REAL_CST:
+      return real_zerop (expr) || real_onep (expr);
+
+    case VECTOR_CST:
+      {
+	unsigned HOST_WIDE_INT nelts = vector_cst_encoded_nelts (expr);
+	if (VECTOR_CST_STEPPED_P (expr)
+	    && !TYPE_VECTOR_SUBPARTS (TREE_TYPE (expr)).is_constant (&nelts))
+	  return false;
+
+	for (unsigned int i = 0; i < nelts; ++i)
+	  {
+	    tree elt = VECTOR_CST_ENCODED_ELT (expr, i);
+	    if (!initializer_each_zero_or_onep (elt))
+	      return false;
+	  }
+
+	return true;
+      }
+
+    default:
+      return false;
+    }
+}
+
 /* Check if vector VEC consists of all the equal elements and
    that the number of elements corresponds to the type of VEC.
    The function returns first element of the vector
@@ -11672,7 +11711,10 @@ int_cst_value (const_tree x)
 
 /* If TYPE is an integral or pointer type, return an integer type with
    the same precision which is unsigned iff UNSIGNEDP is true, or itself
-   if TYPE is already an integer type of signedness UNSIGNEDP.  */
+   if TYPE is already an integer type of signedness UNSIGNEDP.
+   If TYPE is a floating-point type, return an integer type with the same
+   bitsize and with the signedness given by UNSIGNEDP; this is useful
+   when doing bit-level operations on a floating-point value.  */
 
 tree
 signed_or_unsigned_type_for (int unsignedp, tree type)
@@ -11702,17 +11744,23 @@ signed_or_unsigned_type_for (int unsigne
       return build_complex_type (inner2);
     }
 
-  if (!INTEGRAL_TYPE_P (type)
-      && !POINTER_TYPE_P (type)
-      && TREE_CODE (type) != OFFSET_TYPE)
+  unsigned int bits;
+  if (INTEGRAL_TYPE_P (type)
+      || POINTER_TYPE_P (type)
+      || TREE_CODE (type) == OFFSET_TYPE)
+    bits = TYPE_PRECISION (type);
+  else if (TREE_CODE (type) == REAL_TYPE)
+    bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (type));
+  else
     return NULL_TREE;
 
-  return build_nonstandard_integer_type (TYPE_PRECISION (type), unsignedp);
+  return build_nonstandard_integer_type (bits, unsignedp);
 }
 
 /* If TYPE is an integral or pointer type, return an integer type with
    the same precision which is unsigned, or itself if TYPE is already an
-   unsigned integer type.  */
+   unsigned integer type.  If TYPE is a floating-point type, return an
+   unsigned integer type with the same bitsize as TYPE.  */
 
 tree
 unsigned_type_for (tree type)
@@ -11722,7 +11770,8 @@ unsigned_type_for (tree type)
 
 /* If TYPE is an integral or pointer type, return an integer type with
    the same precision which is signed, or itself if TYPE is already a
-   signed integer type.  */
+   signed integer type.  If TYPE is a floating-point type, return a
+   signed integer type with the same bitsize as TYPE.  */
 
 tree
 signed_type_for (tree type)
Index: gcc/match.pd
===================================================================
--- gcc/match.pd	2019-01-04 12:40:51.000000000 +0000
+++ gcc/match.pd	2019-01-04 12:40:51.982582910 +0000
@@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.
    integer_each_onep integer_truep integer_nonzerop
    real_zerop real_onep real_minus_onep
    zerop
+   initializer_each_zero_or_onep
    CONSTANT_CLASS_P
    tree_expr_nonnegative_p
    tree_expr_nonzero_p
@@ -194,6 +195,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
            || !COMPLEX_FLOAT_TYPE_P (type)))
    (negate @0)))
 
+/* Transform x * { 0 or 1, 0 or 1, ... } into x & { 0 or -1, 0 or -1, ...},
+   unless the target has native support for the former but not the latter.  */
+(simplify
+ (mult @0 VECTOR_CST@1)
+ (if (initializer_each_zero_or_onep (@1)
+      && !HONOR_SNANS (type)
+      && !HONOR_SIGNED_ZEROS (type))
+  (with { tree itype = FLOAT_TYPE_P (type) ? unsigned_type_for (type) : type; }
+   (if (itype
+	&& (!VECTOR_MODE_P (TYPE_MODE (type))
+	    || (VECTOR_MODE_P (TYPE_MODE (itype))
+		&& optab_handler (and_optab,
+				  TYPE_MODE (itype)) != CODE_FOR_nothing)))
+    (view_convert (bit_and:itype (view_convert @0)
+				 (ne @1 { build_zero_cst (type); })))))))
+
 (for cmp (gt ge lt le)
      outp (convert convert negate negate)
      outn (negate negate convert convert)
Index: gcc/testsuite/gcc.dg/pr88598-1.c
===================================================================
--- /dev/null	2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-1.c	2019-01-04 12:40:51.982582910 +0000
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O -fdump-tree-ccp1" } */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+int
+main ()
+{
+  volatile v4si x1 = { 4, 5, 6, 7 };
+  volatile v4si x2 = { 10, 11, 12, 13 };
+  volatile v4si x3 = { 20, 21, 22, 23 };
+
+  x1 *= (v4si) { 0, 1, 1, 0 };
+  x2 *= (v4si) { 1, 0, 0, 1 };
+  x3 *= (v4si) { 0, 0, 1, 0 };
+
+  if (__builtin_memcmp ((void *) &x1, &(v4si) { 0, 5, 6, 0 }, sizeof (v4si))
+      || __builtin_memcmp ((void *) &x2, &(v4si) { 10, 0, 0, 13 },
+			   sizeof (v4si))
+      || __builtin_memcmp ((void *) &x3, &(v4si) { 0, 0, 22, 0 },
+			   sizeof (v4si)))
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not { \* } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-2.c
===================================================================
--- /dev/null	2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-2.c	2019-01-04 12:40:51.986582877 +0000
@@ -0,0 +1,30 @@
+/* { dg-do run { target double64 } } */
+/* { dg-options "-O -fdump-tree-ccp1" } */
+/* { dg-add-options ieee } */
+
+typedef double v4df __attribute__ ((vector_size (32)));
+
+int
+main ()
+{
+  volatile v4df x1 = { 4, 5, 6, -7 };
+  volatile v4df x2 = { 10, -11, 12, 13 };
+  volatile v4df x3 = { 20, 21, 22, 23 };
+
+  x1 *= (v4df) { 0, 1, 1, 0 };
+  x2 *= (v4df) { 1, 0, 0, 1 };
+  x3 *= (v4df) { 0.0, -0.0, 1.0, -0.0 };
+
+  if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 6, -0.0 },
+			sizeof (v4df))
+      || __builtin_memcmp ((void *) &x2, &(v4df) { 10, -0.0, 0, 13 },
+			   sizeof (v4df))
+      || __builtin_memcmp ((void *) &x3, &(v4df) { 0, -0.0, 22, -0.0 },
+			   sizeof (v4df)))
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
+/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-3.c
===================================================================
--- /dev/null	2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-3.c	2019-01-04 12:40:51.986582877 +0000
@@ -0,0 +1,29 @@
+/* { dg-do run { target double64 } } */
+/* { dg-options "-O -fno-signed-zeros -fdump-tree-ccp1" } */
+/* { dg-add-options ieee } */
+
+typedef double v4df __attribute__ ((vector_size (32)));
+
+int
+main ()
+{
+  volatile v4df x1 = { 4, 5, 6, -7 };
+  volatile v4df x2 = { 10, -11, 12, 13 };
+  volatile v4df x3 = { 20, 21, 22, 23 };
+
+  x1 *= (v4df) { 0, 1, 1, 0 };
+  x2 *= (v4df) { 1, 0, 0, 1 };
+  x3 *= (v4df) { 0.0, -0.0, 1.0, -0.0 };
+
+  if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 6, 0 },
+			sizeof (v4df))
+      || __builtin_memcmp ((void *) &x2, &(v4df) { 10, 0, 0, 13 },
+			   sizeof (v4df))
+      || __builtin_memcmp ((void *) &x3, &(v4df) { 0, 0, 22, 0 },
+			   sizeof (v4df)))
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not { \* } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-4.c
===================================================================
--- /dev/null	2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-4.c	2019-01-04 12:40:51.986582877 +0000
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-options "-O -fdump-tree-ccp1" } */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+int
+main ()
+{
+  volatile v4si x1 = { 4, 5, 6, 7 };
+  volatile v4si x2 = { 10, 11, 12, 13 };
+  volatile v4si x3 = { 20, 21, 22, 23 };
+
+  x1 *= (v4si) { 0, 1, 2, 3 };
+  x2 *= (v4si) { 1, 0, 2, 0 };
+  x3 *= (v4si) { 0, 0, -1, 0 };
+
+  if (__builtin_memcmp ((void *) &x1, &(v4si) { 0, 5, 12, 21 }, sizeof (v4si))
+      || __builtin_memcmp ((void *) &x2, &(v4si) { 10, 0, 24, 0 },
+			   sizeof (v4si))
+      || __builtin_memcmp ((void *) &x3, &(v4si) { 0, 0, -22, 0 },
+			   sizeof (v4si)))
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
+/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-5.c
===================================================================
--- /dev/null	2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-5.c	2019-01-04 12:40:51.986582877 +0000
@@ -0,0 +1,29 @@
+/* { dg-do run { target double64 } } */
+/* { dg-options "-O -fno-signed-zeros -fdump-tree-ccp1" } */
+/* { dg-add-options ieee } */
+
+typedef double v4df __attribute__ ((vector_size (32)));
+
+int
+main ()
+{
+  volatile v4df x1 = { 4, 5, 6, 7 };
+  volatile v4df x2 = { 10, 11, 12, 13 };
+  volatile v4df x3 = { 20, 21, 22, 23 };
+
+  x1 *= (v4df) { 0, 1, 2, 3 };
+  x2 *= (v4df) { 1, 0, 2, 0 };
+  x3 *= (v4df) { 0, 0, -1, 0 };
+
+  if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 12, 21 }, sizeof (v4df))
+      || __builtin_memcmp ((void *) &x2, &(v4df) { 10, 0, 24, 0 },
+			   sizeof (v4df))
+      || __builtin_memcmp ((void *) &x3, &(v4df) { 0, 0, -22, 0 },
+			   sizeof (v4df)))
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
+/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... }
  2019-01-04 12:44       ` Richard Sandiford
@ 2019-01-07 11:35         ` Richard Biener
  0 siblings, 0 replies; 7+ messages in thread
From: Richard Biener @ 2019-01-07 11:35 UTC (permalink / raw)
  To: Jakub Jelinek, Eric Botcazou, GCC Patches, Richard Sandiford

On Fri, Jan 4, 2019 at 1:44 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Jakub Jelinek <jakub@redhat.com> writes:
> > On Fri, Jan 04, 2019 at 12:13:13PM +0000, Richard Sandiford wrote:
> >> > Can we avoid the gratuitous use of template here?  We were told that C++ would
> >> > be used only when it makes things more straightforward and it's the contrary
> >> > in this case, to wit the need for the ugly RECURSE macro in the middle.
> >>
> >> I did it that way so that it would be easy to add things like
> >> zero_or_minus_onep without cut-&-pasting the whole structure.
> >
> > IMHO we can make such a change only when it is needed.
>
> The other predicates in tree.c suggest that we won't though.
> E.g. there was never any attempt to unify integer_zerop vs. integer_onep
> and real_zerop vs. real_onep.
>
> >> The way to do that in C would be to use a macro for the full
> >> function, but that's even uglier due to the extra backslashes.
> >
> > Or just make the function static inline and pass the function pointers
> > to it as arguments?  If it is inlined, it will be the same, it could be
> > even always_inline if that is really needed.
>
> For that to work for recursive functions I think we'd need to pass the
> caller predicate in too, which means one more function pointer overall.
>
> Anyway, here's the patch without the template.

OK.

Thanks,
Richard.

> Thanks,
> Richard
>
>
> 2019-01-04  Richard Sandiford  <richard.sandiford@arm.com>
>
> gcc/
>         PR tree-optimization/88598
>         * tree.h (initializer_each_zero_or_onep): Declare.
>         * tree.c (initializer_each_zero_or_onep): New function.
>         (signed_or_unsigned_type_for): Handle float types too.
>         (unsigned_type_for, signed_type_for): Update comments accordingly.
>         * match.pd: Fold x * { 0 or 1, 0 or 1, ...} to
>         x & { 0 or -1, 0 or -1, ... }.
>
> gcc/testsuite/
>         PR tree-optimization/88598
>         * gcc.dg/pr88598-1.c: New test.
>         * gcc.dg/pr88598-2.c: Likewise.
>         * gcc.dg/pr88598-3.c: Likewise.
>         * gcc.dg/pr88598-4.c: Likewise.
>         * gcc.dg/pr88598-5.c: Likewise.
>
> Index: gcc/tree.h
> ===================================================================
> --- gcc/tree.h  2019-01-04 12:40:51.000000000 +0000
> +++ gcc/tree.h  2019-01-04 12:40:51.990582844 +0000
> @@ -4506,6 +4506,7 @@ extern tree first_field (const_tree);
>     combinations indicate definitive answers.  */
>
>  extern bool initializer_zerop (const_tree, bool * = NULL);
> +extern bool initializer_each_zero_or_onep (const_tree);
>
>  extern wide_int vector_cst_int_elt (const_tree, unsigned int);
>  extern tree vector_cst_elt (const_tree, unsigned int);
> Index: gcc/tree.c
> ===================================================================
> --- gcc/tree.c  2019-01-04 12:40:51.000000000 +0000
> +++ gcc/tree.c  2019-01-04 12:40:51.990582844 +0000
> @@ -11229,6 +11229,45 @@ initializer_zerop (const_tree init, bool
>      }
>  }
>
> +/* Return true if EXPR is an initializer expression in which every element
> +   is a constant that is numerically equal to 0 or 1.  The elements do not
> +   need to be equal to each other.  */
> +
> +bool
> +initializer_each_zero_or_onep (const_tree expr)
> +{
> +  STRIP_ANY_LOCATION_WRAPPER (expr);
> +
> +  switch (TREE_CODE (expr))
> +    {
> +    case INTEGER_CST:
> +      return integer_zerop (expr) || integer_onep (expr);
> +
> +    case REAL_CST:
> +      return real_zerop (expr) || real_onep (expr);
> +
> +    case VECTOR_CST:
> +      {
> +       unsigned HOST_WIDE_INT nelts = vector_cst_encoded_nelts (expr);
> +       if (VECTOR_CST_STEPPED_P (expr)
> +           && !TYPE_VECTOR_SUBPARTS (TREE_TYPE (expr)).is_constant (&nelts))
> +         return false;
> +
> +       for (unsigned int i = 0; i < nelts; ++i)
> +         {
> +           tree elt = VECTOR_CST_ENCODED_ELT (expr, i);
> +           if (!initializer_each_zero_or_onep (elt))
> +             return false;
> +         }
> +
> +       return true;
> +      }
> +
> +    default:
> +      return false;
> +    }
> +}
> +
>  /* Check if vector VEC consists of all the equal elements and
>     that the number of elements corresponds to the type of VEC.
>     The function returns first element of the vector
> @@ -11672,7 +11711,10 @@ int_cst_value (const_tree x)
>
>  /* If TYPE is an integral or pointer type, return an integer type with
>     the same precision which is unsigned iff UNSIGNEDP is true, or itself
> -   if TYPE is already an integer type of signedness UNSIGNEDP.  */
> +   if TYPE is already an integer type of signedness UNSIGNEDP.
> +   If TYPE is a floating-point type, return an integer type with the same
> +   bitsize and with the signedness given by UNSIGNEDP; this is useful
> +   when doing bit-level operations on a floating-point value.  */
>
>  tree
>  signed_or_unsigned_type_for (int unsignedp, tree type)
> @@ -11702,17 +11744,23 @@ signed_or_unsigned_type_for (int unsigne
>        return build_complex_type (inner2);
>      }
>
> -  if (!INTEGRAL_TYPE_P (type)
> -      && !POINTER_TYPE_P (type)
> -      && TREE_CODE (type) != OFFSET_TYPE)
> +  unsigned int bits;
> +  if (INTEGRAL_TYPE_P (type)
> +      || POINTER_TYPE_P (type)
> +      || TREE_CODE (type) == OFFSET_TYPE)
> +    bits = TYPE_PRECISION (type);
> +  else if (TREE_CODE (type) == REAL_TYPE)
> +    bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (type));
> +  else
>      return NULL_TREE;
>
> -  return build_nonstandard_integer_type (TYPE_PRECISION (type), unsignedp);
> +  return build_nonstandard_integer_type (bits, unsignedp);
>  }
>
>  /* If TYPE is an integral or pointer type, return an integer type with
>     the same precision which is unsigned, or itself if TYPE is already an
> -   unsigned integer type.  */
> +   unsigned integer type.  If TYPE is a floating-point type, return an
> +   unsigned integer type with the same bitsize as TYPE.  */
>
>  tree
>  unsigned_type_for (tree type)
> @@ -11722,7 +11770,8 @@ unsigned_type_for (tree type)
>
>  /* If TYPE is an integral or pointer type, return an integer type with
>     the same precision which is signed, or itself if TYPE is already a
> -   signed integer type.  */
> +   signed integer type.  If TYPE is a floating-point type, return a
> +   signed integer type with the same bitsize as TYPE.  */
>
>  tree
>  signed_type_for (tree type)
> Index: gcc/match.pd
> ===================================================================
> --- gcc/match.pd        2019-01-04 12:40:51.000000000 +0000
> +++ gcc/match.pd        2019-01-04 12:40:51.982582910 +0000
> @@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.
>     integer_each_onep integer_truep integer_nonzerop
>     real_zerop real_onep real_minus_onep
>     zerop
> +   initializer_each_zero_or_onep
>     CONSTANT_CLASS_P
>     tree_expr_nonnegative_p
>     tree_expr_nonzero_p
> @@ -194,6 +195,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>             || !COMPLEX_FLOAT_TYPE_P (type)))
>     (negate @0)))
>
> +/* Transform x * { 0 or 1, 0 or 1, ... } into x & { 0 or -1, 0 or -1, ...},
> +   unless the target has native support for the former but not the latter.  */
> +(simplify
> + (mult @0 VECTOR_CST@1)
> + (if (initializer_each_zero_or_onep (@1)
> +      && !HONOR_SNANS (type)
> +      && !HONOR_SIGNED_ZEROS (type))
> +  (with { tree itype = FLOAT_TYPE_P (type) ? unsigned_type_for (type) : type; }
> +   (if (itype
> +       && (!VECTOR_MODE_P (TYPE_MODE (type))
> +           || (VECTOR_MODE_P (TYPE_MODE (itype))
> +               && optab_handler (and_optab,
> +                                 TYPE_MODE (itype)) != CODE_FOR_nothing)))
> +    (view_convert (bit_and:itype (view_convert @0)
> +                                (ne @1 { build_zero_cst (type); })))))))
> +
>  (for cmp (gt ge lt le)
>       outp (convert convert negate negate)
>       outn (negate negate convert convert)
> Index: gcc/testsuite/gcc.dg/pr88598-1.c
> ===================================================================
> --- /dev/null   2018-12-31 11:20:29.178325188 +0000
> +++ gcc/testsuite/gcc.dg/pr88598-1.c    2019-01-04 12:40:51.982582910 +0000
> @@ -0,0 +1,27 @@
> +/* { dg-do run } */
> +/* { dg-options "-O -fdump-tree-ccp1" } */
> +
> +typedef int v4si __attribute__ ((vector_size (16)));
> +
> +int
> +main ()
> +{
> +  volatile v4si x1 = { 4, 5, 6, 7 };
> +  volatile v4si x2 = { 10, 11, 12, 13 };
> +  volatile v4si x3 = { 20, 21, 22, 23 };
> +
> +  x1 *= (v4si) { 0, 1, 1, 0 };
> +  x2 *= (v4si) { 1, 0, 0, 1 };
> +  x3 *= (v4si) { 0, 0, 1, 0 };
> +
> +  if (__builtin_memcmp ((void *) &x1, &(v4si) { 0, 5, 6, 0 }, sizeof (v4si))
> +      || __builtin_memcmp ((void *) &x2, &(v4si) { 10, 0, 0, 13 },
> +                          sizeof (v4si))
> +      || __builtin_memcmp ((void *) &x3, &(v4si) { 0, 0, 22, 0 },
> +                          sizeof (v4si)))
> +    __builtin_abort ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-not { \* } "ccp1" } } */
> Index: gcc/testsuite/gcc.dg/pr88598-2.c
> ===================================================================
> --- /dev/null   2018-12-31 11:20:29.178325188 +0000
> +++ gcc/testsuite/gcc.dg/pr88598-2.c    2019-01-04 12:40:51.986582877 +0000
> @@ -0,0 +1,30 @@
> +/* { dg-do run { target double64 } } */
> +/* { dg-options "-O -fdump-tree-ccp1" } */
> +/* { dg-add-options ieee } */
> +
> +typedef double v4df __attribute__ ((vector_size (32)));
> +
> +int
> +main ()
> +{
> +  volatile v4df x1 = { 4, 5, 6, -7 };
> +  volatile v4df x2 = { 10, -11, 12, 13 };
> +  volatile v4df x3 = { 20, 21, 22, 23 };
> +
> +  x1 *= (v4df) { 0, 1, 1, 0 };
> +  x2 *= (v4df) { 1, 0, 0, 1 };
> +  x3 *= (v4df) { 0.0, -0.0, 1.0, -0.0 };
> +
> +  if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 6, -0.0 },
> +                       sizeof (v4df))
> +      || __builtin_memcmp ((void *) &x2, &(v4df) { 10, -0.0, 0, 13 },
> +                          sizeof (v4df))
> +      || __builtin_memcmp ((void *) &x3, &(v4df) { 0, -0.0, 22, -0.0 },
> +                          sizeof (v4df)))
> +    __builtin_abort ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
> +/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
> Index: gcc/testsuite/gcc.dg/pr88598-3.c
> ===================================================================
> --- /dev/null   2018-12-31 11:20:29.178325188 +0000
> +++ gcc/testsuite/gcc.dg/pr88598-3.c    2019-01-04 12:40:51.986582877 +0000
> @@ -0,0 +1,29 @@
> +/* { dg-do run { target double64 } } */
> +/* { dg-options "-O -fno-signed-zeros -fdump-tree-ccp1" } */
> +/* { dg-add-options ieee } */
> +
> +typedef double v4df __attribute__ ((vector_size (32)));
> +
> +int
> +main ()
> +{
> +  volatile v4df x1 = { 4, 5, 6, -7 };
> +  volatile v4df x2 = { 10, -11, 12, 13 };
> +  volatile v4df x3 = { 20, 21, 22, 23 };
> +
> +  x1 *= (v4df) { 0, 1, 1, 0 };
> +  x2 *= (v4df) { 1, 0, 0, 1 };
> +  x3 *= (v4df) { 0.0, -0.0, 1.0, -0.0 };
> +
> +  if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 6, 0 },
> +                       sizeof (v4df))
> +      || __builtin_memcmp ((void *) &x2, &(v4df) { 10, 0, 0, 13 },
> +                          sizeof (v4df))
> +      || __builtin_memcmp ((void *) &x3, &(v4df) { 0, 0, 22, 0 },
> +                          sizeof (v4df)))
> +    __builtin_abort ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-not { \* } "ccp1" } } */
> Index: gcc/testsuite/gcc.dg/pr88598-4.c
> ===================================================================
> --- /dev/null   2018-12-31 11:20:29.178325188 +0000
> +++ gcc/testsuite/gcc.dg/pr88598-4.c    2019-01-04 12:40:51.986582877 +0000
> @@ -0,0 +1,28 @@
> +/* { dg-do run } */
> +/* { dg-options "-O -fdump-tree-ccp1" } */
> +
> +typedef int v4si __attribute__ ((vector_size (16)));
> +
> +int
> +main ()
> +{
> +  volatile v4si x1 = { 4, 5, 6, 7 };
> +  volatile v4si x2 = { 10, 11, 12, 13 };
> +  volatile v4si x3 = { 20, 21, 22, 23 };
> +
> +  x1 *= (v4si) { 0, 1, 2, 3 };
> +  x2 *= (v4si) { 1, 0, 2, 0 };
> +  x3 *= (v4si) { 0, 0, -1, 0 };
> +
> +  if (__builtin_memcmp ((void *) &x1, &(v4si) { 0, 5, 12, 21 }, sizeof (v4si))
> +      || __builtin_memcmp ((void *) &x2, &(v4si) { 10, 0, 24, 0 },
> +                          sizeof (v4si))
> +      || __builtin_memcmp ((void *) &x3, &(v4si) { 0, 0, -22, 0 },
> +                          sizeof (v4si)))
> +    __builtin_abort ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
> +/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
> Index: gcc/testsuite/gcc.dg/pr88598-5.c
> ===================================================================
> --- /dev/null   2018-12-31 11:20:29.178325188 +0000
> +++ gcc/testsuite/gcc.dg/pr88598-5.c    2019-01-04 12:40:51.986582877 +0000
> @@ -0,0 +1,29 @@
> +/* { dg-do run { target double64 } } */
> +/* { dg-options "-O -fno-signed-zeros -fdump-tree-ccp1" } */
> +/* { dg-add-options ieee } */
> +
> +typedef double v4df __attribute__ ((vector_size (32)));
> +
> +int
> +main ()
> +{
> +  volatile v4df x1 = { 4, 5, 6, 7 };
> +  volatile v4df x2 = { 10, 11, 12, 13 };
> +  volatile v4df x3 = { 20, 21, 22, 23 };
> +
> +  x1 *= (v4df) { 0, 1, 2, 3 };
> +  x2 *= (v4df) { 1, 0, 2, 0 };
> +  x3 *= (v4df) { 0, 0, -1, 0 };
> +
> +  if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 12, 21 }, sizeof (v4df))
> +      || __builtin_memcmp ((void *) &x2, &(v4df) { 10, 0, 24, 0 },
> +                          sizeof (v4df))
> +      || __builtin_memcmp ((void *) &x3, &(v4df) { 0, 0, -22, 0 },
> +                          sizeof (v4df)))
> +    __builtin_abort ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
> +/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2019-01-07 11:35 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-04 11:43 [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... } Richard Sandiford
2019-01-04 12:04 ` Eric Botcazou
2019-01-04 12:13   ` Richard Sandiford
2019-01-04 12:19     ` Jakub Jelinek
2019-01-04 12:44       ` Richard Sandiford
2019-01-07 11:35         ` Richard Biener
2019-01-04 12:25     ` Eric Botcazou

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).