* [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... }
@ 2019-01-04 11:43 Richard Sandiford
2019-01-04 12:04 ` Eric Botcazou
0 siblings, 1 reply; 7+ messages in thread
From: Richard Sandiford @ 2019-01-04 11:43 UTC (permalink / raw)
To: gcc-patches
The PR has:
vect__6.24_42 = vect__5.23_41 * { 0.0, 1.0e+0, 0.0, 0.0 };
which for -fno-signed-zeros -fno-signaling-nans can be simplified to:
vect__6.24_42 = vect__5.23_41 & { 0, -1, 0, 0 };
I deliberately didn't handle COMPLEX_CST or CONSTRUCTOR in
initializer_each_zero_or_onep since there are no current use cases.
The patch also makes (un)signed_type_for handle floating-point types.
I tried to audit all callers and the few that handle null returns would
be unaffected.
Tested on aarch64-linux-gnu, aarch64_be-elf and x86_64-linux-gnu.
OK to install?
Richard
2019-01-04 Richard Sandiford <richard.sandiford@arm.com>
gcc/
PR tree-optimization/88598
* tree.h (initializer_each_zero_or_onep): Declare.
* tree.c (initializer_each_a_or_bp): New function.
(initializer_each_zero_or_onep): Likewise.
(signed_or_unsigned_type_for): Handle float types too.
(unsigned_type_for, signed_type_for): Update comments accordingly.
* match.pd: Fold x * { 0 or 1, 0 or 1, ...} to
x & { 0 or -1, 0 or -1, ... }.
gcc/testsuite/
PR tree-optimization/88598
* gcc.dg/pr88598-1.c: New test.
* gcc.dg/pr88598-2.c: Likewise.
* gcc.dg/pr88598-3.c: Likewise.
* gcc.dg/pr88598-4.c: Likewise.
* gcc.dg/pr88598-5.c: Likewise.
Index: gcc/tree.h
===================================================================
--- gcc/tree.h 2019-01-04 11:39:24.810266962 +0000
+++ gcc/tree.h 2019-01-04 11:40:33.141683783 +0000
@@ -4506,6 +4506,7 @@ extern tree first_field (const_tree);
combinations indicate definitive answers. */
extern bool initializer_zerop (const_tree, bool * = NULL);
+extern bool initializer_each_zero_or_onep (const_tree);
extern wide_int vector_cst_int_elt (const_tree, unsigned int);
extern tree vector_cst_elt (const_tree, unsigned int);
Index: gcc/tree.c
===================================================================
--- gcc/tree.c 2019-01-04 11:39:24.810266962 +0000
+++ gcc/tree.c 2019-01-04 11:40:33.141683783 +0000
@@ -11229,6 +11229,60 @@ initializer_zerop (const_tree init, bool
}
}
+/* Return true if EXPR is an initializer expression that consists only
+ of INTEGER_CSTs for which IP0 or IP1 holds and REAL_CSTs for which
+ RP0 or RP1 holds. The choice between IP0 and IP1, and between
+ RP0 and RP1, can vary from one element to the next. */
+
+template<bool (*IP0) (const_tree), bool (*IP1) (const_tree),
+ bool (*RP0) (const_tree), bool (*RP1) (const_tree)>
+bool
+initializer_each_a_or_bp (const_tree expr)
+{
+#define RECURSE(X) initializer_each_a_or_bp<IP0, IP1, RP0, RP1> (X)
+
+ STRIP_ANY_LOCATION_WRAPPER (expr);
+
+ switch (TREE_CODE (expr))
+ {
+ case INTEGER_CST:
+ return IP0 (expr) || IP1 (expr);
+
+ case REAL_CST:
+ return RP0 (expr) || RP1 (expr);
+
+ case VECTOR_CST:
+ {
+ unsigned HOST_WIDE_INT nelts = vector_cst_encoded_nelts (expr);
+ if (VECTOR_CST_STEPPED_P (expr)
+ && !TYPE_VECTOR_SUBPARTS (TREE_TYPE (expr)).is_constant (&nelts))
+ return false;
+
+ for (unsigned int i = 0; i < nelts; ++i)
+ if (!RECURSE (VECTOR_CST_ENCODED_ELT (expr, i)))
+ return false;
+
+ return true;
+ }
+
+ default:
+ return false;
+ }
+
+#undef RECURSE
+}
+
+/* Return true if EXPR is an initializer expression in which every element
+ is a constant that is numerically equal to 0 or 1. The elements do not
+ need to be equal to each other. */
+
+bool
+initializer_each_zero_or_onep (const_tree expr)
+{
+ return initializer_each_a_or_bp<integer_zerop, integer_onep,
+ real_zerop, real_onep> (expr);
+}
+
/* Check if vector VEC consists of all the equal elements and
that the number of elements corresponds to the type of VEC.
The function returns first element of the vector
@@ -11672,7 +11726,10 @@ int_cst_value (const_tree x)
/* If TYPE is an integral or pointer type, return an integer type with
the same precision which is unsigned iff UNSIGNEDP is true, or itself
- if TYPE is already an integer type of signedness UNSIGNEDP. */
+ if TYPE is already an integer type of signedness UNSIGNEDP.
+ If TYPE is a floating-point type, return an integer type with the same
+ bitsize and with the signedness given by UNSIGNEDP; this is useful
+ when doing bit-level operations on a floating-point value. */
tree
signed_or_unsigned_type_for (int unsignedp, tree type)
@@ -11702,17 +11759,23 @@ signed_or_unsigned_type_for (int unsigne
return build_complex_type (inner2);
}
- if (!INTEGRAL_TYPE_P (type)
- && !POINTER_TYPE_P (type)
- && TREE_CODE (type) != OFFSET_TYPE)
+ unsigned int bits;
+ if (INTEGRAL_TYPE_P (type)
+ || POINTER_TYPE_P (type)
+ || TREE_CODE (type) == OFFSET_TYPE)
+ bits = TYPE_PRECISION (type);
+ else if (TREE_CODE (type) == REAL_TYPE)
+ bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (type));
+ else
return NULL_TREE;
- return build_nonstandard_integer_type (TYPE_PRECISION (type), unsignedp);
+ return build_nonstandard_integer_type (bits, unsignedp);
}
/* If TYPE is an integral or pointer type, return an integer type with
the same precision which is unsigned, or itself if TYPE is already an
- unsigned integer type. */
+ unsigned integer type. If TYPE is a floating-point type, return an
+ unsigned integer type with the same bitsize as TYPE. */
tree
unsigned_type_for (tree type)
@@ -11722,7 +11785,8 @@ unsigned_type_for (tree type)
/* If TYPE is an integral or pointer type, return an integer type with
the same precision which is signed, or itself if TYPE is already a
- signed integer type. */
+ signed integer type. If TYPE is a floating-point type, return a
+ signed integer type with the same bitsize as TYPE. */
tree
signed_type_for (tree type)
Index: gcc/match.pd
===================================================================
--- gcc/match.pd 2019-01-04 11:39:25.798258529 +0000
+++ gcc/match.pd 2019-01-04 11:40:33.137683817 +0000
@@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.
integer_each_onep integer_truep integer_nonzerop
real_zerop real_onep real_minus_onep
zerop
+ initializer_each_zero_or_onep
CONSTANT_CLASS_P
tree_expr_nonnegative_p
tree_expr_nonzero_p
@@ -194,6 +195,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|| !COMPLEX_FLOAT_TYPE_P (type)))
(negate @0)))
+/* Transform x * { 0 or 1, 0 or 1, ... } into x & { 0 or -1, 0 or -1, ...},
+ unless the target has native support for the former but not the latter. */
+(simplify
+ (mult @0 VECTOR_CST@1)
+ (if (initializer_each_zero_or_onep (@1)
+ && !HONOR_SNANS (type)
+ && !HONOR_SIGNED_ZEROS (type))
+ (with { tree itype = FLOAT_TYPE_P (type) ? unsigned_type_for (type) : type; }
+ (if (itype
+ && (!VECTOR_MODE_P (TYPE_MODE (type))
+ || (VECTOR_MODE_P (TYPE_MODE (itype))
+ && optab_handler (and_optab,
+ TYPE_MODE (itype)) != CODE_FOR_nothing)))
+ (view_convert (bit_and:itype (view_convert @0)
+ (ne @1 { build_zero_cst (type); })))))))
+
(for cmp (gt ge lt le)
outp (convert convert negate negate)
outn (negate negate convert convert)
Index: gcc/testsuite/gcc.dg/pr88598-1.c
===================================================================
--- /dev/null 2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-1.c 2019-01-04 11:40:33.137683817 +0000
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O -fdump-tree-ccp1" } */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+int
+main ()
+{
+ volatile v4si x1 = { 4, 5, 6, 7 };
+ volatile v4si x2 = { 10, 11, 12, 13 };
+ volatile v4si x3 = { 20, 21, 22, 23 };
+
+ x1 *= (v4si) { 0, 1, 1, 0 };
+ x2 *= (v4si) { 1, 0, 0, 1 };
+ x3 *= (v4si) { 0, 0, 1, 0 };
+
+ if (__builtin_memcmp ((void *) &x1, &(v4si) { 0, 5, 6, 0 }, sizeof (v4si))
+ || __builtin_memcmp ((void *) &x2, &(v4si) { 10, 0, 0, 13 },
+ sizeof (v4si))
+ || __builtin_memcmp ((void *) &x3, &(v4si) { 0, 0, 22, 0 },
+ sizeof (v4si)))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-not { \* } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-2.c
===================================================================
--- /dev/null 2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-2.c 2019-01-04 11:40:33.137683817 +0000
@@ -0,0 +1,30 @@
+/* { dg-do run { target double64 } } */
+/* { dg-options "-O -fdump-tree-ccp1" } */
+/* { dg-add-options ieee } */
+
+typedef double v4df __attribute__ ((vector_size (32)));
+
+int
+main ()
+{
+ volatile v4df x1 = { 4, 5, 6, -7 };
+ volatile v4df x2 = { 10, -11, 12, 13 };
+ volatile v4df x3 = { 20, 21, 22, 23 };
+
+ x1 *= (v4df) { 0, 1, 1, 0 };
+ x2 *= (v4df) { 1, 0, 0, 1 };
+ x3 *= (v4df) { 0.0, -0.0, 1.0, -0.0 };
+
+ if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 6, -0.0 },
+ sizeof (v4df))
+ || __builtin_memcmp ((void *) &x2, &(v4df) { 10, -0.0, 0, 13 },
+ sizeof (v4df))
+ || __builtin_memcmp ((void *) &x3, &(v4df) { 0, -0.0, 22, -0.0 },
+ sizeof (v4df)))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
+/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-3.c
===================================================================
--- /dev/null 2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-3.c 2019-01-04 11:40:33.137683817 +0000
@@ -0,0 +1,29 @@
+/* { dg-do run { target double64 } } */
+/* { dg-options "-O -fno-signed-zeros -fdump-tree-ccp1" } */
+/* { dg-add-options ieee } */
+
+typedef double v4df __attribute__ ((vector_size (32)));
+
+int
+main ()
+{
+ volatile v4df x1 = { 4, 5, 6, -7 };
+ volatile v4df x2 = { 10, -11, 12, 13 };
+ volatile v4df x3 = { 20, 21, 22, 23 };
+
+ x1 *= (v4df) { 0, 1, 1, 0 };
+ x2 *= (v4df) { 1, 0, 0, 1 };
+ x3 *= (v4df) { 0.0, -0.0, 1.0, -0.0 };
+
+ if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 6, 0 },
+ sizeof (v4df))
+ || __builtin_memcmp ((void *) &x2, &(v4df) { 10, 0, 0, 13 },
+ sizeof (v4df))
+ || __builtin_memcmp ((void *) &x3, &(v4df) { 0, 0, 22, 0 },
+ sizeof (v4df)))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-not { \* } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-4.c
===================================================================
--- /dev/null 2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-4.c 2019-01-04 11:40:33.137683817 +0000
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-options "-O -fdump-tree-ccp1" } */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+int
+main ()
+{
+ volatile v4si x1 = { 4, 5, 6, 7 };
+ volatile v4si x2 = { 10, 11, 12, 13 };
+ volatile v4si x3 = { 20, 21, 22, 23 };
+
+ x1 *= (v4si) { 0, 1, 2, 3 };
+ x2 *= (v4si) { 1, 0, 2, 0 };
+ x3 *= (v4si) { 0, 0, -1, 0 };
+
+ if (__builtin_memcmp ((void *) &x1, &(v4si) { 0, 5, 12, 21 }, sizeof (v4si))
+ || __builtin_memcmp ((void *) &x2, &(v4si) { 10, 0, 24, 0 },
+ sizeof (v4si))
+ || __builtin_memcmp ((void *) &x3, &(v4si) { 0, 0, -22, 0 },
+ sizeof (v4si)))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
+/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-5.c
===================================================================
--- /dev/null 2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-5.c 2019-01-04 11:40:33.137683817 +0000
@@ -0,0 +1,29 @@
+/* { dg-do run { target double64 } } */
+/* { dg-options "-O -fno-signed-zeros -fdump-tree-ccp1" } */
+/* { dg-add-options ieee } */
+
+typedef double v4df __attribute__ ((vector_size (32)));
+
+int
+main ()
+{
+ volatile v4df x1 = { 4, 5, 6, 7 };
+ volatile v4df x2 = { 10, 11, 12, 13 };
+ volatile v4df x3 = { 20, 21, 22, 23 };
+
+ x1 *= (v4df) { 0, 1, 2, 3 };
+ x2 *= (v4df) { 1, 0, 2, 0 };
+ x3 *= (v4df) { 0, 0, -1, 0 };
+
+ if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 12, 21 }, sizeof (v4df))
+ || __builtin_memcmp ((void *) &x2, &(v4df) { 10, 0, 24, 0 },
+ sizeof (v4df))
+ || __builtin_memcmp ((void *) &x3, &(v4df) { 0, 0, -22, 0 },
+ sizeof (v4df)))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
+/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... }
2019-01-04 11:43 [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... } Richard Sandiford
@ 2019-01-04 12:04 ` Eric Botcazou
2019-01-04 12:13 ` Richard Sandiford
0 siblings, 1 reply; 7+ messages in thread
From: Eric Botcazou @ 2019-01-04 12:04 UTC (permalink / raw)
To: Richard Sandiford; +Cc: gcc-patches
> Index: gcc/tree.c
> ===================================================================
> --- gcc/tree.c 2019-01-04 11:39:24.810266962 +0000
> +++ gcc/tree.c 2019-01-04 11:40:33.141683783 +0000
> @@ -11229,6 +11229,60 @@ initializer_zerop (const_tree init, bool
> }
> }
>
> +/* Return true if EXPR is an initializer expression that consists only
> + of INTEGER_CSTs for which IP0 or IP1 holds and REAL_CSTs for which
> + RP0 or RP1 holds. The choice between IP0 and IP1, and between
> + RP0 and RP1, can vary from one element to the next. */
> +
> +template<bool (*IP0) (const_tree), bool (*IP1) (const_tree),
> + bool (*RP0) (const_tree), bool (*RP1) (const_tree)>
> +bool
> +initializer_each_a_or_bp (const_tree expr)
> +{
> +#define RECURSE(X) initializer_each_a_or_bp<IP0, IP1, RP0, RP1> (X)
> +
> + STRIP_ANY_LOCATION_WRAPPER (expr);
> +
> + switch (TREE_CODE (expr))
> + {
> + case INTEGER_CST:
> + return IP0 (expr) || IP1 (expr);
> +
> + case REAL_CST:
> + return RP0 (expr) || RP1 (expr);
> +
> + case VECTOR_CST:
> + {
> + unsigned HOST_WIDE_INT nelts = vector_cst_encoded_nelts (expr);
> + if (VECTOR_CST_STEPPED_P (expr)
> + && !TYPE_VECTOR_SUBPARTS (TREE_TYPE (expr)).is_constant (&nelts))
> + return false;
> +
> + for (unsigned int i = 0; i < nelts; ++i)
> + if (!RECURSE (VECTOR_CST_ENCODED_ELT (expr, i)))
> + return false;
> +
> + return true;
> + }
> +
> + default:
> + return false;
> + }
> +
> +#undef RECURSE
Can we avoid the gratuitous use of template here? We were told that C++ would
be used only when it makes things more straightforward and it's the contrary
in this case, to wit the need for the ugly RECURSE macro in the middle.
--
Eric Botcazou
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... }
2019-01-04 12:04 ` Eric Botcazou
@ 2019-01-04 12:13 ` Richard Sandiford
2019-01-04 12:19 ` Jakub Jelinek
2019-01-04 12:25 ` Eric Botcazou
0 siblings, 2 replies; 7+ messages in thread
From: Richard Sandiford @ 2019-01-04 12:13 UTC (permalink / raw)
To: Eric Botcazou; +Cc: gcc-patches
Eric Botcazou <ebotcazou@adacore.com> writes:
>> Index: gcc/tree.c
>> ===================================================================
>> --- gcc/tree.c 2019-01-04 11:39:24.810266962 +0000
>> +++ gcc/tree.c 2019-01-04 11:40:33.141683783 +0000
>> @@ -11229,6 +11229,60 @@ initializer_zerop (const_tree init, bool
>> }
>> }
>>
>> +/* Return true if EXPR is an initializer expression that consists only
>> + of INTEGER_CSTs for which IP0 or IP1 holds and REAL_CSTs for which
>> + RP0 or RP1 holds. The choice between IP0 and IP1, and between
>> + RP0 and RP1, can vary from one element to the next. */
>> +
>> +template<bool (*IP0) (const_tree), bool (*IP1) (const_tree),
>> + bool (*RP0) (const_tree), bool (*RP1) (const_tree)>
>> +bool
>> +initializer_each_a_or_bp (const_tree expr)
>> +{
>> +#define RECURSE(X) initializer_each_a_or_bp<IP0, IP1, RP0, RP1> (X)
>> +
>> + STRIP_ANY_LOCATION_WRAPPER (expr);
>> +
>> + switch (TREE_CODE (expr))
>> + {
>> + case INTEGER_CST:
>> + return IP0 (expr) || IP1 (expr);
>> +
>> + case REAL_CST:
>> + return RP0 (expr) || RP1 (expr);
>> +
>> + case VECTOR_CST:
>> + {
>> + unsigned HOST_WIDE_INT nelts = vector_cst_encoded_nelts (expr);
>> + if (VECTOR_CST_STEPPED_P (expr)
>> + && !TYPE_VECTOR_SUBPARTS (TREE_TYPE (expr)).is_constant (&nelts))
>> + return false;
>> +
>> + for (unsigned int i = 0; i < nelts; ++i)
>> + if (!RECURSE (VECTOR_CST_ENCODED_ELT (expr, i)))
>> + return false;
>> +
>> + return true;
>> + }
>> +
>> + default:
>> + return false;
>> + }
>> +
>> +#undef RECURSE
>
> Can we avoid the gratuitous use of template here? We were told that C++ would
> be used only when it makes things more straightforward and it's the contrary
> in this case, to wit the need for the ugly RECURSE macro in the middle.
I did it that way so that it would be easy to add things like
zero_or_minus_onep without cut-&-pasting the whole structure.
The way to do that in C would be to use a macro for the full
function, but that's even uglier due to the extra backslashes.
I can change it to:
for (unsigned int i = 0; i < nelts; ++i)
{
tree elt = VECTOR_CST_ENCODED_ELT (expr, i);
if (!initializer_each_a_or_bp<IP0, IP1, RP0, RP1> (elt))
return false;
}
if we want to avoid macros.
I was actually worried that this wouldn't be C++ enough, due to not
using a function template to combine each pair of functions. :-)
Richard
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... }
2019-01-04 12:13 ` Richard Sandiford
@ 2019-01-04 12:19 ` Jakub Jelinek
2019-01-04 12:44 ` Richard Sandiford
2019-01-04 12:25 ` Eric Botcazou
1 sibling, 1 reply; 7+ messages in thread
From: Jakub Jelinek @ 2019-01-04 12:19 UTC (permalink / raw)
To: Eric Botcazou, gcc-patches, richard.sandiford
On Fri, Jan 04, 2019 at 12:13:13PM +0000, Richard Sandiford wrote:
> > Can we avoid the gratuitous use of template here? We were told that C++ would
> > be used only when it makes things more straightforward and it's the contrary
> > in this case, to wit the need for the ugly RECURSE macro in the middle.
>
> I did it that way so that it would be easy to add things like
> zero_or_minus_onep without cut-&-pasting the whole structure.
IMHO we can make such a change only when it is needed.
> The way to do that in C would be to use a macro for the full
> function, but that's even uglier due to the extra backslashes.
Or just make the function static inline and pass the function pointers
to it as arguments? If it is inlined, it will be the same; it could
even be always_inline if that is really needed.
Jakub
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... }
2019-01-04 12:13 ` Richard Sandiford
2019-01-04 12:19 ` Jakub Jelinek
@ 2019-01-04 12:25 ` Eric Botcazou
1 sibling, 0 replies; 7+ messages in thread
From: Eric Botcazou @ 2019-01-04 12:25 UTC (permalink / raw)
To: Richard Sandiford; +Cc: gcc-patches
> I did it that way so that it would be easy to add things like
> zero_or_minus_onep without cut-&-pasting the whole structure.
Yes, I inferred that, but people can still templatize afterward if need be.
Following this line of reasoning, why limit yourself to this arbitrary
number of 2 values in the template? I'm sure one can imagine the need in some
distant future for initializer_each_minus_one_or_zero_or_onep. ;-)
--
Eric Botcazou
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... }
2019-01-04 12:19 ` Jakub Jelinek
@ 2019-01-04 12:44 ` Richard Sandiford
2019-01-07 11:35 ` Richard Biener
0 siblings, 1 reply; 7+ messages in thread
From: Richard Sandiford @ 2019-01-04 12:44 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: Eric Botcazou, gcc-patches
Jakub Jelinek <jakub@redhat.com> writes:
> On Fri, Jan 04, 2019 at 12:13:13PM +0000, Richard Sandiford wrote:
>> > Can we avoid the gratuitous use of template here? We were told that C++ would
>> > be used only when it makes things more straightforward and it's the contrary
>> > in this case, to wit the need for the ugly RECURSE macro in the middle.
>>
>> I did it that way so that it would be easy to add things like
>> zero_or_minus_onep without cut-&-pasting the whole structure.
>
> IMHO we can make such a change only when it is needed.
The other predicates in tree.c suggest that we won't though.
E.g. there was never any attempt to unify integer_zerop vs. integer_onep
and real_zerop vs. real_onep.
>> The way to do that in C would be to use a macro for the full
>> function, but that's even uglier due to the extra backslashes.
>
> Or just make the function static inline and pass the function pointers
> to it as arguments? If it is inlined, it will be the same, it could be
> even always_inline if that is really needed.
For that to work for recursive functions I think we'd need to pass the
caller predicate in too, which means one more function pointer overall.
Anyway, here's the patch without the template.
Thanks,
Richard
2019-01-04 Richard Sandiford <richard.sandiford@arm.com>
gcc/
PR tree-optimization/88598
* tree.h (initializer_each_zero_or_onep): Declare.
* tree.c (initializer_each_zero_or_onep): New function.
(signed_or_unsigned_type_for): Handle float types too.
(unsigned_type_for, signed_type_for): Update comments accordingly.
* match.pd: Fold x * { 0 or 1, 0 or 1, ...} to
x & { 0 or -1, 0 or -1, ... }.
gcc/testsuite/
PR tree-optimization/88598
* gcc.dg/pr88598-1.c: New test.
* gcc.dg/pr88598-2.c: Likewise.
* gcc.dg/pr88598-3.c: Likewise.
* gcc.dg/pr88598-4.c: Likewise.
* gcc.dg/pr88598-5.c: Likewise.
Index: gcc/tree.h
===================================================================
--- gcc/tree.h 2019-01-04 12:40:51.000000000 +0000
+++ gcc/tree.h 2019-01-04 12:40:51.990582844 +0000
@@ -4506,6 +4506,7 @@ extern tree first_field (const_tree);
combinations indicate definitive answers. */
extern bool initializer_zerop (const_tree, bool * = NULL);
+extern bool initializer_each_zero_or_onep (const_tree);
extern wide_int vector_cst_int_elt (const_tree, unsigned int);
extern tree vector_cst_elt (const_tree, unsigned int);
Index: gcc/tree.c
===================================================================
--- gcc/tree.c 2019-01-04 12:40:51.000000000 +0000
+++ gcc/tree.c 2019-01-04 12:40:51.990582844 +0000
@@ -11229,6 +11229,45 @@ initializer_zerop (const_tree init, bool
}
}
+/* Return true if EXPR is an initializer expression in which every element
+ is a constant that is numerically equal to 0 or 1. The elements do not
+ need to be equal to each other. */
+
+bool
+initializer_each_zero_or_onep (const_tree expr)
+{
+ STRIP_ANY_LOCATION_WRAPPER (expr);
+
+ switch (TREE_CODE (expr))
+ {
+ case INTEGER_CST:
+ return integer_zerop (expr) || integer_onep (expr);
+
+ case REAL_CST:
+ return real_zerop (expr) || real_onep (expr);
+
+ case VECTOR_CST:
+ {
+ unsigned HOST_WIDE_INT nelts = vector_cst_encoded_nelts (expr);
+ if (VECTOR_CST_STEPPED_P (expr)
+ && !TYPE_VECTOR_SUBPARTS (TREE_TYPE (expr)).is_constant (&nelts))
+ return false;
+
+ for (unsigned int i = 0; i < nelts; ++i)
+ {
+ tree elt = VECTOR_CST_ENCODED_ELT (expr, i);
+ if (!initializer_each_zero_or_onep (elt))
+ return false;
+ }
+
+ return true;
+ }
+
+ default:
+ return false;
+ }
+}
+
/* Check if vector VEC consists of all the equal elements and
that the number of elements corresponds to the type of VEC.
The function returns first element of the vector
@@ -11672,7 +11711,10 @@ int_cst_value (const_tree x)
/* If TYPE is an integral or pointer type, return an integer type with
the same precision which is unsigned iff UNSIGNEDP is true, or itself
- if TYPE is already an integer type of signedness UNSIGNEDP. */
+ if TYPE is already an integer type of signedness UNSIGNEDP.
+ If TYPE is a floating-point type, return an integer type with the same
+ bitsize and with the signedness given by UNSIGNEDP; this is useful
+ when doing bit-level operations on a floating-point value. */
tree
signed_or_unsigned_type_for (int unsignedp, tree type)
@@ -11702,17 +11744,23 @@ signed_or_unsigned_type_for (int unsigne
return build_complex_type (inner2);
}
- if (!INTEGRAL_TYPE_P (type)
- && !POINTER_TYPE_P (type)
- && TREE_CODE (type) != OFFSET_TYPE)
+ unsigned int bits;
+ if (INTEGRAL_TYPE_P (type)
+ || POINTER_TYPE_P (type)
+ || TREE_CODE (type) == OFFSET_TYPE)
+ bits = TYPE_PRECISION (type);
+ else if (TREE_CODE (type) == REAL_TYPE)
+ bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (type));
+ else
return NULL_TREE;
- return build_nonstandard_integer_type (TYPE_PRECISION (type), unsignedp);
+ return build_nonstandard_integer_type (bits, unsignedp);
}
/* If TYPE is an integral or pointer type, return an integer type with
the same precision which is unsigned, or itself if TYPE is already an
- unsigned integer type. */
+ unsigned integer type. If TYPE is a floating-point type, return an
+ unsigned integer type with the same bitsize as TYPE. */
tree
unsigned_type_for (tree type)
@@ -11722,7 +11770,8 @@ unsigned_type_for (tree type)
/* If TYPE is an integral or pointer type, return an integer type with
the same precision which is signed, or itself if TYPE is already a
- signed integer type. */
+ signed integer type. If TYPE is a floating-point type, return a
+ signed integer type with the same bitsize as TYPE. */
tree
signed_type_for (tree type)
Index: gcc/match.pd
===================================================================
--- gcc/match.pd 2019-01-04 12:40:51.000000000 +0000
+++ gcc/match.pd 2019-01-04 12:40:51.982582910 +0000
@@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.
integer_each_onep integer_truep integer_nonzerop
real_zerop real_onep real_minus_onep
zerop
+ initializer_each_zero_or_onep
CONSTANT_CLASS_P
tree_expr_nonnegative_p
tree_expr_nonzero_p
@@ -194,6 +195,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|| !COMPLEX_FLOAT_TYPE_P (type)))
(negate @0)))
+/* Transform x * { 0 or 1, 0 or 1, ... } into x & { 0 or -1, 0 or -1, ...},
+ unless the target has native support for the former but not the latter. */
+(simplify
+ (mult @0 VECTOR_CST@1)
+ (if (initializer_each_zero_or_onep (@1)
+ && !HONOR_SNANS (type)
+ && !HONOR_SIGNED_ZEROS (type))
+ (with { tree itype = FLOAT_TYPE_P (type) ? unsigned_type_for (type) : type; }
+ (if (itype
+ && (!VECTOR_MODE_P (TYPE_MODE (type))
+ || (VECTOR_MODE_P (TYPE_MODE (itype))
+ && optab_handler (and_optab,
+ TYPE_MODE (itype)) != CODE_FOR_nothing)))
+ (view_convert (bit_and:itype (view_convert @0)
+ (ne @1 { build_zero_cst (type); })))))))
+
(for cmp (gt ge lt le)
outp (convert convert negate negate)
outn (negate negate convert convert)
Index: gcc/testsuite/gcc.dg/pr88598-1.c
===================================================================
--- /dev/null 2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-1.c 2019-01-04 12:40:51.982582910 +0000
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O -fdump-tree-ccp1" } */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+int
+main ()
+{
+ volatile v4si x1 = { 4, 5, 6, 7 };
+ volatile v4si x2 = { 10, 11, 12, 13 };
+ volatile v4si x3 = { 20, 21, 22, 23 };
+
+ x1 *= (v4si) { 0, 1, 1, 0 };
+ x2 *= (v4si) { 1, 0, 0, 1 };
+ x3 *= (v4si) { 0, 0, 1, 0 };
+
+ if (__builtin_memcmp ((void *) &x1, &(v4si) { 0, 5, 6, 0 }, sizeof (v4si))
+ || __builtin_memcmp ((void *) &x2, &(v4si) { 10, 0, 0, 13 },
+ sizeof (v4si))
+ || __builtin_memcmp ((void *) &x3, &(v4si) { 0, 0, 22, 0 },
+ sizeof (v4si)))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-not { \* } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-2.c
===================================================================
--- /dev/null 2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-2.c 2019-01-04 12:40:51.986582877 +0000
@@ -0,0 +1,30 @@
+/* { dg-do run { target double64 } } */
+/* { dg-options "-O -fdump-tree-ccp1" } */
+/* { dg-add-options ieee } */
+
+typedef double v4df __attribute__ ((vector_size (32)));
+
+int
+main ()
+{
+ volatile v4df x1 = { 4, 5, 6, -7 };
+ volatile v4df x2 = { 10, -11, 12, 13 };
+ volatile v4df x3 = { 20, 21, 22, 23 };
+
+ x1 *= (v4df) { 0, 1, 1, 0 };
+ x2 *= (v4df) { 1, 0, 0, 1 };
+ x3 *= (v4df) { 0.0, -0.0, 1.0, -0.0 };
+
+ if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 6, -0.0 },
+ sizeof (v4df))
+ || __builtin_memcmp ((void *) &x2, &(v4df) { 10, -0.0, 0, 13 },
+ sizeof (v4df))
+ || __builtin_memcmp ((void *) &x3, &(v4df) { 0, -0.0, 22, -0.0 },
+ sizeof (v4df)))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
+/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-3.c
===================================================================
--- /dev/null 2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-3.c 2019-01-04 12:40:51.986582877 +0000
@@ -0,0 +1,29 @@
+/* { dg-do run { target double64 } } */
+/* { dg-options "-O -fno-signed-zeros -fdump-tree-ccp1" } */
+/* { dg-add-options ieee } */
+
+typedef double v4df __attribute__ ((vector_size (32)));
+
+int
+main ()
+{
+ volatile v4df x1 = { 4, 5, 6, -7 };
+ volatile v4df x2 = { 10, -11, 12, 13 };
+ volatile v4df x3 = { 20, 21, 22, 23 };
+
+ x1 *= (v4df) { 0, 1, 1, 0 };
+ x2 *= (v4df) { 1, 0, 0, 1 };
+ x3 *= (v4df) { 0.0, -0.0, 1.0, -0.0 };
+
+ if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 6, 0 },
+ sizeof (v4df))
+ || __builtin_memcmp ((void *) &x2, &(v4df) { 10, 0, 0, 13 },
+ sizeof (v4df))
+ || __builtin_memcmp ((void *) &x3, &(v4df) { 0, 0, 22, 0 },
+ sizeof (v4df)))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-not { \* } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-4.c
===================================================================
--- /dev/null 2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-4.c 2019-01-04 12:40:51.986582877 +0000
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-options "-O -fdump-tree-ccp1" } */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+int
+main ()
+{
+ volatile v4si x1 = { 4, 5, 6, 7 };
+ volatile v4si x2 = { 10, 11, 12, 13 };
+ volatile v4si x3 = { 20, 21, 22, 23 };
+
+ x1 *= (v4si) { 0, 1, 2, 3 };
+ x2 *= (v4si) { 1, 0, 2, 0 };
+ x3 *= (v4si) { 0, 0, -1, 0 };
+
+ if (__builtin_memcmp ((void *) &x1, &(v4si) { 0, 5, 12, 21 }, sizeof (v4si))
+ || __builtin_memcmp ((void *) &x2, &(v4si) { 10, 0, 24, 0 },
+ sizeof (v4si))
+ || __builtin_memcmp ((void *) &x3, &(v4si) { 0, 0, -22, 0 },
+ sizeof (v4si)))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
+/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-5.c
===================================================================
--- /dev/null 2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-5.c 2019-01-04 12:40:51.986582877 +0000
@@ -0,0 +1,29 @@
+/* { dg-do run { target double64 } } */
+/* { dg-options "-O -fno-signed-zeros -fdump-tree-ccp1" } */
+/* { dg-add-options ieee } */
+
+typedef double v4df __attribute__ ((vector_size (32)));
+
+int
+main ()
+{
+ volatile v4df x1 = { 4, 5, 6, 7 };
+ volatile v4df x2 = { 10, 11, 12, 13 };
+ volatile v4df x3 = { 20, 21, 22, 23 };
+
+ x1 *= (v4df) { 0, 1, 2, 3 };
+ x2 *= (v4df) { 1, 0, 2, 0 };
+ x3 *= (v4df) { 0, 0, -1, 0 };
+
+ if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 12, 21 }, sizeof (v4df))
+ || __builtin_memcmp ((void *) &x2, &(v4df) { 10, 0, 24, 0 },
+ sizeof (v4df))
+ || __builtin_memcmp ((void *) &x3, &(v4df) { 0, 0, -22, 0 },
+ sizeof (v4df)))
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
+/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... }
2019-01-04 12:44 ` Richard Sandiford
@ 2019-01-07 11:35 ` Richard Biener
0 siblings, 0 replies; 7+ messages in thread
From: Richard Biener @ 2019-01-07 11:35 UTC (permalink / raw)
To: Jakub Jelinek, Eric Botcazou, GCC Patches, Richard Sandiford
On Fri, Jan 4, 2019 at 1:44 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Jakub Jelinek <jakub@redhat.com> writes:
> > On Fri, Jan 04, 2019 at 12:13:13PM +0000, Richard Sandiford wrote:
> >> > Can we avoid the gratuitous use of template here? We were told that C++ would
> >> > be used only when it makes things more straightforward and it's the contrary
> >> > in this case, to wit the need for the ugly RECURSE macro in the middle.
> >>
> >> I did it that way so that it would be easy to add things like
> >> zero_or_minus_onep without cut-&-pasting the whole structure.
> >
> > IMHO we can make such a change only when it is needed.
>
> The other predicates in tree.c suggest that we won't though.
> E.g. there was never any attempt to unify integer_zerop vs. integer_onep
> and real_zerop vs. real_onep.
>
> >> The way to do that in C would be to use a macro for the full
> >> function, but that's even uglier due to the extra backslashes.
> >
> > Or just make the function static inline and pass the function pointers
> > to it as arguments? If it is inlined, it will be the same, it could be
> > even always_inline if that is really needed.
>
> For that to work for recursive functions I think we'd need to pass the
> caller predicate in too, which means one more function pointer overall.
>
> Anyway, here's the patch without the template.
OK.
Thanks,
Richard.
> Thanks,
> Richard
>
>
> 2019-01-04 Richard Sandiford <richard.sandiford@arm.com>
>
> gcc/
> PR tree-optimization/88598
> * tree.h (initializer_each_zero_or_onep): Declare.
> * tree.c (initializer_each_zero_or_onep): New function.
> (signed_or_unsigned_type_for): Handle float types too.
> (unsigned_type_for, signed_type_for): Update comments accordingly.
> * match.pd: Fold x * { 0 or 1, 0 or 1, ...} to
> x & { 0 or -1, 0 or -1, ... }.
>
> gcc/testsuite/
> PR tree-optimization/88598
> * gcc.dg/pr88598-1.c: New test.
> * gcc.dg/pr88598-2.c: Likewise.
> * gcc.dg/pr88598-3.c: Likewise.
> * gcc.dg/pr88598-4.c: Likewise.
> * gcc.dg/pr88598-5.c: Likewise.
>
> Index: gcc/tree.h
> ===================================================================
> --- gcc/tree.h 2019-01-04 12:40:51.000000000 +0000
> +++ gcc/tree.h 2019-01-04 12:40:51.990582844 +0000
> @@ -4506,6 +4506,7 @@ extern tree first_field (const_tree);
> combinations indicate definitive answers. */
>
> extern bool initializer_zerop (const_tree, bool * = NULL);
> +extern bool initializer_each_zero_or_onep (const_tree);
>
> extern wide_int vector_cst_int_elt (const_tree, unsigned int);
> extern tree vector_cst_elt (const_tree, unsigned int);
> Index: gcc/tree.c
> ===================================================================
> --- gcc/tree.c 2019-01-04 12:40:51.000000000 +0000
> +++ gcc/tree.c 2019-01-04 12:40:51.990582844 +0000
> @@ -11229,6 +11229,45 @@ initializer_zerop (const_tree init, bool
> }
> }
>
> +/* Return true if EXPR is an initializer expression in which every element
> + is a constant that is numerically equal to 0 or 1. The elements do not
> + need to be equal to each other. */
> +
> +bool
> +initializer_each_zero_or_onep (const_tree expr)
> +{
> + STRIP_ANY_LOCATION_WRAPPER (expr);
> +
> + switch (TREE_CODE (expr))
> + {
> + case INTEGER_CST:
> + return integer_zerop (expr) || integer_onep (expr);
> +
> + case REAL_CST:
> + return real_zerop (expr) || real_onep (expr);
> +
> + case VECTOR_CST:
> + {
> + unsigned HOST_WIDE_INT nelts = vector_cst_encoded_nelts (expr);
> + if (VECTOR_CST_STEPPED_P (expr)
> + && !TYPE_VECTOR_SUBPARTS (TREE_TYPE (expr)).is_constant (&nelts))
> + return false;
> +
> + for (unsigned int i = 0; i < nelts; ++i)
> + {
> + tree elt = VECTOR_CST_ENCODED_ELT (expr, i);
> + if (!initializer_each_zero_or_onep (elt))
> + return false;
> + }
> +
> + return true;
> + }
> +
> + default:
> + return false;
> + }
> +}
> +
> /* Check if vector VEC consists of all the equal elements and
> that the number of elements corresponds to the type of VEC.
> The function returns first element of the vector
> @@ -11672,7 +11711,10 @@ int_cst_value (const_tree x)
>
> /* If TYPE is an integral or pointer type, return an integer type with
> the same precision which is unsigned iff UNSIGNEDP is true, or itself
> - if TYPE is already an integer type of signedness UNSIGNEDP. */
> + if TYPE is already an integer type of signedness UNSIGNEDP.
> + If TYPE is a floating-point type, return an integer type with the same
> + bitsize and with the signedness given by UNSIGNEDP; this is useful
> + when doing bit-level operations on a floating-point value. */
>
> tree
> signed_or_unsigned_type_for (int unsignedp, tree type)
> @@ -11702,17 +11744,23 @@ signed_or_unsigned_type_for (int unsigne
> return build_complex_type (inner2);
> }
>
> - if (!INTEGRAL_TYPE_P (type)
> - && !POINTER_TYPE_P (type)
> - && TREE_CODE (type) != OFFSET_TYPE)
> + unsigned int bits;
> + if (INTEGRAL_TYPE_P (type)
> + || POINTER_TYPE_P (type)
> + || TREE_CODE (type) == OFFSET_TYPE)
> + bits = TYPE_PRECISION (type);
> + else if (TREE_CODE (type) == REAL_TYPE)
> + bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (type));
> + else
> return NULL_TREE;
>
> - return build_nonstandard_integer_type (TYPE_PRECISION (type), unsignedp);
> + return build_nonstandard_integer_type (bits, unsignedp);
> }
>
> /* If TYPE is an integral or pointer type, return an integer type with
> the same precision which is unsigned, or itself if TYPE is already an
> - unsigned integer type. */
> + unsigned integer type. If TYPE is a floating-point type, return an
> + unsigned integer type with the same bitsize as TYPE. */
>
> tree
> unsigned_type_for (tree type)
> @@ -11722,7 +11770,8 @@ unsigned_type_for (tree type)
>
> /* If TYPE is an integral or pointer type, return an integer type with
> the same precision which is signed, or itself if TYPE is already a
> - signed integer type. */
> + signed integer type. If TYPE is a floating-point type, return a
> + signed integer type with the same bitsize as TYPE. */
>
> tree
> signed_type_for (tree type)
> Index: gcc/match.pd
> ===================================================================
> --- gcc/match.pd 2019-01-04 12:40:51.000000000 +0000
> +++ gcc/match.pd 2019-01-04 12:40:51.982582910 +0000
> @@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.
> integer_each_onep integer_truep integer_nonzerop
> real_zerop real_onep real_minus_onep
> zerop
> + initializer_each_zero_or_onep
> CONSTANT_CLASS_P
> tree_expr_nonnegative_p
> tree_expr_nonzero_p
> @@ -194,6 +195,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> || !COMPLEX_FLOAT_TYPE_P (type)))
> (negate @0)))
>
> +/* Transform x * { 0 or 1, 0 or 1, ... } into x & { 0 or -1, 0 or -1, ...},
> + unless the target has native support for the former but not the latter. */
> +(simplify
> + (mult @0 VECTOR_CST@1)
> + (if (initializer_each_zero_or_onep (@1)
> + && !HONOR_SNANS (type)
> + && !HONOR_SIGNED_ZEROS (type))
> + (with { tree itype = FLOAT_TYPE_P (type) ? unsigned_type_for (type) : type; }
> + (if (itype
> + && (!VECTOR_MODE_P (TYPE_MODE (type))
> + || (VECTOR_MODE_P (TYPE_MODE (itype))
> + && optab_handler (and_optab,
> + TYPE_MODE (itype)) != CODE_FOR_nothing)))
> + (view_convert (bit_and:itype (view_convert @0)
> + (ne @1 { build_zero_cst (type); })))))))
> +
> (for cmp (gt ge lt le)
> outp (convert convert negate negate)
> outn (negate negate convert convert)
> Index: gcc/testsuite/gcc.dg/pr88598-1.c
> ===================================================================
> --- /dev/null 2018-12-31 11:20:29.178325188 +0000
> +++ gcc/testsuite/gcc.dg/pr88598-1.c 2019-01-04 12:40:51.982582910 +0000
> @@ -0,0 +1,27 @@
> +/* { dg-do run } */
> +/* { dg-options "-O -fdump-tree-ccp1" } */
> +
> +typedef int v4si __attribute__ ((vector_size (16)));
> +
> +int
> +main ()
> +{
> + volatile v4si x1 = { 4, 5, 6, 7 };
> + volatile v4si x2 = { 10, 11, 12, 13 };
> + volatile v4si x3 = { 20, 21, 22, 23 };
> +
> + x1 *= (v4si) { 0, 1, 1, 0 };
> + x2 *= (v4si) { 1, 0, 0, 1 };
> + x3 *= (v4si) { 0, 0, 1, 0 };
> +
> + if (__builtin_memcmp ((void *) &x1, &(v4si) { 0, 5, 6, 0 }, sizeof (v4si))
> + || __builtin_memcmp ((void *) &x2, &(v4si) { 10, 0, 0, 13 },
> + sizeof (v4si))
> + || __builtin_memcmp ((void *) &x3, &(v4si) { 0, 0, 22, 0 },
> + sizeof (v4si)))
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-not { \* } "ccp1" } } */
> Index: gcc/testsuite/gcc.dg/pr88598-2.c
> ===================================================================
> --- /dev/null 2018-12-31 11:20:29.178325188 +0000
> +++ gcc/testsuite/gcc.dg/pr88598-2.c 2019-01-04 12:40:51.986582877 +0000
> @@ -0,0 +1,30 @@
> +/* { dg-do run { target double64 } } */
> +/* { dg-options "-O -fdump-tree-ccp1" } */
> +/* { dg-add-options ieee } */
> +
> +typedef double v4df __attribute__ ((vector_size (32)));
> +
> +int
> +main ()
> +{
> + volatile v4df x1 = { 4, 5, 6, -7 };
> + volatile v4df x2 = { 10, -11, 12, 13 };
> + volatile v4df x3 = { 20, 21, 22, 23 };
> +
> + x1 *= (v4df) { 0, 1, 1, 0 };
> + x2 *= (v4df) { 1, 0, 0, 1 };
> + x3 *= (v4df) { 0.0, -0.0, 1.0, -0.0 };
> +
> + if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 6, -0.0 },
> + sizeof (v4df))
> + || __builtin_memcmp ((void *) &x2, &(v4df) { 10, -0.0, 0, 13 },
> + sizeof (v4df))
> + || __builtin_memcmp ((void *) &x3, &(v4df) { 0, -0.0, 22, -0.0 },
> + sizeof (v4df)))
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
> +/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
> Index: gcc/testsuite/gcc.dg/pr88598-3.c
> ===================================================================
> --- /dev/null 2018-12-31 11:20:29.178325188 +0000
> +++ gcc/testsuite/gcc.dg/pr88598-3.c 2019-01-04 12:40:51.986582877 +0000
> @@ -0,0 +1,29 @@
> +/* { dg-do run { target double64 } } */
> +/* { dg-options "-O -fno-signed-zeros -fdump-tree-ccp1" } */
> +/* { dg-add-options ieee } */
> +
> +typedef double v4df __attribute__ ((vector_size (32)));
> +
> +int
> +main ()
> +{
> + volatile v4df x1 = { 4, 5, 6, -7 };
> + volatile v4df x2 = { 10, -11, 12, 13 };
> + volatile v4df x3 = { 20, 21, 22, 23 };
> +
> + x1 *= (v4df) { 0, 1, 1, 0 };
> + x2 *= (v4df) { 1, 0, 0, 1 };
> + x3 *= (v4df) { 0.0, -0.0, 1.0, -0.0 };
> +
> + if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 6, 0 },
> + sizeof (v4df))
> + || __builtin_memcmp ((void *) &x2, &(v4df) { 10, 0, 0, 13 },
> + sizeof (v4df))
> + || __builtin_memcmp ((void *) &x3, &(v4df) { 0, 0, 22, 0 },
> + sizeof (v4df)))
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-not { \* } "ccp1" } } */
> Index: gcc/testsuite/gcc.dg/pr88598-4.c
> ===================================================================
> --- /dev/null 2018-12-31 11:20:29.178325188 +0000
> +++ gcc/testsuite/gcc.dg/pr88598-4.c 2019-01-04 12:40:51.986582877 +0000
> @@ -0,0 +1,28 @@
> +/* { dg-do run } */
> +/* { dg-options "-O -fdump-tree-ccp1" } */
> +
> +typedef int v4si __attribute__ ((vector_size (16)));
> +
> +int
> +main ()
> +{
> + volatile v4si x1 = { 4, 5, 6, 7 };
> + volatile v4si x2 = { 10, 11, 12, 13 };
> + volatile v4si x3 = { 20, 21, 22, 23 };
> +
> + x1 *= (v4si) { 0, 1, 2, 3 };
> + x2 *= (v4si) { 1, 0, 2, 0 };
> + x3 *= (v4si) { 0, 0, -1, 0 };
> +
> + if (__builtin_memcmp ((void *) &x1, &(v4si) { 0, 5, 12, 21 }, sizeof (v4si))
> + || __builtin_memcmp ((void *) &x2, &(v4si) { 10, 0, 24, 0 },
> + sizeof (v4si))
> + || __builtin_memcmp ((void *) &x3, &(v4si) { 0, 0, -22, 0 },
> + sizeof (v4si)))
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
> +/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
> Index: gcc/testsuite/gcc.dg/pr88598-5.c
> ===================================================================
> --- /dev/null 2018-12-31 11:20:29.178325188 +0000
> +++ gcc/testsuite/gcc.dg/pr88598-5.c 2019-01-04 12:40:51.986582877 +0000
> @@ -0,0 +1,29 @@
> +/* { dg-do run { target double64 } } */
> +/* { dg-options "-O -fno-signed-zeros -fdump-tree-ccp1" } */
> +/* { dg-add-options ieee } */
> +
> +typedef double v4df __attribute__ ((vector_size (32)));
> +
> +int
> +main ()
> +{
> + volatile v4df x1 = { 4, 5, 6, 7 };
> + volatile v4df x2 = { 10, 11, 12, 13 };
> + volatile v4df x3 = { 20, 21, 22, 23 };
> +
> + x1 *= (v4df) { 0, 1, 2, 3 };
> + x2 *= (v4df) { 1, 0, 2, 0 };
> + x3 *= (v4df) { 0, 0, -1, 0 };
> +
> + if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 12, 21 }, sizeof (v4df))
> + || __builtin_memcmp ((void *) &x2, &(v4df) { 10, 0, 24, 0 },
> + sizeof (v4df))
> + || __builtin_memcmp ((void *) &x3, &(v4df) { 0, 0, -22, 0 },
> + sizeof (v4df)))
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
> +/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2019-01-07 11:35 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-04 11:43 [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... } Richard Sandiford
2019-01-04 12:04 ` Eric Botcazou
2019-01-04 12:13 ` Richard Sandiford
2019-01-04 12:19 ` Jakub Jelinek
2019-01-04 12:44 ` Richard Sandiford
2019-01-07 11:35 ` Richard Biener
2019-01-04 12:25 ` Eric Botcazou
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as the URLs for read-only IMAP folder(s) and NNTP newsgroup(s).