public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... }
@ 2019-01-04 11:43 Richard Sandiford
  2019-01-04 12:04 ` Eric Botcazou
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Sandiford @ 2019-01-04 11:43 UTC (permalink / raw)
  To: gcc-patches

The PR has:

    vect__6.24_42 = vect__5.23_41 * { 0.0, 1.0e+0, 0.0, 0.0 };

which, with -fno-signed-zeros and -fno-signaling-nans, can be simplified to:

    vect__6.24_42 = vect__5.23_41 & { 0, -1, 0, 0 };

I deliberately didn't handle COMPLEX_CST or CONSTRUCTOR in
initializer_each_zero_or_onep since there are no current use cases.

The patch also makes (un)signed_type_for handle floating-point types,
returning an integer type with the same bitsize.  I tried to audit all
callers; the few that handle null returns would be unaffected.

Tested on aarch64-linux-gnu, aarch64_be-elf and x86_64-linux-gnu.
OK to install?

Richard


2019-01-04  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	PR tree-optimization/88598
	* tree.h (initializer_each_zero_or_onep): Declare.
	* tree.c (initializer_each_a_or_bp): New function.
	(initializer_each_zero_or_onep): Likewise.
	(signed_or_unsigned_type_for): Handle float types too.
	(unsigned_type_for, signed_type_for): Update comments accordingly.
	* match.pd: Fold x * { 0 or 1, 0 or 1, ... } to
	x & { 0 or -1, 0 or -1, ... }.

gcc/testsuite/
	PR tree-optimization/88598
	* gcc.dg/pr88598-1.c: New test.
	* gcc.dg/pr88598-2.c: Likewise.
	* gcc.dg/pr88598-3.c: Likewise.
	* gcc.dg/pr88598-4.c: Likewise.
	* gcc.dg/pr88598-5.c: Likewise.

Index: gcc/tree.h
===================================================================
--- gcc/tree.h	2019-01-04 11:39:24.810266962 +0000
+++ gcc/tree.h	2019-01-04 11:40:33.141683783 +0000
@@ -4506,6 +4506,7 @@ extern tree first_field (const_tree);
    combinations indicate definitive answers.  */
 
 extern bool initializer_zerop (const_tree, bool * = NULL);
+extern bool initializer_each_zero_or_onep (const_tree);
 
 extern wide_int vector_cst_int_elt (const_tree, unsigned int);
 extern tree vector_cst_elt (const_tree, unsigned int);
Index: gcc/tree.c
===================================================================
--- gcc/tree.c	2019-01-04 11:39:24.810266962 +0000
+++ gcc/tree.c	2019-01-04 11:40:33.141683783 +0000
@@ -11229,6 +11229,60 @@ initializer_zerop (const_tree init, bool
     }
 }
 
+/* Return true if EXPR is an initializer expression that consists only
+   of INTEGER_CSTs for which IP0 or IP1 holds and REAL_CSTs for which
+   RP0 or RP1 holds.  The choice between IP0 and IP1, and between
+   RP0 and RP1, can vary from one element to the next.  */
+
+template<bool (*IP0) (const_tree), bool (*IP1) (const_tree),
+	 bool (*RP0) (const_tree), bool (*RP1) (const_tree)>
+bool
+initializer_each_a_or_bp (const_tree expr)
+{
+#define RECURSE(X) initializer_each_a_or_bp<IP0, IP1, RP0, RP1> (X)
+
+  STRIP_ANY_LOCATION_WRAPPER (expr);
+
+  switch (TREE_CODE (expr))
+    {
+    case INTEGER_CST:
+      return IP0 (expr) || IP1 (expr);
+
+    case REAL_CST:
+      return RP0 (expr) || RP1 (expr);
+
+    case VECTOR_CST:
+      {
+	unsigned HOST_WIDE_INT nelts = vector_cst_encoded_nelts (expr);
+	if (VECTOR_CST_STEPPED_P (expr)
+	    && !TYPE_VECTOR_SUBPARTS (TREE_TYPE (expr)).is_constant (&nelts))
+	  return false;
+
+	for (unsigned int i = 0; i < nelts; ++i)
+	  if (!RECURSE (VECTOR_CST_ENCODED_ELT (expr, i)))
+	    return false;
+
+	return true;
+      }
+
+    default:
+      return false;
+    }
+
+#undef RECURSE
+}
+
+/* Return true if EXPR is an initializer expression in which every element
+   is a constant that is numerically equal to 0 or 1.  The elements do not
+   need to be equal to each other.  */
+
+bool
+initializer_each_zero_or_onep (const_tree expr)
+{
+  return initializer_each_a_or_bp<integer_zerop, integer_onep,
+				  real_zerop, real_onep> (expr);
+}
+
 /* Check if vector VEC consists of all the equal elements and
    that the number of elements corresponds to the type of VEC.
    The function returns first element of the vector
@@ -11672,7 +11726,10 @@ int_cst_value (const_tree x)
 
 /* If TYPE is an integral or pointer type, return an integer type with
    the same precision which is unsigned iff UNSIGNEDP is true, or itself
-   if TYPE is already an integer type of signedness UNSIGNEDP.  */
+   if TYPE is already an integer type of signedness UNSIGNEDP.
+   If TYPE is a floating-point type, return an integer type with the same
+   bitsize and with the signedness given by UNSIGNEDP; this is useful
+   when doing bit-level operations on a floating-point value.  */
 
 tree
 signed_or_unsigned_type_for (int unsignedp, tree type)
@@ -11702,17 +11759,23 @@ signed_or_unsigned_type_for (int unsigne
       return build_complex_type (inner2);
     }
 
-  if (!INTEGRAL_TYPE_P (type)
-      && !POINTER_TYPE_P (type)
-      && TREE_CODE (type) != OFFSET_TYPE)
+  unsigned int bits;
+  if (INTEGRAL_TYPE_P (type)
+      || POINTER_TYPE_P (type)
+      || TREE_CODE (type) == OFFSET_TYPE)
+    bits = TYPE_PRECISION (type);
+  else if (TREE_CODE (type) == REAL_TYPE)
+    bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (type));
+  else
     return NULL_TREE;
 
-  return build_nonstandard_integer_type (TYPE_PRECISION (type), unsignedp);
+  return build_nonstandard_integer_type (bits, unsignedp);
 }
 
 /* If TYPE is an integral or pointer type, return an integer type with
    the same precision which is unsigned, or itself if TYPE is already an
-   unsigned integer type.  */
+   unsigned integer type.  If TYPE is a floating-point type, return an
+   unsigned integer type with the same bitsize as TYPE.  */
 
 tree
 unsigned_type_for (tree type)
@@ -11722,7 +11785,8 @@ unsigned_type_for (tree type)
 
 /* If TYPE is an integral or pointer type, return an integer type with
    the same precision which is signed, or itself if TYPE is already a
-   signed integer type.  */
+   signed integer type.  If TYPE is a floating-point type, return a
+   signed integer type with the same bitsize as TYPE.  */
 
 tree
 signed_type_for (tree type)
Index: gcc/match.pd
===================================================================
--- gcc/match.pd	2019-01-04 11:39:25.798258529 +0000
+++ gcc/match.pd	2019-01-04 11:40:33.137683817 +0000
@@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.
    integer_each_onep integer_truep integer_nonzerop
    real_zerop real_onep real_minus_onep
    zerop
+   initializer_each_zero_or_onep
    CONSTANT_CLASS_P
    tree_expr_nonnegative_p
    tree_expr_nonzero_p
@@ -194,6 +195,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
            || !COMPLEX_FLOAT_TYPE_P (type)))
    (negate @0)))
 
+/* Transform x * { 0 or 1, 0 or 1, ... } into x & { 0 or -1, 0 or -1, ...},
+   unless the target has native support for the former but not the latter.  */
+(simplify
+ (mult @0 VECTOR_CST@1)
+ (if (initializer_each_zero_or_onep (@1)
+      && !HONOR_SNANS (type)
+      && !HONOR_SIGNED_ZEROS (type))
+  (with { tree itype = FLOAT_TYPE_P (type) ? unsigned_type_for (type) : type; }
+   (if (itype
+	&& (!VECTOR_MODE_P (TYPE_MODE (type))
+	    || (VECTOR_MODE_P (TYPE_MODE (itype))
+		&& optab_handler (and_optab,
+				  TYPE_MODE (itype)) != CODE_FOR_nothing)))
+    (view_convert (bit_and:itype (view_convert @0)
+				 (ne @1 { build_zero_cst (type); })))))))
+
 (for cmp (gt ge lt le)
      outp (convert convert negate negate)
      outn (negate negate convert convert)
Index: gcc/testsuite/gcc.dg/pr88598-1.c
===================================================================
--- /dev/null	2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-1.c	2019-01-04 11:40:33.137683817 +0000
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-O -fdump-tree-ccp1" } */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+int
+main ()
+{
+  volatile v4si x1 = { 4, 5, 6, 7 };
+  volatile v4si x2 = { 10, 11, 12, 13 };
+  volatile v4si x3 = { 20, 21, 22, 23 };
+
+  x1 *= (v4si) { 0, 1, 1, 0 };
+  x2 *= (v4si) { 1, 0, 0, 1 };
+  x3 *= (v4si) { 0, 0, 1, 0 };
+
+  if (__builtin_memcmp ((void *) &x1, &(v4si) { 0, 5, 6, 0 }, sizeof (v4si))
+      || __builtin_memcmp ((void *) &x2, &(v4si) { 10, 0, 0, 13 },
+			   sizeof (v4si))
+      || __builtin_memcmp ((void *) &x3, &(v4si) { 0, 0, 22, 0 },
+			   sizeof (v4si)))
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not { \* } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-2.c
===================================================================
--- /dev/null	2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-2.c	2019-01-04 11:40:33.137683817 +0000
@@ -0,0 +1,30 @@
+/* { dg-do run { target double64 } } */
+/* { dg-options "-O -fdump-tree-ccp1" } */
+/* { dg-add-options ieee } */
+
+typedef double v4df __attribute__ ((vector_size (32)));
+
+int
+main ()
+{
+  volatile v4df x1 = { 4, 5, 6, -7 };
+  volatile v4df x2 = { 10, -11, 12, 13 };
+  volatile v4df x3 = { 20, 21, 22, 23 };
+
+  x1 *= (v4df) { 0, 1, 1, 0 };
+  x2 *= (v4df) { 1, 0, 0, 1 };
+  x3 *= (v4df) { 0.0, -0.0, 1.0, -0.0 };
+
+  if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 6, -0.0 },
+			sizeof (v4df))
+      || __builtin_memcmp ((void *) &x2, &(v4df) { 10, -0.0, 0, 13 },
+			   sizeof (v4df))
+      || __builtin_memcmp ((void *) &x3, &(v4df) { 0, -0.0, 22, -0.0 },
+			   sizeof (v4df)))
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
+/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-3.c
===================================================================
--- /dev/null	2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-3.c	2019-01-04 11:40:33.137683817 +0000
@@ -0,0 +1,29 @@
+/* { dg-do run { target double64 } } */
+/* { dg-options "-O -fno-signed-zeros -fdump-tree-ccp1" } */
+/* { dg-add-options ieee } */
+
+typedef double v4df __attribute__ ((vector_size (32)));
+
+int
+main ()
+{
+  volatile v4df x1 = { 4, 5, 6, -7 };
+  volatile v4df x2 = { 10, -11, 12, 13 };
+  volatile v4df x3 = { 20, 21, 22, 23 };
+
+  x1 *= (v4df) { 0, 1, 1, 0 };
+  x2 *= (v4df) { 1, 0, 0, 1 };
+  x3 *= (v4df) { 0.0, -0.0, 1.0, -0.0 };
+
+  if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 6, 0 },
+			sizeof (v4df))
+      || __builtin_memcmp ((void *) &x2, &(v4df) { 10, 0, 0, 13 },
+			   sizeof (v4df))
+      || __builtin_memcmp ((void *) &x3, &(v4df) { 0, 0, 22, 0 },
+			   sizeof (v4df)))
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not { \* } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-4.c
===================================================================
--- /dev/null	2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-4.c	2019-01-04 11:40:33.137683817 +0000
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-options "-O -fdump-tree-ccp1" } */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+int
+main ()
+{
+  volatile v4si x1 = { 4, 5, 6, 7 };
+  volatile v4si x2 = { 10, 11, 12, 13 };
+  volatile v4si x3 = { 20, 21, 22, 23 };
+
+  x1 *= (v4si) { 0, 1, 2, 3 };
+  x2 *= (v4si) { 1, 0, 2, 0 };
+  x3 *= (v4si) { 0, 0, -1, 0 };
+
+  if (__builtin_memcmp ((void *) &x1, &(v4si) { 0, 5, 12, 21 }, sizeof (v4si))
+      || __builtin_memcmp ((void *) &x2, &(v4si) { 10, 0, 24, 0 },
+			   sizeof (v4si))
+      || __builtin_memcmp ((void *) &x3, &(v4si) { 0, 0, -22, 0 },
+			   sizeof (v4si)))
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
+/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
Index: gcc/testsuite/gcc.dg/pr88598-5.c
===================================================================
--- /dev/null	2018-12-31 11:20:29.178325188 +0000
+++ gcc/testsuite/gcc.dg/pr88598-5.c	2019-01-04 11:40:33.137683817 +0000
@@ -0,0 +1,29 @@
+/* { dg-do run { target double64 } } */
+/* { dg-options "-O -fno-signed-zeros -fdump-tree-ccp1" } */
+/* { dg-add-options ieee } */
+
+typedef double v4df __attribute__ ((vector_size (32)));
+
+int
+main ()
+{
+  volatile v4df x1 = { 4, 5, 6, 7 };
+  volatile v4df x2 = { 10, 11, 12, 13 };
+  volatile v4df x3 = { 20, 21, 22, 23 };
+
+  x1 *= (v4df) { 0, 1, 2, 3 };
+  x2 *= (v4df) { 1, 0, 2, 0 };
+  x3 *= (v4df) { 0, 0, -1, 0 };
+
+  if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 12, 21 }, sizeof (v4df))
+      || __builtin_memcmp ((void *) &x2, &(v4df) { 10, 0, 24, 0 },
+			   sizeof (v4df))
+      || __builtin_memcmp ((void *) &x3, &(v4df) { 0, 0, -22, 0 },
+			   sizeof (v4df)))
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
+/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2019-01-07 11:35 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-04 11:43 [1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... } Richard Sandiford
2019-01-04 12:04 ` Eric Botcazou
2019-01-04 12:13   ` Richard Sandiford
2019-01-04 12:19     ` Jakub Jelinek
2019-01-04 12:44       ` Richard Sandiford
2019-01-07 11:35         ` Richard Biener
2019-01-04 12:25     ` Eric Botcazou

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).