public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Optimize VEC_PERM_EXPR with same permutation index and operation [PR98167]
@ 2022-11-04  0:04 Hongyu Wang
  2022-11-04  6:43 ` Prathamesh Kulkarni
  2022-11-16 19:21 ` Marc Glisse
  0 siblings, 2 replies; 16+ messages in thread
From: Hongyu Wang @ 2022-11-04  0:04 UTC (permalink / raw)
  To: gcc-patches; +Cc: richard.guenther, hongtao.liu

Hi,

This is a follow-up patch for PR98167.

The sequence
     c1 = VEC_PERM_EXPR (a, a, mask)
     c2 = VEC_PERM_EXPR (b, b, mask)
     c3 = c1 op c2
can be optimized to
     c = a op b
     c3 = VEC_PERM_EXPR (c, c, mask)
for all integer vector operations, and for floating-point operations
when the mask is a full permutation (it selects every vector element).

Bootstrapped & regrtested on x86_64-pc-linux-gnu.

Ok for trunk?

gcc/ChangeLog:

	PR target/98167
	* match.pd: New perm + vector op patterns for int and fp vector.

gcc/testsuite/ChangeLog:

	PR target/98167
	* gcc.target/i386/pr98167.c: New test.
---
 gcc/match.pd                            | 49 +++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr98167.c | 44 ++++++++++++++++++++++
 2 files changed, 93 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr98167.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 194ba8f5188..b85ad34f609 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8189,3 +8189,52 @@ and,
  (bit_and (negate @0) integer_onep@1)
  (if (!TYPE_OVERFLOW_SANITIZED (type))
   (bit_and @0 @1)))
+
+/* Optimize
+   c1 = VEC_PERM_EXPR (a, a, mask)
+   c2 = VEC_PERM_EXPR (b, b, mask)
+   c3 = c1 op c2
+   -->
+   c = a op b
+   c3 = VEC_PERM_EXPR (c, c, mask)
+   for integer vectors, one constant MASK, and the non-division ops below.  */
+(for op (plus minus mult bit_and bit_ior bit_xor
+	 lshift rshift)
+ (simplify
+  (op (vec_perm @0 @0 VECTOR_CST@2) (vec_perm @1 @1 VECTOR_CST@2))
+    (if (VECTOR_INTEGER_TYPE_P (type))
+     (vec_perm (op @0 @1) (op @0 @1) @2))))
+
+/* Similar for float arithmetic when permutation constant covers all vector
+   elements.  NOTE(review): to_constant () looks fixed-length only; confirm.  */
+(for op (plus minus mult)
+ (simplify
+  (op (vec_perm @0 @0 VECTOR_CST@2) (vec_perm @1 @1 VECTOR_CST@2))
+    (if (VECTOR_FLOAT_TYPE_P (type))
+     (with
+      {
+	tree perm_cst = @2;
+	vec_perm_builder builder;
+	bool full_perm_p = false;
+	if (tree_to_vec_perm_builder (&builder, perm_cst))
+	  {
+	    /* Create a vec_perm_indices for the permutation mask.  */
+	    int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
+	    vec_perm_indices sel (builder, 1, nelts);
+
+	    /* Count the elements I that some mask position J selects.  */
+	    int count = 0, i, j;
+	    for (i = 0; i < nelts; i++)
+	      for (j = 0; j < nelts; j++)
+		{
+		  if (sel[j].to_constant () == i)
+		    {
+		      count++;
+		      break;
+		    }
+		}
+	    full_perm_p = count == nelts;	/* Every element is selected.  */
+	  }
+       }
+       (if (full_perm_p)
+	(vec_perm (op @0 @1) (op @0 @1) @2))))))
diff --git a/gcc/testsuite/gcc.target/i386/pr98167.c b/gcc/testsuite/gcc.target/i386/pr98167.c
new file mode 100644
index 00000000000..40e0ac11332
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98167.c
@@ -0,0 +1,44 @@
+/* PR target/98167 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2" } */
+
+/* { dg-final { scan-assembler-times "vpshufd\t" 8 } } */
+/* { dg-final { scan-assembler-times "vpermilps\t" 3 } } */
+
+#define VEC_PERM_4 /* Selects each of the 4 elements once.  */ \
+  2, 3, 1, 0
+#define VEC_PERM_8 /* 8-element variant; not instantiated below.  */ \
+  4, 5, 6, 7, 3, 2, 1, 0
+#define VEC_PERM_16 /* 16-element variant; not instantiated below.  */ \
+  8, 9, 10, 11, 12, 13, 14, 15, 7, 6, 5, 4, 3, 2, 1, 0
+
+#define TYPE_PERM_OP(type, size, op, name) /* Shuffle A and B alike, then OP.  */ \
+  typedef type v##size##s##type __attribute__ ((vector_size(4*size))); \
+  v##size##s##type type##foo##size##i_##name (v##size##s##type a, \
+					      v##size##s##type b) \
+  { \
+    v##size##s##type a1 = __builtin_shufflevector (a, a, \
+						   VEC_PERM_##size); \
+    v##size##s##type b1 = __builtin_shufflevector (b, b, \
+						   VEC_PERM_##size); \
+    return a1 op b1; \
+  }
+
+#define INT_PERMS(op, name) /* Only the 4 x int case is generated.  */ \
+  TYPE_PERM_OP (int, 4, op, name) \
+
+#define FP_PERMS(op, name) /* Only the 4 x float case is generated.  */ \
+  TYPE_PERM_OP (float, 4, op, name) \
+
+INT_PERMS (+, add)
+INT_PERMS (-, sub)
+INT_PERMS (*, mul)
+INT_PERMS (|, ior)
+INT_PERMS (^, xor)
+INT_PERMS (&, and)
+INT_PERMS (<<, shl)
+INT_PERMS (>>, shr)
+FP_PERMS (+, add)
+FP_PERMS (-, sub)
+FP_PERMS (*, mul)
+
-- 
2.18.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2022-11-17  6:10 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-04  0:04 [PATCH] Optimize VEC_PERM_EXPR with same permutation index and operation [PR98167] Hongyu Wang
2022-11-04  6:43 ` Prathamesh Kulkarni
2022-11-08 14:37   ` Richard Biener
2022-11-10  2:22     ` Hongyu Wang
2022-11-10  8:56       ` Richard Biener
2022-11-10 14:27         ` Hongyu Wang
2022-11-14 14:53           ` Richard Biener
2022-11-16 15:25             ` Tamar Christina
2022-11-16 15:29               ` Richard Biener
2022-11-16 15:30                 ` Richard Biener
2022-11-16 15:34                   ` Tamar Christina
2022-11-16 15:37                   ` Jakub Jelinek
2022-11-16 15:40                     ` Tamar Christina
2022-11-16 15:41                       ` Jakub Jelinek
2022-11-16 19:21 ` Marc Glisse
2022-11-17  6:05   ` Hongyu Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).