public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-1384] i386: Try to avoid variable permutation instruction [PR101021]
@ 2021-06-11 10:32 Uros Bizjak
  0 siblings, 0 replies; only message in thread
From: Uros Bizjak @ 2021-06-11 10:32 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:1fa991d1d74cb1ce96c48ede70ae0be7a9683ce3

commit r12-1384-g1fa991d1d74cb1ce96c48ede70ae0be7a9683ce3
Author: Uros Bizjak <ubizjak@gmail.com>
Date:   Fri Jun 11 12:31:42 2021 +0200

    i386: Try to avoid variable permutation instruction [PR101021]
    
    Some permutations can be implemented without the costly PSHUFB instruction, e.g.:
    
    { 8,9,10,11,12,13,14,15, 0,1,2,3,4,5,6,7 } with PALIGNR,
    
    { 0,1,2,3, 4,5,6,7, 4,5,6,7, 12,13,14,15 } with PSHUFD,
    
    { 0,1, 2,3, 2,3, 6,7, 8,9,10,11,12,13,14,15 } with PSHUFLW and
    
    { 0,1,2,3,4,5,6,7, 8,9, 10,11, 10,11, 14,15 } with PSHUFHW.
    
    All these instructions have a constant shuffle control mask and do not
    need to load the shuffle mask from memory into a temporary XMM register.
    
    2021-06-11  Uroš Bizjak  <ubizjak@gmail.com>
    
    gcc/
            PR target/101021
            * config/i386/i386-expand.c (expand_vec_perm_pshufb): Return
            false if the permutation can be implemented with constant
            permutation instruction in wider mode.
            (canonicalize_vector_int_perm): Move above expand_vec_perm_pshufb.
            Handle V8QImode and V4HImode.
    
    gcc/testsuite/
    
            PR target/101021
            * gcc.target/i386/pr101021-1.c: New test.
            * gcc.target/i386/pr101021-2.c: Ditto.

Diff:
---
 gcc/config/i386/i386-expand.c              | 109 +++++++++++++++--------------
 gcc/testsuite/gcc.target/i386/pr101021-1.c |  35 +++++++++
 gcc/testsuite/gcc.target/i386/pr101021-2.c |  21 ++++++
 3 files changed, 114 insertions(+), 51 deletions(-)

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 9ee5257adf9..2fa3a18dc6a 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -17354,6 +17354,59 @@ expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
   return true;
 }
 
+/* For V*[QHS]Imode permutations, check if the same permutation
+   can't be performed in a 2x, 4x or 8x wider inner mode.  */
+
+static bool
+canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
+			      struct expand_vec_perm_d *nd)
+{
+  int i;
+  machine_mode mode = VOIDmode;
+
+  switch (d->vmode)
+    {
+    case E_V8QImode: mode = V4HImode; break;
+    case E_V16QImode: mode = V8HImode; break;
+    case E_V32QImode: mode = V16HImode; break;
+    case E_V64QImode: mode = V32HImode; break;
+    case E_V4HImode: mode = V2SImode; break;
+    case E_V8HImode: mode = V4SImode; break;
+    case E_V16HImode: mode = V8SImode; break;
+    case E_V32HImode: mode = V16SImode; break;
+    case E_V4SImode: mode = V2DImode; break;
+    case E_V8SImode: mode = V4DImode; break;
+    case E_V16SImode: mode = V8DImode; break;
+    default: return false;
+    }
+  for (i = 0; i < d->nelt; i += 2)
+    if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
+      return false;
+  nd->vmode = mode;
+  nd->nelt = d->nelt / 2;
+  for (i = 0; i < nd->nelt; i++)
+    nd->perm[i] = d->perm[2 * i] / 2;
+  if (GET_MODE_INNER (mode) != DImode)
+    canonicalize_vector_int_perm (nd, nd);
+  if (nd != d)
+    {
+      nd->one_operand_p = d->one_operand_p;
+      nd->testing_p = d->testing_p;
+      if (d->op0 == d->op1)
+	nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
+      else
+	{
+	  nd->op0 = gen_lowpart (nd->vmode, d->op0);
+	  nd->op1 = gen_lowpart (nd->vmode, d->op1);
+	}
+      if (d->testing_p)
+	nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
+      else
+	nd->target = gen_reg_rtx (nd->vmode);
+    }
+  return true;
+}
+
 /* Return true if permutation D can be performed as VMODE permutation
    instead.  */
 
@@ -17391,6 +17444,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
   unsigned i, nelt, eltsz, mask;
   unsigned char perm[64];
   machine_mode vmode = V16QImode;
+  struct expand_vec_perm_d nd;
   rtx rperm[64], vperm, target, op0, op1;
 
   nelt = d->nelt;
@@ -17539,6 +17593,10 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
 	return false;
     }
 
+  /* Try to avoid variable permutation instruction.  */
+  if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
+    return false;
+
   if (d->testing_p)
     return true;
 
@@ -17617,57 +17675,6 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
   return true;
 }
 
-/* For V*[QHS]Imode permutations, check if the same permutation
-   can't be performed in a 2x, 4x or 8x wider inner mode.  */
-
-static bool
-canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
-			      struct expand_vec_perm_d *nd)
-{
-  int i;
-  machine_mode mode = VOIDmode;
-
-  switch (d->vmode)
-    {
-    case E_V16QImode: mode = V8HImode; break;
-    case E_V32QImode: mode = V16HImode; break;
-    case E_V64QImode: mode = V32HImode; break;
-    case E_V8HImode: mode = V4SImode; break;
-    case E_V16HImode: mode = V8SImode; break;
-    case E_V32HImode: mode = V16SImode; break;
-    case E_V4SImode: mode = V2DImode; break;
-    case E_V8SImode: mode = V4DImode; break;
-    case E_V16SImode: mode = V8DImode; break;
-    default: return false;
-    }
-  for (i = 0; i < d->nelt; i += 2)
-    if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
-      return false;
-  nd->vmode = mode;
-  nd->nelt = d->nelt / 2;
-  for (i = 0; i < nd->nelt; i++)
-    nd->perm[i] = d->perm[2 * i] / 2;
-  if (GET_MODE_INNER (mode) != DImode)
-    canonicalize_vector_int_perm (nd, nd);
-  if (nd != d)
-    {
-      nd->one_operand_p = d->one_operand_p;
-      nd->testing_p = d->testing_p;
-      if (d->op0 == d->op1)
-	nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
-      else
-	{
-	  nd->op0 = gen_lowpart (nd->vmode, d->op0);
-	  nd->op1 = gen_lowpart (nd->vmode, d->op1);
-	}
-      if (d->testing_p)
-	nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
-      else
-	nd->target = gen_reg_rtx (nd->vmode);
-    }
-  return true;
-}
-
 /* Try to expand one-operand permutation with constant mask.  */
 
 static bool
diff --git a/gcc/testsuite/gcc.target/i386/pr101021-1.c b/gcc/testsuite/gcc.target/i386/pr101021-1.c
new file mode 100644
index 00000000000..f4649c00338
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101021-1.c
@@ -0,0 +1,35 @@
+/* PR target/101021 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-not "vpshufb" } } */
+
+typedef char S;
+typedef S V __attribute__((vector_size(16 * sizeof(S))));
+
+V t1 (V x)
+{
+  return __builtin_shuffle (x, (V) { 8,9,10,11,12,13,14,15, 0,1,2,3,4,5,6,7 });
+}
+
+/* { dg-final { scan-assembler "vpalignr" } } */
+
+V t2 (V x)
+{
+  return __builtin_shuffle (x, (V) { 0,1,2,3, 4,5,6,7, 4,5,6,7, 12,13,14,15 });
+}
+
+/* { dg-final { scan-assembler "vpshufd" } } */
+
+V t3 (V x)
+{
+  return __builtin_shuffle (x, (V) { 0,1, 2,3, 2,3, 6,7, 8,9,10,11,12,13,14,15 });
+}
+
+/* { dg-final { scan-assembler "vpshuflw" } } */
+
+V t4 (V x)
+{
+  return __builtin_shuffle (x, (V) { 0,1,2,3,4,5,6,7, 8,9, 10,11, 10,11, 14,15 });
+}
+
+/* { dg-final { scan-assembler "vpshufhw" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr101021-2.c b/gcc/testsuite/gcc.target/i386/pr101021-2.c
new file mode 100644
index 00000000000..1e046f7d990
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101021-2.c
@@ -0,0 +1,21 @@
+/* PR target/101021 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-not "vpshufb" } } */
+
+typedef char S;
+typedef S V __attribute__((vector_size(8 * sizeof(S))));
+
+V t1 (V x)
+{
+  return __builtin_shuffle (x, (V) { 4,5,6,7, 0,1,2,3 });
+}
+
+/* { dg-final { scan-assembler "vpshufd" } } */
+
+V t2 (V x)
+{
+  return __builtin_shuffle (x, (V) { 0,1, 2,3, 2,3, 6,7 });
+}
+
+/* { dg-final { scan-assembler "vpshuflw" } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-06-11 10:32 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-11 10:32 [gcc r12-1384] i386: Try to avoid variable permutation instruction [PR101021] Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).