public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-1384] i386: Try to avoid variable permutation instruction [PR101021]
@ 2021-06-11 10:32 Uros Bizjak
0 siblings, 0 replies; only message in thread
From: Uros Bizjak @ 2021-06-11 10:32 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:1fa991d1d74cb1ce96c48ede70ae0be7a9683ce3
commit r12-1384-g1fa991d1d74cb1ce96c48ede70ae0be7a9683ce3
Author: Uros Bizjak <ubizjak@gmail.com>
Date: Fri Jun 11 12:31:42 2021 +0200
i386: Try to avoid variable permutation instruction [PR101021]
Some permutations can be implemented without costly PSHUFB instruction, e.g.:
{ 8,9,10,11,12,13,14,15, 0,1,2,3,4,5,6,7 } with PALIGNR,
{ 0,1,2,3, 4,5,6,7, 4,5,6,7, 12,13,14,15 } with PSHUFD,
{ 0,1, 2,3, 2,3, 6,7, 8,9,10,11,12,13,14,15 } with PSHUFLW and
{ 0,1,2,3,4,5,6,7, 8,9, 10,11, 10,11, 14,15 } with PSHUFHW.
All these instructions have constant shuffle control mask and do not
need to load shuffle mask from a memory to a temporary XMM register.
2021-06-11 Uroš Bizjak <ubizjak@gmail.com>
gcc/
PR target/101021
* config/i386/i386-expand.c (expand_vec_perm_pshufb): Return
false if the permutation can be implemented with constant
permutation instruction in wider mode.
(canonicalize_vector_int_perm): Move above expand_vec_perm_pshufb.
Handle V8QImode and V4HImode.
gcc/testsuite/
PR target/101021
* gcc.target/i386/pr101021-1.c: New test.
* gcc.target/i386/pr101021-2.c: Ditto.
Diff:
---
gcc/config/i386/i386-expand.c | 109 +++++++++++++++--------------
gcc/testsuite/gcc.target/i386/pr101021-1.c | 35 +++++++++
gcc/testsuite/gcc.target/i386/pr101021-2.c | 21 ++++++
3 files changed, 114 insertions(+), 51 deletions(-)
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 9ee5257adf9..2fa3a18dc6a 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -17354,6 +17354,59 @@ expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
return true;
}
+/* For V*[QHS]Imode permutations, check if the same permutation
+ can't be performed in a 2x, 4x or 8x wider inner mode. */
+
+static bool
+canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
+ struct expand_vec_perm_d *nd)
+{
+ int i;
+ machine_mode mode = VOIDmode;
+
+ switch (d->vmode)
+ {
+ case E_V8QImode: mode = V4HImode; break;
+ case E_V16QImode: mode = V8HImode; break;
+ case E_V32QImode: mode = V16HImode; break;
+ case E_V64QImode: mode = V32HImode; break;
+ case E_V4HImode: mode = V2SImode; break;
+ case E_V8HImode: mode = V4SImode; break;
+ case E_V16HImode: mode = V8SImode; break;
+ case E_V32HImode: mode = V16SImode; break;
+ case E_V4SImode: mode = V2DImode; break;
+ case E_V8SImode: mode = V4DImode; break;
+ case E_V16SImode: mode = V8DImode; break;
+ default: return false;
+ }
+ for (i = 0; i < d->nelt; i += 2)
+ if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
+ return false;
+ nd->vmode = mode;
+ nd->nelt = d->nelt / 2;
+ for (i = 0; i < nd->nelt; i++)
+ nd->perm[i] = d->perm[2 * i] / 2;
+ if (GET_MODE_INNER (mode) != DImode)
+ canonicalize_vector_int_perm (nd, nd);
+ if (nd != d)
+ {
+ nd->one_operand_p = d->one_operand_p;
+ nd->testing_p = d->testing_p;
+ if (d->op0 == d->op1)
+ nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
+ else
+ {
+ nd->op0 = gen_lowpart (nd->vmode, d->op0);
+ nd->op1 = gen_lowpart (nd->vmode, d->op1);
+ }
+ if (d->testing_p)
+ nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
+ else
+ nd->target = gen_reg_rtx (nd->vmode);
+ }
+ return true;
+}
+
/* Return true if permutation D can be performed as VMODE permutation
instead. */
@@ -17391,6 +17444,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
unsigned i, nelt, eltsz, mask;
unsigned char perm[64];
machine_mode vmode = V16QImode;
+ struct expand_vec_perm_d nd;
rtx rperm[64], vperm, target, op0, op1;
nelt = d->nelt;
@@ -17539,6 +17593,10 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
return false;
}
+ /* Try to avoid variable permutation instruction. */
+ if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
+ return false;
+
if (d->testing_p)
return true;
@@ -17617,57 +17675,6 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
return true;
}
-/* For V*[QHS]Imode permutations, check if the same permutation
- can't be performed in a 2x, 4x or 8x wider inner mode. */
-
-static bool
-canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
- struct expand_vec_perm_d *nd)
-{
- int i;
- machine_mode mode = VOIDmode;
-
- switch (d->vmode)
- {
- case E_V16QImode: mode = V8HImode; break;
- case E_V32QImode: mode = V16HImode; break;
- case E_V64QImode: mode = V32HImode; break;
- case E_V8HImode: mode = V4SImode; break;
- case E_V16HImode: mode = V8SImode; break;
- case E_V32HImode: mode = V16SImode; break;
- case E_V4SImode: mode = V2DImode; break;
- case E_V8SImode: mode = V4DImode; break;
- case E_V16SImode: mode = V8DImode; break;
- default: return false;
- }
- for (i = 0; i < d->nelt; i += 2)
- if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
- return false;
- nd->vmode = mode;
- nd->nelt = d->nelt / 2;
- for (i = 0; i < nd->nelt; i++)
- nd->perm[i] = d->perm[2 * i] / 2;
- if (GET_MODE_INNER (mode) != DImode)
- canonicalize_vector_int_perm (nd, nd);
- if (nd != d)
- {
- nd->one_operand_p = d->one_operand_p;
- nd->testing_p = d->testing_p;
- if (d->op0 == d->op1)
- nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
- else
- {
- nd->op0 = gen_lowpart (nd->vmode, d->op0);
- nd->op1 = gen_lowpart (nd->vmode, d->op1);
- }
- if (d->testing_p)
- nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
- else
- nd->target = gen_reg_rtx (nd->vmode);
- }
- return true;
-}
-
/* Try to expand one-operand permutation with constant mask. */
static bool
diff --git a/gcc/testsuite/gcc.target/i386/pr101021-1.c b/gcc/testsuite/gcc.target/i386/pr101021-1.c
new file mode 100644
index 00000000000..f4649c00338
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101021-1.c
@@ -0,0 +1,35 @@
+/* PR target/101021 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-not "vpshufb" } } */
+
+typedef char S;
+typedef S V __attribute__((vector_size(16 * sizeof(S))));
+
+V t1 (V x)
+{
+ return __builtin_shuffle (x, (V) { 8,9,10,11,12,13,14,15, 0,1,2,3,4,5,6,7 });
+}
+
+/* { dg-final { scan-assembler "vpalignr" } } */
+
+V t2 (V x)
+{
+ return __builtin_shuffle (x, (V) { 0,1,2,3, 4,5,6,7, 4,5,6,7, 12,13,14,15 });
+}
+
+/* { dg-final { scan-assembler "vpshufd" } } */
+
+V t3 (V x)
+{
+ return __builtin_shuffle (x, (V) { 0,1, 2,3, 2,3, 6,7, 8,9,10,11,12,13,14,15 });
+}
+
+/* { dg-final { scan-assembler "vpshuflw" } } */
+
+V t4 (V x)
+{
+ return __builtin_shuffle (x, (V) { 0,1,2,3,4,5,6,7, 8,9, 10,11, 10,11, 14,15 });
+}
+
+/* { dg-final { scan-assembler "vpshufhw" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr101021-2.c b/gcc/testsuite/gcc.target/i386/pr101021-2.c
new file mode 100644
index 00000000000..1e046f7d990
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101021-2.c
@@ -0,0 +1,21 @@
+/* PR target/101021 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-not "vpshufb" } } */
+
+typedef char S;
+typedef S V __attribute__((vector_size(8 * sizeof(S))));
+
+V t1 (V x)
+{
+ return __builtin_shuffle (x, (V) { 4,5,6,7, 0,1,2,3 });
+}
+
+/* { dg-final { scan-assembler "vpshufd" } } */
+
+V t2 (V x)
+{
+ return __builtin_shuffle (x, (V) { 0,1, 2,3, 2,3, 6,7 });
+}
+
+/* { dg-final { scan-assembler "vpshuflw" } } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2021-06-11 10:32 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-11 10:32 [gcc r12-1384] i386: Try to avoid variable permutation instruction [PR101021] Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).