public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
From: Jakub Jelinek <jakub@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r12-2837] i386: Allow some V32HImode and V64QImode permutations even without AVX512BW [PR80355]
Date: Tue, 10 Aug 2021 10:38:33 +0000 (GMT)	[thread overview]
Message-ID: <20210810103833.870323858007@sourceware.org> (raw)

https://gcc.gnu.org/g:50b5877925ef5ae8e9f913d6d2b5ce0204ebc588

commit r12-2837-g50b5877925ef5ae8e9f913d6d2b5ce0204ebc588
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Tue Aug 10 12:38:00 2021 +0200

    i386: Allow some V32HImode and V64QImode permutations even without AVX512BW [PR80355]
    
    When working on the PR, I've noticed we generate terrible code for
    V32HImode or V64QImode permutations for -mavx512f -mno-avx512bw.
    Generally we can't do much with such permutations, but since PR68655
    we can handle at least some, those expressible using V16SImode or V8DImode
    permutations, but that wasn't reachable, because ix86_vectorize_vec_perm_const
    didn't even try: it said that without TARGET_AVX512BW it can't do anything and
    that with it it can do everything, making no d.testing_p attempts.
    
    This patch makes it try it for TARGET_AVX512F && !TARGET_AVX512BW.
    
    The first hunk is to avoid an ICE: expand_vec_perm_even_odd_1 asserts d->vmode
    isn't V32HImode because expand_vec_perm_1 for AVX512BW already handles
    all permutations, but when we let it through with !TARGET_AVX512BW,
    expand_vec_perm_1 doesn't handle it.
    
    If we want, that hunk can be dropped if we implement in
    expand_vec_perm_even_odd_1 and its helper the even permutation as
    vpmovdw + vpmovdw + vinserti64x4 and odd permutation as
    vpsrld $16 + vpsrld $16 + vpmovdw + vpmovdw + vinserti64x4.
    
    2021-08-10  Jakub Jelinek  <jakub@redhat.com>
    
            PR target/80355
            * config/i386/i386-expand.c (expand_vec_perm_even_odd): Return false
            for V32HImode if !TARGET_AVX512BW.
            (ix86_vectorize_vec_perm_const) <case E_V32HImode, case E_V64QImode>:
            If !TARGET_AVX512BW and TARGET_AVX512F and d.testing_p, don't fail
            early, but actually check the permutation.
    
            * gcc.target/i386/avx512f-pr80355-2.c: New test.

Diff:
---
 gcc/config/i386/i386-expand.c                     | 13 +++++++++----
 gcc/testsuite/gcc.target/i386/avx512f-pr80355-2.c | 23 +++++++++++++++++++++++
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index bd21efa9530..c708b33b786 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -20337,6 +20337,11 @@ expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
     if (d->perm[i] != 2 * i + odd)
       return false;
 
+  if (d->vmode == E_V32HImode
+      && d->testing_p
+      && !TARGET_AVX512BW)
+    return false;
+
   return expand_vec_perm_even_odd_1 (d, odd);
 }
 
@@ -20877,16 +20882,16 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
 	return true;
       break;
     case E_V32HImode:
-      if (!TARGET_AVX512BW)
+      if (!TARGET_AVX512F)
 	return false;
-      if (d.testing_p)
+      if (d.testing_p && TARGET_AVX512BW)
 	/* All implementable with a single vperm[it]2 insn.  */
 	return true;
       break;
     case E_V64QImode:
-      if (!TARGET_AVX512BW)
+      if (!TARGET_AVX512F)
 	return false;
-      if (d.testing_p)
+      if (d.testing_p && TARGET_AVX512BW)
 	/* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn.  */
 	return true;
       break;
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr80355-2.c b/gcc/testsuite/gcc.target/i386/avx512f-pr80355-2.c
new file mode 100644
index 00000000000..c510b2f1769
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr80355-2.c
@@ -0,0 +1,23 @@
+/* PR target/80355 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mno-avx512vl -mno-avx512dq -mno-avx512bw" } */
+/* { dg-final { scan-assembler-times "\tvshufi(?:32x4|64x2)\t" 2 } } */
+
+typedef short V __attribute__((vector_size (64)));
+typedef char W __attribute__((vector_size (64)));
+
+W
+f0 (W x)
+{
+  return __builtin_shuffle (x, (W) { 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+				     48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+				     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+				     17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 });
+}
+
+V
+f1 (V x)
+{
+  return __builtin_shuffle (x, (V) { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+				     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 });
+}


                 reply	other threads:[~2021-08-10 10:38 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210810103833.870323858007@sourceware.org \
    --to=jakub@gcc.gnu.org \
    --cc=gcc-cvs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).