public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: "Kong, Lingling" <lingling.kong@intel.com>
To: "Liu, Hongtao" <hongtao.liu@intel.com>
Cc: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>,
	"Kong, Lingling" <lingling.kong@intel.com>
Subject: [PATCH] i386: Fix wrong optimization for consecutive masked scatters [PR 101472]
Date: Fri, 27 Aug 2021 02:02:54 +0000	[thread overview]
Message-ID: <DM4PR11MB548729B6E846AB7E3787524EECC89@DM4PR11MB5487.namprd11.prod.outlook.com> (raw)
In-Reply-To: <20210825081029.3623-1-lingling.kong@intel.com>

Hi,

For avx512f_scattersi<VI48F:mode>, the mask operand currently only affects the set source, so we need to refine the pattern to let GCC know that the mask register also affects the destination.
To do so, we put the mask operand into UNSPEC_VSIBADDR.

Bootstrapped and regression tested on x86_64-linux-gnu{-m32,-m64}.
Ok for master?

gcc/ChangeLog:

	PR target/101472
	* config/i386/sse.md (<avx512>scattersi<mode>): Add mask operand to
	UNSPEC_VSIBADDR.
	(<avx512>scatterdi<mode>): Likewise.
	(*avx512f_scattersi<VI48F:mode>): Merge mask operand to set_dest.
	(*avx512f_scatterdi<VI48F:mode>): Likewise.

gcc/testsuite/ChangeLog:

	PR target/101472
	* gcc.target/i386/avx512f-pr101472.c: New test.
	* gcc.target/i386/avx512vl-pr101472.c: New test.
---
 gcc/config/i386/sse.md                        | 20 +++--
 .../gcc.target/i386/avx512f-pr101472.c        | 49 ++++++++++++
 .../gcc.target/i386/avx512vl-pr101472.c       | 79 +++++++++++++++++++
 3 files changed, 140 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-pr101472.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 03fc2df1fb0..a3055dbd316 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -24205,8 +24205,9 @@
   "TARGET_AVX512F"
 {
   operands[5]
-    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
-					operands[4]), UNSPEC_VSIBADDR);
+    = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
+					operands[4], operands[1]), 
+					UNSPEC_VSIBADDR);
 })
 
 (define_insn "*avx512f_scattersi<VI48F:mode>"
@@ -24214,10 +24215,11 @@
 	  [(unspec:P
 	     [(match_operand:P 0 "vsib_address_operand" "Tv")
 	      (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
-	      (match_operand:SI 4 "const1248_operand" "n")]
+	      (match_operand:SI 4 "const1248_operand" "n")
+	      (match_operand:<avx512fmaskmode> 6 "register_operand" "1")]
 	     UNSPEC_VSIBADDR)])
 	(unspec:VI48F
-	  [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
+	  [(match_dup 6)
 	   (match_operand:VI48F 3 "register_operand" "v")]
 	  UNSPEC_SCATTER))
    (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))] @@ -24243,8 +24245,9 @@
   "TARGET_AVX512F"
 {
   operands[5]
-    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
-					operands[4]), UNSPEC_VSIBADDR);
+    = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
+					operands[4], operands[1]), 
+					UNSPEC_VSIBADDR);
 })
 
 (define_insn "*avx512f_scatterdi<VI48F:mode>"
@@ -24252,10 +24255,11 @@
 	  [(unspec:P
 	     [(match_operand:P 0 "vsib_address_operand" "Tv")
 	      (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
-	      (match_operand:SI 4 "const1248_operand" "n")]
+	      (match_operand:SI 4 "const1248_operand" "n")
+	      (match_operand:QI 6 "register_operand" "1")]
 	     UNSPEC_VSIBADDR)])
 	(unspec:VI48F
-	  [(match_operand:QI 6 "register_operand" "1")
+	  [(match_dup 6)
 	   (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
 	  UNSPEC_SCATTER))
    (clobber (match_scratch:QI 1 "=&Yk"))] diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr101472.c b/gcc/testsuite/gcc.target/i386/avx512f-pr101472.c
new file mode 100644
index 00000000000..89c6603c2ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr101472.c
@@ -0,0 +1,49 @@
+/* PR target/101472 */
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpscatterqd\[ 
+\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdd\[ 
+\\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterqq\[ 
+\\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdq\[ 
+\\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqps\[ 
+\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdps\[ 
+\\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqpd\[ 
+\\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdpd\[ 
+\\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+
+#include <immintrin.h>
+
+void two_scatters_epi32(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex, 
+			 __m256i a, __m512i b)
+{
+    _mm512_mask_i64scatter_epi32(addr, k1, vindex, a, 1);
+    _mm512_mask_i64scatter_epi32(addr, k2, vindex, a, 1);
+    _mm512_mask_i32scatter_epi32(addr, k1, vindex, b, 1);
+    _mm512_mask_i32scatter_epi32(addr, k2, vindex, b, 1); }
+
+void two_scatters_epi64(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex, 
+			 __m256i idx, __m512i a)
+{
+    _mm512_mask_i64scatter_epi64(addr, k1, vindex, a, 1);
+    _mm512_mask_i64scatter_epi64(addr, k2, vindex, a, 1);
+    _mm512_mask_i32scatter_epi64(addr, k1, idx, a, 1);
+    _mm512_mask_i32scatter_epi64(addr, k2, idx, a, 1); }
+
+void two_scatters_ps(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex, 
+		      __m256 a, __m512 b)
+{
+    _mm512_mask_i64scatter_ps(addr, k1, vindex, a, 1);
+    _mm512_mask_i64scatter_ps(addr, k2, vindex, a, 1);
+    _mm512_mask_i32scatter_ps(addr, k1, vindex, b, 1);
+    _mm512_mask_i32scatter_ps(addr, k2, vindex, b, 1); }
+
+void two_scatters_pd(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex, 
+		      __m256i idx, __m512d a)
+{
+    _mm512_mask_i64scatter_pd(addr, k1, vindex, a, 1);
+    _mm512_mask_i64scatter_pd(addr, k2, vindex, a, 1);
+    _mm512_mask_i32scatter_pd(addr, k1, idx, a, 1);
+    _mm512_mask_i32scatter_pd(addr, k2, idx, a, 1); }
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c
new file mode 100644
index 00000000000..6df59a2eb7f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c
@@ -0,0 +1,79 @@
+/* PR target/101472 */
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpscatterqd\[ 
+\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterqd\[ 
+\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdd\[ 
+\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdd\[ 
+\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterqq\[ 
+\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterqq\[ 
+\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdq\[ 
+\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdq\[ 
+\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqps\[ 
+\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqps\[ 
+\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdps\[ 
+\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdps\[ 
+\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqpd\[ 
+\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqpd\[ 
+\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdpd\[ 
+\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdpd\[ 
+\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ 
+\\t\]+#)" 2 } } */
+
+
+#include <immintrin.h>
+
+void two_scatters_epi32(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1, 
+			 __m256i vindex2, __m128i src_epi32, 
+		         __m256i src_i32_epi32)
+{
+    _mm_mask_i64scatter_epi32(addr, k1, vindex1, src_epi32, 1);
+    _mm_mask_i64scatter_epi32(addr, k2, vindex1, src_epi32, 1);
+    _mm256_mask_i64scatter_epi32(addr, k1, vindex2, src_epi32, 1);
+    _mm256_mask_i64scatter_epi32(addr, k2, vindex2, src_epi32, 1);
+
+    _mm_mask_i32scatter_epi32(addr, k1, vindex1, src_epi32, 1);
+    _mm_mask_i32scatter_epi32(addr, k2, vindex1, src_epi32, 1);
+    _mm256_mask_i32scatter_epi32(addr, k1, vindex2, src_i32_epi32, 1);
+    _mm256_mask_i32scatter_epi32(addr, k2, vindex2, src_i32_epi32, 1); 
+}
+
+void two_scatters_epi64(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1, 
+		         __m256i vindex2, __m128i src_epi64_mm, 
+			 __m256i src_epi64)
+{
+    _mm_mask_i64scatter_epi64(addr, k1, vindex1, src_epi64_mm, 1);
+    _mm_mask_i64scatter_epi64(addr, k2, vindex1, src_epi64_mm, 1);
+    _mm256_mask_i64scatter_epi64(addr, k1, vindex2, src_epi64, 1);
+    _mm256_mask_i64scatter_epi64(addr, k2, vindex2, src_epi64, 1);
+
+    _mm_mask_i32scatter_epi64(addr, k1, vindex1, src_epi64_mm, 8);
+    _mm_mask_i32scatter_epi64(addr, k2, vindex1, src_epi64_mm, 8);
+    _mm256_mask_i32scatter_epi64(addr, k1, vindex1, src_epi64, 1);
+    _mm256_mask_i32scatter_epi64(addr, k2, vindex1, src_epi64, 1); } 
+void two_scatters_ps(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1,
+		      __m256i vindex2, __m128 src_ps, __m256 src_i32_ps) {
+    _mm_mask_i64scatter_ps(addr, k1, vindex1, src_ps, 1);
+    _mm_mask_i64scatter_ps(addr, k2, vindex1, src_ps, 1);
+    _mm256_mask_i64scatter_ps(addr, k1, vindex2, src_ps, 1);
+    _mm256_mask_i64scatter_ps(addr, k2, vindex2, src_ps, 1);
+
+    _mm_mask_i32scatter_ps(addr, k1, vindex1, src_ps, 8);
+    _mm_mask_i32scatter_ps(addr, k2, vindex1, src_ps, 8);
+    _mm256_mask_i32scatter_ps(addr, k1, vindex2, src_i32_ps, 1);
+    _mm256_mask_i32scatter_ps(addr, k2, vindex2, src_i32_ps, 1); }
+
+void two_scatters_pd(void* addr, __mmask8 k1, __mmask8 k2,  __m128i vindex1,
+		      __m256i vindex2, __m128d src_pd_mm, __m256d src_pd) {
+    _mm_mask_i64scatter_pd(addr, k1, vindex1, src_pd_mm, 1);
+    _mm_mask_i64scatter_pd(addr, k2, vindex1, src_pd_mm, 1);
+    _mm256_mask_i64scatter_pd(addr, k1, vindex2, src_pd, 1);
+    _mm256_mask_i64scatter_pd(addr, k2, vindex2, src_pd, 1);
+
+    _mm_mask_i32scatter_pd(addr, k1, vindex1, src_pd_mm, 8);
+    _mm_mask_i32scatter_pd(addr, k2, vindex1, src_pd_mm, 8);
+    _mm256_mask_i32scatter_pd(addr, k1, vindex1, src_pd, 1);
+    _mm256_mask_i32scatter_pd(addr, k2, vindex1, src_pd, 1); }
--
2.18.1


       reply	other threads:[~2021-08-27  2:02 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20210825081029.3623-1-lingling.kong@intel.com>
2021-08-27  2:02 ` Kong, Lingling [this message]
2021-08-27  5:14   ` Hongtao Liu
2021-08-25  6:14 Kong, Lingling
2021-08-25  6:50 ` Hongtao Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=DM4PR11MB548729B6E846AB7E3787524EECC89@DM4PR11MB5487.namprd11.prod.outlook.com \
    --to=lingling.kong@intel.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=hongtao.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).