* [PATCH] i386: Fix wrong optimization for consecutive masked scatters [PR 101472]
[not found] <20210825081029.3623-1-lingling.kong@intel.com>
@ 2021-08-27 2:02 ` Kong, Lingling
2021-08-27 5:14 ` Hongtao Liu
0 siblings, 1 reply; 4+ messages in thread
From: Kong, Lingling @ 2021-08-27 2:02 UTC (permalink / raw)
To: Liu, Hongtao; +Cc: gcc-patches, Kong, Lingling
Hi,
For avx512f_scattersi<VI48F:mode>, the mask operand only affects the set src; we need to refine the pattern to let GCC know that the mask register also affects the dest.
So we put mask operand into UNSPEC_VSIBADDR.
Bootstrapped and regression tested on x86_64-linux-gnu{-m32,-m64}.
Ok for master?
gcc/ChangeLog:
PR target/101472
* config/i386/sse.md (<avx512>scattersi<mode>): Add mask operand to
UNSPEC_VSIBADDR.
(<avx512>scatterdi<mode>): Likewise.
(*avx512f_scattersi<VI48F:mode>): Merge mask operand to set_dest.
(*avx512f_scatterdi<VI48F:mode>): Likewise.
gcc/testsuite/ChangeLog:
PR target/101472
* gcc.target/i386/avx512f-pr101472.c: New test.
* gcc.target/i386/avx512vl-pr101472.c: New test.
---
gcc/config/i386/sse.md | 20 +++--
.../gcc.target/i386/avx512f-pr101472.c | 49 ++++++++++++
.../gcc.target/i386/avx512vl-pr101472.c | 79 +++++++++++++++++++
3 files changed, 140 insertions(+), 8 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-pr101472.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 03fc2df1fb0..a3055dbd316 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -24205,8 +24205,9 @@
"TARGET_AVX512F"
{
operands[5]
- = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
- operands[4]), UNSPEC_VSIBADDR);
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
+ operands[4], operands[1]),
+ UNSPEC_VSIBADDR);
})
(define_insn "*avx512f_scattersi<VI48F:mode>"
@@ -24214,10 +24215,11 @@
[(unspec:P
[(match_operand:P 0 "vsib_address_operand" "Tv")
(match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
- (match_operand:SI 4 "const1248_operand" "n")]
+ (match_operand:SI 4 "const1248_operand" "n")
+ (match_operand:<avx512fmaskmode> 6 "register_operand" "1")]
UNSPEC_VSIBADDR)])
(unspec:VI48F
- [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
+ [(match_dup 6)
(match_operand:VI48F 3 "register_operand" "v")]
UNSPEC_SCATTER))
(clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
@@ -24243,8 +24245,9 @@
"TARGET_AVX512F"
{
operands[5]
- = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
- operands[4]), UNSPEC_VSIBADDR);
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
+ operands[4], operands[1]),
+ UNSPEC_VSIBADDR);
})
(define_insn "*avx512f_scatterdi<VI48F:mode>"
@@ -24252,10 +24255,11 @@
[(unspec:P
[(match_operand:P 0 "vsib_address_operand" "Tv")
(match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
- (match_operand:SI 4 "const1248_operand" "n")]
+ (match_operand:SI 4 "const1248_operand" "n")
+ (match_operand:QI 6 "register_operand" "1")]
UNSPEC_VSIBADDR)])
(unspec:VI48F
- [(match_operand:QI 6 "register_operand" "1")
+ [(match_dup 6)
(match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
UNSPEC_SCATTER))
(clobber (match_scratch:QI 1 "=&Yk"))]
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr101472.c b/gcc/testsuite/gcc.target/i386/avx512f-pr101472.c
new file mode 100644
index 00000000000..89c6603c2ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr101472.c
@@ -0,0 +1,49 @@
+/* PR target/101472 */
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpscatterqd\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdd\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterqq\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdq\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqps\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdps\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqpd\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdpd\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+
+#include <immintrin.h>
+
+void two_scatters_epi32(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex,
+ __m256i a, __m512i b)
+{
+ _mm512_mask_i64scatter_epi32(addr, k1, vindex, a, 1);
+ _mm512_mask_i64scatter_epi32(addr, k2, vindex, a, 1);
+ _mm512_mask_i32scatter_epi32(addr, k1, vindex, b, 1);
+ _mm512_mask_i32scatter_epi32(addr, k2, vindex, b, 1);
+}
+
+void two_scatters_epi64(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex,
+ __m256i idx, __m512i a)
+{
+ _mm512_mask_i64scatter_epi64(addr, k1, vindex, a, 1);
+ _mm512_mask_i64scatter_epi64(addr, k2, vindex, a, 1);
+ _mm512_mask_i32scatter_epi64(addr, k1, idx, a, 1);
+ _mm512_mask_i32scatter_epi64(addr, k2, idx, a, 1);
+}
+
+void two_scatters_ps(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex,
+ __m256 a, __m512 b)
+{
+ _mm512_mask_i64scatter_ps(addr, k1, vindex, a, 1);
+ _mm512_mask_i64scatter_ps(addr, k2, vindex, a, 1);
+ _mm512_mask_i32scatter_ps(addr, k1, vindex, b, 1);
+ _mm512_mask_i32scatter_ps(addr, k2, vindex, b, 1);
+}
+
+void two_scatters_pd(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex,
+ __m256i idx, __m512d a)
+{
+ _mm512_mask_i64scatter_pd(addr, k1, vindex, a, 1);
+ _mm512_mask_i64scatter_pd(addr, k2, vindex, a, 1);
+ _mm512_mask_i32scatter_pd(addr, k1, idx, a, 1);
+ _mm512_mask_i32scatter_pd(addr, k2, idx, a, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c
new file mode 100644
index 00000000000..6df59a2eb7f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c
@@ -0,0 +1,79 @@
+/* PR target/101472 */
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpscatterqd\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterqd\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdd\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdd\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterqq\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterqq\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdq\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdq\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqps\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqps\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdps\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdps\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqpd\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqpd\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdpd\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdpd\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+
+
+#include <immintrin.h>
+
+void two_scatters_epi32(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1,
+ __m256i vindex2, __m128i src_epi32,
+ __m256i src_i32_epi32)
+{
+ _mm_mask_i64scatter_epi32(addr, k1, vindex1, src_epi32, 1);
+ _mm_mask_i64scatter_epi32(addr, k2, vindex1, src_epi32, 1);
+ _mm256_mask_i64scatter_epi32(addr, k1, vindex2, src_epi32, 1);
+ _mm256_mask_i64scatter_epi32(addr, k2, vindex2, src_epi32, 1);
+
+ _mm_mask_i32scatter_epi32(addr, k1, vindex1, src_epi32, 1);
+ _mm_mask_i32scatter_epi32(addr, k2, vindex1, src_epi32, 1);
+ _mm256_mask_i32scatter_epi32(addr, k1, vindex2, src_i32_epi32, 1);
+ _mm256_mask_i32scatter_epi32(addr, k2, vindex2, src_i32_epi32, 1);
+}
+
+void two_scatters_epi64(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1,
+ __m256i vindex2, __m128i src_epi64_mm,
+ __m256i src_epi64)
+{
+ _mm_mask_i64scatter_epi64(addr, k1, vindex1, src_epi64_mm, 1);
+ _mm_mask_i64scatter_epi64(addr, k2, vindex1, src_epi64_mm, 1);
+ _mm256_mask_i64scatter_epi64(addr, k1, vindex2, src_epi64, 1);
+ _mm256_mask_i64scatter_epi64(addr, k2, vindex2, src_epi64, 1);
+
+ _mm_mask_i32scatter_epi64(addr, k1, vindex1, src_epi64_mm, 8);
+ _mm_mask_i32scatter_epi64(addr, k2, vindex1, src_epi64_mm, 8);
+ _mm256_mask_i32scatter_epi64(addr, k1, vindex1, src_epi64, 1);
+ _mm256_mask_i32scatter_epi64(addr, k2, vindex1, src_epi64, 1);
+}
+void two_scatters_ps(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1,
+ __m256i vindex2, __m128 src_ps, __m256 src_i32_ps)
+{
+ _mm_mask_i64scatter_ps(addr, k1, vindex1, src_ps, 1);
+ _mm_mask_i64scatter_ps(addr, k2, vindex1, src_ps, 1);
+ _mm256_mask_i64scatter_ps(addr, k1, vindex2, src_ps, 1);
+ _mm256_mask_i64scatter_ps(addr, k2, vindex2, src_ps, 1);
+
+ _mm_mask_i32scatter_ps(addr, k1, vindex1, src_ps, 8);
+ _mm_mask_i32scatter_ps(addr, k2, vindex1, src_ps, 8);
+ _mm256_mask_i32scatter_ps(addr, k1, vindex2, src_i32_ps, 1);
+ _mm256_mask_i32scatter_ps(addr, k2, vindex2, src_i32_ps, 1);
+}
+
+void two_scatters_pd(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1,
+ __m256i vindex2, __m128d src_pd_mm, __m256d src_pd)
+{
+ _mm_mask_i64scatter_pd(addr, k1, vindex1, src_pd_mm, 1);
+ _mm_mask_i64scatter_pd(addr, k2, vindex1, src_pd_mm, 1);
+ _mm256_mask_i64scatter_pd(addr, k1, vindex2, src_pd, 1);
+ _mm256_mask_i64scatter_pd(addr, k2, vindex2, src_pd, 1);
+
+ _mm_mask_i32scatter_pd(addr, k1, vindex1, src_pd_mm, 8);
+ _mm_mask_i32scatter_pd(addr, k2, vindex1, src_pd_mm, 8);
+ _mm256_mask_i32scatter_pd(addr, k1, vindex1, src_pd, 1);
+ _mm256_mask_i32scatter_pd(addr, k2, vindex1, src_pd, 1);
+}
--
2.18.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] i386: Fix wrong optimization for consecutive masked scatters [PR 101472]
2021-08-27 2:02 ` [PATCH] i386: Fix wrong optimization for consecutive masked scatters [PR 101472] Kong, Lingling
@ 2021-08-27 5:14 ` Hongtao Liu
0 siblings, 0 replies; 4+ messages in thread
From: Hongtao Liu @ 2021-08-27 5:14 UTC (permalink / raw)
To: Kong, Lingling; +Cc: Liu, Hongtao, gcc-patches
On Fri, Aug 27, 2021 at 10:03 AM Kong, Lingling via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi,
>
> For avx512f_scattersi<VI48F:mode>, mask operand only affect set src, we need to refine the pattern to let gcc know mask register also affect the dest.
> So we put mask operand into UNSPEC_VSIBADDR.
>
> Bootstrapped and regression tested on x86_64-linux-gnu{-m32,-m64}.
> Ok for master?
Ok.
>
> gcc/ChangeLog:
>
> PR target/101472
> * config/i386/sse.md: (<avx512>scattersi<mode>): Add mask operand to
> UNSPEC_VSIBADDR.
> (<avx512>scattersi<mode>): Likewise.
> (*avx512f_scattersi<VI48F:mode>): Merge mask operand to set_dest.
> (*avx512f_scatterdi<VI48F:mode>): Likewise
>
> gcc/testsuite/ChangeLog:
>
> PR target/101472
> * gcc.target/i386/avx512f-pr101472.c: New test.
> * gcc.target/i386/avx512vl-pr101472.c: New test.
> ---
> gcc/config/i386/sse.md | 20 +++--
> .../gcc.target/i386/avx512f-pr101472.c | 49 ++++++++++++
> .../gcc.target/i386/avx512vl-pr101472.c | 79 +++++++++++++++++++
> 3 files changed, 140 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-pr101472.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 03fc2df1fb0..a3055dbd316 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -24205,8 +24205,9 @@
> "TARGET_AVX512F"
> {
> operands[5]
> - = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
> - operands[4]), UNSPEC_VSIBADDR);
> + = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
> + operands[4], operands[1]),
> + UNSPEC_VSIBADDR);
> })
>
> (define_insn "*avx512f_scattersi<VI48F:mode>"
> @@ -24214,10 +24215,11 @@
> [(unspec:P
> [(match_operand:P 0 "vsib_address_operand" "Tv")
> (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
> - (match_operand:SI 4 "const1248_operand" "n")]
> + (match_operand:SI 4 "const1248_operand" "n")
> + (match_operand:<avx512fmaskmode> 6 "register_operand" "1")]
> UNSPEC_VSIBADDR)])
> (unspec:VI48F
> - [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
> + [(match_dup 6)
> (match_operand:VI48F 3 "register_operand" "v")]
> UNSPEC_SCATTER))
> (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))] @@ -24243,8 +24245,9 @@
> "TARGET_AVX512F"
> {
> operands[5]
> - = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
> - operands[4]), UNSPEC_VSIBADDR);
> + = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
> + operands[4], operands[1]),
> + UNSPEC_VSIBADDR);
> })
>
> (define_insn "*avx512f_scatterdi<VI48F:mode>"
> @@ -24252,10 +24255,11 @@
> [(unspec:P
> [(match_operand:P 0 "vsib_address_operand" "Tv")
> (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
> - (match_operand:SI 4 "const1248_operand" "n")]
> + (match_operand:SI 4 "const1248_operand" "n")
> + (match_operand:QI 6 "register_operand" "1")]
> UNSPEC_VSIBADDR)])
> (unspec:VI48F
> - [(match_operand:QI 6 "register_operand" "1")
> + [(match_dup 6)
> (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
> UNSPEC_SCATTER))
> (clobber (match_scratch:QI 1 "=&Yk"))] diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr101472.c b/gcc/testsuite/gcc.target/i386/avx512f-pr101472.c
> new file mode 100644
> index 00000000000..89c6603c2ff
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr101472.c
> @@ -0,0 +1,49 @@
> +/* PR target/101472 */
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512f -O2" } */
> +/* { dg-final { scan-assembler-times "vpscatterqd\[
> +\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterdd\[
> +\\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterqq\[
> +\\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterdq\[
> +\\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterqps\[
> +\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterdps\[
> +\\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterqpd\[
> +\\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterdpd\[
> +\\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +
> +#include <immintrin.h>
> +
> +void two_scatters_epi32(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex,
> + __m256i a, __m512i b)
> +{
> + _mm512_mask_i64scatter_epi32(addr, k1, vindex, a, 1);
> + _mm512_mask_i64scatter_epi32(addr, k2, vindex, a, 1);
> + _mm512_mask_i32scatter_epi32(addr, k1, vindex, b, 1);
> + _mm512_mask_i32scatter_epi32(addr, k2, vindex, b, 1); }
> +
> +void two_scatters_epi64(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex,
> + __m256i idx, __m512i a)
> +{
> + _mm512_mask_i64scatter_epi64(addr, k1, vindex, a, 1);
> + _mm512_mask_i64scatter_epi64(addr, k2, vindex, a, 1);
> + _mm512_mask_i32scatter_epi64(addr, k1, idx, a, 1);
> + _mm512_mask_i32scatter_epi64(addr, k2, idx, a, 1); }
> +
> +void two_scatters_ps(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex,
> + __m256 a, __m512 b)
> +{
> + _mm512_mask_i64scatter_ps(addr, k1, vindex, a, 1);
> + _mm512_mask_i64scatter_ps(addr, k2, vindex, a, 1);
> + _mm512_mask_i32scatter_ps(addr, k1, vindex, b, 1);
> + _mm512_mask_i32scatter_ps(addr, k2, vindex, b, 1); }
> +
> +void two_scatters_pd(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex,
> + __m256i idx, __m512d a)
> +{
> + _mm512_mask_i64scatter_pd(addr, k1, vindex, a, 1);
> + _mm512_mask_i64scatter_pd(addr, k2, vindex, a, 1);
> + _mm512_mask_i32scatter_pd(addr, k1, idx, a, 1);
> + _mm512_mask_i32scatter_pd(addr, k2, idx, a, 1); }
> diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c
> new file mode 100644
> index 00000000000..6df59a2eb7f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c
> @@ -0,0 +1,79 @@
> +/* PR target/101472 */
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512vl -O2" } */
> +/* { dg-final { scan-assembler-times "vpscatterqd\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterqd\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterdd\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterdd\[
> +\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterqq\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterqq\[
> +\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterdq\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vpscatterdq\[
> +\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterqps\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterqps\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterdps\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterdps\[
> +\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterqpd\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterqpd\[
> +\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterdpd\[
> +\\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +/* { dg-final { scan-assembler-times "vscatterdpd\[
> +\\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[
> +\\t\]+#)" 2 } } */
> +
> +
> +#include <immintrin.h>
> +
> +void two_scatters_epi32(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1,
> + __m256i vindex2, __m128i src_epi32,
> + __m256i src_i32_epi32)
> +{
> + _mm_mask_i64scatter_epi32(addr, k1, vindex1, src_epi32, 1);
> + _mm_mask_i64scatter_epi32(addr, k2, vindex1, src_epi32, 1);
> + _mm256_mask_i64scatter_epi32(addr, k1, vindex2, src_epi32, 1);
> + _mm256_mask_i64scatter_epi32(addr, k2, vindex2, src_epi32, 1);
> +
> + _mm_mask_i32scatter_epi32(addr, k1, vindex1, src_epi32, 1);
> + _mm_mask_i32scatter_epi32(addr, k2, vindex1, src_epi32, 1);
> + _mm256_mask_i32scatter_epi32(addr, k1, vindex2, src_i32_epi32, 1);
> + _mm256_mask_i32scatter_epi32(addr, k2, vindex2, src_i32_epi32, 1);
> +}
> +
> +void two_scatters_epi64(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1,
> + __m256i vindex2, __m128i src_epi64_mm,
> + __m256i src_epi64)
> +{
> + _mm_mask_i64scatter_epi64(addr, k1, vindex1, src_epi64_mm, 1);
> + _mm_mask_i64scatter_epi64(addr, k2, vindex1, src_epi64_mm, 1);
> + _mm256_mask_i64scatter_epi64(addr, k1, vindex2, src_epi64, 1);
> + _mm256_mask_i64scatter_epi64(addr, k2, vindex2, src_epi64, 1);
> +
> + _mm_mask_i32scatter_epi64(addr, k1, vindex1, src_epi64_mm, 8);
> + _mm_mask_i32scatter_epi64(addr, k2, vindex1, src_epi64_mm, 8);
> + _mm256_mask_i32scatter_epi64(addr, k1, vindex1, src_epi64, 1);
> + _mm256_mask_i32scatter_epi64(addr, k2, vindex1, src_epi64, 1); }
> +void two_scatters_ps(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1,
> + __m256i vindex2, __m128 src_ps, __m256 src_i32_ps) {
> + _mm_mask_i64scatter_ps(addr, k1, vindex1, src_ps, 1);
> + _mm_mask_i64scatter_ps(addr, k2, vindex1, src_ps, 1);
> + _mm256_mask_i64scatter_ps(addr, k1, vindex2, src_ps, 1);
> + _mm256_mask_i64scatter_ps(addr, k2, vindex2, src_ps, 1);
> +
> + _mm_mask_i32scatter_ps(addr, k1, vindex1, src_ps, 8);
> + _mm_mask_i32scatter_ps(addr, k2, vindex1, src_ps, 8);
> + _mm256_mask_i32scatter_ps(addr, k1, vindex2, src_i32_ps, 1);
> + _mm256_mask_i32scatter_ps(addr, k2, vindex2, src_i32_ps, 1); }
> +
> +void two_scatters_pd(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1,
> + __m256i vindex2, __m128d src_pd_mm, __m256d src_pd) {
> + _mm_mask_i64scatter_pd(addr, k1, vindex1, src_pd_mm, 1);
> + _mm_mask_i64scatter_pd(addr, k2, vindex1, src_pd_mm, 1);
> + _mm256_mask_i64scatter_pd(addr, k1, vindex2, src_pd, 1);
> + _mm256_mask_i64scatter_pd(addr, k2, vindex2, src_pd, 1);
> +
> + _mm_mask_i32scatter_pd(addr, k1, vindex1, src_pd_mm, 8);
> + _mm_mask_i32scatter_pd(addr, k2, vindex1, src_pd_mm, 8);
> + _mm256_mask_i32scatter_pd(addr, k1, vindex1, src_pd, 1);
> + _mm256_mask_i32scatter_pd(addr, k2, vindex1, src_pd, 1); }
> --
> 2.18.1
>
--
BR,
Hongtao
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] i386: Fix wrong optimization for consecutive masked scatters [PR 101472]
2021-08-25 6:14 Kong, Lingling
@ 2021-08-25 6:50 ` Hongtao Liu
0 siblings, 0 replies; 4+ messages in thread
From: Hongtao Liu @ 2021-08-25 6:50 UTC (permalink / raw)
To: Kong, Lingling; +Cc: Liu, Hongtao, gcc-patches
On Wed, Aug 25, 2021 at 2:14 PM Kong, Lingling via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi,
>
> For avx512f_scattersi<VI48F:mode>, mask operand only affect set src, we
> need to refine the pattern to let gcc know mask register also affect the dest.
> So we put mask operand into UNSPEC_VSIBADDR.
>
> Bootstrapped and regression tested on x86_64-linux-gnu{-m32,-m64}.
> Ok for master?
>
> gcc/ChangeLog:
>
> *config/i386/sse.md (<avx512>scattersi<mode>): Add mask operand to
> UNSPEC_VSIBADDR.
> (<avx512>scattersi<mode>): Likewise.
> (*avx512f_scattersi<VI48F:mode>): Merge mask operand
> to set_dest.
> (*avx512f_scatterdi<VI48F:mode>): Likewise
>
> gcc/testsuite/ChangeLog:
>
> *gcc.target/i386/avx512f-pr101472.c: New test.
> *gcc.target/i386/avx512vl-pr101472.c: Ditto.
Please follow the GCC coding conventions for ChangeLog entries, which are described in
https://gcc.gnu.org/codingconventions.html#ChangeLogs.
- = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
- operands[4]), UNSPEC_VSIBADDR);
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
+ operands[4], operands[1]),
UNSPEC_VSIBADDR);
Lines shall be at most 80 columns.
})
(define_insn "*avx512f_scattersi<VI48F:mode>"
@@ -24214,10 +24214,11 @@
[(unspec:P
[(match_operand:P 0 "vsib_address_operand" "Tv")
(match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
- (match_operand:SI 4 "const1248_operand" "n")]
+ (match_operand:SI 4 "const1248_operand" "n")
+ (match_operand:<avx512fmaskmode> 6 "register_operand" "1")]
UNSPEC_VSIBADDR)])
(unspec:VI48F
- [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
+ [(match_dup 6)
(match_operand:VI48F 3 "register_operand" "v")]
UNSPEC_SCATTER))
(clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
@@ -24243,8 +24244,8 @@
"TARGET_AVX512F"
{
operands[5]
- = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
- operands[4]), UNSPEC_VSIBADDR);
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
+ operands[4], operands[1]),
UNSPEC_VSIBADDR);
Ditto.
})
--
BR,
Hongtao
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH] i386: Fix wrong optimization for consecutive masked scatters [PR 101472]
@ 2021-08-25 6:14 Kong, Lingling
2021-08-25 6:50 ` Hongtao Liu
0 siblings, 1 reply; 4+ messages in thread
From: Kong, Lingling @ 2021-08-25 6:14 UTC (permalink / raw)
To: Liu, Hongtao; +Cc: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 672 bytes --]
Hi,
For avx512f_scattersi<VI48F:mode>, mask operand only affect set src, we
need to refine the pattern to let gcc know mask register also affect the dest.
So we put mask operand into UNSPEC_VSIBADDR.
Bootstrapped and regression tested on x86_64-linux-gnu{-m32,-m64}.
Ok for master?
gcc/ChangeLog:
*config/i386/sse.md (<avx512>scattersi<mode>): Add mask operand to
UNSPEC_VSIBADDR.
(<avx512>scattersi<mode>): Likewise.
(*avx512f_scattersi<VI48F:mode>): Merge mask operand
to set_dest.
(*avx512f_scatterdi<VI48F:mode>): Likewise
gcc/testsuite/ChangeLog:
*gcc.target/i386/avx512f-pr101472.c: New test.
*gcc.target/i386/avx512vl-pr101472.c: Ditto.
[-- Attachment #2: 0001-i386-Fix-wrong-optimization-for-consecutive-masked-s.patch --]
[-- Type: application/octet-stream, Size: 11378 bytes --]
From 413aeca79ea070d570082abe1b2c4788e29cb23b Mon Sep 17 00:00:00 2001
From: konglin1 <lingling.kong@intel.com>
Date: Mon, 9 Aug 2021 11:37:52 +0800
Subject: [PATCH] i386: Fix wrong optimization for consecutive masked scatters
[PR 101472]
gcc/ChangeLog:
*config/i386/sse.md (<avx512>scattersi<mode>): Add mask operand to
UNSPEC_VSIBADDR.
(<avx512>scattersi<mode>): Likewise.
(*avx512f_scattersi<VI48F:mode>): Merge mask operand
to set_dest.
(*avx512f_scatterdi<VI48F:mode>): Likewise
gcc/testsuite/ChangeLog:
*gcc.target/i386/avx512f-pr101472.c: New test.
*gcc.target/i386/avx512vl-pr101472.c: Ditto.
---
gcc/config/i386/sse.md | 18 +++--
.../gcc.target/i386/avx512f-pr101472.c | 49 ++++++++++++
.../gcc.target/i386/avx512vl-pr101472.c | 77 +++++++++++++++++++
3 files changed, 136 insertions(+), 8 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-pr101472.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 03fc2df1fb0..1d17c13c744 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -24205,8 +24205,8 @@
"TARGET_AVX512F"
{
operands[5]
- = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
- operands[4]), UNSPEC_VSIBADDR);
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
+ operands[4], operands[1]), UNSPEC_VSIBADDR);
})
(define_insn "*avx512f_scattersi<VI48F:mode>"
@@ -24214,10 +24214,11 @@
[(unspec:P
[(match_operand:P 0 "vsib_address_operand" "Tv")
(match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
- (match_operand:SI 4 "const1248_operand" "n")]
+ (match_operand:SI 4 "const1248_operand" "n")
+ (match_operand:<avx512fmaskmode> 6 "register_operand" "1")]
UNSPEC_VSIBADDR)])
(unspec:VI48F
- [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
+ [(match_dup 6)
(match_operand:VI48F 3 "register_operand" "v")]
UNSPEC_SCATTER))
(clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
@@ -24243,8 +24244,8 @@
"TARGET_AVX512F"
{
operands[5]
- = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
- operands[4]), UNSPEC_VSIBADDR);
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
+ operands[4], operands[1]), UNSPEC_VSIBADDR);
})
(define_insn "*avx512f_scatterdi<VI48F:mode>"
@@ -24252,10 +24253,11 @@
[(unspec:P
[(match_operand:P 0 "vsib_address_operand" "Tv")
(match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
- (match_operand:SI 4 "const1248_operand" "n")]
+ (match_operand:SI 4 "const1248_operand" "n")
+ (match_operand:QI 6 "register_operand" "1")]
UNSPEC_VSIBADDR)])
(unspec:VI48F
- [(match_operand:QI 6 "register_operand" "1")
+ [(match_dup 6)
(match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
UNSPEC_SCATTER))
(clobber (match_scratch:QI 1 "=&Yk"))]
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr101472.c b/gcc/testsuite/gcc.target/i386/avx512f-pr101472.c
new file mode 100644
index 00000000000..977f6e8eea8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr101472.c
@@ -0,0 +1,49 @@
+/* PR target/101472 */
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpscatterqd\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdd\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterqq\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdq\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqps\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdps\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqpd\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdpd\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+
+#include <immintrin.h>
+
+void two_scatters_epi32(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex, __m256i a,
+ __m512i b)
+{
+ _mm512_mask_i64scatter_epi32(addr, k1, vindex, a, 1);
+ _mm512_mask_i64scatter_epi32(addr, k2, vindex, a, 1);
+ _mm512_mask_i32scatter_epi32(addr, k1, vindex, b, 1);
+ _mm512_mask_i32scatter_epi32(addr, k2, vindex, b, 1);
+}
+
+void two_scatters_epi64(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex, __m256i idx,
+ __m512i a)
+{
+ _mm512_mask_i64scatter_epi64(addr, k1, vindex, a, 1);
+ _mm512_mask_i64scatter_epi64(addr, k2, vindex, a, 1);
+ _mm512_mask_i32scatter_epi64(addr, k1, idx, a, 1);
+ _mm512_mask_i32scatter_epi64(addr, k2, idx, a, 1);
+}
+
+void two_scatters_ps(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex, __m256 a,
+ __m512 b)
+{
+ _mm512_mask_i64scatter_ps(addr, k1, vindex, a, 1);
+ _mm512_mask_i64scatter_ps(addr, k2, vindex, a, 1);
+ _mm512_mask_i32scatter_ps(addr, k1, vindex, b, 1);
+ _mm512_mask_i32scatter_ps(addr, k2, vindex, b, 1);
+}
+
+void two_scatters_pd(void* addr, __mmask8 k1, __mmask8 k2, __m512i vindex, __m256i idx,
+ __m512d a)
+{
+ _mm512_mask_i64scatter_pd(addr, k1, vindex, a, 1);
+ _mm512_mask_i64scatter_pd(addr, k2, vindex, a, 1);
+ _mm512_mask_i32scatter_pd(addr, k1, idx, a, 1);
+ _mm512_mask_i32scatter_pd(addr, k2, idx, a, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c
new file mode 100644
index 00000000000..791b9e3ba6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr101472.c
@@ -0,0 +1,77 @@
+/* PR target/101472 */
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpscatterqd\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterqd\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdd\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdd\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterqq\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterqq\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdq\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdq\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqps\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqps\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdps\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdps\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqpd\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqpd\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdpd\[ \\t\]+\[^\{\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdpd\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */
+
+
+#include <immintrin.h>
+
+void two_scatters_epi32(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1, __m256i vindex2,
+ __m128i src_epi32, __m256i src_i32_epi32)
+{ /* Four pairs of masked scatters; the calls of a pair differ only in the mask (k1 vs k2). */
+ _mm_mask_i64scatter_epi32(addr, k1, vindex1, src_epi32, 1); /* vpscatterqd, __m128i index */
+ _mm_mask_i64scatter_epi32(addr, k2, vindex1, src_epi32, 1);
+ _mm256_mask_i64scatter_epi32(addr, k1, vindex2, src_epi32, 1); /* vpscatterqd, __m256i index, __m128i source */
+ _mm256_mask_i64scatter_epi32(addr, k2, vindex2, src_epi32, 1);
+
+ _mm_mask_i32scatter_epi32(addr, k1, vindex1, src_epi32, 1); /* vpscatterdd, __m128i index and source */
+ _mm_mask_i32scatter_epi32(addr, k2, vindex1, src_epi32, 1);
+ _mm256_mask_i32scatter_epi32(addr, k1, vindex2, src_i32_epi32, 1); /* vpscatterdd, __m256i index and source */
+ _mm256_mask_i32scatter_epi32(addr, k2, vindex2, src_i32_epi32, 1);
+}
+
+void two_scatters_epi64(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1, __m256i vindex2,
+ __m128i src_epi64_mm, __m256i src_epi64)
+{ /* Four pairs of masked scatters; the calls of a pair differ only in the mask (k1 vs k2). */
+ _mm_mask_i64scatter_epi64(addr, k1, vindex1, src_epi64_mm, 1); /* vpscatterqq, __m128i index and source */
+ _mm_mask_i64scatter_epi64(addr, k2, vindex1, src_epi64_mm, 1);
+ _mm256_mask_i64scatter_epi64(addr, k1, vindex2, src_epi64, 1); /* vpscatterqq, __m256i index and source */
+ _mm256_mask_i64scatter_epi64(addr, k2, vindex2, src_epi64, 1);
+
+ _mm_mask_i32scatter_epi64(addr, k1, vindex1, src_epi64_mm, 8); /* vpscatterdq; last argument is 8 for this pair */
+ _mm_mask_i32scatter_epi64(addr, k2, vindex1, src_epi64_mm, 8);
+ _mm256_mask_i32scatter_epi64(addr, k1, vindex1, src_epi64, 1); /* vpscatterdq, __m128i index, __m256i source */
+ _mm256_mask_i32scatter_epi64(addr, k2, vindex1, src_epi64, 1);
+}
+void two_scatters_ps(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1, __m256i vindex2,
+ __m128 src_ps, __m256 src_i32_ps)
+{ /* Four pairs of masked scatters; the calls of a pair differ only in the mask (k1 vs k2). */
+ _mm_mask_i64scatter_ps(addr, k1, vindex1, src_ps, 1); /* vscatterqps, __m128i index */
+ _mm_mask_i64scatter_ps(addr, k2, vindex1, src_ps, 1);
+ _mm256_mask_i64scatter_ps(addr, k1, vindex2, src_ps, 1); /* vscatterqps, __m256i index, __m128 source */
+ _mm256_mask_i64scatter_ps(addr, k2, vindex2, src_ps, 1);
+
+ _mm_mask_i32scatter_ps(addr, k1, vindex1, src_ps, 8); /* vscatterdps; last argument is 8 for this pair */
+ _mm_mask_i32scatter_ps(addr, k2, vindex1, src_ps, 8);
+ _mm256_mask_i32scatter_ps(addr, k1, vindex2, src_i32_ps, 1); /* vscatterdps, __m256i index, __m256 source */
+ _mm256_mask_i32scatter_ps(addr, k2, vindex2, src_i32_ps, 1);
+}
+
+void two_scatters_pd(void* addr, __mmask8 k1, __mmask8 k2, __m128i vindex1, __m256i vindex2,
+ __m128d src_pd_mm, __m256d src_pd)
+{ /* Four pairs of masked scatters; the calls of a pair differ only in the mask (k1 vs k2). */
+ _mm_mask_i64scatter_pd(addr, k1, vindex1, src_pd_mm, 1); /* vscatterqpd, __m128i index, __m128d source */
+ _mm_mask_i64scatter_pd(addr, k2, vindex1, src_pd_mm, 1);
+ _mm256_mask_i64scatter_pd(addr, k1, vindex2, src_pd, 1); /* vscatterqpd, __m256i index, __m256d source */
+ _mm256_mask_i64scatter_pd(addr, k2, vindex2, src_pd, 1);
+
+ _mm_mask_i32scatter_pd(addr, k1, vindex1, src_pd_mm, 8); /* vscatterdpd; last argument is 8 for this pair */
+ _mm_mask_i32scatter_pd(addr, k2, vindex1, src_pd_mm, 8);
+ _mm256_mask_i32scatter_pd(addr, k1, vindex1, src_pd, 1); /* vscatterdpd, __m128i index, __m256d source */
+ _mm256_mask_i32scatter_pd(addr, k2, vindex1, src_pd, 1);
+}
--
2.18.1
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-08-27 5:08 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <20210825081029.3623-1-lingling.kong@intel.com>
2021-08-27 2:02 ` [PATCH] i386: Fix wrong optimization for consecutive masked scatters [PR 101472] Kong, Lingling
2021-08-27 5:14 ` Hongtao Liu
2021-08-25 6:14 Kong, Lingling
2021-08-25 6:50 ` Hongtao Liu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).