* [PATCH] [i386] Add define_insn_and_split for vpcmp{b, w, d, q} vpcmp{ph, ps, pd}.
@ 2021-12-21 6:26 liuhongt
2021-12-23 5:41 ` Hongtao Liu
0 siblings, 1 reply; 2+ messages in thread
From: liuhongt @ 2021-12-21 6:26 UTC (permalink / raw)
To: gcc-patches
The purpose of these define_insn_and_split patterns:
1. Combine vpcmpuw and zero_extend into a single vpcmpuw.
2. Canonicalize the vpcmpuw pattern so that CSE can replace a duplicate vpcmpuw with just a kmov.
3. Use DImode as the dest of zero_extend so that cprop_hardreg can eliminate the redundant kmov.
It should partially fix the issue in the PR.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.
gcc/ChangeLog:
PR target/103750
* config/i386/sse.md
(*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
New define_insn_and_split.
(*<avx512>_cmp<mode>3): Ditto.
(*<avx512>_cmp<mode>3_zero_extenddi): New define_insn.
(*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
New define_insn_and_split.
(*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
Ditto.
(*<avx512>_ucmp<mode>3): Ditto.
(*<avx512>_ucmp<mode>3_zero_extenddi): New define_insn.
(*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
New define_insn_and_split.
gcc/testsuite/ChangeLog:
* gcc.target/i386/bitwise_mask_op-3.c: Adjust test.
* g++.target/i386/pr103750-1.C: New test.
---
gcc/config/i386/sse.md | 267 ++++++++++++++++++
gcc/testsuite/g++.target/i386/pr103750-1.C | 50 ++++
.../gcc.target/i386/bitwise_mask_op-3.c | 6 +-
3 files changed, 320 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/g++.target/i386/pr103750-1.C
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5196149ee32..fb885d58272 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3702,6 +3702,75 @@ (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+;; These splitters are used to canonicalize the vpcmpuw pattern, so that CSE can
+;; transform duplicated vpcmpuw instructions into one vpcmpuw and a kmov.
+;; Choose the biggest mode (DImode) as dest, so the kmov can be optimized by cprop_hardreg.
+(define_insn_and_split "*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+ [(set (match_operand:SWI248x 0 "register_operand" "=k")
+ (zero_extend:SWI248x
+ (unspec:<V48H_AVX512VL:avx512fmaskmode>
+ [(match_operand:V48H_AVX512VL 1 "register_operand" "v")
+ (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<V48H_AVX512VL:cmp_imm_predicate>" "n")]
+ UNSPEC_PCMP)))]
+ "TARGET_AVX512BW
+ && (GET_MODE_NUNITS (<V48H_AVX512VL:MODE>mode)
+ < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
+ "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ "&& <SWI248x:MODE>mode != E_DImode"
+ [(set (match_dup 0)
+ (zero_extend:DI
+ (unspec:<V48H_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMP)))]
+ "operands[0] = lowpart_subreg (DImode, operands[0], <SWI248x:MODE>mode);"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<V48H_AVX512VL:sseinsnmode>")])
+
+(define_insn_and_split "*<avx512>_cmp<mode>3"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:V48H_AVX512VL 1 "register_operand" "v")
+ (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+ UNSPEC_PCMP))]
+ "TARGET_AVX512BW
+ && GET_MODE_NUNITS (<MODE>mode) < 64"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (zero_extend:DI
+ (unspec:<avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMP)))]
+ "operands[0] = lowpart_subreg (DImode, operands[0], <avx512fmaskmode>mode);"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*<avx512>_cmp<mode>3_zero_extenddi"
+ [(set (match_operand:DI 0 "register_operand" "=k")
+ (zero_extend:DI
+ (unspec:<avx512fmaskmode>
+ [(match_operand:V48H_AVX512VL 1 "register_operand" "v")
+ (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+ UNSPEC_PCMP)))]
+ "TARGET_AVX512BW
+ && GET_MODE_NUNITS (<V48H_AVX512VL:MODE>mode) < 64"
+ "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn_and_split "*<avx512>_cmp<mode>3"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
(not:<avx512fmaskmode>
@@ -3735,6 +3804,72 @@ (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn_and_split "*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+ [(set (match_operand:SWI248x 0 "register_operand" "=k")
+ (zero_extend:SWI248x
+ (unspec:<VI12_AVX512VL:avx512fmaskmode>
+ [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<VI12_AVX512VL:cmp_imm_predicate>" "n")]
+ UNSPEC_PCMP)))]
+ "TARGET_AVX512BW
+ && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
+ < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
+ "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ "&& <SWI248x:MODE>mode != E_DImode"
+ [(set (match_dup 0)
+ (zero_extend:DI
+ (unspec:<VI12_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMP)))]
+ "operands[0] = lowpart_subreg (DImode, operands[0], <SWI248x:MODE>mode);"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
+
+(define_insn_and_split "*<avx512>_cmp<mode>3"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+ UNSPEC_PCMP))]
+ "TARGET_AVX512BW
+ && GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode) < 64"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (zero_extend:DI
+ (unspec:<avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMP)))]
+ "operands[0] = lowpart_subreg (DImode, operands[0], <avx512fmaskmode>mode);"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*<avx512>_cmp<mode>3_zero_extenddi"
+ [(set (match_operand:DI 0 "register_operand" "=k")
+ (zero_extend:DI
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+ UNSPEC_PCMP)))]
+ "TARGET_AVX512BW
+ && GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode) < 64"
+ "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_int_iterator UNSPEC_PCMP_ITER
[UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP])
@@ -3771,6 +3906,72 @@ (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn_and_split "*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+ [(set (match_operand:SWI248x 0 "register_operand" "=k")
+ (zero_extend:SWI248x
+ (unspec:<VI12_AVX512VL:avx512fmaskmode>
+ [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
+ UNSPEC_UNSIGNED_PCMP)))]
+ "TARGET_AVX512BW
+ && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
+ < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
+ "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ "&& <SWI248x:MODE>mode != E_DImode"
+ [(set (match_dup 0)
+ (zero_extend:DI
+ (unspec:<VI12_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_UNSIGNED_PCMP)))]
+ "operands[0] = lowpart_subreg (DImode, operands[0], <SWI248x:MODE>mode);"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
+
+(define_insn_and_split "*<avx512>_ucmp<mode>3"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+ UNSPEC_UNSIGNED_PCMP))]
+ "TARGET_AVX512BW
+ && GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode) < 64"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (zero_extend:DI
+ (unspec:<avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_UNSIGNED_PCMP)))]
+ "operands[0] = lowpart_subreg (DImode, operands[0], <avx512fmaskmode>mode);"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*<avx512>_ucmp<mode>3_zero_extenddi"
+ [(set (match_operand:DI 0 "register_operand" "=k")
+ (zero_extend:DI
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+ UNSPEC_UNSIGNED_PCMP)))]
+ "TARGET_AVX512BW
+ && GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode) < 64"
+ "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
@@ -3785,6 +3986,72 @@ (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn_and_split "*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+ [(set (match_operand:SWI248x 0 "register_operand" "=k")
+ (zero_extend:SWI248x
+ (unspec:<VI48_AVX512VL:avx512fmaskmode>
+ [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
+ UNSPEC_UNSIGNED_PCMP)))]
+ "TARGET_AVX512BW
+ && (GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode)
+ < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
+ "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ "&& <SWI248x:MODE>mode != E_DImode"
+ [(set (match_dup 0)
+ (zero_extend:DI
+ (unspec:<VI48_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_UNSIGNED_PCMP)))]
+ "operands[0] = lowpart_subreg (DImode, operands[0], <SWI248x:MODE>mode);"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<VI48_AVX512VL:sseinsnmode>")])
+
+(define_insn_and_split "*<avx512>_ucmp<mode>3"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+ UNSPEC_UNSIGNED_PCMP))]
+ "TARGET_AVX512BW
+ && GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode) < 64"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (zero_extend:DI
+ (unspec:<avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_UNSIGNED_PCMP)))]
+ "operands[0] = lowpart_subreg (DImode, operands[0], <avx512fmaskmode>mode);"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*<avx512>_ucmp<mode>3_zero_extenddi"
+ [(set (match_operand:DI 0 "register_operand" "=k")
+ (zero_extend:DI
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+ UNSPEC_UNSIGNED_PCMP)))]
+ "TARGET_AVX512BW
+ && GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode) < 64"
+ "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn_and_split "*<avx512>_ucmp<mode>3"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
(not:<avx512fmaskmode>
diff --git a/gcc/testsuite/g++.target/i386/pr103750-1.C b/gcc/testsuite/g++.target/i386/pr103750-1.C
new file mode 100644
index 00000000000..83f471331b3
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr103750-1.C
@@ -0,0 +1,50 @@
+/* PR target/103750 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=cannonlake -maes -std=c++1y" } */
+/* { dg-final { scan-assembler-times "kmovw" 2 } } */
+/* { dg-final { scan-assembler-times "kmovd" 2 } } */
+/* There shouldn't be any kmovw/kmovd inside the loop. */
+#include <immintrin.h>
+
+const char16_t *qustrchr(char16_t *n, char16_t *e, char16_t c) noexcept
+{
+ __m256i mch256 = _mm256_set1_epi16(c);
+ for ( ; n < e; n += 32) {
+ __m256i data1 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n));
+ __m256i data2 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n) + 1);
+ __mmask16 mask1 = _mm256_cmpeq_epu16_mask(data1, mch256);
+ __mmask16 mask2 = _mm256_cmpeq_epu16_mask(data2, mch256);
+ if (_kortestz_mask16_u8(mask1, mask2))
+ continue;
+
+ unsigned idx = _tzcnt_u32(mask1);
+ if (mask1 == 0) {
+ idx = __tzcnt_u16(mask2);
+ n += 16;
+ }
+ return n + idx;
+ }
+ return e;
+}
+
+const char16_t *qustrchr1(char16_t *n, char16_t *e, char16_t c) noexcept
+{
+ __m256i mch256 = _mm256_set1_epi16(c);
+ for ( ; n < e; n += 32) {
+ __m256i data1 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n));
+ __m256i data2 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n) + 1);
+ __mmask16 mask1 = _mm256_cmpeq_epu16_mask(data1, mch256);
+ __mmask16 mask2 = _mm256_cmpeq_epu16_mask(data2, mch256);
+ if (_kortestz_mask32_u8(mask1, mask2))
+ continue;
+
+ unsigned idx = _tzcnt_u32(mask1);
+ if (mask1 == 0) {
+ idx = __tzcnt_u16(mask2);
+ n += 16;
+ }
+ return n + idx;
+ }
+ return e;
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/bitwise_mask_op-3.c b/gcc/testsuite/gcc.target/i386/bitwise_mask_op-3.c
index 352c49d6c6b..82bb99e30af 100644
--- a/gcc/testsuite/gcc.target/i386/bitwise_mask_op-3.c
+++ b/gcc/testsuite/gcc.target/i386/bitwise_mask_op-3.c
@@ -12,7 +12,7 @@ foo_orb (__m512i a, __m512i b)
foo = m1 | m2;
}
-/* { dg-final { scan-assembler-times "korb\[\t \]" "1" { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "korb\[\t \]" "1" { xfail { *-*-* && { ! ia32 } } } } } */
void
foo_xorb (__m512i a, __m512i b)
@@ -22,7 +22,7 @@ foo_xorb (__m512i a, __m512i b)
foo = m1 ^ m2;
}
-/* { dg-final { scan-assembler-times "kxorb\[\t \]" "1" { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "kxorb\[\t \]" "1" { xfail { *-*-* && { ! ia32 } } } } } */
void
foo_andb (__m512i a, __m512i b)
@@ -40,4 +40,4 @@ foo_andnb (__m512i a, __m512i b)
foo = m1 & ~m2;
}
-/* { dg-final { scan-assembler-times "kmovb\[\t \]" "4" { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "kmovb\[\t \]" "4" { xfail { *-*-* && { ! ia32 } } } } } */
--
2.18.1
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] [i386] Add define_insn_and_split for vpcmp{b, w, d, q} vpcmp{ph, ps, pd}.
2021-12-21 6:26 [PATCH] [i386] Add define_insn_and_split for vpcmp{b, w, d, q} vpcmp{ph, ps, pd} liuhongt
@ 2021-12-23 5:41 ` Hongtao Liu
0 siblings, 0 replies; 2+ messages in thread
From: Hongtao Liu @ 2021-12-23 5:41 UTC (permalink / raw)
To: liuhongt; +Cc: GCC Patches, H. J. Lu
[-- Attachment #1: Type: text/plain, Size: 17963 bytes --]
On Tue, Dec 21, 2021 at 2:27 PM liuhongt <hongtao.liu@intel.com> wrote:
>
> The purpose of those define_insn_and_split:
> 1. Combine vpcmpuw and zero_extend into vpcmpuw.
> 2. Canonicalize vpcmpuw pattern so CSE can replace duplicate vpcmpuw to just kmov
> 3. Use DImode as dest of zero_extend so cprop_hardreg can eliminate redundant kmov.
Using DImode as the dest of zero_extend is too aggressive and causes
several regressions.
The new patch adds define_insn_and_split patterns that just combine vpcmpuw and
zero_extend into vpcmpuw.
Here's the patch I'm checking in.
>
> It should partially fix the issue in PR.
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ready to push to trunk.
>
> gcc/ChangeLog:
>
> PR target/103750
> * config/i386/sse.md
> (*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
> New define_insn_and_split.
> (*<avx512>_cmp<mode>3): Ditto.
> (*<avx512>_cmp<mode>3_zero_extenddi): New define_insn.
> (*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
> New define_insn_and_split.
> (*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
> Ditto.
> (*<avx512>_ucmp<mode>3): Ditto.
> (*<avx512>_ucmp<mode>3_zero_extenddi): New define_insn.
> (*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
> New define_insn_and_split.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/bitwise_mask_op-3.c: Adjust test/
> * g++.target/i386/pr103750-1.C: New test.
> ---
> gcc/config/i386/sse.md | 267 ++++++++++++++++++
> gcc/testsuite/g++.target/i386/pr103750-1.C | 50 ++++
> .../gcc.target/i386/bitwise_mask_op-3.c | 6 +-
> 3 files changed, 320 insertions(+), 3 deletions(-)
> create mode 100644 gcc/testsuite/g++.target/i386/pr103750-1.C
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 5196149ee32..fb885d58272 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -3702,6 +3702,75 @@ (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
> (set_attr "prefix" "evex")
> (set_attr "mode" "<sseinsnmode>")])
>
> +;; Those Splitters are used to canonicalize vpcmpuw pattern, so that CSE can transfrom
> +;; duplicated vpcmpuw to vpcmpuw and kmov
> +;; Choose biggest mode(DImode) as dest, so kmov can be optimized by cprop_hardreg.
> +(define_insn_and_split "*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
> + [(set (match_operand:SWI248x 0 "register_operand" "=k")
> + (zero_extend:SWI248x
> + (unspec:<V48H_AVX512VL:avx512fmaskmode>
> + [(match_operand:V48H_AVX512VL 1 "register_operand" "v")
> + (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
> + (match_operand:SI 3 "<V48H_AVX512VL:cmp_imm_predicate>" "n")]
> + UNSPEC_PCMP)))]
> + "TARGET_AVX512BW
> + && (GET_MODE_NUNITS (<V48H_AVX512VL:MODE>mode)
> + < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
> + "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
> + "&& <SWI248x:MODE>mode != E_DImode"
> + [(set (match_dup 0)
> + (zero_extend:DI
> + (unspec:<V48H_AVX512VL:avx512fmaskmode>
> + [(match_dup 1)
> + (match_dup 2)
> + (match_dup 3)]
> + UNSPEC_PCMP)))]
> + "operands[0] = lowpart_subreg (DImode, operands[0], <SWI248x:MODE>mode);"
> + [(set_attr "type" "ssecmp")
> + (set_attr "length_immediate" "1")
> + (set_attr "prefix" "evex")
> + (set_attr "mode" "<V48H_AVX512VL:sseinsnmode>")])
> +
> +(define_insn_and_split "*<avx512>_cmp<mode>3"
> + [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
> + (unspec:<avx512fmaskmode>
> + [(match_operand:V48H_AVX512VL 1 "register_operand" "v")
> + (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
> + (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
> + UNSPEC_PCMP))]
> + "TARGET_AVX512BW
> + && GET_MODE_NUNITS (<MODE>mode) < 64"
> + "#"
> + "&& 1"
> + [(set (match_dup 0)
> + (zero_extend:DI
> + (unspec:<avx512fmaskmode>
> + [(match_dup 1)
> + (match_dup 2)
> + (match_dup 3)]
> + UNSPEC_PCMP)))]
> + "operands[0] = lowpart_subreg (DImode, operands[0], <avx512fmaskmode>mode);"
> + [(set_attr "type" "ssecmp")
> + (set_attr "length_immediate" "1")
> + (set_attr "prefix" "evex")
> + (set_attr "mode" "<sseinsnmode>")])
> +
> +(define_insn "*<avx512>_cmp<mode>3_zero_extenddi"
> + [(set (match_operand:DI 0 "register_operand" "=k")
> + (zero_extend:DI
> + (unspec:<avx512fmaskmode>
> + [(match_operand:V48H_AVX512VL 1 "register_operand" "v")
> + (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
> + (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
> + UNSPEC_PCMP)))]
> + "TARGET_AVX512BW
> + && GET_MODE_NUNITS (<V48H_AVX512VL:MODE>mode) < 64"
> + "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
> + [(set_attr "type" "ssecmp")
> + (set_attr "length_immediate" "1")
> + (set_attr "prefix" "evex")
> + (set_attr "mode" "<sseinsnmode>")])
> +
> (define_insn_and_split "*<avx512>_cmp<mode>3"
> [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
> (not:<avx512fmaskmode>
> @@ -3735,6 +3804,72 @@ (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
> (set_attr "prefix" "evex")
> (set_attr "mode" "<sseinsnmode>")])
>
> +(define_insn_and_split "*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
> + [(set (match_operand:SWI248x 0 "register_operand" "=k")
> + (zero_extend:SWI248x
> + (unspec:<VI12_AVX512VL:avx512fmaskmode>
> + [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
> + (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
> + (match_operand:SI 3 "<VI12_AVX512VL:cmp_imm_predicate>" "n")]
> + UNSPEC_PCMP)))]
> + "TARGET_AVX512BW
> + && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
> + < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
> + "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
> + "&& <SWI248x:MODE>mode != E_DImode"
> + [(set (match_dup 0)
> + (zero_extend:DI
> + (unspec:<VI12_AVX512VL:avx512fmaskmode>
> + [(match_dup 1)
> + (match_dup 2)
> + (match_dup 3)]
> + UNSPEC_PCMP)))]
> + "operands[0] = lowpart_subreg (DImode, operands[0], <SWI248x:MODE>mode);"
> + [(set_attr "type" "ssecmp")
> + (set_attr "length_immediate" "1")
> + (set_attr "prefix" "evex")
> + (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
> +
> +(define_insn_and_split "*<avx512>_cmp<mode>3"
> + [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
> + (unspec:<avx512fmaskmode>
> + [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
> + (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
> + (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
> + UNSPEC_PCMP))]
> + "TARGET_AVX512BW
> + && GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode) < 64"
> + "#"
> + "&& 1"
> + [(set (match_dup 0)
> + (zero_extend:DI
> + (unspec:<avx512fmaskmode>
> + [(match_dup 1)
> + (match_dup 2)
> + (match_dup 3)]
> + UNSPEC_PCMP)))]
> + "operands[0] = lowpart_subreg (DImode, operands[0], <avx512fmaskmode>mode);"
> + [(set_attr "type" "ssecmp")
> + (set_attr "length_immediate" "1")
> + (set_attr "prefix" "evex")
> + (set_attr "mode" "<sseinsnmode>")])
> +
> +(define_insn "*<avx512>_cmp<mode>3_zero_extenddi"
> + [(set (match_operand:DI 0 "register_operand" "=k")
> + (zero_extend:DI
> + (unspec:<avx512fmaskmode>
> + [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
> + (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
> + (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
> + UNSPEC_PCMP)))]
> + "TARGET_AVX512BW
> + && GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode) < 64"
> + "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
> + [(set_attr "type" "ssecmp")
> + (set_attr "length_immediate" "1")
> + (set_attr "prefix" "evex")
> + (set_attr "mode" "<sseinsnmode>")])
> +
> (define_int_iterator UNSPEC_PCMP_ITER
> [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP])
>
> @@ -3771,6 +3906,72 @@ (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
> (set_attr "prefix" "evex")
> (set_attr "mode" "<sseinsnmode>")])
>
> +(define_insn_and_split "*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
> + [(set (match_operand:SWI248x 0 "register_operand" "=k")
> + (zero_extend:SWI248x
> + (unspec:<VI12_AVX512VL:avx512fmaskmode>
> + [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
> + (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
> + (match_operand:SI 3 "const_0_to_7_operand" "n")]
> + UNSPEC_UNSIGNED_PCMP)))]
> + "TARGET_AVX512BW
> + && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
> + < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
> + "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
> + "&& <SWI248x:MODE>mode != E_DImode"
> + [(set (match_dup 0)
> + (zero_extend:DI
> + (unspec:<VI12_AVX512VL:avx512fmaskmode>
> + [(match_dup 1)
> + (match_dup 2)
> + (match_dup 3)]
> + UNSPEC_UNSIGNED_PCMP)))]
> + "operands[0] = lowpart_subreg (DImode, operands[0], <SWI248x:MODE>mode);"
> + [(set_attr "type" "ssecmp")
> + (set_attr "length_immediate" "1")
> + (set_attr "prefix" "evex")
> + (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
> +
> +(define_insn_and_split "*<avx512>_ucmp<mode>3"
> + [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
> + (unspec:<avx512fmaskmode>
> + [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
> + (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
> + (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
> + UNSPEC_UNSIGNED_PCMP))]
> + "TARGET_AVX512BW
> + && GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode) < 64"
> + "#"
> + "&& 1"
> + [(set (match_dup 0)
> + (zero_extend:DI
> + (unspec:<avx512fmaskmode>
> + [(match_dup 1)
> + (match_dup 2)
> + (match_dup 3)]
> + UNSPEC_UNSIGNED_PCMP)))]
> + "operands[0] = lowpart_subreg (DImode, operands[0], <avx512fmaskmode>mode);"
> + [(set_attr "type" "ssecmp")
> + (set_attr "length_immediate" "1")
> + (set_attr "prefix" "evex")
> + (set_attr "mode" "<sseinsnmode>")])
> +
> +(define_insn "*<avx512>_ucmp<mode>3_zero_extenddi"
> + [(set (match_operand:DI 0 "register_operand" "=k")
> + (zero_extend:DI
> + (unspec:<avx512fmaskmode>
> + [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
> + (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
> + (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
> + UNSPEC_UNSIGNED_PCMP)))]
> + "TARGET_AVX512BW
> + && GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode) < 64"
> + "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
> + [(set_attr "type" "ssecmp")
> + (set_attr "length_immediate" "1")
> + (set_attr "prefix" "evex")
> + (set_attr "mode" "<sseinsnmode>")])
> +
> (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
> [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
> (unspec:<avx512fmaskmode>
> @@ -3785,6 +3986,72 @@ (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
> (set_attr "prefix" "evex")
> (set_attr "mode" "<sseinsnmode>")])
>
> +(define_insn_and_split "*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
> + [(set (match_operand:SWI248x 0 "register_operand" "=k")
> + (zero_extend:SWI248x
> + (unspec:<VI48_AVX512VL:avx512fmaskmode>
> + [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
> + (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
> + (match_operand:SI 3 "const_0_to_7_operand" "n")]
> + UNSPEC_UNSIGNED_PCMP)))]
> + "TARGET_AVX512BW
> + && (GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode)
> + < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
> + "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
> + "&& <SWI248x:MODE>mode != E_DImode"
> + [(set (match_dup 0)
> + (zero_extend:DI
> + (unspec:<VI48_AVX512VL:avx512fmaskmode>
> + [(match_dup 1)
> + (match_dup 2)
> + (match_dup 3)]
> + UNSPEC_UNSIGNED_PCMP)))]
> + "operands[0] = lowpart_subreg (DImode, operands[0], <SWI248x:MODE>mode);"
> + [(set_attr "type" "ssecmp")
> + (set_attr "length_immediate" "1")
> + (set_attr "prefix" "evex")
> + (set_attr "mode" "<VI48_AVX512VL:sseinsnmode>")])
> +
> +(define_insn_and_split "*<avx512>_ucmp<mode>3"
> + [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
> + (unspec:<avx512fmaskmode>
> + [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
> + (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
> + (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
> + UNSPEC_UNSIGNED_PCMP))]
> + "TARGET_AVX512BW
> + && GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode) < 64"
> + "#"
> + "&& 1"
> + [(set (match_dup 0)
> + (zero_extend:DI
> + (unspec:<avx512fmaskmode>
> + [(match_dup 1)
> + (match_dup 2)
> + (match_dup 3)]
> + UNSPEC_UNSIGNED_PCMP)))]
> + "operands[0] = lowpart_subreg (DImode, operands[0], <avx512fmaskmode>mode);"
> + [(set_attr "type" "ssecmp")
> + (set_attr "length_immediate" "1")
> + (set_attr "prefix" "evex")
> + (set_attr "mode" "<sseinsnmode>")])
> +
> +(define_insn "*<avx512>_ucmp<mode>3_zero_extenddi"
> + [(set (match_operand:DI 0 "register_operand" "=k")
> + (zero_extend:DI
> + (unspec:<avx512fmaskmode>
> + [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
> + (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
> + (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
> + UNSPEC_UNSIGNED_PCMP)))]
> + "TARGET_AVX512BW
> + && GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode) < 64"
> + "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
> + [(set_attr "type" "ssecmp")
> + (set_attr "length_immediate" "1")
> + (set_attr "prefix" "evex")
> + (set_attr "mode" "<sseinsnmode>")])
> +
> (define_insn_and_split "*<avx512>_ucmp<mode>3"
> [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
> (not:<avx512fmaskmode>
> diff --git a/gcc/testsuite/g++.target/i386/pr103750-1.C b/gcc/testsuite/g++.target/i386/pr103750-1.C
> new file mode 100644
> index 00000000000..83f471331b3
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr103750-1.C
> @@ -0,0 +1,50 @@
> +/* PR target/103750 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=cannonlake -maes -std=c++1y" } */
> +/* { dg-final { scan-assembler-times "kmovw" 2 } } */
> +/* { dg-final { scan-assembler-times "kmovd" 2 } } */
> +/* There shouldn't be any kmovw/kmovd inside the loop. */
> +#include <immintrin.h>
> +
> +const char16_t *qustrchr(char16_t *n, char16_t *e, char16_t c) noexcept
> +{
> + __m256i mch256 = _mm256_set1_epi16(c);
> + for ( ; n < e; n += 32) {
> + __m256i data1 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n));
> + __m256i data2 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n) + 1);
> + __mmask16 mask1 = _mm256_cmpeq_epu16_mask(data1, mch256);
> + __mmask16 mask2 = _mm256_cmpeq_epu16_mask(data2, mch256);
> + if (_kortestz_mask16_u8(mask1, mask2))
> + continue;
> +
> + unsigned idx = _tzcnt_u32(mask1);
> + if (mask1 == 0) {
> + idx = __tzcnt_u16(mask2);
> + n += 16;
> + }
> + return n + idx;
> + }
> + return e;
> +}
> +
> +const char16_t *qustrchr1(char16_t *n, char16_t *e, char16_t c) noexcept
> +{
> + __m256i mch256 = _mm256_set1_epi16(c);
> + for ( ; n < e; n += 32) {
> + __m256i data1 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n));
> + __m256i data2 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n) + 1);
> + __mmask16 mask1 = _mm256_cmpeq_epu16_mask(data1, mch256);
> + __mmask16 mask2 = _mm256_cmpeq_epu16_mask(data2, mch256);
> + if (_kortestz_mask32_u8(mask1, mask2))
> + continue;
> +
> + unsigned idx = _tzcnt_u32(mask1);
> + if (mask1 == 0) {
> + idx = __tzcnt_u16(mask2);
> + n += 16;
> + }
> + return n + idx;
> + }
> + return e;
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/i386/bitwise_mask_op-3.c b/gcc/testsuite/gcc.target/i386/bitwise_mask_op-3.c
> index 352c49d6c6b..82bb99e30af 100644
> --- a/gcc/testsuite/gcc.target/i386/bitwise_mask_op-3.c
> +++ b/gcc/testsuite/gcc.target/i386/bitwise_mask_op-3.c
> @@ -12,7 +12,7 @@ foo_orb (__m512i a, __m512i b)
> foo = m1 | m2;
> }
>
> -/* { dg-final { scan-assembler-times "korb\[\t \]" "1" { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "korb\[\t \]" "1" { xfail { *-*-* && { ! ia32 } } } } } */
>
> void
> foo_xorb (__m512i a, __m512i b)
> @@ -22,7 +22,7 @@ foo_xorb (__m512i a, __m512i b)
> foo = m1 ^ m2;
> }
>
> -/* { dg-final { scan-assembler-times "kxorb\[\t \]" "1" { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "kxorb\[\t \]" "1" { xfail { *-*-* && { ! ia32 } } } } } */
>
> void
> foo_andb (__m512i a, __m512i b)
> @@ -40,4 +40,4 @@ foo_andnb (__m512i a, __m512i b)
> foo = m1 & ~m2;
> }
>
> -/* { dg-final { scan-assembler-times "kmovb\[\t \]" "4" { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "kmovb\[\t \]" "4" { xfail { *-*-* && { ! ia32 } } } } } */
> --
> 2.18.1
>
--
BR,
Hongtao
[-- Attachment #2: 0001-i386-Combine-vpcmpuw-zero_extend-to-vpcmpuw.patch --]
[-- Type: application/octet-stream, Size: 51133 bytes --]
From 19c4a95245aff5aa0d11832211164c9f7e600aed Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Wed, 22 Dec 2021 16:48:54 +0800
Subject: [PATCH] [i386] Combine vpcmpuw + zero_extend to vpcmpuw.
vcmp{ps,ph,pd} and vpcmp{,u}{b,w,d,q} implicitly clear the upper bits
of dest.
gcc/ChangeLog:
PR target/103750
* config/i386/sse.md
(*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
New pre_reload define_insn_and_split.
(*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
Ditto.
(*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
Ditto.
(*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
Ditto.
(*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
Ditto.
(*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
Ditto.
(*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
Ditto.
(*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/avx512bw-pr103750-1.c: New test.
* gcc.target/i386/avx512bw-pr103750-2.c: New test.
* gcc.target/i386/avx512f-pr103750-1.c: New test.
* gcc.target/i386/avx512f-pr103750-2.c: New test.
* gcc.target/i386/avx512fp16-pr103750-1.c: New test.
* gcc.target/i386/avx512fp16-pr103750-2.c: New test.
---
gcc/config/i386/sse.md | 275 ++++++++++
.../gcc.target/i386/avx512bw-pr103750-1.c | 154 ++++++
.../gcc.target/i386/avx512bw-pr103750-2.c | 173 +++++++
.../gcc.target/i386/avx512f-pr103750-1.c | 426 ++++++++++++++++
.../gcc.target/i386/avx512f-pr103750-2.c | 478 ++++++++++++++++++
.../gcc.target/i386/avx512fp16-pr103750-1.c | 58 +++
.../gcc.target/i386/avx512fp16-pr103750-2.c | 71 +++
7 files changed, 1635 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-pr103750-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-pr103750-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-2.c
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5196149ee32..72cfe85a4b7 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3702,6 +3702,77 @@ (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+;; Since vpcmpd implicitly clears the upper bits of dest, transform
+;; vpcmpd + zero_extend into a single vpcmpd before reload.
+(define_insn_and_split "*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+ [(set (match_operand:SWI248x 0 "register_operand")
+ (zero_extend:SWI248x
+ (unspec:<V48H_AVX512VL:avx512fmaskmode>
+ [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand")
+ (match_operand:V48H_AVX512VL 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_7_operand")]
+ UNSPEC_PCMP)))]
+ "TARGET_AVX512F
+ && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
+ && ix86_pre_reload_split ()
+ && (GET_MODE_NUNITS (<V48H_AVX512VL:MODE>mode)
+ < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:<V48H_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMP))]
+{
+ operands[1] = force_reg (<V48H_AVX512VL:MODE>mode, operands[1]);
+ operands[0] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode,
+ operands[0], <SWI248x:MODE>mode);
+}
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<V48H_AVX512VL:sseinsnmode>")])
+
+(define_insn_and_split "*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2"
+ [(set (match_operand:SWI248x 0 "register_operand")
+ (zero_extend:SWI248x
+ (unspec:<V48H_AVX512VL:avx512fmaskmode>
+ [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand")
+ (match_operand:V48H_AVX512VL 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_7_operand")]
+ UNSPEC_PCMP)))
+ (set (match_operand:<V48H_AVX512VL:avx512fmaskmode> 4 "register_operand")
+ (unspec:<V48H_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMP))]
+ "TARGET_AVX512F
+ && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
+ && (GET_MODE_NUNITS (<V48H_AVX512VL:MODE>mode)
+ < GET_MODE_PRECISION (<SWI248x:MODE>mode))
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:<V48H_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMP))
+ (set (match_dup 4) (match_dup 0))]
+{
+ operands[1] = force_reg (<V48H_AVX512VL:MODE>mode, operands[1]);
+ operands[0] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode,
+ operands[0], <SWI248x:MODE>mode);
+}
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<V48H_AVX512VL:sseinsnmode>")])
+
(define_insn_and_split "*<avx512>_cmp<mode>3"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
(not:<avx512fmaskmode>
@@ -3735,6 +3806,73 @@ (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn_and_split "*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+ [(set (match_operand:SWI248x 0 "register_operand")
+ (zero_extend:SWI248x
+ (unspec:<VI12_AVX512VL:avx512fmaskmode>
+ [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_7_operand")]
+ UNSPEC_PCMP)))]
+ "TARGET_AVX512BW
+ && ix86_pre_reload_split ()
+ && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
+ < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:<VI12_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMP))]
+{
+ operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
+ operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
+ operands[0], <SWI248x:MODE>mode);
+}
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
+
+(define_insn_and_split "*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2"
+ [(set (match_operand:SWI248x 0 "register_operand")
+ (zero_extend:SWI248x
+ (unspec:<VI12_AVX512VL:avx512fmaskmode>
+ [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_7_operand")]
+ UNSPEC_PCMP)))
+ (set (match_operand:<VI12_AVX512VL:avx512fmaskmode> 4 "register_operand")
+ (unspec:<VI12_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMP))]
+ "TARGET_AVX512BW
+ && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
+ < GET_MODE_PRECISION (<SWI248x:MODE>mode))
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:<VI12_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMP))
+ (set (match_dup 4) (match_dup 0))]
+{
+ operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
+ operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
+ operands[0], <SWI248x:MODE>mode);
+}
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
+
(define_int_iterator UNSPEC_PCMP_ITER
[UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP])
@@ -3771,6 +3909,74 @@ (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn_and_split "*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+ [(set (match_operand:SWI248x 0 "register_operand")
+ (zero_extend:SWI248x
+ (unspec:<VI12_AVX512VL:avx512fmaskmode>
+ [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_7_operand")]
+ UNSPEC_UNSIGNED_PCMP)))]
+ "TARGET_AVX512BW
+ && ix86_pre_reload_split ()
+ && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
+ < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:<VI12_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_UNSIGNED_PCMP))]
+{
+ operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
+ operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
+ operands[0], <SWI248x:MODE>mode);
+}
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
+
+;; Variant with a second set that also keeps the mask-mode compare result.
+(define_insn_and_split "*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2"
+ [(set (match_operand:SWI248x 0 "register_operand")
+ (zero_extend:SWI248x
+ (unspec:<VI12_AVX512VL:avx512fmaskmode>
+ [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_7_operand")]
+ UNSPEC_UNSIGNED_PCMP)))
+ (set (match_operand:<VI12_AVX512VL:avx512fmaskmode> 4 "register_operand")
+ (unspec:<VI12_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_UNSIGNED_PCMP))]
+ "TARGET_AVX512BW
+ && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
+ < GET_MODE_PRECISION (<SWI248x:MODE>mode))
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:<VI12_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_UNSIGNED_PCMP))
+ (set (match_dup 4) (match_dup 0))]
+{
+ operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
+ operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
+ operands[0], <SWI248x:MODE>mode);
+}
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
+
(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
@@ -3785,6 +3991,75 @@ (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn_and_split "*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+ [(set (match_operand:SWI248x 0 "register_operand")
+ (zero_extend:SWI248x
+ (unspec:<VI48_AVX512VL:avx512fmaskmode>
+ [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
+ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_7_operand")]
+ UNSPEC_UNSIGNED_PCMP)))]
+ "TARGET_AVX512F
+ && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
+ && ix86_pre_reload_split ()
+ && (GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode)
+ < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:<VI48_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_UNSIGNED_PCMP))]
+{
+ operands[1] = force_reg (<VI48_AVX512VL:MODE>mode, operands[1]);
+ operands[0] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode,
+ operands[0], <SWI248x:MODE>mode);
+}
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<VI48_AVX512VL:sseinsnmode>")])
+
+(define_insn_and_split "*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2"
+ [(set (match_operand:SWI248x 0 "register_operand")
+ (zero_extend:SWI248x
+ (unspec:<VI48_AVX512VL:avx512fmaskmode>
+ [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
+ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_7_operand")]
+ UNSPEC_UNSIGNED_PCMP)))
+ (set (match_operand:<VI48_AVX512VL:avx512fmaskmode> 4 "register_operand")
+ (unspec:<VI48_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_UNSIGNED_PCMP))]
+ "TARGET_AVX512F
+ && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
+ && (GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode)
+ < GET_MODE_PRECISION (<SWI248x:MODE>mode))
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:<VI48_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_UNSIGNED_PCMP))
+ (set (match_dup 4) (match_dup 0))]
+{
+ operands[1] = force_reg (<VI48_AVX512VL:MODE>mode, operands[1]);
+ operands[0] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode,
+ operands[0], <SWI248x:MODE>mode);
+}
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<VI48_AVX512VL:sseinsnmode>")])
+
(define_insn_and_split "*<avx512>_ucmp<mode>3"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
(not:<avx512fmaskmode>
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c
new file mode 100644
index 00000000000..b1165f069bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c
@@ -0,0 +1,154 @@
+/* PR target/103750 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */
+/* The ia32 xfail needs to be fixed. */
+
+#include <immintrin.h>
+extern __m128i* pi128;
+extern __m256i* pi256;
+extern __m512i* pi512;
+
+unsigned char
+foo ()
+{
+ __mmask16 mask1 = _mm_cmpeq_epu8_mask (pi128[0], pi128[1]);
+ __mmask16 mask2 = _mm_cmpeq_epu8_mask (pi128[2], pi128[3]);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+foo1 ()
+{
+ __mmask16 mask1 = _mm_cmpeq_epu8_mask (pi128[0], pi128[1]);
+ __mmask16 mask2 = _mm_cmpeq_epu8_mask (pi128[2], pi128[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+foo2 ()
+{
+ __mmask32 mask1 = _mm256_cmpeq_epu8_mask (pi256[0], pi256[1]);
+ __mmask32 mask2 = _mm256_cmpeq_epu8_mask (pi256[2], pi256[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+foo3 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]);
+ return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+foo4 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+foo5 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+foo6 ()
+{
+ __mmask16 mask1 = _mm256_cmpeq_epu16_mask (pi256[0], pi256[1]);
+ __mmask16 mask2 = _mm256_cmpeq_epu16_mask (pi256[2], pi256[3]);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+foo7 ()
+{
+ __mmask16 mask1 = _mm256_cmpeq_epu16_mask (pi256[0], pi256[1]);
+ __mmask16 mask2 = _mm256_cmpeq_epu16_mask (pi256[2], pi256[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+foo8 ()
+{
+ __mmask32 mask1 = _mm512_cmpeq_epu16_mask (pi512[0], pi512[1]);
+ __mmask32 mask2 = _mm512_cmpeq_epu16_mask (pi512[2], pi512[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo ()
+{
+ __mmask16 mask1 = _mm_cmpeq_epi8_mask (pi128[0], pi128[1]);
+ __mmask16 mask2 = _mm_cmpeq_epi8_mask (pi128[2], pi128[3]);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo1 ()
+{
+ __mmask16 mask1 = _mm_cmpeq_epi8_mask (pi128[0], pi128[1]);
+ __mmask16 mask2 = _mm_cmpeq_epi8_mask (pi128[2], pi128[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo2 ()
+{
+ __mmask32 mask1 = _mm256_cmpeq_epi8_mask (pi256[0], pi256[1]);
+ __mmask32 mask2 = _mm256_cmpeq_epi8_mask (pi256[2], pi256[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo3 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]);
+ return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo4 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo5 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo6 ()
+{
+ __mmask16 mask1 = _mm256_cmpeq_epi16_mask (pi256[0], pi256[1]);
+ __mmask16 mask2 = _mm256_cmpeq_epi16_mask (pi256[2], pi256[3]);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo7 ()
+{
+ __mmask16 mask1 = _mm256_cmpeq_epi16_mask (pi256[0], pi256[1]);
+ __mmask16 mask2 = _mm256_cmpeq_epi16_mask (pi256[2], pi256[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo8 ()
+{
+ __mmask32 mask1 = _mm512_cmpeq_epi16_mask (pi512[0], pi512[1]);
+ __mmask32 mask2 = _mm512_cmpeq_epi16_mask (pi512[2], pi512[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c
new file mode 100644
index 00000000000..7303f5403ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c
@@ -0,0 +1,173 @@
+/* PR target/103750 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512dq -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */
+/* The ia32 xfail needs to be fixed. */
+
+#include <immintrin.h>
+extern __m128i* pi128;
+extern __m256i* pi256;
+extern __m512i* pi512;
+
+extern char a, b;
+void
+foo ()
+{
+ __mmask16 mask1 = _mm_cmpeq_epu8_mask (pi128[0], pi128[1]);
+ __mmask16 mask2 = _mm_cmpeq_epu8_mask (pi128[2], pi128[3]);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+foo1 ()
+{
+ __mmask16 mask1 = _mm_cmpeq_epu8_mask (pi128[0], pi128[1]);
+ __mmask16 mask2 = _mm_cmpeq_epu8_mask (pi128[2], pi128[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+foo2 ()
+{
+ __mmask32 mask1 = _mm256_cmpeq_epu8_mask (pi256[0], pi256[1]);
+ __mmask32 mask2 = _mm256_cmpeq_epu8_mask (pi256[2], pi256[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask32_u8 (mask1, mask2);
+}
+
+void
+foo3 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]);
+ a = _kortestz_mask16_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo4 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo5 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo6 ()
+{
+ __mmask16 mask1 = _mm256_cmpeq_epu16_mask (pi256[0], pi256[1]);
+ __mmask16 mask2 = _mm256_cmpeq_epu16_mask (pi256[2], pi256[3]);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+foo7 ()
+{
+ __mmask16 mask1 = _mm256_cmpeq_epu16_mask (pi256[0], pi256[1]);
+ __mmask16 mask2 = _mm256_cmpeq_epu16_mask (pi256[2], pi256[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+foo8 ()
+{
+ __mmask32 mask1 = _mm512_cmpeq_epu16_mask (pi512[0], pi512[1]);
+ __mmask32 mask2 = _mm512_cmpeq_epu16_mask (pi512[2], pi512[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask32_u8 (mask1, mask2);
+}
+
+void
+sign_foo ()
+{
+ __mmask16 mask1 = _mm_cmpeq_epi8_mask (pi128[0], pi128[1]);
+ __mmask16 mask2 = _mm_cmpeq_epi8_mask (pi128[2], pi128[3]);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+sign_foo1 ()
+{
+ __mmask16 mask1 = _mm_cmpeq_epi8_mask (pi128[0], pi128[1]);
+ __mmask16 mask2 = _mm_cmpeq_epi8_mask (pi128[2], pi128[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+sign_foo2 ()
+{
+ __mmask32 mask1 = _mm256_cmpeq_epi8_mask (pi256[0], pi256[1]);
+ __mmask32 mask2 = _mm256_cmpeq_epi8_mask (pi256[2], pi256[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask32_u8 (mask1, mask2);
+}
+
+void
+sign_foo3 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]);
+ a = _kortestz_mask16_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo4 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo5 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo6 ()
+{
+ __mmask16 mask1 = _mm256_cmpeq_epi16_mask (pi256[0], pi256[1]);
+ __mmask16 mask2 = _mm256_cmpeq_epi16_mask (pi256[2], pi256[3]);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+sign_foo7 ()
+{
+ __mmask16 mask1 = _mm256_cmpeq_epi16_mask (pi256[0], pi256[1]);
+ __mmask16 mask2 = _mm256_cmpeq_epi16_mask (pi256[2], pi256[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+sign_foo8 ()
+{
+ __mmask32 mask1 = _mm512_cmpeq_epi16_mask (pi512[0], pi512[1]);
+ __mmask32 mask2 = _mm512_cmpeq_epi16_mask (pi512[2], pi512[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask32_u8 (mask1, mask2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr103750-1.c b/gcc/testsuite/gcc.target/i386/avx512f-pr103750-1.c
new file mode 100644
index 00000000000..613efe0f926
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr103750-1.c
@@ -0,0 +1,426 @@
+/* PR target/103750 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f -mavx512vl -mavx512bw" } */
+/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */
+/* The ia32 xfail needs to be fixed. */
+
+#include <immintrin.h>
+extern __m128i* pi128;
+extern __m256i* pi256;
+extern __m512i* pi512;
+
+extern __m128* ps128;
+extern __m256* ps256;
+extern __m512* ps512;
+
+extern __m128d* pd128;
+extern __m256d* pd256;
+extern __m512d* pd512;
+
+unsigned char
+foo ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]);
+ return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+foo1 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+foo2 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+foo3 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]);
+ return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+foo4 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+foo5 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+foo6 ()
+{
+ __mmask16 mask1 = _mm512_cmpeq_epu32_mask (pi512[0], pi512[1]);
+ __mmask16 mask2 = _mm512_cmpeq_epu32_mask (pi512[2], pi512[3]);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+foo7 ()
+{
+ __mmask16 mask1 = _mm512_cmpeq_epu32_mask (pi512[0], pi512[1]);
+ __mmask16 mask2 = _mm512_cmpeq_epu32_mask (pi512[2], pi512[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+foo8 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]);
+ return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+foo9 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+foo10 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+foo11 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]);
+ return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+foo12 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+foo13 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+foo14 ()
+{
+ __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]);
+ __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]);
+ return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+foo15 ()
+{
+ __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]);
+ __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+foo16 ()
+{
+ __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]);
+ __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]);
+ return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo1 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo2 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo3 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]);
+ return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo4 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo5 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo6 ()
+{
+ __mmask16 mask1 = _mm512_cmpeq_epi32_mask (pi512[0], pi512[1]);
+ __mmask16 mask2 = _mm512_cmpeq_epi32_mask (pi512[2], pi512[3]);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo7 ()
+{
+ __mmask16 mask1 = _mm512_cmpeq_epi32_mask (pi512[0], pi512[1]);
+ __mmask16 mask2 = _mm512_cmpeq_epi32_mask (pi512[2], pi512[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo8 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]);
+ return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo9 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo10 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo11 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]);
+ return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo12 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo13 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo14 ()
+{
+ __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]);
+ __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]);
+ return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo15 ()
+{
+ __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]);
+ __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo16 ()
+{
+ __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]);
+ __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo ()
+{
+ __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1);
+ __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1);
+ return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo1 ()
+{
+ __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1);
+ __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo2 ()
+{
+ __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1);
+ __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo3 ()
+{
+ __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1);
+ __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1);
+ return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo4 ()
+{
+ __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1);
+ __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo5 ()
+{
+ __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1);
+ __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo6 ()
+{
+ __mmask16 mask1 = _mm512_cmp_ps_mask (ps512[0], ps512[1], 1);
+ __mmask16 mask2 = _mm512_cmp_ps_mask (ps512[2], ps512[3], 1);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo7 ()
+{
+ __mmask16 mask1 = _mm512_cmp_ps_mask (ps512[0], ps512[1], 1);
+ __mmask16 mask2 = _mm512_cmp_ps_mask (ps512[2], ps512[3], 1);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo8 ()
+{
+ __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1);
+ __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1);
+ return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo9 ()
+{
+ __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1);
+ __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo10 ()
+{
+ __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1);
+ __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo11 ()
+{
+ __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1);
+ __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1);
+ return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo12 ()
+{
+ __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1);
+ __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo13 ()
+{
+ __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1);
+ __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo14 ()
+{
+ __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1);
+ __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1);
+ return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo15 ()
+{
+ __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1);
+ __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo16 ()
+{
+ __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1);
+ __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512f-pr103750-2.c
new file mode 100644
index 00000000000..a6c2b06747d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr103750-2.c
@@ -0,0 +1,478 @@
+/* PR target/103750 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -mavx512bw" } */
+/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */
+/* FIXME: the ia32 xfail on the scan-assembler-not check above still needs to be fixed.  */
+
+#include <immintrin.h>
+extern __m128i* pi128;
+extern __m256i* pi256;
+extern __m512i* pi512;
+
+extern __m128* ps128;
+extern __m256* ps256;
+extern __m512* ps512;
+
+extern __m128d* pd128;
+extern __m256d* pd256;
+extern __m512d* pd512;
+
+extern char a, b;
+void
+foo ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]);
+ a = _kortestz_mask16_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo1 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo2 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo3 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]);
+ a = _kortestz_mask16_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo4 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo5 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo6 ()
+{
+ __mmask16 mask1 = _mm512_cmpeq_epu32_mask (pi512[0], pi512[1]);
+ __mmask16 mask2 = _mm512_cmpeq_epu32_mask (pi512[2], pi512[3]);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+foo7 ()
+{
+ __mmask16 mask1 = _mm512_cmpeq_epu32_mask (pi512[0], pi512[1]);
+ __mmask16 mask2 = _mm512_cmpeq_epu32_mask (pi512[2], pi512[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+foo8 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]);
+ a = _kortestz_mask16_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo9 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo10 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo11 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]);
+ a = _kortestz_mask16_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo12 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo13 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo14 ()
+{
+ __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]);
+ __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]);
+ a = _kortestz_mask16_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo15 ()
+{
+ __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]);
+ __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo16 ()
+{
+ __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]);
+ __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]);
+ a = _kortestz_mask16_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo1 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo2 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo3 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]);
+ a = _kortestz_mask16_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo4 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo5 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo6 ()
+{
+ __mmask16 mask1 = _mm512_cmpeq_epi32_mask (pi512[0], pi512[1]);
+ __mmask16 mask2 = _mm512_cmpeq_epi32_mask (pi512[2], pi512[3]);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+sign_foo7 ()
+{
+ __mmask16 mask1 = _mm512_cmpeq_epi32_mask (pi512[0], pi512[1]);
+ __mmask16 mask2 = _mm512_cmpeq_epi32_mask (pi512[2], pi512[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+sign_foo8 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]);
+ a = _kortestz_mask16_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo9 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo10 ()
+{
+ __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]);
+ __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo11 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]);
+ a = _kortestz_mask16_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo12 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo13 ()
+{
+ __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]);
+ __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo14 ()
+{
+ __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]);
+ __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]);
+ a = _kortestz_mask16_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo15 ()
+{
+ __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]);
+ __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo16 ()
+{
+ __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]);
+ __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo ()
+{
+ __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1);
+ __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1);
+ a = _kortestz_mask16_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo1 ()
+{
+ __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1);
+ __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo2 ()
+{
+ __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1);
+ __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo3 ()
+{
+ __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1);
+ __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1);
+ a = _kortestz_mask16_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo4 ()
+{
+ __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1);
+ __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo5 ()
+{
+ __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1);
+ __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo6 ()
+{
+ __mmask16 mask1 = _mm512_cmp_ps_mask (ps512[0], ps512[1], 1);
+ __mmask16 mask2 = _mm512_cmp_ps_mask (ps512[2], ps512[3], 1);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+float_foo7 ()
+{
+ __mmask16 mask1 = _mm512_cmp_ps_mask (ps512[0], ps512[1], 1);
+ __mmask16 mask2 = _mm512_cmp_ps_mask (ps512[2], ps512[3], 1);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+float_foo8 ()
+{
+ __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1);
+ __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1);
+ a = _kortestz_mask16_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo9 ()
+{
+ __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1);
+ __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo10 ()
+{
+ __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1);
+ __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo11 ()
+{
+ __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1);
+ __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1);
+ a = _kortestz_mask16_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo12 ()
+{
+ __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1);
+ __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo13 ()
+{
+ __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1);
+ __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo14 ()
+{
+ __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1);
+ __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1);
+ a = _kortestz_mask16_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo15 ()
+{
+ __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1);
+ __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1);
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo16 ()
+{
+ __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1);
+ __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1);
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-1.c
new file mode 100644
index 00000000000..eaf6d1e9819
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-1.c
@@ -0,0 +1,58 @@
+/* PR target/103750 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */
+/* FIXME: the ia32 xfail on the scan-assembler-not check above still needs to be fixed.  */
+
+#include <immintrin.h>
+extern __m128h* ph128;
+extern __m256h* ph256;
+extern __m512h* ph512;
+
+unsigned char
+sign_foo3 ()
+{
+ __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1);
+ __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1);
+ return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo4 ()
+{
+ __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1);
+ __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo5 ()
+{
+ __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1);
+ __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo6 ()
+{
+ __mmask16 mask1 = _mm256_cmp_ph_mask (ph256[0], ph256[1], 1);
+ __mmask16 mask2 = _mm256_cmp_ph_mask (ph256[2], ph256[3], 1);
+ return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo7 ()
+{
+ __mmask16 mask1 = _mm256_cmp_ph_mask (ph256[0], ph256[1], 1);
+ __mmask16 mask2 = _mm256_cmp_ph_mask (ph256[2], ph256[3], 1);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo8 ()
+{
+ __mmask32 mask1 = _mm512_cmp_ph_mask (ph512[0], ph512[1], 1);
+ __mmask32 mask2 = _mm512_cmp_ph_mask (ph512[2], ph512[3], 1);
+ return _kortestz_mask64_u8 (mask1, mask2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-2.c
new file mode 100644
index 00000000000..3d3a033fe64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-2.c
@@ -0,0 +1,71 @@
+/* PR target/103750 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */
+/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */
+/* FIXME: the ia32 xfail on the scan-assembler-not check above still needs to be fixed.  */
+
+#include <immintrin.h>
+extern __m128h* ph128;
+extern __m256h* ph256;
+extern __m512h* ph512;
+
+extern char a, b;
+void
+sign_foo3 ()
+{
+ __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1);
+ __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1);
+
+ a = _kortestz_mask16_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo4 ()
+{
+ __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1);
+ __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1);
+
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo5 ()
+{
+ __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1);
+ __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1);
+
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo6 ()
+{
+ __mmask16 mask1 = _mm256_cmp_ph_mask (ph256[0], ph256[1], 1);
+ __mmask16 mask2 = _mm256_cmp_ph_mask (ph256[2], ph256[3], 1);
+
+ a = _kortestz_mask32_u8 (mask1, mask2);
+ b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+sign_foo7 ()
+{
+ __mmask16 mask1 = _mm256_cmp_ph_mask (ph256[0], ph256[1], 1);
+ __mmask16 mask2 = _mm256_cmp_ph_mask (ph256[2], ph256[3], 1);
+
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+sign_foo8 ()
+{
+ __mmask32 mask1 = _mm512_cmp_ph_mask (ph512[0], ph512[1], 1);
+ __mmask32 mask2 = _mm512_cmp_ph_mask (ph512[2], ph512[3], 1);
+
+ a = _kortestz_mask64_u8 (mask1, mask2);
+ b = _kortestz_mask32_u8 (mask1, mask2);
+}
--
2.18.1
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2021-12-23 5:41 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-21 6:26 [PATCH] [i386] Add define_insn_and_split for vpcmp{b, w, d, q} vpcmp{ph, ps, pd} liuhongt
2021-12-23 5:41 ` Hongtao Liu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).