public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-6103] Combine vpcmpuw + zero_extend to vpcmpuw.
@ 2021-12-23  5:43 hongtao Liu
  0 siblings, 0 replies; only message in thread
From: hongtao Liu @ 2021-12-23  5:43 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:1a7ce8570997eb1596c803443d20687b43fa2e47

commit r12-6103-g1a7ce8570997eb1596c803443d20687b43fa2e47
Author: liuhongt <hongtao.liu@intel.com>
Date:   Wed Dec 22 16:48:54 2021 +0800

    Combine vpcmpuw + zero_extend to vpcmpuw.
    
    vcmp{ps,ph,pd} and vpcmp{,u}{b,w,d,q} implicitly clear the upper bits
    of dest.
    
    gcc/ChangeLog:
    
            PR target/103750
            * config/i386/sse.md
            (*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
            New pre_reload define_insn_and_split.
            (*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
            Ditto.
            (*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
            Ditto.
            (*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
            Ditto.
            (*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
            Ditto.
            (*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
            Ditto.
            (*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
            Ditto.
            (*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
            Ditto.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/avx512bw-pr103750-1.c: New test.
            * gcc.target/i386/avx512bw-pr103750-2.c: New test.
            * gcc.target/i386/avx512f-pr103750-1.c: New test.
            * gcc.target/i386/avx512f-pr103750-2.c: New test.
            * gcc.target/i386/avx512fp16-pr103750-1.c: New test.
            * gcc.target/i386/avx512fp16-pr103750-2.c: New test.

Diff:
---
 gcc/config/i386/sse.md                             | 275 ++++++++++++
 .../gcc.target/i386/avx512bw-pr103750-1.c          | 154 +++++++
 .../gcc.target/i386/avx512bw-pr103750-2.c          | 173 ++++++++
 gcc/testsuite/gcc.target/i386/avx512f-pr103750-1.c | 426 ++++++++++++++++++
 gcc/testsuite/gcc.target/i386/avx512f-pr103750-2.c | 478 +++++++++++++++++++++
 .../gcc.target/i386/avx512fp16-pr103750-1.c        |  58 +++
 .../gcc.target/i386/avx512fp16-pr103750-2.c        |  71 +++
 7 files changed, 1635 insertions(+)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index cb1c0b1edec..69c754751a8 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3702,6 +3702,77 @@
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+;; Since vpcmpd implicitly clears the upper bits of dest, transform
+;; vpcmpd + zero_extend into a single vpcmpd.
+(define_insn_and_split "*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+  [(set (match_operand:SWI248x 0 "register_operand")
+	(zero_extend:SWI248x
+	  (unspec:<V48H_AVX512VL:avx512fmaskmode>
+	    [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand")
+	     (match_operand:V48H_AVX512VL 2 "nonimmediate_operand")
+	     (match_operand:SI 3 "const_0_to_7_operand")]
+	    UNSPEC_PCMP)))]
+  "TARGET_AVX512F
+   && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
+   && ix86_pre_reload_split ()
+   && (GET_MODE_NUNITS (<V48H_AVX512VL:MODE>mode)
+       < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:<V48H_AVX512VL:avx512fmaskmode>
+	  [(match_dup 1)
+	   (match_dup 2)
+	   (match_dup 3)]
+	  UNSPEC_PCMP))]
+{
+  operands[1] = force_reg (<V48H_AVX512VL:MODE>mode, operands[1]);
+  operands[0] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode,
+				operands[0], <SWI248x:MODE>mode);
+}
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<V48H_AVX512VL:sseinsnmode>")])
+
+(define_insn_and_split "*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2"
+  [(set (match_operand:SWI248x 0 "register_operand")
+	(zero_extend:SWI248x
+	  (unspec:<V48H_AVX512VL:avx512fmaskmode>
+	    [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand")
+	     (match_operand:V48H_AVX512VL 2 "nonimmediate_operand")
+	     (match_operand:SI 3 "const_0_to_7_operand")]
+	    UNSPEC_PCMP)))
+   (set (match_operand:<V48H_AVX512VL:avx512fmaskmode> 4 "register_operand")
+	(unspec:<V48H_AVX512VL:avx512fmaskmode>
+	    [(match_dup 1)
+	     (match_dup 2)
+	     (match_dup 3)]
+	    UNSPEC_PCMP))]
+  "TARGET_AVX512F
+   && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
+   && (GET_MODE_NUNITS (<V48H_AVX512VL:MODE>mode)
+       < GET_MODE_PRECISION (<SWI248x:MODE>mode))
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:<V48H_AVX512VL:avx512fmaskmode>
+	  [(match_dup 1)
+	   (match_dup 2)
+	   (match_dup 3)]
+	  UNSPEC_PCMP))
+   (set (match_dup 4) (match_dup 0))]
+{
+  operands[1] = force_reg (<V48H_AVX512VL:MODE>mode, operands[1]);
+  operands[0] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode,
+				operands[0], <SWI248x:MODE>mode);
+}
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<V48H_AVX512VL:sseinsnmode>")])
+
 (define_insn_and_split "*<avx512>_cmp<mode>3"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
 	(not:<avx512fmaskmode>
@@ -3735,6 +3806,73 @@
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn_and_split "*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+  [(set (match_operand:SWI248x 0 "register_operand")
+	(zero_extend:SWI248x
+	  (unspec:<VI12_AVX512VL:avx512fmaskmode>
+	    [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
+	     (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
+	     (match_operand:SI 3 "const_0_to_7_operand")]
+	    UNSPEC_PCMP)))]
+  "TARGET_AVX512BW
+  && ix86_pre_reload_split ()
+  && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
+      < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:<VI12_AVX512VL:avx512fmaskmode>
+	  [(match_dup 1)
+	   (match_dup 2)
+	   (match_dup 3)]
+	  UNSPEC_PCMP))]
+{
+  operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
+  operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
+				operands[0], <SWI248x:MODE>mode);
+}
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
+
+(define_insn_and_split "*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2"
+  [(set (match_operand:SWI248x 0 "register_operand")
+	(zero_extend:SWI248x
+	  (unspec:<VI12_AVX512VL:avx512fmaskmode>
+	    [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
+	     (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
+	     (match_operand:SI 3 "const_0_to_7_operand")]
+	    UNSPEC_PCMP)))
+   (set (match_operand:<VI12_AVX512VL:avx512fmaskmode> 4 "register_operand")
+	(unspec:<VI12_AVX512VL:avx512fmaskmode>
+	    [(match_dup 1)
+	     (match_dup 2)
+	     (match_dup 3)]
+	    UNSPEC_PCMP))]
+  "TARGET_AVX512BW
+  && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
+      < GET_MODE_PRECISION (<SWI248x:MODE>mode))
+  && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:<VI12_AVX512VL:avx512fmaskmode>
+	  [(match_dup 1)
+	   (match_dup 2)
+	   (match_dup 3)]
+	  UNSPEC_PCMP))
+   (set (match_dup 4) (match_dup 0))]
+{
+  operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
+  operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
+				operands[0], <SWI248x:MODE>mode);
+}
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
+
 (define_int_iterator UNSPEC_PCMP_ITER
   [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP])
 
@@ -3771,6 +3909,74 @@
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn_and_split "*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+  [(set (match_operand:SWI248x 0 "register_operand")
+	(zero_extend:SWI248x
+	  (unspec:<VI12_AVX512VL:avx512fmaskmode>
+	    [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
+	     (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
+	     (match_operand:SI 3 "const_0_to_7_operand")]
+	    UNSPEC_UNSIGNED_PCMP)))]
+  "TARGET_AVX512BW
+  && ix86_pre_reload_split ()
+  && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
+      < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:<VI12_AVX512VL:avx512fmaskmode>
+	  [(match_dup 1)
+	   (match_dup 2)
+	   (match_dup 3)]
+	  UNSPEC_UNSIGNED_PCMP))]
+{
+  operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
+  operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
+				operands[0], <SWI248x:MODE>mode);
+}
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
+
+;; Same as above, but the compare result is also set in mask-mode operand 4.
+(define_insn_and_split "*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2"
+  [(set (match_operand:SWI248x 0 "register_operand")
+	(zero_extend:SWI248x
+	  (unspec:<VI12_AVX512VL:avx512fmaskmode>
+	    [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
+	     (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
+	     (match_operand:SI 3 "const_0_to_7_operand")]
+	    UNSPEC_UNSIGNED_PCMP)))
+   (set (match_operand:<VI12_AVX512VL:avx512fmaskmode> 4 "register_operand")
+	(unspec:<VI12_AVX512VL:avx512fmaskmode>
+	    [(match_dup 1)
+	     (match_dup 2)
+	     (match_dup 3)]
+	    UNSPEC_UNSIGNED_PCMP))]
+  "TARGET_AVX512BW
+   && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
+       < GET_MODE_PRECISION (<SWI248x:MODE>mode))
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:<VI12_AVX512VL:avx512fmaskmode>
+	  [(match_dup 1)
+	   (match_dup 2)
+	   (match_dup 3)]
+	  UNSPEC_UNSIGNED_PCMP))
+   (set (match_dup 4) (match_dup 0))]
+{
+  operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
+  operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
+				operands[0], <SWI248x:MODE>mode);
+}
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
+
 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
 	(unspec:<avx512fmaskmode>
@@ -3785,6 +3991,75 @@
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn_and_split "*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+  [(set (match_operand:SWI248x 0 "register_operand")
+	(zero_extend:SWI248x
+	  (unspec:<VI48_AVX512VL:avx512fmaskmode>
+	    [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
+	     (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
+	     (match_operand:SI 3 "const_0_to_7_operand")]
+	    UNSPEC_UNSIGNED_PCMP)))]
+  "TARGET_AVX512F
+   && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
+   && ix86_pre_reload_split ()
+   && (GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode)
+      < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:<VI48_AVX512VL:avx512fmaskmode>
+	  [(match_dup 1)
+	   (match_dup 2)
+	   (match_dup 3)]
+	  UNSPEC_UNSIGNED_PCMP))]
+{
+  operands[1] = force_reg (<VI48_AVX512VL:MODE>mode, operands[1]);
+  operands[0] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode,
+				operands[0], <SWI248x:MODE>mode);
+}
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<VI48_AVX512VL:sseinsnmode>")])
+
+(define_insn_and_split "*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2"
+  [(set (match_operand:SWI248x 0 "register_operand")
+	(zero_extend:SWI248x
+	  (unspec:<VI48_AVX512VL:avx512fmaskmode>
+	    [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
+	     (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
+	     (match_operand:SI 3 "const_0_to_7_operand")]
+	    UNSPEC_UNSIGNED_PCMP)))
+   (set (match_operand:<VI48_AVX512VL:avx512fmaskmode> 4 "register_operand")
+	(unspec:<VI48_AVX512VL:avx512fmaskmode>
+	    [(match_dup 1)
+	     (match_dup 2)
+	     (match_dup 3)]
+	    UNSPEC_UNSIGNED_PCMP))]
+  "TARGET_AVX512F
+   && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
+   && (GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode)
+       < GET_MODE_PRECISION (<SWI248x:MODE>mode))
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:<VI48_AVX512VL:avx512fmaskmode>
+	  [(match_dup 1)
+	   (match_dup 2)
+	   (match_dup 3)]
+	  UNSPEC_UNSIGNED_PCMP))
+   (set (match_dup 4) (match_dup 0))]
+{
+  operands[1] = force_reg (<VI48_AVX512VL:MODE>mode, operands[1]);
+  operands[0] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode,
+				operands[0], <SWI248x:MODE>mode);
+}
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<VI48_AVX512VL:sseinsnmode>")])
+
 (define_insn_and_split "*<avx512>_ucmp<mode>3"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
 	(not:<avx512fmaskmode>
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c
new file mode 100644
index 00000000000..b1165f069bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c
@@ -0,0 +1,154 @@
+/* PR target/103750 */
+/* { dg-do compile }  */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */
+/* The xfail needs to be fixed.  */
+
+#include <immintrin.h>
+extern __m128i* pi128;
+extern __m256i* pi256;
+extern __m512i* pi512;
+
+unsigned char
+foo ()
+{
+  __mmask16 mask1 = _mm_cmpeq_epu8_mask (pi128[0], pi128[1]);
+  __mmask16 mask2 = _mm_cmpeq_epu8_mask (pi128[2], pi128[3]);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+foo1 ()
+{
+  __mmask16 mask1 = _mm_cmpeq_epu8_mask (pi128[0], pi128[1]);
+  __mmask16 mask2 = _mm_cmpeq_epu8_mask (pi128[2], pi128[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+foo2 ()
+{
+  __mmask32 mask1 = _mm256_cmpeq_epu8_mask (pi256[0], pi256[1]);
+  __mmask32 mask2 = _mm256_cmpeq_epu8_mask (pi256[2], pi256[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+foo3 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]);
+  return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+foo4 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+foo5 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+foo6 ()
+{
+  __mmask16 mask1 = _mm256_cmpeq_epu16_mask (pi256[0], pi256[1]);
+  __mmask16 mask2 = _mm256_cmpeq_epu16_mask (pi256[2], pi256[3]);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+foo7 ()
+{
+  __mmask16 mask1 = _mm256_cmpeq_epu16_mask (pi256[0], pi256[1]);
+  __mmask16 mask2 = _mm256_cmpeq_epu16_mask (pi256[2], pi256[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+foo8 ()
+{
+  __mmask32 mask1 = _mm512_cmpeq_epu16_mask (pi512[0], pi512[1]);
+  __mmask32 mask2 = _mm512_cmpeq_epu16_mask (pi512[2], pi512[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo ()
+{
+  __mmask16 mask1 = _mm_cmpeq_epi8_mask (pi128[0], pi128[1]);
+  __mmask16 mask2 = _mm_cmpeq_epi8_mask (pi128[2], pi128[3]);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo1 ()
+{
+  __mmask16 mask1 = _mm_cmpeq_epi8_mask (pi128[0], pi128[1]);
+  __mmask16 mask2 = _mm_cmpeq_epi8_mask (pi128[2], pi128[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo2 ()
+{
+  __mmask32 mask1 = _mm256_cmpeq_epi8_mask (pi256[0], pi256[1]);
+  __mmask32 mask2 = _mm256_cmpeq_epi8_mask (pi256[2], pi256[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo3 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]);
+  return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo4 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo5 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo6 ()
+{
+  __mmask16 mask1 = _mm256_cmpeq_epi16_mask (pi256[0], pi256[1]);
+  __mmask16 mask2 = _mm256_cmpeq_epi16_mask (pi256[2], pi256[3]);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo7 ()
+{
+  __mmask16 mask1 = _mm256_cmpeq_epi16_mask (pi256[0], pi256[1]);
+  __mmask16 mask2 = _mm256_cmpeq_epi16_mask (pi256[2], pi256[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo8 ()
+{
+  __mmask32 mask1 = _mm512_cmpeq_epi16_mask (pi512[0], pi512[1]);
+  __mmask32 mask2 = _mm512_cmpeq_epi16_mask (pi512[2], pi512[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c
new file mode 100644
index 00000000000..7303f5403ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c
@@ -0,0 +1,173 @@
+/* PR target/103750 */
+/* { dg-do compile }  */
+/* { dg-options "-O2 -mavx512dq -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */
+/* The xfail needs to be fixed.  */
+
+#include <immintrin.h>
+extern __m128i* pi128;
+extern __m256i* pi256;
+extern __m512i* pi512;
+
+extern char a, b;
+void
+foo ()
+{
+  __mmask16 mask1 = _mm_cmpeq_epu8_mask (pi128[0], pi128[1]);
+  __mmask16 mask2 = _mm_cmpeq_epu8_mask (pi128[2], pi128[3]);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+foo1 ()
+{
+  __mmask16 mask1 = _mm_cmpeq_epu8_mask (pi128[0], pi128[1]);
+  __mmask16 mask2 = _mm_cmpeq_epu8_mask (pi128[2], pi128[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+foo2 ()
+{
+  __mmask32 mask1 = _mm256_cmpeq_epu8_mask (pi256[0], pi256[1]);
+  __mmask32 mask2 = _mm256_cmpeq_epu8_mask (pi256[2], pi256[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask32_u8 (mask1, mask2);
+}
+
+void
+foo3 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]);
+  a = _kortestz_mask16_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo4 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo5 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo6 ()
+{
+  __mmask16 mask1 = _mm256_cmpeq_epu16_mask (pi256[0], pi256[1]);
+  __mmask16 mask2 = _mm256_cmpeq_epu16_mask (pi256[2], pi256[3]);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+foo7 ()
+{
+  __mmask16 mask1 = _mm256_cmpeq_epu16_mask (pi256[0], pi256[1]);
+  __mmask16 mask2 = _mm256_cmpeq_epu16_mask (pi256[2], pi256[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+foo8 ()
+{
+  __mmask32 mask1 = _mm512_cmpeq_epu16_mask (pi512[0], pi512[1]);
+  __mmask32 mask2 = _mm512_cmpeq_epu16_mask (pi512[2], pi512[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask32_u8 (mask1, mask2);
+}
+
+void
+sign_foo ()
+{
+  __mmask16 mask1 = _mm_cmpeq_epi8_mask (pi128[0], pi128[1]);
+  __mmask16 mask2 = _mm_cmpeq_epi8_mask (pi128[2], pi128[3]);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+sign_foo1 ()
+{
+  __mmask16 mask1 = _mm_cmpeq_epi8_mask (pi128[0], pi128[1]);
+  __mmask16 mask2 = _mm_cmpeq_epi8_mask (pi128[2], pi128[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+sign_foo2 ()
+{
+  __mmask32 mask1 = _mm256_cmpeq_epi8_mask (pi256[0], pi256[1]);
+  __mmask32 mask2 = _mm256_cmpeq_epi8_mask (pi256[2], pi256[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask32_u8 (mask1, mask2);
+}
+
+void
+sign_foo3 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]);
+  a = _kortestz_mask16_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo4 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo5 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo6 ()
+{
+  __mmask16 mask1 = _mm256_cmpeq_epi16_mask (pi256[0], pi256[1]);
+  __mmask16 mask2 = _mm256_cmpeq_epi16_mask (pi256[2], pi256[3]);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+sign_foo7 ()
+{
+  __mmask16 mask1 = _mm256_cmpeq_epi16_mask (pi256[0], pi256[1]);
+  __mmask16 mask2 = _mm256_cmpeq_epi16_mask (pi256[2], pi256[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+sign_foo8 ()
+{
+  __mmask32 mask1 = _mm512_cmpeq_epi16_mask (pi512[0], pi512[1]);
+  __mmask32 mask2 = _mm512_cmpeq_epi16_mask (pi512[2], pi512[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask32_u8 (mask1, mask2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr103750-1.c b/gcc/testsuite/gcc.target/i386/avx512f-pr103750-1.c
new file mode 100644
index 00000000000..613efe0f926
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr103750-1.c
@@ -0,0 +1,426 @@
+/* PR target/103750 */
+/* { dg-do compile }  */
+/* { dg-options "-O2 -mavx512f -mavx512vl -mavx512bw" } */
+/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */
+/* The xfail needs to be fixed.  */
+
+#include <immintrin.h>
+extern __m128i* pi128;
+extern __m256i* pi256;
+extern __m512i* pi512;
+
+extern __m128* ps128;
+extern __m256* ps256;
+extern __m512* ps512;
+
+extern __m128d* pd128;
+extern __m256d* pd256;
+extern __m512d* pd512;
+
+unsigned char
+foo ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]);
+  return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+foo1 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+foo2 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+foo3 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]);
+  return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+foo4 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+foo5 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+foo6 ()
+{
+  __mmask16 mask1 = _mm512_cmpeq_epu32_mask (pi512[0], pi512[1]);
+  __mmask16 mask2 = _mm512_cmpeq_epu32_mask (pi512[2], pi512[3]);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+foo7 ()
+{
+  __mmask16 mask1 = _mm512_cmpeq_epu32_mask (pi512[0], pi512[1]);
+  __mmask16 mask2 = _mm512_cmpeq_epu32_mask (pi512[2], pi512[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+foo8 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]);
+  return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+foo9 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+foo10 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+foo11 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]);
+  return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+foo12 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+foo13 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+foo14 ()
+{
+  __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]);
+  __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]);
+  return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+foo15 ()
+{
+  __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]);
+  __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+foo16 ()
+{
+  __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]);
+  __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]);
+  return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo1 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo2 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo3 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]);
+  return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo4 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo5 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo6 ()
+{
+  __mmask16 mask1 = _mm512_cmpeq_epi32_mask (pi512[0], pi512[1]);
+  __mmask16 mask2 = _mm512_cmpeq_epi32_mask (pi512[2], pi512[3]);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo7 ()
+{
+  __mmask16 mask1 = _mm512_cmpeq_epi32_mask (pi512[0], pi512[1]);
+  __mmask16 mask2 = _mm512_cmpeq_epi32_mask (pi512[2], pi512[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo8 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]);
+  return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo9 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo10 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo11 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]);
+  return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo12 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo13 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo14 ()
+{
+  __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]);
+  __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]);
+  return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo15 ()
+{
+  __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]);
+  __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo16 ()
+{
+  __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]);
+  __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo ()
+{
+  __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1);
+  __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1);
+  return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo1 ()
+{
+  __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1);
+  __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo2 ()
+{
+  __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1);
+  __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo3 ()
+{
+  __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1);
+  __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1);
+  return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo4 ()
+{
+  __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1);
+  __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo5 ()
+{
+  __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1);
+  __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo6 ()
+{
+  __mmask16 mask1 = _mm512_cmp_ps_mask (ps512[0], ps512[1], 1);
+  __mmask16 mask2 = _mm512_cmp_ps_mask (ps512[2], ps512[3], 1);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo7 ()
+{
+  __mmask16 mask1 = _mm512_cmp_ps_mask (ps512[0], ps512[1], 1);
+  __mmask16 mask2 = _mm512_cmp_ps_mask (ps512[2], ps512[3], 1);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo8 ()
+{
+  __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1);
+  __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1);
+  return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo9 ()
+{
+  __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1);
+  __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo10 ()
+{
+  __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1);
+  __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo11 ()
+{
+  __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1);
+  __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1);
+  return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo12 ()
+{
+  __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1);
+  __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo13 ()
+{
+  __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1);
+  __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo14 ()
+{
+  __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1);
+  __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1);
+  return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo15 ()
+{
+  __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1);
+  __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+float_foo16 ()
+{
+  __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1);
+  __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512f-pr103750-2.c
new file mode 100644
index 00000000000..a6c2b06747d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr103750-2.c
@@ -0,0 +1,478 @@
+/* PR target/103750 */
+/* { dg-do compile }  */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -mavx512bw" } */
+/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */
+/* The ia32 xfail above still needs to be fixed.  */
+
+#include <immintrin.h>
+extern __m128i* pi128;
+extern __m256i* pi256;
+extern __m512i* pi512;
+
+extern __m128* ps128;
+extern __m256* ps256;
+extern __m512* ps512;
+
+extern __m128d* pd128;
+extern __m256d* pd256;
+extern __m512d* pd512;
+
+extern char a, b;
+void
+foo ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]);
+  a = _kortestz_mask16_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo1 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo2 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo3 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]);
+  a = _kortestz_mask16_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo4 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo5 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo6 ()
+{
+  __mmask16 mask1 = _mm512_cmpeq_epu32_mask (pi512[0], pi512[1]);
+  __mmask16 mask2 = _mm512_cmpeq_epu32_mask (pi512[2], pi512[3]);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+foo7 ()
+{
+  __mmask16 mask1 = _mm512_cmpeq_epu32_mask (pi512[0], pi512[1]);
+  __mmask16 mask2 = _mm512_cmpeq_epu32_mask (pi512[2], pi512[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+foo8 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]);
+  a = _kortestz_mask16_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo9 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo10 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo11 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]);
+  a = _kortestz_mask16_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo12 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo13 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo14 ()
+{
+  __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]);
+  __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]);
+  a = _kortestz_mask16_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo15 ()
+{
+  __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]);
+  __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+foo16 ()
+{
+  __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]);
+  __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]);
+  a = _kortestz_mask16_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo1 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo2 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo3 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]);
+  a = _kortestz_mask16_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo4 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo5 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo6 ()
+{
+  __mmask16 mask1 = _mm512_cmpeq_epi32_mask (pi512[0], pi512[1]);
+  __mmask16 mask2 = _mm512_cmpeq_epi32_mask (pi512[2], pi512[3]);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+sign_foo7 ()
+{
+  __mmask16 mask1 = _mm512_cmpeq_epi32_mask (pi512[0], pi512[1]);
+  __mmask16 mask2 = _mm512_cmpeq_epi32_mask (pi512[2], pi512[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+sign_foo8 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]);
+  a = _kortestz_mask16_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo9 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo10 ()
+{
+  __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]);
+  __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo11 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]);
+  a = _kortestz_mask16_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo12 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo13 ()
+{
+  __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]);
+  __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo14 ()
+{
+  __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]);
+  __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]);
+  a = _kortestz_mask16_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo15 ()
+{
+  __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]);
+  __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo16 ()
+{
+  __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]);
+  __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo ()
+{
+  __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1);
+  __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1);
+  a = _kortestz_mask16_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo1 ()
+{
+  __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1);
+  __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo2 ()
+{
+  __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1);
+  __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo3 ()
+{
+  __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1);
+  __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1);
+  a = _kortestz_mask16_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo4 ()
+{
+  __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1);
+  __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo5 ()
+{
+  __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1);
+  __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo6 ()
+{
+  __mmask16 mask1 = _mm512_cmp_ps_mask (ps512[0], ps512[1], 1);
+  __mmask16 mask2 = _mm512_cmp_ps_mask (ps512[2], ps512[3], 1);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+float_foo7 ()
+{
+  __mmask16 mask1 = _mm512_cmp_ps_mask (ps512[0], ps512[1], 1);
+  __mmask16 mask2 = _mm512_cmp_ps_mask (ps512[2], ps512[3], 1);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+float_foo8 ()
+{
+  __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1);
+  __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1);
+  a = _kortestz_mask16_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo9 ()
+{
+  __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1);
+  __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo10 ()
+{
+  __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1);
+  __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo11 ()
+{
+  __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1);
+  __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1);
+  a = _kortestz_mask16_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo12 ()
+{
+  __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1);
+  __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo13 ()
+{
+  __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1);
+  __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo14 ()
+{
+  __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1);
+  __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1);
+  a = _kortestz_mask16_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo15 ()
+{
+  __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1);
+  __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1);
+  a = _kortestz_mask32_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+float_foo16 ()
+{
+  __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1);
+  __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1);
+  a = _kortestz_mask64_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-1.c
new file mode 100644
index 00000000000..eaf6d1e9819
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-1.c
@@ -0,0 +1,58 @@
+/* PR target/103750 */
+/* { dg-do compile }  */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */
+/* The ia32 xfail above still needs to be fixed.  */
+
+#include <immintrin.h>
+extern __m128h* ph128;
+extern __m256h* ph256;
+extern __m512h* ph512;
+
+unsigned char
+sign_foo3 ()
+{
+  __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1);
+  __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1);
+  return _kortestz_mask16_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo4 ()
+{
+  __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1);
+  __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo5 ()
+{
+  __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1);
+  __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo6 ()
+{
+  __mmask16 mask1 = _mm256_cmp_ph_mask (ph256[0], ph256[1], 1);
+  __mmask16 mask2 = _mm256_cmp_ph_mask (ph256[2], ph256[3], 1);
+  return _kortestz_mask32_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo7 ()
+{
+  __mmask16 mask1 = _mm256_cmp_ph_mask (ph256[0], ph256[1], 1);
+  __mmask16 mask2 = _mm256_cmp_ph_mask (ph256[2], ph256[3], 1);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
+
+unsigned char
+sign_foo8 ()
+{
+  __mmask32 mask1 = _mm512_cmp_ph_mask (ph512[0], ph512[1], 1);
+  __mmask32 mask2 = _mm512_cmp_ph_mask (ph512[2], ph512[3], 1);
+  return _kortestz_mask64_u8 (mask1, mask2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-2.c
new file mode 100644
index 00000000000..3d3a033fe64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-2.c
@@ -0,0 +1,71 @@
+/* PR target/103750 */
+/* { dg-do compile }  */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */
+/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */
+/* The ia32 xfail above still needs to be fixed.  */
+
+#include <immintrin.h>
+extern __m128h* ph128;
+extern __m256h* ph256;
+extern __m512h* ph512;
+
+extern char a, b;
+void
+sign_foo3 ()
+{
+  __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1);
+  __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1);
+
+  a = _kortestz_mask16_u8 (mask1, mask2);
+  b = _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo4 ()
+{
+  __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1);
+  __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1);
+
+  a =  _kortestz_mask32_u8 (mask1, mask2);
+  b =  _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo5 ()
+{
+  __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1);
+  __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1);
+
+  a =  _kortestz_mask64_u8 (mask1, mask2);
+  b =  _kortestz_mask8_u8 (mask1, mask2);
+}
+
+void
+sign_foo6 ()
+{
+  __mmask16 mask1 = _mm256_cmp_ph_mask (ph256[0], ph256[1], 1);
+  __mmask16 mask2 = _mm256_cmp_ph_mask (ph256[2], ph256[3], 1);
+
+  a =  _kortestz_mask32_u8 (mask1, mask2);
+  b =  _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+sign_foo7 ()
+{
+  __mmask16 mask1 = _mm256_cmp_ph_mask (ph256[0], ph256[1], 1);
+  __mmask16 mask2 = _mm256_cmp_ph_mask (ph256[2], ph256[3], 1);
+
+  a =  _kortestz_mask64_u8 (mask1, mask2);
+  b =  _kortestz_mask16_u8 (mask1, mask2);
+}
+
+void
+sign_foo8 ()
+{
+  __mmask32 mask1 = _mm512_cmp_ph_mask (ph512[0], ph512[1], 1);
+  __mmask32 mask2 = _mm512_cmp_ph_mask (ph512[2], ph512[3], 1);
+
+  a =  _kortestz_mask64_u8 (mask1, mask2);
+  b =  _kortestz_mask32_u8 (mask1, mask2);
+}


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-12-23  5:43 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-23  5:43 [gcc r12-6103] Combine vpcmpuw + zero_extend to vpcmpuw hongtao Liu

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).