public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: hongtao Liu <liuhongt@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-6103] Combine vpcmpuw + zero_extend to vpcmpuw. Date: Thu, 23 Dec 2021 05:43:05 +0000 (GMT) [thread overview] Message-ID: <20211223054305.122693858C60@sourceware.org> (raw) https://gcc.gnu.org/g:1a7ce8570997eb1596c803443d20687b43fa2e47 commit r12-6103-g1a7ce8570997eb1596c803443d20687b43fa2e47 Author: liuhongt <hongtao.liu@intel.com> Date: Wed Dec 22 16:48:54 2021 +0800 Combine vpcmpuw + zero_extend to vpcmpuw. vcmp{ps,ph,pd} and vpcmp{,u}{b,w,d,q} implicitly clear the upper bits of dest. gcc/ChangeLog: PR target/103750 * config/i386/sse.md (*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>): New pre_reload define_insn_and_split. (*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>): Ditto. (*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>): Ditto. (*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>): Ditto. (*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2): Ditto. (*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2): Ditto. (*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2): Ditto. (*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512bw-pr103750-1.c: New test. * gcc.target/i386/avx512bw-pr103750-2.c: New test. * gcc.target/i386/avx512f-pr103750-1.c: New test. * gcc.target/i386/avx512f-pr103750-2.c: New test. * gcc.target/i386/avx512fp16-pr103750-1.c: New test. * gcc.target/i386/avx512fp16-pr103750-2.c: New test. 
Diff: --- gcc/config/i386/sse.md | 275 ++++++++++++ .../gcc.target/i386/avx512bw-pr103750-1.c | 154 +++++++ .../gcc.target/i386/avx512bw-pr103750-2.c | 173 ++++++++ gcc/testsuite/gcc.target/i386/avx512f-pr103750-1.c | 426 ++++++++++++++++++ gcc/testsuite/gcc.target/i386/avx512f-pr103750-2.c | 478 +++++++++++++++++++++ .../gcc.target/i386/avx512fp16-pr103750-1.c | 58 +++ .../gcc.target/i386/avx512fp16-pr103750-2.c | 71 +++ 7 files changed, 1635 insertions(+) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index cb1c0b1edec..69c754751a8 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3702,6 +3702,77 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +;; Since vpcmpd implicitly clears the upper bits of dest, the zero_extend +;; is redundant: transform vpcmpd + zero_extend to a single vpcmpd. +(define_insn_and_split "*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>" + [(set (match_operand:SWI248x 0 "register_operand") + (zero_extend:SWI248x + (unspec:<V48H_AVX512VL:avx512fmaskmode> + [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand") + (match_operand:V48H_AVX512VL 2 "nonimmediate_operand") + (match_operand:SI 3 "const_0_to_7_operand" "n")] + UNSPEC_PCMP)))] + "TARGET_AVX512F + && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW) + && ix86_pre_reload_split () + && (GET_MODE_NUNITS (<V48H_AVX512VL:MODE>mode) + < GET_MODE_PRECISION (<SWI248x:MODE>mode))" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:<V48H_AVX512VL:avx512fmaskmode> + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_PCMP))] +{ + operands[1] = force_reg (<V48H_AVX512VL:MODE>mode, operands[1]); + operands[0] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode, + operands[0], <SWI248x:MODE>mode); +} + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<V48H_AVX512VL:sseinsnmode>")]) + +(define_insn_and_split "*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2" 
[(set (match_operand:SWI248x 0 "register_operand") + (zero_extend:SWI248x + (unspec:<V48H_AVX512VL:avx512fmaskmode> + [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand") + (match_operand:V48H_AVX512VL 2 "nonimmediate_operand") + (match_operand:SI 3 "const_0_to_7_operand")] + UNSPEC_PCMP))) + (set (match_operand:<V48H_AVX512VL:avx512fmaskmode> 4 "register_operand") + (unspec:<V48H_AVX512VL:avx512fmaskmode> + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_PCMP))] + "TARGET_AVX512F + && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW) + && (GET_MODE_NUNITS (<V48H_AVX512VL:MODE>mode) + < GET_MODE_PRECISION (<SWI248x:MODE>mode)) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:<V48H_AVX512VL:avx512fmaskmode> + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_PCMP)) + (set (match_dup 4) (match_dup 0))] +{ + operands[1] = force_reg (<V48H_AVX512VL:MODE>mode, operands[1]); + operands[0] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode, + operands[0], <SWI248x:MODE>mode); +} + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<V48H_AVX512VL:sseinsnmode>")]) + (define_insn_and_split "*<avx512>_cmp<mode>3" [(set (match_operand:<avx512fmaskmode> 0 "register_operand") (not:<avx512fmaskmode> @@ -3735,6 +3806,73 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +(define_insn_and_split "*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>" + [(set (match_operand:SWI248x 0 "register_operand") + (zero_extend:SWI248x + (unspec:<VI12_AVX512VL:avx512fmaskmode> + [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand") + (match_operand:VI12_AVX512VL 2 "nonimmediate_operand") + (match_operand:SI 3 "const_0_to_7_operand")] + UNSPEC_PCMP)))] + "TARGET_AVX512BW + && ix86_pre_reload_split () + && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode) + < GET_MODE_PRECISION (<SWI248x:MODE>mode))" + "#" + "&& 1" + [(set (match_dup 0) + 
(unspec:<VI12_AVX512VL:avx512fmaskmode> + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_PCMP))] +{ + operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]); + operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode, + operands[0], <SWI248x:MODE>mode); +} + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")]) + +(define_insn_and_split "*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2" + [(set (match_operand:SWI248x 0 "register_operand") + (zero_extend:SWI248x + (unspec:<VI12_AVX512VL:avx512fmaskmode> + [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand") + (match_operand:VI12_AVX512VL 2 "nonimmediate_operand") + (match_operand:SI 3 "const_0_to_7_operand")] + UNSPEC_PCMP))) + (set (match_operand:<VI12_AVX512VL:avx512fmaskmode> 4 "register_operand") + (unspec:<VI12_AVX512VL:avx512fmaskmode> + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_PCMP))] + "TARGET_AVX512BW + && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode) + < GET_MODE_PRECISION (<SWI248x:MODE>mode)) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:<VI12_AVX512VL:avx512fmaskmode> + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_PCMP)) + (set (match_dup 4) (match_dup 0))] +{ + operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]); + operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode, + operands[0], <SWI248x:MODE>mode); +} + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")]) + (define_int_iterator UNSPEC_PCMP_ITER [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP]) @@ -3771,6 +3909,74 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +(define_insn_and_split "*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>" + [(set (match_operand:SWI248x 0 "register_operand") + (zero_extend:SWI248x + 
(unspec:<VI12_AVX512VL:avx512fmaskmode> + [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand") + (match_operand:VI12_AVX512VL 2 "nonimmediate_operand") + (match_operand:SI 3 "const_0_to_7_operand")] + UNSPEC_UNSIGNED_PCMP)))] + "TARGET_AVX512BW + && ix86_pre_reload_split () + && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode) + < GET_MODE_PRECISION (<SWI248x:MODE>mode))" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:<VI12_AVX512VL:avx512fmaskmode> + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_UNSIGNED_PCMP))] +{ + operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]); + operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode, + operands[0], <SWI248x:MODE>mode); +} + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")]) + +(define_insn_and_split "*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2" + [(set (match_operand:SWI248x 0 "register_operand") + (zero_extend:SWI248x + (unspec:<VI12_AVX512VL:avx512fmaskmode> + [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand") + (match_operand:VI12_AVX512VL 2 "nonimmediate_operand") + (match_operand:SI 3 "const_0_to_7_operand")] + UNSPEC_UNSIGNED_PCMP))) + (set (match_operand:<VI12_AVX512VL:avx512fmaskmode> 4 "register_operand") + (unspec:<VI12_AVX512VL:avx512fmaskmode> + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_UNSIGNED_PCMP))] + "TARGET_AVX512BW + && ix86_pre_reload_split () + && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode) + < GET_MODE_PRECISION (<SWI248x:MODE>mode)) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:<VI12_AVX512VL:avx512fmaskmode> + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_UNSIGNED_PCMP)) + (set (match_dup 4) (match_dup 0))] +{ + operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]); + operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode, + operands[0], <SWI248x:MODE>mode); +} + 
[(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")]) + (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") (unspec:<avx512fmaskmode> @@ -3785,6 +3991,75 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +(define_insn_and_split "*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>" + [(set (match_operand:SWI248x 0 "register_operand") + (zero_extend:SWI248x + (unspec:<VI48_AVX512VL:avx512fmaskmode> + [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand") + (match_operand:VI48_AVX512VL 2 "nonimmediate_operand") + (match_operand:SI 3 "const_0_to_7_operand")] + UNSPEC_UNSIGNED_PCMP)))] + "TARGET_AVX512F + && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW) + && ix86_pre_reload_split () + && (GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode) + < GET_MODE_PRECISION (<SWI248x:MODE>mode))" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:<VI48_AVX512VL:avx512fmaskmode> + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_UNSIGNED_PCMP))] +{ + operands[1] = force_reg (<VI48_AVX512VL:MODE>mode, operands[1]); + operands[0] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode, + operands[0], <SWI248x:MODE>mode); +} + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<VI48_AVX512VL:sseinsnmode>")]) + +(define_insn_and_split "*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2" + [(set (match_operand:SWI248x 0 "register_operand") + (zero_extend:SWI248x + (unspec:<VI48_AVX512VL:avx512fmaskmode> + [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand") + (match_operand:VI48_AVX512VL 2 "nonimmediate_operand") + (match_operand:SI 3 "const_0_to_7_operand")] + UNSPEC_UNSIGNED_PCMP))) + (set (match_operand:<VI48_AVX512VL:avx512fmaskmode> 4 "register_operand") + (unspec:<VI48_AVX512VL:avx512fmaskmode> + 
[(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_UNSIGNED_PCMP))] + "TARGET_AVX512F + && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW) + && (GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode) + < GET_MODE_PRECISION (<SWI248x:MODE>mode)) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:<VI48_AVX512VL:avx512fmaskmode> + [(match_dup 1) + (match_dup 2) + (match_dup 3)] + UNSPEC_UNSIGNED_PCMP)) + (set (match_dup 4) (match_dup 0))] +{ + operands[1] = force_reg (<VI48_AVX512VL:MODE>mode, operands[1]); + operands[0] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode, + operands[0], <SWI248x:MODE>mode); +} + [(set_attr "type" "ssecmp") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<VI48_AVX512VL:sseinsnmode>")]) + (define_insn_and_split "*<avx512>_ucmp<mode>3" [(set (match_operand:<avx512fmaskmode> 0 "register_operand") (not:<avx512fmaskmode> diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c new file mode 100644 index 00000000000..b1165f069bb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c @@ -0,0 +1,154 @@ +/* PR target/103750 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512bw -mavx512vl" } */ +/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ +/* xfail need to be fixed. 
*/ + +#include <immintrin.h> +extern __m128i* pi128; +extern __m256i* pi256; +extern __m512i* pi512; + +unsigned char +foo () +{ + __mmask16 mask1 = _mm_cmpeq_epu8_mask (pi128[0], pi128[1]); + __mmask16 mask2 = _mm_cmpeq_epu8_mask (pi128[2], pi128[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +foo1 () +{ + __mmask16 mask1 = _mm_cmpeq_epu8_mask (pi128[0], pi128[1]); + __mmask16 mask2 = _mm_cmpeq_epu8_mask (pi128[2], pi128[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +foo2 () +{ + __mmask32 mask1 = _mm256_cmpeq_epu8_mask (pi256[0], pi256[1]); + __mmask32 mask2 = _mm256_cmpeq_epu8_mask (pi256[2], pi256[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +foo3 () +{ + __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +foo4 () +{ + __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +foo5 () +{ + __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +foo6 () +{ + __mmask16 mask1 = _mm256_cmpeq_epu16_mask (pi256[0], pi256[1]); + __mmask16 mask2 = _mm256_cmpeq_epu16_mask (pi256[2], pi256[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +foo7 () +{ + __mmask16 mask1 = _mm256_cmpeq_epu16_mask (pi256[0], pi256[1]); + __mmask16 mask2 = _mm256_cmpeq_epu16_mask (pi256[2], pi256[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +foo8 () +{ + __mmask32 mask1 = _mm512_cmpeq_epu16_mask (pi512[0], pi512[1]); + __mmask32 mask2 = _mm512_cmpeq_epu16_mask (pi512[2], pi512[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo () +{ + __mmask16 mask1 = 
_mm_cmpeq_epi8_mask (pi128[0], pi128[1]); + __mmask16 mask2 = _mm_cmpeq_epi8_mask (pi128[2], pi128[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo1 () +{ + __mmask16 mask1 = _mm_cmpeq_epi8_mask (pi128[0], pi128[1]); + __mmask16 mask2 = _mm_cmpeq_epi8_mask (pi128[2], pi128[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo2 () +{ + __mmask32 mask1 = _mm256_cmpeq_epi8_mask (pi256[0], pi256[1]); + __mmask32 mask2 = _mm256_cmpeq_epi8_mask (pi256[2], pi256[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo3 () +{ + __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +sign_foo4 () +{ + __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo5 () +{ + __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo6 () +{ + __mmask16 mask1 = _mm256_cmpeq_epi16_mask (pi256[0], pi256[1]); + __mmask16 mask2 = _mm256_cmpeq_epi16_mask (pi256[2], pi256[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo7 () +{ + __mmask16 mask1 = _mm256_cmpeq_epi16_mask (pi256[0], pi256[1]); + __mmask16 mask2 = _mm256_cmpeq_epi16_mask (pi256[2], pi256[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo8 () +{ + __mmask32 mask1 = _mm512_cmpeq_epi16_mask (pi512[0], pi512[1]); + __mmask32 mask2 = _mm512_cmpeq_epi16_mask (pi512[2], pi512[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c new file mode 100644 index 
00000000000..7303f5403ba --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c @@ -0,0 +1,173 @@ +/* PR target/103750 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512dq -mavx512bw -mavx512vl" } */ +/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ +/* The ia32 xfail above needs to be fixed. */ + +#include <immintrin.h> +extern __m128i* pi128; +extern __m256i* pi256; +extern __m512i* pi512; + +extern char a, b; +void +foo () +{ + __mmask16 mask1 = _mm_cmpeq_epu8_mask (pi128[0], pi128[1]); + __mmask16 mask2 = _mm_cmpeq_epu8_mask (pi128[2], pi128[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +foo1 () +{ + __mmask16 mask1 = _mm_cmpeq_epu8_mask (pi128[0], pi128[1]); + __mmask16 mask2 = _mm_cmpeq_epu8_mask (pi128[2], pi128[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +foo2 () +{ + __mmask32 mask1 = _mm256_cmpeq_epu8_mask (pi256[0], pi256[1]); + __mmask32 mask2 = _mm256_cmpeq_epu8_mask (pi256[2], pi256[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask32_u8 (mask1, mask2); +} + +void +foo3 () +{ + __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo4 () +{ + __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo5 () +{ + __mmask8 mask1 = _mm_cmpeq_epu16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu16_mask (pi128[1], pi128[2]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo6 () +{ + __mmask16 mask1 = _mm256_cmpeq_epu16_mask (pi256[0], pi256[1]); + __mmask16 mask2 = _mm256_cmpeq_epu16_mask (pi256[2], pi256[3]); + a = 
_kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +foo7 () +{ + __mmask16 mask1 = _mm256_cmpeq_epu16_mask (pi256[0], pi256[1]); + __mmask16 mask2 = _mm256_cmpeq_epu16_mask (pi256[2], pi256[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +foo8 () +{ + __mmask32 mask1 = _mm512_cmpeq_epu16_mask (pi512[0], pi512[1]); + __mmask32 mask2 = _mm512_cmpeq_epu16_mask (pi512[2], pi512[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask32_u8 (mask1, mask2); +} + +void +sign_foo () +{ + __mmask16 mask1 = _mm_cmpeq_epi8_mask (pi128[0], pi128[1]); + __mmask16 mask2 = _mm_cmpeq_epi8_mask (pi128[2], pi128[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +sign_foo1 () +{ + __mmask16 mask1 = _mm_cmpeq_epi8_mask (pi128[0], pi128[1]); + __mmask16 mask2 = _mm_cmpeq_epi8_mask (pi128[2], pi128[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +sign_foo2 () +{ + __mmask32 mask1 = _mm256_cmpeq_epi8_mask (pi256[0], pi256[1]); + __mmask32 mask2 = _mm256_cmpeq_epi8_mask (pi256[2], pi256[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask32_u8 (mask1, mask2); +} + +void +sign_foo3 () +{ + __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo4 () +{ + __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo5 () +{ + __mmask8 mask1 = _mm_cmpeq_epi16_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi16_mask (pi128[1], pi128[2]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo6 () 
+{ + __mmask16 mask1 = _mm256_cmpeq_epi16_mask (pi256[0], pi256[1]); + __mmask16 mask2 = _mm256_cmpeq_epi16_mask (pi256[2], pi256[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +sign_foo7 () +{ + __mmask16 mask1 = _mm256_cmpeq_epi16_mask (pi256[0], pi256[1]); + __mmask16 mask2 = _mm256_cmpeq_epi16_mask (pi256[2], pi256[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +sign_foo8 () +{ + __mmask32 mask1 = _mm512_cmpeq_epi16_mask (pi512[0], pi512[1]); + __mmask32 mask2 = _mm512_cmpeq_epi16_mask (pi512[2], pi512[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask32_u8 (mask1, mask2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr103750-1.c b/gcc/testsuite/gcc.target/i386/avx512f-pr103750-1.c new file mode 100644 index 00000000000..613efe0f926 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr103750-1.c @@ -0,0 +1,426 @@ +/* PR target/103750 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f -mavx512vl -mavx512bw" } */ +/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ +/* xfail need to be fixed. 
*/ + +#include <immintrin.h> +extern __m128i* pi128; +extern __m256i* pi256; +extern __m512i* pi512; + +extern __m128* ps128; +extern __m256* ps256; +extern __m512* ps512; + +extern __m128d* pd128; +extern __m256d* pd256; +extern __m512d* pd512; + +unsigned char +foo () +{ + __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +foo1 () +{ + __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +foo2 () +{ + __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +foo3 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +foo4 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +foo5 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +foo6 () +{ + __mmask16 mask1 = _mm512_cmpeq_epu32_mask (pi512[0], pi512[1]); + __mmask16 mask2 = _mm512_cmpeq_epu32_mask (pi512[2], pi512[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +foo7 () +{ + __mmask16 mask1 = _mm512_cmpeq_epu32_mask (pi512[0], pi512[1]); + __mmask16 mask2 = _mm512_cmpeq_epu32_mask (pi512[2], pi512[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +foo8 () +{ + __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = 
_mm_cmpeq_epu64_mask (pi128[2], pi128[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +foo9 () +{ + __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +foo10 () +{ + __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +foo11 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +foo12 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +foo13 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +foo14 () +{ + __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +foo15 () +{ + __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +foo16 () +{ + __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo () +{ + __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +sign_foo1 () +{ + __mmask8 mask1 = 
_mm_cmpeq_epi32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo2 () +{ + __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo3 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +sign_foo4 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo5 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo6 () +{ + __mmask16 mask1 = _mm512_cmpeq_epi32_mask (pi512[0], pi512[1]); + __mmask16 mask2 = _mm512_cmpeq_epi32_mask (pi512[2], pi512[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo7 () +{ + __mmask16 mask1 = _mm512_cmpeq_epi32_mask (pi512[0], pi512[1]); + __mmask16 mask2 = _mm512_cmpeq_epi32_mask (pi512[2], pi512[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo8 () +{ + __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +sign_foo9 () +{ + __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo10 () +{ + __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]); + 
return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo11 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +sign_foo12 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo13 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo14 () +{ + __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +sign_foo15 () +{ + __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo16 () +{ + __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +float_foo () +{ + __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1); + __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +float_foo1 () +{ + __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1); + __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +float_foo2 () +{ + __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1); + __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +float_foo3 () +{ + __mmask8 mask1 = 
_mm256_cmp_ps_mask (ps256[0], ps256[1], 1); + __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +float_foo4 () +{ + __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1); + __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +float_foo5 () +{ + __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1); + __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +float_foo6 () +{ + __mmask16 mask1 = _mm512_cmp_ps_mask (ps512[0], ps512[1], 1); + __mmask16 mask2 = _mm512_cmp_ps_mask (ps512[2], ps512[3], 1); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +float_foo7 () +{ + __mmask16 mask1 = _mm512_cmp_ps_mask (ps512[0], ps512[1], 1); + __mmask16 mask2 = _mm512_cmp_ps_mask (ps512[2], ps512[3], 1); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +float_foo8 () +{ + __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1); + __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +float_foo9 () +{ + __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1); + __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +float_foo10 () +{ + __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1); + __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +float_foo11 () +{ + __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1); + __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +float_foo12 () +{ + __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1); + __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1); + return 
_kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +float_foo13 () +{ + __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1); + __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +float_foo14 () +{ + __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1); + __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +float_foo15 () +{ + __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1); + __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +float_foo16 () +{ + __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1); + __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1); + return _kortestz_mask64_u8 (mask1, mask2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512f-pr103750-2.c new file mode 100644 index 00000000000..a6c2b06747d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr103750-2.c @@ -0,0 +1,478 @@ +/* PR target/103750 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512dq -mavx512vl -mavx512bw" } */ +/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ +/* xfail need to be fixed. 
*/ + +#include <immintrin.h> +extern __m128i* pi128; +extern __m256i* pi256; +extern __m512i* pi512; + +extern __m128* ps128; +extern __m256* ps256; +extern __m512* ps512; + +extern __m128d* pd128; +extern __m256d* pd256; +extern __m512d* pd512; + +extern char a, b; +void +foo () +{ + __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo1 () +{ + __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo2 () +{ + __mmask8 mask1 = _mm_cmpeq_epu32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu32_mask (pi128[2], pi128[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo3 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo4 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo5 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu32_mask (pi256[2], pi256[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo6 () +{ + __mmask16 mask1 = _mm512_cmpeq_epu32_mask (pi512[0], pi512[1]); + __mmask16 mask2 = _mm512_cmpeq_epu32_mask (pi512[2], pi512[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +foo7 () +{ + __mmask16 mask1 = _mm512_cmpeq_epu32_mask (pi512[0], pi512[1]); + __mmask16 
mask2 = _mm512_cmpeq_epu32_mask (pi512[2], pi512[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +foo8 () +{ + __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo9 () +{ + __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo10 () +{ + __mmask8 mask1 = _mm_cmpeq_epu64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epu64_mask (pi128[2], pi128[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo11 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo12 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo13 () +{ + __mmask8 mask1 = _mm256_cmpeq_epu64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epu64_mask (pi256[2], pi256[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo14 () +{ + __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +foo15 () +{ + __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 
(mask1, mask2); +} + +void +foo16 () +{ + __mmask8 mask1 = _mm512_cmpeq_epu64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epu64_mask (pi512[2], pi512[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo () +{ + __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo1 () +{ + __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo2 () +{ + __mmask8 mask1 = _mm_cmpeq_epi32_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi32_mask (pi128[2], pi128[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo3 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo4 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo5 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi32_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi32_mask (pi256[2], pi256[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo6 () +{ + __mmask16 mask1 = _mm512_cmpeq_epi32_mask (pi512[0], pi512[1]); + __mmask16 mask2 = _mm512_cmpeq_epi32_mask (pi512[2], pi512[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +sign_foo7 () +{ + __mmask16 mask1 = _mm512_cmpeq_epi32_mask 
(pi512[0], pi512[1]); + __mmask16 mask2 = _mm512_cmpeq_epi32_mask (pi512[2], pi512[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +sign_foo8 () +{ + __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo9 () +{ + __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo10 () +{ + __mmask8 mask1 = _mm_cmpeq_epi64_mask (pi128[0], pi128[1]); + __mmask8 mask2 = _mm_cmpeq_epi64_mask (pi128[2], pi128[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo11 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo12 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo13 () +{ + __mmask8 mask1 = _mm256_cmpeq_epi64_mask (pi256[0], pi256[1]); + __mmask8 mask2 = _mm256_cmpeq_epi64_mask (pi256[2], pi256[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo14 () +{ + __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo15 () +{ + __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], 
pi512[3]); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo16 () +{ + __mmask8 mask1 = _mm512_cmpeq_epi64_mask (pi512[0], pi512[1]); + __mmask8 mask2 = _mm512_cmpeq_epi64_mask (pi512[2], pi512[3]); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo () +{ + __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1); + __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo1 () +{ + __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1); + __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo2 () +{ + __mmask8 mask1 = _mm_cmp_ps_mask (ps128[0], ps128[1], 1); + __mmask8 mask2 = _mm_cmp_ps_mask (ps128[2], ps128[3], 1); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo3 () +{ + __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1); + __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo4 () +{ + __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1); + __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo5 () +{ + __mmask8 mask1 = _mm256_cmp_ps_mask (ps256[0], ps256[1], 1); + __mmask8 mask2 = _mm256_cmp_ps_mask (ps256[2], ps256[3], 1); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo6 () +{ + __mmask16 mask1 = _mm512_cmp_ps_mask (ps512[0], ps512[1], 1); + __mmask16 mask2 = _mm512_cmp_ps_mask (ps512[2], ps512[3], 1); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void 
+float_foo7 () +{ + __mmask16 mask1 = _mm512_cmp_ps_mask (ps512[0], ps512[1], 1); + __mmask16 mask2 = _mm512_cmp_ps_mask (ps512[2], ps512[3], 1); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +float_foo8 () +{ + __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1); + __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo9 () +{ + __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1); + __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo10 () +{ + __mmask8 mask1 = _mm_cmp_pd_mask (pd128[0], pd128[1], 1); + __mmask8 mask2 = _mm_cmp_pd_mask (pd128[2], pd128[3], 1); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo11 () +{ + __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1); + __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo12 () +{ + __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1); + __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo13 () +{ + __mmask8 mask1 = _mm256_cmp_pd_mask (pd256[0], pd256[1], 1); + __mmask8 mask2 = _mm256_cmp_pd_mask (pd256[2], pd256[3], 1); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo14 () +{ + __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1); + __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1); + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo15 () +{ + __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1); + __mmask8 mask2 = 
_mm512_cmp_pd_mask (pd512[2], pd512[3], 1); + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +float_foo16 () +{ + __mmask8 mask1 = _mm512_cmp_pd_mask (pd512[0], pd512[1], 1); + __mmask8 mask2 = _mm512_cmp_pd_mask (pd512[2], pd512[3], 1); + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-1.c new file mode 100644 index 00000000000..eaf6d1e9819 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-1.c @@ -0,0 +1,58 @@ +/* PR target/103750 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */ +/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ +/* xfail need to be fixed. */ + +#include <immintrin.h> +extern __m128h* ph128; +extern __m256h* ph256; +extern __m512h* ph512; + +unsigned char +sign_foo3 () +{ + __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1); + __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1); + return _kortestz_mask16_u8 (mask1, mask2); +} + +unsigned char +sign_foo4 () +{ + __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1); + __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo5 () +{ + __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1); + __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo6 () +{ + __mmask16 mask1 = _mm256_cmp_ph_mask (ph256[0], ph256[1], 1); + __mmask16 mask2 = _mm256_cmp_ph_mask (ph256[2], ph256[3], 1); + return _kortestz_mask32_u8 (mask1, mask2); +} + +unsigned char +sign_foo7 () +{ + __mmask16 mask1 = _mm256_cmp_ph_mask (ph256[0], ph256[1], 1); + __mmask16 mask2 = _mm256_cmp_ph_mask (ph256[2], ph256[3], 1); + return _kortestz_mask64_u8 (mask1, mask2); +} + +unsigned char +sign_foo8 () +{ + 
__mmask32 mask1 = _mm512_cmp_ph_mask (ph512[0], ph512[1], 1); + __mmask32 mask2 = _mm512_cmp_ph_mask (ph512[2], ph512[3], 1); + return _kortestz_mask64_u8 (mask1, mask2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-2.c new file mode 100644 index 00000000000..3d3a033fe64 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-pr103750-2.c @@ -0,0 +1,71 @@ +/* PR target/103750 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */ +/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ +/* xfail need to be fixed. */ + +#include <immintrin.h> +extern __m128h* ph128; +extern __m256h* ph256; +extern __m512h* ph512; + +extern char a, b; +void +sign_foo3 () +{ + __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1); + __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1); + + a = _kortestz_mask16_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo4 () +{ + __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1); + __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1); + + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo5 () +{ + __mmask8 mask1 = _mm_cmp_ph_mask (ph128[0], ph128[1], 1); + __mmask8 mask2 = _mm_cmp_ph_mask (ph128[1], ph128[2], 1); + + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask8_u8 (mask1, mask2); +} + +void +sign_foo6 () +{ + __mmask16 mask1 = _mm256_cmp_ph_mask (ph256[0], ph256[1], 1); + __mmask16 mask2 = _mm256_cmp_ph_mask (ph256[2], ph256[3], 1); + + a = _kortestz_mask32_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +sign_foo7 () +{ + __mmask16 mask1 = _mm256_cmp_ph_mask (ph256[0], ph256[1], 1); + __mmask16 mask2 = _mm256_cmp_ph_mask (ph256[2], ph256[3], 1); + + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask16_u8 (mask1, mask2); +} + +void +sign_foo8 () +{ + __mmask32 mask1 = 
_mm512_cmp_ph_mask (ph512[0], ph512[1], 1); + __mmask32 mask2 = _mm512_cmp_ph_mask (ph512[2], ph512[3], 1); + + a = _kortestz_mask64_u8 (mask1, mask2); + b = _kortestz_mask32_u8 (mask1, mask2); +}
reply other threads:[~2021-12-23 5:43 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20211223054305.122693858C60@sourceware.org \ --to=liuhongt@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).