From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2078) id 0E8DE3858D33; Sun, 28 Apr 2024 10:00:43 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 0E8DE3858D33 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1714298444; bh=5B0Dz707IZ3y71IRPtBSMlF3AS9Vta/i2STnxMDTOzY=; h=From:To:Subject:Date:From; b=Mo8R5qXCoSaHXiueVUWyQyGNxSgJBQ7MyYvIp48jPXfyP9UhjikNhx4NsPqfMZ0PC A40PCvaq37QqphpABwOHhNHbE9llc24GiCX1y2+DgAebKmbyJDVQaXoffwvX0i1uEb OU4i766i9QISr86lTXOaVNZfX0Qo7PK7TQpH3grI= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: hongtao Liu To: gcc-cvs@gcc.gnu.org Subject: [gcc r15-22] Adjust alternative *k to ?k for avx512 mask in zero_extend patterns X-Act-Checkin: gcc X-Git-Author: liuhongt X-Git-Refname: refs/heads/master X-Git-Oldrev: bc07fa6af54cc67f7fc85cc144f9686fad07f205 X-Git-Newrev: c19a674d03847b900919b97d0957c8ae5164f8f1 Message-Id: <20240428100044.0E8DE3858D33@sourceware.org> Date: Sun, 28 Apr 2024 10:00:43 +0000 (GMT) List-Id: https://gcc.gnu.org/g:c19a674d03847b900919b97d0957c8ae5164f8f1 commit r15-22-gc19a674d03847b900919b97d0957c8ae5164f8f1 Author: liuhongt Date: Tue Apr 16 08:37:22 2024 +0800 Adjust alternative *k to ?k for avx512 mask in zero_extend patterns So when both source operand and dest operand require avx512 MASK_REGS, RA can allocate MASK_REGS register instead of GPR to avoid reload it from GPR to MASK_REGS. gcc/ChangeLog: * config/i386/i386.md: (zero_extendsidi2): Adjust alternative *k to ?k. (zero_extenddi2): Ditto. (*zero_extendsi2): Ditto. (*zero_extendqihi2): Ditto. Diff: --- gcc/config/i386/i386.md | 16 ++++----- gcc/testsuite/gcc.target/i386/zero_extendkmask.c | 43 ++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 8 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 80e64c603eb..764bfe20ff2 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4569,10 +4569,10 @@ (define_insn "*zero_extendsidi2" [(set (match_operand:DI 0 "nonimmediate_operand" - "=r,?r,?o,r ,o,?*y,?!*y,$r,$v,$x,*x,*v,*r,*k") + "=r,?r,?o,r ,o,?*y,?!*y,$r,$v,$x,*x,*v,?r,?k") (zero_extend:DI (match_operand:SI 1 "x86_64_zext_operand" - "0 ,rm,r ,rmWz,0,r ,m ,v ,r ,m ,*x,*v,*k,*km")))] + "0 ,rm,r ,rmWz,0,r ,m ,v ,r ,m ,*x,*v,?k,?km")))] "" { switch (get_attr_type (insn)) @@ -4705,9 +4705,9 @@ [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")]) (define_insn "zero_extenddi2" - [(set (match_operand:DI 0 "register_operand" "=r,*r,*k") + [(set (match_operand:DI 0 "register_operand" "=r,?r,?k") (zero_extend:DI - (match_operand:SWI12 1 "nonimmediate_operand" "m,*k,*km")))] + (match_operand:SWI12 1 "nonimmediate_operand" "m,?k,?km")))] "TARGET_64BIT" "@ movz{l|x}\t{%1, %k0|%k0, %1} @@ -4760,9 +4760,9 @@ (set_attr "mode" "SI")]) (define_insn "*zero_extendsi2" - [(set (match_operand:SI 0 "register_operand" "=r,*r,*k") + [(set (match_operand:SI 0 "register_operand" "=r,?r,?k") (zero_extend:SI - (match_operand:SWI12 1 "nonimmediate_operand" "m,*k,*km")))] + (match_operand:SWI12 1 "nonimmediate_operand" "m,?k,?km")))] "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))" "@ movz{l|x}\t{%1, %0|%0, %1} @@ -4815,8 +4815,8 @@ ; zero extend to SImode to avoid partial register stalls (define_insn "*zero_extendqihi2" - [(set (match_operand:HI 0 "register_operand" "=r,*r,*k") - (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,*k,*km")))] + [(set (match_operand:HI 0 "register_operand" "=r,?r,?k") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,?k,?km")))] "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))" "@ movz{bl|x}\t{%1, %k0|%k0, %1} diff --git a/gcc/testsuite/gcc.target/i386/zero_extendkmask.c b/gcc/testsuite/gcc.target/i386/zero_extendkmask.c new file mode 100644 index 00000000000..6b18980bbd1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/zero_extendkmask.c @@ -0,0 +1,43 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-march=x86-64-v4 -O2" } */ +/* { dg-final { scan-assembler-not {(?n)shr[bwl]} } } */ +/* { dg-final { scan-assembler-not {(?n)movz[bw]} } } */ + +#include + +__m512 +foo (__m512d a, __m512d b, __m512 c, __m512 d) +{ + return _mm512_mask_mov_ps (c, (__mmask16) (_mm512_cmpeq_pd_mask (a, b) >> 1), d); +} + + +__m512i +foo1 (__m512d a, __m512d b, __m512i c, __m512i d) +{ + return _mm512_mask_mov_epi16 (c, (__mmask32) (_mm512_cmpeq_pd_mask (a, b) >> 1), d); +} + +__m512i +foo2 (__m512d a, __m512d b, __m512i c, __m512i d) +{ + return _mm512_mask_mov_epi8 (c, (__mmask64) (_mm512_cmpeq_pd_mask (a, b) >> 1), d); +} + +__m512i +foo3 (__m512 a, __m512 b, __m512i c, __m512i d) +{ + return _mm512_mask_mov_epi16 (c, (__mmask32) (_mm512_cmpeq_ps_mask (a, b) >> 1), d); +} + +__m512i +foo4 (__m512 a, __m512 b, __m512i c, __m512i d) +{ + return _mm512_mask_mov_epi8 (c, (__mmask64) (_mm512_cmpeq_ps_mask (a, b) >> 1), d); +} + +__m512i +foo5 (__m512i a, __m512i b, __m512i c, __m512i d) +{ + return _mm512_mask_mov_epi8 (c, (__mmask64) (_mm512_cmp_epi16_mask (a, b, 5) >> 1), d); +}