public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r15-22] Adjust alternative *k to ?k for avx512 mask in zero_extend patterns
@ 2024-04-28 10:00 hongtao Liu
0 siblings, 0 replies; only message in thread
From: hongtao Liu @ 2024-04-28 10:00 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:c19a674d03847b900919b97d0957c8ae5164f8f1
commit r15-22-gc19a674d03847b900919b97d0957c8ae5164f8f1
Author: liuhongt <hongtao.liu@intel.com>
Date: Tue Apr 16 08:37:22 2024 +0800
Adjust alternative *k to ?k for avx512 mask in zero_extend patterns
So when both source operand and dest operand require avx512 MASK_REGS, RA
can allocate MASK_REGS register instead of GPR to avoid reload it from
GPR to MASK_REGS.
gcc/ChangeLog:
* config/i386/i386.md: (zero_extendsidi2): Adjust
alternative *k to ?k.
(zero_extend<mode>di2): Ditto.
(*zero_extend<mode>si2): Ditto.
(*zero_extendqihi2): Ditto.
Diff:
---
gcc/config/i386/i386.md | 16 ++++-----
gcc/testsuite/gcc.target/i386/zero_extendkmask.c | 43 ++++++++++++++++++++++++
2 files changed, 51 insertions(+), 8 deletions(-)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 80e64c603eb..764bfe20ff2 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4569,10 +4569,10 @@
(define_insn "*zero_extendsidi2"
[(set (match_operand:DI 0 "nonimmediate_operand"
- "=r,?r,?o,r ,o,?*y,?!*y,$r,$v,$x,*x,*v,*r,*k")
+ "=r,?r,?o,r ,o,?*y,?!*y,$r,$v,$x,*x,*v,?r,?k")
(zero_extend:DI
(match_operand:SI 1 "x86_64_zext_operand"
- "0 ,rm,r ,rmWz,0,r ,m ,v ,r ,m ,*x,*v,*k,*km")))]
+ "0 ,rm,r ,rmWz,0,r ,m ,v ,r ,m ,*x,*v,?k,?km")))]
""
{
switch (get_attr_type (insn))
@@ -4705,9 +4705,9 @@
[(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")])
(define_insn "zero_extend<mode>di2"
- [(set (match_operand:DI 0 "register_operand" "=r,*r,*k")
+ [(set (match_operand:DI 0 "register_operand" "=r,?r,?k")
(zero_extend:DI
- (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))]
+ (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,?k,?km")))]
"TARGET_64BIT"
"@
movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}
@@ -4760,9 +4760,9 @@
(set_attr "mode" "SI")])
(define_insn "*zero_extend<mode>si2"
- [(set (match_operand:SI 0 "register_operand" "=r,*r,*k")
+ [(set (match_operand:SI 0 "register_operand" "=r,?r,?k")
(zero_extend:SI
- (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,*k,*km")))]
+ (match_operand:SWI12 1 "nonimmediate_operand" "<r>m,?k,?km")))]
"!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
"@
movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}
@@ -4815,8 +4815,8 @@
; zero extend to SImode to avoid partial register stalls
(define_insn "*zero_extendqihi2"
- [(set (match_operand:HI 0 "register_operand" "=r,*r,*k")
- (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,*k,*km")))]
+ [(set (match_operand:HI 0 "register_operand" "=r,?r,?k")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,?k,?km")))]
"!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
"@
movz{bl|x}\t{%1, %k0|%k0, %1}
diff --git a/gcc/testsuite/gcc.target/i386/zero_extendkmask.c b/gcc/testsuite/gcc.target/i386/zero_extendkmask.c
new file mode 100644
index 00000000000..6b18980bbd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/zero_extendkmask.c
@@ -0,0 +1,43 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not {(?n)shr[bwl]} } } */
+/* { dg-final { scan-assembler-not {(?n)movz[bw]} } } */
+
+#include<immintrin.h>
+
+__m512
+foo (__m512d a, __m512d b, __m512 c, __m512 d)
+{
+ return _mm512_mask_mov_ps (c, (__mmask16) (_mm512_cmpeq_pd_mask (a, b) >> 1), d);
+}
+
+
+__m512i
+foo1 (__m512d a, __m512d b, __m512i c, __m512i d)
+{
+ return _mm512_mask_mov_epi16 (c, (__mmask32) (_mm512_cmpeq_pd_mask (a, b) >> 1), d);
+}
+
+__m512i
+foo2 (__m512d a, __m512d b, __m512i c, __m512i d)
+{
+ return _mm512_mask_mov_epi8 (c, (__mmask64) (_mm512_cmpeq_pd_mask (a, b) >> 1), d);
+}
+
+__m512i
+foo3 (__m512 a, __m512 b, __m512i c, __m512i d)
+{
+ return _mm512_mask_mov_epi16 (c, (__mmask32) (_mm512_cmpeq_ps_mask (a, b) >> 1), d);
+}
+
+__m512i
+foo4 (__m512 a, __m512 b, __m512i c, __m512i d)
+{
+ return _mm512_mask_mov_epi8 (c, (__mmask64) (_mm512_cmpeq_ps_mask (a, b) >> 1), d);
+}
+
+__m512i
+foo5 (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+ return _mm512_mask_mov_epi8 (c, (__mmask64) (_mm512_cmp_epi16_mask (a, b, 5) >> 1), d);
+}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2024-04-28 10:00 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-28 10:00 [gcc r15-22] Adjust alternative *k to ?k for avx512 mask in zero_extend patterns hongtao Liu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).