public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r13-284] Optimize movzwl + vmovd/vmovq to vmovw.
@ 2022-05-11  7:35 hongtao Liu
  0 siblings, 0 replies; only message in thread
From: hongtao Liu @ 2022-05-11  7:35 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:1e69bc810272f289e367570cf89d8c72af6124e4

commit r13-284-g1e69bc810272f289e367570cf89d8c72af6124e4
Author: liuhongt <hongtao.liu@intel.com>
Date:   Tue Mar 29 09:21:21 2022 +0800

    Optimize movzwl + vmovd/vmovq to vmovw.
    
    Similarly optimize movl + vmovq to vmovd.
    
    gcc/ChangeLog:
    
            PR target/104915
            * config/i386/sse.md (*vec_set<mode>_0_zero_extendhi): New
            pre_reload define_insn_and_split.
            (*vec_setv2di_0_zero_extendhi_1): Ditto.
            (*vec_set<mode>_0_zero_extendsi): Ditto.
            (*vec_setv2di_0_zero_extendsi_1): Ditto.
            (ssewvecmode): New mode attr.
            (ssewvecmodelower): Ditto.
            (ssepackmodelower): Ditto.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/pr104915-vmovd.c: New test.
            * gcc.target/i386/pr104915-vmovw.c: New test.

Diff:
---
 gcc/config/i386/sse.md                         | 94 ++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr104915-vmovd.c | 25 +++++++
 gcc/testsuite/gcc.target/i386/pr104915-vmovw.c | 45 ++++++++++++
 3 files changed, 164 insertions(+)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 47f8b18b82e..a63df0d0b1f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -985,6 +985,15 @@
    (V32HI "V32HI") (V64QI "V64QI")
    (V32QI "V32QI") (V16QI "V16QI")])
 
+;; Mapping of vector modes to a V*HImode of the same size
+(define_mode_attr ssewvecmode
+  [(V8DI "V32HI") (V4DI "V16HI") (V2DI "V8HI")
+   (V16SI "V32HI") (V8SI "V16HI") (V4SI "V8HI")])
+
+(define_mode_attr ssewvecmodelower ;; lowercase ssewvecmode, for gen_* names
+  [(V8DI "v32hi") (V4DI "v16hi") (V2DI "v8hi")
+   (V16SI "v32hi") (V8SI "v16hi") (V4SI "v8hi")])
+
 (define_mode_attr sseintvecmode2
   [(V8DF "XI") (V4DF "OI") (V2DF "TI")
    (V8SF "OI") (V4SF "TI")
@@ -1194,6 +1203,11 @@
    (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
    (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
 
+(define_mode_attr ssepackmodelower ;; lowercase ssepackmode, for gen_* names
+  [(V8HI "v16qi") (V4SI "v8hi") (V2DI "v4si")
+   (V16HI "v32qi") (V8SI "v16hi") (V4DI "v8si")
+   (V32HI "v64qi") (V16SI "v32hi") (V8DI "v16si")])
+
 ;; Mapping of the max integer size for xop rotate immediate constraint
 (define_mode_attr sserotatemax
   [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
@@ -10681,6 +10695,46 @@
    (set_attr "prefix" "evex")
    (set_attr "mode" "HF")])
 
+(define_insn_and_split "*vec_set<mode>_0_zero_extendhi" ;; PR104915: vmovw
+  [(set (match_operand:VI48_AVX512F 0 "register_operand")
+	(vec_merge:VI48_AVX512F
+	 (vec_duplicate:VI48_AVX512F
+	  (zero_extend:<ssescalarmode>
+	    (match_operand:HI 1 "nonimmediate_operand"))) ;; HImode source
+	 (match_operand:VI48_AVX512F 2 "const0_operand") ;; zero vector
+	 (const_int 1)))]
+  "TARGET_AVX512FP16 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx dest = gen_reg_rtx (<ssewvecmode>mode); /* V*HI temp, same size.  */
+  emit_insn (gen_vec_set<ssewvecmodelower>_0 (dest,
+					      CONST0_RTX (<ssewvecmode>mode),
+					      operands[1]));
+  emit_move_insn (operands[0], /* View the temp as <MODE>.  */
+		  lowpart_subreg (<MODE>mode, dest, <ssewvecmode>mode));
+  DONE;
+})
+
+(define_insn_and_split "*vec_setv2di_0_zero_extendhi_1" ;; PR104915: vmovw
+  [(set (match_operand:V2DI 0 "register_operand")
+	(vec_concat:V2DI
+	  (zero_extend:DI
+	    (match_operand:HI 1 "nonimmediate_operand")) ;; HImode source
+	  (const_int 0)))]
+  "TARGET_AVX512FP16 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx dest = gen_reg_rtx (V8HImode); /* V8HI temp for the vec_set.  */
+  emit_insn (gen_vec_setv8hi_0 (dest, CONST0_RTX (V8HImode), operands[1]));
+  emit_move_insn (operands[0], /* View the temp as V2DI.  */
+		  lowpart_subreg (V2DImode, dest, V8HImode));
+  DONE;
+})
+
 (define_insn "avx512fp16_movsh"
   [(set (match_operand:V8HF 0 "register_operand" "=v")
 	(vec_merge:V8HF
@@ -10750,6 +10804,46 @@
 	   ]
 	   (symbol_ref "true")))])
 
+(define_insn_and_split "*vec_set<mode>_0_zero_extendsi" ;; PR104915: vmovd
+  [(set (match_operand:VI8 0 "register_operand")
+	(vec_merge:VI8
+	 (vec_duplicate:VI8
+	  (zero_extend:DI
+	    (match_operand:SI 1 "nonimmediate_operand"))) ;; SImode source
+	 (match_operand:VI8 2 "const0_operand") ;; zero vector
+	 (const_int 1)))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx dest = gen_reg_rtx (<ssepackmode>mode); /* <ssepackmode> temp.  */
+  emit_insn (gen_vec_set<ssepackmodelower>_0 (dest,
+					      CONST0_RTX (<ssepackmode>mode),
+					      operands[1]));
+  emit_move_insn (operands[0], /* View the temp as <MODE>.  */
+		  lowpart_subreg (<MODE>mode, dest, <ssepackmode>mode));
+  DONE;
+})
+
+(define_insn_and_split "*vec_setv2di_0_zero_extendsi_1" ;; PR104915: vmovd
+  [(set (match_operand:V2DI 0 "register_operand")
+	(vec_concat:V2DI
+	  (zero_extend:DI
+	    (match_operand:SI 1 "nonimmediate_operand")) ;; SImode source
+	  (const_int 0)))]
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx dest = gen_reg_rtx (V4SImode); /* V4SI temp for the vec_set.  */
+  emit_insn (gen_vec_setv4si_0 (dest, CONST0_RTX (V4SImode), operands[1]));
+  emit_move_insn (operands[0], /* View the temp as V2DI.  */
+		  lowpart_subreg (V2DImode, dest, V4SImode));
+  DONE;
+})
+
 (define_insn "sse4_1_insertps"
   [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
 	(unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
diff --git a/gcc/testsuite/gcc.target/i386/pr104915-vmovd.c b/gcc/testsuite/gcc.target/i386/pr104915-vmovd.c
new file mode 100644
index 00000000000..913ff8806f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr104915-vmovd.c
@@ -0,0 +1,25 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times {(?n)vmovd[ \t]+} 3 } } */
+/* { dg-final { scan-assembler-not {(?n)movq[ \t]+} } } */
+
+#include<immintrin.h>
+
+__m128i
+foo1 (int* p) /* __m128i result, 64-bit elements.  */
+{
+  return _mm_set_epi64x (0, (unsigned int) ((*(__m32_u *)p)[0]));
+}
+
+__m256i
+foo3 (int* p) /* __m256i result, 64-bit elements.  */
+{
+  return _mm256_set_epi64x (0, 0, 0, (unsigned int) ((*(__m32_u *)p)[0]));
+}
+
+__m512i
+foo5 (int* p) /* __m512i result, 64-bit elements.  */
+{
+  return _mm512_set_epi64 (0, 0, 0, 0, 0, 0, 0,
+			   (unsigned int) ((*(__m32_u *)p)[0]));
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr104915-vmovw.c b/gcc/testsuite/gcc.target/i386/pr104915-vmovw.c
new file mode 100644
index 00000000000..ac47865d17a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr104915-vmovw.c
@@ -0,0 +1,45 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx512fp16 -O2" } */
+/* { dg-final { scan-assembler-times {(?n)vmovw[ \t]+} 6 } } */
+/* { dg-final { scan-assembler-not {(?n)mov[dq][ \t]+} } } */
+
+#include<immintrin.h>
+__m128i
+foo (short* p) /* __m128i result, 32-bit elements.  */
+{
+  return _mm_set_epi32 (0, 0, 0, (unsigned short) ((*(__m16_u *)p)[0]));
+}
+
+__m128i
+foo1 (short* p) /* __m128i result, 64-bit elements.  */
+{
+  return _mm_set_epi64x (0, (unsigned short) ((*(__m16_u *)p)[0]));
+}
+
+__m256i
+foo2 (short* p) /* __m256i result, 32-bit elements.  */
+{
+  return _mm256_set_epi32 (0, 0, 0, 0, 0, 0, 0,
+			   (unsigned short) ((*(__m16_u *)p)[0]));
+}
+
+__m256i
+foo3 (short* p) /* __m256i result, 64-bit elements.  */
+{
+  return _mm256_set_epi64x (0, 0, 0, (unsigned short) ((*(__m16_u *)p)[0]));
+}
+
+__m512i
+foo4 (short* p) /* __m512i result, 32-bit elements.  */
+{
+  return _mm512_set_epi32 (0, 0, 0, 0, 0, 0, 0, 0,
+			   0, 0, 0, 0, 0, 0, 0,
+			   (unsigned short) ((*(__m16_u *)p)[0]));
+}
+
+__m512i
+foo5 (short* p) /* __m512i result, 64-bit elements.  */
+{
+  return _mm512_set_epi64 (0, 0, 0, 0, 0, 0, 0,
+			   (unsigned short) ((*(__m16_u *)p)[0]));
+}


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-05-11  7:35 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-11  7:35 [gcc r13-284] Optimize movzwl + vmovd/vmovq to vmovw hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).