* [PATCH] i386: Add missing vector extend patterns [PR92658]
@ 2023-05-10 20:45 Uros Bizjak
0 siblings, 0 replies; only message in thread
From: Uros Bizjak @ 2023-05-10 20:45 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 620 bytes --]
Add the missing insn pattern for the v2qi -> v2si vector extend, and add
named expanders to enable generation of vector extends to 8-byte and 4-byte
vectors.
gcc/ChangeLog:
PR target/92658
* config/i386/mmx.md (sse4_1_<code>v2qiv2si2): New insn pattern.
(<insn>v4qiv4hi2): New expander.
(<insn>v2hiv2si2): Ditto.
(<insn>v2qiv2si2): Ditto.
(<insn>v2qiv2hi2): Ditto.
gcc/testsuite/ChangeLog:
PR target/92658
* gcc.target/i386/pr92658-sse4-4b.c: New test.
* gcc.target/i386/pr92658-sse4-8b.c: New test.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Pushed to master.
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 5511 bytes --]
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 6dd203f4fa8..e7ca921dd2b 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -3543,6 +3543,18 @@ (define_insn "sse4_1_<code>v4qiv4hi2"
(set_attr "prefix" "orig,orig,maybe_evex")
(set_attr "mode" "TI")])
+(define_expand "<insn>v4qiv4hi2" ;; Named expander: any_extend of a V4QI register into a V4HI register.
+ [(set (match_operand:V4HI 0 "register_operand")
+ (any_extend:V4HI
+ (match_operand:V4QI 1 "register_operand")))]
+ "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE" ;; Enabled only when both target conditions hold.
+{
+ rtx op1 = force_reg (V4QImode, operands[1]); /* Get the V4QI source into a register.  */
+ op1 = lowpart_subreg (V8QImode, op1, V4QImode); /* Re-view it as the low part of a V8QI value.  */
+ emit_insn (gen_sse4_1_<code>v4qiv4hi2 (operands[0], op1)); /* Emit the sse4_1_<code>v4qiv4hi2 insn on that V8QI view.  */
+ DONE; /* The insn emitted above is the entire expansion.  */
+})
+
(define_insn "sse4_1_<code>v2hiv2si2"
[(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v")
(any_extend:V2SI
@@ -3557,6 +3569,44 @@ (define_insn "sse4_1_<code>v2hiv2si2"
(set_attr "prefix" "orig,orig,maybe_evex")
(set_attr "mode" "TI")])
+(define_expand "<insn>v2hiv2si2" ;; Named expander: any_extend of a V2HI register into a V2SI register.
+ [(set (match_operand:V2SI 0 "register_operand")
+ (any_extend:V2SI
+ (match_operand:V2HI 1 "register_operand")))]
+ "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE" ;; Enabled only when both target conditions hold.
+{
+ rtx op1 = force_reg (V2HImode, operands[1]); /* Get the V2HI source into a register.  */
+ op1 = lowpart_subreg (V4HImode, op1, V2HImode); /* Re-view it as the low part of a V4HI value.  */
+ emit_insn (gen_sse4_1_<code>v2hiv2si2 (operands[0], op1)); /* Emit the sse4_1_<code>v2hiv2si2 insn on that V4HI view.  */
+ DONE; /* The insn emitted above is the entire expansion.  */
+})
+
+(define_insn "sse4_1_<code>v2qiv2si2" ;; Insn: extend two QI elements taken from a V4QI register into a V2SI register.
+ [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v")
+ (any_extend:V2SI
+ (vec_select:V2QI
+ (match_operand:V4QI 1 "register_operand" "Yr,*x,v")
+ (parallel [(const_int 0) (const_int 1)]))))] ;; Only elements 0 and 1 of operand 1 are selected.
+ "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE" ;; Enabled only when both target conditions hold.
+ "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}" ;; Output template; <extsuffix> is defined outside this hunk (presumably sx/zx per extend code -- confirm).
+ [(set_attr "isa" "noavx,noavx,avx") ;; One value per alternative: the first two are noavx, the third is avx.
+ (set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "orig,orig,maybe_evex")
+ (set_attr "mode" "TI")])
+
+(define_expand "<insn>v2qiv2si2" ;; Named expander: any_extend of a V2QI register into a V2SI register.
+ [(set (match_operand:V2SI 0 "register_operand")
+ (any_extend:V2SI
+ (match_operand:V2QI 1 "register_operand")))]
+ "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE" ;; Enabled only when both target conditions hold.
+{
+ rtx op1 = force_reg (V2QImode, operands[1]); /* Get the V2QI source into a register.  */
+ op1 = lowpart_subreg (V4QImode, op1, V2QImode); /* Re-view it as the low part of a V4QI value.  */
+ emit_insn (gen_sse4_1_<code>v2qiv2si2 (operands[0], op1)); /* Emit the sse4_1_<code>v2qiv2si2 insn, which takes a V4QI source.  */
+ DONE; /* The insn emitted above is the entire expansion.  */
+})
+
(define_insn "sse4_1_<code>v2qiv2hi2"
[(set (match_operand:V2HI 0 "register_operand" "=Yr,*x,Yw")
(any_extend:V2HI
@@ -3571,6 +3621,18 @@ (define_insn "sse4_1_<code>v2qiv2hi2"
(set_attr "prefix" "orig,orig,maybe_evex")
(set_attr "mode" "TI")])
+(define_expand "<insn>v2qiv2hi2" ;; Named expander: any_extend of a V2QI register into a V2HI register.
+ [(set (match_operand:V2HI 0 "register_operand")
+ (any_extend:V2HI
+ (match_operand:V2QI 1 "register_operand")))]
+ "TARGET_SSE4_1" ;; Only TARGET_SSE4_1 is required for this expander.
+{
+ rtx op1 = force_reg (V2QImode, operands[1]); /* Get the V2QI source into a register.  */
+ op1 = lowpart_subreg (V4QImode, op1, V2QImode); /* Re-view it as the low part of a V4QI value.  */
+ emit_insn (gen_sse4_1_<code>v2qiv2hi2 (operands[0], op1)); /* Emit the sse4_1_<code>v2qiv2hi2 insn on that V4QI view.  */
+ DONE; /* The insn emitted above is the entire expansion.  */
+})
+
;; Pack/unpack vector modes
(define_mode_attr mmxpackmode
[(V4HI "V8QI") (V2SI "V4HI")])
diff --git a/gcc/testsuite/gcc.target/i386/pr92658-sse4-4b.c b/gcc/testsuite/gcc.target/i386/pr92658-sse4-4b.c
new file mode 100644
index 00000000000..f0264a3cbe1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92658-sse4-4b.c
@@ -0,0 +1,26 @@
+/* PR target/92658 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=icelake-server -ftree-vectorize -msse4.1" } */
+
+typedef unsigned char v4qi __attribute__((vector_size (4))); /* 4-byte vector of unsigned char elements.  */
+typedef unsigned short v2hi __attribute__((vector_size (4))); /* 4-byte vector of unsigned short elements.  */
+
+void
+foo_u8_u16 (v2hi * dst, v4qi * __restrict src)
+{ /* Widen elements 0 and 1 of the vector loaded from *src to unsigned short and store them to *dst.  */
+ unsigned short tem[2]; /* Scratch array for the widened elements.  */
+ tem[0] = (*src)[0]; /* unsigned char -> unsigned short conversion.  */
+ tem[1] = (*src)[1];
+ dst[0] = *(v2hi *) tem; /* Read the scratch array back as one v2hi and store it.  */
+}
+
+void
+bar_u8_u16 (v2hi * dst, v4qi src)
+{ /* Same widening as foo_u8_u16, but the source vector is passed by value.  */
+ unsigned short tem[4]; /* NOTE(review): 4 elements declared, only tem[0] and tem[1] are written; the v2hi read below covers 4 bytes -- confirm the size is intended.  */
+ tem[0] = src[0]; /* unsigned char -> unsigned short conversion.  */
+ tem[1] = src[1];
+ dst[0] = *(v2hi *) tem; /* Read the start of the scratch array as one v2hi and store it.  */
+}
+
+/* { dg-final { scan-assembler-times "pmovzxbw" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr92658-sse4-8b.c b/gcc/testsuite/gcc.target/i386/pr92658-sse4-8b.c
new file mode 100644
index 00000000000..5c815f51ee3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92658-sse4-8b.c
@@ -0,0 +1,71 @@
+/* PR target/92658 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mtune=icelake-server -ftree-vectorize -msse4.1" } */
+
+typedef unsigned char v8qi __attribute__((vector_size (8))); /* 8-byte vector of unsigned char elements.  */
+typedef unsigned short v4hi __attribute__((vector_size (8))); /* 8-byte vector of unsigned short elements.  */
+typedef unsigned int v2si __attribute__((vector_size (8))); /* 8-byte vector of unsigned int elements.  */
+
+void
+foo_u8_u16 (v4hi * dst, v8qi * __restrict src)
+{ /* Widen elements 0..3 of the vector loaded from *src to unsigned short and store them to *dst.  */
+ unsigned short tem[4]; /* Scratch array for the four widened elements.  */
+ tem[0] = (*src)[0]; /* unsigned char -> unsigned short conversion.  */
+ tem[1] = (*src)[1];
+ tem[2] = (*src)[2];
+ tem[3] = (*src)[3];
+ dst[0] = *(v4hi *) tem; /* Read the scratch array back as one v4hi and store it.  */
+}
+
+void
+bar_u8_u16 (v4hi * dst, v8qi src)
+{ /* Same widening as foo_u8_u16, but the source vector is passed by value.  */
+ unsigned short tem[4]; /* Scratch array for the four widened elements.  */
+ tem[0] = src[0]; /* unsigned char -> unsigned short conversion.  */
+ tem[1] = src[1];
+ tem[2] = src[2];
+ tem[3] = src[3];
+ dst[0] = *(v4hi *) tem; /* Read the scratch array back as one v4hi and store it.  */
+}
+
+/* { dg-final { scan-assembler-times "pmovzxbw" 2 } } */
+
+void
+foo_u8_u32 (v2si * dst, v8qi * __restrict src)
+{ /* Widen elements 0 and 1 of the vector loaded from *src to unsigned int and store them to *dst.  */
+ unsigned int tem[2]; /* Scratch array for the two widened elements.  */
+ tem[0] = (*src)[0]; /* unsigned char -> unsigned int conversion.  */
+ tem[1] = (*src)[1];
+ dst[0] = *(v2si *) tem; /* Read the scratch array back as one v2si and store it.  */
+}
+
+void
+bar_u8_u32 (v2si * dst, v8qi src)
+{ /* Same widening as foo_u8_u32, but the source vector is passed by value.  */
+ unsigned int tem[2]; /* Scratch array for the two widened elements.  */
+ tem[0] = src[0]; /* unsigned char -> unsigned int conversion.  */
+ tem[1] = src[1];
+ dst[0] = *(v2si *) tem; /* Read the scratch array back as one v2si and store it.  */
+}
+
+/* { dg-final { scan-assembler-times "pmovzxbd" 2 } } */
+
+void
+foo_u16_u32 (v2si * dst, v4hi * __restrict src)
+{ /* Widen elements 0 and 1 of the vector loaded from *src to unsigned int and store them to *dst.  */
+ unsigned int tem[2]; /* Scratch array for the two widened elements.  */
+ tem[0] = (*src)[0]; /* unsigned short -> unsigned int conversion.  */
+ tem[1] = (*src)[1];
+ dst[0] = *(v2si *) tem; /* Read the scratch array back as one v2si and store it.  */
+}
+
+void
+bar_u16_u32 (v2si * dst, v4hi src)
+{ /* Same widening as foo_u16_u32, but the source vector is passed by value.  */
+ unsigned int tem[2]; /* Scratch array for the two widened elements.  */
+ tem[0] = src[0]; /* unsigned short -> unsigned int conversion.  */
+ tem[1] = src[1];
+ dst[0] = *(v2si *) tem; /* Read the scratch array back as one v2si and store it.  */
+}
+
+/* { dg-final { scan-assembler-times "pmovzxwd" 2 } } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-05-10 20:45 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-10 20:45 [PATCH] i386: Add missing vector extend patterns [PR92658] Uros Bizjak
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).