public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/c++-modules] Add missing vector truncmn2 expanders [PR92658]
@ 2020-06-10 16:41 Nathan Sidwell
  0 siblings, 0 replies; only message in thread
From: Nathan Sidwell @ 2020-06-10 16:41 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:e740f3d73144abbca1ad98a04825c6bd63314a0b

commit e740f3d73144abbca1ad98a04825c6bd63314a0b
Author: liuhongt <hongtao.liu@intel.com>
Date:   Wed May 20 15:53:14 2020 +0800

    Add missing vector truncmn2 expanders [PR92658]
    
    2020-05-22  Hongtao.liu  <hongtao.liu@intel.com>
    
    gcc/ChangeLog:
            PR target/92658
            * config/i386/sse.md (trunc<pmov_src_lower><mode>2): New expander
            (truncv32hiv32qi2): Ditto.
            (trunc<ssedoublemodelower><mode>2): Ditto.
            (trunc<mode><pmov_dst_3>2): Ditto.
            (trunc<mode><pmov_dst_mode_4>2): Ditto.
            (truncv2div2si2): Ditto.
            (truncv8div8qi2): Ditto.
            (avx512f_<code>v8div16qi2): Renaming from *avx512f_<code>v8div16qi2.
            (avx512vl_<code>v2div2si): Renaming from *avx512vl_<code>v2div2si2.
            (avx512vl_<code><mode>v2<ssecakarnum>qi2): Renaming
            from *avx512vl_<code><mode>v<ssescalarnum>qi2.
    
    gcc/testsuite/ChangeLog:
            * gcc.target/i386/pr92658-avx512f.c: New test.
            * gcc.target/i386/pr92658-avx512vl.c: Ditto.
            * gcc.target/i386/pr92658-avx512bw-trunc.c: Ditto.

Diff:
---
 gcc/ChangeLog                                      |  15 +++
 gcc/config/i386/sse.md                             |  77 +++++++++++-
 gcc/testsuite/ChangeLog                            |   6 +
 .../gcc.target/i386/pr92658-avx512bw-trunc.c       |  91 +++++++++++++++
 gcc/testsuite/gcc.target/i386/pr92658-avx512f.c    | 106 +++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c   | 129 +++++++++++++++++++++
 6 files changed, 420 insertions(+), 4 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9106cdb2e72..4049ac308e2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2020-05-22  Hongtao.liu  <hongtao.liu@intel.com>
+
+	PR target/92658
+	* config/i386/sse.md (trunc<pmov_src_lower><mode>2): New expander
+	(truncv32hiv32qi2): Ditto.
+	(trunc<ssedoublemodelower><mode>2): Ditto.
+	(trunc<mode><pmov_dst_3>2): Ditto.
+	(trunc<mode><pmov_dst_mode_4>2): Ditto.
+	(truncv2div2si2): Ditto.
+	(truncv8div8qi2): Ditto.
+	(avx512f_<code>v8div16qi2): Renaming from *avx512f_<code>v8div16qi2.
+	(avx512vl_<code>v2div2si): Renaming from *avx512vl_<code>v2div2si2.
+	(avx512vl_<code><mode>v2<ssecakarnum>qi2): Renaming from
+	*avx512vl_<code><mode>v<ssescalarnum>qi2.
+
 2020-05-22  H.J. Lu  <hongjiu.lu@intel.com>
 
 	PR target/95258
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5071fb2895a..bb8ee19b64b 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -10513,6 +10513,12 @@
 (define_mode_attr pmov_suff_1
   [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
 
+(define_expand "trunc<pmov_src_lower><mode>2"
+  [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand")
+	(truncate:PMOV_DST_MODE_1
+	  (match_operand:<pmov_src_mode> 1 "register_operand")))]
+  "TARGET_AVX512F")
+
 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
   [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
 	(any_truncate:PMOV_DST_MODE_1
@@ -10547,6 +10553,12 @@
       (match_operand:<avx512fmaskmode> 2 "register_operand")))]
   "TARGET_AVX512F")
 
+(define_expand "truncv32hiv32qi2"
+  [(set (match_operand:V32QI 0 "nonimmediate_operand")
+	(truncate:V32QI
+	  (match_operand:V32HI 1 "register_operand")))]
+  "TARGET_AVX512BW")
+
 (define_insn "avx512bw_<code>v32hiv32qi2"
   [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
 	(any_truncate:V32QI
@@ -10586,6 +10598,12 @@
 (define_mode_attr pmov_suff_2
   [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
 
+(define_expand "trunc<ssedoublemodelower><mode>2"
+  [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
+	(truncate:PMOV_DST_MODE_2
+	  (match_operand:<ssedoublemode> 1 "register_operand")))]
+  "TARGET_AVX512VL")
+
 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
   [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
 	(any_truncate:PMOV_DST_MODE_2
@@ -10628,7 +10646,20 @@
 (define_mode_attr pmov_suff_3
   [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
 
-(define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
+(define_expand "trunc<mode><pmov_dst_3>2"
+  [(set (match_operand:<pmov_dst_3> 0 "register_operand")
+	(truncate:<pmov_dst_3>
+	  (match_operand:PMOV_SRC_MODE_3 1 "register_operand")))]
+  "TARGET_AVX512VL"
+{
+  operands[0] = simplify_gen_subreg (V16QImode, operands[0], <pmov_dst_3>mode, 0);
+  emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>qi2 (operands[0],
+							    operands[1],
+							    CONST0_RTX (<pmov_dst_zeroed_3>mode)));
+  DONE;
+})
+
+(define_insn "avx512vl_<code><mode>v<ssescalarnum>qi2"
   [(set (match_operand:V16QI 0 "register_operand" "=v")
     (vec_concat:V16QI
       (any_truncate:<pmov_dst_3>
@@ -10920,7 +10951,21 @@
 (define_mode_attr pmov_suff_4
   [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
 
-(define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
+(define_expand "trunc<mode><pmov_dst_4>2"
+  [(set (match_operand:<pmov_dst_4> 0 "register_operand")
+	(truncate:<pmov_dst_4>
+	  (match_operand:PMOV_SRC_MODE_4 1 "register_operand")))]
+  "TARGET_AVX512VL"
+{
+  operands[0] = simplify_gen_subreg (V8HImode, operands[0], <pmov_dst_4>mode, 0);
+  emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>hi2 (operands[0],
+							    operands[1],
+							    CONST0_RTX (<pmov_dst_zeroed_4>mode)));
+  DONE;
+
+})
+
+(define_insn "avx512vl_<code><mode>v<ssescalarnum>hi2"
   [(set (match_operand:V8HI 0 "register_operand" "=v")
     (vec_concat:V8HI
       (any_truncate:<pmov_dst_4>
@@ -11085,7 +11130,20 @@
    (set_attr "prefix" "evex")
    (set_attr "mode" "TI")])
 
-(define_insn "*avx512vl_<code>v2div2si2"
+(define_expand "truncv2div2si2"
+  [(set (match_operand:V2SI 0 "register_operand")
+	(truncate:V2SI
+	  (match_operand:V2DI 1 "register_operand")))]
+  "TARGET_AVX512VL"
+{
+  operands[0] = simplify_gen_subreg (V4SImode, operands[0], V2SImode, 0);
+  emit_insn (gen_avx512vl_truncatev2div2si2 (operands[0],
+					     operands[1],
+					     CONST0_RTX (V2SImode)));
+  DONE;
+})
+
+(define_insn "avx512vl_<code>v2div2si2"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
     (vec_concat:V4SI
       (any_truncate:V2SI
@@ -11164,7 +11222,18 @@
    (set_attr "prefix" "evex")
    (set_attr "mode" "TI")])
 
-(define_insn "*avx512f_<code>v8div16qi2"
+(define_expand "truncv8div8qi2"
+  [(set (match_operand:V8QI 0 "register_operand")
+	(truncate:V8QI
+	    (match_operand:V8DI 1 "register_operand")))]
+  "TARGET_AVX512F"
+{
+  operands[0] = simplify_gen_subreg (V16QImode, operands[0], V8QImode, 0);
+  emit_insn (gen_avx512f_truncatev8div16qi2 (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "avx512f_<code>v8div16qi2"
   [(set (match_operand:V16QI 0 "register_operand" "=v")
 	(vec_concat:V16QI
 	  (any_truncate:V8QI
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 5a628260925..7812e3f0f57 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2020-05-22  Hongtao.liu  <hongtao.liu@intel.com>
+
+	* gcc.target/i386/pr92658-avx512f.c: New test.
+	* gcc.target/i386/pr92658-avx512vl.c: Ditto.
+	* gcc.target/i386/pr92658-avx512bw-trunc.c: Ditto.
+
 2020-05-22  Richard Biener  <rguenther@suse.de>
 
 	PR tree-optimization/95268
diff --git a/gcc/testsuite/gcc.target/i386/pr92658-avx512bw-trunc.c b/gcc/testsuite/gcc.target/i386/pr92658-avx512bw-trunc.c
new file mode 100644
index 00000000000..bdfad7a4d18
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92658-avx512bw-trunc.c
@@ -0,0 +1,91 @@
+/* PR target/92658 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mavx512bw -mavx512vl" } */
+
+typedef unsigned char v8qi __attribute__((vector_size (8)));
+typedef unsigned char v16qi __attribute__((vector_size (16)));
+typedef unsigned char v32qi __attribute__((vector_size (32)));
+typedef unsigned short v8hi __attribute__((vector_size (16)));
+typedef unsigned short v16hi __attribute__((vector_size (32)));
+typedef unsigned short v32hi __attribute__((vector_size (64)));
+
+
+void
+truncwb_512 (v32qi * dst, v32hi * __restrict src)
+{
+  unsigned char tem[8];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  tem[8] = (*src)[8];
+  tem[9] = (*src)[9];
+  tem[10] = (*src)[10];
+  tem[11] = (*src)[11];
+  tem[12] = (*src)[12];
+  tem[13] = (*src)[13];
+  tem[14] = (*src)[14];
+  tem[15] = (*src)[15];
+  tem[16] = (*src)[16];
+  tem[17] = (*src)[17];
+  tem[18] = (*src)[18];
+  tem[19] = (*src)[19];
+  tem[20] = (*src)[20];
+  tem[21] = (*src)[21];
+  tem[22] = (*src)[22];
+  tem[23] = (*src)[23];
+  tem[24] = (*src)[24];
+  tem[25] = (*src)[25];
+  tem[26] = (*src)[26];
+  tem[27] = (*src)[27];
+  tem[28] = (*src)[28];
+  tem[29] = (*src)[29];
+  tem[30] = (*src)[30];
+  tem[31] = (*src)[31];
+  dst[0] = *(v32qi *) tem;
+}
+
+void
+truncwb_256 (v16qi * dst, v16hi * __restrict src)
+{
+  unsigned char tem[8];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  tem[8] = (*src)[8];
+  tem[9] = (*src)[9];
+  tem[10] = (*src)[10];
+  tem[11] = (*src)[11];
+  tem[12] = (*src)[12];
+  tem[13] = (*src)[13];
+  tem[14] = (*src)[14];
+  tem[15] = (*src)[15];
+  dst[0] = *(v16qi *) tem;
+}
+
+void
+truncwb_128 (v16qi * dst, v8hi * __restrict src)
+{
+  unsigned char tem[8];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  dst[0] = *(v16qi *) tem;
+}
+
+/* { dg-final { scan-assembler-times "vpmovwb" 2 } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 3 { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr92658-avx512f.c b/gcc/testsuite/gcc.target/i386/pr92658-avx512f.c
new file mode 100644
index 00000000000..2ba29074a81
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92658-avx512f.c
@@ -0,0 +1,106 @@
+/* PR target/92658 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mavx512f" } */
+
+typedef unsigned char v8qi __attribute__((vector_size (8)));
+typedef unsigned char v16qi __attribute__((vector_size (16)));
+typedef unsigned short v8hi __attribute__((vector_size (16)));
+typedef unsigned short v16hi __attribute__((vector_size (32)));
+typedef unsigned int v8si __attribute__((vector_size (32)));
+typedef unsigned int v16si __attribute__((vector_size (64)));
+typedef unsigned long long v8di __attribute__((vector_size (64)));
+
+void
+truncqd (v8si * dst, v8di * __restrict src)
+{
+  unsigned tem[8];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  dst[0] = *(v8si *) tem;
+}
+
+void
+truncqw (v8hi * dst, v8di * __restrict src)
+{
+  unsigned short tem[8];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  dst[0] = *(v8hi *) tem;
+}
+
+void
+truncqb (v8qi * dst, v8di * __restrict src)
+{
+  unsigned char tem[8];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  dst[0] = *(v8qi *) tem;
+}
+
+void
+truncdw (v16hi * dst, v16si * __restrict src)
+{
+  unsigned short tem[8];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  tem[8] = (*src)[8];
+  tem[9] = (*src)[9];
+  tem[10] = (*src)[10];
+  tem[11] = (*src)[11];
+  tem[12] = (*src)[12];
+  tem[13] = (*src)[13];
+  tem[14] = (*src)[14];
+  tem[15] = (*src)[15];
+  dst[0] = *(v16hi *) tem;
+}
+
+
+void
+truncdb (v16qi * dst, v16si * __restrict src)
+{
+  unsigned char tem[8];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  tem[8] = (*src)[8];
+  tem[9] = (*src)[9];
+  tem[10] = (*src)[10];
+  tem[11] = (*src)[11];
+  tem[12] = (*src)[12];
+  tem[13] = (*src)[13];
+  tem[14] = (*src)[14];
+  tem[15] = (*src)[15];
+  dst[0] = *(v16qi *) tem;
+}
+
+/* { dg-final { scan-assembler-times "vpmovqd" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqw" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqb" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vpmovdw" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c b/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c
new file mode 100644
index 00000000000..50b32f968ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c
@@ -0,0 +1,129 @@
+/* PR target/92658 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mavx512f -mavx512vl" } */
+
+typedef unsigned char v16qi __attribute__((vector_size (16)));
+typedef unsigned short v8hi __attribute__((vector_size (16)));
+typedef unsigned int v4si __attribute__((vector_size (16)));
+typedef unsigned int v8si __attribute__((vector_size (32)));
+typedef unsigned long long v2di __attribute__((vector_size (16)));
+typedef unsigned long long v4di __attribute__((vector_size (32)));
+
+void
+truncqd_256 (v4si * dst, v4di * __restrict src)
+{
+  unsigned tem[4];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  dst[0] = *(v4si *) tem;
+}
+
+void
+truncqw_256 (v8hi * dst, v4di * __restrict src)
+{
+  unsigned short tem[4];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  dst[0] = *(v8hi *) tem;
+}
+
+void
+truncqb_256 (v16qi * dst, v4di * __restrict src)
+{
+  unsigned char tem[4];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  dst[0] = *(v16qi *) tem;
+}
+
+void
+truncqd_128 (v4si * dst, v2di * __restrict src)
+{
+  unsigned tem[4];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  dst[0] = *(v4si *) tem;
+}
+
+void
+truncqw_128 (v8hi * dst, v2di * __restrict src)
+{
+  unsigned short tem[4];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  dst[0] = *(v8hi *) tem;
+}
+
+void
+truncqb_128 (v16qi * dst, v2di * __restrict src)
+{
+  unsigned char tem[4];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  dst[0] = *(v16qi *) tem;
+}
+
+void
+truncdw_256 (v8hi * dst, v8si * __restrict src)
+{
+  unsigned short tem[8];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  dst[0] = *(v8hi *) tem;
+}
+
+void
+truncdb_256 (v16qi * dst, v8si * __restrict src)
+{
+  unsigned char tem[8];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  dst[0] = *(v16qi *) tem;
+}
+
+void
+truncdw_128 (v8hi * dst, v4si * __restrict src)
+{
+  unsigned short tem[8];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  dst[0] = *(v8hi *) tem;
+}
+
+void
+truncdb_128 (v16qi * dst, v4si * __restrict src)
+{
+  unsigned char tem[8];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  dst[0] = *(v16qi *) tem;
+}
+
+/* { dg-final { scan-assembler-times "vpmovqd" 2 } } } */
+/* { dg-final { scan-assembler-times "vpmovqw" 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vpmovqb" 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vpmovdw" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovdw" 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 2 { xfail *-*-* } } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-06-10 16:41 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-10 16:41 [gcc/devel/c++-modules] Add missing vector truncmn2 expanders [PR92658] Nathan Sidwell

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).