public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-6756] Enhance vec_pack_trunc for integral mode mask.
@ 2022-01-20  8:52 hongtao Liu
  0 siblings, 0 replies; only message in thread
From: hongtao Liu @ 2022-01-20  8:52 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:8bc700f4c3fbe405413db02281ef2918bfa831fc

commit r12-6756-g8bc700f4c3fbe405413db02281ef2918bfa831fc
Author: liuhongt <hongtao.liu@intel.com>
Date:   Mon Jan 17 10:47:46 2022 +0800

    Enhance vec_pack_trunc for integral mode mask.
    
    For the testcase in the PR, the patch supports QI:4 -> HI:16 pack in
    multiple steps (first pack QI:4 -> QI:8 through vec_pack_sbool_trunc_qi,
    then pack QI:8 -> HI:16 through vec_pack_trunc_hi).
    Similarly for QI:2 -> HI:16, which is test4 in mask-pack-prefer128.c.
    
    gcc/ChangeLog:
    
            PR target/103771
            * tree-vect-stmts.cc (supportable_narrowing_operation): Enhance
            integral mode mask pack with multiple steps, taking
            vec_pack_sbool_trunc_optab as the start when the number of
            elements is less than BITS_PER_UNIT.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/mask-pack-prefer128.c: New test.
            * gcc.target/i386/mask-pack-prefer256.c: New test.
            * gcc.target/i386/pr103771.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/i386/mask-pack-prefer128.c |  8 ++++++++
 gcc/testsuite/gcc.target/i386/mask-pack-prefer256.c |  8 ++++++++
 gcc/testsuite/gcc.target/i386/pr103771.c            | 18 ++++++++++++++++++
 gcc/tree-vect-stmts.cc                              | 11 +++++++----
 4 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/mask-pack-prefer128.c b/gcc/testsuite/gcc.target/i386/mask-pack-prefer128.c
new file mode 100644
index 00000000000..c9ea37c7ed3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mask-pack-prefer128.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O3 -fopenmp-simd -fdump-tree-vect-details -mprefer-vector-width=128" } */
+/* Disabling epilogues until we find a better way to deal with scans.  */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 10 "vect" } } */
+/* { dg-final { scan-assembler-not "maskmov" } } */
+
+#include "mask-pack.c"
diff --git a/gcc/testsuite/gcc.target/i386/mask-pack-prefer256.c b/gcc/testsuite/gcc.target/i386/mask-pack-prefer256.c
new file mode 100644
index 00000000000..841f51b4041
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mask-pack-prefer256.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-march=skylake-avx512 -O3 -fopenmp-simd -fdump-tree-vect-details -mprefer-vector-width=256" } */
+/* Disabling epilogues until we find a better way to deal with scans.  */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 10 "vect" } } */
+/* { dg-final { scan-assembler-not "maskmov" } } */
+
+#include "mask-pack.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr103771.c b/gcc/testsuite/gcc.target/i386/pr103771.c
new file mode 100644
index 00000000000..a1a9952b6a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103771.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=cascadelake -O3 -fdump-tree-vect-details -mprefer-vector-width=128" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+
+typedef unsigned char uint8_t;
+
+static uint8_t x264_clip_uint8 (int x)
+{
+  return x & (~255) ? (-x) >> 31 : x;
+}
+
+void
+mc_weight (uint8_t* __restrict dst, uint8_t* __restrict src,
+	   int i_width,int i_scale)
+{
+  for(int x = 0; x < i_width; x++)
+    dst[x] = x264_clip_uint8 (src[x] * i_scale);
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 95be4f38eea..824ebb6354b 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -12124,6 +12124,7 @@ supportable_narrowing_operation (enum tree_code code,
   tree intermediate_type, prev_type;
   machine_mode intermediate_mode, prev_mode;
   int i;
+  unsigned HOST_WIDE_INT n_elts;
   bool uns;
 
   *multi_step_cvt = 0;
@@ -12133,8 +12134,9 @@ supportable_narrowing_operation (enum tree_code code,
       c1 = VEC_PACK_TRUNC_EXPR;
       if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
 	  && VECTOR_BOOLEAN_TYPE_P (vectype)
-	  && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
-	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
+	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
+	  && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
+	  && n_elts < BITS_PER_UNIT)
 	optab1 = vec_pack_sbool_trunc_optab;
       else
 	optab1 = optab_for_tree_code (c1, vectype, optab_default);
@@ -12225,8 +12227,9 @@ supportable_narrowing_operation (enum tree_code code,
 	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
 	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
-	  && intermediate_mode == prev_mode
-	  && SCALAR_INT_MODE_P (prev_mode))
+	  && SCALAR_INT_MODE_P (prev_mode)
+	  && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
+	  && n_elts < BITS_PER_UNIT)
 	interm_optab = vec_pack_sbool_trunc_optab;
       else
 	interm_optab


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-01-20  8:52 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-20  8:52 [gcc r12-6756] Enhance vec_pack_trunc for integral mode mask hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).