* [gcc r12-1103] forwprop: Enhance vec perm fed by CTOR and CTOR/CST [PR99398]
@ 2021-05-28  6:13 Kewen Lin
From: Kewen Lin @ 2021-05-28  6:13 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:4a9f2306cb39a3cf265eeb6f8f3a8bbaf230c4c8

commit r12-1103-g4a9f2306cb39a3cf265eeb6f8f3a8bbaf230c4c8
Author: Kewen Lin <linkw@linux.ibm.com>
Date:   Fri May 28 01:11:45 2021 -0500

    forwprop: Enhance vec perm fed by CTOR and CTOR/CST [PR99398]
    
    VEC_PERM_EXPR requires that the number of MASK elements be the
    same as the number of elements in operands V0 and V1.  In some
    cases, such as with the Power AltiVec built-in function
    vec_perm, a VIEW_CONVERT_EXPR has to be used to satisfy this
    requirement, but that can block simplifications which do not
    look through it.
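
    For illustration, here is a sketch modeled on the new tests
    below (test_ctor_ctor_same_du), where a and b are scalar
    unsigned long long values and vec_perm is the altivec.h
    built-in.  The byte mask has sixteen elements, so the
    two-element operands must be cast to vector unsigned char; in
    GIMPLE those casts become VIEW_CONVERT_EXPRs so that the mask
    and operand element counts match:

        vector unsigned long long v1 = {a, 0};
        vector unsigned long long v2 = {b, 0};
        vector unsigned char mask
          = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
        /* The casts below turn into VIEW_CONVERT_EXPRs in GIMPLE.  */
        vector unsigned long long res
          = (vector unsigned long long) vec_perm ((vector unsigned char) v1,
                                                  (vector unsigned char) v2,
                                                  mask);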
    
    For the cases where the permuted operands come from two CTORs
    of the same type, or from one CTOR and one VECTOR_CST, this
    patch enhances forwprop to look through the intermediate
    VIEW_CONVERT_EXPRs and simplify the permutation further where
    possible.
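
    Continuing the sketch above: the mask picks the first eight
    bytes of each input, i.e. the lanes holding a and b (this is
    what the new run test checks), so once forwprop looks through
    the VIEW_CONVERT_EXPRs the whole sequence is expected to fold
    to a plain constructor; the new compile test verifies that no
    VIEW_CONVERT_EXPR survives in the optimized dump:

        vector unsigned long long res = {a, b};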
    
    Bootstrapped/regtested on powerpc64le-linux-gnu P9,
    powerpc64-linux-gnu P8, x86_64-redhat-linux and
    aarch64-linux-gnu.
    
    gcc/ChangeLog:
    
            PR tree-optimization/99398
            * tree-ssa-forwprop.c (simplify_permutation): Optimize some cases
            where the fed operands are CTOR/CST and propagated through
            VIEW_CONVERT_EXPR.  Call vec_perm_indices::new_shrunk_vector.
            * vec-perm-indices.c (vec_perm_indices::new_shrunk_vector): New
            function.
            * vec-perm-indices.h (vec_perm_indices::new_shrunk_vector): New
            declaration.
    
    gcc/testsuite/ChangeLog:
    
            PR tree-optimization/99398
            * gcc.target/powerpc/vec-perm-ctor-run.c: New test.
            * gcc.target/powerpc/vec-perm-ctor.c: New test.
            * gcc.target/powerpc/vec-perm-ctor.h: New test.

Diff:
---
 .../gcc.target/powerpc/vec-perm-ctor-run.c         | 124 ++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/vec-perm-ctor.c   |   9 ++
 gcc/testsuite/gcc.target/powerpc/vec-perm-ctor.h   | 163 +++++++++++++++++++++
 gcc/tree-ssa-forwprop.c                            | 141 ++++++++++++++++--
 gcc/vec-perm-indices.c                             |  59 ++++++++
 gcc/vec-perm-indices.h                             |   1 +
 6 files changed, 482 insertions(+), 15 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/vec-perm-ctor-run.c b/gcc/testsuite/gcc.target/powerpc/vec-perm-ctor-run.c
new file mode 100644
index 00000000000..987d6db999c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-perm-ctor-run.c
@@ -0,0 +1,124 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+#include "vec-perm-ctor.h"
+
+#include <stdlib.h>
+
+int
+main ()
+{
+  du a_du = 100ULL;
+  du b_du = 200ULL;
+
+  di a_di = -100;
+  di b_di = 200;
+
+  df a_df = 10.0;
+  df b_df = 20.0;
+
+  si a_si = 12;
+  si b_si = -25;
+  si c_si = -37;
+  si d_si = 50;
+
+  sf a_sf = 30.0f;
+  sf b_sf = 40.0f;
+  sf c_sf = 50.0f;
+  sf d_sf = 60.0f;
+
+  hu a_hu = 10;
+  hu b_hu = 20;
+  hu c_hu = 30;
+  hu d_hu = 40;
+  hu e_hu = 50;
+  hu f_hu = 60;
+  hu g_hu = 70;
+  hu h_hu = 80;
+
+  qi a_qi = 10;
+  qi b_qi = 20;
+  qi c_qi = -30;
+  qi d_qi = 40;
+  qi e_qi = -50;
+  qi f_qi = 60;
+  qi g_qi = 70;
+  qi h_qi = -80;
+
+  v2du res1 = test_ctor_ctor_same_du (a_du, b_du);
+  if (res1[0] != a_du || res1[1] != b_du)
+    abort ();
+
+  v2df res2 = test_ctor_ctor_same_df (a_df, b_df);
+  if (res2[0] != a_df || res2[1] != b_df)
+    abort ();
+
+  v4si res3 = test_ctor_ctor_same_si (a_si, b_si, c_si, d_si);
+  if (res3[0] != a_si || res3[1] != b_si || res3[2] != c_si || res3[3] != d_si)
+    abort ();
+
+  v4sf res4 = test_ctor_ctor_same_sf (a_sf, b_sf, c_sf, d_sf);
+  if (res4[0] != a_sf || res4[1] != b_sf || res4[2] != c_sf || res4[3] != d_sf)
+    abort ();
+
+  v8hu res5
+    = test_ctor_ctor_same_hu (a_hu, b_hu, c_hu, d_hu, e_hu, f_hu, g_hu, h_hu);
+
+  if (res5[0] != a_hu || res5[1] != b_hu || res5[2] != c_hu || res5[3] != d_hu
+      || res5[4] != e_hu || res5[5] != f_hu || res5[6] != g_hu
+      || res5[7] != h_hu)
+    abort ();
+
+  v16qi res6
+    = test_ctor_ctor_same_qi (a_qi, b_qi, c_qi, d_qi, e_qi, f_qi, g_qi, h_qi);
+
+  if (res6[0] != a_qi || res6[1] != b_qi || res6[2] != c_qi || res6[3] != d_qi
+      || res6[4] != a_qi || res6[5] != b_qi || res6[6] != c_qi
+      || res6[7] != d_qi || res6[8] != e_qi || res6[9] != f_qi
+      || res6[10] != g_qi || res6[11] != h_qi || res6[12] != e_qi
+      || res6[13] != f_qi || res6[14] != g_qi || res6[15] != h_qi)
+    abort ();
+
+  v2du res7 = test_ctor_cst_same_du (a_du, b_du);
+  if (res7[0] != a_du || res7[1] != 100)
+    abort ();
+
+  v4sf res8 = test_ctor_cst_same_sf (a_sf, b_sf);
+  if (res8[0] != a_sf || res8[1] != 2.0f || res8[2] != b_sf || res8[3] != 4.0f)
+    abort ();
+
+  v2df res9 = test_ctor_cst_same_df (a_df, b_df);
+  if (res9[0] != b_df || res9[1] != 200.0)
+    abort ();
+
+  v4si res10 = test_cst_ctor_same_si (a_si, b_si);
+  if (res10[0] != 1 || res10[1] != 3 || res10[2] != a_si || res10[3] != b_si)
+    abort ();
+
+  v2di res11 = test_ctor_cst_diff_di_si (a_di, b_di);
+  /* Need to take care of the endianness, since the function converts the
+     vector constant to a vector type with a different element size and the
+     endianness determines the reinterpreted layout.  Same for res12 below.  */
+  if (res11[0] != -100 ||
+#ifdef __LITTLE_ENDIAN__
+      res11[1] != 3
+#else
+      res11[1] != 0x300000000LL
+#endif
+  )
+    abort ();
+
+  v2du res12 = test_cst_ctor_diff_sf_du (a_du, b_du);
+  if (
+#ifdef __LITTLE_ENDIAN__
+    res12[0] != 0x400000003f800000ULL
+#else
+    res12[0] != 0x3f80000040000000ULL
+#endif
+    || res12[1] != 100)
+    abort ();
+
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-perm-ctor.c b/gcc/testsuite/gcc.target/powerpc/vec-perm-ctor.c
new file mode 100644
index 00000000000..cc59e60035f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-perm-ctor.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx -fdump-tree-optimized" } */
+
+/* Test that all permutations fed by CTORs and CSTs can be optimized away.  */
+
+#include "vec-perm-ctor.h"
+
+/* { dg-final { scan-tree-dump-not "VIEW_CONVERT_EXPR" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-perm-ctor.h b/gcc/testsuite/gcc.target/powerpc/vec-perm-ctor.h
new file mode 100644
index 00000000000..18782701e51
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-perm-ctor.h
@@ -0,0 +1,163 @@
+#include "altivec.h"
+
+typedef vector unsigned long long v2du;
+typedef vector signed long long v2di;
+typedef vector unsigned int v4su;
+typedef vector signed int v4si;
+typedef vector unsigned short v8hu;
+typedef vector signed short v8hi;
+typedef vector unsigned char v16qu;
+typedef vector signed char v16qi;
+typedef vector double v2df;
+typedef vector float v4sf;
+
+typedef unsigned long long du;
+typedef signed long long di;
+typedef unsigned int su;
+typedef signed int si;
+typedef unsigned short hu;
+typedef signed short hi;
+typedef unsigned char qu;
+typedef signed char qi;
+typedef double df;
+typedef float sf;
+
+/* Test whether we can optimize the vector permutation away when
+   the two inputs are CTORs of the same type, or when one input is
+   a CTOR and the other is a CST.  */
+
+/* CTOR + CTOR part (only same type supported).  */
+
+/* Test both operands are same type CTOR (type unsigned long long).  */
+__attribute__ ((noipa)) v2du
+test_ctor_ctor_same_du (du a, du b)
+{
+  v2du v1 = {a, 0};
+  v2du v2 = {b, 0};
+  v16qu vc = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
+  v2du vres = (v2du) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+/* Test both operands are same type CTOR (type double).  */
+__attribute__ ((noipa)) v2df
+test_ctor_ctor_same_df (df a, df b)
+{
+  v2df v1 = {0.0, a};
+  v2df v2 = {0.0, b};
+  v16qu vc = {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
+  v2df vres = (v2df) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+/* Test both operands are same type CTOR (type signed int).  */
+__attribute__ ((noipa)) v4si
+test_ctor_ctor_same_si (si a, si b, si c, si d)
+{
+  v4si v1 = {0, a, 0, c};
+  v4si v2 = {0, b, 0, d};
+  v16qu vc = {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31};
+  v4si vres = (v4si) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+/* Test both operands are same type CTOR (type float).  */
+__attribute__ ((noipa)) v4sf
+test_ctor_ctor_same_sf (sf a, sf b, sf c, sf d)
+{
+  v4sf v1 = {c, 0.0f, d, 0.0f};
+  v4sf v2 = {a, 0.0f, b, 0.0f};
+  v16qu vc = {16, 17, 18, 19, 24, 25, 26, 27, 0, 1, 2, 3, 8, 9, 10, 11};
+  v4sf vres = (v4sf) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+/* Test both operands are same type CTOR (type unsigned short).  */
+__attribute__ ((noipa)) v8hu
+test_ctor_ctor_same_hu (hu a, hu b, hu c, hu d, hu e, hu f, hu g, hu h)
+{
+  v8hu v1 = {0, a, 0, b, 0, c, 0, d};
+  v8hu v2 = {0, e, 0, f, 0, g, 0, h};
+  v16qu vc = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31};
+  v8hu vres = (v8hu) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+/* Test both operands are same type CTOR (type signed char).  */
+__attribute__ ((noipa)) v16qi
+test_ctor_ctor_same_qi (qi a, qi b, qi c, qi d, qi e, qi f, qi g, qi h)
+{
+  v16qi v1 = {0, a, 0, b, 0, c, 0, d, 0, a, 0, b, 0, c, 0, d};
+  v16qi v2 = {0, e, 0, f, 0, g, 0, h, 0, e, 0, f, 0, g, 0, h};
+  v16qu vc = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
+  v16qi vres = (v16qi) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+/* CTOR + CST part (same type).  */
+
+__attribute__ ((noipa)) v2du
+test_ctor_cst_same_du (du a, du b)
+{
+  v2du v1 = {a, b};
+  v2du v2 = {100, 200};
+  v16qu vc = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
+  v2du vres = (v2du) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+__attribute__ ((noipa)) v4sf
+test_ctor_cst_same_sf (sf a, sf b)
+{
+  v4sf v1 = {0.0f, a, 0.0f, b};
+  v4sf v2 = {1.0f, 2.0f, 3.0f, 4.0f};
+  v16qu vc = {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31};
+  v4sf vres = (v4sf) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+/* CST + CTOR part (same type).  */
+
+__attribute__ ((noipa)) v2df
+test_ctor_cst_same_df (df a, df b)
+{
+  v2df v1 = {a, b};
+  v2df v2 = {100.0, 200.0};
+  v16qu vc = {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
+  v2df vres = (v2df) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+__attribute__ ((noipa)) v4si
+test_cst_ctor_same_si (si a, si b)
+{
+  v4si v1 = {a, 0, b, 0};
+  v4si v2 = {1, 2, 3, 4};
+  v16qu vc = {16, 17, 18, 19, 24, 25, 26, 27, 0, 1, 2, 3, 8, 9, 10, 11};
+  v4si vres = (v4si) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+/* CTOR + CST part (different types).  */
+
+__attribute__ ((noipa)) v2di
+test_ctor_cst_diff_di_si (di a, di b)
+{
+  v2di v1 = {a, b};
+  v4si v2 = {3, 0, 4, 0};
+  v16qu vc = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
+  v2di vres = (v2di) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
+
+/* CST + CTOR part (different types).  */
+
+__attribute__ ((noipa)) v2du
+test_cst_ctor_diff_sf_du (du a, du b)
+{
+  v4sf v1 = {1.0f, 2.0f, 3.0f, 4.0f};
+  v2du v2 = {a, b};
+  v16qu vc = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
+  v2du vres = (v2du) vec_perm ((v16qu) v1, (v16qu) v2, vc);
+  return vres;
+}
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 0706fd862de..beb2702f3b6 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -2120,9 +2120,9 @@ static int
 simplify_permutation (gimple_stmt_iterator *gsi)
 {
   gimple *stmt = gsi_stmt (*gsi);
-  gimple *def_stmt;
+  gimple *def_stmt = NULL;
   tree op0, op1, op2, op3, arg0, arg1;
-  enum tree_code code;
+  enum tree_code code, code2 = ERROR_MARK;
   bool single_use_op0 = false;
 
   gcc_checking_assert (gimple_assign_rhs_code (stmt) == VEC_PERM_EXPR);
@@ -2142,10 +2142,28 @@ simplify_permutation (gimple_stmt_iterator *gsi)
   else if (TREE_CODE (op0) == SSA_NAME)
     {
       def_stmt = get_prop_source_stmt (op0, false, &single_use_op0);
-      if (!def_stmt || !can_propagate_from (def_stmt))
+      if (!def_stmt)
 	return 0;
-
       code = gimple_assign_rhs_code (def_stmt);
+      if (code == VIEW_CONVERT_EXPR)
+	{
+	  tree rhs = gimple_assign_rhs1 (def_stmt);
+	  tree name = TREE_OPERAND (rhs, 0);
+	  if (TREE_CODE (name) != SSA_NAME)
+	    return 0;
+	  if (!has_single_use (name))
+	    single_use_op0 = false;
+	  /* Here we update the def_stmt through this VIEW_CONVERT_EXPR,
+	     but still keep the code to indicate it comes from
+	     VIEW_CONVERT_EXPR.  */
+	  def_stmt = SSA_NAME_DEF_STMT (name);
+	  if (!def_stmt || !is_gimple_assign (def_stmt))
+	    return 0;
+	  if (gimple_assign_rhs_code (def_stmt) != CONSTRUCTOR)
+	    return 0;
+	}
+      if (!can_propagate_from (def_stmt))
+	return 0;
       arg0 = gimple_assign_rhs1 (def_stmt);
     }
   else
@@ -2173,12 +2191,10 @@ simplify_permutation (gimple_stmt_iterator *gsi)
       update_stmt (stmt);
       return remove_prop_source_from_use (op0) ? 2 : 1;
     }
-
-  /* Shuffle of a constructor.  */
-  else if (code == CONSTRUCTOR || code == VECTOR_CST)
+  else if (code == CONSTRUCTOR
+	   || code == VECTOR_CST
+	   || code == VIEW_CONVERT_EXPR)
     {
-      tree opt;
-      bool ret = false;
       if (op0 != op1)
 	{
 	  if (TREE_CODE (op0) == SSA_NAME && !single_use_op0)
@@ -2188,14 +2204,27 @@ simplify_permutation (gimple_stmt_iterator *gsi)
 	    arg1 = op1;
 	  else if (TREE_CODE (op1) == SSA_NAME)
 	    {
-	      enum tree_code code2;
-
 	      gimple *def_stmt2 = get_prop_source_stmt (op1, true, NULL);
-	      if (!def_stmt2 || !can_propagate_from (def_stmt2))
+	      if (!def_stmt2)
 		return 0;
-
 	      code2 = gimple_assign_rhs_code (def_stmt2);
-	      if (code2 != CONSTRUCTOR && code2 != VECTOR_CST)
+	      if (code2 == VIEW_CONVERT_EXPR)
+		{
+		  tree rhs = gimple_assign_rhs1 (def_stmt2);
+		  tree name = TREE_OPERAND (rhs, 0);
+		  if (TREE_CODE (name) != SSA_NAME)
+		    return 0;
+		  if (!has_single_use (name))
+		    return 0;
+		  def_stmt2 = SSA_NAME_DEF_STMT (name);
+		  if (!def_stmt2 || !is_gimple_assign (def_stmt2))
+		    return 0;
+		  if (gimple_assign_rhs_code (def_stmt2) != CONSTRUCTOR)
+		    return 0;
+		}
+	      else if (code2 != CONSTRUCTOR && code2 != VECTOR_CST)
+		return 0;
+	      if (!can_propagate_from (def_stmt2))
 		return 0;
 	      arg1 = gimple_assign_rhs1 (def_stmt2);
 	    }
@@ -2209,10 +2238,92 @@ simplify_permutation (gimple_stmt_iterator *gsi)
 	    return 0;
 	  arg1 = arg0;
 	}
-      opt = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (op0), arg0, arg1, op2);
+
+      /* If any VIEW_CONVERT_EXPRs were found when looking for the sources
+	 of the permutation operands, check whether the transform is valid
+	 and prepare the required new operands.  */
+      if (code == VIEW_CONVERT_EXPR || code2 == VIEW_CONVERT_EXPR)
+	{
+	  /* Figure out the target vector type to which operands should be
+	     converted.  If both are CONSTRUCTOR, the types should be the
+	     same, otherwise use the type of the CONSTRUCTOR.  */
+	  tree tgt_type = NULL_TREE;
+	  if (code == VIEW_CONVERT_EXPR)
+	    {
+	      gcc_assert (gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR);
+	      code = CONSTRUCTOR;
+	      tgt_type = TREE_TYPE (arg0);
+	    }
+	  if (code2 == VIEW_CONVERT_EXPR)
+	    {
+	      tree arg1_type = TREE_TYPE (arg1);
+	      if (tgt_type == NULL_TREE)
+		tgt_type = arg1_type;
+	      else if (tgt_type != arg1_type)
+		return 0;
+	    }
+
+	  if (!VECTOR_TYPE_P (tgt_type))
+	    return 0;
+	  tree op2_type = TREE_TYPE (op2);
+	  /* Should have folded this before.  */
+	  gcc_assert (op2_type != tgt_type);
+
+	  /* Figure out the shrunk factor.  */
+	  poly_uint64 tgt_units = TYPE_VECTOR_SUBPARTS (tgt_type);
+	  poly_uint64 op2_units = TYPE_VECTOR_SUBPARTS (op2_type);
+	  if (maybe_gt (tgt_units, op2_units))
+	    return 0;
+	  unsigned int factor;
+	  if (!constant_multiple_p (op2_units, tgt_units, &factor))
+	    return 0;
+
+	  /* Build the new permutation control vector as target vector.  */
+	  vec_perm_builder builder;
+	  if (!tree_to_vec_perm_builder (&builder, op2))
+	    return 0;
+	  vec_perm_indices indices (builder, 2, op2_units);
+	  vec_perm_indices new_indices;
+	  if (new_indices.new_shrunk_vector (indices, factor))
+	    {
+	      tree mask_type = tgt_type;
+	      if (!VECTOR_INTEGER_TYPE_P (mask_type))
+		{
+		  tree elem_type = TREE_TYPE (mask_type);
+		  unsigned elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
+		  tree int_type = build_nonstandard_integer_type (elem_size, 0);
+		  mask_type = build_vector_type (int_type, tgt_units);
+		}
+	      op2 = vec_perm_indices_to_tree (mask_type, new_indices);
+	    }
+	  else
+	    return 0;
+
+	  /* Convert the VECTOR_CST to the appropriate vector type.  */
+	  if (tgt_type != TREE_TYPE (arg0))
+	    arg0 = fold_build1 (VIEW_CONVERT_EXPR, tgt_type, arg0);
+	  else if (tgt_type != TREE_TYPE (arg1))
+	    arg1 = fold_build1 (VIEW_CONVERT_EXPR, tgt_type, arg1);
+	}
+
+      /* VIEW_CONVERT_EXPR should be updated to CONSTRUCTOR before.  */
+      gcc_assert (code == CONSTRUCTOR || code == VECTOR_CST);
+
+      /* Shuffle of a constructor.  */
+      bool ret = false;
+      tree res_type = TREE_TYPE (arg0);
+      tree opt = fold_ternary (VEC_PERM_EXPR, res_type, arg0, arg1, op2);
       if (!opt
 	  || (TREE_CODE (opt) != CONSTRUCTOR && TREE_CODE (opt) != VECTOR_CST))
 	return 0;
+      /* Found VIEW_CONVERT_EXPR before, need one explicit conversion.  */
+      if (res_type != TREE_TYPE (op0))
+	{
+	  tree name = make_ssa_name (TREE_TYPE (opt));
+	  gimple *ass_stmt = gimple_build_assign (name, opt);
+	  gsi_insert_before (gsi, ass_stmt, GSI_SAME_STMT);
+	  opt = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (op0), name);
+	}
       gimple_assign_set_rhs_from_tree (gsi, opt);
       update_stmt (gsi_stmt (*gsi));
       if (TREE_CODE (op0) == SSA_NAME)
diff --git a/gcc/vec-perm-indices.c b/gcc/vec-perm-indices.c
index ede590dc5c9..31b32ea0589 100644
--- a/gcc/vec-perm-indices.c
+++ b/gcc/vec-perm-indices.c
@@ -101,6 +101,65 @@ vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
   m_encoding.finalize ();
 }
 
+/* Check whether we can switch to a new permutation vector that
+   selects the same input elements as ORIG, but with each element
+   built up from FACTOR pieces.  Return true if yes, otherwise
+   return false.  Each batch of FACTOR permutation indices must be
+   consecutive, and the first index of each batch must be an exact
+   multiple of FACTOR.  For example, if ORIG is
+   { 2, 3, 4, 5, 0, 1, 6, 7 } and FACTOR is 2, the new permutation
+   is { 1, 2, 0, 3 }.  */
+
+bool
+vec_perm_indices::new_shrunk_vector (const vec_perm_indices &orig,
+				     unsigned int factor)
+{
+  gcc_assert (factor > 0);
+
+  if (maybe_lt (orig.m_nelts_per_input, factor))
+    return false;
+
+  poly_uint64 nelts;
+  /* Invalid if the number of vector elements isn't a multiple of FACTOR.  */
+  if (!multiple_p (orig.m_nelts_per_input, factor, &nelts))
+    return false;
+
+  /* Only handle the case where npatterns is a multiple of FACTOR.
+     FIXME: Try to see whether we can reshape it by factor npatterns.  */
+  if (orig.m_encoding.npatterns () % factor != 0)
+    return false;
+
+  unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
+  auto_vec<element_type, 32> encoding (encoded_nelts);
+  /* Separate all encoded elements into batches of size FACTOR,
+     then ensure the first element of each batch is a multiple of
+     FACTOR and that all elements in each batch are consecutive
+     from the first one.  */
+  for (unsigned int i = 0; i < encoded_nelts; i += factor)
+    {
+      element_type first = orig.m_encoding[i];
+      element_type new_index;
+      if (!multiple_p (first, factor, &new_index))
+	return false;
+      for (unsigned int j = 1; j < factor; ++j)
+	if (maybe_ne (first + j, orig.m_encoding[i + j]))
+	  return false;
+      encoding.quick_push (new_index);
+    }
+
+  m_ninputs = orig.m_ninputs;
+  m_nelts_per_input = nelts;
+  poly_uint64 full_nelts = exact_div (orig.m_encoding.full_nelts (), factor);
+  unsigned int npatterns = orig.m_encoding.npatterns () / factor;
+
+  m_encoding.new_vector (full_nelts, npatterns,
+			 orig.m_encoding.nelts_per_pattern ());
+  m_encoding.splice (encoding);
+  m_encoding.finalize ();
+
+  return true;
+}
+
 /* Rotate the inputs of the permutation right by DELTA inputs.  This changes
    the values of the permutation vector but it doesn't change the way that
    the elements are encoded.  */
diff --git a/gcc/vec-perm-indices.h b/gcc/vec-perm-indices.h
index bc70ecd8a1d..98d27f0ec42 100644
--- a/gcc/vec-perm-indices.h
+++ b/gcc/vec-perm-indices.h
@@ -57,6 +57,7 @@ public:
 
   void new_vector (const vec_perm_builder &, unsigned int, poly_uint64);
   void new_expanded_vector (const vec_perm_indices &, unsigned int);
+  bool new_shrunk_vector (const vec_perm_indices &, unsigned int);
   void rotate_inputs (int delta);
 
   /* Return the underlying vector encoding.  */

