public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 3/3] target/99881 - x86 vector cost of CTOR from integer regs
@ 2022-02-18 14:01 Richard Biener
  2022-02-21  1:35 ` Hongtao Liu
  0 siblings, 1 reply; 6+ messages in thread
From: Richard Biener @ 2022-02-18 14:01 UTC (permalink / raw)
  To: gcc-patches; +Cc: richard.sandiford, hongtao.liu, ubizjak

This uses the SLP node that is now passed to the vectorizer costing hook
to adjust vector construction costs for the cost of moving an
integer component from a GPR to a vector register when that's
required for building a vector from components.  A crucial difference
here is whether the component is loaded from memory or extracted
from a vector register as in those cases no intermediate GPR is involved.

The pr99881.c testcase can be un-XFAILed with this patch; the
pr91446.c testcase now produces scalar code, which looks superior
to me, so I've adjusted it as well.

I'm currently re-bootstrapping and testing on x86_64-unknown-linux-gnu
after adding the BIT_FIELD_REF vector extracting special casing.

I suppose we can let autotesters look for SPEC performance fallout.

OK if testing succeeds?

Thanks,
Richard.

2022-02-18  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/104582
	PR target/99881
	* config/i386/i386.cc (ix86_vector_costs::add_stmt_cost):
	Cost GPR to vector register moves for integer vector construction.

	* gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-1.c: New.
	* gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-2.c: Likewise.
	* gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-3.c: Likewise.
	* gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-4.c: Likewise.
	* gcc.target/i386/pr99881.c: Un-XFAIL.
	* gcc.target/i386/pr91446.c: Adjust to not expect vectorization.
---
 gcc/config/i386/i386.cc                       | 45 ++++++++++++++++++-
 .../costmodel/x86_64/costmodel-pr104582-1.c   | 15 +++++++
 .../costmodel/x86_64/costmodel-pr104582-2.c   | 13 ++++++
 .../costmodel/x86_64/costmodel-pr104582-3.c   | 13 ++++++
 .../costmodel/x86_64/costmodel-pr104582-4.c   | 15 +++++++
 gcc/testsuite/gcc.target/i386/pr91446.c       |  2 +-
 gcc/testsuite/gcc.target/i386/pr99881.c       |  2 +-
 7 files changed, 102 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-3.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-4.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 0830dbd7dca..b2bf90576d5 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22997,7 +22997,7 @@ ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
 
 unsigned
 ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
-				  stmt_vec_info stmt_info, slp_tree,
+				  stmt_vec_info stmt_info, slp_tree node,
 				  tree vectype, int misalign,
 				  vect_cost_model_location where)
 {
@@ -23160,6 +23160,49 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
       stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
       stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
     }
+  else if (kind == vec_construct
+	   && node
+	   && SLP_TREE_DEF_TYPE (node) == vect_external_def
+	   && INTEGRAL_TYPE_P (TREE_TYPE (vectype)))
+    {
+      stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
+      unsigned i;
+      tree op;
+      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
+	if (TREE_CODE (op) == SSA_NAME)
+	  TREE_VISITED (op) = 0;
+      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
+	{
+	  if (TREE_CODE (op) != SSA_NAME
+	      || TREE_VISITED (op))
+	    continue;
+	  TREE_VISITED (op) = 1;
+	  gimple *def = SSA_NAME_DEF_STMT (op);
+	  tree tem;
+	  if (is_gimple_assign (def)
+	      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
+	      && ((tem = gimple_assign_rhs1 (def)), true)
+	      && TREE_CODE (tem) == SSA_NAME
+	      /* A sign-change expands to nothing.  */
+	      && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (def)),
+					TREE_TYPE (tem)))
+	    def = SSA_NAME_DEF_STMT (tem);
+	  /* When the component is loaded from memory we can directly
+	     move it to a vector register, otherwise we have to go
+	     via a GPR or via vpinsr which involves similar cost.
+	     Likewise with a BIT_FIELD_REF extracting from a vector
+	     register we can hope to avoid using a GPR.  */
+	  if (!is_gimple_assign (def)
+	      || (!gimple_assign_load_p (def)
+		  && (gimple_assign_rhs_code (def) != BIT_FIELD_REF
+		      || !VECTOR_TYPE_P (TREE_TYPE
+				(TREE_OPERAND (gimple_assign_rhs1 (def), 0))))))
+	    stmt_cost += ix86_cost->sse_to_integer;
+	}
+      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
+	if (TREE_CODE (op) == SSA_NAME)
+	  TREE_VISITED (op) = 0;
+    }
   if (stmt_cost == -1)
     stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
 
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-1.c
new file mode 100644
index 00000000000..992a845ad7a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-msse -fdump-tree-slp2-details" } */
+
+struct S { unsigned long a, b; } s;
+
+void
+foo (unsigned long *a, unsigned long *b)
+{
+  unsigned long a_ = *a;
+  unsigned long b_ = *b;
+  s.a = a_;
+  s.b = b_;
+}
+
+/* { dg-final { scan-tree-dump "basic block part vectorized" "slp2" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-2.c b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-2.c
new file mode 100644
index 00000000000..7637cdb4a97
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-msse -fdump-tree-slp2-details" } */
+
+struct S { unsigned long a, b; } s;
+
+void
+foo (unsigned long a, unsigned long b)
+{
+  s.a = a;
+  s.b = b;
+}
+
+/* { dg-final { scan-tree-dump-not "basic block part vectorized" "slp2" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-3.c b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-3.c
new file mode 100644
index 00000000000..999c4905708
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-msse -fdump-tree-slp2-details" } */
+
+struct S { double a, b; } s;
+
+void
+foo (double a, double b)
+{
+  s.a = a;
+  s.b = b;
+}
+
+/* { dg-final { scan-tree-dump "basic block part vectorized" "slp2" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-4.c b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-4.c
new file mode 100644
index 00000000000..cc471e1ed73
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-4.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-msse -fdump-tree-slp2-details" } */
+
+struct S { unsigned long a, b; } s;
+
+void
+foo (signed long *a, unsigned long *b)
+{
+  unsigned long a_ = *a;
+  unsigned long b_ = *b;
+  s.a = a_;
+  s.b = b_;
+}
+
+/* { dg-final { scan-tree-dump "basic block part vectorized" "slp2" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr91446.c b/gcc/testsuite/gcc.target/i386/pr91446.c
index 0243ca3ea68..067bf43f698 100644
--- a/gcc/testsuite/gcc.target/i386/pr91446.c
+++ b/gcc/testsuite/gcc.target/i386/pr91446.c
@@ -21,4 +21,4 @@ foo (unsigned long long width, unsigned long long height,
   bar (&t);
 }
 
-/* { dg-final { scan-assembler-times "vmovdqa\[^\n\r\]*xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "xmm\[0-9\]" 0 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr99881.c b/gcc/testsuite/gcc.target/i386/pr99881.c
index 3e087eb2ed7..a1ec1d1ba8a 100644
--- a/gcc/testsuite/gcc.target/i386/pr99881.c
+++ b/gcc/testsuite/gcc.target/i386/pr99881.c
@@ -1,7 +1,7 @@
 /* PR target/99881.  */
 /* { dg-do compile { target { ! ia32 } } } */
 /* { dg-options "-Ofast -march=skylake" } */
-/* { dg-final { scan-assembler-not "xmm\[0-9\]" { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not "xmm\[0-9\]" } } */
 
 void
 foo (int* __restrict a, int n, int c)
-- 
2.34.1

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-02-22  9:51 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-18 14:01 [PATCH 3/3] target/99881 - x86 vector cost of CTOR from integer regs Richard Biener
2022-02-21  1:35 ` Hongtao Liu
2022-02-21  9:10   ` Richard Biener
2022-02-22  4:08     ` Hongtao Liu
2022-02-22  7:58       ` Richard Biener
2022-02-22  9:51         ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).