public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [COMMITTED] i386: Adjust emulated integer vector mode shift costs
@ 2023-05-22 20:39 Uros Bizjak
  0 siblings, 0 replies; only message in thread
From: Uros Bizjak @ 2023-05-22 20:39 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 955 bytes --]

Returned integer vector mode costs of emulated instructions in
ix86_shift_rotate_cost are wrong and do not reflect generated
instruction sequences.  Rewrite handling of different integer vector
modes and different target ABIs to return real instruction
counts in order to calculate better costs of various emulated modes.

Also add the cost of a memory read when an instruction in the
sequence reads memory.

gcc/ChangeLog:

    * config/i386/i386.cc (ix86_shift_rotate_cost): Correct
    calculation of integer vector mode costs to reflect generated
    instruction sequences of different integer vector modes and
    different target ABIs.  Remove "speed" function argument.
    (ix86_rtx_costs): Update call for removed function argument.
    (ix86_vector_costs::add_stmt_cost): Ditto.

gcc/testsuite/ChangeLog:

    * gcc.target/i386/sse2-shiftqihi-constant-1.c: Remove XFAILs.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.

[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 4891 bytes --]

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index a36e625342d..38125ce284a 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -20565,20 +20565,23 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
 			enum rtx_code code,
 			enum machine_mode mode, bool constant_op1,
 			HOST_WIDE_INT op1_val,
-			bool speed,
 			bool and_in_op1,
 			bool shift_and_truncate,
 			bool *skip_op0, bool *skip_op1)
 {
   if (skip_op0)
     *skip_op0 = *skip_op1 = false;
+
   if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
     {
-      /* V*QImode is emulated with 1-11 insns.  */
-      if (mode == V16QImode || mode == V32QImode)
+      int count;
+      /* Cost of reading the memory.  */
+      int extra;
+
+      switch (mode)
 	{
-	  int count = 11;
-	  if (TARGET_XOP && mode == V16QImode)
+	case V16QImode:
+	  if (TARGET_XOP)
 	    {
 	      /* For XOP we use vpshab, which requires a broadcast of the
 		 value to the variable shift insn.  For constants this
@@ -20586,37 +20589,65 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
 		 shift with one insn set the cost to prefer paddb.  */
 	      if (constant_op1)
 		{
-		  if (skip_op1)
-		    *skip_op1 = true;
-		  return ix86_vec_cost (mode,
-					cost->sse_op
-					+ (speed
-					   ? 2
-					   : COSTS_N_BYTES
-					       (GET_MODE_UNIT_SIZE (mode))));
+		  extra = cost->sse_load[2];
+		  return ix86_vec_cost (mode, cost->sse_op) + extra;
+		}
+	      else
+		{
+		  count = (code == ASHIFT) ? 2 : 3;
+		  return ix86_vec_cost (mode, cost->sse_op * count);
+		}
+	    }
+	  /* FALLTHRU */
+	case V32QImode:
+	  extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3];
+	  if (constant_op1)
+	    {
+	      if (code == ASHIFTRT)
+		{
+		  count = 4;
+		  extra *= 2;
+		}
+	      else
+		count = 2;
+	    }
+	  else if (TARGET_SSE4_1)
+	    count = 8;
+	  else if (code == ASHIFTRT)
+	    count = 9;
+	  else
+	    count = 8;
+	  return ix86_vec_cost (mode, cost->sse_op * count) + extra;
+
+	case V2DImode:
+	case V4DImode:
+	  /* V*DImode arithmetic right shift is emulated.  */
+	  if (code == ASHIFTRT && !TARGET_AVX512VL)
+	    {
+	      if (constant_op1)
+		{
+		  if (op1_val == 63)
+		    count = TARGET_SSE4_2 ? 1 : 2;
+		  else if (TARGET_XOP)
+		    count = 2;
+		  else
+		    count = 4;
 		}
-	      count = 3;
+	      else if (TARGET_XOP)
+		count = 3;
+	      else if (TARGET_SSE4_2)
+		count = 4;
+	      else
+		count = 5;
+
+	      return ix86_vec_cost (mode, cost->sse_op * count);
 	    }
-	  else if (TARGET_SSSE3)
-	    count = 7;
-	  return ix86_vec_cost (mode, cost->sse_op * count);
-	}
-      /* V*DImode arithmetic right shift is emulated.  */
-      else if (code == ASHIFTRT
-	       && (mode == V2DImode || mode == V4DImode)
-	       && !TARGET_XOP
-	       && !TARGET_AVX512VL)
-	{
-	  int count = 4;
-	  if (constant_op1 && op1_val == 63 && TARGET_SSE4_2)
-	    count = 2;
-	  else if (constant_op1)
-	    count = 3;
-	  return ix86_vec_cost (mode, cost->sse_op * count);
+	  /* FALLTHRU */
+	default:
+	  return ix86_vec_cost (mode, cost->sse_op);
 	}
-      else
-	return ix86_vec_cost (mode, cost->sse_op);
     }
+
   if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
     {
       if (constant_op1)
@@ -20786,7 +20817,6 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
 				       CONSTANT_P (XEXP (x, 1)),
 				       CONST_INT_P (XEXP (x, 1))
 					 ? INTVAL (XEXP (x, 1)) : -1,
-				       speed,
 				       GET_CODE (XEXP (x, 1)) == AND,
 				       SUBREG_P (XEXP (x, 1))
 				       && GET_CODE (XEXP (XEXP (x, 1),
@@ -23558,7 +23588,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
 		            TREE_CODE (op2) == INTEGER_CST,
 			    cst_and_fits_in_hwi (op2)
 			    ? int_cst_value (op2) : -1,
-		            true, false, false, NULL, NULL);
+			    false, false, NULL, NULL);
 	  }
 	  break;
 	case NOP_EXPR:
diff --git a/gcc/testsuite/gcc.target/i386/sse2-shiftqihi-constant-1.c b/gcc/testsuite/gcc.target/i386/sse2-shiftqihi-constant-1.c
index 015450f8219..8a79afcdaf7 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-shiftqihi-constant-1.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-shiftqihi-constant-1.c
@@ -1,7 +1,7 @@
 /* PR target/95524 */
 /* { dg-do compile } */
 /* { dg-options "-O2 -msse2 -mno-avx" } */
-/* { dg-final { scan-assembler-times "pand\[^\n\]*%xmm" 3 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "pand\[^\n\]*%xmm" 3 } } */
 typedef char v16qi  __attribute__ ((vector_size (16)));
 typedef unsigned char v16uqi  __attribute__ ((vector_size (16)));
 
@@ -20,7 +20,7 @@ foo_ashift_128 (v16qi a)
   return a << 7;
 }
 
-/* { dg-final { scan-assembler-times "psllw\[^\n\]*%xmm" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "psllw\[^\n\]*%xmm" 1 } } */
 
 __attribute__((noipa)) v16uqi
 foo_lshiftrt_128 (v16uqi a)

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-05-22 20:39 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-22 20:39 [COMMITTED] i386: Adjust emulated integer vector mode shift costs Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).