public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] S/390: Improve risbg usage
@ 2015-07-22 13:55 Andreas Krebbel
  0 siblings, 0 replies; only message in thread
From: Andreas Krebbel @ 2015-07-22 13:55 UTC (permalink / raw)
  To: gcc-patches

Hi,

with the attached patch we use risbg in more situations.

This especially helps the SpecCPU 400.perlbench testcase.

Bootstrapped on s390 and s390x. No regressions.

I'll commit the patch after waiting a few days for review comments.

Bye,

-Andreas-


gcc/ChangeLog:

2015-07-22  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* config/s390/s390.c (s390_rtx_costs): Make risbg patterns
	cheaper.
	(s390_expand_insv): Don't generate risbg pattern for constant zero
	sources.
	* config/s390/s390.md ("*insv<mode>_zEC12_appendbitsleft")
	("*insv<mode>_z10_appendbitsleft"): New pattern definitions.  New
	splitters.

gcc/testsuite/ChangeLog:

2015-07-22  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* gcc.target/s390/insv-1.c: New test.
	* gcc.target/s390/insv-2.c: New test.
	* gcc.target/s390/insv-3.c: New test.


diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 861dfb2..a8712b9 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -3321,13 +3321,26 @@ s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
       *total = 0;
       return true;
 
+    case IOR:
+      /* risbg */
+      if (GET_CODE (XEXP (x, 0)) == AND
+	  && GET_CODE (XEXP (x, 1)) == ASHIFT
+	  && REG_P (XEXP (XEXP (x, 0), 0))
+	  && REG_P (XEXP (XEXP (x, 1), 0))
+	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
+	  && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
+	      (1UL << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
+	{
+	  *total = COSTS_N_INSNS (2);
+	  return true;
+	}
     case ASHIFT:
     case ASHIFTRT:
     case LSHIFTRT:
     case ROTATE:
     case ROTATERT:
     case AND:
-    case IOR:
     case XOR:
     case NEG:
     case NOT:
@@ -5839,8 +5852,17 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
 
       if (mode_s == VOIDmode)
 	{
-	  /* Assume const_int etc already in the proper mode.  */
-	  src = force_reg (mode, src);
+	  /* For constant zero values the representation with AND
+	     appears to be folded in more situations than the (set
+	     (zero_extract) ...).
+	     We only do this when the start and end of the bitfield
+	     remain in the same SImode chunk.  That way nihf or nilf
+	     can be used.
+	     The AND patterns might still generate a risbg for this.  */
+	  if (src == const0_rtx && bitpos / 32  == (bitpos + bitsize - 1) / 32)
+	    return false;
+	  else
+	    src = force_reg (mode, src);
 	}
       else if (mode_s != mode)
 	{
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 8c07d1b..2961f61 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -3776,6 +3776,71 @@
   [(set_attr "op_type" "RIE")
    (set_attr "z10prop" "z10_super_E1")])
 
+; Implement appending Y on the left of S bits of X
+; x = (y << s) | (x & ((1 << s) - 1))
+(define_insn "*insv<mode>_zEC12_appendbitsleft"
+  [(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
+	(ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "0")
+			  (match_operand:GPR 2 "immediate_operand" ""))
+		 (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "d")
+			     (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))]
+  "TARGET_ZEC12 && UINTVAL (operands[2]) == (1UL << UINTVAL (operands[4])) - 1"
+  "risbgn\t%0,%3,64-<bitsize>,64-%4-1,%4"
+  [(set_attr "op_type" "RIE")
+   (set_attr "z10prop" "z10_super_E1")])
+
+(define_insn "*insv<mode>_z10_appendbitsleft"
+  [(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
+	(ior:GPR (and:GPR (match_operand:GPR 1 "nonimmediate_operand" "0")
+			  (match_operand:GPR 2 "immediate_operand" ""))
+		 (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "d")
+			     (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_Z10 && !TARGET_ZEC12 && UINTVAL (operands[2]) == (1UL << UINTVAL (operands[4])) - 1"
+  "risbg\t%0,%3,64-<bitsize>,64-%4-1,%4"
+  [(set_attr "op_type" "RIE")
+   (set_attr "z10prop" "z10_super_E1")])
+
+; z = (x << c) | (y >> d) with (x << c) and (y >> d) not overlapping after shifting
+;  -> z = y >> d; z = (x << c) | (y & ((1 << c) - 1))
+;  -> z = y >> d; z = risbg;
+
+(define_split
+  [(set (match_operand:GPR 0 "nonimmediate_operand" "")
+	(ior:GPR (lshiftrt:GPR (match_operand:GPR 1 "nonimmediate_operand" "")
+			       (match_operand:GPR 2 "nonzero_shift_count_operand" ""))
+		 (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "")
+			     (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))]
+  "TARGET_ZEC12 && UINTVAL (operands[2]) + UINTVAL (operands[4]) >= <bitsize>"
+  [(set (match_dup 0)
+	(lshiftrt:GPR (match_dup 1) (match_dup 2)))
+   (set (match_dup 0)
+	(ior:GPR (and:GPR (match_dup 0) (match_dup 5))
+		 (ashift:GPR (match_dup 3) (match_dup 4))))]
+{
+  operands[5] = GEN_INT ((1UL << UINTVAL (operands[4])) - 1);
+})
+
+(define_split
+  [(parallel
+    [(set (match_operand:GPR 0 "nonimmediate_operand" "")
+	  (ior:GPR (lshiftrt:GPR (match_operand:GPR 1 "nonimmediate_operand" "")
+				 (match_operand:GPR 2 "nonzero_shift_count_operand" ""))
+		   (ashift:GPR (match_operand:GPR 3 "nonimmediate_operand" "")
+			       (match_operand:GPR 4 "nonzero_shift_count_operand" ""))))
+     (clobber (reg:CC CC_REGNUM))])]
+  "TARGET_Z10 && !TARGET_ZEC12 && UINTVAL (operands[2]) + UINTVAL (operands[4]) >= <bitsize>"
+  [(set (match_dup 0)
+	(lshiftrt:GPR (match_dup 1) (match_dup 2)))
+   (parallel
+    [(set (match_dup 0)
+	  (ior:GPR (and:GPR (match_dup 0) (match_dup 5))
+		   (ashift:GPR (match_dup 3) (match_dup 4))))
+     (clobber (reg:CC CC_REGNUM))])]
+{
+  operands[5] = GEN_INT ((1UL << UINTVAL (operands[4])) - 1);
+})
+
 (define_insn "*r<noxa>sbg_<mode>_noshift"
   [(set (match_operand:GPR 0 "nonimmediate_operand" "=d")
 	(IXOR:GPR
diff --git a/gcc/testsuite/gcc.target/s390/insv-1.c b/gcc/testsuite/gcc.target/s390/insv-1.c
new file mode 100644
index 0000000..e6c1b8b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/insv-1.c
@@ -0,0 +1,111 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=z10 -mzarch" } */
+
+unsigned long
+foo1 (unsigned long a, unsigned long b)
+{
+  return (a << 5) | (b & (((1UL << 5) - 1)));
+}
+
+/* This generates very different RTX than foo1.  The output reg (r2)
+   matches the unshifted argument.  So it actually is a
+   (set (zero_extract a 59 0) b) */
+unsigned long
+foo2 (unsigned long a, unsigned long b)
+{
+  return (b << 5) | (a & (((1UL << 5) - 1)));
+}
+
+/* risbg cannot be used when fewer bits are removed with the mask.  */
+
+unsigned long
+foo1b (unsigned long a, unsigned long b)
+{
+  return (a << 5) | (b & 1);
+}
+
+unsigned long
+foo2b (unsigned long a, unsigned long b)
+{
+  return (b << 5) | (a & 1);
+}
+
+/* risbg cannot be used when the masked bits would end up in the
+   result since a real OR is required then.  */
+unsigned long
+foo1c (unsigned long a, unsigned long b)
+{
+  return (a << 5) | (b & 127);
+}
+
+unsigned long
+foo2c (unsigned long a, unsigned long b)
+{
+  return (b << 5) | (a & 127);
+}
+
+unsigned long
+foo3 (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (a << 5) | (b >> 59);
+#else
+  return (a << 5) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4 (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (b << 5) | (a >> 59);
+#else
+  return (b << 5) | (a >> 27);
+#endif
+}
+
+/* risbg can be used also if there are some bits spared in the middle
+   of the two chunks.  */
+unsigned long
+foo3b (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (a << 6) | (b >> 59);
+#else
+  return (a << 6) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4b (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (b << 6) | (a >> 59);
+#else
+  return (b << 6) | (a >> 27);
+#endif
+}
+
+/* One bit of overlap, so it is better not to use risbg.  */
+
+unsigned long
+foo3c (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (a << 4) | (b >> 59);
+#else
+  return (a << 4) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4c (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (b << 4) | (a >> 59);
+#else
+  return (b << 4) | (a >> 27);
+#endif
+}
+
+/* { dg-final { scan-assembler-times "risbg" 6 } } */
diff --git a/gcc/testsuite/gcc.target/s390/insv-2.c b/gcc/testsuite/gcc.target/s390/insv-2.c
new file mode 100644
index 0000000..2ba6d6c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/insv-2.c
@@ -0,0 +1,111 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=zEC12 -mzarch" } */
+
+unsigned long
+foo1 (unsigned long a, unsigned long b)
+{
+  return (a << 5) | (b & (((1UL << 5) - 1)));
+}
+
+/* This generates very different RTX than foo1.  The output reg (r2)
+   matches the unshifted argument.  So it actually is a
+   (set (zero_extract a 59 0) b) */
+unsigned long
+foo2 (unsigned long a, unsigned long b)
+{
+  return (b << 5) | (a & (((1UL << 5) - 1)));
+}
+
+/* risbgn cannot be used when fewer bits are removed with the mask.  */
+
+unsigned long
+foo1b (unsigned long a, unsigned long b)
+{
+  return (a << 5) | (b & 1);
+}
+
+unsigned long
+foo2b (unsigned long a, unsigned long b)
+{
+  return (b << 5) | (a & 1);
+}
+
+/* risbgn cannot be used when the masked bits would end up in the
+   result since a real OR is required then.  */
+unsigned long
+foo1c (unsigned long a, unsigned long b)
+{
+  return (a << 5) | (b & 127);
+}
+
+unsigned long
+foo2c (unsigned long a, unsigned long b)
+{
+  return (b << 5) | (a & 127);
+}
+
+unsigned long
+foo3 (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (a << 5) | (b >> 59);
+#else
+  return (a << 5) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4 (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (b << 5) | (a >> 59);
+#else
+  return (b << 5) | (a >> 27);
+#endif
+}
+
+/* risbgn can be used also if there are some bits spared in the middle
+   of the two chunks.  */
+unsigned long
+foo3b (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (a << 6) | (b >> 59);
+#else
+  return (a << 6) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4b (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (b << 6) | (a >> 59);
+#else
+  return (b << 6) | (a >> 27);
+#endif
+}
+
+/* One bit of overlap, so it is better not to use risbgn.  */
+
+unsigned long
+foo3c (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (a << 4) | (b >> 59);
+#else
+  return (a << 4) | (b >> 27);
+#endif
+}
+
+unsigned long
+foo4c (unsigned long a, unsigned long b)
+{
+#ifdef __s390x__
+  return (b << 4) | (a >> 59);
+#else
+  return (b << 4) | (a >> 27);
+#endif
+}
+
+/* { dg-final { scan-assembler-times "risbgn" 6 } } */
diff --git a/gcc/testsuite/gcc.target/s390/insv-3.c b/gcc/testsuite/gcc.target/s390/insv-3.c
new file mode 100644
index 0000000..0719750
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/insv-3.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=z10 -mzarch" } */
+
+/* risbg with z bit would work here but we rather want this to be a shift.  */
+struct
+{
+  int a:31;
+  int b:1;
+} s;
+
+void
+foo (int in)
+{
+  s.a = in;
+  s.b = 0;
+}
+
+/* { dg-final { scan-assembler-not "risbg" } } */

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2015-07-22 13:50 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-22 13:55 [PATCH] S/390: Improve risbg usage Andreas Krebbel

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).