public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [Patch/rtl-expand] Take tree range info into account to improve LSHIFT_EXP expanding
@ 2015-04-16 11:04 Jiong Wang
  2015-04-24  2:23 ` Jeff Law
  0 siblings, 1 reply; 12+ messages in thread
From: Jiong Wang @ 2015-04-16 11:04 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2508 bytes --]


This is a rework of

  https://gcc.gnu.org/ml/gcc-patches/2014-07/msg01998.html

On second thought, I feel it's better to fix this at an earlier stage,
during RTL expand, which is more generic; this also avoids making the
already complex combine pass even more complex.

Currently GCC expands a wide-mode left shift into a generic, complex
instruction sequence. However, if we know that the high part of the
wide-mode value comes entirely from sign extension, the expand logic can
be simplified.

Given the following example,

T A = (T) B  << const_imm_shift

We know the high part of A comes entirely from sign extension if

* T is the next wider type of word_mode.

For example, on AArch64, if type T is a 128-bit integer (TImode) and B
has mode SImode or DImode, then the tree analyzer knows that the high part
of the TImode result comes entirely from sign extension, and records this
in the range info.

 |<           T          >|
 |   high     |   low     |
              |<- sizel ->|

For the above example, we could simplify the expand logic into
 1. low = low << const_imm_shift;
 2. high = low >> (sizel - const_imm_shift)
(where "low" on the right-hand side of both steps is the original,
unshifted low part).

We can use the arithmetic right shift to do the sign extension. The
redundant instructions will be optimized out later.

The actual improvement in the generated assembly (.s) is shown below.

AArch64
=======

  __int128_t
  foo (int data)
  {
    return (__int128_t) data << 50;
  }

  old:
    sxtw    x2, w0
    asr     x1, x2, 63
    lsl     x0, x2, 50
    lsl     x1, x1, 50
    orr     x1, x1, x2, lsr 14
 
  new:
    sxtw    x1, w0
    lsl     x0, x1, 50
    asr     x1, x1, 14


ARM (.fpu softvfp)
===========

  long long
  shift (int data)
  {
    return (long long) data << 20;
  }
 
  old:
    stmfd   sp!, {r4, r5}
    mov     r5, r0, asr #31
    mov     r3, r0
    mov     r0, r0, asl #20
    mov     r1, r5, asl #20
    orr     r1, r1, r3, lsr #12
    ldmfd   sp!, {r4, r5}
    bx      lr

  new:
    mov     r1, r0
    mov     r0, r0, asl #20
    mov     r1, r1, asr #12
    bx      lr

Test
====

  x86 bootstrap OK, regression test OK.
  AArch64 bootstrap OK, regression test on board OK.

Regards,
Jiong

2015-04-16  Jiong.Wang  <jiong.wang@arm.com>

gcc/
  * expr.c (expand_expr_real_2): Take tree range info into account when
  expanding LSHIFT_EXPR, so that a wide-mode left shift of a sign-extended
  value is expanded as two word-mode shifts.

gcc/testsuite
  * gcc.dg/wide-shift-64.c: New testcase.
  * gcc.dg/wide-shift-128.c: Ditto.
  * gcc.target/aarch64/ashltidisi.c: Ditto.
  * gcc.target/arm/ashldisi.c: Ditto.
  

[-- Attachment #2: range-expand.patch --]
[-- Type: text/x-diff, Size: 7360 bytes --]

diff --git a/gcc/expr.c b/gcc/expr.c
index 89ca129..96d64cc 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -8984,23 +8984,85 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
 
     case LSHIFT_EXPR:
     case RSHIFT_EXPR:
-      /* If this is a fixed-point operation, then we cannot use the code
-	 below because "expand_shift" doesn't support sat/no-sat fixed-point
-         shifts.   */
-      if (ALL_FIXED_POINT_MODE_P (mode))
-	goto binop;
-
-      if (! safe_from_p (subtarget, treeop1, 1))
-	subtarget = 0;
-      if (modifier == EXPAND_STACK_PARM)
-	target = 0;
-      op0 = expand_expr (treeop0, subtarget,
-			 VOIDmode, EXPAND_NORMAL);
-      temp = expand_variable_shift (code, mode, op0, treeop1, target,
-				    unsignedp);
-      if (code == LSHIFT_EXPR)
-	temp = REDUCE_BIT_FIELD (temp);
-      return temp;
+      {
+	/* If this is a fixed-point operation, then we cannot use the code
+	   below because "expand_shift" doesn't support sat/no-sat fixed-point
+	   shifts.  */
+	if (ALL_FIXED_POINT_MODE_P (mode))
+	  goto binop;
+
+	if (! safe_from_p (subtarget, treeop1, 1))
+	  subtarget = 0;
+	if (modifier == EXPAND_STACK_PARM)
+	  target = 0;
+
+	op0 = expand_expr (treeop0, subtarget,
+			   VOIDmode, EXPAND_NORMAL);
+
+	/* If mode == GET_MODE_WIDER_MODE (word_mode),
+	   then normally, there will no native instructions to support
+	   this wide mode left shift.
+
+	   given below example,
+
+	   T A = (T) B  << C
+
+	   |<		T	   >|
+	   |   high     |   low     |
+
+			|<- sizel ->|
+
+	   if from range info, we could deduce that the high part are all sign
+	   bit extension, then this left shift operation could be largely
+	   simplified into.
+
+	     1. low = low << C;
+	     2. high = low >> (sizel - C)  */
+
+	int o_bits = GET_MODE_SIZE (mode) * BITS_PER_UNIT;
+	wide_int min, max;
+
+	if (code == LSHIFT_EXPR
+	    && !unsignedp
+	    && mode == GET_MODE_WIDER_MODE (word_mode)
+	    && !have_insn_for (LSHIFT_EXPR, mode)
+	    && TREE_CONSTANT (treeop1)
+	    && get_range_info (treeop0, &min, &max) == VR_RANGE
+	    && (wi::cmp (min,
+			 wide_int::from (wi::min_value
+					 ((unsigned) (BITS_PER_WORD),
+					  SIGNED), o_bits, SIGNED),
+			 SIGNED) != -1)
+	    && (wi::cmp (max,
+			 wide_int::from (wi::max_value
+					 ((unsigned)(BITS_PER_WORD),
+					  SIGNED), o_bits, SIGNED),
+			 SIGNED) != 1))
+	  {
+	    rtx low = simplify_gen_subreg (word_mode, op0, mode, 0);
+	    rtx t_low = simplify_gen_subreg (word_mode, target, mode, 0);
+	    rtx t_high = simplify_gen_subreg (word_mode, target, mode,
+					      UNITS_PER_WORD);
+	    tree high_shift =
+	      build_int_cst (TREE_TYPE (treeop1),
+			     BITS_PER_WORD -TREE_INT_CST_LOW (treeop1));
+
+	    temp = expand_variable_shift (code, word_mode, low, treeop1,
+					  t_low, unsignedp);
+
+	    temp = expand_variable_shift (RSHIFT_EXPR, word_mode, low,
+					  high_shift, t_high, unsignedp);
+
+	    gcc_assert (GET_CODE (temp) == SUBREG);
+	    temp = XEXP (temp, 0);
+	  }
+	else
+	  temp = expand_variable_shift (code, mode, op0, treeop1, target,
+					unsignedp);
+	if (code == LSHIFT_EXPR)
+	  temp = REDUCE_BIT_FIELD (temp);
+	return temp;
+      }
 
       /* Could determine the answer when only additive constants differ.  Also,
 	 the addition of one can be handled by changing the condition.  */
diff --git a/gcc/testsuite/gcc.dg/wide-shift-128.c b/gcc/testsuite/gcc.dg/wide-shift-128.c
new file mode 100644
index 0000000..9b62715
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/wide-shift-128.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target aarch64*-*-* mips64*-*-* sparc64*-*-* } } */
+/* { dg-require-effective-target int128 } */
+/* { dg-options "-O2 -fdump-rtl-combine" } */
+
+__int128_t
+load2 (int data)
+{
+    return (__int128_t) data << 50;
+}
+
+/* { dg-final { scan-rtl-dump-not "ior" "combine" } } */
+/* { dg-final { cleanup-rtl-dump "combine" } } */
diff --git a/gcc/testsuite/gcc.dg/wide-shift-64.c b/gcc/testsuite/gcc.dg/wide-shift-64.c
new file mode 100644
index 0000000..5bc278f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/wide-shift-64.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target arm*-*-* mips*-*-* sparc*-*-* } } */
+/* { dg-options "-O2 -fdump-rtl-combine" } */
+
+long long
+load1 (int data)
+{
+    return (long long) data << 12;
+}
+
+/* { dg-final { scan-rtl-dump-not "ior" "combine" } } */
+/* { dg-final { cleanup-rtl-dump "combine" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/ashltidisi.c b/gcc/testsuite/gcc.target/aarch64/ashltidisi.c
new file mode 100644
index 0000000..aeb2a24
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ashltidisi.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -save-temps" } */
+
+extern void abort (void);
+
+#define GEN_TEST_CASE(x, y, z)\
+__uint128_t __attribute__ ((noinline))\
+ushift_##x##_##z (unsigned y data)\
+{\
+  return (__uint128_t) data << x;\
+}\
+__int128_t __attribute__ ((noinline)) \
+shift_##x##_##z (y data) \
+{\
+  return (__int128_t) data << x;\
+}
+
+GEN_TEST_CASE (53, int, i)
+GEN_TEST_CASE (3, long long, ll)
+GEN_TEST_CASE (13, long long, ll)
+GEN_TEST_CASE (53, long long, ll)
+
+int
+main (int argc, char **argv)
+{
+
+#define SHIFT_CHECK(x, y, z, p) \
+	if (ushift_##y##_##p (x)\
+	    != ((__uint128_t) (unsigned z) x << y)) \
+	  abort ();\
+	if (shift_##y##_##p (x)\
+	    != ((__uint128_t) (signed z) x << y)) \
+	  abort ();
+
+  SHIFT_CHECK (0x12345678, 53, int, i)
+  SHIFT_CHECK (0xcafecafe, 53, int, i)
+
+  SHIFT_CHECK (0x1234567890abcdefLL, 3, long long, ll)
+  SHIFT_CHECK (0x1234567890abcdefLL, 13, long long, ll)
+  SHIFT_CHECK (0x1234567890abcdefLL, 53, long long, ll)
+  SHIFT_CHECK (0xcafecafedeaddeadLL, 3, long long, ll)
+  SHIFT_CHECK (0xcafecafedeaddeadLL, 13, long long, ll)
+  SHIFT_CHECK (0xcafecafedeaddeadLL, 53, long long, ll)
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "asr" 4 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/arm/ashldisi.c b/gcc/testsuite/gcc.target/arm/ashldisi.c
new file mode 100644
index 0000000..00dc06e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/ashldisi.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -save-temps" } */
+
+extern void abort (void);
+
+#define GEN_TEST_CASE(x)\
+unsigned long long __attribute__ ((noinline))\
+ushift_ ## x (unsigned int data)\
+{\
+  return (unsigned long long) data << x;\
+}\
+long long __attribute__ ((noinline)) \
+shift_ ## x (int data) \
+{\
+  return (long long) data << x;\
+}
+
+GEN_TEST_CASE (3)
+GEN_TEST_CASE (23)
+GEN_TEST_CASE (30)
+int
+main (int argc, char **argv)
+{
+
+#define SHIFT_CHECK(x, y) \
+	if (ushift_ ## y (x)\
+	    != ((unsigned long long) (unsigned) x << y)) \
+	  abort (); \
+	if (shift_ ## y (x)\
+	    != ((long long) (signed) x << y)) \
+	  abort ();
+
+  SHIFT_CHECK (0x12345678, 3)
+  SHIFT_CHECK (0xcafecafe, 3)
+  SHIFT_CHECK (0x12345678, 23)
+  SHIFT_CHECK (0xcafecafe, 23)
+  SHIFT_CHECK (0x12345678, 30)
+  SHIFT_CHECK (0xcafecafe, 30)
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "asr" 3 } } */
+/* { dg-final { cleanup-saved-temps } } */

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2015-08-19 22:55 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-04-16 11:04 [Patch/rtl-expand] Take tree range info into account to improve LSHIFT_EXP expanding Jiong Wang
2015-04-24  2:23 ` Jeff Law
2015-04-27 20:58   ` Jiong Wang
2015-04-29  3:53     ` Jeff Law
2015-04-29 22:14       ` Jiong Wang
2015-04-29 22:55         ` Jeff Law
2015-08-14 17:55           ` Jiong Wang
2015-08-14 20:30             ` Jeff Law
2015-08-14 22:24               ` Jiong Wang
2015-08-18 13:22                 ` Jiong Wang
2015-08-18 17:47                   ` Jeff Law
2015-08-19 23:05                     ` Jiong Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).