public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Optimize (A / (cast) (1 << B)) -> (A >> B) (PR middle-end/91680)
@ 2019-09-10  7:45 Jakub Jelinek
  2019-09-10  8:06 ` Richard Biener
  0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2019-09-10  7:45 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

Hi!

The following patch optimizes (A / (cast) (1 << B)) -> (A >> B)
in addition to already supported (A / (1 << B)) -> (A >> B).

The patch only supports same precision cast (i.e. a sign change),
or widening cast, in that case either zero extension (then the extended
value is known to be a power of two and all is fine), or sign extension
if the first operand has all upper bits starting with the sign bit of the
narrower type clear.  That is because (unsigned long long) (1 << 31)
is 0xffffffff80000000ULL and we need to make sure that dividing by that
huge value is equal to >> 31.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-09-10  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/91680
	* match.pd ((A / (1 << B)) -> (A >> B)): Allow widening cast from
	the shift type to type.

	* gcc.dg/tree-ssa/pr91680.c: New test.
	* g++.dg/torture/pr91680.C: New test.

--- gcc/match.pd.jj	2019-09-09 17:33:08.551582878 +0200
+++ gcc/match.pd	2019-09-09 20:05:25.966107441 +0200
@@ -305,13 +305,29 @@ (define_operator_list COND_TERNARY
 /* (A / (1 << B)) -> (A >> B).
    Only for unsigned A.  For signed A, this would not preserve rounding
    toward zero.
-   For example: (-1 / ( 1 << B)) !=  -1 >> B.  */
+   For example: (-1 / ( 1 << B)) !=  -1 >> B.
+   Also allow widening conversions, like:
+   (A / (unsigned long long) (1U << B)) -> (A >> B)
+   or
+   (A / (unsigned long long) (1 << B)) -> (A >> B).
+   If the left shift is signed, it can be done only if the upper bits
+   of A starting from shift's type sign bit are zero, as
+   (unsigned long long) (1 << 31) is -2147483648ULL, not 2147483648ULL,
+   so it is valid only if A >> 31 is zero.  */
 (simplify
- (trunc_div @0 (lshift integer_onep@1 @2))
+ (trunc_div @0 (convert? (lshift integer_onep@1 @2)))
  (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0))
       && (!VECTOR_TYPE_P (type)
 	  || target_supports_op_p (type, RSHIFT_EXPR, optab_vector)
-	  || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar)))
+	  || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar))
+      && (useless_type_conversion_p (type, TREE_TYPE (@1))
+	  || (element_precision (type) >= element_precision (TREE_TYPE (@1))
+	      && (TYPE_UNSIGNED (TREE_TYPE (@1))
+		  || (element_precision (type)
+		      == element_precision (TREE_TYPE (@1)))
+		  || (get_nonzero_bits (@0)
+		      & wi::mask (element_precision (TREE_TYPE (@1)) - 1, true,
+				  element_precision (type))) == 0))))
   (rshift @0 @2)))
 
 /* Preserve explicit divisions by 0: the C++ front-end wants to detect
--- gcc/testsuite/gcc.dg/tree-ssa/pr91680.c.jj	2019-09-09 20:18:04.349619827 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr91680.c	2019-09-09 20:18:33.922171856 +0200
@@ -0,0 +1,37 @@
+/* PR middle-end/91680 */
+/* { dg-do compile { target { ilp32 || lp64 } } } */
+/* { dg-options "-O2 -fdump-tree-forwprop1" } */
+/* { dg-final { scan-tree-dump-times " / " 1 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times " >> " 3 "forwprop1" } } */
+
+__attribute__((noipa)) unsigned long long
+foo (unsigned char x)
+{
+  unsigned long long q = 1 << x;
+  return 256 / q;
+}
+
+__attribute__((noipa)) unsigned long long
+bar (unsigned char x)
+{
+  unsigned long long q = 1U << x;
+  return 256 / q;
+}
+
+__attribute__((noipa)) unsigned long long
+baz (unsigned char x, unsigned long long y)
+{
+  /* This can't be optimized, at least not in C++ and maybe not
+     in C89, because for x == 31 q is -2147483648ULL, not
+     2147483648ULL, and e.g. 2147483648ULL >> 31 is 1, while
+     2147483648ULL / -2147483648ULL is 0.  */
+  unsigned long long q = 1 << x;
+  return y / q;
+}
+
+__attribute__((noipa)) unsigned long long
+qux (unsigned char x, unsigned long long y)
+{
+  unsigned long long q = 1U << x;
+  return y / q;
+}
--- gcc/testsuite/g++.dg/torture/pr91680.C.jj	2019-09-09 20:25:51.775539166 +0200
+++ gcc/testsuite/g++.dg/torture/pr91680.C	2019-09-09 20:26:18.610132667 +0200
@@ -0,0 +1,35 @@
+/* PR middle-end/91680 */
+/* { dg-do run { target { ilp32 || lp64 } } } */
+
+extern "C" void abort ();
+
+#include "../../gcc.dg/tree-ssa/pr91680.c"
+
+int
+main ()
+{
+  unsigned char i;
+  for (i = 0; i < __SIZEOF_INT__ * __CHAR_BIT__; i++)
+    {
+      volatile unsigned long long q = 1 << i;
+      if (foo (i) != 256 / q)
+	abort ();
+      q = 1U << i;
+      if (bar (i) != 256 / q)
+	abort ();
+      q = 1 << i;
+      if (baz (i, (1U << i) - 1) != ((1U << i) - 1) / q)
+	abort ();
+      if (baz (i, 1U << i) != (1U << i) / q)
+	abort ();
+      if (baz (i, -1) != -1 / q)
+	abort ();
+      q = 1U << i;
+      if (qux (i, (1U << i) - 1) != ((1U << i) - 1) / q)
+	abort ();
+      if (qux (i, 1U << i) != (1U << i) / q)
+	abort ();
+      if (qux (i, -1) != -1 / q)
+	abort ();
+    }
+}

	Jakub

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] Optimize (A / (cast) (1 << B)) -> (A >> B) (PR middle-end/91680)
  2019-09-10  7:45 [PATCH] Optimize (A / (cast) (1 << B)) -> (A >> B) (PR middle-end/91680) Jakub Jelinek
@ 2019-09-10  8:06 ` Richard Biener
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2019-09-10  8:06 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

On September 10, 2019 9:45:28 AM GMT+02:00, Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>The following patch optimizes (A / (cast) (1 << B)) -> (A >> B)
>in addition to already supported (A / (1 << B)) -> (A >> B).
>
>The patch only supports same precision cast (i.e. a sign change),
>or widening cast, in that case either zero extension (then the extended
>value is known to be a power of two and all is fine), or sign extension
>if the first operand has all upper bits starting with the sign bit of
>the
>narrower type clear.  That is because (unsigned long long) (1 << 31)
>is 0xffffffff80000000ULL and we need to make sure that dividing by that
>huge value is equal to >> 31.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok. 

Richard.

>2019-09-10  Jakub Jelinek  <jakub@redhat.com>
>
>	PR middle-end/91680
>	* match.pd ((A / (1 << B)) -> (A >> B)): Allow widening cast from
>	the shift type to type.
>
>	* gcc.dg/tree-ssa/pr91680.c: New test.
>	* g++.dg/torture/pr91680.C: New test.
>
>--- gcc/match.pd.jj	2019-09-09 17:33:08.551582878 +0200
>+++ gcc/match.pd	2019-09-09 20:05:25.966107441 +0200
>@@ -305,13 +305,29 @@ (define_operator_list COND_TERNARY
> /* (A / (1 << B)) -> (A >> B).
>   Only for unsigned A.  For signed A, this would not preserve rounding
>    toward zero.
>-   For example: (-1 / ( 1 << B)) !=  -1 >> B.  */
>+   For example: (-1 / ( 1 << B)) !=  -1 >> B.
>+   Also allow widening conversions, like:
>+   (A / (unsigned long long) (1U << B)) -> (A >> B)
>+   or
>+   (A / (unsigned long long) (1 << B)) -> (A >> B).
>+   If the left shift is signed, it can be done only if the upper bits
>+   of A starting from shift's type sign bit are zero, as
>+   (unsigned long long) (1 << 31) is -2147483648ULL, not
>2147483648ULL,
>+   so it is valid only if A >> 31 is zero.  */
> (simplify
>- (trunc_div @0 (lshift integer_onep@1 @2))
>+ (trunc_div @0 (convert? (lshift integer_onep@1 @2)))
>  (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0))
>       && (!VECTOR_TYPE_P (type)
> 	  || target_supports_op_p (type, RSHIFT_EXPR, optab_vector)
>-	  || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar)))
>+	  || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar))
>+      && (useless_type_conversion_p (type, TREE_TYPE (@1))
>+	  || (element_precision (type) >= element_precision (TREE_TYPE (@1))
>+	      && (TYPE_UNSIGNED (TREE_TYPE (@1))
>+		  || (element_precision (type)
>+		      == element_precision (TREE_TYPE (@1)))
>+		  || (get_nonzero_bits (@0)
>+		      & wi::mask (element_precision (TREE_TYPE (@1)) - 1, true,
>+				  element_precision (type))) == 0))))
>   (rshift @0 @2)))
> 
> /* Preserve explicit divisions by 0: the C++ front-end wants to detect
>--- gcc/testsuite/gcc.dg/tree-ssa/pr91680.c.jj	2019-09-09
>20:18:04.349619827 +0200
>+++ gcc/testsuite/gcc.dg/tree-ssa/pr91680.c	2019-09-09
>20:18:33.922171856 +0200
>@@ -0,0 +1,37 @@
>+/* PR middle-end/91680 */
>+/* { dg-do compile { target { ilp32 || lp64 } } } */
>+/* { dg-options "-O2 -fdump-tree-forwprop1" } */
>+/* { dg-final { scan-tree-dump-times " / " 1 "forwprop1" } } */
>+/* { dg-final { scan-tree-dump-times " >> " 3 "forwprop1" } } */
>+
>+__attribute__((noipa)) unsigned long long
>+foo (unsigned char x)
>+{
>+  unsigned long long q = 1 << x;
>+  return 256 / q;
>+}
>+
>+__attribute__((noipa)) unsigned long long
>+bar (unsigned char x)
>+{
>+  unsigned long long q = 1U << x;
>+  return 256 / q;
>+}
>+
>+__attribute__((noipa)) unsigned long long
>+baz (unsigned char x, unsigned long long y)
>+{
>+  /* This can't be optimized, at least not in C++ and maybe not
>+     in C89, because for x == 31 q is -2147483648ULL, not
>+     2147483648ULL, and e.g. 2147483648ULL >> 31 is 1, while
>+     2147483648ULL / -2147483648ULL is 0.  */
>+  unsigned long long q = 1 << x;
>+  return y / q;
>+}
>+
>+__attribute__((noipa)) unsigned long long
>+qux (unsigned char x, unsigned long long y)
>+{
>+  unsigned long long q = 1U << x;
>+  return y / q;
>+}
>--- gcc/testsuite/g++.dg/torture/pr91680.C.jj	2019-09-09
>20:25:51.775539166 +0200
>+++ gcc/testsuite/g++.dg/torture/pr91680.C	2019-09-09
>20:26:18.610132667 +0200
>@@ -0,0 +1,35 @@
>+/* PR middle-end/91680 */
>+/* { dg-do run { target { ilp32 || lp64 } } } */
>+
>+extern "C" void abort ();
>+
>+#include "../../gcc.dg/tree-ssa/pr91680.c"
>+
>+int
>+main ()
>+{
>+  unsigned char i;
>+  for (i = 0; i < __SIZEOF_INT__ * __CHAR_BIT__; i++)
>+    {
>+      volatile unsigned long long q = 1 << i;
>+      if (foo (i) != 256 / q)
>+	abort ();
>+      q = 1U << i;
>+      if (bar (i) != 256 / q)
>+	abort ();
>+      q = 1 << i;
>+      if (baz (i, (1U << i) - 1) != ((1U << i) - 1) / q)
>+	abort ();
>+      if (baz (i, 1U << i) != (1U << i) / q)
>+	abort ();
>+      if (baz (i, -1) != -1 / q)
>+	abort ();
>+      q = 1U << i;
>+      if (qux (i, (1U << i) - 1) != ((1U << i) - 1) / q)
>+	abort ();
>+      if (qux (i, 1U << i) != (1U << i) / q)
>+	abort ();
>+      if (qux (i, -1) != -1 / q)
>+	abort ();
>+    }
>+}
>
>	Jakub

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2019-09-10  8:06 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-10  7:45 [PATCH] Optimize (A / (cast) (1 << B)) -> (A >> B) (PR middle-end/91680) Jakub Jelinek
2019-09-10  8:06 ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).