public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [Patch match.pd] Fold (A / (1 << B)) to (A >> B)
@ 2017-06-12 13:20 James Greenhalgh
  2017-06-12 13:56 ` Richard Biener
  2017-06-16  9:23 ` James Greenhalgh
  0 siblings, 2 replies; 7+ messages in thread
From: James Greenhalgh @ 2017-06-12 13:20 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, rguenther

[-- Attachment #1: Type: text/plain, Size: 549 bytes --]


Hi,

As subject, for the testcase in the patch:

  unsigned long
  f2 (unsigned long a, int b)
  {
    unsigned long x = 1UL << b;
    return a / x;
  }

We currently generate:

  f2:
	mov	x2, 1
	lsl	x1, x2, x1
	udiv	x0, x0, x1
	ret

Which could instead be transformed to:

  f2:
	lsr	x0, x0, x1
	ret

OK?

Thanks,
James

---
gcc/

2017-06-12  James Greenhalgh  <james.greenhalgh@arm.com>

	* match.pd (A / (1 << B) -> A >> B): New.

gcc/testsuite/

2017-06-12  James Greenhalgh  <james.greenhalgh@arm.com>

	* gcc.dg/tree-ssa/forwprop-37.c: New.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Patch-match.pd-Fold-A-1-B-to-A-B.patch --]
[-- Type: text/x-patch; name="0001-Patch-match.pd-Fold-A-1-B-to-A-B.patch", Size: 1399 bytes --]

diff --git a/gcc/match.pd b/gcc/match.pd
index 54a8e04..656ede2 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -147,6 +147,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
     (op @0 integer_onep)
     (non_lvalue @0)))
 
+/* For unsigned A: (A / (1 << B)), (A >> B).
+   We can't do the same for signed A, as it might be negative, which would
+   introduce undefined behaviour.  */
+(simplify
+ (trunc_div @0 (lshift integer_onep@1 @2))
+ (if (TYPE_UNSIGNED (type))
+  (rshift @0 @2)))
+
 /* Preserve explicit divisions by 0: the C++ front-end wants to detect
    undefined behavior in constexpr evaluation, and assuming that the division
    traps enables better optimizations than these anyway.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c
new file mode 100644
index 0000000..dec826c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-forwprop1-raw" } */
+
+unsigned int
+f1 (unsigned int a, unsigned int b)
+{
+  unsigned int x = 1U << b;
+  return a / x;
+}
+
+unsigned long
+f2 (unsigned long a, int b)
+{
+  unsigned long x = 1UL << b;
+  return a / x;
+}
+
+unsigned long long
+f3 (unsigned long long a, int b)
+{
+  unsigned long long x = 1ULL << b;
+  return a / x;
+}
+
+/* { dg-final { scan-tree-dump-not "trunc_div_expr" "forwprop1" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Patch match.pd] Fold (A / (1 << B)) to (A >> B)
  2017-06-12 13:20 [Patch match.pd] Fold (A / (1 << B)) to (A >> B) James Greenhalgh
@ 2017-06-12 13:56 ` Richard Biener
  2017-06-16  9:23 ` James Greenhalgh
  1 sibling, 0 replies; 7+ messages in thread
From: Richard Biener @ 2017-06-12 13:56 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: gcc-patches, nd

On Mon, 12 Jun 2017, James Greenhalgh wrote:

> 
> Hi,
> 
> As subject, for the testcase in the patch:
> 
>   unsigned long
>   f2 (unsigned long a, int b)
>   {
>     unsigned long x = 1UL << b;
>     return a / x;
>   }
> 
> We currently generate:
> 
>   f2:
> 	mov	x2, 1
> 	lsl	x1, x2, x1
> 	udiv	x0, x0, x1
> 	ret
> 
> Which could instead be transformed to:
> 
>   f2:
> 	lsr	x0, x0, x1
> 	ret
> 
> OK?

+   We can't do the same for signed A, as it might be negative, which 
would
+   introduce undefined behaviour.  */

huh, AFAIR it is _left_ shift of negative values that invokes
undefined behavior.

Note that as you are accepting vectors you need to make sure the
target actually supports arithmetic right shift of vectors
(you only know it supports left shift and division -- so it might
be sort-of-superfluous to check in case there is no arch that supports
those but not the other).

Richard.

> Thanks,
> James
> 
> ---
> gcc/
> 
> 2017-06-12  James Greenhalgh  <james.greenhalgh@arm.com>
> 
> 	* match.pd (A / (1 << B) -> A >> B): New.
> 
> gcc/testsuite/
> 
> 2017-06-12  James Greenhalgh  <james.greenhalgh@arm.com>
> 
> 	* gcc.dg/tree-ssa/forwprop-37.c: New.
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Patch match.pd] Fold (A / (1 << B)) to (A >> B)
  2017-06-12 13:20 [Patch match.pd] Fold (A / (1 << B)) to (A >> B) James Greenhalgh
  2017-06-12 13:56 ` Richard Biener
@ 2017-06-16  9:23 ` James Greenhalgh
  2017-06-16  9:41   ` Richard Biener
  1 sibling, 1 reply; 7+ messages in thread
From: James Greenhalgh @ 2017-06-16  9:23 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, rguenther

[-- Attachment #1: Type: text/plain, Size: 2008 bytes --]


On Mon, Jun 12, 2017 at 03:56:25PM +0200, Richard Biener wrote:
> On Mon, 12 Jun 2017, James Greenhalgh wrote:
>
> >
> > Hi,
> >
> > As subject, for the testcase in the patch:
> >
> >   unsigned long
> >   f2 (unsigned long a, int b)
> >   {
> >     unsigned long x = 1UL << b;
> >     return a / x;
> >   }
> >
> > We currently generate:
> >
> >   f2:
> > 	mov	x2, 1
> > 	lsl	x1, x2, x1
> > 	udiv	x0, x0, x1
> > 	ret
> >
> > Which could instead be transformed to:
> >
> >   f2:
> > 	lsr	x0, x0, x1
> > 	ret
> >
> > OK?
>
> +   We can't do the same for signed A, as it might be negative, which
> would
> +   introduce undefined behaviour.  */
>
> huh, AFAIR it is _left_ shift of negative values that invokes
> undefined behavior.

You're right this is not a clear comment. The problem is not undefined
behaviour, so that text needs to go, but rounding towards/away from zero
for signed negative values. Division will round towards zero, arithmetic
right shift away from zero. For example in:

    -1 / (1 << 1)   !=    -1 >> 1
  = -1 / 2
  = 0                     = -1

I've rewritten the comment to make it clear this is why we can only make
this optimisation for unsigned values.

See, for example, gcc.c-torture/execute/pr34070-2.c

> Note that as you are accepting vectors you need to make sure the
> target actually supports arithmetic right shift of vectors
> (you only know it supports left shift and division -- so it might
> be sort-of-superfluous to check in case there is no arch that supports
> those but not the other).

I've added a check for that using optabs, is that the right way to do this?

Bootstrapped and tested on aarch64-none-linux-gnu with no issues.

OK?

Thanks,
James

---
gcc/

2017-06-13  James Greenhalgh  <james.greenhalgh@arm.com>

	* match.pd (A / (1 << B) -> A >> B): New.
	* generic-match-head.c: Include optabs-tree.h.
	* gimple-match-head.c: Likewise.

gcc/testsuite/

2017-06-13  James Greenhalgh  <james.greenhalgh@arm.com>

	* gcc.dg/tree-ssa/forwprop-37.c: New.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Re-Patch-match.pd-Fold-A-1-B-to-A-B.patch --]
[-- Type: text/x-patch; name="0001-Re-Patch-match.pd-Fold-A-1-B-to-A-B.patch", Size: 2373 bytes --]

diff --git a/gcc/generic-match-head.c b/gcc/generic-match-head.c
index 0c0d182..4504401 100644
--- a/gcc/generic-match-head.c
+++ b/gcc/generic-match-head.c
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "optabs-tree.h"
 
 
 /* Routine to determine if the types T1 and T2 are effectively
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index e7e9839..5f6aa27 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "internal-fn.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "optabs-tree.h"
 
 
 /* Forward declarations of the private auto-generated matchers.
diff --git a/gcc/match.pd b/gcc/match.pd
index 244e9eb..2bea268 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -147,6 +147,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
     (op @0 integer_onep)
     (non_lvalue @0)))
 
+/* (A / (1 << B)) -> (A >> B).
+   Only for unsigned A.  For signed A, this would not preserve rounding
+   toward zero.
+   For example: (-1 / ( 1 << B)) !=  -1 >> B.  */
+(simplify
+ (trunc_div @0 (lshift integer_onep@1 @2))
+ (if (TYPE_UNSIGNED (type)
+      && (!VECTOR_TYPE_P (type)
+          || optab_for_tree_code (RSHIFT_EXPR, type, optab_vector)
+          || optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar)))
+  (rshift @0 @2)))
+
 /* Preserve explicit divisions by 0: the C++ front-end wants to detect
    undefined behavior in constexpr evaluation, and assuming that the division
    traps enables better optimizations than these anyway.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c
new file mode 100644
index 0000000..dec826c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-forwprop1-raw" } */
+
+unsigned int
+f1 (unsigned int a, unsigned int b)
+{
+  unsigned int x = 1U << b;
+  return a / x;
+}
+
+unsigned long
+f2 (unsigned long a, int b)
+{
+  unsigned long x = 1UL << b;
+  return a / x;
+}
+
+unsigned long long
+f3 (unsigned long long a, int b)
+{
+  unsigned long long x = 1ULL << b;
+  return a / x;
+}
+
+/* { dg-final { scan-tree-dump-not "trunc_div_expr" "forwprop1" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Patch match.pd] Fold (A / (1 << B)) to (A >> B)
  2017-06-16  9:23 ` James Greenhalgh
@ 2017-06-16  9:41   ` Richard Biener
  2017-06-20 15:44     ` James Greenhalgh
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Biener @ 2017-06-16  9:41 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: gcc-patches, nd

On Fri, 16 Jun 2017, James Greenhalgh wrote:

> 
> On Mon, Jun 12, 2017 at 03:56:25PM +0200, Richard Biener wrote:
> > On Mon, 12 Jun 2017, James Greenhalgh wrote:
> >
> > >
> > > Hi,
> > >
> > > As subject, for the testcase in the patch:
> > >
> > >   unsigned long
> > >   f2 (unsigned long a, int b)
> > >   {
> > >     unsigned long x = 1UL << b;
> > >     return a / x;
> > >   }
> > >
> > > We currently generate:
> > >
> > >   f2:
> > > 	mov	x2, 1
> > > 	lsl	x1, x2, x1
> > > 	udiv	x0, x0, x1
> > > 	ret
> > >
> > > Which could instead be transformed to:
> > >
> > >   f2:
> > > 	lsr	x0, x0, x1
> > > 	ret
> > >
> > > OK?
> >
> > +   We can't do the same for signed A, as it might be negative, which
> > would
> > +   introduce undefined behaviour.  */
> >
> > huh, AFAIR it is _left_ shift of negative values that invokes
> > undefined behavior.
> 
> You're right this is not a clear comment. The problem is not undefined
> behaviour, so that text needs to go, but rounding towards/away from zero
> for signed negative values. Division will round towards zero, arithmetic
> right shift away from zero. For example in:
> 
>     -1 / (1 << 1)   !=    -1 >> 1
>   = -1 / 2
>   = 0                     = -1
> 
> I've rewritten the comment to make it clear this is why we can only make
> this optimisation for unsigned values.

Ah, of course.  You could use

 if ((TYPE_UNSIGNED (type)
      || tree_expr_nonnegative_p (@0))

here as improvement.

> See, for example, gcc.c-torture/execute/pr34070-2.c
> 
> > Note that as you are accepting vectors you need to make sure the
> > target actually supports arithmetic right shift of vectors
> > (you only know it supports left shift and division -- so it might
> > be sort-of-superfluous to check in case there is no arch that supports
> > those but not the other).
> 
> I've added a check for that using optabs, is that the right way to do this?

+      && (!VECTOR_TYPE_P (type)
+          || optab_for_tree_code (RSHIFT_EXPR, type, optab_vector)
+          || optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar)))

is not enough -- you need sth like

 optab ot = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
 if (ot != unknown_optab
     && optab_handler (ot, TYPE_MODE (type)) != CODE_FOR_nothing)
   .. ok! ...

ideally we'd have a helper for this in optab-tree.[ch], 
tree-vect-patterns.c could also make use of that.

Thanks,
Richard.


> Bootstrapped and tested on aarch64-none-linux-gnu with no issues.
> 
> OK?
> 
> Thanks,
> James
> 
> ---
> gcc/
> 
> 2017-06-13  James Greenhalgh  <james.greenhalgh@arm.com>
> 
> 	* match.pd (A / (1 << B) -> A >> B): New.
> 	* generic-match-head.c: Include optabs-tree.h.
> 	* gimple-match-head.c: Likewise.
> 
> gcc/testsuite/
> 
> 2017-06-13  James Greenhalgh  <james.greenhalgh@arm.com>
> 
> 	* gcc.dg/tree-ssa/forwprop-37.c: New.
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Patch match.pd] Fold (A / (1 << B)) to (A >> B)
  2017-06-16  9:41   ` Richard Biener
@ 2017-06-20 15:44     ` James Greenhalgh
  2017-06-21  6:59       ` Richard Biener
  0 siblings, 1 reply; 7+ messages in thread
From: James Greenhalgh @ 2017-06-20 15:44 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, rguenther

[-- Attachment #1: Type: text/plain, Size: 2610 bytes --]


On Fri, Jun 16, 2017 at 11:41:57AM +0200, Richard Biener wrote:
> On Fri, 16 Jun 2017, James Greenhalgh wrote:
> > On Mon, Jun 12, 2017 at 03:56:25PM +0200, Richard Biener wrote:
> > > +   We can't do the same for signed A, as it might be negative, which
> > > would
> > > +   introduce undefined behaviour.  */
> > >
> > > huh, AFAIR it is _left_ shift of negative values that invokes
> > > undefined behavior.
> >
> > You're right this is not a clear comment. The problem is not undefined
> > behaviour, so that text needs to go, but rounding towards/away from zero
> > for signed negative values. Division will round towards zero, arithmetic
> > right shift away from zero. For example in:
> >
> >     -1 / (1 << 1)   !=    -1 >> 1
> >   = -1 / 2
> >   = 0                     = -1
> >
> > I've rewritten the comment to make it clear this is why we can only make
> > this optimisation for unsigned values.
>
> Ah, of course.  You could use
>
>  if ((TYPE_UNSIGNED (type)
>       || tree_expr_nonnegative_p (@0))
>
> here as improvement.

Thanks, I've made that change.

> > See, for example, gcc.c-torture/execute/pr34070-2.c
> >
> > > Note that as you are accepting vectors you need to make sure the
> > > target actually supports arithmetic right shift of vectors
> > > (you only know it supports left shift and division -- so it might
> > > be sort-of-superfluous to check in case there is no arch that supports
> > > those but not the other).
> >
> > I've added a check for that using optabs, is that the right way to do this?
>
> +      && (!VECTOR_TYPE_P (type)
> +          || optab_for_tree_code (RSHIFT_EXPR, type, optab_vector)
> +          || optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar)))
>
> is not enough -- you need sth like
>
>  optab ot = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
>  if (ot != unknown_optab
>      && optab_handler (ot, TYPE_MODE (type)) != CODE_FOR_nothing)
>    .. ok! ...
>
> ideally we'd have a helper for this in optab-tree.[ch],
> tree-vect-patterns.c could also make use of that.

OK. I've added "target_has_vector_rshift_p" for this purpose.

Bootstrapped and tested on aarch64-none-linux-gnu with no issues.

OK?

Thanks,
James

---
gcc/

2017-06-19  James Greenhalgh  <james.greenhalgh@arm.com>

	* match.pd (A / (1 << B) -> A >> B): New.
	* generic-match-head.c: Include optabs-tree.h.
	* gimple-match-head.c: Likewise.
	* optabs-tree.h (target_has_vector_rshift_p): New.
	* optabs-tree.c (target_has_vector_rshift_p): New.

gcc/testsuite/

2017-06-19  James Greenhalgh  <james.greenhalgh@arm.com>

	* gcc.dg/tree-ssa/forwprop-37.c: New.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Re-Patch-match.pd-Fold-A-1-B-to-A-B.patch --]
[-- Type: text/x-patch; name="0001-Re-Patch-match.pd-Fold-A-1-B-to-A-B.patch", Size: 3743 bytes --]

diff --git a/gcc/generic-match-head.c b/gcc/generic-match-head.c
index 0c0d182..4504401 100644
--- a/gcc/generic-match-head.c
+++ b/gcc/generic-match-head.c
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "optabs-tree.h"
 
 
 /* Routine to determine if the types T1 and T2 are effectively
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index e7e9839..5f6aa27 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "internal-fn.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "optabs-tree.h"
 
 
 /* Forward declarations of the private auto-generated matchers.
diff --git a/gcc/match.pd b/gcc/match.pd
index 244e9eb..eb6bd59 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -147,6 +147,17 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
     (op @0 integer_onep)
     (non_lvalue @0)))
 
+/* (A / (1 << B)) -> (A >> B).
+   Only for unsigned A.  For signed A, this would not preserve rounding
+   toward zero.
+   For example: (-1 / ( 1 << B)) !=  -1 >> B.  */
+(simplify
+ (trunc_div @0 (lshift integer_onep@1 @2))
+ (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0))
+      && (!VECTOR_TYPE_P (type)
+	  || target_has_vector_rshift_p (type, optab_default)))
+  (rshift @0 @2)))
+
 /* Preserve explicit divisions by 0: the C++ front-end wants to detect
    undefined behavior in constexpr evaluation, and assuming that the division
    traps enables better optimizations than these anyway.  */
diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c
index 4bb54ba..4a513d2 100644
--- a/gcc/optabs-tree.c
+++ b/gcc/optabs-tree.c
@@ -376,3 +376,24 @@ init_tree_optimization_optabs (tree optnode)
       ggc_free (tmp_optabs);
     }
 }
+
+/* Return TRUE if the target has support for vector right shift of an
+   operand of type TYPE.  If OT_TYPE is OPTAB_DEFAULT, check for existence
+   of a shift by either a scalar or a vector.  Otherwise, check only
+   for a shift that matches OT_TYPE.  */
+
+bool
+target_has_vector_rshift_p (tree type, enum optab_subtype ot_type)
+{
+  gcc_assert (VECTOR_TYPE_P (type));
+  if (ot_type != optab_default)
+    {
+      optab ot = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
+      return ot != unknown_optab
+	&& optab_handler (ot, TYPE_MODE (type)) != CODE_FOR_nothing;
+    }
+  else
+    return target_has_vector_rshift_p (type, optab_scalar)
+	   || target_has_vector_rshift_p (type, optab_vector);
+}
+
diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h
index d0b27e0..958a701 100644
--- a/gcc/optabs-tree.h
+++ b/gcc/optabs-tree.h
@@ -41,5 +41,6 @@ bool supportable_convert_operation (enum tree_code, tree, tree, tree *,
 bool expand_vec_cmp_expr_p (tree, tree, enum tree_code);
 bool expand_vec_cond_expr_p (tree, tree, enum tree_code);
 void init_tree_optimization_optabs (tree);
+bool target_has_vector_rshift_p (tree type, enum optab_subtype);
 
 #endif
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c
new file mode 100644
index 0000000..dec826c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-forwprop1-raw" } */
+
+unsigned int
+f1 (unsigned int a, unsigned int b)
+{
+  unsigned int x = 1U << b;
+  return a / x;
+}
+
+unsigned long
+f2 (unsigned long a, int b)
+{
+  unsigned long x = 1UL << b;
+  return a / x;
+}
+
+unsigned long long
+f3 (unsigned long long a, int b)
+{
+  unsigned long long x = 1ULL << b;
+  return a / x;
+}
+
+/* { dg-final { scan-tree-dump-not "trunc_div_expr" "forwprop1" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Patch match.pd] Fold (A / (1 << B)) to (A >> B)
  2017-06-20 15:44     ` James Greenhalgh
@ 2017-06-21  6:59       ` Richard Biener
  2017-06-22  8:41         ` James Greenhalgh
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Biener @ 2017-06-21  6:59 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: gcc-patches, nd

On Tue, 20 Jun 2017, James Greenhalgh wrote:

> 
> On Fri, Jun 16, 2017 at 11:41:57AM +0200, Richard Biener wrote:
> > On Fri, 16 Jun 2017, James Greenhalgh wrote:
> > > On Mon, Jun 12, 2017 at 03:56:25PM +0200, Richard Biener wrote:
> > > > +   We can't do the same for signed A, as it might be negative, which
> > > > would
> > > > +   introduce undefined behaviour.  */
> > > >
> > > > huh, AFAIR it is _left_ shift of negative values that invokes
> > > > undefined behavior.
> > >
> > > You're right this is not a clear comment. The problem is not undefined
> > > behaviour, so that text needs to go, but rounding towards/away from zero
> > > for signed negative values. Division will round towards zero, arithmetic
> > > right shift away from zero. For example in:
> > >
> > >     -1 / (1 << 1)   !=    -1 >> 1
> > >   = -1 / 2
> > >   = 0                     = -1
> > >
> > > I've rewritten the comment to make it clear this is why we can only make
> > > this optimisation for unsigned values.
> >
> > Ah, of course.  You could use
> >
> >  if ((TYPE_UNSIGNED (type)
> >       || tree_expr_nonnegative_p (@0))
> >
> > here as improvement.
> 
> Thanks, I've made that change.
> 
> > > See, for example, gcc.c-torture/execute/pr34070-2.c
> > >
> > > > Note that as you are accepting vectors you need to make sure the
> > > > target actually supports arithmetic right shift of vectors
> > > > (you only know it supports left shift and division -- so it might
> > > > be sort-of-superfluous to check in case there is no arch that supports
> > > > those but not the other).
> > >
> > > I've added a check for that using optabs, is that the right way to do this?
> >
> > +      && (!VECTOR_TYPE_P (type)
> > +          || optab_for_tree_code (RSHIFT_EXPR, type, optab_vector)
> > +          || optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar)))
> >
> > is not enough -- you need sth like
> >
> >  optab ot = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
> >  if (ot != unknown_optab
> >      && optab_handler (ot, TYPE_MODE (type)) != CODE_FOR_nothing)
> >    .. ok! ...
> >
> > ideally we'd have a helper for this in optab-tree.[ch],
> > tree-vect-patterns.c could also make use of that.
> 
> OK. I've added "target_has_vector_rshift_p" for this purpose.

Actually I was looking for a bit more generic

bool
target_supports_op_p (tree type, enum tree_code code,
		      enum optab_subtype = optab_default)
{
  optab ot = optab_for_tree_code (code, type, optab_subtype);
  return (ot != unknown_optab
          && optab_handler (ot, TYPE_MODE (type)) != CODE_FOR_nothing);
}

and you using target_supports_op_p (type, RSHIFT_EXPR, optab_scalar)
|| target_supports_op_p (type, RSHIFT_EXPR, optab_vector)

Ok with that change.

Thanks,
Richard.

> Bootstrapped and tested on aarch64-none-linux-gnu with no issues.
> 
> OK?
> 
> Thanks,
> James
> 
> ---
> gcc/
> 
> 2017-06-19  James Greenhalgh  <james.greenhalgh@arm.com>
> 
> 	* match.pd (A / (1 << B) -> A >> B): New.
> 	* generic-match-head.c: Include optabs-tree.h.
> 	* gimple-match-head.c: Likewise.
> 	* optabs-tree.h (target_has_vector_rshift_p): New.
> 	* optabs-tree.c (target_has_vector_rshift_p): New.
> 
> gcc/testsuite/
> 
> 2017-06-19  James Greenhalgh  <james.greenhalgh@arm.com>
> 
> 	* gcc.dg/tree-ssa/forwprop-37.c: New.
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Patch match.pd] Fold (A / (1 << B)) to (A >> B)
  2017-06-21  6:59       ` Richard Biener
@ 2017-06-22  8:41         ` James Greenhalgh
  0 siblings, 0 replies; 7+ messages in thread
From: James Greenhalgh @ 2017-06-22  8:41 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, rguenther

[-- Attachment #1: Type: text/plain, Size: 1182 bytes --]


On Wed, Jun 21, 2017 at 08:59:29AM +0200, Richard Biener wrote:
> Actually I was looking for a bit more generic
>
> bool
> target_supports_op_p (tree type, enum tree_code code,
> 		      enum optab_subtype = optab_default)
> {
>   optab ot = optab_for_tree_code (code, type, optab_subtype);
>   return (ot != unknown_optab
>           && optab_handler (ot, TYPE_MODE (type)) != CODE_FOR_nothing);
> }
>
> and you using target_supports_op_p (type, RSHIFT_EXPR, optab_scalar)
> || target_supports_op_p (type, RSHIFT_EXPR, optab_vector)
>
> Ok with that change.

Oh, I see what you mean. Sorry for not getting that correct last time round.

This is what I committed to trunk as revision 249502 after bootstrapping and
testing on aarch64-none-linux-gnu without any issues.

Thanks,
James

gcc/

2017-06-22  James Greenhalgh  <james.greenhalgh@arm.com>

	* match.pd (A / (1 << B) -> A >> B): New.
	* generic-match-head.c: Include optabs-tree.h.
	* gimple-match-head.c: Likewise.
	* optabs-tree.h (target_supports_op_p): New.
	* optabs-tree.c (target_supports_op_p): New.

gcc/testsuite/

2017-06-22  James Greenhalgh  <james.greenhalgh@arm.com>

	* gcc.dg/tree-ssa/forwprop-37.c: New.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Re-Patch-match.pd-Fold-A-1-B-to-A-B.patch --]
[-- Type: text/x-patch; name="0001-Re-Patch-match.pd-Fold-A-1-B-to-A-B.patch", Size: 3643 bytes --]

diff --git a/gcc/generic-match-head.c b/gcc/generic-match-head.c
index 0c0d182..4504401 100644
--- a/gcc/generic-match-head.c
+++ b/gcc/generic-match-head.c
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "optabs-tree.h"
 
 
 /* Routine to determine if the types T1 and T2 are effectively
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index e7e9839..5f6aa27 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "internal-fn.h"
 #include "case-cfn-macros.h"
 #include "gimplify.h"
+#include "optabs-tree.h"
 
 
 /* Forward declarations of the private auto-generated matchers.
diff --git a/gcc/match.pd b/gcc/match.pd
index 244e9eb..862372a 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -147,6 +147,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
     (op @0 integer_onep)
     (non_lvalue @0)))
 
+/* (A / (1 << B)) -> (A >> B).
+   Only for unsigned A.  For signed A, this would not preserve rounding
+   toward zero.
+   For example: (-1 / ( 1 << B)) !=  -1 >> B.  */
+(simplify
+ (trunc_div @0 (lshift integer_onep@1 @2))
+ (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0))
+      && (!VECTOR_TYPE_P (type)
+	  || target_supports_op_p (type, RSHIFT_EXPR, optab_vector)
+	  || target_supports_op_p (type, RSHIFT_EXPR, optab_scalar)))
+  (rshift @0 @2)))
+
 /* Preserve explicit divisions by 0: the C++ front-end wants to detect
    undefined behavior in constexpr evaluation, and assuming that the division
    traps enables better optimizations than these anyway.  */
diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c
index 4bb54ba..c183b14 100644
--- a/gcc/optabs-tree.c
+++ b/gcc/optabs-tree.c
@@ -376,3 +376,18 @@ init_tree_optimization_optabs (tree optnode)
       ggc_free (tmp_optabs);
     }
 }
+
+/* Return TRUE if the target has support for vector right shift of an
+   operand of type TYPE.  If OT_TYPE is OPTAB_DEFAULT, check for existence
+   of a shift by either a scalar or a vector.  Otherwise, check only
+   for a shift that matches OT_TYPE.  */
+
+bool
+target_supports_op_p (tree type, enum tree_code code,
+		      enum optab_subtype ot_subtype)
+{
+  optab ot = optab_for_tree_code (code, type, ot_subtype);
+  return (ot != unknown_optab
+	  && optab_handler (ot, TYPE_MODE (type)) != CODE_FOR_nothing);
+}
+
diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h
index d0b27e0..52e842b 100644
--- a/gcc/optabs-tree.h
+++ b/gcc/optabs-tree.h
@@ -41,5 +41,7 @@ bool supportable_convert_operation (enum tree_code, tree, tree, tree *,
 bool expand_vec_cmp_expr_p (tree, tree, enum tree_code);
 bool expand_vec_cond_expr_p (tree, tree, enum tree_code);
 void init_tree_optimization_optabs (tree);
+bool target_supports_op_p (tree, enum tree_code,
+			   enum optab_subtype = optab_default);
 
 #endif
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c
new file mode 100644
index 0000000..dec826c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-37.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-forwprop1-raw" } */
+
+unsigned int
+f1 (unsigned int a, unsigned int b)
+{
+  unsigned int x = 1U << b;
+  return a / x;
+}
+
+unsigned long
+f2 (unsigned long a, int b)
+{
+  unsigned long x = 1UL << b;
+  return a / x;
+}
+
+unsigned long long
+f3 (unsigned long long a, int b)
+{
+  unsigned long long x = 1ULL << b;
+  return a / x;
+}
+
+/* { dg-final { scan-tree-dump-not "trunc_div_expr" "forwprop1" } } */

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2017-06-22  8:41 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-12 13:20 [Patch match.pd] Fold (A / (1 << B)) to (A >> B) James Greenhalgh
2017-06-12 13:56 ` Richard Biener
2017-06-16  9:23 ` James Greenhalgh
2017-06-16  9:41   ` Richard Biener
2017-06-20 15:44     ` James Greenhalgh
2017-06-21  6:59       ` Richard Biener
2017-06-22  8:41         ` James Greenhalgh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).