public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] match.pd: Optimize __builtin_mul_overflow_p (x, cst, (stype)0) [PR105777]
@ 2022-06-02 13:11 Jakub Jelinek
  2022-06-02 13:46 ` Richard Biener
  0 siblings, 1 reply; 3+ messages in thread
From: Jakub Jelinek @ 2022-06-02 13:11 UTC (permalink / raw)
  To: Richard Biener, Jeff Law; +Cc: gcc-patches

Hi!

The following patch is an incremental change to the PR30314 enhancement,
this one handles signed types.
For signed types (but still, the same for 1st and result element type
and non-zero constant that fits into that type), we actually need to
watch for overflow in direction to positive and negative infinity
and it also depends on whether the cst operand is positive or negative.
For __builtin_mul_overflow_p (x, cst, (stype) 0):
For cst > 0, we can simplify it to:
x > INT_MAX / cst || x < INT_MIN / cst
aka:
x + (unsigned) (INT_MIN / cst) > (unsigned) (INT_MAX / cst) - (unsigned) (INT_MIN / cst)
and for cst < 0 to:
x < INT_MAX / cst || x > INT_MIN / cst
aka:
x + (unsigned) (INT_MAX / cst) > (unsigned) (INT_MIN / cst) - (unsigned) (INT_MAX / cst)

Additionally, I've added executable testcases, so we don't just check for
the optimization to be performed, but also that it is correct (done that
even for the other PR's testcase).

Starting x86_64-linux and i686-linux bootstrap/regtest, ok for trunk if
it passes them?

2022-06-02  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/30314
	PR middle-end/105777
	* match.pd (__builtin_mul_overflow_p (x, cst, (stype) 0) ->
	x > stype_max / cst || x < stype_min / cst): New simplification.

	* gcc.dg/tree-ssa/pr30314.c: Add noipa attribute to all functions.
	* gcc.dg/tree-ssa/pr105777.c: New test.
	* gcc.c-torture/execute/pr30314.c: New test.
	* gcc.c-torture/execute/pr105777.c: New test.

--- gcc/match.pd.jj	2022-06-01 17:54:30.536372912 +0200
+++ gcc/match.pd	2022-06-02 13:16:17.171415948 +0200
@@ -5970,15 +5970,37 @@ (define_operator_list SYNC_FETCH_AND_AND
    (ovf @1 @0))))
 
 /* Optimize __builtin_mul_overflow_p (x, cst, (utype) 0) if all 3 types
-   are unsigned to x > (umax / cst).  */
+   are unsigned to x > (umax / cst).  Similarly for signed type, but
+   in that case it needs to be outside of a range.  */
 (simplify
  (imagpart (IFN_MUL_OVERFLOW:cs@2 @0 integer_nonzerop@1))
   (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
-       && TYPE_UNSIGNED (TREE_TYPE (@0))
        && TYPE_MAX_VALUE (TREE_TYPE (@0))
        && types_match (TREE_TYPE (@0), TREE_TYPE (TREE_TYPE (@2)))
        && int_fits_type_p (@1, TREE_TYPE (@0)))
-   (convert (gt @0 (trunc_div! { TYPE_MAX_VALUE (TREE_TYPE (@0)); } @1)))))
+   (if (TYPE_UNSIGNED (TREE_TYPE (@0)))
+    (convert (gt @0 (trunc_div! { TYPE_MAX_VALUE (TREE_TYPE (@0)); } @1)))
+    (if (TYPE_MIN_VALUE (TREE_TYPE (@0)))
+     (with
+      {
+	tree lo = int_const_binop (TRUNC_DIV_EXPR,
+				   TYPE_MIN_VALUE (TREE_TYPE (@0)),
+				   fold_convert (TREE_TYPE (@0), @1));
+	tree hi = int_const_binop (TRUNC_DIV_EXPR,
+				   TYPE_MAX_VALUE (TREE_TYPE (@0)),
+				   fold_convert (TREE_TYPE (@0), @1));
+	tree etype = range_check_type (TREE_TYPE (@0));
+	if (etype)
+	  {
+	    if (wi::neg_p (wi::to_wide (@1)))
+	      std::swap (lo, hi);
+	    lo = fold_convert (etype, lo);
+	    hi = fold_convert (etype, hi);
+	    hi = int_const_binop (MINUS_EXPR, hi, lo);
+	  }
+      }
+      (if (etype)
+       (convert (gt (minus (convert:etype @0) { lo; }) { hi; }))))))))
 
 /* Simplification of math builtins.  These rules must all be optimizations
    as well as IL simplifications.  If there is a possibility that the new
--- gcc/testsuite/gcc.dg/tree-ssa/pr30314.c.jj	2022-06-02 11:17:23.689835550 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr30314.c	2022-06-02 14:23:19.294093445 +0200
@@ -7,25 +7,25 @@
 /* { dg-final { scan-tree-dump " > 102261126" "optimized" { target int32 } } } */
 /* { dg-final { scan-tree-dump " > 439208192231179800" "optimized" { target lp64 } } } */
 
-int
+__attribute__((noipa)) int
 foo (unsigned int x)
 {
   return __builtin_mul_overflow_p (x, 35U, 0U);
 }
 
-int
+__attribute__((noipa)) int
 bar (unsigned long int x)
 {
   return __builtin_mul_overflow_p (x, 35UL, 0UL);
 }
 
-int
+__attribute__((noipa)) int
 baz (unsigned int x)
 {
   return __builtin_mul_overflow_p (42, x, 0U);
 }
 
-int
+__attribute__((noipa)) int
 qux (unsigned long int x)
 {
   return __builtin_mul_overflow_p (42, x, 0UL);
--- gcc/testsuite/gcc.dg/tree-ssa/pr105777.c.jj	2022-06-02 14:22:57.017328731 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr105777.c	2022-06-02 14:19:29.399521503 +0200
@@ -0,0 +1,68 @@
+/* PR middle-end/105777 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "\.MUL_OVERFLOW " "optimized" } } */
+/* { dg-final { scan-tree-dump " \\+ 61356675;" "optimized" { target int32 } } } */
+/* { dg-final { scan-tree-dump " > 122713350" "optimized" { target int32 } } } */
+/* { dg-final { scan-tree-dump " \\+ 263524915338707880" "optimized" { target lp64 } } } */
+/* { dg-final { scan-tree-dump " > 527049830677415760" "optimized" { target lp64 } } } */
+/* { dg-final { scan-tree-dump " \\+ 51130563" "optimized" { target int32 } } } */
+/* { dg-final { scan-tree-dump " > 102261126" "optimized" { target int32 } } } */
+/* { dg-final { scan-tree-dump " \\+ 219604096115589900" "optimized" { target lp64 } } } */
+/* { dg-final { scan-tree-dump " > 439208192231179800" "optimized" { target lp64 } } } */
+/* { dg-final { scan-tree-dump " \\+ 55063683;" "optimized" { target int32 } } } */
+/* { dg-final { scan-tree-dump " > 110127366" "optimized" { target int32 } } } */
+/* { dg-final { scan-tree-dump " \\+ 236496718893712200" "optimized" { target lp64 } } } */
+/* { dg-final { scan-tree-dump " > 472993437787424400" "optimized" { target lp64 } } } */
+/* { dg-final { scan-tree-dump " \\+ 46684427" "optimized" { target int32 } } } */
+/* { dg-final { scan-tree-dump " > 93368854" "optimized" { target int32 } } } */
+/* { dg-final { scan-tree-dump " \\+ 200508087757712517" "optimized" { target lp64 } } } */
+/* { dg-final { scan-tree-dump " > 401016175515425034" "optimized" { target lp64 } } } */
+
+__attribute__((noipa)) int
+foo (int x)
+{
+  return __builtin_mul_overflow_p (x, 35, 0);
+}
+
+__attribute__((noipa)) int
+bar (long int x)
+{
+  return __builtin_mul_overflow_p (x, 35L, 0L);
+}
+
+__attribute__((noipa)) int
+baz (int x)
+{
+  return __builtin_mul_overflow_p (42, x, 0);
+}
+
+__attribute__((noipa)) int
+qux (long int x)
+{
+  return __builtin_mul_overflow_p (42, x, 0L);
+}
+
+__attribute__((noipa)) int
+corge (int x)
+{
+  return __builtin_mul_overflow_p (x, -39, 0);
+}
+
+__attribute__((noipa)) int
+garply (long int x)
+{
+  return __builtin_mul_overflow_p (x, -39L, 0L);
+}
+
+__attribute__((noipa)) int
+grault (int x)
+{
+  return __builtin_mul_overflow_p (-46, x, 0);
+}
+
+__attribute__((noipa)) int
+waldo (long int x)
+{
+  return __builtin_mul_overflow_p (-46, x, 0L);
+}
--- gcc/testsuite/gcc.c-torture/execute/pr30314.c.jj	2022-06-02 14:32:30.070276332 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr30314.c	2022-06-02 14:32:22.121360286 +0200
@@ -0,0 +1,29 @@
+/* PR middle-end/30314 */
+
+#include "../../gcc.dg/tree-ssa/pr30314.c"
+
+int
+main ()
+{
+  if (foo (0) != 0
+      || foo (~0U / 35) != 0
+      || foo (~0U / 35 + 1) != 1
+      || foo (~0U) != 1)
+    __builtin_abort ();
+  if (bar (0) != 0
+      || bar (~0UL / 35) != 0
+      || bar (~0UL / 35 + 1) != 1
+      || bar (~0UL) != 1)
+    __builtin_abort ();
+  if (baz (0) != 0
+      || baz (~0U / 42) != 0
+      || baz (~0U / 42 + 1) != 1
+      || baz (~0U) != 1)
+    __builtin_abort ();
+  if (qux (0) != 0
+      || qux (~0UL / 42) != 0
+      || qux (~0UL / 42 + 1) != 1
+      || qux (~0UL) != 1)
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr105777.c.jj	2022-06-02 14:32:50.287062810 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr105777.c	2022-06-02 14:38:36.289409212 +0200
@@ -0,0 +1,73 @@
+/* PR middle-end/105777 */
+
+#include "../../gcc.dg/tree-ssa/pr105777.c"
+
+int
+main ()
+{
+  if (foo (0) != 0
+      || foo (__INT_MAX__ / 35) != 0
+      || foo (__INT_MAX__ / 35 + 1) != 1
+      || foo (__INT_MAX__) != 1
+      || foo ((-__INT_MAX__ - 1) / 35) != 0
+      || foo ((-__INT_MAX__ - 1) / 35 - 1) != 1
+      || foo (-__INT_MAX__ - 1) != 1)
+    __builtin_abort ();
+  if (bar (0) != 0
+      || bar (__LONG_MAX__ / 35) != 0
+      || bar (__LONG_MAX__ / 35 + 1) != 1
+      || bar (__LONG_MAX__) != 1
+      || bar ((-__LONG_MAX__ - 1) / 35) != 0
+      || bar ((-__LONG_MAX__ - 1) / 35 - 1) != 1
+      || bar (-__LONG_MAX__ - 1) != 1)
+    __builtin_abort ();
+  if (baz (0) != 0
+      || baz (__INT_MAX__ / 42) != 0
+      || baz (__INT_MAX__ / 42 + 1) != 1
+      || baz (__INT_MAX__) != 1
+      || baz ((-__INT_MAX__ - 1) / 42) != 0
+      || baz ((-__INT_MAX__ - 1) / 42 - 1) != 1
+      || baz (-__INT_MAX__ - 1) != 1)
+    __builtin_abort ();
+  if (qux (0) != 0
+      || qux (__LONG_MAX__ / 42) != 0
+      || qux (__LONG_MAX__ / 42 + 1) != 1
+      || qux (__LONG_MAX__) != 1
+      || qux ((-__LONG_MAX__ - 1) / 42) != 0
+      || qux ((-__LONG_MAX__ - 1) / 42 - 1) != 1
+      || qux (-__LONG_MAX__ - 1) != 1)
+    __builtin_abort ();
+  if (corge (0) != 0
+      || corge (__INT_MAX__ / -39) != 0
+      || corge (__INT_MAX__ / -39 - 1) != 1
+      || corge (__INT_MAX__) != 1
+      || corge ((-__INT_MAX__ - 1) / -39) != 0
+      || corge ((-__INT_MAX__ - 1) / -39 + 1) != 1
+      || corge (-__INT_MAX__ - 1) != 1)
+    __builtin_abort ();
+  if (garply (0) != 0
+      || garply (__LONG_MAX__ / -39) != 0
+      || garply (__LONG_MAX__ / -39 - 1) != 1
+      || garply (__LONG_MAX__) != 1
+      || garply ((-__LONG_MAX__ - 1) / -39) != 0
+      || garply ((-__LONG_MAX__ - 1) / -39 + 1) != 1
+      || garply (-__LONG_MAX__ - 1) != 1)
+    __builtin_abort ();
+  if (grault (0) != 0
+      || grault (__INT_MAX__ / -46) != 0
+      || grault (__INT_MAX__ / -46 - 1) != 1
+      || grault (__INT_MAX__) != 1
+      || grault ((-__INT_MAX__ - 1) / -46) != 0
+      || grault ((-__INT_MAX__ - 1) / -46 + 1) != 1
+      || grault (-__INT_MAX__ - 1) != 1)
+    __builtin_abort ();
+  if (waldo (0) != 0
+      || waldo (__LONG_MAX__ / -46) != 0
+      || waldo (__LONG_MAX__ / -46 - 1) != 1
+      || waldo (__LONG_MAX__) != 1
+      || waldo ((-__LONG_MAX__ - 1) / -46) != 0
+      || waldo ((-__LONG_MAX__ - 1) / -46 + 1) != 1
+      || waldo (-__LONG_MAX__ - 1) != 1)
+    __builtin_abort ();
+  return 0;
+}

	Jakub


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] match.pd: Optimize __builtin_mul_overflow_p (x, cst, (stype)0) [PR105777]
  2022-06-02 13:11 [PATCH] match.pd: Optimize __builtin_mul_overflow_p (x, cst, (stype)0) [PR105777] Jakub Jelinek
@ 2022-06-02 13:46 ` Richard Biener
  2022-06-03  9:49   ` Jakub Jelinek
  0 siblings, 1 reply; 3+ messages in thread
From: Richard Biener @ 2022-06-02 13:46 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Jeff Law, gcc-patches

On Thu, 2 Jun 2022, Jakub Jelinek wrote:

> Hi!
> 
> The following patch is an incremental change to the PR30314 enhancement,
> this one handles signed types.
> For signed types (but still, the same for 1st and result element type
> and non-zero constant that fits into that type), we actually need to
> watch for overflow in direction to positive and negative infinity
> and it also depends on whether the cst operand is positive or negative.
> For __builtin_mul_overflow_p (x, cst, (stype) 0):
> For cst > 0, we can simplify it to:
> x > INT_MAX / cst || x < INT_MIN / cst
> aka:
> x + (unsigned) (INT_MIN / cst) > (unsigned) (INT_MAX / cst) - (unsigned) (INT_MIN / cst)
> and for cst < 0 to:
> x < INT_MAX / cst || x > INT_MIN / cst
> aka:
> x + (unsigned) (INT_MAX / cst) > (unsigned) (INT_MIN / cst) - (unsigned) (INT_MAX / cst)
> 
> Additionally, I've added executable testcases, so we don't just check for
> the optimization to be performed, but also that it is correct (done that
> even for the other PR's testcase).
> 
> Starting x86_64-linux and i686-linux bootstrap/regtest, ok for trunk if
> it passes them?

OK.

Thanks,
Richard.

> 2022-06-02  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR middle-end/30314
> 	PR middle-end/105777
> 	* match.pd (__builtin_mul_overflow_p (x, cst, (stype) 0) ->
> 	x > stype_max / cst || x < stype_min / cst): New simplification.
> 
> 	* gcc.dg/tree-ssa/pr30314.c: Add noipa attribute to all functions.
> 	* gcc.dg/tree-ssa/pr105777.c: New test.
> 	* gcc.c-torture/execute/pr30314.c: New test.
> 	* gcc.c-torture/execute/pr105777.c: New test.
> 
> --- gcc/match.pd.jj	2022-06-01 17:54:30.536372912 +0200
> +++ gcc/match.pd	2022-06-02 13:16:17.171415948 +0200
> @@ -5970,15 +5970,37 @@ (define_operator_list SYNC_FETCH_AND_AND
>     (ovf @1 @0))))
>  
>  /* Optimize __builtin_mul_overflow_p (x, cst, (utype) 0) if all 3 types
> -   are unsigned to x > (umax / cst).  */
> +   are unsigned to x > (umax / cst).  Similarly for signed type, but
> +   in that case it needs to be outside of a range.  */
>  (simplify
>   (imagpart (IFN_MUL_OVERFLOW:cs@2 @0 integer_nonzerop@1))
>    (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> -       && TYPE_UNSIGNED (TREE_TYPE (@0))
>         && TYPE_MAX_VALUE (TREE_TYPE (@0))
>         && types_match (TREE_TYPE (@0), TREE_TYPE (TREE_TYPE (@2)))
>         && int_fits_type_p (@1, TREE_TYPE (@0)))
> -   (convert (gt @0 (trunc_div! { TYPE_MAX_VALUE (TREE_TYPE (@0)); } @1)))))
> +   (if (TYPE_UNSIGNED (TREE_TYPE (@0)))
> +    (convert (gt @0 (trunc_div! { TYPE_MAX_VALUE (TREE_TYPE (@0)); } @1)))
> +    (if (TYPE_MIN_VALUE (TREE_TYPE (@0)))
> +     (with
> +      {
> +	tree lo = int_const_binop (TRUNC_DIV_EXPR,
> +				   TYPE_MIN_VALUE (TREE_TYPE (@0)),
> +				   fold_convert (TREE_TYPE (@0), @1));
> +	tree hi = int_const_binop (TRUNC_DIV_EXPR,
> +				   TYPE_MAX_VALUE (TREE_TYPE (@0)),
> +				   fold_convert (TREE_TYPE (@0), @1));
> +	tree etype = range_check_type (TREE_TYPE (@0));
> +	if (etype)
> +	  {
> +	    if (wi::neg_p (wi::to_wide (@1)))
> +	      std::swap (lo, hi);
> +	    lo = fold_convert (etype, lo);
> +	    hi = fold_convert (etype, hi);
> +	    hi = int_const_binop (MINUS_EXPR, hi, lo);
> +	  }
> +      }
> +      (if (etype)
> +       (convert (gt (minus (convert:etype @0) { lo; }) { hi; }))))))))
>  
>  /* Simplification of math builtins.  These rules must all be optimizations
>     as well as IL simplifications.  If there is a possibility that the new
> --- gcc/testsuite/gcc.dg/tree-ssa/pr30314.c.jj	2022-06-02 11:17:23.689835550 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr30314.c	2022-06-02 14:23:19.294093445 +0200
> @@ -7,25 +7,25 @@
>  /* { dg-final { scan-tree-dump " > 102261126" "optimized" { target int32 } } } */
>  /* { dg-final { scan-tree-dump " > 439208192231179800" "optimized" { target lp64 } } } */
>  
> -int
> +__attribute__((noipa)) int
>  foo (unsigned int x)
>  {
>    return __builtin_mul_overflow_p (x, 35U, 0U);
>  }
>  
> -int
> +__attribute__((noipa)) int
>  bar (unsigned long int x)
>  {
>    return __builtin_mul_overflow_p (x, 35UL, 0UL);
>  }
>  
> -int
> +__attribute__((noipa)) int
>  baz (unsigned int x)
>  {
>    return __builtin_mul_overflow_p (42, x, 0U);
>  }
>  
> -int
> +__attribute__((noipa)) int
>  qux (unsigned long int x)
>  {
>    return __builtin_mul_overflow_p (42, x, 0UL);
> --- gcc/testsuite/gcc.dg/tree-ssa/pr105777.c.jj	2022-06-02 14:22:57.017328731 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr105777.c	2022-06-02 14:19:29.399521503 +0200
> @@ -0,0 +1,68 @@
> +/* PR middle-end/105777 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-not "\.MUL_OVERFLOW " "optimized" } } */
> +/* { dg-final { scan-tree-dump " \\+ 61356675;" "optimized" { target int32 } } } */
> +/* { dg-final { scan-tree-dump " > 122713350" "optimized" { target int32 } } } */
> +/* { dg-final { scan-tree-dump " \\+ 263524915338707880" "optimized" { target lp64 } } } */
> +/* { dg-final { scan-tree-dump " > 527049830677415760" "optimized" { target lp64 } } } */
> +/* { dg-final { scan-tree-dump " \\+ 51130563" "optimized" { target int32 } } } */
> +/* { dg-final { scan-tree-dump " > 102261126" "optimized" { target int32 } } } */
> +/* { dg-final { scan-tree-dump " \\+ 219604096115589900" "optimized" { target lp64 } } } */
> +/* { dg-final { scan-tree-dump " > 439208192231179800" "optimized" { target lp64 } } } */
> +/* { dg-final { scan-tree-dump " \\+ 55063683;" "optimized" { target int32 } } } */
> +/* { dg-final { scan-tree-dump " > 110127366" "optimized" { target int32 } } } */
> +/* { dg-final { scan-tree-dump " \\+ 236496718893712200" "optimized" { target lp64 } } } */
> +/* { dg-final { scan-tree-dump " > 472993437787424400" "optimized" { target lp64 } } } */
> +/* { dg-final { scan-tree-dump " \\+ 46684427" "optimized" { target int32 } } } */
> +/* { dg-final { scan-tree-dump " > 93368854" "optimized" { target int32 } } } */
> +/* { dg-final { scan-tree-dump " \\+ 200508087757712517" "optimized" { target lp64 } } } */
> +/* { dg-final { scan-tree-dump " > 401016175515425034" "optimized" { target lp64 } } } */
> +
> +__attribute__((noipa)) int
> +foo (int x)
> +{
> +  return __builtin_mul_overflow_p (x, 35, 0);
> +}
> +
> +__attribute__((noipa)) int
> +bar (long int x)
> +{
> +  return __builtin_mul_overflow_p (x, 35L, 0L);
> +}
> +
> +__attribute__((noipa)) int
> +baz (int x)
> +{
> +  return __builtin_mul_overflow_p (42, x, 0);
> +}
> +
> +__attribute__((noipa)) int
> +qux (long int x)
> +{
> +  return __builtin_mul_overflow_p (42, x, 0L);
> +}
> +
> +__attribute__((noipa)) int
> +corge (int x)
> +{
> +  return __builtin_mul_overflow_p (x, -39, 0);
> +}
> +
> +__attribute__((noipa)) int
> +garply (long int x)
> +{
> +  return __builtin_mul_overflow_p (x, -39L, 0L);
> +}
> +
> +__attribute__((noipa)) int
> +grault (int x)
> +{
> +  return __builtin_mul_overflow_p (-46, x, 0);
> +}
> +
> +__attribute__((noipa)) int
> +waldo (long int x)
> +{
> +  return __builtin_mul_overflow_p (-46, x, 0L);
> +}
> --- gcc/testsuite/gcc.c-torture/execute/pr30314.c.jj	2022-06-02 14:32:30.070276332 +0200
> +++ gcc/testsuite/gcc.c-torture/execute/pr30314.c	2022-06-02 14:32:22.121360286 +0200
> @@ -0,0 +1,29 @@
> +/* PR middle-end/30314 */
> +
> +#include "../../gcc.dg/tree-ssa/pr30314.c"
> +
> +int
> +main ()
> +{
> +  if (foo (0) != 0
> +      || foo (~0U / 35) != 0
> +      || foo (~0U / 35 + 1) != 1
> +      || foo (~0U) != 1)
> +    __builtin_abort ();
> +  if (bar (0) != 0
> +      || bar (~0UL / 35) != 0
> +      || bar (~0UL / 35 + 1) != 1
> +      || bar (~0UL) != 1)
> +    __builtin_abort ();
> +  if (baz (0) != 0
> +      || baz (~0U / 42) != 0
> +      || baz (~0U / 42 + 1) != 1
> +      || baz (~0U) != 1)
> +    __builtin_abort ();
> +  if (qux (0) != 0
> +      || qux (~0UL / 42) != 0
> +      || qux (~0UL / 42 + 1) != 1
> +      || qux (~0UL) != 1)
> +    __builtin_abort ();
> +  return 0;
> +}
> --- gcc/testsuite/gcc.c-torture/execute/pr105777.c.jj	2022-06-02 14:32:50.287062810 +0200
> +++ gcc/testsuite/gcc.c-torture/execute/pr105777.c	2022-06-02 14:38:36.289409212 +0200
> @@ -0,0 +1,73 @@
> +/* PR middle-end/105777 */
> +
> +#include "../../gcc.dg/tree-ssa/pr105777.c"
> +
> +int
> +main ()
> +{
> +  if (foo (0) != 0
> +      || foo (__INT_MAX__ / 35) != 0
> +      || foo (__INT_MAX__ / 35 + 1) != 1
> +      || foo (__INT_MAX__) != 1
> +      || foo ((-__INT_MAX__ - 1) / 35) != 0
> +      || foo ((-__INT_MAX__ - 1) / 35 - 1) != 1
> +      || foo (-__INT_MAX__ - 1) != 1)
> +    __builtin_abort ();
> +  if (bar (0) != 0
> +      || bar (__LONG_MAX__ / 35) != 0
> +      || bar (__LONG_MAX__ / 35 + 1) != 1
> +      || bar (__LONG_MAX__) != 1
> +      || bar ((-__LONG_MAX__ - 1) / 35) != 0
> +      || bar ((-__LONG_MAX__ - 1) / 35 - 1) != 1
> +      || bar (-__LONG_MAX__ - 1) != 1)
> +    __builtin_abort ();
> +  if (baz (0) != 0
> +      || baz (__INT_MAX__ / 42) != 0
> +      || baz (__INT_MAX__ / 42 + 1) != 1
> +      || baz (__INT_MAX__) != 1
> +      || baz ((-__INT_MAX__ - 1) / 42) != 0
> +      || baz ((-__INT_MAX__ - 1) / 42 - 1) != 1
> +      || baz (-__INT_MAX__ - 1) != 1)
> +    __builtin_abort ();
> +  if (qux (0) != 0
> +      || qux (__LONG_MAX__ / 42) != 0
> +      || qux (__LONG_MAX__ / 42 + 1) != 1
> +      || qux (__LONG_MAX__) != 1
> +      || qux ((-__LONG_MAX__ - 1) / 42) != 0
> +      || qux ((-__LONG_MAX__ - 1) / 42 - 1) != 1
> +      || qux (-__LONG_MAX__ - 1) != 1)
> +    __builtin_abort ();
> +  if (corge (0) != 0
> +      || corge (__INT_MAX__ / -39) != 0
> +      || corge (__INT_MAX__ / -39 - 1) != 1
> +      || corge (__INT_MAX__) != 1
> +      || corge ((-__INT_MAX__ - 1) / -39) != 0
> +      || corge ((-__INT_MAX__ - 1) / -39 + 1) != 1
> +      || corge (-__INT_MAX__ - 1) != 1)
> +    __builtin_abort ();
> +  if (garply (0) != 0
> +      || garply (__LONG_MAX__ / -39) != 0
> +      || garply (__LONG_MAX__ / -39 - 1) != 1
> +      || garply (__LONG_MAX__) != 1
> +      || garply ((-__LONG_MAX__ - 1) / -39) != 0
> +      || garply ((-__LONG_MAX__ - 1) / -39 + 1) != 1
> +      || garply (-__LONG_MAX__ - 1) != 1)
> +    __builtin_abort ();
> +  if (grault (0) != 0
> +      || grault (__INT_MAX__ / -46) != 0
> +      || grault (__INT_MAX__ / -46 - 1) != 1
> +      || grault (__INT_MAX__) != 1
> +      || grault ((-__INT_MAX__ - 1) / -46) != 0
> +      || grault ((-__INT_MAX__ - 1) / -46 + 1) != 1
> +      || grault (-__INT_MAX__ - 1) != 1)
> +    __builtin_abort ();
> +  if (waldo (0) != 0
> +      || waldo (__LONG_MAX__ / -46) != 0
> +      || waldo (__LONG_MAX__ / -46 - 1) != 1
> +      || waldo (__LONG_MAX__) != 1
> +      || waldo ((-__LONG_MAX__ - 1) / -46) != 0
> +      || waldo ((-__LONG_MAX__ - 1) / -46 + 1) != 1
> +      || waldo (-__LONG_MAX__ - 1) != 1)
> +    __builtin_abort ();
> +  return 0;
> +}
> 
> 	Jakub
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] match.pd: Optimize __builtin_mul_overflow_p (x, cst, (stype)0) [PR105777]
  2022-06-02 13:46 ` Richard Biener
@ 2022-06-03  9:49   ` Jakub Jelinek
  0 siblings, 0 replies; 3+ messages in thread
From: Jakub Jelinek @ 2022-06-03  9:49 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jeff Law, gcc-patches

On Thu, Jun 02, 2022 at 01:46:25PM +0000, Richard Biener wrote:
> > Starting x86_64-linux and i686-linux bootstrap/regtest, ok for trunk if
> > it passes them?
> 
> OK.

Testing found one special case that wasn't handled correctly in the patch,
when @1 is -1, INT_MIN / -1 during the lo computation overflows and in the
end it resulted in __builtin_mul_overflow_p (x, -1, 0) being always 0.
The correct test is x == INT_MIN though.

Here is the slightly adjusted testcase (just addition of
      (if (integer_minus_onep (@1))
       (convert (eq @0 { TYPE_MIN_VALUE (TREE_TYPE (@0)); }))
and one ) and some reindentation), which I've now committed.

Thanks.

2022-06-03  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/30314
	PR middle-end/105777
	* match.pd (__builtin_mul_overflow_p (x, cst, (stype) 0) ->
	x > stype_max / cst || x < stype_min / cst): New simplification.

	* gcc.dg/tree-ssa/pr30314.c: Add noipa attribute to all functions.
	* gcc.dg/tree-ssa/pr105777.c: New test.
	* gcc.c-torture/execute/pr30314.c: New test.
	* gcc.c-torture/execute/pr105777.c: New test.

--- gcc/match.pd.jj	2022-06-01 17:54:30.536372912 +0200
+++ gcc/match.pd	2022-06-03 09:16:39.872567123 +0200
@@ -5970,15 +5970,39 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    (ovf @1 @0))))
 
 /* Optimize __builtin_mul_overflow_p (x, cst, (utype) 0) if all 3 types
-   are unsigned to x > (umax / cst).  */
+   are unsigned to x > (umax / cst).  Similarly for signed type, but
+   in that case it needs to be outside of a range.  */
 (simplify
  (imagpart (IFN_MUL_OVERFLOW:cs@2 @0 integer_nonzerop@1))
   (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
-       && TYPE_UNSIGNED (TREE_TYPE (@0))
        && TYPE_MAX_VALUE (TREE_TYPE (@0))
        && types_match (TREE_TYPE (@0), TREE_TYPE (TREE_TYPE (@2)))
        && int_fits_type_p (@1, TREE_TYPE (@0)))
-   (convert (gt @0 (trunc_div! { TYPE_MAX_VALUE (TREE_TYPE (@0)); } @1)))))
+   (if (TYPE_UNSIGNED (TREE_TYPE (@0)))
+    (convert (gt @0 (trunc_div! { TYPE_MAX_VALUE (TREE_TYPE (@0)); } @1)))
+    (if (TYPE_MIN_VALUE (TREE_TYPE (@0)))
+     (if (integer_minus_onep (@1))
+      (convert (eq @0 { TYPE_MIN_VALUE (TREE_TYPE (@0)); }))
+      (with
+       {
+	 tree lo = int_const_binop (TRUNC_DIV_EXPR,
+				    TYPE_MIN_VALUE (TREE_TYPE (@0)),
+				    fold_convert (TREE_TYPE (@0), @1));
+	 tree hi = int_const_binop (TRUNC_DIV_EXPR,
+				    TYPE_MAX_VALUE (TREE_TYPE (@0)),
+				    fold_convert (TREE_TYPE (@0), @1));
+	 tree etype = range_check_type (TREE_TYPE (@0));
+	 if (etype)
+	   {
+	     if (wi::neg_p (wi::to_wide (@1)))
+	       std::swap (lo, hi);
+	     lo = fold_convert (etype, lo);
+	     hi = fold_convert (etype, hi);
+	     hi = int_const_binop (MINUS_EXPR, hi, lo);
+	   }
+       }
+       (if (etype)
+        (convert (gt (minus (convert:etype @0) { lo; }) { hi; })))))))))
 
 /* Simplification of math builtins.  These rules must all be optimizations
    as well as IL simplifications.  If there is a possibility that the new
--- gcc/testsuite/gcc.dg/tree-ssa/pr30314.c.jj	2022-06-02 11:17:23.689835550 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr30314.c	2022-06-02 14:23:19.294093445 +0200
@@ -7,25 +7,25 @@
 /* { dg-final { scan-tree-dump " > 102261126" "optimized" { target int32 } } } */
 /* { dg-final { scan-tree-dump " > 439208192231179800" "optimized" { target lp64 } } } */
 
-int
+__attribute__((noipa)) int
 foo (unsigned int x)
 {
   return __builtin_mul_overflow_p (x, 35U, 0U);
 }
 
-int
+__attribute__((noipa)) int
 bar (unsigned long int x)
 {
   return __builtin_mul_overflow_p (x, 35UL, 0UL);
 }
 
-int
+__attribute__((noipa)) int
 baz (unsigned int x)
 {
   return __builtin_mul_overflow_p (42, x, 0U);
 }
 
-int
+__attribute__((noipa)) int
 qux (unsigned long int x)
 {
   return __builtin_mul_overflow_p (42, x, 0UL);
--- gcc/testsuite/gcc.dg/tree-ssa/pr105777.c.jj	2022-06-02 14:22:57.017328731 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr105777.c	2022-06-02 14:19:29.399521503 +0200
@@ -0,0 +1,68 @@
+/* PR middle-end/105777 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "\.MUL_OVERFLOW " "optimized" } } */
+/* { dg-final { scan-tree-dump " \\+ 61356675;" "optimized" { target int32 } } } */
+/* { dg-final { scan-tree-dump " > 122713350" "optimized" { target int32 } } } */
+/* { dg-final { scan-tree-dump " \\+ 263524915338707880" "optimized" { target lp64 } } } */
+/* { dg-final { scan-tree-dump " > 527049830677415760" "optimized" { target lp64 } } } */
+/* { dg-final { scan-tree-dump " \\+ 51130563" "optimized" { target int32 } } } */
+/* { dg-final { scan-tree-dump " > 102261126" "optimized" { target int32 } } } */
+/* { dg-final { scan-tree-dump " \\+ 219604096115589900" "optimized" { target lp64 } } } */
+/* { dg-final { scan-tree-dump " > 439208192231179800" "optimized" { target lp64 } } } */
+/* { dg-final { scan-tree-dump " \\+ 55063683;" "optimized" { target int32 } } } */
+/* { dg-final { scan-tree-dump " > 110127366" "optimized" { target int32 } } } */
+/* { dg-final { scan-tree-dump " \\+ 236496718893712200" "optimized" { target lp64 } } } */
+/* { dg-final { scan-tree-dump " > 472993437787424400" "optimized" { target lp64 } } } */
+/* { dg-final { scan-tree-dump " \\+ 46684427" "optimized" { target int32 } } } */
+/* { dg-final { scan-tree-dump " > 93368854" "optimized" { target int32 } } } */
+/* { dg-final { scan-tree-dump " \\+ 200508087757712517" "optimized" { target lp64 } } } */
+/* { dg-final { scan-tree-dump " > 401016175515425034" "optimized" { target lp64 } } } */
+
+__attribute__((noipa)) int
+foo (int x)
+{
+  return __builtin_mul_overflow_p (x, 35, 0);
+}
+
+__attribute__((noipa)) int
+bar (long int x)
+{
+  return __builtin_mul_overflow_p (x, 35L, 0L);
+}
+
+__attribute__((noipa)) int
+baz (int x)
+{
+  return __builtin_mul_overflow_p (42, x, 0);
+}
+
+__attribute__((noipa)) int
+qux (long int x)
+{
+  return __builtin_mul_overflow_p (42, x, 0L);
+}
+
+__attribute__((noipa)) int
+corge (int x)
+{
+  return __builtin_mul_overflow_p (x, -39, 0);
+}
+
+__attribute__((noipa)) int
+garply (long int x)
+{
+  return __builtin_mul_overflow_p (x, -39L, 0L);
+}
+
+__attribute__((noipa)) int
+grault (int x)
+{
+  return __builtin_mul_overflow_p (-46, x, 0);
+}
+
+__attribute__((noipa)) int
+waldo (long int x)
+{
+  return __builtin_mul_overflow_p (-46, x, 0L);
+}
--- gcc/testsuite/gcc.c-torture/execute/pr30314.c.jj	2022-06-02 14:32:30.070276332 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr30314.c	2022-06-02 14:32:22.121360286 +0200
@@ -0,0 +1,29 @@
+/* PR middle-end/30314 */
+
+#include "../../gcc.dg/tree-ssa/pr30314.c"
+
+int
+main ()
+{
+  if (foo (0) != 0
+      || foo (~0U / 35) != 0
+      || foo (~0U / 35 + 1) != 1
+      || foo (~0U) != 1)
+    __builtin_abort ();
+  if (bar (0) != 0
+      || bar (~0UL / 35) != 0
+      || bar (~0UL / 35 + 1) != 1
+      || bar (~0UL) != 1)
+    __builtin_abort ();
+  if (baz (0) != 0
+      || baz (~0U / 42) != 0
+      || baz (~0U / 42 + 1) != 1
+      || baz (~0U) != 1)
+    __builtin_abort ();
+  if (qux (0) != 0
+      || qux (~0UL / 42) != 0
+      || qux (~0UL / 42 + 1) != 1
+      || qux (~0UL) != 1)
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr105777.c.jj	2022-06-02 14:32:50.287062810 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr105777.c	2022-06-02 14:38:36.289409212 +0200
@@ -0,0 +1,73 @@
+/* PR middle-end/105777 */
+
+#include "../../gcc.dg/tree-ssa/pr105777.c"
+
+int
+main ()
+{
+  if (foo (0) != 0
+      || foo (__INT_MAX__ / 35) != 0
+      || foo (__INT_MAX__ / 35 + 1) != 1
+      || foo (__INT_MAX__) != 1
+      || foo ((-__INT_MAX__ - 1) / 35) != 0
+      || foo ((-__INT_MAX__ - 1) / 35 - 1) != 1
+      || foo (-__INT_MAX__ - 1) != 1)
+    __builtin_abort ();
+  if (bar (0) != 0
+      || bar (__LONG_MAX__ / 35) != 0
+      || bar (__LONG_MAX__ / 35 + 1) != 1
+      || bar (__LONG_MAX__) != 1
+      || bar ((-__LONG_MAX__ - 1) / 35) != 0
+      || bar ((-__LONG_MAX__ - 1) / 35 - 1) != 1
+      || bar (-__LONG_MAX__ - 1) != 1)
+    __builtin_abort ();
+  if (baz (0) != 0
+      || baz (__INT_MAX__ / 42) != 0
+      || baz (__INT_MAX__ / 42 + 1) != 1
+      || baz (__INT_MAX__) != 1
+      || baz ((-__INT_MAX__ - 1) / 42) != 0
+      || baz ((-__INT_MAX__ - 1) / 42 - 1) != 1
+      || baz (-__INT_MAX__ - 1) != 1)
+    __builtin_abort ();
+  if (qux (0) != 0
+      || qux (__LONG_MAX__ / 42) != 0
+      || qux (__LONG_MAX__ / 42 + 1) != 1
+      || qux (__LONG_MAX__) != 1
+      || qux ((-__LONG_MAX__ - 1) / 42) != 0
+      || qux ((-__LONG_MAX__ - 1) / 42 - 1) != 1
+      || qux (-__LONG_MAX__ - 1) != 1)
+    __builtin_abort ();
+  if (corge (0) != 0
+      || corge (__INT_MAX__ / -39) != 0
+      || corge (__INT_MAX__ / -39 - 1) != 1
+      || corge (__INT_MAX__) != 1
+      || corge ((-__INT_MAX__ - 1) / -39) != 0
+      || corge ((-__INT_MAX__ - 1) / -39 + 1) != 1
+      || corge (-__INT_MAX__ - 1) != 1)
+    __builtin_abort ();
+  if (garply (0) != 0
+      || garply (__LONG_MAX__ / -39) != 0
+      || garply (__LONG_MAX__ / -39 - 1) != 1
+      || garply (__LONG_MAX__) != 1
+      || garply ((-__LONG_MAX__ - 1) / -39) != 0
+      || garply ((-__LONG_MAX__ - 1) / -39 + 1) != 1
+      || garply (-__LONG_MAX__ - 1) != 1)
+    __builtin_abort ();
+  if (grault (0) != 0
+      || grault (__INT_MAX__ / -46) != 0
+      || grault (__INT_MAX__ / -46 - 1) != 1
+      || grault (__INT_MAX__) != 1
+      || grault ((-__INT_MAX__ - 1) / -46) != 0
+      || grault ((-__INT_MAX__ - 1) / -46 + 1) != 1
+      || grault (-__INT_MAX__ - 1) != 1)
+    __builtin_abort ();
+  if (waldo (0) != 0
+      || waldo (__LONG_MAX__ / -46) != 0
+      || waldo (__LONG_MAX__ / -46 - 1) != 1
+      || waldo (__LONG_MAX__) != 1
+      || waldo ((-__LONG_MAX__ - 1) / -46) != 0
+      || waldo ((-__LONG_MAX__ - 1) / -46 + 1) != 1
+      || waldo (-__LONG_MAX__ - 1) != 1)
+    __builtin_abort ();
+  return 0;
+}


	Jakub


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-06-03  9:51 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-02 13:11 [PATCH] match.pd: Optimize __builtin_mul_overflow_p (x, cst, (stype)0) [PR105777] Jakub Jelinek
2022-06-02 13:46 ` Richard Biener
2022-06-03  9:49   ` Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).