public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix PR42108
@ 2014-12-09 13:57 Richard Biener
  2014-12-10  9:28 ` Richard Biener
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Biener @ 2014-12-09 13:57 UTC (permalink / raw)
  To: gcc-patches


The following finally fixes PR42108 (well, hopefully...) by using
range-information on SSA names to allow the integer divisions introduced
by Fortran array lowering being hoisted out of loops, thus detecting
them as not trapping.

I chose to enhance tree_single_nonzero_warnv_p for this and adjusted
operation_could_trap_helper_p to use this helper.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2014-12-09  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/42108
	* fold-const.c (tree_single_nonzero_warnv_p): Use range
	information associated with SSA names.
	* tree-eh.c (operation_could_trap_helper_p): Use
	tree_single_nonzero_warnv_p to check for trapping non-fp
	operation.
	* tree-vrp.c (remove_range_assertions): Remove bogus assert.

	* gfortran.dg/pr42108.f90: Adjust testcase.

Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	(revision 218479)
+++ gcc/fold-const.c	(working copy)
@@ -83,6 +83,8 @@ along with GCC; see the file COPYING3.
 #include "cgraph.h"
 #include "generic-match.h"
 #include "optabs.h"
+#include "stringpool.h"
+#include "tree-ssanames.h"
 
 /* Nonzero if we are folding constants inside an initializer; zero
    otherwise.  */
@@ -15362,6 +15381,26 @@ tree_single_nonzero_warnv_p (tree t, boo
 	}
       break;
 
+    case SSA_NAME:
+      if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
+	{
+	  wide_int minv, maxv;
+	  enum value_range_type rtype = get_range_info (t, &minv, &maxv);
+	  if (rtype == VR_RANGE)
+	    {
+	      if (wi::lt_p (maxv, 0, TYPE_SIGN (TREE_TYPE (t)))
+		  || wi::gt_p (minv, 0, TYPE_SIGN (TREE_TYPE (t))))
+		return true;
+	    }
+	  else if (rtype == VR_ANTI_RANGE)
+	    {
+	      if (wi::le_p (minv, 0, TYPE_SIGN (TREE_TYPE (t)))
+		  && wi::ge_p (maxv, 0, TYPE_SIGN (TREE_TYPE (t))))
+		return true;
+	    }
+	}
+      break;
+
     default:
       break;
     }
Index: gcc/tree-eh.c
===================================================================
--- gcc/tree-eh.c	(revision 218479)
+++ gcc/tree-eh.c	(working copy)
@@ -2440,13 +2440,16 @@ operation_could_trap_helper_p (enum tree
     case ROUND_MOD_EXPR:
     case TRUNC_MOD_EXPR:
     case RDIV_EXPR:
-      if (honor_snans || honor_trapv)
-	return true;
-      if (fp_operation)
-	return flag_trapping_math;
-      if (!TREE_CONSTANT (divisor) || integer_zerop (divisor))
-        return true;
-      return false;
+      {
+	if (honor_snans || honor_trapv)
+	  return true;
+	if (fp_operation)
+	  return flag_trapping_math;
+	bool sop;
+	if (!tree_single_nonzero_warnv_p (divisor, &sop))
+	  return true;
+	return false;
+      }
 
     case LT_EXPR:
     case LE_EXPR:
Index: gcc/testsuite/gfortran.dg/pr42108.f90
===================================================================
--- gcc/testsuite/gfortran.dg/pr42108.f90	(revision 218479)
+++ gcc/testsuite/gfortran.dg/pr42108.f90	(working copy)
@@ -1,5 +1,5 @@
 ! { dg-do compile }
-! { dg-options "-O2 -fdump-tree-fre1" }
+! { dg-options "-O2 -fdump-tree-fre1 -fdump-tree-lim1-details" }
 
 subroutine  eval(foo1,foo2,foo3,foo4,x,n,nnd)
   implicit real*8 (a-h,o-z)
@@ -21,7 +21,10 @@ subroutine  eval(foo1,foo2,foo3,foo4,x,n
   end do
 end subroutine eval
 
-! There should be only one load from n left
+! There should be only one load from n left and the division should
+! be hoisted out of the loop
 
 ! { dg-final { scan-tree-dump-times "\\*n_" 1 "fre1" } }
+! { dg-final { scan-tree-dump-times "Moving statement" 5 "lim1" } }
 ! { dg-final { cleanup-tree-dump "fre1" } }
+! { dg-final { cleanup-tree-dump "lim1" } }
Index: gcc/tree-vrp.c
===================================================================
--- gcc/tree-vrp.c	(revision 218479)
+++ gcc/tree-vrp.c	(working copy)
@@ -6866,12 +6866,9 @@ remove_range_assertions (void)
 	    tree lhs = gimple_assign_lhs (stmt);
 	    tree rhs = gimple_assign_rhs1 (stmt);
 	    tree var;
-	    tree cond = fold (ASSERT_EXPR_COND (rhs));
 	    use_operand_p use_p;
 	    imm_use_iterator iter;
 
-	    gcc_assert (cond != boolean_false_node);
-
 	    var = ASSERT_EXPR_VAR (rhs);
 	    gcc_assert (TREE_CODE (var) == SSA_NAME);
 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Fix PR42108
  2014-12-09 13:57 [PATCH] Fix PR42108 Richard Biener
@ 2014-12-10  9:28 ` Richard Biener
  0 siblings, 0 replies; 4+ messages in thread
From: Richard Biener @ 2014-12-10  9:28 UTC (permalink / raw)
  To: gcc-patches

On Tue, 9 Dec 2014, Richard Biener wrote:

> 
> The following finally fixes PR42108 (well, hopefully...) by using
> range-information on SSA names to allow the integer divisions introduced
> by Fortran array lowering being hoisted out of loops, thus detecting
> them as not trapping.
> 
> I chose to enhance tree_single_nonzero_warnv_p for this and adjusted
> operation_could_trap_helper_p to use this helper.

Unfortunately it can't be done this way as range-information is
dependent on the location of the definition - but if we change
predicates the way I tried code motion optimizations (like LIM)
will happily move that definition before dominating conditions
which may make the range information invalid.

The patch caused

                === acats tests ===
FAIL:   c450001
FAIL:   cxg2023
FAIL:   cxg2024

in testing (not investigated).

> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
> 
> Richard.
> 
> 2014-12-09  Richard Biener  <rguenther@suse.de>
> 
> 	PR tree-optimization/42108
> 	* fold-const.c (tree_single_nonzero_warnv_p): Use range
> 	information associated with SSA names.
> 	* tree-eh.c (operation_could_trap_helper_p): Use
> 	tree_single_nonzero_warnv_p to check for trapping non-fp
> 	operation.
> 	* tree-vrp.c (remove_range_assertions): Remove bogus assert.
> 
> 	* gfortran.dg/pr42108.f90: Adjust testcase.
> 
> Index: gcc/fold-const.c
> ===================================================================
> --- gcc/fold-const.c	(revision 218479)
> +++ gcc/fold-const.c	(working copy)
> @@ -83,6 +83,8 @@ along with GCC; see the file COPYING3.
>  #include "cgraph.h"
>  #include "generic-match.h"
>  #include "optabs.h"
> +#include "stringpool.h"
> +#include "tree-ssanames.h"
>  
>  /* Nonzero if we are folding constants inside an initializer; zero
>     otherwise.  */
> @@ -15362,6 +15381,26 @@ tree_single_nonzero_warnv_p (tree t, boo
>  	}
>        break;
>  
> +    case SSA_NAME:
> +      if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
> +	{
> +	  wide_int minv, maxv;
> +	  enum value_range_type rtype = get_range_info (t, &minv, &maxv);
> +	  if (rtype == VR_RANGE)
> +	    {
> +	      if (wi::lt_p (maxv, 0, TYPE_SIGN (TREE_TYPE (t)))
> +		  || wi::gt_p (minv, 0, TYPE_SIGN (TREE_TYPE (t))))
> +		return true;
> +	    }
> +	  else if (rtype == VR_ANTI_RANGE)
> +	    {
> +	      if (wi::le_p (minv, 0, TYPE_SIGN (TREE_TYPE (t)))
> +		  && wi::ge_p (maxv, 0, TYPE_SIGN (TREE_TYPE (t))))
> +		return true;
> +	    }
> +	}
> +      break;
> +
>      default:
>        break;
>      }
> Index: gcc/tree-eh.c
> ===================================================================
> --- gcc/tree-eh.c	(revision 218479)
> +++ gcc/tree-eh.c	(working copy)
> @@ -2440,13 +2440,16 @@ operation_could_trap_helper_p (enum tree
>      case ROUND_MOD_EXPR:
>      case TRUNC_MOD_EXPR:
>      case RDIV_EXPR:
> -      if (honor_snans || honor_trapv)
> -	return true;
> -      if (fp_operation)
> -	return flag_trapping_math;
> -      if (!TREE_CONSTANT (divisor) || integer_zerop (divisor))
> -        return true;
> -      return false;
> +      {
> +	if (honor_snans || honor_trapv)
> +	  return true;
> +	if (fp_operation)
> +	  return flag_trapping_math;
> +	bool sop;
> +	if (!tree_single_nonzero_warnv_p (divisor, &sop))
> +	  return true;
> +	return false;
> +      }
>  
>      case LT_EXPR:
>      case LE_EXPR:
> Index: gcc/testsuite/gfortran.dg/pr42108.f90
> ===================================================================
> --- gcc/testsuite/gfortran.dg/pr42108.f90	(revision 218479)
> +++ gcc/testsuite/gfortran.dg/pr42108.f90	(working copy)
> @@ -1,5 +1,5 @@
>  ! { dg-do compile }
> -! { dg-options "-O2 -fdump-tree-fre1" }
> +! { dg-options "-O2 -fdump-tree-fre1 -fdump-tree-lim1-details" }
>  
>  subroutine  eval(foo1,foo2,foo3,foo4,x,n,nnd)
>    implicit real*8 (a-h,o-z)
> @@ -21,7 +21,10 @@ subroutine  eval(foo1,foo2,foo3,foo4,x,n
>    end do
>  end subroutine eval
>  
> -! There should be only one load from n left
> +! There should be only one load from n left and the division should
> +! be hoisted out of the loop
>  
>  ! { dg-final { scan-tree-dump-times "\\*n_" 1 "fre1" } }
> +! { dg-final { scan-tree-dump-times "Moving statement" 5 "lim1" } }
>  ! { dg-final { cleanup-tree-dump "fre1" } }
> +! { dg-final { cleanup-tree-dump "lim1" } }
> Index: gcc/tree-vrp.c
> ===================================================================
> --- gcc/tree-vrp.c	(revision 218479)
> +++ gcc/tree-vrp.c	(working copy)
> @@ -6866,12 +6866,9 @@ remove_range_assertions (void)
>  	    tree lhs = gimple_assign_lhs (stmt);
>  	    tree rhs = gimple_assign_rhs1 (stmt);
>  	    tree var;
> -	    tree cond = fold (ASSERT_EXPR_COND (rhs));
>  	    use_operand_p use_p;
>  	    imm_use_iterator iter;
>  
> -	    gcc_assert (cond != boolean_false_node);
> -
>  	    var = ASSERT_EXPR_VAR (rhs);
>  	    gcc_assert (TREE_CODE (var) == SSA_NAME);
>  
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Jeff Hawn, Jennifer Guild, Felix Imendoerffer, HRB 21284
(AG Nuernberg)
Maxfeldstrasse 5, 90409 Nuernberg, Germany

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Fix PR42108
  2014-12-11 10:11 Richard Biener
@ 2014-12-11 15:47 ` Steve Kargl
  0 siblings, 0 replies; 4+ messages in thread
From: Steve Kargl @ 2014-12-11 15:47 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches, fortran

On Thu, Dec 11, 2014 at 11:05:13AM +0100, Richard Biener wrote:
> 
> The following patch fixes the performance regression in PR42108
> by allowing PRE and LIM to see the division (to - from) / step
> in translating do loops executed unconditionally.  This makes
> them not care for the fact that step might be zero and thus
> the division might trap.
> 

step cannot be zero in standard conforming code.  From F95 8.1.4.4,
"Execution of a DO construct":

When the DO statement is executed, the DO construct becomes active.
If loop-control is

  [ , ] do-variable = scalar-int-expr1, scalar-int-expr2 [, scalar-int-expr3]

the following steps are performed in sequence:

  (1) The initial parameter m1, the terminal parameter m2, and the
      incrementation parameter m3 are of type integer with the same kind
      type parameter as the do-variable.  Their values are established by
      evaluating scalar-int-expr1, scalar-int-expr2, and scalar-int-expr3,
      respectively, including, if necessary, conversion to the kind type
      parameter of the do-variable according to the rules for numeric
      conversion (Table 7.10).  If scalar-int-expr3 does not appear, m3
      has the value 1. The value m3 shall not be zero.

"The value m3 shall not be zero" is a not an enumerated constraint,
so the compiler does not need to catch and report m3 being zero.
The prohibition is on the programmerr.  So, if the division traps,
it is a bug in the program.

> This makes the runtime of the testcase improve from 10.7s to
> 8s (same as gfortran 4.3).
> 
> The caveat is that iff the loop is not executed (to < from
> for positive step for example) then there will be an additional
> executed division computing the unused countm1.
> 
> Bootstrap and regtest running on x86_64-unknown-linux-gnu, ok
> for trunk?
> 

OK.

-- 
Steve

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] Fix PR42108
@ 2014-12-11 10:11 Richard Biener
  2014-12-11 15:47 ` Steve Kargl
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Biener @ 2014-12-11 10:11 UTC (permalink / raw)
  To: gcc-patches; +Cc: fortran


The following patch fixes the performance regression in PR42108
by allowing PRE and LIM to see the division (to - from) / step
in translating do loops executed unconditionally.  This makes
them not care for the fact that step might be zero and thus
the division might trap.

This makes the runtime of the testcase improve from 10.7s to
8s (same as gfortran 4.3).

The caveat is that iff the loop is not executed (to < from
for positive step for example) then there will be an additional
executed division computing the unused countm1.

Bootstrap and regtest running on x86_64-unknown-linux-gnu, ok
for trunk?

Thanks,
Richard.

2014-12-11  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/42108
	* trans-stmt.c (gfc_trans_do): Execute the division computing
	countm1 before the loop entry check.

	* gfortran.dg/pr42108.f90: Amend.

Index: gcc/fortran/trans-stmt.c
===================================================================
--- gcc/fortran/trans-stmt.c	(revision 218515)
+++ gcc/fortran/trans-stmt.c	(working copy)
@@ -1645,15 +1645,15 @@ gfc_trans_do (gfc_code * code, tree exit
      This code is executed before we enter the loop body. We generate:
      if (step > 0)
        {
+	 countm1 = (to - from) / step;
 	 if (to < from)
 	   goto exit_label;
-	 countm1 = (to - from) / step;
        }
      else
        {
+	 countm1 = (from - to) / -step;
 	 if (to > from)
 	   goto exit_label;
-	 countm1 = (from - to) / -step;
        }
    */
 
@@ -1675,11 +1675,12 @@ gfc_trans_do (gfc_code * code, tree exit
 			      fold_build2_loc (loc, MINUS_EXPR, utype,
 					       tou, fromu),
 			      stepu);
-      pos = fold_build3_loc (loc, COND_EXPR, void_type_node, tmp,
-			     fold_build1_loc (loc, GOTO_EXPR, void_type_node,
-					      exit_label),
-			     fold_build2 (MODIFY_EXPR, void_type_node,
-					  countm1, tmp2));
+      pos = build2 (COMPOUND_EXPR, void_type_node,
+		    fold_build2 (MODIFY_EXPR, void_type_node,
+				 countm1, tmp2),
+		    build3_loc (loc, COND_EXPR, void_type_node, tmp,
+				build1_loc (loc, GOTO_EXPR, void_type_node,
+					    exit_label), NULL_TREE));
 
       /* For a negative step, when to > from, exit, otherwise compute
          countm1 = ((unsigned)from - (unsigned)to) / -(unsigned)step  */
@@ -1688,11 +1689,12 @@ gfc_trans_do (gfc_code * code, tree exit
 			      fold_build2_loc (loc, MINUS_EXPR, utype,
 					       fromu, tou),
 			      fold_build1_loc (loc, NEGATE_EXPR, utype, stepu));
-      neg = fold_build3_loc (loc, COND_EXPR, void_type_node, tmp,
-			     fold_build1_loc (loc, GOTO_EXPR, void_type_node,
-					      exit_label),
-			     fold_build2 (MODIFY_EXPR, void_type_node,
-					  countm1, tmp2));
+      neg = build2 (COMPOUND_EXPR, void_type_node,
+		    fold_build2 (MODIFY_EXPR, void_type_node,
+				 countm1, tmp2),
+		    build3_loc (loc, COND_EXPR, void_type_node, tmp,
+				build1_loc (loc, GOTO_EXPR, void_type_node,
+					    exit_label), NULL_TREE));
 
       tmp = fold_build2_loc (loc, LT_EXPR, boolean_type_node, step,
 			     build_int_cst (TREE_TYPE (step), 0));
Index: gcc/testsuite/gfortran.dg/pr42108.f90
===================================================================
--- gcc/testsuite/gfortran.dg/pr42108.f90	(revision 218584)
+++ gcc/testsuite/gfortran.dg/pr42108.f90	(working copy)
@@ -1,5 +1,5 @@
 ! { dg-do compile }
-! { dg-options "-O2 -fdump-tree-fre1" }
+! { dg-options "-O2 -fdump-tree-fre1 -fdump-tree-pre-details" }
 
 subroutine  eval(foo1,foo2,foo3,foo4,x,n,nnd)
   implicit real*8 (a-h,o-z)
@@ -21,7 +21,9 @@ subroutine  eval(foo1,foo2,foo3,foo4,x,n
   end do
 end subroutine eval
 
+! We should have hoisted the division
+! { dg-final { scan-tree-dump "in all uses of countm1\[^\n\]* / " "pre" } }
 ! There should be only one load from n left
-
 ! { dg-final { scan-tree-dump-times "\\*n_" 1 "fre1" } }
 ! { dg-final { cleanup-tree-dump "fre1" } }
+! { dg-final { cleanup-tree-dump "pre" } }

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2014-12-11 15:47 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-12-09 13:57 [PATCH] Fix PR42108 Richard Biener
2014-12-10  9:28 ` Richard Biener
2014-12-11 10:11 Richard Biener
2014-12-11 15:47 ` Steve Kargl

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).