[PATCH] Fix PR67790

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH] Fix PR67790
@ 2015-11-18  8:27 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2015-11-18  8:27 UTC (permalink / raw)
  To: gcc-patches


This PR exposes an issue with how we deal with vectorization of
reductions with minus (x -= ...).  We do that by re-writing the
IL to a plus and a new negate stmt.  Unfortunately that messes
up UIDs of stmts in the loop which are used for dominance
checks by the vectorizer.

I've tried using a pattern for this but this fails miserably
(requiring some larger re-org of reduction detection).

Instead the much simpler fix is to not do the IL rewriting
but just fix up the reduction epilogue to use a plus when
the reduction code is a minus.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2015-11-18  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/67790
	* tree-vect-loop.c (vect_is_simple_reduction_1): Remove
	IL rewrite for MINUS_EXPR reductions, rename back to ...
	(vect_is_simple_reduction): ... this, removing the wrapper.
	(vect_force_simple_reduction): Adjust.
	(vectorizable_reduction): Adjust reduc_index for MINUS_EXPR
	reductions and make use if reduc_index in all places.  For
	the final reduction of MINUS_EXPR use PLUS_EXPR.

	* gcc.dg/vect/pr67790.c: New testcase.

Index: gcc/tree-vect-loop.c
===================================================================
*** gcc/tree-vect-loop.c	(revision 230455)
--- gcc/tree-vect-loop.c	(working copy)
*************** vect_is_slp_reduction (loop_vec_info loo
*** 2468,2483 ****
         if (a[i] < val)
  	ret_val = a[i];
  
-    If MODIFY is true it tries also to rework the code in-place to enable
-    detection of more reduction patterns.  For the time being we rewrite
-    "res -= RHS" into "rhs += -RHS" when it seems worthwhile.
  */
  
  static gimple *
! vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple *phi,
! 			    bool check_reduction, bool *double_reduc,
! 			    bool modify, bool need_wrapping_integral_overflow,
! 			    enum vect_reduction_type *v_reduc_type)
  {
    struct loop *loop = (gimple_bb (phi))->loop_father;
    struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
--- 2477,2489 ----
         if (a[i] < val)
  	ret_val = a[i];
  
  */
  
  static gimple *
! vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi,
! 			  bool check_reduction, bool *double_reduc,
! 			  bool need_wrapping_integral_overflow,
! 			  enum vect_reduction_type *v_reduc_type)
  {
    struct loop *loop = (gimple_bb (phi))->loop_father;
    struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
*************** vect_is_simple_reduction_1 (loop_vec_inf
*** 2634,2640 ****
       gimple instruction for the first simple tests and only do this
       if we're allowed to change code at all.  */
    if (code == MINUS_EXPR
-       && modify
        && (op1 = gimple_assign_rhs1 (def_stmt))
        && TREE_CODE (op1) == SSA_NAME
        && SSA_NAME_DEF_STMT (op1) == phi)
--- 2640,2645 ----
*************** vect_is_simple_reduction_1 (loop_vec_inf
*** 2791,2813 ****
  	}
      }
  
-   /* If we detected "res -= x[i]" earlier, rewrite it into
-      "res += -x[i]" now.  If this turns out to be useless reassoc
-      will clean it up again.  */
-   if (orig_code == MINUS_EXPR)
-     {
-       tree rhs = gimple_assign_rhs2 (def_stmt);
-       tree negrhs = make_ssa_name (TREE_TYPE (rhs));
-       gimple *negate_stmt = gimple_build_assign (negrhs, NEGATE_EXPR, rhs);
-       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
-       set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt, 
- 							  loop_info));
-       gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
-       gimple_assign_set_rhs2 (def_stmt, negrhs);
-       gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
-       update_stmt (def_stmt);
-     }
- 
    /* Reduction is safe. We're dealing with one of the following:
       1) integer arithmetic and no trapv
       2) floating point arithmetic, and special flags permit this optimization
--- 2796,2801 ----
*************** vect_is_simple_reduction_1 (loop_vec_inf
*** 2863,2869 ****
                            == vect_internal_def
  		      && !is_loop_header_bb_p (gimple_bb (def2)))))))
      {
!       if (check_reduction)
          {
  	  if (code == COND_EXPR)
  	    {
--- 2851,2858 ----
                            == vect_internal_def
  		      && !is_loop_header_bb_p (gimple_bb (def2)))))))
      {
!       if (check_reduction
! 	  && orig_code != MINUS_EXPR)
          {
  	  if (code == COND_EXPR)
  	    {
*************** vect_is_simple_reduction_1 (loop_vec_inf
*** 2915,2935 ****
    return NULL;
  }
  
- /* Wrapper around vect_is_simple_reduction_1, that won't modify code
-    in-place.  Arguments as there.  */
- 
- static gimple *
- vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi,
- 			  bool check_reduction, bool *double_reduc,
- 			  bool need_wrapping_integral_overflow,
- 			  enum vect_reduction_type *v_reduc_type)
- {
-   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
- 				     double_reduc, false,
- 				     need_wrapping_integral_overflow,
- 				     v_reduc_type);
- }
- 
  /* Wrapper around vect_is_simple_reduction_1, which will modify code
     in-place if it enables detection of more reductions.  Arguments
     as there.  */
--- 2904,2909 ----
*************** vect_force_simple_reduction (loop_vec_in
*** 2940,2949 ****
  			     bool need_wrapping_integral_overflow)
  {
    enum vect_reduction_type v_reduc_type;
!   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
! 				     double_reduc, true,
! 				     need_wrapping_integral_overflow,
! 				     &v_reduc_type);
  }
  
  /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
--- 2914,2923 ----
  			     bool need_wrapping_integral_overflow)
  {
    enum vect_reduction_type v_reduc_type;
!   return vect_is_simple_reduction (loop_info, phi, check_reduction,
! 				   double_reduc,
! 				   need_wrapping_integral_overflow,
! 				   &v_reduc_type);
  }
  
  /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
*************** vectorizable_reduction (gimple *stmt, gi
*** 5398,5403 ****
--- 5372,5379 ----
      }
    /* The default is that the reduction variable is the last in statement.  */
    int reduc_index = op_type - 1;
+   if (code == MINUS_EXPR)
+     reduc_index = 0;
  
    if (code == COND_EXPR && slp_node)
      return false;
*************** vectorizable_reduction (gimple *stmt, gi
*** 5417,5424 ****
       The last use is the reduction variable.  In case of nested cycle this
       assumption is not true: we use reduc_index to record the index of the
       reduction variable.  */
!   for (i = 0; i < op_type - 1; i++)
      {
        /* The condition of COND_EXPR is checked in vectorizable_condition().  */
        if (i == 0 && code == COND_EXPR)
          continue;
--- 5393,5403 ----
       The last use is the reduction variable.  In case of nested cycle this
       assumption is not true: we use reduc_index to record the index of the
       reduction variable.  */
!   for (i = 0; i < op_type; i++)
      {
+       if (i == reduc_index)
+ 	continue;
+ 
        /* The condition of COND_EXPR is checked in vectorizable_condition().  */
        if (i == 0 && code == COND_EXPR)
          continue;
*************** vectorizable_reduction (gimple *stmt, gi
*** 5454,5460 ****
  	}
      }
  
!   is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &def_stmt, &dt, &tem);
    if (!vectype_in)
      vectype_in = tem;
    gcc_assert (is_simple_use);
--- 5433,5440 ----
  	}
      }
  
!   is_simple_use = vect_is_simple_use (ops[reduc_index], loop_vinfo,
! 				      &def_stmt, &dt, &tem);
    if (!vectype_in)
      vectype_in = tem;
    gcc_assert (is_simple_use);
*************** vectorizable_reduction (gimple *stmt, gi
*** 5625,5630 ****
--- 5605,5613 ----
           the vector code inside the loop can be used for the epilog code. */
        orig_code = code;
  
+       if (code == MINUS_EXPR)
+ 	orig_code = PLUS_EXPR;
+ 
        /* For simple condition reductions, replace with the actual expression
  	 we want to base our reduction around.  */
        if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
Index: gcc/testsuite/gcc.dg/vect/pr67790.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/pr67790.c	(revision 0)
--- gcc/testsuite/gcc.dg/vect/pr67790.c	(working copy)
***************
*** 0 ****
--- 1,40 ----
+ /* { dg-require-effective-target vect_int } */
+ 
+ #include "tree-vect.h"
+ 
+ struct {
+     int x_advance;
+     int y_advance;
+ } a[256];
+ 
+ int b, c;
+ 
+ void __attribute__((noinline,noclone)) fn1()
+ {
+   for (int i = 0; i < 256; i++)
+     {
+       c -= a[i].x_advance;
+       b -= a[i].y_advance;
+     }
+ }
+ 
+ int main()
+ {
+   check_vect ();
+ 
+   for (int i = 0; i < 256; ++i)
+     {
+       a[i].x_advance = i;
+       a[i].y_advance = -i + 3;
+       __asm__ volatile ("" : : : "memory");
+     }
+   
+   fn1();
+ 
+   if (c != -32640 || b != 31872)
+     abort ();
+ 
+   return 0;
+ }
+ 
+ /* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2015-11-18  8:27 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-18  8:27 [PATCH] Fix PR67790 Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).