public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Eliminate vectorizer analysis side effects
@ 2013-08-30  0:30 Xinliang David Li
  2013-08-30  8:26 ` Richard Biener
  0 siblings, 1 reply; 4+ messages in thread
From: Xinliang David Li @ 2013-08-30  0:30 UTC (permalink / raw)
  To: GCC Patches; +Cc: Richard Guenther

[-- Attachment #1: Type: text/plain, Size: 775 bytes --]

I was debugging a runtime failure of SPEC06 xalancbmk built with LIPO.
Using -fdisable-<pass> option pinpoints the problem in slp vectorize
pass on a particular function. dbgcnt support is added to to track
down the individual BB, but it  fails even when the dbg count is set
to 0.

It turns out that no BB was actually vectorized for that function, but
turning on/off slp-vectorize does make a difference in generated code
-- the only difference between the good and bad case is stack layout.
 The problem is  in the alignment analysis phase -- which
speculatively changes the base declaration's alignment regardless
whether the vectorization transformation will be performed or not
later.

The attached patch fixes the problem. Testing is ok. Ok for trunk?

thanks,

David

[-- Attachment #2: vect_analy_fix.txt --]
[-- Type: text/plain, Size: 5418 bytes --]

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 202088)
+++ ChangeLog	(working copy)
@@ -1,5 +1,17 @@
 2013-08-29  Xinliang David Li  <davidxl@google.com>
 
+	* tree-vect-data-refs.c (vect_compute_data_ref_alignment):
+	Delay base decl alignment adjustment.
+	* tree-vectorizer.c (ensure_base_alignment): New function.
+	(vectorize_loops): Add dbg_cnt support. Perform alignment
+	adjustment.
+	(execute_vect_slp): Ditto.
+	* dbgcnt.def: New debug counter.
+	* tree-data-ref.h: New fields.
+	* Makefile: New dependency.
+
+2013-08-29  Xinliang David Li  <davidxl@google.com>
+
 	* loop-unroll.c (report_unroll_peel): Minor message
 	change.
 	* tree-vect-loop-manip.c (vect_do_peeling_for_alignment):
Index: tree-vect-data-refs.c
===================================================================
--- tree-vect-data-refs.c	(revision 202088)
+++ tree-vect-data-refs.c	(working copy)
@@ -763,15 +763,10 @@ vect_compute_data_ref_alignment (struct
           dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
         }
 
-      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
-      DECL_USER_ALIGN (base) = 1;
+      DR_BASE_DECL (dr) = base;
+      DR_BASE_MISALIGNED (dr) = true;
     }
 
-  /* At this point we assume that the base is aligned.  */
-  gcc_assert (base_aligned
-	      || (TREE_CODE (base) == VAR_DECL
-		  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
-
   /* If this is a backward running DR then first access in the larger
      vectype actually is N-1 elements before the address in the DR.
      Adjust misalign accordingly.  */
Index: dbgcnt.def
===================================================================
--- dbgcnt.def	(revision 202088)
+++ dbgcnt.def	(working copy)
@@ -172,6 +172,8 @@ DEBUG_COUNTER (pre_insn)
 DEBUG_COUNTER (treepre_insert)
 DEBUG_COUNTER (tree_sra)
 DEBUG_COUNTER (eipa_sra)
+DEBUG_COUNTER (vect_loop)
+DEBUG_COUNTER (vect_slp)
 DEBUG_COUNTER (sched2_func)
 DEBUG_COUNTER (sched_block)
 DEBUG_COUNTER (sched_func)
Index: tree-vectorizer.c
===================================================================
--- tree-vectorizer.c	(revision 202088)
+++ tree-vectorizer.c	(working copy)
@@ -68,6 +68,7 @@ along with GCC; see the file COPYING3.
 #include "tree-pass.h"
 #include "hash-table.h"
 #include "tree-ssa-propagate.h"
+#include "dbgcnt.h"
 
 /* Loop or bb location.  */
 LOC vect_location;
@@ -279,6 +280,37 @@ note_simd_array_uses (hash_table <simd_a
       }
 }
 \f
+/* A helper function to enforce base alignment
+   before transforamtion.  */
+
+static void
+ensure_base_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
+{
+  vec<data_reference_p> datarefs;
+  struct data_reference *dr;
+  unsigned int i;
+
+ if (loop_vinfo)
+    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
+  else
+    datarefs = BB_VINFO_DATAREFS (bb_vinfo);
+
+  FOR_EACH_VEC_ELT (datarefs, i, dr)
+    {
+      tree base_decl = DR_BASE_DECL (dr);
+      if (base_decl && DR_BASE_MISALIGNED (dr))
+        {
+          gimple stmt = DR_STMT (dr);
+          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+          tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+          DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
+          DECL_USER_ALIGN (base_decl) = 1;
+          DR_BASE_MISALIGNED (dr) = false;
+        }
+    }
+}
+
+
 /* Function vectorize_loops.
 
    Entry point to loop vectorization phase.  */
@@ -331,10 +363,14 @@ vectorize_loops (void)
 	if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
 	  continue;
 
+        if (!dbg_cnt (vect_loop))
+	  break;
+
         if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOC
 	    && dump_enabled_p ())
           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                            "loop vectorized\n");
+        ensure_base_alignment (loop_vinfo, NULL);
 	vect_transform_loop (loop_vinfo);
 	num_vectorized_loops++;
 	/* Now that the loop has been vectorized, allow it to be unrolled
@@ -431,6 +467,7 @@ static unsigned int
 execute_vect_slp (void)
 {
   basic_block bb;
+  bb_vec_info bb_vinfo;
 
   init_stmt_vec_info_vec ();
 
@@ -438,8 +475,12 @@ execute_vect_slp (void)
     {
       vect_location = find_bb_location (bb);
 
-      if (vect_slp_analyze_bb (bb))
+      if ((bb_vinfo = vect_slp_analyze_bb (bb)))
         {
+	  if (!dbg_cnt (vect_slp))
+	    break;
+
+          ensure_base_alignment (NULL, bb_vinfo);
           vect_slp_transform_bb (bb);
           if (dump_enabled_p ())
             dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
Index: tree-data-ref.h
===================================================================
--- tree-data-ref.h	(revision 202088)
+++ tree-data-ref.h	(working copy)
@@ -176,6 +176,12 @@ struct data_reference
   /* Auxiliary info specific to a pass.  */
   void *aux;
 
+  /* Base var decl.  */
+  tree base_decl;
+
+  /* A boolean indicating if the base decl is misaligned.  */
+  bool base_misaligned;
+
   /* True when the data reference is in RHS of a stmt.  */
   bool is_read;
 
@@ -193,6 +199,8 @@ struct data_reference
 };
 
 #define DR_STMT(DR)                (DR)->stmt
+#define DR_BASE_DECL(DR)           (DR)->base_decl
+#define DR_BASE_MISALIGNED(DR)     (DR)->base_misaligned
 #define DR_REF(DR)                 (DR)->ref
 #define DR_BASE_OBJECT(DR)         (DR)->indices.base_object
 #define DR_UNCONSTRAINED_BASE(DR)  (DR)->indices.unconstrained_base

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: Eliminate vectorizer analysis side effects
  2013-08-30  0:30 Eliminate vectorizer analysis side effects Xinliang David Li
@ 2013-08-30  8:26 ` Richard Biener
  2013-08-30 21:37   ` Xinliang David Li
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Biener @ 2013-08-30  8:26 UTC (permalink / raw)
  To: Xinliang David Li; +Cc: GCC Patches

On Fri, Aug 30, 2013 at 1:28 AM, Xinliang David Li <davidxl@google.com> wrote:
> I was debugging a runtime failure of SPEC06 xalancbmk built with LIPO.
> Using -fdisable-<pass> option pinpoints the problem in slp vectorize
> pass on a particular function. dbgcnt support is added to to track
> down the individual BB, but it  fails even when the dbg count is set
> to 0.
>
> It turns out that no BB was actually vectorized for that function, but
> turning on/off slp-vectorize does make a difference in generated code
> -- the only difference between the good and bad case is stack layout.
>  The problem is  in the alignment analysis phase -- which
> speculatively changes the base declaration's alignment regardless
> whether the vectorization transformation will be performed or not
> later.
>
> The attached patch fixes the problem. Testing is ok. Ok for trunk?

Not in this form.  I'd rather not put extra fields in the data-refs this way.
(As for the xalancbmk runtime problem - doesn't this patch just paper
over the real issue?)

For BB SLP you still adjust DR bases that do not take part in
vectorization - the DR vector contains all refs in the BB, not just
those in the SLP instances we are going to vectorize.  So the
commit of the re-aligning decision should be done from where
we vectorize the DR, in vectorizable_load/store in its transform
phase.

If we decide to integrate the fields into the data-ref then the
analysis and commit parts should go into the data-ref machinery
as well.  Otherwise the vectorizer should use data-ref->aux or some
other way to hang off its private data.

Other than that, modifying alignment of variables that are not
vectorized is indeed a bug worth fixing.

Richard.

> thanks,
>
> David

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: Eliminate vectorizer analysis side effects
  2013-08-30  8:26 ` Richard Biener
@ 2013-08-30 21:37   ` Xinliang David Li
  2013-09-02 13:20     ` Richard Biener
  0 siblings, 1 reply; 4+ messages in thread
From: Xinliang David Li @ 2013-08-30 21:37 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 2409 bytes --]

On Fri, Aug 30, 2013 at 1:23 AM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Fri, Aug 30, 2013 at 1:28 AM, Xinliang David Li <davidxl@google.com> wrote:
>> I was debugging a runtime failure of SPEC06 xalancbmk built with LIPO.
>> Using -fdisable-<pass> option pinpoints the problem in slp vectorize
>> pass on a particular function. dbgcnt support is added to to track
>> down the individual BB, but it  fails even when the dbg count is set
>> to 0.
>>
>> It turns out that no BB was actually vectorized for that function, but
>> turning on/off slp-vectorize does make a difference in generated code
>> -- the only difference between the good and bad case is stack layout.
>>  The problem is  in the alignment analysis phase -- which
>> speculatively changes the base declaration's alignment regardless
>> whether the vectorization transformation will be performed or not
>> later.
>>
>> The attached patch fixes the problem. Testing is ok. Ok for trunk?
>
> Not in this form.  I'd rather not put extra fields in the data-refs this way.
> (As for the xalancbmk runtime problem - doesn't this patch just paper
> over the real issue?)

I believe it is stack-limit related. This program has some recursive
call chains that can generate a call frame ~9k deep. The vectorizer
side effect causes the affected function in the call frame to grow
~100 byte in stack size. Since this function appears lots of times in
the callstack, the overall stack consumption increase a lot. Combined
with the aggressive cross module inlining, it ends up blowing up the
stack limit.


>
> For BB SLP you still adjust DR bases that do not take part in
> vectorization - the DR vector contains all refs in the BB, not just
> those in the SLP instances we are going to vectorize.  So the
> commit of the re-aligning decision should be done from where
> we vectorize the DR, in vectorizable_load/store in its transform
> phase.
>
> If we decide to integrate the fields into the data-ref then the
> analysis and commit parts should go into the data-ref machinery
> as well.  Otherwise the vectorizer should use data-ref->aux or some
> other way to hang off its private data.
>

Good point.

> Other than that, modifying alignment of variables that are not
> vectorized is indeed a bug worth fixing.

The new version of the patch is attached. Ok for trunk after testing?

thanks,

David

>
> Richard.
>
>> thanks,
>>
>> David

[-- Attachment #2: vect_analy_fix_tot_2.txt --]
[-- Type: text/plain, Size: 12398 bytes --]

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 202088)
+++ ChangeLog	(working copy)
@@ -1,5 +1,25 @@
 2013-08-29  Xinliang David Li  <davidxl@google.com>
 
+	* tree-vect-slp.c (destroy_bb_vec_info): Data ref cleanup.
+	* tree-vect-loop.c (destroy_bb_vec_info): Ditto.
+	* tree-vect-data-refs.c (vect_compute_data_ref_alignment):
+	Delay base decl alignment adjustment.
+	* tree-vectorizer.c (destroy_datarefs): New function.
+	* tree-vectorizer.h: New data structure.
+	(set_dr_misalignment): New function.
+	(dr_misalignment): Ditto.
+	* tree-vect-stmts.c (vectorizable_store_1): Name change.
+	(vectorizable_load_1): Ditto.
+	(vectorizable_store): New function.
+	(vectorizable_load): Ditto.
+	(ensure_base_align): Ditto.
+	(vectorize_loops): Add dbg_cnt support.
+	(execute_vect_slp): Ditto.
+	* dbgcnt.def: New debug counter.
+	* Makefile: New dependency.
+
+2013-08-29  Xinliang David Li  <davidxl@google.com>
+
 	* loop-unroll.c (report_unroll_peel): Minor message
 	change.
 	* tree-vect-loop-manip.c (vect_do_peeling_for_alignment):
Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c	(revision 202088)
+++ tree-vect-loop.c	(working copy)
@@ -957,7 +957,7 @@ destroy_loop_vec_info (loop_vec_info loo
     }
 
   free (LOOP_VINFO_BBS (loop_vinfo));
-  free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
+  destroy_datarefs (loop_vinfo, NULL);
   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
   LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
   LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
Index: dbgcnt.def
===================================================================
--- dbgcnt.def	(revision 202088)
+++ dbgcnt.def	(working copy)
@@ -172,6 +172,8 @@ DEBUG_COUNTER (pre_insn)
 DEBUG_COUNTER (treepre_insert)
 DEBUG_COUNTER (tree_sra)
 DEBUG_COUNTER (eipa_sra)
+DEBUG_COUNTER (vect_loop)
+DEBUG_COUNTER (vect_slp)
 DEBUG_COUNTER (sched2_func)
 DEBUG_COUNTER (sched_block)
 DEBUG_COUNTER (sched_func)
Index: Makefile.in
===================================================================
--- Makefile.in	(revision 202088)
+++ Makefile.in	(working copy)
@@ -2645,7 +2645,7 @@ tree-vect-data-refs.o: tree-vect-data-re
 tree-vectorizer.o: tree-vectorizer.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(DUMPFILE_H) $(TM_H) $(GGC_H) $(TREE_H) $(TREE_FLOW_H) \
    $(CFGLOOP_H) $(TREE_PASS_H) $(TREE_VECTORIZER_H) \
-   $(TREE_PRETTY_PRINT_H)
+   $(TREE_PRETTY_PRINT_H) $(DBGCNT_H)
 vtable-verify.o: vtable-verify.c vtable-verify.h $(CONFIG_H) \
    $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) cp/cp-tree.h $(TM_P_H) \
    $(BASIC_BLOCK_H) output.h $(TREE_FLOW_H) $(TREE_DUMP_H) $(TREE_PASS_H) \
Index: tree-vect-slp.c
===================================================================
--- tree-vect-slp.c	(revision 202088)
+++ tree-vect-slp.c	(working copy)
@@ -1825,7 +1825,7 @@ destroy_bb_vec_info (bb_vec_info bb_vinf
         free_stmt_vec_info (stmt);
     }
 
-  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
+  destroy_datarefs (NULL, bb_vinfo);
   free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
   BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
Index: tree-vectorizer.c
===================================================================
--- tree-vectorizer.c	(revision 202088)
+++ tree-vectorizer.c	(working copy)
@@ -68,6 +68,7 @@ along with GCC; see the file COPYING3.
 #include "tree-pass.h"
 #include "hash-table.h"
 #include "tree-ssa-propagate.h"
+#include "dbgcnt.h"
 
 /* Loop or bb location.  */
 LOC vect_location;
@@ -279,6 +280,31 @@ note_simd_array_uses (hash_table <simd_a
       }
 }
 \f
+/* A helper function to free data refs.  */
+
+void
+destroy_datarefs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
+{
+  vec<data_reference_p> datarefs;
+  struct data_reference *dr;
+  unsigned int i;
+
+ if (loop_vinfo)
+    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
+  else
+    datarefs = BB_VINFO_DATAREFS (bb_vinfo);
+
+  FOR_EACH_VEC_ELT (datarefs, i, dr)
+    if (dr->aux)
+      {
+        free (dr->aux);
+        dr->aux = NULL;
+      }
+
+  free_data_refs (datarefs);
+}
+
+
 /* Function vectorize_loops.
 
    Entry point to loop vectorization phase.  */
@@ -331,6 +357,9 @@ vectorize_loops (void)
 	if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
 	  continue;
 
+        if (!dbg_cnt (vect_loop))
+	  break;
+
         if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOC
 	    && dump_enabled_p ())
           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
@@ -431,6 +460,7 @@ static unsigned int
 execute_vect_slp (void)
 {
   basic_block bb;
+  bb_vec_info bb_vinfo;
 
   init_stmt_vec_info_vec ();
 
@@ -438,8 +468,11 @@ execute_vect_slp (void)
     {
       vect_location = find_bb_location (bb);
 
-      if (vect_slp_analyze_bb (bb))
+      if ((bb_vinfo = vect_slp_analyze_bb (bb)))
         {
+          if (!dbg_cnt (vect_slp))
+            break;
+
           vect_slp_transform_bb (bb);
           if (dump_enabled_p ())
             dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
Index: tree-vectorizer.h
===================================================================
--- tree-vectorizer.h	(revision 202088)
+++ tree-vectorizer.h	(working copy)
@@ -629,6 +629,12 @@ typedef struct _stmt_vec_info {
 #define PURE_SLP_STMT(S)                  ((S)->slp_type == pure_slp)
 #define STMT_SLP_TYPE(S)                   (S)->slp_type
 
+typedef struct _dataref_aux {
+  tree base_decl;
+  bool base_misaligned;
+  int misalignment;
+} dataref_aux;
+
 #define VECT_MAX_COST 1000
 
 /* The maximum number of intermediate steps required in multi-step type
@@ -831,11 +837,31 @@ destroy_cost_data (void *data)
 /*-----------------------------------------------------------------*/
 /* Info on data references alignment.                              */
 /*-----------------------------------------------------------------*/
+inline void
+set_dr_misalignment (struct data_reference *dr, int val)
+{
+  dataref_aux *data_aux = (dataref_aux *) dr->aux;
+
+  if (!data_aux)
+    {
+      data_aux = XCNEW (dataref_aux);
+      dr->aux = data_aux;
+    }
+
+  data_aux->misalignment = val;
+}
+
+inline int
+dr_misalignment (struct data_reference *dr)
+{
+  gcc_assert (dr->aux);
+  return ((dataref_aux *) dr->aux)->misalignment;
+}
 
 /* Reflects actual alignment of first access in the vectorized loop,
    taking into account peeling/versioning if applied.  */
-#define DR_MISALIGNMENT(DR)   ((int) (size_t) (DR)->aux)
-#define SET_DR_MISALIGNMENT(DR, VAL)   ((DR)->aux = (void *) (size_t) (VAL))
+#define DR_MISALIGNMENT(DR) dr_misalignment (DR)
+#define SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL)
 
 /* Return TRUE if the data access is aligned, and FALSE otherwise.  */
 
@@ -1014,5 +1040,6 @@ void vect_pattern_recog (loop_vec_info,
 
 /* In tree-vectorizer.c.  */
 unsigned vectorize_loops (void);
+void destroy_datarefs (loop_vec_info, bb_vec_info);
 
 #endif  /* GCC_TREE_VECTORIZER_H  */
Index: tree-vect-data-refs.c
===================================================================
--- tree-vect-data-refs.c	(revision 202088)
+++ tree-vect-data-refs.c	(working copy)
@@ -763,15 +763,11 @@ vect_compute_data_ref_alignment (struct
           dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
         }
 
-      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
-      DECL_USER_ALIGN (base) = 1;
+      gcc_assert (dr->aux);
+      ((dataref_aux *)dr->aux)->base_decl = base;
+      ((dataref_aux *)dr->aux)->base_misaligned = true;
     }
 
-  /* At this point we assume that the base is aligned.  */
-  gcc_assert (base_aligned
-	      || (TREE_CODE (base) == VAR_DECL
-		  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
-
   /* If this is a backward running DR then first access in the larger
      vectype actually is N-1 elements before the address in the DR.
      Adjust misalign accordingly.  */
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c	(revision 202088)
+++ tree-vect-stmts.c	(working copy)
@@ -3819,15 +3819,16 @@ vectorizable_operation (gimple stmt, gim
    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
 
 static bool
-vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
-		    slp_tree slp_node)
+vectorizable_store_1 (gimple stmt, stmt_vec_info stmt_info,
+                      struct data_reference *dr,
+                      gimple_stmt_iterator *gsi, gimple *vec_stmt,
+                      slp_tree slp_node)
 {
   tree scalar_dest;
   tree data_ref;
   tree op;
   tree vec_oprnd = NULL_TREE;
-  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
+  struct data_reference *first_dr = NULL;
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   tree elem_type;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
@@ -4284,6 +4285,43 @@ vectorizable_store (gimple stmt, gimple_
   return true;
 }
 
+/* A helper function to ensure data reference DR's base alignment
+   for STMT_INFO.  */
+
+static void
+ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
+{
+  if (!dr->aux)
+    return;
+
+  if (((dataref_aux *)dr->aux)->base_misaligned)
+    {
+      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+      tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
+
+      DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
+      DECL_USER_ALIGN (base_decl) = 1;
+      ((dataref_aux *)dr->aux)->base_misaligned = false;
+    }
+}
+
+/* A wrapper function to vectorizable_store.  */
+
+static bool
+vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
+                    slp_tree slp_node)
+{
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+  bool ret;
+
+  if ((ret = vectorizable_store_1 (stmt, stmt_info, dr, gsi,
+                                   vec_stmt, slp_node)))
+    ensure_base_align (stmt_info, dr);
+
+  return ret;
+}
+
 /* Given a vector type VECTYPE and permutation SEL returns
    the VECTOR_CST mask that implements the permutation of the
    vector elements.  If that is impossible to do, returns NULL.  */
@@ -4354,7 +4392,7 @@ permute_vec_elements (tree x, tree y, tr
   return data_ref;
 }
 
-/* vectorizable_load.
+/* vectorizable_load_1.
 
    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
    can be vectorized.
@@ -4363,19 +4401,20 @@ permute_vec_elements (tree x, tree y, tr
    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
 
 static bool
-vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
-		   slp_tree slp_node, slp_instance slp_node_instance)
+vectorizable_load_1 (gimple stmt, stmt_vec_info stmt_info,
+                     struct data_reference *dr,
+                     gimple_stmt_iterator *gsi, gimple *vec_stmt,
+                     slp_tree slp_node, slp_instance slp_node_instance)
 {
   tree scalar_dest;
   tree vec_dest = NULL;
   tree data_ref = NULL;
-  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   stmt_vec_info prev_stmt_info;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   struct loop *loop = NULL;
   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
   bool nested_in_vect_loop = false;
-  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
+  struct data_reference *first_dr;
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   tree elem_type;
   tree new_temp;
@@ -5299,6 +5338,23 @@ vectorizable_load (gimple stmt, gimple_s
   return true;
 }
 
+/* A wrapper function to vectorizable_load_1.  */
+
+static bool
+vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
+                   slp_tree slp_node, slp_instance slp_node_instance)
+{
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+  bool ret;
+
+  if ((ret = vectorizable_load_1 (stmt, stmt_info, dr, gsi,
+                                  vec_stmt, slp_node,
+                                  slp_node_instance)))
+    ensure_base_align (stmt_info, dr);
+  return ret;
+}
+
 /* Function vect_is_simple_cond.
 
    Input:

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: Eliminate vectorizer analysis side effects
  2013-08-30 21:37   ` Xinliang David Li
@ 2013-09-02 13:20     ` Richard Biener
  0 siblings, 0 replies; 4+ messages in thread
From: Richard Biener @ 2013-09-02 13:20 UTC (permalink / raw)
  To: Xinliang David Li; +Cc: GCC Patches

On Fri, Aug 30, 2013 at 11:34 PM, Xinliang David Li <davidxl@google.com> wrote:
> On Fri, Aug 30, 2013 at 1:23 AM, Richard Biener
> <richard.guenther@gmail.com> wrote:
>> On Fri, Aug 30, 2013 at 1:28 AM, Xinliang David Li <davidxl@google.com> wrote:
>>> I was debugging a runtime failure of SPEC06 xalancbmk built with LIPO.
>>> Using -fdisable-<pass> option pinpoints the problem in slp vectorize
>>> pass on a particular function. dbgcnt support is added to to track
>>> down the individual BB, but it  fails even when the dbg count is set
>>> to 0.
>>>
>>> It turns out that no BB was actually vectorized for that function, but
>>> turning on/off slp-vectorize does make a difference in generated code
>>> -- the only difference between the good and bad case is stack layout.
>>>  The problem is  in the alignment analysis phase -- which
>>> speculatively changes the base declaration's alignment regardless
>>> whether the vectorization transformation will be performed or not
>>> later.
>>>
>>> The attached patch fixes the problem. Testing is ok. Ok for trunk?
>>
>> Not in this form.  I'd rather not put extra fields in the data-refs this way.
>> (As for the xalancbmk runtime problem - doesn't this patch just paper
>> over the real issue?)
>
> I believe it is stack-limit related. This program has some recursive
> call chains that can generate a call frame ~9k deep. The vectorizer
> side effect causes the affected function in the call frame to grow
> ~100 byte in stack size. Since this function appears lots of times in
> the callstack, the overall stack consumption increase a lot. Combined
> with the aggressive cross module inlining, it ends up blowing up the
> stack limit.
>
>
>>
>> For BB SLP you still adjust DR bases that do not take part in
>> vectorization - the DR vector contains all refs in the BB, not just
>> those in the SLP instances we are going to vectorize.  So the
>> commit of the re-aligning decision should be done from where
>> we vectorize the DR, in vectorizable_load/store in its transform
>> phase.
>>
>> If we decide to integrate the fields into the data-ref then the
>> analysis and commit parts should go into the data-ref machinery
>> as well.  Otherwise the vectorizer should use data-ref->aux or some
>> other way to hang off its private data.
>>
>
> Good point.
>
>> Other than that, modifying alignment of variables that are not
>> vectorized is indeed a bug worth fixing.
>
> The new version of the patch is attached. Ok for trunk after testing?

+/* A helper function to free data refs.  */
+
+void
+destroy_datarefs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)

please rename to vect_destroy_datarefs

@@ -431,6 +460,7 @@ static unsigned int
 execute_vect_slp (void)
 {
   basic_block bb;
+  bb_vec_info bb_vinfo;

   init_stmt_vec_info_vec ();

@@ -438,8 +468,11 @@ execute_vect_slp (void)
     {
       vect_location = find_bb_location (bb);

-      if (vect_slp_analyze_bb (bb))
+      if ((bb_vinfo = vect_slp_analyze_bb (bb)))
         {

spurious change?  bb_vinfo seems to be unused.

+typedef struct _dataref_aux {
+  tree base_decl;
+  bool base_misaligned;
+  int misalignment;
+} dataref_aux;

we no longer need that typedef stuff with C++ ...

+      gcc_assert (dr->aux);
+      ((dataref_aux *)dr->aux)->base_decl = base;
+      ((dataref_aux *)dr->aux)->base_misaligned = true;

dereferencing dr->aux will trap, so no need to assert (dr->aux).
Please add a comment before this code explaining what this is for.

-vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
-                   slp_tree slp_node)
+vectorizable_store_1 (gimple stmt, stmt_vec_info stmt_info,
+                      struct data_reference *dr,
+                      gimple_stmt_iterator *gsi, gimple *vec_stmt,
+                      slp_tree slp_node)
...
+/* A wrapper function to vectorizable_store.  */
+
+static bool
+vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
+                    slp_tree slp_node)
+{

it shouldn't be necessary to split the functions, after

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
                             NULL, NULL, NULL);
      return true;
    }

  /** Transform.  **/

the function is no longer allowed to fail, so at this point you can call
ensure_base_align.  Similar for the load case.

Ok with those minor cases fixed.

Thanks,
Richard.


> thanks,
>
> David
>
>>
>> Richard.
>>
>>> thanks,
>>>
>>> David

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2013-09-02 13:20 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-08-30  0:30 Eliminate vectorizer analysis side effects Xinliang David Li
2013-08-30  8:26 ` Richard Biener
2013-08-30 21:37   ` Xinliang David Li
2013-09-02 13:20     ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).