public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [patch] Vectorizer cleanup
@ 2010-09-14 14:23 Ira Rosen
  2010-09-14 16:34 ` Sebastian Pop
  0 siblings, 1 reply; 7+ messages in thread
From: Ira Rosen @ 2010-09-14 14:23 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1255 bytes --]


Hi,

Following Sebastian's comments I fixed the vectorizer documentation (mostly
the "dot space space" problem) and added some comments. I also fixed two
instances of memory that was not freed. I'll commit the patch after proper
testing if nobody objects or has additional comments.

Ira

ChangeLog:

	* tree-vectorizer.c: Fix documentation.
	* tree-vectorizer.h (vinfo_for_stmt): Add documentation.
	(set_vinfo_for_stmt, get_earlier_stmt, get_later_stmt,
	is_pattern_stmt_p, is_loop_header_bb_p,
	stmt_vinfo_set_inside_of_loop_cost,
	stmt_vinfo_set_outside_of_loop_cost, vect_pow2, aligned_access_p,
	known_alignment_for_access_p): Likewise.
	* tree-vect-loop.c: Fix documentation.
	(vect_get_cost): Start function name from new line.
	* tree-vect-data-refs.c: Fix documentation.
	* tree-vect-stmts.c: Likewise.
	(vect_create_vectorized_promotion_stmts): Always free vec_tmp.
	(vectorizable_store): Free vec_oprnds if allocated.
	(vectorizable_condition): Initialize several variables to avoid
	warnings.
	* tree-vect-slp.c: Fix documentation.

testsuite/ChangeLog:

	* gcc.dg/vect/bb-slp-8.c: Fix documentation, add space between
	function name and parentheses.
	* gcc.dg/vect/bb-slp-8a.c, gcc.dg/vect/bb-slp-8b.c: Likewise.

(See attached file: vect-cleanup.txt)

[-- Attachment #2: vect-cleanup.txt --]
[-- Type: text/plain, Size: 83941 bytes --]

Index: testsuite/gcc.dg/vect/bb-slp-8.c
===================================================================
--- testsuite/gcc.dg/vect/bb-slp-8.c	(revision 164269)
+++ testsuite/gcc.dg/vect/bb-slp-8.c	(working copy)
@@ -15,8 +15,8 @@ main1 (unsigned int x, unsigned int y, u
   int i;
   unsigned int a0, a1, a2, a3;
  
-  /* pin and pout may alias. But since all the loads are before the first store
-     the basic block is vectorizable.  */
+  /* pin and pout may alias.  But since all the loads are before the first
+     store the basic block is vectorizable.  */
   a0 = *pin++ + 23;
   a1 = *pin++ + 142;
   a2 = *pin++ + 2;
@@ -35,7 +35,7 @@ main1 (unsigned int x, unsigned int y, u
       || out[1] != (in[1] + 142) * y
       || out[2] != (in[2] + 2) * x
       || out[3] != (in[3] + 31) * y)
-    abort();
+    abort ();
 
   return 0;
 }
Index: testsuite/gcc.dg/vect/bb-slp-8a.c
===================================================================
--- testsuite/gcc.dg/vect/bb-slp-8a.c	(revision 164269)
+++ testsuite/gcc.dg/vect/bb-slp-8a.c	(working copy)
@@ -15,7 +15,7 @@ main1 (unsigned int x, unsigned int y, u
   int i;
   unsigned int a0, a1, a2, a3;
  
-  /* pin and pout may alias, and loads and stores are mixed. The basic block 
+  /* pin and pout may alias, and loads and stores are mixed.  The basic block
      cannot be vectorized.  */
   a0 = *pin++ + 23;
   *pout++ = a0 * x;
@@ -34,7 +34,7 @@ main1 (unsigned int x, unsigned int y, u
       || out[1] != (in[1] + 142) * y
       || out[2] != (in[2] + 2) * x
       || out[3] != (in[3] + 31) * y)
-    abort();
+    abort ();
 
   return 0;
 }
Index: testsuite/gcc.dg/vect/bb-slp-8b.c
===================================================================
--- testsuite/gcc.dg/vect/bb-slp-8b.c	(revision 164269)
+++ testsuite/gcc.dg/vect/bb-slp-8b.c	(working copy)
@@ -36,7 +36,7 @@ main1 (unsigned int x, unsigned int y)
       || out[1] != (in[1] + 142) * y
       || out[2] != (in[2] + 2) * x
       || out[3] != (in[3] + 31) * y)
-    abort();
+    abort ();
 
   return 0;
 }
Index: tree-vectorizer.c
===================================================================
--- tree-vectorizer.c	(revision 164269)
+++ tree-vectorizer.c	(working copy)
@@ -208,7 +208,7 @@ vectorize_loops (void)
   /*  ----------- Analyze loops. -----------  */
 
   /* If some loop was duplicated, it gets bigger number
-     than all previously defined loops. This fact allows us to run
+     than all previously defined loops.  This fact allows us to run
      only over initial loops skipping newly generated ones.  */
   FOR_EACH_LOOP (li, loop, 0)
     if (optimize_loop_nest_for_speed_p (loop))
Index: tree-vectorizer.h
===================================================================
--- tree-vectorizer.h	(revision 164269)
+++ tree-vectorizer.h	(working copy)
@@ -582,6 +582,8 @@ extern VEC(vec_void_p,heap) *stmt_vec_in
 void init_stmt_vec_info_vec (void);
 void free_stmt_vec_info_vec (void);
 
+/* Return a stmt_vec_info corresponding to STMT.  */
+
 static inline stmt_vec_info
 vinfo_for_stmt (gimple stmt)
 {
@@ -592,6 +594,8 @@ vinfo_for_stmt (gimple stmt)
   return (stmt_vec_info) VEC_index (vec_void_p, stmt_vec_info_vec, uid - 1);
 }
 
+/* Set vectorizer information INFO for STMT.  */
+
 static inline void
 set_vinfo_for_stmt (gimple stmt, stmt_vec_info info)
 {
@@ -607,6 +611,8 @@ set_vinfo_for_stmt (gimple stmt, stmt_ve
     VEC_replace (vec_void_p, stmt_vec_info_vec, uid - 1, (vec_void_p) info);
 }
 
+/* Return the earlier statement between STMT1 and STMT2.  */
+
 static inline gimple
 get_earlier_stmt (gimple stmt1, gimple stmt2)
 {
@@ -633,6 +639,8 @@ get_earlier_stmt (gimple stmt1, gimple s
     return stmt2;
 }
 
+/* Return the later statement between STMT1 and STMT2.  */
+
 static inline gimple
 get_later_stmt (gimple stmt1, gimple stmt2)
 {
@@ -659,6 +667,9 @@ get_later_stmt (gimple stmt1, gimple stm
     return stmt2;
 }
 
+/* Return TRUE if a statement represented by STMT_INFO is a part of a
+   pattern.  */
+
 static inline bool
 is_pattern_stmt_p (stmt_vec_info stmt_info)
 {
@@ -674,6 +685,8 @@ is_pattern_stmt_p (stmt_vec_info stmt_in
   return false;
 }
 
+/* Return true if BB is a loop header.  */
+
 static inline bool
 is_loop_header_bb_p (basic_block bb)
 {
@@ -683,6 +696,8 @@ is_loop_header_bb_p (basic_block bb)
   return false;
 }
 
+/* Set inside loop vectorization cost.  */
+
 static inline void
 stmt_vinfo_set_inside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
 				    int cost)
@@ -693,6 +708,8 @@ stmt_vinfo_set_inside_of_loop_cost (stmt
     STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = cost;
 }
 
+/* Set inside loop vectorization cost.  */
+
 static inline void
 stmt_vinfo_set_outside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
 				     int cost)
@@ -703,6 +720,8 @@ stmt_vinfo_set_outside_of_loop_cost (stm
     STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = cost;
 }
 
+/* Return pow2 (X).  */
+
 static inline int
 vect_pow2 (int x)
 {
@@ -723,12 +742,17 @@ vect_pow2 (int x)
 #define DR_MISALIGNMENT(DR)   ((int) (size_t) (DR)->aux)
 #define SET_DR_MISALIGNMENT(DR, VAL)   ((DR)->aux = (void *) (size_t) (VAL))
 
+/* Return TRUE if the data access is aligned, and FALSE otherwise.  */
+
 static inline bool
 aligned_access_p (struct data_reference *data_ref_info)
 {
   return (DR_MISALIGNMENT (data_ref_info) == 0);
 }
 
+/* Return TRUE if the alignment of the data access is known, and FALSE
+   otherwise.  */
+
 static inline bool
 known_alignment_for_access_p (struct data_reference *data_ref_info)
 {
Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c	(revision 164269)
+++ tree-vect-loop.c	(working copy)
@@ -76,7 +76,7 @@ along with GCC; see the file COPYING3.  
    had successfully passed the analysis phase.
         Throughout this pass we make a distinction between two types of
    data: scalars (which are represented by SSA_NAMES), and memory references
-   ("data-refs"). These two types of data require different handling both
+   ("data-refs").  These two types of data require different handling both
    during analysis and transformation. The types of data-refs that the
    vectorizer currently supports are ARRAY_REFS which base is an array DECL
    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
@@ -97,10 +97,10 @@ along with GCC; see the file COPYING3.  
    =====================
         The loop transformation phase scans all the stmts in the loop, and
    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
-   the loop that needs to be vectorized. It inserts the vector code sequence
+   the loop that needs to be vectorized.  It inserts the vector code sequence
    just before the scalar stmt S, and records a pointer to the vector code
    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
-   attached to S). This pointer will be used for the vectorization of following
+   attached to S).  This pointer will be used for the vectorization of following
    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
    otherwise, we rely on dead code elimination for removing it.
 
@@ -112,7 +112,7 @@ along with GCC; see the file COPYING3.  
 
    To vectorize stmt S2, the vectorizer first finds the stmt that defines
    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
-   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
+   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
    resulting sequence would be:
 
    VS1: vb = px[i];
@@ -126,14 +126,14 @@ along with GCC; see the file COPYING3.  
    Target modeling:
    =================
         Currently the only target specific information that is used is the
-   size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
+   size of the vector (in bytes) - "UNITS_PER_SIMD_WORD".  Targets that can
    support different sizes of vectors, for now will need to specify one value
-   for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
+   for "UNITS_PER_SIMD_WORD".  More flexibility will be added in the future.
 
         Since we only vectorize operations which vector form can be
    expressed using existing tree codes, to verify that an operation is
    supported, the vectorizer checks the relevant optab at the relevant
-   machine_mode (e.g, optab_handler (add_optab, V8HImode)). If
+   machine_mode (e.g, optab_handler (add_optab, V8HImode)).  If
    the value found is CODE_FOR_nothing, then there's no target support, and
    we can't vectorize the stmt.
 
@@ -143,14 +143,14 @@ along with GCC; see the file COPYING3.  
 
 /* Function vect_determine_vectorization_factor
 
-   Determine the vectorization factor (VF). VF is the number of data elements
+   Determine the vectorization factor (VF).  VF is the number of data elements
    that are operated upon in parallel in a single iteration of the vectorized
-   loop. For example, when vectorizing a loop that operates on 4byte elements,
+   loop.  For example, when vectorizing a loop that operates on 4byte elements,
    on a target with vector size (VS) 16byte, the VF is set to 4, since 4
    elements can fit in a single vector register.
 
    We currently support vectorization of loops in which all types operated upon
-   are of the same size. Therefore this function currently sets VF according to
+   are of the same size.  Therefore this function currently sets VF according to
    the size of the types operated upon, and fails if there are multiple sizes
    in the loop.
 
@@ -437,7 +437,7 @@ vect_is_simple_iv_evolution (unsigned lo
 /* Function vect_analyze_scalar_cycles_1.
 
    Examine the cross iteration def-use cycles of scalar variables
-   in LOOP. LOOP_VINFO represents the loop that is now being
+   in LOOP.  LOOP_VINFO represents the loop that is now being
    considered for vectorization (can be LOOP, or an outer-loop
    enclosing LOOP).  */
 
@@ -453,7 +453,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_i
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
 
-  /* First - identify all inductions. Reduction detection assumes that all the
+  /* First - identify all inductions.  Reduction detection assumes that all the
      inductions have been identified, therefore, this order must not be
      changed.  */
   for (gsi = gsi_start_phis  (bb); !gsi_end_p (gsi); gsi_next (&gsi))
@@ -469,7 +469,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_i
 	  print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
 	}
 
-      /* Skip virtual phi's. The data dependences that are associated with
+      /* Skip virtual phi's.  The data dependences that are associated with
          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
       if (!is_gimple_reg (SSA_NAME_VAR (def)))
 	continue;
@@ -568,7 +568,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_i
 /* Function vect_analyze_scalar_cycles.
 
    Examine the cross iteration def-use cycles of scalar variables, by
-   analyzing the loop-header PHIs of scalar variables; Classify each
+   analyzing the loop-header PHIs of scalar variables.  Classify each
    cycle as one of the following: invariant, induction, reduction, unknown.
    We do that for the loop represented by LOOP_VINFO, and also to its
    inner-loop, if exists.
@@ -1124,8 +1124,8 @@ vect_analyze_loop_form (struct loop *loo
 
 /* Get cost by calling cost target builtin.  */
 
-static inline 
-int vect_get_cost (enum vect_cost_for_stmt type_of_cost)
+static inline int
+vect_get_cost (enum vect_cost_for_stmt type_of_cost)
 {
   tree dummy_type = NULL;
   int dummy = 0;
@@ -1300,7 +1300,7 @@ vect_analyze_loop_operations (loop_vec_i
       return false;
     }
 
-  /* Analyze cost. Decide if worth while to vectorize.  */
+  /* Analyze cost.  Decide if worth while to vectorize.  */
 
   /* Once VF is set, SLP costs should be updated since the number of created
      vector stmts depends on VF.  */
@@ -1373,7 +1373,7 @@ vect_analyze_loop_operations (loop_vec_i
 /* Function vect_analyze_loop.
 
    Apply a set of analyses on LOOP, and create a loop_vec_info struct
-   for it. The different analyses will record information in the
+   for it.  The different analyses will record information in the
    loop_vec_info struct.  */
 loop_vec_info
 vect_analyze_loop (struct loop *loop)
@@ -1593,7 +1593,7 @@ reduction_code_for_scalar_code (enum tre
 }
 
 
-/* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
+/* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
    STMT is printed with a message MSG. */
 
 static void
@@ -1607,7 +1607,7 @@ report_vect_op (gimple stmt, const char 
 /* Function vect_is_simple_reduction_1
 
    (1) Detect a cross-iteration def-use cycle that represents a simple
-   reduction computation. We look for the following pattern:
+   reduction computation.  We look for the following pattern:
 
    loop_header:
      a1 = phi < a0, a2 >
@@ -2022,7 +2022,7 @@ vect_get_single_scalar_iteraion_cost (lo
   int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
   int innerloop_iters, i, stmt_cost;
 
-  /* Count statements in scalar loop. Using this as scalar cost for a single
+  /* Count statements in scalar loop.  Using this as scalar cost for a single
      iteration for now.
 
      TODO: Add outer loop support.
@@ -2307,7 +2307,7 @@ vect_estimate_min_profitable_iters (loop
      something more reasonable.  */
 
   /* If the number of iterations is known and we do not do versioning, we can
-     decide whether to vectorize at compile time. Hence the scalar version
+     decide whether to vectorize at compile time.  Hence the scalar version
      do not carry cost model guard costs.  */
   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
@@ -2338,7 +2338,7 @@ vect_estimate_min_profitable_iters (loop
     }
 
   /* Calculate number of iterations required to make the vector version
-     profitable, relative to the loop bodies only. The following condition
+     profitable, relative to the loop bodies only.  The following condition
      must hold true:
      SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
      where
@@ -2555,7 +2555,7 @@ vect_model_induction_cost (stmt_vec_info
 
    Output:
    Return a vector variable, initialized with the first VF values of
-   the induction variable. E.g., for an iv with IV_PHI='X' and
+   the induction variable.  E.g., for an iv with IV_PHI='X' and
    evolution S, for a vector of 4 units, we want to return:
    [X, X + S, X + 2*S, X + 3*S].  */
 
@@ -2637,8 +2637,8 @@ get_initial_def_for_induction (gimple iv
   if (nested_in_vect_loop)
     {
       /* iv_loop is nested in the loop to be vectorized.  init_expr had already
-	 been created during vectorization of previous stmts; We obtain it from
-	 the STMT_VINFO_VEC_STMT of the defining stmt. */
+	 been created during vectorization of previous stmts.  We obtain it
+	 from the STMT_VINFO_VEC_STMT of the defining stmt.  */
       tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
                                            loop_preheader_edge (iv_loop));
       vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
@@ -2904,7 +2904,7 @@ get_initial_def_for_reduction (gimple st
     gcc_assert (loop == (gimple_bb (stmt))->loop_father);
 
   /* In case of double reduction we only create a vector variable to be put
-     in the reduction phi node. The actual statement creation is done in
+     in the reduction phi node.  The actual statement creation is done in
      vect_create_epilog_for_reduction.  */
   if (adjustment_def && nested_in_vect_loop
       && TREE_CODE (init_val) == SSA_NAME
@@ -3022,7 +3022,7 @@ get_initial_def_for_reduction (gimple st
      reduction statements. 
    STMT is the scalar reduction stmt that is being vectorized.
    NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
-     number of elements that we can fit in a vectype (nunits). In this case
+     number of elements that we can fit in a vectype (nunits).  In this case
      we have to generate more than one vector stmt - i.e - we need to "unroll"
      the vector stmt by a factor VF/nunits.  For more details see documentation
      in vectorizable_operation.
@@ -3313,7 +3313,7 @@ vect_create_epilog_for_reduction (VEC (t
   /* In case this is a reduction in an inner-loop while vectorizing an outer
      loop - we don't need to extract a single scalar result at the end of the
      inner-loop (unless it is double reduction, i.e., the use of reduction is
-     outside the outer-loop). The final vector of partial results will be used
+     outside the outer-loop).  The final vector of partial results will be used
      in the vectorized outer-loop, or reduced to a scalar result at the end of
      the outer-loop.  */
   if (nested_in_vect_loop && !double_reduc)
@@ -3472,7 +3472,7 @@ vect_create_epilog_for_reduction (VEC (t
             }
 
           /* The only case where we need to reduce scalar results in SLP, is
-             unrolling. If the size of SCALAR_RESULTS is greater than 
+             unrolling.  If the size of SCALAR_RESULTS is greater than
              GROUP_SIZE, we reduce them combining elements modulo 
              GROUP_SIZE.  */
           if (slp_node)
@@ -3578,7 +3578,7 @@ vect_finalize_reduction:
       VEC_replace (gimple, new_phis, 0, epilog_stmt);
     }
 
-  /* 2.6  Handle the loop-exit phis. Replace the uses of scalar loop-exit
+  /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
           phis with new adjusted scalar results, i.e., replace use <s_out0>
           with use <s_out4>.        
 
@@ -3604,8 +3604,8 @@ vect_finalize_reduction:
           use <s_out4> */
 
   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in 
-     case that GROUP_SIZE is greater than vectorization factor). Therefore, we
-     need to match SCALAR_RESULTS with corresponding statements. The first
+     case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
+     need to match SCALAR_RESULTS with corresponding statements.  The first
      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
      the first vector stmt, etc.  
      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */ 
@@ -3638,7 +3638,7 @@ vect_finalize_reduction:
 
       phis = VEC_alloc (gimple, heap, 3);
       /* Find the loop-closed-use at the loop exit of the original scalar
-         result. (The reduction result is expected to have two immediate uses -
+         result.  (The reduction result is expected to have two immediate uses -
          one at the latch block, and one at the loop exit).  */
       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
         if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
@@ -3739,7 +3739,7 @@ vect_finalize_reduction:
                   vect_phi_res = PHI_RESULT (vect_phi);
 
                   /* Replace the use, i.e., set the correct vs1 in the regular
-                     reduction phi node. FORNOW, NCOPIES is always 1, so the
+                     reduction phi node.  FORNOW, NCOPIES is always 1, so the
                      loop is redundant.  */
                   use = reduction_phi;
                   for (j = 0; j < ncopies; j++)
@@ -3763,8 +3763,8 @@ vect_finalize_reduction:
 
       phis = VEC_alloc (gimple, heap, 3);
       /* Find the loop-closed-use at the loop exit of the original scalar
-         result. (The reduction result is expected to have two immediate uses -
-         one at the latch block, and one at the loop exit). For double 
+         result.  (The reduction result is expected to have two immediate uses,
+         one at the latch block, and one at the loop exit).  For double
          reductions we are looking for exit phis of the outer loop.  */
       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
         {
@@ -3813,7 +3813,7 @@ vect_finalize_reduction:
    Return FALSE if not a vectorizable STMT, TRUE otherwise.
 
    This function also handles reduction idioms (patterns) that have been
-   recognized in advance during vect_pattern_recog. In this case, STMT may be
+   recognized in advance during vect_pattern_recog.  In this case, STMT may be
    of this form:
      X = pattern_expr (arg0, arg1, ..., X)
    and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
@@ -3834,9 +3834,9 @@ vect_finalize_reduction:
 
    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
    indicates what is the actual level of parallelism (V8HI in the example), so
-   that the right vectorization factor would be derived. This vectype
+   that the right vectorization factor would be derived.  This vectype
    corresponds to the type of arguments to the reduction stmt, and should *NOT*
-   be used to create the vectorized stmt. The right vectype for the vectorized
+   be used to create the vectorized stmt.  The right vectype for the vectorized
    stmt is obtained from the type of the result X:
         get_vectype_for_scalar_type (TREE_TYPE (X))
 
@@ -3933,7 +3933,7 @@ vectorizable_reduction (gimple stmt, gim
       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
     }
 
-  /* 3. Check the operands of the operation. The first operands are defined
+  /* 3. Check the operands of the operation.  The first operands are defined
         inside the loop body. The last operand is the reduction variable,
         which is defined by the loop-header-phi.  */
 
@@ -3978,7 +3978,7 @@ vectorizable_reduction (gimple stmt, gim
     return false;
 
   /* All uses but the last are expected to be defined in the loop.
-     The last use is the reduction variable. In case of nested cycle this
+     The last use is the reduction variable.  In case of nested cycle this
      assumption is not true: we use reduc_index to record the index of the
      reduction variable.  */
   for (i = 0; i < op_type-1; i++)
@@ -4109,7 +4109,7 @@ vectorizable_reduction (gimple stmt, gim
           1. The tree-code that is used to create the vector operation in the
              epilog code (that reduces the partial results) is not the
              tree-code of STMT, but is rather the tree-code of the original
-             stmt from the pattern that STMT is replacing. I.e, in the example
+             stmt from the pattern that STMT is replacing.  I.e, in the example
              above we want to use 'widen_sum' in the loop, but 'plus' in the
              epilog.
           2. The type (mode) we use to check available target support
@@ -4512,7 +4512,7 @@ vectorizable_induction (gimple phi, gimp
 
 /* Function vectorizable_live_operation.
 
-   STMT computes a value that is used outside the loop. Check if
+   STMT computes a value that is used outside the loop.  Check if
    it can be supported.  */
 
 bool
@@ -4553,7 +4553,7 @@ vectorizable_live_operation (gimple stmt
   gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
   gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
 
-  /* FORNOW: support only if all uses are invariant. This means
+  /* FORNOW: support only if all uses are invariant.  This means
      that the scalar operations can remain in place, unvectorized.
      The original last scalar value that they compute will be used.  */
 
@@ -4664,7 +4664,7 @@ vect_transform_loop (loop_vec_info loop_
      compile time constant), or it is a constant that doesn't divide by the
      vectorization factor, then an epilog loop needs to be created.
      We therefore duplicate the loop: the original loop will be vectorized,
-     and will compute the first (n/VF) iterations. The second copy of the loop
+     and will compute the first (n/VF) iterations.  The second copy of the loop
      will remain scalar and will compute the remaining (n%VF) iterations.
      (VF is the vectorization factor).  */
 
Index: tree-vect-data-refs.c
===================================================================
--- tree-vect-data-refs.c	(revision 164270)
+++ tree-vect-data-refs.c	(working copy)
@@ -45,19 +45,19 @@ along with GCC; see the file COPYING3.  
 #include "optabs.h"
 
 /* Return the smallest scalar part of STMT.
-   This is used to determine the vectype of the stmt. We generally set the
-   vectype according to the type of the result (lhs). For stmts whose
+   This is used to determine the vectype of the stmt.  We generally set the
+   vectype according to the type of the result (lhs).  For stmts whose
    result-type is different than the type of the arguments (e.g., demotion,
    promotion), vectype will be reset appropriately (later).  Note that we have
    to visit the smallest datatype in this function, because that determines the
-   VF. If the smallest datatype in the loop is present only as the rhs of a
+   VF.  If the smallest datatype in the loop is present only as the rhs of a
    promotion operation - we'd miss it.
    Such a case, where a variable of this datatype does not appear in the lhs
    anywhere in the loop, can only occur if it's an invariant: e.g.:
    'int_x = (int) short_inv', which we'd expect to have been optimized away by
-   invariant motion. However, we cannot rely on invariant motion to always take
-   invariants out of the loop, and so in the case of promotion we also have to
-   check the rhs.
+   invariant motion.  However, we cannot rely on invariant motion to always
+   take invariants out of the loop, and so in the case of promotion we also
+   have to check the rhs.
    LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
    types.  */
 
@@ -89,7 +89,7 @@ vect_get_smallest_scalar_type (gimple st
 
 
 /* Find the place of the data-ref in STMT in the interleaving chain that starts
-   from FIRST_STMT. Return -1 if the data-ref is not a part of the chain.  */
+   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
 
 int
 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
@@ -151,7 +151,7 @@ vect_insert_into_interleaving_chain (str
 /* Function vect_update_interleaving_chain.
 
    For two data-refs DRA and DRB that are a part of a chain interleaved data
-   accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
+   accesses, update the interleaving chain.  DRB's INIT is smaller than DRA's.
 
    There are four possible cases:
    1. New stmts - both DRA and DRB are not a part of any chain:
@@ -211,7 +211,7 @@ vect_update_interleaving_chain (struct d
       if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
 	{
 	  /* DRB's init is smaller than the init of the stmt previously marked
-	     as the first stmt of the interleaving chain of DRA. Therefore, we
+	     as the first stmt of the interleaving chain of DRA.  Therefore, we
 	     update FIRST_STMT and put DRB in the head of the list.  */
 	  DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
 	  DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
@@ -323,7 +323,11 @@ vect_equal_offsets (tree offset1, tree o
 }
 
 
-/* Check dependence between DRA and DRB for basic block vectorization.  */
+/* Check dependence between DRA and DRB for basic block vectorization.
+   If the accesses share same bases and offsets, we can compare their initial
+   constant offsets to decide whether they differ or not.  In case of a read-
+   write dependence we check that the load is before the store to ensure that
+   vectorization will not change the order of the accesses.  */
 
 static bool
 vect_drs_dependent_in_basic_block (struct data_reference *dra,
@@ -342,7 +346,7 @@ vect_drs_dependent_in_basic_block (struc
         return true;
     }
 
-  /* Check that the data-refs have same bases and offsets. If not, we can't
+  /* Check that the data-refs have same bases and offsets.  If not, we can't
      determine if they are dependent.  */
   if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
        && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
@@ -368,10 +372,10 @@ vect_drs_dependent_in_basic_block (struc
   if (init_a != init_b)
     return false;
 
-  /* We have a read-write dependence. Check that the load is before the store.
+  /* We have a read-write dependence.  Check that the load is before the store.
      When we vectorize basic blocks, vector load can be only before 
      corresponding scalar load, and vector store can be only after its
-     corresponding scalar store. So the order of the acceses is preserved in 
+     corresponding scalar store.  So the order of the accesses is preserved in
      case the load is before the store.  */
   earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));   
   if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
@@ -383,7 +387,7 @@ vect_drs_dependent_in_basic_block (struc
 
 /* Function vect_check_interleaving.
 
-   Check if DRA and DRB are a part of interleaving. In case they are, insert
+   Check if DRA and DRB are a part of interleaving.  In case they are, insert
    DRA and DRB in an interleaving chain.  */
 
 static bool
@@ -813,7 +817,7 @@ vect_compute_data_ref_alignment (struct 
 
   /* In case the dataref is in an inner-loop of the loop that is being
      vectorized (LOOP), we use the base and misalignment information
-     relative to the outer-loop (LOOP). This is ok only if the misalignment
+     relative to the outer-loop (LOOP).  This is ok only if the misalignment
      stays the same throughout the execution of the inner-loop, which is why
      we have to check that the stride of the dataref in the inner-loop evenly
      divides by the vector size.  */
@@ -1241,8 +1245,8 @@ vect_peeling_hash_get_most_frequent (voi
 }
 
 
-/* Traverse peeling hash table and calculate cost for each peeling option. Find
-   one with the lowest cost.  */
+/* Traverse peeling hash table and calculate cost for each peeling option.
+   Find the one with the lowest cost.  */
 
 static int
 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
@@ -1326,15 +1330,15 @@ vect_peeling_hash_choose_best_peeling (l
    the alignment of data references in the loop.
 
    FOR NOW: we assume that whatever versioning/peeling takes place, only the
-   original loop is to be vectorized; Any other loops that are created by
+   original loop is to be vectorized.  Any other loops that are created by
    the transformations performed in this pass - are not supposed to be
-   vectorized. This restriction will be relaxed.
+   vectorized.  This restriction will be relaxed.
 
    This pass will require a cost model to guide it whether to apply peeling
-   or versioning or a combination of the two. For example, the scheme that
+   or versioning or a combination of the two.  For example, the scheme that
    intel uses when given a loop with several memory accesses, is as follows:
    choose one memory access ('p') which alignment you want to force by doing
-   peeling. Then, either (1) generate a loop in which 'p' is aligned and all
+   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
    other accesses are not necessarily aligned, or (2) use loop versioning to
    generate one loop in which all accesses are aligned, and another loop in
    which only 'p' is necessarily aligned.
@@ -1343,9 +1347,9 @@ vect_peeling_hash_choose_best_peeling (l
    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
    Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
 
-   Devising a cost model is the most critical aspect of this work. It will
+   Devising a cost model is the most critical aspect of this work.  It will
    guide us on which access to peel for, whether to use loop versioning, how
-   many versions to create, etc. The cost model will probably consist of
+   many versions to create, etc.  The cost model will probably consist of
    generic considerations as well as target specific considerations (on
    powerpc for example, misaligned stores are more painful than misaligned
    loads).
@@ -1406,7 +1410,7 @@ vect_peeling_hash_choose_best_peeling (l
 	}
      }
 
-     These loops are later passed to loop_transform to be vectorized. The
+     These loops are later passed to loop_transform to be vectorized.  The
      vectorizer will use the alignment information to guide the transformation
      (whether to generate regular loads/stores, or with special handling for
      misalignment).  */
@@ -1500,11 +1504,11 @@ vect_enhance_data_refs_alignment (loop_v
               npeel_tmp = (nelements - mis) % vf;
 
               /* For multiple types, it is possible that the bigger type access
-                 will have more than one peeling option. E.g., a loop with two
+                 will have more than one peeling option.  E.g., a loop with two
                  types: one of size (vector size / 4), and the other one of
-                 size (vector size / 8). Vectorization factor will 8. If both
+                 size (vector size / 8).  Vectorization factor will be 8.  If both
                  access are misaligned by 3, the first one needs one scalar
-                 iteration to be aligned, and the second one needs 5. But the
+                 iteration to be aligned, and the second one needs 5.  But
                  the first one will be aligned also by peeling 5 scalar
                  iterations, and in that case both accesses will be aligned.
                  Hence, except for the immediate peeling amount, we also want
@@ -1996,7 +2000,7 @@ vect_analyze_data_refs_alignment (loop_v
 
 
 /* Analyze groups of strided accesses: check that DR belongs to a group of
-   strided accesses of legal size, step, etc. Detect gaps, single element
+   strided accesses of legal size, step, etc.  Detect gaps, single element
    interleaving, and other special cases. Set strided access info.
    Collect groups of strided stores for further use in SLP analysis.  */
 
@@ -2072,9 +2076,10 @@ vect_analyze_group_access (struct data_r
 
       while (next)
         {
-          /* Skip same data-refs. In case that two or more stmts share data-ref
-             (supported only for loads), we vectorize only the first stmt, and
-             the rest get their vectorized loads from the first one.  */
+          /* Skip same data-refs.  In case that two or more stmts share
+             data-ref (supported only for loads), we vectorize only the first
+             stmt, and the rest get their vectorized loads from the first
+             one.  */
           if (!tree_int_cst_compare (DR_INIT (data_ref),
                                      DR_INIT (STMT_VINFO_DATA_REF (
 						   vinfo_for_stmt (next)))))
@@ -2196,7 +2201,7 @@ vect_analyze_group_access (struct data_r
 
       /* FORNOW: we handle only interleaving that is a power of 2.
          We don't fail here if it may be still possible to vectorize the
-         group using SLP. If not, the size of the group will be checked in
+         group using SLP.  If not, the size of the group will be checked in
          vect_analyze_operations, and the vectorization will fail.  */
       if (exact_log2 (stride) == -1)
 	{
@@ -2483,8 +2488,8 @@ vect_analyze_data_refs (loop_vec_info lo
       datarefs = BB_VINFO_DATAREFS (bb_vinfo);
     }
 
-  /* Go through the data-refs, check that the analysis succeeded. Update pointer
-     from stmt_vec_info struct to DR and vectype.  */
+  /* Go through the data-refs, check that the analysis succeeded.  Update
+     pointer from stmt_vec_info struct to DR and vectype.  */
 
   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
     {
@@ -2572,7 +2577,7 @@ vect_analyze_data_refs (loop_vec_info lo
 	  tree dinit;
 
 	  /* Build a reference to the first location accessed by the
-	     inner-loop: *(BASE+INIT). (The first location is actually
+	     inner-loop: *(BASE+INIT).  (The first location is actually
 	     BASE+INIT+OFFSET, but we add OFFSET separately later).  */
           tree inner_base = build_fold_indirect_ref
                                 (fold_build2 (POINTER_PLUS_EXPR,
@@ -2712,7 +2717,7 @@ vect_analyze_data_refs (loop_vec_info lo
 
 /* Function vect_get_new_vect_var.
 
-   Returns a name for a new variable. The current naming scheme appends the
+   Returns a name for a new variable.  The current naming scheme appends the
    prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
    the name of vectorizer generated variables, and appends that to NAME if
    provided.  */
@@ -2767,7 +2772,7 @@ vect_get_new_vect_var (tree type, enum v
    LOOP:    Specify relative to which loop-nest should the address be computed.
             For example, when the dataref is in an inner-loop nested in an
 	    outer-loop that is now being vectorized, LOOP can be either the
-	    outer-loop, or the inner-loop. The first memory location accessed
+	    outer-loop, or the inner-loop.  The first memory location accessed
 	    by the following dataref ('in' points to short):
 
 		for (i=0; i<N; i++)
@@ -2937,7 +2942,7 @@ vect_create_addr_base_for_vector_ref (gi
       Return the increment stmt that updates the pointer in PTR_INCR.
 
    3. Set INV_P to true if the access pattern of the data reference in the
-      vectorized loop is invariant. Set it to false otherwise.
+      vectorized loop is invariant.  Set it to false otherwise.
 
    4. Return the pointer.  */
 
@@ -3017,7 +3022,7 @@ vect_create_data_ref_ptr (gimple stmt, s
       print_generic_expr (vect_dump, base_name, TDF_SLIM);
     }
 
-  /** (1) Create the new vector-pointer variable:  **/
+  /* (1) Create the new vector-pointer variable.  */
   vect_ptr_type = build_pointer_type (vectype);
   base = get_base_address (DR_REF (dr));
   if (base
@@ -3067,16 +3072,16 @@ vect_create_data_ref_ptr (gimple stmt, s
 
   add_referenced_var (vect_ptr);
 
-  /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
-      vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
-      def-use update cycles for the pointer: One relative to the outer-loop
-      (LOOP), which is what steps (3) and (4) below do. The other is relative
-      to the inner-loop (which is the inner-most loop containing the dataref),
-      and this is done be step (5) below.
-
-      When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
-      inner-most loop, and so steps (3),(4) work the same, and step (5) is
-      redundant.  Steps (3),(4) create the following:
+  /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
+     vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
+     def-use update cycles for the pointer: one relative to the outer-loop
+     (LOOP), which is what steps (3) and (4) below do.  The other is relative
+     to the inner-loop (which is the inner-most loop containing the dataref),
+     and this is done by step (5) below.
+
+     When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
+     inner-most loop, and so steps (3),(4) work the same, and step (5) is
+     redundant.  Steps (3),(4) create the following:
 
 	vp0 = &base_addr;
 	LOOP:	vp1 = phi(vp0,vp2)
@@ -3085,8 +3090,8 @@ vect_create_data_ref_ptr (gimple stmt, s
 		vp2 = vp1 + step
 		goto LOOP
 
-      If there is an inner-loop nested in loop, then step (5) will also be
-      applied, and an additional update in the inner-loop will be created:
+     If there is an inner-loop nested in loop, then step (5) will also be
+     applied, and an additional update in the inner-loop will be created:
 
 	vp0 = &base_addr;
 	LOOP:   vp1 = phi(vp0,vp2)
@@ -3098,8 +3103,8 @@ vect_create_data_ref_ptr (gimple stmt, s
 		vp2 = vp1 + step
 		if () goto LOOP   */
 
-  /** (3) Calculate the initial address the vector-pointer, and set
-          the vector-pointer to point to it before the loop:  **/
+  /* (2) Calculate the initial address of the vector-pointer, and set
+         the vector-pointer to point to it before the loop.  */
 
   /* Create: (&(base[init_val+offset]) in the loop preheader.  */
 
@@ -3140,10 +3145,9 @@ vect_create_data_ref_ptr (gimple stmt, s
   else
     vect_ptr_init = new_temp;
 
-  /** (4) Handle the updating of the vector-pointer inside the loop.
-	  This is needed when ONLY_INIT is false, and also when AT_LOOP
-	  is the inner-loop nested in LOOP (during outer-loop vectorization).
-   **/
+  /* (3) Handle the updating of the vector-pointer inside the loop.
+     This is needed when ONLY_INIT is false, and also when AT_LOOP is the
+     inner-loop nested in LOOP (during outer-loop vectorization).  */
 
   /* No update in loop is required.  */
   if (only_init && (!loop_vinfo || at_loop == loop))
@@ -3182,8 +3186,8 @@ vect_create_data_ref_ptr (gimple stmt, s
     return vptr;
 
 
-  /** (5) Handle the updating of the vector-pointer inside the inner-loop
-	  nested in LOOP, if exists: **/
+  /* (4) Handle the updating of the vector-pointer inside the inner-loop
+     nested in LOOP, if exists.  */
 
   gcc_assert (nested_in_vect_loop);
   if (!only_init)
@@ -3358,12 +3362,12 @@ vect_strided_store_supported (tree vecty
 
    Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
    a power of 2, generate interleave_high/low stmts to reorder the data
-   correctly for the stores. Return the final references for stores in
+   correctly for the stores.  Return the final references for stores in
    RESULT_CHAIN.
 
    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
-   The input is 4 vectors each containing 8 elements. We assign a number to each
-   element, the input sequence is:
+   The input is 4 vectors each containing 8 elements.  We assign a number to
+   each element, the input sequence is:
 
    1st vec:   0  1  2  3  4  5  6  7
    2nd vec:   8  9 10 11 12 13 14 15
@@ -3379,18 +3383,18 @@ vect_strided_store_supported (tree vecty
 
    i.e., we interleave the contents of the four vectors in their order.
 
-   We use interleave_high/low instructions to create such output. The input of
+   We use interleave_high/low instructions to create such output.  The input of
    each interleave_high/low operation is two vectors:
    1st vec    2nd vec
    0 1 2 3    4 5 6 7
    the even elements of the result vector are obtained left-to-right from the
-   high/low elements of the first vector. The odd elements of the result are
+   high/low elements of the first vector.  The odd elements of the result are
    obtained left-to-right from the high/low elements of the second vector.
    The output of interleave_high will be:   0 4 1 5
    and of interleave_low:                   2 6 3 7
 
 
-   The permutation is done in log LENGTH stages. In each stage interleave_high
+   The permutation is done in log LENGTH stages.  In each stage interleave_high
    and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
    where the first argument is taken from the first half of DR_CHAIN and the
    second argument from it's second half.
@@ -3582,8 +3586,7 @@ vect_setup_realignment (gimple stmt, gim
      1. the misalignment computation
      2. the extra vector load (for the optimized realignment scheme).
      3. the phi node for the two vectors from which the realignment is
-      done (for the optimized realignment scheme).
-   */
+      done (for the optimized realignment scheme).  */
 
   /* 1. Determine where to generate the misalignment computation.
 
@@ -3807,7 +3810,7 @@ vect_strided_load_supported (tree vectyp
 
    Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
    a power of 2, generate extract_even/odd stmts to reorder the input data
-   correctly. Return the final references for loads in RESULT_CHAIN.
+   correctly.  Return the final references for loads in RESULT_CHAIN.
 
    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
    The input is 4 vectors each containing 8 elements. We assign a number to each
@@ -3828,19 +3831,19 @@ vect_strided_load_supported (tree vectyp
    i.e., the first output vector should contain the first elements of each
    interleaving group, etc.
 
-   We use extract_even/odd instructions to create such output. The input of each
-   extract_even/odd operation is two vectors
+   We use extract_even/odd instructions to create such output.  The input of
+   each extract_even/odd operation is two vectors
    1st vec    2nd vec
    0 1 2 3    4 5 6 7
 
-   and the output is the vector of extracted even/odd elements. The output of
+   and the output is the vector of extracted even/odd elements.  The output of
    extract_even will be:   0 2 4 6
    and of extract_odd:     1 3 5 7
 
 
-   The permutation is done in log LENGTH stages. In each stage extract_even and
-   extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
-   order. In our example,
+   The permutation is done in log LENGTH stages.  In each stage extract_even
+   and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
+   their order.  In our example,
 
    E1: extract_even (1st vec, 2nd vec)
    E2: extract_odd (1st vec, 2nd vec)
@@ -3977,13 +3980,12 @@ vect_transform_strided_load (gimple stmt
       if (!next_stmt)
 	break;
 
-      /* Skip the gaps. Loads created for the gaps will be removed by dead
-       code elimination pass later. No need to check for the first stmt in
+      /* Skip the gaps.  Loads created for the gaps will be removed by dead
+       code elimination pass later.  No need to check for the first stmt in
        the group, since it always exists.
        DR_GROUP_GAP is the number of steps in elements from the previous
-       access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
-       correspond to the gaps.
-      */
+       access (if there is no gap DR_GROUP_GAP is 1).  We skip loads that
+       correspond to the gaps.  */
       if (next_stmt != first_stmt
           && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
       {
@@ -4088,8 +4090,8 @@ vect_supportable_dr_alignment (struct da
 
   /* We can choose between using the implicit realignment scheme (generating
      a misaligned_move stmt) and the explicit realignment scheme (generating
-     aligned loads with a REALIGN_LOAD). There are two variants to the explicit
-     realignment scheme: optimized, and unoptimized.
+     aligned loads with a REALIGN_LOAD).  There are two variants to the
+     explicit realignment scheme: optimized, and unoptimized.
      We can optimize the realignment only if the step between consecutive
      vector loads is equal to the vector size.  Since the vector memory
      accesses advance in steps of VS (Vector Size) in the vectorized loop, it
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c	(revision 164270)
+++ tree-vect-stmts.c	(working copy)
@@ -166,7 +166,7 @@ vect_stmt_relevant_p (gimple stmt, loop_
 
 /* Function exist_non_indexing_operands_for_use_p
 
-   USE is one of the uses attached to STMT. Check if USE is
+   USE is one of the uses attached to STMT.  Check if USE is
    used in STMT for anything other than indexing an array.  */
 
 static bool
@@ -175,7 +175,7 @@ exist_non_indexing_operands_for_use_p (t
   tree operand;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 
-  /* USE corresponds to some operand in STMT. If there is no data
+  /* USE corresponds to some operand in STMT.  If there is no data
      reference in STMT, then any operand that corresponds to USE
      is not indexing an array.  */
   if (!STMT_VINFO_DATA_REF (stmt_info))
@@ -215,7 +215,7 @@ exist_non_indexing_operands_for_use_p (t
    Inputs:
    - a USE in STMT in a loop represented by LOOP_VINFO
    - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
-     that defined USE. This is done by calling mark_relevant and passing it
+     that defined USE.  This is done by calling mark_relevant and passing it
      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
 
    Outputs:
@@ -466,7 +466,7 @@ vect_mark_stmts_to_be_vectorized (loop_v
 	   relevant = vect_used_by_reduction
 	 This is because we distinguish between two kinds of relevant stmts -
 	 those that are used by a reduction computation, and those that are
-	 (also) used by a regular computation. This allows us later on to
+	 (also) used by a regular computation.  This allows us later on to
 	 identify stmts that are used solely by a reduction, and therefore the
 	 order of the results that they produce does not have to be kept.  */
 
@@ -558,6 +558,9 @@ int vect_get_stmt_cost (enum vect_cost_f
                                                        dummy_type, dummy);
 }
 
+
+/* Get cost for STMT.  */
+
 int
 cost_for_stmt (gimple stmt)
 {
@@ -870,10 +873,10 @@ vect_get_load_cost (struct data_referenc
                    "pipelined.");
 
         /* Unaligned software pipeline has a load of an address, an initial
-           load, and possibly a mask operation to "prime" the loop. However,
+           load, and possibly a mask operation to "prime" the loop.  However,
            if this is an access in a group of loads, which provide strided
            access, then the above cost should only be considered for one
-           access in the group. Inside the loop, there is a load op
+           access in the group.  Inside the loop, there is a load op
            and a realignment op.  */
 
         if (add_realign_cost)
@@ -897,8 +900,8 @@ vect_get_load_cost (struct data_referenc
 /* Function vect_init_vector.
 
    Insert a new stmt (INIT_STMT) that initializes a new vector variable with
-   the vector elements of VECTOR_VAR. Place the initialization at BSI if it
-   is not NULL. Otherwise, place the initialization at the loop preheader.
+   the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
+   is not NULL.  Otherwise, place the initialization at the loop preheader.
    Return the DEF of INIT_STMT.
    It will be used in the vectorization of STMT.  */
 
@@ -963,7 +966,7 @@ vect_init_vector (gimple stmt, tree vect
 
 /* Function vect_get_vec_def_for_operand.
 
-   OP is an operand in STMT. This function returns a (vector) def that will be
+   OP is an operand in STMT.  This function returns a (vector) def that will be
    used in the vectorized stmt for STMT.
 
    In the case that OP is an SSA_NAME which is defined in the loop, then
@@ -1117,10 +1120,10 @@ vect_get_vec_def_for_operand (tree op, g
 
 /* Function vect_get_vec_def_for_stmt_copy
 
-   Return a vector-def for an operand. This function is used when the
+   Return a vector-def for an operand.  This function is used when the
    vectorized stmt to be created (by the caller to this function) is a "copy"
    created in case the vectorized result cannot fit in one vector, and several
-   copies of the vector-stmt are required. In this case the vector-def is
+   copies of the vector-stmt are required.  In this case the vector-def is
    retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
    of the stmt that defines VEC_OPRND.
    DT is the type of the vector def VEC_OPRND.
@@ -1128,7 +1131,7 @@ vect_get_vec_def_for_operand (tree op, g
    Context:
         In case the vectorization factor (VF) is bigger than the number
    of elements that can fit in a vectype (nunits), we have to generate
-   more than one vector stmt to vectorize the scalar stmt. This situation
+   more than one vector stmt to vectorize the scalar stmt.  This situation
    arises when there are multiple data-types operated upon in the loop; the
    smallest data-type determines the VF, and as a result, when vectorizing
    stmts operating on wider types we need to create 'VF/nunits' "copies" of the
@@ -1153,7 +1156,7 @@ vect_get_vec_def_for_operand (tree op, g
    The vectorization of S2:
         To create the first vector-stmt out of the 4 copies - VSnew.0 -
    the function 'vect_get_vec_def_for_operand' is called to
-   get the relevant vector-def for each operand of S2. For operand x it
+   get the relevant vector-def for each operand of S2.  For operand x it
    returns  the vector-def 'vx.0'.
 
         To create the remaining copies of the vector-stmt (VSnew.j), this
@@ -1196,7 +1199,7 @@ vect_get_vec_def_for_stmt_copy (enum vec
 
 
 /* Get vectorized definitions for the operands to create a copy of an original
-   stmt. See vect_get_vec_def_for_stmt_copy() for details.  */
+   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
 
 static void
 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
@@ -1217,7 +1220,8 @@ vect_get_vec_defs_for_stmt_copy (enum ve
 }
 
 
-/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL.  */
+/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
+   NULL.  */
 
 static void
 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
@@ -1594,7 +1598,7 @@ vectorizable_call (gimple stmt, gimple_s
 
    Create a vector stmt whose code, type, number of arguments, and result
    variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
-   VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
+   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
    In the case that CODE is a CALL_EXPR, this means that a call to DECL
    needs to be created (DECL is a function-decl of a target-builtin).
    STMT is the original scalar stmt that we are vectorizing.  */
@@ -1742,8 +1746,9 @@ vectorizable_conversion (gimple stmt, gi
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
 
-  /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
-     this, so we can safely override NCOPIES with 1 here.  */
+  /* Multiple types in SLP are handled by creating the appropriate number of
+     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
+     case of SLP.  */
   if (slp_node)
     ncopies = 1;
 
@@ -1900,6 +1905,8 @@ vectorizable_conversion (gimple stmt, gi
 
   return true;
 }
+
+
 /* Function vectorizable_assignment.
 
    Check if STMT performs an assignment (copy) that can be vectorized.
@@ -2156,7 +2163,7 @@ vectorizable_operation (gimple stmt, gim
     vf = 1;
 
   /* Multiple types in SLP are handled by creating the appropriate number of
-     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
   if (slp_node)
     ncopies = 1;
@@ -2243,7 +2250,7 @@ vectorizable_operation (gimple stmt, gim
 	fprintf (vect_dump, "proceeding using word mode.");
     }
 
-  /* Worthwhile without SIMD support? Check only during analysis.  */
+  /* Worthwhile without SIMD support?  Check only during analysis.  */
   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
       && vf < vect_min_worthwhile_factor (code)
       && !vec_stmt)
@@ -2270,12 +2277,12 @@ vectorizable_operation (gimple stmt, gim
   /* Handle def.  */
   vec_dest = vect_create_destination_var (scalar_dest, vectype);
 
-  /* Allocate VECs for vector operands. In case of SLP, vector operands are
+  /* Allocate VECs for vector operands.  In case of SLP, vector operands are
      created in the previous stages of the recursion, so no allocation is
-     needed, except for the case of shift with scalar shift argument. In that
+     needed, except for the case of shift with scalar shift argument.  In that
      case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
      be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
-     In case of loop-based vectorization we allocate VECs of size 1. We
+     In case of loop-based vectorization we allocate VECs of size 1.  We
      allocate VEC_OPRNDS1 only in case of binary operation.  */
   if (!slp_node)
     {
@@ -2289,13 +2296,13 @@ vectorizable_operation (gimple stmt, gim
   /* In case the vectorization factor (VF) is bigger than the number
      of elements that we can fit in a vectype (nunits), we have to generate
      more than one vector stmt - i.e - we need to "unroll" the
-     vector stmt by a factor VF/nunits. In doing so, we record a pointer
+     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
      from one copy of the vector stmt to the next, in the field
-     STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
+     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
      stages to find the correct vector defs to be used when vectorizing
-     stmts that use the defs of the current stmt. The example below illustrates
-     the vectorization process when VF=16 and nunits=4 (i.e - we need to create
-     4 vectorized stmts):
+     stmts that use the defs of the current stmt.  The example below
+     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
+     we need to create 4 vectorized stmts):
 
      before vectorization:
                                 RELATED_STMT    VEC_STMT
@@ -2314,18 +2321,18 @@ vectorizable_operation (gimple stmt, gim
 
      step2: vectorize stmt S2 (done here):
         To vectorize stmt S2 we first need to find the relevant vector
-        def for the first operand 'x'. This is, as usual, obtained from
+        def for the first operand 'x'.  This is, as usual, obtained from
         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
-        that defines 'x' (S1). This way we find the stmt VS1_0, and the
-        relevant vector def 'vx0'. Having found 'vx0' we can generate
+        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
+        relevant vector def 'vx0'.  Having found 'vx0' we can generate
         the vector stmt VS2_0, and as usual, record it in the
         STMT_VINFO_VEC_STMT of stmt S2.
         When creating the second copy (VS2_1), we obtain the relevant vector
         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
-        stmt VS1_0. This way we find the stmt VS1_1 and the relevant
-        vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
+        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
+        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
-        Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
+        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
         chain of stmts and pointers:
                                 RELATED_STMT    VEC_STMT
         VS1_0:  vx0 = memref0   VS1_1           -
@@ -2348,7 +2355,7 @@ vectorizable_operation (gimple stmt, gim
 	  if (op_type == binary_op && scalar_shift_arg)
 	    {
 	      /* Vector shl and shr insn patterns can be defined with scalar
-		 operand 2 (shift operand). In this case, use constant or loop
+		 operand 2 (shift operand).  In this case, use constant or loop
 		 invariant op1 directly, without extending it to vector mode
 		 first.  */
 	      optab_op2_mode = insn_data[icode].operand[2].mode;
@@ -2361,8 +2368,8 @@ vectorizable_operation (gimple stmt, gim
 	          if (slp_node)
 	            {
 	              /* Store vec_oprnd1 for every vector stmt to be created
-	                 for SLP_NODE. We check during the analysis that all the
-                         shift arguments are the same.
+	                 for SLP_NODE.  We check during the analysis that all
+                         the shift arguments are the same.
 	                 TODO: Allow different constants for different vector
 	                 stmts generated for an SLP instance.  */
 	              for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
@@ -2415,7 +2422,7 @@ vectorizable_operation (gimple stmt, gim
 }
 
 
-/* Get vectorized definitions for loop-based vectorization. For the first
+/* Get vectorized definitions for loop-based vectorization.  For the first
    operand we call vect_get_vec_def_for_operand() (with OPRND containing
    scalar operand), and for the rest we get a copy with
    vect_get_vec_def_for_stmt_copy() using the previous vector definition
@@ -2612,7 +2619,7 @@ vectorizable_type_demotion (gimple stmt,
     return false;
 
   /* Multiple types in SLP are handled by creating the appropriate number of
-     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
   if (slp_node)
     ncopies = 1;
@@ -2702,7 +2709,7 @@ vectorizable_type_demotion (gimple stmt,
 
 
 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
-   and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
+   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
    the resulting vectors and call the function recursively.  */
 
 static void
@@ -2779,17 +2786,18 @@ vect_create_vectorized_promotion_stmts (
   if (multi_step_cvt)
     {
       /* For multi-step promotion operation we first generate we call the
-         function recurcively for every stage. We start from the input type,
+         function recursively for every stage.  We start from the input type,
          create promotion operations to the intermediate types, and then
          create promotions to the output type.  */
       *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
-      VEC_free (tree, heap, vec_tmp);
       vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
                                               multi_step_cvt - 1, stmt,
                                               vec_dsts, gsi, slp_node, code1,
                                               code2, decl2, decl2, op_type,
                                               prev_stmt_info);
     }
+
+  VEC_free (tree, heap, vec_tmp);
 }
 
 
@@ -2891,7 +2899,7 @@ vectorizable_type_promotion (gimple stmt
     return false;
 
   /* Multiple types in SLP are handled by creating the appropriate number of
-     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
   if (slp_node)
     ncopies = 1;
@@ -3259,7 +3267,7 @@ vectorizable_store (gimple stmt, gimple_
      the documentation of vect_permute_store_chain()).
 
      In case of both multiple types and interleaving, above vector stores and
-     permutation stmts are created for every copy. The result vector stmts are
+     permutation stmts are created for every copy.  The result vector stmts are
      put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
      STMT_VINFO_RELATED_STMT for the next copies.
   */
@@ -3411,6 +3419,8 @@ vectorizable_store (gimple stmt, gimple_
   VEC_free (tree, heap, oprnds);
   if (result_chain)
     VEC_free (tree, heap, result_chain);
+  if (vec_oprnds)
+    VEC_free (tree, heap, vec_oprnds);
 
   return true;
 }
@@ -3476,7 +3486,7 @@ vectorizable_load (gimple stmt, gimple_s
     vf = 1;
 
   /* Multiple types in SLP are handled by creating the appropriate number of
-     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
   if (slp)
     ncopies = 1;
@@ -3603,13 +3613,13 @@ vectorizable_load (gimple stmt, gimple_s
   /* In case the vectorization factor (VF) is bigger than the number
      of elements that we can fit in a vectype (nunits), we have to generate
      more than one vector stmt - i.e - we need to "unroll" the
-     vector stmt by a factor VF/nunits. In doing so, we record a pointer
+     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
      from one copy of the vector stmt to the next, in the field
-     STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
+     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
      stages to find the correct vector defs to be used when vectorizing
-     stmts that use the defs of the current stmt. The example below illustrates
-     the vectorization process when VF=16 and nunits=4 (i.e - we need to create
-     4 vectorized stmts):
+     stmts that use the defs of the current stmt.  The example below
+     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
+     need to create 4 vectorized stmts):
 
      before vectorization:
                                 RELATED_STMT    VEC_STMT
@@ -3621,7 +3631,7 @@ vectorizable_load (gimple stmt, gimple_s
         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
         Next, we create the vector stmt VS1_1, and record a pointer to
         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
-        Similarly, for VS1_2 and VS1_3. This is the resulting chain of
+        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
         stmts and pointers:
                                 RELATED_STMT    VEC_STMT
         VS1_0:  vx0 = memref0   VS1_1           -
@@ -3664,9 +3674,9 @@ vectorizable_load (gimple stmt, gimple_s
      STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
 
      In case of both multiple types and interleaving, the vector loads and
-     permutation stmts above are created for every copy. The result vector stmts
-     are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
-     STMT_VINFO_RELATED_STMT for the next copies.  */
+     permutation stmts above are created for every copy.  The result vector
+     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
+     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
 
   /* If the data reference is aligned (dr_aligned) or potentially unaligned
      on a target that supports unaligned accesses (dr_unaligned_supported)
@@ -3699,7 +3709,7 @@ vectorizable_load (gimple stmt, gimple_s
 
   /* If the misalignment remains the same throughout the execution of the
      loop, we can create the init_addr and permutation mask at the loop
-     preheader. Otherwise, it needs to be created inside the loop.
+     preheader.  Otherwise, it needs to be created inside the loop.
      This can only occur when vectorizing memory accesses in the inner-loop
      nested within an outer-loop that is being vectorized.  */
 
@@ -3854,7 +3864,7 @@ vectorizable_load (gimple stmt, gimple_s
 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
 	  mark_symbols_for_renaming (new_stmt);
 
-	  /* 3. Handle explicit realignment if necessary/supported. Create in
+	  /* 3. Handle explicit realignment if necessary/supported.  Create in
 		loop: vec_dest = realign_load (msq, lsq, realignment_token)  */
 	  if (alignment_support_scheme == dr_explicit_realign_optimized
 	      || alignment_support_scheme == dr_explicit_realign)
@@ -4035,7 +4045,8 @@ vectorizable_condition (gimple stmt, gim
   tree cond_expr, then_clause, else_clause;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-  tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
+  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
+  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
   tree vec_compare, vec_cond_expr;
   tree new_temp;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
@@ -4365,7 +4376,7 @@ vect_analyze_stmt (gimple stmt, bool *ne
   if (!PURE_SLP_STMT (stmt_info))
     {
       /* Groups of strided accesses whose size is not a power of 2 are not
-         vectorizable yet using loop-vectorization. Therefore, if this stmt
+         vectorizable yet using loop-vectorization.  Therefore, if this stmt
 	 feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
 	 loop-based vectorized), the loop cannot be vectorized.  */
       if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
@@ -4447,7 +4458,7 @@ vect_transform_stmt (gimple stmt, gimple
       if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
 	{
 	  /* In case of interleaving, the whole chain is vectorized when the
-	     last store in the chain is reached. Store stmts before the last
+	     last store in the chain is reached.  Store stmts before the last
 	     one are skipped, and there vec_stmt_info shouldn't be freed
 	     meanwhile.  */
 	  *strided_store = true;
@@ -4746,7 +4757,7 @@ get_same_sized_vectype (tree scalar_type
 
    Returns whether a stmt with OPERAND can be vectorized.
    For loops, supportable operands are constants, loop invariants, and operands
-   that are defined by the current iteration of the loop. Unsupportable
+   that are defined by the current iteration of the loop.  Unsupportable
    operands are those that are defined by a previous iteration of the loop (as
    is the case in reduction/induction computations).
    For basic blocks, supportable operands are constants and bb invariants.
@@ -4928,7 +4939,7 @@ vect_is_simple_use_1 (tree operand, loop
    - CODE1 and CODE2 are codes of vector operations to be used when
    vectorizing the operation, if available.
    - DECL1 and DECL2 are decls of target builtin functions to be used
-   when vectorizing the operation, if available. In this case,
+   when vectorizing the operation, if available.  In this case,
    CODE1 and CODE2 are CALL_EXPR.
    - MULTI_STEP_CVT determines the number of required intermediate steps in
    case of multi-step conversion (like char->short->int - in that case
@@ -4972,7 +4983,7 @@ supportable_widening_operation (enum tre
 
      When vectorizing outer-loops, we execute the inner-loop sequentially
      (each vectorized inner-loop iteration contributes to VF outer-loop
-     iterations in parallel). We therefore don't allow to change the order
+     iterations in parallel).  We therefore don't allow to change the order
      of the computation in the inner-loop during outer-loop vectorization.  */
 
    if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
@@ -5085,8 +5096,9 @@ supportable_widening_operation (enum tre
       *code2 = c2;
 
       /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
-         intermediate  steps in promotion sequence. We try MAX_INTERM_CVT_STEPS
-         to get to NARROW_VECTYPE, and fail if we do not.  */
+         intermediate steps in promotion sequence.  We try
+         MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
+         not.  */
       *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
       for (i = 0; i < 3; i++)
         {
@@ -5137,7 +5149,7 @@ supportable_widening_operation (enum tre
    and producing a result of type VECTYPE_OUT).
 
    Narrowing operations we currently support are NOP (CONVERT) and
-   FIX_TRUNC. This function checks if these operations are supported by
+   FIX_TRUNC.  This function checks if these operations are supported by
    the target platform directly via vector tree-codes.
 
    Output:
@@ -5205,8 +5217,9 @@ supportable_narrowing_operation (enum tr
       *code1 = c1;
       prev_type = vectype;
       /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
-         intermediate  steps in promotion sequence. We try MAX_INTERM_CVT_STEPS
-         to get to NARROW_VECTYPE, and fail if we do not.  */
+         intermediate steps in promotion sequence.  We try
+         MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
+         not.  */
       *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
       for (i = 0; i < 3; i++)
         {
Index: tree-vect-slp.c
===================================================================
--- tree-vect-slp.c	(revision 164269)
+++ tree-vect-slp.c	(working copy)
@@ -147,7 +147,7 @@ vect_get_and_check_slp_defs (loop_vec_in
 	}
 
       /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
-         from the pattern. Check that all the stmts of the node are in the
+         from the pattern.  Check that all the stmts of the node are in the
          pattern.  */
       if (loop && def_stmt && gimple_bb (def_stmt)
           && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
@@ -299,7 +299,7 @@ vect_get_and_check_slp_defs (loop_vec_in
 
 /* Recursively build an SLP tree starting from NODE.
    Fail (and return FALSE) if def-stmts are not isomorphic, require data
-   permutation or are of unsupported types of operation. Otherwise, return
+   permutation or are of unsupported types of operation.  Otherwise, return
    TRUE.  */
 
 static bool
@@ -542,7 +542,7 @@ vect_build_slp_tree (loop_vec_info loop_
               if (prev_first_load)
                 {
                   /* Check that there are no loads from different interleaving
-                     chains in the same node. The only exception is complex
+                     chains in the same node.  The only exception is complex
                      numbers.  */
                   if (prev_first_load != first_load
                       && rhs_code != REALPART_EXPR 
@@ -582,7 +582,7 @@ vect_build_slp_tree (loop_vec_info loop_
                                         ncopies_for_cost, *node);
                 }
 
-              /* Store the place of this load in the interleaving chain. In
+              /* Store the place of this load in the interleaving chain.  In
                  case that permutation is needed we later decide if a specific
                  permutation is supported.  */
               load_place = vect_get_place_in_interleaving_chain (stmt,
@@ -729,7 +729,7 @@ vect_print_slp_tree (slp_tree node)
 
 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
    If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
-   J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
+   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
    stmts in NODE are to be marked.  */
 
 static void
@@ -897,7 +897,7 @@ vect_supported_load_permutation_p (slp_i
 
   /* In case of reduction every load permutation is allowed, since the order
      of the reduction statements is not important (as opposed to the case of
-     strided stores). The only condition we need to check is that all the 
+     strided stores).  The only condition we need to check is that all the
      load nodes are of the same size and have the same permutation (and then
      rearrange all the nodes of the SLP instance according to this 
      permutation).  */
@@ -920,7 +920,7 @@ vect_supported_load_permutation_p (slp_i
       real_c = real_b + real_a;
       imag_c = imag_a + imag_b;
      i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of 
-     {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
+     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
      chains are mixed, they match the above pattern.  */
   if (complex_numbers)
     {
@@ -969,7 +969,7 @@ vect_supported_load_permutation_p (slp_i
   stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
   /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
      instance, not all the loads belong to the same node or interleaving
-     group. Hence, we need to divide them into groups according to
+     group.  Hence, we need to divide them into groups according to
      GROUP_SIZE.  */
   number_of_groups = VEC_length (int, load_permutation) / group_size;
 
@@ -1002,7 +1002,7 @@ vect_supported_load_permutation_p (slp_i
 
       if (!bad_permutation)
         {
-          /* This permutaion is valid for reduction. Since the order of the
+          /* This permutation is valid for reduction.  Since the order of the
              statements in the nodes is not important unless they are memory
              accesses, we can rearrange the statements in all the nodes 
              according to the order of the loads.  */
@@ -1064,9 +1064,10 @@ vect_supported_load_permutation_p (slp_i
 /* Find the first load in the loop that belongs to INSTANCE.
    When loads are in several SLP nodes, there can be a case in which the first
    load does not appear in the first SLP node to be transformed, causing
-   incorrect order of statements. Since we generate all the loads together,
+   incorrect order of statements.  Since we generate all the loads together,
    they must be inserted before the first load of the SLP instance and not
    before the first load of the first node of the instance.  */
+
 static gimple
 vect_find_first_load_in_slp_instance (slp_instance instance)
 {
@@ -1083,6 +1084,7 @@ vect_find_first_load_in_slp_instance (sl
 
 
 /* Find the last store in SLP INSTANCE.  */
+
 static gimple
 vect_find_last_store_in_slp_instance (slp_instance instance)
 {
@@ -1100,7 +1102,7 @@ vect_find_last_store_in_slp_instance (sl
 }
 
 
-/* Analyze an SLP instance starting from a group of strided stores. Call
+/* Analyze an SLP instance starting from a group of strided stores.  Call
    vect_build_slp_tree to build a tree of packed stmts if possible.
    Return FALSE if it's impossible to SLP any stmt in the loop.  */
 
@@ -1274,7 +1276,7 @@ vect_analyze_slp_instance (loop_vec_info
 }
 
 
-/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
+/* Check if there are stmts in the loop that can be vectorized using SLP.
+   Build SLP
    trees of packed scalar stmts if SLP is possible.  */
 
 bool
@@ -1339,9 +1341,9 @@ vect_make_slp_decision (loop_vec_info lo
       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
 	unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
 
-      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
+      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
 	 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
-	 loop-based vectorization. Such stmts will be marked as HYBRID.  */
+	 loop-based vectorization.  Such stmts will be marked as HYBRID.  */
       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
       decided_to_slp++;
     }
@@ -1355,7 +1357,7 @@ vect_make_slp_decision (loop_vec_info lo
 
 
 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
-   can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID.  */
+   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
 
 static void
 vect_detect_hybrid_slp_stmts (slp_tree node)
@@ -1493,7 +1495,7 @@ vect_slp_analyze_node_operations (bb_vec
 }
 
 
-/* Analyze statements in SLP instances of the basic block. Return TRUE if the
+/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
    operations are supported. */
 
 static bool
@@ -1523,7 +1525,7 @@ vect_slp_analyze_operations (bb_vec_info
 
 /* Check if loads and stores are mixed in the basic block (in that
    case if we are not sure that the accesses differ, we can't vectorize the
-   basic block). Also return FALSE in case that there is statement marked as
+   basic block).  Also return FALSE in case that there is statement marked as
    not vectorizable.  */
 
 static bool
@@ -1783,11 +1785,11 @@ vect_slp_analyze_bb (basic_block bb)
 
 
 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
-   the number of created vector stmts depends on the unrolling factor). However,
-   the actual number of vector stmts for every SLP node depends on VF which is
-   set later in vect_analyze_operations(). Hence, SLP costs should be updated.
-   In this function we assume that the inside costs calculated in
-   vect_model_xxx_cost are linear in ncopies.  */
+   the number of created vector stmts depends on the unrolling factor).
+   However, the actual number of vector stmts for every SLP node depends on
+   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
+   should be updated.  In this function we assume that the inside costs
+   calculated in vect_model_xxx_cost are linear in ncopies.  */
 
 void
 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
@@ -1846,7 +1848,7 @@ vect_get_constant_vectors (slp_tree slp_
       op_num = reduc_index - 1;
       op = gimple_op (stmt, op_num + 1);
       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
-         we need either neutral operands or the original operands. See
+         we need either neutral operands or the original operands.  See
          get_initial_def_for_reduction() for details.  */
       switch (code)
         {
@@ -2051,7 +2053,7 @@ vect_get_slp_defs (slp_tree slp_node, VE
       number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
       /* Number of vector stmts was calculated according to LHS in
          vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
-         necessary. See vect_get_smallest_scalar_type() for details.  */
+         necessary.  See vect_get_smallest_scalar_type () for details.  */
       vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                      &rhs_size_unit);
       if (rhs_size_unit != lhs_size_unit)
@@ -2065,7 +2067,7 @@ vect_get_slp_defs (slp_tree slp_node, VE
   *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
 
   /* SLP_NODE corresponds either to a group of stores or to a group of
-     unary/binary operations. We don't call this function for loads.  
+     unary/binary operations.  We don't call this function for loads.
      For reduction defs we call vect_get_constant_vectors(), since we are
      looking for initial loop invariant values.  */
   if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
@@ -2167,7 +2169,7 @@ vect_create_mask_and_perm (gimple stmt, 
 
 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
    return in CURRENT_MASK_ELEMENT its equivalent in target specific
-   representation. Check that the mask is valid and return FALSE if not.
+   representation.  Check that the mask is valid and return FALSE if not.
    Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
    the next vector, i.e., the current first vector is not needed.  */
 
@@ -2321,8 +2323,8 @@ vect_transform_slp_perm_load (gimple stm
      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
      scpecific type, e.g., in bytes for Altivec.
      The last mask is illegal since we assume two operands for permute
-     operation, and the mask element values can't be outside that range. Hence,
-     the last mask must be converted into {2,5,5,5}.
+     operation, and the mask element values can't be outside that range.
+     Hence, the last mask must be converted into {2,5,5,5}.
      For the first two permutations we need the first and the second input
      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
      we need the second and the third vectors: {b1,c1,a2,b2} and
@@ -2438,7 +2440,7 @@ vect_schedule_slp_instance (slp_tree nod
   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
 
   /* For each SLP instance calculate number of vector stmts to be created
-     for the scalar stmts in each node of the SLP tree. Number of vector
+     for the scalar stmts in each node of the SLP tree.  Number of vector
      elements in one vector iteration is the number of scalar elements in
      one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
      size.  */
@@ -2492,6 +2494,8 @@ vect_schedule_slp_instance (slp_tree nod
 }
 
 
+/* Generate vector code for all SLP instances in the loop/basic block.  */
+
 bool
 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
 {

^ permalink raw reply	[flat|nested] 7+ messages in thread
* [patch] Vectorizer cleanup
@ 2009-06-01  8:18 Ira Rosen
  0 siblings, 0 replies; 7+ messages in thread
From: Ira Rosen @ 2009-06-01  8:18 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1706 bytes --]


Hi,

This patch contains several fixes that, hopefully, improve the readability
of the vectorizer's code and its dump output.
It also fixes PR 39129: the "too many BBs in loop" warning was changed to
"control flow in loop".

Bootstrapped with vectorization enabled on powerpc64-suse-linux and
regtested on x86_64-suse-linux.
Committed revision 148036.

Ira

ChangeLog:

      PR tree-optimization/39129
      * tree-vect-loop-manip.c (conservative_cost_threshold): Change the
      printed message.
      (vect_do_peeling_for_loop_bound): Use
      LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT and
      LOOP_REQUIRES_VERSIONING_FOR_ALIAS macros.
      (vect_loop_versioning): Likewise.
      (vect_create_cond_for_alias_checks): Fix indentation.
      * tree-vectorizer.h (struct _loop_vec_info): Fix indentation of the
      macros.
      (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT): Define.
      (LOOP_REQUIRES_VERSIONING_FOR_ALIAS): Likewise.
      * tree-vect-loop.c (vect_analyze_loop_form): Change "too many BBs" to
      "control flow in loop".
      (vect_estimate_min_profitable_iters): Use
      LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT and
      LOOP_REQUIRES_VERSIONING_FOR_ALIAS macros.
      * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Likewise.
      (vect_create_data_ref_ptr): Don't mention array dimension in
printing.
      * tree-vect-stmts.c (vectorizable_store): Replace the check that the
      statement belongs to a group of strided accesses with the exact code
      check.
      (vectorizable_load): Likewise.
      * tree-vect-slp.c (vect_analyze_slp_instance): Spell out "basic
block".
      (vect_slp_analyze_bb, vect_slp_transform_bb): Likewise.

(See attached file: cleanup.txt)


[-- Attachment #2: cleanup.txt --]
[-- Type: text/plain, Size: 13473 bytes --]

Index: tree-vect-loop-manip.c
===================================================================
--- tree-vect-loop-manip.c	(revision 148013)
+++ tree-vect-loop-manip.c	(working copy)
@@ -1680,7 +1680,7 @@ conservative_cost_threshold (loop_vec_in
     th = (unsigned) min_profitable_iters;
 
   if (th && vect_print_dump_info (REPORT_COST))
-    fprintf (vect_dump, "Vectorization may not be profitable.");
+    fprintf (vect_dump, "Profitability threshold is %u loop iterations.", th);
 
   return th;
 }
@@ -1730,8 +1730,8 @@ vect_do_peeling_for_loop_bound (loop_vec
 
   /* If cost model check not done during versioning and 
      peeling for alignment.  */
-  if (!VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
-      && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))
+  if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
+      && !LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
       && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
       && !cond_expr)
     {
@@ -2280,10 +2280,10 @@ vect_create_cond_for_alias_checks (loop_
       else
 	*cond_expr = part_cond_expr;
     }
-    if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
-      fprintf (vect_dump, "created %u versioning for alias checks.\n",
-               VEC_length (ddr_p, may_alias_ddrs));
 
+  if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
+    fprintf (vect_dump, "created %u versioning for alias checks.\n",
+             VEC_length (ddr_p, may_alias_ddrs));
 }
 
 
@@ -2339,11 +2339,11 @@ vect_loop_versioning (loop_vec_info loop
   *cond_expr = force_gimple_operand (*cond_expr, cond_expr_stmt_list,
 				     false, NULL_TREE);
 
-  if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
+  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
       vect_create_cond_for_align_checks (loop_vinfo, cond_expr,
 					 cond_expr_stmt_list);
 
-  if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+  if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
     vect_create_cond_for_alias_checks (loop_vinfo, cond_expr,
 				       cond_expr_stmt_list);
 
Index: tree-vectorizer.h
===================================================================
--- tree-vectorizer.h	(revision 148013)
+++ tree-vectorizer.h	(working copy)
@@ -239,33 +239,38 @@ typedef struct _loop_vec_info {
 } *loop_vec_info;
 
 /* Access Functions.  */
-#define LOOP_VINFO_LOOP(L)            (L)->loop
-#define LOOP_VINFO_BBS(L)             (L)->bbs
-#define LOOP_VINFO_NITERS(L)          (L)->num_iters
+#define LOOP_VINFO_LOOP(L)                 (L)->loop
+#define LOOP_VINFO_BBS(L)                  (L)->bbs
+#define LOOP_VINFO_NITERS(L)               (L)->num_iters
 /* Since LOOP_VINFO_NITERS can change after prologue peeling
    retain total unchanged scalar loop iterations for cost model.  */
-#define LOOP_VINFO_NITERS_UNCHANGED(L)          (L)->num_iters_unchanged
-#define LOOP_VINFO_COST_MODEL_MIN_ITERS(L)	(L)->min_profitable_iters
-#define LOOP_VINFO_VECTORIZABLE_P(L)  (L)->vectorizable
-#define LOOP_VINFO_VECT_FACTOR(L)     (L)->vectorization_factor
-#define LOOP_VINFO_PTR_MASK(L)        (L)->ptr_mask
-#define LOOP_VINFO_DATAREFS(L)        (L)->datarefs
-#define LOOP_VINFO_DDRS(L)            (L)->ddrs
-#define LOOP_VINFO_INT_NITERS(L)      (TREE_INT_CST_LOW ((L)->num_iters))
-#define LOOP_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
-#define LOOP_VINFO_UNALIGNED_DR(L)    (L)->unaligned_dr
-#define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts
-#define LOOP_VINFO_LOC(L)             (L)->loop_line_number
-#define LOOP_VINFO_MAY_ALIAS_DDRS(L)  (L)->may_alias_ddrs
-#define LOOP_VINFO_STRIDED_STORES(L)  (L)->strided_stores
-#define LOOP_VINFO_SLP_INSTANCES(L)   (L)->slp_instances
+#define LOOP_VINFO_NITERS_UNCHANGED(L)     (L)->num_iters_unchanged
+#define LOOP_VINFO_COST_MODEL_MIN_ITERS(L) (L)->min_profitable_iters
+#define LOOP_VINFO_VECTORIZABLE_P(L)       (L)->vectorizable
+#define LOOP_VINFO_VECT_FACTOR(L)          (L)->vectorization_factor
+#define LOOP_VINFO_PTR_MASK(L)             (L)->ptr_mask
+#define LOOP_VINFO_DATAREFS(L)             (L)->datarefs
+#define LOOP_VINFO_DDRS(L)                 (L)->ddrs
+#define LOOP_VINFO_INT_NITERS(L)           (TREE_INT_CST_LOW ((L)->num_iters))
+#define LOOP_PEELING_FOR_ALIGNMENT(L)      (L)->peeling_for_alignment
+#define LOOP_VINFO_UNALIGNED_DR(L)         (L)->unaligned_dr
+#define LOOP_VINFO_MAY_MISALIGN_STMTS(L)   (L)->may_misalign_stmts
+#define LOOP_VINFO_LOC(L)                  (L)->loop_line_number
+#define LOOP_VINFO_MAY_ALIAS_DDRS(L)       (L)->may_alias_ddrs
+#define LOOP_VINFO_STRIDED_STORES(L)       (L)->strided_stores
+#define LOOP_VINFO_SLP_INSTANCES(L)        (L)->slp_instances
 #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
 
+#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
+VEC_length (gimple, (L)->may_misalign_stmts) > 0
+#define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L)     \
+VEC_length (ddr_p, (L)->may_alias_ddrs) > 0
+
 #define NITERS_KNOWN_P(n)                     \
 (host_integerp ((n),0)                        \
 && TREE_INT_CST_LOW ((n)) > 0)
 
-#define LOOP_VINFO_NITERS_KNOWN_P(L)                     \
+#define LOOP_VINFO_NITERS_KNOWN_P(L)          \
 NITERS_KNOWN_P((L)->num_iters)
 
 static inline loop_vec_info
Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c	(revision 148013)
+++ tree-vect-loop.c	(working copy)
@@ -846,7 +846,7 @@ vect_analyze_loop_form (struct loop *loo
       if (loop->num_nodes != 2)
         {
           if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
-            fprintf (vect_dump, "not vectorized: too many BBs in loop.");
+            fprintf (vect_dump, "not vectorized: control flow in loop.");
           return NULL;
         }
 
@@ -908,7 +908,7 @@ vect_analyze_loop_form (struct loop *loo
       if (loop->num_nodes != 5) 
         {
 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
-	    fprintf (vect_dump, "not vectorized: too many BBs in loop.");
+	    fprintf (vect_dump, "not vectorized: control flow in loop.");
 	  destroy_loop_vec_info (inner_loop_vinfo, true);
 	  return NULL;
         }
@@ -1756,7 +1756,7 @@ vect_estimate_min_profitable_iters (loop
     }
 
   /* Requires loop versioning tests to handle misalignment.  */
-  if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
+  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
     {
       /*  FIXME: Make cost depend on complexity of individual check.  */
       vec_outside_cost +=
@@ -1766,7 +1766,8 @@ vect_estimate_min_profitable_iters (loop
                  "versioning to treat misalignment.\n");
     }
 
-  if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+  /* Requires loop versioning with alias checks.  */
+  if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
     {
       /*  FIXME: Make cost depend on complexity of individual check.  */
       vec_outside_cost +=
@@ -1776,11 +1777,9 @@ vect_estimate_min_profitable_iters (loop
                  "versioning aliasing.\n");
     }
 
-  if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
-      || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
-    {
-      vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;
-    }
+  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
+      || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
+    vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;
 
   /* Count statements in scalar loop.  Using this as scalar cost for a single
      iteration for now.
@@ -1946,12 +1945,12 @@ vect_estimate_min_profitable_iters (loop
      decide whether to vectorize at compile time. Hence the scalar version
      do not carry cost model guard costs.  */
   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-      || VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
-      || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+      || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
+      || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
     {
       /* Cost model check occurs at versioning.  */
-      if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
-	  || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+      if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
+          || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
 	scalar_outside_cost += TARG_COND_NOT_TAKEN_BRANCH_COST;
       else
 	{
@@ -3648,8 +3647,8 @@ vect_transform_loop (loop_vec_info loop_
        || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
 	   && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0));
 
-  if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
-      || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
+      || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
     vect_loop_versioning (loop_vinfo,
 			  !do_peeling_for_loop_bound,
 			  &cond_expr, &cond_expr_stmt_list);
Index: tree-vect-data-refs.c
===================================================================
--- tree-vect-data-refs.c	(revision 148013)
+++ tree-vect-data-refs.c	(working copy)
@@ -1196,15 +1196,15 @@ vect_enhance_data_refs_alignment (loop_v
 	}
     }
 
-  vect_versioning_for_alias_required =
-    (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
+  vect_versioning_for_alias_required 
+    = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
 
   /* Temporarily, if versioning for alias is required, we disable peeling
      until we support peeling and versioning.  Often peeling for alignment
      will require peeling for loop-bound, which in turn requires that we
      know how to adjust the loop ivs after the loop.  */
   if (vect_versioning_for_alias_required
-       || !vect_can_advance_ivs_p (loop_vinfo)
+      || !vect_can_advance_ivs_p (loop_vinfo)
       || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
     do_peeling = false;
 
@@ -1366,7 +1366,7 @@ vect_enhance_data_refs_alignment (loop_v
         }
       
       /* Versioning requires at least one misaligned data reference.  */
-      if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
+      if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
         do_versioning = false;
       else if (!do_versioning)
         VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
@@ -2356,10 +2356,9 @@ vect_create_data_ref_ptr (gimple stmt, s
       tree data_ref_base = base_name;
       fprintf (vect_dump, "create vector-pointer variable to type: ");
       print_generic_expr (vect_dump, vectype, TDF_SLIM);
-      if (TREE_CODE (data_ref_base) == VAR_DECL)
-        fprintf (vect_dump, "  vectorizing a one dimensional array ref: ");
-      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
-        fprintf (vect_dump, "  vectorizing a multidimensional array ref: ");
+      if (TREE_CODE (data_ref_base) == VAR_DECL 
+          || TREE_CODE (data_ref_base) == ARRAY_REF)
+        fprintf (vect_dump, "  vectorizing an array ref: ");
       else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
         fprintf (vect_dump, "  vectorizing a record based array ref: ");
       else if (TREE_CODE (data_ref_base) == SSA_NAME)
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c	(revision 148013)
+++ tree-vect-stmts.c	(working copy)
@@ -2903,7 +2903,9 @@ vectorizable_store (gimple stmt, gimple_
   scalar_dest = gimple_assign_lhs (stmt);
   if (TREE_CODE (scalar_dest) != ARRAY_REF
       && TREE_CODE (scalar_dest) != INDIRECT_REF
-      && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
+      && TREE_CODE (scalar_dest) != COMPONENT_REF
+      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
+      && TREE_CODE (scalar_dest) != REALPART_EXPR)
     return false;
 
   gcc_assert (gimple_assign_single_p (stmt));
@@ -3285,7 +3287,9 @@ vectorizable_load (gimple stmt, gimple_s
   code = gimple_assign_rhs_code (stmt);
   if (code != ARRAY_REF
       && code != INDIRECT_REF
-      && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
+      && code != COMPONENT_REF
+      && code != IMAGPART_EXPR
+      && code != REALPART_EXPR)
     return false;
 
   if (!STMT_VINFO_DATA_REF (stmt_info))
Index: tree-vect-slp.c
===================================================================
--- tree-vect-slp.c	(revision 148013)
+++ tree-vect-slp.c	(working copy)
@@ -912,7 +912,8 @@ vect_analyze_slp_instance (loop_vec_info
   if (unrolling_factor != 1 && !loop_vinfo)
     {
       if (vect_print_dump_info (REPORT_SLP))
-        fprintf (vect_dump, "Build SLP failed: unrolling required in BB SLP");
+        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
+                            " block SLP");
       
       return false;
     }
@@ -1367,7 +1368,7 @@ vect_slp_analyze_bb (basic_block bb)
     }
 
   if (vect_print_dump_info (REPORT_DETAILS))
-    fprintf (vect_dump, "BB will be vectorized using SLP\n");
+    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
 
   return bb_vinfo;
 }
@@ -2088,7 +2089,7 @@ vect_slp_transform_bb (basic_block bb)
   update_ssa (TODO_update_ssa);
 
   if (vect_print_dump_info (REPORT_DETAILS))
-    fprintf (vect_dump, "BB VECTORIZED\n");
+    fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
 
   destroy_bb_vec_info (bb_vinfo);
 }

^ permalink raw reply	[flat|nested] 7+ messages in thread
* [patch] Vectorizer cleanup
@ 2009-04-28  9:02 Ira Rosen
  0 siblings, 0 replies; 7+ messages in thread
From: Ira Rosen @ 2009-04-28  9:02 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 3423 bytes --]


Hi,

This is a cleanup patch towards basic block SLP. It mainly removes the word
"loop" from names that can be used in both basic block and loop-based
vectorization.

Bootstrapped with vectorization enabled on powerpc64-suse-linux and
regtested on x86_64-suse-linux.
Committed revision 146875.

Ira

ChangeLog:

      * tree-vect-loop-manip.c (vect_create_cond_for_alias_checks):
      Use REPORT_VECTORIZED_LOCATIONS instead of
      REPORT_VECTORIZED_LOOPS.
      * tree-vectorizer.c (vect_verbosity_level): Make static.
      (vect_loop_location): Rename to vect_location.
      (vect_set_verbosity_level): Update comment.
      (vect_set_dump_settings): Use REPORT_VECTORIZED_LOCATIONS
      and vect_location.
      (vectorize_loops): Fix comment. Use REPORT_VECTORIZED_LOCATIONS
      and vect_location. Use REPORT_UNVECTORIZED_LOCATIONS
      instead of REPORT_UNVECTORIZED_LOOPS.
      * tree-vectorizer.h (enum vect_def_type): Rename vect_invariant_def
and
      vect_loop_def to vect_external_def and vect_internal_def.
      (enum verbosity_levels): Rename REPORT_VECTORIZED_LOOPS
            and REPORT_UNVECTORIZED_LOOPS to
      REPORT_VECTORIZED_LOCATIONS and
      REPORT_UNVECTORIZED_LOCATIONS.
      (enum vect_relevant): Update comment. Rename vect_unused_in_loop
      and vect_used_in_loop to vect_unused_in_scope and
      vect_used_in_scope.
      (STMT_VINFO_RELEVANT_P): Use vect_unused_in_scope.
      (vect_verbosity_level): Remove declaration.
      (vect_analyze_operations): Likewise.
      (vect_analyze_stmt): Declare.
      * tree-vect-loop.c (vect_determine_vectorization_factor): Use
      REPORT_UNVECTORIZED_LOCATIONS.
      (vect_get_loop_niters): Fix indentation.
      (vect_analyze_loop_form): Use REPORT_UNVECTORIZED_LOCATIONS.
      (vect_analyze_loop_operations): New function.
      (vect_analyze_loop): Call vect_analyze_loop_operations instead of
      vect_analyze_operations.
      (vect_is_simple_reduction): Use new names.
      (vectorizable_live_operation, vect_transform_loop): Likewise.
      * tree-vect-data-refs.c (vect_check_interleaving): Add a return value
to
      specify whether the data references can be a part of interleaving
chain.
      (vect_analyze_data_ref_dependence): Use new names.
      (vect_analyze_data_refs_alignment, vect_analyze_data_refs): Likewise.
      (vect_create_addr_base_for_vector_ref): Remove redundant code.
      * tree-vect-patterns.c (widened_name_p): Use new names.
      (vect_recog_dot_prod_pattern): Likewise.
      * tree-vect-stmts.c (vect_stmt_relevant_p): Use new names.
      (process_use, vect_mark_stmts_to_be_vectorized,
      vect_model_simple_cost, vect_model_store_cost,
      vect_get_vec_def_for_operand, vect_get_vec_def_for_stmt_copy,
      vectorizable_call, vectorizable_conversion, vectorizable_assignment,
      vectorizable_operation, vectorizable_type_demotion,
      vectorizable_type_promotion, vectorizable_store, vectorizable_load,
      vectorizable_condition): Likewise.
      (vect_analyze_operations): Split into vect_analyze_loop_operations
      and ...
      (vect_analyze_stmt): ... new function.
      (new_stmt_vec_info): Use new names.
      (vect_is_simple_use): Use new names and fix comment.
      * tree-vect-slp.c (vect_get_and_check_slp_defs): Use new names.
      (vect_build_slp_tree, vect_analyze_slp, vect_schedule_slp): Likewise.

Patch:
(See attached file: patch.txt)

[-- Attachment #2: patch.txt --]
[-- Type: text/plain, Size: 62554 bytes --]

Index: tree-vect-loop-manip.c
===================================================================
--- tree-vect-loop-manip.c	(revision 146873)
+++ tree-vect-loop-manip.c	(working copy)
@@ -2290,7 +2290,7 @@ vect_create_cond_for_alias_checks (loop_
       else
 	*cond_expr = part_cond_expr;
     }
-    if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
+    if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
       fprintf (vect_dump, "created %u versioning for alias checks.\n",
                VEC_length (ddr_p, may_alias_ddrs));
 
Index: tree-vectorizer.c
===================================================================
--- tree-vectorizer.c	(revision 146873)
+++ tree-vectorizer.c	(working copy)
@@ -74,10 +74,10 @@ FILE *vect_dump;
 
 /* vect_verbosity_level set to an invalid value 
    to mark that it's uninitialized.  */
-enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
+static enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
 
 /* Loop location.  */
-LOC vect_loop_location;
+LOC vect_location;
 
 /* Bitmap of virtual variables to be renamed.  */
 bitmap vect_memsyms_to_rename;
@@ -89,7 +89,7 @@ VEC(vec_void_p,heap) *stmt_vec_info_vec;
 
 /* Function vect_set_verbosity_level.
 
-   Called from toplev.c upon detection of the
+   Called from opts.c upon detection of the
    -ftree-vectorizer-verbose=N option.  */
 
 void
@@ -132,7 +132,7 @@ vect_set_dump_settings (void)
   if (dump_file && (dump_flags & TDF_DETAILS))
     vect_verbosity_level = REPORT_DETAILS;
   else if (dump_file && (dump_flags & TDF_STATS))
-    vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
+    vect_verbosity_level = REPORT_UNVECTORIZED_LOCATIONS;
   else
     vect_verbosity_level = REPORT_NONE;
 
@@ -153,13 +153,13 @@ vect_print_dump_info (enum verbosity_lev
   if (!current_function_decl || !vect_dump)
     return false;
 
-  if (vect_loop_location == UNKNOWN_LOC)
+  if (vect_location == UNKNOWN_LOC)
     fprintf (vect_dump, "\n%s:%d: note: ",
 	     DECL_SOURCE_FILE (current_function_decl),
 	     DECL_SOURCE_LINE (current_function_decl));
   else
     fprintf (vect_dump, "\n%s:%d: note: ", 
-	     LOC_FILE (vect_loop_location), LOC_LINE (vect_loop_location));
+	     LOC_FILE (vect_location), LOC_LINE (vect_location));
 
   return true;
 }
@@ -167,7 +167,7 @@ vect_print_dump_info (enum verbosity_lev
 
 /* Function vectorize_loops.
    
-   Entry Point to loop vectorization phase.  */
+   Entry point to loop vectorization phase.  */
 
 unsigned
 vectorize_loops (void)
@@ -187,7 +187,7 @@ vectorize_loops (void)
   /* Fix the verbosity level if not defined explicitly by the user.  */
   vect_set_dump_settings ();
 
-  /* Allocate the bitmap that records which virtual variables that 
+  /* Allocate the bitmap that records which virtual variables  
      need to be renamed.  */
   vect_memsyms_to_rename = BITMAP_ALLOC (NULL);
 
@@ -203,7 +203,7 @@ vectorize_loops (void)
       {
 	loop_vec_info loop_vinfo;
 
-	vect_loop_location = find_loop_location (loop);
+	vect_location = find_loop_location (loop);
 	loop_vinfo = vect_analyze_loop (loop);
 	loop->aux = loop_vinfo;
 
@@ -213,11 +213,12 @@ vectorize_loops (void)
 	vect_transform_loop (loop_vinfo);
 	num_vectorized_loops++;
       }
-  vect_loop_location = UNKNOWN_LOC;
+
+  vect_location = UNKNOWN_LOC;
 
   statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);
-  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)
-      || (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
+  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)
+      || (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
 	  && num_vectorized_loops > 0))
     fprintf (vect_dump, "vectorized %u loops in function.\n",
 	     num_vectorized_loops);
Index: tree-vectorizer.h
===================================================================
--- tree-vectorizer.h	(revision 146873)
+++ tree-vectorizer.h	(working copy)
@@ -56,9 +56,9 @@ enum dr_alignment_support {
 /* Define type of def-use cross-iteration cycle.  */
 enum vect_def_type {
   vect_uninitialized_def = 0,
-  vect_constant_def,
-  vect_invariant_def,
-  vect_loop_def,
+  vect_constant_def = 1,
+  vect_external_def,
+  vect_internal_def,
   vect_induction_def,
   vect_reduction_def,
   vect_unknown_def_type
@@ -67,8 +67,8 @@ enum vect_def_type {
 /* Define verbosity levels.  */
 enum verbosity_levels {
   REPORT_NONE,
-  REPORT_VECTORIZED_LOOPS,
-  REPORT_UNVECTORIZED_LOOPS,
+  REPORT_VECTORIZED_LOCATIONS,
+  REPORT_UNVECTORIZED_LOCATIONS,
   REPORT_COST,
   REPORT_ALIGNMENT,
   REPORT_DR_DETAILS,
@@ -300,9 +300,10 @@ enum stmt_vec_info_type {
   loop_exit_ctrl_vec_info_type
 };
 
-/* Indicates whether/how a variable is used in the loop.  */
+/* Indicates whether/how a variable is used in the scope of loop/basic 
+   block.  */
 enum vect_relevant {
-  vect_unused_in_loop = 0,
+  vect_unused_in_scope = 0,
   vect_used_in_outer_by_reduction,
   vect_used_in_outer,
 
@@ -314,7 +315,7 @@ enum vect_relevant {
      computed.  */
   vect_used_by_reduction,
 
-  vect_used_in_loop  
+  vect_used_in_scope 
 };
 
 /* The type of vectorization that can be applied to the stmt: regular loop-based
@@ -475,7 +476,7 @@ typedef struct _stmt_vec_info {
 #define DR_GROUP_SAME_DR_STMT(S)           (S)->same_dr_stmt
 #define DR_GROUP_READ_WRITE_DEPENDENCE(S)  (S)->read_write_dep
 
-#define STMT_VINFO_RELEVANT_P(S)          ((S)->relevant != vect_unused_in_loop)
+#define STMT_VINFO_RELEVANT_P(S)          ((S)->relevant != vect_unused_in_scope)
 #define STMT_VINFO_OUTSIDE_OF_LOOP_COST(S) (S)->cost.outside_of_loop
 #define STMT_VINFO_INSIDE_OF_LOOP_COST(S)  (S)->cost.inside_of_loop
 
@@ -693,12 +694,9 @@ known_alignment_for_access_p (struct dat
 extern FILE *vect_dump;
 extern LOC vect_loop_location;
 
-extern enum verbosity_levels vect_verbosity_level;
-
 /* Bitmap of virtual variables to be renamed.  */
 extern bitmap vect_memsyms_to_rename;
 
-
 /*-----------------------------------------------------------------*/
 /* Function prototypes.                                            */
 /*-----------------------------------------------------------------*/
@@ -744,7 +742,7 @@ extern tree vect_get_vec_def_for_stmt_co
 extern bool vect_transform_stmt (gimple, gimple_stmt_iterator *,
                                  bool *, slp_tree, slp_instance);
 extern void vect_remove_stores (gimple);
-extern bool vect_analyze_operations (loop_vec_info);
+extern bool vect_analyze_stmt (gimple, bool *);
 
 /* In tree-vect-data-refs.c.  */
 extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int);
Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c	(revision 146873)
+++ tree-vect-loop.c	(working copy)
@@ -212,7 +212,7 @@ vect_determine_vectorization_factor (loo
 	      vectype = get_vectype_for_scalar_type (scalar_type);
 	      if (!vectype)
 		{
-		  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+		  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
 		    {
 		      fprintf (vect_dump,
 		               "not vectorized: unsupported data-type ");
@@ -262,7 +262,7 @@ vect_determine_vectorization_factor (loo
 
 	  if (gimple_get_lhs (stmt) == NULL_TREE)
 	    {
-	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
 		{
 	          fprintf (vect_dump, "not vectorized: irregular stmt.");
 		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
@@ -272,7 +272,7 @@ vect_determine_vectorization_factor (loo
 
 	  if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
 	    {
-	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
 	        {
 	          fprintf (vect_dump, "not vectorized: vector stmt in loop:");
 	          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
@@ -306,7 +306,7 @@ vect_determine_vectorization_factor (loo
 	      vectype = get_vectype_for_scalar_type (scalar_type);
 	      if (!vectype)
 		{
-		  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+		  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
 		    {
 		      fprintf (vect_dump, 
 			       "not vectorized: unsupported data-type ");
@@ -339,7 +339,7 @@ vect_determine_vectorization_factor (loo
     fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
   if (vectorization_factor <= 1)
     {
-      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
         fprintf (vect_dump, "not vectorized: unsupported data-type");
       return false;
     }
@@ -533,7 +533,6 @@ vect_analyze_scalar_cycles (loop_vec_inf
     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
 }
 
-
 /* Function vect_get_loop_niters.
 
    Determine how many iterations the loop is executed.
@@ -557,10 +556,10 @@ vect_get_loop_niters (struct loop *loop,
       *number_of_iterations = niters;
 
       if (vect_print_dump_info (REPORT_DETAILS))
-	{
-	  fprintf (vect_dump, "==> get_loop_niters:" );
-	  print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
-	}
+        {
+          fprintf (vect_dump, "==> get_loop_niters:" );
+          print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
+        }
     }
 
   return get_loop_exit_condition (loop);
@@ -1025,7 +1024,7 @@ vect_analyze_loop_form (struct loop *loo
     }
   else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
     {
-      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
         fprintf (vect_dump, "not vectorized: number of iterations = 0.");
       if (inner_loop_vinfo)
         destroy_loop_vec_info (inner_loop_vinfo, false);
@@ -1047,6 +1046,237 @@ vect_analyze_loop_form (struct loop *loo
   return loop_vinfo;
 }
 
+
+/* Function vect_analyze_loop_operations.
+
+   Scan the loop stmts and make sure they are all vectorizable.  */
+
+static bool
+vect_analyze_loop_operations (loop_vec_info loop_vinfo)
+{
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+  int nbbs = loop->num_nodes;
+  gimple_stmt_iterator si;
+  unsigned int vectorization_factor = 0;
+  int i;
+  gimple phi;
+  stmt_vec_info stmt_info;
+  bool need_to_vectorize = false;
+  int min_profitable_iters;
+  int min_scalar_loop_bound;
+  unsigned int th;
+  bool only_slp_in_loop = true, ok;
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "=== vect_analyze_loop_operations ===");
+
+  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+
+  for (i = 0; i < nbbs; i++)
+    {
+      basic_block bb = bbs[i];
+
+      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
+        {
+          phi = gsi_stmt (si);
+          ok = true;
+
+          stmt_info = vinfo_for_stmt (phi);
+          if (vect_print_dump_info (REPORT_DETAILS))
+            {
+              fprintf (vect_dump, "examining phi: ");
+              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
+            }
+
+          if (! is_loop_header_bb_p (bb))
+            {
+              /* inner-loop loop-closed exit phi in outer-loop vectorization
+                 (i.e. a phi in the tail of the outer-loop).
+                 FORNOW: we currently don't support the case that these phis
+                 are not used in the outerloop, cause this case requires
+                 to actually do something here.  */
+              if (!STMT_VINFO_RELEVANT_P (stmt_info)
+                  || STMT_VINFO_LIVE_P (stmt_info))
+                {
+                  if (vect_print_dump_info (REPORT_DETAILS))
+                    fprintf (vect_dump,
+                             "Unsupported loop-closed phi in outer-loop.");
+                  return false;
+                }
+              continue;
+            }
+
+          gcc_assert (stmt_info);
+
+          if (STMT_VINFO_LIVE_P (stmt_info))
+            {
+              /* FORNOW: not yet supported.  */
+              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+                fprintf (vect_dump, "not vectorized: value used after loop.");
+              return false;
+            }
+
+          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
+              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
+            {
+              /* A scalar-dependence cycle that we don't support.  */
+              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+                fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
+              return false;
+            }
+
+          if (STMT_VINFO_RELEVANT_P (stmt_info))
+            {
+              need_to_vectorize = true;
+              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
+                ok = vectorizable_induction (phi, NULL, NULL);
+            }
+
+          if (!ok)
+            {
+              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+                {
+                  fprintf (vect_dump,
+                           "not vectorized: relevant phi not supported: ");
+                  print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
+                }
+              return false;
+            }
+        }
+
+      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+        {
+          gimple stmt = gsi_stmt (si);
+          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+
+          gcc_assert (stmt_info);
+
+	  if (!vect_analyze_stmt (stmt, &need_to_vectorize))
+	    return false;
+
+          if (STMT_VINFO_RELEVANT_P (stmt_info) && !PURE_SLP_STMT (stmt_info))
+            /* STMT needs both SLP and loop-based vectorization.  */
+            only_slp_in_loop = false;
+        } 
+    } /* bbs */
+
+  /* All operations in the loop are either irrelevant (deal with loop
+     control, or dead), or only used outside the loop and can be moved
+     out of the loop (e.g. invariants, inductions).  The loop can be
+     optimized away by scalar optimizations.  We're better off not
+     touching this loop.  */
+  if (!need_to_vectorize)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump,
+                 "All the computation can be taken out of the loop.");
+      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+        fprintf (vect_dump,
+                 "not vectorized: redundant loop. no profit to vectorize.");
+      return false;
+    }
+
+  /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+     vectorization factor of the loop is the unrolling factor required by the
+     SLP instances.  If that unrolling factor is 1, we say, that we perform
+     pure SLP on loop - cross iteration parallelism is not exploited.  */
+  if (only_slp_in_loop)
+    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+  else
+    vectorization_factor = least_common_multiple (vectorization_factor,
+                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
+
+  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+
+  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+      && vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump,
+        "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
+        vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
+
+  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+      && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
+    {
+      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+        fprintf (vect_dump, "not vectorized: iteration count too small.");
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump,"not vectorized: iteration count smaller than "
+                 "vectorization factor.");
+      return false;
+    }
+
+  /* Analyze cost. Decide if worth while to vectorize.  */
+
+  /* Once VF is set, SLP costs should be updated since the number of created
+     vector stmts depends on VF.  */
+  vect_update_slp_costs_according_to_vf (loop_vinfo);
+
+  min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
+  LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
+
+  if (min_profitable_iters < 0)
+    {
+      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+        fprintf (vect_dump, "not vectorized: vectorization not profitable.");
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "not vectorized: vector version will never be "
+                 "profitable.");
+      return false;
+    }
+
+  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
+                            * vectorization_factor) - 1);
+
+  /* Use the cost model only if it is more conservative than user specified
+     threshold.  */
+
+  th = (unsigned) min_scalar_loop_bound;
+  if (min_profitable_iters
+      && (!min_scalar_loop_bound
+          || min_profitable_iters > min_scalar_loop_bound))
+    th = (unsigned) min_profitable_iters;
+
+  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+      && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
+    {
+      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+        fprintf (vect_dump, "not vectorized: vectorization not "
+                 "profitable.");
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "not vectorized: iteration count smaller than "
+                 "user specified loop bound parameter or minimum "
+                 "profitable iterations (whichever is more conservative).");
+      return false;
+    }
+
+  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
+      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "epilog loop required.");
+      if (!vect_can_advance_ivs_p (loop_vinfo))
+        {
+          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+            fprintf (vect_dump,
+                     "not vectorized: can't create epilog loop 1.");
+          return false;
+        }
+      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
+        {
+          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+            fprintf (vect_dump,
+                     "not vectorized: can't create epilog loop 2.");
+          return false;
+        }
+    }
+
+  return true;
+}
+
+
 /* Function vect_analyze_loop.
 
    Apply a set of analyses on LOOP, and create a loop_vec_info struct
@@ -1197,7 +1427,7 @@ vect_analyze_loop (struct loop *loop)
   /* Scan all the operations in the loop and make sure they are
      vectorizable.  */
 
-  ok = vect_analyze_operations (loop_vinfo);
+  ok = vect_analyze_loop_operations (loop_vinfo);
   if (!ok)
     {
       if (vect_print_dump_info (REPORT_DETAILS))
@@ -1445,7 +1675,7 @@ vect_is_simple_reduction (loop_vec_info 
 
 
   /* Check that one def is the reduction def, defined by PHI,
-     the other def is either defined in the loop ("vect_loop_def"),
+     the other def is either defined in the loop ("vect_internal_def"),
      or it's an induction (defined by a loop-header phi-node).  */
 
   if (def2 == phi
@@ -1453,7 +1683,7 @@ vect_is_simple_reduction (loop_vec_info 
       && (is_gimple_assign (def1)
 	  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def
 	  || (gimple_code (def1) == GIMPLE_PHI
-	      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_loop_def
+	      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_internal_def
 	      && !is_loop_header_bb_p (gimple_bb (def1)))))
     {
       if (vect_print_dump_info (REPORT_DETAILS))
@@ -1465,7 +1695,7 @@ vect_is_simple_reduction (loop_vec_info 
 	   && (is_gimple_assign (def2)
 	       || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_induction_def
 	       || (gimple_code (def2) == GIMPLE_PHI
-		   && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_loop_def
+		   && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_internal_def
 		   && !is_loop_header_bb_p (gimple_bb (def2)))))
     {
       /* Swap operands (just for simplicity - so that the rest of the code
@@ -2895,7 +3125,7 @@ vectorizable_reduction (gimple stmt, gim
 
   /* Reductions that are not used even in an enclosing outer-loop,
      are expected to be "live" (used out of the loop).  */
-  if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop
+  if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
       && !STMT_VINFO_LIVE_P (stmt_info))
     return false;
 
@@ -2970,14 +3200,15 @@ vectorizable_reduction (gimple stmt, gim
       is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &def_stmt,
 					  &def, &dt);
       gcc_assert (is_simple_use);
-      if (dt != vect_loop_def
-	  && dt != vect_invariant_def
+      if (dt != vect_internal_def
+	  && dt != vect_external_def
 	  && dt != vect_constant_def
 	  && dt != vect_induction_def)
 	return false;
     }
 
-  is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &def_stmt, &def, &dt);
+  is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &def_stmt, &def, 
+                                      &dt);
   gcc_assert (is_simple_use);
   gcc_assert (dt == vect_reduction_def);
   gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
@@ -3140,7 +3371,7 @@ vectorizable_reduction (gimple stmt, gim
    from the vectorized reduction operation generated in the previous iteration.
   */
 
-  if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop)
+  if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
     {
       single_defuse_cycle = true;
       epilog_copies = 1;
@@ -3361,7 +3592,7 @@ vectorizable_live_operation (gimple stmt
           return false;
         }
 
-      if (dt != vect_invariant_def && dt != vect_constant_def)
+      if (dt != vect_external_def && dt != vect_constant_def)
         return false;
     }
 
@@ -3577,8 +3808,8 @@ vect_transform_loop (loop_vec_info loop_
      until all the loops have been transformed?  */
   update_ssa (TODO_update_ssa);
 
-  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
+  if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
     fprintf (vect_dump, "LOOP VECTORIZED.");
-  if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
+  if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
     fprintf (vect_dump, "OUTER LOOP VECTORIZED.");
 }
Index: tree-vect-data-refs.c
===================================================================
--- tree-vect-data-refs.c	(revision 146873)
+++ tree-vect-data-refs.c	(working copy)
@@ -321,7 +321,7 @@ vect_equal_offsets (tree offset1, tree o
    Check if DRA and DRB are a part of interleaving. In case they are, insert
    DRA and DRB in an interleaving chain.  */
 
-static void
+static bool 
 vect_check_interleaving (struct data_reference *dra,
 			 struct data_reference *drb)
 {
@@ -337,12 +337,13 @@ vect_check_interleaving (struct data_ref
       || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
       || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) 
       || DR_IS_READ (dra) != DR_IS_READ (drb))
-    return;
+    return false;
 
   /* Check:
      1. data-refs are of the same type
      2. their steps are equal
-     3. the step is greater than the difference between data-refs' inits  */
+     3. the step (if greater than zero) is greater than the difference between
+        data-refs' inits.  */
   type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
   type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
 
@@ -350,7 +351,7 @@ vect_check_interleaving (struct data_ref
       || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
       || !types_compatible_p (TREE_TYPE (DR_REF (dra)), 
                               TREE_TYPE (DR_REF (drb))))
-    return;
+    return false;
 
   init_a = TREE_INT_CST_LOW (DR_INIT (dra));
   init_b = TREE_INT_CST_LOW (DR_INIT (drb));
@@ -363,7 +364,7 @@ vect_check_interleaving (struct data_ref
       diff_mod_size = (init_a - init_b) % type_size_a;
 
       if ((init_a - init_b) > step)
-         return; 
+         return false;
 
       if (diff_mod_size == 0)
 	{
@@ -375,7 +376,7 @@ vect_check_interleaving (struct data_ref
 	      fprintf (vect_dump, " and ");
 	      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
 	    }
-	  return;
+	  return true;
 	} 
     }
   else 
@@ -385,7 +386,7 @@ vect_check_interleaving (struct data_ref
       diff_mod_size = (init_b - init_a) % type_size_a;
 
       if ((init_b - init_a) > step)
-         return;
+         return false;
 
       if (diff_mod_size == 0)
 	{
@@ -397,9 +398,11 @@ vect_check_interleaving (struct data_ref
 	      fprintf (vect_dump, " and ");
 	      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
 	    }
-	  return;
+	  return true;
 	} 
     }
+
+  return false;
 }
 
 /* Check if data references pointed by DR_I and DR_J are same or
@@ -584,7 +587,7 @@ vect_analyze_data_ref_dependence (struct
 	  continue;
 	}
 
-      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
 	{
 	  fprintf (vect_dump,
 		   "not vectorized, possible dependence "
@@ -868,7 +871,7 @@ vect_verify_datarefs_alignment (loop_vec
       supportable_dr_alignment = vect_supportable_dr_alignment (dr);
       if (!supportable_dr_alignment)
         {
-          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
             {
               if (DR_IS_READ (dr))
                 fprintf (vect_dump, 
@@ -1347,14 +1350,14 @@ vect_enhance_data_refs_alignment (loop_v
    Return FALSE if a data reference is found that cannot be vectorized.  */
 
 bool
-vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
+vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
 {
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
 
   if (!vect_compute_data_refs_alignment (loop_vinfo))
     {
-      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
 	fprintf (vect_dump, 
 		 "not vectorized: can't calculate alignment for data ref.");
       return false;
@@ -1663,7 +1666,7 @@ vect_analyze_data_ref_accesses (loop_vec
   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
     if (!vect_analyze_data_ref_access (dr))
       {
-	if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+	if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
 	  fprintf (vect_dump, "not vectorized: complicated access pattern.");
 	return false;
       }
@@ -1787,7 +1790,7 @@ vect_analyze_data_refs (loop_vec_info lo
    
       if (!dr || !DR_REF (dr))
         {
-          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
 	    fprintf (vect_dump, "not vectorized: unhandled data-ref ");
           return false;
         }
@@ -1799,7 +1802,7 @@ vect_analyze_data_refs (loop_vec_info lo
       if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
           || !DR_STEP (dr))
         {
-          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
             {
               fprintf (vect_dump, "not vectorized: data ref analysis failed ");
               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
@@ -1809,7 +1812,7 @@ vect_analyze_data_refs (loop_vec_info lo
 
       if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
         {
-          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
             fprintf (vect_dump, "not vectorized: base addr of dr is a "
                      "constant");
           return false;
@@ -1930,7 +1933,7 @@ vect_analyze_data_refs (loop_vec_info lo
 
       if (STMT_VINFO_DATA_REF (stmt_info))
         {
-          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
             {
               fprintf (vect_dump,
                        "not vectorized: more than one data ref in stmt: ");
@@ -1938,6 +1941,7 @@ vect_analyze_data_refs (loop_vec_info lo
             }
           return false;
         }
+
       STMT_VINFO_DATA_REF (stmt_info) = dr;
      
       /* Set vectype for STMT.  */
@@ -1946,7 +1950,7 @@ vect_analyze_data_refs (loop_vec_info lo
                 get_vectype_for_scalar_type (scalar_type);
       if (!STMT_VINFO_VECTYPE (stmt_info)) 
         {
-          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
             {
               fprintf (vect_dump,
                        "not vectorized: no vectype for stmt: ");
@@ -2056,7 +2060,7 @@ vect_create_addr_base_for_vector_ref (gi
   gimple_seq seq = NULL;
   tree base_offset = unshare_expr (DR_OFFSET (dr));
   tree init = unshare_expr (DR_INIT (dr));
-  tree vect_ptr_type, addr_expr2;
+  tree vect_ptr_type;
   tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
 
   gcc_assert (loop);
@@ -2108,15 +2112,12 @@ vect_create_addr_base_for_vector_ref (gi
 
   vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
 
-  /* addr_expr = addr_base */
+  vec_stmt = fold_convert (vect_ptr_type, addr_base);
   addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                      get_name (base_name));
+
   add_referenced_var (addr_expr);
-  vec_stmt = fold_convert (vect_ptr_type, addr_base);
-  addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
-                                     get_name (base_name));
-  add_referenced_var (addr_expr2);
-  vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr2);
+  vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
   gimple_seq_add_seq (new_stmt_list, seq);
 
   if (vect_print_dump_info (REPORT_DETAILS))
@@ -2124,6 +2125,7 @@ vect_create_addr_base_for_vector_ref (gi
       fprintf (vect_dump, "created ");
       print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
     }
+
   return vec_stmt;
 }
 
Index: tree-vect-patterns.c
===================================================================
--- tree-vect-patterns.c	(revision 146873)
+++ tree-vect-patterns.c	(working copy)
@@ -81,8 +81,8 @@ widened_name_p (tree name, gimple use_st
   if (!vect_is_simple_use (name, loop_vinfo, def_stmt, &def, &dt))
     return false;
 
-  if (dt != vect_loop_def
-      && dt != vect_invariant_def && dt != vect_constant_def)
+  if (dt != vect_internal_def
+      && dt != vect_external_def && dt != vect_constant_def)
     return false;
 
   if (! *def_stmt)
@@ -259,7 +259,7 @@ vect_recog_dot_prod_pattern (gimple last
     return NULL; 
   stmt_vinfo = vinfo_for_stmt (stmt);
   gcc_assert (stmt_vinfo);
-  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_loop_def)
+  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
     return NULL;
   if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
     return NULL;
@@ -272,7 +272,7 @@ vect_recog_dot_prod_pattern (gimple last
         return NULL;
       stmt_vinfo = vinfo_for_stmt (stmt);
       gcc_assert (stmt_vinfo);
-      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_loop_def);
+      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
       oprnd00 = gimple_assign_rhs1 (stmt);
       oprnd01 = gimple_assign_rhs2 (stmt);
     }
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c	(revision 146873)
+++ tree-vect-stmts.c	(working copy)
@@ -116,13 +116,14 @@ vect_stmt_relevant_p (gimple stmt, loop_
   use_operand_p use_p;
   def_operand_p def_p;
 
-  *relevant = vect_unused_in_loop;
+  *relevant = vect_unused_in_scope;
   *live_p = false;
 
   /* cond stmt other than loop exit cond.  */
   if (is_ctrl_stmt (stmt) 
-      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type) 
-    *relevant = vect_used_in_loop;
+      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
+         != loop_exit_ctrl_vec_info_type)
+    *relevant = vect_used_in_scope;
 
   /* changing memory.  */
   if (gimple_code (stmt) != GIMPLE_PHI)
@@ -130,7 +131,7 @@ vect_stmt_relevant_p (gimple stmt, loop_
       {
 	if (vect_print_dump_info (REPORT_DETAILS))
 	  fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
-	*relevant = vect_used_in_loop;
+	*relevant = vect_used_in_scope;
       }
 
   /* uses outside the loop.  */
@@ -249,7 +250,7 @@ process_use (gimple stmt, tree use, loop
 
   if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
     { 
-      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
         fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
       return false;
     }
@@ -284,7 +285,7 @@ process_use (gimple stmt, tree use, loop
 	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
       gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
       gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo) 
-		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
+		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
       return true;
     }
 
@@ -301,18 +302,18 @@ process_use (gimple stmt, tree use, loop
 	fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
       switch (relevant)
 	{
-	case vect_unused_in_loop:
+	case vect_unused_in_scope:
 	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
-			vect_used_by_reduction : vect_unused_in_loop;
+			vect_used_by_reduction : vect_unused_in_scope;
 	  break;
 	case vect_used_in_outer_by_reduction:
 	  relevant = vect_used_by_reduction;
 	  break;
 	case vect_used_in_outer:
-	  relevant = vect_used_in_loop;
+	  relevant = vect_used_in_scope;
 	  break;
 	case vect_used_by_reduction: 
-	case vect_used_in_loop:
+	case vect_used_in_scope:
 	  break;
 
 	default:
@@ -333,9 +334,9 @@ process_use (gimple stmt, tree use, loop
 	fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
       switch (relevant)
         {
-        case vect_unused_in_loop:
+        case vect_unused_in_scope:
           relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
-                        vect_used_in_outer_by_reduction : vect_unused_in_loop;
+                        vect_used_in_outer_by_reduction : vect_unused_in_scope;
           break;
 
         case vect_used_in_outer_by_reduction:
@@ -346,7 +347,7 @@ process_use (gimple stmt, tree use, loop
           relevant = vect_used_in_outer_by_reduction;
           break;
 
-        case vect_used_in_loop:
+        case vect_used_in_scope:
           relevant = vect_used_in_outer;
           break;
 
@@ -468,18 +469,18 @@ vect_mark_stmts_to_be_vectorized (loop_v
 	 Here are the expected values of "relevant" for reduction phis/stmts:
 
 	 relevance:				phi	stmt
-	 vect_unused_in_loop				ok
+	 vect_unused_in_scope				ok
 	 vect_used_in_outer_by_reduction	ok	ok
 	 vect_used_in_outer			ok	ok
 	 vect_used_by_reduction			ok
-	 vect_used_in_loop 						  */
+	 vect_used_in_scope				              */
 
       if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
         {
 	  enum vect_relevant tmp_relevant = relevant;
 	  switch (tmp_relevant)
 	    {
-	    case vect_unused_in_loop:
+	    case vect_unused_in_scope:
 	      gcc_assert (gimple_code (stmt) != GIMPLE_PHI);
 	      relevant = vect_used_by_reduction;
 	      break;
@@ -496,7 +497,7 @@ vect_mark_stmts_to_be_vectorized (loop_v
 	      if (gimple_code (stmt) == GIMPLE_PHI)
 		break;
 	      /* fall through */
-	    case vect_used_in_loop:
+	    case vect_used_in_scope:
 	    default:
 	      if (vect_print_dump_info (REPORT_DETAILS))
 	        fprintf (vect_dump, "unsupported use of reduction.");
@@ -571,7 +572,7 @@ vect_model_simple_cost (stmt_vec_info st
   /* FORNOW: Assuming maximum 2 args per stmts.  */
   for (i = 0; i < 2; i++)
     {
-      if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def)
+      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
 	outside_cost += TARG_SCALAR_TO_VEC_COST; 
     }
   
@@ -619,7 +620,7 @@ vect_model_store_cost (stmt_vec_info stm
   if (PURE_SLP_STMT (stmt_info))
     return;
 
-  if (dt == vect_constant_def || dt == vect_invariant_def)
+  if (dt == vect_constant_def || dt == vect_external_def)
     outside_cost = TARG_SCALAR_TO_VEC_COST;
 
   /* Strided access?  */
@@ -905,7 +906,7 @@ vect_get_vec_def_for_operand (tree op, g
       }
 
     /* Case 2: operand is defined outside the loop - loop invariant.  */
-    case vect_invariant_def:
+    case vect_external_def:
       {
 	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
 	gcc_assert (vector_type);
@@ -929,7 +930,7 @@ vect_get_vec_def_for_operand (tree op, g
       }
 
     /* Case 3: operand is defined inside the loop.  */
-    case vect_loop_def:
+    case vect_internal_def:
       {
 	if (scalar_def) 
 	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;
@@ -1042,7 +1043,7 @@ vect_get_vec_def_for_stmt_copy (enum vec
   stmt_vec_info def_stmt_info;
 
   /* Do nothing; can reuse same def.  */
-  if (dt == vect_invariant_def || dt == vect_constant_def )
+  if (dt == vect_external_def || dt == vect_constant_def )
     return vec_oprnd;
 
   vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
@@ -1190,7 +1191,7 @@ vectorizable_call (gimple stmt, gimple_s
   if (!STMT_VINFO_RELEVANT_P (stmt_info))
     return false;
 
-  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
+  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
     return false;
 
   /* FORNOW: SLP not supported.  */
@@ -1508,7 +1509,7 @@ vectorizable_conversion (gimple stmt, gi
   if (!STMT_VINFO_RELEVANT_P (stmt_info))
     return false;
 
-  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
+  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
     return false;
 
   if (!is_gimple_assign (stmt))
@@ -1771,7 +1772,7 @@ vectorizable_assignment (gimple stmt, gi
   if (!STMT_VINFO_RELEVANT_P (stmt_info))
     return false;
 
-  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
+  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
     return false;
 
   /* Is vectorizable assignment?  */
@@ -1885,7 +1886,7 @@ vectorizable_operation (gimple stmt, gim
   if (!STMT_VINFO_RELEVANT_P (stmt_info))
     return false;
 
-  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
+  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
     return false;
 
   /* Is STMT a vectorizable binary/unary operation?   */
@@ -1947,7 +1948,7 @@ vectorizable_operation (gimple stmt, gim
       shift_p = true;
 
       /* vector shifted by vector */
-      if (dt[1] == vect_loop_def)
+      if (dt[1] == vect_internal_def)
 	{
 	  optab = optab_for_tree_code (code, vectype, optab_vector);
 	  if (vect_print_dump_info (REPORT_DETAILS))
@@ -1956,7 +1957,7 @@ vectorizable_operation (gimple stmt, gim
 
       /* See if the machine has a vector shifted by scalar insn and if not
 	 then see if it has a vector shifted by vector insn */
-      else if (dt[1] == vect_constant_def || dt[1] == vect_invariant_def)
+      else if (dt[1] == vect_constant_def || dt[1] == vect_external_def)
 	{
 	  optab = optab_for_tree_code (code, vectype, optab_scalar);
 	  if (optab
@@ -2323,7 +2324,7 @@ vectorizable_type_demotion (gimple stmt,
   if (!STMT_VINFO_RELEVANT_P (stmt_info))
     return false;
 
-  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
+  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
     return false;
 
   /* Is STMT a vectorizable type-demotion operation?  */
@@ -2590,7 +2591,7 @@ vectorizable_type_promotion (gimple stmt
   if (!STMT_VINFO_RELEVANT_P (stmt_info))
     return false;
 
-  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
+  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
     return false;
 
   /* Is STMT a vectorizable type-promotion operation?  */
@@ -2836,7 +2837,7 @@ vectorizable_store (gimple stmt, gimple_
   if (!STMT_VINFO_RELEVANT_P (stmt_info))
     return false;
 
-  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
+  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
     return false;
 
   /* Is vectorizable store? */
@@ -3204,7 +3205,7 @@ vectorizable_load (gimple stmt, gimple_s
   if (!STMT_VINFO_RELEVANT_P (stmt_info))
     return false;
 
-  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
+  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
     return false;
 
   /* Is vectorizable load? */
@@ -3703,7 +3704,7 @@ vectorizable_condition (gimple stmt, gim
   if (!STMT_VINFO_RELEVANT_P (stmt_info))
     return false;
 
-  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
+  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
     return false;
 
   /* FORNOW: SLP not supported.  */
@@ -3803,336 +3804,125 @@ vectorizable_condition (gimple stmt, gim
 }
 
 
-/* Function vect_analyze_operations.
-
-   Scan the loop stmts and make sure they are all vectorizable.  */
+/* Make sure the statement is vectorizable.  */
 
 bool
-vect_analyze_operations (loop_vec_info loop_vinfo)
+vect_analyze_stmt (gimple stmt, bool *need_to_vectorize)
 {
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
-  int nbbs = loop->num_nodes;
-  gimple_stmt_iterator si;
-  unsigned int vectorization_factor = 0;
-  int i;
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
   bool ok;
-  gimple phi;
-  stmt_vec_info stmt_info;
-  bool need_to_vectorize = false;
-  int min_profitable_iters;
-  int min_scalar_loop_bound;
-  unsigned int th;
-  bool only_slp_in_loop = true;
 
   if (vect_print_dump_info (REPORT_DETAILS))
-    fprintf (vect_dump, "=== vect_analyze_operations ===");
-
-  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
-  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-
-  for (i = 0; i < nbbs; i++)
-    {
-      basic_block bb = bbs[i];
-
-      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
-        {
-          phi = gsi_stmt (si);
-          ok = true;
-
-          stmt_info = vinfo_for_stmt (phi);
-          if (vect_print_dump_info (REPORT_DETAILS))
-            {
-              fprintf (vect_dump, "examining phi: ");
-              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
-            }
-
-          if (! is_loop_header_bb_p (bb))
-            {
-              /* inner-loop loop-closed exit phi in outer-loop vectorization
-                 (i.e. a phi in the tail of the outer-loop).
-                 FORNOW: we currently don't support the case that these phis
-                 are not used in the outerloop, cause this case requires
-                 to actually do something here.  */
-              if (!STMT_VINFO_RELEVANT_P (stmt_info)
-                  || STMT_VINFO_LIVE_P (stmt_info))
-                {
-                  if (vect_print_dump_info (REPORT_DETAILS))
-                    fprintf (vect_dump,
-                             "Unsupported loop-closed phi in outer-loop.");
-                  return false;
-                }
-              continue;
-            }
-
-          gcc_assert (stmt_info);
-
-          if (STMT_VINFO_LIVE_P (stmt_info))
-            {
-              /* FORNOW: not yet supported.  */
-              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
-                fprintf (vect_dump, "not vectorized: value used after loop.");
-              return false;
-            }
-
-          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
-              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
-            {
-              /* A scalar-dependence cycle that we don't support.  */
-              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
-                fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
-              return false;
-            }
-
-          if (STMT_VINFO_RELEVANT_P (stmt_info))
-            {
-              need_to_vectorize = true;
-              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
-                ok = vectorizable_induction (phi, NULL, NULL);
-            }
-
-          if (!ok)
-            {
-              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
-                {
-                  fprintf (vect_dump,
-                           "not vectorized: relevant phi not supported: ");
-                  print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
-                }
-              return false;
-            }
-        }
-
-      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
-        {
-          gimple stmt = gsi_stmt (si);
-          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-          enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
-
-          if (vect_print_dump_info (REPORT_DETAILS))
-            {
-              fprintf (vect_dump, "==> examining statement: ");
-              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
-            }
-
-          gcc_assert (stmt_info);
-
-          /* skip stmts which do not need to be vectorized.
-             this is expected to include:
-             - the COND_EXPR which is the loop exit condition
-             - any LABEL_EXPRs in the loop
-             - computations that are used only for array indexing or loop
-             control  */
-
-          if (!STMT_VINFO_RELEVANT_P (stmt_info)
-              && !STMT_VINFO_LIVE_P (stmt_info))
-            {
-              if (vect_print_dump_info (REPORT_DETAILS))
-                fprintf (vect_dump, "irrelevant.");
-              continue;
-            }
-
-          switch (STMT_VINFO_DEF_TYPE (stmt_info))
-            {
-            case vect_loop_def:
-              break;
-
-            case vect_reduction_def:
-              gcc_assert (relevance == vect_used_in_outer
-                          || relevance == vect_used_in_outer_by_reduction
-                          || relevance == vect_unused_in_loop);
-              break;
-
-            case vect_induction_def:
-            case vect_constant_def:
-            case vect_invariant_def:
-            case vect_unknown_def_type:
-            default:
-              gcc_unreachable ();
-            }
-
-          if (STMT_VINFO_RELEVANT_P (stmt_info))
-            {
-              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
-              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
-              need_to_vectorize = true;
-            }
-
-          ok = true;
-          if (STMT_VINFO_RELEVANT_P (stmt_info)
-              || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
-            ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
-                || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
-                || vectorizable_conversion (stmt, NULL, NULL, NULL)
-                || vectorizable_operation (stmt, NULL, NULL, NULL)
-                || vectorizable_assignment (stmt, NULL, NULL, NULL)
-                || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
-                || vectorizable_call (stmt, NULL, NULL)
-                || vectorizable_store (stmt, NULL, NULL, NULL)
-                || vectorizable_condition (stmt, NULL, NULL)
-                || vectorizable_reduction (stmt, NULL, NULL));
-
-          if (!ok)
-            {
-              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
-                {
-                  fprintf (vect_dump, "not vectorized: relevant stmt not ");
-                  fprintf (vect_dump, "supported: ");
-                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
-                }
-              return false;
-            }
-
-          /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
-             need extra handling, except for vectorizable reductions.  */
-          if (STMT_VINFO_LIVE_P (stmt_info)
-              && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
-            ok = vectorizable_live_operation (stmt, NULL, NULL);
-
-          if (!ok)
-            {
-              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
-                {
-                  fprintf (vect_dump, "not vectorized: live stmt not ");
-                  fprintf (vect_dump, "supported: ");
-                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
-                }
-              return false;
-            }
-
-          if (!PURE_SLP_STMT (stmt_info))
-            {
-              /* STMT needs loop-based vectorization.  */
-              only_slp_in_loop = false;
-
-              /* Groups of strided accesses whose size is not a power of 2 are
-                 not vectorizable yet using loop-vectorization. Therefore, if
-                 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
-                 both SLPed and loop-based vectorized), the loop cannot be
-                 vectorized.  */
-              if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
-                  && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
-                                  DR_GROUP_FIRST_DR (stmt_info)))) == -1)
-                {
-                  if (vect_print_dump_info (REPORT_DETAILS))
-                    {
-                      fprintf (vect_dump, "not vectorized: the size of group "
-                               "of strided accesses is not a power of 2");
-                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
-                    }
-                  return false;
-                }
-            }
-        } /* stmts in bb */
-    } /* bbs */
-
-  /* All operations in the loop are either irrelevant (deal with loop
-     control, or dead), or only used outside the loop and can be moved
-     out of the loop (e.g. invariants, inductions).  The loop can be
-     optimized away by scalar optimizations.  We're better off not
-     touching this loop.  */
-  if (!need_to_vectorize)
     {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump,
-                 "All the computation can be taken out of the loop.");
-      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
-        fprintf (vect_dump,
-                 "not vectorized: redundant loop. no profit to vectorize.");
-      return false;
+      fprintf (vect_dump, "==> examining statement: ");
+      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
     }
 
-  /* If all the stmts in the loop can be SLPed, we perform only SLP, and
-     vectorization factor of the loop is the unrolling factor required by the
-     SLP instances.  If that unrolling factor is 1, we say, that we perform
-     pure SLP on loop - cross iteration parallelism is not exploited.  */
-  if (only_slp_in_loop)
-    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
-  else
-    vectorization_factor = least_common_multiple (vectorization_factor,
-                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
-
-  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
-
-  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-      && vect_print_dump_info (REPORT_DETAILS))
-    fprintf (vect_dump,
-        "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
-        vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
+  /* Skip stmts that do not need to be vectorized.  In loops this is expected
+     to include:
+     - the COND_EXPR which is the loop exit condition
+     - any LABEL_EXPRs in the loop
+     - computations that are used only for array indexing or loop control.
+     In basic blocks we only analyze statements that are a part of some SLP
+     instance, therefore, all the statements are relevant.  */
 
-  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-      && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
+  if (!STMT_VINFO_RELEVANT_P (stmt_info)
+      && !STMT_VINFO_LIVE_P (stmt_info))
     {
-      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
-        fprintf (vect_dump, "not vectorized: iteration count too small.");
       if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump,"not vectorized: iteration count smaller than "
-                 "vectorization factor.");
-      return false;
-    }
+        fprintf (vect_dump, "irrelevant.");
 
-  /* Analyze cost. Decide if worth while to vectorize.  */
+      return true;
+    }
 
-  /* Once VF is set, SLP costs should be updated since the number of created
-     vector stmts depends on VF.  */
-  vect_update_slp_costs_according_to_vf (loop_vinfo);
+  switch (STMT_VINFO_DEF_TYPE (stmt_info))
+    {
+      case vect_internal_def:
+        break;
 
-  min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
-  LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
+      case vect_reduction_def:
+        gcc_assert (relevance == vect_used_in_outer
+                    || relevance == vect_used_in_outer_by_reduction
+                    || relevance == vect_unused_in_scope);
+        break;
+
+      case vect_induction_def:
+      case vect_constant_def:
+      case vect_external_def:
+      case vect_unknown_def_type:
+      default:
+        gcc_unreachable ();
+    }
+
+  if (STMT_VINFO_RELEVANT_P (stmt_info))
+    {
+      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
+      gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
+      *need_to_vectorize = true;
+    }
+
+  ok = true;
+  if (STMT_VINFO_RELEVANT_P (stmt_info)
+      || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
+    ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
+          || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
+          || vectorizable_conversion (stmt, NULL, NULL, NULL)
+          || vectorizable_operation (stmt, NULL, NULL, NULL)
+          || vectorizable_assignment (stmt, NULL, NULL, NULL)
+          || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
+          || vectorizable_call (stmt, NULL, NULL)
+          || vectorizable_store (stmt, NULL, NULL, NULL)
+          || vectorizable_condition (stmt, NULL, NULL)
+          || vectorizable_reduction (stmt, NULL, NULL));
 
-  if (min_profitable_iters < 0)
+  if (!ok)
     {
-      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
-        fprintf (vect_dump, "not vectorized: vectorization not profitable.");
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "not vectorized: vector version will never be "
-                 "profitable.");
+      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+        {
+          fprintf (vect_dump, "not vectorized: relevant stmt not ");
+          fprintf (vect_dump, "supported: ");
+          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+        }
+
       return false;
     }
 
-  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
-                            * vectorization_factor) - 1);
+  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
+     need extra handling, except for vectorizable reductions.  */
+  if (STMT_VINFO_LIVE_P (stmt_info)
+      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
+    ok = vectorizable_live_operation (stmt, NULL, NULL);
 
-  /* Use the cost model only if it is more conservative than user specified
-     threshold.  */
+  if (!ok)
+    {
+      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
+        {
+          fprintf (vect_dump, "not vectorized: live stmt not ");
+          fprintf (vect_dump, "supported: ");
+          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+        }
 
-  th = (unsigned) min_scalar_loop_bound;
-  if (min_profitable_iters
-      && (!min_scalar_loop_bound
-          || min_profitable_iters > min_scalar_loop_bound))
-    th = (unsigned) min_profitable_iters;
-
-  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-      && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
-    {
-      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
-        fprintf (vect_dump, "not vectorized: vectorization not "
-                 "profitable.");
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "not vectorized: iteration count smaller than "
-                 "user specified loop bound parameter or minimum "
-                 "profitable iterations (whichever is more conservative).");
-      return false;
+      return false;
     }
 
-  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
-      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
+  if (!PURE_SLP_STMT (stmt_info))
     {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "epilog loop required.");
-      if (!vect_can_advance_ivs_p (loop_vinfo))
+      /* Groups of strided accesses whose size is not a power of 2 are not
+         vectorizable yet using loop-vectorization.  Therefore, if this stmt
+         feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
+         loop-based vectorized), the loop cannot be vectorized.  */
+      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
+          && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
+                                        DR_GROUP_FIRST_DR (stmt_info)))) == -1)
         {
-          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
-            fprintf (vect_dump,
-                     "not vectorized: can't create epilog loop 1.");
-          return false;
-        }
-      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
-        {
-          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
-            fprintf (vect_dump,
-                     "not vectorized: can't create epilog loop 2.");
+          if (vect_print_dump_info (REPORT_DETAILS))
+            {
+              fprintf (vect_dump, "not vectorized: the size of group "
+                                  "of strided accesses is not a power of 2");
+              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+            }
+
           return false;
         }
     }
@@ -4343,7 +4133,7 @@ new_stmt_vec_info (gimple stmt, loop_vec
   STMT_VINFO_TYPE (res) = undef_vec_info_type;
   STMT_VINFO_STMT (res) = stmt;
   STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
-  STMT_VINFO_RELEVANT (res) = vect_unused_in_loop;
+  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
   STMT_VINFO_LIVE_P (res) = false;
   STMT_VINFO_VECTYPE (res) = NULL;
   STMT_VINFO_VEC_STMT (res) = NULL;
@@ -4361,7 +4151,8 @@ new_stmt_vec_info (gimple stmt, loop_vec
       && is_loop_header_bb_p (gimple_bb (stmt)))
     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
   else
-    STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
+    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
+
   STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
   STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
   STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
@@ -4499,7 +4290,7 @@ vect_is_simple_use (tree operand, loop_v
   if (is_gimple_min_invariant (operand))
     {
       *def = operand;
-      *dt = vect_invariant_def;
+      *dt = vect_external_def;
       return true;
     }
 
@@ -4530,18 +4321,18 @@ vect_is_simple_use (tree operand, loop_v
       print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
     }
 
-  /* empty stmt is expected only in case of a function argument.
+  /* Empty stmt is expected only in case of a function argument.
      (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
   if (gimple_nop_p (*def_stmt))
     {
       *def = operand;
-      *dt = vect_invariant_def;
+      *dt = vect_external_def;
       return true;
     }
 
   bb = gimple_bb (*def_stmt);
   if (!flow_bb_inside_loop_p (loop, bb))
-    *dt = vect_invariant_def;
+    *dt = vect_external_def;
   else
     {
       stmt_vinfo = vinfo_for_stmt (*def_stmt);
Index: tree-vect-slp.c
===================================================================
--- tree-vect-slp.c	(revision 146873)
+++ tree-vect-slp.c	(working copy)
@@ -239,10 +239,10 @@ vect_get_and_check_slp_defs (loop_vec_in
       switch (dt[i])
 	{
 	case vect_constant_def:
-	case vect_invariant_def:
+	case vect_external_def:
 	  break;
 	  
-	case vect_loop_def:
+	case vect_internal_def:
 	  if (i == 0)
 	    VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
 	  else
@@ -581,7 +581,7 @@ vect_build_slp_tree (loop_vec_info loop_
     }
 
   /* Create SLP_TREE nodes for the definition node/s.  */ 
-  if (first_stmt_dt0 == vect_loop_def)
+  if (first_stmt_dt0 == vect_internal_def)
     {
       slp_tree left_node = XNEW (struct _slp_tree);
       SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
@@ -598,7 +598,7 @@ vect_build_slp_tree (loop_vec_info loop_
       SLP_TREE_LEFT (*node) = left_node;
     }
 
-  if (first_stmt_dt1 == vect_loop_def)
+  if (first_stmt_dt1 == vect_internal_def)
     {
       slp_tree right_node = XNEW (struct _slp_tree);
       SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
@@ -952,7 +952,7 @@ vect_analyze_slp (loop_vec_info loop_vin
     if (!vect_analyze_slp_instance (loop_vinfo, store))
       {
 	/* SLP failed. No instance can be SLPed in the loop.  */
-	if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))	
+	if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))	
 	  fprintf (vect_dump, "SLP failed.");
 
 	return false;
@@ -1694,8 +1694,8 @@ vect_schedule_slp (loop_vec_info loop_vi
       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                             instance, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
 			  
-      if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
-	  || vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+      if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
+	  || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
 	fprintf (vect_dump, "vectorizing stmts using SLP.");
     }
 

^ permalink raw reply	[flat|nested] 7+ messages in thread
* [patch] Vectorizer cleanup
@ 2007-08-13 13:09 Ira Rosen
  2007-08-14 16:48 ` Diego Novillo
  0 siblings, 1 reply; 7+ messages in thread
From: Ira Rosen @ 2007-08-13 13:09 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1908 bytes --]


Bootstrapped and tested on x86_64-linux.
O.K. for mainline?

Thanks,
Ira

:ADDPATCH SSA (vectorizer):

ChangeLog:

      * tree-vectorizer.c: Remove declarations of static functions.
      * tree-vectorizer.h (STMT_VINFO_STRIDED_ACCESS): New macro.
      * tree-vect-analyze.c: Remove declarations of static functions.
      (vect_can_advance_ivs_p): Move above the calling function to avoid
      function declaration.
      (vect_update_misalignment_for_peel): Use
      STMT_VINFO_STRIDED_ACCESS instead of DR_GROUP_FIRST_DR.
      (vect_verify_datarefs_alignment, vector_alignment_reachable_p,
      vect_enhance_data_refs_alignment): Likewise.
      (vect_analyze_group_access): New.
      (vect_analyze_data_ref_access): Fix comment. Call
      vect_analyze_group_access().
      * tree-vect-transform.c: Remove declarations of static functions.
      (cost_for_stmt): Fix indentation.
      (vect_model_simple_cost): Likewise.
      (vect_model_store_cost): Fix indentation and spacing.  Use
      STMT_VINFO_STRIDED_ACCESS instead of DR_GROUP_FIRST_DR.
      (vect_model_load_cost): Likewise.
      (vect_finish_stmt_generation): Move above the calling function to
avoid
      function declaration.
      (vect_get_vec_def_for_stmt_copy, vect_min_worthwhile_factor):
      Likewise.
      (vectorizable_conversion): Fix indentation and spacing.
      (vectorizable_operation): Fix comment.
      (vectorizable_store): Use STMT_VINFO_STRIDED_ACCESS
      instead of DR_GROUP_FIRST_DR.
      (vectorizable_load): Add assert that number of copies is at least 1.
      Use STMT_VINFO_STRIDED_ACCESS instead of
      DR_GROUP_FIRST_DR.
      (vect_transform_stmt): Use STMT_VINFO_STRIDED_ACCESS
      instead of DR_GROUP_FIRST_DR.  Make static.
      (vect_gen_niters_for_prolog_loop): Use
      STMT_VINFO_STRIDED_ACCESS instead of DR_GROUP_FIRST_DR.
      (vect_transform_loop): Likewise.

(See attached file: cleanup.txt)

[-- Attachment #2: cleanup.txt --]
[-- Type: text/plain, Size: 45623 bytes --]

Index: tree-vectorizer.c
===================================================================
--- tree-vectorizer.c	(revision 127394)
+++ tree-vectorizer.c	(working copy)
@@ -146,20 +146,6 @@ along with GCC; see the file COPYING3.  
 #include "tree-vectorizer.h"
 #include "tree-pass.h"
 
-/*************************************************************************
-  Simple Loop Peeling Utilities
- *************************************************************************/
-static void slpeel_update_phis_for_duplicate_loop 
-  (struct loop *, struct loop *, bool after);
-static void slpeel_update_phi_nodes_for_guard1 
-  (edge, struct loop *, bool, basic_block *, bitmap *); 
-static void slpeel_update_phi_nodes_for_guard2 
-  (edge, struct loop *, bool, basic_block *);
-static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
-
-static void rename_use_op (use_operand_p);
-static void rename_variables_in_bb (basic_block);
-static void rename_variables_in_loop (struct loop *);
 
 /*************************************************************************
   General Vectorization Utilities
Index: tree-vectorizer.h
===================================================================
--- tree-vectorizer.h	(revision 127394)
+++ tree-vectorizer.h	(working copy)
@@ -304,6 +304,7 @@ typedef struct _stmt_vec_info {
 #define STMT_VINFO_DR_GROUP_GAP(S)         (S)->gap
 #define STMT_VINFO_DR_GROUP_SAME_DR_STMT(S)(S)->same_dr_stmt
 #define STMT_VINFO_DR_GROUP_READ_WRITE_DEPENDENCE(S)  (S)->read_write_dep
+#define STMT_VINFO_STRIDED_ACCESS(S)      ((S)->first_dr != NULL)
 
 #define DR_GROUP_FIRST_DR(S)               (S)->first_dr
 #define DR_GROUP_NEXT_DR(S)                (S)->next_dr
Index: tree-vect-analyze.c
===================================================================
--- tree-vect-analyze.c	(revision 127394)
+++ tree-vect-analyze.c	(working copy)
@@ -40,29 +40,6 @@ along with GCC; see the file COPYING3.  
 #include "tree-vectorizer.h"
 #include "toplev.h"
 
-/* Main analysis functions.  */
-static loop_vec_info vect_analyze_loop_form (struct loop *);
-static bool vect_analyze_data_refs (loop_vec_info);
-static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
-static void vect_analyze_scalar_cycles (loop_vec_info);
-static bool vect_analyze_data_ref_accesses (loop_vec_info);
-static bool vect_analyze_data_ref_dependences (loop_vec_info);
-static bool vect_analyze_data_refs_alignment (loop_vec_info);
-static bool vect_compute_data_refs_alignment (loop_vec_info);
-static bool vect_enhance_data_refs_alignment (loop_vec_info);
-static bool vect_analyze_operations (loop_vec_info);
-static bool vect_determine_vectorization_factor (loop_vec_info);
-
-/* Utility functions for the analyses.  */
-static bool exist_non_indexing_operands_for_use_p (tree, tree);
-static tree vect_get_loop_niters (struct loop *, tree *);
-static bool vect_analyze_data_ref_dependence
-  (struct data_dependence_relation *, loop_vec_info);
-static bool vect_compute_data_ref_alignment (struct data_reference *); 
-static bool vect_analyze_data_ref_access (struct data_reference *);
-static bool vect_can_advance_ivs_p (loop_vec_info);
-static void vect_update_misalignment_for_peel
-  (struct data_reference *, struct data_reference *, int npeel);
 
 /* Function vect_determine_vectorization_factor
 
@@ -283,6 +260,95 @@ vect_determine_vectorization_factor (loo
 }
 
 
+/* Function vect_can_advance_ivs_p
+
+   In case the number of iterations that LOOP iterates is unknown at compile 
+   time, an epilog loop will be generated, and the loop induction variables 
+   (IVs) will be "advanced" to the value they are supposed to take just before 
+   the epilog loop.  Here we check that the access function of the loop IVs
+   and the expression that represents the loop bound are simple enough.
+   These restrictions will be relaxed in the future.  */
+
+static bool 
+vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
+{
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  basic_block bb = loop->header;
+  tree phi;
+
+  /* Analyze phi functions of the loop header.  */
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "vect_can_advance_ivs_p:");
+
+  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
+    {
+      tree access_fn = NULL;
+      tree evolution_part;
+
+      if (vect_print_dump_info (REPORT_DETAILS))
+	{
+          fprintf (vect_dump, "Analyze phi: ");
+          print_generic_expr (vect_dump, phi, TDF_SLIM);
+	}
+
+      /* Skip virtual phi's. The data dependences that are associated with
+         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
+
+      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
+	{
+	  if (vect_print_dump_info (REPORT_DETAILS))
+	    fprintf (vect_dump, "virtual phi. skip.");
+	  continue;
+	}
+
+      /* Skip reduction phis.  */
+
+      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "reduc phi. skip.");
+          continue;
+        }
+
+      /* Analyze the evolution function.  */
+
+      access_fn = instantiate_parameters
+	(loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
+
+      if (!access_fn)
+	{
+	  if (vect_print_dump_info (REPORT_DETAILS))
+	    fprintf (vect_dump, "No Access function.");
+	  return false;
+	}
+
+      if (vect_print_dump_info (REPORT_DETAILS))
+        {
+	  fprintf (vect_dump, "Access function of PHI: ");
+	  print_generic_expr (vect_dump, access_fn, TDF_SLIM);
+        }
+
+      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
+      
+      if (evolution_part == NULL_TREE)
+        {
+	  if (vect_print_dump_info (REPORT_DETAILS))
+	    fprintf (vect_dump, "No evolution.");
+	  return false;
+        }
+  
+      /* FORNOW: We do not transform initial conditions of IVs 
+	 which evolution functions are a polynomial of degree >= 2.  */
+
+      if (tree_is_chrec (evolution_part))
+	return false;  
+    }
+
+  return true;
+}
+
+
 /* Function vect_analyze_operations.
 
    Scan the loop stmts and make sure they are all vectorizable.  */
@@ -1299,9 +1365,9 @@ vect_update_misalignment_for_peel (struc
 
  /* For interleaved data accesses the step in the loop must be multiplied by
      the size of the interleaving group.  */
-  if (DR_GROUP_FIRST_DR (stmt_info))
+  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
     dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
-  if (DR_GROUP_FIRST_DR (peel_stmt_info))
+  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
     dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
 
   /* It can be assumed that the data refs with the same alignment as dr_peel
@@ -1353,7 +1419,7 @@ vect_verify_datarefs_alignment (loop_vec
       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 
       /* For interleaving, only the alignment of the first access matters.  */
-      if (DR_GROUP_FIRST_DR (stmt_info)
+      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
           && DR_GROUP_FIRST_DR (stmt_info) != stmt)
         continue;
 
@@ -1391,7 +1457,7 @@ vector_alignment_reachable_p (struct dat
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
 
-  if (DR_GROUP_FIRST_DR (stmt_info))
+  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
     {
       /* For interleaved access we peel only if number of iterations in
 	 the prolog loop ({VF - misalignment}), is a multiple of the
@@ -1604,7 +1670,7 @@ vect_enhance_data_refs_alignment (loop_v
 
       /* For interleaving, only the alignment of the first access
          matters.  */
-      if (DR_GROUP_FIRST_DR (stmt_info)
+      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
           && DR_GROUP_FIRST_DR (stmt_info) != stmt)
         continue;
 
@@ -1648,7 +1714,7 @@ vect_enhance_data_refs_alignment (loop_v
 	     members of the group, therefore we divide the number of iterations
 	     by the group size.  */
 	  stmt_info = vinfo_for_stmt (DR_STMT (dr0));	  
-	  if (DR_GROUP_FIRST_DR (stmt_info))
+	  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
 	    npeel /= DR_GROUP_SIZE (stmt_info);
 
           if (vect_print_dump_info (REPORT_DETAILS))
@@ -1667,7 +1733,7 @@ vect_enhance_data_refs_alignment (loop_v
 	  stmt_info = vinfo_for_stmt (stmt);
 	  /* For interleaving, only the alignment of the first access
             matters.  */
-	  if (DR_GROUP_FIRST_DR (stmt_info)
+	  if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
 	      && DR_GROUP_FIRST_DR (stmt_info) != stmt)
 	    continue;
 
@@ -1734,7 +1800,7 @@ vect_enhance_data_refs_alignment (loop_v
 	  /* For interleaving, only the alignment of the first access
 	     matters.  */
 	  if (aligned_access_p (dr)
-	      || (DR_GROUP_FIRST_DR (stmt_info)
+	      || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
 		  && DR_GROUP_FIRST_DR (stmt_info) != stmt))
 	    continue;
 
@@ -1846,65 +1912,51 @@ vect_analyze_data_refs_alignment (loop_v
 }
 
 
-/* Function vect_analyze_data_ref_access.
-
-   Analyze the access pattern of the data-reference DR. For now, a data access
-   has to be consecutive to be considered vectorizable.  */
+/* Analyze groups of strided accesses: check that DR belongs to a group of
+   strided accesses of legal size, step, etc. Detect gaps, single element
+   interleaving, and other special cases. Set strided access info.  */
 
 static bool
-vect_analyze_data_ref_access (struct data_reference *dr)
+vect_analyze_group_access (struct data_reference *dr)
 {
   tree step = DR_STEP (dr);
-  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
   tree scalar_type = TREE_TYPE (DR_REF (dr));
   HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
   tree stmt = DR_STMT (dr);
-  /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the 
-     interleaving group (including gaps).  */
-  HOST_WIDE_INT stride = dr_step / type_size;
-
-  if (!step)
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-	fprintf (vect_dump, "bad data-ref access");
-      return false;
-    }
+  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
+  HOST_WIDE_INT stride;
 
-  /* Consecutive?  */
-  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
-    {
-      /* Mark that it is not interleaving.  */
-      DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
-      return true;
-    }
+  /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
+     interleaving group (including gaps).  */
+  stride = dr_step / type_size;
 
   /* Not consecutive access is possible only if it is a part of interleaving.  */
-  if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
+  if (!STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
     {
       /* Check if it this DR is a part of interleaving, and is a single
-	 element of the group that is accessed in the loop.  */
-      
+         element of the group that is accessed in the loop.  */
+
       /* Gaps are supported only for loads. STEP must be a multiple of the type
-	 size.  The size of the group must be a power of 2.  */
+         size.  The size of the group must be a power of 2.  */
       if (DR_IS_READ (dr)
-	  && (dr_step % type_size) == 0
-	  && stride > 0
-	  && exact_log2 (stride) != -1)
-	{
-	  DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
-	  DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
-	  if (vect_print_dump_info (REPORT_DR_DETAILS))
-	    {
-	      fprintf (vect_dump, "Detected single element interleaving %d ",
-		       DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
-	      print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
-	      fprintf (vect_dump, " step ");
-	      print_generic_expr (vect_dump, step, TDF_SLIM);
-	    }
-	  return true;
-	}
+          && (dr_step % type_size) == 0
+          && stride > 0
+          && exact_log2 (stride) != -1)
+        {
+          DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
+          DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
+          if (vect_print_dump_info (REPORT_DR_DETAILS))
+            {
+              fprintf (vect_dump, "Detected single element interleaving %d ",
+                       DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
+              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
+              fprintf (vect_dump, " step ");
+              print_generic_expr (vect_dump, step, TDF_SLIM);
+            }
+          return true;
+        }
       if (vect_print_dump_info (REPORT_DETAILS))
-	fprintf (vect_dump, "not consecutive access");
+        fprintf (vect_dump, "not consecutive access");
       return false;
     }
 
@@ -1920,99 +1972,99 @@ vect_analyze_data_ref_access (struct dat
       HOST_WIDE_INT diff, count_in_bytes;
 
       while (next)
-	{
-	  /* Skip same data-refs. In case that two or more stmts share data-ref
-	     (supported only for loads), we vectorize only the first stmt, and
-	     the rest get their vectorized loads from the first one.  */
-	  if (!tree_int_cst_compare (DR_INIT (data_ref),
-				     DR_INIT (STMT_VINFO_DATA_REF (
-						      vinfo_for_stmt (next)))))
-	    {
+        {
+          /* Skip same data-refs. In case that two or more stmts share data-ref
+             (supported only for loads), we vectorize only the first stmt, and
+             the rest get their vectorized loads from the first one.  */
+          if (!tree_int_cst_compare (DR_INIT (data_ref),
+                                     DR_INIT (STMT_VINFO_DATA_REF (
+						   vinfo_for_stmt (next)))))
+            {
               if (!DR_IS_READ (data_ref))
-                { 
+                {
                   if (vect_print_dump_info (REPORT_DETAILS))
                     fprintf (vect_dump, "Two store stmts share the same dr.");
-                  return false; 
+                  return false;
                 }
 
-              /* Check that there is no load-store dependencies for this loads 
+              /* Check that there is no load-store dependencies for this loads
                  to prevent a case of load-store-load to the same location.  */
               if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
                   || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
                 {
                   if (vect_print_dump_info (REPORT_DETAILS))
-                    fprintf (vect_dump, 
+                    fprintf (vect_dump,
                              "READ_WRITE dependence in interleaving.");
                   return false;
                 }
 
-	      /* For load use the same data-ref load.  */
-	      DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
+              /* For load use the same data-ref load.  */
+              DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
 
-	      prev = next;
-	      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
-	      continue;
-	    }
-	  prev = next;
+              prev = next;
+              next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
+              continue;
+            }
+          prev = next;
 
-	  /* Check that all the accesses have the same STEP.  */
-	  next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
-	  if (tree_int_cst_compare (step, next_step))
-	    {
-	      if (vect_print_dump_info (REPORT_DETAILS))
-		fprintf (vect_dump, "not consecutive access in interleaving");
-	      return false;
-	    }
+          /* Check that all the accesses have the same STEP.  */
+          next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
+          if (tree_int_cst_compare (step, next_step))
+            {
+              if (vect_print_dump_info (REPORT_DETAILS))
+                fprintf (vect_dump, "not consecutive access in interleaving");
+              return false;
+            }
 
-	  data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
-	  /* Check that the distance between two accesses is equal to the type
-	     size. Otherwise, we have gaps.  */
-	  diff = (TREE_INT_CST_LOW (DR_INIT (data_ref)) 
-		  - TREE_INT_CST_LOW (prev_init)) / type_size;
-	  if (!DR_IS_READ (data_ref) && diff != 1)
-	    {
-	      if (vect_print_dump_info (REPORT_DETAILS))
-		fprintf (vect_dump, "interleaved store with gaps");
-	      return false;
-	    }
-	  /* Store the gap from the previous member of the group. If there is no
+          data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
+          /* Check that the distance between two accesses is equal to the type
+             size. Otherwise, we have gaps.  */
+          diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
+                  - TREE_INT_CST_LOW (prev_init)) / type_size;
+          if (!DR_IS_READ (data_ref) && diff != 1)
+            {
+              if (vect_print_dump_info (REPORT_DETAILS))
+                fprintf (vect_dump, "interleaved store with gaps");
+              return false;
+            }
+          /* Store the gap from the previous member of the group. If there is no
              gap in the access, DR_GROUP_GAP is always 1.  */
-	  DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
+          DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
 
-	  prev_init = DR_INIT (data_ref);
-	  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
-	  /* Count the number of data-refs in the chain.  */
-	  count++;
-	}
+          prev_init = DR_INIT (data_ref);
+          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
+          /* Count the number of data-refs in the chain.  */
+          count++;
+        }
 
-      /* COUNT is the number of accesses found, we multiply it by the size of 
-	 the type to get COUNT_IN_BYTES.  */
+      /* COUNT is the number of accesses found, we multiply it by the size of
+         the type to get COUNT_IN_BYTES.  */
       count_in_bytes = type_size * count;
 
       /* Check that the size of the interleaving is not greater than STEP.  */
-      if (dr_step < count_in_bytes) 
-	{
-	  if (vect_print_dump_info (REPORT_DETAILS))
-	    {
-	      fprintf (vect_dump, "interleaving size is greater than step for ");
-	      print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM); 
-	    }
-	  return false;
-	}
+      if (dr_step < count_in_bytes)
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            {
+              fprintf (vect_dump, "interleaving size is greater than step for ");
+              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
+            }
+          return false;
+        }
 
-      /* Check that the size of the interleaving is equal to STEP for stores, 
-         i.e., that there are no gaps.  */ 
-      if (!DR_IS_READ (dr) && dr_step != count_in_bytes) 
-	{
-	  if (vect_print_dump_info (REPORT_DETAILS))
-	    fprintf (vect_dump, "interleaved store with gaps");
-	  return false;
-	}
+      /* Check that the size of the interleaving is equal to STEP for stores,
+         i.e., that there are no gaps.  */
+      if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "interleaved store with gaps");
+          return false;
+        }
 
       /* Check that STEP is a multiple of type size.  */
       if ((dr_step % type_size) != 0)
-	{
-	  if (vect_print_dump_info (REPORT_DETAILS)) 
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
             {
               fprintf (vect_dump, "step is not a multiple of type size: step ");
               print_generic_expr (vect_dump, step, TDF_SLIM);
@@ -2020,22 +2072,53 @@ vect_analyze_data_ref_access (struct dat
               print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
                                   TDF_SLIM);
             }
-	  return false;
-	}
+          return false;
+        }
 
       /* FORNOW: we handle only interleaving that is a power of 2.  */
       if (exact_log2 (stride) == -1)
-	{
-	  if (vect_print_dump_info (REPORT_DETAILS))
-	    fprintf (vect_dump, "interleaving is not a power of 2");
-	  return false;
-	}
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "interleaving is not a power of 2");
+          return false;
+        }
       DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
     }
   return true;
 }
 
 
+/* Analyze the access pattern of the data-reference DR.
+   In case of non-consecutive accesse call vect_analyze_group_access() to
+   analyze groups of strided accesses.  */
+
+static bool
+vect_analyze_data_ref_access (struct data_reference *dr)
+{
+  tree step = DR_STEP (dr);
+  tree scalar_type = TREE_TYPE (DR_REF (dr));
+  tree stmt = DR_STMT (dr);
+
+  if (!step)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+	fprintf (vect_dump, "bad data-ref access");
+      return false;
+    }
+
+  /* Consecutive?  */
+  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
+    {
+      /* Mark that it is not interleaving.  */
+      DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
+      return true;
+    }
+
+  /* Not consecutive access - check if it's a part of interleaving group.  */
+  return vect_analyze_group_access (dr);
+}
+
+
 /* Function vect_analyze_data_ref_accesses.
 
    Analyze the access pattern of all the data references in the loop.
@@ -2519,95 +2602,6 @@ vect_mark_stmts_to_be_vectorized (loop_v
 }
 
 
-/* Function vect_can_advance_ivs_p
-
-   In case the number of iterations that LOOP iterates is unknown at compile 
-   time, an epilog loop will be generated, and the loop induction variables 
-   (IVs) will be "advanced" to the value they are supposed to take just before 
-   the epilog loop.  Here we check that the access function of the loop IVs
-   and the expression that represents the loop bound are simple enough.
-   These restrictions will be relaxed in the future.  */
-
-static bool 
-vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
-{
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-  basic_block bb = loop->header;
-  tree phi;
-
-  /* Analyze phi functions of the loop header.  */
-
-  if (vect_print_dump_info (REPORT_DETAILS))
-    fprintf (vect_dump, "vect_can_advance_ivs_p:");
-
-  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
-    {
-      tree access_fn = NULL;
-      tree evolution_part;
-
-      if (vect_print_dump_info (REPORT_DETAILS))
-	{
-          fprintf (vect_dump, "Analyze phi: ");
-          print_generic_expr (vect_dump, phi, TDF_SLIM);
-	}
-
-      /* Skip virtual phi's. The data dependences that are associated with
-         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
-
-      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
-	{
-	  if (vect_print_dump_info (REPORT_DETAILS))
-	    fprintf (vect_dump, "virtual phi. skip.");
-	  continue;
-	}
-
-      /* Skip reduction phis.  */
-
-      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
-        {
-          if (vect_print_dump_info (REPORT_DETAILS))
-            fprintf (vect_dump, "reduc phi. skip.");
-          continue;
-        }
-
-      /* Analyze the evolution function.  */
-
-      access_fn = instantiate_parameters
-	(loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
-
-      if (!access_fn)
-	{
-	  if (vect_print_dump_info (REPORT_DETAILS))
-	    fprintf (vect_dump, "No Access function.");
-	  return false;
-	}
-
-      if (vect_print_dump_info (REPORT_DETAILS))
-        {
-	  fprintf (vect_dump, "Access function of PHI: ");
-	  print_generic_expr (vect_dump, access_fn, TDF_SLIM);
-        }
-
-      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
-      
-      if (evolution_part == NULL_TREE)
-        {
-	  if (vect_print_dump_info (REPORT_DETAILS))
-	    fprintf (vect_dump, "No evolution.");
-	  return false;
-        }
-  
-      /* FORNOW: We do not transform initial conditions of IVs 
-	 which evolution functions are a polynomial of degree >= 2.  */
-
-      if (tree_is_chrec (evolution_part))
-	return false;  
-    }
-
-  return true;
-}
-
-
 /* Function vect_get_loop_niters.
 
    Determine how many iterations the loop is executed.
Index: tree-vect-transform.c
===================================================================
--- tree-vect-transform.c	(revision 127394)
+++ tree-vect-transform.c	(working copy)
@@ -45,58 +45,33 @@ along with GCC; see the file COPYING3.  
 #include "toplev.h"
 #include "real.h"
 
-/* Utility functions for the code transformation.  */
-static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *);
-static tree vect_create_destination_var (tree, tree);
-static tree vect_create_data_ref_ptr 
-  (tree, block_stmt_iterator *, tree, tree *, tree *, bool, tree); 
-static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
-static tree vect_setup_realignment (tree, block_stmt_iterator *, tree *);
-static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
-static tree vect_get_vec_def_for_operand (tree, tree, tree *);
-static tree vect_init_vector (tree, tree, tree);
-static void vect_finish_stmt_generation 
-  (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
-static bool vect_is_simple_cond (tree, loop_vec_info); 
-static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
 static tree get_initial_def_for_reduction (tree, tree, tree *);
 
-/* Utility function dealing with loop peeling (not peeling itself).  */
-static void vect_generate_tmps_on_preheader 
-  (loop_vec_info, tree *, tree *, tree *);
-static tree vect_build_loop_niters (loop_vec_info);
-static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge); 
-static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
-static void vect_update_init_of_dr (struct data_reference *, tree niters);
-static void vect_update_inits_of_drs (loop_vec_info, tree);
-static int vect_min_worthwhile_factor (enum tree_code);
-
-
 static int
 cost_for_stmt (tree stmt)
 {
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 
   switch (STMT_VINFO_TYPE (stmt_info))
-  {
-  case load_vec_info_type:
-    return TARG_SCALAR_LOAD_COST;
-  case store_vec_info_type:
-    return TARG_SCALAR_STORE_COST;
-  case op_vec_info_type:
-  case condition_vec_info_type:
-  case assignment_vec_info_type:
-  case reduc_vec_info_type:
-  case induc_vec_info_type:
-  case type_promotion_vec_info_type:
-  case type_demotion_vec_info_type:
-  case type_conversion_vec_info_type:
-  case call_vec_info_type:
-    return TARG_SCALAR_STMT_COST;
-  case undef_vec_info_type:
-  default:
-    gcc_unreachable ();
-  }
+    {
+    case load_vec_info_type:
+      return TARG_SCALAR_LOAD_COST;
+    case store_vec_info_type:
+      return TARG_SCALAR_STORE_COST;
+    case op_vec_info_type:
+    case condition_vec_info_type:
+    case assignment_vec_info_type:
+    case reduc_vec_info_type:
+    case induc_vec_info_type:
+    case type_promotion_vec_info_type:
+    case type_demotion_vec_info_type:
+    case type_conversion_vec_info_type:
+    case call_vec_info_type:
+      return TARG_SCALAR_STMT_COST;
+    case undef_vec_info_type:
+    default:
+      gcc_unreachable ();
+    }
 }
 
 
@@ -442,14 +417,15 @@ vect_model_induction_cost (stmt_vec_info
    be generated for the single vector op.  We will handle that shortly.  */
 
 static void
-vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type *dt)
+vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies, 
+                        enum vect_def_type *dt)
 {
   int i;
 
   STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
 
   /* FORNOW: Assuming maximum 2 args per stmts.  */
-  for (i=0; i<2; i++)
+  for (i = 0; i < 2; i++)
     {
       if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def)
 	STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) += TARG_SCALAR_TO_VEC_COST; 
@@ -486,7 +462,8 @@ vect_cost_strided_group_size (stmt_vec_i
    has the overhead of the strided access attributed to it.  */
 
 static void
-vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type dt)
+vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, 
+                       enum vect_def_type dt)
 {
   int cost = 0;
   int group_size;
@@ -495,7 +472,7 @@ vect_model_store_cost (stmt_vec_info stm
     STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = TARG_SCALAR_TO_VEC_COST;
 
   /* Strided access?  */
-  if (DR_GROUP_FIRST_DR (stmt_info)) 
+  if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) 
     group_size = vect_cost_strided_group_size (stmt_info);
   /* Not a strided access.  */
   else
@@ -612,7 +589,7 @@ vect_model_load_cost (stmt_vec_info stmt
            access in the group. Inside the loop, there is a load op
            and a realignment op.  */
 
-        if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1)
+        if ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1)
           {
             outer_cost = 2*TARG_VEC_STMT_COST;
             if (targetm.vectorize.builtin_mask_for_load)
@@ -636,7 +613,6 @@ vect_model_load_cost (stmt_vec_info stmt
     fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
              "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
              STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
-
 }
 
 
@@ -953,6 +929,38 @@ vect_create_data_ref_ptr (tree stmt,
 }
 
 
+/* Function vect_finish_stmt_generation.
+
+   Insert a new stmt.  */
+
+static void
+vect_finish_stmt_generation (tree stmt, tree vec_stmt, 
+			     block_stmt_iterator *bsi)
+{
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+
+  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
+  set_stmt_info (get_stmt_ann (vec_stmt), 
+		 new_stmt_vec_info (vec_stmt, loop_vinfo)); 
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    {
+      fprintf (vect_dump, "add new stmt: ");
+      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
+    }
+
+  /* Make sure bsi points to the stmt that is being vectorized.  */
+  gcc_assert (stmt == bsi_stmt (*bsi));
+
+#ifdef USE_MAPPED_LOCATION
+  SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
+#else
+  SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
+#endif
+}
+
+
 /* Function bump_vector_ptr
 
    Increment a pointer (to a vector type) by vector-size. Connect the new 
@@ -1415,115 +1423,6 @@ vect_get_vec_def_for_operand (tree op, t
 }
 
 
-/* Function vect_get_vec_def_for_stmt_copy
-
-   Return a vector-def for an operand. This function is used when the 
-   vectorized stmt to be created (by the caller to this function) is a "copy" 
-   created in case the vectorized result cannot fit in one vector, and several 
-   copies of the vector-stmt are required. In this case the vector-def is 
-   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
-   of the stmt that defines VEC_OPRND. 
-   DT is the type of the vector def VEC_OPRND.
-
-   Context:
-        In case the vectorization factor (VF) is bigger than the number
-   of elements that can fit in a vectype (nunits), we have to generate
-   more than one vector stmt to vectorize the scalar stmt. This situation
-   arises when there are multiple data-types operated upon in the loop; the 
-   smallest data-type determines the VF, and as a result, when vectorizing
-   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
-   vector stmt (each computing a vector of 'nunits' results, and together
-   computing 'VF' results in each iteration).  This function is called when 
-   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
-   which VF=16 and nunits=4, so the number of copies required is 4):
-
-   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
- 
-   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
-                        VS1.1:  vx.1 = memref1      VS1.2
-                        VS1.2:  vx.2 = memref2      VS1.3
-                        VS1.3:  vx.3 = memref3 
-
-   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
-                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
-                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
-                        VSnew.3:  vz3 = vx.3 + ...
-
-   The vectorization of S1 is explained in vectorizable_load.
-   The vectorization of S2:
-        To create the first vector-stmt out of the 4 copies - VSnew.0 - 
-   the function 'vect_get_vec_def_for_operand' is called to 
-   get the relevant vector-def for each operand of S2. For operand x it
-   returns  the vector-def 'vx.0'.
-
-        To create the remaining copies of the vector-stmt (VSnew.j), this 
-   function is called to get the relevant vector-def for each operand.  It is 
-   obtained from the respective VS1.j stmt, which is recorded in the 
-   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
-
-        For example, to obtain the vector-def 'vx.1' in order to create the 
-   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'. 
-   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the 
-   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
-   and return its def ('vx.1').
-   Overall, to create the above sequence this function will be called 3 times:
-        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
-        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
-        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
-
-static tree
-vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
-{
-  tree vec_stmt_for_operand;
-  stmt_vec_info def_stmt_info;
-
-  /* Do nothing; can reuse same def.  */
-  if (dt == vect_invariant_def || dt == vect_constant_def )
-    return vec_oprnd;
-
-  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
-  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
-  gcc_assert (def_stmt_info);
-  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
-  gcc_assert (vec_stmt_for_operand);
-  vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);
-
-  return vec_oprnd;
-}
-
-
-/* Function vect_finish_stmt_generation.
-
-   Insert a new stmt.  */
-
-static void
-vect_finish_stmt_generation (tree stmt, tree vec_stmt, 
-			     block_stmt_iterator *bsi)
-{
-  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-
-  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
-  set_stmt_info (get_stmt_ann (vec_stmt), 
-		 new_stmt_vec_info (vec_stmt, loop_vinfo)); 
-
-  if (vect_print_dump_info (REPORT_DETAILS))
-    {
-      fprintf (vect_dump, "add new stmt: ");
-      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
-    }
-
-  /* Make sure bsi points to the stmt that is being vectorized.  */
-  gcc_assert (stmt == bsi_stmt (*bsi));
-
-#ifdef USE_MAPPED_LOCATION
-  SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
-#else
-  SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
-#endif
-}
-
-
 /* Function get_initial_def_for_reduction
 
    Input:
@@ -1616,6 +1515,111 @@ get_initial_def_for_reduction (tree stmt
 }
 
 
+/* Function vect_get_vec_def_for_stmt_copy
+
+   Return a vector-def for an operand. This function is used when the 
+   vectorized stmt to be created (by the caller to this function) is a "copy" 
+   created in case the vectorized result cannot fit in one vector, and several 
+   copies of the vector-stmt are required. In this case the vector-def is 
+   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
+   of the stmt that defines VEC_OPRND. 
+   DT is the type of the vector def VEC_OPRND.
+
+   Context:
+        In case the vectorization factor (VF) is bigger than the number
+   of elements that can fit in a vectype (nunits), we have to generate
+   more than one vector stmt to vectorize the scalar stmt. This situation
+   arises when there are multiple data-types operated upon in the loop; the 
+   smallest data-type determines the VF, and as a result, when vectorizing
+   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
+   vector stmt (each computing a vector of 'nunits' results, and together
+   computing 'VF' results in each iteration).  This function is called when 
+   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
+   which VF=16 and nunits=4, so the number of copies required is 4):
+
+   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
+ 
+   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
+                        VS1.1:  vx.1 = memref1      VS1.2
+                        VS1.2:  vx.2 = memref2      VS1.3
+                        VS1.3:  vx.3 = memref3 
+
+   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
+                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
+                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
+                        VSnew.3:  vz3 = vx.3 + ...
+
+   The vectorization of S1 is explained in vectorizable_load.
+   The vectorization of S2:
+        To create the first vector-stmt out of the 4 copies - VSnew.0 - 
+   the function 'vect_get_vec_def_for_operand' is called to 
+   get the relevant vector-def for each operand of S2. For operand x it
+   returns  the vector-def 'vx.0'.
+
+        To create the remaining copies of the vector-stmt (VSnew.j), this 
+   function is called to get the relevant vector-def for each operand.  It is 
+   obtained from the respective VS1.j stmt, which is recorded in the 
+   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
+
+        For example, to obtain the vector-def 'vx.1' in order to create the 
+   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'. 
+   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the 
+   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
+   and return its def ('vx.1').
+   Overall, to create the above sequence this function will be called 3 times:
+        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
+        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
+        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
+
+static tree
+vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
+{
+  tree vec_stmt_for_operand;
+  stmt_vec_info def_stmt_info;
+
+  /* Do nothing; can reuse same def.  */
+  if (dt == vect_invariant_def || dt == vect_constant_def )
+    return vec_oprnd;
+
+  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
+  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
+  gcc_assert (def_stmt_info);
+  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
+  gcc_assert (vec_stmt_for_operand);
+  vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);
+
+  return vec_oprnd;
+}
+
+
+/* Function vect_min_worthwhile_factor.
+
+   For a loop where we could vectorize the operation indicated by CODE,
+   return the minimum vectorization factor that makes it worthwhile
+   to use generic vectors.  */
+
+static int
+vect_min_worthwhile_factor (enum tree_code code)
+{
+  switch (code)
+    {
+    case PLUS_EXPR:
+    case MINUS_EXPR:
+    case NEGATE_EXPR:
+      return 4;
+
+    case BIT_AND_EXPR:
+    case BIT_IOR_EXPR:
+    case BIT_XOR_EXPR:
+    case BIT_NOT_EXPR:
+      return 2;
+
+    default:
+      return INT_MAX;
+    }
+}
+
+
 /* Function vect_create_epilog_for_reduction
     
    Create code at the loop-epilog to finalize the result of a reduction
@@ -2653,16 +2657,14 @@ vect_gen_widened_results_half (enum tree
 }
 
 
-/* Function vectorizable_conversion.
-
-Check if STMT performs a conversion operation, that can be vectorized. 
-If VEC_STMT is also passed, vectorize the STMT: create a vectorized 
-stmt to replace it, put it in VEC_STMT, and insert it at BSI.
-Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
+/* Check if STMT performs a conversion operation, that can be vectorized. 
+   If VEC_STMT is also passed, vectorize the STMT: create a vectorized 
+   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
+   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
 
 bool
-vectorizable_conversion (tree stmt, block_stmt_iterator * bsi,
-				   tree * vec_stmt)
+vectorizable_conversion (tree stmt, block_stmt_iterator *bsi,
+			 tree *vec_stmt)
 {
   tree vec_dest;
   tree scalar_dest;
@@ -2714,7 +2716,7 @@ vectorizable_conversion (tree stmt, bloc
   if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
     return false;
 
-  /* Check types of lhs and rhs */
+  /* Check types of lhs and rhs.  */
   op0 = TREE_OPERAND (operation, 0);
   rhs_type = TREE_TYPE (op0);
   vectype_in = get_vectype_for_scalar_type (rhs_type);
@@ -2738,7 +2740,7 @@ vectorizable_conversion (tree stmt, bloc
   if (modifier == NONE)
     gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
 
-  /* Bail out if the types are both integral or non-integral */
+  /* Bail out if the types are both integral or non-integral.  */
   if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
       || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
     return false;
@@ -2901,6 +2903,7 @@ vectorizable_conversion (tree stmt, bloc
 
       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
     }
+
   return true;
 }
 
@@ -2992,33 +2995,6 @@ vectorizable_assignment (tree stmt, bloc
 }
 
 
-/* Function vect_min_worthwhile_factor.
-
-   For a loop where we could vectorize the operation indicated by CODE,
-   return the minimum vectorization factor that makes it worthwhile
-   to use generic vectors.  */
-static int
-vect_min_worthwhile_factor (enum tree_code code)
-{
-  switch (code)
-    {
-    case PLUS_EXPR:
-    case MINUS_EXPR:
-    case NEGATE_EXPR:
-      return 4;
-
-    case BIT_AND_EXPR:
-    case BIT_IOR_EXPR:
-    case BIT_XOR_EXPR:
-    case BIT_NOT_EXPR:
-      return 2;
-
-    default:
-      return INT_MAX;
-    }
-}
-
-
 /* Function vectorizable_induction
 
    Check if PHI performs an induction computation that can be vectorized.
@@ -3331,7 +3307,7 @@ vectorizable_operation (tree stmt, block
 	    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
 	}
 
-      /* Arguments are ready. create the new vector stmt.  */
+      /* Arguments are ready. Create the new vector stmt.  */
 
       if (op_type == binary_op)
         new_stmt = build_gimple_modify_stmt (vec_dest,
@@ -3881,6 +3857,7 @@ vectorizable_store (tree stmt, block_stm
   bool strided_store = false;
   unsigned int group_size, i;
   VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
+  
   gcc_assert (ncopies >= 1);
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info))
@@ -3904,7 +3881,7 @@ vectorizable_store (tree stmt, block_stm
   scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
   if (TREE_CODE (scalar_dest) != ARRAY_REF
       && TREE_CODE (scalar_dest) != INDIRECT_REF
-      && !DR_GROUP_FIRST_DR (stmt_info))
+      && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
     return false;
 
   op = GIMPLE_STMT_OPERAND (stmt, 1);
@@ -3924,7 +3901,7 @@ vectorizable_store (tree stmt, block_stm
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
-  if (DR_GROUP_FIRST_DR (stmt_info))
+  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
     {
       strided_store = true;
       if (!vect_strided_store_supported (vectype))
@@ -4517,6 +4494,8 @@ vectorizable_load (tree stmt, block_stmt
   bool strided_load = false;
   tree first_stmt;
 
+  gcc_assert (ncopies >= 1);
+
   if (!STMT_VINFO_RELEVANT_P (stmt_info))
     return false;
 
@@ -4542,7 +4521,7 @@ vectorizable_load (tree stmt, block_stmt
   op = GIMPLE_STMT_OPERAND (stmt, 1);
   if (TREE_CODE (op) != ARRAY_REF 
       && TREE_CODE (op) != INDIRECT_REF
-      && !DR_GROUP_FIRST_DR (stmt_info))
+      && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
     return false;
 
   if (!STMT_VINFO_DATA_REF (stmt_info))
@@ -4560,7 +4539,7 @@ vectorizable_load (tree stmt, block_stmt
     }
 
   /* Check if the load is a part of an interleaving chain.  */
-  if (DR_GROUP_FIRST_DR (stmt_info))
+  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
     {
       strided_load = true;
 
@@ -5028,11 +5007,12 @@ vectorizable_condition (tree stmt, block
   return true;
 }
 
+
 /* Function vect_transform_stmt.
 
    Create a vectorized stmt to replace STMT, and insert it at BSI.  */
 
-bool
+static bool
 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store)
 {
   bool is_store = false;
@@ -5081,7 +5061,7 @@ vect_transform_stmt (tree stmt, block_st
     case store_vec_info_type:
       done = vectorizable_store (stmt, bsi, &vec_stmt);
       gcc_assert (done);
-      if (DR_GROUP_FIRST_DR (stmt_info))
+      if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
 	{
 	  /* In case of interleaving, the whole chain is vectorized when the
 	     last store in the chain is reached. Store stmts before the last
@@ -5517,7 +5497,7 @@ vect_gen_niters_for_prolog_loop (loop_ve
   int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
   int nelements = TYPE_VECTOR_SUBPARTS (vectype);
 
-  if (DR_GROUP_FIRST_DR (stmt_info))
+  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
     {
       /* For interleaved access element size must be multiplied by the size of
 	 the interleaved group.  */
@@ -5989,7 +5969,7 @@ vect_transform_loop (loop_vec_info loop_
           if (is_store)
             {
 	      stmt_ann_t ann;
-	      if (DR_GROUP_FIRST_DR (stmt_info))
+	      if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
 		{
 		  /* Interleaving. If IS_STORE is TRUE, the vectorization of the
 		     interleaving chain was completed - free all the stores in

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2010-09-16 10:36 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-09-14 14:23 [patch] Vectorizer cleanup Ira Rosen
2010-09-14 16:34 ` Sebastian Pop
2010-09-16 12:16   ` Ira Rosen
  -- strict thread matches above, loose matches on Subject: below --
2009-06-01  8:18 Ira Rosen
2009-04-28  9:02 Ira Rosen
2007-08-13 13:09 Ira Rosen
2007-08-14 16:48 ` Diego Novillo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).