public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] limit vectorization to large enough trip-counts
@ 2007-01-08 20:35 Dorit Nuzman
  2007-01-09 16:25 ` Daniel Berlin
  0 siblings, 1 reply; 3+ messages in thread
From: Dorit Nuzman @ 2007-01-08 20:35 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1683 bytes --]


The other patch I referred to in
http://gcc.gnu.org/ml/gcc/2007-01/msg00289.html:
This is a trivial patch that lets the user specify a minimum threshold for
the trip-count of a loop, below which vectorization should be avoided.
The new flag is: --param min-vect-loop-bound=X
The semantics is: If the number of vectorized iterations is less than or
equal to X - don't vectorize the loop. The default for X is 0.

In case the loop bound is known - we give up vectorization already at
compile time (if it's less than X). When the loop bound is unknown, we use
the guards that we already generate anyhow to control how many iterations
will be executed in the vectorized code, and how many in the remainder
scalar loop. The run-time check we already generate is augmented to direct
execution to the scalar loop when n<=X instead of when n<=0.

Ideally we'd want to allow different thresholds for different kinds of
loops, rather than the same threshold for all loops (e.g. a loop with
aligned accesses might be profitable to vectorize already from a smaller
trip-count than a loop with misaligned accesses). But even with this simple
patch, using it with --param min-vect-loop-bound=2 already helped to
improve performance of several benchmarks (i.e. it allows benefiting from
vectorization speedups in some loops, and avoids degradations in other
loops).

Ideally2: what we really want is a cost model to replace the user specified
X. That is in the works. At least until then - this patch can be useful.

Was bootstrapped on powerpc-linux a couple months ago - I need to update to
current mainline and test. In the meantime, here it is.

Dorit

(See attached file: diff-min-loop-boun.txt)

[-- Attachment #2: diff-min-loop-boun.txt --]
[-- Type: text/plain, Size: 7566 bytes --]

Index: params.h
===================================================================
*** params.h	(revision 116059)
--- params.h	(working copy)
*************** typedef enum compiler_param
*** 111,116 ****
--- 111,118 ----
    PARAM_VALUE (PARAM_MAX_INLINE_INSNS_AUTO)
  #define MAX_VARIABLE_EXPANSIONS \
    PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS)
+ #define MIN_VECT_LOOP_BOUND \
+   PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
  #define MAX_DELAY_SLOT_INSN_SEARCH \
    PARAM_VALUE (PARAM_MAX_DELAY_SLOT_INSN_SEARCH)
  #define MAX_DELAY_SLOT_LIVE_SEARCH \
Index: tree-vectorizer.c
===================================================================
*** tree-vectorizer.c	(revision 116059)
--- tree-vectorizer.c	(working copy)
*************** slpeel_verify_cfg_after_peeling (struct 
*** 1067,1073 ****
  struct loop*
  slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops, 
  			       edge e, tree first_niters, 
! 			       tree niters, bool update_first_loop_count)
  {
    struct loop *new_loop = NULL, *first_loop, *second_loop;
    edge skip_e;
--- 1067,1074 ----
  struct loop*
  slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops, 
  			       edge e, tree first_niters, 
! 			       tree niters, bool update_first_loop_count,
! 			       unsigned int th)
  {
    struct loop *new_loop = NULL, *first_loop, *second_loop;
    edge skip_e;
*************** slpeel_tree_peel_loop_to_edge (struct lo
*** 1162,1168 ****
  
    pre_condition =
      fold_build2 (LE_EXPR, boolean_type_node, first_niters, 
!                  build_int_cst (TREE_TYPE (first_niters), 0));
    skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                    bb_before_second_loop, bb_before_first_loop);
    slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
--- 1163,1170 ----
  
    pre_condition =
      fold_build2 (LE_EXPR, boolean_type_node, first_niters, 
! 	build_int_cst (TREE_TYPE (first_niters), th));
! 
    skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                    bb_before_second_loop, bb_before_first_loop);
    slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
Index: tree-vectorizer.h
===================================================================
*** tree-vectorizer.h	(revision 116059)
--- tree-vectorizer.h	(working copy)
*************** extern bitmap vect_vnames_to_rename;
*** 307,313 ****
     divide by the vectorization factor, and to peel the first few iterations
     to force the alignment of data references in the loop.  */
  extern struct loop *slpeel_tree_peel_loop_to_edge 
!   (struct loop *, struct loops *, edge, tree, tree, bool);
  extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
  extern bool slpeel_can_duplicate_loop_p (struct loop *, edge);
  #ifdef ENABLE_CHECKING
--- 307,313 ----
     divide by the vectorization factor, and to peel the first few iterations
     to force the alignment of data references in the loop.  */
  extern struct loop *slpeel_tree_peel_loop_to_edge 
!   (struct loop *, struct loops *, edge, tree, tree, bool, unsigned int);
  extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
  extern bool slpeel_can_duplicate_loop_p (struct loop *, edge);
  #ifdef ENABLE_CHECKING
Index: tree-vect-analyze.c
===================================================================
*** tree-vect-analyze.c	(revision 116059)
--- tree-vect-analyze.c	(working copy)
*************** vect_analyze_operations (loop_vec_info l
*** 377,383 ****
          vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
  
    if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
!       && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
      {
        if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
  	fprintf (vect_dump, "not vectorized: iteration count too small.");
--- 377,386 ----
          vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
  
    if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
!       && ((LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
! 	  || (LOOP_VINFO_INT_NITERS (loop_vinfo) <=
! 		((unsigned) (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)) 
! 					   * vectorization_factor))))
      {
        if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
  	fprintf (vect_dump, "not vectorized: iteration count too small.");
Index: tree-vect-transform.c
===================================================================
*** tree-vect-transform.c	(revision 116060)
--- tree-vect-transform.c	(working copy)
*************** Software Foundation, 51 Franklin Street,
*** 35,40 ****
--- 35,41 ----
  #include "cfgloop.h"
  #include "expr.h"
  #include "optabs.h"
+ #include "params.h"
  #include "recog.h"
  #include "tree-data-ref.h"
  #include "tree-chrec.h"
*************** vect_do_peeling_for_loop_bound (loop_vec
*** 2591,2596 ****
--- 2592,2598 ----
    edge update_e;
    basic_block preheader;
    int loop_num;
+   unsigned int th;
  
    if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
*************** vect_do_peeling_for_loop_bound (loop_vec
*** 2606,2613 ****
  				   &ratio_mult_vf_name, ratio);
  
    loop_num  = loop->num; 
    new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
! 					    ratio_mult_vf_name, ni_name, false);
    gcc_assert (new_loop);
    gcc_assert (loop_num == loop->num);
  #ifdef ENABLE_CHECKING
--- 2608,2619 ----
  				   &ratio_mult_vf_name, ratio);
  
    loop_num  = loop->num; 
+   /* threshold for vectorized loop  */
+   th = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)) * 
+                LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+      
    new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
! 					    ratio_mult_vf_name, ni_name, false, th);
    gcc_assert (new_loop);
    gcc_assert (loop_num == loop->num);
  #ifdef ENABLE_CHECKING
*************** vect_do_peeling_for_alignment (loop_vec_
*** 2814,2820 ****
    /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
    new_loop = 
  	slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop), 
! 				       niters_of_prolog_loop, ni_name, true); 
    gcc_assert (new_loop);
  #ifdef ENABLE_CHECKING
    slpeel_verify_cfg_after_peeling (new_loop, loop);
--- 2820,2826 ----
    /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
    new_loop = 
  	slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop), 
! 				       niters_of_prolog_loop, ni_name, true, 0); 
    gcc_assert (new_loop);
  #ifdef ENABLE_CHECKING
    slpeel_verify_cfg_after_peeling (new_loop, loop);
Index: params.def
===================================================================
*** params.def	(revision 116059)
--- params.def	(working copy)
*************** DEFPARAM (PARAM_MAX_VARIABLE_EXPANSIONS,
*** 146,151 ****
--- 146,157 ----
  	  "If -fvariable-expansion-in-unroller is used, the maximum number of times that an individual variable will be expanded during loop unrolling",
            1, 0, 0)
       
+ /* Limit loop autovectorization to loops with large enough iteration count.  */
+ DEFPARAM (PARAM_MIN_VECT_LOOP_BOUND,
+ 	  "min-vect-loop-bound",
+ 	  "If -ftree-vectorize is used, the minimal loop bound of a loop to be considered for vectorization",
+ 	  0, 0, 0)
+ 
  /* The maximum number of instructions to consider when looking for an
     instruction to fill a delay slot.  If more than this arbitrary
     number of instructions is searched, the time savings from filling

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2007-01-14 12:59 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-01-08 20:35 [PATCH] limit vectorization to large enough trip-counts Dorit Nuzman
2007-01-09 16:25 ` Daniel Berlin
2007-01-14 12:59   ` Dorit Nuzman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).