public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Richard Biener <rguenther@suse.de>
To: gcc-patches@gcc.gnu.org
Subject: [PATCH] Remove last permutation restriction for SLP vectorizing
Date: Tue, 09 Jun 2015 14:22:00 -0000	[thread overview]
Message-ID: <alpine.LSU.2.11.1506091615180.30088@zhemvz.fhfr.qr> (raw)


This removes the restriction in place on reductions.  To not regress
gfortran.dg/vect/fast-math-pr37021.f90 the patch also implements
SLP permutations for strided group loads.  With those two pieces
we can finally SLP vectorize the complex multiplication (which
in the gfortran.dg/vect/fast-math-pr37021.f90 testcase uses
variable stride).  Code generated on x86_64 suffers from PR56766,
combine not being able to use sse3_addsubv2df3 because the
RTL doesn't use the expected vec_merge but
(vec_select (vec_concat )) instead.

Previously we vectorized this with unrolling and interleaving.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

2015-06-09  Richard Biener  <rguenther@suse.de>

	* tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Split
	out from ...
	(vect_supported_load_permutation_p): ... here.  Handle
	supportable permutations in reductions.
	* tree-vect-stmts.c (vectorizable_load): Handle SLP permutations
	for vectorizing strided group loads.

Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c	(revision 224271)
--- gcc/tree-vect-slp.c	(working copy)
*************** vect_slp_rearrange_stmts (slp_tree node,
*** 1326,1331 ****
--- 1270,1336 ----
  }
  
  
+ /* Attempt to reorder stmts in a reduction chain so that we don't
+    require any load permutation.  Return true if that was possible,
+    otherwise return false.  */
+ 
+ static bool
+ vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
+ {
+   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
+   unsigned int i, j;
+   sbitmap load_index;
+   unsigned int lidx;
+   slp_tree node, load;
+ 
+   /* Compare all the permutation sequences to the first one.  We know
+      that at least one load is permuted.  */
+   node = SLP_INSTANCE_LOADS (slp_instn)[0];
+   if (!node->load_permutation.exists ())
+     return false;
+   for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
+     {
+       if (!load->load_permutation.exists ())
+ 	return false;
+       FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
+ 	if (lidx != node->load_permutation[j])
+ 	  return false;
+     }
+ 
+   /* Check that the loads in the first sequence are different and there
+      are no gaps between them.  */
+   load_index = sbitmap_alloc (group_size);
+   bitmap_clear (load_index);
+   FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
+     {
+       if (bitmap_bit_p (load_index, lidx))
+ 	{
+ 	  sbitmap_free (load_index);
+ 	  return false;
+ 	}
+       bitmap_set_bit (load_index, lidx);
+     }
+   for (i = 0; i < group_size; i++)
+     if (!bitmap_bit_p (load_index, i))
+       {
+ 	sbitmap_free (load_index);
+ 	return false;
+       }
+   sbitmap_free (load_index);
+ 
+   /* This permutation is valid for reduction.  Since the order of the
+      statements in the nodes is not important unless they are memory
+      accesses, we can rearrange the statements in all the nodes
+      according to the order of the loads.  */
+   vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
+ 			    node->load_permutation);
+ 
+   /* We are done, no actual permutations need to be generated.  */
+   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
+     SLP_TREE_LOAD_PERMUTATION (node).release ();
+   return true;
+ }
+ 
  /* Check if the required load permutations in the SLP instance
     SLP_INSTN are supported.  */
  
*************** vect_supported_load_permutation_p (slp_i
*** 1334,1340 ****
  {
    unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
    unsigned int i, j, k, next;
-   sbitmap load_index;
    slp_tree node;
    gimple stmt, load, next_load, first_load;
    struct data_reference *dr;
--- 1339,1344 ----
*************** vect_supported_load_permutation_p (slp_i
*** 1369,1427 ****
    stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  
    /* Reduction (there are no data-refs in the root).
!      In reduction chain the order of the loads is important.  */
    if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
        && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
      {
!       slp_tree load;
!       unsigned int lidx;
! 
!       /* Compare all the permutation sequences to the first one.  We know
!          that at least one load is permuted.  */
!       node = SLP_INSTANCE_LOADS (slp_instn)[0];
!       if (!node->load_permutation.exists ())
! 	return false;
!       for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
! 	{
! 	  if (!load->load_permutation.exists ())
! 	    return false;
! 	  FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
! 	    if (lidx != node->load_permutation[j])
! 	      return false;
! 	}
  
!       /* Check that the loads in the first sequence are different and there
! 	 are no gaps between them.  */
!       load_index = sbitmap_alloc (group_size);
!       bitmap_clear (load_index);
!       FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
! 	{
! 	  if (bitmap_bit_p (load_index, lidx))
! 	    {
! 	      sbitmap_free (load_index);
! 	      return false;
! 	    }
! 	  bitmap_set_bit (load_index, lidx);
! 	}
!       for (i = 0; i < group_size; i++)
! 	if (!bitmap_bit_p (load_index, i))
! 	  {
! 	    sbitmap_free (load_index);
! 	    return false;
! 	  }
!       sbitmap_free (load_index);
! 
!       /* This permutation is valid for reduction.  Since the order of the
! 	 statements in the nodes is not important unless they are memory
! 	 accesses, we can rearrange the statements in all the nodes
! 	 according to the order of the loads.  */
!       vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
! 				node->load_permutation);
! 
!       /* We are done, no actual permutations need to be generated.  */
!       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
! 	SLP_TREE_LOAD_PERMUTATION (node).release ();
!       return true;
      }
  
    /* In basic block vectorization we allow any subchain of an interleaving
--- 1373,1386 ----
    stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  
    /* Reduction (there are no data-refs in the root).
!      In reduction chain the order of the loads is not important.  */
    if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
        && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
      {
!       if (vect_attempt_slp_rearrange_stmts (slp_instn))
! 	return true;
  
!       /* Fallthru to general load permutation handling.  */
      }
  
    /* In basic block vectorization we allow any subchain of an interleaving
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c	(revision 224271)
--- gcc/tree-vect-stmts.c	(working copy)
*************** vectorizable_load (gimple stmt, gimple_s
*** 5995,6003 ****
        if ((grouped_load
  	   && (slp || PURE_SLP_STMT (stmt_info)))
  	  && (group_size > nunits
! 	      || nunits % group_size != 0
! 	      /* We don't support load permutations.  */
! 	      || slp_perm))
  	{
  	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  			   "unhandled strided group load\n");
--- 5995,6001 ----
        if ((grouped_load
  	   && (slp || PURE_SLP_STMT (stmt_info)))
  	  && (group_size > nunits
! 	      || nunits % group_size != 0))
  	{
  	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  			   "unhandled strided group load\n");
*************** vectorizable_load (gimple stmt, gimple_s
*** 6294,6299 ****
--- 6292,6298 ----
        alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
        int nloads = nunits;
        tree ltype = TREE_TYPE (vectype);
+       auto_vec<tree> dr_chain;
        if (slp)
  	{
  	  nloads = nunits / group_size;
*************** vectorizable_load (gimple stmt, gimple_s
*** 6303,6309 ****
  	    ltype = vectype;
  	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
  	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
! 	  gcc_assert (!slp_perm);
  	}
        for (j = 0; j < ncopies; j++)
  	{
--- 6302,6309 ----
  	    ltype = vectype;
  	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
  	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
! 	  if (slp_perm)
! 	    dr_chain.create (ncopies);
  	}
        for (j = 0; j < ncopies; j++)
  	{
*************** vectorizable_load (gimple stmt, gimple_s
*** 6350,6362 ****
  	    }
  
  	  if (slp)
! 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
  	  if (j == 0)
  	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
  	  else
  	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
  	  prev_stmt_info = vinfo_for_stmt (new_stmt);
  	}
        return true;
      }
  
--- 6350,6369 ----
  	    }
  
  	  if (slp)
! 	    {
! 	      SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
! 	      if (slp_perm)
! 		dr_chain.quick_push (gimple_assign_lhs (new_stmt));
! 	    }
  	  if (j == 0)
  	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
  	  else
  	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
  	  prev_stmt_info = vinfo_for_stmt (new_stmt);
  	}
+       if (slp_perm)
+ 	vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
+ 				      slp_node_instance, false);
        return true;
      }
  

                 reply	other threads:[~2015-06-09 14:21 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=alpine.LSU.2.11.1506091615180.30088@zhemvz.fhfr.qr \
    --to=rguenther@suse.de \
    --cc=gcc-patches@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).