public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* SLP for vectors
@ 2013-01-27 15:28 Marc Glisse
  2013-01-29 11:37 ` Richard Biener
  0 siblings, 1 reply; 8+ messages in thread
From: Marc Glisse @ 2013-01-27 15:28 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: TEXT/PLAIN, Size: 940 bytes --]

Hello,

this message is to check that I am not doing something absurd and ask for 
a bit of advice.

In the attached patch, I let SLP recognize vector loads/stores just like 
it recognizes those in an array. It has a few issues: the cost of the 
vectorized version is overestimated, the base object is wrong (doesn't 
strip the bit_field_ref, but since the base address is right and the base 
object is almost unused...), but most importantly it only works if the 
vectors have their address taken, otherwise (after asking gimple_vuse?) 
SLP doesn't detect their use as loads or stores at all. Also, it only 
works if you write result[0]=..., result[1]=... and does not recognize a 
constructor as a series of stores.

Is slowly extending SLP the right way to go? Should get_references_in_stmt 
report vector element accesses?

(the patch as is passes the testsuite on x86_64-linux and vectorizes the 
few examples I tried)

-- 
Marc Glisse

[-- Attachment #2: Type: TEXT/PLAIN, Size: 6255 bytes --]

Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	(revision 195493)
+++ gcc/tree-vect-stmts.c	(working copy)
@@ -3860,20 +3860,21 @@ vectorizable_store (gimple stmt, gimple_
   /* Is vectorizable store? */
 
   if (!is_gimple_assign (stmt))
     return false;
 
   scalar_dest = gimple_assign_lhs (stmt);
   if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
       && is_pattern_stmt_p (stmt_info))
     scalar_dest = TREE_OPERAND (scalar_dest, 0);
   if (TREE_CODE (scalar_dest) != ARRAY_REF
+      && TREE_CODE (scalar_dest) != BIT_FIELD_REF
       && TREE_CODE (scalar_dest) != INDIRECT_REF
       && TREE_CODE (scalar_dest) != COMPONENT_REF
       && TREE_CODE (scalar_dest) != IMAGPART_EXPR
       && TREE_CODE (scalar_dest) != REALPART_EXPR
       && TREE_CODE (scalar_dest) != MEM_REF)
     return false;
 
   gcc_assert (gimple_assign_single_p (stmt));
   op = gimple_assign_rhs1 (stmt);
   if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
@@ -4394,20 +4395,21 @@ vectorizable_load (gimple stmt, gimple_s
   /* Is vectorizable load? */
   if (!is_gimple_assign (stmt))
     return false;
 
   scalar_dest = gimple_assign_lhs (stmt);
   if (TREE_CODE (scalar_dest) != SSA_NAME)
     return false;
 
   code = gimple_assign_rhs_code (stmt);
   if (code != ARRAY_REF
+      && code != BIT_FIELD_REF
       && code != INDIRECT_REF
       && code != COMPONENT_REF
       && code != IMAGPART_EXPR
       && code != REALPART_EXPR
       && code != MEM_REF
       && TREE_CODE_CLASS (code) != tcc_declaration)
     return false;
 
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	(revision 195493)
+++ gcc/tree-vect-slp.c	(working copy)
@@ -660,20 +660,21 @@ vect_build_slp_tree (loop_vec_info loop_
 	}
       else
 	{
 	  if (first_stmt_code != rhs_code
 	      && (first_stmt_code != IMAGPART_EXPR
 		  || rhs_code != REALPART_EXPR)
 	      && (first_stmt_code != REALPART_EXPR
 		  || rhs_code != IMAGPART_EXPR)
               && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                    && (first_stmt_code == ARRAY_REF
+                       || first_stmt_code == BIT_FIELD_REF
                        || first_stmt_code == INDIRECT_REF
                        || first_stmt_code == COMPONENT_REF
                        || first_stmt_code == MEM_REF)))
 	    {
 	      if (dump_enabled_p ())
 		{
 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
 				   "Build SLP failed: different operation "
 				   "in stmt ");
 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
Index: gcc/tree-data-ref.c
===================================================================
--- gcc/tree-data-ref.c	(revision 195493)
+++ gcc/tree-data-ref.c	(working copy)
@@ -854,20 +854,24 @@ dr_analyze_indices (struct data_referenc
   /* Analyze access functions of dimensions we know to be independent.  */
   while (handled_component_p (ref))
     {
       if (TREE_CODE (ref) == ARRAY_REF)
 	{
 	  op = TREE_OPERAND (ref, 1);
 	  access_fn = analyze_scalar_evolution (loop, op);
 	  access_fn = instantiate_scev (before_loop, loop, access_fn);
 	  access_fns.safe_push (access_fn);
 	}
+      else if (TREE_CODE (ref) == BIT_FIELD_REF)
+	{
+	  access_fns.safe_push (TREE_OPERAND (ref, 2));
+	}
       else if (TREE_CODE (ref) == COMPONENT_REF
 	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
 	{
 	  /* For COMPONENT_REFs of records (but not unions!) use the
 	     FIELD_DECL offset as constant access function so we can
 	     disambiguate a[i].f1 and a[i].f2.  */
 	  tree off = component_ref_field_offset (ref);
 	  off = size_binop (PLUS_EXPR,
 			    size_binop (MULT_EXPR,
 					fold_convert (bitsizetype, off),
Index: gcc/tree-flow-inline.h
===================================================================
--- gcc/tree-flow-inline.h	(revision 195493)
+++ gcc/tree-flow-inline.h	(working copy)
@@ -1239,21 +1239,24 @@ get_addr_base_and_unit_offset_1 (tree ex
 {
   HOST_WIDE_INT byte_offset = 0;
 
   /* Compute cumulative byte-offset for nested component-refs and array-refs,
      and find the ultimate containing object.  */
   while (1)
     {
       switch (TREE_CODE (exp))
 	{
 	case BIT_FIELD_REF:
-	  return NULL_TREE;
+	  // Exit the loop?
+	  byte_offset += (TREE_INT_CST_LOW (TREE_OPERAND (exp, 2))
+			  / BITS_PER_UNIT);
+	  break;
 
 	case COMPONENT_REF:
 	  {
 	    tree field = TREE_OPERAND (exp, 1);
 	    tree this_offset = component_ref_field_offset (exp);
 	    HOST_WIDE_INT hthis_offset;
 
 	    if (!this_offset
 		|| TREE_CODE (this_offset) != INTEGER_CST
 		|| (TREE_INT_CST_LOW (DECL_FIELD_BIT_OFFSET (field))
Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c	(revision 195493)
+++ gcc/tree-vect-data-refs.c	(working copy)
@@ -3789,20 +3789,22 @@ vect_create_data_ref_ptr (gimple stmt, t
 
   if (dump_enabled_p ())
     {
       tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
       dump_printf_loc (MSG_NOTE, vect_location,
                        "create %s-pointer variable to type: ",
                        tree_code_name[(int) TREE_CODE (aggr_type)]);
       dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
       if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
         dump_printf (MSG_NOTE, "  vectorizing an array ref: ");
+      else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
+        dump_printf (MSG_NOTE, "  vectorizing a vector ref: ");
       else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
         dump_printf (MSG_NOTE, "  vectorizing a record based array ref: ");
       else
         dump_printf (MSG_NOTE, "  vectorizing a pointer ref: ");
       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
     }
 
   /* (1) Create the new aggregate-pointer variable.  */
   aggr_ptr_type = build_pointer_type (aggr_type);
   base = get_base_address (DR_REF (dr));

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: SLP for vectors
  2013-01-27 15:28 SLP for vectors Marc Glisse
@ 2013-01-29 11:37 ` Richard Biener
  2013-01-30 20:48   ` Marc Glisse
  0 siblings, 1 reply; 8+ messages in thread
From: Richard Biener @ 2013-01-29 11:37 UTC (permalink / raw)
  To: Marc Glisse; +Cc: gcc-patches

On Sun, Jan 27, 2013 at 4:28 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> Hello,
>
> this message is to check that I am not doing something absurd and ask for a
> bit of advice.
>
> In the attached patch, I let SLP recognize vector loads/stores just like it
> recognizes those in an array. It has a few issues: the cost of the
> vectorized version is overestimated, the base object is wrong (doesn't strip
> the bit_field_ref, but since the base address is right and the base object
> is almost unused...), but most importantly it only works if the vectors have
> their address taken, otherwise (after asking gimple_vuse?) SLP doesn't
> detect their use as loads or stores at all.

Yes, if they have not their address taken they are not loads.

> Also, it only works if you write
> result[0]=..., result[1]=... and does not recognize a constructor as a
> series of stores.
>
> Is slowly extending SLP the right way to go? Should get_references_in_stmt
> report vector element accesses?

It does if it is a memory access.

You didn't provide a new testcase so it's hard for me to guess what you
are trying to do.  I suppose you want to vectorize

   w[0] = v[0] + v[0];
   w[1] = v[1] + v[1];

into

   w = v + v;

As it would work if w and v are array accesses instead of vector subscripts?
Note that similar issues (and bugreports) exist for complex component accesses.

So yes, handling BIT_FIELD_REF in the vectorizer looks like the correct
way to do - but mind that you should constrain the BIT_FIELD_REFs you
allow (I suppose in the end that's properly done by other part of the analysis).

As of handling non-memory BIT_FIELD_REFs - I suggest to split out
the test of whether a stmt is considered a "load" for the purpose of
vectorization and simply special-case this therein, not requiring a VUSE.

I suppose the data-ref analysis parts are not strictly required, nor
the get_addr_base_and_unit_offset_1 parts?  They should be split out
and separately tested anyway.  The data-ref part at least will confuse
analysis of 'a[0]' or 'a[1]' vs. 'a', but I suppose independent of the patch.

Richard.

> (the patch as is passes the testsuite on x86_64-linux and vectorizes the few
> examples I tried)
>
> --
> Marc Glisse
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> --- gcc/tree-vect-stmts.c       (revision 195493)
> +++ gcc/tree-vect-stmts.c       (working copy)
> @@ -3860,20 +3860,21 @@ vectorizable_store (gimple stmt, gimple_
>    /* Is vectorizable store? */
>
>    if (!is_gimple_assign (stmt))
>      return false;
>
>    scalar_dest = gimple_assign_lhs (stmt);
>    if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
>        && is_pattern_stmt_p (stmt_info))
>      scalar_dest = TREE_OPERAND (scalar_dest, 0);
>    if (TREE_CODE (scalar_dest) != ARRAY_REF
> +      && TREE_CODE (scalar_dest) != BIT_FIELD_REF
>        && TREE_CODE (scalar_dest) != INDIRECT_REF
>        && TREE_CODE (scalar_dest) != COMPONENT_REF
>        && TREE_CODE (scalar_dest) != IMAGPART_EXPR
>        && TREE_CODE (scalar_dest) != REALPART_EXPR
>        && TREE_CODE (scalar_dest) != MEM_REF)
>      return false;
>
>    gcc_assert (gimple_assign_single_p (stmt));
>    op = gimple_assign_rhs1 (stmt);
>    if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
> @@ -4394,20 +4395,21 @@ vectorizable_load (gimple stmt, gimple_s
>    /* Is vectorizable load? */
>    if (!is_gimple_assign (stmt))
>      return false;
>
>    scalar_dest = gimple_assign_lhs (stmt);
>    if (TREE_CODE (scalar_dest) != SSA_NAME)
>      return false;
>
>    code = gimple_assign_rhs_code (stmt);
>    if (code != ARRAY_REF
> +      && code != BIT_FIELD_REF
>        && code != INDIRECT_REF
>        && code != COMPONENT_REF
>        && code != IMAGPART_EXPR
>        && code != REALPART_EXPR
>        && code != MEM_REF
>        && TREE_CODE_CLASS (code) != tcc_declaration)
>      return false;
>
>    if (!STMT_VINFO_DATA_REF (stmt_info))
>      return false;
> Index: gcc/tree-vect-slp.c
> ===================================================================
> --- gcc/tree-vect-slp.c (revision 195493)
> +++ gcc/tree-vect-slp.c (working copy)
> @@ -660,20 +660,21 @@ vect_build_slp_tree (loop_vec_info loop_
>         }
>        else
>         {
>           if (first_stmt_code != rhs_code
>               && (first_stmt_code != IMAGPART_EXPR
>                   || rhs_code != REALPART_EXPR)
>               && (first_stmt_code != REALPART_EXPR
>                   || rhs_code != IMAGPART_EXPR)
>                && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
>                     && (first_stmt_code == ARRAY_REF
> +                       || first_stmt_code == BIT_FIELD_REF
>                         || first_stmt_code == INDIRECT_REF
>                         || first_stmt_code == COMPONENT_REF
>                         || first_stmt_code == MEM_REF)))
>             {
>               if (dump_enabled_p ())
>                 {
>                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>                                    "Build SLP failed: different operation "
>                                    "in stmt ");
>                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
> 0);
> Index: gcc/tree-data-ref.c
> ===================================================================
> --- gcc/tree-data-ref.c (revision 195493)
> +++ gcc/tree-data-ref.c (working copy)
> @@ -854,20 +854,24 @@ dr_analyze_indices (struct data_referenc
>    /* Analyze access functions of dimensions we know to be independent.  */
>    while (handled_component_p (ref))
>      {
>        if (TREE_CODE (ref) == ARRAY_REF)
>         {
>           op = TREE_OPERAND (ref, 1);
>           access_fn = analyze_scalar_evolution (loop, op);
>           access_fn = instantiate_scev (before_loop, loop, access_fn);
>           access_fns.safe_push (access_fn);
>         }
> +      else if (TREE_CODE (ref) == BIT_FIELD_REF)
> +       {
> +         access_fns.safe_push (TREE_OPERAND (ref, 2));
> +       }
>        else if (TREE_CODE (ref) == COMPONENT_REF
>                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) ==
> RECORD_TYPE)
>         {
>           /* For COMPONENT_REFs of records (but not unions!) use the
>              FIELD_DECL offset as constant access function so we can
>              disambiguate a[i].f1 and a[i].f2.  */
>           tree off = component_ref_field_offset (ref);
>           off = size_binop (PLUS_EXPR,
>                             size_binop (MULT_EXPR,
>                                         fold_convert (bitsizetype, off),
> Index: gcc/tree-flow-inline.h
> ===================================================================
> --- gcc/tree-flow-inline.h      (revision 195493)
> +++ gcc/tree-flow-inline.h      (working copy)
> @@ -1239,21 +1239,24 @@ get_addr_base_and_unit_offset_1 (tree ex
>  {
>    HOST_WIDE_INT byte_offset = 0;
>
>    /* Compute cumulative byte-offset for nested component-refs and
> array-refs,
>       and find the ultimate containing object.  */
>    while (1)
>      {
>        switch (TREE_CODE (exp))
>         {
>         case BIT_FIELD_REF:
> -         return NULL_TREE;
> +         // Exit the loop?
> +         byte_offset += (TREE_INT_CST_LOW (TREE_OPERAND (exp, 2))
> +                         / BITS_PER_UNIT);
> +         break;
>
>         case COMPONENT_REF:
>           {
>             tree field = TREE_OPERAND (exp, 1);
>             tree this_offset = component_ref_field_offset (exp);
>             HOST_WIDE_INT hthis_offset;
>
>             if (!this_offset
>                 || TREE_CODE (this_offset) != INTEGER_CST
>                 || (TREE_INT_CST_LOW (DECL_FIELD_BIT_OFFSET (field))
> Index: gcc/tree-vect-data-refs.c
> ===================================================================
> --- gcc/tree-vect-data-refs.c   (revision 195493)
> +++ gcc/tree-vect-data-refs.c   (working copy)
> @@ -3789,20 +3789,22 @@ vect_create_data_ref_ptr (gimple stmt, t
>
>    if (dump_enabled_p ())
>      {
>        tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
>        dump_printf_loc (MSG_NOTE, vect_location,
>                         "create %s-pointer variable to type: ",
>                         tree_code_name[(int) TREE_CODE (aggr_type)]);
>        dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
>        if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
>          dump_printf (MSG_NOTE, "  vectorizing an array ref: ");
> +      else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
> +        dump_printf (MSG_NOTE, "  vectorizing a vector ref: ");
>        else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
>          dump_printf (MSG_NOTE, "  vectorizing a record based array ref: ");
>        else
>          dump_printf (MSG_NOTE, "  vectorizing a pointer ref: ");
>        dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
>      }
>
>    /* (1) Create the new aggregate-pointer variable.  */
>    aggr_ptr_type = build_pointer_type (aggr_type);
>    base = get_base_address (DR_REF (dr));
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: SLP for vectors
  2013-01-29 11:37 ` Richard Biener
@ 2013-01-30 20:48   ` Marc Glisse
  2013-01-31  9:14     ` Richard Biener
  0 siblings, 1 reply; 8+ messages in thread
From: Marc Glisse @ 2013-01-30 20:48 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

On Tue, 29 Jan 2013, Richard Biener wrote:

> On Sun, Jan 27, 2013 at 4:28 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
>> Hello,
>>
>> this message is to check that I am not doing something absurd and ask for a
>> bit of advice.
>>
>> In the attached patch, I let SLP recognize vector loads/stores just like it
>> recognizes those in an array. It has a few issues: the cost of the
>> vectorized version is overestimated, the base object is wrong (doesn't strip
>> the bit_field_ref, but since the base address is right and the base object
>> is almost unused...), but most importantly it only works if the vectors have
>> their address taken, otherwise (after asking gimple_vuse?) SLP doesn't
>> detect their use as loads or stores at all.
>
> Yes, if they have not their address taken they are not loads.
>
>> Also, it only works if you write
>> result[0]=..., result[1]=... and does not recognize a constructor as a
>> series of stores.
>>
>> Is slowly extending SLP the right way to go? Should get_references_in_stmt
>> report vector element accesses?
>
> It does if it is a memory access.
>
> You didn't provide a new testcase so it's hard for me to guess what you
> are trying to do.  I suppose you want to vectorize
>
>   w[0] = v[0] + v[0];
>   w[1] = v[1] + v[1];
>
> into
>
>   w = v + v;
>
> As it would work if w and v are array accesses instead of vector subscripts?

Yes, sorry for not being more precise. Vectorization that works for an 
array should work (even better) for a vector.

> Note that similar issues (and bugreports) exist for complex component accesses.

One extension at a time :-)

> As of handling non-memory BIT_FIELD_REFs - I suggest to split out
> the test of whether a stmt is considered a "load" for the purpose of
> vectorization and simply special-case this therein, not requiring a VUSE.

Ok. The hardest part will be preventing the pass from creating ADDR_EXPR 
of those vectors, or at least folding them before the pass is done.

> I suppose the data-ref analysis parts are not strictly required, nor
> the get_addr_base_and_unit_offset_1 parts?

Actually it is necessary (at least the get_addr_base_and_unit_offset_1 
part is) otherwise I get a gimple verification failure because we have an 
ADDR_EXPR of a BIT_FIELD_REF.

> They should be split out
> and separately tested anyway.  The data-ref part at least will confuse
> analysis of 'a[0]' or 'a[1]' vs. 'a', but I suppose independent of the patch.

Ah... Could you be more specific? Are you thinking about arrays of 
vectors?

Thanks for the help,

-- 
Marc Glisse

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: SLP for vectors
  2013-01-30 20:48   ` Marc Glisse
@ 2013-01-31  9:14     ` Richard Biener
  2013-03-30 16:14       ` Marc Glisse
  0 siblings, 1 reply; 8+ messages in thread
From: Richard Biener @ 2013-01-31  9:14 UTC (permalink / raw)
  To: Marc Glisse; +Cc: gcc-patches

On Wed, Jan 30, 2013 at 9:45 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Tue, 29 Jan 2013, Richard Biener wrote:
>
>> On Sun, Jan 27, 2013 at 4:28 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
>>>
>>> Hello,
>>>
>>> this message is to check that I am not doing something absurd and ask for
>>> a
>>> bit of advice.
>>>
>>> In the attached patch, I let SLP recognize vector loads/stores just like
>>> it
>>> recognizes those in an array. It has a few issues: the cost of the
>>> vectorized version is overestimated, the base object is wrong (doesn't
>>> strip
>>> the bit_field_ref, but since the base address is right and the base
>>> object
>>> is almost unused...), but most importantly it only works if the vectors
>>> have
>>> their address taken, otherwise (after asking gimple_vuse?) SLP doesn't
>>> detect their use as loads or stores at all.
>>
>>
>> Yes, if they have not their address taken they are not loads.
>>
>>> Also, it only works if you write
>>> result[0]=..., result[1]=... and does not recognize a constructor as a
>>> series of stores.
>>>
>>> Is slowly extending SLP the right way to go? Should
>>> get_references_in_stmt
>>> report vector element accesses?
>>
>>
>> It does if it is a memory access.
>>
>> You didn't provide a new testcase so it's hard for me to guess what you
>> are trying to do.  I suppose you want to vectorize
>>
>>   w[0] = v[0] + v[0];
>>   w[1] = v[1] + v[1];
>>
>> into
>>
>>   w = v + v;
>>
>> As it would work if w and v are array accesses instead of vector
>> subscripts?
>
>
> Yes, sorry for not being more precise. Vectorization that works for an array
> should work (even better) for a vector.
>
>
>> Note that similar issues (and bugreports) exist for complex component
>> accesses.
>
>
> One extension at a time :-)
>
>
>> As of handling non-memory BIT_FIELD_REFs - I suggest to split out
>> the test of whether a stmt is considered a "load" for the purpose of
>> vectorization and simply special-case this therein, not requiring a VUSE.
>
>
> Ok. The hardest part will be preventing the pass from creating ADDR_EXPR of
> those vectors, or at least folding them before the pass is done.

Oh ... I suppose the "loads"/"stores" need simply be specially handled.

>
>> I suppose the data-ref analysis parts are not strictly required, nor
>> the get_addr_base_and_unit_offset_1 parts?
>
>
> Actually it is necessary (at least the get_addr_base_and_unit_offset_1 part
> is) otherwise I get a gimple verification failure because we have an
> ADDR_EXPR of a BIT_FIELD_REF.

Hm.  Certainly handling BIT_FIELD_REF in get_addr_base_and_unit_offset_1
is ok, but we should ensure by other means that the address of a
BIT_FIELD_REF is never taken ...

>> They should be split out
>> and separately tested anyway.  The data-ref part at least will confuse
>> analysis of 'a[0]' or 'a[1]' vs. 'a', but I suppose independent of the
>> patch.
>
>
> Ah... Could you be more specific? Are you thinking about arrays of vectors?

No, I was thinking of code that accesses both the whole vector and the
vector piecewise.  Mixing this kind of accesses will confuse data-ref
analysis I believe.  But it also will disable vectorization because there are
already vector types in the IL ... so it's probably a minor issue for now.

Richard.

> Thanks for the help,
>
> --
> Marc Glisse

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: SLP for vectors
  2013-01-31  9:14     ` Richard Biener
@ 2013-03-30 16:14       ` Marc Glisse
  2013-04-01 15:52         ` Marc Glisse
  2013-04-02 13:31         ` Richard Biener
  0 siblings, 2 replies; 8+ messages in thread
From: Marc Glisse @ 2013-03-30 16:14 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

[-- Attachment #1: Type: TEXT/PLAIN, Size: 1512 bytes --]

On Tue, 29 Jan 2013, Richard Biener wrote:

> So yes, handling BIT_FIELD_REF in the vectorizer looks like the correct
> way to do - but mind that you should constrain the BIT_FIELD_REFs you
> allow (I suppose in the end that's properly done by other part of the analysis).

Does that mean adding something like this to vectorizable_store? (and 
similarly to *_load)

if (VECTOR_TYPE_P (TREE_TYPE (scalar_dest)))
   return false;

Or maybe this?

if (TREE_CODE (scalar_dest) == BIT_FIELD_REF
     && !VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (scalar_dest, 0)))
   return false;

Or checking that the type of the BIT_FIELD_REF is the element type of the 
vector type it accesses?

I am not sure what restrictions are needed.

> I suppose the data-ref analysis parts are not strictly required,

I removed the dr_analyze_indices part, which was indeed not necessary. The 
tree-vect-data-refs.c part is useless (the base object is not computed for 
SLP) but shouldn't hurt.

The current patch is attached (passed bootstrap+testsuite a week ago).
Are there some parts of this that could go in?


2013-03-30  Marc Glisse  <marc.glisse@inria.fr>

 	* tree-vect-stmts.c (vectorizable_store): Accept BIT_FIELD_REF.
 	(vectorizable_load): Likewise.
 	* tree-vect-slp.c (vect_build_slp_tree): Likewise.
 	* tree-vect-data-refs.c (vect_create_data_ref_ptr): Handle VECTOR_TYPE.
 	* tree-flow-inline.h (get_addr_base_and_unit_offset_1): Handle
 	BIT_FIELD_REF.

testsuite/
 	* gcc.dg/vect/bb-slp-31.c: New file.

-- 
Marc Glisse

[-- Attachment #2: Type: TEXT/PLAIN, Size: 5811 bytes --]

Index: tree-vect-data-refs.c
===================================================================
--- tree-vect-data-refs.c	(revision 197265)
+++ tree-vect-data-refs.c	(working copy)
@@ -3565,20 +3565,22 @@ vect_create_data_ref_ptr (gimple stmt, t
 
   if (dump_enabled_p ())
     {
       tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
       dump_printf_loc (MSG_NOTE, vect_location,
                        "create %s-pointer variable to type: ",
                        tree_code_name[(int) TREE_CODE (aggr_type)]);
       dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
       if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
         dump_printf (MSG_NOTE, "  vectorizing an array ref: ");
+      else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
+        dump_printf (MSG_NOTE, "  vectorizing a vector ref: ");
       else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
         dump_printf (MSG_NOTE, "  vectorizing a record based array ref: ");
       else
         dump_printf (MSG_NOTE, "  vectorizing a pointer ref: ");
       dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
     }
 
   /* (1) Create the new aggregate-pointer variable.
      Vector and array types inherit the alias set of their component
      type by default so we need to use a ref-all pointer if the data
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c	(revision 197265)
+++ tree-vect-stmts.c	(working copy)
@@ -3844,20 +3844,21 @@ vectorizable_store (gimple stmt, gimple_
   /* Is vectorizable store? */
 
   if (!is_gimple_assign (stmt))
     return false;
 
   scalar_dest = gimple_assign_lhs (stmt);
   if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
       && is_pattern_stmt_p (stmt_info))
     scalar_dest = TREE_OPERAND (scalar_dest, 0);
   if (TREE_CODE (scalar_dest) != ARRAY_REF
+      && TREE_CODE (scalar_dest) != BIT_FIELD_REF
       && TREE_CODE (scalar_dest) != INDIRECT_REF
       && TREE_CODE (scalar_dest) != COMPONENT_REF
       && TREE_CODE (scalar_dest) != IMAGPART_EXPR
       && TREE_CODE (scalar_dest) != REALPART_EXPR
       && TREE_CODE (scalar_dest) != MEM_REF)
     return false;
 
   gcc_assert (gimple_assign_single_p (stmt));
   op = gimple_assign_rhs1 (stmt);
   if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
@@ -4378,20 +4379,21 @@ vectorizable_load (gimple stmt, gimple_s
   /* Is vectorizable load? */
   if (!is_gimple_assign (stmt))
     return false;
 
   scalar_dest = gimple_assign_lhs (stmt);
   if (TREE_CODE (scalar_dest) != SSA_NAME)
     return false;
 
   code = gimple_assign_rhs_code (stmt);
   if (code != ARRAY_REF
+      && code != BIT_FIELD_REF
       && code != INDIRECT_REF
       && code != COMPONENT_REF
       && code != IMAGPART_EXPR
       && code != REALPART_EXPR
       && code != MEM_REF
       && TREE_CODE_CLASS (code) != tcc_declaration)
     return false;
 
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
Index: tree-vect-slp.c
===================================================================
--- tree-vect-slp.c	(revision 197265)
+++ tree-vect-slp.c	(working copy)
@@ -660,20 +660,21 @@ vect_build_slp_tree (loop_vec_info loop_
 	}
       else
 	{
 	  if (first_stmt_code != rhs_code
 	      && (first_stmt_code != IMAGPART_EXPR
 		  || rhs_code != REALPART_EXPR)
 	      && (first_stmt_code != REALPART_EXPR
 		  || rhs_code != IMAGPART_EXPR)
               && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                    && (first_stmt_code == ARRAY_REF
+                       || first_stmt_code == BIT_FIELD_REF
                        || first_stmt_code == INDIRECT_REF
                        || first_stmt_code == COMPONENT_REF
                        || first_stmt_code == MEM_REF)))
 	    {
 	      if (dump_enabled_p ())
 		{
 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
 				   "Build SLP failed: different operation "
 				   "in stmt ");
 		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
Index: tree-flow-inline.h
===================================================================
--- tree-flow-inline.h	(revision 197265)
+++ tree-flow-inline.h	(working copy)
@@ -1239,21 +1239,23 @@ get_addr_base_and_unit_offset_1 (tree ex
 {
   HOST_WIDE_INT byte_offset = 0;
 
   /* Compute cumulative byte-offset for nested component-refs and array-refs,
      and find the ultimate containing object.  */
   while (1)
     {
       switch (TREE_CODE (exp))
 	{
 	case BIT_FIELD_REF:
-	  return NULL_TREE;
+	  byte_offset += (TREE_INT_CST_LOW (TREE_OPERAND (exp, 2))
+			  / BITS_PER_UNIT);
+	  break;
 
 	case COMPONENT_REF:
 	  {
 	    tree field = TREE_OPERAND (exp, 1);
 	    tree this_offset = component_ref_field_offset (exp);
 	    HOST_WIDE_INT hthis_offset;
 
 	    if (!this_offset
 		|| TREE_CODE (this_offset) != INTEGER_CST
 		|| (TREE_INT_CST_LOW (DECL_FIELD_BIT_OFFSET (field))
Index: testsuite/gcc.dg/vect/bb-slp-31.c
===================================================================
--- testsuite/gcc.dg/vect/bb-slp-31.c	(revision 0)
+++ testsuite/gcc.dg/vect/bb-slp-31.c	(revision 0)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+typedef double vec __attribute__ ((vector_size (2 * sizeof (double))));
+vec a;
+
+void f(){
+  a[0]=1+2*a[0]*a[0];
+  a[1]=1+2*a[1]*a[1];
+}
+
+/* { dg-final { scan-tree-dump "basic block vectorized using SLP" "slp" } } */

Property changes on: testsuite/gcc.dg/vect/bb-slp-31.c
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision URL
Added: svn:eol-style
   + native


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: SLP for vectors
  2013-03-30 16:14       ` Marc Glisse
@ 2013-04-01 15:52         ` Marc Glisse
  2013-04-02 13:29           ` Richard Biener
  2013-04-02 13:31         ` Richard Biener
  1 sibling, 1 reply; 8+ messages in thread
From: Marc Glisse @ 2013-04-01 15:52 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

On Sat, 30 Mar 2013, Marc Glisse wrote:

> 	* tree-flow-inline.h (get_addr_base_and_unit_offset_1): Handle
> 	BIT_FIELD_REF.

I wrote a safer version of this for PR52436:


  	case BIT_FIELD_REF:
-	  return NULL_TREE;
+	  {
+	    HOST_WIDE_INT this_off = TREE_INT_CST_LOW (TREE_OPERAND (exp, 2));
+	    if (this_off % BITS_PER_UNIT)
+	      return NULL_TREE;
+	    byte_offset += this_off / BITS_PER_UNIT;
+	  }
+	  break;

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: SLP for vectors
  2013-04-01 15:52         ` Marc Glisse
@ 2013-04-02 13:29           ` Richard Biener
  0 siblings, 0 replies; 8+ messages in thread
From: Richard Biener @ 2013-04-02 13:29 UTC (permalink / raw)
  To: Marc Glisse; +Cc: gcc-patches

On Mon, Apr 1, 2013 at 5:52 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Sat, 30 Mar 2013, Marc Glisse wrote:
>
>>         * tree-flow-inline.h (get_addr_base_and_unit_offset_1): Handle
>>         BIT_FIELD_REF.
>
>
> I wrote a safer version of this for PR52436:

That variant is ok - please test and commit separately.

Thanks,
Richard.

>
>
>         case BIT_FIELD_REF:
> -         return NULL_TREE;
> +         {
> +           HOST_WIDE_INT this_off = TREE_INT_CST_LOW (TREE_OPERAND (exp,
> 2));
> +           if (this_off % BITS_PER_UNIT)
> +             return NULL_TREE;
> +           byte_offset += this_off / BITS_PER_UNIT;
> +         }
> +         break;

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: SLP for vectors
  2013-03-30 16:14       ` Marc Glisse
  2013-04-01 15:52         ` Marc Glisse
@ 2013-04-02 13:31         ` Richard Biener
  1 sibling, 0 replies; 8+ messages in thread
From: Richard Biener @ 2013-04-02 13:31 UTC (permalink / raw)
  To: Marc Glisse; +Cc: gcc-patches

On Sat, Mar 30, 2013 at 5:14 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Tue, 29 Jan 2013, Richard Biener wrote:
>
>> So yes, handling BIT_FIELD_REF in the vectorizer looks like the correct
>> way to do - but mind that you should constrain the BIT_FIELD_REFs you
>> allow (I suppose in the end that's properly done by other part of the
>> analysis).
>
>
> Does that mean adding something like this to vectorizable_store? (and
> similarly to *_load)
>
> if (VECTOR_TYPE_P (TREE_TYPE (scalar_dest)))
>   return false;
>
> Or maybe this?
>
> if (TREE_CODE (scalar_dest) == BIT_FIELD_REF
>     && !VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (scalar_dest, 0)))
>   return false;
>
> Or checking that the type of the BIT_FIELD_REF is the element type of the
> vector type it accesses?
>
> I am not sure what restrictions are needed.
>
>
>> I suppose the data-ref analysis parts are not strictly required,
>
>
> I removed the dr_analyze_indices part, which was indeed not necessary. The
> tree-vect-data-refs.c part is useless (the base object is not computed for
> SLP) but shouldn't hurt.
>
> The current patch is attached (passed bootstrap+testsuite a week ago).
> Are there some parts of this that could go in?

The patch is ok apart from the tree-flow-inline.h parts where you have posted
a correct variant in a followup.

Thanks,
Richard.

>
> 2013-03-30  Marc Glisse  <marc.glisse@inria.fr>
>
>         * tree-vect-stmts.c (vectorizable_store): Accept BIT_FIELD_REF.
>         (vectorizable_load): Likewise.
>         * tree-vect-slp.c (vect_build_slp_tree): Likewise.
>         * tree-vect-data-refs.c (vect_create_data_ref_ptr): Handle
> VECTOR_TYPE.
>         * tree-flow-inline.h (get_addr_base_and_unit_offset_1): Handle
>         BIT_FIELD_REF.
>
> testsuite/
>         * gcc.dg/vect/bb-slp-31.c: New file.
>
> --
> Marc Glisse
> Index: tree-vect-data-refs.c
> ===================================================================
> --- tree-vect-data-refs.c       (revision 197265)
> +++ tree-vect-data-refs.c       (working copy)
> @@ -3565,20 +3565,22 @@ vect_create_data_ref_ptr (gimple stmt, t
>
>    if (dump_enabled_p ())
>      {
>        tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
>        dump_printf_loc (MSG_NOTE, vect_location,
>                         "create %s-pointer variable to type: ",
>                         tree_code_name[(int) TREE_CODE (aggr_type)]);
>        dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
>        if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
>          dump_printf (MSG_NOTE, "  vectorizing an array ref: ");
> +      else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
> +        dump_printf (MSG_NOTE, "  vectorizing a vector ref: ");
>        else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
>          dump_printf (MSG_NOTE, "  vectorizing a record based array ref: ");
>        else
>          dump_printf (MSG_NOTE, "  vectorizing a pointer ref: ");
>        dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
>      }
>
>    /* (1) Create the new aggregate-pointer variable.
>       Vector and array types inherit the alias set of their component
>       type by default so we need to use a ref-all pointer if the data
> Index: tree-vect-stmts.c
> ===================================================================
> --- tree-vect-stmts.c   (revision 197265)
> +++ tree-vect-stmts.c   (working copy)
> @@ -3844,20 +3844,21 @@ vectorizable_store (gimple stmt, gimple_
>    /* Is vectorizable store? */
>
>    if (!is_gimple_assign (stmt))
>      return false;
>
>    scalar_dest = gimple_assign_lhs (stmt);
>    if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
>        && is_pattern_stmt_p (stmt_info))
>      scalar_dest = TREE_OPERAND (scalar_dest, 0);
>    if (TREE_CODE (scalar_dest) != ARRAY_REF
> +      && TREE_CODE (scalar_dest) != BIT_FIELD_REF
>        && TREE_CODE (scalar_dest) != INDIRECT_REF
>        && TREE_CODE (scalar_dest) != COMPONENT_REF
>        && TREE_CODE (scalar_dest) != IMAGPART_EXPR
>        && TREE_CODE (scalar_dest) != REALPART_EXPR
>        && TREE_CODE (scalar_dest) != MEM_REF)
>      return false;
>
>    gcc_assert (gimple_assign_single_p (stmt));
>    op = gimple_assign_rhs1 (stmt);
>    if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
> @@ -4378,20 +4379,21 @@ vectorizable_load (gimple stmt, gimple_s
>    /* Is vectorizable load? */
>    if (!is_gimple_assign (stmt))
>      return false;
>
>    scalar_dest = gimple_assign_lhs (stmt);
>    if (TREE_CODE (scalar_dest) != SSA_NAME)
>      return false;
>
>    code = gimple_assign_rhs_code (stmt);
>    if (code != ARRAY_REF
> +      && code != BIT_FIELD_REF
>        && code != INDIRECT_REF
>        && code != COMPONENT_REF
>        && code != IMAGPART_EXPR
>        && code != REALPART_EXPR
>        && code != MEM_REF
>        && TREE_CODE_CLASS (code) != tcc_declaration)
>      return false;
>
>    if (!STMT_VINFO_DATA_REF (stmt_info))
>      return false;
> Index: tree-vect-slp.c
> ===================================================================
> --- tree-vect-slp.c     (revision 197265)
> +++ tree-vect-slp.c     (working copy)
> @@ -660,20 +660,21 @@ vect_build_slp_tree (loop_vec_info loop_
>         }
>        else
>         {
>           if (first_stmt_code != rhs_code
>               && (first_stmt_code != IMAGPART_EXPR
>                   || rhs_code != REALPART_EXPR)
>               && (first_stmt_code != REALPART_EXPR
>                   || rhs_code != IMAGPART_EXPR)
>                && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
>                     && (first_stmt_code == ARRAY_REF
> +                       || first_stmt_code == BIT_FIELD_REF
>                         || first_stmt_code == INDIRECT_REF
>                         || first_stmt_code == COMPONENT_REF
>                         || first_stmt_code == MEM_REF)))
>             {
>               if (dump_enabled_p ())
>                 {
>                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>                                    "Build SLP failed: different operation "
>                                    "in stmt ");
>                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
> 0);
> Index: tree-flow-inline.h
> ===================================================================
> --- tree-flow-inline.h  (revision 197265)
> +++ tree-flow-inline.h  (working copy)
> @@ -1239,21 +1239,23 @@ get_addr_base_and_unit_offset_1 (tree ex
>  {
>    HOST_WIDE_INT byte_offset = 0;
>
>    /* Compute cumulative byte-offset for nested component-refs and
> array-refs,
>       and find the ultimate containing object.  */
>    while (1)
>      {
>        switch (TREE_CODE (exp))
>         {
>         case BIT_FIELD_REF:
> -         return NULL_TREE;
> +         byte_offset += (TREE_INT_CST_LOW (TREE_OPERAND (exp, 2))
> +                         / BITS_PER_UNIT);
> +         break;
>
>         case COMPONENT_REF:
>           {
>             tree field = TREE_OPERAND (exp, 1);
>             tree this_offset = component_ref_field_offset (exp);
>             HOST_WIDE_INT hthis_offset;
>
>             if (!this_offset
>                 || TREE_CODE (this_offset) != INTEGER_CST
>                 || (TREE_INT_CST_LOW (DECL_FIELD_BIT_OFFSET (field))
> Index: testsuite/gcc.dg/vect/bb-slp-31.c
> ===================================================================
> --- testsuite/gcc.dg/vect/bb-slp-31.c   (revision 0)
> +++ testsuite/gcc.dg/vect/bb-slp-31.c   (revision 0)
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_double } */
> +
> +typedef double vec __attribute__ ((vector_size (2 * sizeof (double))));
> +vec a;
> +
> +void f(){
> +  a[0]=1+2*a[0]*a[0];
> +  a[1]=1+2*a[1]*a[1];
> +}
> +
> +/* { dg-final { scan-tree-dump "basic block vectorized using SLP" "slp" } }
> */
>
> Property changes on: testsuite/gcc.dg/vect/bb-slp-31.c
> ___________________________________________________________________
> Added: svn:keywords
>    + Author Date Id Revision URL
> Added: svn:eol-style
>    + native
>
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2013-04-02 12:52 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-01-27 15:28 SLP for vectors Marc Glisse
2013-01-29 11:37 ` Richard Biener
2013-01-30 20:48   ` Marc Glisse
2013-01-31  9:14     ` Richard Biener
2013-03-30 16:14       ` Marc Glisse
2013-04-01 15:52         ` Marc Glisse
2013-04-02 13:29           ` Richard Biener
2013-04-02 13:31         ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).