public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [tree-optimization/111721] VECT: Support SLP for MASK_LEN_GATHER_LOAD with dummy mask
@ 2023-11-02  0:57 Juzhe-Zhong
  2023-11-02 11:11 ` Richard Biener
  0 siblings, 1 reply; 4+ messages in thread
From: Juzhe-Zhong @ 2023-11-02  0:57 UTC (permalink / raw)
  To: gcc-patches; +Cc: richard.sandiford, rguenther, Juzhe-Zhong

This patch fixes following FAILs for RVV:
FAIL: gcc.dg/vect/vect-gather-1.c -flto -ffat-lto-objects  scan-tree-dump vect "Loop contains only SLP stmts"
FAIL: gcc.dg/vect/vect-gather-1.c scan-tree-dump vect "Loop contains only SLP stmts"

Bootstrap on X86 and regtest passed.

Tested on aarch64 passed.

Ok for trunk ?

        PR tree-optimization/111721

gcc/ChangeLog:

        * tree-vect-slp.cc (vect_get_and_check_slp_defs): Support SLP for dummy mask -1.
        * tree-vect-stmts.cc (vectorizable_load): Ditto.

---
 gcc/tree-vect-slp.cc   | 14 ++++++++++++--
 gcc/tree-vect-stmts.cc |  8 +++++++-
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 43d742e3c92..23ca0318e31 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -756,8 +756,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
 	{
 	  tree type = TREE_TYPE (oprnd);
 	  dt = dts[i];
-	  if ((dt == vect_constant_def
-	       || dt == vect_external_def)
+	  if (dt == vect_external_def
 	      && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
 	      && (TREE_CODE (type) == BOOLEAN_TYPE
 		  || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
@@ -769,6 +768,17 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
 				 "for variable-length SLP %T\n", oprnd);
 	      return -1;
 	    }
+	  if (dt == vect_constant_def
+	      && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
+	      && !can_duplicate_and_interleave_p (vinfo, stmts.length (), type))
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				 "Build SLP failed: invalid type of def "
+				 "for variable-length SLP %T\n",
+				 oprnd);
+	      return -1;
+	    }
 
 	  /* For the swapping logic below force vect_reduction_def
 	     for the reduction op in a SLP reduction group.  */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 6ce4868d3e1..6c47121e158 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -9859,10 +9859,16 @@ vectorizable_load (vec_info *vinfo,
       mask_index = internal_fn_mask_index (ifn);
       if (mask_index >= 0 && slp_node)
 	mask_index = vect_slp_child_index_for_operand (call, mask_index);
+      slp_tree slp_op = NULL;
       if (mask_index >= 0
 	  && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
-				      &mask, NULL, &mask_dt, &mask_vectype))
+				      &mask, &slp_op, &mask_dt, &mask_vectype))
 	return false;
+      /* MASK_LEN_GATHER_LOAD dummy mask -1 should always match the
+	 MASK_VECTYPE.  */
+      if (mask_index >= 0 && slp_node && mask_dt == vect_constant_def
+	  && !vect_maybe_update_slp_op_vectype (slp_op, mask_vectype))
+	gcc_unreachable ();
     }
 
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-- 
2.36.3


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [tree-optimization/111721] VECT: Support SLP for MASK_LEN_GATHER_LOAD with dummy mask
  2023-11-02  0:57 [tree-optimization/111721] VECT: Support SLP for MASK_LEN_GATHER_LOAD with dummy mask Juzhe-Zhong
@ 2023-11-02 11:11 ` Richard Biener
  2023-11-02 11:55   ` juzhe.zhong
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Biener @ 2023-11-02 11:11 UTC (permalink / raw)
  To: Juzhe-Zhong; +Cc: gcc-patches, richard.sandiford

On Thu, 2 Nov 2023, Juzhe-Zhong wrote:

> This patch fixes following FAILs for RVV:
> FAIL: gcc.dg/vect/vect-gather-1.c -flto -ffat-lto-objects  scan-tree-dump vect "Loop contains only SLP stmts"
> FAIL: gcc.dg/vect/vect-gather-1.c scan-tree-dump vect "Loop contains only SLP stmts"
> 
> Bootstrap on X86 and regtest passed.
> 
> Tested on aarch64 passed.
> 
> Ok for trunk ?
> 
>         PR tree-optimization/111721
> 
> gcc/ChangeLog:
> 
>         * tree-vect-slp.cc (vect_get_and_check_slp_defs): Support SLP for dummy mask -1.
>         * tree-vect-stmts.cc (vectorizable_load): Ditto.
> 
> ---
>  gcc/tree-vect-slp.cc   | 14 ++++++++++++--
>  gcc/tree-vect-stmts.cc |  8 +++++++-
>  2 files changed, 19 insertions(+), 3 deletions(-)
> 
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 43d742e3c92..23ca0318e31 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -756,8 +756,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
>  	{
>  	  tree type = TREE_TYPE (oprnd);
>  	  dt = dts[i];
> -	  if ((dt == vect_constant_def
> -	       || dt == vect_external_def)
> +	  if (dt == vect_external_def
>  	      && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
>  	      && (TREE_CODE (type) == BOOLEAN_TYPE
>  		  || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
> @@ -769,6 +768,17 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
>  				 "for variable-length SLP %T\n", oprnd);
>  	      return -1;
>  	    }
> +	  if (dt == vect_constant_def
> +	      && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
> +	      && !can_duplicate_and_interleave_p (vinfo, stmts.length (), type))
> +	    {
> +	      if (dump_enabled_p ())
> +		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +				 "Build SLP failed: invalid type of def "
> +				 "for variable-length SLP %T\n",
> +				 oprnd);
> +	      return -1;
> +	    }

I don't think that's quite correct.  can_duplicate_and_interleave_p
doesn't get enough info here and IIRC even materializing arbitrary
constants isn't possible with VLA vectors.  The very first thing
the function does is

  tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, 
count);
  if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
    return false;

but for masks that's not going to get us the correct vector type.
While I don't understand why we have that 'BOOLEAN_TYPE' special
case (maybe the intent was to identify 'mask' operands that way?),
we might want to require that we can materialize both all-zero
and all-ones constant 'mask's.  But then 'mask' operands should
be properly identified here.

Maybe we can also simply delay the check to the point we know
whether we're facing an uniform constant or not (note for 'first',
we cannot really special-case vect_constant_def as the second
SLP lane might demote that to vect_external_def).  It's always
a balance of whether to reject sth at SLP build time (possibly
allowing operand swapping to do magic) or to delay checks
to stmt analysis time.  That might also explain that you
do not see fallout of the "wrong" change (the later checking
will catch it anyway).

There's probably testsuite coverage for SVE here.

That said, a "correct" patch might be to simply change

              && (TREE_CODE (type) == BOOLEAN_TYPE
                  || !can_duplicate_and_interleave_p (vinfo, stmts.length 
(),
                                                      type)))

to

       && TREE_CODE (type) != BOOLEAN_TYPE
       && !can_duplicate_and_interleave_p (vinfo, stmts.length 
(),       
                                                      type)

thus delay 'mask' operand validation here.

Note I still think we should improve TREE_CODE (type) == BOOLEAN_TYPE
to identify internal function mask operands only.

Richard.


>  
>  	  /* For the swapping logic below force vect_reduction_def
>  	     for the reduction op in a SLP reduction group.  */
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 6ce4868d3e1..6c47121e158 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -9859,10 +9859,16 @@ vectorizable_load (vec_info *vinfo,
>        mask_index = internal_fn_mask_index (ifn);
>        if (mask_index >= 0 && slp_node)
>  	mask_index = vect_slp_child_index_for_operand (call, mask_index);
> +      slp_tree slp_op = NULL;
>        if (mask_index >= 0
>  	  && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
> -				      &mask, NULL, &mask_dt, &mask_vectype))
> +				      &mask, &slp_op, &mask_dt, &mask_vectype))
>  	return false;
> +      /* MASK_LEN_GATHER_LOAD dummy mask -1 should always match the
> +	 MASK_VECTYPE.  */
> +      if (mask_index >= 0 && slp_node && mask_dt == vect_constant_def
> +	  && !vect_maybe_update_slp_op_vectype (slp_op, mask_vectype))
> +	gcc_unreachable ();
>      }
>  
>    tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: Re: [tree-optimization/111721] VECT: Support SLP for MASK_LEN_GATHER_LOAD with dummy mask
  2023-11-02 11:11 ` Richard Biener
@ 2023-11-02 11:55   ` juzhe.zhong
  2023-11-02 13:14     ` Richard Biener
  0 siblings, 1 reply; 4+ messages in thread
From: juzhe.zhong @ 2023-11-02 11:55 UTC (permalink / raw)
  To: rguenther; +Cc: gcc-patches, richard.sandiford

[-- Attachment #1: Type: text/plain, Size: 7259 bytes --]

Thanks Richi.

The following is the V2 patch:
Testing on X86 and aarch64 are running.

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 43d742e3c92..e7f7f976f11 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -760,7 +760,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
               || dt == vect_external_def)
              && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
              && (TREE_CODE (type) == BOOLEAN_TYPE
-                 || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
+                 && !can_duplicate_and_interleave_p (vinfo, stmts.length (),
                                                      type)))
            {
              if (dump_enabled_p ())
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 6ce4868d3e1..6c47121e158 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -9859,10 +9859,16 @@ vectorizable_load (vec_info *vinfo,
       mask_index = internal_fn_mask_index (ifn);
       if (mask_index >= 0 && slp_node)
        mask_index = vect_slp_child_index_for_operand (call, mask_index);
+      slp_tree slp_op = NULL;
       if (mask_index >= 0
          && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
-                                     &mask, NULL, &mask_dt, &mask_vectype))
+                                     &mask, &slp_op, &mask_dt, &mask_vectype))
        return false;
+      /* MASK_LEN_GATHER_LOAD dummy mask -1 should always match the
+        MASK_VECTYPE.  */
+      if (mask_index >= 0 && slp_node && mask_dt == vect_constant_def
+         && !vect_maybe_update_slp_op_vectype (slp_op, mask_vectype))
+       gcc_unreachable ();
     }




juzhe.zhong@rivai.ai
 
From: Richard Biener
Date: 2023-11-02 19:11
To: Juzhe-Zhong
CC: gcc-patches; richard.sandiford
Subject: Re: [tree-optimization/111721] VECT: Support SLP for MASK_LEN_GATHER_LOAD with dummy mask
On Thu, 2 Nov 2023, Juzhe-Zhong wrote:
 
> This patch fixes following FAILs for RVV:
> FAIL: gcc.dg/vect/vect-gather-1.c -flto -ffat-lto-objects  scan-tree-dump vect "Loop contains only SLP stmts"
> FAIL: gcc.dg/vect/vect-gather-1.c scan-tree-dump vect "Loop contains only SLP stmts"
> 
> Bootstrap on X86 and regtest passed.
> 
> Tested on aarch64 passed.
> 
> Ok for trunk ?
> 
>         PR tree-optimization/111721
> 
> gcc/ChangeLog:
> 
>         * tree-vect-slp.cc (vect_get_and_check_slp_defs): Support SLP for dummy mask -1.
>         * tree-vect-stmts.cc (vectorizable_load): Ditto.
> 
> ---
>  gcc/tree-vect-slp.cc   | 14 ++++++++++++--
>  gcc/tree-vect-stmts.cc |  8 +++++++-
>  2 files changed, 19 insertions(+), 3 deletions(-)
> 
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 43d742e3c92..23ca0318e31 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -756,8 +756,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
>  {
>    tree type = TREE_TYPE (oprnd);
>    dt = dts[i];
> -   if ((dt == vect_constant_def
> -        || dt == vect_external_def)
> +   if (dt == vect_external_def
>        && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
>        && (TREE_CODE (type) == BOOLEAN_TYPE
>    || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
> @@ -769,6 +768,17 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
>  "for variable-length SLP %T\n", oprnd);
>        return -1;
>      }
> +   if (dt == vect_constant_def
> +       && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
> +       && !can_duplicate_and_interleave_p (vinfo, stmts.length (), type))
> +     {
> +       if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> + "Build SLP failed: invalid type of def "
> + "for variable-length SLP %T\n",
> + oprnd);
> +       return -1;
> +     }
 
I don't think that's quite correct.  can_duplicate_and_interleave_p
doesn't get enough info here and IIRC even materializing arbitrary
constants isn't possible with VLA vectors.  The very first thing
the function does is
 
  tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, 
count);
  if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
    return false;
 
but for masks that's not going to get us the correct vector type.
While I don't understand why we have that 'BOOLEAN_TYPE' special
case (maybe the intent was to identify 'mask' operands that way?),
we might want to require that we can materialize both all-zero
and all-ones constant 'mask's.  But then 'mask' operands should
be properly identified here.
 
Maybe we can also simply delay the check to the point we know
whether we're facing an uniform constant or not (note for 'first',
we cannot really special-case vect_constant_def as the second
SLP lane might demote that to vect_external_def).  It's always
a balance of whether to reject sth at SLP build time (possibly
allowing operand swapping to do magic) or to delay checks
to stmt analysis time.  That might also explain that you
do not see fallout of the "wrong" change (the later checking
will catch it anyway).
 
There's probably testsuite coverage for SVE here.
 
That said, a "correct" patch might be to simply change
 
              && (TREE_CODE (type) == BOOLEAN_TYPE
                  || !can_duplicate_and_interleave_p (vinfo, stmts.length 
(),
                                                      type)))
 
to
 
       && TREE_CODE (type) != BOOLEAN_TYPE
       && !can_duplicate_and_interleave_p (vinfo, stmts.length 
(),       
                                                      type)
 
thus delay 'mask' operand validation here.
 
Note I still think we should improve TREE_CODE (type) == BOOLEAN_TYPE
to identify internal function mask operands only.
 
Richard.
 
 
>  
>    /* For the swapping logic below force vect_reduction_def
>       for the reduction op in a SLP reduction group.  */
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 6ce4868d3e1..6c47121e158 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -9859,10 +9859,16 @@ vectorizable_load (vec_info *vinfo,
>        mask_index = internal_fn_mask_index (ifn);
>        if (mask_index >= 0 && slp_node)
>  mask_index = vect_slp_child_index_for_operand (call, mask_index);
> +      slp_tree slp_op = NULL;
>        if (mask_index >= 0
>    && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
> -       &mask, NULL, &mask_dt, &mask_vectype))
> +       &mask, &slp_op, &mask_dt, &mask_vectype))
>  return false;
> +      /* MASK_LEN_GATHER_LOAD dummy mask -1 should always match the
> + MASK_VECTYPE.  */
> +      if (mask_index >= 0 && slp_node && mask_dt == vect_constant_def
> +   && !vect_maybe_update_slp_op_vectype (slp_op, mask_vectype))
> + gcc_unreachable ();
>      }
>  
>    tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> 
 
-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: Re: [tree-optimization/111721] VECT: Support SLP for MASK_LEN_GATHER_LOAD with dummy mask
  2023-11-02 11:55   ` juzhe.zhong
@ 2023-11-02 13:14     ` Richard Biener
  0 siblings, 0 replies; 4+ messages in thread
From: Richard Biener @ 2023-11-02 13:14 UTC (permalink / raw)
  To: juzhe.zhong; +Cc: gcc-patches, richard.sandiford

On Thu, 2 Nov 2023, juzhe.zhong@rivai.ai wrote:

> Thanks Richi.
> 
> The following is the V2 patch:
> Testing on X86 and aarch64 are running.
> 
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 43d742e3c92..e7f7f976f11 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -760,7 +760,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
>                || dt == vect_external_def)
>               && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
>               && (TREE_CODE (type) == BOOLEAN_TYPE
> -                 || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
> +                 && !can_duplicate_and_interleave_p (vinfo, stmts.length (),
>                                                       type)))

That's not what I wrote.  I wrote to let == BOOLEAN_TYPE pass without
check here, thus

 -             && (TREE_CODE (type) == BOOLEAN_TYPE
 +             && TREE_CODE (type) != BOOLEAN_TYPE
               && !can_duplicate...

>             {
>               if (dump_enabled_p ())
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 6ce4868d3e1..6c47121e158 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -9859,10 +9859,16 @@ vectorizable_load (vec_info *vinfo,
>        mask_index = internal_fn_mask_index (ifn);
>        if (mask_index >= 0 && slp_node)
>         mask_index = vect_slp_child_index_for_operand (call, mask_index);
> +      slp_tree slp_op = NULL;
>        if (mask_index >= 0
>           && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
> -                                     &mask, NULL, &mask_dt, &mask_vectype))
> +                                     &mask, &slp_op, &mask_dt, &mask_vectype))
>         return false;
> +      /* MASK_LEN_GATHER_LOAD dummy mask -1 should always match the
> +        MASK_VECTYPE.  */
> +      if (mask_index >= 0 && slp_node && mask_dt == vect_constant_def
> +         && !vect_maybe_update_slp_op_vectype (slp_op, mask_vectype))
> +       gcc_unreachable ();

You shouldn't do this here.  Theres code in if (costing_p) that
would need to be updated if you (correctly) want to track slp_op here.

>      }
> 
> 
> 
> 
> juzhe.zhong@rivai.ai
>  
> From: Richard Biener
> Date: 2023-11-02 19:11
> To: Juzhe-Zhong
> CC: gcc-patches; richard.sandiford
> Subject: Re: [tree-optimization/111721] VECT: Support SLP for MASK_LEN_GATHER_LOAD with dummy mask
> On Thu, 2 Nov 2023, Juzhe-Zhong wrote:
>  
> > This patch fixes following FAILs for RVV:
> > FAIL: gcc.dg/vect/vect-gather-1.c -flto -ffat-lto-objects  scan-tree-dump vect "Loop contains only SLP stmts"
> > FAIL: gcc.dg/vect/vect-gather-1.c scan-tree-dump vect "Loop contains only SLP stmts"
> > 
> > Bootstrap on X86 and regtest passed.
> > 
> > Tested on aarch64 passed.
> > 
> > Ok for trunk ?
> > 
> >         PR tree-optimization/111721
> > 
> > gcc/ChangeLog:
> > 
> >         * tree-vect-slp.cc (vect_get_and_check_slp_defs): Support SLP for dummy mask -1.
> >         * tree-vect-stmts.cc (vectorizable_load): Ditto.
> > 
> > ---
> >  gcc/tree-vect-slp.cc   | 14 ++++++++++++--
> >  gcc/tree-vect-stmts.cc |  8 +++++++-
> >  2 files changed, 19 insertions(+), 3 deletions(-)
> > 
> > diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> > index 43d742e3c92..23ca0318e31 100644
> > --- a/gcc/tree-vect-slp.cc
> > +++ b/gcc/tree-vect-slp.cc
> > @@ -756,8 +756,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
> >  {
> >    tree type = TREE_TYPE (oprnd);
> >    dt = dts[i];
> > -   if ((dt == vect_constant_def
> > -        || dt == vect_external_def)
> > +   if (dt == vect_external_def
> >        && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
> >        && (TREE_CODE (type) == BOOLEAN_TYPE
> >    || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
> > @@ -769,6 +768,17 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
> >  "for variable-length SLP %T\n", oprnd);
> >        return -1;
> >      }
> > +   if (dt == vect_constant_def
> > +       && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
> > +       && !can_duplicate_and_interleave_p (vinfo, stmts.length (), type))
> > +     {
> > +       if (dump_enabled_p ())
> > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > + "Build SLP failed: invalid type of def "
> > + "for variable-length SLP %T\n",
> > + oprnd);
> > +       return -1;
> > +     }
>  
> I don't think that's quite correct.  can_duplicate_and_interleave_p
> doesn't get enough info here and IIRC even materializing arbitrary
> constants isn't possible with VLA vectors.  The very first thing
> the function does is
>  
>   tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, 
> count);
>   if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
>     return false;
>  
> but for masks that's not going to get us the correct vector type.
> While I don't understand why we have that 'BOOLEAN_TYPE' special
> case (maybe the intent was to identify 'mask' operands that way?),
> we might want to require that we can materialize both all-zero
> and all-ones constant 'mask's.  But then 'mask' operands should
> be properly identified here.
>  
> Maybe we can also simply delay the check to the point we know
> whether we're facing an uniform constant or not (note for 'first',
> we cannot really special-case vect_constant_def as the second
> SLP lane might demote that to vect_external_def).  It's always
> a balance of whether to reject sth at SLP build time (possibly
> allowing operand swapping to do magic) or to delay checks
> to stmt analysis time.  That might also explain that you
> do not see fallout of the "wrong" change (the later checking
> will catch it anyway).
>  
> There's probably testsuite coverage for SVE here.
>  
> That said, a "correct" patch might be to simply change
>  
>               && (TREE_CODE (type) == BOOLEAN_TYPE
>                   || !can_duplicate_and_interleave_p (vinfo, stmts.length 
> (),
>                                                       type)))
>  
> to
>  
>        && TREE_CODE (type) != BOOLEAN_TYPE
>        && !can_duplicate_and_interleave_p (vinfo, stmts.length 
> (),       
>                                                       type)
>  
> thus delay 'mask' operand validation here.
>  
> Note I still think we should improve TREE_CODE (type) == BOOLEAN_TYPE
> to identify internal function mask operands only.
>  
> Richard.
>  
>  
> >  
> >    /* For the swapping logic below force vect_reduction_def
> >       for the reduction op in a SLP reduction group.  */
> > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> > index 6ce4868d3e1..6c47121e158 100644
> > --- a/gcc/tree-vect-stmts.cc
> > +++ b/gcc/tree-vect-stmts.cc
> > @@ -9859,10 +9859,16 @@ vectorizable_load (vec_info *vinfo,
> >        mask_index = internal_fn_mask_index (ifn);
> >        if (mask_index >= 0 && slp_node)
> >  mask_index = vect_slp_child_index_for_operand (call, mask_index);
> > +      slp_tree slp_op = NULL;
> >        if (mask_index >= 0
> >    && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
> > -       &mask, NULL, &mask_dt, &mask_vectype))
> > +       &mask, &slp_op, &mask_dt, &mask_vectype))
> >  return false;
> > +      /* MASK_LEN_GATHER_LOAD dummy mask -1 should always match the
> > + MASK_VECTYPE.  */
> > +      if (mask_index >= 0 && slp_node && mask_dt == vect_constant_def
> > +   && !vect_maybe_update_slp_op_vectype (slp_op, mask_vectype))
> > + gcc_unreachable ();
> >      }
> >  
> >    tree vectype = STMT_VINFO_VECTYPE (stmt_info);
> > 
>  
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-11-02 13:14 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-02  0:57 [tree-optimization/111721] VECT: Support SLP for MASK_LEN_GATHER_LOAD with dummy mask Juzhe-Zhong
2023-11-02 11:11 ` Richard Biener
2023-11-02 11:55   ` juzhe.zhong
2023-11-02 13:14     ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).