[PATCH] VECT: Add mask_len_fold_left_plus for in-order floating-point reduction

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH] VECT: Add mask_len_fold_left_plus for in-order floating-point reduction
@ 2023-07-14 23:45 juzhe.zhong
  2023-07-19  8:17 ` Richard Biener
  0 siblings, 1 reply; 3+ messages in thread
From: juzhe.zhong @ 2023-07-14 23:45 UTC (permalink / raw)
  To: gcc-patches; +Cc: rguenther, richard.sandiford, Ju-Zhe Zhong

From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>

Hi, Richard and Richi.

This patch adds the mask_len_fold_left_plus pattern to support in-order
floating-point reduction on targets that use length-based loop control.

Consider the following case:
double
foo2 (double *__restrict a,
     double init,
     int *__restrict cond,
     int n)
{
    for (int i = 0; i < n; i++)
      if (cond[i])
        init += a[i];
    return init;
}

ARM SVE:

...
vec_mask_and_60 = loop_mask_54 & mask__23.33_57;
vect__ifc__35.37_64 = .VCOND_MASK (vec_mask_and_60, vect__8.36_61, { 0.0, ... });
_36 = .MASK_FOLD_LEFT_PLUS (init_20, vect__ifc__35.37_64, loop_mask_54);
...

For RVV, we want to see:
...
_36 = .MASK_LEN_FOLD_LEFT_PLUS (init_20, vect__ifc__35.37_64, control_mask, loop_len, bias);
...

gcc/ChangeLog:

        * doc/md.texi: Add mask_len_fold_left_plus.
        * internal-fn.cc (mask_len_fold_left_direct): Ditto.
        (expand_mask_len_fold_left_optab_fn): Ditto.
        (direct_mask_len_fold_left_optab_supported_p): Ditto.
        * internal-fn.def (MASK_LEN_FOLD_LEFT_PLUS): Ditto.
        * optabs.def (OPTAB_D): Ditto.

---
 gcc/doc/md.texi     | 13 +++++++++++++
 gcc/internal-fn.cc  |  5 +++++
 gcc/internal-fn.def |  3 +++
 gcc/optabs.def      |  1 +
 4 files changed, 22 insertions(+)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index cbcb992e5d7..6f44e66399d 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5615,6 +5615,19 @@ no reassociation.
 Like @samp{fold_left_plus_@var{m}}, but takes an additional mask operand
 (operand 3) that specifies which elements of the source vector should be added.
 
+@cindex @code{mask_len_fold_left_plus_@var{m}} instruction pattern
+@item @code{mask_len_fold_left_plus_@var{m}}
+Like @samp{fold_left_plus_@var{m}}, but takes an additional mask operand
+(operand 3), len operand (operand 4) and bias operand (operand 5).  It
+performs the following operation strictly in-order (no reassociation):
+
+@smallexample
+operand0 = operand1;
+for (i = 0; i < LEN + BIAS; i++)
+  if (operand3[i])
+    operand0 += operand2[i];
+@end smallexample
+
 @cindex @code{sdot_prod@var{m}} instruction pattern
 @item @samp{sdot_prod@var{m}}
 
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index e698f0bffc7..2bf4fc492fe 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -190,6 +190,7 @@ init_internal_fns ()
 #define fold_extract_direct { 2, 2, false }
 #define fold_left_direct { 1, 1, false }
 #define mask_fold_left_direct { 1, 1, false }
+#define mask_len_fold_left_direct { 1, 1, false }
 #define check_ptrs_direct { 0, 0, false }
 
 const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
@@ -3890,6 +3891,9 @@ expand_convert_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab,
 #define expand_mask_fold_left_optab_fn(FN, STMT, OPTAB) \
   expand_direct_optab_fn (FN, STMT, OPTAB, 3)
 
+#define expand_mask_len_fold_left_optab_fn(FN, STMT, OPTAB) \
+  expand_direct_optab_fn (FN, STMT, OPTAB, 5)
+
 #define expand_check_ptrs_optab_fn(FN, STMT, OPTAB) \
   expand_direct_optab_fn (FN, STMT, OPTAB, 4)
 
@@ -3997,6 +4001,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 #define direct_fold_extract_optab_supported_p direct_optab_supported_p
 #define direct_fold_left_optab_supported_p direct_optab_supported_p
 #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p
+#define direct_mask_len_fold_left_optab_supported_p direct_optab_supported_p
 #define direct_check_ptrs_optab_supported_p direct_optab_supported_p
 #define direct_vec_set_optab_supported_p direct_optab_supported_p
 #define direct_vec_extract_optab_supported_p direct_optab_supported_p
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index ea750a921ed..d3aec51b1f2 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -319,6 +319,9 @@ DEF_INTERNAL_OPTAB_FN (FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW,
 DEF_INTERNAL_OPTAB_FN (MASK_FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW,
 		       mask_fold_left_plus, mask_fold_left)
 
+DEF_INTERNAL_OPTAB_FN (MASK_LEN_FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW,
+		       mask_len_fold_left_plus, mask_len_fold_left)
+
 /* Unary math functions.  */
 DEF_INTERNAL_FLT_FN (ACOS, ECF_CONST, acos, unary)
 DEF_INTERNAL_FLT_FN (ACOSH, ECF_CONST, acosh, unary)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 3dae228fba6..7023392979e 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -385,6 +385,7 @@ OPTAB_D (reduc_ior_scal_optab,  "reduc_ior_scal_$a")
 OPTAB_D (reduc_xor_scal_optab,  "reduc_xor_scal_$a")
 OPTAB_D (fold_left_plus_optab, "fold_left_plus_$a")
 OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
+OPTAB_D (mask_len_fold_left_plus_optab, "mask_len_fold_left_plus_$a")
 
 OPTAB_D (extract_last_optab, "extract_last_$a")
 OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
-- 
2.36.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] VECT: Add mask_len_fold_left_plus for in-order floating-point reduction
  2023-07-14 23:45 [PATCH] VECT: Add mask_len_fold_left_plus for in-order floating-point reduction juzhe.zhong
@ 2023-07-19  8:17 ` Richard Biener
  2023-07-19 13:37   ` Li, Pan2
  0 siblings, 1 reply; 3+ messages in thread
From: Richard Biener @ 2023-07-19  8:17 UTC (permalink / raw)
  To: Ju-Zhe Zhong; +Cc: gcc-patches, richard.sandiford

On Sat, 15 Jul 2023, juzhe.zhong@rivai.ai wrote:

> From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
> 
> Hi, Richard and Richi.
> 
> This patch adds mask_len_fold_left_plus pattern to support in-order floating-point
> reduction for target support len loop control.
> 
> Consider this following case:
> double
> foo2 (double *__restrict a,
>      double init,
>      int *__restrict cond,
>      int n)
> {
>     for (int i = 0; i < n; i++)
>       if (cond[i])
>         init += a[i];
>     return init;
> }
> 
> ARM SVE:
> 
> ...
> vec_mask_and_60 = loop_mask_54 & mask__23.33_57;
> vect__ifc__35.37_64 = .VCOND_MASK (vec_mask_and_60, vect__8.36_61, { 0.0, ... });
> _36 = .MASK_FOLD_LEFT_PLUS (init_20, vect__ifc__35.37_64, loop_mask_54);
> ...
> 
> For RVV, we want to see:
> ...
> _36 = .MASK_LEN_FOLD_LEFT_PLUS (init_20, vect__ifc__35.37_64, control_mask, loop_len, bias);
> ...

OK.

Richard.

> gcc/ChangeLog:
> 
>         * doc/md.texi: Add mask_len_fold_left_plus.
>         * internal-fn.cc (mask_len_fold_left_direct): Ditto.
>         (expand_mask_len_fold_left_optab_fn): Ditto.
>         (direct_mask_len_fold_left_optab_supported_p): Ditto.
>         * internal-fn.def (MASK_LEN_FOLD_LEFT_PLUS): Ditto.
>         * optabs.def (OPTAB_D): Ditto.
> 
> ---
>  gcc/doc/md.texi     | 13 +++++++++++++
>  gcc/internal-fn.cc  |  5 +++++
>  gcc/internal-fn.def |  3 +++
>  gcc/optabs.def      |  1 +
>  4 files changed, 22 insertions(+)
> 
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index cbcb992e5d7..6f44e66399d 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5615,6 +5615,19 @@ no reassociation.
>  Like @samp{fold_left_plus_@var{m}}, but takes an additional mask operand
>  (operand 3) that specifies which elements of the source vector should be added.
>  
> +@cindex @code{mask_len_fold_left_plus_@var{m}} instruction pattern
> +@item @code{mask_len_fold_left_plus_@var{m}}
> +Like @samp{fold_left_plus_@var{m}}, but takes an additional mask operand
> +(operand 3), len operand (operand 4) and bias operand (operand 5) that
> +performs following operations strictly in-order (no reassociation):
> +
> +@smallexample
> +operand0 = operand1;
> +for (i = 0; i < LEN + BIAS; i++)
> +  if (operand3[i])
> +    operand0 += operand2[i];
> +@end smallexample
> +
>  @cindex @code{sdot_prod@var{m}} instruction pattern
>  @item @samp{sdot_prod@var{m}}
>  
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index e698f0bffc7..2bf4fc492fe 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -190,6 +190,7 @@ init_internal_fns ()
>  #define fold_extract_direct { 2, 2, false }
>  #define fold_left_direct { 1, 1, false }
>  #define mask_fold_left_direct { 1, 1, false }
> +#define mask_len_fold_left_direct { 1, 1, false }
>  #define check_ptrs_direct { 0, 0, false }
>  
>  const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
> @@ -3890,6 +3891,9 @@ expand_convert_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab,
>  #define expand_mask_fold_left_optab_fn(FN, STMT, OPTAB) \
>    expand_direct_optab_fn (FN, STMT, OPTAB, 3)
>  
> +#define expand_mask_len_fold_left_optab_fn(FN, STMT, OPTAB) \
> +  expand_direct_optab_fn (FN, STMT, OPTAB, 5)
> +
>  #define expand_check_ptrs_optab_fn(FN, STMT, OPTAB) \
>    expand_direct_optab_fn (FN, STMT, OPTAB, 4)
>  
> @@ -3997,6 +4001,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
>  #define direct_fold_extract_optab_supported_p direct_optab_supported_p
>  #define direct_fold_left_optab_supported_p direct_optab_supported_p
>  #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p
> +#define direct_mask_len_fold_left_optab_supported_p direct_optab_supported_p
>  #define direct_check_ptrs_optab_supported_p direct_optab_supported_p
>  #define direct_vec_set_optab_supported_p direct_optab_supported_p
>  #define direct_vec_extract_optab_supported_p direct_optab_supported_p
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index ea750a921ed..d3aec51b1f2 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -319,6 +319,9 @@ DEF_INTERNAL_OPTAB_FN (FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW,
>  DEF_INTERNAL_OPTAB_FN (MASK_FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW,
>  		       mask_fold_left_plus, mask_fold_left)
>  
> +DEF_INTERNAL_OPTAB_FN (MASK_LEN_FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW,
> +		       mask_len_fold_left_plus, mask_len_fold_left)
> +
>  /* Unary math functions.  */
>  DEF_INTERNAL_FLT_FN (ACOS, ECF_CONST, acos, unary)
>  DEF_INTERNAL_FLT_FN (ACOSH, ECF_CONST, acosh, unary)
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 3dae228fba6..7023392979e 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -385,6 +385,7 @@ OPTAB_D (reduc_ior_scal_optab,  "reduc_ior_scal_$a")
>  OPTAB_D (reduc_xor_scal_optab,  "reduc_xor_scal_$a")
>  OPTAB_D (fold_left_plus_optab, "fold_left_plus_$a")
>  OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
> +OPTAB_D (mask_len_fold_left_plus_optab, "mask_len_fold_left_plus_$a")
>  
>  OPTAB_D (extract_last_optab, "extract_last_$a")
>  OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 3+ messages in thread

* RE: [PATCH] VECT: Add mask_len_fold_left_plus for in-order floating-point reduction
  2023-07-19  8:17 ` Richard Biener
@ 2023-07-19 13:37   ` Li, Pan2
  0 siblings, 0 replies; 3+ messages in thread
From: Li, Pan2 @ 2023-07-19 13:37 UTC (permalink / raw)
  To: Richard Biener, Ju-Zhe Zhong; +Cc: gcc-patches, richard.sandiford

Committed, as it passed both the bootstrap and regression tests. Thanks, Richard.

Pan

-----Original Message-----
From: Gcc-patches <gcc-patches-bounces+pan2.li=intel.com@gcc.gnu.org> On Behalf Of Richard Biener via Gcc-patches
Sent: Wednesday, July 19, 2023 4:17 PM
To: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
Cc: gcc-patches@gcc.gnu.org; richard.sandiford@arm.com
Subject: Re: [PATCH] VECT: Add mask_len_fold_left_plus for in-order floating-point reduction

On Sat, 15 Jul 2023, juzhe.zhong@rivai.ai wrote:

> From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
> 
> Hi, Richard and Richi.
> 
> This patch adds mask_len_fold_left_plus pattern to support in-order floating-point
> reduction for target support len loop control.
> 
> Consider this following case:
> double
> foo2 (double *__restrict a,
>      double init,
>      int *__restrict cond,
>      int n)
> {
>     for (int i = 0; i < n; i++)
>       if (cond[i])
>         init += a[i];
>     return init;
> }
> 
> ARM SVE:
> 
> ...
> vec_mask_and_60 = loop_mask_54 & mask__23.33_57;
> vect__ifc__35.37_64 = .VCOND_MASK (vec_mask_and_60, vect__8.36_61, { 0.0, ... });
> _36 = .MASK_FOLD_LEFT_PLUS (init_20, vect__ifc__35.37_64, loop_mask_54);
> ...
> 
> For RVV, we want to see:
> ...
> _36 = .MASK_LEN_FOLD_LEFT_PLUS (init_20, vect__ifc__35.37_64, control_mask, loop_len, bias);
> ...

OK.

Richard.

> gcc/ChangeLog:
> 
>         * doc/md.texi: Add mask_len_fold_left_plus.
>         * internal-fn.cc (mask_len_fold_left_direct): Ditto.
>         (expand_mask_len_fold_left_optab_fn): Ditto.
>         (direct_mask_len_fold_left_optab_supported_p): Ditto.
>         * internal-fn.def (MASK_LEN_FOLD_LEFT_PLUS): Ditto.
>         * optabs.def (OPTAB_D): Ditto.
> 
> ---
>  gcc/doc/md.texi     | 13 +++++++++++++
>  gcc/internal-fn.cc  |  5 +++++
>  gcc/internal-fn.def |  3 +++
>  gcc/optabs.def      |  1 +
>  4 files changed, 22 insertions(+)
> 
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index cbcb992e5d7..6f44e66399d 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5615,6 +5615,19 @@ no reassociation.
>  Like @samp{fold_left_plus_@var{m}}, but takes an additional mask operand
>  (operand 3) that specifies which elements of the source vector should be added.
>  
> +@cindex @code{mask_len_fold_left_plus_@var{m}} instruction pattern
> +@item @code{mask_len_fold_left_plus_@var{m}}
> +Like @samp{fold_left_plus_@var{m}}, but takes an additional mask operand
> +(operand 3), len operand (operand 4) and bias operand (operand 5) that
> +performs following operations strictly in-order (no reassociation):
> +
> +@smallexample
> +operand0 = operand1;
> +for (i = 0; i < LEN + BIAS; i++)
> +  if (operand3[i])
> +    operand0 += operand2[i];
> +@end smallexample
> +
>  @cindex @code{sdot_prod@var{m}} instruction pattern
>  @item @samp{sdot_prod@var{m}}
>  
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index e698f0bffc7..2bf4fc492fe 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -190,6 +190,7 @@ init_internal_fns ()
>  #define fold_extract_direct { 2, 2, false }
>  #define fold_left_direct { 1, 1, false }
>  #define mask_fold_left_direct { 1, 1, false }
> +#define mask_len_fold_left_direct { 1, 1, false }
>  #define check_ptrs_direct { 0, 0, false }
>  
>  const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
> @@ -3890,6 +3891,9 @@ expand_convert_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab,
>  #define expand_mask_fold_left_optab_fn(FN, STMT, OPTAB) \
>    expand_direct_optab_fn (FN, STMT, OPTAB, 3)
>  
> +#define expand_mask_len_fold_left_optab_fn(FN, STMT, OPTAB) \
> +  expand_direct_optab_fn (FN, STMT, OPTAB, 5)
> +
>  #define expand_check_ptrs_optab_fn(FN, STMT, OPTAB) \
>    expand_direct_optab_fn (FN, STMT, OPTAB, 4)
>  
> @@ -3997,6 +4001,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
>  #define direct_fold_extract_optab_supported_p direct_optab_supported_p
>  #define direct_fold_left_optab_supported_p direct_optab_supported_p
>  #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p
> +#define direct_mask_len_fold_left_optab_supported_p direct_optab_supported_p
>  #define direct_check_ptrs_optab_supported_p direct_optab_supported_p
>  #define direct_vec_set_optab_supported_p direct_optab_supported_p
>  #define direct_vec_extract_optab_supported_p direct_optab_supported_p
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index ea750a921ed..d3aec51b1f2 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -319,6 +319,9 @@ DEF_INTERNAL_OPTAB_FN (FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW,
>  DEF_INTERNAL_OPTAB_FN (MASK_FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW,
>  		       mask_fold_left_plus, mask_fold_left)
>  
> +DEF_INTERNAL_OPTAB_FN (MASK_LEN_FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW,
> +		       mask_len_fold_left_plus, mask_len_fold_left)
> +
>  /* Unary math functions.  */
>  DEF_INTERNAL_FLT_FN (ACOS, ECF_CONST, acos, unary)
>  DEF_INTERNAL_FLT_FN (ACOSH, ECF_CONST, acosh, unary)
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 3dae228fba6..7023392979e 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -385,6 +385,7 @@ OPTAB_D (reduc_ior_scal_optab,  "reduc_ior_scal_$a")
>  OPTAB_D (reduc_xor_scal_optab,  "reduc_xor_scal_$a")
>  OPTAB_D (fold_left_plus_optab, "fold_left_plus_$a")
>  OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
> +OPTAB_D (mask_len_fold_left_plus_optab, "mask_len_fold_left_plus_$a")
>  
>  OPTAB_D (extract_last_optab, "extract_last_$a")
>  OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-07-19 13:37 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-14 23:45 [PATCH] VECT: Add mask_len_fold_left_plus for in-order floating-point reduction juzhe.zhong
2023-07-19  8:17 ` Richard Biener
2023-07-19 13:37   ` Li, Pan2

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).