* Re: [PATCH] tree-optimization/111115 - SLP of masked stores
From: Richard Sandiford @ 2023-08-24 9:38 UTC
To: Richard Biener; +Cc: gcc-patches
Richard Biener <rguenther@suse.de> writes:
> The following adds the capability to do SLP on .MASK_STORE; I do not
> plan to add interleaving support.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?
LGTM, thanks.
Richard
> Thanks,
> Richard.
>
> PR tree-optimization/111115
> gcc/
> * tree-vectorizer.h (vect_slp_child_index_for_operand): New.
> * tree-vect-data-refs.cc (can_group_stmts_p): Also group
> .MASK_STORE.
> * tree-vect-slp.cc (arg3_arg2_map): New.
> (vect_get_operand_map): Handle IFN_MASK_STORE.
> (vect_slp_child_index_for_operand): New function.
> (vect_build_slp_tree_1): Handle statements with no LHS,
> masked store ifns.
> (vect_remove_slp_scalar_calls): Likewise.
> * tree-vect-stmts.cc (vect_check_store_rhs): Look up the
> SLP child corresponding to the ifn value index.
> (vectorizable_store): Likewise for the mask index. Support
> masked stores.
> (vectorizable_load): Look up the SLP child corresponding to the
> ifn mask index.
>
> gcc/testsuite/
> * lib/target-supports.exp (check_effective_target_vect_masked_store):
> Supported with check_avx_available.
> * gcc.dg/vect/slp-mask-store-1.c: New testcase.
> ---
> gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c | 39 +++++++++++++++++
> gcc/testsuite/lib/target-supports.exp | 3 +-
> gcc/tree-vect-data-refs.cc | 3 +-
> gcc/tree-vect-slp.cc | 46 +++++++++++++++++---
> gcc/tree-vect-stmts.cc | 23 +++++-----
> gcc/tree-vectorizer.h | 1 +
> 6 files changed, 94 insertions(+), 21 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
>
> diff --git a/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
> new file mode 100644
> index 00000000000..50b7066778e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
> @@ -0,0 +1,39 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-mavx2" { target avx2 } } */
> +
> +#include "tree-vect.h"
> +
> +void __attribute__((noipa))
> +foo (unsigned * __restrict x, int * __restrict flag)
> +{
> + for (int i = 0; i < 32; ++i)
> + {
> + if (flag[2*i+0])
> + x[2*i+0] = x[2*i+0] + 3;
> + if (flag[2*i+1])
> + x[2*i+1] = x[2*i+1] + 177;
> + }
> +}
> +
> +unsigned x[16];
> +int flag[32] = { 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0,
> + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
> +unsigned res[16] = { 3, 177, 0, 0, 0, 177, 3, 0, 3, 177, 0, 0, 0, 177, 3, 0 };
> +
> +int
> +main ()
> +{
> + check_vect ();
> +
> + foo (x, flag);
> +
> + if (__builtin_memcmp (x, res, sizeof (x)) != 0)
> + abort ();
> + for (int i = 0; i < 32; ++i)
> + if (flag[i] != 0 && flag[i] != 1)
> + abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target { vect_masked_store && vect_masked_load } } } } */
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index d4623ee6b45..d353cc0aaf0 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -8400,7 +8400,8 @@ proc check_effective_target_vect_masked_load { } {
> # Return 1 if the target supports vector masked stores.
>
> proc check_effective_target_vect_masked_store { } {
> - return [expr { [check_effective_target_aarch64_sve]
> + return [expr { [check_avx_available]
> + || [check_effective_target_aarch64_sve]
> || [istarget amdgcn*-*-*] }]
> }
>
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index 3e9a284666c..a2caf6cb1c7 100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -3048,8 +3048,7 @@ can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
> like those created by build_mask_conversion. */
> tree mask1 = gimple_call_arg (call1, 2);
> tree mask2 = gimple_call_arg (call2, 2);
> - if (!operand_equal_p (mask1, mask2, 0)
> - && (ifn == IFN_MASK_STORE || !allow_slp_p))
> + if (!operand_equal_p (mask1, mask2, 0) && !allow_slp_p)
> {
> mask1 = strip_conversion (mask1);
> if (!mask1)
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index b5f9333fc22..cc799b6ebcd 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -503,6 +503,7 @@ static const int cond_expr_maps[3][5] = {
> static const int arg1_map[] = { 1, 1 };
> static const int arg2_map[] = { 1, 2 };
> static const int arg1_arg4_map[] = { 2, 1, 4 };
> +static const int arg3_arg2_map[] = { 2, 3, 2 };
> static const int op1_op0_map[] = { 2, 1, 0 };
>
> /* For most SLP statements, there is a one-to-one mapping between
> @@ -543,6 +544,9 @@ vect_get_operand_map (const gimple *stmt, unsigned char swap = 0)
> case IFN_MASK_GATHER_LOAD:
> return arg1_arg4_map;
>
> + case IFN_MASK_STORE:
> + return arg3_arg2_map;
> +
> default:
> break;
> }
> @@ -550,6 +554,20 @@ vect_get_operand_map (const gimple *stmt, unsigned char swap = 0)
> return nullptr;
> }
>
> +/* Return the SLP node child index for operand OP of STMT. */
> +
> +int
> +vect_slp_child_index_for_operand (const gimple *stmt, int op)
> +{
> + const int *opmap = vect_get_operand_map (stmt);
> + if (!opmap)
> + return op;
> + for (int i = 1; i < 1 + opmap[0]; ++i)
> + if (opmap[i] == op)
> + return i - 1;
> + gcc_unreachable ();
> +}
> +
> /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
> they are of a valid type and that they match the defs of the first stmt of
> the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
> @@ -1003,8 +1021,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
> return false;
> }
>
> + gcall *call_stmt = dyn_cast <gcall *> (stmt);
> lhs = gimple_get_lhs (stmt);
> - if (lhs == NULL_TREE)
> + if (lhs == NULL_TREE
> + && (!call_stmt
> + || !gimple_call_internal_p (stmt)
> + || !internal_store_fn_p (gimple_call_internal_fn (stmt))))
> {
> if (dump_enabled_p ())
> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -1041,7 +1063,6 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
>
> gcc_assert (vectype);
>
> - gcall *call_stmt = dyn_cast <gcall *> (stmt);
> if (call_stmt)
> {
> combined_fn cfn = gimple_call_combined_fn (call_stmt);
> @@ -1054,6 +1075,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
> || cfn == CFN_GATHER_LOAD
> || cfn == CFN_MASK_GATHER_LOAD)
> load_p = true;
> + else if (cfn == CFN_MASK_STORE)
> + rhs_code = CFN_MASK_STORE;
> else if ((internal_fn_p (cfn)
> && !vectorizable_internal_fn_p (as_internal_fn (cfn)))
> || gimple_call_tail_p (call_stmt)
> @@ -1212,7 +1235,9 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
> continue;
> }
>
> - if (call_stmt && first_stmt_code != CFN_MASK_LOAD)
> + if (call_stmt
> + && first_stmt_code != CFN_MASK_LOAD
> + && first_stmt_code != CFN_MASK_STORE)
> {
> if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
> call_stmt))
> @@ -1266,9 +1291,11 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
> /* Grouped store or load. */
> if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
> {
> - if (REFERENCE_CLASS_P (lhs))
> + if (!load_p)
> {
> /* Store. */
> + gcc_assert (rhs_code == CFN_MASK_STORE
> + || REFERENCE_CLASS_P (lhs));
> ;
> }
> else
> @@ -9090,10 +9117,17 @@ vect_remove_slp_scalar_calls (vec_info *vinfo,
> || !PURE_SLP_STMT (stmt_info))
> continue;
> lhs = gimple_call_lhs (stmt);
> - new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
> + if (lhs)
> + new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
> + else
> + {
> + new_stmt = gimple_build_nop ();
> + unlink_stmt_vdef (stmt_info->stmt);
> + }
> gsi = gsi_for_stmt (stmt);
> vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
> - SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
> + if (lhs)
> + SSA_NAME_DEF_STMT (lhs) = new_stmt;
> }
> }
>
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 413a88750d6..31b73b08e62 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -2629,12 +2629,14 @@ vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
> return false;
> }
>
> - unsigned op_no = 0;
> + int op_no = 0;
> if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
> {
> if (gimple_call_internal_p (call)
> && internal_store_fn_p (gimple_call_internal_fn (call)))
> op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
> + if (slp_node)
> + op_no = vect_slp_child_index_for_operand (call, op_no);
> }
>
> enum vect_def_type rhs_dt;
> @@ -8244,15 +8246,9 @@ vectorizable_store (vec_info *vinfo,
> if (!internal_store_fn_p (ifn))
> return false;
>
> - if (slp_node != NULL)
> - {
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "SLP of masked stores not supported.\n");
> - return false;
> - }
> -
> int mask_index = internal_fn_mask_index (ifn);
> + if (mask_index >= 0 && slp_node)
> + mask_index = vect_slp_child_index_for_operand (call, mask_index);
> if (mask_index >= 0
> && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
> &mask, NULL, &mask_dt, &mask_vectype))
> @@ -9093,8 +9089,10 @@ vectorizable_store (vec_info *vinfo,
> {
> /* Get vectorized arguments for SLP_NODE. */
> vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
> - &vec_oprnds);
> + &vec_oprnds, mask, &vec_masks);
> vec_oprnd = vec_oprnds[0];
> + if (mask)
> + vec_mask = vec_masks[0];
> }
> else
> {
> @@ -9191,6 +9189,8 @@ vectorizable_store (vec_info *vinfo,
> final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
> vec_num * ncopies, vectype,
> vec_num * j + i);
> + if (slp && vec_mask)
> + vec_mask = vec_masks[i];
> if (vec_mask)
> final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
> vec_mask, gsi);
> @@ -9575,9 +9575,8 @@ vectorizable_load (vec_info *vinfo,
> return false;
>
> mask_index = internal_fn_mask_index (ifn);
> - /* ??? For SLP the mask operand is always last. */
> if (mask_index >= 0 && slp_node)
> - mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
> + mask_index = vect_slp_child_index_for_operand (call, mask_index);
> if (mask_index >= 0
> && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
> &mask, NULL, &mask_dt, &mask_vectype))
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 53a3d78d545..f1d0cd79961 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2429,6 +2429,7 @@ extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info);
> extern slp_tree vect_create_new_slp_node (unsigned, tree_code);
> extern void vect_free_slp_tree (slp_tree);
> extern bool compatible_calls_p (gcall *, gcall *);
> +extern int vect_slp_child_index_for_operand (const gimple *, int op);
>
> /* In tree-vect-patterns.cc. */
> extern void
* Re: [PATCH] tree-optimization/111115 - SLP of masked stores
From: Robin Dapp @ 2023-08-24 12:18 UTC
To: Richard Biener, gcc-patches, richard.sandiford; +Cc: rdapp.gcc
This causes an ICE in
gcc.target/riscv/rvv/autovec/gather-scatter/mask_gather_load-11.c
(internal compiler error: in get_group_load_store_type, at tree-vect-stmts.cc:2121)
#include <stdint-gcc.h>
#define TEST_LOOP(DATA_TYPE, INDEX_TYPE) \
void __attribute__ ((noinline, noclone)) \
f_##DATA_TYPE##_##INDEX_TYPE (DATA_TYPE *restrict y, DATA_TYPE *restrict x, \
INDEX_TYPE *restrict index, \
INDEX_TYPE *restrict cond) \
{ \
for (int i = 0; i < 100; ++i) \
{ \
if (cond[i * 2]) \
y[i * 2] = x[index[i * 2]] + 1; \
if (cond[i * 2 + 1]) \
y[i * 2 + 1] = x[index[i * 2 + 1]] + 2; \
} \
}
TEST_LOOP (int8_t, int8_t)
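For readability, a mechanical expansion of TEST_LOOP (int8_t, int8_t)
(using plain <stdint.h> in place of the testsuite's <stdint-gcc.h>):

#include <stdint.h>

void __attribute__ ((noinline, noclone))
f_int8_t_int8_t (int8_t *restrict y, int8_t *restrict x,
                 int8_t *restrict index, int8_t *restrict cond)
{
  /* Each lane conditionally gathers x[index[...]] and conditionally
     stores the result into y.  */
  for (int i = 0; i < 100; ++i)
    {
      if (cond[i * 2])
        y[i * 2] = x[index[i * 2]] + 1;
      if (cond[i * 2 + 1])
        y[i * 2 + 1] = x[index[i * 2 + 1]] + 2;
    }
}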
Is there now a mismatch with the LEN_ IFNs somewhere?
Regards
Robin
* Re: [PATCH] tree-optimization/111115 - SLP of masked stores
From: Richard Biener @ 2023-08-24 12:43 UTC
To: Robin Dapp; +Cc: gcc-patches, richard.sandiford
On Thu, 24 Aug 2023, Robin Dapp wrote:
> This causes an ICE in
> gcc.target/riscv/rvv/autovec/gather-scatter/mask_gather_load-11.c
> (internal compiler error: in get_group_load_store_type, at tree-vect-stmts.cc:2121)
>
> #include <stdint-gcc.h>
>
> #define TEST_LOOP(DATA_TYPE, INDEX_TYPE) \
> void __attribute__ ((noinline, noclone)) \
> f_##DATA_TYPE##_##INDEX_TYPE (DATA_TYPE *restrict y, DATA_TYPE *restrict x, \
> INDEX_TYPE *restrict index, \
> INDEX_TYPE *restrict cond) \
> { \
> for (int i = 0; i < 100; ++i) \
> { \
> if (cond[i * 2]) \
> y[i * 2] = x[index[i * 2]] + 1; \
> if (cond[i * 2 + 1]) \
> y[i * 2 + 1] = x[index[i * 2 + 1]] + 2; \
> } \
> }
>
> TEST_LOOP (int8_t, int8_t)
>
> Is there now a mismatch with the LEN_ IFNs somewhere?
Can you open a bug report, produce a preprocessed testcase, and state
the cc1 command line needed to debug this in a cross compiler?
Richard.
* [PATCH] tree-optimization/111115 - SLP of masked stores
From: Richard Biener @ 2023-08-23 13:24 UTC
To: gcc-patches; +Cc: richard.sandiford
The following adds the capability to do SLP on .MASK_STORE; I do not
plan to add interleaving support.
Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?
Thanks,
Richard.
PR tree-optimization/111115
gcc/
* tree-vectorizer.h (vect_slp_child_index_for_operand): New.
* tree-vect-data-refs.cc (can_group_stmts_p): Also group
.MASK_STORE.
* tree-vect-slp.cc (arg3_arg2_map): New.
(vect_get_operand_map): Handle IFN_MASK_STORE.
(vect_slp_child_index_for_operand): New function.
(vect_build_slp_tree_1): Handle statements with no LHS,
masked store ifns.
(vect_remove_slp_scalar_calls): Likewise.
* tree-vect-stmts.cc (vect_check_store_rhs): Look up the
SLP child corresponding to the ifn value index.
(vectorizable_store): Likewise for the mask index. Support
masked stores.
(vectorizable_load): Look up the SLP child corresponding to the
ifn mask index.
gcc/testsuite/
* lib/target-supports.exp (check_effective_target_vect_masked_store):
Supported with check_avx_available.
* gcc.dg/vect/slp-mask-store-1.c: New testcase.
---
gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c | 39 +++++++++++++++++
gcc/testsuite/lib/target-supports.exp | 3 +-
gcc/tree-vect-data-refs.cc | 3 +-
gcc/tree-vect-slp.cc | 46 +++++++++++++++++---
gcc/tree-vect-stmts.cc | 23 +++++-----
gcc/tree-vectorizer.h | 1 +
6 files changed, 94 insertions(+), 21 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
diff --git a/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
new file mode 100644
index 00000000000..50b7066778e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-additional-options "-mavx2" { target avx2 } } */
+
+#include "tree-vect.h"
+
+void __attribute__((noipa))
+foo (unsigned * __restrict x, int * __restrict flag)
+{
+ for (int i = 0; i < 32; ++i)
+ {
+ if (flag[2*i+0])
+ x[2*i+0] = x[2*i+0] + 3;
+ if (flag[2*i+1])
+ x[2*i+1] = x[2*i+1] + 177;
+ }
+}
+
+unsigned x[16];
+int flag[32] = { 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+unsigned res[16] = { 3, 177, 0, 0, 0, 177, 3, 0, 3, 177, 0, 0, 0, 177, 3, 0 };
+
+int
+main ()
+{
+ check_vect ();
+
+ foo (x, flag);
+
+ if (__builtin_memcmp (x, res, sizeof (x)) != 0)
+ abort ();
+ for (int i = 0; i < 32; ++i)
+ if (flag[i] != 0 && flag[i] != 1)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target { vect_masked_store && vect_masked_load } } } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index d4623ee6b45..d353cc0aaf0 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -8400,7 +8400,8 @@ proc check_effective_target_vect_masked_load { } {
# Return 1 if the target supports vector masked stores.
proc check_effective_target_vect_masked_store { } {
- return [expr { [check_effective_target_aarch64_sve]
+ return [expr { [check_avx_available]
+ || [check_effective_target_aarch64_sve]
|| [istarget amdgcn*-*-*] }]
}
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 3e9a284666c..a2caf6cb1c7 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -3048,8 +3048,7 @@ can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
like those created by build_mask_conversion. */
tree mask1 = gimple_call_arg (call1, 2);
tree mask2 = gimple_call_arg (call2, 2);
- if (!operand_equal_p (mask1, mask2, 0)
- && (ifn == IFN_MASK_STORE || !allow_slp_p))
+ if (!operand_equal_p (mask1, mask2, 0) && !allow_slp_p)
{
mask1 = strip_conversion (mask1);
if (!mask1)
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index b5f9333fc22..cc799b6ebcd 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -503,6 +503,7 @@ static const int cond_expr_maps[3][5] = {
static const int arg1_map[] = { 1, 1 };
static const int arg2_map[] = { 1, 2 };
static const int arg1_arg4_map[] = { 2, 1, 4 };
+static const int arg3_arg2_map[] = { 2, 3, 2 };
static const int op1_op0_map[] = { 2, 1, 0 };
/* For most SLP statements, there is a one-to-one mapping between
@@ -543,6 +544,9 @@ vect_get_operand_map (const gimple *stmt, unsigned char swap = 0)
case IFN_MASK_GATHER_LOAD:
return arg1_arg4_map;
+ case IFN_MASK_STORE:
+ return arg3_arg2_map;
+
default:
break;
}
@@ -550,6 +554,20 @@ vect_get_operand_map (const gimple *stmt, unsigned char swap = 0)
return nullptr;
}
+/* Return the SLP node child index for operand OP of STMT. */
+
+int
+vect_slp_child_index_for_operand (const gimple *stmt, int op)
+{
+ const int *opmap = vect_get_operand_map (stmt);
+ if (!opmap)
+ return op;
+ for (int i = 1; i < 1 + opmap[0]; ++i)
+ if (opmap[i] == op)
+ return i - 1;
+ gcc_unreachable ();
+}
+
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
they are of a valid type and that they match the defs of the first stmt of
the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
@@ -1003,8 +1021,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
return false;
}
+ gcall *call_stmt = dyn_cast <gcall *> (stmt);
lhs = gimple_get_lhs (stmt);
- if (lhs == NULL_TREE)
+ if (lhs == NULL_TREE
+ && (!call_stmt
+ || !gimple_call_internal_p (stmt)
+ || !internal_store_fn_p (gimple_call_internal_fn (stmt))))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -1041,7 +1063,6 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
gcc_assert (vectype);
- gcall *call_stmt = dyn_cast <gcall *> (stmt);
if (call_stmt)
{
combined_fn cfn = gimple_call_combined_fn (call_stmt);
@@ -1054,6 +1075,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
|| cfn == CFN_GATHER_LOAD
|| cfn == CFN_MASK_GATHER_LOAD)
load_p = true;
+ else if (cfn == CFN_MASK_STORE)
+ rhs_code = CFN_MASK_STORE;
else if ((internal_fn_p (cfn)
&& !vectorizable_internal_fn_p (as_internal_fn (cfn)))
|| gimple_call_tail_p (call_stmt)
@@ -1212,7 +1235,9 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
continue;
}
- if (call_stmt && first_stmt_code != CFN_MASK_LOAD)
+ if (call_stmt
+ && first_stmt_code != CFN_MASK_LOAD
+ && first_stmt_code != CFN_MASK_STORE)
{
if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
call_stmt))
@@ -1266,9 +1291,11 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
/* Grouped store or load. */
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
{
- if (REFERENCE_CLASS_P (lhs))
+ if (!load_p)
{
/* Store. */
+ gcc_assert (rhs_code == CFN_MASK_STORE
+ || REFERENCE_CLASS_P (lhs));
;
}
else
@@ -9090,10 +9117,17 @@ vect_remove_slp_scalar_calls (vec_info *vinfo,
|| !PURE_SLP_STMT (stmt_info))
continue;
lhs = gimple_call_lhs (stmt);
- new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
+ if (lhs)
+ new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
+ else
+ {
+ new_stmt = gimple_build_nop ();
+ unlink_stmt_vdef (stmt_info->stmt);
+ }
gsi = gsi_for_stmt (stmt);
vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
- SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
+ if (lhs)
+ SSA_NAME_DEF_STMT (lhs) = new_stmt;
}
}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 413a88750d6..31b73b08e62 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2629,12 +2629,14 @@ vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
return false;
}
- unsigned op_no = 0;
+ int op_no = 0;
if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
{
if (gimple_call_internal_p (call)
&& internal_store_fn_p (gimple_call_internal_fn (call)))
op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
+ if (slp_node)
+ op_no = vect_slp_child_index_for_operand (call, op_no);
}
enum vect_def_type rhs_dt;
@@ -8244,15 +8246,9 @@ vectorizable_store (vec_info *vinfo,
if (!internal_store_fn_p (ifn))
return false;
- if (slp_node != NULL)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "SLP of masked stores not supported.\n");
- return false;
- }
-
int mask_index = internal_fn_mask_index (ifn);
+ if (mask_index >= 0 && slp_node)
+ mask_index = vect_slp_child_index_for_operand (call, mask_index);
if (mask_index >= 0
&& !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
&mask, NULL, &mask_dt, &mask_vectype))
@@ -9093,8 +9089,10 @@ vectorizable_store (vec_info *vinfo,
{
/* Get vectorized arguments for SLP_NODE. */
vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
- &vec_oprnds);
+ &vec_oprnds, mask, &vec_masks);
vec_oprnd = vec_oprnds[0];
+ if (mask)
+ vec_mask = vec_masks[0];
}
else
{
@@ -9191,6 +9189,8 @@ vectorizable_store (vec_info *vinfo,
final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
vec_num * ncopies, vectype,
vec_num * j + i);
+ if (slp && vec_mask)
+ vec_mask = vec_masks[i];
if (vec_mask)
final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
vec_mask, gsi);
@@ -9575,9 +9575,8 @@ vectorizable_load (vec_info *vinfo,
return false;
mask_index = internal_fn_mask_index (ifn);
- /* ??? For SLP the mask operand is always last. */
if (mask_index >= 0 && slp_node)
- mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
+ mask_index = vect_slp_child_index_for_operand (call, mask_index);
if (mask_index >= 0
&& !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
&mask, NULL, &mask_dt, &mask_vectype))
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 53a3d78d545..f1d0cd79961 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2429,6 +2429,7 @@ extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info);
extern slp_tree vect_create_new_slp_node (unsigned, tree_code);
extern void vect_free_slp_tree (slp_tree);
extern bool compatible_calls_p (gcall *, gcall *);
+extern int vect_slp_child_index_for_operand (const gimple *, int op);
/* In tree-vect-patterns.cc. */
extern void
--
2.35.3
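A note on the operand maps used above, with a standalone sketch (not part of
the patch): each map is laid out as { number of SLP children, gimple operand
index of child 0, of child 1, ... }. arg3_arg2_map = { 2, 3, 2 } therefore
gives a .MASK_STORE node two SLP children, the stored value (call argument 3)
first and the mask (call argument 2) second; can_group_stmts_p above likewise
reads the mask as call argument 2. A minimal C sketch of the lookup that
vect_slp_child_index_for_operand performs, assuming that layout:

#include <assert.h>
#include <stdio.h>

/* Same encoding as arg3_arg2_map in tree-vect-slp.cc: two SLP children;
   child 0 <- call argument 3 (stored value), child 1 <- call argument 2
   (the mask).  */
static const int arg3_arg2_map[] = { 2, 3, 2 };

/* Return the SLP child index for gimple operand OP, or -1 if OP has no
   corresponding child (the real function asserts instead).  OPMAP may be
   null, in which case the mapping is the identity.  */
static int
child_index_for_operand (const int *opmap, int op)
{
  if (!opmap)
    return op;
  for (int i = 1; i < 1 + opmap[0]; ++i)
    if (opmap[i] == op)
      return i - 1;
  return -1;
}

int
main (void)
{
  /* The stored value (argument 3) is SLP child 0; the mask (argument 2)
     is SLP child 1.  This mirrors how vectorizable_store translates the
     internal_fn_mask_index / internal_fn_stored_value_index results into
     SLP child slots.  */
  assert (child_index_for_operand (arg3_arg2_map, 3) == 0);
  assert (child_index_for_operand (arg3_arg2_map, 2) == 1);
  printf ("value -> child %d, mask -> child %d\n",
          child_index_for_operand (arg3_arg2_map, 3),
          child_index_for_operand (arg3_arg2_map, 2));
  return 0;
}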