From: "Andre Vieira (lists)" <andre.simoesdiasvieira@arm.com>
To: gcc-patches@gcc.gnu.org
Cc: Richard Biener <rguenther@suse.de>,
Richard Sandiford <richard.sandiford@arm.com>,
"jakub@redhat.com" <jakub@redhat.com>
Subject: Re: [PATCH 5/8] vect: Use inbranch simdclones in masked loops
Date: Wed, 18 Oct 2023 15:41:08 +0100 [thread overview]
Message-ID: <e6753552-fb81-4cfc-a345-eaee2fe64ee6@arm.com> (raw)
In-Reply-To: <d96af71e-d8e8-0bff-d502-5e54768ac774@arm.com>
[-- Attachment #1: Type: text/plain, Size: 611 bytes --]
Rebased, needs review.
On 30/08/2023 10:13, Andre Vieira (lists) via Gcc-patches wrote:
> This patch enables the compiler to use inbranch simdclones when
> generating masked loops in autovectorization.
>
> gcc/ChangeLog:
>
> * omp-simd-clone.cc (simd_clone_adjust_argument_types): Make function
> compatible with mask parameters in clone.
> * tree-vect-stmts.cc (vect_convert): New helper function.
> (vect_build_all_ones_mask): Allow vector boolean typed masks.
> (vectorizable_simd_clone_call): Enable the use of masked clones in
> fully masked loops.
[-- Attachment #2: sve_simd_clones_5v2.patch --]
[-- Type: text/plain, Size: 13395 bytes --]
diff --git a/gcc/omp-simd-clone.cc b/gcc/omp-simd-clone.cc
index a42643400ddcf10961633448b49d4caafb999f12..ef0b9b48c7212900023bc0eaebca5e1f9389db77 100644
--- a/gcc/omp-simd-clone.cc
+++ b/gcc/omp-simd-clone.cc
@@ -807,8 +807,14 @@ simd_clone_adjust_argument_types (struct cgraph_node *node)
{
ipa_adjusted_param adj;
memset (&adj, 0, sizeof (adj));
- tree parm = args[i];
- tree parm_type = node->definition ? TREE_TYPE (parm) : parm;
+ tree parm = NULL_TREE;
+ tree parm_type = NULL_TREE;
+ if(i < args.length())
+ {
+ parm = args[i];
+ parm_type = node->definition ? TREE_TYPE (parm) : parm;
+ }
+
adj.base_index = i;
adj.prev_clone_index = i;
@@ -1547,7 +1553,7 @@ simd_clone_adjust (struct cgraph_node *node)
mask = gimple_assign_lhs (g);
g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)),
BIT_AND_EXPR, mask,
- build_int_cst (TREE_TYPE (mask), 1));
+ build_one_cst (TREE_TYPE (mask)));
gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
mask = gimple_assign_lhs (g);
}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 731acc76350cae39c899a866584068cff247183a..6e2c70c1d3970af652c1e50e41b144162884bf24 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1594,6 +1594,20 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
}
}
+/* Return SSA name of the result of the conversion of OPERAND into type TYPE.
+ The conversion statement is inserted at GSI. */
+
+static tree
+vect_convert (vec_info *vinfo, stmt_vec_info stmt_info, tree type, tree operand,
+ gimple_stmt_iterator *gsi)
+{
+ operand = build1 (VIEW_CONVERT_EXPR, type, operand);
+ gassign *new_stmt = gimple_build_assign (make_ssa_name (type),
+ operand);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ return gimple_get_lhs (new_stmt);
+}
+
/* Return the mask input to a masked load or store. VEC_MASK is the vectorized
form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
that needs to be applied to all loads and stores in a vectorized loop.
@@ -2547,7 +2561,8 @@ vect_build_all_ones_mask (vec_info *vinfo,
{
if (TREE_CODE (masktype) == INTEGER_TYPE)
return build_int_cst (masktype, -1);
- else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
+ else if (VECTOR_BOOLEAN_TYPE_P (masktype)
+ || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
{
tree mask = build_int_cst (TREE_TYPE (masktype), -1);
mask = build_vector_from_val (masktype, mask);
@@ -4156,7 +4171,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
size_t i, nargs;
tree lhs, rtype, ratype;
vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
- int arg_offset = 0;
+ int masked_call_offset = 0;
/* Is STMT a vectorizable call? */
gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
@@ -4171,7 +4186,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
fndecl = TREE_OPERAND (fndecl, 0);
gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
- arg_offset = 1;
+ masked_call_offset = 1;
}
if (fndecl == NULL_TREE)
return false;
@@ -4199,7 +4214,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
return false;
/* Process function arguments. */
- nargs = gimple_call_num_args (stmt) - arg_offset;
+ nargs = gimple_call_num_args (stmt) - masked_call_offset;
/* Bail out if the function has zero arguments. */
if (nargs == 0)
@@ -4221,7 +4236,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
thisarginfo.op = NULL_TREE;
thisarginfo.simd_lane_linear = false;
- int op_no = i + arg_offset;
+ int op_no = i + masked_call_offset;
if (slp_node)
op_no = vect_slp_child_index_for_operand (stmt, op_no);
if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
@@ -4303,16 +4318,6 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
arginfo.quick_push (thisarginfo);
}
- if (loop_vinfo
- && !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ())
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not considering SIMD clones; not yet supported"
- " for variable-width vectors.\n");
- return false;
- }
-
poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
unsigned group_size = slp_node ? SLP_TREE_LANES (slp_node) : 1;
unsigned int badness = 0;
@@ -4325,9 +4330,10 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
{
unsigned int this_badness = 0;
unsigned int num_calls;
- if (!constant_multiple_p (vf * group_size,
- n->simdclone->simdlen, &num_calls)
- || n->simdclone->nargs != nargs)
+ if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen,
+ &num_calls)
+ || (!n->simdclone->inbranch && (masked_call_offset > 0))
+ || nargs != n->simdclone->nargs)
continue;
if (num_calls != 1)
this_badness += exact_log2 (num_calls) * 4096;
@@ -4344,7 +4350,8 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
case SIMD_CLONE_ARG_TYPE_VECTOR:
if (!useless_type_conversion_p
(n->simdclone->args[i].orig_type,
- TREE_TYPE (gimple_call_arg (stmt, i + arg_offset))))
+ TREE_TYPE (gimple_call_arg (stmt,
+ i + masked_call_offset))))
i = -1;
else if (arginfo[i].dt == vect_constant_def
|| arginfo[i].dt == vect_external_def
@@ -4392,6 +4399,17 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
}
if (i == (size_t) -1)
continue;
+ if (masked_call_offset == 0
+ && n->simdclone->inbranch
+ && n->simdclone->nargs > nargs)
+ {
+ gcc_assert (n->simdclone->args[n->simdclone->nargs - 1].arg_type ==
+ SIMD_CLONE_ARG_TYPE_MASK);
+ /* Penalize using a masked SIMD clone in a non-masked loop, that is
+ not in a branch, as we'd have to construct an all-true mask. */
+ if (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ this_badness += 64;
+ }
if (bestn == NULL || this_badness < badness)
{
bestn = n;
@@ -4414,7 +4432,8 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
|| arginfo[i].dt == vect_external_def)
&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
{
- tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i + arg_offset));
+ tree arg_type = TREE_TYPE (gimple_call_arg (stmt,
+ i + masked_call_offset));
arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
slp_node);
if (arginfo[i].vectype == NULL
@@ -4523,22 +4542,37 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
if (gimple_vuse (stmt) && slp_node)
vinfo->any_known_not_updated_vssa = true;
simd_clone_info.safe_push (bestn->decl);
- for (i = 0; i < nargs; i++)
- if ((bestn->simdclone->args[i].arg_type
- == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
- || (bestn->simdclone->args[i].arg_type
- == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
- {
- simd_clone_info.safe_grow_cleared (i * 3 + 1, true);
- simd_clone_info.safe_push (arginfo[i].op);
- tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
- ? size_type_node : TREE_TYPE (arginfo[i].op);
- tree ls = build_int_cst (lst, arginfo[i].linear_step);
- simd_clone_info.safe_push (ls);
- tree sll = arginfo[i].simd_lane_linear
- ? boolean_true_node : boolean_false_node;
- simd_clone_info.safe_push (sll);
- }
+ for (i = 0; i < bestn->simdclone->nargs; i++)
+ {
+ switch (bestn->simdclone->args[i].arg_type)
+ {
+ default:
+ continue;
+ case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
+ case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
+ {
+ auto &clone_info = STMT_VINFO_SIMD_CLONE_INFO (stmt_info);
+ clone_info.safe_grow_cleared (i * 3 + 1, true);
+ clone_info.safe_push (arginfo[i].op);
+ tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
+ ? size_type_node : TREE_TYPE (arginfo[i].op);
+ tree ls = build_int_cst (lst, arginfo[i].linear_step);
+ clone_info.safe_push (ls);
+ tree sll = arginfo[i].simd_lane_linear
+ ? boolean_true_node : boolean_false_node;
+ clone_info.safe_push (sll);
+ }
+ break;
+ case SIMD_CLONE_ARG_TYPE_MASK:
+ if (loop_vinfo
+ && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
+ vect_record_loop_mask (loop_vinfo,
+ &LOOP_VINFO_MASKS (loop_vinfo),
+ ncopies, vectype, op);
+
+ break;
+ }
+ }
if (!bestn->simdclone->inbranch && loop_vinfo)
{
@@ -4590,6 +4624,8 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
vec_oprnds.safe_grow_cleared (nargs, true);
for (j = 0; j < ncopies; ++j)
{
+ poly_uint64 callee_nelements;
+ poly_uint64 caller_nelements;
/* Build argument list for the vectorized call. */
if (j == 0)
vargs.create (nargs);
@@ -4600,8 +4636,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
{
unsigned int k, l, m, o;
tree atype;
- poly_uint64 callee_nelements, caller_nelements;
- op = gimple_call_arg (stmt, i + arg_offset);
+ op = gimple_call_arg (stmt, i + masked_call_offset);
switch (bestn->simdclone->args[i].arg_type)
{
case SIMD_CLONE_ARG_TYPE_VECTOR:
@@ -4680,16 +4715,9 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
if (k == 1)
if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
atype))
- {
- vec_oprnd0
- = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
- gassign *new_stmt
- = gimple_build_assign (make_ssa_name (atype),
- vec_oprnd0);
- vect_finish_stmt_generation (vinfo, stmt_info,
- new_stmt, gsi);
- vargs.safe_push (gimple_assign_lhs (new_stmt));
- }
+ vargs.safe_push (vect_convert (vinfo, stmt_info,
+ atype, vec_oprnd0,
+ gsi));
else
vargs.safe_push (vec_oprnd0);
else
@@ -4738,6 +4766,24 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
vec_oprnds_i[i] = 0;
}
vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
+ if (loop_vinfo
+ && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ {
+ vec_loop_masks *loop_masks
+ = &LOOP_VINFO_MASKS (loop_vinfo);
+ tree loop_mask
+ = vect_get_loop_mask (loop_vinfo, gsi,
+ loop_masks, ncopies,
+ vectype, j);
+ vec_oprnd0
+ = prepare_vec_mask (loop_vinfo,
+ TREE_TYPE (loop_mask),
+ loop_mask, vec_oprnd0,
+ gsi);
+ loop_vinfo->vec_cond_masked_set.add ({ vec_oprnd0,
+ loop_mask });
+
+ }
vec_oprnd0
= build3 (VEC_COND_EXPR, atype, vec_oprnd0,
build_vector_from_val (atype, one),
@@ -4901,6 +4947,64 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
}
}
+ if (masked_call_offset == 0
+ && bestn->simdclone->inbranch
+ && bestn->simdclone->nargs > nargs)
+ {
+ unsigned long m, o;
+ size_t mask_i = bestn->simdclone->nargs - 1;
+ tree mask;
+ gcc_assert (bestn->simdclone->args[mask_i].arg_type ==
+ SIMD_CLONE_ARG_TYPE_MASK);
+
+ tree masktype = bestn->simdclone->args[mask_i].vector_type;
+ callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
+ o = vector_unroll_factor (nunits, callee_nelements);
+ for (m = j * o; m < (j + 1) * o; m++)
+ {
+ if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ {
+ vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo);
+ mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
+ ncopies, vectype, j);
+ }
+ else
+ mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
+
+ if (!useless_type_conversion_p (TREE_TYPE (mask), masktype))
+ {
+ gassign *new_stmt;
+ if (bestn->simdclone->mask_mode != VOIDmode)
+ {
+ /* This means we are dealing with integer mask modes.
+ First convert to an integer type with the same size as
+ the current vector type. */
+ unsigned HOST_WIDE_INT intermediate_size
+ = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
+ tree mid_int_type =
+ build_nonstandard_integer_type (intermediate_size, 1);
+ mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
+ new_stmt
+ = gimple_build_assign (make_ssa_name (mid_int_type),
+ mask);
+ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+ /* Then zero-extend to the mask mode. */
+ mask = fold_build1 (NOP_EXPR, masktype,
+ gimple_get_lhs (new_stmt));
+ }
+ else
+ mask = build1 (VIEW_CONVERT_EXPR, masktype, mask);
+
+ new_stmt = gimple_build_assign (make_ssa_name (masktype),
+ mask);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
+ mask = gimple_assign_lhs (new_stmt);
+ }
+ vargs.safe_push (mask);
+ }
+ }
+
gcall *new_call = gimple_build_call_vec (fndecl, vargs);
if (vec_dest)
{
next prev parent reply other threads:[~2023-10-18 14:41 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-30 8:49 aarch64, vect, omp: Add SVE support for simd clones [PR 96342] Andre Vieira (lists)
2023-08-30 9:06 ` [PATCH 1/8] parloops: Copy target and optimizations when creating a function clone Andre Vieira (lists)
2023-08-30 12:31 ` Richard Biener
2023-10-18 14:40 ` Andre Vieira (lists)
2023-08-30 9:08 ` [Patch 2/8] parloops: Allow poly nit and bound Andre Vieira (lists)
2023-08-30 12:32 ` Richard Biener
2023-10-18 14:40 ` Andre Vieira (lists)
2023-08-30 9:10 ` [Patch 3/8] vect: Fix vect_get_smallest_scalar_type for simd clones Andre Vieira (lists)
2023-08-30 12:54 ` Richard Biener
2023-10-18 14:40 ` Andre Vieira (lists)
2023-10-19 12:07 ` Richard Biener
2023-08-30 9:11 ` [PATCH 4/8] vect: don't allow fully masked loops with non-masked simd clones [PR 110485] Andre Vieira (lists)
2023-08-30 12:54 ` Richard Biener
2023-10-18 14:40 ` Andre Vieira (lists)
2023-10-19 12:06 ` Richard Biener
2023-08-30 9:13 ` [PATCH 5/8] vect: Use inbranch simdclones in masked loops Andre Vieira (lists)
2023-10-18 14:41 ` Andre Vieira (lists) [this message]
2023-10-19 12:17 ` Richard Biener
2023-08-30  9:14 ` [PATCH 6/8] vect: Add vector_mode parameter to simd_clone_usable Andre Vieira (lists)
2023-08-30 9:17 ` Andre Vieira (lists)
2023-08-30 13:01 ` Richard Biener
2023-08-30 15:02 ` Andre Vieira (lists)
2023-08-31 6:39 ` Richard Biener
2023-09-28 15:57 ` Andre Vieira (lists)
2023-08-30 9:17 ` [PATCH7/8] vect: Add TARGET_SIMD_CLONE_ADJUST_RET_OR_PARAM Andre Vieira (lists)
2023-08-30 13:04 ` Richard Biener
2023-10-04 10:32 ` Andre Vieira (lists)
2023-10-04 10:41 ` Richard Biener
2023-10-04 12:40 ` Andre Vieira (lists)
2023-10-18 14:41 ` [PATCH6/8] omp: Reorder call for TARGET_SIMD_CLONE_ADJUST (was Re: [PATCH7/8] vect: Add TARGET_SIMD_CLONE_ADJUST_RET_OR_PARAM) Andre Vieira (lists)
2023-10-30 18:34 ` Andre Vieira (lists)
2023-10-31 7:59 ` Richard Biener
2023-12-08 10:35 ` Jakub Jelinek
2023-08-30 9:19 ` [PATCH 8/8] aarch64: Add SVE support for simd clones [PR 96342] Andre Vieira (lists)
2023-10-18 14:41 ` Andre Vieira (lists)
2023-11-29 17:01 ` Richard Sandiford
2023-12-01 16:39 ` Andre Vieira (lists)
2023-10-18 14:40 ` aarch64, vect, omp: " Andre Vieira (lists)
2023-10-18 14:41 ` [PATCH 0/8] omp: Replace simd_clone_subparts with TYPE_VECTOR_SUBPARTS Andre Vieira (lists)
2023-10-19 7:10 ` Richard Biener
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=e6753552-fb81-4cfc-a345-eaee2fe64ee6@arm.com \
--to=andre.simoesdiasvieira@arm.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=jakub@redhat.com \
--cc=rguenther@suse.de \
--cc=richard.sandiford@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).