From: Richard Sandiford <richard.sandiford@arm.com>
To: Oluwatamilore Adebayo <oluwatamilore.adebayo@arm.com>
Cc: <gcc-patches@gcc.gnu.org>, <richard.guenther@gmail.com>
Subject: Re: [PATCH 1/2] Mid engine setup [SU]ABDL
Date: Mon, 26 Jun 2023 21:50:08 +0100 [thread overview]
Message-ID: <mptttuu9ce7.fsf@arm.com> (raw)
In-Reply-To: <20230626153423.42763-1-oluwatamilore.adebayo@arm.com> (Oluwatamilore Adebayo's message of "Mon, 26 Jun 2023 16:34:23 +0100")
Thanks for doing this. Generally looks good, but some comments below.
Oluwatamilore Adebayo <oluwatamilore.adebayo@arm.com> writes:
> From: oluade01 <oluwatamilore.adebayo@arm.com>
>
> This updates vect_recog_abd_pattern to recognize the widening
> variant of absolute difference (ABDL, ABDL2).
>
> gcc/ChangeLog:
>
> * internal-fn.cc (widening_fn_p, decomposes_to_hilo_fn_p):
> Add IFN_VEC_WIDEN_ABD to the switch statement.
> * internal-fn.def (VEC_WIDEN_ABD): New internal hilo optab.
> * optabs.def (vec_widen_sabd_optab,
> vec_widen_sabd_hi_optab, vec_widen_sabd_lo_optab,
> vec_widen_sabd_odd_even, vec_widen_sabd_even_optab,
> vec_widen_uabd_optab,
> vec_widen_uabd_hi_optab, vec_widen_uabd_lo_optab,
> vec_widen_uabd_odd_even, vec_widen_uabd_even_optab):
> New optabs.
> * tree-vect-patterns.cc (vect_recog_abd_pattern): Update to
> to build a VEC_WIDEN_ABD call if the input precision is smaller
> than the precision of the output.
> (vect_recog_widen_abd_pattern): Should an ABD expression be
> found preceeding an extension, replace the two with a
> VEC_WIDEN_ABD.
> ---
> gcc/internal-fn.def | 5 ++
> gcc/optabs.def | 10 +++
> gcc/tree-vect-patterns.cc | 174 ++++++++++++++++++++++++++++++++------
> 3 files changed, 161 insertions(+), 28 deletions(-)
>
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 116965f4830cec8f60642ff011a86b6562e2c509..d67274d68b49943a88c531e903fd03b42343ab97 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -352,6 +352,11 @@ DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_MINUS,
> first,
> vec_widen_ssub, vec_widen_usub,
> binary)
> +DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_ABD,
> + ECF_CONST | ECF_NOTHROW,
> + first,
> + vec_widen_sabd, vec_widen_uabd,
> + binary)
> DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub, ternary)
> DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd, ternary)
>
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 35b835a6ac56d72417dac8ddfd77a8a7e2475e65..68dfa1550f791a2fe833012157601ecfa68f1e09 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -418,6 +418,11 @@ OPTAB_D (vec_widen_sadd_hi_optab, "vec_widen_sadd_hi_$a")
> OPTAB_D (vec_widen_sadd_lo_optab, "vec_widen_sadd_lo_$a")
> OPTAB_D (vec_widen_sadd_odd_optab, "vec_widen_sadd_odd_$a")
> OPTAB_D (vec_widen_sadd_even_optab, "vec_widen_sadd_even_$a")
> +OPTAB_D (vec_widen_sabd_optab, "vec_widen_sabd_$a")
> +OPTAB_D (vec_widen_sabd_hi_optab, "vec_widen_sabd_hi_$a")
> +OPTAB_D (vec_widen_sabd_lo_optab, "vec_widen_sabd_lo_$a")
> +OPTAB_D (vec_widen_sabd_odd_optab, "vec_widen_sabd_odd_$a")
> +OPTAB_D (vec_widen_sabd_even_optab, "vec_widen_sabd_even_$a")
> OPTAB_D (vec_widen_sshiftl_hi_optab, "vec_widen_sshiftl_hi_$a")
> OPTAB_D (vec_widen_sshiftl_lo_optab, "vec_widen_sshiftl_lo_$a")
> OPTAB_D (vec_widen_umult_even_optab, "vec_widen_umult_even_$a")
> @@ -436,6 +441,11 @@ OPTAB_D (vec_widen_uadd_hi_optab, "vec_widen_uadd_hi_$a")
> OPTAB_D (vec_widen_uadd_lo_optab, "vec_widen_uadd_lo_$a")
> OPTAB_D (vec_widen_uadd_odd_optab, "vec_widen_uadd_odd_$a")
> OPTAB_D (vec_widen_uadd_even_optab, "vec_widen_uadd_even_$a")
> +OPTAB_D (vec_widen_uabd_optab, "vec_widen_uabd_$a")
> +OPTAB_D (vec_widen_uabd_hi_optab, "vec_widen_uabd_hi_$a")
> +OPTAB_D (vec_widen_uabd_lo_optab, "vec_widen_uabd_lo_$a")
> +OPTAB_D (vec_widen_uabd_odd_optab, "vec_widen_uabd_odd_$a")
> +OPTAB_D (vec_widen_uabd_even_optab, "vec_widen_uabd_even_$a")
> OPTAB_D (vec_addsub_optab, "vec_addsub$a3")
> OPTAB_D (vec_fmaddsub_optab, "vec_fmaddsub$a4")
> OPTAB_D (vec_fmsubadd_optab, "vec_fmsubadd$a4")
The new optabs need to be documented in doc/md.texi.
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index e2392113bff4065c909aefc760b4c48978b73a5a..852c4e99edb19d215c354b666b991c87c48620b4 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -1404,15 +1404,28 @@ vect_recog_sad_pattern (vec_info *vinfo,
> gcall *abd_stmt = dyn_cast <gcall *> (abs_stmt_vinfo->stmt);
> if (!abd_stmt
> || !gimple_call_internal_p (abd_stmt)
> - || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
> + || gimple_call_num_args (abd_stmt) != 2)
> return NULL;
>
> tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
> tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
>
> - if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0, &unprom[0])
> - || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
> - &unprom[1]))
> + if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
> + {
> + if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
> + &unprom[0])
> + || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
> + &unprom[1]))
> + return NULL;
> + }
> + else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
> + {
> + unprom[0].op = abd_oprnd0;
> + unprom[0].type = TREE_TYPE (abd_oprnd0);
> + unprom[1].op = abd_oprnd1;
> + unprom[1].type = TREE_TYPE (abd_oprnd1);
> + }
> + else
> return NULL;
>
> half_type = unprom[0].type;
> @@ -1442,16 +1455,25 @@ vect_recog_sad_pattern (vec_info *vinfo,
>
> /* Function vect_recog_abd_pattern
>
> - Try to find the following ABsolute Difference (ABD) pattern:
> + Try to find the following ABsolute Difference (ABD) or
> + ABsolute Difference Long (ABDL) pattern:
“Long” is a bit of an architecture-specific term. Maybe just:
Try to find the following ABsolute Difference (ABD) or
widening ABD (WIDEN_ABD) pattern:
> - VTYPE x, y, out;
> + VTYPE x, y;
> + WTYPE out;
> type diff;
> loop i in range:
> S1 diff = x[i] - y[i]
> S2 out[i] = ABS_EXPR <diff>;
>
> - where 'type' is a integer and 'VTYPE' is a vector of integers
> - the same size as 'type'
> + where 'VTYPE' and 'WTYPE' are vectors of integers.
> + 'WTYPE' may be wider than 'VTYPE'.
> + 'type' is as wide as 'WTYPE'.
I don't think the existing comment is right about the types. What we're
matching is scalar code, so VTYPE and (now) WTYPE are integers rather
than vectors of integers.
I think it would be clearer to write:
S1 diff = (type) x[i] - (type) y[i]
S2 out[i] = ABS_EXPR <(WTYPE) diff>;
since the promotions happen on the operands.
It'd be good to keep the part about 'type' being an integer.
Rather than:
'WTYPE' may be wider than 'VTYPE'.
'type' is as wide as 'WTYPE'.
maybe:
'type' is no narrower than 'VTYPE' (but may be wider)
'WTYPE' is no narrower than 'type' (but may be wider)
> +
> + In the case of ABD, should the precision of 'WTYPE' be wider than
> + 'VTYPE' or narrower, conversion operations will succeed the ABD or
> + ABDL expression.
> + In the case of ABDL, the same occurs if the precision of 'WTYPE' is
> + more than twice as wide or narrower.
WTYPE can't be narrower than VTYPE though. I think with the changes
suggested above, the text before this block describes the conditions
in enough detail, and so we can just say:
WIDEN_ABD exists to optimize the case where WTYPE is at least twice as
wide as VTYPE.
> @@ -1459,12 +1481,14 @@ vect_recog_sad_pattern (vec_info *vinfo,
>
> Output:
>
> - * TYPE_out: The type of the output of this pattern
> + * TYPE_OUT: The type of the output of this pattern
>
> * Return value: A new stmt that will be used to replace the sequence of
> - stmts that constitute the pattern; either SABD or UABD:
> + stmts that constitute the pattern; either SABD, UABD, SABDL or UABDL:
> SABD_EXPR<x, y, out>
> UABD_EXPR<x, y, out>
> + SABDL_EXPR<x, y, out>
> + UABDL_EXPR<x, y, out>
SABD_EXPR/UABD_EXPR should be IFN_ABD
SABDL_EXPR/UABDL_EXPR should be IFN_WIDEN_ABD
> */
>
> static gimple *
> @@ -1479,8 +1503,9 @@ vect_recog_abd_pattern (vec_info *vinfo,
> out[i] = DAD
>
> In which
> - - X, Y, DIFF, DAD all have the same type
> - - x, y, out are all vectors of the same type
> + - X, Y have the same type
> + - x, y are all vectors of the same type
> + - DIFF, DAD, out may be wider than X, Y
> */
Maybe it would be easier to remove this comment, since I think the
comment above the function says enough.
> @@ -1496,37 +1521,74 @@ vect_recog_abd_pattern (vec_info *vinfo,
> unprom, &diff_stmt))
> return NULL;
>
> - tree abd_type = out_type, vectype;
> - tree abd_oprnds[2];
> + internal_fn ifn = IFN_ABD;
> + tree abd_in_type, abd_out_type;
> bool extend = false;
> +
> if (half_type)
> {
> - vectype = get_vectype_for_scalar_type (vinfo, half_type);
> - abd_type = half_type;
> - extend = TYPE_PRECISION (abd_type) < TYPE_PRECISION (out_type);
> + abd_in_type = half_type;
> + abd_out_type = abd_in_type;
> +
> + if (stmt_vinfo->min_output_precision == TYPE_PRECISION (abd_out_type))
> + extend = TYPE_PRECISION (abd_in_type) < TYPE_PRECISION (out_type);
> + else
> + {
> + ifn = IFN_VEC_WIDEN_ABD;
> + abd_out_type
> + = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
> + TYPE_UNSIGNED (abd_in_type));
> + extend = TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type);
> + }
> }
> else
> {
> unprom[0].op = gimple_assign_rhs1 (diff_stmt);
> unprom[1].op = gimple_assign_rhs2 (diff_stmt);
> - tree signed_out = signed_type_for (out_type);
> - vectype = get_vectype_for_scalar_type (vinfo, signed_out);
> + abd_in_type = signed_type_for (out_type);
> + abd_out_type = abd_in_type;
> }
Rather than have the "extend" variable, how about:
>
> - vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> + tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
> + tree vectype_out = get_vectype_for_scalar_type (vinfo, abd_out_type);
> + if (!vectype_in || !vectype_out)
> + return NULL;
>
> - if (!vectype
> - || !direct_internal_fn_supported_p (IFN_ABD, vectype,
> + if (ifn == IFN_VEC_WIDEN_ABD)
> + {
> + code_helper dummy_code;
> + int dummy_int;
> + auto_vec<tree> dummy_vec;
> + if (!supportable_widening_operation (vinfo, ifn, stmt_vinfo,
> + vectype_out, vectype_in,
> + &dummy_code, &dummy_code,
> + &dummy_int, &dummy_vec))
> + {
> + /* There are architectures that have the ABD instructions
> + but not the ABDL instructions. If we just return NULL here
> + we will miss an occasion where we should have used ABD.
> + So we change back to ABD and try again. */
> + ifn = IFN_ABD;
> + abd_out_type = abd_in_type;
> + extend = true;
> + }
> + }
making this:
if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2)
{
tree mid_type
= build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
TYPE_UNSIGNED (abd_in_type));
tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_vectype);
code_helper dummy_code;
int dummy_int;
auto_vec<tree> dummy_vec;
if (mid_vectype
&& supportable_widening_operation (vinfo, IFN_WIDEN_ABD, stmt_vinfo,
mid_vectype, vectype_in,
&dummy_code, &dummy_code,
&dummy_int, &dummy_vec))
{
ifn = IFN_WIDEN_ABD;
abd_out_type = mid_type;
vectype_out = mid_vectype;
}
}
The idea is to keep the assumption that we're using IFN_ABD
until we've proven conclusively otherwise.
I think the later:
if (!extend)
return abd_stmt;
should then be deleted, since we should still use vect_convert_output
if abd_out_type has a different sign from out_type.
> +
> + if (ifn == IFN_ABD
> + && !direct_internal_fn_supported_p (ifn, vectype_in,
> OPTIMIZE_FOR_SPEED))
> return NULL;
>
> + vect_pattern_detected ("vect_recog_abd_pattern", last_stmt);
> +
> + tree abd_oprnds[2];
> vect_convert_inputs (vinfo, stmt_vinfo, 2, abd_oprnds,
> - TREE_TYPE (vectype), unprom, vectype);
> + abd_in_type, unprom, vectype_in);
>
> *type_out = get_vectype_for_scalar_type (vinfo, out_type);
>
> - tree abd_result = vect_recog_temp_ssa_var (abd_type, NULL);
> - gcall *abd_stmt = gimple_build_call_internal (IFN_ABD, 2,
> + tree abd_result = vect_recog_temp_ssa_var (abd_out_type, NULL);
> + gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
> abd_oprnds[0], abd_oprnds[1]);
> gimple_call_set_lhs (abd_stmt, abd_result);
> gimple_set_location (abd_stmt, gimple_location (last_stmt));
> @@ -1535,15 +1597,15 @@ vect_recog_abd_pattern (vec_info *vinfo,
> return abd_stmt;
>
> gimple *stmt = abd_stmt;
> - if (!TYPE_UNSIGNED (abd_type))
> + if (!TYPE_UNSIGNED (abd_out_type))
And this condition would then be:
if (TYPE_PRECISION (abd_out_type) == TYPE_PRECISION (abd_in_type)
&& TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
&& !TYPE_UNSIGNED (abd_out_type))
since:
(a) we don't need to force the type to unsigned if the final
vect_convert_output is just a sign change
(b) we don't need to force the type to unsigned if abd_out_type
is large enough to hold the results without loss of precision
(or loss of sign correctness)
> {
> - tree unsign = unsigned_type_for (abd_type);
> + tree unsign = unsigned_type_for (abd_out_type);
> tree unsign_vectype = get_vectype_for_scalar_type (vinfo, unsign);
> stmt = vect_convert_output (vinfo, stmt_vinfo, unsign, stmt,
> unsign_vectype);
> }
>
> - return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype);
> + return vect_convert_output (vinfo, stmt_vinfo, out_type, stmt, vectype_out);
> }
>
> /* Recognize an operation that performs ORIG_CODE on widened inputs,
> @@ -1703,6 +1765,61 @@ vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
> &subtype);
> }
>
> +/* Try to detect abd on widened inputs, converting IFN_ABD
> + to IFN_VEC_WIDEN_ABD. */
> +static gimple *
> +vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
> + tree *type_out)
> +{
> + gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
> + if (!last_stmt || !gimple_assign_cast_p (last_stmt))
> + return NULL;
> +
> + tree last_rhs = gimple_assign_rhs1 (last_stmt);
> + stmt_vec_info abs_vinfo = vect_get_internal_def (vinfo, last_rhs);
> + if (!abs_vinfo)
> + return NULL;
> +
> + stmt_vec_info abd_pattern_vinfo = STMT_VINFO_RELATED_STMT (abs_vinfo);
> + if (!abd_pattern_vinfo)
> + return NULL;
> +
> + gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
> + if (!abd_stmt || gimple_call_internal_fn (abd_stmt) != IFN_ABD)
> + return NULL;
> +
> + tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
> + tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
> +
> + tree in_type = TREE_TYPE (abd_oprnd0);
> + tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
> + if (TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type))
> + return NULL;
I think it'd be more natural to test this on TREE_TYPE (last_rhs)
rather than TREE_TYPE (abd_oprnd0), and do it after the assignment
to last_rhs above.
> +
> + tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
> + tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
> +
> + code_helper dummy_code;
> + int dummy_int;
> + auto_vec<tree> dummy_vec;
> + if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
> + vectype_out, vectype_in,
> + &dummy_code, &dummy_code,
> + &dummy_int, &dummy_vec))
> + return NULL;
> +
> + vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt);
> +
> + *type_out = vectype_out;
> +
> + tree abdl_result = vect_recog_temp_ssa_var (out_type, NULL);
> + gcall *abdl_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
> + abd_oprnd0, abd_oprnd1);
> + gimple_call_set_lhs (abdl_stmt, abdl_result);
> + gimple_set_location (abdl_stmt, gimple_location (last_stmt));
> + return abdl_stmt;
I think “widen_abd” would be better than “abdl” here.
Richard
> +}
> +
> /* Function vect_recog_ctz_ffs_pattern
>
> Try to find the following pattern:
> @@ -6670,6 +6787,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
> { vect_recog_mask_conversion_pattern, "mask_conversion" },
> { vect_recog_widen_plus_pattern, "widen_plus" },
> { vect_recog_widen_minus_pattern, "widen_minus" },
> + { vect_recog_widen_abd_pattern, "widen_abd" },
> /* These must come after the double widening ones. */
> };
next prev parent reply other threads:[~2023-06-26 20:50 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-06-26 15:34 Oluwatamilore Adebayo
2023-06-26 20:50 ` Richard Sandiford [this message]
2023-06-27 7:46 ` Richard Sandiford
2023-06-28 15:07 ` Oluwatamilore Adebayo
2023-06-28 15:09 ` Oluwatamilore Adebayo
2023-06-29 10:14 ` Richard Sandiford
2023-06-30 8:23 ` Oluwatamilore Adebayo
2023-06-30 8:26 ` Oluwatamilore Adebayo
2023-06-30 11:28 ` Richard Sandiford
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=mptttuu9ce7.fsf@arm.com \
--to=richard.sandiford@arm.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=oluwatamilore.adebayo@arm.com \
--cc=richard.guenther@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).