From: Richard Sandiford <richard.sandiford@arm.com>
To: "Andre Vieira \(lists\)" <andre.simoesdiasvieira@arm.com>
Cc: "gcc-patches\@gcc.gnu.org" <gcc-patches@gcc.gnu.org>,
Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
Subject: Re: [PATCH][AArch64] Implement ACLE Data Intrinsics
Date: Fri, 17 Jun 2022 11:54:49 +0100 [thread overview]
Message-ID: <mptzgib8squ.fsf@arm.com> (raw)
In-Reply-To: <c9db158d-b0d5-3f4e-97dd-0f779195ad1d@arm.com> (Andre Vieira's message of "Fri, 10 Jun 2022 14:37:14 +0100")
"Andre Vieira (lists)" <andre.simoesdiasvieira@arm.com> writes:
> Hi,
>
> This patch adds support for the ACLE Data Intrinsics to the AArch64 port.
>
> Bootstrapped and regression tested on aarch64-none-linux.
>
> OK for trunk?
Sorry for the slow review.
>
> gcc/ChangeLog:
>
> 2022-06-10 Andre Vieira <andre.simoesdiasvieira@arm.com>
>
> * config/aarch64/aarch64.md (rbit<mode>2): Rename this ...
> (@aarch64_rbit<mode>): ... this and change it in...
> (ffs<mode>2,ctz<mode>2): ... here.
> (@aarch64_rev16<mode>): New.
> * config/aarch64/aarch64-builtins.cc: (aarch64_builtins):
> Define the following enum AARCH64_REV16, AARCH64_REV16L,
> AARCH64_REV16LL,
> AARCH64_RBIT, AARCH64_RBITL, AARCH64_RBITLL.
> (aarch64_init_data_intrinsics): New.
> (handle_arm_acle_h): Add call to aarch64_init_data_intrinsics.
> (aarch64_expand_builtin_data_intrinsic): New.
> (aarch64_general_expand_builtin): Add call to
> aarch64_expand_builtin_data_intrinsic.
> * config/aarch64/arm_acle.h (__clz, __clzl, __clzll, __cls,
> __clsl, __clsll, __rbit,
> __rbitl, __rbitll, __rev, __revl, __revll, __rev16, __rev16l,
> __rev16ll, __ror, __rorl,
> __rorll, __revsh): New.
>
> gcc/testsuite/ChangeLog:
>
> 2022-06-10 Andre Vieira <andre.simoesdiasvieira@arm.com>
>
> * gcc.target/aarch64/acle/data-intrinsics.c: New test.
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
> index e0a741ac663188713e21f457affa57217d074783..91a687dee13a27c21f0c50de9ba777aa900d6096 100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -613,6 +613,12 @@ enum aarch64_builtins
> AARCH64_LS64_BUILTIN_ST64B,
> AARCH64_LS64_BUILTIN_ST64BV,
> AARCH64_LS64_BUILTIN_ST64BV0,
> + AARCH64_REV16,
> + AARCH64_REV16L,
> + AARCH64_REV16LL,
> + AARCH64_RBIT,
> + AARCH64_RBITL,
> + AARCH64_RBITLL,
> AARCH64_BUILTIN_MAX
> };
>
> @@ -1664,10 +1670,41 @@ aarch64_init_ls64_builtins (void)
> = aarch64_general_add_builtin (data[i].name, data[i].type, data[i].code);
> }
>
> +static void
> +aarch64_init_data_intrinsics (void)
> +{
> + tree uint32_fntype = build_function_type_list (uint32_type_node,
> + uint32_type_node, NULL_TREE);
> + tree long_fntype = build_function_type_list (long_unsigned_type_node,
> + long_unsigned_type_node,
> + NULL_TREE);
Very minor, but ulong_fntype might be clearer, since the other two
variable names are explicitly unsigned.
> + tree uint64_fntype = build_function_type_list (uint64_type_node,
> + uint64_type_node, NULL_TREE);
> + aarch64_builtin_decls[AARCH64_REV16]
> + = aarch64_general_add_builtin ("__builtin_aarch64_rev16", uint32_fntype,
> + AARCH64_REV16);
> + aarch64_builtin_decls[AARCH64_REV16L]
> + = aarch64_general_add_builtin ("__builtin_aarch64_rev16l", long_fntype,
> + AARCH64_REV16L);
> + aarch64_builtin_decls[AARCH64_REV16LL]
> + = aarch64_general_add_builtin ("__builtin_aarch64_rev16ll", uint64_fntype,
> + AARCH64_REV16LL);
> + aarch64_builtin_decls[AARCH64_RBIT]
> + = aarch64_general_add_builtin ("__builtin_aarch64_rbit", uint32_fntype,
> + AARCH64_RBIT);
> + aarch64_builtin_decls[AARCH64_RBITL]
> + = aarch64_general_add_builtin ("__builtin_aarch64_rbitl", long_fntype,
> + AARCH64_RBITL);
> + aarch64_builtin_decls[AARCH64_RBITLL]
> + = aarch64_general_add_builtin ("__builtin_aarch64_rbitll", uint64_fntype,
> + AARCH64_RBITLL);
> +}
> +
> /* Implement #pragma GCC aarch64 "arm_acle.h". */
> void
> handle_arm_acle_h (void)
> {
> + aarch64_init_data_intrinsics ();
> if (TARGET_LS64)
> aarch64_init_ls64_builtins ();
> }
> @@ -2393,6 +2430,32 @@ aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target)
> emit_insn (pat);
> return target;
> }
> +/* Function to expand an expression EXP which calls one of the ACLE Data
> + Intrinsic builtins FCODE with the result going to TARGET. */
> +static rtx
> +aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx target)
> +{
> + rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
> + machine_mode mode = GET_MODE (op0);
> + rtx pat;
> + switch (fcode)
> + {
> + case AARCH64_REV16:
> + case AARCH64_REV16L:
> + case AARCH64_REV16LL:
> + pat = gen_aarch64_rev16 (mode, target, op0);
Does this work when op0 is a constant or comes from memory?
Same for when target is a memory. E.g. does:
void test_rev16 (uint32_t *ptr)
{
*ptr = __rev16 (*ptr);
}
work?
It'd be more robust to use the expand_insn interface instead;
see aarch64_expand_builtin_ls64 for an example.
> + break;
> + case AARCH64_RBIT:
> + case AARCH64_RBITL:
> + case AARCH64_RBITLL:
> + pat = gen_aarch64_rbit (mode, target, op0);
> + break;
> + default:
> + gcc_unreachable ();
> + }
> + emit_insn (pat);
> + return target;
> +}
>
> /* Expand an expression EXP as fpsr or fpcr setter (depending on
> UNSPEC) using MODE. */
> @@ -2551,6 +2614,9 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
> if (fcode >= AARCH64_MEMTAG_BUILTIN_START
> && fcode <= AARCH64_MEMTAG_BUILTIN_END)
> return aarch64_expand_builtin_memtag (fcode, exp, target);
> + if (fcode >= AARCH64_REV16
> + && fcode <= AARCH64_RBITLL)
> + return aarch64_expand_builtin_data_intrinsic (fcode, exp, target);
>
> gcc_unreachable ();
> }
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index acec8c1146765c0fac73c15351853324b8f03209..ef0aed25c6b26eff61f9f6030dc5921a534e3d19 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -4950,7 +4950,7 @@ (define_expand "ffs<mode>2"
> rtx ccreg = aarch64_gen_compare_reg (EQ, operands[1], const0_rtx);
> rtx x = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
>
> - emit_insn (gen_rbit<mode>2 (operands[0], operands[1]));
> + emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1]));
> emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
> emit_insn (gen_csinc3<mode>_insn (operands[0], x, operands[0], const0_rtx));
> DONE;
> @@ -4996,7 +4996,7 @@ (define_insn "clrsb<mode>2"
> [(set_attr "type" "clz")]
> )
>
> -(define_insn "rbit<mode>2"
> +(define_insn "@aarch64_rbit<mode>"
> [(set (match_operand:GPI 0 "register_operand" "=r")
> (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_RBIT))]
> ""
> @@ -5017,7 +5017,7 @@ (define_insn_and_split "ctz<mode>2"
> "reload_completed"
> [(const_int 0)]
> "
> - emit_insn (gen_rbit<mode>2 (operands[0], operands[1]));
> + emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1]));
> emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
> DONE;
> ")
> @@ -6022,6 +6022,13 @@ (define_insn "bswaphi2"
> [(set_attr "type" "rev")]
> )
>
> +(define_insn "@aarch64_rev16<mode>"
> + [(set (match_operand:GPI 0 "register_operand" "=r")
> + (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_REV))]
> + ""
> + "rev16\\t%<w>0, %<w>1"
> + [(set_attr "type" "rev")])
> +
> (define_insn "*aarch64_bfxil<mode>"
> [(set (match_operand:GPI 0 "register_operand" "=r,r")
> (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "r,0")
> diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
> index 9775a48c65825b424d3eb442384f5ab87b734fd7..faddd5d0a780c5d65ba430bd3174c701e848c794 100644
> --- a/gcc/config/aarch64/arm_acle.h
> +++ b/gcc/config/aarch64/arm_acle.h
> @@ -28,6 +28,7 @@
> #define _GCC_ARM_ACLE_H
>
> #include <stdint.h>
> +#include <stddef.h>
>
> #pragma GCC aarch64 "arm_acle.h"
>
> @@ -35,6 +36,54 @@
> extern "C" {
> #endif
>
> +#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE) \
> +__extension__ extern __inline TYPE \
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
> +NAME (TYPE value, uint32_t rotate) \
The names of the parameters and local variables need the same __
uglification as __revl. Same for _GCC_ARM_ACLE_DATA_FN.
> +{ \
> + size_t size = sizeof (TYPE) * __CHAR_BIT__; \
> + rotate = rotate % size; \
> + return value >> rotate | value << (size - rotate); \
This runs into UB for rotate == 0.
> +}
> +
> +_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t)
> +_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long)
> +_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t)
Would be good to undef the macro once we're done with it, to reduce
noise in things like -dM.
> +
> +#define _GCC_ARM_ACLE_DATA_FN(NAME, BUILTIN, TYPE) \
> +__extension__ extern __inline TYPE \
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
> +__##NAME (TYPE value) \
> +{ \
> + return __builtin_##BUILTIN (value); \
> +}
> +
> +_GCC_ARM_ACLE_DATA_FN (clz, clz, uint32_t)
> +_GCC_ARM_ACLE_DATA_FN (clzl, clzl, unsigned long)
> +_GCC_ARM_ACLE_DATA_FN (clzll, clzll, uint64_t)
> +_GCC_ARM_ACLE_DATA_FN (cls, clrsb, uint32_t)
> +_GCC_ARM_ACLE_DATA_FN (clsl, clrsbl, unsigned long)
> +_GCC_ARM_ACLE_DATA_FN (clsll, clrsbll, uint64_t)
The ACLE spec says that these should return unsigned int, so I guess
either these functions should have their own macro or the macro above
should have a separate parameter for the return type.
Thanks,
Richard
> +_GCC_ARM_ACLE_DATA_FN (rev16, aarch64_rev16, uint32_t)
> +_GCC_ARM_ACLE_DATA_FN (rev16l, aarch64_rev16l, unsigned long)
> +_GCC_ARM_ACLE_DATA_FN (rev16ll, aarch64_rev16ll, uint64_t)
> +_GCC_ARM_ACLE_DATA_FN (rbit, aarch64_rbit, uint32_t)
> +_GCC_ARM_ACLE_DATA_FN (rbitl, aarch64_rbitl, unsigned long)
> +_GCC_ARM_ACLE_DATA_FN (rbitll, aarch64_rbitll, uint64_t)
> +_GCC_ARM_ACLE_DATA_FN (revsh, bswap16, int16_t)
> +_GCC_ARM_ACLE_DATA_FN (rev, bswap32, uint32_t)
> +_GCC_ARM_ACLE_DATA_FN (revll, bswap64, uint64_t)
> +
> +__extension__ extern __inline unsigned long
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__revl (unsigned long __value)
> +{
> + if (sizeof (unsigned long) == 8)
> + return __revll (__value);
> + else
> + return __rev (__value);
> +}
> +
> #pragma GCC push_options
> #pragma GCC target ("arch=armv8.3-a")
> __extension__ extern __inline int32_t
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..90813184704dfcdaf2d24d523ff744aa6cbedf1a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/acle/data-intrinsics.c
> @@ -0,0 +1,215 @@
> +/* Test the ACLE data intrinsics. */
> +/* { dg-do assemble } */
> +/* { dg-additional-options "--save-temps -O1" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#include "arm_acle.h"
> +
> +/*
> +** test_clz:
> +** clz w0, w0
> +** ret
> +*/
> +
> +uint32_t test_clz (uint32_t a)
> +{
> + return __clz (a);
> +}
> +
> +/*
> +** test_clzl:
> +** clz [wx]0, [wx]0
> +** ret
> +*/
> +
> +unsigned long test_clzl (unsigned long a)
> +{
> + return __clzl (a);
> +}
> +
> +/*
> +** test_clzll:
> +** clz x0, x0
> +** ret
> +*/
> +
> +uint64_t test_clzll (uint64_t a)
> +{
> + return __clzll (a);
> +}
> +
> +/*
> +** test_cls:
> +** cls w0, w0
> +** ret
> +*/
> +
> +uint32_t test_cls (uint32_t a)
> +{
> + return __cls (a);
> +}
> +
> +/*
> +** test_clsl:
> +** cls [wx]0, [wx]0
> +** ret
> +*/
> +
> +unsigned long test_clsl (unsigned long a)
> +{
> + return __clsl (a);
> +}
> +
> +/*
> +** test_clsll:
> +** cls x0, x0
> +** ret
> +*/
> +
> +uint64_t test_clsll (uint64_t a)
> +{
> + return __clsll (a);
> +}
> +
> +/*
> +** test_rbit:
> +** rbit w0, w0
> +** ret
> +*/
> +
> +uint32_t test_rbit (uint32_t a)
> +{
> + return __rbit (a);
> +}
> +
> +/*
> +** test_rbitl:
> +** rbit [wx]0, [wx]0
> +** ret
> +*/
> +
> +unsigned long test_rbitl (unsigned long a)
> +{
> + return __rbitl (a);
> +}
> +
> +/*
> +** test_rbitll:
> +** rbit x0, x0
> +** ret
> +*/
> +
> +uint64_t test_rbitll (uint64_t a)
> +{
> + return __rbitll (a);
> +}
> +
> +/*
> +** test_rev:
> +** rev w0, w0
> +** ret
> +*/
> +
> +uint32_t test_rev (uint32_t a)
> +{
> + return __builtin_bswap32 (a);
> +}
> +
> +/*
> +** test_revl:
> +** rev [wx]0, [wx]0
> +** ret
> +*/
> +
> +unsigned long test_revl (unsigned long a)
> +{
> + return __revl (a);
> +}
> +
> +/*
> +** test_revll:
> +** rev x0, x0
> +** ret
> +*/
> +
> +uint64_t test_revll (uint64_t a)
> +{
> + return __revll (a);
> +}
> +
> +/*
> +** test_rev16:
> +** rev16 w0, w0
> +** ret
> +*/
> +
> +uint32_t test_rev16 (uint32_t a)
> +{
> + return __rev16 (a);
> +}
> +
> +/*
> +** test_rev16l:
> +** rev16 [wx]0, [wx]0
> +** ret
> +*/
> +
> +unsigned long test_rev16l (unsigned long a)
> +{
> + return __rev16l (a);
> +}
> +
> +/*
> +** test_rev16ll:
> +** rev16 x0, x0
> +** ret
> +*/
> +
> +uint64_t test_rev16ll (uint64_t a)
> +{
> + return __rev16ll (a);
> +}
> +
> +/*
> +** test_ror:
> +** ror w0, w0, w1
> +** ret
> +*/
> +
> +uint32_t test_ror (uint32_t a, uint32_t r)
> +{
> + return __ror (a, r);
> +}
> +
> +/*
> +** test_rorl:
> +** ror [wx]0, [wx]0, [wx]1
> +** ret
> +*/
> +
> +unsigned long test_rorl (unsigned long a, uint32_t r)
> +{
> + return __rorl (a, r);
> +}
> +
> +/*
> +** test_rorll:
> +** ror x0, x0, x1
> +** ret
> +*/
> +
> +uint64_t test_rorll (uint64_t a, uint32_t r)
> +{
> + return __rorll (a, r);
> +}
> +
> +/*
> +** test_revsh:
> +** rev16 w0, w0
> +** ret
> +*/
> +
> +int16_t test_revsh (int16_t a)
> +{
> + return __revsh (a);
> +}
next prev parent reply other threads:[~2022-06-17 10:54 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-06-10 13:37 Andre Vieira (lists)
2022-06-17 10:54 ` Richard Sandiford [this message]
2022-06-28 14:04 ` Andre Vieira (lists)
2022-06-29 7:18 ` Richard Sandiford
2022-07-01 10:25 ` Andre Vieira (lists)
2022-07-01 11:26 ` Richard Sandiford
2022-08-11 15:11 ` Andre Vieira (lists)
2022-08-11 15:21 ` Kyrylo Tkachov
2022-10-04 10:33 ` Andre Vieira (lists)
2022-10-05 8:47 ` Kyrylo Tkachov
2022-10-06 7:23 ` Richard Biener
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=mptzgib8squ.fsf@arm.com \
--to=richard.sandiford@arm.com \
--cc=Kyrylo.Tkachov@arm.com \
--cc=andre.simoesdiasvieira@arm.com \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).