public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: Dennis Zhang <xczhang@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc r10-9145] aarch64: intrinsics to extract half of bf16 vector Date: Fri, 11 Dec 2020 16:19:08 +0000 (GMT) [thread overview] Message-ID: <20201211161908.6BBB53973020@sourceware.org> (raw) https://gcc.gnu.org/g:c25f7eac6555d67523f0520c7e93bbc398d0da84 commit r10-9145-gc25f7eac6555d67523f0520c7e93bbc398d0da84 Author: Dennis Zhang <dennis.zhang@arm.com> Date: Fri Dec 11 16:18:05 2020 +0000 aarch64: intrinsics to extract half of bf16 vector This patch implements ACLE intrinsics vget_low_bf16 and vget_high_bf16 to extract lower or higher half from a bfloat16x8 vector. The vget_high_bf16 is done by 'dup' instruction. The vget_low_bf16 is just to return the lower half of a vector register. Tests include both big- and little-endian cases. gcc/ChangeLog: * config/aarch64/aarch64-simd-builtins.def (vget_lo_half): New entry. (vget_hi_half): Likewise. * config/aarch64/aarch64-simd.md (aarch64_vget_lo_halfv8bf): New entry. (aarch64_vget_hi_halfv8bf): Likewise. * config/aarch64/arm_neon.h (vget_low_bf16): New intrinsic. (vget_high_bf16): Likewise. gcc/testsuite/ChangeLog * gcc.target/aarch64/advsimd-intrinsics/bf16_get.c: New test. (cherry picked from commit 3553c658533e430b232997bdfd97faf6606fb102) Diff: --- gcc/config/aarch64/aarch64-simd-builtins.def | 4 ++++ gcc/config/aarch64/aarch64-simd.md | 21 +++++++++++++++++ gcc/config/aarch64/arm_neon.h | 14 +++++++++++ .../aarch64/advsimd-intrinsics/bf16_get.c | 27 ++++++++++++++++++++++ 4 files changed, 66 insertions(+) diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 7192f3954d3..facd46dc32c 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -718,6 +718,10 @@ VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, v4sf) VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, v4sf) + /* Implemented by aarch64_vget_lo/hi_halfv8bf. 
*/ + VAR1 (UNOP, vget_lo_half, 0, v8bf) + VAR1 (UNOP, vget_hi_half, 0, v8bf) + /* Implemented by aarch64_simd_<sur>mmlav16qi. */ VAR1 (TERNOP, simd_smmla, 0, v16qi) VAR1 (TERNOPU, simd_ummla, 0, v16qi) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 2e8aa668b10..6d99be39dbc 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -7159,6 +7159,27 @@ [(set_attr "type" "neon_dot<VDQSF:q>")] ) +;; vget_low/high_bf16 +(define_expand "aarch64_vget_lo_halfv8bf" + [(match_operand:V4BF 0 "register_operand") + (match_operand:V8BF 1 "register_operand")] + "TARGET_BF16_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false); + emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p)); + DONE; +}) + +(define_expand "aarch64_vget_hi_halfv8bf" + [(match_operand:V4BF 0 "register_operand") + (match_operand:V8BF 1 "register_operand")] + "TARGET_BF16_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true); + emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p)); + DONE; +}) + ;; bfmmla (define_insn "aarch64_bfmmlaqv4sf" [(set (match_operand:V4SF 0 "register_operand" "=w") diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 69cccd32786..0009e50f3fe 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -35680,6 +35680,20 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index); } +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vget_low_bf16 (bfloat16x8_t __a) +{ + return __builtin_aarch64_vget_lo_halfv8bf (__a); +} + +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vget_high_bf16 (bfloat16x8_t __a) +{ + return __builtin_aarch64_vget_hi_halfv8bf (__a); +} + __extension__ extern 
__inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvt_f32_bf16 (bfloat16x4_t __a) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get.c new file mode 100644 index 00000000000..2193753ffbb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get.c @@ -0,0 +1,27 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ + +#include <arm_neon.h> + +/* +**test_vget_low_bf16: +** ret +*/ +bfloat16x4_t test_vget_low_bf16 (bfloat16x8_t a) +{ + return vget_low_bf16 (a); +} + +/* +**test_vget_high_bf16: +** dup d0, v0.d\[1\] +** ret +*/ +bfloat16x4_t test_vget_high_bf16 (bfloat16x8_t a) +{ + return vget_high_bf16 (a); +}
reply other threads:[~2020-12-11 16:19 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20201211161908.6BBB53973020@sourceware.org \ --to=xczhang@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).