public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r10-9145] aarch64: intrinsics to extract half of bf16 vector
@ 2020-12-11 16:19 Dennis Zhang
0 siblings, 0 replies; only message in thread
From: Dennis Zhang @ 2020-12-11 16:19 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:c25f7eac6555d67523f0520c7e93bbc398d0da84
commit r10-9145-gc25f7eac6555d67523f0520c7e93bbc398d0da84
Author: Dennis Zhang <dennis.zhang@arm.com>
Date: Fri Dec 11 16:18:05 2020 +0000
aarch64: intrinsics to extract half of bf16 vector
This patch implements ACLE intrinsics vget_low_bf16 and vget_high_bf16
to extract lower or higher half from a bfloat16x8 vector. The
vget_high_bf16 is done by 'dup' instruction. The vget_low_bf16 is just
to return the lower half of a vector register. Tests include both big-
and little-endian cases.
gcc/ChangeLog:
* config/aarch64/aarch64-simd-builtins.def (vget_lo_half): New entry.
(vget_hi_half): Likewise.
* config/aarch64/aarch64-simd.md (aarch64_vget_lo_halfv8bf): New entry.
(aarch64_vget_hi_halfv8bf): Likewise.
* config/aarch64/arm_neon.h (vget_low_bf16): New intrinsic.
(vget_high_bf16): Likewise.
gcc/testsuite/ChangeLog
* gcc.target/aarch64/advsimd-intrinsics/bf16_get.c: New test.
(cherry picked from commit 3553c658533e430b232997bdfd97faf6606fb102)
Diff:
---
gcc/config/aarch64/aarch64-simd-builtins.def | 4 ++++
gcc/config/aarch64/aarch64-simd.md | 21 +++++++++++++++++
gcc/config/aarch64/arm_neon.h | 14 +++++++++++
.../aarch64/advsimd-intrinsics/bf16_get.c | 27 ++++++++++++++++++++++
4 files changed, 66 insertions(+)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 7192f3954d3..facd46dc32c 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -718,6 +718,10 @@
VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, v4sf)
VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, v4sf)
+ /* Implemented by aarch64_vget_lo/hi_halfv8bf. */
+ VAR1 (UNOP, vget_lo_half, 0, v8bf)
+ VAR1 (UNOP, vget_hi_half, 0, v8bf)
+
/* Implemented by aarch64_simd_<sur>mmlav16qi. */
VAR1 (TERNOP, simd_smmla, 0, v16qi)
VAR1 (TERNOPU, simd_ummla, 0, v16qi)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 2e8aa668b10..6d99be39dbc 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7159,6 +7159,27 @@
[(set_attr "type" "neon_dot<VDQSF:q>")]
)
+;; vget_low/high_bf16
+(define_expand "aarch64_vget_lo_halfv8bf"
+ [(match_operand:V4BF 0 "register_operand")
+ (match_operand:V8BF 1 "register_operand")]
+ "TARGET_BF16_SIMD"
+{
+ rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
+ emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
+ DONE;
+})
+
+(define_expand "aarch64_vget_hi_halfv8bf"
+ [(match_operand:V4BF 0 "register_operand")
+ (match_operand:V8BF 1 "register_operand")]
+ "TARGET_BF16_SIMD"
+{
+ rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
+ emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
+ DONE;
+})
+
;; bfmmla
(define_insn "aarch64_bfmmlaqv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=w")
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 69cccd32786..0009e50f3fe 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -35680,6 +35680,20 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index);
}
+__extension__ extern __inline bfloat16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vget_low_bf16 (bfloat16x8_t __a)
+{
+ return __builtin_aarch64_vget_lo_halfv8bf (__a);
+}
+
+__extension__ extern __inline bfloat16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vget_high_bf16 (bfloat16x8_t __a)
+{
+ return __builtin_aarch64_vget_hi_halfv8bf (__a);
+}
+
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvt_f32_bf16 (bfloat16x4_t __a)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get.c
new file mode 100644
index 00000000000..2193753ffbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get.c
@@ -0,0 +1,27 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+
+#include <arm_neon.h>
+
+/*
+**test_vget_low_bf16:
+** ret
+*/
+bfloat16x4_t test_vget_low_bf16 (bfloat16x8_t a)
+{
+ return vget_low_bf16 (a);
+}
+
+/*
+**test_vget_high_bf16:
+** dup d0, v0.d\[1\]
+** ret
+*/
+bfloat16x4_t test_vget_high_bf16 (bfloat16x8_t a)
+{
+ return vget_high_bf16 (a);
+}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2020-12-11 16:19 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-11 16:19 [gcc r10-9145] aarch64: intrinsics to extract half of bf16 vector Dennis Zhang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).