* [PATCH] aarch64: Fold vget_high_* intrinsics to BIT_FIELD_REF [PR102171]
@ 2024-05-22 2:05 Pengxuan Zheng
2024-05-22 12:27 ` Richard Sandiford
0 siblings, 1 reply; 3+ messages in thread
From: Pengxuan Zheng @ 2024-05-22 2:05 UTC (permalink / raw)
To: gcc-patches; +Cc: Pengxuan Zheng
This patch is a follow-up of r15-697-ga2e4fe5a53cf75 to also fold vget_high_*
intrinsics to BIT_FIELD_REF and remove the vget_high_* definitions from
arm_neon.h to use the new intrinsics framework.
PR target/102171
gcc/ChangeLog:
* config/aarch64/aarch64-builtins.cc (AARCH64_SIMD_VGET_HIGH_BUILTINS):
New macro to create definitions for all vget_high intrinsics.
(VGET_HIGH_BUILTIN): Likewise.
(enum aarch64_builtins): Add vget_high function codes.
(AARCH64_SIMD_VGET_LOW_BUILTINS): Delete duplicate macro.
(aarch64_general_fold_builtin): Fold vget_high calls.
* config/aarch64/aarch64-simd-builtins.def: Delete vget_high builtins.
* config/aarch64/aarch64-simd.md (aarch64_get_high<mode>): Delete.
(aarch64_vget_hi_halfv8bf): Likewise.
* config/aarch64/arm_neon.h (__attribute__): Delete.
(vget_high_f16): Likewise.
(vget_high_f32): Likewise.
(vget_high_f64): Likewise.
(vget_high_p8): Likewise.
(vget_high_p16): Likewise.
(vget_high_p64): Likewise.
(vget_high_s8): Likewise.
(vget_high_s16): Likewise.
(vget_high_s32): Likewise.
(vget_high_s64): Likewise.
(vget_high_u8): Likewise.
(vget_high_u16): Likewise.
(vget_high_u32): Likewise.
(vget_high_u64): Likewise.
(vget_high_bf16): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/vget_high_2.c: New test.
* gcc.target/aarch64/vget_high_2_be.c: New test.
Signed-off-by: Pengxuan Zheng <quic_pzheng@quicinc.com>
---
gcc/config/aarch64/aarch64-builtins.cc | 59 +++++++---
gcc/config/aarch64/aarch64-simd-builtins.def | 6 -
gcc/config/aarch64/aarch64-simd.md | 22 ----
gcc/config/aarch64/arm_neon.h | 105 ------------------
.../gcc.target/aarch64/vget_high_2.c | 30 +++++
.../gcc.target/aarch64/vget_high_2_be.c | 31 ++++++
6 files changed, 104 insertions(+), 149 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/vget_high_2.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index 11b888016ed..f8eeccb554d 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -675,6 +675,23 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
VGET_LOW_BUILTIN(u64) \
VGET_LOW_BUILTIN(bf16)
+#define AARCH64_SIMD_VGET_HIGH_BUILTINS \
+ VGET_HIGH_BUILTIN(f16) \
+ VGET_HIGH_BUILTIN(f32) \
+ VGET_HIGH_BUILTIN(f64) \
+ VGET_HIGH_BUILTIN(p8) \
+ VGET_HIGH_BUILTIN(p16) \
+ VGET_HIGH_BUILTIN(p64) \
+ VGET_HIGH_BUILTIN(s8) \
+ VGET_HIGH_BUILTIN(s16) \
+ VGET_HIGH_BUILTIN(s32) \
+ VGET_HIGH_BUILTIN(s64) \
+ VGET_HIGH_BUILTIN(u8) \
+ VGET_HIGH_BUILTIN(u16) \
+ VGET_HIGH_BUILTIN(u32) \
+ VGET_HIGH_BUILTIN(u64) \
+ VGET_HIGH_BUILTIN(bf16)
+
typedef struct
{
const char *name;
@@ -717,6 +734,9 @@ typedef struct
#define VGET_LOW_BUILTIN(A) \
AARCH64_SIMD_BUILTIN_VGET_LOW_##A,
+#define VGET_HIGH_BUILTIN(A) \
+ AARCH64_SIMD_BUILTIN_VGET_HIGH_##A,
+
#undef VAR1
#define VAR1(T, N, MAP, FLAG, A) \
AARCH64_SIMD_BUILTIN_##T##_##N##A,
@@ -753,6 +773,7 @@ enum aarch64_builtins
/* SIMD intrinsic builtins. */
AARCH64_SIMD_VREINTERPRET_BUILTINS
AARCH64_SIMD_VGET_LOW_BUILTINS
+ AARCH64_SIMD_VGET_HIGH_BUILTINS
/* ARMv8.3-A Pointer Authentication Builtins. */
AARCH64_PAUTH_BUILTIN_AUTIA1716,
AARCH64_PAUTH_BUILTIN_PACIA1716,
@@ -855,26 +876,21 @@ static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = {
false \
},
-#define AARCH64_SIMD_VGET_LOW_BUILTINS \
- VGET_LOW_BUILTIN(f16) \
- VGET_LOW_BUILTIN(f32) \
- VGET_LOW_BUILTIN(f64) \
- VGET_LOW_BUILTIN(p8) \
- VGET_LOW_BUILTIN(p16) \
- VGET_LOW_BUILTIN(p64) \
- VGET_LOW_BUILTIN(s8) \
- VGET_LOW_BUILTIN(s16) \
- VGET_LOW_BUILTIN(s32) \
- VGET_LOW_BUILTIN(s64) \
- VGET_LOW_BUILTIN(u8) \
- VGET_LOW_BUILTIN(u16) \
- VGET_LOW_BUILTIN(u32) \
- VGET_LOW_BUILTIN(u64) \
- VGET_LOW_BUILTIN(bf16)
+#undef VGET_HIGH_BUILTIN
+#define VGET_HIGH_BUILTIN(A) \
+ {"vget_high_" #A, \
+ AARCH64_SIMD_BUILTIN_VGET_HIGH_##A, \
+ 2, \
+ { SIMD_INTR_MODE(A, d), SIMD_INTR_MODE(A, q) }, \
+ { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(A) }, \
+ FLAG_AUTO_FP, \
+ false \
+ },
static const aarch64_simd_intrinsic_datum aarch64_simd_intrinsic_data[] = {
AARCH64_SIMD_VREINTERPRET_BUILTINS
AARCH64_SIMD_VGET_LOW_BUILTINS
+ AARCH64_SIMD_VGET_HIGH_BUILTINS
};
@@ -3270,6 +3286,10 @@ aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2)
#define VGET_LOW_BUILTIN(A) \
case AARCH64_SIMD_BUILTIN_VGET_LOW_##A:
+#undef VGET_HIGH_BUILTIN
+#define VGET_HIGH_BUILTIN(A) \
+ case AARCH64_SIMD_BUILTIN_VGET_HIGH_##A:
+
/* Try to fold a call to the built-in function with subcode FCODE. The
function is passed the N_ARGS arguments in ARGS and it returns a value
of type TYPE. Return the new expression on success and NULL_TREE on
@@ -3292,6 +3312,13 @@ aarch64_general_fold_builtin (unsigned int fcode, tree type,
{
auto pos = BYTES_BIG_ENDIAN ? 64 : 0;
+ return fold_build3 (BIT_FIELD_REF, type, args[0], bitsize_int (64),
+ bitsize_int (pos));
+ }
+ AARCH64_SIMD_VGET_HIGH_BUILTINS
+ {
+ auto pos = BYTES_BIG_ENDIAN ? 0 : 64;
+
return fold_build3 (BIT_FIELD_REF, type, args[0], bitsize_int (64),
bitsize_int (pos));
}
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index a9f0558f8b6..e65f73d7ba2 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -65,9 +65,6 @@
BUILTIN_VS (UNOP, ctz, 2, NONE)
BUILTIN_VB (UNOP, popcount, 2, NONE)
- /* Implemented by aarch64_get_high<mode>. */
- BUILTIN_VQMOV (UNOP, get_high, 0, AUTO_FP)
-
/* Implemented by aarch64_<sur>q<r>shl<mode>. */
BUILTIN_VSDQ_I (BINOP, sqshl, 0, NONE)
BUILTIN_VSDQ_I (BINOP_UUS, uqshl, 0, NONE)
@@ -958,9 +955,6 @@
VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, FP, v4sf)
VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, FP, v4sf)
- /* Implemented by aarch64_vget_hi_halfv8bf. */
- VAR1 (UNOP, vget_hi_half, 0, AUTO_FP, v8bf)
-
/* Implemented by aarch64_simd_<sur>mmlav16qi. */
VAR1 (TERNOP, simd_smmla, 0, NONE, v16qi)
VAR1 (TERNOPU, simd_ummla, 0, NONE, v16qi)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 875ea52b02f..c311888e4bd 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -288,17 +288,6 @@ (define_expand "aarch64_get_half<mode>"
}
)
-(define_expand "aarch64_get_high<mode>"
- [(match_operand:<VHALF> 0 "register_operand")
- (match_operand:VQMOV 1 "register_operand")]
- "TARGET_FLOAT"
- {
- rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
- emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
- DONE;
- }
-)
-
(define_insn_and_split "aarch64_simd_mov_from_<mode>low"
[(set (match_operand:<VHALF> 0 "register_operand")
(vec_select:<VHALF>
@@ -9763,17 +9752,6 @@ (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
[(set_attr "type" "neon_dot<VDQSF:q>")]
)
-;; vget_high_bf16
-(define_expand "aarch64_vget_hi_halfv8bf"
- [(match_operand:V4BF 0 "register_operand")
- (match_operand:V8BF 1 "register_operand")]
- "TARGET_BF16_SIMD"
-{
- rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
- emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
- DONE;
-})
-
;; bfmmla
(define_insn "aarch64_bfmmlaqv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=w")
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 92c2c5361cd..c4a09528ffd 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -3027,104 +3027,6 @@ vsetq_lane_u64 (uint64_t __elem, uint64x2_t __vec, const int __index)
return __aarch64_vset_lane_any (__elem, __vec, __index);
}
-__extension__ extern __inline float16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vget_high_f16 (float16x8_t __a)
-{
- return __builtin_aarch64_get_highv8hf (__a);
-}
-
-__extension__ extern __inline float32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vget_high_f32 (float32x4_t __a)
-{
- return __builtin_aarch64_get_highv4sf (__a);
-}
-
-__extension__ extern __inline float64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vget_high_f64 (float64x2_t __a)
-{
- return (float64x1_t) {__builtin_aarch64_get_highv2df (__a)};
-}
-
-__extension__ extern __inline poly8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vget_high_p8 (poly8x16_t __a)
-{
- return (poly8x8_t) __builtin_aarch64_get_highv16qi ((int8x16_t) __a);
-}
-
-__extension__ extern __inline poly16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vget_high_p16 (poly16x8_t __a)
-{
- return (poly16x4_t) __builtin_aarch64_get_highv8hi ((int16x8_t) __a);
-}
-
-__extension__ extern __inline poly64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vget_high_p64 (poly64x2_t __a)
-{
- return (poly64x1_t) __builtin_aarch64_get_highv2di ((int64x2_t) __a);
-}
-
-__extension__ extern __inline int8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vget_high_s8 (int8x16_t __a)
-{
- return __builtin_aarch64_get_highv16qi (__a);
-}
-
-__extension__ extern __inline int16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vget_high_s16 (int16x8_t __a)
-{
- return __builtin_aarch64_get_highv8hi (__a);
-}
-
-__extension__ extern __inline int32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vget_high_s32 (int32x4_t __a)
-{
- return __builtin_aarch64_get_highv4si (__a);
-}
-
-__extension__ extern __inline int64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vget_high_s64 (int64x2_t __a)
-{
- return (int64x1_t) {__builtin_aarch64_get_highv2di (__a)};
-}
-
-__extension__ extern __inline uint8x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vget_high_u8 (uint8x16_t __a)
-{
- return (uint8x8_t) __builtin_aarch64_get_highv16qi ((int8x16_t) __a);
-}
-
-__extension__ extern __inline uint16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vget_high_u16 (uint16x8_t __a)
-{
- return (uint16x4_t) __builtin_aarch64_get_highv8hi ((int16x8_t) __a);
-}
-
-__extension__ extern __inline uint32x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vget_high_u32 (uint32x4_t __a)
-{
- return (uint32x2_t) __builtin_aarch64_get_highv4si ((int32x4_t) __a);
-}
-
-__extension__ extern __inline uint64x1_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vget_high_u64 (uint64x2_t __a)
-{
- return (uint64x1_t) {__builtin_aarch64_get_highv2di ((int64x2_t) __a)};
-}
-
__extension__ extern __inline int8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
@@ -28381,13 +28283,6 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index);
}
-__extension__ extern __inline bfloat16x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vget_high_bf16 (bfloat16x8_t __a)
-{
- return __builtin_aarch64_vget_hi_halfv8bf (__a);
-}
-
__extension__ extern __inline float32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vcvt_f32_bf16 (bfloat16x4_t __a)
diff --git a/gcc/testsuite/gcc.target/aarch64/vget_high_2.c b/gcc/testsuite/gcc.target/aarch64/vget_high_2.c
new file mode 100644
index 00000000000..9593fb685e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vget_high_2.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized -mlittle-endian" } */
+
+#include <arm_neon.h>
+
+#define VARIANTS \
+VARIANT (uint8x8_t, uint8x16_t, u8) \
+VARIANT (uint16x4_t, uint16x8_t, u16) \
+VARIANT (uint32x2_t, uint32x4_t, u32) \
+VARIANT (uint64x1_t, uint64x2_t, u64) \
+VARIANT (int8x8_t, int8x16_t, s8) \
+VARIANT (int16x4_t, int16x8_t, s16) \
+VARIANT (int32x2_t, int32x4_t, s32) \
+VARIANT (int64x1_t, int64x2_t, s64) \
+VARIANT (float16x4_t, float16x8_t, f16) \
+VARIANT (float32x2_t, float32x4_t, f32) \
+VARIANT (float64x1_t, float64x2_t, f64) \
+VARIANT (bfloat16x4_t, bfloat16x8_t, bf16)
+
+/* vget_high_* intrinsics should become BIT_FIELD_REF. */
+#define VARIANT(TYPE64, TYPE128, SUFFIX) \
+TYPE64 \
+test_vget_high_##SUFFIX (TYPE128 vec) \
+{ \
+ return vget_high_##SUFFIX (vec); \
+}
+
+VARIANTS
+
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF <vec_\[0-9\]*\\\(D\\\), 64, 64>" 12 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c b/gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c
new file mode 100644
index 00000000000..5928c3a4597
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target stdint_types_mbig_endian } */
+/* { dg-options "-O3 -fdump-tree-optimized -mbig-endian" } */
+
+#include <arm_neon.h>
+
+#define VARIANTS \
+VARIANT (uint8x8_t, uint8x16_t, u8) \
+VARIANT (uint16x4_t, uint16x8_t, u16) \
+VARIANT (uint32x2_t, uint32x4_t, u32) \
+VARIANT (uint64x1_t, uint64x2_t, u64) \
+VARIANT (int8x8_t, int8x16_t, s8) \
+VARIANT (int16x4_t, int16x8_t, s16) \
+VARIANT (int32x2_t, int32x4_t, s32) \
+VARIANT (int64x1_t, int64x2_t, s64) \
+VARIANT (float16x4_t, float16x8_t, f16) \
+VARIANT (float32x2_t, float32x4_t, f32) \
+VARIANT (float64x1_t, float64x2_t, f64) \
+VARIANT (bfloat16x4_t, bfloat16x8_t, bf16)
+
+/* vget_high_* intrinsics should become BIT_FIELD_REF. */
+#define VARIANT(TYPE64, TYPE128, SUFFIX) \
+TYPE64 \
+test_vget_high_##SUFFIX (TYPE128 vec) \
+{ \
+ return vget_high_##SUFFIX (vec); \
+}
+
+VARIANTS
+
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF <vec_\[0-9\]*\\\(D\\\), 64, 0>" 12 "optimized" } } */
--
2.17.1
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] aarch64: Fold vget_high_* intrinsics to BIT_FIELD_REF [PR102171]
2024-05-22 2:05 [PATCH] aarch64: Fold vget_high_* intrinsics to BIT_FIELD_REF [PR102171] Pengxuan Zheng
@ 2024-05-22 12:27 ` Richard Sandiford
2024-05-22 20:08 ` Andrew Pinski
0 siblings, 1 reply; 3+ messages in thread
From: Richard Sandiford @ 2024-05-22 12:27 UTC (permalink / raw)
To: Pengxuan Zheng; +Cc: gcc-patches
Pengxuan Zheng <quic_pzheng@quicinc.com> writes:
> This patch is a follow-up of r15-697-ga2e4fe5a53cf75 to also fold vget_high_*
> intrinsics to BIT_FIELD_REF and remove the vget_high_* definitions from
> arm_neon.h to use the new intrinsics framework.
>
> PR target/102171
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-builtins.cc (AARCH64_SIMD_VGET_HIGH_BUILTINS):
> New macro to create definitions for all vget_high intrinsics.
> (VGET_HIGH_BUILTIN): Likewise.
> (enum aarch64_builtins): Add vget_high function codes.
> (AARCH64_SIMD_VGET_LOW_BUILTINS): Delete duplicate macro.
> (aarch64_general_fold_builtin): Fold vget_high calls.
> * config/aarch64/aarch64-simd-builtins.def: Delete vget_high builtins.
> * config/aarch64/aarch64-simd.md (aarch64_get_high<mode>): Delete.
> (aarch64_vget_hi_halfv8bf): Likewise.
> * config/aarch64/arm_neon.h (__attribute__): Delete.
> (vget_high_f16): Likewise.
> (vget_high_f32): Likewise.
> (vget_high_f64): Likewise.
> (vget_high_p8): Likewise.
> (vget_high_p16): Likewise.
> (vget_high_p64): Likewise.
> (vget_high_s8): Likewise.
> (vget_high_s16): Likewise.
> (vget_high_s32): Likewise.
> (vget_high_s64): Likewise.
> (vget_high_u8): Likewise.
> (vget_high_u16): Likewise.
> (vget_high_u32): Likewise.
> (vget_high_u64): Likewise.
> (vget_high_bf16): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/vget_high_2.c: New test.
> * gcc.target/aarch64/vget_high_2_be.c: New test.
OK, thanks.
Richard
> Signed-off-by: Pengxuan Zheng <quic_pzheng@quicinc.com>
> ---
> gcc/config/aarch64/aarch64-builtins.cc | 59 +++++++---
> gcc/config/aarch64/aarch64-simd-builtins.def | 6 -
> gcc/config/aarch64/aarch64-simd.md | 22 ----
> gcc/config/aarch64/arm_neon.h | 105 ------------------
> .../gcc.target/aarch64/vget_high_2.c | 30 +++++
> .../gcc.target/aarch64/vget_high_2_be.c | 31 ++++++
> 6 files changed, 104 insertions(+), 149 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/vget_high_2.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
> index 11b888016ed..f8eeccb554d 100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -675,6 +675,23 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
> VGET_LOW_BUILTIN(u64) \
> VGET_LOW_BUILTIN(bf16)
>
> +#define AARCH64_SIMD_VGET_HIGH_BUILTINS \
> + VGET_HIGH_BUILTIN(f16) \
> + VGET_HIGH_BUILTIN(f32) \
> + VGET_HIGH_BUILTIN(f64) \
> + VGET_HIGH_BUILTIN(p8) \
> + VGET_HIGH_BUILTIN(p16) \
> + VGET_HIGH_BUILTIN(p64) \
> + VGET_HIGH_BUILTIN(s8) \
> + VGET_HIGH_BUILTIN(s16) \
> + VGET_HIGH_BUILTIN(s32) \
> + VGET_HIGH_BUILTIN(s64) \
> + VGET_HIGH_BUILTIN(u8) \
> + VGET_HIGH_BUILTIN(u16) \
> + VGET_HIGH_BUILTIN(u32) \
> + VGET_HIGH_BUILTIN(u64) \
> + VGET_HIGH_BUILTIN(bf16)
> +
> typedef struct
> {
> const char *name;
> @@ -717,6 +734,9 @@ typedef struct
> #define VGET_LOW_BUILTIN(A) \
> AARCH64_SIMD_BUILTIN_VGET_LOW_##A,
>
> +#define VGET_HIGH_BUILTIN(A) \
> + AARCH64_SIMD_BUILTIN_VGET_HIGH_##A,
> +
> #undef VAR1
> #define VAR1(T, N, MAP, FLAG, A) \
> AARCH64_SIMD_BUILTIN_##T##_##N##A,
> @@ -753,6 +773,7 @@ enum aarch64_builtins
> /* SIMD intrinsic builtins. */
> AARCH64_SIMD_VREINTERPRET_BUILTINS
> AARCH64_SIMD_VGET_LOW_BUILTINS
> + AARCH64_SIMD_VGET_HIGH_BUILTINS
> /* ARMv8.3-A Pointer Authentication Builtins. */
> AARCH64_PAUTH_BUILTIN_AUTIA1716,
> AARCH64_PAUTH_BUILTIN_PACIA1716,
> @@ -855,26 +876,21 @@ static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = {
> false \
> },
>
> -#define AARCH64_SIMD_VGET_LOW_BUILTINS \
> - VGET_LOW_BUILTIN(f16) \
> - VGET_LOW_BUILTIN(f32) \
> - VGET_LOW_BUILTIN(f64) \
> - VGET_LOW_BUILTIN(p8) \
> - VGET_LOW_BUILTIN(p16) \
> - VGET_LOW_BUILTIN(p64) \
> - VGET_LOW_BUILTIN(s8) \
> - VGET_LOW_BUILTIN(s16) \
> - VGET_LOW_BUILTIN(s32) \
> - VGET_LOW_BUILTIN(s64) \
> - VGET_LOW_BUILTIN(u8) \
> - VGET_LOW_BUILTIN(u16) \
> - VGET_LOW_BUILTIN(u32) \
> - VGET_LOW_BUILTIN(u64) \
> - VGET_LOW_BUILTIN(bf16)
> +#undef VGET_HIGH_BUILTIN
> +#define VGET_HIGH_BUILTIN(A) \
> + {"vget_high_" #A, \
> + AARCH64_SIMD_BUILTIN_VGET_HIGH_##A, \
> + 2, \
> + { SIMD_INTR_MODE(A, d), SIMD_INTR_MODE(A, q) }, \
> + { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(A) }, \
> + FLAG_AUTO_FP, \
> + false \
> + },
>
> static const aarch64_simd_intrinsic_datum aarch64_simd_intrinsic_data[] = {
> AARCH64_SIMD_VREINTERPRET_BUILTINS
> AARCH64_SIMD_VGET_LOW_BUILTINS
> + AARCH64_SIMD_VGET_HIGH_BUILTINS
> };
>
>
> @@ -3270,6 +3286,10 @@ aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2)
> #define VGET_LOW_BUILTIN(A) \
> case AARCH64_SIMD_BUILTIN_VGET_LOW_##A:
>
> +#undef VGET_HIGH_BUILTIN
> +#define VGET_HIGH_BUILTIN(A) \
> + case AARCH64_SIMD_BUILTIN_VGET_HIGH_##A:
> +
> /* Try to fold a call to the built-in function with subcode FCODE. The
> function is passed the N_ARGS arguments in ARGS and it returns a value
> of type TYPE. Return the new expression on success and NULL_TREE on
> @@ -3292,6 +3312,13 @@ aarch64_general_fold_builtin (unsigned int fcode, tree type,
> {
> auto pos = BYTES_BIG_ENDIAN ? 64 : 0;
>
> + return fold_build3 (BIT_FIELD_REF, type, args[0], bitsize_int (64),
> + bitsize_int (pos));
> + }
> + AARCH64_SIMD_VGET_HIGH_BUILTINS
> + {
> + auto pos = BYTES_BIG_ENDIAN ? 0 : 64;
> +
> return fold_build3 (BIT_FIELD_REF, type, args[0], bitsize_int (64),
> bitsize_int (pos));
> }
> diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
> index a9f0558f8b6..e65f73d7ba2 100644
> --- a/gcc/config/aarch64/aarch64-simd-builtins.def
> +++ b/gcc/config/aarch64/aarch64-simd-builtins.def
> @@ -65,9 +65,6 @@
> BUILTIN_VS (UNOP, ctz, 2, NONE)
> BUILTIN_VB (UNOP, popcount, 2, NONE)
>
> - /* Implemented by aarch64_get_high<mode>. */
> - BUILTIN_VQMOV (UNOP, get_high, 0, AUTO_FP)
> -
> /* Implemented by aarch64_<sur>q<r>shl<mode>. */
> BUILTIN_VSDQ_I (BINOP, sqshl, 0, NONE)
> BUILTIN_VSDQ_I (BINOP_UUS, uqshl, 0, NONE)
> @@ -958,9 +955,6 @@
> VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, FP, v4sf)
> VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, FP, v4sf)
>
> - /* Implemented by aarch64_vget_hi_halfv8bf. */
> - VAR1 (UNOP, vget_hi_half, 0, AUTO_FP, v8bf)
> -
> /* Implemented by aarch64_simd_<sur>mmlav16qi. */
> VAR1 (TERNOP, simd_smmla, 0, NONE, v16qi)
> VAR1 (TERNOPU, simd_ummla, 0, NONE, v16qi)
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 875ea52b02f..c311888e4bd 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -288,17 +288,6 @@ (define_expand "aarch64_get_half<mode>"
> }
> )
>
> -(define_expand "aarch64_get_high<mode>"
> - [(match_operand:<VHALF> 0 "register_operand")
> - (match_operand:VQMOV 1 "register_operand")]
> - "TARGET_FLOAT"
> - {
> - rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
> - emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
> - DONE;
> - }
> -)
> -
> (define_insn_and_split "aarch64_simd_mov_from_<mode>low"
> [(set (match_operand:<VHALF> 0 "register_operand")
> (vec_select:<VHALF>
> @@ -9763,17 +9752,6 @@ (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
> [(set_attr "type" "neon_dot<VDQSF:q>")]
> )
>
> -;; vget_high_bf16
> -(define_expand "aarch64_vget_hi_halfv8bf"
> - [(match_operand:V4BF 0 "register_operand")
> - (match_operand:V8BF 1 "register_operand")]
> - "TARGET_BF16_SIMD"
> -{
> - rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
> - emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
> - DONE;
> -})
> -
> ;; bfmmla
> (define_insn "aarch64_bfmmlaqv4sf"
> [(set (match_operand:V4SF 0 "register_operand" "=w")
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 92c2c5361cd..c4a09528ffd 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -3027,104 +3027,6 @@ vsetq_lane_u64 (uint64_t __elem, uint64x2_t __vec, const int __index)
> return __aarch64_vset_lane_any (__elem, __vec, __index);
> }
>
> -__extension__ extern __inline float16x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vget_high_f16 (float16x8_t __a)
> -{
> - return __builtin_aarch64_get_highv8hf (__a);
> -}
> -
> -__extension__ extern __inline float32x2_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vget_high_f32 (float32x4_t __a)
> -{
> - return __builtin_aarch64_get_highv4sf (__a);
> -}
> -
> -__extension__ extern __inline float64x1_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vget_high_f64 (float64x2_t __a)
> -{
> - return (float64x1_t) {__builtin_aarch64_get_highv2df (__a)};
> -}
> -
> -__extension__ extern __inline poly8x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vget_high_p8 (poly8x16_t __a)
> -{
> - return (poly8x8_t) __builtin_aarch64_get_highv16qi ((int8x16_t) __a);
> -}
> -
> -__extension__ extern __inline poly16x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vget_high_p16 (poly16x8_t __a)
> -{
> - return (poly16x4_t) __builtin_aarch64_get_highv8hi ((int16x8_t) __a);
> -}
> -
> -__extension__ extern __inline poly64x1_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vget_high_p64 (poly64x2_t __a)
> -{
> - return (poly64x1_t) __builtin_aarch64_get_highv2di ((int64x2_t) __a);
> -}
> -
> -__extension__ extern __inline int8x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vget_high_s8 (int8x16_t __a)
> -{
> - return __builtin_aarch64_get_highv16qi (__a);
> -}
> -
> -__extension__ extern __inline int16x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vget_high_s16 (int16x8_t __a)
> -{
> - return __builtin_aarch64_get_highv8hi (__a);
> -}
> -
> -__extension__ extern __inline int32x2_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vget_high_s32 (int32x4_t __a)
> -{
> - return __builtin_aarch64_get_highv4si (__a);
> -}
> -
> -__extension__ extern __inline int64x1_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vget_high_s64 (int64x2_t __a)
> -{
> - return (int64x1_t) {__builtin_aarch64_get_highv2di (__a)};
> -}
> -
> -__extension__ extern __inline uint8x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vget_high_u8 (uint8x16_t __a)
> -{
> - return (uint8x8_t) __builtin_aarch64_get_highv16qi ((int8x16_t) __a);
> -}
> -
> -__extension__ extern __inline uint16x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vget_high_u16 (uint16x8_t __a)
> -{
> - return (uint16x4_t) __builtin_aarch64_get_highv8hi ((int16x8_t) __a);
> -}
> -
> -__extension__ extern __inline uint32x2_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vget_high_u32 (uint32x4_t __a)
> -{
> - return (uint32x2_t) __builtin_aarch64_get_highv4si ((int32x4_t) __a);
> -}
> -
> -__extension__ extern __inline uint64x1_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vget_high_u64 (uint64x2_t __a)
> -{
> - return (uint64x1_t) {__builtin_aarch64_get_highv2di ((int64x2_t) __a)};
> -}
> -
>
> __extension__ extern __inline int8x16_t
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> @@ -28381,13 +28283,6 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
> return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index);
> }
>
> -__extension__ extern __inline bfloat16x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vget_high_bf16 (bfloat16x8_t __a)
> -{
> - return __builtin_aarch64_vget_hi_halfv8bf (__a);
> -}
> -
> __extension__ extern __inline float32x4_t
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> vcvt_f32_bf16 (bfloat16x4_t __a)
> diff --git a/gcc/testsuite/gcc.target/aarch64/vget_high_2.c b/gcc/testsuite/gcc.target/aarch64/vget_high_2.c
> new file mode 100644
> index 00000000000..9593fb685e3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vget_high_2.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-tree-optimized -mlittle-endian" } */
> +
> +#include <arm_neon.h>
> +
> +#define VARIANTS \
> +VARIANT (uint8x8_t, uint8x16_t, u8) \
> +VARIANT (uint16x4_t, uint16x8_t, u16) \
> +VARIANT (uint32x2_t, uint32x4_t, u32) \
> +VARIANT (uint64x1_t, uint64x2_t, u64) \
> +VARIANT (int8x8_t, int8x16_t, s8) \
> +VARIANT (int16x4_t, int16x8_t, s16) \
> +VARIANT (int32x2_t, int32x4_t, s32) \
> +VARIANT (int64x1_t, int64x2_t, s64) \
> +VARIANT (float16x4_t, float16x8_t, f16) \
> +VARIANT (float32x2_t, float32x4_t, f32) \
> +VARIANT (float64x1_t, float64x2_t, f64) \
> +VARIANT (bfloat16x4_t, bfloat16x8_t, bf16)
> +
> +/* vget_high_* intrinsics should become BIT_FIELD_REF. */
> +#define VARIANT(TYPE64, TYPE128, SUFFIX) \
> +TYPE64 \
> +test_vget_high_##SUFFIX (TYPE128 vec) \
> +{ \
> + return vget_high_##SUFFIX (vec); \
> +}
> +
> +VARIANTS
> +
> +/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF <vec_\[0-9\]*\\\(D\\\), 64, 64>" 12 "optimized" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c b/gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c
> new file mode 100644
> index 00000000000..5928c3a4597
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target stdint_types_mbig_endian } */
> +/* { dg-options "-O3 -fdump-tree-optimized -mbig-endian" } */
> +
> +#include <arm_neon.h>
> +
> +#define VARIANTS \
> +VARIANT (uint8x8_t, uint8x16_t, u8) \
> +VARIANT (uint16x4_t, uint16x8_t, u16) \
> +VARIANT (uint32x2_t, uint32x4_t, u32) \
> +VARIANT (uint64x1_t, uint64x2_t, u64) \
> +VARIANT (int8x8_t, int8x16_t, s8) \
> +VARIANT (int16x4_t, int16x8_t, s16) \
> +VARIANT (int32x2_t, int32x4_t, s32) \
> +VARIANT (int64x1_t, int64x2_t, s64) \
> +VARIANT (float16x4_t, float16x8_t, f16) \
> +VARIANT (float32x2_t, float32x4_t, f32) \
> +VARIANT (float64x1_t, float64x2_t, f64) \
> +VARIANT (bfloat16x4_t, bfloat16x8_t, bf16)
> +
> +/* vget_high_* intrinsics should become BIT_FIELD_REF. */
> +#define VARIANT(TYPE64, TYPE128, SUFFIX) \
> +TYPE64 \
> +test_vget_high_##SUFFIX (TYPE128 vec) \
> +{ \
> + return vget_high_##SUFFIX (vec); \
> +}
> +
> +VARIANTS
> +
> +/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF <vec_\[0-9\]*\\\(D\\\), 64, 0>" 12 "optimized" } } */
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] aarch64: Fold vget_high_* intrinsics to BIT_FIELD_REF [PR102171]
2024-05-22 12:27 ` Richard Sandiford
@ 2024-05-22 20:08 ` Andrew Pinski
0 siblings, 0 replies; 3+ messages in thread
From: Andrew Pinski @ 2024-05-22 20:08 UTC (permalink / raw)
To: Pengxuan Zheng, gcc-patches, richard.sandiford
On Wed, May 22, 2024 at 5:28 AM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Pengxuan Zheng <quic_pzheng@quicinc.com> writes:
> > This patch is a follow-up of r15-697-ga2e4fe5a53cf75 to also fold vget_high_*
> > intrinsics to BIT_FIELD_REF and remove the vget_high_* definitions from
> > arm_neon.h to use the new intrinsics framework.
> >
> > PR target/102171
> >
> > gcc/ChangeLog:
> >
> > * config/aarch64/aarch64-builtins.cc (AARCH64_SIMD_VGET_HIGH_BUILTINS):
> > New macro to create definitions for all vget_high intrinsics.
> > (VGET_HIGH_BUILTIN): Likewise.
> > (enum aarch64_builtins): Add vget_high function codes.
> > (AARCH64_SIMD_VGET_LOW_BUILTINS): Delete duplicate macro.
> > (aarch64_general_fold_builtin): Fold vget_high calls.
> > * config/aarch64/aarch64-simd-builtins.def: Delete vget_high builtins.
> > * config/aarch64/aarch64-simd.md (aarch64_get_high<mode>): Delete.
> > (aarch64_vget_hi_halfv8bf): Likewise.
> > * config/aarch64/arm_neon.h (__attribute__): Delete.
> > (vget_high_f16): Likewise.
> > (vget_high_f32): Likewise.
> > (vget_high_f64): Likewise.
> > (vget_high_p8): Likewise.
> > (vget_high_p16): Likewise.
> > (vget_high_p64): Likewise.
> > (vget_high_s8): Likewise.
> > (vget_high_s16): Likewise.
> > (vget_high_s32): Likewise.
> > (vget_high_s64): Likewise.
> > (vget_high_u8): Likewise.
> > (vget_high_u16): Likewise.
> > (vget_high_u32): Likewise.
> > (vget_high_u64): Likewise.
> > (vget_high_bf16): Likewise.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/aarch64/vget_high_2.c: New test.
> > * gcc.target/aarch64/vget_high_2_be.c: New test.
>
> OK, thanks.
Pushed as r15-778-g1d1ef1c22752b3 .
Thanks,
Andrew
>
> Richard
>
> > Signed-off-by: Pengxuan Zheng <quic_pzheng@quicinc.com>
> > ---
> > gcc/config/aarch64/aarch64-builtins.cc | 59 +++++++---
> > gcc/config/aarch64/aarch64-simd-builtins.def | 6 -
> > gcc/config/aarch64/aarch64-simd.md | 22 ----
> > gcc/config/aarch64/arm_neon.h | 105 ------------------
> > .../gcc.target/aarch64/vget_high_2.c | 30 +++++
> > .../gcc.target/aarch64/vget_high_2_be.c | 31 ++++++
> > 6 files changed, 104 insertions(+), 149 deletions(-)
> > create mode 100644 gcc/testsuite/gcc.target/aarch64/vget_high_2.c
> > create mode 100644 gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c
> >
> > diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
> > index 11b888016ed..f8eeccb554d 100644
> > --- a/gcc/config/aarch64/aarch64-builtins.cc
> > +++ b/gcc/config/aarch64/aarch64-builtins.cc
> > @@ -675,6 +675,23 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
> > VGET_LOW_BUILTIN(u64) \
> > VGET_LOW_BUILTIN(bf16)
> >
> > +#define AARCH64_SIMD_VGET_HIGH_BUILTINS \
> > + VGET_HIGH_BUILTIN(f16) \
> > + VGET_HIGH_BUILTIN(f32) \
> > + VGET_HIGH_BUILTIN(f64) \
> > + VGET_HIGH_BUILTIN(p8) \
> > + VGET_HIGH_BUILTIN(p16) \
> > + VGET_HIGH_BUILTIN(p64) \
> > + VGET_HIGH_BUILTIN(s8) \
> > + VGET_HIGH_BUILTIN(s16) \
> > + VGET_HIGH_BUILTIN(s32) \
> > + VGET_HIGH_BUILTIN(s64) \
> > + VGET_HIGH_BUILTIN(u8) \
> > + VGET_HIGH_BUILTIN(u16) \
> > + VGET_HIGH_BUILTIN(u32) \
> > + VGET_HIGH_BUILTIN(u64) \
> > + VGET_HIGH_BUILTIN(bf16)
> > +
> > typedef struct
> > {
> > const char *name;
> > @@ -717,6 +734,9 @@ typedef struct
> > #define VGET_LOW_BUILTIN(A) \
> > AARCH64_SIMD_BUILTIN_VGET_LOW_##A,
> >
> > +#define VGET_HIGH_BUILTIN(A) \
> > + AARCH64_SIMD_BUILTIN_VGET_HIGH_##A,
> > +
> > #undef VAR1
> > #define VAR1(T, N, MAP, FLAG, A) \
> > AARCH64_SIMD_BUILTIN_##T##_##N##A,
> > @@ -753,6 +773,7 @@ enum aarch64_builtins
> > /* SIMD intrinsic builtins. */
> > AARCH64_SIMD_VREINTERPRET_BUILTINS
> > AARCH64_SIMD_VGET_LOW_BUILTINS
> > + AARCH64_SIMD_VGET_HIGH_BUILTINS
> > /* ARMv8.3-A Pointer Authentication Builtins. */
> > AARCH64_PAUTH_BUILTIN_AUTIA1716,
> > AARCH64_PAUTH_BUILTIN_PACIA1716,
> > @@ -855,26 +876,21 @@ static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = {
> > false \
> > },
> >
> > -#define AARCH64_SIMD_VGET_LOW_BUILTINS \
> > - VGET_LOW_BUILTIN(f16) \
> > - VGET_LOW_BUILTIN(f32) \
> > - VGET_LOW_BUILTIN(f64) \
> > - VGET_LOW_BUILTIN(p8) \
> > - VGET_LOW_BUILTIN(p16) \
> > - VGET_LOW_BUILTIN(p64) \
> > - VGET_LOW_BUILTIN(s8) \
> > - VGET_LOW_BUILTIN(s16) \
> > - VGET_LOW_BUILTIN(s32) \
> > - VGET_LOW_BUILTIN(s64) \
> > - VGET_LOW_BUILTIN(u8) \
> > - VGET_LOW_BUILTIN(u16) \
> > - VGET_LOW_BUILTIN(u32) \
> > - VGET_LOW_BUILTIN(u64) \
> > - VGET_LOW_BUILTIN(bf16)
> > +#undef VGET_HIGH_BUILTIN
> > +#define VGET_HIGH_BUILTIN(A) \
> > + {"vget_high_" #A, \
> > + AARCH64_SIMD_BUILTIN_VGET_HIGH_##A, \
> > + 2, \
> > + { SIMD_INTR_MODE(A, d), SIMD_INTR_MODE(A, q) }, \
> > + { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(A) }, \
> > + FLAG_AUTO_FP, \
> > + false \
> > + },
> >
> > static const aarch64_simd_intrinsic_datum aarch64_simd_intrinsic_data[] = {
> > AARCH64_SIMD_VREINTERPRET_BUILTINS
> > AARCH64_SIMD_VGET_LOW_BUILTINS
> > + AARCH64_SIMD_VGET_HIGH_BUILTINS
> > };
> >
> >
> > @@ -3270,6 +3286,10 @@ aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2)
> > #define VGET_LOW_BUILTIN(A) \
> > case AARCH64_SIMD_BUILTIN_VGET_LOW_##A:
> >
> > +#undef VGET_HIGH_BUILTIN
> > +#define VGET_HIGH_BUILTIN(A) \
> > + case AARCH64_SIMD_BUILTIN_VGET_HIGH_##A:
> > +
> > /* Try to fold a call to the built-in function with subcode FCODE. The
> > function is passed the N_ARGS arguments in ARGS and it returns a value
> > of type TYPE. Return the new expression on success and NULL_TREE on
> > @@ -3292,6 +3312,13 @@ aarch64_general_fold_builtin (unsigned int fcode, tree type,
> > {
> > auto pos = BYTES_BIG_ENDIAN ? 64 : 0;
> >
> > + return fold_build3 (BIT_FIELD_REF, type, args[0], bitsize_int (64),
> > + bitsize_int (pos));
> > + }
> > + AARCH64_SIMD_VGET_HIGH_BUILTINS
> > + {
> > + auto pos = BYTES_BIG_ENDIAN ? 0 : 64;
> > +
> > return fold_build3 (BIT_FIELD_REF, type, args[0], bitsize_int (64),
> > bitsize_int (pos));
> > }
> > diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
> > index a9f0558f8b6..e65f73d7ba2 100644
> > --- a/gcc/config/aarch64/aarch64-simd-builtins.def
> > +++ b/gcc/config/aarch64/aarch64-simd-builtins.def
> > @@ -65,9 +65,6 @@
> > BUILTIN_VS (UNOP, ctz, 2, NONE)
> > BUILTIN_VB (UNOP, popcount, 2, NONE)
> >
> > - /* Implemented by aarch64_get_high<mode>. */
> > - BUILTIN_VQMOV (UNOP, get_high, 0, AUTO_FP)
> > -
> > /* Implemented by aarch64_<sur>q<r>shl<mode>. */
> > BUILTIN_VSDQ_I (BINOP, sqshl, 0, NONE)
> > BUILTIN_VSDQ_I (BINOP_UUS, uqshl, 0, NONE)
> > @@ -958,9 +955,6 @@
> > VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, FP, v4sf)
> > VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, FP, v4sf)
> >
> > - /* Implemented by aarch64_vget_hi_halfv8bf. */
> > - VAR1 (UNOP, vget_hi_half, 0, AUTO_FP, v8bf)
> > -
> > /* Implemented by aarch64_simd_<sur>mmlav16qi. */
> > VAR1 (TERNOP, simd_smmla, 0, NONE, v16qi)
> > VAR1 (TERNOPU, simd_ummla, 0, NONE, v16qi)
> > diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> > index 875ea52b02f..c311888e4bd 100644
> > --- a/gcc/config/aarch64/aarch64-simd.md
> > +++ b/gcc/config/aarch64/aarch64-simd.md
> > @@ -288,17 +288,6 @@ (define_expand "aarch64_get_half<mode>"
> > }
> > )
> >
> > -(define_expand "aarch64_get_high<mode>"
> > - [(match_operand:<VHALF> 0 "register_operand")
> > - (match_operand:VQMOV 1 "register_operand")]
> > - "TARGET_FLOAT"
> > - {
> > - rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
> > - emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
> > - DONE;
> > - }
> > -)
> > -
> > (define_insn_and_split "aarch64_simd_mov_from_<mode>low"
> > [(set (match_operand:<VHALF> 0 "register_operand")
> > (vec_select:<VHALF>
> > @@ -9763,17 +9752,6 @@ (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
> > [(set_attr "type" "neon_dot<VDQSF:q>")]
> > )
> >
> > -;; vget_high_bf16
> > -(define_expand "aarch64_vget_hi_halfv8bf"
> > - [(match_operand:V4BF 0 "register_operand")
> > - (match_operand:V8BF 1 "register_operand")]
> > - "TARGET_BF16_SIMD"
> > -{
> > - rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
> > - emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
> > - DONE;
> > -})
> > -
> > ;; bfmmla
> > (define_insn "aarch64_bfmmlaqv4sf"
> > [(set (match_operand:V4SF 0 "register_operand" "=w")
> > diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> > index 92c2c5361cd..c4a09528ffd 100644
> > --- a/gcc/config/aarch64/arm_neon.h
> > +++ b/gcc/config/aarch64/arm_neon.h
> > @@ -3027,104 +3027,6 @@ vsetq_lane_u64 (uint64_t __elem, uint64x2_t __vec, const int __index)
> > return __aarch64_vset_lane_any (__elem, __vec, __index);
> > }
> >
> > -__extension__ extern __inline float16x4_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -vget_high_f16 (float16x8_t __a)
> > -{
> > - return __builtin_aarch64_get_highv8hf (__a);
> > -}
> > -
> > -__extension__ extern __inline float32x2_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -vget_high_f32 (float32x4_t __a)
> > -{
> > - return __builtin_aarch64_get_highv4sf (__a);
> > -}
> > -
> > -__extension__ extern __inline float64x1_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -vget_high_f64 (float64x2_t __a)
> > -{
> > - return (float64x1_t) {__builtin_aarch64_get_highv2df (__a)};
> > -}
> > -
> > -__extension__ extern __inline poly8x8_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -vget_high_p8 (poly8x16_t __a)
> > -{
> > - return (poly8x8_t) __builtin_aarch64_get_highv16qi ((int8x16_t) __a);
> > -}
> > -
> > -__extension__ extern __inline poly16x4_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -vget_high_p16 (poly16x8_t __a)
> > -{
> > - return (poly16x4_t) __builtin_aarch64_get_highv8hi ((int16x8_t) __a);
> > -}
> > -
> > -__extension__ extern __inline poly64x1_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -vget_high_p64 (poly64x2_t __a)
> > -{
> > - return (poly64x1_t) __builtin_aarch64_get_highv2di ((int64x2_t) __a);
> > -}
> > -
> > -__extension__ extern __inline int8x8_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -vget_high_s8 (int8x16_t __a)
> > -{
> > - return __builtin_aarch64_get_highv16qi (__a);
> > -}
> > -
> > -__extension__ extern __inline int16x4_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -vget_high_s16 (int16x8_t __a)
> > -{
> > - return __builtin_aarch64_get_highv8hi (__a);
> > -}
> > -
> > -__extension__ extern __inline int32x2_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -vget_high_s32 (int32x4_t __a)
> > -{
> > - return __builtin_aarch64_get_highv4si (__a);
> > -}
> > -
> > -__extension__ extern __inline int64x1_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -vget_high_s64 (int64x2_t __a)
> > -{
> > - return (int64x1_t) {__builtin_aarch64_get_highv2di (__a)};
> > -}
> > -
> > -__extension__ extern __inline uint8x8_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -vget_high_u8 (uint8x16_t __a)
> > -{
> > - return (uint8x8_t) __builtin_aarch64_get_highv16qi ((int8x16_t) __a);
> > -}
> > -
> > -__extension__ extern __inline uint16x4_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -vget_high_u16 (uint16x8_t __a)
> > -{
> > - return (uint16x4_t) __builtin_aarch64_get_highv8hi ((int16x8_t) __a);
> > -}
> > -
> > -__extension__ extern __inline uint32x2_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -vget_high_u32 (uint32x4_t __a)
> > -{
> > - return (uint32x2_t) __builtin_aarch64_get_highv4si ((int32x4_t) __a);
> > -}
> > -
> > -__extension__ extern __inline uint64x1_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -vget_high_u64 (uint64x2_t __a)
> > -{
> > - return (uint64x1_t) {__builtin_aarch64_get_highv2di ((int64x2_t) __a)};
> > -}
> > -
> >
> > __extension__ extern __inline int8x16_t
> > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > @@ -28381,13 +28283,6 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
> > return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index);
> > }
> >
> > -__extension__ extern __inline bfloat16x4_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -vget_high_bf16 (bfloat16x8_t __a)
> > -{
> > - return __builtin_aarch64_vget_hi_halfv8bf (__a);
> > -}
> > -
> > __extension__ extern __inline float32x4_t
> > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > vcvt_f32_bf16 (bfloat16x4_t __a)
> > diff --git a/gcc/testsuite/gcc.target/aarch64/vget_high_2.c b/gcc/testsuite/gcc.target/aarch64/vget_high_2.c
> > new file mode 100644
> > index 00000000000..9593fb685e3
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/vget_high_2.c
> > @@ -0,0 +1,30 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -fdump-tree-optimized -mlittle-endian" } */
> > +
> > +#include <arm_neon.h>
> > +
> > +#define VARIANTS \
> > +VARIANT (uint8x8_t, uint8x16_t, u8) \
> > +VARIANT (uint16x4_t, uint16x8_t, u16) \
> > +VARIANT (uint32x2_t, uint32x4_t, u32) \
> > +VARIANT (uint64x1_t, uint64x2_t, u64) \
> > +VARIANT (int8x8_t, int8x16_t, s8) \
> > +VARIANT (int16x4_t, int16x8_t, s16) \
> > +VARIANT (int32x2_t, int32x4_t, s32) \
> > +VARIANT (int64x1_t, int64x2_t, s64) \
> > +VARIANT (float16x4_t, float16x8_t, f16) \
> > +VARIANT (float32x2_t, float32x4_t, f32) \
> > +VARIANT (float64x1_t, float64x2_t, f64) \
> > +VARIANT (bfloat16x4_t, bfloat16x8_t, bf16)
> > +
> > +/* vget_high_* intrinsics should become BIT_FIELD_REF. */
> > +#define VARIANT(TYPE64, TYPE128, SUFFIX) \
> > +TYPE64 \
> > +test_vget_high_##SUFFIX (TYPE128 vec) \
> > +{ \
> > + return vget_high_##SUFFIX (vec); \
> > +}
> > +
> > +VARIANTS
> > +
> > +/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF <vec_\[0-9\]*\\\(D\\\), 64, 64>" 12 "optimized" } } */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c b/gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c
> > new file mode 100644
> > index 00000000000..5928c3a4597
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c
> > @@ -0,0 +1,31 @@
> > +/* { dg-do compile } */
> > +/* { dg-require-effective-target stdint_types_mbig_endian } */
> > +/* { dg-options "-O3 -fdump-tree-optimized -mbig-endian" } */
> > +
> > +#include <arm_neon.h>
> > +
> > +#define VARIANTS \
> > +VARIANT (uint8x8_t, uint8x16_t, u8) \
> > +VARIANT (uint16x4_t, uint16x8_t, u16) \
> > +VARIANT (uint32x2_t, uint32x4_t, u32) \
> > +VARIANT (uint64x1_t, uint64x2_t, u64) \
> > +VARIANT (int8x8_t, int8x16_t, s8) \
> > +VARIANT (int16x4_t, int16x8_t, s16) \
> > +VARIANT (int32x2_t, int32x4_t, s32) \
> > +VARIANT (int64x1_t, int64x2_t, s64) \
> > +VARIANT (float16x4_t, float16x8_t, f16) \
> > +VARIANT (float32x2_t, float32x4_t, f32) \
> > +VARIANT (float64x1_t, float64x2_t, f64) \
> > +VARIANT (bfloat16x4_t, bfloat16x8_t, bf16)
> > +
> > +/* vget_high_* intrinsics should become BIT_FIELD_REF. */
> > +#define VARIANT(TYPE64, TYPE128, SUFFIX) \
> > +TYPE64 \
> > +test_vget_high_##SUFFIX (TYPE128 vec) \
> > +{ \
> > + return vget_high_##SUFFIX (vec); \
> > +}
> > +
> > +VARIANTS
> > +
> > +/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF <vec_\[0-9\]*\\\(D\\\), 64, 0>" 12 "optimized" } } */
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-05-22 20:09 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-22 2:05 [PATCH] aarch64: Fold vget_high_* intrinsics to BIT_FIELD_REF [PR102171] Pengxuan Zheng
2024-05-22 12:27 ` Richard Sandiford
2024-05-22 20:08 ` Andrew Pinski
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).