From 08c5cf4b5c6c846a4f62b6ad8776f2388b135e55 Mon Sep 17 00:00:00 2001
From: Matthew Wahab
Date: Thu, 7 Apr 2016 14:48:29 +0100
Subject: [PATCH 06/17] [PATCH 6/17][ARM] Add data processing intrinsics for float16_t.

2016-05-17  Matthew Wahab

	* config/arm/arm.c (arm_evpc_neon_vuzp): Add support for V8HF and
	V4HF modes.
	(arm_evpc_neon_vzip): Likewise.
	(arm_evpc_neon_vtrn): Likewise.
	(arm_evpc_neon_vrev): Likewise.
	(arm_evpc_neon_vext): Likewise.
	* config/arm/arm_neon.h (vbsl_f16): New.
	(vbslq_f16): New.
	(vdup_n_f16): New.
	(vdupq_n_f16): New.
	(vdup_lane_f16): New.
	(vdupq_lane_f16): New.
	(vext_f16): New.
	(vextq_f16): New.
	(vmov_n_f16): New.
	(vmovq_n_f16): New.
	(vrev64_f16): New.
	(vrev64q_f16): New.
	(vtrn_f16): New.
	(vtrnq_f16): New.
	(vuzp_f16): New.
	(vuzpq_f16): New.
	(vzip_f16): New.
	(vzipq_f16): New.
	* config/arm/arm_neon_builtins.def (vdup_n): New (v8hf, v4hf variants).
	(vdup_lane): New (v8hf, v4hf variants).
	(vext): New (v8hf, v4hf variants).
	(vbsl): New (v8hf, v4hf variants).
	* config/arm/iterators.md (VDQWH): New.
	(VH): New.
	(V_double_vector_mode): Add V8HF and V4HF.  Fix white-space.
	(Scalar_mul_8_16): Fix white-space.
	(Is_d_reg): Add V4HF and V8HF.
	* config/arm/neon.md (neon_vdup_lane<mode>_internal): New.
	(neon_vdup_lane<mode>): New.
	(neon_vtrn<mode>_internal): Replace VDQW with VDQWH.
	(*neon_vtrn<mode>_insn): Likewise.
	(neon_vzip<mode>_internal): Likewise.  Also fix white-space.
	(*neon_vzip<mode>_insn): Likewise.
	(neon_vuzp<mode>_internal): Likewise.
	(*neon_vuzp<mode>_insn): Likewise.
	* config/arm/vec-common.md (vec_perm_const<mode>): New.

testsuite/
2016-05-17  Matthew Wahab

	* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
	(FP16_SUPPORTED): New.
	(expected-hfloat-16x4): Make conditional on __fp16 support.
	(expected-hfloat-16x8): Likewise.
	(vdup_n_f16): Disable for non-AArch64 targets.
	* gcc.target/aarch64/advsimd-intrinsics/vbsl.c: Add __fp16 tests,
	conditional on FP16_SUPPORTED.
	* gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vext.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vrev.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc: Add support
	for testing __fp16.
	* gcc.target/aarch64/advsimd-intrinsics/vtrn.c: Add __fp16 tests,
	conditional on FP16_SUPPORTED.
	* gcc.target/aarch64/advsimd-intrinsics/vuzp.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vzip.c: Likewise.
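Usage note (not part of the patch): the new intrinsics compose like their existing
32-bit float counterparts.  A minimal sketch, assuming a target that satisfies the
__ARM_FP16_FORMAT_IEEE / __ARM_FP16_FORMAT_ALTERNATIVE guard used in arm_neon.h;
the helper name blend_and_zip_low is made up for illustration.

    #include <arm_neon.h>

    /* Select lanes from A or B under MASK, then interleave the result with
       zeros, using the vbsl_f16, vdup_n_f16 and vzip_f16 intrinsics added
       by this patch.  */
    float16x4_t
    blend_and_zip_low (float16x4_t a, float16x4_t b, uint16x4_t mask)
    {
      float16x4_t sel = vbsl_f16 (mask, a, b);          /* mask ? a : b, per bit.  */
      float16x4x2_t z = vzip_f16 (sel, vdup_n_f16 (0.0));
      return z.val[0];   /* { sel[0], 0, sel[1], 0 } on little-endian.  */
    }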
--- gcc/config/arm/arm.c | 10 ++ gcc/config/arm/arm_neon.h | 175 +++++++++++++++++++++ gcc/config/arm/arm_neon_builtins.def | 4 + gcc/config/arm/iterators.md | 26 +-- gcc/config/arm/neon.md | 115 +++++++++----- gcc/config/arm/vec-common.md | 14 ++ .../aarch64/advsimd-intrinsics/arm-neon-ref.h | 13 +- .../gcc.target/aarch64/advsimd-intrinsics/vbsl.c | 28 ++++ .../aarch64/advsimd-intrinsics/vdup-vmov.c | 75 +++++++++ .../aarch64/advsimd-intrinsics/vdup_lane.c | 23 +++ .../gcc.target/aarch64/advsimd-intrinsics/vext.c | 30 ++++ .../gcc.target/aarch64/advsimd-intrinsics/vrev.c | 20 +++ .../aarch64/advsimd-intrinsics/vshuffle.inc | 42 ++++- .../gcc.target/aarch64/advsimd-intrinsics/vtrn.c | 20 +++ .../gcc.target/aarch64/advsimd-intrinsics/vuzp.c | 20 +++ .../gcc.target/aarch64/advsimd-intrinsics/vzip.c | 20 +++ 16 files changed, 586 insertions(+), 49 deletions(-) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 26a8a48..6892040 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -28420,6 +28420,8 @@ arm_evpc_neon_vuzp (struct expand_vec_perm_d *d) case V8QImode: gen = gen_neon_vuzpv8qi_internal; break; case V8HImode: gen = gen_neon_vuzpv8hi_internal; break; case V4HImode: gen = gen_neon_vuzpv4hi_internal; break; + case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break; + case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break; case V4SImode: gen = gen_neon_vuzpv4si_internal; break; case V2SImode: gen = gen_neon_vuzpv2si_internal; break; case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break; @@ -28493,6 +28495,8 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d *d) case V8QImode: gen = gen_neon_vzipv8qi_internal; break; case V8HImode: gen = gen_neon_vzipv8hi_internal; break; case V4HImode: gen = gen_neon_vzipv4hi_internal; break; + case V8HFmode: gen = gen_neon_vzipv8hf_internal; break; + case V4HFmode: gen = gen_neon_vzipv4hf_internal; break; case V4SImode: gen = gen_neon_vzipv4si_internal; break; case V2SImode: gen = gen_neon_vzipv2si_internal; break; case V2SFmode: gen = gen_neon_vzipv2sf_internal; break; @@ -28545,6 +28549,8 @@ arm_evpc_neon_vrev (struct expand_vec_perm_d *d) case V8QImode: gen = gen_neon_vrev32v8qi; break; case V8HImode: gen = gen_neon_vrev64v8hi; break; case V4HImode: gen = gen_neon_vrev64v4hi; break; + case V8HFmode: gen = gen_neon_vrev64v8hf; break; + case V4HFmode: gen = gen_neon_vrev64v4hf; break; default: return false; } @@ -28628,6 +28634,8 @@ arm_evpc_neon_vtrn (struct expand_vec_perm_d *d) case V8QImode: gen = gen_neon_vtrnv8qi_internal; break; case V8HImode: gen = gen_neon_vtrnv8hi_internal; break; case V4HImode: gen = gen_neon_vtrnv4hi_internal; break; + case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break; + case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break; case V4SImode: gen = gen_neon_vtrnv4si_internal; break; case V2SImode: gen = gen_neon_vtrnv2si_internal; break; case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break; @@ -28703,6 +28711,8 @@ arm_evpc_neon_vext (struct expand_vec_perm_d *d) case V8HImode: gen = gen_neon_vextv8hi; break; case V2SImode: gen = gen_neon_vextv2si; break; case V4SImode: gen = gen_neon_vextv4si; break; + case V4HFmode: gen = gen_neon_vextv4hf; break; + case V8HFmode: gen = gen_neon_vextv8hf; break; case V2SFmode: gen = gen_neon_vextv2sf; break; case V4SFmode: gen = gen_neon_vextv4sf; break; case V2DImode: gen = gen_neon_vextv2di; break; diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 07503d7..5b433b4 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ 
-14830,6 +14830,181 @@ vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) #pragma GCC pop_options + /* Half-precision data processing intrinsics. */ +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vbsl_f16 (uint16x4_t __a, float16x4_t __b, float16x4_t __c) +{ + return __builtin_neon_vbslv4hf ((int16x4_t)__a, __b, __c); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vbslq_f16 (uint16x8_t __a, float16x8_t __b, float16x8_t __c) +{ + return __builtin_neon_vbslv8hf ((int16x8_t)__a, __b, __c); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vdup_n_f16 (float16_t __a) +{ + return __builtin_neon_vdup_nv4hf (__a); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vdupq_n_f16 (float16_t __a) +{ + return __builtin_neon_vdup_nv8hf (__a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vdup_lane_f16 (float16x4_t __a, const int __b) +{ + return __builtin_neon_vdup_lanev4hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_f16 (float16x4_t __a, const int __b) +{ + return __builtin_neon_vdup_lanev8hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vext_f16 (float16x4_t __a, float16x4_t __b, const int __c) +{ + return __builtin_neon_vextv4hf (__a, __b, __c); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vextq_f16 (float16x8_t __a, float16x8_t __b, const int __c) +{ + return __builtin_neon_vextv8hf (__a, __b, __c); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmov_n_f16 (float16_t __a) +{ + return __builtin_neon_vdup_nv4hf (__a); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vmovq_n_f16 (float16_t __a) +{ + return __builtin_neon_vdup_nv8hf (__a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vrev64_f16 (float16x4_t __a) +{ + return (float16x4_t)__builtin_shuffle (__a, (uint16x4_t){ 3, 2, 1, 0 }); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vrev64q_f16 (float16x8_t __a) +{ + return + (float16x8_t)__builtin_shuffle (__a, + (uint16x8_t){ 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__)) +vtrn_f16 (float16x4_t __a, float16x4_t __b) +{ + float16x4x2_t __rv; +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 6, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 5, 3, 7 }); +#endif + return __rv; +} + +__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__)) +vtrnq_f16 (float16x8_t __a, float16x8_t __b) +{ + float16x8x2_t __rv; +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif + return __rv; +} + +__extension__ static 
__inline float16x4x2_t __attribute__ ((__always_inline__)) +vuzp_f16 (float16x4_t __a, float16x4_t __b) +{ + float16x4x2_t __rv; +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 7, 1, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 6, 0, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 2, 4, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 3, 5, 7 }); +#endif + return __rv; +} + +__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__)) +vuzpq_f16 (float16x8_t __a, float16x8_t __b) +{ + float16x8x2_t __rv; +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 5, 7, 1, 3, 13, 15, 9, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 4, 6, 0, 2, 12, 14, 8, 10 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 1, 3, 5, 7, 9, 11, 13, 15 }); +#endif + return __rv; +} + +__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__)) +vzip_f16 (float16x4_t __a, float16x4_t __b) +{ + float16x4x2_t __rv; +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 2, 6, 3, 7 }); +#endif + return __rv; +} + +__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__)) +vzipq_f16 (float16x8_t __a, float16x8_t __b) +{ + float16x8x2_t __rv; +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 10, 2, 11, 3, 8, 0, 9, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 14, 6, 15, 7, 12, 4, 13, 5 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif + return __rv; +} + +#endif + #ifdef __cplusplus } #endif diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index d9fac78..a4ba516 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -166,8 +166,10 @@ VAR10 (SETLANE, vset_lane, VAR5 (UNOP, vcreate, v8qi, v4hi, v2si, v2sf, di) VAR10 (UNOP, vdup_n, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) +VAR2 (UNOP, vdup_n, v8hf, v4hf) VAR10 (GETLANE, vdup_lane, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) +VAR2 (GETLANE, vdup_lane, v8hf, v4hf) VAR6 (COMBINE, vcombine, v8qi, v4hi, v4hf, v2si, v2sf, di) VAR6 (UNOP, vget_high, v16qi, v8hi, v8hf, v4si, v4sf, v2di) VAR6 (UNOP, vget_low, v16qi, v8hi, v8hf, v4si, v4sf, v2di) @@ -197,6 +199,7 @@ VAR2 (MAC_N, vmlslu_n, v4hi, v2si) VAR2 (MAC_N, vqdmlsl_n, v4hi, v2si) VAR10 (SETLANE, vext, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) +VAR2 (SETLANE, vext, v8hf, v4hf) VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) VAR2 (UNOP, vrev16, v8qi, v16qi) @@ -208,6 +211,7 @@ VAR1 (UNOP, vcvtv4sf, v4hf) VAR1 (UNOP, vcvtv4hf, v4sf) VAR10 (TERNOP, vbsl, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) +VAR2 (TERNOP, vbsl, v8hf, v4hf) VAR2 (UNOP, copysignf, v2sf, v4sf) VAR2 (UNOP, vrintn, v2sf, v4sf) VAR2 (UNOP, vrinta, v2sf, v4sf) diff 
--git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index aba1023..3f9d9e4 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -119,6 +119,10 @@
 ;; All supported vector modes (except those with 64-bit integer elements).
 (define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF])
 
+;; All supported vector modes including 16-bit float modes.
+(define_mode_iterator VDQWH [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF
+			     V8HF V4HF])
+
 ;; Supported integer vector modes (not 64 bit elements).
 (define_mode_iterator VDQIW [V8QI V16QI V4HI V8HI V2SI V4SI])
 
@@ -174,6 +178,9 @@
 ;; Modes with 8-bit, 16-bit and 32-bit elements.
 (define_mode_iterator VU [V16QI V8HI V4SI])
 
+;; Vector modes for 16-bit floating-point support.
+(define_mode_iterator VH [V8HF V4HF])
+
 ;; Iterators used for fixed-point support.
 (define_mode_iterator FIXED [QQ HQ SQ UQQ UHQ USQ HA SA UHA USA])
 
@@ -475,9 +482,10 @@
 ;; Used for neon_vdup_lane, where the second operand is double-sized
 ;; even when the first one is quad.
 (define_mode_attr V_double_vector_mode [(V16QI "V8QI") (V8HI "V4HI")
-                    (V4SI "V2SI") (V4SF "V2SF")
-                    (V8QI "V8QI") (V4HI "V4HI")
-                    (V2SI "V2SI") (V2SF "V2SF")])
+					(V4SI "V2SI") (V4SF "V2SF")
+					(V8QI "V8QI") (V4HI "V4HI")
+					(V2SI "V2SI") (V2SF "V2SF")
+					(V8HF "V4HF") (V4HF "V4HF")])
 
 ;; Mode of result of comparison operations (and bit-select operand 1).
 (define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI")
@@ -582,17 +590,17 @@
 				 (DI "false") (V2DI "false")])
 
 (define_mode_attr Scalar_mul_8_16 [(V8QI "true") (V16QI "true")
-                  (V4HI "true") (V8HI "true")
-                  (V2SI "false") (V4SI "false")
-                  (V2SF "false") (V4SF "false")
-                  (DI "false") (V2DI "false")])
-
+				   (V4HI "true") (V8HI "true")
+				   (V2SI "false") (V4SI "false")
+				   (V2SF "false") (V4SF "false")
+				   (DI "false") (V2DI "false")])
 
 (define_mode_attr Is_d_reg [(V8QI "true") (V16QI "false")
 			    (V4HI "true") (V8HI "false")
 			    (V2SI "true") (V4SI "false")
 			    (V2SF "true") (V4SF "false")
-			    (DI "true") (V2DI "false")])
+			    (DI "true") (V2DI "false")
+			    (V4HF "true") (V8HF "false")])
 
 (define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16")
 				 (V4HF "4") (V8HF "8")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 6b4896d..5fcc991 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -3045,6 +3045,28 @@ if (BYTES_BIG_ENDIAN)
   [(set_attr "type" "neon_dup")]
 )
 
+(define_insn "neon_vdup_lane<mode>_internal"
+  [(set (match_operand:VH 0 "s_register_operand" "=w")
+	(vec_duplicate:VH
+	 (vec_select:<V_elem>
+	  (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
+	  (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
+  "TARGET_NEON && TARGET_FP16"
+{
+  if (BYTES_BIG_ENDIAN)
+    {
+      int elt = INTVAL (operands[2]);
+      elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
+      operands[2] = GEN_INT (elt);
+    }
+  if (<Is_d_reg>)
+    return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
+  else
+    return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
+}
+  [(set_attr "type" "neon_dup")]
+)
+
 (define_expand "neon_vdup_lane<mode>"
   [(match_operand:VDQW 0 "s_register_operand" "=w")
    (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
@@ -3064,6 +3086,25 @@ if (BYTES_BIG_ENDIAN)
   DONE;
 })
 
+(define_expand "neon_vdup_lane<mode>"
+  [(match_operand:VH 0 "s_register_operand")
+   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
+   (match_operand:SI 2 "immediate_operand")]
+  "TARGET_NEON && TARGET_FP16"
+{
+  if (BYTES_BIG_ENDIAN)
+    {
+      unsigned int elt = INTVAL (operands[2]);
+      unsigned int reg_nelts
+	= 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
+      elt ^= reg_nelts - 1;
+      operands[2] = GEN_INT (elt);
+    }
+  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
+						operands[2]));
+  DONE;
+})
+
 ; Scalar index is ignored, since only zero is valid here.
 (define_expand "neon_vdup_lanedi"
   [(match_operand:DI 0 "s_register_operand" "=w")
@@ -4281,25 +4322,25 @@ if (BYTES_BIG_ENDIAN)
 
 (define_expand "neon_vtrn<mode>_internal"
   [(parallel
-    [(set (match_operand:VDQW 0 "s_register_operand" "")
-	  (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
-			(match_operand:VDQW 2 "s_register_operand" "")]
+    [(set (match_operand:VDQWH 0 "s_register_operand")
+	  (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
+			 (match_operand:VDQWH 2 "s_register_operand")]
 		       UNSPEC_VTRN1))
-     (set (match_operand:VDQW 3 "s_register_operand" "")
-	  (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
+     (set (match_operand:VDQWH 3 "s_register_operand")
+	  (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
   "TARGET_NEON"
   ""
 )
 
 ;; Note: Different operand numbering to handle tied registers correctly.
 (define_insn "*neon_vtrn<mode>_insn"
-  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
-	(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
-		      (match_operand:VDQW 3 "s_register_operand" "2")]
-		     UNSPEC_VTRN1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
-	(unspec:VDQW [(match_dup 1) (match_dup 3)]
-		     UNSPEC_VTRN2))]
+  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
+	(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
+		       (match_operand:VDQWH 3 "s_register_operand" "2")]
+		      UNSPEC_VTRN1))
+   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
+	(unspec:VDQWH [(match_dup 1) (match_dup 3)]
+		      UNSPEC_VTRN2))]
   "TARGET_NEON"
   "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
   [(set_attr "type" "neon_permute")]
@@ -4307,25 +4348,25 @@ if (BYTES_BIG_ENDIAN)
 
 (define_expand "neon_vzip<mode>_internal"
   [(parallel
-    [(set (match_operand:VDQW 0 "s_register_operand" "")
-	  (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
-			(match_operand:VDQW 2 "s_register_operand" "")]
-		       UNSPEC_VZIP1))
-     (set (match_operand:VDQW 3 "s_register_operand" "")
-	  (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
+    [(set (match_operand:VDQWH 0 "s_register_operand")
+	  (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
+			 (match_operand:VDQWH 2 "s_register_operand")]
+			UNSPEC_VZIP1))
+     (set (match_operand:VDQWH 3 "s_register_operand")
+	  (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
   "TARGET_NEON"
   ""
 )
 
 ;; Note: Different operand numbering to handle tied registers correctly.
 (define_insn "*neon_vzip<mode>_insn"
-  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
-	(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
-		      (match_operand:VDQW 3 "s_register_operand" "2")]
-		     UNSPEC_VZIP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
-	(unspec:VDQW [(match_dup 1) (match_dup 3)]
-		     UNSPEC_VZIP2))]
+  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
+	(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
+		       (match_operand:VDQWH 3 "s_register_operand" "2")]
+		      UNSPEC_VZIP1))
+   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
+	(unspec:VDQWH [(match_dup 1) (match_dup 3)]
+		      UNSPEC_VZIP2))]
   "TARGET_NEON"
   "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
   [(set_attr "type" "neon_zip")]
@@ -4333,25 +4374,25 @@ if (BYTES_BIG_ENDIAN)
 
 (define_expand "neon_vuzp<mode>_internal"
   [(parallel
-    [(set (match_operand:VDQW 0 "s_register_operand" "")
-	  (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
-			(match_operand:VDQW 2 "s_register_operand" "")]
+    [(set (match_operand:VDQWH 0 "s_register_operand")
+	  (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
+			 (match_operand:VDQWH 2 "s_register_operand")]
 		       UNSPEC_VUZP1))
-     (set (match_operand:VDQW 3 "s_register_operand" "")
-	  (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
+     (set (match_operand:VDQWH 3 "s_register_operand" "")
+	  (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
   "TARGET_NEON"
   ""
 )
 
 ;; Note: Different operand numbering to handle tied registers correctly.
 (define_insn "*neon_vuzp<mode>_insn"
-  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
-	(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
-		      (match_operand:VDQW 3 "s_register_operand" "2")]
-		     UNSPEC_VUZP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
-	(unspec:VDQW [(match_dup 1) (match_dup 3)]
-		     UNSPEC_VUZP2))]
+  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
+	(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
+		       (match_operand:VDQWH 3 "s_register_operand" "2")]
+		      UNSPEC_VUZP1))
+   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
+	(unspec:VDQWH [(match_dup 1) (match_dup 3)]
+		      UNSPEC_VUZP2))]
   "TARGET_NEON"
   "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
   [(set_attr "type" "neon_zip")]
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index ce98f71..645b01e 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -124,6 +124,20 @@
     FAIL;
 })
 
+(define_expand "vec_perm_const<mode>"
+  [(match_operand:VH 0 "s_register_operand")
+   (match_operand:VH 1 "s_register_operand")
+   (match_operand:VH 2 "s_register_operand")
+   (match_operand:<V_cmp_result> 3)]
+  "TARGET_NEON"
+{
+  if (arm_expand_vec_perm_const (operands[0], operands[1],
+				 operands[2], operands[3]))
+    DONE;
+  else
+    FAIL;
+})
+
 (define_expand "vec_perm"
   [(match_operand:VE 0 "s_register_operand" "")
    (match_operand:VE 1 "s_register_operand" "")
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
index 49fbd84..001e320 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
@@ -16,6 +16,15 @@ extern void *memset(void *, int, size_t);
 extern void *memcpy(void *, const void *, size_t);
 extern size_t strlen(const char *);
 
+/* Helper macro to select FP16 tests.
*/ +#if (!defined (__aarch64__) \ + && (defined (__ARM_FP16_FORMAT_IEEE) \ + || defined (__ARM_FP16_FORMAT_ALTERNATIVE))) +#define FP16_SUPPORTED (1) +#else +#undef FP16_SUPPORTED +#endif + /* Various string construction helpers. */ /* @@ -500,7 +509,9 @@ static void clean_results (void) /* Helpers to initialize vectors. */ #define VDUP(VAR, Q, T1, T2, W, N, V) \ VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V) -#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +#if (defined (__aarch64__) \ + && (defined (__ARM_FP16_FORMAT_IEEE) \ + || defined (__ARM_FP16_FORMAT_ALTERNATIVE))) /* Work around that there is no vdup_n_f16 intrinsic. */ #define vdup_n_f16(VAL) \ __extension__ \ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c index c4fdbb4..e9b3dfd 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c @@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffff1 }; VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc09, 0xcb89, + 0xcb09, 0xca89 }; +#endif VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800004, 0xc1700004 }; VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf6, 0xf6, 0xf6, 0xf6, @@ -43,6 +47,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7 }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2, 0xfff4, 0xfff4, 0xfff6, 0xfff6 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc09, 0xcb89, + 0xcb09, 0xca89, + 0xca09, 0xc989, + 0xc909, 0xc889 }; +#endif VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800001, 0xc1700001, 0xc1600001, 0xc1500001 }; @@ -66,6 +76,10 @@ void exec_vbsl (void) clean_results (); TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#if defined (FP16_SUPPORTED) + VLOAD(vector, buffer, , float, f, 16, 4); + VLOAD(vector, buffer, q, float, f, 16, 8); +#endif VLOAD(vector, buffer, , float, f, 32, 2); VLOAD(vector, buffer, q, float, f, 32, 4); @@ -80,6 +94,9 @@ void exec_vbsl (void) VDUP(vector2, , uint, u, 16, 4, 0xFFF2); VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0); VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFF3); +#if defined (FP16_SUPPORTED) + VDUP(vector2, , float, f, 16, 4, -2.4f); /* -2.4f is 0xC0CD. 
*/ +#endif VDUP(vector2, , float, f, 32, 2, -30.3f); VDUP(vector2, , poly, p, 8, 8, 0xF3); VDUP(vector2, , poly, p, 16, 4, 0xFFF2); @@ -94,6 +111,9 @@ void exec_vbsl (void) VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFF3); VDUP(vector2, q, poly, p, 8, 16, 0xF3); VDUP(vector2, q, poly, p, 16, 8, 0xFFF2); +#if defined (FP16_SUPPORTED) + VDUP(vector2, q, float, f, 16, 8, -2.4f); +#endif VDUP(vector2, q, float, f, 32, 4, -30.4f); VDUP(vector_first, , uint, u, 8, 8, 0xF4); @@ -111,10 +131,18 @@ void exec_vbsl (void) TEST_VBSL(uint, , poly, p, 16, 4); TEST_VBSL(uint, q, poly, p, 8, 16); TEST_VBSL(uint, q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VBSL(uint, , float, f, 16, 4); + TEST_VBSL(uint, q, float, f, 16, 8); +#endif TEST_VBSL(uint, , float, f, 32, 2); TEST_VBSL(uint, q, float, f, 32, 4); +#if defined (FP16_SUPPORTED) + CHECK_RESULTS (TEST_MSG, ""); +#else CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); +#endif } int main (void) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c index 22d45d5..aef4173 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c @@ -19,6 +19,10 @@ VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcc00, + 0xcc00, 0xcc00 }; +#endif VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 }; VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, @@ -46,6 +50,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcc00, + 0xcc00, 0xcc00, + 0xcc00, 0xcc00, + 0xcc00, 0xcc00 }; +#endif VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000, 0xc1800000, 0xc1800000 }; @@ -63,6 +73,10 @@ VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 }; VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xcb80, + 0xcb80, 0xcb80 }; +#endif VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, @@ -90,6 +104,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb80, 0xcb80, + 0xcb80, 0xcb80, + 0xcb80, 0xcb80, + 0xcb80, 0xcb80 }; +#endif VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, 0xc1700000, 0xc1700000 }; @@ -107,6 +127,10 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 }; VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +#if defined (FP16_SUPPORTED) 
+VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb00, 0xcb00, + 0xcb00, 0xcb00 }; +#endif VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 }; VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, @@ -134,6 +158,12 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb00, 0xcb00, + 0xcb00, 0xcb00, + 0xcb00, 0xcb00, + 0xcb00, 0xcb00 }; +#endif VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000, 0xc1600000, 0xc1600000 }; @@ -171,6 +201,9 @@ void exec_vdup_vmov (void) TEST_VDUP(, uint, u, 64, 1); TEST_VDUP(, poly, p, 8, 8); TEST_VDUP(, poly, p, 16, 4); +#if defined (FP16_SUPPORTED) + TEST_VDUP(, float, f, 16, 4); +#endif TEST_VDUP(, float, f, 32, 2); TEST_VDUP(q, int, s, 8, 16); @@ -183,8 +216,26 @@ void exec_vdup_vmov (void) TEST_VDUP(q, uint, u, 64, 2); TEST_VDUP(q, poly, p, 8, 16); TEST_VDUP(q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VDUP(q, float, f, 16, 8); +#endif TEST_VDUP(q, float, f, 32, 4); +#if defined (FP16_SUPPORTED) + switch (i) { + case 0: + CHECK_RESULTS_NAMED (TEST_MSG, expected0, ""); + break; + case 1: + CHECK_RESULTS_NAMED (TEST_MSG, expected1, ""); + break; + case 2: + CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); + break; + default: + abort(); + } +#else switch (i) { case 0: CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, ""); @@ -198,6 +249,7 @@ void exec_vdup_vmov (void) default: abort(); } +#endif } /* Do the same tests with vmov. Use the same expected results. */ @@ -216,6 +268,9 @@ void exec_vdup_vmov (void) TEST_VMOV(, uint, u, 64, 1); TEST_VMOV(, poly, p, 8, 8); TEST_VMOV(, poly, p, 16, 4); +#if defined (FP16_SUPPORTED) + TEST_VMOV(, float, f, 16, 4); +#endif TEST_VMOV(, float, f, 32, 2); TEST_VMOV(q, int, s, 8, 16); @@ -228,8 +283,26 @@ void exec_vdup_vmov (void) TEST_VMOV(q, uint, u, 64, 2); TEST_VMOV(q, poly, p, 8, 16); TEST_VMOV(q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VMOV(q, float, f, 16, 8); +#endif TEST_VMOV(q, float, f, 32, 4); +#if defined (FP16_SUPPORTED) + switch (i) { + case 0: + CHECK_RESULTS_NAMED (TEST_MSG, expected0, ""); + break; + case 1: + CHECK_RESULTS_NAMED (TEST_MSG, expected1, ""); + break; + case 2: + CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); + break; + default: + abort(); + } +#else switch (i) { case 0: CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, ""); @@ -243,6 +316,8 @@ void exec_vdup_vmov (void) default: abort(); } +#endif + } } diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c index ef708dc..c4b8f14 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c @@ -17,6 +17,10 @@ VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xca80, 0xca80, + 0xca80, 0xca80 }; +#endif VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, @@ -43,6 +47,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5, 
0xf5, 0xf5, 0xf5, 0xf5 }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xca80, 0xca80, + 0xca80, 0xca80, + 0xca80, 0xca80, + 0xca80, 0xca80 }; +#endif VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, 0xc1700000, 0xc1700000 }; @@ -63,6 +73,9 @@ void exec_vdup_lane (void) clean_results (); TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector, buffer); +#if defined (FP16_SUPPORTED) + VLOAD(vector, buffer, , float, f, 16, 4); +#endif VLOAD(vector, buffer, , float, f, 32, 2); /* Choose lane arbitrarily. */ @@ -76,6 +89,9 @@ void exec_vdup_lane (void) TEST_VDUP_LANE(, uint, u, 64, 1, 1, 0); TEST_VDUP_LANE(, poly, p, 8, 8, 8, 7); TEST_VDUP_LANE(, poly, p, 16, 4, 4, 3); +#if defined (FP16_SUPPORTED) + TEST_VDUP_LANE(, float, f, 16, 4, 4, 3); +#endif TEST_VDUP_LANE(, float, f, 32, 2, 2, 1); TEST_VDUP_LANE(q, int, s, 8, 16, 8, 2); @@ -88,9 +104,16 @@ void exec_vdup_lane (void) TEST_VDUP_LANE(q, uint, u, 64, 2, 1, 0); TEST_VDUP_LANE(q, poly, p, 8, 16, 8, 5); TEST_VDUP_LANE(q, poly, p, 16, 8, 4, 1); +#if defined (FP16_SUPPORTED) + TEST_VDUP_LANE(q, float, f, 16, 8, 4, 3); +#endif TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1); +#if defined (FP16_SUPPORTED) + CHECK_RESULTS (TEST_MSG, ""); +#else CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); +#endif } int main (void) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c index 98f88a6..908294a 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c @@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0xf7, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcb00, 0xca80, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x42066666 }; VECT_VAR_DECL(expected,int,8,16) [] = { 0xfe, 0xff, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, @@ -39,6 +43,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff6, 0xfff7, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xc880, 0x4b4d, + 0x4b4d, 0x4b4d, + 0x4b4d, 0x4b4d, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1500000, 0x4204cccd, 0x4204cccd, 0x4204cccd }; @@ -60,6 +70,10 @@ void exec_vext (void) clean_results (); TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer); +#ifdef FP16_SUPPORTED + VLOAD(vector1, buffer, , float, f, 16, 4); + VLOAD(vector1, buffer, q, float, f, 16, 8); +#endif VLOAD(vector1, buffer, , float, f, 32, 2); VLOAD(vector1, buffer, q, float, f, 32, 4); @@ -74,6 +88,9 @@ void exec_vext (void) VDUP(vector2, , uint, u, 64, 1, 0x88); VDUP(vector2, , poly, p, 8, 8, 0x55); VDUP(vector2, , poly, p, 16, 4, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. 
*/ +#endif VDUP(vector2, , float, f, 32, 2, 33.6f); VDUP(vector2, q, int, s, 8, 16, 0x11); @@ -86,6 +103,9 @@ void exec_vext (void) VDUP(vector2, q, uint, u, 64, 2, 0x88); VDUP(vector2, q, poly, p, 8, 16, 0x55); VDUP(vector2, q, poly, p, 16, 8, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, q, float, f, 16, 8, 14.6f); +#endif VDUP(vector2, q, float, f, 32, 4, 33.2f); /* Choose arbitrary extract offsets. */ @@ -99,6 +119,9 @@ void exec_vext (void) TEST_VEXT(, uint, u, 64, 1, 0); TEST_VEXT(, poly, p, 8, 8, 6); TEST_VEXT(, poly, p, 16, 4, 2); +#if defined (FP16_SUPPORTED) + TEST_VEXT(, float, f, 16, 4, 2); +#endif TEST_VEXT(, float, f, 32, 2, 1); TEST_VEXT(q, int, s, 8, 16, 14); @@ -111,9 +134,16 @@ void exec_vext (void) TEST_VEXT(q, uint, u, 64, 2, 1); TEST_VEXT(q, poly, p, 8, 16, 12); TEST_VEXT(q, poly, p, 16, 8, 6); +#if defined (FP16_SUPPORTED) + TEST_VEXT(q, float, f, 16, 8, 7); +#endif TEST_VEXT(q, float, f, 32, 4, 3); +#if defined (FP16_SUPPORTED) + CHECK_RESULTS (TEST_MSG, ""); +#else CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); +#endif } int main (void) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c index 3b574da..0c01318 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c @@ -63,6 +63,10 @@ VECT_VAR_DECL(expected_vrev64,uint,32,2) [] = { 0xfffffff1, 0xfffffff0 }; VECT_VAR_DECL(expected_vrev64,poly,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0 }; VECT_VAR_DECL(expected_vrev64,poly,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected_vrev64, hfloat, 16, 4) [] = { 0xca80, 0xcb00, + 0xcb80, 0xcc00 }; +#endif VECT_VAR_DECL(expected_vrev64,hfloat,32,2) [] = { 0xc1700000, 0xc1800000 }; VECT_VAR_DECL(expected_vrev64,int,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0, @@ -86,6 +90,12 @@ VECT_VAR_DECL(expected_vrev64,poly,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xfb, 0xfa, 0xf9, 0xf8 }; VECT_VAR_DECL(expected_vrev64,poly,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0, 0xfff7, 0xfff6, 0xfff5, 0xfff4 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected_vrev64, hfloat, 16, 8) [] = { 0xca80, 0xcb00, + 0xcb80, 0xcc00, + 0xc880, 0xc900, + 0xc980, 0xca00 }; +#endif VECT_VAR_DECL(expected_vrev64,hfloat,32,4) [] = { 0xc1700000, 0xc1800000, 0xc1500000, 0xc1600000 }; @@ -104,6 +114,10 @@ void exec_vrev (void) /* Initialize input "vector" from "buffer". 
*/ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#if defined (FP16_SUPPORTED) + VLOAD (vector, buffer, , float, f, 16, 4); + VLOAD (vector, buffer, q, float, f, 16, 8); +#endif VLOAD(vector, buffer, , float, f, 32, 2); VLOAD(vector, buffer, q, float, f, 32, 4); @@ -187,6 +201,12 @@ void exec_vrev (void) CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev64, ""); CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_vrev64, ""); +#if defined (FP16_SUPPORTED) + TEST_VREV (, float, f, 16, 4, 64); + TEST_VREV (q, float, f, 16, 8, 64); + CHECK_FP(TEST_MSG, float, 16, 4, PRIx32, expected_vrev64, ""); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx32, expected_vrev64, ""); +#endif TEST_VREV(, float, f, 32, 2, 64); TEST_VREV(q, float, f, 32, 4, 64); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_vrev64, ""); diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc index b55a205..ad5bf31 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc @@ -53,9 +53,17 @@ void FNNAME (INSN_NAME) (void) DECL_VSHUFFLE(float, 32, 4) DECL_ALL_VSHUFFLE(); +#if defined (FP16_SUPPORTED) + DECL_VSHUFFLE (float, 16, 4); + DECL_VSHUFFLE (float, 16, 8); +#endif /* Initialize input "vector" from "buffer". */ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer); +#if defined (FP16_SUPPORTED) + VLOAD (vector1, buffer, , float, f, 16, 4); + VLOAD (vector1, buffer, q, float, f, 16, 8); +#endif VLOAD(vector1, buffer, , float, f, 32, 2); VLOAD(vector1, buffer, q, float, f, 32, 4); @@ -68,6 +76,9 @@ void FNNAME (INSN_NAME) (void) VDUP(vector2, , uint, u, 32, 2, 0x77); VDUP(vector2, , poly, p, 8, 8, 0x55); VDUP(vector2, , poly, p, 16, 4, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */ +#endif VDUP(vector2, , float, f, 32, 2, 33.6f); VDUP(vector2, q, int, s, 8, 16, 0x11); @@ -78,8 +89,11 @@ void FNNAME (INSN_NAME) (void) VDUP(vector2, q, uint, u, 32, 4, 0x77); VDUP(vector2, q, poly, p, 8, 16, 0x55); VDUP(vector2, q, poly, p, 16, 8, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, q, float, f, 16, 8, 14.6f); +#endif VDUP(vector2, q, float, f, 32, 4, 33.8f); - + #define TEST_ALL_VSHUFFLE(INSN) \ TEST_VSHUFFLE(INSN, , int, s, 8, 8); \ TEST_VSHUFFLE(INSN, , int, s, 16, 4); \ @@ -100,6 +114,10 @@ void FNNAME (INSN_NAME) (void) TEST_VSHUFFLE(INSN, q, poly, p, 16, 8); \ TEST_VSHUFFLE(INSN, q, float, f, 32, 4) +#define TEST_VSHUFFLE_FP16(INSN) \ + TEST_VSHUFFLE(INSN, , float, f, 16, 4); \ + TEST_VSHUFFLE(INSN, q, float, f, 16, 8); + #define TEST_ALL_EXTRA_CHUNKS() \ TEST_EXTRA_CHUNK(int, 8, 8, 1); \ TEST_EXTRA_CHUNK(int, 16, 4, 1); \ @@ -143,17 +161,37 @@ void FNNAME (INSN_NAME) (void) CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \ CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \ - } \ + } + +#define CHECK_RESULTS_VSHUFFLE_FP16(test_name,EXPECTED,comment) \ + { \ + CHECK_FP (test_name, float, 16, 4, PRIx16, EXPECTED, comment); \ + CHECK_FP (test_name, float, 16, 8, PRIx16, EXPECTED, comment); \ + } clean_results (); /* Execute the tests. 
*/ TEST_ALL_VSHUFFLE(INSN_NAME); +#if defined (FP16_SUPPORTED) + TEST_VSHUFFLE_FP16 (INSN_NAME); +#endif CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected0, "(chunk 0)"); +#if defined (FP16_SUPPORTED) + CHECK_RESULTS_VSHUFFLE_FP16 (TEST_MSG, expected0, "(chunk 0)"); +#endif TEST_ALL_EXTRA_CHUNKS(); +#if defined (FP16_SUPPORTED) + TEST_EXTRA_CHUNK (float, 16, 4, 1); + TEST_EXTRA_CHUNK (float, 16, 8, 1); +#endif + CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected1, "(chunk 1)"); +#if defined (FP16_SUPPORTED) + CHECK_RESULTS_VSHUFFLE_FP16 (TEST_MSG, expected1, "(chunk 1)"); +#endif } int main (void) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c index 2c4a09c..ea2d8d8 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c @@ -15,6 +15,10 @@ VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55, 0xf2, 0xf3, 0x55, 0x55 }; VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0x11, 0x11, 0xf2, 0xf3, 0x11, 0x11, @@ -36,6 +40,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55, 0xf6, 0xf7, 0x55, 0x55 }; VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1, 0x66, 0x66, 0xfff2, 0xfff3, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80, + 0x4b4d, 0x4b4d, + 0xcb00, 0xca80, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0x42073333, 0x42073333 }; @@ -51,6 +61,10 @@ VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 }; VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55, 0xf6, 0xf7, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb00, 0xca80, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 }; VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf8, 0xf9, 0x11, 0x11, 0xfa, 0xfb, 0x11, 0x11, @@ -72,6 +86,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55, 0xfe, 0xff, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff4, 0xfff5, 0x66, 0x66, 0xfff6, 0xfff7, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xca00, 0xc980, + 0x4b4d, 0x4b4d, + 0xc900, 0xc880, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1600000, 0xc1500000, 0x42073333, 0x42073333 }; diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c index ab6e576..43b49ca 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c @@ -19,6 +19,10 @@ VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80, + 0xcb00, 0xca80 }; +#endif VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; 
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, @@ -48,6 +52,12 @@ VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80, + 0xcb00, 0xca80, + 0xca00, 0xc980, + 0xc900, 0xc880 }; +#endif VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; @@ -63,6 +73,10 @@ VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 }; VECT_VAR_DECL(expected1,poly,8,8) [] = { 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,4) [] = { 0x66, 0x66, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0x4b4d, 0x4b4d, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 }; VECT_VAR_DECL(expected1,int,8,16) [] = { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, @@ -84,6 +98,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,8) [] = { 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0x4b4d, 0x4b4d, + 0x4b4d, 0x4b4d, + 0x4b4d, 0x4b4d, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0x42073333, 0x42073333, 0x42073333, 0x42073333 }; diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c index b5fe516..20f4f5d 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c @@ -18,6 +18,10 @@ VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55, 0xf1, 0xf5, 0x55, 0x55 }; VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff2, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb00, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf8, 0x11, 0x11, 0xf1, 0xf9, 0x11, 0x11, @@ -41,6 +45,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55, 0xf3, 0xfb, 0x55, 0x55 }; VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66, 0xfff1, 0xfff5, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xca00, + 0x4b4d, 0x4b4d, + 0xcb80, 0xc980, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1600000, 0x42073333, 0x42073333 }; @@ -59,6 +69,10 @@ VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55, 0xf3, 0xf7, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff3, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xca80, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 }; VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf4, 0xfc, 0x11, 0x11, 0xf5, 0xfd, 0x11, 0x11, @@ -82,6 +96,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55, 0xf7, 0xff, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66, 0xfff3, 0xfff7, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb00, 0xc900, + 0x4b4d, 0x4b4d, + 0xca80, 0xc880, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 
0xc1500000, 0x42073333, 0x42073333 }; -- 2.1.4
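Note (not part of the patch): the half-precision constants in the new expected
arrays are the IEEE __fp16 encodings of the standard test inputs (the buffers
hold -16.0, -15.0, -14.0, ..., and the VDUP'd constants are -2.4f and 14.6f),
so 0xcc00 is -16.0, 0xcb80 is -15.0, 0xc0cd is -2.4f and 0x4b4d is roughly 14.6.
A minimal sketch that prints such encodings, assuming a compiler where __fp16
uses the IEEE format:

    #include <stdio.h>
    #include <string.h>

    int
    main (void)
    {
      /* A few of the values used by the tests above.  */
      __fp16 vals[] = { -16.0, -15.0, -2.4f, 14.6f };
      for (unsigned int i = 0; i < sizeof (vals) / sizeof (vals[0]); i++)
        {
          unsigned short bits;
          memcpy (&bits, &vals[i], sizeof (bits));  /* Reinterpret the 16 bits.  */
          printf ("%g -> 0x%04x\n", (double) vals[i], bits);
        }
      return 0;
    }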