Hi All,

This implements the new widening reduction optab in the backend.  Instead of
introducing a duplicate definition for the same thing, I have renamed the
intrinsic definitions to use the same optab.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64-simd-builtins.def (saddlv, uaddlv): Rename to
	reduc_splus_widen_scal_ and reduc_uplus_widen_scal_ respectively.
	* config/aarch64/aarch64-simd.md (aarch64_<su>addlv<mode>): Renamed to ...
	(reduc_<su>plus_widen_scal_<mode>): ... This.
	* config/aarch64/arm_neon.h (vaddlv_s8, vaddlv_s16, vaddlv_u8,
	vaddlv_u16, vaddlvq_s8, vaddlvq_s16, vaddlvq_s32, vaddlvq_u8,
	vaddlvq_u16, vaddlvq_u32, vaddlv_s32, vaddlv_u32): Use it.

--- inline copy of patch --
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index cf46b31627b84476a25762ffc708fd84a4086e43..a4b21e1495c5699d8557a4bcb9e73ef98ae60b35 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -190,9 +190,9 @@
   BUILTIN_VDQV_L (UNOP, saddlp, 0, NONE)
   BUILTIN_VDQV_L (UNOPU, uaddlp, 0, NONE)
 
-  /* Implemented by aarch64_<su>addlv<mode>.  */
-  BUILTIN_VDQV_L (UNOP, saddlv, 0, NONE)
-  BUILTIN_VDQV_L (UNOPU, uaddlv, 0, NONE)
+  /* Implemented by reduc_<su>plus_widen_scal_<mode>.  */
+  BUILTIN_VDQV_L (UNOP, reduc_splus_widen_scal_, 10, NONE)
+  BUILTIN_VDQV_L (UNOPU, reduc_uplus_widen_scal_, 10, NONE)
 
   /* Implemented by aarch64_<su>abd<mode>.  */
   BUILTIN_VDQ_BHSI (BINOP, sabd, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index cf8c094bd4b76981cef2dd5dd7b8e6be0d56101f..25aed74f8cf939562ed65a578fe32ca76605b58a 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3455,7 +3455,7 @@ (define_expand "reduc_plus_scal_v4sf"
   DONE;
 })
 
-(define_insn "aarch64_<su>addlv<mode>"
+(define_insn "reduc_<su>plus_widen_scal_<mode>"
  [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
        (unspec:<VWIDE_S> [(match_operand:VDQV_L 1 "register_operand" "w")]
 		    USADDLV))]
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index cf6af728ca99dae1cb6ab647466cfec32f7e913e..7b2c4c016191bcd6c3e075d27810faedb23854b7 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -3664,70 +3664,70 @@ __extension__ extern __inline int16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlv_s8 (int8x8_t __a)
 {
-  return __builtin_aarch64_saddlvv8qi (__a);
+  return __builtin_aarch64_reduc_splus_widen_scal_v8qi (__a);
 }
 
 __extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlv_s16 (int16x4_t __a)
 {
-  return __builtin_aarch64_saddlvv4hi (__a);
+  return __builtin_aarch64_reduc_splus_widen_scal_v4hi (__a);
 }
 
 __extension__ extern __inline uint16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlv_u8 (uint8x8_t __a)
 {
-  return __builtin_aarch64_uaddlvv8qi_uu (__a);
+  return __builtin_aarch64_reduc_uplus_widen_scal_v8qi_uu (__a);
 }
 
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlv_u16 (uint16x4_t __a)
 {
-  return __builtin_aarch64_uaddlvv4hi_uu (__a);
+  return __builtin_aarch64_reduc_uplus_widen_scal_v4hi_uu (__a);
 }
 
 __extension__ extern __inline int16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlvq_s8 (int8x16_t __a)
 {
-  return __builtin_aarch64_saddlvv16qi (__a);
+  return __builtin_aarch64_reduc_splus_widen_scal_v16qi (__a);
 }
 
 __extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlvq_s16 (int16x8_t __a)
 {
-  return __builtin_aarch64_saddlvv8hi (__a);
+  return __builtin_aarch64_reduc_splus_widen_scal_v8hi (__a);
 }
 
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlvq_s32 (int32x4_t __a)
 {
-  return __builtin_aarch64_saddlvv4si (__a);
+  return __builtin_aarch64_reduc_splus_widen_scal_v4si (__a);
 }
 
 __extension__ extern __inline uint16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlvq_u8 (uint8x16_t __a)
 {
-  return __builtin_aarch64_uaddlvv16qi_uu (__a);
+  return __builtin_aarch64_reduc_uplus_widen_scal_v16qi_uu (__a);
 }
 
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlvq_u16 (uint16x8_t __a)
 {
-  return __builtin_aarch64_uaddlvv8hi_uu (__a);
+  return __builtin_aarch64_reduc_uplus_widen_scal_v8hi_uu (__a);
 }
 
 __extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlvq_u32 (uint32x4_t __a)
 {
-  return __builtin_aarch64_uaddlvv4si_uu (__a);
+  return __builtin_aarch64_reduc_uplus_widen_scal_v4si_uu (__a);
 }
 
 __extension__ extern __inline float32x2_t
@@ -6461,14 +6461,14 @@ __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlv_s32 (int32x2_t __a)
 {
-  return __builtin_aarch64_saddlvv2si (__a);
+  return __builtin_aarch64_reduc_splus_widen_scal_v2si (__a);
 }
 
 __extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vaddlv_u32 (uint32x2_t __a)
 {
-  return __builtin_aarch64_uaddlvv2si_uu (__a);
+  return __builtin_aarch64_reduc_uplus_widen_scal_v2si_uu (__a);
 }
 
 __extension__ extern __inline int16x4_t

--
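
As an aside for reviewers, here is a small illustration (my own sketch, not part of the patch) of the kind of widening sum reduction the renamed patterns expose: a plain scalar loop that the vectorizer could in principle implement through the reduc_<su>plus_widen_scal_<mode> optab, next to the ACLE intrinsic that already maps to the same UADDLV instruction. The function names below are invented for the example, and whether the loop really ends up as a single uaddlv depends on the midend side of the optab, so treat the comments as expectations rather than guarantees.

#include <arm_neon.h>
#include <stdint.h>

/* Widening sum reduction written as a plain scalar loop: each uint8_t
   element is widened before being accumulated into a uint16_t.  With
   reduc_uplus_widen_scal_<mode> wired up as an optab, the vectorizer
   can in principle implement this with a single UADDLV.  */
uint16_t
sum_u8 (const uint8_t *p)
{
  uint16_t sum = 0;
  for (int i = 0; i < 16; i++)
    sum += p[i];
  return sum;
}

/* The same operation expressed directly with the ACLE intrinsic, which
   after this patch expands through
   __builtin_aarch64_reduc_uplus_widen_scal_v16qi_uu.  */
uint16_t
sum_u8_intrinsic (const uint8_t *p)
{
  return vaddlvq_u8 (vld1q_u8 (p));
}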