From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1652) id 3A8393858435; Mon, 20 Nov 2023 11:24:51 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 3A8393858435 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1700479491; bh=lBXToaSURyvHCUwLU7TXcle3X1bpHlavhymuV7iZZSE=; h=From:To:Subject:Date:From; b=eJKLUv7CMPktvW09RmEOMYIk6meXTRNPaZv3So68TUpnaOA0iuStfcxNgWwwLYTw+ EwTBpCdR/PvSDC2nqz4VMJ56OoWfla4qh0xXrwRBHKfqPBeTitx17dZbeUE6u354TV gykrh5Pq/m5XSkK7ECcO6d6mPvVyNAbbW6aXqsH0= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Christophe Lyon To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-5622] arm: [MVE intrinsics] rework vldq1 vst1q X-Act-Checkin: gcc X-Git-Author: Christophe Lyon X-Git-Refname: refs/heads/master X-Git-Oldrev: 3282fecd823a19cd682bb5013503a8db08250071 X-Git-Newrev: 4d7647edfd7d985fbefe13de03c8bc2e3a74fc61 Message-Id: <20231120112451.3A8393858435@sourceware.org> Date: Mon, 20 Nov 2023 11:24:51 +0000 (GMT) List-Id: https://gcc.gnu.org/g:4d7647edfd7d985fbefe13de03c8bc2e3a74fc61 commit r14-5622-g4d7647edfd7d985fbefe13de03c8bc2e3a74fc61 Author: Christophe Lyon Date: Wed Nov 15 07:58:00 2023 +0000 arm: [MVE intrinsics] rework vldq1 vst1q Implement vld1q, vst1q using the new MVE builtins framework. 2023-11-16 Christophe Lyon gcc/ * config/arm/arm-mve-builtins-base.cc (vld1_impl, vld1q) (vst1_impl, vst1q): New. * config/arm/arm-mve-builtins-base.def (vld1q, vst1q): New. * config/arm/arm-mve-builtins-base.h (vld1q, vst1q): New. * config/arm/arm_mve.h (vld1q): Delete. (vst1q): Delete. (vld1q_s8): Delete. (vld1q_s32): Delete. (vld1q_s16): Delete. (vld1q_u8): Delete. (vld1q_u32): Delete. (vld1q_u16): Delete. (vld1q_f32): Delete. (vld1q_f16): Delete. (vst1q_f32): Delete. (vst1q_f16): Delete. (vst1q_s8): Delete. (vst1q_s32): Delete. (vst1q_s16): Delete. (vst1q_u8): Delete. (vst1q_u32): Delete. (vst1q_u16): Delete. (__arm_vld1q_s8): Delete. (__arm_vld1q_s32): Delete. (__arm_vld1q_s16): Delete. (__arm_vld1q_u8): Delete. (__arm_vld1q_u32): Delete. (__arm_vld1q_u16): Delete. (__arm_vst1q_s8): Delete. (__arm_vst1q_s32): Delete. (__arm_vst1q_s16): Delete. (__arm_vst1q_u8): Delete. (__arm_vst1q_u32): Delete. (__arm_vst1q_u16): Delete. (__arm_vld1q_f32): Delete. (__arm_vld1q_f16): Delete. (__arm_vst1q_f32): Delete. (__arm_vst1q_f16): Delete. (__arm_vld1q): Delete. (__arm_vst1q): Delete. * config/arm/mve.md (mve_vld1q_f): Rename into ... (@mve_vld1q_f): ... this. (mve_vld1q_): Rename into ... (@mve_vld1q_) ... this. (mve_vst1q_f): Rename into ... (@mve_vst1q_f): ... this. (mve_vst1q_): Rename into ... (@mve_vst1q_) ... this. Diff: --- gcc/config/arm/arm-mve-builtins-base.cc | 58 +++++++ gcc/config/arm/arm-mve-builtins-base.def | 4 + gcc/config/arm/arm-mve-builtins-base.h | 4 +- gcc/config/arm/arm_mve.h | 282 ------------------------------- gcc/config/arm/mve.md | 8 +- 5 files changed, 69 insertions(+), 287 deletions(-) diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc index 5478cac8aeb..cfe1b954a29 100644 --- a/gcc/config/arm/arm-mve-builtins-base.cc +++ b/gcc/config/arm/arm-mve-builtins-base.cc @@ -83,6 +83,62 @@ class vuninitializedq_impl : public quiet } }; +class vld1_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const override + { + return CP_READ_MEMORY; + } + + rtx + expand (function_expander &e) const override + { + insn_code icode; + if (e.type_suffix (0).float_p) + icode = code_for_mve_vld1q_f(e.vector_mode (0)); + else + { + if (e.type_suffix (0).unsigned_p) + icode = code_for_mve_vld1q(VLD1Q_U, + e.vector_mode (0)); + else + icode = code_for_mve_vld1q(VLD1Q_S, + e.vector_mode (0)); + } + return e.use_contiguous_load_insn (icode); + } +}; + +class vst1_impl : public full_width_access +{ +public: + unsigned int + call_properties (const function_instance &) const override + { + return CP_WRITE_MEMORY; + } + + rtx + expand (function_expander &e) const override + { + insn_code icode; + if (e.type_suffix (0).float_p) + icode = code_for_mve_vst1q_f(e.vector_mode (0)); + else + { + if (e.type_suffix (0).unsigned_p) + icode = code_for_mve_vst1q(VST1Q_U, + e.vector_mode (0)); + else + icode = code_for_mve_vst1q(VST1Q_S, + e.vector_mode (0)); + } + return e.use_contiguous_store_insn (icode); + } +}; + } /* end anonymous namespace */ namespace arm_mve { @@ -290,6 +346,7 @@ FUNCTION (vfmasq, unspec_mve_function_exact_insn, (-1, -1, -1, -1, -1, VFMASQ_N_ FUNCTION (vfmsq, unspec_mve_function_exact_insn, (-1, -1, VFMSQ_F, -1, -1, -1, -1, -1, VFMSQ_M_F, -1, -1, -1)) FUNCTION_WITH_M_N_NO_F (vhaddq, VHADDQ) FUNCTION_WITH_M_N_NO_F (vhsubq, VHSUBQ) +FUNCTION (vld1q, vld1_impl,) FUNCTION_PRED_P_S (vmaxavq, VMAXAVQ) FUNCTION_WITHOUT_N_NO_U_F (vmaxaq, VMAXAQ) FUNCTION_ONLY_F (vmaxnmaq, VMAXNMAQ) @@ -405,6 +462,7 @@ FUNCTION_ONLY_N_NO_F (vshrntq, VSHRNTQ) FUNCTION_ONLY_N_NO_F (vshrq, VSHRQ) FUNCTION_ONLY_N_NO_F (vsliq, VSLIQ) FUNCTION_ONLY_N_NO_F (vsriq, VSRIQ) +FUNCTION (vst1q, vst1_impl,) FUNCTION_WITH_RTX_M_N (vsubq, MINUS, VSUBQ) FUNCTION (vuninitializedq, vuninitializedq_impl,) diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def index 01dfbdef8a3..16879246237 100644 --- a/gcc/config/arm/arm-mve-builtins-base.def +++ b/gcc/config/arm/arm-mve-builtins-base.def @@ -47,6 +47,7 @@ DEF_MVE_FUNCTION (vhaddq, binary_opt_n, all_integer, mx_or_none) DEF_MVE_FUNCTION (vhcaddq_rot90, binary, all_signed, mx_or_none) DEF_MVE_FUNCTION (vhcaddq_rot270, binary, all_signed, mx_or_none) DEF_MVE_FUNCTION (vhsubq, binary_opt_n, all_integer, mx_or_none) +DEF_MVE_FUNCTION (vld1q, load, all_integer, none) DEF_MVE_FUNCTION (vmaxaq, binary_maxamina, all_signed, m_or_none) DEF_MVE_FUNCTION (vmaxavq, binary_maxavminav, all_signed, p_or_none) DEF_MVE_FUNCTION (vmaxq, binary, all_integer, mx_or_none) @@ -150,6 +151,7 @@ DEF_MVE_FUNCTION (vshrntq, binary_rshift_narrow, integer_16_32, m_or_none) DEF_MVE_FUNCTION (vshrq, binary_rshift, all_integer, mx_or_none) DEF_MVE_FUNCTION (vsliq, ternary_lshift, all_integer, m_or_none) DEF_MVE_FUNCTION (vsriq, ternary_rshift, all_integer, m_or_none) +DEF_MVE_FUNCTION (vst1q, store, all_integer, none) DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_integer, mx_or_none) DEF_MVE_FUNCTION (vuninitializedq, inherent, all_integer_with_64, none) #undef REQUIRES_FLOAT @@ -182,6 +184,7 @@ DEF_MVE_FUNCTION (veorq, binary, all_float, mx_or_none) DEF_MVE_FUNCTION (vfmaq, ternary_opt_n, all_float, m_or_none) DEF_MVE_FUNCTION (vfmasq, ternary_n, all_float, m_or_none) DEF_MVE_FUNCTION (vfmsq, ternary, all_float, m_or_none) +DEF_MVE_FUNCTION (vld1q, load, all_float, none) DEF_MVE_FUNCTION (vmaxnmaq, binary, all_float, m_or_none) DEF_MVE_FUNCTION (vmaxnmavq, binary_maxvminv, all_float, p_or_none) DEF_MVE_FUNCTION (vmaxnmq, binary, all_float, mx_or_none) @@ -203,6 +206,7 @@ DEF_MVE_FUNCTION (vrndnq, unary, all_float, mx_or_none) DEF_MVE_FUNCTION (vrndpq, unary, all_float, mx_or_none) DEF_MVE_FUNCTION (vrndq, unary, all_float, mx_or_none) DEF_MVE_FUNCTION (vrndxq, unary, all_float, mx_or_none) +DEF_MVE_FUNCTION (vst1q, store, all_float, none) DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_float, mx_or_none) DEF_MVE_FUNCTION (vuninitializedq, inherent, all_float, none) #undef REQUIRES_FLOAT diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h index c574c32ac53..8c7e5fe5c3e 100644 --- a/gcc/config/arm/arm-mve-builtins-base.h +++ b/gcc/config/arm/arm-mve-builtins-base.h @@ -63,6 +63,7 @@ extern const function_base *const vhaddq; extern const function_base *const vhcaddq_rot270; extern const function_base *const vhcaddq_rot90; extern const function_base *const vhsubq; +extern const function_base *const vld1q; extern const function_base *const vmaxaq; extern const function_base *const vmaxavq; extern const function_base *const vmaxnmaq; @@ -103,8 +104,8 @@ extern const function_base *const vmovnbq; extern const function_base *const vmovntq; extern const function_base *const vmulhq; extern const function_base *const vmullbq_int; -extern const function_base *const vmulltq_int; extern const function_base *const vmullbq_poly; +extern const function_base *const vmulltq_int; extern const function_base *const vmulltq_poly; extern const function_base *const vmulq; extern const function_base *const vmvnq; @@ -178,6 +179,7 @@ extern const function_base *const vshrntq; extern const function_base *const vshrq; extern const function_base *const vsliq; extern const function_base *const vsriq; +extern const function_base *const vst1q; extern const function_base *const vsubq; extern const function_base *const vuninitializedq; diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index b82d94e59bd..cc027f9cbb5 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -56,7 +56,6 @@ #define vstrbq_scatter_offset_p(__base, __offset, __value, __p) __arm_vstrbq_scatter_offset_p(__base, __offset, __value, __p) #define vstrwq_scatter_base_p(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_p(__addr, __offset, __value, __p) #define vldrbq_gather_offset_z(__base, __offset, __p) __arm_vldrbq_gather_offset_z(__base, __offset, __p) -#define vld1q(__base) __arm_vld1q(__base) #define vldrhq_gather_offset(__base, __offset) __arm_vldrhq_gather_offset(__base, __offset) #define vldrhq_gather_offset_z(__base, __offset, __p) __arm_vldrhq_gather_offset_z(__base, __offset, __p) #define vldrhq_gather_shifted_offset(__base, __offset) __arm_vldrhq_gather_shifted_offset(__base, __offset) @@ -69,7 +68,6 @@ #define vldrwq_gather_offset_z(__base, __offset, __p) __arm_vldrwq_gather_offset_z(__base, __offset, __p) #define vldrwq_gather_shifted_offset(__base, __offset) __arm_vldrwq_gather_shifted_offset(__base, __offset) #define vldrwq_gather_shifted_offset_z(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z(__base, __offset, __p) -#define vst1q(__addr, __value) __arm_vst1q(__addr, __value) #define vstrhq_scatter_offset(__base, __offset, __value) __arm_vstrhq_scatter_offset(__base, __offset, __value) #define vstrhq_scatter_offset_p(__base, __offset, __value, __p) __arm_vstrhq_scatter_offset_p(__base, __offset, __value, __p) #define vstrhq_scatter_shifted_offset(__base, __offset, __value) __arm_vstrhq_scatter_shifted_offset(__base, __offset, __value) @@ -346,12 +344,6 @@ #define vldrbq_z_u32(__base, __p) __arm_vldrbq_z_u32(__base, __p) #define vldrwq_gather_base_z_u32(__addr, __offset, __p) __arm_vldrwq_gather_base_z_u32(__addr, __offset, __p) #define vldrwq_gather_base_z_s32(__addr, __offset, __p) __arm_vldrwq_gather_base_z_s32(__addr, __offset, __p) -#define vld1q_s8(__base) __arm_vld1q_s8(__base) -#define vld1q_s32(__base) __arm_vld1q_s32(__base) -#define vld1q_s16(__base) __arm_vld1q_s16(__base) -#define vld1q_u8(__base) __arm_vld1q_u8(__base) -#define vld1q_u32(__base) __arm_vld1q_u32(__base) -#define vld1q_u16(__base) __arm_vld1q_u16(__base) #define vldrhq_gather_offset_s32(__base, __offset) __arm_vldrhq_gather_offset_s32(__base, __offset) #define vldrhq_gather_offset_s16(__base, __offset) __arm_vldrhq_gather_offset_s16(__base, __offset) #define vldrhq_gather_offset_u32(__base, __offset) __arm_vldrhq_gather_offset_u32(__base, __offset) @@ -380,8 +372,6 @@ #define vldrwq_u32(__base) __arm_vldrwq_u32(__base) #define vldrwq_z_s32(__base, __p) __arm_vldrwq_z_s32(__base, __p) #define vldrwq_z_u32(__base, __p) __arm_vldrwq_z_u32(__base, __p) -#define vld1q_f32(__base) __arm_vld1q_f32(__base) -#define vld1q_f16(__base) __arm_vld1q_f16(__base) #define vldrhq_f16(__base) __arm_vldrhq_f16(__base) #define vldrhq_z_f16(__base, __p) __arm_vldrhq_z_f16(__base, __p) #define vldrwq_f32(__base) __arm_vldrwq_f32(__base) @@ -416,14 +406,6 @@ #define vldrwq_gather_shifted_offset_z_f32(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z_f32(__base, __offset, __p) #define vldrwq_gather_shifted_offset_z_s32(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z_s32(__base, __offset, __p) #define vldrwq_gather_shifted_offset_z_u32(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z_u32(__base, __offset, __p) -#define vst1q_f32(__addr, __value) __arm_vst1q_f32(__addr, __value) -#define vst1q_f16(__addr, __value) __arm_vst1q_f16(__addr, __value) -#define vst1q_s8(__addr, __value) __arm_vst1q_s8(__addr, __value) -#define vst1q_s32(__addr, __value) __arm_vst1q_s32(__addr, __value) -#define vst1q_s16(__addr, __value) __arm_vst1q_s16(__addr, __value) -#define vst1q_u8(__addr, __value) __arm_vst1q_u8(__addr, __value) -#define vst1q_u32(__addr, __value) __arm_vst1q_u32(__addr, __value) -#define vst1q_u16(__addr, __value) __arm_vst1q_u16(__addr, __value) #define vstrhq_f16(__addr, __value) __arm_vstrhq_f16(__addr, __value) #define vstrhq_scatter_offset_s32( __base, __offset, __value) __arm_vstrhq_scatter_offset_s32( __base, __offset, __value) #define vstrhq_scatter_offset_s16( __base, __offset, __value) __arm_vstrhq_scatter_offset_s16( __base, __offset, __value) @@ -1537,48 +1519,6 @@ __arm_vldrwq_gather_base_z_u32 (uint32x4_t __addr, const int __offset, mve_pred1 return __builtin_mve_vldrwq_gather_base_z_uv4si (__addr, __offset, __p); } -__extension__ extern __inline int8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vld1q_s8 (int8_t const * __base) -{ - return __builtin_mve_vld1q_sv16qi ((__builtin_neon_qi *) __base); -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vld1q_s32 (int32_t const * __base) -{ - return __builtin_mve_vld1q_sv4si ((__builtin_neon_si *) __base); -} - -__extension__ extern __inline int16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vld1q_s16 (int16_t const * __base) -{ - return __builtin_mve_vld1q_sv8hi ((__builtin_neon_hi *) __base); -} - -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vld1q_u8 (uint8_t const * __base) -{ - return __builtin_mve_vld1q_uv16qi ((__builtin_neon_qi *) __base); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vld1q_u32 (uint32_t const * __base) -{ - return __builtin_mve_vld1q_uv4si ((__builtin_neon_si *) __base); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vld1q_u16 (uint16_t const * __base) -{ - return __builtin_mve_vld1q_uv8hi ((__builtin_neon_hi *) __base); -} - __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrhq_gather_offset_s32 (int16_t const * __base, uint32x4_t __offset) @@ -1917,48 +1857,6 @@ __arm_vldrwq_gather_shifted_offset_z_u32 (uint32_t const * __base, uint32x4_t __ return __builtin_mve_vldrwq_gather_shifted_offset_z_uv4si ((__builtin_neon_si *) __base, __offset, __p); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vst1q_s8 (int8_t * __addr, int8x16_t __value) -{ - __builtin_mve_vst1q_sv16qi ((__builtin_neon_qi *) __addr, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vst1q_s32 (int32_t * __addr, int32x4_t __value) -{ - __builtin_mve_vst1q_sv4si ((__builtin_neon_si *) __addr, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vst1q_s16 (int16_t * __addr, int16x8_t __value) -{ - __builtin_mve_vst1q_sv8hi ((__builtin_neon_hi *) __addr, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vst1q_u8 (uint8_t * __addr, uint8x16_t __value) -{ - __builtin_mve_vst1q_uv16qi ((__builtin_neon_qi *) __addr, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vst1q_u32 (uint32_t * __addr, uint32x4_t __value) -{ - __builtin_mve_vst1q_uv4si ((__builtin_neon_si *) __addr, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vst1q_u16 (uint16_t * __addr, uint16x8_t __value) -{ - __builtin_mve_vst1q_uv8hi ((__builtin_neon_hi *) __addr, __value); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrhq_scatter_offset_s32 (int16_t * __base, uint32x4_t __offset, int32x4_t __value) @@ -4421,20 +4319,6 @@ __arm_vornq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve return __builtin_mve_vornq_m_fv8hf (__inactive, __a, __b, __p); } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vld1q_f32 (float32_t const * __base) -{ - return __builtin_mve_vld1q_fv4sf((__builtin_neon_si *) __base); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vld1q_f16 (float16_t const * __base) -{ - return __builtin_mve_vld1q_fv8hf((__builtin_neon_hi *) __base); -} - __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrwq_f32 (float32_t const * __base) @@ -4547,20 +4431,6 @@ __arm_vstrwq_f32 (float32_t * __addr, float32x4_t __value) __builtin_mve_vstrwq_fv4sf ((__builtin_neon_si *) __addr, __value); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vst1q_f32 (float32_t * __addr, float32x4_t __value) -{ - __builtin_mve_vst1q_fv4sf ((__builtin_neon_si *) __addr, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vst1q_f16 (float16_t * __addr, float16x8_t __value) -{ - __builtin_mve_vst1q_fv8hf ((__builtin_neon_hi *) __addr, __value); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrhq_f16 (float16_t * __addr, float16x8_t __value) @@ -5651,48 +5521,6 @@ __arm_vldrbq_gather_offset_z (uint8_t const * __base, uint16x8_t __offset, mve_p return __arm_vldrbq_gather_offset_z_u16 (__base, __offset, __p); } -__extension__ extern __inline int8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vld1q (int8_t const * __base) -{ - return __arm_vld1q_s8 (__base); -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vld1q (int32_t const * __base) -{ - return __arm_vld1q_s32 (__base); -} - -__extension__ extern __inline int16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vld1q (int16_t const * __base) -{ - return __arm_vld1q_s16 (__base); -} - -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vld1q (uint8_t const * __base) -{ - return __arm_vld1q_u8 (__base); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vld1q (uint32_t const * __base) -{ - return __arm_vld1q_u32 (__base); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vld1q (uint16_t const * __base) -{ - return __arm_vld1q_u16 (__base); -} - __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrhq_gather_offset (int16_t const * __base, uint32x4_t __offset) @@ -5917,48 +5745,6 @@ __arm_vldrwq_gather_shifted_offset_z (uint32_t const * __base, uint32x4_t __offs return __arm_vldrwq_gather_shifted_offset_z_u32 (__base, __offset, __p); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vst1q (int8_t * __addr, int8x16_t __value) -{ - __arm_vst1q_s8 (__addr, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vst1q (int32_t * __addr, int32x4_t __value) -{ - __arm_vst1q_s32 (__addr, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vst1q (int16_t * __addr, int16x8_t __value) -{ - __arm_vst1q_s16 (__addr, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vst1q (uint8_t * __addr, uint8x16_t __value) -{ - __arm_vst1q_u8 (__addr, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vst1q (uint32_t * __addr, uint32x4_t __value) -{ - __arm_vst1q_u32 (__addr, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vst1q (uint16_t * __addr, uint16x8_t __value) -{ - __arm_vst1q_u16 (__addr, __value); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrhq_scatter_offset (int16_t * __base, uint32x4_t __offset, int32x4_t __value) @@ -7809,20 +7595,6 @@ __arm_vornq_m (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pre return __arm_vornq_m_f16 (__inactive, __a, __b, __p); } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vld1q (float32_t const * __base) -{ - return __arm_vld1q_f32 (__base); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vld1q (float16_t const * __base) -{ - return __arm_vld1q_f16 (__base); -} - __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrhq_gather_offset (float16_t const * __base, uint16x8_t __offset) @@ -7893,20 +7665,6 @@ __arm_vstrwq (float32_t * __addr, float32x4_t __value) __arm_vstrwq_f32 (__addr, __value); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vst1q (float32_t * __addr, float32x4_t __value) -{ - __arm_vst1q_f32 (__addr, __value); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vst1q (float16_t * __addr, float16x8_t __value) -{ - __arm_vst1q_f16 (__addr, __value); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrhq (float16_t * __addr, float16x8_t __value) @@ -8670,17 +8428,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vornq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vornq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) -#define __arm_vld1q(p0) (\ - _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *)), \ - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *)), \ - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *)), \ - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *)), \ - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *)), \ - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *)), \ - int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *)), \ - int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *)))) - #define __arm_vld1q_z(p0,p1) ( \ _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), p1), \ @@ -8792,17 +8539,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x2_t]: __arm_vst2q_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), __ARM_mve_coerce(__p1, float16x8x2_t)), \ int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x2_t]: __arm_vst2q_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *), __ARM_mve_coerce(__p1, float32x4x2_t)));}) -#define __arm_vst1q(p0,p1) ({ __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vst1q_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t)), \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vst1q_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, int16x8_t)), \ - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vst1q_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), __ARM_mve_coerce(__p1, int32x4_t)), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vst1q_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vst1q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vst1q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4_t)), \ - int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8_t]: __arm_vst1q_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), __ARM_mve_coerce(__p1, float16x8_t)), \ - int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vst1q_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *), __ARM_mve_coerce(__p1, float32x4_t)));}) - #define __arm_vstrhq(p0,p1) ({ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vstrhq_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, int16x8_t)), \ @@ -9149,15 +8885,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_p_s32 (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \ int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_p_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));}) -#define __arm_vld1q(p0) (\ - _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \ - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *)), \ - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *)), \ - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *)), \ - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *)), \ - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *)), \ - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *)))) - #define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrhq_gather_offset_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ @@ -9206,15 +8933,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vldrwq_gather_shifted_offset_z_s32 (__ARM_mve_coerce_s32_ptr(__p0, int32_t *), p1, p2), \ int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vldrwq_gather_shifted_offset_z_u32 (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1, p2));}) -#define __arm_vst1q(p0,p1) ({ __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vst1q_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t)), \ - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vst1q_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, int16x8_t)), \ - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vst1q_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), __ARM_mve_coerce(__p1, int32x4_t)), \ - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vst1q_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \ - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vst1q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vst1q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4_t)));}) - #define __arm_vst1q_p(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \ int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vst1q_p_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t), p2), \ diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 366cec0812a..b0d3443da9c 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -3690,7 +3690,7 @@ } [(set_attr "length" "8")]) -(define_expand "mve_vld1q_f" +(define_expand "@mve_vld1q_f" [(match_operand:MVE_0 0 "s_register_operand") (unspec:MVE_0 [(match_operand: 1 "mve_memory_operand")] VLD1Q_F) ] @@ -3700,7 +3700,7 @@ DONE; }) -(define_expand "mve_vld1q_" +(define_expand "@mve_vld1q_" [(match_operand:MVE_2 0 "s_register_operand") (unspec:MVE_2 [(match_operand:MVE_2 1 "mve_memory_operand")] VLD1Q) ] @@ -4408,7 +4408,7 @@ } [(set_attr "length" "4")]) -(define_expand "mve_vst1q_f" +(define_expand "@mve_vst1q_f" [(match_operand: 0 "mve_memory_operand") (unspec: [(match_operand:MVE_0 1 "s_register_operand")] VST1Q_F) ] @@ -4418,7 +4418,7 @@ DONE; }) -(define_expand "mve_vst1q_" +(define_expand "@mve_vst1q_" [(match_operand:MVE_2 0 "mve_memory_operand") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand")] VST1Q) ]