diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c index 624839ef58b3e4b49cb70dfc3dfbca141941eb7f..7afa3396a6d3e46165ca634ecc60ec42fad78a6e 100644 --- a/gcc/config/arm/arm-builtins.c +++ b/gcc/config/arm/arm-builtins.c @@ -241,6 +241,12 @@ typedef struct { #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \ VAR9 (T, N, A, B, C, D, E, F, G, H, I) \ VAR1 (T, N, J) +#define VAR11(T, N, A, B, C, D, E, F, G, H, I, J, K) \ + VAR10 (T, N, A, B, C, D, E, F, G, H, I, J) \ + VAR1 (T, N, K) +#define VAR12(T, N, A, B, C, D, E, F, G, H, I, J, K, L) \ + VAR11 (T, N, A, B, C, D, E, F, G, H, I, J, K) \ + VAR1 (T, N, L) /* The NEON builtin data can be found in arm_neon_builtins.def. The mode entries in the following table correspond to the "key" type of the diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index b1c9cc76a4cc3480cd23ec254390f492721c4d04..66622dfcfe2d6f3d575db98a1420f6a58e13baee 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -166,6 +166,20 @@ typedef struct uint64x2x2_t uint64x2_t val[2]; } uint64x2x2_t; +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +typedef struct float16x4x2_t +{ + float16x4_t val[2]; +} float16x4x2_t; +#endif + +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +typedef struct float16x8x2_t +{ + float16x8_t val[2]; +} float16x8x2_t; +#endif + typedef struct float32x2x2_t { float32x2_t val[2]; @@ -292,6 +306,20 @@ typedef struct uint64x2x3_t uint64x2_t val[3]; } uint64x2x3_t; +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +typedef struct float16x4x3_t +{ + float16x4_t val[3]; +} float16x4x3_t; +#endif + +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +typedef struct float16x8x3_t +{ + float16x8_t val[3]; +} float16x8x3_t; +#endif + typedef struct float32x2x3_t { float32x2_t val[3]; @@ -418,6 +446,20 @@ typedef struct uint64x2x4_t uint64x2_t val[4]; } uint64x2x4_t; +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +typedef struct float16x4x4_t +{ + float16x4_t val[4]; +} float16x4x4_t; +#endif + +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +typedef struct float16x8x4_t +{ + float16x8_t val[4]; +} float16x8x4_t; +#endif + typedef struct float32x2x4_t { float32x2_t val[4]; @@ -6045,6 +6087,14 @@ vcombine_s64 (int64x1_t __a, int64x1_t __b) return (int64x2_t)__builtin_neon_vcombinedi (__a, __b); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vcombine_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vcombinev4hf (__a, __b); +} +#endif + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vcombine_f32 (float32x2_t __a, float32x2_t __b) { @@ -6119,6 +6169,14 @@ vget_high_s64 (int64x2_t __a) return (int64x1_t)__builtin_neon_vget_highv2di (__a); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vget_high_f16 (float16x8_t __a) +{ + return __builtin_neon_vget_highv8hf (__a); +} +#endif + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vget_high_f32 (float32x4_t __a) { @@ -6179,6 +6237,14 @@ vget_low_s32 (int32x4_t __a) return (int32x2_t)__builtin_neon_vget_lowv4si (__a); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vget_low_f16 (float16x8_t __a) +{ + return __builtin_neon_vget_lowv8hf (__a); +} +#endif + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vget_low_f32 (float32x4_t __a) { @@ -8730,6 +8796,14 @@ vld1_s64 (const int64_t * __a) return (int64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vld1_f16 (const float16_t * __a) +{ + return __builtin_neon_vld1v4hf (__a); +} +#endif + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vld1_f32 (const float32_t * __a) { @@ -8804,6 +8878,14 @@ vld1q_s64 (const int64_t * __a) return (int64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vld1q_f16 (const float16_t * __a) +{ + return __builtin_neon_vld1v8hf (__a); +} +#endif + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vld1q_f32 (const float32_t * __a) { @@ -9208,6 +9290,14 @@ vst1_s64 (int64_t * __a, int64x1_t __b) __builtin_neon_vst1di ((__builtin_neon_di *) __a, __b); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_f16 (float16_t * __a, float16x4_t __b) +{ + __builtin_neon_vst1v4hf (__a, __b); +} +#endif + __extension__ static __inline void __attribute__ ((__always_inline__)) vst1_f32 (float32_t * __a, float32x2_t __b) { @@ -9282,6 +9372,14 @@ vst1q_s64 (int64_t * __a, int64x2_t __b) __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, __b); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_f16 (float16_t * __a, float16x8_t __b) +{ + __builtin_neon_vst1v8hf (__a, __b); +} +#endif + __extension__ static __inline void __attribute__ ((__always_inline__)) vst1q_f32 (float32_t * __a, float32x4_t __b) { @@ -9342,6 +9440,14 @@ vst1_lane_s32 (int32_t * __a, int32x2_t __b, const int __c) __builtin_neon_vst1_lanev2si ((__builtin_neon_si *) __a, __b, __c); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_f16 (float16_t * __a, float16x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4hf (__a, __b, __c); +} +#endif + __extension__ static __inline void __attribute__ ((__always_inline__)) vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c) { @@ -9416,6 +9522,14 @@ vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const int __c) __builtin_neon_vst1_lanev4si ((__builtin_neon_si *) __a, __b, __c); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_f16 (float16_t * __a, float16x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8hf (__a, __b, __c); +} +#endif + __extension__ static __inline void __attribute__ ((__always_inline__)) vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c) { @@ -9496,6 +9610,16 @@ vld2_s32 (const int32_t * __a) return __rv.__i; } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__)) +vld2_f16 (const float16_t * __a) +{ + union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2v4hf (__a); + return __rv.__i; +} +#endif + __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) vld2_f32 (const float32_t * __a) { @@ -9594,6 +9718,16 @@ vld2q_s32 (const int32_t * __a) return __rv.__i; } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__)) +vld2q_f16 (const float16_t * __a) +{ + union { float16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2v8hf (__a); + return __rv.__i; +} +#endif + __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) vld2q_f32 (const float32_t * __a) { @@ -9669,6 +9803,17 @@ vld2_lane_s32 (const int32_t * __a, int32x2x2_t __b, const int __c) return __rv.__i; } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__)) +vld2_lane_f16 (const float16_t * __a, float16x4x2_t __b, const int __c) +{ + union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev4hf ( __a, __bu.__o, __c); + return __rv.__i; +} +#endif + __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c) { @@ -9741,6 +9886,17 @@ vld2q_lane_s32 (const int32_t * __a, int32x4x2_t __b, const int __c) return __rv.__i; } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__)) +vld2q_lane_f16 (const float16_t * __a, float16x8x2_t __b, const int __c) +{ + union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { float16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld2_lanev8hf (__a, __bu.__o, __c); + return __rv.__i; +} +#endif + __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c) { @@ -9801,6 +9957,16 @@ vld2_dup_s32 (const int32_t * __a) return __rv.__i; } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__)) +vld2_dup_f16 (const float16_t * __a) +{ + union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld2_dupv4hf (__a); + return __rv.__i; +} +#endif + __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) vld2_dup_f32 (const float32_t * __a) { @@ -9896,6 +10062,15 @@ vst2_s32 (int32_t * __a, int32x2x2_t __b) __builtin_neon_vst2v2si ((__builtin_neon_si *) __a, __bu.__o); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_f16 (float16_t * __a, float16x4x2_t __b) +{ + union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2v4hf (__a, __bu.__o); +} +#endif + __extension__ static __inline void __attribute__ ((__always_inline__)) vst2_f32 (float32_t * __a, float32x2x2_t __b) { @@ -9982,6 +10157,15 @@ vst2q_s32 (int32_t * __a, int32x4x2_t __b) __builtin_neon_vst2v4si ((__builtin_neon_si *) __a, __bu.__o); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_f16 (float16_t * __a, float16x8x2_t __b) +{ + union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2v8hf (__a, __bu.__o); +} +#endif + __extension__ static __inline void __attribute__ ((__always_inline__)) vst2q_f32 (float32_t * __a, float32x4x2_t __b) { @@ -10045,6 +10229,15 @@ vst2_lane_s32 (int32_t * __a, int32x2x2_t __b, const int __c) __builtin_neon_vst2_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_f16 (float16_t * __a, float16x4x2_t __b, const int __c) +{ + union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst2_lanev4hf (__a, __bu.__o, __c); +} +#endif + __extension__ static __inline void __attribute__ ((__always_inline__)) vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c) { @@ -10101,6 +10294,15 @@ vst2q_lane_s32 (int32_t * __a, int32x4x2_t __b, const int __c) __builtin_neon_vst2_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_f16 (float16_t * __a, float16x8x2_t __b, const int __c) +{ + union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst2_lanev8hf (__a, __bu.__o, __c); +} +#endif + __extension__ static __inline void __attribute__ ((__always_inline__)) vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c) { @@ -10153,6 +10355,16 @@ vld3_s32 (const int32_t * __a) return __rv.__i; } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__)) +vld3_f16 (const float16_t * __a) +{ + union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3v4hf (__a); + return __rv.__i; +} +#endif + __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) vld3_f32 (const float32_t * __a) { @@ -10251,6 +10463,16 @@ vld3q_s32 (const int32_t * __a) return __rv.__i; } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__)) +vld3q_f16 (const float16_t * __a) +{ + union { float16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3v8hf (__a); + return __rv.__i; +} +#endif + __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__)) vld3q_f32 (const float32_t * __a) { @@ -10326,6 +10548,17 @@ vld3_lane_s32 (const int32_t * __a, int32x2x3_t __b, const int __c) return __rv.__i; } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__)) +vld3_lane_f16 (const float16_t * __a, float16x4x3_t __b, const int __c) +{ + union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev4hf (__a, __bu.__o, __c); + return __rv.__i; +} +#endif + __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c) { @@ -10398,6 +10631,17 @@ vld3q_lane_s32 (const int32_t * __a, int32x4x3_t __b, const int __c) return __rv.__i; } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__)) +vld3q_lane_f16 (const float16_t * __a, float16x8x3_t __b, const int __c) +{ + union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + union { float16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld3_lanev8hf (__a, __bu.__o, __c); + return __rv.__i; +} +#endif + __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__)) vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c) { @@ -10458,6 +10702,16 @@ vld3_dup_s32 (const int32_t * __a) return __rv.__i; } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__)) +vld3_dup_f16 (const float16_t * __a) +{ + union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld3_dupv4hf (__a); + return __rv.__i; +} +#endif + __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) vld3_dup_f32 (const float32_t * __a) { @@ -10553,6 +10807,15 @@ vst3_s32 (int32_t * __a, int32x2x3_t __b) __builtin_neon_vst3v2si ((__builtin_neon_si *) __a, __bu.__o); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_f16 (float16_t * __a, float16x4x3_t __b) +{ + union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3v4hf (__a, __bu.__o); +} +#endif + __extension__ static __inline void __attribute__ ((__always_inline__)) vst3_f32 (float32_t * __a, float32x2x3_t __b) { @@ -10639,6 +10902,15 @@ vst3q_s32 (int32_t * __a, int32x4x3_t __b) __builtin_neon_vst3v4si ((__builtin_neon_si *) __a, __bu.__o); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_f16 (float16_t * __a, float16x8x3_t __b) +{ + union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3v8hf (__a, __bu.__o); +} +#endif + __extension__ static __inline void __attribute__ ((__always_inline__)) vst3q_f32 (float32_t * __a, float32x4x3_t __b) { @@ -10702,6 +10974,15 @@ vst3_lane_s32 (int32_t * __a, int32x2x3_t __b, const int __c) __builtin_neon_vst3_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_f16 (float16_t * __a, float16x4x3_t __b, const int __c) +{ + union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst3_lanev4hf (__a, __bu.__o, __c); +} +#endif + __extension__ static __inline void __attribute__ ((__always_inline__)) vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c) { @@ -10758,6 +11039,15 @@ vst3q_lane_s32 (int32_t * __a, int32x4x3_t __b, const int __c) __builtin_neon_vst3_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_f16 (float16_t * __a, float16x8x3_t __b, const int __c) +{ + union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst3_lanev8hf (__a, __bu.__o, __c); +} +#endif + __extension__ static __inline void __attribute__ ((__always_inline__)) vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c) { @@ -10810,6 +11100,16 @@ vld4_s32 (const int32_t * __a) return __rv.__i; } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__)) +vld4_f16 (const float16_t * __a) +{ + union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4v4hf (__a); + return __rv.__i; +} +#endif + __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) vld4_f32 (const float32_t * __a) { @@ -10908,6 +11208,16 @@ vld4q_s32 (const int32_t * __a) return __rv.__i; } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__)) +vld4q_f16 (const float16_t * __a) +{ + union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4v8hf (__a); + return __rv.__i; +} +#endif + __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__)) vld4q_f32 (const float32_t * __a) { @@ -10983,6 +11293,18 @@ vld4_lane_s32 (const int32_t * __a, int32x2x4_t __b, const int __c) return __rv.__i; } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__)) +vld4_lane_f16 (const float16_t * __a, float16x4x4_t __b, const int __c) +{ + union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev4hf (__a, + __bu.__o, __c); + return __rv.__i; +} +#endif + __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c) { @@ -11055,6 +11377,18 @@ vld4q_lane_s32 (const int32_t * __a, int32x4x4_t __b, const int __c) return __rv.__i; } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__)) +vld4q_lane_f16 (const float16_t * __a, float16x8x4_t __b, const int __c) +{ + union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld4_lanev8hf (__a, + __bu.__o, __c); + return __rv.__i; +} +#endif + __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__)) vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c) { @@ -11115,6 +11449,16 @@ vld4_dup_s32 (const int32_t * __a) return __rv.__i; } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__)) +vld4_dup_f16 (const float16_t * __a) +{ + union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld4_dupv4hf (__a); + return __rv.__i; +} +#endif + __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) vld4_dup_f32 (const float32_t * __a) { @@ -11210,6 +11554,15 @@ vst4_s32 (int32_t * __a, int32x2x4_t __b) __builtin_neon_vst4v2si ((__builtin_neon_si *) __a, __bu.__o); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_f16 (float16_t * __a, float16x4x4_t __b) +{ + union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4v4hf (__a, __bu.__o); +} +#endif + __extension__ static __inline void __attribute__ ((__always_inline__)) vst4_f32 (float32_t * __a, float32x2x4_t __b) { @@ -11296,6 +11649,15 @@ vst4q_s32 (int32_t * __a, int32x4x4_t __b) __builtin_neon_vst4v4si ((__builtin_neon_si *) __a, __bu.__o); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_f16 (float16_t * __a, float16x8x4_t __b) +{ + union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4v8hf (__a, __bu.__o); +} +#endif + __extension__ static __inline void __attribute__ ((__always_inline__)) vst4q_f32 (float32_t * __a, float32x4x4_t __b) { @@ -11359,6 +11721,15 @@ vst4_lane_s32 (int32_t * __a, int32x2x4_t __b, const int __c) __builtin_neon_vst4_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_f16 (float16_t * __a, float16x4x4_t __b, const int __c) +{ + union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev4hf (__a, __bu.__o, __c); +} +#endif + __extension__ static __inline void __attribute__ ((__always_inline__)) vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c) { @@ -11415,6 +11786,15 @@ vst4q_lane_s32 (int32_t * __a, int32x4x4_t __b, const int __c) __builtin_neon_vst4_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_lane_f16 (float16_t * __a, float16x8x4_t __b, const int __c) +{ + union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst4_lanev8hf (__a, __bu.__o, __c); +} +#endif + __extension__ static __inline void __attribute__ ((__always_inline__)) vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c) { diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index f150b98b8096e94c6b39bbe477e5052b15f0313f..0b719df760747af7642bd14ab14a9b2144d43359 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -164,9 +164,9 @@ VAR10 (UNOP, vdup_n, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) VAR10 (GETLANE, vdup_lane, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) -VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) -VAR5 (UNOP, vget_high, v16qi, v8hi, v4si, v4sf, v2di) -VAR5 (UNOP, vget_low, v16qi, v8hi, v4si, v4sf, v2di) +VAR6 (COMBINE, vcombine, v8qi, v4hi, v4hf, v2si, v2sf, di) +VAR6 (UNOP, vget_high, v16qi, v8hi, v8hf, v4si, v4sf, v2di) +VAR6 (UNOP, vget_low, v16qi, v8hi, v8hf, v4si, v4sf, v2di) VAR3 (UNOP, vmovn, v8hi, v4si, v2di) VAR3 (UNOP, vqmovns, v8hi, v4si, v2di) VAR3 (UNOP, vqmovnu, v8hi, v4si, v2di) @@ -242,40 +242,40 @@ VAR6 (UNOP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di, ti) VAR6 (UNOP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di, ti) VAR6 (UNOP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di, ti) VAR6 (UNOP, vreinterpretti, v16qi, v8hi, v4si, v4sf, v2di, ti) -VAR10 (LOAD1, vld1, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) +VAR12 (LOAD1, vld1, + v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di) VAR10 (LOAD1LANE, vld1_lane, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) VAR10 (LOAD1, vld1_dup, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) -VAR10 (STORE1, vst1, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) -VAR10 (STORE1LANE, vst1_lane, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) -VAR9 (LOAD1, vld2, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) -VAR7 (LOAD1LANE, vld2_lane, - v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) -VAR5 (LOAD1, vld2_dup, v8qi, v4hi, v2si, v2sf, di) -VAR9 (STORE1, vst2, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) -VAR7 (STORE1LANE, vst2_lane, - v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) -VAR9 (LOAD1, vld3, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) -VAR7 (LOAD1LANE, vld3_lane, - v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) -VAR5 (LOAD1, vld3_dup, v8qi, v4hi, v2si, v2sf, di) -VAR9 (STORE1, vst3, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) -VAR7 (STORE1LANE, vst3_lane, - v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) -VAR9 (LOAD1, vld4, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) -VAR7 (LOAD1LANE, vld4_lane, - v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) -VAR5 (LOAD1, vld4_dup, v8qi, v4hi, v2si, v2sf, di) -VAR9 (STORE1, vst4, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) -VAR7 (STORE1LANE, vst4_lane, - v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) +VAR12 (STORE1, vst1, + v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di) +VAR12 (STORE1LANE, vst1_lane, + v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di) +VAR11 (LOAD1, vld2, + v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf) +VAR9 (LOAD1LANE, vld2_lane, + v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf) +VAR6 (LOAD1, vld2_dup, v8qi, v4hi, v4hf, v2si, v2sf, di) +VAR11 (STORE1, vst2, + v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf) +VAR9 (STORE1LANE, vst2_lane, + v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf) +VAR11 (LOAD1, vld3, + v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf) +VAR9 (LOAD1LANE, vld3_lane, + v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf) +VAR6 (LOAD1, vld3_dup, v8qi, v4hi, v4hf, v2si, v2sf, di) +VAR11 (STORE1, vst3, + v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf) +VAR9 (STORE1LANE, vst3_lane, + v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf) +VAR11 (LOAD1, vld4, + v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf) +VAR9 (LOAD1LANE, vld4_lane, + v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf) +VAR6 (LOAD1, vld4_dup, v8qi, v4hi, v4hf, v2si, v2sf, di) +VAR11 (STORE1, vst4, + v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf) +VAR9 (STORE1LANE, vst4_lane, + v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf) diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 1e7f3f17a8aff29baca783e839c9c844d23e21b3..47cc1eebecdbe4469a3e3666a39ae2a0d6272b9f 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -65,20 +65,32 @@ ;; Integer modes supported by Neon and IWMMXT, except V2DI (define_mode_iterator VINTW [V2SI V4HI V8QI V4SI V8HI V16QI]) -;; Double-width vector modes. +;; Double-width vector modes, on which we support arithmetic (no HF!) (define_mode_iterator VD [V8QI V4HI V2SI V2SF]) +;; Double-width vector modes plus 64-bit elements for vreinterpret + vcreate. +(define_mode_iterator VD_RE [V8QI V4HI V2SI V2SF DI]) + ;; Double-width vector modes plus 64-bit elements. -(define_mode_iterator VDX [V8QI V4HI V2SI V2SF DI]) +(define_mode_iterator VDX [V8QI V4HI V4HF V2SI V2SF DI]) + +;; Double-width vector modes, with V4HF - for vldN_lane and vstN_lane. +(define_mode_iterator VD_LANE [V8QI V4HI V4HF V2SI V2SF]) ;; Double-width vector modes without floating-point elements. (define_mode_iterator VDI [V8QI V4HI V2SI]) -;; Quad-width vector modes. +;; Quad-width vector modes supporting arithmetic (no HF!). (define_mode_iterator VQ [V16QI V8HI V4SI V4SF]) +;; Quad-width vector modes, including V8HF. +(define_mode_iterator VQ2 [V16QI V8HI V8HF V4SI V4SF]) + +;; Quad-width vector modes with 16- or 32-bit elements +(define_mode_iterator VQ_HS [V8HI V8HF V4SI V4SF]) + ;; Quad-width vector modes plus 64-bit elements. -(define_mode_iterator VQX [V16QI V8HI V4SI V4SF V2DI]) +(define_mode_iterator VQX [V16QI V8HI V8HF V4SI V4SF V2DI]) ;; Quad-width vector modes without floating-point elements. (define_mode_iterator VQI [V16QI V8HI V4SI]) @@ -111,7 +123,8 @@ (define_mode_iterator VDQI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI]) ;; Vector modes, including 64-bit integer elements. -(define_mode_iterator VDQX [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF DI V2DI]) +(define_mode_iterator VDQX [V8QI V16QI V4HI V8HI V2SI V4SI + V4HF V8HF V2SF V4SF DI V2DI]) ;; Vector modes including 64-bit integer elements, but no floats. (define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI]) @@ -366,7 +379,8 @@ ;; Define element mode for each vector mode. (define_mode_attr V_elem [(V8QI "QI") (V16QI "QI") - (V4HI "HI") (V8HI "HI") + (V4HI "HI") (V8HI "HI") + (V4HF "HF") (V8HF "HF") (V2SI "SI") (V4SI "SI") (V2SF "SF") (V4SF "SF") (DI "DI") (V2DI "DI")]) @@ -383,6 +397,7 @@ ;; size for structure lane/dup loads and stores. (define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI") (V4HI "SI") (V8HI "SI") + (V4HF "SF") (V8HF "SF") (V2SI "V2SI") (V4SI "V2SI") (V2SF "V2SF") (V4SF "V2SF") (DI "V2DI") (V2DI "V2DI")]) @@ -390,6 +405,7 @@ ;; Similar, for three elements. (define_mode_attr V_three_elem [(V8QI "BLK") (V16QI "BLK") (V4HI "BLK") (V8HI "BLK") + (V4HF "BLK") (V8HF "BLK") (V2SI "BLK") (V4SI "BLK") (V2SF "BLK") (V4SF "BLK") (DI "EI") (V2DI "EI")]) @@ -397,6 +413,7 @@ ;; Similar, for four elements. (define_mode_attr V_four_elem [(V8QI "SI") (V16QI "SI") (V4HI "V4HI") (V8HI "V4HI") + (V4HF "V4HF") (V8HF "V4HF") (V2SI "V4SI") (V4SI "V4SI") (V2SF "V4SF") (V4SF "V4SF") (DI "OI") (V2DI "OI")]) @@ -421,7 +438,8 @@ ;; Modes with half the number of equal-sized elements. (define_mode_attr V_HALF [(V16QI "V8QI") (V8HI "V4HI") - (V4SI "V2SI") (V4SF "V2SF") (V2DF "DF") + (V8HF "V4HF") (V4SI "V2SI") + (V4SF "V2SF") (V2DF "DF") (V2DI "DI")]) ;; Same, but lower-case. @@ -431,8 +449,9 @@ ;; Modes with twice the number of equal-sized elements. (define_mode_attr V_DOUBLE [(V8QI "V16QI") (V4HI "V8HI") - (V2SI "V4SI") (V2SF "V4SF") (DF "V2DF") - (DI "V2DI")]) + (V2SI "V4SI") (V4HF "V8HF") + (V2SF "V4SF") (DF "V2DF") + (DI "V2DI")]) ;; Same, but lower-case. (define_mode_attr V_double [(V8QI "v16qi") (V4HI "v8hi") @@ -454,8 +473,9 @@ ;; Mode of result of comparison operations (and bit-select operand 1). (define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI") - (V4HI "V4HI") (V8HI "V8HI") + (V4HI "V4HI") (V8HI "V8HI") (V2SI "V2SI") (V4SI "V4SI") + (V4HF "V4HI") (V8HF "V8HI") (V2SF "V2SI") (V4SF "V4SI") (DI "DI") (V2DI "V2DI")]) @@ -492,12 +512,14 @@ (define_mode_attr V_uf_sclr [(V8QI "u8") (V16QI "u8") (V4HI "u16") (V8HI "u16") (V2SI "32") (V4SI "32") + (V4HF "u16") (V8HF "u16") (V2SF "32") (V4SF "32")]) (define_mode_attr V_sz_elem [(V8QI "8") (V16QI "8") (V4HI "16") (V8HI "16") (V2SI "32") (V4SI "32") (DI "64") (V2DI "64") + (V4HF "16") (V8HF "16") (V2SF "32") (V4SF "32")]) (define_mode_attr V_elem_ch [(V8QI "b") (V16QI "b") @@ -564,6 +586,7 @@ (DI "true") (V2DI "false")]) (define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16") + (V4HF "4") (V8HF "8") (V4HI "4") (V8HI "8") (V2SI "2") (V4SI "4") (V2SF "2") (V4SF "4") @@ -607,6 +630,7 @@ (define_mode_attr q [(V8QI "") (V16QI "_q") (V4HI "") (V8HI "_q") (V2SI "") (V4SI "_q") + (V4HF "") (V8HF "_q") (V2SF "") (V4SF "_q") (DI "") (V2DI "_q") (DF "") (V2DF "_q")]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 4af74ce96ede0f7dbbea1ba95f938b5facdf3570..f8d6e74653709a492d85b24e69597d478223c622 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -320,11 +320,11 @@ [(set_attr "type" "neon_load1_all_lanes,neon_from_gp")]) (define_insn "vec_set_internal" - [(set (match_operand:VQ 0 "s_register_operand" "=w,w") - (vec_merge:VQ - (vec_duplicate:VQ + [(set (match_operand:VQ2 0 "s_register_operand" "=w,w") + (vec_merge:VQ2 + (vec_duplicate:VQ2 (match_operand: 1 "nonimmediate_operand" "Um,r")) - (match_operand:VQ 3 "s_register_operand" "0,0") + (match_operand:VQ2 3 "s_register_operand" "0,0") (match_operand:SI 2 "immediate_operand" "i,i")))] "TARGET_NEON" { @@ -407,7 +407,7 @@ (define_insn "vec_extract" [(set (match_operand: 0 "nonimmediate_operand" "=Um,r") (vec_select: - (match_operand:VQ 1 "s_register_operand" "w,w") + (match_operand:VQ2 1 "s_register_operand" "w,w") (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] "TARGET_NEON" { @@ -2607,7 +2607,7 @@ [(set (match_operand:SI 0 "s_register_operand" "=r") (sign_extend:SI (vec_select: - (match_operand:VQ 1 "s_register_operand" "w") + (match_operand:VQ2 1 "s_register_operand" "w") (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_NEON" { @@ -2634,7 +2634,7 @@ [(set (match_operand:SI 0 "s_register_operand" "=r") (zero_extend:SI (vec_select: - (match_operand:VQ 1 "s_register_operand" "w") + (match_operand:VQ2 1 "s_register_operand" "w") (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_NEON" { @@ -2774,7 +2774,7 @@ }) (define_expand "neon_vcreate" - [(match_operand:VDX 0 "s_register_operand" "") + [(match_operand:VD_RE 0 "s_register_operand" "") (match_operand:DI 1 "general_operand" "")] "TARGET_NEON" { @@ -4125,7 +4125,7 @@ (define_expand "neon_vreinterpretv8qi" [(match_operand:V8QI 0 "s_register_operand" "") - (match_operand:VDX 1 "s_register_operand" "")] + (match_operand:VD_RE 1 "s_register_operand" "")] "TARGET_NEON" { neon_reinterpret (operands[0], operands[1]); @@ -4134,7 +4134,7 @@ (define_expand "neon_vreinterpretv4hi" [(match_operand:V4HI 0 "s_register_operand" "") - (match_operand:VDX 1 "s_register_operand" "")] + (match_operand:VD_RE 1 "s_register_operand" "")] "TARGET_NEON" { neon_reinterpret (operands[0], operands[1]); @@ -4143,7 +4143,7 @@ (define_expand "neon_vreinterpretv2si" [(match_operand:V2SI 0 "s_register_operand" "") - (match_operand:VDX 1 "s_register_operand" "")] + (match_operand:VD_RE 1 "s_register_operand" "")] "TARGET_NEON" { neon_reinterpret (operands[0], operands[1]); @@ -4152,7 +4152,7 @@ (define_expand "neon_vreinterpretv2sf" [(match_operand:V2SF 0 "s_register_operand" "") - (match_operand:VDX 1 "s_register_operand" "")] + (match_operand:VD_RE 1 "s_register_operand" "")] "TARGET_NEON" { neon_reinterpret (operands[0], operands[1]); @@ -4161,7 +4161,7 @@ (define_expand "neon_vreinterpretdi" [(match_operand:DI 0 "s_register_operand" "") - (match_operand:VDX 1 "s_register_operand" "")] + (match_operand:VD_RE 1 "s_register_operand" "")] "TARGET_NEON" { neon_reinterpret (operands[0], operands[1]); @@ -4420,14 +4420,14 @@ (define_expand "vec_load_lanesoi" [(set (match_operand:OI 0 "s_register_operand") (unspec:OI [(match_operand:OI 1 "neon_struct_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD2))] "TARGET_NEON") (define_insn "neon_vld2" [(set (match_operand:OI 0 "s_register_operand" "=w") (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD2))] "TARGET_NEON" "vld2.\t%h0, %A1" @@ -4438,7 +4438,7 @@ (unspec:TI [(match_operand: 1 "neon_struct_operand" "Um") (match_operand:TI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") - (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD2_LANE))] "TARGET_NEON" { @@ -4463,7 +4463,7 @@ (unspec:OI [(match_operand: 1 "neon_struct_operand" "Um") (match_operand:OI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") - (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD2_LANE))] "TARGET_NEON" { @@ -4534,14 +4534,14 @@ (define_expand "vec_store_lanesoi" [(set (match_operand:OI 0 "neon_struct_operand") (unspec:OI [(match_operand:OI 1 "s_register_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST2))] "TARGET_NEON") (define_insn "neon_vst2" [(set (match_operand:OI 0 "neon_struct_operand" "=Um") (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST2))] "TARGET_NEON" "vst2.\t%h1, %A0" @@ -4553,7 +4553,7 @@ (unspec: [(match_operand:TI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") - (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST2_LANE))] "TARGET_NEON" { @@ -4578,7 +4578,7 @@ (unspec: [(match_operand:OI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") - (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST2_LANE))] "TARGET_NEON" { @@ -4631,7 +4631,7 @@ (define_expand "vec_load_lanesci" [(match_operand:CI 0 "s_register_operand") (match_operand:CI 1 "neon_struct_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { emit_insn (gen_neon_vld3 (operands[0], operands[1])); @@ -4641,7 +4641,7 @@ (define_expand "neon_vld3" [(match_operand:CI 0 "s_register_operand") (match_operand:CI 1 "neon_struct_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { rtx mem; @@ -4656,7 +4656,7 @@ (define_insn "neon_vld3qa" [(set (match_operand:CI 0 "s_register_operand" "=w") (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD3A))] "TARGET_NEON" { @@ -4676,7 +4676,7 @@ [(set (match_operand:CI 0 "s_register_operand" "=w") (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") (match_operand:CI 2 "s_register_operand" "0") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD3B))] "TARGET_NEON" { @@ -4697,7 +4697,7 @@ (unspec:EI [(match_operand: 1 "neon_struct_operand" "Um") (match_operand:EI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") - (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD3_LANE))] "TARGET_NEON" { @@ -4724,7 +4724,7 @@ (unspec:CI [(match_operand: 1 "neon_struct_operand" "Um") (match_operand:CI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") - (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD3_LANE))] "TARGET_NEON" { @@ -4804,7 +4804,7 @@ (define_expand "vec_store_lanesci" [(match_operand:CI 0 "neon_struct_operand") (match_operand:CI 1 "s_register_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { emit_insn (gen_neon_vst3 (operands[0], operands[1])); @@ -4814,7 +4814,7 @@ (define_expand "neon_vst3" [(match_operand:CI 0 "neon_struct_operand") (match_operand:CI 1 "s_register_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { rtx mem; @@ -4829,7 +4829,7 @@ (define_insn "neon_vst3qa" [(set (match_operand:EI 0 "neon_struct_operand" "=Um") (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST3A))] "TARGET_NEON" { @@ -4848,7 +4848,7 @@ (define_insn "neon_vst3qb" [(set (match_operand:EI 0 "neon_struct_operand" "=Um") (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST3B))] "TARGET_NEON" { @@ -4869,7 +4869,7 @@ (unspec: [(match_operand:EI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") - (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST3_LANE))] "TARGET_NEON" { @@ -4896,7 +4896,7 @@ (unspec: [(match_operand:CI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") - (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST3_LANE))] "TARGET_NEON" { @@ -4951,7 +4951,7 @@ (define_expand "vec_load_lanesxi" [(match_operand:XI 0 "s_register_operand") (match_operand:XI 1 "neon_struct_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { emit_insn (gen_neon_vld4 (operands[0], operands[1])); @@ -4961,7 +4961,7 @@ (define_expand "neon_vld4" [(match_operand:XI 0 "s_register_operand") (match_operand:XI 1 "neon_struct_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { rtx mem; @@ -4976,7 +4976,7 @@ (define_insn "neon_vld4qa" [(set (match_operand:XI 0 "s_register_operand" "=w") (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD4A))] "TARGET_NEON" { @@ -4997,7 +4997,7 @@ [(set (match_operand:XI 0 "s_register_operand" "=w") (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") (match_operand:XI 2 "s_register_operand" "0") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD4B))] "TARGET_NEON" { @@ -5019,7 +5019,7 @@ (unspec:OI [(match_operand: 1 "neon_struct_operand" "Um") (match_operand:OI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") - (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD4_LANE))] "TARGET_NEON" { @@ -5047,7 +5047,7 @@ (unspec:XI [(match_operand: 1 "neon_struct_operand" "Um") (match_operand:XI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") - (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD4_LANE))] "TARGET_NEON" { @@ -5132,7 +5132,7 @@ (define_expand "vec_store_lanesxi" [(match_operand:XI 0 "neon_struct_operand") (match_operand:XI 1 "s_register_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { emit_insn (gen_neon_vst4 (operands[0], operands[1])); @@ -5142,7 +5142,7 @@ (define_expand "neon_vst4" [(match_operand:XI 0 "neon_struct_operand") (match_operand:XI 1 "s_register_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { rtx mem; @@ -5157,7 +5157,7 @@ (define_insn "neon_vst4qa" [(set (match_operand:OI 0 "neon_struct_operand" "=Um") (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST4A))] "TARGET_NEON" { @@ -5177,7 +5177,7 @@ (define_insn "neon_vst4qb" [(set (match_operand:OI 0 "neon_struct_operand" "=Um") (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST4B))] "TARGET_NEON" { @@ -5199,7 +5199,7 @@ (unspec: [(match_operand:OI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") - (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST4_LANE))] "TARGET_NEON" { @@ -5227,7 +5227,7 @@ (unspec: [(match_operand:XI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") - (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST4_LANE))] "TARGET_NEON" {