From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 99257 invoked by alias); 7 Jul 2015 12:35:37 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 99106 invoked by uid 89); 7 Jul 2015 12:35:36 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-0.8 required=5.0 tests=AWL,BAYES_20,SPF_PASS autolearn=ham version=3.3.2 X-HELO: eu-smtp-delivery-143.mimecast.com Received: from eu-smtp-delivery-143.mimecast.com (HELO eu-smtp-delivery-143.mimecast.com) (146.101.78.143) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Tue, 07 Jul 2015 12:35:13 +0000 Received: from cam-owa2.Emea.Arm.com (fw-tnat.cambridge.arm.com [217.140.96.140]) by eu-smtp-1.mimecast.com with ESMTP id uk-mta-31-pqvVUAMoRQikeRqBi9iiBg-1 Received: from [10.2.207.65] ([10.1.2.79]) by cam-owa2.Emea.Arm.com with Microsoft SMTPSVC(6.0.3790.3959); Tue, 7 Jul 2015 13:35:07 +0100 Message-ID: <559BC77C.7030100@arm.com> Date: Tue, 07 Jul 2015 12:35:00 -0000 From: Alan Lawrence User-Agent: Thunderbird 2.0.0.24 (X11/20101213) MIME-Version: 1.0 To: "gcc-patches@gcc.gnu.org" Subject: [PATCH 6/16][ARM] Remaining float16 intrinsics: vld..., vst..., vget_low/high, vcombine In-Reply-To: <559BC6EC.3000907@arm.com> X-MC-Unique: pqvVUAMoRQikeRqBi9iiBg-1 Content-Type: multipart/mixed; boundary="------------090106040408080804070807" X-IsSubscribed: yes X-SW-Source: 2015-07/txt/msg00479.txt.bz2 This is a multi-part message in MIME format. --------------090106040408080804070807 Content-Type: text/plain; charset=WINDOWS-1252; format=flowed Content-Transfer-Encoding: quoted-printable Content-length: 64 As per https://gcc.gnu.org/ml/gcc-patches/2015-04/msg01341.html --------------090106040408080804070807 Content-Type: text/x-patch; name=06_arm_iterators.patch Content-Transfer-Encoding: quoted-printable Content-Disposition: inline; filename="06_arm_iterators.patch" Content-length: 47392 commit ae6264b144d25fadcbf219e68ddf3d8c5f40be34 Author: Alan Lawrence Date: Thu Dec 11 11:53:59 2014 +0000 ARM 4/4 v2: v(ld|st)[234](q?|_lane|_dup), vcombine, vget_(low|high) (v2= w/ V_uf_sclr) =20=20=20=20 All are tied together with so many iterators! =20=20=20=20 Also vec_extract diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c index 17e39d8..1ee0a3d 100644 --- a/gcc/config/arm/arm-builtins.c +++ b/gcc/config/arm/arm-builtins.c @@ -241,6 +241,12 @@ typedef struct { #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \ VAR9 (T, N, A, B, C, D, E, F, G, H, I) \ VAR1 (T, N, J) +#define VAR11(T, N, A, B, C, D, E, F, G, H, I, J, K) \ + VAR10 (T, N, A, B, C, D, E, F, G, H, I, J) \ + VAR1 (T, N, K) +#define VAR12(T, N, A, B, C, D, E, F, G, H, I, J, K, L) \ + VAR11 (T, N, A, B, C, D, E, F, G, H, I, J, K) \ + VAR1 (T, N, L) =20 /* The NEON builtin data can be found in arm_neon_builtins.def. The mode entries in the following table correspond to the "key" type of= the diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index db73c70..93fb44f 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -162,6 +162,16 @@ typedef struct uint64x2x2_t uint64x2_t val[2]; } uint64x2x2_t; =20 +typedef struct float16x4x2_t +{ + float16x4_t val[2]; +} float16x4x2_t; + +typedef struct float16x8x2_t +{ + float16x8_t val[2]; +} float16x8x2_t; + typedef struct float32x2x2_t { float32x2_t val[2]; @@ -288,6 +298,16 @@ typedef struct uint64x2x3_t uint64x2_t val[3]; } uint64x2x3_t; =20 +typedef struct float16x4x3_t +{ + float16x4_t val[3]; +} float16x4x3_t; + +typedef struct float16x8x3_t +{ + float16x8_t val[3]; +} float16x8x3_t; + typedef struct float32x2x3_t { float32x2_t val[3]; @@ -414,6 +434,16 @@ typedef struct uint64x2x4_t uint64x2_t val[4]; } uint64x2x4_t; =20 +typedef struct float16x4x4_t +{ + float16x4_t val[4]; +} float16x4x4_t; + +typedef struct float16x8x4_t +{ + float16x8_t val[4]; +} float16x8x4_t; + typedef struct float32x2x4_t { float32x2_t val[4]; @@ -6031,6 +6061,12 @@ vcombine_s64 (int64x1_t __a, int64x1_t __b) return (int64x2_t)__builtin_neon_vcombinedi (__a, __b); } =20 +__extension__ static __inline float16x8_t __attribute__ ((__always_inline_= _)) +vcombine_f16 (float16x4_t __a, float16x4_t __b) +{ + return __builtin_neon_vcombinev4hf (__a, __b); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline_= _)) vcombine_f32 (float32x2_t __a, float32x2_t __b) { @@ -6105,6 +6141,12 @@ vget_high_s64 (int64x2_t __a) return (int64x1_t)__builtin_neon_vget_highv2di (__a); } =20 +__extension__ static __inline float16x4_t __attribute__ ((__always_inline_= _)) +vget_high_f16 (float16x8_t __a) +{ + return __builtin_neon_vget_highv8hf (__a); +} + __extension__ static __inline float32x2_t __attribute__ ((__always_inline_= _)) vget_high_f32 (float32x4_t __a) { @@ -6165,6 +6207,12 @@ vget_low_s32 (int32x4_t __a) return (int32x2_t)__builtin_neon_vget_lowv4si (__a); } =20 +__extension__ static __inline float16x4_t __attribute__ ((__always_inline_= _)) +vget_low_f16 (float16x8_t __a) +{ + return __builtin_neon_vget_lowv8hf (__a); +} + __extension__ static __inline float32x2_t __attribute__ ((__always_inline_= _)) vget_low_f32 (float32x4_t __a) { @@ -8712,6 +8760,12 @@ vld1_s64 (const int64_t * __a) return (int64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a= ); } =20 +__extension__ static __inline float16x4_t __attribute__ ((__always_inline_= _)) +vld1_f16 (const float16_t * __a) +{ + return __builtin_neon_vld1v4hf ((const __builtin_neon_hf *) __a); +} + __extension__ static __inline float32x2_t __attribute__ ((__always_inline_= _)) vld1_f32 (const float32_t * __a) { @@ -8786,6 +8840,12 @@ vld1q_s64 (const int64_t * __a) return (int64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) _= _a); } =20 +__extension__ static __inline float16x8_t __attribute__ ((__always_inline_= _)) +vld1q_f16 (const float16_t * __a) +{ + return __builtin_neon_vld1v8hf ((const __builtin_neon_hf *) __a); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline_= _)) vld1q_f32 (const float32_t * __a) { @@ -9183,6 +9243,12 @@ vst1_s64 (int64_t * __a, int64x1_t __b) } =20 __extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_f16 (float16_t * __a, float16x4_t __b) +{ + __builtin_neon_vst1v4hf ((__builtin_neon_hf *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) vst1_f32 (float32_t * __a, float32x2_t __b) { __builtin_neon_vst1v2sf ((__builtin_neon_sf *) __a, __b); @@ -9257,6 +9323,12 @@ vst1q_s64 (int64_t * __a, int64x2_t __b) } =20 __extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_f16 (float16_t * __a, float16x8_t __b) +{ + __builtin_neon_vst1v8hf ((__builtin_neon_hf *) __a, __b); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) vst1q_f32 (float32_t * __a, float32x4_t __b) { __builtin_neon_vst1v4sf ((__builtin_neon_sf *) __a, __b); @@ -9317,6 +9389,12 @@ vst1_lane_s32 (int32_t * __a, int32x2_t __b, const i= nt __c) } =20 __extension__ static __inline void __attribute__ ((__always_inline__)) +vst1_lane_f16 (float16_t * __a, float16x4_t __b, const int __c) +{ + __builtin_neon_vst1_lanev4hf ((__builtin_neon_hf *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c) { __builtin_neon_vst1_lanev2sf ((__builtin_neon_sf *) __a, __b, __c); @@ -9391,6 +9469,12 @@ vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const = int __c) } =20 __extension__ static __inline void __attribute__ ((__always_inline__)) +vst1q_lane_f16 (float16_t * __a, float16x8_t __b, const int __c) +{ + __builtin_neon_vst1_lanev8hf ((__builtin_neon_hf *) __a, __b, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c) { __builtin_neon_vst1_lanev4sf ((__builtin_neon_sf *) __a, __b, __c); @@ -9470,6 +9554,14 @@ vld2_s32 (const int32_t * __a) return __rv.__i; } =20 +__extension__ static __inline float16x4x2_t __attribute__ ((__always_inlin= e__)) +vld2_f16 (const float16_t * __a) +{ + union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o =3D __builtin_neon_vld2v4hf ((const __builtin_neon_hf *) __a); + return __rv.__i; +} + __extension__ static __inline float32x2x2_t __attribute__ ((__always_inlin= e__)) vld2_f32 (const float32_t * __a) { @@ -9568,6 +9660,14 @@ vld2q_s32 (const int32_t * __a) return __rv.__i; } =20 +__extension__ static __inline float16x8x2_t __attribute__ ((__always_inlin= e__)) +vld2q_f16 (const float16_t * __a) +{ + union { float16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o =3D __builtin_neon_vld2v8hf ((const __builtin_neon_hf *) __a); + return __rv.__i; +} + __extension__ static __inline float32x4x2_t __attribute__ ((__always_inlin= e__)) vld2q_f32 (const float32_t * __a) { @@ -9643,6 +9743,16 @@ vld2_lane_s32 (const int32_t * __a, int32x2x2_t __b,= const int __c) return __rv.__i; } =20 +__extension__ static __inline float16x4x2_t __attribute__ ((__always_inlin= e__)) +vld2_lane_f16 (const float16_t * __a, float16x4x2_t __b, const int __c) +{ + union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu =3D { __b }; + union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o =3D __builtin_neon_vld2_lanev4hf ((const __builtin_neon_hf *) _= _a, + __bu.__o, __c); + return __rv.__i; +} + __extension__ static __inline float32x2x2_t __attribute__ ((__always_inlin= e__)) vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c) { @@ -9715,6 +9825,16 @@ vld2q_lane_s32 (const int32_t * __a, int32x4x2_t __b= , const int __c) return __rv.__i; } =20 +__extension__ static __inline float16x8x2_t __attribute__ ((__always_inlin= e__)) +vld2q_lane_f16 (const float16_t * __a, float16x8x2_t __b, const int __c) +{ + union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu =3D { __b }; + union { float16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o =3D __builtin_neon_vld2_lanev8hf ((const __builtin_neon_hf *) _= _a, + __bu.__o, __c); + return __rv.__i; +} + __extension__ static __inline float32x4x2_t __attribute__ ((__always_inlin= e__)) vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c) { @@ -9775,6 +9895,13 @@ vld2_dup_s32 (const int32_t * __a) return __rv.__i; } =20 +__extension__ static __inline float16x4x2_t __attribute__ ((__always_inlin= e__)) +vld2_dup_f16 (const float16_t * __a) +{ + union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o =3D __builtin_neon_vld2_dupv4hf ((const __builtin_neon_hf *) __= a); + return __rv.__i; +} __extension__ static __inline float32x2x2_t __attribute__ ((__always_inlin= e__)) vld2_dup_f32 (const float32_t * __a) { @@ -9871,6 +9998,13 @@ vst2_s32 (int32_t * __a, int32x2x2_t __b) } =20 __extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_f16 (float16_t * __a, float16x4x2_t __b) +{ + union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu =3D { __b }; + __builtin_neon_vst2v4hf ((__builtin_neon_hf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) vst2_f32 (float32_t * __a, float32x2x2_t __b) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu =3D { __b }; @@ -9957,6 +10091,13 @@ vst2q_s32 (int32_t * __a, int32x4x2_t __b) } =20 __extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_f16 (float16_t * __a, float16x8x2_t __b) +{ + union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu =3D { __b }; + __builtin_neon_vst2v8hf ((__builtin_neon_hf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) vst2q_f32 (float32_t * __a, float32x4x2_t __b) { union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu =3D { __b }; @@ -10020,6 +10161,13 @@ vst2_lane_s32 (int32_t * __a, int32x2x2_t __b, con= st int __c) } =20 __extension__ static __inline void __attribute__ ((__always_inline__)) +vst2_lane_f16 (float16_t * __a, float16x4x2_t __b, const int __c) +{ + union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu =3D { __b }; + __builtin_neon_vst2_lanev4hf ((__builtin_neon_hf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c) { union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu =3D { __b }; @@ -10076,6 +10224,13 @@ vst2q_lane_s32 (int32_t * __a, int32x4x2_t __b, co= nst int __c) } =20 __extension__ static __inline void __attribute__ ((__always_inline__)) +vst2q_lane_f16 (float16_t * __a, float16x8x2_t __b, const int __c) +{ + union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu =3D { __b }; + __builtin_neon_vst2_lanev8hf ((__builtin_neon_hf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c) { union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu =3D { __b }; @@ -10127,6 +10282,14 @@ vld3_s32 (const int32_t * __a) return __rv.__i; } =20 +__extension__ static __inline float16x4x3_t __attribute__ ((__always_inlin= e__)) +vld3_f16 (const float16_t * __a) +{ + union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o =3D __builtin_neon_vld3v4hf ((const __builtin_neon_hf *) __a); + return __rv.__i; +} + __extension__ static __inline float32x2x3_t __attribute__ ((__always_inlin= e__)) vld3_f32 (const float32_t * __a) { @@ -10225,6 +10388,14 @@ vld3q_s32 (const int32_t * __a) return __rv.__i; } =20 +__extension__ static __inline float16x8x3_t __attribute__ ((__always_inlin= e__)) +vld3q_f16 (const float16_t * __a) +{ + union { float16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o =3D __builtin_neon_vld3v8hf ((const __builtin_neon_hf *) __a); + return __rv.__i; +} + __extension__ static __inline float32x4x3_t __attribute__ ((__always_inlin= e__)) vld3q_f32 (const float32_t * __a) { @@ -10300,6 +10471,16 @@ vld3_lane_s32 (const int32_t * __a, int32x2x3_t __= b, const int __c) return __rv.__i; } =20 +__extension__ static __inline float16x4x3_t __attribute__ ((__always_inlin= e__)) +vld3_lane_f16 (const float16_t * __a, float16x4x3_t __b, const int __c) +{ + union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu =3D { __b }; + union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o =3D __builtin_neon_vld3_lanev4hf ((const __builtin_neon_hf *) _= _a, + __bu.__o, __c); + return __rv.__i; +} + __extension__ static __inline float32x2x3_t __attribute__ ((__always_inlin= e__)) vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c) { @@ -10372,6 +10553,16 @@ vld3q_lane_s32 (const int32_t * __a, int32x4x3_t _= _b, const int __c) return __rv.__i; } =20 +__extension__ static __inline float16x8x3_t __attribute__ ((__always_inlin= e__)) +vld3q_lane_f16 (const float16_t * __a, float16x8x3_t __b, const int __c) +{ + union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu =3D { __b }; + union { float16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o =3D __builtin_neon_vld3_lanev8hf ((const __builtin_neon_hf *) _= _a, + __bu.__o, __c); + return __rv.__i; +} + __extension__ static __inline float32x4x3_t __attribute__ ((__always_inlin= e__)) vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c) { @@ -10432,6 +10623,14 @@ vld3_dup_s32 (const int32_t * __a) return __rv.__i; } =20 +__extension__ static __inline float16x4x3_t __attribute__ ((__always_inlin= e__)) +vld3_dup_f16 (const float16_t * __a) +{ + union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o =3D __builtin_neon_vld3_dupv4hf ((const __builtin_neon_hf *) __= a); + return __rv.__i; +} + __extension__ static __inline float32x2x3_t __attribute__ ((__always_inlin= e__)) vld3_dup_f32 (const float32_t * __a) { @@ -10528,6 +10727,13 @@ vst3_s32 (int32_t * __a, int32x2x3_t __b) } =20 __extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_f16 (float16_t * __a, float16x4x3_t __b) +{ + union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu =3D { __b }; + __builtin_neon_vst3v4hf ((__builtin_neon_hf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) vst3_f32 (float32_t * __a, float32x2x3_t __b) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu =3D { __b }; @@ -10614,6 +10820,13 @@ vst3q_s32 (int32_t * __a, int32x4x3_t __b) } =20 __extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_f16 (float16_t * __a, float16x8x3_t __b) +{ + union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu =3D { __b }; + __builtin_neon_vst3v8hf ((__builtin_neon_hf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) vst3q_f32 (float32_t * __a, float32x4x3_t __b) { union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu =3D { __b }; @@ -10677,6 +10890,13 @@ vst3_lane_s32 (int32_t * __a, int32x2x3_t __b, con= st int __c) } =20 __extension__ static __inline void __attribute__ ((__always_inline__)) +vst3_lane_f16 (float16_t * __a, float16x4x3_t __b, const int __c) +{ + union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu =3D { __b }; + __builtin_neon_vst3_lanev4hf ((__builtin_neon_hf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c) { union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu =3D { __b }; @@ -10733,6 +10953,13 @@ vst3q_lane_s32 (int32_t * __a, int32x4x3_t __b, co= nst int __c) } =20 __extension__ static __inline void __attribute__ ((__always_inline__)) +vst3q_lane_f16 (float16_t * __a, float16x8x3_t __b, const int __c) +{ + union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu =3D { __b }; + __builtin_neon_vst3_lanev8hf ((__builtin_neon_hf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c) { union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu =3D { __b }; @@ -10784,6 +11011,14 @@ vld4_s32 (const int32_t * __a) return __rv.__i; } =20 +__extension__ static __inline float16x4x4_t __attribute__ ((__always_inlin= e__)) +vld4_f16 (const float16_t * __a) +{ + union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o =3D __builtin_neon_vld4v4hf ((const __builtin_neon_hf *) __a); + return __rv.__i; +} + __extension__ static __inline float32x2x4_t __attribute__ ((__always_inlin= e__)) vld4_f32 (const float32_t * __a) { @@ -10882,6 +11117,14 @@ vld4q_s32 (const int32_t * __a) return __rv.__i; } =20 +__extension__ static __inline float16x8x4_t __attribute__ ((__always_inlin= e__)) +vld4q_f16 (const float16_t * __a) +{ + union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o =3D __builtin_neon_vld4v8hf ((const __builtin_neon_hf *) __a); + return __rv.__i; +} + __extension__ static __inline float32x4x4_t __attribute__ ((__always_inlin= e__)) vld4q_f32 (const float32_t * __a) { @@ -10957,6 +11200,16 @@ vld4_lane_s32 (const int32_t * __a, int32x2x4_t __= b, const int __c) return __rv.__i; } =20 +__extension__ static __inline float16x4x4_t __attribute__ ((__always_inlin= e__)) +vld4_lane_f16 (const float16_t * __a, float16x4x4_t __b, const int __c) +{ + union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu =3D { __b }; + union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o =3D __builtin_neon_vld4_lanev4hf ((const __builtin_neon_hf *) _= _a, + __bu.__o, __c); + return __rv.__i; +} + __extension__ static __inline float32x2x4_t __attribute__ ((__always_inlin= e__)) vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c) { @@ -11029,6 +11282,16 @@ vld4q_lane_s32 (const int32_t * __a, int32x4x4_t _= _b, const int __c) return __rv.__i; } =20 +__extension__ static __inline float16x8x4_t __attribute__ ((__always_inlin= e__)) +vld4q_lane_f16 (const float16_t * __a, float16x8x4_t __b, const int __c) +{ + union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu =3D { __b }; + union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o =3D __builtin_neon_vld4_lanev8hf ((const __builtin_neon_hf *) _= _a, + __bu.__o, __c); + return __rv.__i; +} + __extension__ static __inline float32x4x4_t __attribute__ ((__always_inlin= e__)) vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c) { @@ -11089,6 +11352,14 @@ vld4_dup_s32 (const int32_t * __a) return __rv.__i; } =20 +__extension__ static __inline float16x4x4_t __attribute__ ((__always_inlin= e__)) +vld4_dup_f16 (const float16_t * __a) +{ + union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o =3D __builtin_neon_vld4_dupv4hf ((const __builtin_neon_hf *) __= a); + return __rv.__i; +} + __extension__ static __inline float32x2x4_t __attribute__ ((__always_inlin= e__)) vld4_dup_f32 (const float32_t * __a) { @@ -11185,6 +11456,13 @@ vst4_s32 (int32_t * __a, int32x2x4_t __b) } =20 __extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_f16 (float16_t * __a, float16x4x4_t __b) +{ + union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu =3D { __b }; + __builtin_neon_vst4v4hf ((__builtin_neon_hf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) vst4_f32 (float32_t * __a, float32x2x4_t __b) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu =3D { __b }; @@ -11271,6 +11549,13 @@ vst4q_s32 (int32_t * __a, int32x4x4_t __b) } =20 __extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_f16 (float16_t * __a, float16x8x4_t __b) +{ + union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu =3D { __b }; + __builtin_neon_vst4v8hf ((__builtin_neon_hf *) __a, __bu.__o); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) vst4q_f32 (float32_t * __a, float32x4x4_t __b) { union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu =3D { __b }; @@ -11334,6 +11619,13 @@ vst4_lane_s32 (int32_t * __a, int32x2x4_t __b, con= st int __c) } =20 __extension__ static __inline void __attribute__ ((__always_inline__)) +vst4_lane_f16 (float16_t * __a, float16x4x4_t __b, const int __c) +{ + union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu =3D { __b }; + __builtin_neon_vst4_lanev4hf ((__builtin_neon_hf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c) { union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu =3D { __b }; @@ -11390,6 +11682,13 @@ vst4q_lane_s32 (int32_t * __a, int32x4x4_t __b, co= nst int __c) } =20 __extension__ static __inline void __attribute__ ((__always_inline__)) +vst4q_lane_f16 (float16_t * __a, float16x8x4_t __b, const int __c) +{ + union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu =3D { __b }; + __builtin_neon_vst4_lanev8hf ((__builtin_neon_hf *) __a, __bu.__o, __c); +} + +__extension__ static __inline void __attribute__ ((__always_inline__)) vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c) { union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu =3D { __b }; diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon= _builtins.def index f150b98..0b719df 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -164,9 +164,9 @@ VAR10 (UNOP, vdup_n, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) VAR10 (GETLANE, vdup_lane, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) -VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) -VAR5 (UNOP, vget_high, v16qi, v8hi, v4si, v4sf, v2di) -VAR5 (UNOP, vget_low, v16qi, v8hi, v4si, v4sf, v2di) +VAR6 (COMBINE, vcombine, v8qi, v4hi, v4hf, v2si, v2sf, di) +VAR6 (UNOP, vget_high, v16qi, v8hi, v8hf, v4si, v4sf, v2di) +VAR6 (UNOP, vget_low, v16qi, v8hi, v8hf, v4si, v4sf, v2di) VAR3 (UNOP, vmovn, v8hi, v4si, v2di) VAR3 (UNOP, vqmovns, v8hi, v4si, v2di) VAR3 (UNOP, vqmovnu, v8hi, v4si, v2di) @@ -242,40 +242,40 @@ VAR6 (UNOP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf= , v2di, ti) VAR6 (UNOP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di, ti) VAR6 (UNOP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di, ti) VAR6 (UNOP, vreinterpretti, v16qi, v8hi, v4si, v4sf, v2di, ti) -VAR10 (LOAD1, vld1, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) +VAR12 (LOAD1, vld1, + v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v= 2di) VAR10 (LOAD1LANE, vld1_lane, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) VAR10 (LOAD1, vld1_dup, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) -VAR10 (STORE1, vst1, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) -VAR10 (STORE1LANE, vst1_lane, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) -VAR9 (LOAD1, vld2, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) -VAR7 (LOAD1LANE, vld2_lane, - v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) -VAR5 (LOAD1, vld2_dup, v8qi, v4hi, v2si, v2sf, di) -VAR9 (STORE1, vst2, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) -VAR7 (STORE1LANE, vst2_lane, - v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) -VAR9 (LOAD1, vld3, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) -VAR7 (LOAD1LANE, vld3_lane, - v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) -VAR5 (LOAD1, vld3_dup, v8qi, v4hi, v2si, v2sf, di) -VAR9 (STORE1, vst3, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) -VAR7 (STORE1LANE, vst3_lane, - v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) -VAR9 (LOAD1, vld4, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) -VAR7 (LOAD1LANE, vld4_lane, - v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) -VAR5 (LOAD1, vld4_dup, v8qi, v4hi, v2si, v2sf, di) -VAR9 (STORE1, vst4, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) -VAR7 (STORE1LANE, vst4_lane, - v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) +VAR12 (STORE1, vst1, + v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di) +VAR12 (STORE1LANE, vst1_lane, + v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di) +VAR11 (LOAD1, vld2, + v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf) +VAR9 (LOAD1LANE, vld2_lane, + v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf) +VAR6 (LOAD1, vld2_dup, v8qi, v4hi, v4hf, v2si, v2sf, di) +VAR11 (STORE1, vst2, + v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf) +VAR9 (STORE1LANE, vst2_lane, + v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf) +VAR11 (LOAD1, vld3, + v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf) +VAR9 (LOAD1LANE, vld3_lane, + v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf) +VAR6 (LOAD1, vld3_dup, v8qi, v4hi, v4hf, v2si, v2sf, di) +VAR11 (STORE1, vst3, + v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf) +VAR9 (STORE1LANE, vst3_lane, + v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf) +VAR11 (LOAD1, vld4, + v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf) +VAR9 (LOAD1LANE, vld4_lane, + v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf) +VAR6 (LOAD1, vld4_dup, v8qi, v4hi, v4hf, v2si, v2sf, di) +VAR11 (STORE1, vst4, + v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf) +VAR9 (STORE1LANE, vst4_lane, + v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf) diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 1e7f3f1..47cc1ee 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -65,20 +65,32 @@ ;; Integer modes supported by Neon and IWMMXT, except V2DI (define_mode_iterator VINTW [V2SI V4HI V8QI V4SI V8HI V16QI]) =20 -;; Double-width vector modes. +;; Double-width vector modes, on which we support arithmetic (no HF!) (define_mode_iterator VD [V8QI V4HI V2SI V2SF]) =20 +;; Double-width vector modes plus 64-bit elements for vreinterpret + vcrea= te. +(define_mode_iterator VD_RE [V8QI V4HI V2SI V2SF DI]) + ;; Double-width vector modes plus 64-bit elements. -(define_mode_iterator VDX [V8QI V4HI V2SI V2SF DI]) +(define_mode_iterator VDX [V8QI V4HI V4HF V2SI V2SF DI]) + +;; Double-width vector modes, with V4HF - for vldN_lane and vstN_lane. +(define_mode_iterator VD_LANE [V8QI V4HI V4HF V2SI V2SF]) =20 ;; Double-width vector modes without floating-point elements. (define_mode_iterator VDI [V8QI V4HI V2SI]) =20 -;; Quad-width vector modes. +;; Quad-width vector modes supporting arithmetic (no HF!). (define_mode_iterator VQ [V16QI V8HI V4SI V4SF]) =20 +;; Quad-width vector modes, including V8HF. +(define_mode_iterator VQ2 [V16QI V8HI V8HF V4SI V4SF]) + +;; Quad-width vector modes with 16- or 32-bit elements +(define_mode_iterator VQ_HS [V8HI V8HF V4SI V4SF]) + ;; Quad-width vector modes plus 64-bit elements. -(define_mode_iterator VQX [V16QI V8HI V4SI V4SF V2DI]) +(define_mode_iterator VQX [V16QI V8HI V8HF V4SI V4SF V2DI]) =20 ;; Quad-width vector modes without floating-point elements. (define_mode_iterator VQI [V16QI V8HI V4SI]) @@ -111,7 +123,8 @@ (define_mode_iterator VDQI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI]) =20 ;; Vector modes, including 64-bit integer elements. -(define_mode_iterator VDQX [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF DI V2= DI]) +(define_mode_iterator VDQX [V8QI V16QI V4HI V8HI V2SI V4SI + V4HF V8HF V2SF V4SF DI V2DI]) =20 ;; Vector modes including 64-bit integer elements, but no floats. (define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI]) @@ -366,7 +379,8 @@ =20 ;; Define element mode for each vector mode. (define_mode_attr V_elem [(V8QI "QI") (V16QI "QI") - (V4HI "HI") (V8HI "HI") + (V4HI "HI") (V8HI "HI") + (V4HF "HF") (V8HF "HF") (V2SI "SI") (V4SI "SI") (V2SF "SF") (V4SF "SF") (DI "DI") (V2DI "DI")]) @@ -383,6 +397,7 @@ ;; size for structure lane/dup loads and stores. (define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI") (V4HI "SI") (V8HI "SI") + (V4HF "SF") (V8HF "SF") (V2SI "V2SI") (V4SI "V2SI") (V2SF "V2SF") (V4SF "V2SF") (DI "V2DI") (V2DI "V2DI")]) @@ -390,6 +405,7 @@ ;; Similar, for three elements. (define_mode_attr V_three_elem [(V8QI "BLK") (V16QI "BLK") (V4HI "BLK") (V8HI "BLK") + (V4HF "BLK") (V8HF "BLK") (V2SI "BLK") (V4SI "BLK") (V2SF "BLK") (V4SF "BLK") (DI "EI") (V2DI "EI")]) @@ -397,6 +413,7 @@ ;; Similar, for four elements. (define_mode_attr V_four_elem [(V8QI "SI") (V16QI "SI") (V4HI "V4HI") (V8HI "V4HI") + (V4HF "V4HF") (V8HF "V4HF") (V2SI "V4SI") (V4SI "V4SI") (V2SF "V4SF") (V4SF "V4SF") (DI "OI") (V2DI "OI")]) @@ -421,7 +438,8 @@ =20 ;; Modes with half the number of equal-sized elements. (define_mode_attr V_HALF [(V16QI "V8QI") (V8HI "V4HI") - (V4SI "V2SI") (V4SF "V2SF") (V2DF "DF") + (V8HF "V4HF") (V4SI "V2SI") + (V4SF "V2SF") (V2DF "DF") (V2DI "DI")]) =20 ;; Same, but lower-case. @@ -431,8 +449,9 @@ =20 ;; Modes with twice the number of equal-sized elements. (define_mode_attr V_DOUBLE [(V8QI "V16QI") (V4HI "V8HI") - (V2SI "V4SI") (V2SF "V4SF") (DF "V2DF") - (DI "V2DI")]) + (V2SI "V4SI") (V4HF "V8HF") + (V2SF "V4SF") (DF "V2DF") + (DI "V2DI")]) =20 ;; Same, but lower-case. (define_mode_attr V_double [(V8QI "v16qi") (V4HI "v8hi") @@ -454,8 +473,9 @@ =20 ;; Mode of result of comparison operations (and bit-select operand 1). (define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI") - (V4HI "V4HI") (V8HI "V8HI") + (V4HI "V4HI") (V8HI "V8HI") (V2SI "V2SI") (V4SI "V4SI") + (V4HF "V4HI") (V8HF "V8HI") (V2SF "V2SI") (V4SF "V4SI") (DI "DI") (V2DI "V2DI")]) =20 @@ -492,12 +512,14 @@ (define_mode_attr V_uf_sclr [(V8QI "u8") (V16QI "u8") (V4HI "u16") (V8HI "u16") (V2SI "32") (V4SI "32") + (V4HF "u16") (V8HF "u16") (V2SF "32") (V4SF "32")]) =20 (define_mode_attr V_sz_elem [(V8QI "8") (V16QI "8") (V4HI "16") (V8HI "16") (V2SI "32") (V4SI "32") (DI "64") (V2DI "64") + (V4HF "16") (V8HF "16") (V2SF "32") (V4SF "32")]) =20 (define_mode_attr V_elem_ch [(V8QI "b") (V16QI "b") @@ -564,6 +586,7 @@ (DI "true") (V2DI "false")]) =20 (define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16") + (V4HF "4") (V8HF "8") (V4HI "4") (V8HI "8") (V2SI "2") (V4SI "4") (V2SF "2") (V4SF "4") @@ -607,6 +630,7 @@ (define_mode_attr q [(V8QI "") (V16QI "_q") (V4HI "") (V8HI "_q") (V2SI "") (V4SI "_q") + (V4HF "") (V8HF "_q") (V2SF "") (V4SF "_q") (DI "") (V2DI "_q") (DF "") (V2DF "_q")]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 4af74ce..f8d6e74 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -320,11 +320,11 @@ [(set_attr "type" "neon_load1_all_lanes,neon_from_gp")]) =20 (define_insn "vec_set_internal" - [(set (match_operand:VQ 0 "s_register_operand" "=3Dw,w") - (vec_merge:VQ - (vec_duplicate:VQ + [(set (match_operand:VQ2 0 "s_register_operand" "=3Dw,w") + (vec_merge:VQ2 + (vec_duplicate:VQ2 (match_operand: 1 "nonimmediate_operand" "Um,r")) - (match_operand:VQ 3 "s_register_operand" "0,0") + (match_operand:VQ2 3 "s_register_operand" "0,0") (match_operand:SI 2 "immediate_operand" "i,i")))] "TARGET_NEON" { @@ -407,7 +407,7 @@ (define_insn "vec_extract" [(set (match_operand: 0 "nonimmediate_operand" "=3DUm,r") (vec_select: - (match_operand:VQ 1 "s_register_operand" "w,w") + (match_operand:VQ2 1 "s_register_operand" "w,w") (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] "TARGET_NEON" { @@ -2607,7 +2607,7 @@ [(set (match_operand:SI 0 "s_register_operand" "=3Dr") (sign_extend:SI (vec_select: - (match_operand:VQ 1 "s_register_operand" "w") + (match_operand:VQ2 1 "s_register_operand" "w") (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_NEON" { @@ -2634,7 +2634,7 @@ [(set (match_operand:SI 0 "s_register_operand" "=3Dr") (zero_extend:SI (vec_select: - (match_operand:VQ 1 "s_register_operand" "w") + (match_operand:VQ2 1 "s_register_operand" "w") (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_NEON" { @@ -2774,7 +2774,7 @@ }) =20 (define_expand "neon_vcreate" - [(match_operand:VDX 0 "s_register_operand" "") + [(match_operand:VD_RE 0 "s_register_operand" "") (match_operand:DI 1 "general_operand" "")] "TARGET_NEON" { @@ -4125,7 +4125,7 @@ =20 (define_expand "neon_vreinterpretv8qi" [(match_operand:V8QI 0 "s_register_operand" "") - (match_operand:VDX 1 "s_register_operand" "")] + (match_operand:VD_RE 1 "s_register_operand" "")] "TARGET_NEON" { neon_reinterpret (operands[0], operands[1]); @@ -4134,7 +4134,7 @@ =20 (define_expand "neon_vreinterpretv4hi" [(match_operand:V4HI 0 "s_register_operand" "") - (match_operand:VDX 1 "s_register_operand" "")] + (match_operand:VD_RE 1 "s_register_operand" "")] "TARGET_NEON" { neon_reinterpret (operands[0], operands[1]); @@ -4143,7 +4143,7 @@ =20 (define_expand "neon_vreinterpretv2si" [(match_operand:V2SI 0 "s_register_operand" "") - (match_operand:VDX 1 "s_register_operand" "")] + (match_operand:VD_RE 1 "s_register_operand" "")] "TARGET_NEON" { neon_reinterpret (operands[0], operands[1]); @@ -4152,7 +4152,7 @@ =20 (define_expand "neon_vreinterpretv2sf" [(match_operand:V2SF 0 "s_register_operand" "") - (match_operand:VDX 1 "s_register_operand" "")] + (match_operand:VD_RE 1 "s_register_operand" "")] "TARGET_NEON" { neon_reinterpret (operands[0], operands[1]); @@ -4161,7 +4161,7 @@ =20 (define_expand "neon_vreinterpretdi" [(match_operand:DI 0 "s_register_operand" "") - (match_operand:VDX 1 "s_register_operand" "")] + (match_operand:VD_RE 1 "s_register_operand" "")] "TARGET_NEON" { neon_reinterpret (operands[0], operands[1]); @@ -4420,14 +4420,14 @@ (define_expand "vec_load_lanesoi" [(set (match_operand:OI 0 "s_register_operand") (unspec:OI [(match_operand:OI 1 "neon_struct_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD2))] "TARGET_NEON") =20 (define_insn "neon_vld2" [(set (match_operand:OI 0 "s_register_operand" "=3Dw") (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD2))] "TARGET_NEON" "vld2.\t%h0, %A1" @@ -4438,7 +4438,7 @@ (unspec:TI [(match_operand: 1 "neon_struct_operand" "U= m") (match_operand:TI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") - (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD2_LANE))] "TARGET_NEON" { @@ -4463,7 +4463,7 @@ (unspec:OI [(match_operand: 1 "neon_struct_operand" "U= m") (match_operand:OI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") - (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD2_LANE))] "TARGET_NEON" { @@ -4534,14 +4534,14 @@ (define_expand "vec_store_lanesoi" [(set (match_operand:OI 0 "neon_struct_operand") (unspec:OI [(match_operand:OI 1 "s_register_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST2))] "TARGET_NEON") =20 (define_insn "neon_vst2" [(set (match_operand:OI 0 "neon_struct_operand" "=3DUm") (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST2))] "TARGET_NEON" "vst2.\t%h1, %A0" @@ -4553,7 +4553,7 @@ (unspec: [(match_operand:TI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") - (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST2_LANE))] "TARGET_NEON" { @@ -4578,7 +4578,7 @@ (unspec: [(match_operand:OI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") - (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST2_LANE))] "TARGET_NEON" { @@ -4631,7 +4631,7 @@ (define_expand "vec_load_lanesci" [(match_operand:CI 0 "s_register_operand") (match_operand:CI 1 "neon_struct_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { emit_insn (gen_neon_vld3 (operands[0], operands[1])); @@ -4641,7 +4641,7 @@ (define_expand "neon_vld3" [(match_operand:CI 0 "s_register_operand") (match_operand:CI 1 "neon_struct_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { rtx mem; @@ -4656,7 +4656,7 @@ (define_insn "neon_vld3qa" [(set (match_operand:CI 0 "s_register_operand" "=3Dw") (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD3A))] "TARGET_NEON" { @@ -4676,7 +4676,7 @@ [(set (match_operand:CI 0 "s_register_operand" "=3Dw") (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") (match_operand:CI 2 "s_register_operand" "0") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD3B))] "TARGET_NEON" { @@ -4697,7 +4697,7 @@ (unspec:EI [(match_operand: 1 "neon_struct_operand" = "Um") (match_operand:EI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") - (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD3_LANE))] "TARGET_NEON" { @@ -4724,7 +4724,7 @@ (unspec:CI [(match_operand: 1 "neon_struct_operand" = "Um") (match_operand:CI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") - (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD3_LANE))] "TARGET_NEON" { @@ -4804,7 +4804,7 @@ (define_expand "vec_store_lanesci" [(match_operand:CI 0 "neon_struct_operand") (match_operand:CI 1 "s_register_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { emit_insn (gen_neon_vst3 (operands[0], operands[1])); @@ -4814,7 +4814,7 @@ (define_expand "neon_vst3" [(match_operand:CI 0 "neon_struct_operand") (match_operand:CI 1 "s_register_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { rtx mem; @@ -4829,7 +4829,7 @@ (define_insn "neon_vst3qa" [(set (match_operand:EI 0 "neon_struct_operand" "=3DUm") (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST3A))] "TARGET_NEON" { @@ -4848,7 +4848,7 @@ (define_insn "neon_vst3qb" [(set (match_operand:EI 0 "neon_struct_operand" "=3DUm") (unspec:EI [(match_operand:CI 1 "s_register_operand" "w") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST3B))] "TARGET_NEON" { @@ -4869,7 +4869,7 @@ (unspec: [(match_operand:EI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") - (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST3_LANE))] "TARGET_NEON" { @@ -4896,7 +4896,7 @@ (unspec: [(match_operand:CI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") - (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST3_LANE))] "TARGET_NEON" { @@ -4951,7 +4951,7 @@ (define_expand "vec_load_lanesxi" [(match_operand:XI 0 "s_register_operand") (match_operand:XI 1 "neon_struct_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { emit_insn (gen_neon_vld4 (operands[0], operands[1])); @@ -4961,7 +4961,7 @@ (define_expand "neon_vld4" [(match_operand:XI 0 "s_register_operand") (match_operand:XI 1 "neon_struct_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { rtx mem; @@ -4976,7 +4976,7 @@ (define_insn "neon_vld4qa" [(set (match_operand:XI 0 "s_register_operand" "=3Dw") (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD4A))] "TARGET_NEON" { @@ -4997,7 +4997,7 @@ [(set (match_operand:XI 0 "s_register_operand" "=3Dw") (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") (match_operand:XI 2 "s_register_operand" "0") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD4B))] "TARGET_NEON" { @@ -5019,7 +5019,7 @@ (unspec:OI [(match_operand: 1 "neon_struct_operand" "= Um") (match_operand:OI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") - (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD4_LANE))] "TARGET_NEON" { @@ -5047,7 +5047,7 @@ (unspec:XI [(match_operand: 1 "neon_struct_operand" "= Um") (match_operand:XI 2 "s_register_operand" "0") (match_operand:SI 3 "immediate_operand" "i") - (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD4_LANE))] "TARGET_NEON" { @@ -5132,7 +5132,7 @@ (define_expand "vec_store_lanesxi" [(match_operand:XI 0 "neon_struct_operand") (match_operand:XI 1 "s_register_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { emit_insn (gen_neon_vst4 (operands[0], operands[1])); @@ -5142,7 +5142,7 @@ (define_expand "neon_vst4" [(match_operand:XI 0 "neon_struct_operand") (match_operand:XI 1 "s_register_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { rtx mem; @@ -5157,7 +5157,7 @@ (define_insn "neon_vst4qa" [(set (match_operand:OI 0 "neon_struct_operand" "=3DUm") (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST4A))] "TARGET_NEON" { @@ -5177,7 +5177,7 @@ (define_insn "neon_vst4qb" [(set (match_operand:OI 0 "neon_struct_operand" "=3DUm") (unspec:OI [(match_operand:XI 1 "s_register_operand" "w") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST4B))] "TARGET_NEON" { @@ -5199,7 +5199,7 @@ (unspec: [(match_operand:OI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") - (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST4_LANE))] "TARGET_NEON" { @@ -5227,7 +5227,7 @@ (unspec: [(match_operand:XI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") - (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST4_LANE))] "TARGET_NEON" { --------------090106040408080804070807--