From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-patches-return-402218-listarch-gcc-patches=gcc.gnu.org@gcc.gnu.org>
Received: (qmail 99257 invoked by alias); 7 Jul 2015 12:35:37 -0000
Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id: <gcc-patches.gcc.gnu.org>
List-Archive: <http://gcc.gnu.org/ml/gcc-patches/>
List-Post: <mailto:gcc-patches@gcc.gnu.org>
List-Help: <mailto:gcc-patches-help@gcc.gnu.org>
Sender: gcc-patches-owner@gcc.gnu.org
Received: (qmail 99106 invoked by uid 89); 7 Jul 2015 12:35:36 -0000
Authentication-Results: sourceware.org; auth=none
X-Virus-Found: No
X-Spam-SWARE-Status: No, score=-0.8 required=5.0 tests=AWL,BAYES_20,SPF_PASS autolearn=ham version=3.3.2
X-HELO: eu-smtp-delivery-143.mimecast.com
Received: from eu-smtp-delivery-143.mimecast.com (HELO eu-smtp-delivery-143.mimecast.com) (146.101.78.143) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Tue, 07 Jul 2015 12:35:13 +0000
Received: from cam-owa2.Emea.Arm.com (fw-tnat.cambridge.arm.com [217.140.96.140]) by eu-smtp-1.mimecast.com with ESMTP id uk-mta-31-pqvVUAMoRQikeRqBi9iiBg-1
Received: from [10.2.207.65] ([10.1.2.79]) by cam-owa2.Emea.Arm.com with Microsoft SMTPSVC(6.0.3790.3959);	 Tue, 7 Jul 2015 13:35:07 +0100
Message-ID: <559BC77C.7030100@arm.com>
Date: Tue, 07 Jul 2015 12:35:00 -0000
From: Alan Lawrence <alan.lawrence@arm.com>
User-Agent: Thunderbird 2.0.0.24 (X11/20101213)
MIME-Version: 1.0
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Subject: [PATCH 6/16][ARM] Remaining float16 intrinsics: vld..., vst..., vget_low/high, vcombine
In-Reply-To: <559BC6EC.3000907@arm.com>
X-MC-Unique: pqvVUAMoRQikeRqBi9iiBg-1
Content-Type: multipart/mixed; boundary="------------090106040408080804070807"
X-IsSubscribed: yes
X-SW-Source: 2015-07/txt/msg00479.txt.bz2

This is a multi-part message in MIME format.
--------------090106040408080804070807
Content-Type: text/plain; charset=WINDOWS-1252; format=flowed
Content-Transfer-Encoding: quoted-printable
Content-length: 64

As per https://gcc.gnu.org/ml/gcc-patches/2015-04/msg01341.html

--------------090106040408080804070807
Content-Type: text/x-patch; name=06_arm_iterators.patch
Content-Transfer-Encoding: quoted-printable
Content-Disposition: inline;
 filename="06_arm_iterators.patch"
Content-length: 47392

commit ae6264b144d25fadcbf219e68ddf3d8c5f40be34
Author: Alan Lawrence <alan.lawrence@arm.com>
Date:   Thu Dec 11 11:53:59 2014 +0000

    ARM 4/4 v2: v(ld|st)[234](q?|_lane|_dup), vcombine, vget_(low|high) (v2=
 w/ V_uf_sclr)
=20=20=20=20
    All are tied together with so many iterators!
=20=20=20=20
    Also vec_extract

diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 17e39d8..1ee0a3d 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -241,6 +241,12 @@ typedef struct {
 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
   VAR9 (T, N, A, B, C, D, E, F, G, H, I) \
   VAR1 (T, N, J)
+#define VAR11(T, N, A, B, C, D, E, F, G, H, I, J, K) \
+  VAR10 (T, N, A, B, C, D, E, F, G, H, I, J) \
+  VAR1 (T, N, K)
+#define VAR12(T, N, A, B, C, D, E, F, G, H, I, J, K, L) \
+  VAR11 (T, N, A, B, C, D, E, F, G, H, I, J, K) \
+  VAR1 (T, N, L)
=20
 /* The NEON builtin data can be found in arm_neon_builtins.def.
    The mode entries in the following table correspond to the "key" type of=
 the
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index db73c70..93fb44f 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -162,6 +162,16 @@ typedef struct uint64x2x2_t
   uint64x2_t val[2];
 } uint64x2x2_t;
=20
+typedef struct float16x4x2_t
+{
+  float16x4_t val[2];
+} float16x4x2_t;
+
+typedef struct float16x8x2_t
+{
+  float16x8_t val[2];
+} float16x8x2_t;
+
 typedef struct float32x2x2_t
 {
   float32x2_t val[2];
@@ -288,6 +298,16 @@ typedef struct uint64x2x3_t
   uint64x2_t val[3];
 } uint64x2x3_t;
=20
+typedef struct float16x4x3_t
+{
+  float16x4_t val[3];
+} float16x4x3_t;
+
+typedef struct float16x8x3_t
+{
+  float16x8_t val[3];
+} float16x8x3_t;
+
 typedef struct float32x2x3_t
 {
   float32x2_t val[3];
@@ -414,6 +434,16 @@ typedef struct uint64x2x4_t
   uint64x2_t val[4];
 } uint64x2x4_t;
=20
+typedef struct float16x4x4_t
+{
+  float16x4_t val[4];
+} float16x4x4_t;
+
+typedef struct float16x8x4_t
+{
+  float16x8_t val[4];
+} float16x8x4_t;
+
 typedef struct float32x2x4_t
 {
   float32x2_t val[4];
@@ -6031,6 +6061,12 @@ vcombine_s64 (int64x1_t __a, int64x1_t __b)
   return (int64x2_t)__builtin_neon_vcombinedi (__a, __b);
 }
=20
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline_=
_))
+vcombine_f16 (float16x4_t __a, float16x4_t __b)
+{
+  return __builtin_neon_vcombinev4hf (__a, __b);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline_=
_))
 vcombine_f32 (float32x2_t __a, float32x2_t __b)
 {
@@ -6105,6 +6141,12 @@ vget_high_s64 (int64x2_t __a)
   return (int64x1_t)__builtin_neon_vget_highv2di (__a);
 }
=20
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline_=
_))
+vget_high_f16 (float16x8_t __a)
+{
+  return __builtin_neon_vget_highv8hf (__a);
+}
+
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline_=
_))
 vget_high_f32 (float32x4_t __a)
 {
@@ -6165,6 +6207,12 @@ vget_low_s32 (int32x4_t __a)
   return (int32x2_t)__builtin_neon_vget_lowv4si (__a);
 }
=20
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline_=
_))
+vget_low_f16 (float16x8_t __a)
+{
+  return __builtin_neon_vget_lowv8hf (__a);
+}
+
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline_=
_))
 vget_low_f32 (float32x4_t __a)
 {
@@ -8712,6 +8760,12 @@ vld1_s64 (const int64_t * __a)
   return (int64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a=
);
 }
=20
+__extension__ static __inline float16x4_t __attribute__ ((__always_inline_=
_))
+vld1_f16 (const float16_t * __a)
+{
+  return __builtin_neon_vld1v4hf ((const __builtin_neon_hf *) __a);
+}
+
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline_=
_))
 vld1_f32 (const float32_t * __a)
 {
@@ -8786,6 +8840,12 @@ vld1q_s64 (const int64_t * __a)
   return (int64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) _=
_a);
 }
=20
+__extension__ static __inline float16x8_t __attribute__ ((__always_inline_=
_))
+vld1q_f16 (const float16_t * __a)
+{
+  return __builtin_neon_vld1v8hf ((const __builtin_neon_hf *) __a);
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline_=
_))
 vld1q_f32 (const float32_t * __a)
 {
@@ -9183,6 +9243,12 @@ vst1_s64 (int64_t * __a, int64x1_t __b)
 }
=20
 __extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_f16 (float16_t * __a, float16x4_t __b)
+{
+  __builtin_neon_vst1v4hf ((__builtin_neon_hf *) __a, __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
 vst1_f32 (float32_t * __a, float32x2_t __b)
 {
   __builtin_neon_vst1v2sf ((__builtin_neon_sf *) __a, __b);
@@ -9257,6 +9323,12 @@ vst1q_s64 (int64_t * __a, int64x2_t __b)
 }
=20
 __extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_f16 (float16_t * __a, float16x8_t __b)
+{
+  __builtin_neon_vst1v8hf ((__builtin_neon_hf *) __a, __b);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
 vst1q_f32 (float32_t * __a, float32x4_t __b)
 {
   __builtin_neon_vst1v4sf ((__builtin_neon_sf *) __a, __b);
@@ -9317,6 +9389,12 @@ vst1_lane_s32 (int32_t * __a, int32x2_t __b, const i=
nt __c)
 }
=20
 __extension__ static __inline void __attribute__ ((__always_inline__))
+vst1_lane_f16 (float16_t * __a, float16x4_t __b, const int __c)
+{
+  __builtin_neon_vst1_lanev4hf ((__builtin_neon_hf *) __a, __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
 vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c)
 {
   __builtin_neon_vst1_lanev2sf ((__builtin_neon_sf *) __a, __b, __c);
@@ -9391,6 +9469,12 @@ vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const =
int __c)
 }
=20
 __extension__ static __inline void __attribute__ ((__always_inline__))
+vst1q_lane_f16 (float16_t * __a, float16x8_t __b, const int __c)
+{
+  __builtin_neon_vst1_lanev8hf ((__builtin_neon_hf *) __a, __b, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
 vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c)
 {
   __builtin_neon_vst1_lanev4sf ((__builtin_neon_sf *) __a, __b, __c);
@@ -9470,6 +9554,14 @@ vld2_s32 (const int32_t * __a)
   return __rv.__i;
 }
=20
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inlin=
e__))
+vld2_f16 (const float16_t * __a)
+{
+  union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o =3D __builtin_neon_vld2v4hf ((const __builtin_neon_hf *) __a);
+  return __rv.__i;
+}
+
 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inlin=
e__))
 vld2_f32 (const float32_t * __a)
 {
@@ -9568,6 +9660,14 @@ vld2q_s32 (const int32_t * __a)
   return __rv.__i;
 }
=20
+__extension__ static __inline float16x8x2_t __attribute__ ((__always_inlin=
e__))
+vld2q_f16 (const float16_t * __a)
+{
+  union { float16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o =3D __builtin_neon_vld2v8hf ((const __builtin_neon_hf *) __a);
+  return __rv.__i;
+}
+
 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inlin=
e__))
 vld2q_f32 (const float32_t * __a)
 {
@@ -9643,6 +9743,16 @@ vld2_lane_s32 (const int32_t * __a, int32x2x2_t __b,=
 const int __c)
   return __rv.__i;
 }
=20
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inlin=
e__))
+vld2_lane_f16 (const float16_t * __a, float16x4x2_t __b, const int __c)
+{
+  union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu =3D { __b };
+  union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o =3D __builtin_neon_vld2_lanev4hf ((const __builtin_neon_hf *) _=
_a,
+					   __bu.__o, __c);
+  return __rv.__i;
+}
+
 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inlin=
e__))
 vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c)
 {
@@ -9715,6 +9825,16 @@ vld2q_lane_s32 (const int32_t * __a, int32x4x2_t __b=
, const int __c)
   return __rv.__i;
 }
=20
+__extension__ static __inline float16x8x2_t __attribute__ ((__always_inlin=
e__))
+vld2q_lane_f16 (const float16_t * __a, float16x8x2_t __b, const int __c)
+{
+  union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu =3D { __b };
+  union { float16x8x2_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o =3D __builtin_neon_vld2_lanev8hf ((const __builtin_neon_hf *) _=
_a,
+					   __bu.__o, __c);
+  return __rv.__i;
+}
+
 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inlin=
e__))
 vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c)
 {
@@ -9775,6 +9895,13 @@ vld2_dup_s32 (const int32_t * __a)
   return __rv.__i;
 }
=20
+__extension__ static __inline float16x4x2_t __attribute__ ((__always_inlin=
e__))
+vld2_dup_f16 (const float16_t * __a)
+{
+  union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv;
+  __rv.__o =3D __builtin_neon_vld2_dupv4hf ((const __builtin_neon_hf *) __=
a);
+  return __rv.__i;
+}
 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inlin=
e__))
 vld2_dup_f32 (const float32_t * __a)
 {
@@ -9871,6 +9998,13 @@ vst2_s32 (int32_t * __a, int32x2x2_t __b)
 }
=20
 __extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_f16 (float16_t * __a, float16x4x2_t __b)
+{
+  union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu =3D { __b };
+  __builtin_neon_vst2v4hf ((__builtin_neon_hf *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
 vst2_f32 (float32_t * __a, float32x2x2_t __b)
 {
   union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu =3D { __b };
@@ -9957,6 +10091,13 @@ vst2q_s32 (int32_t * __a, int32x4x2_t __b)
 }
=20
 __extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_f16 (float16_t * __a, float16x8x2_t __b)
+{
+  union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu =3D { __b };
+  __builtin_neon_vst2v8hf ((__builtin_neon_hf *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
 vst2q_f32 (float32_t * __a, float32x4x2_t __b)
 {
   union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu =3D { __b };
@@ -10020,6 +10161,13 @@ vst2_lane_s32 (int32_t * __a, int32x2x2_t __b, con=
st int __c)
 }
=20
 __extension__ static __inline void __attribute__ ((__always_inline__))
+vst2_lane_f16 (float16_t * __a, float16x4x2_t __b, const int __c)
+{
+  union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu =3D { __b };
+  __builtin_neon_vst2_lanev4hf ((__builtin_neon_hf *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
 vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c)
 {
   union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu =3D { __b };
@@ -10076,6 +10224,13 @@ vst2q_lane_s32 (int32_t * __a, int32x4x2_t __b, co=
nst int __c)
 }
=20
 __extension__ static __inline void __attribute__ ((__always_inline__))
+vst2q_lane_f16 (float16_t * __a, float16x8x2_t __b, const int __c)
+{
+  union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu =3D { __b };
+  __builtin_neon_vst2_lanev8hf ((__builtin_neon_hf *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
 vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c)
 {
   union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu =3D { __b };
@@ -10127,6 +10282,14 @@ vld3_s32 (const int32_t * __a)
   return __rv.__i;
 }
=20
+__extension__ static __inline float16x4x3_t __attribute__ ((__always_inlin=
e__))
+vld3_f16 (const float16_t * __a)
+{
+  union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+  __rv.__o =3D __builtin_neon_vld3v4hf ((const __builtin_neon_hf *) __a);
+  return __rv.__i;
+}
+
 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inlin=
e__))
 vld3_f32 (const float32_t * __a)
 {
@@ -10225,6 +10388,14 @@ vld3q_s32 (const int32_t * __a)
   return __rv.__i;
 }
=20
+__extension__ static __inline float16x8x3_t __attribute__ ((__always_inlin=
e__))
+vld3q_f16 (const float16_t * __a)
+{
+  union { float16x8x3_t __i; __builtin_neon_ci __o; } __rv;
+  __rv.__o =3D __builtin_neon_vld3v8hf ((const __builtin_neon_hf *) __a);
+  return __rv.__i;
+}
+
 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inlin=
e__))
 vld3q_f32 (const float32_t * __a)
 {
@@ -10300,6 +10471,16 @@ vld3_lane_s32 (const int32_t * __a, int32x2x3_t __=
b, const int __c)
   return __rv.__i;
 }
=20
+__extension__ static __inline float16x4x3_t __attribute__ ((__always_inlin=
e__))
+vld3_lane_f16 (const float16_t * __a, float16x4x3_t __b, const int __c)
+{
+  union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu =3D { __b };
+  union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+  __rv.__o =3D __builtin_neon_vld3_lanev4hf ((const __builtin_neon_hf *) _=
_a,
+					   __bu.__o, __c);
+  return __rv.__i;
+}
+
 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inlin=
e__))
 vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c)
 {
@@ -10372,6 +10553,16 @@ vld3q_lane_s32 (const int32_t * __a, int32x4x3_t _=
_b, const int __c)
   return __rv.__i;
 }
=20
+__extension__ static __inline float16x8x3_t __attribute__ ((__always_inlin=
e__))
+vld3q_lane_f16 (const float16_t * __a, float16x8x3_t __b, const int __c)
+{
+  union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu =3D { __b };
+  union { float16x8x3_t __i; __builtin_neon_ci __o; } __rv;
+  __rv.__o =3D __builtin_neon_vld3_lanev8hf ((const __builtin_neon_hf *) _=
_a,
+					   __bu.__o, __c);
+  return __rv.__i;
+}
+
 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inlin=
e__))
 vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c)
 {
@@ -10432,6 +10623,14 @@ vld3_dup_s32 (const int32_t * __a)
   return __rv.__i;
 }
=20
+__extension__ static __inline float16x4x3_t __attribute__ ((__always_inlin=
e__))
+vld3_dup_f16 (const float16_t * __a)
+{
+  union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv;
+  __rv.__o =3D __builtin_neon_vld3_dupv4hf ((const __builtin_neon_hf *) __=
a);
+  return __rv.__i;
+}
+
 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inlin=
e__))
 vld3_dup_f32 (const float32_t * __a)
 {
@@ -10528,6 +10727,13 @@ vst3_s32 (int32_t * __a, int32x2x3_t __b)
 }
=20
 __extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_f16 (float16_t * __a, float16x4x3_t __b)
+{
+  union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu =3D { __b };
+  __builtin_neon_vst3v4hf ((__builtin_neon_hf *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
 vst3_f32 (float32_t * __a, float32x2x3_t __b)
 {
   union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu =3D { __b };
@@ -10614,6 +10820,13 @@ vst3q_s32 (int32_t * __a, int32x4x3_t __b)
 }
=20
 __extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_f16 (float16_t * __a, float16x8x3_t __b)
+{
+  union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu =3D { __b };
+  __builtin_neon_vst3v8hf ((__builtin_neon_hf *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
 vst3q_f32 (float32_t * __a, float32x4x3_t __b)
 {
   union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu =3D { __b };
@@ -10677,6 +10890,13 @@ vst3_lane_s32 (int32_t * __a, int32x2x3_t __b, con=
st int __c)
 }
=20
 __extension__ static __inline void __attribute__ ((__always_inline__))
+vst3_lane_f16 (float16_t * __a, float16x4x3_t __b, const int __c)
+{
+  union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu =3D { __b };
+  __builtin_neon_vst3_lanev4hf ((__builtin_neon_hf *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
 vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c)
 {
   union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu =3D { __b };
@@ -10733,6 +10953,13 @@ vst3q_lane_s32 (int32_t * __a, int32x4x3_t __b, co=
nst int __c)
 }
=20
 __extension__ static __inline void __attribute__ ((__always_inline__))
+vst3q_lane_f16 (float16_t * __a, float16x8x3_t __b, const int __c)
+{
+  union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu =3D { __b };
+  __builtin_neon_vst3_lanev8hf ((__builtin_neon_hf *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
 vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c)
 {
   union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu =3D { __b };
@@ -10784,6 +11011,14 @@ vld4_s32 (const int32_t * __a)
   return __rv.__i;
 }
=20
+__extension__ static __inline float16x4x4_t __attribute__ ((__always_inlin=
e__))
+vld4_f16 (const float16_t * __a)
+{
+  union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o =3D __builtin_neon_vld4v4hf ((const __builtin_neon_hf *) __a);
+  return __rv.__i;
+}
+
 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inlin=
e__))
 vld4_f32 (const float32_t * __a)
 {
@@ -10882,6 +11117,14 @@ vld4q_s32 (const int32_t * __a)
   return __rv.__i;
 }
=20
+__extension__ static __inline float16x8x4_t __attribute__ ((__always_inlin=
e__))
+vld4q_f16 (const float16_t * __a)
+{
+  union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv;
+  __rv.__o =3D __builtin_neon_vld4v8hf ((const __builtin_neon_hf *) __a);
+  return __rv.__i;
+}
+
 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inlin=
e__))
 vld4q_f32 (const float32_t * __a)
 {
@@ -10957,6 +11200,16 @@ vld4_lane_s32 (const int32_t * __a, int32x2x4_t __=
b, const int __c)
   return __rv.__i;
 }
=20
+__extension__ static __inline float16x4x4_t __attribute__ ((__always_inlin=
e__))
+vld4_lane_f16 (const float16_t * __a, float16x4x4_t __b, const int __c)
+{
+  union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu =3D { __b };
+  union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o =3D __builtin_neon_vld4_lanev4hf ((const __builtin_neon_hf *) _=
_a,
+					   __bu.__o, __c);
+  return __rv.__i;
+}
+
 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inlin=
e__))
 vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c)
 {
@@ -11029,6 +11282,16 @@ vld4q_lane_s32 (const int32_t * __a, int32x4x4_t _=
_b, const int __c)
   return __rv.__i;
 }
=20
+__extension__ static __inline float16x8x4_t __attribute__ ((__always_inlin=
e__))
+vld4q_lane_f16 (const float16_t * __a, float16x8x4_t __b, const int __c)
+{
+  union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu =3D { __b };
+  union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv;
+  __rv.__o =3D __builtin_neon_vld4_lanev8hf ((const __builtin_neon_hf *) _=
_a,
+					   __bu.__o, __c);
+  return __rv.__i;
+}
+
 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inlin=
e__))
 vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c)
 {
@@ -11089,6 +11352,14 @@ vld4_dup_s32 (const int32_t * __a)
   return __rv.__i;
 }
=20
+__extension__ static __inline float16x4x4_t __attribute__ ((__always_inlin=
e__))
+vld4_dup_f16 (const float16_t * __a)
+{
+  union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv;
+  __rv.__o =3D __builtin_neon_vld4_dupv4hf ((const __builtin_neon_hf *) __=
a);
+  return __rv.__i;
+}
+
 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inlin=
e__))
 vld4_dup_f32 (const float32_t * __a)
 {
@@ -11185,6 +11456,13 @@ vst4_s32 (int32_t * __a, int32x2x4_t __b)
 }
=20
 __extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_f16 (float16_t * __a, float16x4x4_t __b)
+{
+  union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu =3D { __b };
+  __builtin_neon_vst4v4hf ((__builtin_neon_hf *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
 vst4_f32 (float32_t * __a, float32x2x4_t __b)
 {
   union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu =3D { __b };
@@ -11271,6 +11549,13 @@ vst4q_s32 (int32_t * __a, int32x4x4_t __b)
 }
=20
 __extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_f16 (float16_t * __a, float16x8x4_t __b)
+{
+  union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu =3D { __b };
+  __builtin_neon_vst4v8hf ((__builtin_neon_hf *) __a, __bu.__o);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
 vst4q_f32 (float32_t * __a, float32x4x4_t __b)
 {
   union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu =3D { __b };
@@ -11334,6 +11619,13 @@ vst4_lane_s32 (int32_t * __a, int32x2x4_t __b, con=
st int __c)
 }
=20
 __extension__ static __inline void __attribute__ ((__always_inline__))
+vst4_lane_f16 (float16_t * __a, float16x4x4_t __b, const int __c)
+{
+  union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu =3D { __b };
+  __builtin_neon_vst4_lanev4hf ((__builtin_neon_hf *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
 vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c)
 {
   union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu =3D { __b };
@@ -11390,6 +11682,13 @@ vst4q_lane_s32 (int32_t * __a, int32x4x4_t __b, co=
nst int __c)
 }
=20
 __extension__ static __inline void __attribute__ ((__always_inline__))
+vst4q_lane_f16 (float16_t * __a, float16x8x4_t __b, const int __c)
+{
+  union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu =3D { __b };
+  __builtin_neon_vst4_lanev8hf ((__builtin_neon_hf *) __a, __bu.__o, __c);
+}
+
+__extension__ static __inline void __attribute__ ((__always_inline__))
 vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c)
 {
   union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu =3D { __b };
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon=
_builtins.def
index f150b98..0b719df 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -164,9 +164,9 @@ VAR10 (UNOP, vdup_n,
 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
 VAR10 (GETLANE, vdup_lane,
 	 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
-VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di)
-VAR5 (UNOP, vget_high, v16qi, v8hi, v4si, v4sf, v2di)
-VAR5 (UNOP, vget_low, v16qi, v8hi, v4si, v4sf, v2di)
+VAR6 (COMBINE, vcombine, v8qi, v4hi, v4hf, v2si, v2sf, di)
+VAR6 (UNOP, vget_high, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
+VAR6 (UNOP, vget_low, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
 VAR3 (UNOP, vmovn, v8hi, v4si, v2di)
 VAR3 (UNOP, vqmovns, v8hi, v4si, v2di)
 VAR3 (UNOP, vqmovnu, v8hi, v4si, v2di)
@@ -242,40 +242,40 @@ VAR6 (UNOP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf=
, v2di, ti)
 VAR6 (UNOP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di, ti)
 VAR6 (UNOP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di, ti)
 VAR6 (UNOP, vreinterpretti, v16qi, v8hi, v4si, v4sf, v2di, ti)
-VAR10 (LOAD1, vld1,
-        v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
+VAR12 (LOAD1, vld1,
+        v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v=
2di)
 VAR10 (LOAD1LANE, vld1_lane,
 	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
 VAR10 (LOAD1, vld1_dup,
 	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
-VAR10 (STORE1, vst1,
-	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
-VAR10 (STORE1LANE, vst1_lane,
-	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
-VAR9 (LOAD1, vld2,
-	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
-VAR7 (LOAD1LANE, vld2_lane,
-	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR5 (LOAD1, vld2_dup, v8qi, v4hi, v2si, v2sf, di)
-VAR9 (STORE1, vst2,
-	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
-VAR7 (STORE1LANE, vst2_lane,
-	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR9 (LOAD1, vld3,
-	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
-VAR7 (LOAD1LANE, vld3_lane,
-	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR5 (LOAD1, vld3_dup, v8qi, v4hi, v2si, v2sf, di)
-VAR9 (STORE1, vst3,
-	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
-VAR7 (STORE1LANE, vst3_lane,
-	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR9 (LOAD1, vld4,
-	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
-VAR7 (LOAD1LANE, vld4_lane,
-	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
-VAR5 (LOAD1, vld4_dup, v8qi, v4hi, v2si, v2sf, di)
-VAR9 (STORE1, vst4,
-	v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
-VAR7 (STORE1LANE, vst4_lane,
-	v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
+VAR12 (STORE1, vst1,
+	v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
+VAR12 (STORE1LANE, vst1_lane,
+	v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
+VAR11 (LOAD1, vld2,
+	v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR9 (LOAD1LANE, vld2_lane,
+	v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR6 (LOAD1, vld2_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
+VAR11 (STORE1, vst2,
+	v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR9 (STORE1LANE, vst2_lane,
+	v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR11 (LOAD1, vld3,
+	v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR9 (LOAD1LANE, vld3_lane,
+	v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR6 (LOAD1, vld3_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
+VAR11 (STORE1, vst3,
+	v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR9 (STORE1LANE, vst3_lane,
+	v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR11 (LOAD1, vld4,
+	v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR9 (LOAD1LANE, vld4_lane,
+	v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
+VAR6 (LOAD1, vld4_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
+VAR11 (STORE1, vst4,
+	v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
+VAR9 (STORE1LANE, vst4_lane,
+	v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 1e7f3f1..47cc1ee 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -65,20 +65,32 @@
 ;; Integer modes supported by Neon and IWMMXT, except V2DI
 (define_mode_iterator VINTW [V2SI V4HI V8QI V4SI V8HI V16QI])
=20
-;; Double-width vector modes.
+;; Double-width vector modes, on which we support arithmetic (no HF!)
 (define_mode_iterator VD [V8QI V4HI V2SI V2SF])
=20
+;; Double-width vector modes plus 64-bit elements for vreinterpret + vcrea=
te.
+(define_mode_iterator VD_RE [V8QI V4HI V2SI V2SF DI])
+
 ;; Double-width vector modes plus 64-bit elements.
-(define_mode_iterator VDX [V8QI V4HI V2SI V2SF DI])
+(define_mode_iterator VDX [V8QI V4HI V4HF V2SI V2SF DI])
+
+;; Double-width vector modes, with V4HF - for vldN_lane and vstN_lane.
+(define_mode_iterator VD_LANE [V8QI V4HI V4HF V2SI V2SF])
=20
 ;; Double-width vector modes without floating-point elements.
 (define_mode_iterator VDI [V8QI V4HI V2SI])
=20
-;; Quad-width vector modes.
+;; Quad-width vector modes supporting arithmetic (no HF!).
 (define_mode_iterator VQ [V16QI V8HI V4SI V4SF])
=20
+;; Quad-width vector modes, including V8HF.
+(define_mode_iterator VQ2 [V16QI V8HI V8HF V4SI V4SF])
+
+;; Quad-width vector modes with 16- or 32-bit elements
+(define_mode_iterator VQ_HS [V8HI V8HF V4SI V4SF])
+
 ;; Quad-width vector modes plus 64-bit elements.
-(define_mode_iterator VQX [V16QI V8HI V4SI V4SF V2DI])
+(define_mode_iterator VQX [V16QI V8HI V8HF V4SI V4SF V2DI])
=20
 ;; Quad-width vector modes without floating-point elements.
 (define_mode_iterator VQI [V16QI V8HI V4SI])
@@ -111,7 +123,8 @@
 (define_mode_iterator VDQI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI])
=20
 ;; Vector modes, including 64-bit integer elements.
-(define_mode_iterator VDQX [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF DI V2=
DI])
+(define_mode_iterator VDQX [V8QI V16QI V4HI V8HI V2SI V4SI
+			    V4HF V8HF V2SF V4SF DI V2DI])
=20
 ;; Vector modes including 64-bit integer elements, but no floats.
 (define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI])
@@ -366,7 +379,8 @@
=20
 ;; Define element mode for each vector mode.
 (define_mode_attr V_elem [(V8QI "QI") (V16QI "QI")
-              (V4HI "HI") (V8HI "HI")
+			  (V4HI "HI") (V8HI "HI")
+			  (V4HF "HF") (V8HF "HF")
                           (V2SI "SI") (V4SI "SI")
                           (V2SF "SF") (V4SF "SF")
                           (DI "DI")   (V2DI "DI")])
@@ -383,6 +397,7 @@
 ;; size for structure lane/dup loads and stores.
 (define_mode_attr V_two_elem [(V8QI "HI")   (V16QI "HI")
                               (V4HI "SI")   (V8HI "SI")
+                              (V4HF "SF")   (V8HF "SF")
                               (V2SI "V2SI") (V4SI "V2SI")
                               (V2SF "V2SF") (V4SF "V2SF")
                               (DI "V2DI")   (V2DI "V2DI")])
@@ -390,6 +405,7 @@
 ;; Similar, for three elements.
 (define_mode_attr V_three_elem [(V8QI "BLK") (V16QI "BLK")
                                 (V4HI "BLK") (V8HI "BLK")
+                                (V4HF "BLK") (V8HF "BLK")
                                 (V2SI "BLK") (V4SI "BLK")
                                 (V2SF "BLK") (V4SF "BLK")
                                 (DI "EI")    (V2DI "EI")])
@@ -397,6 +413,7 @@
 ;; Similar, for four elements.
 (define_mode_attr V_four_elem [(V8QI "SI")   (V16QI "SI")
                                (V4HI "V4HI") (V8HI "V4HI")
+                               (V4HF "V4HF") (V8HF "V4HF")
                                (V2SI "V4SI") (V4SI "V4SI")
                                (V2SF "V4SF") (V4SF "V4SF")
                                (DI "OI")     (V2DI "OI")])
@@ -421,7 +438,8 @@
=20
 ;; Modes with half the number of equal-sized elements.
 (define_mode_attr V_HALF [(V16QI "V8QI") (V8HI "V4HI")
-              (V4SI  "V2SI") (V4SF "V2SF") (V2DF "DF")
+			  (V8HF "V4HF") (V4SI  "V2SI")
+			  (V4SF "V2SF") (V2DF "DF")
                           (V2DI "DI")])
=20
 ;; Same, but lower-case.
@@ -431,8 +449,9 @@
=20
 ;; Modes with twice the number of equal-sized elements.
 (define_mode_attr V_DOUBLE [(V8QI "V16QI") (V4HI "V8HI")
-                (V2SI "V4SI") (V2SF "V4SF") (DF "V2DF")
-                            (DI "V2DI")])
+			    (V2SI "V4SI") (V4HF "V8HF")
+			    (V2SF "V4SF") (DF "V2DF")
+			    (DI "V2DI")])
=20
 ;; Same, but lower-case.
 (define_mode_attr V_double [(V8QI "v16qi") (V4HI "v8hi")
@@ -454,8 +473,9 @@
=20
 ;; Mode of result of comparison operations (and bit-select operand 1).
 (define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI")
-                    (V4HI "V4HI") (V8HI  "V8HI")
+				(V4HI "V4HI") (V8HI  "V8HI")
                                 (V2SI "V2SI") (V4SI  "V4SI")
+				(V4HF "V4HI") (V8HF  "V8HI")
                                 (V2SF "V2SI") (V4SF  "V4SI")
                                 (DI   "DI")   (V2DI  "V2DI")])
=20
@@ -492,12 +512,14 @@
 (define_mode_attr V_uf_sclr [(V8QI "u8")  (V16QI "u8")
                  (V4HI "u16") (V8HI "u16")
                              (V2SI "32") (V4SI "32")
+                             (V4HF "u16") (V8HF "u16")
                              (V2SF "32") (V4SF "32")])
=20
 (define_mode_attr V_sz_elem [(V8QI "8")  (V16QI "8")
                  (V4HI "16") (V8HI  "16")
                              (V2SI "32") (V4SI  "32")
                              (DI   "64") (V2DI  "64")
+			     (V4HF "16") (V8HF "16")
                  (V2SF "32") (V4SF  "32")])
=20
 (define_mode_attr V_elem_ch [(V8QI "b")  (V16QI "b")
@@ -564,6 +586,7 @@
                             (DI   "true") (V2DI  "false")])
=20
 (define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16")
+				 (V4HF "4") (V8HF "8")
                                  (V4HI "4") (V8HI "8")
                                  (V2SI "2") (V4SI "4")
                                  (V2SF "2") (V4SF "4")
@@ -607,6 +630,7 @@
 (define_mode_attr q [(V8QI "") (V16QI "_q")
                      (V4HI "") (V8HI "_q")
                      (V2SI "") (V4SI "_q")
+		     (V4HF "") (V8HF "_q")
                      (V2SF "") (V4SF "_q")
                      (DI "")   (V2DI "_q")
                      (DF "")   (V2DF "_q")])
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 4af74ce..f8d6e74 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -320,11 +320,11 @@
   [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
=20
 (define_insn "vec_set<mode>_internal"
-  [(set (match_operand:VQ 0 "s_register_operand" "=3Dw,w")
-        (vec_merge:VQ
-          (vec_duplicate:VQ
+  [(set (match_operand:VQ2 0 "s_register_operand" "=3Dw,w")
+        (vec_merge:VQ2
+          (vec_duplicate:VQ2
             (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
-          (match_operand:VQ 3 "s_register_operand" "0,0")
+          (match_operand:VQ2 3 "s_register_operand" "0,0")
           (match_operand:SI 2 "immediate_operand" "i,i")))]
   "TARGET_NEON"
 {
@@ -407,7 +407,7 @@
 (define_insn "vec_extract<mode>"
   [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=3DUm,r")
 	(vec_select:<V_elem>
-          (match_operand:VQ 1 "s_register_operand" "w,w")
+          (match_operand:VQ2 1 "s_register_operand" "w,w")
           (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
   "TARGET_NEON"
 {
@@ -2607,7 +2607,7 @@
   [(set (match_operand:SI 0 "s_register_operand" "=3Dr")
 	(sign_extend:SI
 	  (vec_select:<V_elem>
-	    (match_operand:VQ 1 "s_register_operand" "w")
+	    (match_operand:VQ2 1 "s_register_operand" "w")
 	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
   "TARGET_NEON"
 {
@@ -2634,7 +2634,7 @@
   [(set (match_operand:SI 0 "s_register_operand" "=3Dr")
 	(zero_extend:SI
 	  (vec_select:<V_elem>
-	    (match_operand:VQ 1 "s_register_operand" "w")
+	    (match_operand:VQ2 1 "s_register_operand" "w")
 	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
   "TARGET_NEON"
 {
@@ -2774,7 +2774,7 @@
 })
=20
 (define_expand "neon_vcreate<mode>"
-  [(match_operand:VDX 0 "s_register_operand" "")
+  [(match_operand:VD_RE 0 "s_register_operand" "")
    (match_operand:DI 1 "general_operand" "")]
   "TARGET_NEON"
 {
@@ -4125,7 +4125,7 @@
=20
 (define_expand "neon_vreinterpretv8qi<mode>"
   [(match_operand:V8QI 0 "s_register_operand" "")
-   (match_operand:VDX 1 "s_register_operand" "")]
+   (match_operand:VD_RE 1 "s_register_operand" "")]
   "TARGET_NEON"
 {
   neon_reinterpret (operands[0], operands[1]);
@@ -4134,7 +4134,7 @@
=20
 (define_expand "neon_vreinterpretv4hi<mode>"
   [(match_operand:V4HI 0 "s_register_operand" "")
-   (match_operand:VDX 1 "s_register_operand" "")]
+   (match_operand:VD_RE 1 "s_register_operand" "")]
   "TARGET_NEON"
 {
   neon_reinterpret (operands[0], operands[1]);
@@ -4143,7 +4143,7 @@
=20
 (define_expand "neon_vreinterpretv2si<mode>"
   [(match_operand:V2SI 0 "s_register_operand" "")
-   (match_operand:VDX 1 "s_register_operand" "")]
+   (match_operand:VD_RE 1 "s_register_operand" "")]
   "TARGET_NEON"
 {
   neon_reinterpret (operands[0], operands[1]);
@@ -4152,7 +4152,7 @@
=20
 (define_expand "neon_vreinterpretv2sf<mode>"
   [(match_operand:V2SF 0 "s_register_operand" "")
-   (match_operand:VDX 1 "s_register_operand" "")]
+   (match_operand:VD_RE 1 "s_register_operand" "")]
   "TARGET_NEON"
 {
   neon_reinterpret (operands[0], operands[1]);
@@ -4161,7 +4161,7 @@
=20
 (define_expand "neon_vreinterpretdi<mode>"
   [(match_operand:DI 0 "s_register_operand" "")
-   (match_operand:VDX 1 "s_register_operand" "")]
+   (match_operand:VD_RE 1 "s_register_operand" "")]
   "TARGET_NEON"
 {
   neon_reinterpret (operands[0], operands[1]);
@@ -4420,14 +4420,14 @@
 (define_expand "vec_load_lanesoi<mode>"
   [(set (match_operand:OI 0 "s_register_operand")
         (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
-                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
 		   UNSPEC_VLD2))]
   "TARGET_NEON")
=20
 (define_insn "neon_vld2<mode>"
   [(set (match_operand:OI 0 "s_register_operand" "=3Dw")
         (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
-                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD2))]
   "TARGET_NEON"
   "vld2.<V_sz_elem>\t%h0, %A1"
@@ -4438,7 +4438,7 @@
         (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "U=
m")
                     (match_operand:TI 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")
-                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD2_LANE))]
   "TARGET_NEON"
 {
@@ -4463,7 +4463,7 @@
         (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "U=
m")
                     (match_operand:OI 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")
-                    (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD2_LANE))]
   "TARGET_NEON"
 {
@@ -4534,14 +4534,14 @@
 (define_expand "vec_store_lanesoi<mode>"
   [(set (match_operand:OI 0 "neon_struct_operand")
 	(unspec:OI [(match_operand:OI 1 "s_register_operand")
-                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VST2))]
   "TARGET_NEON")
=20
 (define_insn "neon_vst2<mode>"
   [(set (match_operand:OI 0 "neon_struct_operand" "=3DUm")
 	(unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
-		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+		    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
 		   UNSPEC_VST2))]
   "TARGET_NEON"
   "vst2.<V_sz_elem>\t%h1, %A0"
@@ -4553,7 +4553,7 @@
 	(unspec:<V_two_elem>
 	  [(match_operand:TI 1 "s_register_operand" "w")
 	   (match_operand:SI 2 "immediate_operand" "i")
-	   (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+	   (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
 	  UNSPEC_VST2_LANE))]
   "TARGET_NEON"
 {
@@ -4578,7 +4578,7 @@
         (unspec:<V_two_elem>
            [(match_operand:OI 1 "s_register_operand" "w")
             (match_operand:SI 2 "immediate_operand" "i")
-            (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
            UNSPEC_VST2_LANE))]
   "TARGET_NEON"
 {
@@ -4631,7 +4631,7 @@
 (define_expand "vec_load_lanesci<mode>"
   [(match_operand:CI 0 "s_register_operand")
    (match_operand:CI 1 "neon_struct_operand")
-   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
   "TARGET_NEON"
 {
   emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
@@ -4641,7 +4641,7 @@
 (define_expand "neon_vld3<mode>"
   [(match_operand:CI 0 "s_register_operand")
    (match_operand:CI 1 "neon_struct_operand")
-   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
   "TARGET_NEON"
 {
   rtx mem;
@@ -4656,7 +4656,7 @@
 (define_insn "neon_vld3qa<mode>"
   [(set (match_operand:CI 0 "s_register_operand" "=3Dw")
         (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
-                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD3A))]
   "TARGET_NEON"
 {
@@ -4676,7 +4676,7 @@
   [(set (match_operand:CI 0 "s_register_operand" "=3Dw")
         (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                     (match_operand:CI 2 "s_register_operand" "0")
-                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD3B))]
   "TARGET_NEON"
 {
@@ -4697,7 +4697,7 @@
         (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" =
"Um")
                     (match_operand:EI 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")
-                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD3_LANE))]
   "TARGET_NEON"
 {
@@ -4724,7 +4724,7 @@
         (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" =
"Um")
                     (match_operand:CI 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")
-                    (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD3_LANE))]
   "TARGET_NEON"
 {
@@ -4804,7 +4804,7 @@
 (define_expand "vec_store_lanesci<mode>"
   [(match_operand:CI 0 "neon_struct_operand")
    (match_operand:CI 1 "s_register_operand")
-   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
   "TARGET_NEON"
 {
   emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
@@ -4814,7 +4814,7 @@
 (define_expand "neon_vst3<mode>"
   [(match_operand:CI 0 "neon_struct_operand")
    (match_operand:CI 1 "s_register_operand")
-   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
   "TARGET_NEON"
 {
   rtx mem;
@@ -4829,7 +4829,7 @@
 (define_insn "neon_vst3qa<mode>"
   [(set (match_operand:EI 0 "neon_struct_operand" "=3DUm")
         (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
-                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VST3A))]
   "TARGET_NEON"
 {
@@ -4848,7 +4848,7 @@
 (define_insn "neon_vst3qb<mode>"
   [(set (match_operand:EI 0 "neon_struct_operand" "=3DUm")
         (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
-                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VST3B))]
   "TARGET_NEON"
 {
@@ -4869,7 +4869,7 @@
         (unspec:<V_three_elem>
            [(match_operand:EI 1 "s_register_operand" "w")
             (match_operand:SI 2 "immediate_operand" "i")
-            (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
            UNSPEC_VST3_LANE))]
   "TARGET_NEON"
 {
@@ -4896,7 +4896,7 @@
         (unspec:<V_three_elem>
            [(match_operand:CI 1 "s_register_operand" "w")
             (match_operand:SI 2 "immediate_operand" "i")
-            (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
            UNSPEC_VST3_LANE))]
   "TARGET_NEON"
 {
@@ -4951,7 +4951,7 @@
 (define_expand "vec_load_lanesxi<mode>"
   [(match_operand:XI 0 "s_register_operand")
    (match_operand:XI 1 "neon_struct_operand")
-   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
   "TARGET_NEON"
 {
   emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
@@ -4961,7 +4961,7 @@
 (define_expand "neon_vld4<mode>"
   [(match_operand:XI 0 "s_register_operand")
    (match_operand:XI 1 "neon_struct_operand")
-   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
   "TARGET_NEON"
 {
   rtx mem;
@@ -4976,7 +4976,7 @@
 (define_insn "neon_vld4qa<mode>"
   [(set (match_operand:XI 0 "s_register_operand" "=3Dw")
         (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
-                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD4A))]
   "TARGET_NEON"
 {
@@ -4997,7 +4997,7 @@
   [(set (match_operand:XI 0 "s_register_operand" "=3Dw")
         (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                     (match_operand:XI 2 "s_register_operand" "0")
-                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD4B))]
   "TARGET_NEON"
 {
@@ -5019,7 +5019,7 @@
         (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "=
Um")
                     (match_operand:OI 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")
-                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD4_LANE))]
   "TARGET_NEON"
 {
@@ -5047,7 +5047,7 @@
         (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "=
Um")
                     (match_operand:XI 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")
-                    (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VLD4_LANE))]
   "TARGET_NEON"
 {
@@ -5132,7 +5132,7 @@
 (define_expand "vec_store_lanesxi<mode>"
   [(match_operand:XI 0 "neon_struct_operand")
    (match_operand:XI 1 "s_register_operand")
-   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
   "TARGET_NEON"
 {
   emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
@@ -5142,7 +5142,7 @@
 (define_expand "neon_vst4<mode>"
   [(match_operand:XI 0 "neon_struct_operand")
    (match_operand:XI 1 "s_register_operand")
-   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
   "TARGET_NEON"
 {
   rtx mem;
@@ -5157,7 +5157,7 @@
 (define_insn "neon_vst4qa<mode>"
   [(set (match_operand:OI 0 "neon_struct_operand" "=3DUm")
         (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
-                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VST4A))]
   "TARGET_NEON"
 {
@@ -5177,7 +5177,7 @@
 (define_insn "neon_vst4qb<mode>"
   [(set (match_operand:OI 0 "neon_struct_operand" "=3DUm")
         (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
-                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_VST4B))]
   "TARGET_NEON"
 {
@@ -5199,7 +5199,7 @@
         (unspec:<V_four_elem>
            [(match_operand:OI 1 "s_register_operand" "w")
             (match_operand:SI 2 "immediate_operand" "i")
-            (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
            UNSPEC_VST4_LANE))]
   "TARGET_NEON"
 {
@@ -5227,7 +5227,7 @@
         (unspec:<V_four_elem>
            [(match_operand:XI 1 "s_register_operand" "w")
             (match_operand:SI 2 "immediate_operand" "i")
-            (unspec:VMQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+            (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
            UNSPEC_VST4_LANE))]
   "TARGET_NEON"
 {

--------------090106040408080804070807--