From 08c5cf4b5c6c846a4f62b6ad8776f2388b135e55 Mon Sep 17 00:00:00 2001
From: Matthew Wahab
Date: Thu, 7 Apr 2016 14:48:29 +0100
Subject: [PATCH 06/17] [PATCH 6/17][ARM] Add data processing intrinsics for float16_t.

2016-05-17  Matthew Wahab

	* config/arm/arm.c (arm_evpc_neon_vuzp): Add support for V8HF and
	V4HF modes.
	(arm_evpc_neon_vzip): Likewise.
	(arm_evpc_neon_vtrn): Likewise.
	(arm_evpc_neon_vrev): Likewise.
	(arm_evpc_neon_vext): Likewise.
	* config/arm/arm_neon.h (vbsl_f16): New.
	(vbslq_f16): New.
	(vdup_n_f16): New.
	(vdupq_n_f16): New.
	(vdup_lane_f16): New.
	(vdupq_lane_f16): New.
	(vext_f16): New.
	(vextq_f16): New.
	(vmov_n_f16): New.
	(vmovq_n_f16): New.
	(vrev64_f16): New.
	(vrev64q_f16): New.
	(vtrn_f16): New.
	(vtrnq_f16): New.
	(vuzp_f16): New.
	(vuzpq_f16): New.
	(vzip_f16): New.
	(vzipq_f16): New.
	* config/arm/arm_neon_builtins.def (vdup_n): New (v8hf, v4hf variants).
	(vdup_lane): New (v8hf, v4hf variants).
	(vext): New (v8hf, v4hf variants).
	(vbsl): New (v8hf, v4hf variants).
	* config/arm/iterators.md (VDQWH): New.
	(VH): New.
	(V_double_vector_mode): Add V8HF and V4HF.  Fix white-space.
	(Scalar_mul_8_16): Fix white-space.
	(Is_d_reg): Add V4HF and V8HF.
	* config/arm/neon.md (neon_vdup_lane<mode>_internal): New.
	(neon_vdup_lane<mode>): New.
	(neon_vtrn<mode>_internal): Replace VDQW with VDQWH.
	(*neon_vtrn<mode>_insn): Likewise.
	(neon_vzip<mode>_internal): Likewise.  Also fix white-space.
	(*neon_vzip<mode>_insn): Likewise.
	(neon_vuzp<mode>_internal): Likewise.
	(*neon_vuzp<mode>_insn): Likewise.
	* config/arm/vec-common.md (vec_perm_const<mode>): New.

testsuite/
2016-05-17  Matthew Wahab

	* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
	(FP16_SUPPORTED): New.
	(expected-hfloat-16x4): Make conditional on __fp16 support.
	(expected-hfloat-16x8): Likewise.
	(vdup_n_f16): Disable for non-AArch64 targets.
	* gcc.target/aarch64/advsimd-intrinsics/vbsl.c: Add __fp16 tests,
	conditional on FP16_SUPPORTED.
	* gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vext.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vrev.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc: Add support
	for testing __fp16.
	* gcc.target/aarch64/advsimd-intrinsics/vtrn.c: Add __fp16 tests,
	conditional on FP16_SUPPORTED.
	* gcc.target/aarch64/advsimd-intrinsics/vuzp.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vzip.c: Likewise.
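Usage note (not part of the patch): the new intrinsics compose like their existing
32-bit float counterparts.  A minimal sketch, assuming a target that satisfies the
__ARM_FP16_FORMAT_IEEE / __ARM_FP16_FORMAT_ALTERNATIVE guard used in arm_neon.h;
the helper name blend_and_zip_low is made up for illustration.

    #include <arm_neon.h>

    /* Select lanes from A or B under MASK, then interleave the result with
       zeros, using the vbsl_f16, vdup_n_f16 and vzip_f16 intrinsics added
       by this patch.  */
    float16x4_t
    blend_and_zip_low (float16x4_t a, float16x4_t b, uint16x4_t mask)
    {
      float16x4_t sel = vbsl_f16 (mask, a, b);          /* mask ? a : b, per bit.  */
      float16x4x2_t z = vzip_f16 (sel, vdup_n_f16 (0.0));
      return z.val[0];   /* { sel[0], 0, sel[1], 0 } on little-endian.  */
    }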
--- gcc/config/arm/arm.c | 10 ++ gcc/config/arm/arm_neon.h | 175 +++++++++++++++++++++ gcc/config/arm/arm_neon_builtins.def | 4 + gcc/config/arm/iterators.md | 26 +-- gcc/config/arm/neon.md | 115 +++++++++----- gcc/config/arm/vec-common.md | 14 ++ .../aarch64/advsimd-intrinsics/arm-neon-ref.h | 13 +- .../gcc.target/aarch64/advsimd-intrinsics/vbsl.c | 28 ++++ .../aarch64/advsimd-intrinsics/vdup-vmov.c | 75 +++++++++ .../aarch64/advsimd-intrinsics/vdup_lane.c | 23 +++ .../gcc.target/aarch64/advsimd-intrinsics/vext.c | 30 ++++ .../gcc.target/aarch64/advsimd-intrinsics/vrev.c | 20 +++ .../aarch64/advsimd-intrinsics/vshuffle.inc | 42 ++++- .../gcc.target/aarch64/advsimd-intrinsics/vtrn.c | 20 +++ .../gcc.target/aarch64/advsimd-intrinsics/vuzp.c | 20 +++ .../gcc.target/aarch64/advsimd-intrinsics/vzip.c | 20 +++ 16 files changed, 586 insertions(+), 49 deletions(-) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 26a8a48..6892040 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -28420,6 +28420,8 @@ arm_evpc_neon_vuzp (struct expand_vec_perm_d *d) case V8QImode: gen = gen_neon_vuzpv8qi_internal; break; case V8HImode: gen = gen_neon_vuzpv8hi_internal; break; case V4HImode: gen = gen_neon_vuzpv4hi_internal; break; + case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break; + case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break; case V4SImode: gen = gen_neon_vuzpv4si_internal; break; case V2SImode: gen = gen_neon_vuzpv2si_internal; break; case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break; @@ -28493,6 +28495,8 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d *d) case V8QImode: gen = gen_neon_vzipv8qi_internal; break; case V8HImode: gen = gen_neon_vzipv8hi_internal; break; case V4HImode: gen = gen_neon_vzipv4hi_internal; break; + case V8HFmode: gen = gen_neon_vzipv8hf_internal; break; + case V4HFmode: gen = gen_neon_vzipv4hf_internal; break; case V4SImode: gen = gen_neon_vzipv4si_internal; break; case V2SImode: gen = gen_neon_vzipv2si_internal; break; case V2SFmode: gen = gen_neon_vzipv2sf_internal; break; @@ -28545,6 +28549,8 @@ arm_evpc_neon_vrev (struct expand_vec_perm_d *d) case V8QImode: gen = gen_neon_vrev32v8qi; break; case V8HImode: gen = gen_neon_vrev64v8hi; break; case V4HImode: gen = gen_neon_vrev64v4hi; break; + case V8HFmode: gen = gen_neon_vrev64v8hf; break; + case V4HFmode: gen = gen_neon_vrev64v4hf; break; default: return false; } @@ -28628,6 +28634,8 @@ arm_evpc_neon_vtrn (struct expand_vec_perm_d *d) case V8QImode: gen = gen_neon_vtrnv8qi_internal; break; case V8HImode: gen = gen_neon_vtrnv8hi_internal; break; case V4HImode: gen = gen_neon_vtrnv4hi_internal; break; + case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break; + case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break; case V4SImode: gen = gen_neon_vtrnv4si_internal; break; case V2SImode: gen = gen_neon_vtrnv2si_internal; break; case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break; @@ -28703,6 +28711,8 @@ arm_evpc_neon_vext (struct expand_vec_perm_d *d) case V8HImode: gen = gen_neon_vextv8hi; break; case V2SImode: gen = gen_neon_vextv2si; break; case V4SImode: gen = gen_neon_vextv4si; break; + case V4HFmode: gen = gen_neon_vextv4hf; break; + case V8HFmode: gen = gen_neon_vextv8hf; break; case V2SFmode: gen = gen_neon_vextv2sf; break; case V4SFmode: gen = gen_neon_vextv4sf; break; case V2DImode: gen = gen_neon_vextv2di; break; diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 07503d7..5b433b4 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ 
-14830,6 +14830,181 @@ vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) #pragma GCC pop_options + /* Half-precision data processing intrinsics. */ +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vbsl_f16 (uint16x4_t __a, float16x4_t __b, float16x4_t __c) +{ + return __builtin_neon_vbslv4hf ((int16x4_t)__a, __b, __c); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vbslq_f16 (uint16x8_t __a, float16x8_t __b, float16x8_t __c) +{ + return __builtin_neon_vbslv8hf ((int16x8_t)__a, __b, __c); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vdup_n_f16 (float16_t __a) +{ + return __builtin_neon_vdup_nv4hf (__a); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vdupq_n_f16 (float16_t __a) +{ + return __builtin_neon_vdup_nv8hf (__a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vdup_lane_f16 (float16x4_t __a, const int __b) +{ + return __builtin_neon_vdup_lanev4hf (__a, __b); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vdupq_lane_f16 (float16x4_t __a, const int __b) +{ + return __builtin_neon_vdup_lanev8hf (__a, __b); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vext_f16 (float16x4_t __a, float16x4_t __b, const int __c) +{ + return __builtin_neon_vextv4hf (__a, __b, __c); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vextq_f16 (float16x8_t __a, float16x8_t __b, const int __c) +{ + return __builtin_neon_vextv8hf (__a, __b, __c); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vmov_n_f16 (float16_t __a) +{ + return __builtin_neon_vdup_nv4hf (__a); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vmovq_n_f16 (float16_t __a) +{ + return __builtin_neon_vdup_nv8hf (__a); +} + +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vrev64_f16 (float16x4_t __a) +{ + return (float16x4_t)__builtin_shuffle (__a, (uint16x4_t){ 3, 2, 1, 0 }); +} + +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vrev64q_f16 (float16x8_t __a) +{ + return + (float16x8_t)__builtin_shuffle (__a, + (uint16x8_t){ 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__)) +vtrn_f16 (float16x4_t __a, float16x4_t __b) +{ + float16x4x2_t __rv; +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 1, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 6, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 2, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 5, 3, 7 }); +#endif + return __rv; +} + +__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__)) +vtrnq_f16 (float16x8_t __a, float16x8_t __b) +{ + float16x8x2_t __rv; +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 9, 1, 11, 3, 13, 5, 15, 7 }); + __rv.val[1] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 8, 0, 10, 2, 12, 4, 14, 6 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 0, 8, 2, 10, 4, 12, 6, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 1, 9, 3, 11, 5, 13, 7, 15 }); +#endif + return __rv; +} + +__extension__ static 
__inline float16x4x2_t __attribute__ ((__always_inline__)) +vuzp_f16 (float16x4_t __a, float16x4_t __b) +{ + float16x4x2_t __rv; +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 7, 1, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 6, 0, 2 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 2, 4, 6 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 3, 5, 7 }); +#endif + return __rv; +} + +__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__)) +vuzpq_f16 (float16x8_t __a, float16x8_t __b) +{ + float16x8x2_t __rv; +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 5, 7, 1, 3, 13, 15, 9, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 4, 6, 0, 2, 12, 14, 8, 10 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 0, 2, 4, 6, 8, 10, 12, 14 }); + __rv.val[1] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 1, 3, 5, 7, 9, 11, 13, 15 }); +#endif + return __rv; +} + +__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__)) +vzip_f16 (float16x4_t __a, float16x4_t __b) +{ + float16x4x2_t __rv; +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 6, 2, 7, 3 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 5, 1 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 1, 5 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 2, 6, 3, 7 }); +#endif + return __rv; +} + +__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__)) +vzipq_f16 (float16x8_t __a, float16x8_t __b) +{ + float16x8x2_t __rv; +#ifdef __ARM_BIG_ENDIAN + __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 10, 2, 11, 3, 8, 0, 9, 1 }); + __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) + { 14, 6, 15, 7, 12, 4, 13, 5 }); +#else + __rv.val[0] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 0, 8, 1, 9, 2, 10, 3, 11 }); + __rv.val[1] = __builtin_shuffle (__a, __b, + (uint16x8_t){ 4, 12, 5, 13, 6, 14, 7, 15 }); +#endif + return __rv; +} + +#endif + #ifdef __cplusplus } #endif diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index d9fac78..a4ba516 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -166,8 +166,10 @@ VAR10 (SETLANE, vset_lane, VAR5 (UNOP, vcreate, v8qi, v4hi, v2si, v2sf, di) VAR10 (UNOP, vdup_n, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) +VAR2 (UNOP, vdup_n, v8hf, v4hf) VAR10 (GETLANE, vdup_lane, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) +VAR2 (GETLANE, vdup_lane, v8hf, v4hf) VAR6 (COMBINE, vcombine, v8qi, v4hi, v4hf, v2si, v2sf, di) VAR6 (UNOP, vget_high, v16qi, v8hi, v8hf, v4si, v4sf, v2di) VAR6 (UNOP, vget_low, v16qi, v8hi, v8hf, v4si, v4sf, v2di) @@ -197,6 +199,7 @@ VAR2 (MAC_N, vmlslu_n, v4hi, v2si) VAR2 (MAC_N, vqdmlsl_n, v4hi, v2si) VAR10 (SETLANE, vext, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) +VAR2 (SETLANE, vext, v8hf, v4hf) VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) VAR2 (UNOP, vrev16, v8qi, v16qi) @@ -208,6 +211,7 @@ VAR1 (UNOP, vcvtv4sf, v4hf) VAR1 (UNOP, vcvtv4hf, v4sf) VAR10 (TERNOP, vbsl, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) +VAR2 (TERNOP, vbsl, v8hf, v4hf) VAR2 (UNOP, copysignf, v2sf, v4sf) VAR2 (UNOP, vrintn, v2sf, v4sf) VAR2 (UNOP, vrinta, v2sf, v4sf) diff 
--git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index aba1023..3f9d9e4 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -119,6 +119,10 @@
 ;; All supported vector modes (except those with 64-bit integer elements).
 (define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF])
 
+;; All supported vector modes including 16-bit float modes.
+(define_mode_iterator VDQWH [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF
+			     V8HF V4HF])
+
 ;; Supported integer vector modes (not 64 bit elements).
 (define_mode_iterator VDQIW [V8QI V16QI V4HI V8HI V2SI V4SI])
 
@@ -174,6 +178,9 @@
 ;; Modes with 8-bit, 16-bit and 32-bit elements.
 (define_mode_iterator VU [V16QI V8HI V4SI])
 
+;; Vector modes for 16-bit floating-point support.
+(define_mode_iterator VH [V8HF V4HF])
+
 ;; Iterators used for fixed-point support.
 (define_mode_iterator FIXED [QQ HQ SQ UQQ UHQ USQ HA SA UHA USA])
 
@@ -475,9 +482,10 @@
 ;; Used for neon_vdup_lane, where the second operand is double-sized
 ;; even when the first one is quad.
 (define_mode_attr V_double_vector_mode [(V16QI "V8QI") (V8HI "V4HI")
-                    (V4SI "V2SI") (V4SF "V2SF")
-                    (V8QI "V8QI") (V4HI "V4HI")
-                    (V2SI "V2SI") (V2SF "V2SF")])
+					(V4SI "V2SI") (V4SF "V2SF")
+					(V8QI "V8QI") (V4HI "V4HI")
+					(V2SI "V2SI") (V2SF "V2SF")
+					(V8HF "V4HF") (V4HF "V4HF")])
 
 ;; Mode of result of comparison operations (and bit-select operand 1).
 (define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI")
@@ -582,17 +590,17 @@
 				 (DI "false") (V2DI "false")])
 
 (define_mode_attr Scalar_mul_8_16 [(V8QI "true") (V16QI "true")
-                  (V4HI "true") (V8HI "true")
-                  (V2SI "false") (V4SI "false")
-                  (V2SF "false") (V4SF "false")
-                  (DI "false") (V2DI "false")])
-
+				   (V4HI "true") (V8HI "true")
+				   (V2SI "false") (V4SI "false")
+				   (V2SF "false") (V4SF "false")
+				   (DI "false") (V2DI "false")])
 
 (define_mode_attr Is_d_reg [(V8QI "true") (V16QI "false")
 			    (V4HI "true") (V8HI "false")
 			    (V2SI "true") (V4SI "false")
 			    (V2SF "true") (V4SF "false")
-			    (DI "true") (V2DI "false")])
+			    (DI "true") (V2DI "false")
+			    (V4HF "true") (V8HF "false")])
 
 (define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16")
 				 (V4HF "4") (V8HF "8")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 6b4896d..5fcc991 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -3045,6 +3045,28 @@ if (BYTES_BIG_ENDIAN)
   [(set_attr "type" "neon_dup")]
 )
 
+(define_insn "neon_vdup_lane<mode>_internal"
+  [(set (match_operand:VH 0 "s_register_operand" "=w")
+	(vec_duplicate:VH
+	 (vec_select:<V_elem>
+	  (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
+	  (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
+  "TARGET_NEON && TARGET_FP16"
+{
+  if (BYTES_BIG_ENDIAN)
+    {
+      int elt = INTVAL (operands[2]);
+      elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
+      operands[2] = GEN_INT (elt);
+    }
+  if (<Is_d_reg>)
+    return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
+  else
+    return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
+}
+  [(set_attr "type" "neon_dup")]
+)
+
 (define_expand "neon_vdup_lane<mode>"
   [(match_operand:VDQW 0 "s_register_operand" "=w")
    (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
@@ -3064,6 +3086,25 @@ if (BYTES_BIG_ENDIAN)
   DONE;
 })
 
+(define_expand "neon_vdup_lane<mode>"
+  [(match_operand:VH 0 "s_register_operand")
+   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
+   (match_operand:SI 2 "immediate_operand")]
+  "TARGET_NEON && TARGET_FP16"
+{
+  if (BYTES_BIG_ENDIAN)
+    {
+      unsigned int elt = INTVAL (operands[2]);
+      unsigned int reg_nelts
+	= 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
+      elt ^= reg_nelts - 1;
+      operands[2] = GEN_INT (elt);
+    }
+  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
+						operands[2]));
+  DONE;
+})
+
 ; Scalar index is ignored, since only zero is valid here.
 (define_expand "neon_vdup_lanedi"
   [(match_operand:DI 0 "s_register_operand" "=w")
@@ -4281,25 +4322,25 @@ if (BYTES_BIG_ENDIAN)
 
 (define_expand "neon_vtrn<mode>_internal"
   [(parallel
-    [(set (match_operand:VDQW 0 "s_register_operand" "")
-	  (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
-			(match_operand:VDQW 2 "s_register_operand" "")]
+    [(set (match_operand:VDQWH 0 "s_register_operand")
+	  (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
+			 (match_operand:VDQWH 2 "s_register_operand")]
 		       UNSPEC_VTRN1))
-     (set (match_operand:VDQW 3 "s_register_operand" "")
-	  (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
+     (set (match_operand:VDQWH 3 "s_register_operand")
+	  (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
   "TARGET_NEON"
   ""
 )
 
 ;; Note: Different operand numbering to handle tied registers correctly.
 (define_insn "*neon_vtrn<mode>_insn"
-  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
-	(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
-		      (match_operand:VDQW 3 "s_register_operand" "2")]
-		     UNSPEC_VTRN1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
-	(unspec:VDQW [(match_dup 1) (match_dup 3)]
-		     UNSPEC_VTRN2))]
+  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
+	(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
+		       (match_operand:VDQWH 3 "s_register_operand" "2")]
+		      UNSPEC_VTRN1))
+   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
+	(unspec:VDQWH [(match_dup 1) (match_dup 3)]
+		      UNSPEC_VTRN2))]
   "TARGET_NEON"
   "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
   [(set_attr "type" "neon_permute")]
@@ -4307,25 +4348,25 @@ if (BYTES_BIG_ENDIAN)
 
 (define_expand "neon_vzip<mode>_internal"
   [(parallel
-    [(set (match_operand:VDQW 0 "s_register_operand" "")
-	  (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
-			(match_operand:VDQW 2 "s_register_operand" "")]
-		       UNSPEC_VZIP1))
-     (set (match_operand:VDQW 3 "s_register_operand" "")
-	  (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
+    [(set (match_operand:VDQWH 0 "s_register_operand")
+	  (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
+			 (match_operand:VDQWH 2 "s_register_operand")]
+			UNSPEC_VZIP1))
+     (set (match_operand:VDQWH 3 "s_register_operand")
+	  (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
   "TARGET_NEON"
   ""
 )
 
 ;; Note: Different operand numbering to handle tied registers correctly.
 (define_insn "*neon_vzip<mode>_insn"
-  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
-	(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
-		      (match_operand:VDQW 3 "s_register_operand" "2")]
-		     UNSPEC_VZIP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
-	(unspec:VDQW [(match_dup 1) (match_dup 3)]
-		     UNSPEC_VZIP2))]
+  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
+	(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
+		       (match_operand:VDQWH 3 "s_register_operand" "2")]
+		      UNSPEC_VZIP1))
+   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
+	(unspec:VDQWH [(match_dup 1) (match_dup 3)]
+		      UNSPEC_VZIP2))]
   "TARGET_NEON"
   "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
   [(set_attr "type" "neon_zip")]
@@ -4333,25 +4374,25 @@ if (BYTES_BIG_ENDIAN)
 
 (define_expand "neon_vuzp<mode>_internal"
   [(parallel
-    [(set (match_operand:VDQW 0 "s_register_operand" "")
-	  (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
-			(match_operand:VDQW 2 "s_register_operand" "")]
+    [(set (match_operand:VDQWH 0 "s_register_operand")
+	  (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
+			 (match_operand:VDQWH 2 "s_register_operand")]
 		       UNSPEC_VUZP1))
-     (set (match_operand:VDQW 3 "s_register_operand" "")
-	  (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
+     (set (match_operand:VDQWH 3 "s_register_operand" "")
+	  (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
   "TARGET_NEON"
   ""
 )
 
 ;; Note: Different operand numbering to handle tied registers correctly.
 (define_insn "*neon_vuzp<mode>_insn"
-  [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
-	(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
-		      (match_operand:VDQW 3 "s_register_operand" "2")]
-		     UNSPEC_VUZP1))
-   (set (match_operand:VDQW 2 "s_register_operand" "=&w")
-	(unspec:VDQW [(match_dup 1) (match_dup 3)]
-		     UNSPEC_VUZP2))]
+  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
+	(unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
+		       (match_operand:VDQWH 3 "s_register_operand" "2")]
+		      UNSPEC_VUZP1))
+   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
+	(unspec:VDQWH [(match_dup 1) (match_dup 3)]
+		      UNSPEC_VUZP2))]
   "TARGET_NEON"
   "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
   [(set_attr "type" "neon_zip")]
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index ce98f71..645b01e 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -124,6 +124,20 @@
     FAIL;
 })
 
+(define_expand "vec_perm_const<mode>"
+  [(match_operand:VH 0 "s_register_operand")
+   (match_operand:VH 1 "s_register_operand")
+   (match_operand:VH 2 "s_register_operand")
+   (match_operand:<V_cmp_result> 3)]
+  "TARGET_NEON"
+{
+  if (arm_expand_vec_perm_const (operands[0], operands[1],
+				 operands[2], operands[3]))
+    DONE;
+  else
+    FAIL;
+})
+
 (define_expand "vec_perm"
   [(match_operand:VE 0 "s_register_operand" "")
    (match_operand:VE 1 "s_register_operand" "")
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
index 49fbd84..001e320 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
@@ -16,6 +16,15 @@ extern void *memset(void *, int, size_t);
 extern void *memcpy(void *, const void *, size_t);
 extern size_t strlen(const char *);
 
+/* Helper macro to select FP16 tests.
*/ +#if (!defined (__aarch64__) \ + && (defined (__ARM_FP16_FORMAT_IEEE) \ + || defined (__ARM_FP16_FORMAT_ALTERNATIVE))) +#define FP16_SUPPORTED (1) +#else +#undef FP16_SUPPORTED +#endif + /* Various string construction helpers. */ /* @@ -500,7 +509,9 @@ static void clean_results (void) /* Helpers to initialize vectors. */ #define VDUP(VAR, Q, T1, T2, W, N, V) \ VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V) -#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +#if (defined (__aarch64__) \ + && (defined (__ARM_FP16_FORMAT_IEEE) \ + || defined (__ARM_FP16_FORMAT_ALTERNATIVE))) /* Work around that there is no vdup_n_f16 intrinsic. */ #define vdup_n_f16(VAL) \ __extension__ \ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c index c4fdbb4..e9b3dfd 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c @@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffff1 }; VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc09, 0xcb89, + 0xcb09, 0xca89 }; +#endif VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800004, 0xc1700004 }; VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf6, 0xf6, 0xf6, 0xf6, @@ -43,6 +47,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7 }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2, 0xfff4, 0xfff4, 0xfff6, 0xfff6 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc09, 0xcb89, + 0xcb09, 0xca89, + 0xca09, 0xc989, + 0xc909, 0xc889 }; +#endif VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800001, 0xc1700001, 0xc1600001, 0xc1500001 }; @@ -66,6 +76,10 @@ void exec_vbsl (void) clean_results (); TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#if defined (FP16_SUPPORTED) + VLOAD(vector, buffer, , float, f, 16, 4); + VLOAD(vector, buffer, q, float, f, 16, 8); +#endif VLOAD(vector, buffer, , float, f, 32, 2); VLOAD(vector, buffer, q, float, f, 32, 4); @@ -80,6 +94,9 @@ void exec_vbsl (void) VDUP(vector2, , uint, u, 16, 4, 0xFFF2); VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0); VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFF3); +#if defined (FP16_SUPPORTED) + VDUP(vector2, , float, f, 16, 4, -2.4f); /* -2.4f is 0xC0CD. 
*/ +#endif VDUP(vector2, , float, f, 32, 2, -30.3f); VDUP(vector2, , poly, p, 8, 8, 0xF3); VDUP(vector2, , poly, p, 16, 4, 0xFFF2); @@ -94,6 +111,9 @@ void exec_vbsl (void) VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFF3); VDUP(vector2, q, poly, p, 8, 16, 0xF3); VDUP(vector2, q, poly, p, 16, 8, 0xFFF2); +#if defined (FP16_SUPPORTED) + VDUP(vector2, q, float, f, 16, 8, -2.4f); +#endif VDUP(vector2, q, float, f, 32, 4, -30.4f); VDUP(vector_first, , uint, u, 8, 8, 0xF4); @@ -111,10 +131,18 @@ void exec_vbsl (void) TEST_VBSL(uint, , poly, p, 16, 4); TEST_VBSL(uint, q, poly, p, 8, 16); TEST_VBSL(uint, q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VBSL(uint, , float, f, 16, 4); + TEST_VBSL(uint, q, float, f, 16, 8); +#endif TEST_VBSL(uint, , float, f, 32, 2); TEST_VBSL(uint, q, float, f, 32, 4); +#if defined (FP16_SUPPORTED) + CHECK_RESULTS (TEST_MSG, ""); +#else CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); +#endif } int main (void) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c index 22d45d5..aef4173 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c @@ -19,6 +19,10 @@ VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcc00, + 0xcc00, 0xcc00 }; +#endif VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 }; VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, @@ -46,6 +50,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcc00, + 0xcc00, 0xcc00, + 0xcc00, 0xcc00, + 0xcc00, 0xcc00 }; +#endif VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000, 0xc1800000, 0xc1800000 }; @@ -63,6 +73,10 @@ VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 }; VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xcb80, + 0xcb80, 0xcb80 }; +#endif VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, @@ -90,6 +104,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb80, 0xcb80, + 0xcb80, 0xcb80, + 0xcb80, 0xcb80, + 0xcb80, 0xcb80 }; +#endif VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, 0xc1700000, 0xc1700000 }; @@ -107,6 +127,10 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 }; VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +#if defined (FP16_SUPPORTED) 
+VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb00, 0xcb00, + 0xcb00, 0xcb00 }; +#endif VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 }; VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, @@ -134,6 +158,12 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb00, 0xcb00, + 0xcb00, 0xcb00, + 0xcb00, 0xcb00, + 0xcb00, 0xcb00 }; +#endif VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000, 0xc1600000, 0xc1600000 }; @@ -171,6 +201,9 @@ void exec_vdup_vmov (void) TEST_VDUP(, uint, u, 64, 1); TEST_VDUP(, poly, p, 8, 8); TEST_VDUP(, poly, p, 16, 4); +#if defined (FP16_SUPPORTED) + TEST_VDUP(, float, f, 16, 4); +#endif TEST_VDUP(, float, f, 32, 2); TEST_VDUP(q, int, s, 8, 16); @@ -183,8 +216,26 @@ void exec_vdup_vmov (void) TEST_VDUP(q, uint, u, 64, 2); TEST_VDUP(q, poly, p, 8, 16); TEST_VDUP(q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VDUP(q, float, f, 16, 8); +#endif TEST_VDUP(q, float, f, 32, 4); +#if defined (FP16_SUPPORTED) + switch (i) { + case 0: + CHECK_RESULTS_NAMED (TEST_MSG, expected0, ""); + break; + case 1: + CHECK_RESULTS_NAMED (TEST_MSG, expected1, ""); + break; + case 2: + CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); + break; + default: + abort(); + } +#else switch (i) { case 0: CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, ""); @@ -198,6 +249,7 @@ void exec_vdup_vmov (void) default: abort(); } +#endif } /* Do the same tests with vmov. Use the same expected results. */ @@ -216,6 +268,9 @@ void exec_vdup_vmov (void) TEST_VMOV(, uint, u, 64, 1); TEST_VMOV(, poly, p, 8, 8); TEST_VMOV(, poly, p, 16, 4); +#if defined (FP16_SUPPORTED) + TEST_VMOV(, float, f, 16, 4); +#endif TEST_VMOV(, float, f, 32, 2); TEST_VMOV(q, int, s, 8, 16); @@ -228,8 +283,26 @@ void exec_vdup_vmov (void) TEST_VMOV(q, uint, u, 64, 2); TEST_VMOV(q, poly, p, 8, 16); TEST_VMOV(q, poly, p, 16, 8); +#if defined (FP16_SUPPORTED) + TEST_VMOV(q, float, f, 16, 8); +#endif TEST_VMOV(q, float, f, 32, 4); +#if defined (FP16_SUPPORTED) + switch (i) { + case 0: + CHECK_RESULTS_NAMED (TEST_MSG, expected0, ""); + break; + case 1: + CHECK_RESULTS_NAMED (TEST_MSG, expected1, ""); + break; + case 2: + CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); + break; + default: + abort(); + } +#else switch (i) { case 0: CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, ""); @@ -243,6 +316,8 @@ void exec_vdup_vmov (void) default: abort(); } +#endif + } } diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c index ef708dc..c4b8f14 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c @@ -17,6 +17,10 @@ VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xca80, 0xca80, + 0xca80, 0xca80 }; +#endif VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, @@ -43,6 +47,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5, 
0xf5, 0xf5, 0xf5, 0xf5 }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xca80, 0xca80, + 0xca80, 0xca80, + 0xca80, 0xca80, + 0xca80, 0xca80 }; +#endif VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, 0xc1700000, 0xc1700000 }; @@ -63,6 +73,9 @@ void exec_vdup_lane (void) clean_results (); TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector, buffer); +#if defined (FP16_SUPPORTED) + VLOAD(vector, buffer, , float, f, 16, 4); +#endif VLOAD(vector, buffer, , float, f, 32, 2); /* Choose lane arbitrarily. */ @@ -76,6 +89,9 @@ void exec_vdup_lane (void) TEST_VDUP_LANE(, uint, u, 64, 1, 1, 0); TEST_VDUP_LANE(, poly, p, 8, 8, 8, 7); TEST_VDUP_LANE(, poly, p, 16, 4, 4, 3); +#if defined (FP16_SUPPORTED) + TEST_VDUP_LANE(, float, f, 16, 4, 4, 3); +#endif TEST_VDUP_LANE(, float, f, 32, 2, 2, 1); TEST_VDUP_LANE(q, int, s, 8, 16, 8, 2); @@ -88,9 +104,16 @@ void exec_vdup_lane (void) TEST_VDUP_LANE(q, uint, u, 64, 2, 1, 0); TEST_VDUP_LANE(q, poly, p, 8, 16, 8, 5); TEST_VDUP_LANE(q, poly, p, 16, 8, 4, 1); +#if defined (FP16_SUPPORTED) + TEST_VDUP_LANE(q, float, f, 16, 8, 4, 3); +#endif TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1); +#if defined (FP16_SUPPORTED) + CHECK_RESULTS (TEST_MSG, ""); +#else CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); +#endif } int main (void) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c index 98f88a6..908294a 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c @@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0xf7, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcb00, 0xca80, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x42066666 }; VECT_VAR_DECL(expected,int,8,16) [] = { 0xfe, 0xff, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, @@ -39,6 +43,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff6, 0xfff7, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xc880, 0x4b4d, + 0x4b4d, 0x4b4d, + 0x4b4d, 0x4b4d, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1500000, 0x4204cccd, 0x4204cccd, 0x4204cccd }; @@ -60,6 +70,10 @@ void exec_vext (void) clean_results (); TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer); +#ifdef FP16_SUPPORTED + VLOAD(vector1, buffer, , float, f, 16, 4); + VLOAD(vector1, buffer, q, float, f, 16, 8); +#endif VLOAD(vector1, buffer, , float, f, 32, 2); VLOAD(vector1, buffer, q, float, f, 32, 4); @@ -74,6 +88,9 @@ void exec_vext (void) VDUP(vector2, , uint, u, 64, 1, 0x88); VDUP(vector2, , poly, p, 8, 8, 0x55); VDUP(vector2, , poly, p, 16, 4, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. 
*/ +#endif VDUP(vector2, , float, f, 32, 2, 33.6f); VDUP(vector2, q, int, s, 8, 16, 0x11); @@ -86,6 +103,9 @@ void exec_vext (void) VDUP(vector2, q, uint, u, 64, 2, 0x88); VDUP(vector2, q, poly, p, 8, 16, 0x55); VDUP(vector2, q, poly, p, 16, 8, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, q, float, f, 16, 8, 14.6f); +#endif VDUP(vector2, q, float, f, 32, 4, 33.2f); /* Choose arbitrary extract offsets. */ @@ -99,6 +119,9 @@ void exec_vext (void) TEST_VEXT(, uint, u, 64, 1, 0); TEST_VEXT(, poly, p, 8, 8, 6); TEST_VEXT(, poly, p, 16, 4, 2); +#if defined (FP16_SUPPORTED) + TEST_VEXT(, float, f, 16, 4, 2); +#endif TEST_VEXT(, float, f, 32, 2, 1); TEST_VEXT(q, int, s, 8, 16, 14); @@ -111,9 +134,16 @@ void exec_vext (void) TEST_VEXT(q, uint, u, 64, 2, 1); TEST_VEXT(q, poly, p, 8, 16, 12); TEST_VEXT(q, poly, p, 16, 8, 6); +#if defined (FP16_SUPPORTED) + TEST_VEXT(q, float, f, 16, 8, 7); +#endif TEST_VEXT(q, float, f, 32, 4, 3); +#if defined (FP16_SUPPORTED) + CHECK_RESULTS (TEST_MSG, ""); +#else CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); +#endif } int main (void) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c index 3b574da..0c01318 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c @@ -63,6 +63,10 @@ VECT_VAR_DECL(expected_vrev64,uint,32,2) [] = { 0xfffffff1, 0xfffffff0 }; VECT_VAR_DECL(expected_vrev64,poly,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0 }; VECT_VAR_DECL(expected_vrev64,poly,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected_vrev64, hfloat, 16, 4) [] = { 0xca80, 0xcb00, + 0xcb80, 0xcc00 }; +#endif VECT_VAR_DECL(expected_vrev64,hfloat,32,2) [] = { 0xc1700000, 0xc1800000 }; VECT_VAR_DECL(expected_vrev64,int,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0, @@ -86,6 +90,12 @@ VECT_VAR_DECL(expected_vrev64,poly,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xfb, 0xfa, 0xf9, 0xf8 }; VECT_VAR_DECL(expected_vrev64,poly,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0, 0xfff7, 0xfff6, 0xfff5, 0xfff4 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected_vrev64, hfloat, 16, 8) [] = { 0xca80, 0xcb00, + 0xcb80, 0xcc00, + 0xc880, 0xc900, + 0xc980, 0xca00 }; +#endif VECT_VAR_DECL(expected_vrev64,hfloat,32,4) [] = { 0xc1700000, 0xc1800000, 0xc1500000, 0xc1600000 }; @@ -104,6 +114,10 @@ void exec_vrev (void) /* Initialize input "vector" from "buffer". 
*/ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#if defined (FP16_SUPPORTED) + VLOAD (vector, buffer, , float, f, 16, 4); + VLOAD (vector, buffer, q, float, f, 16, 8); +#endif VLOAD(vector, buffer, , float, f, 32, 2); VLOAD(vector, buffer, q, float, f, 32, 4); @@ -187,6 +201,12 @@ void exec_vrev (void) CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev64, ""); CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_vrev64, ""); +#if defined (FP16_SUPPORTED) + TEST_VREV (, float, f, 16, 4, 64); + TEST_VREV (q, float, f, 16, 8, 64); + CHECK_FP(TEST_MSG, float, 16, 4, PRIx32, expected_vrev64, ""); + CHECK_FP(TEST_MSG, float, 16, 8, PRIx32, expected_vrev64, ""); +#endif TEST_VREV(, float, f, 32, 2, 64); TEST_VREV(q, float, f, 32, 4, 64); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_vrev64, ""); diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc index b55a205..ad5bf31 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc @@ -53,9 +53,17 @@ void FNNAME (INSN_NAME) (void) DECL_VSHUFFLE(float, 32, 4) DECL_ALL_VSHUFFLE(); +#if defined (FP16_SUPPORTED) + DECL_VSHUFFLE (float, 16, 4); + DECL_VSHUFFLE (float, 16, 8); +#endif /* Initialize input "vector" from "buffer". */ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer); +#if defined (FP16_SUPPORTED) + VLOAD (vector1, buffer, , float, f, 16, 4); + VLOAD (vector1, buffer, q, float, f, 16, 8); +#endif VLOAD(vector1, buffer, , float, f, 32, 2); VLOAD(vector1, buffer, q, float, f, 32, 4); @@ -68,6 +76,9 @@ void FNNAME (INSN_NAME) (void) VDUP(vector2, , uint, u, 32, 2, 0x77); VDUP(vector2, , poly, p, 8, 8, 0x55); VDUP(vector2, , poly, p, 16, 4, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */ +#endif VDUP(vector2, , float, f, 32, 2, 33.6f); VDUP(vector2, q, int, s, 8, 16, 0x11); @@ -78,8 +89,11 @@ void FNNAME (INSN_NAME) (void) VDUP(vector2, q, uint, u, 32, 4, 0x77); VDUP(vector2, q, poly, p, 8, 16, 0x55); VDUP(vector2, q, poly, p, 16, 8, 0x66); +#if defined (FP16_SUPPORTED) + VDUP (vector2, q, float, f, 16, 8, 14.6f); +#endif VDUP(vector2, q, float, f, 32, 4, 33.8f); - + #define TEST_ALL_VSHUFFLE(INSN) \ TEST_VSHUFFLE(INSN, , int, s, 8, 8); \ TEST_VSHUFFLE(INSN, , int, s, 16, 4); \ @@ -100,6 +114,10 @@ void FNNAME (INSN_NAME) (void) TEST_VSHUFFLE(INSN, q, poly, p, 16, 8); \ TEST_VSHUFFLE(INSN, q, float, f, 32, 4) +#define TEST_VSHUFFLE_FP16(INSN) \ + TEST_VSHUFFLE(INSN, , float, f, 16, 4); \ + TEST_VSHUFFLE(INSN, q, float, f, 16, 8); + #define TEST_ALL_EXTRA_CHUNKS() \ TEST_EXTRA_CHUNK(int, 8, 8, 1); \ TEST_EXTRA_CHUNK(int, 16, 4, 1); \ @@ -143,17 +161,37 @@ void FNNAME (INSN_NAME) (void) CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \ CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \ - } \ + } + +#define CHECK_RESULTS_VSHUFFLE_FP16(test_name,EXPECTED,comment) \ + { \ + CHECK_FP (test_name, float, 16, 4, PRIx16, EXPECTED, comment); \ + CHECK_FP (test_name, float, 16, 8, PRIx16, EXPECTED, comment); \ + } clean_results (); /* Execute the tests. 
*/ TEST_ALL_VSHUFFLE(INSN_NAME); +#if defined (FP16_SUPPORTED) + TEST_VSHUFFLE_FP16 (INSN_NAME); +#endif CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected0, "(chunk 0)"); +#if defined (FP16_SUPPORTED) + CHECK_RESULTS_VSHUFFLE_FP16 (TEST_MSG, expected0, "(chunk 0)"); +#endif TEST_ALL_EXTRA_CHUNKS(); +#if defined (FP16_SUPPORTED) + TEST_EXTRA_CHUNK (float, 16, 4, 1); + TEST_EXTRA_CHUNK (float, 16, 8, 1); +#endif + CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected1, "(chunk 1)"); +#if defined (FP16_SUPPORTED) + CHECK_RESULTS_VSHUFFLE_FP16 (TEST_MSG, expected1, "(chunk 1)"); +#endif } int main (void) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c index 2c4a09c..ea2d8d8 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c @@ -15,6 +15,10 @@ VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55, 0xf2, 0xf3, 0x55, 0x55 }; VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0x11, 0x11, 0xf2, 0xf3, 0x11, 0x11, @@ -36,6 +40,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55, 0xf6, 0xf7, 0x55, 0x55 }; VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1, 0x66, 0x66, 0xfff2, 0xfff3, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80, + 0x4b4d, 0x4b4d, + 0xcb00, 0xca80, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0x42073333, 0x42073333 }; @@ -51,6 +61,10 @@ VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 }; VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55, 0xf6, 0xf7, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb00, 0xca80, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 }; VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf8, 0xf9, 0x11, 0x11, 0xfa, 0xfb, 0x11, 0x11, @@ -72,6 +86,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55, 0xfe, 0xff, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff4, 0xfff5, 0x66, 0x66, 0xfff6, 0xfff7, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xca00, 0xc980, + 0x4b4d, 0x4b4d, + 0xc900, 0xc880, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1600000, 0xc1500000, 0x42073333, 0x42073333 }; diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c index ab6e576..43b49ca 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c @@ -19,6 +19,10 @@ VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80, + 0xcb00, 0xca80 }; +#endif VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; 
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, @@ -48,6 +52,12 @@ VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80, + 0xcb00, 0xca80, + 0xca00, 0xc980, + 0xc900, 0xc880 }; +#endif VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0xc1600000, 0xc1500000 }; @@ -63,6 +73,10 @@ VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 }; VECT_VAR_DECL(expected1,poly,8,8) [] = { 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,4) [] = { 0x66, 0x66, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0x4b4d, 0x4b4d, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 }; VECT_VAR_DECL(expected1,int,8,16) [] = { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, @@ -84,6 +98,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,8) [] = { 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0x4b4d, 0x4b4d, + 0x4b4d, 0x4b4d, + 0x4b4d, 0x4b4d, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0x42073333, 0x42073333, 0x42073333, 0x42073333 }; diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c index b5fe516..20f4f5d 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c @@ -18,6 +18,10 @@ VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55, 0xf1, 0xf5, 0x55, 0x55 }; VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff2, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb00, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf8, 0x11, 0x11, 0xf1, 0xf9, 0x11, 0x11, @@ -41,6 +45,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55, 0xf3, 0xfb, 0x55, 0x55 }; VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66, 0xfff1, 0xfff5, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xca00, + 0x4b4d, 0x4b4d, + 0xcb80, 0xc980, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1600000, 0x42073333, 0x42073333 }; @@ -59,6 +69,10 @@ VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55, 0xf3, 0xf7, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff3, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xca80, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 }; VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf4, 0xfc, 0x11, 0x11, 0xf5, 0xfd, 0x11, 0x11, @@ -82,6 +96,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55, 0xf7, 0xff, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66, 0xfff3, 0xfff7, 0x66, 0x66 }; +#if defined (FP16_SUPPORTED) +VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb00, 0xc900, + 0x4b4d, 0x4b4d, + 0xca80, 0xc880, + 0x4b4d, 0x4b4d }; +#endif VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 
0xc1500000, 0x42073333, 0x42073333 }; -- 2.1.4
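Note (not part of the patch): the half-precision constants in the new expected
arrays are the IEEE __fp16 encodings of the standard test inputs (the buffers
hold -16.0, -15.0, -14.0, ..., and the VDUP'd constants are -2.4f and 14.6f),
so 0xcc00 is -16.0, 0xcb80 is -15.0, 0xc0cd is -2.4f and 0x4b4d is roughly 14.6.
A minimal sketch that prints such encodings, assuming a compiler where __fp16
uses the IEEE format:

    #include <stdio.h>
    #include <string.h>

    int
    main (void)
    {
      /* A few of the values used by the tests above.  */
      __fp16 vals[] = { -16.0, -15.0, -2.4f, 14.6f };
      for (unsigned int i = 0; i < sizeof (vals) / sizeof (vals[0]); i++)
        {
          unsigned short bits;
          memcpy (&bits, &vals[i], sizeof (bits));  /* Reinterpret the 16 bits.  */
          printf ("%g -> 0x%04x\n", (double) vals[i], bits);
        }
      return 0;
    }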