public inbox for gcc-patches@gcc.gnu.org
* [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape
@ 2023-05-05  8:39 Christophe Lyon
From: Christophe Lyon @ 2023-05-05  8:39 UTC
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the binary_round_lshift shape description for rounding
shift-left operations, where the shift amount is either a vector or a
scalar (MODE_n) int32_t.
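
For reference, a minimal usage sketch (not part of the patch, compiled
with e.g. -march=armv8.1-m.main+mve) of the two overloads this shape
resolves, one per mode suffix:

  #include <arm_mve.h>

  int8x16_t
  shift_by_vector (int8x16_t a, int8x16_t b)
  {
    return vrshlq (a, b);	/* Resolves to vrshlq_s8 (MODE_none).  */
  }

  int8x16_t
  shift_by_scalar (int8x16_t a, int32_t b)
  {
    return vrshlq (a, b);	/* Resolves to vrshlq_n_s8 (MODE_n).  */
  }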

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (binary_round_lshift): New.
	* config/arm/arm-mve-builtins-shapes.h (binary_round_lshift): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 61 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 62 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 5e6681c784a..28a2d66ddd1 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -458,6 +458,67 @@ struct binary_orrq_def : public overloaded_base<0>
 };
 SHAPE (binary_orrq)
 
+/* <T0>_t vfoo[t0](<T0>_t, <T0>_t)
+   <T0>_t vfoo[_n_t0](<T0>_t, int32_t)
+
+   Shape for rounding shift left operations.
+
+   Example: vrshlq.
+   int8x16_t [__arm_]vrshlq[_n_s8](int8x16_t a, int32_t b)
+   int8x16_t [__arm_]vrshlq_m_n[_s8](int8x16_t a, int32_t b, mve_pred16_t p)
+   int8x16_t [__arm_]vrshlq[_s8](int8x16_t a, int8x16_t b)
+   int8x16_t [__arm_]vrshlq_m[_s8](int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
+   int8x16_t [__arm_]vrshlq_x[_s8](int8x16_t a, int8x16_t b, mve_pred16_t p)  */
+struct binary_round_lshift_def : public overloaded_base<0>
+{
+  bool
+  explicit_mode_suffix_p (enum predication_index pred, enum mode_suffix_index mode) const override
+  {
+    return ((mode == MODE_n)
+	    && (pred == PRED_m));
+  }
+
+  bool
+  skip_overload_p (enum predication_index pred, enum mode_suffix_index mode) const override
+  {
+    switch (mode)
+      {
+      case MODE_none:
+	return false;
+
+	/* For MODE_n, share the overloaded instance with MODE_none, except for PRED_m.  */
+      case MODE_n:
+	return pred != PRED_m;
+
+      default:
+	gcc_unreachable ();
+      }
+  }
+
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+    b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
+    build_all (b, "v0,v0,vs0", group, MODE_none, preserve_user_namespace);
+    build_all (b, "v0,v0,ss32", group, MODE_n, preserve_user_namespace, false, preds_m_or_none);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    type_suffix_index type;
+    if (!r.check_gp_argument (2, i, nargs)
+	|| (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES)
+      return error_mark_node;
+
+    return r.finish_opt_n_resolution (i, 0, type, TYPE_signed);
+  }
+};
+SHAPE (binary_round_lshift)
+
 /* <T0>xN_t vfoo[_t0](uint64_t, uint64_t)
 
    where there are N arguments in total.
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index 3305d12877a..cef081aa8ec 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -37,6 +37,7 @@ namespace arm_mve
     extern const function_shape *const binary;
     extern const function_shape *const binary_opt_n;
     extern const function_shape *const binary_orrq;
+    extern const function_shape *const binary_round_lshift;
     extern const function_shape *const create;
     extern const function_shape *const inherent;
     extern const function_shape *const unary_convert;
-- 
2.34.1


* [PATCH 02/23] arm: [MVE intrinsics] factorize vqrshlq vrshlq
@ 2023-05-05  8:39 Christophe Lyon
From: Christophe Lyon @ 2023-05-05  8:39 UTC
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vqrshlq and vrshlq so that they use the same patterns.
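
As an illustration (not part of the patch) of what the merged patterns
expand to: MVE_RSHIFT_N covers VQRSHLQ_N_S/U and VRSHLQ_N_S/U, and the
mve_insn and supf attributes select the mnemonic, so for V16QI the single
@mve_<mve_insn>q_n_<supf><mode> pattern emits one of:

  vqrshl.s8	%q0, %2		(VQRSHLQ_N_S)
  vqrshl.u8	%q0, %2		(VQRSHLQ_N_U)
  vrshl.s8	%q0, %2		(VRSHLQ_N_S)
  vrshl.u8	%q0, %2		(VRSHLQ_N_U)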

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MVE_RSHIFT_M_N, MVE_RSHIFT_N): New.
	(mve_insn): Add vqrshl, vrshl.
	* config/arm/mve.md (mve_vqrshlq_n_<supf><mode>)
	(mve_vrshlq_n_<supf><mode>): Merge into ...
	(@mve_<mve_insn>q_n_<supf><mode>): ... this.
	(mve_vqrshlq_m_n_<supf><mode>, mve_vrshlq_m_n_<supf><mode>): Merge
	into ...
	(@mve_<mve_insn>q_m_n_<supf><mode>): ... this.
---
 gcc/config/arm/iterators.md | 14 +++++++++++
 gcc/config/arm/mve.md       | 49 ++++++++-----------------------------
 2 files changed, 24 insertions(+), 39 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 593be83e0be..e7622fe752a 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -435,6 +435,16 @@ (define_int_iterator MVE_INT_N_BINARY_LOGIC   [
 		     VORRQ_N_S VORRQ_N_U
 		     ])
 
+(define_int_iterator MVE_RSHIFT_M_N   [
+		     VQRSHLQ_M_N_S VQRSHLQ_M_N_U
+		     VRSHLQ_M_N_S VRSHLQ_M_N_U
+		     ])
+
+(define_int_iterator MVE_RSHIFT_N   [
+		     VQRSHLQ_N_S VQRSHLQ_N_U
+		     VRSHLQ_N_S VRSHLQ_N_U
+		     ])
+
 (define_int_iterator MVE_FP_M_BINARY   [
 		     VADDQ_M_F
 		     VMULQ_M_F
@@ -526,7 +536,9 @@ (define_int_attr mve_insn [
 		 (VQRDMULHQ_M_S "vqrdmulh")
 		 (VQRDMULHQ_N_S "vqrdmulh")
 		 (VQRDMULHQ_S "vqrdmulh")
+		 (VQRSHLQ_M_N_S "vqrshl") (VQRSHLQ_M_N_U "vqrshl")
 		 (VQRSHLQ_M_S "vqrshl") (VQRSHLQ_M_U "vqrshl")
+		 (VQRSHLQ_N_S "vqrshl") (VQRSHLQ_N_U "vqrshl")
 		 (VQRSHLQ_S "vqrshl") (VQRSHLQ_U "vqrshl")
 		 (VQSHLQ_M_S "vqshl") (VQSHLQ_M_U "vqshl")
 		 (VQSHLQ_S "vqshl") (VQSHLQ_U "vqshl")
@@ -538,7 +550,9 @@ (define_int_attr mve_insn [
 		 (VRHADDQ_S "vrhadd") (VRHADDQ_U "vrhadd")
 		 (VRMULHQ_M_S "vrmulh") (VRMULHQ_M_U "vrmulh")
 		 (VRMULHQ_S "vrmulh") (VRMULHQ_U "vrmulh")
+		 (VRSHLQ_M_N_S "vrshl") (VRSHLQ_M_N_U "vrshl")
 		 (VRSHLQ_M_S "vrshl") (VRSHLQ_M_U "vrshl")
+		 (VRSHLQ_N_S "vrshl") (VRSHLQ_N_U "vrshl")
 		 (VRSHLQ_S "vrshl") (VRSHLQ_U "vrshl")
 		 (VSHLQ_M_S "vshl") (VSHLQ_M_U "vshl")
 		 (VSUBQ_M_N_S "vsub") (VSUBQ_M_N_U "vsub") (VSUBQ_M_N_F "vsub")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 6b88fdb8a7a..0d3343b6e29 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1373,17 +1373,18 @@ (define_expand "mve_vorrq_u<mode>"
 )
 
 ;;
-;; [vqrshlq_n_s, vqrshlq_n_u])
+;; [vqrshlq_n_s, vqrshlq_n_u]
+;; [vrshlq_n_u, vrshlq_n_s]
 ;;
-(define_insn "mve_vqrshlq_n_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
 		       (match_operand:SI 2 "s_register_operand" "r")]
-	 VQRSHLQ_N))
+	 MVE_RSHIFT_N))
   ]
   "TARGET_HAVE_MVE"
-  "vqrshl.<supf>%#<V_sz_elem>\t%q0, %2"
+  "<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %2"
   [(set_attr "type" "mve_move")
 ])
 
@@ -1432,21 +1433,6 @@ (define_insn "mve_vqshluq_n_s<mode>"
   [(set_attr "type" "mve_move")
 ])
 
-;;
-;; [vrshlq_n_u, vrshlq_n_s])
-;;
-(define_insn "mve_vrshlq_n_<supf><mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
-		       (match_operand:SI 2 "s_register_operand" "r")]
-	 VRSHLQ_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vrshl.<supf>%#<V_sz_elem>\t%q0, %2"
-  [(set_attr "type" "mve_move")
-])
-
 ;;
 ;; [vrshrq_n_s, vrshrq_n_u])
 ;;
@@ -3098,18 +3084,19 @@ (define_insn "mve_vqrdmlsdhxq_s<mode>"
 ])
 
 ;;
-;; [vqrshlq_m_n_s, vqrshlq_m_n_u])
+;; [vqrshlq_m_n_s, vqrshlq_m_n_u]
+;; [vrshlq_m_n_s, vrshlq_m_n_u]
 ;;
-(define_insn "mve_vqrshlq_m_n_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
 		       (match_operand:SI 2 "s_register_operand" "r")
 		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VQRSHLQ_M_N))
+	 MVE_RSHIFT_M_N))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vqrshlt.<supf>%#<V_sz_elem>	%q0, %2"
+  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
@@ -3145,22 +3132,6 @@ (define_insn "mve_vrev64q_m_<supf><mode>"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-;;
-;; [vrshlq_m_n_s, vrshlq_m_n_u])
-;;
-(define_insn "mve_vrshlq_m_n_<supf><mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
-		       (match_operand:SI 2 "s_register_operand" "r")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VRSHLQ_M_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vrshlt.<supf>%#<V_sz_elem>\t%q0, %2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
 ;;
 ;; [vshlq_m_r_u, vshlq_m_r_s])
 ;;
-- 
2.34.1


* [PATCH 03/23] arm: [MVE intrinsics] rework vrshlq vqrshlq
@ 2023-05-05  8:39 Christophe Lyon
From: Christophe Lyon @ 2023-05-05  8:39 UTC
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vrshlq and vqrshlq using the new MVE builtins framework, and
remove the now-redundant definitions from arm_mve.h.
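
Note that the predicated MODE_n variants are two-operand destructive
forms, so has_inactive_argument now returns false for them.  A minimal
sketch (illustrative only) of the user-visible difference:

  #include <arm_mve.h>

  int8x16_t
  f (int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
  {
    return vrshlq_m (inactive, a, b, p);  /* Vector shift: takes 'inactive'.  */
  }

  int8x16_t
  g (int8x16_t a, int32_t b, mve_pred16_t p)
  {
    return vrshlq_m_n_s8 (a, b, p);       /* Scalar shift: no 'inactive'.  */
  }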

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vqrshlq, vrshlq): New.
	* config/arm/arm-mve-builtins-base.def (vqrshlq, vrshlq): New.
	* config/arm/arm-mve-builtins-base.h (vqrshlq, vrshlq): New.
	* config/arm/arm-mve-builtins.cc (has_inactive_argument): Handle
	vqrshlq, vrshlq.
	* config/arm/arm_mve.h (vrshlq): Remove.
	(vrshlq_m_n): Remove.
	(vrshlq_m): Remove.
	(vrshlq_x): Remove.
	(vrshlq_u8): Remove.
	(vrshlq_n_u8): Remove.
	(vrshlq_s8): Remove.
	(vrshlq_n_s8): Remove.
	(vrshlq_u16): Remove.
	(vrshlq_n_u16): Remove.
	(vrshlq_s16): Remove.
	(vrshlq_n_s16): Remove.
	(vrshlq_u32): Remove.
	(vrshlq_n_u32): Remove.
	(vrshlq_s32): Remove.
	(vrshlq_n_s32): Remove.
	(vrshlq_m_n_u8): Remove.
	(vrshlq_m_n_s8): Remove.
	(vrshlq_m_n_u16): Remove.
	(vrshlq_m_n_s16): Remove.
	(vrshlq_m_n_u32): Remove.
	(vrshlq_m_n_s32): Remove.
	(vrshlq_m_s8): Remove.
	(vrshlq_m_s32): Remove.
	(vrshlq_m_s16): Remove.
	(vrshlq_m_u8): Remove.
	(vrshlq_m_u32): Remove.
	(vrshlq_m_u16): Remove.
	(vrshlq_x_s8): Remove.
	(vrshlq_x_s16): Remove.
	(vrshlq_x_s32): Remove.
	(vrshlq_x_u8): Remove.
	(vrshlq_x_u16): Remove.
	(vrshlq_x_u32): Remove.
	(__arm_vrshlq_u8): Remove.
	(__arm_vrshlq_n_u8): Remove.
	(__arm_vrshlq_s8): Remove.
	(__arm_vrshlq_n_s8): Remove.
	(__arm_vrshlq_u16): Remove.
	(__arm_vrshlq_n_u16): Remove.
	(__arm_vrshlq_s16): Remove.
	(__arm_vrshlq_n_s16): Remove.
	(__arm_vrshlq_u32): Remove.
	(__arm_vrshlq_n_u32): Remove.
	(__arm_vrshlq_s32): Remove.
	(__arm_vrshlq_n_s32): Remove.
	(__arm_vrshlq_m_n_u8): Remove.
	(__arm_vrshlq_m_n_s8): Remove.
	(__arm_vrshlq_m_n_u16): Remove.
	(__arm_vrshlq_m_n_s16): Remove.
	(__arm_vrshlq_m_n_u32): Remove.
	(__arm_vrshlq_m_n_s32): Remove.
	(__arm_vrshlq_m_s8): Remove.
	(__arm_vrshlq_m_s32): Remove.
	(__arm_vrshlq_m_s16): Remove.
	(__arm_vrshlq_m_u8): Remove.
	(__arm_vrshlq_m_u32): Remove.
	(__arm_vrshlq_m_u16): Remove.
	(__arm_vrshlq_x_s8): Remove.
	(__arm_vrshlq_x_s16): Remove.
	(__arm_vrshlq_x_s32): Remove.
	(__arm_vrshlq_x_u8): Remove.
	(__arm_vrshlq_x_u16): Remove.
	(__arm_vrshlq_x_u32): Remove.
	(__arm_vrshlq): Remove.
	(__arm_vrshlq_m_n): Remove.
	(__arm_vrshlq_m): Remove.
	(__arm_vrshlq_x): Remove.
	(vqrshlq): Remove.
	(vqrshlq_m_n): Remove.
	(vqrshlq_m): Remove.
	(vqrshlq_u8): Remove.
	(vqrshlq_n_u8): Remove.
	(vqrshlq_s8): Remove.
	(vqrshlq_n_s8): Remove.
	(vqrshlq_u16): Remove.
	(vqrshlq_n_u16): Remove.
	(vqrshlq_s16): Remove.
	(vqrshlq_n_s16): Remove.
	(vqrshlq_u32): Remove.
	(vqrshlq_n_u32): Remove.
	(vqrshlq_s32): Remove.
	(vqrshlq_n_s32): Remove.
	(vqrshlq_m_n_u8): Remove.
	(vqrshlq_m_n_s8): Remove.
	(vqrshlq_m_n_u16): Remove.
	(vqrshlq_m_n_s16): Remove.
	(vqrshlq_m_n_u32): Remove.
	(vqrshlq_m_n_s32): Remove.
	(vqrshlq_m_s8): Remove.
	(vqrshlq_m_s32): Remove.
	(vqrshlq_m_s16): Remove.
	(vqrshlq_m_u8): Remove.
	(vqrshlq_m_u32): Remove.
	(vqrshlq_m_u16): Remove.
	(__arm_vqrshlq_u8): Remove.
	(__arm_vqrshlq_n_u8): Remove.
	(__arm_vqrshlq_s8): Remove.
	(__arm_vqrshlq_n_s8): Remove.
	(__arm_vqrshlq_u16): Remove.
	(__arm_vqrshlq_n_u16): Remove.
	(__arm_vqrshlq_s16): Remove.
	(__arm_vqrshlq_n_s16): Remove.
	(__arm_vqrshlq_u32): Remove.
	(__arm_vqrshlq_n_u32): Remove.
	(__arm_vqrshlq_s32): Remove.
	(__arm_vqrshlq_n_s32): Remove.
	(__arm_vqrshlq_m_n_u8): Remove.
	(__arm_vqrshlq_m_n_s8): Remove.
	(__arm_vqrshlq_m_n_u16): Remove.
	(__arm_vqrshlq_m_n_s16): Remove.
	(__arm_vqrshlq_m_n_u32): Remove.
	(__arm_vqrshlq_m_n_s32): Remove.
	(__arm_vqrshlq_m_s8): Remove.
	(__arm_vqrshlq_m_s32): Remove.
	(__arm_vqrshlq_m_s16): Remove.
	(__arm_vqrshlq_m_u8): Remove.
	(__arm_vqrshlq_m_u32): Remove.
	(__arm_vqrshlq_m_u16): Remove.
	(__arm_vqrshlq): Remove.
	(__arm_vqrshlq_m_n): Remove.
	(__arm_vqrshlq_m): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   2 +
 gcc/config/arm/arm-mve-builtins-base.def |   2 +
 gcc/config/arm/arm-mve-builtins-base.h   |   2 +
 gcc/config/arm/arm-mve-builtins.cc       |   4 +-
 gcc/config/arm/arm_mve.h                 | 969 +----------------------
 5 files changed, 18 insertions(+), 961 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index de0cdb4229b..f5e48519b19 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -157,10 +157,12 @@ FUNCTION_WITH_RTX_M_N (vmulq, MULT, VMULQ)
 FUNCTION_WITH_RTX_M_N_NO_N_F (vorrq, IOR, VORRQ)
 FUNCTION_WITH_M_N_NO_F (vqaddq, VQADDQ)
 FUNCTION_WITH_M_N_NO_U_F (vqdmulhq, VQDMULHQ)
+FUNCTION_WITH_M_N_NO_F (vqrshlq, VQRSHLQ)
 FUNCTION_WITH_M_N_NO_F (vqsubq, VQSUBQ)
 FUNCTION (vreinterpretq, vreinterpretq_impl,)
 FUNCTION_WITHOUT_N_NO_F (vrhaddq, VRHADDQ)
 FUNCTION_WITHOUT_N_NO_F (vrmulhq, VRMULHQ)
+FUNCTION_WITH_M_N_NO_F (vrshlq, VRSHLQ)
 FUNCTION_WITH_RTX_M_N (vsubq, MINUS, VSUBQ)
 FUNCTION (vuninitializedq, vuninitializedq_impl,)
 
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index d256f3ebb2d..e6dc2b00aaa 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -29,10 +29,12 @@ DEF_MVE_FUNCTION (vmulq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vorrq, binary_orrq, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vqaddq, binary_opt_n, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vqdmulhq, binary_opt_n, all_signed, m_or_none)
+DEF_MVE_FUNCTION (vqrshlq, binary_round_lshift, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vqsubq, binary_opt_n, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vreinterpretq, unary_convert, reinterpret_integer, none)
 DEF_MVE_FUNCTION (vrhaddq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vrmulhq, binary, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vrshlq, binary_round_lshift, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vuninitializedq, inherent, all_integer_with_64, none)
 #undef REQUIRES_FLOAT
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index d64cb5e1dec..31ba3fece82 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -34,10 +34,12 @@ extern const function_base *const vmulq;
 extern const function_base *const vorrq;
 extern const function_base *const vqaddq;
 extern const function_base *const vqdmulhq;
+extern const function_base *const vqrshlq;
 extern const function_base *const vqsubq;
 extern const function_base *const vreinterpretq;
 extern const function_base *const vrhaddq;
 extern const function_base *const vrmulhq;
+extern const function_base *const vrshlq;
 extern const function_base *const vsubq;
 extern const function_base *const vuninitializedq;
 
diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index 0708d4fa94a..91b3ae71f94 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -669,7 +669,9 @@ function_instance::has_inactive_argument () const
   if (pred != PRED_m)
     return false;
 
-  if (base == functions::vorrq && mode_suffix_id == MODE_n)
+  if ((base == functions::vorrq && mode_suffix_id == MODE_n)
+      || (base == functions::vqrshlq && mode_suffix_id == MODE_n)
+      || (base == functions::vrshlq && mode_suffix_id == MODE_n))
     return false;
 
   return true;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 9c5d14794a1..636945d6ef0 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -79,10 +79,8 @@
 #define vaddvaq(__a, __b) __arm_vaddvaq(__a, __b)
 #define vabdq(__a, __b) __arm_vabdq(__a, __b)
 #define vshlq_r(__a, __b) __arm_vshlq_r(__a, __b)
-#define vrshlq(__a, __b) __arm_vrshlq(__a, __b)
 #define vqshlq(__a, __b) __arm_vqshlq(__a, __b)
 #define vqshlq_r(__a, __b) __arm_vqshlq_r(__a, __b)
-#define vqrshlq(__a, __b) __arm_vqrshlq(__a, __b)
 #define vminavq(__a, __b) __arm_vminavq(__a, __b)
 #define vminaq(__a, __b) __arm_vminaq(__a, __b)
 #define vmaxavq(__a, __b) __arm_vmaxavq(__a, __b)
@@ -153,9 +151,7 @@
 #define vsriq(__a, __b, __imm) __arm_vsriq(__a, __b, __imm)
 #define vsliq(__a, __b, __imm) __arm_vsliq(__a, __b, __imm)
 #define vshlq_m_r(__a, __b, __p) __arm_vshlq_m_r(__a, __b, __p)
-#define vrshlq_m_n(__a, __b, __p) __arm_vrshlq_m_n(__a, __b, __p)
 #define vqshlq_m_r(__a, __b, __p) __arm_vqshlq_m_r(__a, __b, __p)
-#define vqrshlq_m_n(__a, __b, __p) __arm_vqrshlq_m_n(__a, __b, __p)
 #define vminavq_p(__a, __b, __p) __arm_vminavq_p(__a, __b, __p)
 #define vminaq_m(__a, __b, __p) __arm_vminaq_m(__a, __b, __p)
 #define vmaxavq_p(__a, __b, __p) __arm_vmaxavq_p(__a, __b, __p)
@@ -254,10 +250,8 @@
 #define vqrdmlsdhq_m(__inactive, __a, __b, __p) __arm_vqrdmlsdhq_m(__inactive, __a, __b, __p)
 #define vqrdmlsdhxq_m(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m(__inactive, __a, __b, __p)
 #define vqrdmulhq_m(__inactive, __a, __b, __p) __arm_vqrdmulhq_m(__inactive, __a, __b, __p)
-#define vqrshlq_m(__inactive, __a, __b, __p) __arm_vqrshlq_m(__inactive, __a, __b, __p)
 #define vqshlq_m_n(__inactive, __a, __imm, __p) __arm_vqshlq_m_n(__inactive, __a, __imm, __p)
 #define vqshlq_m(__inactive, __a, __b, __p) __arm_vqshlq_m(__inactive, __a, __b, __p)
-#define vrshlq_m(__inactive, __a, __b, __p) __arm_vrshlq_m(__inactive, __a, __b, __p)
 #define vrshrq_m(__inactive, __a, __imm, __p) __arm_vrshrq_m(__inactive, __a, __imm, __p)
 #define vshlq_m_n(__inactive, __a, __imm, __p) __arm_vshlq_m_n(__inactive, __a, __imm, __p)
 #define vshrq_m(__inactive, __a, __imm, __p) __arm_vshrq_m(__inactive, __a, __imm, __p)
@@ -385,7 +379,6 @@
 #define vrev16q_x(__a, __p) __arm_vrev16q_x(__a, __p)
 #define vrev32q_x(__a, __p) __arm_vrev32q_x(__a, __p)
 #define vrev64q_x(__a, __p) __arm_vrev64q_x(__a, __p)
-#define vrshlq_x(__a, __b, __p) __arm_vrshlq_x(__a, __b, __p)
 #define vshllbq_x(__a, __imm, __p) __arm_vshllbq_x(__a, __imm, __p)
 #define vshlltq_x(__a, __imm, __p) __arm_vshlltq_x(__a, __imm, __p)
 #define vshlq_x(__a, __b, __p) __arm_vshlq_x(__a, __b, __p)
@@ -663,12 +656,8 @@
 #define vaddvaq_u8(__a, __b) __arm_vaddvaq_u8(__a, __b)
 #define vabdq_u8(__a, __b) __arm_vabdq_u8(__a, __b)
 #define vshlq_r_u8(__a, __b) __arm_vshlq_r_u8(__a, __b)
-#define vrshlq_u8(__a, __b) __arm_vrshlq_u8(__a, __b)
-#define vrshlq_n_u8(__a, __b) __arm_vrshlq_n_u8(__a, __b)
 #define vqshlq_u8(__a, __b) __arm_vqshlq_u8(__a, __b)
 #define vqshlq_r_u8(__a, __b) __arm_vqshlq_r_u8(__a, __b)
-#define vqrshlq_u8(__a, __b) __arm_vqrshlq_u8(__a, __b)
-#define vqrshlq_n_u8(__a, __b) __arm_vqrshlq_n_u8(__a, __b)
 #define vminavq_s8(__a, __b) __arm_vminavq_s8(__a, __b)
 #define vminaq_s8(__a, __b) __arm_vminaq_s8(__a, __b)
 #define vmaxavq_s8(__a, __b) __arm_vmaxavq_s8(__a, __b)
@@ -691,12 +680,8 @@
 #define vqshluq_n_s8(__a,  __imm) __arm_vqshluq_n_s8(__a,  __imm)
 #define vaddvq_p_s8(__a, __p) __arm_vaddvq_p_s8(__a, __p)
 #define vshlq_r_s8(__a, __b) __arm_vshlq_r_s8(__a, __b)
-#define vrshlq_s8(__a, __b) __arm_vrshlq_s8(__a, __b)
-#define vrshlq_n_s8(__a, __b) __arm_vrshlq_n_s8(__a, __b)
 #define vqshlq_s8(__a, __b) __arm_vqshlq_s8(__a, __b)
 #define vqshlq_r_s8(__a, __b) __arm_vqshlq_r_s8(__a, __b)
-#define vqrshlq_s8(__a, __b) __arm_vqrshlq_s8(__a, __b)
-#define vqrshlq_n_s8(__a, __b) __arm_vqrshlq_n_s8(__a, __b)
 #define vqrdmulhq_s8(__a, __b) __arm_vqrdmulhq_s8(__a, __b)
 #define vqrdmulhq_n_s8(__a, __b) __arm_vqrdmulhq_n_s8(__a, __b)
 #define vornq_s8(__a, __b) __arm_vornq_s8(__a, __b)
@@ -743,12 +728,8 @@
 #define vaddvaq_u16(__a, __b) __arm_vaddvaq_u16(__a, __b)
 #define vabdq_u16(__a, __b) __arm_vabdq_u16(__a, __b)
 #define vshlq_r_u16(__a, __b) __arm_vshlq_r_u16(__a, __b)
-#define vrshlq_u16(__a, __b) __arm_vrshlq_u16(__a, __b)
-#define vrshlq_n_u16(__a, __b) __arm_vrshlq_n_u16(__a, __b)
 #define vqshlq_u16(__a, __b) __arm_vqshlq_u16(__a, __b)
 #define vqshlq_r_u16(__a, __b) __arm_vqshlq_r_u16(__a, __b)
-#define vqrshlq_u16(__a, __b) __arm_vqrshlq_u16(__a, __b)
-#define vqrshlq_n_u16(__a, __b) __arm_vqrshlq_n_u16(__a, __b)
 #define vminavq_s16(__a, __b) __arm_vminavq_s16(__a, __b)
 #define vminaq_s16(__a, __b) __arm_vminaq_s16(__a, __b)
 #define vmaxavq_s16(__a, __b) __arm_vmaxavq_s16(__a, __b)
@@ -771,12 +752,8 @@
 #define vqshluq_n_s16(__a,  __imm) __arm_vqshluq_n_s16(__a,  __imm)
 #define vaddvq_p_s16(__a, __p) __arm_vaddvq_p_s16(__a, __p)
 #define vshlq_r_s16(__a, __b) __arm_vshlq_r_s16(__a, __b)
-#define vrshlq_s16(__a, __b) __arm_vrshlq_s16(__a, __b)
-#define vrshlq_n_s16(__a, __b) __arm_vrshlq_n_s16(__a, __b)
 #define vqshlq_s16(__a, __b) __arm_vqshlq_s16(__a, __b)
 #define vqshlq_r_s16(__a, __b) __arm_vqshlq_r_s16(__a, __b)
-#define vqrshlq_s16(__a, __b) __arm_vqrshlq_s16(__a, __b)
-#define vqrshlq_n_s16(__a, __b) __arm_vqrshlq_n_s16(__a, __b)
 #define vqrdmulhq_s16(__a, __b) __arm_vqrdmulhq_s16(__a, __b)
 #define vqrdmulhq_n_s16(__a, __b) __arm_vqrdmulhq_n_s16(__a, __b)
 #define vornq_s16(__a, __b) __arm_vornq_s16(__a, __b)
@@ -823,12 +800,8 @@
 #define vaddvaq_u32(__a, __b) __arm_vaddvaq_u32(__a, __b)
 #define vabdq_u32(__a, __b) __arm_vabdq_u32(__a, __b)
 #define vshlq_r_u32(__a, __b) __arm_vshlq_r_u32(__a, __b)
-#define vrshlq_u32(__a, __b) __arm_vrshlq_u32(__a, __b)
-#define vrshlq_n_u32(__a, __b) __arm_vrshlq_n_u32(__a, __b)
 #define vqshlq_u32(__a, __b) __arm_vqshlq_u32(__a, __b)
 #define vqshlq_r_u32(__a, __b) __arm_vqshlq_r_u32(__a, __b)
-#define vqrshlq_u32(__a, __b) __arm_vqrshlq_u32(__a, __b)
-#define vqrshlq_n_u32(__a, __b) __arm_vqrshlq_n_u32(__a, __b)
 #define vminavq_s32(__a, __b) __arm_vminavq_s32(__a, __b)
 #define vminaq_s32(__a, __b) __arm_vminaq_s32(__a, __b)
 #define vmaxavq_s32(__a, __b) __arm_vmaxavq_s32(__a, __b)
@@ -851,12 +824,8 @@
 #define vqshluq_n_s32(__a,  __imm) __arm_vqshluq_n_s32(__a,  __imm)
 #define vaddvq_p_s32(__a, __p) __arm_vaddvq_p_s32(__a, __p)
 #define vshlq_r_s32(__a, __b) __arm_vshlq_r_s32(__a, __b)
-#define vrshlq_s32(__a, __b) __arm_vrshlq_s32(__a, __b)
-#define vrshlq_n_s32(__a, __b) __arm_vrshlq_n_s32(__a, __b)
 #define vqshlq_s32(__a, __b) __arm_vqshlq_s32(__a, __b)
 #define vqshlq_r_s32(__a, __b) __arm_vqshlq_r_s32(__a, __b)
-#define vqrshlq_s32(__a, __b) __arm_vqrshlq_s32(__a, __b)
-#define vqrshlq_n_s32(__a, __b) __arm_vqrshlq_n_s32(__a, __b)
 #define vqrdmulhq_s32(__a, __b) __arm_vqrdmulhq_s32(__a, __b)
 #define vqrdmulhq_n_s32(__a, __b) __arm_vqrdmulhq_n_s32(__a, __b)
 #define vornq_s32(__a, __b) __arm_vornq_s32(__a, __b)
@@ -1064,9 +1033,7 @@
 #define vsriq_n_u8(__a, __b,  __imm) __arm_vsriq_n_u8(__a, __b,  __imm)
 #define vsliq_n_u8(__a, __b,  __imm) __arm_vsliq_n_u8(__a, __b,  __imm)
 #define vshlq_m_r_u8(__a, __b, __p) __arm_vshlq_m_r_u8(__a, __b, __p)
-#define vrshlq_m_n_u8(__a, __b, __p) __arm_vrshlq_m_n_u8(__a, __b, __p)
 #define vqshlq_m_r_u8(__a, __b, __p) __arm_vqshlq_m_r_u8(__a, __b, __p)
-#define vqrshlq_m_n_u8(__a, __b, __p) __arm_vqrshlq_m_n_u8(__a, __b, __p)
 #define vminavq_p_s8(__a, __b, __p) __arm_vminavq_p_s8(__a, __b, __p)
 #define vminaq_m_s8(__a, __b, __p) __arm_vminaq_m_s8(__a, __b, __p)
 #define vmaxavq_p_s8(__a, __b, __p) __arm_vmaxavq_p_s8(__a, __b, __p)
@@ -1084,10 +1051,8 @@
 #define vcmpeqq_m_s8(__a, __b, __p) __arm_vcmpeqq_m_s8(__a, __b, __p)
 #define vcmpeqq_m_n_s8(__a, __b, __p) __arm_vcmpeqq_m_n_s8(__a, __b, __p)
 #define vshlq_m_r_s8(__a, __b, __p) __arm_vshlq_m_r_s8(__a, __b, __p)
-#define vrshlq_m_n_s8(__a, __b, __p) __arm_vrshlq_m_n_s8(__a, __b, __p)
 #define vrev64q_m_s8(__inactive, __a, __p) __arm_vrev64q_m_s8(__inactive, __a, __p)
 #define vqshlq_m_r_s8(__a, __b, __p) __arm_vqshlq_m_r_s8(__a, __b, __p)
-#define vqrshlq_m_n_s8(__a, __b, __p) __arm_vqrshlq_m_n_s8(__a, __b, __p)
 #define vqnegq_m_s8(__inactive, __a, __p) __arm_vqnegq_m_s8(__inactive, __a, __p)
 #define vqabsq_m_s8(__inactive, __a, __p) __arm_vqabsq_m_s8(__inactive, __a, __p)
 #define vnegq_m_s8(__inactive, __a, __p) __arm_vnegq_m_s8(__inactive, __a, __p)
@@ -1147,9 +1112,7 @@
 #define vsriq_n_u16(__a, __b,  __imm) __arm_vsriq_n_u16(__a, __b,  __imm)
 #define vsliq_n_u16(__a, __b,  __imm) __arm_vsliq_n_u16(__a, __b,  __imm)
 #define vshlq_m_r_u16(__a, __b, __p) __arm_vshlq_m_r_u16(__a, __b, __p)
-#define vrshlq_m_n_u16(__a, __b, __p) __arm_vrshlq_m_n_u16(__a, __b, __p)
 #define vqshlq_m_r_u16(__a, __b, __p) __arm_vqshlq_m_r_u16(__a, __b, __p)
-#define vqrshlq_m_n_u16(__a, __b, __p) __arm_vqrshlq_m_n_u16(__a, __b, __p)
 #define vminavq_p_s16(__a, __b, __p) __arm_vminavq_p_s16(__a, __b, __p)
 #define vminaq_m_s16(__a, __b, __p) __arm_vminaq_m_s16(__a, __b, __p)
 #define vmaxavq_p_s16(__a, __b, __p) __arm_vmaxavq_p_s16(__a, __b, __p)
@@ -1167,10 +1130,8 @@
 #define vcmpeqq_m_s16(__a, __b, __p) __arm_vcmpeqq_m_s16(__a, __b, __p)
 #define vcmpeqq_m_n_s16(__a, __b, __p) __arm_vcmpeqq_m_n_s16(__a, __b, __p)
 #define vshlq_m_r_s16(__a, __b, __p) __arm_vshlq_m_r_s16(__a, __b, __p)
-#define vrshlq_m_n_s16(__a, __b, __p) __arm_vrshlq_m_n_s16(__a, __b, __p)
 #define vrev64q_m_s16(__inactive, __a, __p) __arm_vrev64q_m_s16(__inactive, __a, __p)
 #define vqshlq_m_r_s16(__a, __b, __p) __arm_vqshlq_m_r_s16(__a, __b, __p)
-#define vqrshlq_m_n_s16(__a, __b, __p) __arm_vqrshlq_m_n_s16(__a, __b, __p)
 #define vqnegq_m_s16(__inactive, __a, __p) __arm_vqnegq_m_s16(__inactive, __a, __p)
 #define vqabsq_m_s16(__inactive, __a, __p) __arm_vqabsq_m_s16(__inactive, __a, __p)
 #define vnegq_m_s16(__inactive, __a, __p) __arm_vnegq_m_s16(__inactive, __a, __p)
@@ -1230,9 +1191,7 @@
 #define vsriq_n_u32(__a, __b,  __imm) __arm_vsriq_n_u32(__a, __b,  __imm)
 #define vsliq_n_u32(__a, __b,  __imm) __arm_vsliq_n_u32(__a, __b,  __imm)
 #define vshlq_m_r_u32(__a, __b, __p) __arm_vshlq_m_r_u32(__a, __b, __p)
-#define vrshlq_m_n_u32(__a, __b, __p) __arm_vrshlq_m_n_u32(__a, __b, __p)
 #define vqshlq_m_r_u32(__a, __b, __p) __arm_vqshlq_m_r_u32(__a, __b, __p)
-#define vqrshlq_m_n_u32(__a, __b, __p) __arm_vqrshlq_m_n_u32(__a, __b, __p)
 #define vminavq_p_s32(__a, __b, __p) __arm_vminavq_p_s32(__a, __b, __p)
 #define vminaq_m_s32(__a, __b, __p) __arm_vminaq_m_s32(__a, __b, __p)
 #define vmaxavq_p_s32(__a, __b, __p) __arm_vmaxavq_p_s32(__a, __b, __p)
@@ -1250,10 +1209,8 @@
 #define vcmpeqq_m_s32(__a, __b, __p) __arm_vcmpeqq_m_s32(__a, __b, __p)
 #define vcmpeqq_m_n_s32(__a, __b, __p) __arm_vcmpeqq_m_n_s32(__a, __b, __p)
 #define vshlq_m_r_s32(__a, __b, __p) __arm_vshlq_m_r_s32(__a, __b, __p)
-#define vrshlq_m_n_s32(__a, __b, __p) __arm_vrshlq_m_n_s32(__a, __b, __p)
 #define vrev64q_m_s32(__inactive, __a, __p) __arm_vrev64q_m_s32(__inactive, __a, __p)
 #define vqshlq_m_r_s32(__a, __b, __p) __arm_vqshlq_m_r_s32(__a, __b, __p)
-#define vqrshlq_m_n_s32(__a, __b, __p) __arm_vqrshlq_m_n_s32(__a, __b, __p)
 #define vqnegq_m_s32(__inactive, __a, __p) __arm_vqnegq_m_s32(__inactive, __a, __p)
 #define vqabsq_m_s32(__inactive, __a, __p) __arm_vqabsq_m_s32(__inactive, __a, __p)
 #define vnegq_m_s32(__inactive, __a, __p) __arm_vnegq_m_s32(__inactive, __a, __p)
@@ -1646,12 +1603,6 @@
 #define vqrdmulhq_m_s8(__inactive, __a, __b, __p) __arm_vqrdmulhq_m_s8(__inactive, __a, __b, __p)
 #define vqrdmulhq_m_s32(__inactive, __a, __b, __p) __arm_vqrdmulhq_m_s32(__inactive, __a, __b, __p)
 #define vqrdmulhq_m_s16(__inactive, __a, __b, __p) __arm_vqrdmulhq_m_s16(__inactive, __a, __b, __p)
-#define vqrshlq_m_s8(__inactive, __a, __b, __p) __arm_vqrshlq_m_s8(__inactive, __a, __b, __p)
-#define vqrshlq_m_s32(__inactive, __a, __b, __p) __arm_vqrshlq_m_s32(__inactive, __a, __b, __p)
-#define vqrshlq_m_s16(__inactive, __a, __b, __p) __arm_vqrshlq_m_s16(__inactive, __a, __b, __p)
-#define vqrshlq_m_u8(__inactive, __a, __b, __p) __arm_vqrshlq_m_u8(__inactive, __a, __b, __p)
-#define vqrshlq_m_u32(__inactive, __a, __b, __p) __arm_vqrshlq_m_u32(__inactive, __a, __b, __p)
-#define vqrshlq_m_u16(__inactive, __a, __b, __p) __arm_vqrshlq_m_u16(__inactive, __a, __b, __p)
 #define vqshlq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vqshlq_m_n_s8(__inactive, __a,  __imm, __p)
 #define vqshlq_m_n_s32(__inactive, __a,  __imm, __p) __arm_vqshlq_m_n_s32(__inactive, __a,  __imm, __p)
 #define vqshlq_m_n_s16(__inactive, __a,  __imm, __p) __arm_vqshlq_m_n_s16(__inactive, __a,  __imm, __p)
@@ -1664,12 +1615,6 @@
 #define vqshlq_m_u8(__inactive, __a, __b, __p) __arm_vqshlq_m_u8(__inactive, __a, __b, __p)
 #define vqshlq_m_u32(__inactive, __a, __b, __p) __arm_vqshlq_m_u32(__inactive, __a, __b, __p)
 #define vqshlq_m_u16(__inactive, __a, __b, __p) __arm_vqshlq_m_u16(__inactive, __a, __b, __p)
-#define vrshlq_m_s8(__inactive, __a, __b, __p) __arm_vrshlq_m_s8(__inactive, __a, __b, __p)
-#define vrshlq_m_s32(__inactive, __a, __b, __p) __arm_vrshlq_m_s32(__inactive, __a, __b, __p)
-#define vrshlq_m_s16(__inactive, __a, __b, __p) __arm_vrshlq_m_s16(__inactive, __a, __b, __p)
-#define vrshlq_m_u8(__inactive, __a, __b, __p) __arm_vrshlq_m_u8(__inactive, __a, __b, __p)
-#define vrshlq_m_u32(__inactive, __a, __b, __p) __arm_vrshlq_m_u32(__inactive, __a, __b, __p)
-#define vrshlq_m_u16(__inactive, __a, __b, __p) __arm_vrshlq_m_u16(__inactive, __a, __b, __p)
 #define vrshrq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_s8(__inactive, __a,  __imm, __p)
 #define vrshrq_m_n_s32(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_s32(__inactive, __a,  __imm, __p)
 #define vrshrq_m_n_s16(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_s16(__inactive, __a,  __imm, __p)
@@ -2232,12 +2177,6 @@
 #define vrev64q_x_u8(__a, __p) __arm_vrev64q_x_u8(__a, __p)
 #define vrev64q_x_u16(__a, __p) __arm_vrev64q_x_u16(__a, __p)
 #define vrev64q_x_u32(__a, __p) __arm_vrev64q_x_u32(__a, __p)
-#define vrshlq_x_s8(__a, __b, __p) __arm_vrshlq_x_s8(__a, __b, __p)
-#define vrshlq_x_s16(__a, __b, __p) __arm_vrshlq_x_s16(__a, __b, __p)
-#define vrshlq_x_s32(__a, __b, __p) __arm_vrshlq_x_s32(__a, __b, __p)
-#define vrshlq_x_u8(__a, __b, __p) __arm_vrshlq_x_u8(__a, __b, __p)
-#define vrshlq_x_u16(__a, __b, __p) __arm_vrshlq_x_u16(__a, __b, __p)
-#define vrshlq_x_u32(__a, __b, __p) __arm_vrshlq_x_u32(__a, __b, __p)
 #define vshllbq_x_n_s8(__a,  __imm, __p) __arm_vshllbq_x_n_s8(__a,  __imm, __p)
 #define vshllbq_x_n_s16(__a,  __imm, __p) __arm_vshllbq_x_n_s16(__a,  __imm, __p)
 #define vshllbq_x_n_u8(__a,  __imm, __p) __arm_vshllbq_x_n_u8(__a,  __imm, __p)
@@ -3300,20 +3239,6 @@ __arm_vshlq_r_u8 (uint8x16_t __a, int32_t __b)
   return __builtin_mve_vshlq_r_uv16qi (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vrshlq_uv16qi (__a, __b);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_n_u8 (uint8x16_t __a, int32_t __b)
-{
-  return __builtin_mve_vrshlq_n_uv16qi (__a, __b);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
@@ -3328,20 +3253,6 @@ __arm_vqshlq_r_u8 (uint8x16_t __a, int32_t __b)
   return __builtin_mve_vqshlq_r_uv16qi (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vqrshlq_uv16qi (__a, __b);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_n_u8 (uint8x16_t __a, int32_t __b)
-{
-  return __builtin_mve_vqrshlq_n_uv16qi (__a, __b);
-}
-
 __extension__ extern __inline uint8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq_s8 (uint8_t __a, int8x16_t __b)
@@ -3496,20 +3407,6 @@ __arm_vshlq_r_s8 (int8x16_t __a, int32_t __b)
   return __builtin_mve_vshlq_r_sv16qi (__a, __b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vrshlq_sv16qi (__a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_n_s8 (int8x16_t __a, int32_t __b)
-{
-  return __builtin_mve_vrshlq_n_sv16qi (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq_s8 (int8x16_t __a, int8x16_t __b)
@@ -3524,20 +3421,6 @@ __arm_vqshlq_r_s8 (int8x16_t __a, int32_t __b)
   return __builtin_mve_vqshlq_r_sv16qi (__a, __b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vqrshlq_sv16qi (__a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_n_s8 (int8x16_t __a, int32_t __b)
-{
-  return __builtin_mve_vqrshlq_n_sv16qi (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmulhq_s8 (int8x16_t __a, int8x16_t __b)
@@ -3862,20 +3745,6 @@ __arm_vshlq_r_u16 (uint16x8_t __a, int32_t __b)
   return __builtin_mve_vshlq_r_uv8hi (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vrshlq_uv8hi (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_n_u16 (uint16x8_t __a, int32_t __b)
-{
-  return __builtin_mve_vrshlq_n_uv8hi (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
@@ -3890,20 +3759,6 @@ __arm_vqshlq_r_u16 (uint16x8_t __a, int32_t __b)
   return __builtin_mve_vqshlq_r_uv8hi (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqrshlq_uv8hi (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_n_u16 (uint16x8_t __a, int32_t __b)
-{
-  return __builtin_mve_vqrshlq_n_uv8hi (__a, __b);
-}
-
 __extension__ extern __inline uint16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq_s16 (uint16_t __a, int16x8_t __b)
@@ -4058,20 +3913,6 @@ __arm_vshlq_r_s16 (int16x8_t __a, int32_t __b)
   return __builtin_mve_vshlq_r_sv8hi (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vrshlq_sv8hi (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_n_s16 (int16x8_t __a, int32_t __b)
-{
-  return __builtin_mve_vrshlq_n_sv8hi (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq_s16 (int16x8_t __a, int16x8_t __b)
@@ -4086,20 +3927,6 @@ __arm_vqshlq_r_s16 (int16x8_t __a, int32_t __b)
   return __builtin_mve_vqshlq_r_sv8hi (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqrshlq_sv8hi (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_n_s16 (int16x8_t __a, int32_t __b)
-{
-  return __builtin_mve_vqrshlq_n_sv8hi (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
@@ -4424,20 +4251,6 @@ __arm_vshlq_r_u32 (uint32x4_t __a, int32_t __b)
   return __builtin_mve_vshlq_r_uv4si (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vrshlq_uv4si (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_n_u32 (uint32x4_t __a, int32_t __b)
-{
-  return __builtin_mve_vrshlq_n_uv4si (__a, __b);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
@@ -4452,20 +4265,6 @@ __arm_vqshlq_r_u32 (uint32x4_t __a, int32_t __b)
   return __builtin_mve_vqshlq_r_uv4si (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vqrshlq_uv4si (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_n_u32 (uint32x4_t __a, int32_t __b)
-{
-  return __builtin_mve_vqrshlq_n_uv4si (__a, __b);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq_s32 (uint32_t __a, int32x4_t __b)
@@ -4620,20 +4419,6 @@ __arm_vshlq_r_s32 (int32x4_t __a, int32_t __b)
   return __builtin_mve_vshlq_r_sv4si (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vrshlq_sv4si (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_n_s32 (int32x4_t __a, int32_t __b)
-{
-  return __builtin_mve_vrshlq_n_sv4si (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq_s32 (int32x4_t __a, int32x4_t __b)
@@ -4648,20 +4433,6 @@ __arm_vqshlq_r_s32 (int32x4_t __a, int32_t __b)
   return __builtin_mve_vqshlq_r_sv4si (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vqrshlq_sv4si (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_n_s32 (int32x4_t __a, int32_t __b)
-{
-  return __builtin_mve_vqrshlq_n_sv4si (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
@@ -5633,13 +5404,6 @@ __arm_vshlq_m_r_u8 (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
   return __builtin_mve_vshlq_m_r_uv16qi (__a, __b, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m_n_u8 (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshlq_m_n_uv16qi (__a, __b, __p);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq_m_r_u8 (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
@@ -5647,13 +5411,6 @@ __arm_vqshlq_m_r_u8 (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
   return __builtin_mve_vqshlq_m_r_uv16qi (__a, __b, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m_n_u8 (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshlq_m_n_uv16qi (__a, __b, __p);
-}
-
 __extension__ extern __inline uint8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq_p_s8 (uint8_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -5773,13 +5530,6 @@ __arm_vshlq_m_r_s8 (int8x16_t __a, int32_t __b, mve_pred16_t __p)
   return __builtin_mve_vshlq_m_r_sv16qi (__a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m_n_s8 (int8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshlq_m_n_sv16qi (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrev64q_m_s8 (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
@@ -5794,13 +5544,6 @@ __arm_vqshlq_m_r_s8 (int8x16_t __a, int32_t __b, mve_pred16_t __p)
   return __builtin_mve_vqshlq_m_r_sv16qi (__a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m_n_s8 (int8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshlq_m_n_sv16qi (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqnegq_m_s8 (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
@@ -6215,13 +5958,6 @@ __arm_vshlq_m_r_u16 (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
   return __builtin_mve_vshlq_m_r_uv8hi (__a, __b, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m_n_u16 (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshlq_m_n_uv8hi (__a, __b, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq_m_r_u16 (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
@@ -6229,13 +5965,6 @@ __arm_vqshlq_m_r_u16 (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
   return __builtin_mve_vqshlq_m_r_uv8hi (__a, __b, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m_n_u16 (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshlq_m_n_uv8hi (__a, __b, __p);
-}
-
 __extension__ extern __inline uint16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq_p_s16 (uint16_t __a, int16x8_t __b, mve_pred16_t __p)
@@ -6355,13 +6084,6 @@ __arm_vshlq_m_r_s16 (int16x8_t __a, int32_t __b, mve_pred16_t __p)
   return __builtin_mve_vshlq_m_r_sv8hi (__a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m_n_s16 (int16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshlq_m_n_sv8hi (__a, __b, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrev64q_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
@@ -6376,13 +6098,6 @@ __arm_vqshlq_m_r_s16 (int16x8_t __a, int32_t __b, mve_pred16_t __p)
   return __builtin_mve_vqshlq_m_r_sv8hi (__a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m_n_s16 (int16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshlq_m_n_sv8hi (__a, __b, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqnegq_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
@@ -6796,13 +6511,6 @@ __arm_vshlq_m_r_u32 (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
   return __builtin_mve_vshlq_m_r_uv4si (__a, __b, __p);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m_n_u32 (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshlq_m_n_uv4si (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq_m_r_u32 (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
@@ -6810,13 +6518,6 @@ __arm_vqshlq_m_r_u32 (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
   return __builtin_mve_vqshlq_m_r_uv4si (__a, __b, __p);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m_n_u32 (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshlq_m_n_uv4si (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq_p_s32 (uint32_t __a, int32x4_t __b, mve_pred16_t __p)
@@ -6936,13 +6637,6 @@ __arm_vshlq_m_r_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
   return __builtin_mve_vshlq_m_r_sv4si (__a, __b, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m_n_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshlq_m_n_sv4si (__a, __b, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrev64q_m_s32 (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
@@ -6957,13 +6651,6 @@ __arm_vqshlq_m_r_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
   return __builtin_mve_vqshlq_m_r_sv4si (__a, __b, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m_n_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshlq_m_n_sv4si (__a, __b, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqnegq_m_s32 (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
@@ -9029,48 +8716,6 @@ __arm_vqrdmulhq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_p
   return __builtin_mve_vqrdmulhq_m_sv8hi (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshlq_m_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshlq_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshlq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshlq_m_uv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshlq_m_uv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshlq_m_uv8hi (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
@@ -9155,48 +8800,6 @@ __arm_vqshlq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pr
   return __builtin_mve_vqshlq_m_uv8hi (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshlq_m_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshlq_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshlq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshlq_m_uv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshlq_m_uv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshlq_m_uv8hi (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrshrq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
@@ -12648,48 +12251,6 @@ __arm_vrev64q_x_u32 (uint32x4_t __a, mve_pred16_t __p)
   return __builtin_mve_vrev64q_m_uv4si (__arm_vuninitializedq_u32 (), __a, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_x_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshlq_m_sv16qi (__arm_vuninitializedq_s8 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_x_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshlq_m_sv8hi (__arm_vuninitializedq_s16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_x_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshlq_m_sv4si (__arm_vuninitializedq_s32 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_x_u8 (uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshlq_m_uv16qi (__arm_vuninitializedq_u8 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_x_u16 (uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshlq_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_x_u32 (uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshlq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __b, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshllbq_x_n_s8 (int8x16_t __a, const int __imm, mve_pred16_t __p)
@@ -17203,20 +16764,6 @@ __arm_vshlq_r (uint8x16_t __a, int32_t __b)
  return __arm_vshlq_r_u8 (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq (uint8x16_t __a, int8x16_t __b)
-{
- return __arm_vrshlq_u8 (__a, __b);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq (uint8x16_t __a, int32_t __b)
-{
- return __arm_vrshlq_n_u8 (__a, __b);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq (uint8x16_t __a, int8x16_t __b)
@@ -17231,20 +16778,6 @@ __arm_vqshlq_r (uint8x16_t __a, int32_t __b)
  return __arm_vqshlq_r_u8 (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq (uint8x16_t __a, int8x16_t __b)
-{
- return __arm_vqrshlq_u8 (__a, __b);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq (uint8x16_t __a, int32_t __b)
-{
- return __arm_vqrshlq_n_u8 (__a, __b);
-}
-
 __extension__ extern __inline uint8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq (uint8_t __a, int8x16_t __b)
@@ -17399,20 +16932,6 @@ __arm_vshlq_r (int8x16_t __a, int32_t __b)
  return __arm_vshlq_r_s8 (__a, __b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vrshlq_s8 (__a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq (int8x16_t __a, int32_t __b)
-{
- return __arm_vrshlq_n_s8 (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq (int8x16_t __a, int8x16_t __b)
@@ -17427,20 +16946,6 @@ __arm_vqshlq_r (int8x16_t __a, int32_t __b)
  return __arm_vqshlq_r_s8 (__a, __b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vqrshlq_s8 (__a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq (int8x16_t __a, int32_t __b)
-{
- return __arm_vqrshlq_n_s8 (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmulhq (int8x16_t __a, int8x16_t __b)
@@ -17746,63 +17251,35 @@ __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq (uint32_t __a, uint16x8_t __b)
 {
- return __arm_vaddvaq_u16 (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq (uint16x8_t __a, uint16x8_t __b)
-{
- return __arm_vabdq_u16 (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_r (uint16x8_t __a, int32_t __b)
-{
- return __arm_vshlq_r_u16 (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq (uint16x8_t __a, int16x8_t __b)
-{
- return __arm_vrshlq_u16 (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq (uint16x8_t __a, int32_t __b)
-{
- return __arm_vrshlq_n_u16 (__a, __b);
+ return __arm_vaddvaq_u16 (__a, __b);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq (uint16x8_t __a, int16x8_t __b)
+__arm_vabdq (uint16x8_t __a, uint16x8_t __b)
 {
- return __arm_vqshlq_u16 (__a, __b);
+ return __arm_vabdq_u16 (__a, __b);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_r (uint16x8_t __a, int32_t __b)
+__arm_vshlq_r (uint16x8_t __a, int32_t __b)
 {
- return __arm_vqshlq_r_u16 (__a, __b);
+ return __arm_vshlq_r_u16 (__a, __b);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq (uint16x8_t __a, int16x8_t __b)
+__arm_vqshlq (uint16x8_t __a, int16x8_t __b)
 {
- return __arm_vqrshlq_u16 (__a, __b);
+ return __arm_vqshlq_u16 (__a, __b);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq (uint16x8_t __a, int32_t __b)
+__arm_vqshlq_r (uint16x8_t __a, int32_t __b)
 {
- return __arm_vqrshlq_n_u16 (__a, __b);
+ return __arm_vqshlq_r_u16 (__a, __b);
 }
 
 __extension__ extern __inline uint16_t
@@ -17959,20 +17436,6 @@ __arm_vshlq_r (int16x8_t __a, int32_t __b)
  return __arm_vshlq_r_s16 (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vrshlq_s16 (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq (int16x8_t __a, int32_t __b)
-{
- return __arm_vrshlq_n_s16 (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq (int16x8_t __a, int16x8_t __b)
@@ -17987,20 +17450,6 @@ __arm_vqshlq_r (int16x8_t __a, int32_t __b)
  return __arm_vqshlq_r_s16 (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vqrshlq_s16 (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq (int16x8_t __a, int32_t __b)
-{
- return __arm_vqrshlq_n_s16 (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmulhq (int16x8_t __a, int16x8_t __b)
@@ -18323,20 +17772,6 @@ __arm_vshlq_r (uint32x4_t __a, int32_t __b)
  return __arm_vshlq_r_u32 (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq (uint32x4_t __a, int32x4_t __b)
-{
- return __arm_vrshlq_u32 (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq (uint32x4_t __a, int32_t __b)
-{
- return __arm_vrshlq_n_u32 (__a, __b);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq (uint32x4_t __a, int32x4_t __b)
@@ -18351,20 +17786,6 @@ __arm_vqshlq_r (uint32x4_t __a, int32_t __b)
  return __arm_vqshlq_r_u32 (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq (uint32x4_t __a, int32x4_t __b)
-{
- return __arm_vqrshlq_u32 (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq (uint32x4_t __a, int32_t __b)
-{
- return __arm_vqrshlq_n_u32 (__a, __b);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq (uint32_t __a, int32x4_t __b)
@@ -18519,20 +17940,6 @@ __arm_vshlq_r (int32x4_t __a, int32_t __b)
  return __arm_vshlq_r_s32 (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vrshlq_s32 (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq (int32x4_t __a, int32_t __b)
-{
- return __arm_vrshlq_n_s32 (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq (int32x4_t __a, int32x4_t __b)
@@ -18547,20 +17954,6 @@ __arm_vqshlq_r (int32x4_t __a, int32_t __b)
  return __arm_vqshlq_r_s32 (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vqrshlq_s32 (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq (int32x4_t __a, int32_t __b)
-{
- return __arm_vqrshlq_n_s32 (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmulhq (int32x4_t __a, int32x4_t __b)
@@ -19492,13 +18885,6 @@ __arm_vshlq_m_r (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
  return __arm_vshlq_m_r_u8 (__a, __b, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m_n (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vrshlq_m_n_u8 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq_m_r (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
@@ -19506,13 +18892,6 @@ __arm_vqshlq_m_r (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
  return __arm_vqshlq_m_r_u8 (__a, __b, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m_n (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vqrshlq_m_n_u8 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq_p (uint8_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -19632,13 +19011,6 @@ __arm_vshlq_m_r (int8x16_t __a, int32_t __b, mve_pred16_t __p)
  return __arm_vshlq_m_r_s8 (__a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m_n (int8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vrshlq_m_n_s8 (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrev64q_m (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
@@ -19653,13 +19025,6 @@ __arm_vqshlq_m_r (int8x16_t __a, int32_t __b, mve_pred16_t __p)
  return __arm_vqshlq_m_r_s8 (__a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m_n (int8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vqrshlq_m_n_s8 (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqnegq_m (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
@@ -20073,13 +19438,6 @@ __arm_vshlq_m_r (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
  return __arm_vshlq_m_r_u16 (__a, __b, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m_n (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vrshlq_m_n_u16 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq_m_r (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
@@ -20087,13 +19445,6 @@ __arm_vqshlq_m_r (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
  return __arm_vqshlq_m_r_u16 (__a, __b, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m_n (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vqrshlq_m_n_u16 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq_p (uint16_t __a, int16x8_t __b, mve_pred16_t __p)
@@ -20213,13 +19564,6 @@ __arm_vshlq_m_r (int16x8_t __a, int32_t __b, mve_pred16_t __p)
  return __arm_vshlq_m_r_s16 (__a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m_n (int16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vrshlq_m_n_s16 (__a, __b, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrev64q_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
@@ -20234,13 +19578,6 @@ __arm_vqshlq_m_r (int16x8_t __a, int32_t __b, mve_pred16_t __p)
  return __arm_vqshlq_m_r_s16 (__a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m_n (int16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vqrshlq_m_n_s16 (__a, __b, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqnegq_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
@@ -20654,13 +19991,6 @@ __arm_vshlq_m_r (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
  return __arm_vshlq_m_r_u32 (__a, __b, __p);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m_n (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vrshlq_m_n_u32 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq_m_r (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
@@ -20668,13 +19998,6 @@ __arm_vqshlq_m_r (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
  return __arm_vqshlq_m_r_u32 (__a, __b, __p);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m_n (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vqrshlq_m_n_u32 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq_p (uint32_t __a, int32x4_t __b, mve_pred16_t __p)
@@ -20794,13 +20117,6 @@ __arm_vshlq_m_r (int32x4_t __a, int32_t __b, mve_pred16_t __p)
  return __arm_vshlq_m_r_s32 (__a, __b, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m_n (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vrshlq_m_n_s32 (__a, __b, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrev64q_m (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
@@ -20815,13 +20131,6 @@ __arm_vqshlq_m_r (int32x4_t __a, int32_t __b, mve_pred16_t __p)
  return __arm_vqshlq_m_r_s32 (__a, __b, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m_n (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vqrshlq_m_n_s32 (__a, __b, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqnegq_m (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
@@ -22887,48 +22196,6 @@ __arm_vqrdmulhq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred1
  return __arm_vqrdmulhq_m_s16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vqrshlq_m_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vqrshlq_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vqrshlq_m_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m (uint8x16_t __inactive, uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vqrshlq_m_u8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m (uint32x4_t __inactive, uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vqrshlq_m_u32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshlq_m (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vqrshlq_m_u16 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq_m_n (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
@@ -23013,48 +22280,6 @@ __arm_vqshlq_m (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pred16
  return __arm_vqshlq_m_u16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vrshlq_m_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vrshlq_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vrshlq_m_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m (uint8x16_t __inactive, uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vrshlq_m_u8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m (uint32x4_t __inactive, uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vrshlq_m_u32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_m (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vrshlq_m_u16 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrshrq_m (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
@@ -26009,48 +25234,6 @@ __arm_vrev64q_x (uint32x4_t __a, mve_pred16_t __p)
  return __arm_vrev64q_x_u32 (__a, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_x (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vrshlq_x_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_x (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vrshlq_x_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_x (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vrshlq_x_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_x (uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vrshlq_x_u8 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_x (uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vrshlq_x_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshlq_x (uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vrshlq_x_u32 (__a, __b, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshllbq_x (int8x16_t __a, const int __imm, mve_pred16_t __p)
@@ -29858,22 +29041,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrshrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrshrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
 
-#define __arm_vrshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vrshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vrshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vrshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vrshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vrshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vrshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
-
 #define __arm_vqshluq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshluq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
@@ -29908,22 +29075,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
 
-#define __arm_vqrshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqrshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqrshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqrshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce3(p1, int)));})
-
 #define __arm_vqrdmulhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -30181,16 +29332,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_m_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_m_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
 
-#define __arm_vrshlq_m_n(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrshlq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrshlq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vrshlq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrshlq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrshlq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrshlq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __p1, p2));})
-
 #define __arm_vqshlq_m_r(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshlq_m_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
@@ -30200,15 +29341,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_m_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_m_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
 
-#define __arm_vqrshlq_m_n(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vqrshlq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vqrshlq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vqrshlq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vqrshlq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqrshlq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqrshlq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
-
 #define __arm_vqrdmlsdhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -31649,22 +30781,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
 
-#define __arm_vrshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vrshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vrshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vrshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vrshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vrshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vrshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
-
 #define __arm_vqshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -31717,22 +30833,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
 
-#define __arm_vqrshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqrshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqrshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqrshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce3(p1, int)));})
-
 #define __arm_vqrdmulhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -32100,15 +31200,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmlsdhxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmlsdhxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
 
-#define __arm_vqrshlq_m_n(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vqrshlq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vqrshlq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vqrshlq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vqrshlq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqrshlq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqrshlq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
-
 #define __arm_vqshlq_m_r(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshlq_m_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
@@ -32128,16 +31219,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vrev64q_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrev64q_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
-#define __arm_vrshlq_m_n(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrshlq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrshlq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vrshlq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrshlq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrshlq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrshlq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __p1, p2));})
-
 #define __arm_vshlq_m_r(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_m_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
@@ -33076,16 +32157,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
 
-#define __arm_vrshlq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrshlq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrshlq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrshlq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrshlq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrshlq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrshlq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
 #define __arm_vrshrq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vrshrq_x_n_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2, p3), \
@@ -33333,17 +32404,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int), p3), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int), p3));})
 
-#define __arm_vqrshlq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrshlq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrshlq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrshlq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrshlq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrshlq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrshlq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
 #define __arm_vqshlq_m_n(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -33365,17 +32425,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshlq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshlq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
 
-#define __arm_vrshlq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrshlq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrshlq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrshlq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrshlq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrshlq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrshlq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
 #define __arm_vrshrq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-- 
2.34.1



* [PATCH 04/23] arm: [MVE intrinsics] factorize vqshlq vshlq
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
  2023-05-05  8:39 ` [PATCH 02/23] arm: [MVE intrinsics] factorize vqrshlq vrshlq Christophe Lyon
  2023-05-05  8:39 ` [PATCH 03/23] arm: [MVE intrinsics] rework vrshlq vqrshlq Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 10:00   ` Kyrylo Tkachov
  2023-05-05  8:39 ` [PATCH 05/23] arm: [MVE intrinsics] rework vqrdmulhq Christophe Lyon
                   ` (19 subsequent siblings)
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vqshlq and vshlq so that they use the same pattern.
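
For illustration, a minimal user-level sketch (the function name is
hypothetical; assumes an MVE-enabled target and <arm_mve.h>) of two
intrinsics whose immediate forms now expand through the shared
@mve_<mve_insn>q_n_<supf><mode> pattern:

  #include <arm_mve.h>

  int8x16_t
  shift_both (int8x16_t a)
  {
    /* Both immediate-shift intrinsics now match the same define_insn,
       with <mve_insn> selecting the vshl vs. vqshl mnemonic.  */
    int8x16_t s = vshlq_n_s8 (a, 3);   /* plain left shift */
    int8x16_t q = vqshlq_n_s8 (a, 3);  /* saturating left shift */
    return vaddq_s8 (s, q);
  }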

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MVE_SHIFT_M_R, MVE_SHIFT_M_N)
	(MVE_SHIFT_N, MVE_SHIFT_R): New.
	(mve_insn): Add vqshl, vshl.
	* config/arm/mve.md (mve_vqshlq_n_<supf><mode>)
	(mve_vshlq_n_<supf><mode>): Merge into ...
	(@mve_<mve_insn>q_n_<supf><mode>): ... this.
	(mve_vqshlq_r_<supf><mode>, mve_vshlq_r_<supf><mode>): Merge into
	...
	(@mve_<mve_insn>q_r_<supf><mode>): ... this.
	(mve_vqshlq_m_r_<supf><mode>, mve_vshlq_m_r_<supf><mode>): Merge
	into ...
	(@mve_<mve_insn>q_m_r_<supf><mode>): ... this.
	(mve_vqshlq_m_n_<supf><mode>, mve_vshlq_m_n_<supf><mode>): Merge
	into ...
	(@mve_<mve_insn>q_m_n_<supf><mode>): ... this.
	* config/arm/vec-common.md (mve_vshlq_<supf><mode>): Transform
	into ...
	(@mve_<mve_insn>q_<supf><mode>): ... this.
---
 gcc/config/arm/iterators.md  | 29 +++++++++++
 gcc/config/arm/mve.md        | 99 ++++++++----------------------------
 gcc/config/arm/vec-common.md |  4 +-
 3 files changed, 51 insertions(+), 81 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index e7622fe752a..c53b42a86e9 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -435,6 +435,26 @@ (define_int_iterator MVE_INT_N_BINARY_LOGIC   [
 		     VORRQ_N_S VORRQ_N_U
 		     ])
 
+(define_int_iterator MVE_SHIFT_M_R   [
+		     VQSHLQ_M_R_S VQSHLQ_M_R_U
+		     VSHLQ_M_R_S VSHLQ_M_R_U
+		     ])
+
+(define_int_iterator MVE_SHIFT_M_N   [
+		     VQSHLQ_M_N_S VQSHLQ_M_N_U
+		     VSHLQ_M_N_S VSHLQ_M_N_U
+		     ])
+
+(define_int_iterator MVE_SHIFT_N   [
+		     VQSHLQ_N_S VQSHLQ_N_U
+		     VSHLQ_N_S VSHLQ_N_U
+		     ])
+
+(define_int_iterator MVE_SHIFT_R   [
+		     VQSHLQ_R_S VQSHLQ_R_U
+		     VSHLQ_R_S VSHLQ_R_U
+		     ])
+
 (define_int_iterator MVE_RSHIFT_M_N   [
 		     VQRSHLQ_M_N_S VQRSHLQ_M_N_U
 		     VRSHLQ_M_N_S VRSHLQ_M_N_U
@@ -540,7 +560,11 @@ (define_int_attr mve_insn [
 		 (VQRSHLQ_M_S "vqrshl") (VQRSHLQ_M_U "vqrshl")
 		 (VQRSHLQ_N_S "vqrshl") (VQRSHLQ_N_U "vqrshl")
 		 (VQRSHLQ_S "vqrshl") (VQRSHLQ_U "vqrshl")
+		 (VQSHLQ_M_N_S "vqshl") (VQSHLQ_M_N_U "vqshl")
+		 (VQSHLQ_M_R_S "vqshl") (VQSHLQ_M_R_U "vqshl")
 		 (VQSHLQ_M_S "vqshl") (VQSHLQ_M_U "vqshl")
+		 (VQSHLQ_N_S "vqshl") (VQSHLQ_N_U "vqshl")
+		 (VQSHLQ_R_S "vqshl") (VQSHLQ_R_U "vqshl")
 		 (VQSHLQ_S "vqshl") (VQSHLQ_U "vqshl")
 		 (VQSUBQ_M_N_S "vqsub") (VQSUBQ_M_N_U "vqsub")
 		 (VQSUBQ_M_S "vqsub") (VQSUBQ_M_U "vqsub")
@@ -554,7 +578,12 @@ (define_int_attr mve_insn [
 		 (VRSHLQ_M_S "vrshl") (VRSHLQ_M_U "vrshl")
 		 (VRSHLQ_N_S "vrshl") (VRSHLQ_N_U "vrshl")
 		 (VRSHLQ_S "vrshl") (VRSHLQ_U "vrshl")
+		 (VSHLQ_M_N_S "vshl") (VSHLQ_M_N_U "vshl")
+		 (VSHLQ_M_R_S "vshl") (VSHLQ_M_R_U "vshl")
 		 (VSHLQ_M_S "vshl") (VSHLQ_M_U "vshl")
+		 (VSHLQ_N_S "vshl") (VSHLQ_N_U "vshl")
+		 (VSHLQ_R_S "vshl") (VSHLQ_R_U "vshl")
+		 (VSHLQ_S "vshl") (VSHLQ_U "vshl")
 		 (VSUBQ_M_N_S "vsub") (VSUBQ_M_N_U "vsub") (VSUBQ_M_N_F "vsub")
 		 (VSUBQ_M_S "vsub") (VSUBQ_M_U "vsub") (VSUBQ_M_F "vsub")
 		 (VSUBQ_N_S "vsub") (VSUBQ_N_U "vsub") (VSUBQ_N_F "vsub")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 0d3343b6e29..fb1076aef73 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1389,32 +1389,34 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
 ])
 
 ;;
-;; [vqshlq_n_s, vqshlq_n_u])
+;; [vqshlq_n_s, vqshlq_n_u]
+;; [vshlq_n_u, vshlq_n_s]
 ;;
-(define_insn "mve_vqshlq_n_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
 		       (match_operand:SI 2 "immediate_operand" "i")]
-	 VQSHLQ_N))
+	 MVE_SHIFT_N))
   ]
   "TARGET_HAVE_MVE"
-  "vqshl.<supf>%#<V_sz_elem>\t%q0, %q1, %2"
+  "<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1, %2"
   [(set_attr "type" "mve_move")
 ])
 
 ;;
-;; [vqshlq_r_u, vqshlq_r_s])
+;; [vqshlq_r_u, vqshlq_r_s]
+;; [vshlq_r_s, vshlq_r_u]
 ;;
-(define_insn "mve_vqshlq_r_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_r_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
 		       (match_operand:SI 2 "s_register_operand" "r")]
-	 VQSHLQ_R))
+	 MVE_SHIFT_R))
   ]
   "TARGET_HAVE_MVE"
-  "vqshl.<supf>%#<V_sz_elem>\t%q0, %2"
+  "<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %2"
   [(set_attr "type" "mve_move")
 ])
 
@@ -1448,36 +1450,6 @@ (define_insn "mve_vrshrq_n_<supf><mode>"
   [(set_attr "type" "mve_move")
 ])
 
-;;
-;; [vshlq_n_u, vshlq_n_s])
-;;
-(define_insn "mve_vshlq_n_<supf><mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:SI 2 "immediate_operand" "i")]
-	 VSHLQ_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vshl.<supf>%#<V_sz_elem>\t%q0, %q1, %2"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vshlq_r_s, vshlq_r_u])
-;;
-(define_insn "mve_vshlq_r_<supf><mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
-		       (match_operand:SI 2 "s_register_operand" "r")]
-	 VSHLQ_R))
-  ]
-  "TARGET_HAVE_MVE"
-  "vshl.<supf>%#<V_sz_elem>\t%q0, %2"
-  [(set_attr "type" "mve_move")
-])
-
 ;;
 ;; [vabdq_f])
 ;;
@@ -3101,18 +3073,19 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
    (set_attr "length""8")])
 
 ;;
-;; [vqshlq_m_r_u, vqshlq_m_r_s])
+;; [vqshlq_m_r_u, vqshlq_m_r_s]
+;; [vshlq_m_r_u, vshlq_m_r_s]
 ;;
-(define_insn "mve_vqshlq_m_r_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_m_r_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
 		       (match_operand:SI 2 "s_register_operand" "r")
 		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VQSHLQ_M_R))
+	 MVE_SHIFT_M_R))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vqshlt.<supf>%#<V_sz_elem>\t%q0, %2"
+  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
@@ -3132,22 +3105,6 @@ (define_insn "mve_vrev64q_m_<supf><mode>"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-;;
-;; [vshlq_m_r_u, vshlq_m_r_s])
-;;
-(define_insn "mve_vshlq_m_r_<supf><mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
-		       (match_operand:SI 2 "s_register_operand" "r")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VSHLQ_M_R))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vshlt.<supf>%#<V_sz_elem>\t%q0, %2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
 ;;
 ;; [vsliq_n_u, vsliq_n_s])
 ;;
@@ -4881,19 +4838,20 @@ (define_insn "mve_vornq_m_<supf><mode>"
    (set_attr "length""8")])
 
 ;;
-;; [vqshlq_m_n_s, vqshlq_m_n_u])
+;; [vqshlq_m_n_s, vqshlq_m_n_u]
+;; [vshlq_m_n_s, vshlq_m_n_u]
 ;;
-(define_insn "mve_vqshlq_m_n_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
 		       (match_operand:MVE_2 2 "s_register_operand" "w")
 		       (match_operand:SI 3 "immediate_operand" "i")
 		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VQSHLQ_M_N))
+	 MVE_SHIFT_M_N))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vqshlt.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
+  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
@@ -4914,23 +4872,6 @@ (define_insn "mve_vrshrq_m_n_<supf><mode>"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-;;
-;; [vshlq_m_n_s, vshlq_m_n_u])
-;;
-(define_insn "mve_vshlq_m_n_<supf><mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "immediate_operand" "i")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VSHLQ_M_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vshlt.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
 ;;
 ;; [vshrq_m_n_s, vshrq_m_n_u])
 ;;
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 918338ca5c0..b5fc86fdf28 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -357,14 +357,14 @@ (define_expand "@movmisalign<mode>"
     }
 })
 
-(define_insn "mve_vshlq_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_<supf><mode>"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
 	(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w,w")
 		       (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Ds")]
 	 VSHLQ))]
   "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT"
   "@
-   vshl.<supf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
+   <mve_insn>.<supf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
    * return neon_output_shift_immediate (\"vshl\", 'i', &operands[2], <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), true);"
   [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
 )
-- 
2.34.1



* [PATCH 05/23] arm: [MVE intrinsics] rework vqrdmulhq
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (2 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 04/23] arm: [MVE intrinsics] factorize vqshlq vshlq Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 10:01   ` Kyrylo Tkachov
  2023-05-05  8:39 ` [PATCH 06/23] arm: [MVE intrinsics] factorize vabdq Christophe Lyon
                   ` (18 subsequent siblings)
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vqrdmulhq using the new MVE builtins framework.
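
As a reminder of the semantics, a short usage sketch (function name
hypothetical; assumes <arm_mve.h> and an MVE target): vqrdmulhq
returns the high half of the rounding, doubling, saturating product
of each pair of lanes, and its vector/vector, vector/scalar (_n) and
predicated (_m) forms keep their user-facing names under the new
framework:

  #include <arm_mve.h>

  int16x8_t
  fixed_mul (int16x8_t a, int16x8_t b, int16x8_t inactive,
	     mve_pred16_t p)
  {
    int16x8_t v = vqrdmulhq (a, b);            /* vector x vector */
    int16x8_t n = vqrdmulhq (a, (int16_t) 2);  /* vector x scalar, _n form */
    return vqrdmulhq_m (inactive, v, n, p);    /* predicated, _m form */
  }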

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vqrdmulhq): New.
	* config/arm/arm-mve-builtins-base.def (vqrdmulhq): New.
	* config/arm/arm-mve-builtins-base.h (vqrdmulhq): New.
	* config/arm/arm_mve.h (vqrdmulhq): Remove.
	(vqrdmulhq_m): Remove.
	(vqrdmulhq_s8): Remove.
	(vqrdmulhq_n_s8): Remove.
	(vqrdmulhq_s16): Remove.
	(vqrdmulhq_n_s16): Remove.
	(vqrdmulhq_s32): Remove.
	(vqrdmulhq_n_s32): Remove.
	(vqrdmulhq_m_n_s8): Remove.
	(vqrdmulhq_m_n_s32): Remove.
	(vqrdmulhq_m_n_s16): Remove.
	(vqrdmulhq_m_s8): Remove.
	(vqrdmulhq_m_s32): Remove.
	(vqrdmulhq_m_s16): Remove.
	(__arm_vqrdmulhq_s8): Remove.
	(__arm_vqrdmulhq_n_s8): Remove.
	(__arm_vqrdmulhq_s16): Remove.
	(__arm_vqrdmulhq_n_s16): Remove.
	(__arm_vqrdmulhq_s32): Remove.
	(__arm_vqrdmulhq_n_s32): Remove.
	(__arm_vqrdmulhq_m_n_s8): Remove.
	(__arm_vqrdmulhq_m_n_s32): Remove.
	(__arm_vqrdmulhq_m_n_s16): Remove.
	(__arm_vqrdmulhq_m_s8): Remove.
	(__arm_vqrdmulhq_m_s32): Remove.
	(__arm_vqrdmulhq_m_s16): Remove.
	(__arm_vqrdmulhq): Remove.
	(__arm_vqrdmulhq_m): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   1 +
 gcc/config/arm/arm-mve-builtins-base.def |   1 +
 gcc/config/arm/arm-mve-builtins-base.h   |   1 +
 gcc/config/arm/arm_mve.h                 | 213 -----------------------
 4 files changed, 3 insertions(+), 213 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index f5e48519b19..8c125657c67 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -158,6 +158,7 @@ FUNCTION_WITH_RTX_M_N_NO_N_F (vorrq, IOR, VORRQ)
 FUNCTION_WITH_M_N_NO_F (vqaddq, VQADDQ)
 FUNCTION_WITH_M_N_NO_U_F (vqdmulhq, VQDMULHQ)
 FUNCTION_WITH_M_N_NO_F (vqrshlq, VQRSHLQ)
+FUNCTION_WITH_M_N_NO_U_F (vqrdmulhq, VQRDMULHQ)
 FUNCTION_WITH_M_N_NO_F (vqsubq, VQSUBQ)
 FUNCTION (vreinterpretq, vreinterpretq_impl,)
 FUNCTION_WITHOUT_N_NO_F (vrhaddq, VRHADDQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index e6dc2b00aaa..5b9966341ce 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -29,6 +29,7 @@ DEF_MVE_FUNCTION (vmulq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vorrq, binary_orrq, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vqaddq, binary_opt_n, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vqdmulhq, binary_opt_n, all_signed, m_or_none)
+DEF_MVE_FUNCTION (vqrdmulhq, binary_opt_n, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqrshlq, binary_round_lshift, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vqsubq, binary_opt_n, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vreinterpretq, unary_convert, reinterpret_integer, none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 31ba3fece82..eeb747d52ad 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -34,6 +34,7 @@ extern const function_base *const vmulq;
 extern const function_base *const vorrq;
 extern const function_base *const vqaddq;
 extern const function_base *const vqdmulhq;
+extern const function_base *const vqrdmulhq;
 extern const function_base *const vqrshlq;
 extern const function_base *const vqsubq;
 extern const function_base *const vreinterpretq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 636945d6ef0..44b383dbe08 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -94,7 +94,6 @@
 #define vcmpgtq(__a, __b) __arm_vcmpgtq(__a, __b)
 #define vcmpgeq(__a, __b) __arm_vcmpgeq(__a, __b)
 #define vqshluq(__a, __imm) __arm_vqshluq(__a, __imm)
-#define vqrdmulhq(__a, __b) __arm_vqrdmulhq(__a, __b)
 #define vmlsdavxq(__a, __b) __arm_vmlsdavxq(__a, __b)
 #define vmlsdavq(__a, __b) __arm_vmlsdavq(__a, __b)
 #define vmladavxq(__a, __b) __arm_vmladavxq(__a, __b)
@@ -249,7 +248,6 @@
 #define vqrdmlashq_m(__a, __b, __c, __p) __arm_vqrdmlashq_m(__a, __b, __c, __p)
 #define vqrdmlsdhq_m(__inactive, __a, __b, __p) __arm_vqrdmlsdhq_m(__inactive, __a, __b, __p)
 #define vqrdmlsdhxq_m(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m(__inactive, __a, __b, __p)
-#define vqrdmulhq_m(__inactive, __a, __b, __p) __arm_vqrdmulhq_m(__inactive, __a, __b, __p)
 #define vqshlq_m_n(__inactive, __a, __imm, __p) __arm_vqshlq_m_n(__inactive, __a, __imm, __p)
 #define vqshlq_m(__inactive, __a, __b, __p) __arm_vqshlq_m(__inactive, __a, __b, __p)
 #define vrshrq_m(__inactive, __a, __imm, __p) __arm_vrshrq_m(__inactive, __a, __imm, __p)
@@ -682,8 +680,6 @@
 #define vshlq_r_s8(__a, __b) __arm_vshlq_r_s8(__a, __b)
 #define vqshlq_s8(__a, __b) __arm_vqshlq_s8(__a, __b)
 #define vqshlq_r_s8(__a, __b) __arm_vqshlq_r_s8(__a, __b)
-#define vqrdmulhq_s8(__a, __b) __arm_vqrdmulhq_s8(__a, __b)
-#define vqrdmulhq_n_s8(__a, __b) __arm_vqrdmulhq_n_s8(__a, __b)
 #define vornq_s8(__a, __b) __arm_vornq_s8(__a, __b)
 #define vmulltq_int_s8(__a, __b) __arm_vmulltq_int_s8(__a, __b)
 #define vmullbq_int_s8(__a, __b) __arm_vmullbq_int_s8(__a, __b)
@@ -754,8 +750,6 @@
 #define vshlq_r_s16(__a, __b) __arm_vshlq_r_s16(__a, __b)
 #define vqshlq_s16(__a, __b) __arm_vqshlq_s16(__a, __b)
 #define vqshlq_r_s16(__a, __b) __arm_vqshlq_r_s16(__a, __b)
-#define vqrdmulhq_s16(__a, __b) __arm_vqrdmulhq_s16(__a, __b)
-#define vqrdmulhq_n_s16(__a, __b) __arm_vqrdmulhq_n_s16(__a, __b)
 #define vornq_s16(__a, __b) __arm_vornq_s16(__a, __b)
 #define vmulltq_int_s16(__a, __b) __arm_vmulltq_int_s16(__a, __b)
 #define vmullbq_int_s16(__a, __b) __arm_vmullbq_int_s16(__a, __b)
@@ -826,8 +820,6 @@
 #define vshlq_r_s32(__a, __b) __arm_vshlq_r_s32(__a, __b)
 #define vqshlq_s32(__a, __b) __arm_vqshlq_s32(__a, __b)
 #define vqshlq_r_s32(__a, __b) __arm_vqshlq_r_s32(__a, __b)
-#define vqrdmulhq_s32(__a, __b) __arm_vqrdmulhq_s32(__a, __b)
-#define vqrdmulhq_n_s32(__a, __b) __arm_vqrdmulhq_n_s32(__a, __b)
 #define vornq_s32(__a, __b) __arm_vornq_s32(__a, __b)
 #define vmulltq_int_s32(__a, __b) __arm_vmulltq_int_s32(__a, __b)
 #define vmullbq_int_s32(__a, __b) __arm_vmullbq_int_s32(__a, __b)
@@ -1597,12 +1589,6 @@
 #define vqrdmlsdhxq_m_s8(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m_s8(__inactive, __a, __b, __p)
 #define vqrdmlsdhxq_m_s32(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m_s32(__inactive, __a, __b, __p)
 #define vqrdmlsdhxq_m_s16(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m_s16(__inactive, __a, __b, __p)
-#define vqrdmulhq_m_n_s8(__inactive, __a, __b, __p) __arm_vqrdmulhq_m_n_s8(__inactive, __a, __b, __p)
-#define vqrdmulhq_m_n_s32(__inactive, __a, __b, __p) __arm_vqrdmulhq_m_n_s32(__inactive, __a, __b, __p)
-#define vqrdmulhq_m_n_s16(__inactive, __a, __b, __p) __arm_vqrdmulhq_m_n_s16(__inactive, __a, __b, __p)
-#define vqrdmulhq_m_s8(__inactive, __a, __b, __p) __arm_vqrdmulhq_m_s8(__inactive, __a, __b, __p)
-#define vqrdmulhq_m_s32(__inactive, __a, __b, __p) __arm_vqrdmulhq_m_s32(__inactive, __a, __b, __p)
-#define vqrdmulhq_m_s16(__inactive, __a, __b, __p) __arm_vqrdmulhq_m_s16(__inactive, __a, __b, __p)
 #define vqshlq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vqshlq_m_n_s8(__inactive, __a,  __imm, __p)
 #define vqshlq_m_n_s32(__inactive, __a,  __imm, __p) __arm_vqshlq_m_n_s32(__inactive, __a,  __imm, __p)
 #define vqshlq_m_n_s16(__inactive, __a,  __imm, __p) __arm_vqshlq_m_n_s16(__inactive, __a,  __imm, __p)
@@ -3421,20 +3407,6 @@ __arm_vqshlq_r_s8 (int8x16_t __a, int32_t __b)
   return __builtin_mve_vqshlq_r_sv16qi (__a, __b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vqrdmulhq_sv16qi (__a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq_n_s8 (int8x16_t __a, int8_t __b)
-{
-  return __builtin_mve_vqrdmulhq_n_sv16qi (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_s8 (int8x16_t __a, int8x16_t __b)
@@ -3927,20 +3899,6 @@ __arm_vqshlq_r_s16 (int16x8_t __a, int32_t __b)
   return __builtin_mve_vqshlq_r_sv8hi (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqrdmulhq_sv8hi (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq_n_s16 (int16x8_t __a, int16_t __b)
-{
-  return __builtin_mve_vqrdmulhq_n_sv8hi (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_s16 (int16x8_t __a, int16x8_t __b)
@@ -4433,20 +4391,6 @@ __arm_vqshlq_r_s32 (int32x4_t __a, int32_t __b)
   return __builtin_mve_vqshlq_r_sv4si (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vqrdmulhq_sv4si (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b)
-{
-  return __builtin_mve_vqrdmulhq_n_sv4si (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_s32 (int32x4_t __a, int32x4_t __b)
@@ -8674,48 +8618,6 @@ __arm_vqrdmlsdhxq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve
   return __builtin_mve_vqrdmlsdhxq_m_sv8hi (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, int8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmulhq_m_n_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmulhq_m_n_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, int16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmulhq_m_n_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmulhq_m_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmulhq_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmulhq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
@@ -16946,20 +16848,6 @@ __arm_vqshlq_r (int8x16_t __a, int32_t __b)
  return __arm_vqshlq_r_s8 (__a, __b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vqrdmulhq_s8 (__a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq (int8x16_t __a, int8_t __b)
-{
- return __arm_vqrdmulhq_n_s8 (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (int8x16_t __a, int8x16_t __b)
@@ -17450,20 +17338,6 @@ __arm_vqshlq_r (int16x8_t __a, int32_t __b)
  return __arm_vqshlq_r_s16 (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vqrdmulhq_s16 (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq (int16x8_t __a, int16_t __b)
-{
- return __arm_vqrdmulhq_n_s16 (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (int16x8_t __a, int16x8_t __b)
@@ -17954,20 +17828,6 @@ __arm_vqshlq_r (int32x4_t __a, int32_t __b)
  return __arm_vqshlq_r_s32 (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vqrdmulhq_s32 (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq (int32x4_t __a, int32_t __b)
-{
- return __arm_vqrdmulhq_n_s32 (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (int32x4_t __a, int32x4_t __b)
@@ -22154,48 +22014,6 @@ __arm_vqrdmlsdhxq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pre
  return __arm_vqrdmlsdhxq_m_s16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq_m (int8x16_t __inactive, int8x16_t __a, int8_t __b, mve_pred16_t __p)
-{
- return __arm_vqrdmulhq_m_n_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq_m (int32x4_t __inactive, int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vqrdmulhq_m_n_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq_m (int16x8_t __inactive, int16x8_t __a, int16_t __b, mve_pred16_t __p)
-{
- return __arm_vqrdmulhq_m_n_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vqrdmulhq_m_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vqrdmulhq_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmulhq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vqrdmulhq_m_s16 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshlq_m_n (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
@@ -29075,16 +28893,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
 
-#define __arm_vqrdmulhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmulhq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmulhq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmulhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmulhq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmulhq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmulhq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)));})
-
 #define __arm_vmlaldavxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -30833,16 +30641,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
 
-#define __arm_vqrdmulhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmulhq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmulhq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmulhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmulhq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmulhq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmulhq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)));})
-
 #define __arm_vornq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -32455,17 +32253,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsliq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),  p2, p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsliq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),  p2, p3));})
 
-#define __arm_vqrdmulhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmulhq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmulhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmulhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmulhq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmulhq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmulhq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int), p3));})
-
 #define __arm_vqrdmlsdhxq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 46+ messages in thread

* [PATCH 06/23] arm: [MVE intrinsics] factorize vabdq
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (3 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 05/23] arm: [MVE intrinsics] rework vqrdmulhq Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 10:48   ` Kyrylo Tkachov
  2023-05-05  8:39 ` [PATCH 07/23] arm: [MVE intrinsics] rework vabdq Christophe Lyon
                   ` (17 subsequent siblings)
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize the vabdq patterns so that they use parameterized names and
can be reused by the new MVE builtins framework (a sketch follows the
ChangeLog).

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MVE_FP_M_BINARY): Add vabdq.
	(MVE_FP_VABDQ_ONLY): New.
	(mve_insn): Add vabd.
	* config/arm/mve.md (mve_vabdq_f<mode>): Move into ...
	(@mve_<mve_insn>q_f<mode>): ... this.
	(mve_vabdq_m_f<mode>): Remove.
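
A hedged sketch of what the parameterized '@' name buys us (the
helper name below is derived from GCC's parameterized-names
machinery and is an assumption, not shown in this diff): callers can
select the insn by unspec code and mode instead of hard-coding one
gen_mve_vabdq_f<mode> per instruction.

  /* Hypothetical C++ sketch: for "@mve_<mve_insn>q_f<mode>" the
     generator emits a code_for_* helper taking the iterator values,
     here the unspec code and the vector mode.  */
  insn_code icode = code_for_mve_q_f (VABDQ_F, V8HFmode); /* vabd.f16 */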
---
 gcc/config/arm/iterators.md |  9 +++++++--
 gcc/config/arm/mve.md       | 25 +++++--------------------
 2 files changed, 12 insertions(+), 22 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index c53b42a86e9..3133642ea82 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -466,6 +466,7 @@ (define_int_iterator MVE_RSHIFT_N   [
 		     ])
 
 (define_int_iterator MVE_FP_M_BINARY   [
+		     VABDQ_M_F
 		     VADDQ_M_F
 		     VMULQ_M_F
 		     VSUBQ_M_F
@@ -490,6 +491,10 @@ (define_int_iterator MVE_FP_N_BINARY   [
 		     VSUBQ_N_F
 		     ])
 
+(define_int_iterator MVE_FP_VABDQ_ONLY [
+		     VABDQ_F
+		     ])
+
 (define_int_iterator MVE_FP_CREATE_ONLY [
 		     VCREATEQ_F
 		     ])
@@ -501,8 +506,8 @@ (define_code_attr mve_addsubmul [
 		 ])
 
 (define_int_attr mve_insn [
-		 (VABDQ_M_S "vabd") (VABDQ_M_U "vabd")
-		 (VABDQ_S "vabd") (VABDQ_U "vabd")
+		 (VABDQ_M_S "vabd") (VABDQ_M_U "vabd") (VABDQ_M_F "vabd")
+		 (VABDQ_S "vabd") (VABDQ_U "vabd") (VABDQ_F "vabd")
 		 (VADDQ_M_N_S "vadd") (VADDQ_M_N_U "vadd") (VADDQ_M_N_F "vadd")
 		 (VADDQ_M_S "vadd") (VADDQ_M_U "vadd") (VADDQ_M_F "vadd")
 		 (VADDQ_N_S "vadd") (VADDQ_N_U "vadd") (VADDQ_N_F "vadd")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index fb1076aef73..c8cb4e430ac 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1451,17 +1451,17 @@ (define_insn "mve_vrshrq_n_<supf><mode>"
 ])
 
 ;;
-;; [vabdq_f])
+;; [vabdq_f]
 ;;
-(define_insn "mve_vabdq_f<mode>"
+(define_insn "@mve_<mve_insn>q_f<mode>"
   [
    (set (match_operand:MVE_0 0 "s_register_operand" "=w")
 	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
 		       (match_operand:MVE_0 2 "s_register_operand" "w")]
-	 VABDQ_F))
+	 MVE_FP_VABDQ_ONLY))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vabd.f%#<V_sz_elem>	%q0, %q1, %q2"
+  "<mve_insn>.f%#<V_sz_elem>\t%q0, %q1, %q2"
   [(set_attr "type" "mve_move")
 ])
 
@@ -5483,24 +5483,9 @@ (define_insn "mve_vrmlsldavhaxq_p_sv4si"
   "vpst\;vrmlsldavhaxt.s32\t%Q0, %R0, %q2, %q3"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
-;;
-;; [vabdq_m_f])
-;;
-(define_insn "mve_vabdq_m_f<mode>"
-  [
-   (set (match_operand:MVE_0 0 "s_register_operand" "=w")
-	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
-		       (match_operand:MVE_0 2 "s_register_operand" "w")
-		       (match_operand:MVE_0 3 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VABDQ_M_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vabdt.f%#<V_sz_elem>	%q0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
 
 ;;
+;; [vabdq_m_f]
 ;; [vaddq_m_f]
 ;; [vsubq_m_f]
 ;; [vmulq_m_f]
-- 
2.34.1


^ permalink raw reply	[flat|nested] 46+ messages in thread

* [PATCH 07/23] arm: [MVE intrinsics] rework vabdq
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (4 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 06/23] arm: [MVE intrinsics] factorize vabdq Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 10:49   ` Kyrylo Tkachov
  2023-05-05  8:39 ` [PATCH 08/23] arm: [MVE intrinsics] add binary_lshift shape Christophe Lyon
                   ` (16 subsequent siblings)
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vabdq using the new MVE builtins framework.
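
User-facing behaviour is unchanged: the overloaded forms are now
resolved by the framework instead of the _Generic dispatch removed
below.  A minimal usage sketch (illustrative only; the function names
are made up for this example):

  #include <arm_mve.h>

  int8x16_t
  do_vabd (int8x16_t a, int8x16_t b)
  {
    return vabdq (a, b);                   /* resolves to vabdq_s8 */
  }

  float32x4_t
  do_vabd_m (float32x4_t inactive, float32x4_t a, float32x4_t b,
             mve_pred16_t p)
  {
    return vabdq_m (inactive, a, b, p);    /* resolves to vabdq_m_f32 */
  }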

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (FUNCTION_WITHOUT_N): New.
	(vabdq): New.
	* config/arm/arm-mve-builtins-base.def (vabdq): New.
	* config/arm/arm-mve-builtins-base.h (vabdq): New.
	* config/arm/arm_mve.h (vabdq): Remove.
	(vabdq_m): Remove.
	(vabdq_x): Remove.
	(vabdq_u8): Remove.
	(vabdq_s8): Remove.
	(vabdq_u16): Remove.
	(vabdq_s16): Remove.
	(vabdq_u32): Remove.
	(vabdq_s32): Remove.
	(vabdq_f16): Remove.
	(vabdq_f32): Remove.
	(vabdq_m_s8): Remove.
	(vabdq_m_s32): Remove.
	(vabdq_m_s16): Remove.
	(vabdq_m_u8): Remove.
	(vabdq_m_u32): Remove.
	(vabdq_m_u16): Remove.
	(vabdq_m_f32): Remove.
	(vabdq_m_f16): Remove.
	(vabdq_x_s8): Remove.
	(vabdq_x_s16): Remove.
	(vabdq_x_s32): Remove.
	(vabdq_x_u8): Remove.
	(vabdq_x_u16): Remove.
	(vabdq_x_u32): Remove.
	(vabdq_x_f16): Remove.
	(vabdq_x_f32): Remove.
	(__arm_vabdq_u8): Remove.
	(__arm_vabdq_s8): Remove.
	(__arm_vabdq_u16): Remove.
	(__arm_vabdq_s16): Remove.
	(__arm_vabdq_u32): Remove.
	(__arm_vabdq_s32): Remove.
	(__arm_vabdq_m_s8): Remove.
	(__arm_vabdq_m_s32): Remove.
	(__arm_vabdq_m_s16): Remove.
	(__arm_vabdq_m_u8): Remove.
	(__arm_vabdq_m_u32): Remove.
	(__arm_vabdq_m_u16): Remove.
	(__arm_vabdq_x_s8): Remove.
	(__arm_vabdq_x_s16): Remove.
	(__arm_vabdq_x_s32): Remove.
	(__arm_vabdq_x_u8): Remove.
	(__arm_vabdq_x_u16): Remove.
	(__arm_vabdq_x_u32): Remove.
	(__arm_vabdq_f16): Remove.
	(__arm_vabdq_f32): Remove.
	(__arm_vabdq_m_f32): Remove.
	(__arm_vabdq_m_f16): Remove.
	(__arm_vabdq_x_f16): Remove.
	(__arm_vabdq_x_f32): Remove.
	(__arm_vabdq): Remove.
	(__arm_vabdq_m): Remove.
	(__arm_vabdq_x): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |  10 +
 gcc/config/arm/arm-mve-builtins-base.def |   2 +
 gcc/config/arm/arm-mve-builtins-base.h   |   1 +
 gcc/config/arm/arm_mve.h                 | 431 -----------------------
 4 files changed, 13 insertions(+), 431 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 8c125657c67..a74119db917 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -146,6 +146,16 @@ namespace arm_mve {
     UNSPEC##_M_S, -1, -1,						\
     UNSPEC##_M_N_S, -1, -1))
 
+  /* Helper for builtins with only unspec codes, _m predicated
+     overrides, but no _n version.  */
+#define FUNCTION_WITHOUT_N(NAME, UNSPEC) FUNCTION			\
+  (NAME, unspec_mve_function_exact_insn,				\
+   (UNSPEC##_S, UNSPEC##_U, UNSPEC##_F,					\
+    -1, -1, -1,								\
+    UNSPEC##_M_S, UNSPEC##_M_U, UNSPEC##_M_F,				\
+    -1, -1, -1))
+
+FUNCTION_WITHOUT_N (vabdq, VABDQ)
 FUNCTION_WITH_RTX_M_N (vaddq, PLUS, VADDQ)
 FUNCTION_WITH_RTX_M (vandq, AND, VANDQ)
 FUNCTION_WITHOUT_M_N (vcreateq, VCREATEQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 5b9966341ce..9230837fd43 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -18,6 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #define REQUIRES_FLOAT false
+DEF_MVE_FUNCTION (vabdq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vaddq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vandq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vcreateq, create, all_integer_with_64, none)
@@ -41,6 +42,7 @@ DEF_MVE_FUNCTION (vuninitializedq, inherent, all_integer_with_64, none)
 #undef REQUIRES_FLOAT
 
 #define REQUIRES_FLOAT true
+DEF_MVE_FUNCTION (vabdq, binary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vaddq, binary_opt_n, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vandq, binary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vcreateq, create, all_float, none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index eeb747d52ad..d9d45d1925a 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -23,6 +23,7 @@
 namespace arm_mve {
 namespace functions {
 
+extern const function_base *const vabdq;
 extern const function_base *const vaddq;
 extern const function_base *const vandq;
 extern const function_base *const vcreateq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 44b383dbe08..175d9955c33 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -77,7 +77,6 @@
 #define vbicq(__a, __b) __arm_vbicq(__a, __b)
 #define vaddvq_p(__a, __p) __arm_vaddvq_p(__a, __p)
 #define vaddvaq(__a, __b) __arm_vaddvaq(__a, __b)
-#define vabdq(__a, __b) __arm_vabdq(__a, __b)
 #define vshlq_r(__a, __b) __arm_vshlq_r(__a, __b)
 #define vqshlq(__a, __b) __arm_vqshlq(__a, __b)
 #define vqshlq_r(__a, __b) __arm_vqshlq_r(__a, __b)
@@ -218,7 +217,6 @@
 #define vqshluq_m(__inactive, __a, __imm, __p) __arm_vqshluq_m(__inactive, __a, __imm, __p)
 #define vabavq_p(__a, __b, __c, __p) __arm_vabavq_p(__a, __b, __c, __p)
 #define vshlq_m(__inactive, __a, __b, __p) __arm_vshlq_m(__inactive, __a, __b, __p)
-#define vabdq_m(__inactive, __a, __b, __p) __arm_vabdq_m(__inactive, __a, __b, __p)
 #define vbicq_m(__inactive, __a, __b, __p) __arm_vbicq_m(__inactive, __a, __b, __p)
 #define vbrsrq_m(__inactive, __a, __b, __p) __arm_vbrsrq_m(__inactive, __a, __b, __p)
 #define vcaddq_rot270_m(__inactive, __a, __b, __p) __arm_vcaddq_rot270_m(__inactive, __a, __b, __p)
@@ -355,7 +353,6 @@
 #define viwdupq_x_u32(__a, __b, __imm, __p) __arm_viwdupq_x_u32(__a, __b, __imm, __p)
 #define vminq_x(__a, __b, __p) __arm_vminq_x(__a, __b, __p)
 #define vmaxq_x(__a, __b, __p) __arm_vmaxq_x(__a, __b, __p)
-#define vabdq_x(__a, __b, __p) __arm_vabdq_x(__a, __b, __p)
 #define vabsq_x(__a, __p) __arm_vabsq_x(__a, __p)
 #define vclsq_x(__a, __p) __arm_vclsq_x(__a, __p)
 #define vclzq_x(__a, __p) __arm_vclzq_x(__a, __p)
@@ -652,7 +649,6 @@
 #define vbicq_u8(__a, __b) __arm_vbicq_u8(__a, __b)
 #define vaddvq_p_u8(__a, __p) __arm_vaddvq_p_u8(__a, __p)
 #define vaddvaq_u8(__a, __b) __arm_vaddvaq_u8(__a, __b)
-#define vabdq_u8(__a, __b) __arm_vabdq_u8(__a, __b)
 #define vshlq_r_u8(__a, __b) __arm_vshlq_r_u8(__a, __b)
 #define vqshlq_u8(__a, __b) __arm_vqshlq_u8(__a, __b)
 #define vqshlq_r_u8(__a, __b) __arm_vqshlq_r_u8(__a, __b)
@@ -698,7 +694,6 @@
 #define vbrsrq_n_s8(__a, __b) __arm_vbrsrq_n_s8(__a, __b)
 #define vbicq_s8(__a, __b) __arm_vbicq_s8(__a, __b)
 #define vaddvaq_s8(__a, __b) __arm_vaddvaq_s8(__a, __b)
-#define vabdq_s8(__a, __b) __arm_vabdq_s8(__a, __b)
 #define vshlq_n_s8(__a,  __imm) __arm_vshlq_n_s8(__a,  __imm)
 #define vrshrq_n_s8(__a,  __imm) __arm_vrshrq_n_s8(__a,  __imm)
 #define vqshlq_n_s8(__a,  __imm) __arm_vqshlq_n_s8(__a,  __imm)
@@ -722,7 +717,6 @@
 #define vbicq_u16(__a, __b) __arm_vbicq_u16(__a, __b)
 #define vaddvq_p_u16(__a, __p) __arm_vaddvq_p_u16(__a, __p)
 #define vaddvaq_u16(__a, __b) __arm_vaddvaq_u16(__a, __b)
-#define vabdq_u16(__a, __b) __arm_vabdq_u16(__a, __b)
 #define vshlq_r_u16(__a, __b) __arm_vshlq_r_u16(__a, __b)
 #define vqshlq_u16(__a, __b) __arm_vqshlq_u16(__a, __b)
 #define vqshlq_r_u16(__a, __b) __arm_vqshlq_r_u16(__a, __b)
@@ -768,7 +762,6 @@
 #define vbrsrq_n_s16(__a, __b) __arm_vbrsrq_n_s16(__a, __b)
 #define vbicq_s16(__a, __b) __arm_vbicq_s16(__a, __b)
 #define vaddvaq_s16(__a, __b) __arm_vaddvaq_s16(__a, __b)
-#define vabdq_s16(__a, __b) __arm_vabdq_s16(__a, __b)
 #define vshlq_n_s16(__a,  __imm) __arm_vshlq_n_s16(__a,  __imm)
 #define vrshrq_n_s16(__a,  __imm) __arm_vrshrq_n_s16(__a,  __imm)
 #define vqshlq_n_s16(__a,  __imm) __arm_vqshlq_n_s16(__a,  __imm)
@@ -792,7 +785,6 @@
 #define vbicq_u32(__a, __b) __arm_vbicq_u32(__a, __b)
 #define vaddvq_p_u32(__a, __p) __arm_vaddvq_p_u32(__a, __p)
 #define vaddvaq_u32(__a, __b) __arm_vaddvaq_u32(__a, __b)
-#define vabdq_u32(__a, __b) __arm_vabdq_u32(__a, __b)
 #define vshlq_r_u32(__a, __b) __arm_vshlq_r_u32(__a, __b)
 #define vqshlq_u32(__a, __b) __arm_vqshlq_u32(__a, __b)
 #define vqshlq_r_u32(__a, __b) __arm_vqshlq_r_u32(__a, __b)
@@ -838,7 +830,6 @@
 #define vbrsrq_n_s32(__a, __b) __arm_vbrsrq_n_s32(__a, __b)
 #define vbicq_s32(__a, __b) __arm_vbicq_s32(__a, __b)
 #define vaddvaq_s32(__a, __b) __arm_vaddvaq_s32(__a, __b)
-#define vabdq_s32(__a, __b) __arm_vabdq_s32(__a, __b)
 #define vshlq_n_s32(__a,  __imm) __arm_vshlq_n_s32(__a,  __imm)
 #define vrshrq_n_s32(__a,  __imm) __arm_vrshrq_n_s32(__a,  __imm)
 #define vqshlq_n_s32(__a,  __imm) __arm_vqshlq_n_s32(__a,  __imm)
@@ -894,7 +885,6 @@
 #define vcaddq_rot90_f16(__a, __b) __arm_vcaddq_rot90_f16(__a, __b)
 #define vcaddq_rot270_f16(__a, __b) __arm_vcaddq_rot270_f16(__a, __b)
 #define vbicq_f16(__a, __b) __arm_vbicq_f16(__a, __b)
-#define vabdq_f16(__a, __b) __arm_vabdq_f16(__a, __b)
 #define vshlltq_n_s8(__a,  __imm) __arm_vshlltq_n_s8(__a,  __imm)
 #define vshllbq_n_s8(__a,  __imm) __arm_vshllbq_n_s8(__a,  __imm)
 #define vbicq_n_s16(__a,  __imm) __arm_vbicq_n_s16(__a,  __imm)
@@ -950,7 +940,6 @@
 #define vcaddq_rot90_f32(__a, __b) __arm_vcaddq_rot90_f32(__a, __b)
 #define vcaddq_rot270_f32(__a, __b) __arm_vcaddq_rot270_f32(__a, __b)
 #define vbicq_f32(__a, __b) __arm_vbicq_f32(__a, __b)
-#define vabdq_f32(__a, __b) __arm_vabdq_f32(__a, __b)
 #define vshlltq_n_s16(__a,  __imm) __arm_vshlltq_n_s16(__a,  __imm)
 #define vshllbq_n_s16(__a,  __imm) __arm_vshllbq_n_s16(__a,  __imm)
 #define vbicq_n_s32(__a,  __imm) __arm_vbicq_n_s32(__a,  __imm)
@@ -1460,12 +1449,6 @@
 #define vshlq_m_u32(__inactive, __a, __b, __p) __arm_vshlq_m_u32(__inactive, __a, __b, __p)
 #define vabavq_p_u32(__a, __b, __c, __p) __arm_vabavq_p_u32(__a, __b, __c, __p)
 #define vshlq_m_s32(__inactive, __a, __b, __p) __arm_vshlq_m_s32(__inactive, __a, __b, __p)
-#define vabdq_m_s8(__inactive, __a, __b, __p) __arm_vabdq_m_s8(__inactive, __a, __b, __p)
-#define vabdq_m_s32(__inactive, __a, __b, __p) __arm_vabdq_m_s32(__inactive, __a, __b, __p)
-#define vabdq_m_s16(__inactive, __a, __b, __p) __arm_vabdq_m_s16(__inactive, __a, __b, __p)
-#define vabdq_m_u8(__inactive, __a, __b, __p) __arm_vabdq_m_u8(__inactive, __a, __b, __p)
-#define vabdq_m_u32(__inactive, __a, __b, __p) __arm_vabdq_m_u32(__inactive, __a, __b, __p)
-#define vabdq_m_u16(__inactive, __a, __b, __p) __arm_vabdq_m_u16(__inactive, __a, __b, __p)
 #define vbicq_m_s8(__inactive, __a, __b, __p) __arm_vbicq_m_s8(__inactive, __a, __b, __p)
 #define vbicq_m_s32(__inactive, __a, __b, __p) __arm_vbicq_m_s32(__inactive, __a, __b, __p)
 #define vbicq_m_s16(__inactive, __a, __b, __p) __arm_vbicq_m_s16(__inactive, __a, __b, __p)
@@ -1700,8 +1683,6 @@
 #define vshrntq_m_n_s16(__a, __b,  __imm, __p) __arm_vshrntq_m_n_s16(__a, __b,  __imm, __p)
 #define vshrntq_m_n_u32(__a, __b,  __imm, __p) __arm_vshrntq_m_n_u32(__a, __b,  __imm, __p)
 #define vshrntq_m_n_u16(__a, __b,  __imm, __p) __arm_vshrntq_m_n_u16(__a, __b,  __imm, __p)
-#define vabdq_m_f32(__inactive, __a, __b, __p) __arm_vabdq_m_f32(__inactive, __a, __b, __p)
-#define vabdq_m_f16(__inactive, __a, __b, __p) __arm_vabdq_m_f16(__inactive, __a, __b, __p)
 #define vbicq_m_f32(__inactive, __a, __b, __p) __arm_vbicq_m_f32(__inactive, __a, __b, __p)
 #define vbicq_m_f16(__inactive, __a, __b, __p) __arm_vbicq_m_f16(__inactive, __a, __b, __p)
 #define vbrsrq_m_n_f32(__inactive, __a, __b, __p) __arm_vbrsrq_m_n_f32(__inactive, __a, __b, __p)
@@ -2060,12 +2041,6 @@
 #define vmaxq_x_u8(__a, __b, __p) __arm_vmaxq_x_u8(__a, __b, __p)
 #define vmaxq_x_u16(__a, __b, __p) __arm_vmaxq_x_u16(__a, __b, __p)
 #define vmaxq_x_u32(__a, __b, __p) __arm_vmaxq_x_u32(__a, __b, __p)
-#define vabdq_x_s8(__a, __b, __p) __arm_vabdq_x_s8(__a, __b, __p)
-#define vabdq_x_s16(__a, __b, __p) __arm_vabdq_x_s16(__a, __b, __p)
-#define vabdq_x_s32(__a, __b, __p) __arm_vabdq_x_s32(__a, __b, __p)
-#define vabdq_x_u8(__a, __b, __p) __arm_vabdq_x_u8(__a, __b, __p)
-#define vabdq_x_u16(__a, __b, __p) __arm_vabdq_x_u16(__a, __b, __p)
-#define vabdq_x_u32(__a, __b, __p) __arm_vabdq_x_u32(__a, __b, __p)
 #define vabsq_x_s8(__a, __p) __arm_vabsq_x_s8(__a, __p)
 #define vabsq_x_s16(__a, __p) __arm_vabsq_x_s16(__a, __p)
 #define vabsq_x_s32(__a, __p) __arm_vabsq_x_s32(__a, __p)
@@ -2201,8 +2176,6 @@
 #define vminnmq_x_f32(__a, __b, __p) __arm_vminnmq_x_f32(__a, __b, __p)
 #define vmaxnmq_x_f16(__a, __b, __p) __arm_vmaxnmq_x_f16(__a, __b, __p)
 #define vmaxnmq_x_f32(__a, __b, __p) __arm_vmaxnmq_x_f32(__a, __b, __p)
-#define vabdq_x_f16(__a, __b, __p) __arm_vabdq_x_f16(__a, __b, __p)
-#define vabdq_x_f32(__a, __b, __p) __arm_vabdq_x_f32(__a, __b, __p)
 #define vabsq_x_f16(__a, __p) __arm_vabsq_x_f16(__a, __p)
 #define vabsq_x_f32(__a, __p) __arm_vabsq_x_f32(__a, __p)
 #define vnegq_x_f16(__a, __p) __arm_vnegq_x_f16(__a, __p)
@@ -3211,13 +3184,6 @@ __arm_vaddvaq_u8 (uint32_t __a, uint8x16_t __b)
   return __builtin_mve_vaddvaq_uv16qi (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_u8 (uint8x16_t __a, uint8x16_t __b)
-{
-  return __builtin_mve_vabdq_uv16qi (__a, __b);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshlq_r_u8 (uint8x16_t __a, int32_t __b)
@@ -3533,13 +3499,6 @@ __arm_vaddvaq_s8 (int32_t __a, int8x16_t __b)
   return __builtin_mve_vaddvaq_sv16qi (__a, __b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vabdq_sv16qi (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshlq_n_s8 (int8x16_t __a, const int __imm)
@@ -3703,13 +3662,6 @@ __arm_vaddvaq_u16 (uint32_t __a, uint16x8_t __b)
   return __builtin_mve_vaddvaq_uv8hi (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_u16 (uint16x8_t __a, uint16x8_t __b)
-{
-  return __builtin_mve_vabdq_uv8hi (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshlq_r_u16 (uint16x8_t __a, int32_t __b)
@@ -4025,13 +3977,6 @@ __arm_vaddvaq_s16 (int32_t __a, int16x8_t __b)
   return __builtin_mve_vaddvaq_sv8hi (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vabdq_sv8hi (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshlq_n_s16 (int16x8_t __a, const int __imm)
@@ -4195,13 +4140,6 @@ __arm_vaddvaq_u32 (uint32_t __a, uint32x4_t __b)
   return __builtin_mve_vaddvaq_uv4si (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_u32 (uint32x4_t __a, uint32x4_t __b)
-{
-  return __builtin_mve_vabdq_uv4si (__a, __b);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshlq_r_u32 (uint32x4_t __a, int32_t __b)
@@ -4517,13 +4455,6 @@ __arm_vaddvaq_s32 (int32_t __a, int32x4_t __b)
   return __builtin_mve_vaddvaq_sv4si (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vabdq_sv4si (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshlq_n_s32 (int32x4_t __a, const int __imm)
@@ -7715,48 +7646,6 @@ __arm_vshlq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred1
   return __builtin_mve_vshlq_m_sv4si (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vabdq_m_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vabdq_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vabdq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vabdq_m_uv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vabdq_m_uv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vabdq_m_uv8hi (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -11432,48 +11321,6 @@ __arm_vmaxq_x_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
   return __builtin_mve_vmaxq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_x_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vabdq_m_sv16qi (__arm_vuninitializedq_s8 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_x_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vabdq_m_sv8hi (__arm_vuninitializedq_s16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_x_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vabdq_m_sv4si (__arm_vuninitializedq_s32 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_x_u8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vabdq_m_uv16qi (__arm_vuninitializedq_u8 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_x_u16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vabdq_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_x_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vabdq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vabsq_x_s8 (int8x16_t __a, mve_pred16_t __p)
@@ -13692,13 +13539,6 @@ __arm_vbicq_f16 (float16x8_t __a, float16x8_t __b)
   return __builtin_mve_vbicq_fv8hf (__a, __b);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_f16 (float16x8_t __a, float16x8_t __b)
-{
-  return __builtin_mve_vabdq_fv8hf (__a, __b);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq_n_f32 (float32x4_t __a, float32_t __b)
@@ -13895,13 +13735,6 @@ __arm_vbicq_f32 (float32x4_t __a, float32x4_t __b)
   return __builtin_mve_vbicq_fv4sf (__a, __b);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_f32 (float32x4_t __a, float32x4_t __b)
-{
-  return __builtin_mve_vabdq_fv4sf (__a, __b);
-}
-
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvttq_f16_f32 (float16x8_t __a, float32x4_t __b)
@@ -14666,20 +14499,6 @@ __arm_vcvtq_m_n_f32_s32 (float32x4_t __inactive, int32x4_t __a, const int __imm6
   return __builtin_mve_vcvtq_m_n_to_f_sv4sf (__inactive, __a, __imm6, __p);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vabdq_m_fv4sf (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vabdq_m_fv8hf (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
@@ -15274,20 +15093,6 @@ __arm_vmaxnmq_x_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
   return __builtin_mve_vmaxnmq_m_fv4sf (__arm_vuninitializedq_f32 (), __a, __b, __p);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_x_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vabdq_m_fv8hf (__arm_vuninitializedq_f16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_x_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vabdq_m_fv4sf (__arm_vuninitializedq_f32 (), __a, __b, __p);
-}
-
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vabsq_x_f16 (float16x8_t __a, mve_pred16_t __p)
@@ -16652,13 +16457,6 @@ __arm_vaddvaq (uint32_t __a, uint8x16_t __b)
  return __arm_vaddvaq_u8 (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq (uint8x16_t __a, uint8x16_t __b)
-{
- return __arm_vabdq_u8 (__a, __b);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshlq_r (uint8x16_t __a, int32_t __b)
@@ -16974,13 +16772,6 @@ __arm_vaddvaq (int32_t __a, int8x16_t __b)
  return __arm_vaddvaq_s8 (__a, __b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vabdq_s8 (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshlq_n (int8x16_t __a, const int __imm)
@@ -17142,13 +16933,6 @@ __arm_vaddvaq (uint32_t __a, uint16x8_t __b)
  return __arm_vaddvaq_u16 (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq (uint16x8_t __a, uint16x8_t __b)
-{
- return __arm_vabdq_u16 (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshlq_r (uint16x8_t __a, int32_t __b)
@@ -17464,13 +17248,6 @@ __arm_vaddvaq (int32_t __a, int16x8_t __b)
  return __arm_vaddvaq_s16 (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vabdq_s16 (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshlq_n (int16x8_t __a, const int __imm)
@@ -17632,13 +17409,6 @@ __arm_vaddvaq (uint32_t __a, uint32x4_t __b)
  return __arm_vaddvaq_u32 (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vabdq_u32 (__a, __b);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshlq_r (uint32x4_t __a, int32_t __b)
@@ -17954,13 +17724,6 @@ __arm_vaddvaq (int32_t __a, int32x4_t __b)
  return __arm_vaddvaq_s32 (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vabdq_s32 (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshlq_n (int32x4_t __a, const int __imm)
@@ -21111,48 +20874,6 @@ __arm_vshlq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t
  return __arm_vshlq_m_s32 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vabdq_m_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vabdq_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vabdq_m_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_m (uint8x16_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vabdq_m_u8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_m (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vabdq_m_u32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_m (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vabdq_m_u16 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -24359,48 +24080,6 @@ __arm_vmaxq_x (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
  return __arm_vmaxq_x_u32 (__a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_x (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vabdq_x_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_x (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vabdq_x_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_x (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vabdq_x_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_x (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vabdq_x_u8 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_x (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vabdq_x_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_x (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vabdq_x_u32 (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vabsq_x (int8x16_t __a, mve_pred16_t __p)
@@ -26195,13 +25874,6 @@ __arm_vbicq (float16x8_t __a, float16x8_t __b)
  return __arm_vbicq_f16 (__a, __b);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq (float16x8_t __a, float16x8_t __b)
-{
- return __arm_vabdq_f16 (__a, __b);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq (float32x4_t __a, float32_t __b)
@@ -26398,13 +26070,6 @@ __arm_vbicq (float32x4_t __a, float32x4_t __b)
  return __arm_vbicq_f32 (__a, __b);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq (float32x4_t __a, float32x4_t __b)
-{
- return __arm_vabdq_f32 (__a, __b);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpeqq_m (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
@@ -27154,20 +26819,6 @@ __arm_vcvtq_m_n (float32x4_t __inactive, int32x4_t __a, const int __imm6, mve_pr
  return __arm_vcvtq_m_n_f32_s32 (__inactive, __a, __imm6, __p);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_m (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vabdq_m_f32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_m (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vabdq_m_f16 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_m (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
@@ -27686,20 +27337,6 @@ __arm_vmaxnmq_x (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
  return __arm_vmaxnmq_x_f32 (__a, __b, __p);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_x (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vabdq_x_f16 (__a, __b, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabdq_x (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vabdq_x_f32 (__a, __b, __p);
-}
-
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vabsq_x (float16x8_t __a, mve_pred16_t __p)
@@ -28554,18 +28191,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vcvtq_n_f16_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vcvtq_n_f32_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
 
-#define __arm_vabdq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vabdq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vabdq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vabdq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vabdq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vabdq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vabdq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vabdq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vabdq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));})
-
 #define __arm_vbicq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -29746,19 +29371,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgeq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
   int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgeq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
 
-#define __arm_vabdq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vabdq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vabdq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vabdq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vabdq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vabdq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vabdq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vabdq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vabdq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
-
 #define __arm_vbicq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -30228,18 +29840,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_p_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \
   int (*)[__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_base_wb_p_f32 (p0, p1, __ARM_mve_coerce(__p2, float32x4_t), p3));})
 
-#define __arm_vabdq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vabdq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vabdq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vabdq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vabdq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vabdq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vabdq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vabdq_x_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vabdq_x_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
-
 #define __arm_vabsq_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vabsq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2), \
@@ -30762,16 +30362,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vbicq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vbicq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
 
-#define __arm_vabdq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vabdq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vabdq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vabdq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vabdq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vabdq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vabdq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
 #define __arm_vcmpeqq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -31416,17 +31006,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vabavq_p_u16(__p0, __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vabavq_p_u32(__p0, __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
 
-#define __arm_vabdq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vabdq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vabdq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vabdq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vabdq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vabdq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vabdq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
 #define __arm_vbicq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -31834,16 +31413,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrev64q_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrev64q_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
-#define __arm_vabdq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vabdq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vabdq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vabdq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vabdq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vabdq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vabdq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
 #define __arm_vbicq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 46+ messages in thread

* [PATCH 08/23] arm: [MVE intrinsics] add binary_lshift shape
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (5 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 07/23] arm: [MVE intrinsics] rework vabdq Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 10:51   ` Kyrylo Tkachov
  2023-05-05  8:39 ` [PATCH 09/23] arm: [MVE intrinsics] add support for MODE_r Christophe Lyon
                   ` (15 subsequent siblings)
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the binary_lshift shape description.
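
As a quick illustration (not part of the patch; assumes an MVE-enabled
toolchain, e.g. arm-none-eabi-gcc -march=armv8.1-m.main+mve), the
overloads this shape resolves look like:

  #include <arm_mve.h>

  int8x16_t
  foo (int8x16_t a, int8x16_t b)
  {
    int8x16_t v = vshlq (a, b); /* resolved to vshlq_s8 */
    return vshlq_n (v, 3);      /* resolved to vshlq_n_s8; 3 is in [0..7] */
  }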

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (binary_lshift): New.
	* config/arm/arm-mve-builtins-shapes.h (binary_lshift): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 57 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 58 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 28a2d66ddd1..e5093c3f29d 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -519,6 +519,63 @@ struct binary_round_lshift_def : public overloaded_base<0>
 };
 SHAPE (binary_round_lshift)
 
+/* <T0>_t vfoo[_t0](<T0>_t, <T0>_t)
+   <T0>_t vfoo_n[_t0](<T0>_t, const int)
+
+   i.e. the standard shape for left shift operations that operate on
+   vector types.
+
+   For the MODE_n versions, check that 'imm' is in the [0..#bits-1] range.
+
+   Example: vshlq.
+   int8x16_t [__arm_]vshlq[_s8](int8x16_t a, int8x16_t b)
+   int8x16_t [__arm_]vshlq_m[_s8](int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
+   int8x16_t [__arm_]vshlq_x[_s8](int8x16_t a, int8x16_t b, mve_pred16_t p)
+   int8x16_t [__arm_]vshlq_n[_s8](int8x16_t a, const int imm)
+   int8x16_t [__arm_]vshlq_m_n[_s8](int8x16_t inactive, int8x16_t a, const int imm, mve_pred16_t p)
+   int8x16_t [__arm_]vshlq_x_n[_s8](int8x16_t a, const int imm, mve_pred16_t p)  */
+struct binary_lshift_def : public overloaded_base<0>
+{
+  bool
+  explicit_mode_suffix_p (enum predication_index, enum mode_suffix_index) const override
+  {
+    return true;
+  }
+
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+    b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
+    build_all (b, "v0,v0,vs0", group, MODE_none, preserve_user_namespace);
+    build_all (b, "v0,v0,ss32", group, MODE_n, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    type_suffix_index type;
+    if (!r.check_gp_argument (2, i, nargs)
+	|| (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES)
+      return error_mark_node;
+
+    return r.finish_opt_n_resolution (i, 0, type, TYPE_signed);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    if (c.mode_suffix_id != MODE_n)
+      return true;
+
+    unsigned int bits = c.type_suffix (0).element_bits;
+    return c.require_immediate_range (1, 0, bits - 1);
+  }
+};
+SHAPE (binary_lshift)
+
 /* <T0>xN_t vfoo[_t0](uint64_t, uint64_t)
 
    where there are N arguments in total.
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index cef081aa8ec..e472862ceef 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -35,6 +35,7 @@ namespace arm_mve
   {
 
     extern const function_shape *const binary;
+    extern const function_shape *const binary_lshift;
     extern const function_shape *const binary_opt_n;
     extern const function_shape *const binary_orrq;
     extern const function_shape *const binary_round_lshift;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 46+ messages in thread

* [PATCH 09/23] arm: [MVE intrinsics] add support for MODE_r
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (6 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 08/23] arm: [MVE intrinsics] add binary_lshift shape Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 10:55   ` Kyrylo Tkachov
  2023-05-05  8:39 ` [PATCH 10/23] arm: [MVE intrinsics] add binary_lshift_r shape Christophe Lyon
                   ` (14 subsequent siblings)
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Add support for the MODE_r mode suffix, used by intrinsics (such as
vshlq_r) which take their scalar shift argument in a register.

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins.cc (has_inactive_argument)
	(finish_opt_n_resolution): Handle MODE_r.
	* config/arm/arm-mve-builtins.def (r): New mode.
---
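For illustration (not part of the patch), the has_inactive_argument
change means that _m_r forms merge into their first operand instead of
taking a separate "inactive" vector:

  #include <arm_mve.h>

  int16x8_t
  foo (int16x8_t inactive, int16x8_t a, int16x8_t b, int32_t r,
       mve_pred16_t p)
  {
    a = vshlq_m (inactive, a, b, p); /* explicit inactive argument */
    return vshlq_m_r (a, r, p);      /* 'a' is both input and merge source */
  }
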
 gcc/config/arm/arm-mve-builtins.cc  | 8 ++++++--
 gcc/config/arm/arm-mve-builtins.def | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index 91b3ae71f94..c25b1be9903 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -669,7 +669,8 @@ function_instance::has_inactive_argument () const
   if (pred != PRED_m)
     return false;
 
-  if ((base == functions::vorrq && mode_suffix_id == MODE_n)
+  if (mode_suffix_id == MODE_r
+      || (base == functions::vorrq && mode_suffix_id == MODE_n)
       || (base == functions::vqrshlq && mode_suffix_id == MODE_n)
       || (base == functions::vrshlq && mode_suffix_id == MODE_n))
     return false;
@@ -1522,7 +1523,10 @@ finish_opt_n_resolution (unsigned int argno, unsigned int first_argno,
 {
   if (inferred_type == NUM_TYPE_SUFFIXES)
     inferred_type = first_type;
-  tree scalar_form = lookup_form (MODE_n, inferred_type);
+  mode_suffix_index scalar_mode = MODE_n;
+  if (mode_suffix_id == MODE_r)
+    scalar_mode = MODE_r;
+  tree scalar_form = lookup_form (scalar_mode, inferred_type);
 
   /* Allow the final argument to be scalar, if an _n form exists.  */
   if (scalar_argument_p (argno))
diff --git a/gcc/config/arm/arm-mve-builtins.def b/gcc/config/arm/arm-mve-builtins.def
index 49d07364fa2..e3f37876210 100644
--- a/gcc/config/arm/arm-mve-builtins.def
+++ b/gcc/config/arm/arm-mve-builtins.def
@@ -35,6 +35,7 @@
 
 DEF_MVE_MODE (n, none, none, none)
 DEF_MVE_MODE (offset, none, none, bytes)
+DEF_MVE_MODE (r, none, none, none)
 
 #define REQUIRES_FLOAT false
 DEF_MVE_TYPE (mve_pred16_t, boolean_type_node)
-- 
2.34.1


^ permalink raw reply	[flat|nested] 46+ messages in thread

* [PATCH 10/23] arm: [MVE intrinsics] add binary_lshift_r shape
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (7 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 09/23] arm: [MVE intrinsics] add support for MODE_r Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 10:56   ` Kyrylo Tkachov
  2023-05-05  8:39 ` [PATCH 11/23] arm: [MVE intrinsics] add unspec_mve_function_exact_insn_vshl Christophe Lyon
                   ` (13 subsequent siblings)
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the binary_lshift_r shape description.
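
As a quick illustration (not part of the patch), the _r forms covered
by this shape take the shift amount in a scalar register:

  #include <arm_mve.h>

  int8x16_t
  foo (int8x16_t a, int32_t n, mve_pred16_t p)
  {
    a = vshlq_r (a, n);         /* resolved to vshlq_r_s8 */
    return vshlq_m_r (a, n, p); /* predicated: inactive lanes keep 'a' */
  }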

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (binary_lshift_r): New.
	* config/arm/arm-mve-builtins-shapes.h (binary_lshift_r): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 41 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 42 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index e5093c3f29d..4ecb612ece5 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -576,6 +576,47 @@ struct binary_lshift_def : public overloaded_base<0>
 };
 SHAPE (binary_lshift)
 
+/* Used with the above form, but only for the MODE_r case which does
+   not always support the same set of predicates as MODE_none and
+   MODE_n.  For vqshlq they are the same, but for vshlq they are not.
+
+   <T0>_t vfoo_r[_t0](<T0>_t, int32_t)
+
+   i.e. the standard shape for shift operations that operate on
+   vector types.
+   Example: vshlq.
+   int8x16_t [__arm_]vshlq_r[_s8](int8x16_t a, int32_t b)
+   int8x16_t [__arm_]vshlq_m_r[_s8](int8x16_t a, int32_t b, mve_pred16_t p)  */
+struct binary_lshift_r_def : public overloaded_base<0>
+{
+  bool
+  explicit_mode_suffix_p (enum predication_index, enum mode_suffix_index) const override
+  {
+    return true;
+  }
+
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_r, preserve_user_namespace);
+    build_all (b, "v0,v0,ss32", group, MODE_r, preserve_user_namespace, false, preds_m_or_none);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    type_suffix_index type;
+    if (!r.check_gp_argument (2, i, nargs)
+	|| (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES)
+      return error_mark_node;
+
+    return r.finish_opt_n_resolution (i, 0, type, TYPE_signed);
+  }
+};
+SHAPE (binary_lshift_r)
+
 /* <T0>xN_t vfoo[_t0](uint64_t, uint64_t)
 
    where there are N arguments in total.
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index e472862ceef..25d9b60a670 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -36,6 +36,7 @@ namespace arm_mve
 
     extern const function_shape *const binary;
     extern const function_shape *const binary_lshift;
+    extern const function_shape *const binary_lshift_r;
     extern const function_shape *const binary_opt_n;
     extern const function_shape *const binary_orrq;
     extern const function_shape *const binary_round_lshift;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 46+ messages in thread

* [PATCH 11/23] arm: [MVE intrinsics] add unspec_mve_function_exact_insn_vshl
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (8 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 10/23] arm: [MVE intrinsics] add binary_lshift_r shape Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 10:56   ` Kyrylo Tkachov
  2023-05-05  8:39 ` [PATCH 12/23] arm: [MVE intrinsics] rework vqshlq vshlq Christophe Lyon
                   ` (12 subsequent siblings)
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Introduce a new function_base subclass that will be used to build vshl
intrinsics. They are special because they have to handle MODE_r.
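
For illustration (not part of the patch; relies on the vshlq rework
later in this series), each form below reaches a different
(predication, mode suffix) branch of the expand function:

  #include <arm_mve.h>

  int8x16_t
  foo (int8x16_t a, int8x16_t b, int32_t r, mve_pred16_t p)
  {
    a = vshlq (a, b);         /* PRED_none, MODE_none */
    a = vshlq_n (a, 1);       /* PRED_none, MODE_n */
    a = vshlq_r (a, r);       /* PRED_none, MODE_r */
    a = vshlq_m (a, a, b, p); /* PRED_m, MODE_none */
    a = vshlq_m_r (a, r, p);  /* PRED_m, MODE_r */
    return vshlq_x (a, b, p); /* PRED_x, MODE_none */
  }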

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-functions.h (class
	unspec_mve_function_exact_insn_vshl): New.
---
 gcc/config/arm/arm-mve-builtins-functions.h | 150 ++++++++++++++++++++
 1 file changed, 150 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-functions.h b/gcc/config/arm/arm-mve-builtins-functions.h
index 5abf913d182..533fd1159c6 100644
--- a/gcc/config/arm/arm-mve-builtins-functions.h
+++ b/gcc/config/arm/arm-mve-builtins-functions.h
@@ -376,6 +376,156 @@ public:
   }
 };
 
+/* Map the function directly to CODE (UNSPEC, M) for vshl-like
+   builtins.  The difference from unspec_mve_function_exact_insn is
+   that this class handles MODE_r and the related unspecs.  */
+class unspec_mve_function_exact_insn_vshl : public function_base
+{
+public:
+  CONSTEXPR unspec_mve_function_exact_insn_vshl (int unspec_for_sint,
+						 int unspec_for_uint,
+						 int unspec_for_n_sint,
+						 int unspec_for_n_uint,
+						 int unspec_for_m_sint,
+						 int unspec_for_m_uint,
+						 int unspec_for_m_n_sint,
+						 int unspec_for_m_n_uint,
+						 int unspec_for_m_r_sint,
+						 int unspec_for_m_r_uint,
+						 int unspec_for_r_sint,
+						 int unspec_for_r_uint)
+    : m_unspec_for_sint (unspec_for_sint),
+      m_unspec_for_uint (unspec_for_uint),
+      m_unspec_for_n_sint (unspec_for_n_sint),
+      m_unspec_for_n_uint (unspec_for_n_uint),
+      m_unspec_for_m_sint (unspec_for_m_sint),
+      m_unspec_for_m_uint (unspec_for_m_uint),
+      m_unspec_for_m_n_sint (unspec_for_m_n_sint),
+      m_unspec_for_m_n_uint (unspec_for_m_n_uint),
+      m_unspec_for_m_r_sint (unspec_for_m_r_sint),
+      m_unspec_for_m_r_uint (unspec_for_m_r_uint),
+      m_unspec_for_r_sint (unspec_for_r_sint),
+      m_unspec_for_r_uint (unspec_for_r_uint)
+  {}
+
+  /* The unspec codes associated with signed-integer and
+     unsigned-integer operations respectively.  They cover the cases
+     with the _n and _r suffixes, and/or the _m predicate.  */
+  int m_unspec_for_sint;
+  int m_unspec_for_uint;
+  int m_unspec_for_n_sint;
+  int m_unspec_for_n_uint;
+  int m_unspec_for_m_sint;
+  int m_unspec_for_m_uint;
+  int m_unspec_for_m_n_sint;
+  int m_unspec_for_m_n_uint;
+  int m_unspec_for_m_r_sint;
+  int m_unspec_for_m_r_uint;
+  int m_unspec_for_r_sint;
+  int m_unspec_for_r_uint;
+
+  rtx
+  expand (function_expander &e) const override
+  {
+    insn_code code;
+    switch (e.pred)
+      {
+      case PRED_none:
+	switch (e.mode_suffix_id)
+	  {
+	  case MODE_none:
+	    /* No predicate, no suffix.  */
+	    if (e.type_suffix (0).unsigned_p)
+	      code = code_for_mve_q (m_unspec_for_uint, m_unspec_for_uint, e.vector_mode (0));
+	    else
+	      code = code_for_mve_q (m_unspec_for_sint, m_unspec_for_sint, e.vector_mode (0));
+	    break;
+
+	  case MODE_n:
+	    /* No predicate, _n suffix.  */
+	    if (e.type_suffix (0).unsigned_p)
+	      code = code_for_mve_q_n (m_unspec_for_n_uint, m_unspec_for_n_uint, e.vector_mode (0));
+	    else
+	      code = code_for_mve_q_n (m_unspec_for_n_sint, m_unspec_for_n_sint, e.vector_mode (0));
+	    break;
+
+	  case MODE_r:
+	    /* No predicate, _r suffix.  */
+	    if (e.type_suffix (0).unsigned_p)
+	      code = code_for_mve_q_r (m_unspec_for_r_uint, m_unspec_for_r_uint, e.vector_mode (0));
+	    else
+	      code = code_for_mve_q_r (m_unspec_for_r_sint, m_unspec_for_r_sint, e.vector_mode (0));
+	    break;
+
+	  default:
+	    gcc_unreachable ();
+	  }
+	return e.use_exact_insn (code);
+
+      case PRED_m:
+	switch (e.mode_suffix_id)
+	  {
+	  case MODE_none:
+	    /* No suffix, "m" predicate.  */
+	    if (e.type_suffix (0).unsigned_p)
+	      code = code_for_mve_q_m (m_unspec_for_m_uint, m_unspec_for_m_uint, e.vector_mode (0));
+	    else
+	      code = code_for_mve_q_m (m_unspec_for_m_sint, m_unspec_for_m_sint, e.vector_mode (0));
+	    break;
+
+	  case MODE_n:
+	    /* _n suffix, "m" predicate.  */
+	    if (e.type_suffix (0).unsigned_p)
+	      code = code_for_mve_q_m_n (m_unspec_for_m_n_uint, m_unspec_for_m_n_uint, e.vector_mode (0));
+	    else
+	      code = code_for_mve_q_m_n (m_unspec_for_m_n_sint, m_unspec_for_m_n_sint, e.vector_mode (0));
+	    break;
+
+	  case MODE_r:
+	    /* _r suffix, "m" predicate.  */
+	    if (e.type_suffix (0).unsigned_p)
+	      code = code_for_mve_q_m_r (m_unspec_for_m_r_uint, m_unspec_for_m_r_uint, e.vector_mode (0));
+	    else
+	      code = code_for_mve_q_m_r (m_unspec_for_m_r_sint, m_unspec_for_m_r_sint, e.vector_mode (0));
+	    break;
+
+	  default:
+	    gcc_unreachable ();
+	  }
+	return e.use_cond_insn (code, 0);
+
+      case PRED_x:
+	switch (e.mode_suffix_id)
+	  {
+	  case MODE_none:
+	    /* No suffix, "x" predicate.  */
+	    if (e.type_suffix (0).unsigned_p)
+	      code = code_for_mve_q_m (m_unspec_for_m_uint, m_unspec_for_m_uint, e.vector_mode (0));
+	    else
+	      code = code_for_mve_q_m (m_unspec_for_m_sint, m_unspec_for_m_sint, e.vector_mode (0));
+	    break;
+
+	  case MODE_n:
+	    /* _n suffix, "x" predicate.  */
+	    if (e.type_suffix (0).unsigned_p)
+	      code = code_for_mve_q_m_n (m_unspec_for_m_n_uint, m_unspec_for_m_n_uint, e.vector_mode (0));
+	    else
+	      code = code_for_mve_q_m_n (m_unspec_for_m_n_sint, m_unspec_for_m_n_sint, e.vector_mode (0));
+	    break;
+
+	  default:
+	    gcc_unreachable ();
+	  }
+	return e.use_pred_x_insn (code);
+
+      default:
+	gcc_unreachable ();
+      }
+
+    gcc_unreachable ();
+  }
+};
+
 } /* end namespace arm_mve */
 
 /* Declare the global function base NAME, creating it from an instance
-- 
2.34.1


^ permalink raw reply	[flat|nested] 46+ messages in thread

* [PATCH 12/23] arm: [MVE intrinsics] rework vqshlq vshlq
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (9 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 11/23] arm: [MVE intrinsics] add unspec_mve_function_exact_insn_vshl Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 10:58   ` Kyrylo Tkachov
  2023-05-05  8:39 ` [PATCH 13/23] arm: [MVE intrinsics] factorize vmaxq vminq Christophe Lyon
                   ` (11 subsequent siblings)
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vqshlq, vshlq using the new MVE builtins framework.
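
As a quick check (not part of the patch), user-visible code is
unchanged and now resolves through the binary_lshift and
binary_lshift_r shapes instead of the removed _Generic tables:

  #include <arm_mve.h>

  uint16x8_t
  foo (uint16x8_t a, int16x8_t b, mve_pred16_t p)
  {
    a = vqshlq (a, b);          /* saturating, per-lane shift counts */
    return vshlq_x_n (a, 2, p); /* "x" (don't-care) predication */
  }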

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (FUNCTION_WITH_M_N_R): New.
	(vqshlq, vshlq): New.
	* config/arm/arm-mve-builtins-base.def (vqshlq, vshlq): New.
	* config/arm/arm-mve-builtins-base.h (vqshlq, vshlq): New.
	* config/arm/arm_mve.h (vshlq): Remove.
	(vshlq_r): Remove.
	(vshlq_n): Remove.
	(vshlq_m_r): Remove.
	(vshlq_m): Remove.
	(vshlq_m_n): Remove.
	(vshlq_x): Remove.
	(vshlq_x_n): Remove.
	(vshlq_s8): Remove.
	(vshlq_s16): Remove.
	(vshlq_s32): Remove.
	(vshlq_u8): Remove.
	(vshlq_u16): Remove.
	(vshlq_u32): Remove.
	(vshlq_r_u8): Remove.
	(vshlq_n_u8): Remove.
	(vshlq_r_s8): Remove.
	(vshlq_n_s8): Remove.
	(vshlq_r_u16): Remove.
	(vshlq_n_u16): Remove.
	(vshlq_r_s16): Remove.
	(vshlq_n_s16): Remove.
	(vshlq_r_u32): Remove.
	(vshlq_n_u32): Remove.
	(vshlq_r_s32): Remove.
	(vshlq_n_s32): Remove.
	(vshlq_m_r_u8): Remove.
	(vshlq_m_r_s8): Remove.
	(vshlq_m_r_u16): Remove.
	(vshlq_m_r_s16): Remove.
	(vshlq_m_r_u32): Remove.
	(vshlq_m_r_s32): Remove.
	(vshlq_m_u8): Remove.
	(vshlq_m_s8): Remove.
	(vshlq_m_u16): Remove.
	(vshlq_m_s16): Remove.
	(vshlq_m_u32): Remove.
	(vshlq_m_s32): Remove.
	(vshlq_m_n_s8): Remove.
	(vshlq_m_n_s32): Remove.
	(vshlq_m_n_s16): Remove.
	(vshlq_m_n_u8): Remove.
	(vshlq_m_n_u32): Remove.
	(vshlq_m_n_u16): Remove.
	(vshlq_x_s8): Remove.
	(vshlq_x_s16): Remove.
	(vshlq_x_s32): Remove.
	(vshlq_x_u8): Remove.
	(vshlq_x_u16): Remove.
	(vshlq_x_u32): Remove.
	(vshlq_x_n_s8): Remove.
	(vshlq_x_n_s16): Remove.
	(vshlq_x_n_s32): Remove.
	(vshlq_x_n_u8): Remove.
	(vshlq_x_n_u16): Remove.
	(vshlq_x_n_u32): Remove.
	(__arm_vshlq_s8): Remove.
	(__arm_vshlq_s16): Remove.
	(__arm_vshlq_s32): Remove.
	(__arm_vshlq_u8): Remove.
	(__arm_vshlq_u16): Remove.
	(__arm_vshlq_u32): Remove.
	(__arm_vshlq_r_u8): Remove.
	(__arm_vshlq_n_u8): Remove.
	(__arm_vshlq_r_s8): Remove.
	(__arm_vshlq_n_s8): Remove.
	(__arm_vshlq_r_u16): Remove.
	(__arm_vshlq_n_u16): Remove.
	(__arm_vshlq_r_s16): Remove.
	(__arm_vshlq_n_s16): Remove.
	(__arm_vshlq_r_u32): Remove.
	(__arm_vshlq_n_u32): Remove.
	(__arm_vshlq_r_s32): Remove.
	(__arm_vshlq_n_s32): Remove.
	(__arm_vshlq_m_r_u8): Remove.
	(__arm_vshlq_m_r_s8): Remove.
	(__arm_vshlq_m_r_u16): Remove.
	(__arm_vshlq_m_r_s16): Remove.
	(__arm_vshlq_m_r_u32): Remove.
	(__arm_vshlq_m_r_s32): Remove.
	(__arm_vshlq_m_u8): Remove.
	(__arm_vshlq_m_s8): Remove.
	(__arm_vshlq_m_u16): Remove.
	(__arm_vshlq_m_s16): Remove.
	(__arm_vshlq_m_u32): Remove.
	(__arm_vshlq_m_s32): Remove.
	(__arm_vshlq_m_n_s8): Remove.
	(__arm_vshlq_m_n_s32): Remove.
	(__arm_vshlq_m_n_s16): Remove.
	(__arm_vshlq_m_n_u8): Remove.
	(__arm_vshlq_m_n_u32): Remove.
	(__arm_vshlq_m_n_u16): Remove.
	(__arm_vshlq_x_s8): Remove.
	(__arm_vshlq_x_s16): Remove.
	(__arm_vshlq_x_s32): Remove.
	(__arm_vshlq_x_u8): Remove.
	(__arm_vshlq_x_u16): Remove.
	(__arm_vshlq_x_u32): Remove.
	(__arm_vshlq_x_n_s8): Remove.
	(__arm_vshlq_x_n_s16): Remove.
	(__arm_vshlq_x_n_s32): Remove.
	(__arm_vshlq_x_n_u8): Remove.
	(__arm_vshlq_x_n_u16): Remove.
	(__arm_vshlq_x_n_u32): Remove.
	(__arm_vshlq): Remove.
	(__arm_vshlq_r): Remove.
	(__arm_vshlq_n): Remove.
	(__arm_vshlq_m_r): Remove.
	(__arm_vshlq_m): Remove.
	(__arm_vshlq_m_n): Remove.
	(__arm_vshlq_x): Remove.
	(__arm_vshlq_x_n): Remove.
	(vqshlq): Remove.
	(vqshlq_r): Remove.
	(vqshlq_n): Remove.
	(vqshlq_m_r): Remove.
	(vqshlq_m_n): Remove.
	(vqshlq_m): Remove.
	(vqshlq_u8): Remove.
	(vqshlq_r_u8): Remove.
	(vqshlq_n_u8): Remove.
	(vqshlq_s8): Remove.
	(vqshlq_r_s8): Remove.
	(vqshlq_n_s8): Remove.
	(vqshlq_u16): Remove.
	(vqshlq_r_u16): Remove.
	(vqshlq_n_u16): Remove.
	(vqshlq_s16): Remove.
	(vqshlq_r_s16): Remove.
	(vqshlq_n_s16): Remove.
	(vqshlq_u32): Remove.
	(vqshlq_r_u32): Remove.
	(vqshlq_n_u32): Remove.
	(vqshlq_s32): Remove.
	(vqshlq_r_s32): Remove.
	(vqshlq_n_s32): Remove.
	(vqshlq_m_r_u8): Remove.
	(vqshlq_m_r_s8): Remove.
	(vqshlq_m_r_u16): Remove.
	(vqshlq_m_r_s16): Remove.
	(vqshlq_m_r_u32): Remove.
	(vqshlq_m_r_s32): Remove.
	(vqshlq_m_n_s8): Remove.
	(vqshlq_m_n_s32): Remove.
	(vqshlq_m_n_s16): Remove.
	(vqshlq_m_n_u8): Remove.
	(vqshlq_m_n_u32): Remove.
	(vqshlq_m_n_u16): Remove.
	(vqshlq_m_s8): Remove.
	(vqshlq_m_s32): Remove.
	(vqshlq_m_s16): Remove.
	(vqshlq_m_u8): Remove.
	(vqshlq_m_u32): Remove.
	(vqshlq_m_u16): Remove.
	(__arm_vqshlq_u8): Remove.
	(__arm_vqshlq_r_u8): Remove.
	(__arm_vqshlq_n_u8): Remove.
	(__arm_vqshlq_s8): Remove.
	(__arm_vqshlq_r_s8): Remove.
	(__arm_vqshlq_n_s8): Remove.
	(__arm_vqshlq_u16): Remove.
	(__arm_vqshlq_r_u16): Remove.
	(__arm_vqshlq_n_u16): Remove.
	(__arm_vqshlq_s16): Remove.
	(__arm_vqshlq_r_s16): Remove.
	(__arm_vqshlq_n_s16): Remove.
	(__arm_vqshlq_u32): Remove.
	(__arm_vqshlq_r_u32): Remove.
	(__arm_vqshlq_n_u32): Remove.
	(__arm_vqshlq_s32): Remove.
	(__arm_vqshlq_r_s32): Remove.
	(__arm_vqshlq_n_s32): Remove.
	(__arm_vqshlq_m_r_u8): Remove.
	(__arm_vqshlq_m_r_s8): Remove.
	(__arm_vqshlq_m_r_u16): Remove.
	(__arm_vqshlq_m_r_s16): Remove.
	(__arm_vqshlq_m_r_u32): Remove.
	(__arm_vqshlq_m_r_s32): Remove.
	(__arm_vqshlq_m_n_s8): Remove.
	(__arm_vqshlq_m_n_s32): Remove.
	(__arm_vqshlq_m_n_s16): Remove.
	(__arm_vqshlq_m_n_u8): Remove.
	(__arm_vqshlq_m_n_u32): Remove.
	(__arm_vqshlq_m_n_u16): Remove.
	(__arm_vqshlq_m_s8): Remove.
	(__arm_vqshlq_m_s32): Remove.
	(__arm_vqshlq_m_s16): Remove.
	(__arm_vqshlq_m_u8): Remove.
	(__arm_vqshlq_m_u32): Remove.
	(__arm_vqshlq_m_u16): Remove.
	(__arm_vqshlq): Remove.
	(__arm_vqshlq_r): Remove.
	(__arm_vqshlq_n): Remove.
	(__arm_vqshlq_m_r): Remove.
	(__arm_vqshlq_m_n): Remove.
	(__arm_vqshlq_m): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   13 +
 gcc/config/arm/arm-mve-builtins-base.def |    4 +
 gcc/config/arm/arm-mve-builtins-base.h   |    2 +
 gcc/config/arm/arm_mve.h                 | 1552 +---------------------
 4 files changed, 49 insertions(+), 1522 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index a74119db917..4bebf86f784 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -128,6 +128,17 @@ namespace arm_mve {
     UNSPEC##_M_S, UNSPEC##_M_U, -1,					\
     UNSPEC##_M_N_S, UNSPEC##_M_N_U, -1))
 
+  /* Helper for vshl builtins with only unspec codes, _m predicated
+     and _n and _r overrides.  */
+#define FUNCTION_WITH_M_N_R(NAME, UNSPEC) FUNCTION			\
+  (NAME, unspec_mve_function_exact_insn_vshl,				\
+   (UNSPEC##_S, UNSPEC##_U,						\
+    UNSPEC##_N_S, UNSPEC##_N_U,						\
+    UNSPEC##_M_S, UNSPEC##_M_U,						\
+    UNSPEC##_M_N_S, UNSPEC##_M_N_U,					\
+    UNSPEC##_M_R_S, UNSPEC##_M_R_U,					\
+    UNSPEC##_R_S, UNSPEC##_R_U))
+
   /* Helper for builtins with only unspec codes, _m predicated
      overrides, no _n and no floating-point version.  */
 #define FUNCTION_WITHOUT_N_NO_F(NAME, UNSPEC) FUNCTION			\
@@ -169,11 +180,13 @@ FUNCTION_WITH_M_N_NO_F (vqaddq, VQADDQ)
 FUNCTION_WITH_M_N_NO_U_F (vqdmulhq, VQDMULHQ)
 FUNCTION_WITH_M_N_NO_F (vqrshlq, VQRSHLQ)
 FUNCTION_WITH_M_N_NO_U_F (vqrdmulhq, VQRDMULHQ)
+FUNCTION_WITH_M_N_R (vqshlq, VQSHLQ)
 FUNCTION_WITH_M_N_NO_F (vqsubq, VQSUBQ)
 FUNCTION (vreinterpretq, vreinterpretq_impl,)
 FUNCTION_WITHOUT_N_NO_F (vrhaddq, VRHADDQ)
 FUNCTION_WITHOUT_N_NO_F (vrmulhq, VRMULHQ)
 FUNCTION_WITH_M_N_NO_F (vrshlq, VRSHLQ)
+FUNCTION_WITH_M_N_R (vshlq, VSHLQ)
 FUNCTION_WITH_RTX_M_N (vsubq, MINUS, VSUBQ)
 FUNCTION (vuninitializedq, vuninitializedq_impl,)
 
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 9230837fd43..f2e40cda2af 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -32,11 +32,15 @@ DEF_MVE_FUNCTION (vqaddq, binary_opt_n, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vqdmulhq, binary_opt_n, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqrdmulhq, binary_opt_n, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqrshlq, binary_round_lshift, all_integer, m_or_none)
+DEF_MVE_FUNCTION (vqshlq, binary_lshift, all_integer, m_or_none)
+DEF_MVE_FUNCTION (vqshlq, binary_lshift_r, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vqsubq, binary_opt_n, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vreinterpretq, unary_convert, reinterpret_integer, none)
 DEF_MVE_FUNCTION (vrhaddq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vrmulhq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vrshlq, binary_round_lshift, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vshlq, binary_lshift, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vshlq, binary_lshift_r, all_integer, m_or_none) // "_r" forms do not support the "x" predicate
 DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vuninitializedq, inherent, all_integer_with_64, none)
 #undef REQUIRES_FLOAT
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index d9d45d1925a..5b62de6a922 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -37,11 +37,13 @@ extern const function_base *const vqaddq;
 extern const function_base *const vqdmulhq;
 extern const function_base *const vqrdmulhq;
 extern const function_base *const vqrshlq;
+extern const function_base *const vqshlq;
 extern const function_base *const vqsubq;
 extern const function_base *const vreinterpretq;
 extern const function_base *const vrhaddq;
 extern const function_base *const vrmulhq;
 extern const function_base *const vrshlq;
+extern const function_base *const vshlq;
 extern const function_base *const vsubq;
 extern const function_base *const vuninitializedq;
 
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 175d9955c33..ad67dcfd024 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -60,7 +60,6 @@
 #define vshrq(__a, __imm) __arm_vshrq(__a, __imm)
 #define vaddlvq_p(__a, __p) __arm_vaddlvq_p(__a, __p)
 #define vcmpneq(__a, __b) __arm_vcmpneq(__a, __b)
-#define vshlq(__a, __b) __arm_vshlq(__a, __b)
 #define vornq(__a, __b) __arm_vornq(__a, __b)
 #define vmulltq_int(__a, __b) __arm_vmulltq_int(__a, __b)
 #define vmullbq_int(__a, __b) __arm_vmullbq_int(__a, __b)
@@ -77,17 +76,12 @@
 #define vbicq(__a, __b) __arm_vbicq(__a, __b)
 #define vaddvq_p(__a, __p) __arm_vaddvq_p(__a, __p)
 #define vaddvaq(__a, __b) __arm_vaddvaq(__a, __b)
-#define vshlq_r(__a, __b) __arm_vshlq_r(__a, __b)
-#define vqshlq(__a, __b) __arm_vqshlq(__a, __b)
-#define vqshlq_r(__a, __b) __arm_vqshlq_r(__a, __b)
 #define vminavq(__a, __b) __arm_vminavq(__a, __b)
 #define vminaq(__a, __b) __arm_vminaq(__a, __b)
 #define vmaxavq(__a, __b) __arm_vmaxavq(__a, __b)
 #define vmaxaq(__a, __b) __arm_vmaxaq(__a, __b)
 #define vbrsrq(__a, __b) __arm_vbrsrq(__a, __b)
-#define vshlq_n(__a, __imm) __arm_vshlq_n(__a, __imm)
 #define vrshrq(__a, __imm) __arm_vrshrq(__a, __imm)
-#define vqshlq_n(__a, __imm) __arm_vqshlq_n(__a, __imm)
 #define vcmpltq(__a, __b) __arm_vcmpltq(__a, __b)
 #define vcmpleq(__a, __b) __arm_vcmpleq(__a, __b)
 #define vcmpgtq(__a, __b) __arm_vcmpgtq(__a, __b)
@@ -148,8 +142,6 @@
 #define vaddvaq_p(__a, __b, __p) __arm_vaddvaq_p(__a, __b, __p)
 #define vsriq(__a, __b, __imm) __arm_vsriq(__a, __b, __imm)
 #define vsliq(__a, __b, __imm) __arm_vsliq(__a, __b, __imm)
-#define vshlq_m_r(__a, __b, __p) __arm_vshlq_m_r(__a, __b, __p)
-#define vqshlq_m_r(__a, __b, __p) __arm_vqshlq_m_r(__a, __b, __p)
 #define vminavq_p(__a, __b, __p) __arm_vminavq_p(__a, __b, __p)
 #define vminaq_m(__a, __b, __p) __arm_vminaq_m(__a, __b, __p)
 #define vmaxavq_p(__a, __b, __p) __arm_vmaxavq_p(__a, __b, __p)
@@ -216,7 +208,6 @@
 #define vsriq_m(__a, __b, __imm, __p) __arm_vsriq_m(__a, __b, __imm, __p)
 #define vqshluq_m(__inactive, __a, __imm, __p) __arm_vqshluq_m(__inactive, __a, __imm, __p)
 #define vabavq_p(__a, __b, __c, __p) __arm_vabavq_p(__a, __b, __c, __p)
-#define vshlq_m(__inactive, __a, __b, __p) __arm_vshlq_m(__inactive, __a, __b, __p)
 #define vbicq_m(__inactive, __a, __b, __p) __arm_vbicq_m(__inactive, __a, __b, __p)
 #define vbrsrq_m(__inactive, __a, __b, __p) __arm_vbrsrq_m(__inactive, __a, __b, __p)
 #define vcaddq_rot270_m(__inactive, __a, __b, __p) __arm_vcaddq_rot270_m(__inactive, __a, __b, __p)
@@ -246,10 +237,7 @@
 #define vqrdmlashq_m(__a, __b, __c, __p) __arm_vqrdmlashq_m(__a, __b, __c, __p)
 #define vqrdmlsdhq_m(__inactive, __a, __b, __p) __arm_vqrdmlsdhq_m(__inactive, __a, __b, __p)
 #define vqrdmlsdhxq_m(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m(__inactive, __a, __b, __p)
-#define vqshlq_m_n(__inactive, __a, __imm, __p) __arm_vqshlq_m_n(__inactive, __a, __imm, __p)
-#define vqshlq_m(__inactive, __a, __b, __p) __arm_vqshlq_m(__inactive, __a, __b, __p)
 #define vrshrq_m(__inactive, __a, __imm, __p) __arm_vrshrq_m(__inactive, __a, __imm, __p)
-#define vshlq_m_n(__inactive, __a, __imm, __p) __arm_vshlq_m_n(__inactive, __a, __imm, __p)
 #define vshrq_m(__inactive, __a, __imm, __p) __arm_vshrq_m(__inactive, __a, __imm, __p)
 #define vsliq_m(__a, __b, __imm, __p) __arm_vsliq_m(__a, __b, __imm, __p)
 #define vmlaldavaq_p(__a, __b, __c, __p) __arm_vmlaldavaq_p(__a, __b, __c, __p)
@@ -376,8 +364,6 @@
 #define vrev64q_x(__a, __p) __arm_vrev64q_x(__a, __p)
 #define vshllbq_x(__a, __imm, __p) __arm_vshllbq_x(__a, __imm, __p)
 #define vshlltq_x(__a, __imm, __p) __arm_vshlltq_x(__a, __imm, __p)
-#define vshlq_x(__a, __b, __p) __arm_vshlq_x(__a, __b, __p)
-#define vshlq_x_n(__a, __imm, __p) __arm_vshlq_x_n(__a, __imm, __p)
 #define vrshrq_x(__a, __imm, __p) __arm_vrshrq_x(__a, __imm, __p)
 #define vshrq_x(__a, __imm, __p) __arm_vshrq_x(__a, __imm, __p)
 #define vadciq(__a, __b, __carry_out) __arm_vadciq(__a, __b, __carry_out)
@@ -623,12 +609,6 @@
 #define vcmpneq_u8(__a, __b) __arm_vcmpneq_u8(__a, __b)
 #define vcmpneq_u16(__a, __b) __arm_vcmpneq_u16(__a, __b)
 #define vcmpneq_u32(__a, __b) __arm_vcmpneq_u32(__a, __b)
-#define vshlq_s8(__a, __b) __arm_vshlq_s8(__a, __b)
-#define vshlq_s16(__a, __b) __arm_vshlq_s16(__a, __b)
-#define vshlq_s32(__a, __b) __arm_vshlq_s32(__a, __b)
-#define vshlq_u8(__a, __b) __arm_vshlq_u8(__a, __b)
-#define vshlq_u16(__a, __b) __arm_vshlq_u16(__a, __b)
-#define vshlq_u32(__a, __b) __arm_vshlq_u32(__a, __b)
 #define vornq_u8(__a, __b) __arm_vornq_u8(__a, __b)
 #define vmulltq_int_u8(__a, __b) __arm_vmulltq_int_u8(__a, __b)
 #define vmullbq_int_u8(__a, __b) __arm_vmullbq_int_u8(__a, __b)
@@ -649,17 +629,12 @@
 #define vbicq_u8(__a, __b) __arm_vbicq_u8(__a, __b)
 #define vaddvq_p_u8(__a, __p) __arm_vaddvq_p_u8(__a, __p)
 #define vaddvaq_u8(__a, __b) __arm_vaddvaq_u8(__a, __b)
-#define vshlq_r_u8(__a, __b) __arm_vshlq_r_u8(__a, __b)
-#define vqshlq_u8(__a, __b) __arm_vqshlq_u8(__a, __b)
-#define vqshlq_r_u8(__a, __b) __arm_vqshlq_r_u8(__a, __b)
 #define vminavq_s8(__a, __b) __arm_vminavq_s8(__a, __b)
 #define vminaq_s8(__a, __b) __arm_vminaq_s8(__a, __b)
 #define vmaxavq_s8(__a, __b) __arm_vmaxavq_s8(__a, __b)
 #define vmaxaq_s8(__a, __b) __arm_vmaxaq_s8(__a, __b)
 #define vbrsrq_n_u8(__a, __b) __arm_vbrsrq_n_u8(__a, __b)
-#define vshlq_n_u8(__a,  __imm) __arm_vshlq_n_u8(__a,  __imm)
 #define vrshrq_n_u8(__a,  __imm) __arm_vrshrq_n_u8(__a,  __imm)
-#define vqshlq_n_u8(__a,  __imm) __arm_vqshlq_n_u8(__a,  __imm)
 #define vcmpneq_n_s8(__a, __b) __arm_vcmpneq_n_s8(__a, __b)
 #define vcmpltq_s8(__a, __b) __arm_vcmpltq_s8(__a, __b)
 #define vcmpltq_n_s8(__a, __b) __arm_vcmpltq_n_s8(__a, __b)
@@ -673,9 +648,6 @@
 #define vcmpeqq_n_s8(__a, __b) __arm_vcmpeqq_n_s8(__a, __b)
 #define vqshluq_n_s8(__a,  __imm) __arm_vqshluq_n_s8(__a,  __imm)
 #define vaddvq_p_s8(__a, __p) __arm_vaddvq_p_s8(__a, __p)
-#define vshlq_r_s8(__a, __b) __arm_vshlq_r_s8(__a, __b)
-#define vqshlq_s8(__a, __b) __arm_vqshlq_s8(__a, __b)
-#define vqshlq_r_s8(__a, __b) __arm_vqshlq_r_s8(__a, __b)
 #define vornq_s8(__a, __b) __arm_vornq_s8(__a, __b)
 #define vmulltq_int_s8(__a, __b) __arm_vmulltq_int_s8(__a, __b)
 #define vmullbq_int_s8(__a, __b) __arm_vmullbq_int_s8(__a, __b)
@@ -694,9 +666,7 @@
 #define vbrsrq_n_s8(__a, __b) __arm_vbrsrq_n_s8(__a, __b)
 #define vbicq_s8(__a, __b) __arm_vbicq_s8(__a, __b)
 #define vaddvaq_s8(__a, __b) __arm_vaddvaq_s8(__a, __b)
-#define vshlq_n_s8(__a,  __imm) __arm_vshlq_n_s8(__a,  __imm)
 #define vrshrq_n_s8(__a,  __imm) __arm_vrshrq_n_s8(__a,  __imm)
-#define vqshlq_n_s8(__a,  __imm) __arm_vqshlq_n_s8(__a,  __imm)
 #define vornq_u16(__a, __b) __arm_vornq_u16(__a, __b)
 #define vmulltq_int_u16(__a, __b) __arm_vmulltq_int_u16(__a, __b)
 #define vmullbq_int_u16(__a, __b) __arm_vmullbq_int_u16(__a, __b)
@@ -717,17 +687,12 @@
 #define vbicq_u16(__a, __b) __arm_vbicq_u16(__a, __b)
 #define vaddvq_p_u16(__a, __p) __arm_vaddvq_p_u16(__a, __p)
 #define vaddvaq_u16(__a, __b) __arm_vaddvaq_u16(__a, __b)
-#define vshlq_r_u16(__a, __b) __arm_vshlq_r_u16(__a, __b)
-#define vqshlq_u16(__a, __b) __arm_vqshlq_u16(__a, __b)
-#define vqshlq_r_u16(__a, __b) __arm_vqshlq_r_u16(__a, __b)
 #define vminavq_s16(__a, __b) __arm_vminavq_s16(__a, __b)
 #define vminaq_s16(__a, __b) __arm_vminaq_s16(__a, __b)
 #define vmaxavq_s16(__a, __b) __arm_vmaxavq_s16(__a, __b)
 #define vmaxaq_s16(__a, __b) __arm_vmaxaq_s16(__a, __b)
 #define vbrsrq_n_u16(__a, __b) __arm_vbrsrq_n_u16(__a, __b)
-#define vshlq_n_u16(__a,  __imm) __arm_vshlq_n_u16(__a,  __imm)
 #define vrshrq_n_u16(__a,  __imm) __arm_vrshrq_n_u16(__a,  __imm)
-#define vqshlq_n_u16(__a,  __imm) __arm_vqshlq_n_u16(__a,  __imm)
 #define vcmpneq_n_s16(__a, __b) __arm_vcmpneq_n_s16(__a, __b)
 #define vcmpltq_s16(__a, __b) __arm_vcmpltq_s16(__a, __b)
 #define vcmpltq_n_s16(__a, __b) __arm_vcmpltq_n_s16(__a, __b)
@@ -741,9 +706,6 @@
 #define vcmpeqq_n_s16(__a, __b) __arm_vcmpeqq_n_s16(__a, __b)
 #define vqshluq_n_s16(__a,  __imm) __arm_vqshluq_n_s16(__a,  __imm)
 #define vaddvq_p_s16(__a, __p) __arm_vaddvq_p_s16(__a, __p)
-#define vshlq_r_s16(__a, __b) __arm_vshlq_r_s16(__a, __b)
-#define vqshlq_s16(__a, __b) __arm_vqshlq_s16(__a, __b)
-#define vqshlq_r_s16(__a, __b) __arm_vqshlq_r_s16(__a, __b)
 #define vornq_s16(__a, __b) __arm_vornq_s16(__a, __b)
 #define vmulltq_int_s16(__a, __b) __arm_vmulltq_int_s16(__a, __b)
 #define vmullbq_int_s16(__a, __b) __arm_vmullbq_int_s16(__a, __b)
@@ -762,9 +724,7 @@
 #define vbrsrq_n_s16(__a, __b) __arm_vbrsrq_n_s16(__a, __b)
 #define vbicq_s16(__a, __b) __arm_vbicq_s16(__a, __b)
 #define vaddvaq_s16(__a, __b) __arm_vaddvaq_s16(__a, __b)
-#define vshlq_n_s16(__a,  __imm) __arm_vshlq_n_s16(__a,  __imm)
 #define vrshrq_n_s16(__a,  __imm) __arm_vrshrq_n_s16(__a,  __imm)
-#define vqshlq_n_s16(__a,  __imm) __arm_vqshlq_n_s16(__a,  __imm)
 #define vornq_u32(__a, __b) __arm_vornq_u32(__a, __b)
 #define vmulltq_int_u32(__a, __b) __arm_vmulltq_int_u32(__a, __b)
 #define vmullbq_int_u32(__a, __b) __arm_vmullbq_int_u32(__a, __b)
@@ -785,17 +745,12 @@
 #define vbicq_u32(__a, __b) __arm_vbicq_u32(__a, __b)
 #define vaddvq_p_u32(__a, __p) __arm_vaddvq_p_u32(__a, __p)
 #define vaddvaq_u32(__a, __b) __arm_vaddvaq_u32(__a, __b)
-#define vshlq_r_u32(__a, __b) __arm_vshlq_r_u32(__a, __b)
-#define vqshlq_u32(__a, __b) __arm_vqshlq_u32(__a, __b)
-#define vqshlq_r_u32(__a, __b) __arm_vqshlq_r_u32(__a, __b)
 #define vminavq_s32(__a, __b) __arm_vminavq_s32(__a, __b)
 #define vminaq_s32(__a, __b) __arm_vminaq_s32(__a, __b)
 #define vmaxavq_s32(__a, __b) __arm_vmaxavq_s32(__a, __b)
 #define vmaxaq_s32(__a, __b) __arm_vmaxaq_s32(__a, __b)
 #define vbrsrq_n_u32(__a, __b) __arm_vbrsrq_n_u32(__a, __b)
-#define vshlq_n_u32(__a,  __imm) __arm_vshlq_n_u32(__a,  __imm)
 #define vrshrq_n_u32(__a,  __imm) __arm_vrshrq_n_u32(__a,  __imm)
-#define vqshlq_n_u32(__a,  __imm) __arm_vqshlq_n_u32(__a,  __imm)
 #define vcmpneq_n_s32(__a, __b) __arm_vcmpneq_n_s32(__a, __b)
 #define vcmpltq_s32(__a, __b) __arm_vcmpltq_s32(__a, __b)
 #define vcmpltq_n_s32(__a, __b) __arm_vcmpltq_n_s32(__a, __b)
@@ -809,9 +764,6 @@
 #define vcmpeqq_n_s32(__a, __b) __arm_vcmpeqq_n_s32(__a, __b)
 #define vqshluq_n_s32(__a,  __imm) __arm_vqshluq_n_s32(__a,  __imm)
 #define vaddvq_p_s32(__a, __p) __arm_vaddvq_p_s32(__a, __p)
-#define vshlq_r_s32(__a, __b) __arm_vshlq_r_s32(__a, __b)
-#define vqshlq_s32(__a, __b) __arm_vqshlq_s32(__a, __b)
-#define vqshlq_r_s32(__a, __b) __arm_vqshlq_r_s32(__a, __b)
 #define vornq_s32(__a, __b) __arm_vornq_s32(__a, __b)
 #define vmulltq_int_s32(__a, __b) __arm_vmulltq_int_s32(__a, __b)
 #define vmullbq_int_s32(__a, __b) __arm_vmullbq_int_s32(__a, __b)
@@ -830,9 +782,7 @@
 #define vbrsrq_n_s32(__a, __b) __arm_vbrsrq_n_s32(__a, __b)
 #define vbicq_s32(__a, __b) __arm_vbicq_s32(__a, __b)
 #define vaddvaq_s32(__a, __b) __arm_vaddvaq_s32(__a, __b)
-#define vshlq_n_s32(__a,  __imm) __arm_vshlq_n_s32(__a,  __imm)
 #define vrshrq_n_s32(__a,  __imm) __arm_vrshrq_n_s32(__a,  __imm)
-#define vqshlq_n_s32(__a,  __imm) __arm_vqshlq_n_s32(__a,  __imm)
 #define vqmovntq_u16(__a, __b) __arm_vqmovntq_u16(__a, __b)
 #define vqmovnbq_u16(__a, __b) __arm_vqmovnbq_u16(__a, __b)
 #define vmulltq_poly_p8(__a, __b) __arm_vmulltq_poly_p8(__a, __b)
@@ -1013,8 +963,6 @@
 #define vaddvaq_p_u8(__a, __b, __p) __arm_vaddvaq_p_u8(__a, __b, __p)
 #define vsriq_n_u8(__a, __b,  __imm) __arm_vsriq_n_u8(__a, __b,  __imm)
 #define vsliq_n_u8(__a, __b,  __imm) __arm_vsliq_n_u8(__a, __b,  __imm)
-#define vshlq_m_r_u8(__a, __b, __p) __arm_vshlq_m_r_u8(__a, __b, __p)
-#define vqshlq_m_r_u8(__a, __b, __p) __arm_vqshlq_m_r_u8(__a, __b, __p)
 #define vminavq_p_s8(__a, __b, __p) __arm_vminavq_p_s8(__a, __b, __p)
 #define vminaq_m_s8(__a, __b, __p) __arm_vminaq_m_s8(__a, __b, __p)
 #define vmaxavq_p_s8(__a, __b, __p) __arm_vmaxavq_p_s8(__a, __b, __p)
@@ -1031,9 +979,7 @@
 #define vcmpgeq_m_n_s8(__a, __b, __p) __arm_vcmpgeq_m_n_s8(__a, __b, __p)
 #define vcmpeqq_m_s8(__a, __b, __p) __arm_vcmpeqq_m_s8(__a, __b, __p)
 #define vcmpeqq_m_n_s8(__a, __b, __p) __arm_vcmpeqq_m_n_s8(__a, __b, __p)
-#define vshlq_m_r_s8(__a, __b, __p) __arm_vshlq_m_r_s8(__a, __b, __p)
 #define vrev64q_m_s8(__inactive, __a, __p) __arm_vrev64q_m_s8(__inactive, __a, __p)
-#define vqshlq_m_r_s8(__a, __b, __p) __arm_vqshlq_m_r_s8(__a, __b, __p)
 #define vqnegq_m_s8(__inactive, __a, __p) __arm_vqnegq_m_s8(__inactive, __a, __p)
 #define vqabsq_m_s8(__inactive, __a, __p) __arm_vqabsq_m_s8(__inactive, __a, __p)
 #define vnegq_m_s8(__inactive, __a, __p) __arm_vnegq_m_s8(__inactive, __a, __p)
@@ -1092,8 +1038,6 @@
 #define vaddvaq_p_u16(__a, __b, __p) __arm_vaddvaq_p_u16(__a, __b, __p)
 #define vsriq_n_u16(__a, __b,  __imm) __arm_vsriq_n_u16(__a, __b,  __imm)
 #define vsliq_n_u16(__a, __b,  __imm) __arm_vsliq_n_u16(__a, __b,  __imm)
-#define vshlq_m_r_u16(__a, __b, __p) __arm_vshlq_m_r_u16(__a, __b, __p)
-#define vqshlq_m_r_u16(__a, __b, __p) __arm_vqshlq_m_r_u16(__a, __b, __p)
 #define vminavq_p_s16(__a, __b, __p) __arm_vminavq_p_s16(__a, __b, __p)
 #define vminaq_m_s16(__a, __b, __p) __arm_vminaq_m_s16(__a, __b, __p)
 #define vmaxavq_p_s16(__a, __b, __p) __arm_vmaxavq_p_s16(__a, __b, __p)
@@ -1110,9 +1054,7 @@
 #define vcmpgeq_m_n_s16(__a, __b, __p) __arm_vcmpgeq_m_n_s16(__a, __b, __p)
 #define vcmpeqq_m_s16(__a, __b, __p) __arm_vcmpeqq_m_s16(__a, __b, __p)
 #define vcmpeqq_m_n_s16(__a, __b, __p) __arm_vcmpeqq_m_n_s16(__a, __b, __p)
-#define vshlq_m_r_s16(__a, __b, __p) __arm_vshlq_m_r_s16(__a, __b, __p)
 #define vrev64q_m_s16(__inactive, __a, __p) __arm_vrev64q_m_s16(__inactive, __a, __p)
-#define vqshlq_m_r_s16(__a, __b, __p) __arm_vqshlq_m_r_s16(__a, __b, __p)
 #define vqnegq_m_s16(__inactive, __a, __p) __arm_vqnegq_m_s16(__inactive, __a, __p)
 #define vqabsq_m_s16(__inactive, __a, __p) __arm_vqabsq_m_s16(__inactive, __a, __p)
 #define vnegq_m_s16(__inactive, __a, __p) __arm_vnegq_m_s16(__inactive, __a, __p)
@@ -1171,8 +1113,6 @@
 #define vaddvaq_p_u32(__a, __b, __p) __arm_vaddvaq_p_u32(__a, __b, __p)
 #define vsriq_n_u32(__a, __b,  __imm) __arm_vsriq_n_u32(__a, __b,  __imm)
 #define vsliq_n_u32(__a, __b,  __imm) __arm_vsliq_n_u32(__a, __b,  __imm)
-#define vshlq_m_r_u32(__a, __b, __p) __arm_vshlq_m_r_u32(__a, __b, __p)
-#define vqshlq_m_r_u32(__a, __b, __p) __arm_vqshlq_m_r_u32(__a, __b, __p)
 #define vminavq_p_s32(__a, __b, __p) __arm_vminavq_p_s32(__a, __b, __p)
 #define vminaq_m_s32(__a, __b, __p) __arm_vminaq_m_s32(__a, __b, __p)
 #define vmaxavq_p_s32(__a, __b, __p) __arm_vmaxavq_p_s32(__a, __b, __p)
@@ -1189,9 +1129,7 @@
 #define vcmpgeq_m_n_s32(__a, __b, __p) __arm_vcmpgeq_m_n_s32(__a, __b, __p)
 #define vcmpeqq_m_s32(__a, __b, __p) __arm_vcmpeqq_m_s32(__a, __b, __p)
 #define vcmpeqq_m_n_s32(__a, __b, __p) __arm_vcmpeqq_m_n_s32(__a, __b, __p)
-#define vshlq_m_r_s32(__a, __b, __p) __arm_vshlq_m_r_s32(__a, __b, __p)
 #define vrev64q_m_s32(__inactive, __a, __p) __arm_vrev64q_m_s32(__inactive, __a, __p)
-#define vqshlq_m_r_s32(__a, __b, __p) __arm_vqshlq_m_r_s32(__a, __b, __p)
 #define vqnegq_m_s32(__inactive, __a, __p) __arm_vqnegq_m_s32(__inactive, __a, __p)
 #define vqabsq_m_s32(__inactive, __a, __p) __arm_vqabsq_m_s32(__inactive, __a, __p)
 #define vnegq_m_s32(__inactive, __a, __p) __arm_vnegq_m_s32(__inactive, __a, __p)
@@ -1429,26 +1367,20 @@
 #define vqshluq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vqshluq_m_n_s8(__inactive, __a,  __imm, __p)
 #define vabavq_p_s8(__a, __b, __c, __p) __arm_vabavq_p_s8(__a, __b, __c, __p)
 #define vsriq_m_n_u8(__a, __b,  __imm, __p) __arm_vsriq_m_n_u8(__a, __b,  __imm, __p)
-#define vshlq_m_u8(__inactive, __a, __b, __p) __arm_vshlq_m_u8(__inactive, __a, __b, __p)
 #define vabavq_p_u8(__a, __b, __c, __p) __arm_vabavq_p_u8(__a, __b, __c, __p)
-#define vshlq_m_s8(__inactive, __a, __b, __p) __arm_vshlq_m_s8(__inactive, __a, __b, __p)
 #define vcvtq_m_n_f16_s16(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_f16_s16(__inactive, __a,  __imm6, __p)
 #define vsriq_m_n_s16(__a, __b,  __imm, __p) __arm_vsriq_m_n_s16(__a, __b,  __imm, __p)
 #define vcvtq_m_n_f32_u32(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_f32_u32(__inactive, __a,  __imm6, __p)
 #define vqshluq_m_n_s16(__inactive, __a,  __imm, __p) __arm_vqshluq_m_n_s16(__inactive, __a,  __imm, __p)
 #define vabavq_p_s16(__a, __b, __c, __p) __arm_vabavq_p_s16(__a, __b, __c, __p)
 #define vsriq_m_n_u16(__a, __b,  __imm, __p) __arm_vsriq_m_n_u16(__a, __b,  __imm, __p)
-#define vshlq_m_u16(__inactive, __a, __b, __p) __arm_vshlq_m_u16(__inactive, __a, __b, __p)
 #define vabavq_p_u16(__a, __b, __c, __p) __arm_vabavq_p_u16(__a, __b, __c, __p)
-#define vshlq_m_s16(__inactive, __a, __b, __p) __arm_vshlq_m_s16(__inactive, __a, __b, __p)
 #define vcvtq_m_n_f32_s32(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_f32_s32(__inactive, __a,  __imm6, __p)
 #define vsriq_m_n_s32(__a, __b,  __imm, __p) __arm_vsriq_m_n_s32(__a, __b,  __imm, __p)
 #define vqshluq_m_n_s32(__inactive, __a,  __imm, __p) __arm_vqshluq_m_n_s32(__inactive, __a,  __imm, __p)
 #define vabavq_p_s32(__a, __b, __c, __p) __arm_vabavq_p_s32(__a, __b, __c, __p)
 #define vsriq_m_n_u32(__a, __b,  __imm, __p) __arm_vsriq_m_n_u32(__a, __b,  __imm, __p)
-#define vshlq_m_u32(__inactive, __a, __b, __p) __arm_vshlq_m_u32(__inactive, __a, __b, __p)
 #define vabavq_p_u32(__a, __b, __c, __p) __arm_vabavq_p_u32(__a, __b, __c, __p)
-#define vshlq_m_s32(__inactive, __a, __b, __p) __arm_vshlq_m_s32(__inactive, __a, __b, __p)
 #define vbicq_m_s8(__inactive, __a, __b, __p) __arm_vbicq_m_s8(__inactive, __a, __b, __p)
 #define vbicq_m_s32(__inactive, __a, __b, __p) __arm_vbicq_m_s32(__inactive, __a, __b, __p)
 #define vbicq_m_s16(__inactive, __a, __b, __p) __arm_vbicq_m_s16(__inactive, __a, __b, __p)
@@ -1572,30 +1504,12 @@
 #define vqrdmlsdhxq_m_s8(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m_s8(__inactive, __a, __b, __p)
 #define vqrdmlsdhxq_m_s32(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m_s32(__inactive, __a, __b, __p)
 #define vqrdmlsdhxq_m_s16(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m_s16(__inactive, __a, __b, __p)
-#define vqshlq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vqshlq_m_n_s8(__inactive, __a,  __imm, __p)
-#define vqshlq_m_n_s32(__inactive, __a,  __imm, __p) __arm_vqshlq_m_n_s32(__inactive, __a,  __imm, __p)
-#define vqshlq_m_n_s16(__inactive, __a,  __imm, __p) __arm_vqshlq_m_n_s16(__inactive, __a,  __imm, __p)
-#define vqshlq_m_n_u8(__inactive, __a,  __imm, __p) __arm_vqshlq_m_n_u8(__inactive, __a,  __imm, __p)
-#define vqshlq_m_n_u32(__inactive, __a,  __imm, __p) __arm_vqshlq_m_n_u32(__inactive, __a,  __imm, __p)
-#define vqshlq_m_n_u16(__inactive, __a,  __imm, __p) __arm_vqshlq_m_n_u16(__inactive, __a,  __imm, __p)
-#define vqshlq_m_s8(__inactive, __a, __b, __p) __arm_vqshlq_m_s8(__inactive, __a, __b, __p)
-#define vqshlq_m_s32(__inactive, __a, __b, __p) __arm_vqshlq_m_s32(__inactive, __a, __b, __p)
-#define vqshlq_m_s16(__inactive, __a, __b, __p) __arm_vqshlq_m_s16(__inactive, __a, __b, __p)
-#define vqshlq_m_u8(__inactive, __a, __b, __p) __arm_vqshlq_m_u8(__inactive, __a, __b, __p)
-#define vqshlq_m_u32(__inactive, __a, __b, __p) __arm_vqshlq_m_u32(__inactive, __a, __b, __p)
-#define vqshlq_m_u16(__inactive, __a, __b, __p) __arm_vqshlq_m_u16(__inactive, __a, __b, __p)
 #define vrshrq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_s8(__inactive, __a,  __imm, __p)
 #define vrshrq_m_n_s32(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_s32(__inactive, __a,  __imm, __p)
 #define vrshrq_m_n_s16(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_s16(__inactive, __a,  __imm, __p)
 #define vrshrq_m_n_u8(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_u8(__inactive, __a,  __imm, __p)
 #define vrshrq_m_n_u32(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_u32(__inactive, __a,  __imm, __p)
 #define vrshrq_m_n_u16(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_u16(__inactive, __a,  __imm, __p)
-#define vshlq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vshlq_m_n_s8(__inactive, __a,  __imm, __p)
-#define vshlq_m_n_s32(__inactive, __a,  __imm, __p) __arm_vshlq_m_n_s32(__inactive, __a,  __imm, __p)
-#define vshlq_m_n_s16(__inactive, __a,  __imm, __p) __arm_vshlq_m_n_s16(__inactive, __a,  __imm, __p)
-#define vshlq_m_n_u8(__inactive, __a,  __imm, __p) __arm_vshlq_m_n_u8(__inactive, __a,  __imm, __p)
-#define vshlq_m_n_u32(__inactive, __a,  __imm, __p) __arm_vshlq_m_n_u32(__inactive, __a,  __imm, __p)
-#define vshlq_m_n_u16(__inactive, __a,  __imm, __p) __arm_vshlq_m_n_u16(__inactive, __a,  __imm, __p)
 #define vshrq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vshrq_m_n_s8(__inactive, __a,  __imm, __p)
 #define vshrq_m_n_s32(__inactive, __a,  __imm, __p) __arm_vshrq_m_n_s32(__inactive, __a,  __imm, __p)
 #define vshrq_m_n_s16(__inactive, __a,  __imm, __p) __arm_vshrq_m_n_s16(__inactive, __a,  __imm, __p)
@@ -2146,18 +2060,6 @@
 #define vshlltq_x_n_s16(__a,  __imm, __p) __arm_vshlltq_x_n_s16(__a,  __imm, __p)
 #define vshlltq_x_n_u8(__a,  __imm, __p) __arm_vshlltq_x_n_u8(__a,  __imm, __p)
 #define vshlltq_x_n_u16(__a,  __imm, __p) __arm_vshlltq_x_n_u16(__a,  __imm, __p)
-#define vshlq_x_s8(__a, __b, __p) __arm_vshlq_x_s8(__a, __b, __p)
-#define vshlq_x_s16(__a, __b, __p) __arm_vshlq_x_s16(__a, __b, __p)
-#define vshlq_x_s32(__a, __b, __p) __arm_vshlq_x_s32(__a, __b, __p)
-#define vshlq_x_u8(__a, __b, __p) __arm_vshlq_x_u8(__a, __b, __p)
-#define vshlq_x_u16(__a, __b, __p) __arm_vshlq_x_u16(__a, __b, __p)
-#define vshlq_x_u32(__a, __b, __p) __arm_vshlq_x_u32(__a, __b, __p)
-#define vshlq_x_n_s8(__a,  __imm, __p) __arm_vshlq_x_n_s8(__a,  __imm, __p)
-#define vshlq_x_n_s16(__a,  __imm, __p) __arm_vshlq_x_n_s16(__a,  __imm, __p)
-#define vshlq_x_n_s32(__a,  __imm, __p) __arm_vshlq_x_n_s32(__a,  __imm, __p)
-#define vshlq_x_n_u8(__a,  __imm, __p) __arm_vshlq_x_n_u8(__a,  __imm, __p)
-#define vshlq_x_n_u16(__a,  __imm, __p) __arm_vshlq_x_n_u16(__a,  __imm, __p)
-#define vshlq_x_n_u32(__a,  __imm, __p) __arm_vshlq_x_n_u32(__a,  __imm, __p)
 #define vrshrq_x_n_s8(__a,  __imm, __p) __arm_vrshrq_x_n_s8(__a,  __imm, __p)
 #define vrshrq_x_n_s16(__a,  __imm, __p) __arm_vrshrq_x_n_s16(__a,  __imm, __p)
 #define vrshrq_x_n_s32(__a,  __imm, __p) __arm_vrshrq_x_n_s32(__a,  __imm, __p)
@@ -3000,48 +2902,6 @@ __arm_vcmpneq_u32 (uint32x4_t __a, uint32x4_t __b)
   return __builtin_mve_vcmpneq_v4si ((int32x4_t)__a, (int32x4_t)__b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vshlq_sv16qi (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vshlq_sv8hi (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vshlq_sv4si (__a, __b);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_u8 (uint8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vshlq_uv16qi (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_u16 (uint16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vshlq_uv8hi (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_u32 (uint32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vshlq_uv4si (__a, __b);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_u8 (uint8x16_t __a, uint8x16_t __b)
@@ -3184,27 +3044,6 @@ __arm_vaddvaq_u8 (uint32_t __a, uint8x16_t __b)
   return __builtin_mve_vaddvaq_uv16qi (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_r_u8 (uint8x16_t __a, int32_t __b)
-{
-  return __builtin_mve_vshlq_r_uv16qi (__a, __b);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vqshlq_uv16qi (__a, __b);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_r_u8 (uint8x16_t __a, int32_t __b)
-{
-  return __builtin_mve_vqshlq_r_uv16qi (__a, __b);
-}
-
 __extension__ extern __inline uint8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq_s8 (uint8_t __a, int8x16_t __b)
@@ -3240,13 +3079,6 @@ __arm_vbrsrq_n_u8 (uint8x16_t __a, int32_t __b)
   return __builtin_mve_vbrsrq_n_uv16qi (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_n_u8 (uint8x16_t __a, const int __imm)
-{
-  return __builtin_mve_vshlq_n_uv16qi (__a, __imm);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrshrq_n_u8 (uint8x16_t __a, const int __imm)
@@ -3254,13 +3086,6 @@ __arm_vrshrq_n_u8 (uint8x16_t __a, const int __imm)
   return __builtin_mve_vrshrq_n_uv16qi (__a, __imm);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_n_u8 (uint8x16_t __a, const int __imm)
-{
-  return __builtin_mve_vqshlq_n_uv16qi (__a, __imm);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq_n_s8 (int8x16_t __a, int8_t __b)
@@ -3352,27 +3177,6 @@ __arm_vaddvq_p_s8 (int8x16_t __a, mve_pred16_t __p)
   return __builtin_mve_vaddvq_p_sv16qi (__a, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_r_s8 (int8x16_t __a, int32_t __b)
-{
-  return __builtin_mve_vshlq_r_sv16qi (__a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vqshlq_sv16qi (__a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_r_s8 (int8x16_t __a, int32_t __b)
-{
-  return __builtin_mve_vqshlq_r_sv16qi (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_s8 (int8x16_t __a, int8x16_t __b)
@@ -3499,13 +3303,6 @@ __arm_vaddvaq_s8 (int32_t __a, int8x16_t __b)
   return __builtin_mve_vaddvaq_sv16qi (__a, __b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_n_s8 (int8x16_t __a, const int __imm)
-{
-  return __builtin_mve_vshlq_n_sv16qi (__a, __imm);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrshrq_n_s8 (int8x16_t __a, const int __imm)
@@ -3513,13 +3310,6 @@ __arm_vrshrq_n_s8 (int8x16_t __a, const int __imm)
   return __builtin_mve_vrshrq_n_sv16qi (__a, __imm);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_n_s8 (int8x16_t __a, const int __imm)
-{
-  return __builtin_mve_vqshlq_n_sv16qi (__a, __imm);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_u16 (uint16x8_t __a, uint16x8_t __b)
@@ -3662,27 +3452,6 @@ __arm_vaddvaq_u16 (uint32_t __a, uint16x8_t __b)
   return __builtin_mve_vaddvaq_uv8hi (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_r_u16 (uint16x8_t __a, int32_t __b)
-{
-  return __builtin_mve_vshlq_r_uv8hi (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqshlq_uv8hi (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_r_u16 (uint16x8_t __a, int32_t __b)
-{
-  return __builtin_mve_vqshlq_r_uv8hi (__a, __b);
-}
-
 __extension__ extern __inline uint16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq_s16 (uint16_t __a, int16x8_t __b)
@@ -3718,13 +3487,6 @@ __arm_vbrsrq_n_u16 (uint16x8_t __a, int32_t __b)
   return __builtin_mve_vbrsrq_n_uv8hi (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_n_u16 (uint16x8_t __a, const int __imm)
-{
-  return __builtin_mve_vshlq_n_uv8hi (__a, __imm);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrshrq_n_u16 (uint16x8_t __a, const int __imm)
@@ -3732,13 +3494,6 @@ __arm_vrshrq_n_u16 (uint16x8_t __a, const int __imm)
   return __builtin_mve_vrshrq_n_uv8hi (__a, __imm);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_n_u16 (uint16x8_t __a, const int __imm)
-{
-  return __builtin_mve_vqshlq_n_uv8hi (__a, __imm);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq_n_s16 (int16x8_t __a, int16_t __b)
@@ -3830,27 +3585,6 @@ __arm_vaddvq_p_s16 (int16x8_t __a, mve_pred16_t __p)
   return __builtin_mve_vaddvq_p_sv8hi (__a, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_r_s16 (int16x8_t __a, int32_t __b)
-{
-  return __builtin_mve_vshlq_r_sv8hi (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqshlq_sv8hi (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_r_s16 (int16x8_t __a, int32_t __b)
-{
-  return __builtin_mve_vqshlq_r_sv8hi (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_s16 (int16x8_t __a, int16x8_t __b)
@@ -3977,13 +3711,6 @@ __arm_vaddvaq_s16 (int32_t __a, int16x8_t __b)
   return __builtin_mve_vaddvaq_sv8hi (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_n_s16 (int16x8_t __a, const int __imm)
-{
-  return __builtin_mve_vshlq_n_sv8hi (__a, __imm);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrshrq_n_s16 (int16x8_t __a, const int __imm)
@@ -3991,13 +3718,6 @@ __arm_vrshrq_n_s16 (int16x8_t __a, const int __imm)
   return __builtin_mve_vrshrq_n_sv8hi (__a, __imm);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_n_s16 (int16x8_t __a, const int __imm)
-{
-  return __builtin_mve_vqshlq_n_sv8hi (__a, __imm);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_u32 (uint32x4_t __a, uint32x4_t __b)
@@ -4140,27 +3860,6 @@ __arm_vaddvaq_u32 (uint32_t __a, uint32x4_t __b)
   return __builtin_mve_vaddvaq_uv4si (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_r_u32 (uint32x4_t __a, int32_t __b)
-{
-  return __builtin_mve_vshlq_r_uv4si (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vqshlq_uv4si (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_r_u32 (uint32x4_t __a, int32_t __b)
-{
-  return __builtin_mve_vqshlq_r_uv4si (__a, __b);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq_s32 (uint32_t __a, int32x4_t __b)
@@ -4196,13 +3895,6 @@ __arm_vbrsrq_n_u32 (uint32x4_t __a, int32_t __b)
   return __builtin_mve_vbrsrq_n_uv4si (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_n_u32 (uint32x4_t __a, const int __imm)
-{
-  return __builtin_mve_vshlq_n_uv4si (__a, __imm);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrshrq_n_u32 (uint32x4_t __a, const int __imm)
@@ -4210,13 +3902,6 @@ __arm_vrshrq_n_u32 (uint32x4_t __a, const int __imm)
   return __builtin_mve_vrshrq_n_uv4si (__a, __imm);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_n_u32 (uint32x4_t __a, const int __imm)
-{
-  return __builtin_mve_vqshlq_n_uv4si (__a, __imm);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq_n_s32 (int32x4_t __a, int32_t __b)
@@ -4308,27 +3993,6 @@ __arm_vaddvq_p_s32 (int32x4_t __a, mve_pred16_t __p)
   return __builtin_mve_vaddvq_p_sv4si (__a, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_r_s32 (int32x4_t __a, int32_t __b)
-{
-  return __builtin_mve_vshlq_r_sv4si (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vqshlq_sv4si (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_r_s32 (int32x4_t __a, int32_t __b)
-{
-  return __builtin_mve_vqshlq_r_sv4si (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_s32 (int32x4_t __a, int32x4_t __b)
@@ -4455,13 +4119,6 @@ __arm_vaddvaq_s32 (int32_t __a, int32x4_t __b)
   return __builtin_mve_vaddvaq_sv4si (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_n_s32 (int32x4_t __a, const int __imm)
-{
-  return __builtin_mve_vshlq_n_sv4si (__a, __imm);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrshrq_n_s32 (int32x4_t __a, const int __imm)
@@ -4469,13 +4126,6 @@ __arm_vrshrq_n_s32 (int32x4_t __a, const int __imm)
   return __builtin_mve_vrshrq_n_sv4si (__a, __imm);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_n_s32 (int32x4_t __a, const int __imm)
-{
-  return __builtin_mve_vqshlq_n_sv4si (__a, __imm);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqmovntq_u16 (uint8x16_t __a, uint16x8_t __b)
@@ -5272,20 +4922,6 @@ __arm_vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm)
   return __builtin_mve_vsliq_n_uv16qi (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_r_u8 (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlq_m_r_uv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_r_u8 (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshlq_m_r_uv16qi (__a, __b, __p);
-}
-
 __extension__ extern __inline uint8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq_p_s8 (uint8_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -5398,13 +5034,6 @@ __arm_vcmpeqq_m_n_s8 (int8x16_t __a, int8_t __b, mve_pred16_t __p)
   return __builtin_mve_vcmpeqq_m_n_sv16qi (__a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_r_s8 (int8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlq_m_r_sv16qi (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrev64q_m_s8 (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
@@ -5412,13 +5041,6 @@ __arm_vrev64q_m_s8 (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
   return __builtin_mve_vrev64q_m_sv16qi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_r_s8 (int8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshlq_m_r_sv16qi (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqnegq_m_s8 (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
@@ -5826,20 +5448,6 @@ __arm_vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm)
   return __builtin_mve_vsliq_n_uv8hi (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_r_u16 (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlq_m_r_uv8hi (__a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_r_u16 (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshlq_m_r_uv8hi (__a, __b, __p);
-}
-
 __extension__ extern __inline uint16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq_p_s16 (uint16_t __a, int16x8_t __b, mve_pred16_t __p)
@@ -5952,13 +5560,6 @@ __arm_vcmpeqq_m_n_s16 (int16x8_t __a, int16_t __b, mve_pred16_t __p)
   return __builtin_mve_vcmpeqq_m_n_sv8hi (__a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_r_s16 (int16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlq_m_r_sv8hi (__a, __b, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrev64q_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
@@ -5966,13 +5567,6 @@ __arm_vrev64q_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
   return __builtin_mve_vrev64q_m_sv8hi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_r_s16 (int16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshlq_m_r_sv8hi (__a, __b, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqnegq_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
@@ -6379,20 +5973,6 @@ __arm_vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm)
   return __builtin_mve_vsliq_n_uv4si (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_r_u32 (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlq_m_r_uv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_r_u32 (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshlq_m_r_uv4si (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq_p_s32 (uint32_t __a, int32x4_t __b, mve_pred16_t __p)
@@ -6505,13 +6085,6 @@ __arm_vcmpeqq_m_n_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
   return __builtin_mve_vcmpeqq_m_n_sv4si (__a, __b, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_r_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlq_m_r_sv4si (__a, __b, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrev64q_m_s32 (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
@@ -6519,13 +6092,6 @@ __arm_vrev64q_m_s32 (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
   return __builtin_mve_vrev64q_m_sv4si (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_r_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshlq_m_r_sv4si (__a, __b, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqnegq_m_s32 (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
@@ -7527,13 +7093,6 @@ __arm_vsriq_m_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm, mve_pred16_
   return __builtin_mve_vsriq_m_n_uv16qi (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlq_m_uv16qi (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vabavq_p_u8 (uint32_t __a, uint8x16_t __b, uint8x16_t __c, mve_pred16_t __p)
@@ -7541,13 +7100,6 @@ __arm_vabavq_p_u8 (uint32_t __a, uint8x16_t __b, uint8x16_t __c, mve_pred16_t __
   return __builtin_mve_vabavq_p_uv16qi (__a, __b, __c, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlq_m_sv16qi (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m_n_s16 (int16x8_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
@@ -7576,13 +7128,6 @@ __arm_vsriq_m_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm, mve_pred16
   return __builtin_mve_vsriq_m_n_uv8hi (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlq_m_uv8hi (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vabavq_p_u16 (uint32_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t __p)
@@ -7590,13 +7135,6 @@ __arm_vabavq_p_u16 (uint32_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t _
   return __builtin_mve_vabavq_p_uv8hi (__a, __b, __c, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m_n_s32 (int32x4_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
@@ -7625,13 +7163,6 @@ __arm_vsriq_m_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm, mve_pred16
   return __builtin_mve_vsriq_m_n_uv4si (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlq_m_uv4si (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vabavq_p_u32 (uint32_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p)
@@ -7639,13 +7170,6 @@ __arm_vabavq_p_u32 (uint32_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t _
   return __builtin_mve_vabavq_p_uv4si (__a, __b, __c, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlq_m_sv4si (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -8507,90 +8031,6 @@ __arm_vqrdmlsdhxq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve
   return __builtin_mve_vqrdmlsdhxq_m_sv8hi (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshlq_m_n_sv16qi (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshlq_m_n_sv4si (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshlq_m_n_sv8hi (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_n_u8 (uint8x16_t __inactive, uint8x16_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshlq_m_n_uv16qi (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_n_u32 (uint32x4_t __inactive, uint32x4_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshlq_m_n_uv4si (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshlq_m_n_uv8hi (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshlq_m_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshlq_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshlq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshlq_m_uv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshlq_m_uv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshlq_m_uv8hi (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrshrq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
@@ -8633,48 +8073,6 @@ __arm_vrshrq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, const int __imm, mv
   return __builtin_mve_vrshrq_m_n_uv8hi (__inactive, __a, __imm, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlq_m_n_sv16qi (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlq_m_n_sv4si (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlq_m_n_sv8hi (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_n_u8 (uint8x16_t __inactive, uint8x16_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlq_m_n_uv16qi (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_n_u32 (uint32x4_t __inactive, uint32x4_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlq_m_n_uv4si (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlq_m_n_uv8hi (__inactive, __a, __imm, __p);
-}
-
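(A hedged sketch for the _m_n wrappers deleted above; the function name
is illustrative only, and the immediate must be a compile-time constant
in range for the element size, 0..15 for uint16x8_t:

  #include <arm_mve.h>

  /* Predicated immediate shift: lanes whose predicate bits are clear
     are taken from inactive instead of being shifted.  */
  uint16x8_t
  shift_m_n (uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
  {
    return vshlq_m_n (inactive, a, 3, p);
  }
)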
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshrq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
@@ -11981,163 +11379,79 @@ __arm_vrev64q_x_s32 (int32x4_t __a, mve_pred16_t __p)
 
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_x_u8 (uint8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_uv16qi (__arm_vuninitializedq_u8 (), __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_x_u16 (uint16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_x_u32 (uint32x4_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_uv4si (__arm_vuninitializedq_u32 (), __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshllbq_x_n_s8 (int8x16_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshllbq_m_n_sv16qi (__arm_vuninitializedq_s16 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshllbq_x_n_s16 (int16x8_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshllbq_m_n_sv8hi (__arm_vuninitializedq_s32 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshllbq_x_n_u8 (uint8x16_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshllbq_m_n_uv16qi (__arm_vuninitializedq_u16 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshllbq_x_n_u16 (uint16x8_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshllbq_m_n_uv8hi (__arm_vuninitializedq_u32 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlltq_x_n_s8 (int8x16_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlltq_m_n_sv16qi (__arm_vuninitializedq_s16 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlltq_x_n_s16 (int16x8_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlltq_m_n_sv8hi (__arm_vuninitializedq_s32 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlltq_x_n_u8 (uint8x16_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlltq_m_n_uv16qi (__arm_vuninitializedq_u16 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlltq_x_n_u16 (uint16x8_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlltq_m_n_uv8hi (__arm_vuninitializedq_u32 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
+__arm_vrev64q_x_u8 (uint8x16_t __a, mve_pred16_t __p)
 {
-  return __builtin_mve_vshlq_m_sv16qi (__arm_vuninitializedq_s8 (), __a, __b, __p);
+  return __builtin_mve_vrev64q_m_uv16qi (__arm_vuninitializedq_u8 (), __a, __p);
 }
 
-__extension__ extern __inline int16x8_t
+__extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
+__arm_vrev64q_x_u16 (uint16x8_t __a, mve_pred16_t __p)
 {
-  return __builtin_mve_vshlq_m_sv8hi (__arm_vuninitializedq_s16 (), __a, __b, __p);
+  return __builtin_mve_vrev64q_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __p);
 }
 
-__extension__ extern __inline int32x4_t
+__extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
+__arm_vrev64q_x_u32 (uint32x4_t __a, mve_pred16_t __p)
 {
-  return __builtin_mve_vshlq_m_sv4si (__arm_vuninitializedq_s32 (), __a, __b, __p);
+  return __builtin_mve_vrev64q_m_uv4si (__arm_vuninitializedq_u32 (), __a, __p);
 }
 
-__extension__ extern __inline uint8x16_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x_u8 (uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
+__arm_vshllbq_x_n_s8 (int8x16_t __a, const int __imm, mve_pred16_t __p)
 {
-  return __builtin_mve_vshlq_m_uv16qi (__arm_vuninitializedq_u8 (), __a, __b, __p);
+  return __builtin_mve_vshllbq_m_n_sv16qi (__arm_vuninitializedq_s16 (), __a, __imm, __p);
 }
 
-__extension__ extern __inline uint16x8_t
+__extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x_u16 (uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
+__arm_vshllbq_x_n_s16 (int16x8_t __a, const int __imm, mve_pred16_t __p)
 {
-  return __builtin_mve_vshlq_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __b, __p);
+  return __builtin_mve_vshllbq_m_n_sv8hi (__arm_vuninitializedq_s32 (), __a, __imm, __p);
 }
 
-__extension__ extern __inline uint32x4_t
+__extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x_u32 (uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
+__arm_vshllbq_x_n_u8 (uint8x16_t __a, const int __imm, mve_pred16_t __p)
 {
-  return __builtin_mve_vshlq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __b, __p);
+  return __builtin_mve_vshllbq_m_n_uv16qi (__arm_vuninitializedq_u16 (), __a, __imm, __p);
 }
 
-__extension__ extern __inline int8x16_t
+__extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x_n_s8 (int8x16_t __a, const int __imm, mve_pred16_t __p)
+__arm_vshllbq_x_n_u16 (uint16x8_t __a, const int __imm, mve_pred16_t __p)
 {
-  return __builtin_mve_vshlq_m_n_sv16qi (__arm_vuninitializedq_s8 (), __a, __imm, __p);
+  return __builtin_mve_vshllbq_m_n_uv8hi (__arm_vuninitializedq_u32 (), __a, __imm, __p);
 }
 
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x_n_s16 (int16x8_t __a, const int __imm, mve_pred16_t __p)
+__arm_vshlltq_x_n_s8 (int8x16_t __a, const int __imm, mve_pred16_t __p)
 {
-  return __builtin_mve_vshlq_m_n_sv8hi (__arm_vuninitializedq_s16 (), __a, __imm, __p);
+  return __builtin_mve_vshlltq_m_n_sv16qi (__arm_vuninitializedq_s16 (), __a, __imm, __p);
 }
 
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x_n_s32 (int32x4_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshlq_m_n_sv4si (__arm_vuninitializedq_s32 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x_n_u8 (uint8x16_t __a, const int __imm, mve_pred16_t __p)
+__arm_vshlltq_x_n_s16 (int16x8_t __a, const int __imm, mve_pred16_t __p)
 {
-  return __builtin_mve_vshlq_m_n_uv16qi (__arm_vuninitializedq_u8 (), __a, __imm, __p);
+  return __builtin_mve_vshlltq_m_n_sv8hi (__arm_vuninitializedq_s32 (), __a, __imm, __p);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x_n_u16 (uint16x8_t __a, const int __imm, mve_pred16_t __p)
+__arm_vshlltq_x_n_u8 (uint8x16_t __a, const int __imm, mve_pred16_t __p)
 {
-  return __builtin_mve_vshlq_m_n_uv8hi (__arm_vuninitializedq_u16 (), __a, __imm, __p);
+  return __builtin_mve_vshlltq_m_n_uv16qi (__arm_vuninitializedq_u16 (), __a, __imm, __p);
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x_n_u32 (uint32x4_t __a, const int __imm, mve_pred16_t __p)
+__arm_vshlltq_x_n_u16 (uint16x8_t __a, const int __imm, mve_pred16_t __p)
 {
-  return __builtin_mve_vshlq_m_n_uv4si (__arm_vuninitializedq_u32 (), __a, __imm, __p);
+  return __builtin_mve_vshlltq_m_n_uv8hi (__arm_vuninitializedq_u32 (), __a, __imm, __p);
 }
 
 __extension__ extern __inline int8x16_t
@@ -16275,48 +15589,6 @@ __arm_vcmpneq (uint32x4_t __a, uint32x4_t __b)
  return __arm_vcmpneq_u32 (__a, __b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vshlq_s8 (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vshlq_s16 (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vshlq_s32 (__a, __b);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq (uint8x16_t __a, int8x16_t __b)
-{
- return __arm_vshlq_u8 (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq (uint16x8_t __a, int16x8_t __b)
-{
- return __arm_vshlq_u16 (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq (uint32x4_t __a, int32x4_t __b)
-{
- return __arm_vshlq_u32 (__a, __b);
-}
-
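(A minimal caller of the unpredicated polymorphic form removed above,
which should resolve identically before and after this patch:

  #include <arm_mve.h>

  /* Per-lane variable shift: negative lanes of b shift right.  */
  int32x4_t
  shift (int32x4_t a, int32x4_t b)
  {
    return vshlq (a, b);
  }
)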
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (uint8x16_t __a, uint8x16_t __b)
@@ -16457,27 +15729,6 @@ __arm_vaddvaq (uint32_t __a, uint8x16_t __b)
  return __arm_vaddvaq_u8 (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_r (uint8x16_t __a, int32_t __b)
-{
- return __arm_vshlq_r_u8 (__a, __b);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq (uint8x16_t __a, int8x16_t __b)
-{
- return __arm_vqshlq_u8 (__a, __b);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_r (uint8x16_t __a, int32_t __b)
-{
- return __arm_vqshlq_r_u8 (__a, __b);
-}
-
 __extension__ extern __inline uint8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq (uint8_t __a, int8x16_t __b)
@@ -16513,13 +15764,6 @@ __arm_vbrsrq (uint8x16_t __a, int32_t __b)
  return __arm_vbrsrq_n_u8 (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_n (uint8x16_t __a, const int __imm)
-{
- return __arm_vshlq_n_u8 (__a, __imm);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrshrq (uint8x16_t __a, const int __imm)
@@ -16527,13 +15771,6 @@ __arm_vrshrq (uint8x16_t __a, const int __imm)
  return __arm_vrshrq_n_u8 (__a, __imm);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_n (uint8x16_t __a, const int __imm)
-{
- return __arm_vqshlq_n_u8 (__a, __imm);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq (int8x16_t __a, int8_t __b)
@@ -16625,27 +15862,6 @@ __arm_vaddvq_p (int8x16_t __a, mve_pred16_t __p)
  return __arm_vaddvq_p_s8 (__a, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_r (int8x16_t __a, int32_t __b)
-{
- return __arm_vshlq_r_s8 (__a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vqshlq_s8 (__a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_r (int8x16_t __a, int32_t __b)
-{
- return __arm_vqshlq_r_s8 (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (int8x16_t __a, int8x16_t __b)
@@ -16772,13 +15988,6 @@ __arm_vaddvaq (int32_t __a, int8x16_t __b)
  return __arm_vaddvaq_s8 (__a, __b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_n (int8x16_t __a, const int __imm)
-{
- return __arm_vshlq_n_s8 (__a, __imm);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrshrq (int8x16_t __a, const int __imm)
@@ -16786,13 +15995,6 @@ __arm_vrshrq (int8x16_t __a, const int __imm)
  return __arm_vrshrq_n_s8 (__a, __imm);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_n (int8x16_t __a, const int __imm)
-{
- return __arm_vqshlq_n_s8 (__a, __imm);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (uint16x8_t __a, uint16x8_t __b)
@@ -16933,27 +16135,6 @@ __arm_vaddvaq (uint32_t __a, uint16x8_t __b)
  return __arm_vaddvaq_u16 (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_r (uint16x8_t __a, int32_t __b)
-{
- return __arm_vshlq_r_u16 (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq (uint16x8_t __a, int16x8_t __b)
-{
- return __arm_vqshlq_u16 (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_r (uint16x8_t __a, int32_t __b)
-{
- return __arm_vqshlq_r_u16 (__a, __b);
-}
-
 __extension__ extern __inline uint16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq (uint16_t __a, int16x8_t __b)
@@ -16989,13 +16170,6 @@ __arm_vbrsrq (uint16x8_t __a, int32_t __b)
  return __arm_vbrsrq_n_u16 (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_n (uint16x8_t __a, const int __imm)
-{
- return __arm_vshlq_n_u16 (__a, __imm);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrshrq (uint16x8_t __a, const int __imm)
@@ -17003,13 +16177,6 @@ __arm_vrshrq (uint16x8_t __a, const int __imm)
  return __arm_vrshrq_n_u16 (__a, __imm);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_n (uint16x8_t __a, const int __imm)
-{
- return __arm_vqshlq_n_u16 (__a, __imm);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq (int16x8_t __a, int16_t __b)
@@ -17101,27 +16268,6 @@ __arm_vaddvq_p (int16x8_t __a, mve_pred16_t __p)
  return __arm_vaddvq_p_s16 (__a, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_r (int16x8_t __a, int32_t __b)
-{
- return __arm_vshlq_r_s16 (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vqshlq_s16 (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_r (int16x8_t __a, int32_t __b)
-{
- return __arm_vqshlq_r_s16 (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (int16x8_t __a, int16x8_t __b)
@@ -17248,13 +16394,6 @@ __arm_vaddvaq (int32_t __a, int16x8_t __b)
  return __arm_vaddvaq_s16 (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_n (int16x8_t __a, const int __imm)
-{
- return __arm_vshlq_n_s16 (__a, __imm);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrshrq (int16x8_t __a, const int __imm)
@@ -17262,13 +16401,6 @@ __arm_vrshrq (int16x8_t __a, const int __imm)
  return __arm_vrshrq_n_s16 (__a, __imm);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_n (int16x8_t __a, const int __imm)
-{
- return __arm_vqshlq_n_s16 (__a, __imm);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (uint32x4_t __a, uint32x4_t __b)
@@ -17409,27 +16541,6 @@ __arm_vaddvaq (uint32_t __a, uint32x4_t __b)
  return __arm_vaddvaq_u32 (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_r (uint32x4_t __a, int32_t __b)
-{
- return __arm_vshlq_r_u32 (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq (uint32x4_t __a, int32x4_t __b)
-{
- return __arm_vqshlq_u32 (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_r (uint32x4_t __a, int32_t __b)
-{
- return __arm_vqshlq_r_u32 (__a, __b);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq (uint32_t __a, int32x4_t __b)
@@ -17465,13 +16576,6 @@ __arm_vbrsrq (uint32x4_t __a, int32_t __b)
  return __arm_vbrsrq_n_u32 (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_n (uint32x4_t __a, const int __imm)
-{
- return __arm_vshlq_n_u32 (__a, __imm);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrshrq (uint32x4_t __a, const int __imm)
@@ -17479,13 +16583,6 @@ __arm_vrshrq (uint32x4_t __a, const int __imm)
  return __arm_vrshrq_n_u32 (__a, __imm);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_n (uint32x4_t __a, const int __imm)
-{
- return __arm_vqshlq_n_u32 (__a, __imm);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq (int32x4_t __a, int32_t __b)
@@ -17577,27 +16674,6 @@ __arm_vaddvq_p (int32x4_t __a, mve_pred16_t __p)
  return __arm_vaddvq_p_s32 (__a, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_r (int32x4_t __a, int32_t __b)
-{
- return __arm_vshlq_r_s32 (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vqshlq_s32 (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_r (int32x4_t __a, int32_t __b)
-{
- return __arm_vqshlq_r_s32 (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (int32x4_t __a, int32x4_t __b)
@@ -17724,13 +16800,6 @@ __arm_vaddvaq (int32_t __a, int32x4_t __b)
  return __arm_vaddvaq_s32 (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_n (int32x4_t __a, const int __imm)
-{
- return __arm_vshlq_n_s32 (__a, __imm);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrshrq (int32x4_t __a, const int __imm)
@@ -17738,13 +16807,6 @@ __arm_vrshrq (int32x4_t __a, const int __imm)
  return __arm_vrshrq_n_s32 (__a, __imm);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_n (int32x4_t __a, const int __imm)
-{
- return __arm_vqshlq_n_s32 (__a, __imm);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqmovntq (uint8x16_t __a, uint16x8_t __b)
@@ -18501,20 +17563,6 @@ __arm_vsliq (uint8x16_t __a, uint8x16_t __b, const int __imm)
  return __arm_vsliq_n_u8 (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_r (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vshlq_m_r_u8 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_r (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vqshlq_m_r_u8 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq_p (uint8_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -18627,13 +17675,6 @@ __arm_vcmpeqq_m (int8x16_t __a, int8_t __b, mve_pred16_t __p)
  return __arm_vcmpeqq_m_n_s8 (__a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_r (int8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vshlq_m_r_s8 (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrev64q_m (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
@@ -18641,13 +17682,6 @@ __arm_vrev64q_m (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
  return __arm_vrev64q_m_s8 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_r (int8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vqshlq_m_r_s8 (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqnegq_m (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
@@ -19054,20 +18088,6 @@ __arm_vsliq (uint16x8_t __a, uint16x8_t __b, const int __imm)
  return __arm_vsliq_n_u16 (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_r (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vshlq_m_r_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_r (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vqshlq_m_r_u16 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq_p (uint16_t __a, int16x8_t __b, mve_pred16_t __p)
@@ -19175,16 +18195,9 @@ __arm_vcmpeqq_m (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
 
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m (int16x8_t __a, int16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpeqq_m_n_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_r (int16x8_t __a, int32_t __b, mve_pred16_t __p)
+__arm_vcmpeqq_m (int16x8_t __a, int16_t __b, mve_pred16_t __p)
 {
- return __arm_vshlq_m_r_s16 (__a, __b, __p);
+ return __arm_vcmpeqq_m_n_s16 (__a, __b, __p);
 }
 
 __extension__ extern __inline int16x8_t
@@ -19194,13 +18207,6 @@ __arm_vrev64q_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
  return __arm_vrev64q_m_s16 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_r (int16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vqshlq_m_r_s16 (__a, __b, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqnegq_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
@@ -19607,20 +18613,6 @@ __arm_vsliq (uint32x4_t __a, uint32x4_t __b, const int __imm)
  return __arm_vsliq_n_u32 (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_r (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vshlq_m_r_u32 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_r (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vqshlq_m_r_u32 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vminavq_p (uint32_t __a, int32x4_t __b, mve_pred16_t __p)
@@ -19733,13 +18725,6 @@ __arm_vcmpeqq_m (int32x4_t __a, int32_t __b, mve_pred16_t __p)
  return __arm_vcmpeqq_m_n_s32 (__a, __b, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_r (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vshlq_m_r_s32 (__a, __b, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrev64q_m (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
@@ -19747,13 +18732,6 @@ __arm_vrev64q_m (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
  return __arm_vrev64q_m_s32 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_r (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vqshlq_m_r_s32 (__a, __b, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqnegq_m (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
@@ -20755,13 +19733,6 @@ __arm_vsriq_m (uint8x16_t __a, uint8x16_t __b, const int __imm, mve_pred16_t __p
  return __arm_vsriq_m_n_u8 (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m (uint8x16_t __inactive, uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vshlq_m_u8 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vabavq_p (uint32_t __a, uint8x16_t __b, uint8x16_t __c, mve_pred16_t __p)
@@ -20769,13 +19740,6 @@ __arm_vabavq_p (uint32_t __a, uint8x16_t __b, uint8x16_t __c, mve_pred16_t __p)
  return __arm_vabavq_p_u8 (__a, __b, __c, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vshlq_m_s8 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m (int16x8_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
@@ -20804,13 +19768,6 @@ __arm_vsriq_m (uint16x8_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p
  return __arm_vsriq_m_n_u16 (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vshlq_m_u16 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vabavq_p (uint32_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t __p)
@@ -20818,13 +19775,6 @@ __arm_vabavq_p (uint32_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t __p)
  return __arm_vabavq_p_u16 (__a, __b, __c, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vshlq_m_s16 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m (int32x4_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
@@ -20853,13 +19803,6 @@ __arm_vsriq_m (uint32x4_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p
  return __arm_vsriq_m_n_u32 (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m (uint32x4_t __inactive, uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vshlq_m_u32 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vabavq_p (uint32_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p)
@@ -20867,13 +19810,6 @@ __arm_vabavq_p (uint32_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p)
  return __arm_vabavq_p_u32 (__a, __b, __c, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vshlq_m_s32 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -21735,90 +20671,6 @@ __arm_vqrdmlsdhxq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pre
  return __arm_vqrdmlsdhxq_m_s16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_n (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshlq_m_n_s8 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_n (int32x4_t __inactive, int32x4_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshlq_m_n_s32 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_n (int16x8_t __inactive, int16x8_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshlq_m_n_s16 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_n (uint8x16_t __inactive, uint8x16_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshlq_m_n_u8 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_n (uint32x4_t __inactive, uint32x4_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshlq_m_n_u32 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m_n (uint16x8_t __inactive, uint16x8_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshlq_m_n_u16 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vqshlq_m_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vqshlq_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vqshlq_m_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m (uint8x16_t __inactive, uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vqshlq_m_u8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m (uint32x4_t __inactive, uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vqshlq_m_u32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshlq_m (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vqshlq_m_u16 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrshrq_m (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
@@ -21861,48 +20713,6 @@ __arm_vrshrq_m (uint16x8_t __inactive, uint16x8_t __a, const int __imm, mve_pred
  return __arm_vrshrq_m_n_u16 (__inactive, __a, __imm, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_n (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlq_m_n_s8 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_n (int32x4_t __inactive, int32x4_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlq_m_n_s32 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_n (int16x8_t __inactive, int16x8_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlq_m_n_s16 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_n (uint8x16_t __inactive, uint8x16_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlq_m_n_u8 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_n (uint32x4_t __inactive, uint32x4_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlq_m_n_u32 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_m_n (uint16x8_t __inactive, uint16x8_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlq_m_n_u16 (__inactive, __a, __imm, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshrq_m (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
@@ -24787,90 +23597,6 @@ __arm_vshlltq_x (uint16x8_t __a, const int __imm, mve_pred16_t __p)
  return __arm_vshlltq_x_n_u16 (__a, __imm, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vshlq_x_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vshlq_x_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vshlq_x_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x (uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vshlq_x_u8 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x (uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vshlq_x_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x (uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vshlq_x_u32 (__a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x_n (int8x16_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlq_x_n_s8 (__a, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x_n (int16x8_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlq_x_n_s16 (__a, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x_n (int32x4_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlq_x_n_s32 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x_n (uint8x16_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlq_x_n_u8 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x_n (uint16x8_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlq_x_n_u16 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlq_x_n (uint32x4_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlq_x_n_u32 (__a, __imm, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrshrq_x (int8x16_t __a, const int __imm, mve_pred16_t __p)
@@ -28165,16 +26891,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vcvtq_f16_u16 (__ARM_mve_coerce(__p0, uint16x8_t)), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vcvtq_f32_u32 (__ARM_mve_coerce(__p0, uint32x4_t)));})
 
-#define __arm_vshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
-
 #define __arm_vshrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vshrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
@@ -28434,24 +27150,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmvq_f16 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \
   int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmvq_f32 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float32x4_t)));})
 
-#define __arm_vshlq_r(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlq_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlq_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlq_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
-
-#define __arm_vshlq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
-
 #define __arm_vshlltq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlltq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
@@ -28490,34 +27188,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshluq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
   int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshluq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1));})
 
-#define __arm_vqshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
-
-#define __arm_vqshlq_r(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshlq_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshlq_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshlq_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vqshlq_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
-
-#define __arm_vqshlq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vqshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
-
 #define __arm_vmlaldavxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -28756,24 +27426,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsliq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsliq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
-#define __arm_vshlq_m_r(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_m_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlq_m_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlq_m_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlq_m_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_m_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_m_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
-
-#define __arm_vqshlq_m_r(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshlq_m_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshlq_m_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshlq_m_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vqshlq_m_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_m_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_m_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
-
 #define __arm_vqrdmlsdhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -30170,44 +28822,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpneq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpneq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
 
-#define __arm_vshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
-
-#define __arm_vshlq_r(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlq_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlq_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlq_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
-
-#define __arm_vqshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
-
-#define __arm_vqshlq_r(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshlq_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshlq_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshlq_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vqshlq_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
-
 #define __arm_vqshluq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshluq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
@@ -30223,24 +28837,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrshrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrshrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
 
-#define __arm_vshlq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
-
-#define __arm_vqshlq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vqshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
-
 #define __arm_vornq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -30588,15 +29184,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmlsdhxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmlsdhxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
 
-#define __arm_vqshlq_m_r(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshlq_m_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshlq_m_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshlq_m_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vqshlq_m_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_m_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_m_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
-
 #define __arm_vrev64q_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -30607,15 +29194,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vrev64q_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrev64q_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
-#define __arm_vshlq_m_r(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_m_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlq_m_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlq_m_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlq_m_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_m_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_m_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
-
 #define __arm_vsliq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -31514,16 +30092,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vrev16q_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2), \
   int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrev16q_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2));})
 
-#define __arm_vshlq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
 #define __arm_vrshrq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vrshrq_x_n_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2, p3), \
@@ -31547,15 +30115,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlltq_x_n_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlltq_x_n_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2, p3));})
 
-#define __arm_vshlq_x_n(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_x_n_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlq_x_n_s16 (__ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlq_x_n_s32 (__ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlq_x_n_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_x_n_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
-
 #define __arm_vdwdupq_x_u8(p1,p2,p3,p4) ({ __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_int_n]: __arm_vdwdupq_x_n_u8 ((uint32_t) __p1, p2, p3, p4), \
@@ -31771,27 +30330,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int), p3), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int), p3));})
 
-#define __arm_vqshlq_m_n(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqshlq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t),  p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshlq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t),  p2, p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshlq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),  p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vqshlq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t),  p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vqshlq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),  p2, p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vqshlq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),  p2, p3));})
-
-#define __arm_vqshlq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqshlq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshlq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshlq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqshlq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshlq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshlq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
 #define __arm_vrshrq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -32044,36 +30582,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshluq_m_n_s16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshluq_m_n_s32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3));})
 
-#define __arm_vshlq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
-#define __arm_vshlq_m_n(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t),  p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t),  p2, p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),  p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vshlq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t),  p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vshlq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),  p2, p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vshlq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),  p2, p3));})
-
-#define __arm_vshlq_m_r(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_m_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlq_m_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlq_m_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlq_m_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_m_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_m_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
-
 #define __arm_vsriq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-- 
2.34.1



* [PATCH 13/23] arm: [MVE intrinsics] factorize vmaxq vminq
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (10 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 12/23] arm: [MVE intrinsics] rework vqshlq vshlq Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 10:58   ` Kyrylo Tkachov
  2023-05-05  8:39 ` [PATCH 14/23] arm: [MVE intrinsics] rework " Christophe Lyon
                   ` (10 subsequent siblings)
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vmaxq and vminq so that they use the same pattern.
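
This is a mechanical refactoring: the merged pattern is expected to
emit exactly the same instructions as the four patterns it replaces.
As a minimal sanity-check sketch (hypothetical function names, not
part of the patch), each of these should still compile to a single
vmax/vmin instruction:

  #include <arm_mve.h>

  /* Expected: vmax.s8, vmax.u8, vmin.s8 and vmin.u8 respectively.  */
  int8x16_t  max_s8 (int8x16_t a, int8x16_t b)   { return vmaxq_s8 (a, b); }
  uint8x16_t max_u8 (uint8x16_t a, uint8x16_t b) { return vmaxq_u8 (a, b); }
  int8x16_t  min_s8 (int8x16_t a, int8x16_t b)   { return vminq_s8 (a, b); }
  uint8x16_t min_u8 (uint8x16_t a, uint8x16_t b) { return vminq_u8 (a, b); }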

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MAX_MIN_SU): New.
	(max_min_su_str): New.
	(max_min_supf): New.
	* config/arm/mve.md (mve_vmaxq_s<mode>, mve_vmaxq_u<mode>)
	(mve_vminq_s<mode>, mve_vminq_u<mode>): Merge into ...
	(mve_<max_min_su_str>q_<max_min_supf><mode>): ... this.
---
 gcc/config/arm/iterators.md | 11 ++++++++++
 gcc/config/arm/mve.md       | 44 +++++--------------------------------
 2 files changed, 16 insertions(+), 39 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 3133642ea82..9ff61e0573b 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -330,6 +330,9 @@ (define_code_iterator FCVT [unsigned_float float])
 ;; Saturating addition, subtraction
 (define_code_iterator SSPLUSMINUS [ss_plus ss_minus])
 
+;; Max/Min iterator, to factorize MVE patterns
+(define_code_iterator MAX_MIN_SU [smax umax smin umin])
+
 ;; MVE integer binary operations.
 (define_code_iterator MVE_INT_BINARY_RTX [plus minus mult])
 
@@ -1271,6 +1274,14 @@ (define_code_attr float_sup [(unsigned_float "u") (float "s")])
 
 (define_code_attr float_SUP [(unsigned_float "U") (float "S")])
 
+;; max/min for MVE
+(define_code_attr max_min_su_str [(smax "vmax") (umax "vmax") (smin "vmin") (umin "vmin")])
+
+(define_code_attr max_min_supf [
+		 (smax "s") (umax "u")
+		 (smin "s") (umin "u")
+		 ])
+
 ;;----------------------------------------------------------------------------
 ;; Int attributes
 ;;----------------------------------------------------------------------------
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index c8cb4e430ac..44409b40e5f 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1106,29 +1106,20 @@ (define_insn "mve_vmaxavq_s<mode>"
 ])
 
 ;;
-;; [vmaxq_u, vmaxq_s])
+;; [vmaxq_u, vmaxq_s]
+;; [vminq_s, vminq_u]
 ;;
-(define_insn "mve_vmaxq_s<mode>"
+(define_insn "mve_<max_min_su_str>q_<max_min_supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(smax:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
+	(MAX_MIN_SU:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
 		    (match_operand:MVE_2 2 "s_register_operand" "w")))
   ]
   "TARGET_HAVE_MVE"
-  "vmax.%#<V_s_elem>\t%q0, %q1, %q2"
+  "<max_min_su_str>.<max_min_supf>%#<V_sz_elem>\t%q0, %q1, %q2"
   [(set_attr "type" "mve_move")
 ])
 
-(define_insn "mve_vmaxq_u<mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(umax:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
-		    (match_operand:MVE_2 2 "s_register_operand" "w")))
-  ]
-  "TARGET_HAVE_MVE"
-  "vmax.%#<V_u_elem>\t%q0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-])
 
 ;;
 ;; [vmaxvq_u, vmaxvq_s])
@@ -1175,31 +1166,6 @@ (define_insn "mve_vminavq_s<mode>"
   [(set_attr "type" "mve_move")
 ])
 
-;;
-;; [vminq_s, vminq_u])
-;;
-(define_insn "mve_vminq_s<mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(smin:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
-		    (match_operand:MVE_2 2 "s_register_operand" "w")))
-  ]
-  "TARGET_HAVE_MVE"
-  "vmin.%#<V_s_elem>\t%q0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-])
-
-(define_insn "mve_vminq_u<mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(umin:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
-		    (match_operand:MVE_2 2 "s_register_operand" "w")))
-  ]
-  "TARGET_HAVE_MVE"
-  "vmin.%#<V_u_elem>\t%q0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-])
-
 ;;
 ;; [vminvq_u, vminvq_s])
 ;;
-- 
2.34.1



* [PATCH 14/23] arm: [MVE intrinsics] rework vmaxq vminq
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (11 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 13/23] arm: [MVE intrinsics] factorize vmaxq vminq Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 10:59   ` Kyrylo Tkachov
  2023-05-05  8:39 ` [PATCH 15/23] arm: [MVE intrinsics] add binary_rshift_narrow shape Christophe Lyon
                   ` (9 subsequent siblings)
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vmaxq and vminq using the new MVE builtins framework.
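
The user-visible intrinsics are unchanged: both the type-suffixed and
the overloaded forms remain available, only their implementation moves
from arm_mve.h into the framework. A small usage sketch (hypothetical
function name, not part of the patch):

  #include <arm_mve.h>

  /* The overload resolves to vmaxq_m_s16 through the new framework.  */
  int16x8_t
  clamp_max (int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p)
  {
    return vmaxq_m (inactive, a, b, p);
  }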

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (FUNCTION_WITH_RTX_M_NO_F): New.
	(vmaxq, vminq): New.
	* config/arm/arm-mve-builtins-base.def (vmaxq, vminq): New.
	* config/arm/arm-mve-builtins-base.h (vmaxq, vminq): New.
	* config/arm/arm_mve.h (vminq): Remove.
	(vmaxq): Remove.
	(vmaxq_m): Remove.
	(vminq_m): Remove.
	(vminq_x): Remove.
	(vmaxq_x): Remove.
	(vminq_u8): Remove.
	(vmaxq_u8): Remove.
	(vminq_s8): Remove.
	(vmaxq_s8): Remove.
	(vminq_u16): Remove.
	(vmaxq_u16): Remove.
	(vminq_s16): Remove.
	(vmaxq_s16): Remove.
	(vminq_u32): Remove.
	(vmaxq_u32): Remove.
	(vminq_s32): Remove.
	(vmaxq_s32): Remove.
	(vmaxq_m_s8): Remove.
	(vmaxq_m_s32): Remove.
	(vmaxq_m_s16): Remove.
	(vmaxq_m_u8): Remove.
	(vmaxq_m_u32): Remove.
	(vmaxq_m_u16): Remove.
	(vminq_m_s8): Remove.
	(vminq_m_s32): Remove.
	(vminq_m_s16): Remove.
	(vminq_m_u8): Remove.
	(vminq_m_u32): Remove.
	(vminq_m_u16): Remove.
	(vminq_x_s8): Remove.
	(vminq_x_s16): Remove.
	(vminq_x_s32): Remove.
	(vminq_x_u8): Remove.
	(vminq_x_u16): Remove.
	(vminq_x_u32): Remove.
	(vmaxq_x_s8): Remove.
	(vmaxq_x_s16): Remove.
	(vmaxq_x_s32): Remove.
	(vmaxq_x_u8): Remove.
	(vmaxq_x_u16): Remove.
	(vmaxq_x_u32): Remove.
	(__arm_vminq_u8): Remove.
	(__arm_vmaxq_u8): Remove.
	(__arm_vminq_s8): Remove.
	(__arm_vmaxq_s8): Remove.
	(__arm_vminq_u16): Remove.
	(__arm_vmaxq_u16): Remove.
	(__arm_vminq_s16): Remove.
	(__arm_vmaxq_s16): Remove.
	(__arm_vminq_u32): Remove.
	(__arm_vmaxq_u32): Remove.
	(__arm_vminq_s32): Remove.
	(__arm_vmaxq_s32): Remove.
	(__arm_vmaxq_m_s8): Remove.
	(__arm_vmaxq_m_s32): Remove.
	(__arm_vmaxq_m_s16): Remove.
	(__arm_vmaxq_m_u8): Remove.
	(__arm_vmaxq_m_u32): Remove.
	(__arm_vmaxq_m_u16): Remove.
	(__arm_vminq_m_s8): Remove.
	(__arm_vminq_m_s32): Remove.
	(__arm_vminq_m_s16): Remove.
	(__arm_vminq_m_u8): Remove.
	(__arm_vminq_m_u32): Remove.
	(__arm_vminq_m_u16): Remove.
	(__arm_vminq_x_s8): Remove.
	(__arm_vminq_x_s16): Remove.
	(__arm_vminq_x_s32): Remove.
	(__arm_vminq_x_u8): Remove.
	(__arm_vminq_x_u16): Remove.
	(__arm_vminq_x_u32): Remove.
	(__arm_vmaxq_x_s8): Remove.
	(__arm_vmaxq_x_s16): Remove.
	(__arm_vmaxq_x_s32): Remove.
	(__arm_vmaxq_x_u8): Remove.
	(__arm_vmaxq_x_u16): Remove.
	(__arm_vmaxq_x_u32): Remove.
	(__arm_vminq): Remove.
	(__arm_vmaxq): Remove.
	(__arm_vmaxq_m): Remove.
	(__arm_vminq_m): Remove.
	(__arm_vminq_x): Remove.
	(__arm_vmaxq_x): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |  11 +
 gcc/config/arm/arm-mve-builtins-base.def |   2 +
 gcc/config/arm/arm-mve-builtins-base.h   |   2 +
 gcc/config/arm/arm_mve.h                 | 628 -----------------------
 4 files changed, 15 insertions(+), 628 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 4bebf86f784..1839d5cb1a5 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -110,6 +110,15 @@ namespace arm_mve {
     UNSPEC##_M_S, UNSPEC##_M_U, UNSPEC##_M_F,				\
     UNSPEC##_M_N_S, UNSPEC##_M_N_U, -1))
 
+  /* Helper for builtins with RTX codes, _m predicated override, but
+     no floating-point versions.  */
+#define FUNCTION_WITH_RTX_M_NO_F(NAME, RTX_S, RTX_U, UNSPEC) FUNCTION	\
+  (NAME, unspec_based_mve_function_exact_insn,				\
+   (RTX_S, RTX_U, UNKNOWN,						\
+    -1, -1, -1,								\
+    UNSPEC##_M_S, UNSPEC##_M_U, -1,					\
+    -1, -1, -1))
+
   /* Helper for builtins without RTX codes, no _m predicated and no _n
      overrides.  */
 #define FUNCTION_WITHOUT_M_N(NAME, UNSPEC) FUNCTION			\
@@ -173,6 +182,8 @@ FUNCTION_WITHOUT_M_N (vcreateq, VCREATEQ)
 FUNCTION_WITH_RTX_M (veorq, XOR, VEORQ)
 FUNCTION_WITH_M_N_NO_F (vhaddq, VHADDQ)
 FUNCTION_WITH_M_N_NO_F (vhsubq, VHSUBQ)
+FUNCTION_WITH_RTX_M_NO_F (vmaxq, SMAX, UMAX, VMAXQ)
+FUNCTION_WITH_RTX_M_NO_F (vminq, SMIN, UMIN, VMINQ)
 FUNCTION_WITHOUT_N_NO_F (vmulhq, VMULHQ)
 FUNCTION_WITH_RTX_M_N (vmulq, MULT, VMULQ)
 FUNCTION_WITH_RTX_M_N_NO_N_F (vorrq, IOR, VORRQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index f2e40cda2af..3b42bf46e81 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -25,6 +25,8 @@ DEF_MVE_FUNCTION (vcreateq, create, all_integer_with_64, none)
 DEF_MVE_FUNCTION (veorq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vhaddq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vhsubq, binary_opt_n, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vmaxq, binary, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vminq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vmulhq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vmulq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vorrq, binary_orrq, all_integer, mx_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 5b62de6a922..81d10f4a8f4 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -30,6 +30,8 @@ extern const function_base *const vcreateq;
 extern const function_base *const veorq;
 extern const function_base *const vhaddq;
 extern const function_base *const vhsubq;
+extern const function_base *const vmaxq;
+extern const function_base *const vminq;
 extern const function_base *const vmulhq;
 extern const function_base *const vmulq;
 extern const function_base *const vorrq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index ad67dcfd024..5fbea52c8ef 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -65,9 +65,7 @@
 #define vmullbq_int(__a, __b) __arm_vmullbq_int(__a, __b)
 #define vmladavq(__a, __b) __arm_vmladavq(__a, __b)
 #define vminvq(__a, __b) __arm_vminvq(__a, __b)
-#define vminq(__a, __b) __arm_vminq(__a, __b)
 #define vmaxvq(__a, __b) __arm_vmaxvq(__a, __b)
-#define vmaxq(__a, __b) __arm_vmaxq(__a, __b)
 #define vcmphiq(__a, __b) __arm_vcmphiq(__a, __b)
 #define vcmpeqq(__a, __b) __arm_vcmpeqq(__a, __b)
 #define vcmpcsq(__a, __b) __arm_vcmpcsq(__a, __b)
@@ -214,8 +212,6 @@
 #define vcaddq_rot90_m(__inactive, __a, __b, __p) __arm_vcaddq_rot90_m(__inactive, __a, __b, __p)
 #define vhcaddq_rot270_m(__inactive, __a, __b, __p) __arm_vhcaddq_rot270_m(__inactive, __a, __b, __p)
 #define vhcaddq_rot90_m(__inactive, __a, __b, __p) __arm_vhcaddq_rot90_m(__inactive, __a, __b, __p)
-#define vmaxq_m(__inactive, __a, __b, __p) __arm_vmaxq_m(__inactive, __a, __b, __p)
-#define vminq_m(__inactive, __a, __b, __p) __arm_vminq_m(__inactive, __a, __b, __p)
 #define vmladavaq_p(__a, __b, __c, __p) __arm_vmladavaq_p(__a, __b, __c, __p)
 #define vmladavaxq_p(__a, __b, __c, __p) __arm_vmladavaxq_p(__a, __b, __c, __p)
 #define vmlaq_m(__a, __b, __c, __p) __arm_vmlaq_m(__a, __b, __c, __p)
@@ -339,8 +335,6 @@
 #define viwdupq_x_u8(__a, __b, __imm, __p) __arm_viwdupq_x_u8(__a, __b, __imm, __p)
 #define viwdupq_x_u16(__a, __b, __imm, __p) __arm_viwdupq_x_u16(__a, __b, __imm, __p)
 #define viwdupq_x_u32(__a, __b, __imm, __p) __arm_viwdupq_x_u32(__a, __b, __imm, __p)
-#define vminq_x(__a, __b, __p) __arm_vminq_x(__a, __b, __p)
-#define vmaxq_x(__a, __b, __p) __arm_vmaxq_x(__a, __b, __p)
 #define vabsq_x(__a, __p) __arm_vabsq_x(__a, __p)
 #define vclsq_x(__a, __p) __arm_vclsq_x(__a, __p)
 #define vclzq_x(__a, __p) __arm_vclzq_x(__a, __p)
@@ -614,9 +608,7 @@
 #define vmullbq_int_u8(__a, __b) __arm_vmullbq_int_u8(__a, __b)
 #define vmladavq_u8(__a, __b) __arm_vmladavq_u8(__a, __b)
 #define vminvq_u8(__a, __b) __arm_vminvq_u8(__a, __b)
-#define vminq_u8(__a, __b) __arm_vminq_u8(__a, __b)
 #define vmaxvq_u8(__a, __b) __arm_vmaxvq_u8(__a, __b)
-#define vmaxq_u8(__a, __b) __arm_vmaxq_u8(__a, __b)
 #define vcmpneq_n_u8(__a, __b) __arm_vcmpneq_n_u8(__a, __b)
 #define vcmphiq_u8(__a, __b) __arm_vcmphiq_u8(__a, __b)
 #define vcmphiq_n_u8(__a, __b) __arm_vcmphiq_n_u8(__a, __b)
@@ -656,9 +648,7 @@
 #define vmladavxq_s8(__a, __b) __arm_vmladavxq_s8(__a, __b)
 #define vmladavq_s8(__a, __b) __arm_vmladavq_s8(__a, __b)
 #define vminvq_s8(__a, __b) __arm_vminvq_s8(__a, __b)
-#define vminq_s8(__a, __b) __arm_vminq_s8(__a, __b)
 #define vmaxvq_s8(__a, __b) __arm_vmaxvq_s8(__a, __b)
-#define vmaxq_s8(__a, __b) __arm_vmaxq_s8(__a, __b)
 #define vhcaddq_rot90_s8(__a, __b) __arm_vhcaddq_rot90_s8(__a, __b)
 #define vhcaddq_rot270_s8(__a, __b) __arm_vhcaddq_rot270_s8(__a, __b)
 #define vcaddq_rot90_s8(__a, __b) __arm_vcaddq_rot90_s8(__a, __b)
@@ -672,9 +662,7 @@
 #define vmullbq_int_u16(__a, __b) __arm_vmullbq_int_u16(__a, __b)
 #define vmladavq_u16(__a, __b) __arm_vmladavq_u16(__a, __b)
 #define vminvq_u16(__a, __b) __arm_vminvq_u16(__a, __b)
-#define vminq_u16(__a, __b) __arm_vminq_u16(__a, __b)
 #define vmaxvq_u16(__a, __b) __arm_vmaxvq_u16(__a, __b)
-#define vmaxq_u16(__a, __b) __arm_vmaxq_u16(__a, __b)
 #define vcmpneq_n_u16(__a, __b) __arm_vcmpneq_n_u16(__a, __b)
 #define vcmphiq_u16(__a, __b) __arm_vcmphiq_u16(__a, __b)
 #define vcmphiq_n_u16(__a, __b) __arm_vcmphiq_n_u16(__a, __b)
@@ -714,9 +702,7 @@
 #define vmladavxq_s16(__a, __b) __arm_vmladavxq_s16(__a, __b)
 #define vmladavq_s16(__a, __b) __arm_vmladavq_s16(__a, __b)
 #define vminvq_s16(__a, __b) __arm_vminvq_s16(__a, __b)
-#define vminq_s16(__a, __b) __arm_vminq_s16(__a, __b)
 #define vmaxvq_s16(__a, __b) __arm_vmaxvq_s16(__a, __b)
-#define vmaxq_s16(__a, __b) __arm_vmaxq_s16(__a, __b)
 #define vhcaddq_rot90_s16(__a, __b) __arm_vhcaddq_rot90_s16(__a, __b)
 #define vhcaddq_rot270_s16(__a, __b) __arm_vhcaddq_rot270_s16(__a, __b)
 #define vcaddq_rot90_s16(__a, __b) __arm_vcaddq_rot90_s16(__a, __b)
@@ -730,9 +716,7 @@
 #define vmullbq_int_u32(__a, __b) __arm_vmullbq_int_u32(__a, __b)
 #define vmladavq_u32(__a, __b) __arm_vmladavq_u32(__a, __b)
 #define vminvq_u32(__a, __b) __arm_vminvq_u32(__a, __b)
-#define vminq_u32(__a, __b) __arm_vminq_u32(__a, __b)
 #define vmaxvq_u32(__a, __b) __arm_vmaxvq_u32(__a, __b)
-#define vmaxq_u32(__a, __b) __arm_vmaxq_u32(__a, __b)
 #define vcmpneq_n_u32(__a, __b) __arm_vcmpneq_n_u32(__a, __b)
 #define vcmphiq_u32(__a, __b) __arm_vcmphiq_u32(__a, __b)
 #define vcmphiq_n_u32(__a, __b) __arm_vcmphiq_n_u32(__a, __b)
@@ -772,9 +756,7 @@
 #define vmladavxq_s32(__a, __b) __arm_vmladavxq_s32(__a, __b)
 #define vmladavq_s32(__a, __b) __arm_vmladavq_s32(__a, __b)
 #define vminvq_s32(__a, __b) __arm_vminvq_s32(__a, __b)
-#define vminq_s32(__a, __b) __arm_vminq_s32(__a, __b)
 #define vmaxvq_s32(__a, __b) __arm_vmaxvq_s32(__a, __b)
-#define vmaxq_s32(__a, __b) __arm_vmaxq_s32(__a, __b)
 #define vhcaddq_rot90_s32(__a, __b) __arm_vhcaddq_rot90_s32(__a, __b)
 #define vhcaddq_rot270_s32(__a, __b) __arm_vhcaddq_rot270_s32(__a, __b)
 #define vcaddq_rot90_s32(__a, __b) __arm_vcaddq_rot90_s32(__a, __b)
@@ -1411,18 +1393,6 @@
 #define vhcaddq_rot90_m_s8(__inactive, __a, __b, __p) __arm_vhcaddq_rot90_m_s8(__inactive, __a, __b, __p)
 #define vhcaddq_rot90_m_s32(__inactive, __a, __b, __p) __arm_vhcaddq_rot90_m_s32(__inactive, __a, __b, __p)
 #define vhcaddq_rot90_m_s16(__inactive, __a, __b, __p) __arm_vhcaddq_rot90_m_s16(__inactive, __a, __b, __p)
-#define vmaxq_m_s8(__inactive, __a, __b, __p) __arm_vmaxq_m_s8(__inactive, __a, __b, __p)
-#define vmaxq_m_s32(__inactive, __a, __b, __p) __arm_vmaxq_m_s32(__inactive, __a, __b, __p)
-#define vmaxq_m_s16(__inactive, __a, __b, __p) __arm_vmaxq_m_s16(__inactive, __a, __b, __p)
-#define vmaxq_m_u8(__inactive, __a, __b, __p) __arm_vmaxq_m_u8(__inactive, __a, __b, __p)
-#define vmaxq_m_u32(__inactive, __a, __b, __p) __arm_vmaxq_m_u32(__inactive, __a, __b, __p)
-#define vmaxq_m_u16(__inactive, __a, __b, __p) __arm_vmaxq_m_u16(__inactive, __a, __b, __p)
-#define vminq_m_s8(__inactive, __a, __b, __p) __arm_vminq_m_s8(__inactive, __a, __b, __p)
-#define vminq_m_s32(__inactive, __a, __b, __p) __arm_vminq_m_s32(__inactive, __a, __b, __p)
-#define vminq_m_s16(__inactive, __a, __b, __p) __arm_vminq_m_s16(__inactive, __a, __b, __p)
-#define vminq_m_u8(__inactive, __a, __b, __p) __arm_vminq_m_u8(__inactive, __a, __b, __p)
-#define vminq_m_u32(__inactive, __a, __b, __p) __arm_vminq_m_u32(__inactive, __a, __b, __p)
-#define vminq_m_u16(__inactive, __a, __b, __p) __arm_vminq_m_u16(__inactive, __a, __b, __p)
 #define vmladavaq_p_s8(__a, __b, __c, __p) __arm_vmladavaq_p_s8(__a, __b, __c, __p)
 #define vmladavaq_p_s32(__a, __b, __c, __p) __arm_vmladavaq_p_s32(__a, __b, __c, __p)
 #define vmladavaq_p_s16(__a, __b, __c, __p) __arm_vmladavaq_p_s16(__a, __b, __c, __p)
@@ -1943,18 +1913,6 @@
 #define vdupq_x_n_u8(__a, __p) __arm_vdupq_x_n_u8(__a, __p)
 #define vdupq_x_n_u16(__a, __p) __arm_vdupq_x_n_u16(__a, __p)
 #define vdupq_x_n_u32(__a, __p) __arm_vdupq_x_n_u32(__a, __p)
-#define vminq_x_s8(__a, __b, __p) __arm_vminq_x_s8(__a, __b, __p)
-#define vminq_x_s16(__a, __b, __p) __arm_vminq_x_s16(__a, __b, __p)
-#define vminq_x_s32(__a, __b, __p) __arm_vminq_x_s32(__a, __b, __p)
-#define vminq_x_u8(__a, __b, __p) __arm_vminq_x_u8(__a, __b, __p)
-#define vminq_x_u16(__a, __b, __p) __arm_vminq_x_u16(__a, __b, __p)
-#define vminq_x_u32(__a, __b, __p) __arm_vminq_x_u32(__a, __b, __p)
-#define vmaxq_x_s8(__a, __b, __p) __arm_vmaxq_x_s8(__a, __b, __p)
-#define vmaxq_x_s16(__a, __b, __p) __arm_vmaxq_x_s16(__a, __b, __p)
-#define vmaxq_x_s32(__a, __b, __p) __arm_vmaxq_x_s32(__a, __b, __p)
-#define vmaxq_x_u8(__a, __b, __p) __arm_vmaxq_x_u8(__a, __b, __p)
-#define vmaxq_x_u16(__a, __b, __p) __arm_vmaxq_x_u16(__a, __b, __p)
-#define vmaxq_x_u32(__a, __b, __p) __arm_vmaxq_x_u32(__a, __b, __p)
 #define vabsq_x_s8(__a, __p) __arm_vabsq_x_s8(__a, __p)
 #define vabsq_x_s16(__a, __p) __arm_vabsq_x_s16(__a, __p)
 #define vabsq_x_s32(__a, __p) __arm_vabsq_x_s32(__a, __p)
@@ -2937,13 +2895,6 @@ __arm_vminvq_u8 (uint8_t __a, uint8x16_t __b)
   return __builtin_mve_vminvq_uv16qi (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_u8 (uint8x16_t __a, uint8x16_t __b)
-{
-  return __builtin_mve_vminq_uv16qi (__a, __b);
-}
-
 __extension__ extern __inline uint8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmaxvq_u8 (uint8_t __a, uint8x16_t __b)
@@ -2951,13 +2902,6 @@ __arm_vmaxvq_u8 (uint8_t __a, uint8x16_t __b)
   return __builtin_mve_vmaxvq_uv16qi (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
-{
-  return __builtin_mve_vmaxq_uv16qi (__a, __b);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq_n_u8 (uint8x16_t __a, uint8_t __b)
@@ -3233,13 +3177,6 @@ __arm_vminvq_s8 (int8_t __a, int8x16_t __b)
   return __builtin_mve_vminvq_sv16qi (__a, __b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vminq_sv16qi (__a, __b);
-}
-
 __extension__ extern __inline int8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmaxvq_s8 (int8_t __a, int8x16_t __b)
@@ -3247,13 +3184,6 @@ __arm_vmaxvq_s8 (int8_t __a, int8x16_t __b)
   return __builtin_mve_vmaxvq_sv16qi (__a, __b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vmaxq_sv16qi (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vhcaddq_rot90_s8 (int8x16_t __a, int8x16_t __b)
@@ -3345,13 +3275,6 @@ __arm_vminvq_u16 (uint16_t __a, uint16x8_t __b)
   return __builtin_mve_vminvq_uv8hi (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_u16 (uint16x8_t __a, uint16x8_t __b)
-{
-  return __builtin_mve_vminq_uv8hi (__a, __b);
-}
-
 __extension__ extern __inline uint16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmaxvq_u16 (uint16_t __a, uint16x8_t __b)
@@ -3359,13 +3282,6 @@ __arm_vmaxvq_u16 (uint16_t __a, uint16x8_t __b)
   return __builtin_mve_vmaxvq_uv8hi (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
-{
-  return __builtin_mve_vmaxq_uv8hi (__a, __b);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq_n_u16 (uint16x8_t __a, uint16_t __b)
@@ -3641,13 +3557,6 @@ __arm_vminvq_s16 (int16_t __a, int16x8_t __b)
   return __builtin_mve_vminvq_sv8hi (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vminq_sv8hi (__a, __b);
-}
-
 __extension__ extern __inline int16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmaxvq_s16 (int16_t __a, int16x8_t __b)
@@ -3655,13 +3564,6 @@ __arm_vmaxvq_s16 (int16_t __a, int16x8_t __b)
   return __builtin_mve_vmaxvq_sv8hi (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vmaxq_sv8hi (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vhcaddq_rot90_s16 (int16x8_t __a, int16x8_t __b)
@@ -3753,13 +3655,6 @@ __arm_vminvq_u32 (uint32_t __a, uint32x4_t __b)
   return __builtin_mve_vminvq_uv4si (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_u32 (uint32x4_t __a, uint32x4_t __b)
-{
-  return __builtin_mve_vminq_uv4si (__a, __b);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmaxvq_u32 (uint32_t __a, uint32x4_t __b)
@@ -3767,13 +3662,6 @@ __arm_vmaxvq_u32 (uint32_t __a, uint32x4_t __b)
   return __builtin_mve_vmaxvq_uv4si (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
-{
-  return __builtin_mve_vmaxq_uv4si (__a, __b);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq_n_u32 (uint32x4_t __a, uint32_t __b)
@@ -4049,13 +3937,6 @@ __arm_vminvq_s32 (int32_t __a, int32x4_t __b)
   return __builtin_mve_vminvq_sv4si (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vminq_sv4si (__a, __b);
-}
-
 __extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmaxvq_s32 (int32_t __a, int32x4_t __b)
@@ -4063,13 +3944,6 @@ __arm_vmaxvq_s32 (int32_t __a, int32x4_t __b)
   return __builtin_mve_vmaxvq_sv4si (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vmaxq_sv4si (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vhcaddq_rot90_s32 (int32x4_t __a, int32x4_t __b)
@@ -7380,90 +7254,6 @@ __arm_vhcaddq_rot90_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, m
   return __builtin_mve_vhcaddq_rot90_m_sv8hi (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmaxq_m_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmaxq_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmaxq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmaxq_m_uv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmaxq_m_uv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmaxq_m_uv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vminq_m_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vminq_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vminq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vminq_m_uv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vminq_m_uv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vminq_m_uv8hi (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmladavaq_p_s8 (int32_t __a, int8x16_t __b, int8x16_t __c, mve_pred16_t __p)
@@ -10635,90 +10425,6 @@ __arm_vdupq_x_n_u32 (uint32_t __a, mve_pred16_t __p)
   return __builtin_mve_vdupq_m_n_uv4si (__arm_vuninitializedq_u32 (), __a, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_x_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vminq_m_sv16qi (__arm_vuninitializedq_s8 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_x_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vminq_m_sv8hi (__arm_vuninitializedq_s16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_x_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vminq_m_sv4si (__arm_vuninitializedq_s32 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_x_u8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vminq_m_uv16qi (__arm_vuninitializedq_u8 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_x_u16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vminq_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_x_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vminq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_x_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmaxq_m_sv16qi (__arm_vuninitializedq_s8 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_x_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmaxq_m_sv8hi (__arm_vuninitializedq_s16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_x_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmaxq_m_sv4si (__arm_vuninitializedq_s32 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_x_u8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmaxq_m_uv16qi (__arm_vuninitializedq_u8 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_x_u16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmaxq_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_x_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmaxq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vabsq_x_s8 (int8x16_t __a, mve_pred16_t __p)
@@ -15624,13 +15330,6 @@ __arm_vminvq (uint8_t __a, uint8x16_t __b)
  return __arm_vminvq_u8 (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq (uint8x16_t __a, uint8x16_t __b)
-{
- return __arm_vminq_u8 (__a, __b);
-}
-
 __extension__ extern __inline uint8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmaxvq (uint8_t __a, uint8x16_t __b)
@@ -15638,13 +15337,6 @@ __arm_vmaxvq (uint8_t __a, uint8x16_t __b)
  return __arm_vmaxvq_u8 (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq (uint8x16_t __a, uint8x16_t __b)
-{
- return __arm_vmaxq_u8 (__a, __b);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq (uint8x16_t __a, uint8_t __b)
@@ -15918,13 +15610,6 @@ __arm_vminvq (int8_t __a, int8x16_t __b)
  return __arm_vminvq_s8 (__a, __b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vminq_s8 (__a, __b);
-}
-
 __extension__ extern __inline int8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmaxvq (int8_t __a, int8x16_t __b)
@@ -15932,13 +15617,6 @@ __arm_vmaxvq (int8_t __a, int8x16_t __b)
  return __arm_vmaxvq_s8 (__a, __b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vmaxq_s8 (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vhcaddq_rot90 (int8x16_t __a, int8x16_t __b)
@@ -16030,13 +15708,6 @@ __arm_vminvq (uint16_t __a, uint16x8_t __b)
  return __arm_vminvq_u16 (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq (uint16x8_t __a, uint16x8_t __b)
-{
- return __arm_vminq_u16 (__a, __b);
-}
-
 __extension__ extern __inline uint16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmaxvq (uint16_t __a, uint16x8_t __b)
@@ -16044,13 +15715,6 @@ __arm_vmaxvq (uint16_t __a, uint16x8_t __b)
  return __arm_vmaxvq_u16 (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq (uint16x8_t __a, uint16x8_t __b)
-{
- return __arm_vmaxq_u16 (__a, __b);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq (uint16x8_t __a, uint16_t __b)
@@ -16324,13 +15988,6 @@ __arm_vminvq (int16_t __a, int16x8_t __b)
  return __arm_vminvq_s16 (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vminq_s16 (__a, __b);
-}
-
 __extension__ extern __inline int16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmaxvq (int16_t __a, int16x8_t __b)
@@ -16338,13 +15995,6 @@ __arm_vmaxvq (int16_t __a, int16x8_t __b)
  return __arm_vmaxvq_s16 (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vmaxq_s16 (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vhcaddq_rot90 (int16x8_t __a, int16x8_t __b)
@@ -16436,13 +16086,6 @@ __arm_vminvq (uint32_t __a, uint32x4_t __b)
  return __arm_vminvq_u32 (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vminq_u32 (__a, __b);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmaxvq (uint32_t __a, uint32x4_t __b)
@@ -16450,13 +16093,6 @@ __arm_vmaxvq (uint32_t __a, uint32x4_t __b)
  return __arm_vmaxvq_u32 (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vmaxq_u32 (__a, __b);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq (uint32x4_t __a, uint32_t __b)
@@ -16730,13 +16366,6 @@ __arm_vminvq (int32_t __a, int32x4_t __b)
  return __arm_vminvq_s32 (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vminq_s32 (__a, __b);
-}
-
 __extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmaxvq (int32_t __a, int32x4_t __b)
@@ -16744,13 +16373,6 @@ __arm_vmaxvq (int32_t __a, int32x4_t __b)
  return __arm_vmaxvq_s32 (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vmaxq_s32 (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vhcaddq_rot90 (int32x4_t __a, int32x4_t __b)
@@ -20020,90 +19642,6 @@ __arm_vhcaddq_rot90_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_p
  return __arm_vhcaddq_rot90_m_s16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmaxq_m_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmaxq_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmaxq_m_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_m (uint8x16_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmaxq_m_u8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_m (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmaxq_m_u32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_m (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmaxq_m_u16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vminq_m_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vminq_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vminq_m_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_m (uint8x16_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vminq_m_u8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_m (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vminq_m_u32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_m (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vminq_m_u16 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmladavaq_p (int32_t __a, int8x16_t __b, int8x16_t __c, mve_pred16_t __p)
@@ -22806,90 +22344,6 @@ __arm_viwdupq_x_u32 (uint32_t *__a, uint32_t __b, const int __imm, mve_pred16_t
  return __arm_viwdupq_x_wb_u32 (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_x (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vminq_x_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_x (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vminq_x_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_x (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vminq_x_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_x (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vminq_x_u8 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_x (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vminq_x_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vminq_x (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vminq_x_u32 (__a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_x (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmaxq_x_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_x (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmaxq_x_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_x (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmaxq_x_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_x (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmaxq_x_u8 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_x (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmaxq_x_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmaxq_x (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmaxq_x_u32 (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vabsq_x (int8x16_t __a, mve_pred16_t __p)
@@ -27274,16 +26728,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhcaddq_rot90_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhcaddq_rot90_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
 
-#define __arm_vminq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vminq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vminq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vminq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vminq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
 #define __arm_vminaq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -27291,16 +26735,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminaq_s16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminaq_s32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
 
-#define __arm_vmaxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmaxq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmaxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmaxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmaxq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmaxq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmaxq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
 #define __arm_vmaxaq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -28867,16 +28301,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_int_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmullbq_int_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
 
-#define __arm_vminq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vminq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vminq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vminq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vminq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
 #define __arm_vminaq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -28884,16 +28308,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminaq_s16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminaq_s32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
 
-#define __arm_vmaxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmaxq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmaxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmaxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmaxq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmaxq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmaxq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
 #define __arm_vmaxaq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -30608,28 +30022,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhcaddq_rot90_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhcaddq_rot90_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
 
-#define __arm_vmaxq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmaxq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmaxq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmaxq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmaxq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmaxq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmaxq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
-#define __arm_vminq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vminq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vminq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vminq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vminq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
 #define __arm_vmlaq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -31068,26 +30460,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminavq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \
   int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminavq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2));})
 
-#define __arm_vmaxq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmaxq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmaxq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmaxq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmaxq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmaxq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmaxq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
-#define __arm_vminq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vminq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vminq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vminq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vminq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
 #define __arm_vminvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-- 
2.34.1



* [PATCH 15/23] arm: [MVE intrinsics] add binary_rshift_narrow shape
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (12 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 14/23] arm: [MVE intrinsics] rework " Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 11:00   ` Kyrylo Tkachov
  2023-05-05  8:39 ` [PATCH 16/23] arm: [MVE intrinsics] factorize vshrntq vshrnbq vrshrnbq vrshrntq vqshrnbq vqshrntq vqrshrnbq vqrshrntq Christophe Lyon
                   ` (8 subsequent siblings)
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the binary_rshift_narrow shape description.
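
For illustration, here is a minimal user-level sketch of an intrinsic
covered by this shape, assuming the usual <arm_mve.h> declarations and
MVE compile flags; the function name is hypothetical:

  #include <arm_mve.h>

  /* Shift each 16-bit lane of 'wide' right by 4 with rounding and
     saturation to 8 bits (the immediate must be in [1..8] for 16-bit
     source elements), writing the narrowed values to the even-numbered
     byte lanes and keeping the odd lanes of 'acc'.  */
  int8x16_t
  narrow_bottom (int8x16_t acc, int16x8_t wide)
  {
    return vqrshrnbq (acc, wide, 4);
  }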

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (binary_rshift_narrow):
	New.
	* config/arm/arm-mve-builtins-shapes.h (binary_rshift_narrow): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 47 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 48 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 4ecb612ece5..88934e1ca15 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -617,6 +617,53 @@ struct binary_lshift_r_def : public overloaded_base<0>
 };
 SHAPE (binary_lshift_r)
 
+/* <T0:half>_t vfoo[_n_t0](<T0:half>_t, <T0>_t, const int)
+
+   Narrowing right shifts.
+   Check that 'imm' is in the [1..#bits/2] range.
+
+   Example: vqrshrnbq.
+   int8x16_t [__arm_]vqrshrnbq[_n_s16](int8x16_t a, int16x8_t b, const int imm)
+   int8x16_t [__arm_]vqrshrnbq_m[_n_s16](int8x16_t a, int16x8_t b, const int imm, mve_pred16_t p)  */
+struct binary_rshift_narrow_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
+    build_all (b, "vh0,vh0,v0,ss32", group, MODE_n, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    type_suffix_index type;
+    if (!r.check_gp_argument (3, i, nargs)
+	|| (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES
+	|| !r.require_integer_immediate (i))
+      return error_mark_node;
+
+    type_suffix_index narrow_suffix
+      = find_type_suffix (type_suffixes[type].tclass,
+			  type_suffixes[type].element_bits / 2);
+
+    if (!r.require_matching_vector_type (0, narrow_suffix))
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    unsigned int bits = c.type_suffix (0).element_bits;
+    return c.require_immediate_range (2, 1, bits / 2);
+  }
+};
+SHAPE (binary_rshift_narrow)
+
 /* <T0>xN_t vfoo[_t0](uint64_t, uint64_t)
 
    where there are N arguments in total.
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index 25d9b60a670..d72686d187b 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -40,6 +40,7 @@ namespace arm_mve
     extern const function_shape *const binary_opt_n;
     extern const function_shape *const binary_orrq;
     extern const function_shape *const binary_round_lshift;
+    extern const function_shape *const binary_rshift_narrow;
     extern const function_shape *const create;
     extern const function_shape *const inherent;
     extern const function_shape *const unary_convert;
-- 
2.34.1



* [PATCH 16/23] arm: [MVE intrinsics] factorize vshrntq vshrnbq vrshrnbq vrshrntq vqshrnbq vqshrntq vqrshrnbq vqrshrntq
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (13 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 15/23] arm: [MVE intrinsics] add binary_rshift_narrow shape Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 11:00   ` Kyrylo Tkachov
  2023-05-05  8:39 ` [PATCH 17/23] arm: [MVE intrinsics] rework vshrnbq vshrntq " Christophe Lyon
                   ` (7 subsequent siblings)
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vqshrnbq, vqshrntq, vqrshrnbq, vqrshrntq, vshrntq, vshrnbq,
vrshrnbq and vrshrntq so that they use the same pattern.

Introduce the <isu> int attribute for *shrn* so that we can use the same
pattern despite the different "s", "u" and "i" suffixes.
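
As a rough illustration (hypothetical user code; the exact assembly
depends on the compile flags and register allocation), the plain and
rounding narrowing shifts print an "i" element-size suffix, while the
saturating ones print "s" or "u", which is what <isu> captures:

  #include <arm_mve.h>

  int8x16_t f1 (int8x16_t a, int16x8_t b)
  {
    return vshrnbq (a, b, 3);   /* expected: vshrnb.i16 ...  */
  }

  int8x16_t f2 (int8x16_t a, int16x8_t b)
  {
    return vqshrnbq (a, b, 3);  /* expected: vqshrnb.s16 ...  */
  }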

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MVE_SHRN_N, MVE_SHRN_M_N): New.
	(mve_insn): Add vqrshrnb, vqrshrnt, vqshrnb, vqshrnt, vrshrnb,
	vrshrnt, vshrnb, vshrnt.
	(isu): New.
	* config/arm/mve.md (mve_vqrshrnbq_n_<supf><mode>)
	(mve_vqrshrntq_n_<supf><mode>, mve_vqshrnbq_n_<supf><mode>)
	(mve_vqshrntq_n_<supf><mode>, mve_vrshrnbq_n_<supf><mode>)
	(mve_vrshrntq_n_<supf><mode>, mve_vshrnbq_n_<supf><mode>)
	(mve_vshrntq_n_<supf><mode>): Merge into ...
	(@mve_<mve_insn>q_n_<supf><mode>): ... this.
	(mve_vqrshrnbq_m_n_<supf><mode>, mve_vqrshrntq_m_n_<supf><mode>)
	(mve_vqshrnbq_m_n_<supf><mode>, mve_vqshrntq_m_n_<supf><mode>)
	(mve_vrshrnbq_m_n_<supf><mode>, mve_vrshrntq_m_n_<supf><mode>)
	(mve_vshrnbq_m_n_<supf><mode>, mve_vshrntq_m_n_<supf><mode>):
	Merge into ...
	(@mve_<mve_insn>q_m_n_<supf><mode>): ... this.
---
 gcc/config/arm/iterators.md |  57 ++++++++
 gcc/config/arm/mve.md       | 270 ++++--------------------------------
 2 files changed, 85 insertions(+), 242 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 9ff61e0573b..d64c924a513 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -468,6 +468,28 @@ (define_int_iterator MVE_RSHIFT_N   [
 		     VRSHLQ_N_S VRSHLQ_N_U
 		     ])
 
+(define_int_iterator MVE_SHRN_N [
+		     VQRSHRNBQ_N_S VQRSHRNBQ_N_U
+		     VQRSHRNTQ_N_S VQRSHRNTQ_N_U
+		     VQSHRNBQ_N_S VQSHRNBQ_N_U
+		     VQSHRNTQ_N_S VQSHRNTQ_N_U
+		     VRSHRNBQ_N_S VRSHRNBQ_N_U
+		     VRSHRNTQ_N_S VRSHRNTQ_N_U
+		     VSHRNBQ_N_S VSHRNBQ_N_U
+		     VSHRNTQ_N_S VSHRNTQ_N_U
+		     ])
+
+(define_int_iterator MVE_SHRN_M_N [
+		     VQRSHRNBQ_M_N_S VQRSHRNBQ_M_N_U
+		     VQRSHRNTQ_M_N_S VQRSHRNTQ_M_N_U
+		     VQSHRNBQ_M_N_S VQSHRNBQ_M_N_U
+		     VQSHRNTQ_M_N_S VQSHRNTQ_M_N_U
+		     VRSHRNBQ_M_N_S VRSHRNBQ_M_N_U
+		     VRSHRNTQ_M_N_S VRSHRNTQ_M_N_U
+		     VSHRNBQ_M_N_S VSHRNBQ_M_N_U
+		     VSHRNTQ_M_N_S VSHRNTQ_M_N_U
+		     ])
+
 (define_int_iterator MVE_FP_M_BINARY   [
 		     VABDQ_M_F
 		     VADDQ_M_F
@@ -568,12 +590,20 @@ (define_int_attr mve_insn [
 		 (VQRSHLQ_M_S "vqrshl") (VQRSHLQ_M_U "vqrshl")
 		 (VQRSHLQ_N_S "vqrshl") (VQRSHLQ_N_U "vqrshl")
 		 (VQRSHLQ_S "vqrshl") (VQRSHLQ_U "vqrshl")
+		 (VQRSHRNBQ_M_N_S "vqrshrnb") (VQRSHRNBQ_M_N_U "vqrshrnb")
+		 (VQRSHRNBQ_N_S "vqrshrnb") (VQRSHRNBQ_N_U "vqrshrnb")
+		 (VQRSHRNTQ_M_N_S "vqrshrnt") (VQRSHRNTQ_M_N_U "vqrshrnt")
+		 (VQRSHRNTQ_N_S "vqrshrnt") (VQRSHRNTQ_N_U "vqrshrnt")
 		 (VQSHLQ_M_N_S "vqshl") (VQSHLQ_M_N_U "vqshl")
 		 (VQSHLQ_M_R_S "vqshl") (VQSHLQ_M_R_U "vqshl")
 		 (VQSHLQ_M_S "vqshl") (VQSHLQ_M_U "vqshl")
 		 (VQSHLQ_N_S "vqshl") (VQSHLQ_N_U "vqshl")
 		 (VQSHLQ_R_S "vqshl") (VQSHLQ_R_U "vqshl")
 		 (VQSHLQ_S "vqshl") (VQSHLQ_U "vqshl")
+		 (VQSHRNBQ_M_N_S "vqshrnb") (VQSHRNBQ_M_N_U "vqshrnb")
+		 (VQSHRNBQ_N_S "vqshrnb") (VQSHRNBQ_N_U "vqshrnb")
+		 (VQSHRNTQ_M_N_S "vqshrnt") (VQSHRNTQ_M_N_U "vqshrnt")
+		 (VQSHRNTQ_N_S "vqshrnt") (VQSHRNTQ_N_U "vqshrnt")
 		 (VQSUBQ_M_N_S "vqsub") (VQSUBQ_M_N_U "vqsub")
 		 (VQSUBQ_M_S "vqsub") (VQSUBQ_M_U "vqsub")
 		 (VQSUBQ_N_S "vqsub") (VQSUBQ_N_U "vqsub")
@@ -586,17 +616,44 @@ (define_int_attr mve_insn [
 		 (VRSHLQ_M_S "vrshl") (VRSHLQ_M_U "vrshl")
 		 (VRSHLQ_N_S "vrshl") (VRSHLQ_N_U "vrshl")
 		 (VRSHLQ_S "vrshl") (VRSHLQ_U "vrshl")
+		 (VRSHRNBQ_M_N_S "vrshrnb") (VRSHRNBQ_M_N_U "vrshrnb")
+		 (VRSHRNBQ_N_S "vrshrnb") (VRSHRNBQ_N_U "vrshrnb")
+		 (VRSHRNTQ_M_N_S "vrshrnt") (VRSHRNTQ_M_N_U "vrshrnt")
+		 (VRSHRNTQ_N_S "vrshrnt") (VRSHRNTQ_N_U "vrshrnt")
 		 (VSHLQ_M_N_S "vshl") (VSHLQ_M_N_U "vshl")
 		 (VSHLQ_M_R_S "vshl") (VSHLQ_M_R_U "vshl")
 		 (VSHLQ_M_S "vshl") (VSHLQ_M_U "vshl")
 		 (VSHLQ_N_S "vshl") (VSHLQ_N_U "vshl")
 		 (VSHLQ_R_S "vshl") (VSHLQ_R_U "vshl")
 		 (VSHLQ_S "vshl") (VSHLQ_U "vshl")
+		 (VSHRNBQ_M_N_S "vshrnb") (VSHRNBQ_M_N_U "vshrnb")
+		 (VSHRNBQ_N_S "vshrnb") (VSHRNBQ_N_U "vshrnb")
+		 (VSHRNTQ_M_N_S "vshrnt") (VSHRNTQ_M_N_U "vshrnt")
+		 (VSHRNTQ_N_S "vshrnt") (VSHRNTQ_N_U "vshrnt")
 		 (VSUBQ_M_N_S "vsub") (VSUBQ_M_N_U "vsub") (VSUBQ_M_N_F "vsub")
 		 (VSUBQ_M_S "vsub") (VSUBQ_M_U "vsub") (VSUBQ_M_F "vsub")
 		 (VSUBQ_N_S "vsub") (VSUBQ_N_U "vsub") (VSUBQ_N_F "vsub")
 		 ])
 
+(define_int_attr isu    [
+		 (VQRSHRNBQ_M_N_S "s") (VQRSHRNBQ_M_N_U "u")
+		 (VQRSHRNBQ_N_S "s") (VQRSHRNBQ_N_U "u")
+		 (VQRSHRNTQ_M_N_S "s") (VQRSHRNTQ_M_N_U "u")
+		 (VQRSHRNTQ_N_S "s") (VQRSHRNTQ_N_U "u")
+		 (VQSHRNBQ_M_N_S "s") (VQSHRNBQ_M_N_U "u")
+		 (VQSHRNBQ_N_S "s") (VQSHRNBQ_N_U "u")
+		 (VQSHRNTQ_M_N_S "s") (VQSHRNTQ_M_N_U "u")
+		 (VQSHRNTQ_N_S "s") (VQSHRNTQ_N_U "u")
+		 (VRSHRNBQ_M_N_S "i") (VRSHRNBQ_M_N_U "i")
+		 (VRSHRNBQ_N_S "i") (VRSHRNBQ_N_U "i")
+		 (VRSHRNTQ_M_N_S "i") (VRSHRNTQ_M_N_U "i")
+		 (VRSHRNTQ_N_S "i") (VRSHRNTQ_N_U "i")
+		 (VSHRNBQ_M_N_S "i") (VSHRNBQ_M_N_U "i")
+		 (VSHRNBQ_N_S "i") (VSHRNBQ_N_U "i")
+		 (VSHRNTQ_M_N_S "i") (VSHRNTQ_M_N_U "i")
+		 (VSHRNTQ_N_S "i") (VSHRNTQ_N_U "i")
+		 ])
+
 ;; plus and minus are the only SHIFTABLE_OPS for which Thumb2 allows
 ;; a stack pointer operand.  The minus operation is a candidate for an rsub
 ;; and hence only plus is supported.
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 44409b40e5f..d64a075c7bb 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -2162,21 +2162,30 @@ (define_insn "mve_vcvtq_m_to_f_<supf><mode>"
   "vpst\;vcvtt.f%#<V_sz_elem>.<supf>%#<V_sz_elem>	 %q0, %q2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
+
 ;;
-;; [vqrshrnbq_n_u, vqrshrnbq_n_s])
+;; [vqrshrnbq_n_u, vqrshrnbq_n_s]
+;; [vqrshrntq_n_u, vqrshrntq_n_s]
+;; [vqshrnbq_n_u, vqshrnbq_n_s]
+;; [vqshrntq_n_u, vqshrntq_n_s]
+;; [vrshrnbq_n_s, vrshrnbq_n_u]
+;; [vrshrntq_n_u, vrshrntq_n_s]
+;; [vshrnbq_n_u, vshrnbq_n_s]
+;; [vshrntq_n_s, vshrntq_n_u]
 ;;
-(define_insn "mve_vqrshrnbq_n_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_n_<supf><mode>"
   [
    (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
 	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
 				 (match_operand:MVE_5 2 "s_register_operand" "w")
 				 (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
-	 VQRSHRNBQ_N))
+	 MVE_SHRN_N))
   ]
   "TARGET_HAVE_MVE"
-  "vqrshrnb.<supf>%#<V_sz_elem>	%q0, %q2, %3"
+  "<mve_insn>.<isu>%#<V_sz_elem>\t%q0, %q2, %3"
   [(set_attr "type" "mve_move")
 ])
+
 ;;
 ;; [vqrshrunbq_n_s])
 ;;
@@ -2192,6 +2201,7 @@ (define_insn "mve_vqrshrunbq_n_s<mode>"
   "vqrshrunb.s%#<V_sz_elem>\t%q0, %q2, %3"
   [(set_attr "type" "mve_move")
 ])
+
 ;;
 ;; [vrmlaldavhaq_s vrmlaldavhaq_u])
 ;;
@@ -3992,22 +4002,6 @@ (define_insn "mve_vqmovuntq_m_s<mode>"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-;;
-;; [vqrshrntq_n_u, vqrshrntq_n_s])
-;;
-(define_insn "mve_vqrshrntq_n_<supf><mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
-	 VQRSHRNTQ_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqrshrnt.<supf>%#<V_sz_elem>	%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-])
-
 ;;
 ;; [vqrshruntq_n_s])
 ;;
@@ -4024,38 +4018,6 @@ (define_insn "mve_vqrshruntq_n_s<mode>"
   [(set_attr "type" "mve_move")
 ])
 
-;;
-;; [vqshrnbq_n_u, vqshrnbq_n_s])
-;;
-(define_insn "mve_vqshrnbq_n_<supf><mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
-	 VQSHRNBQ_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqshrnb.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vqshrntq_n_u, vqshrntq_n_s])
-;;
-(define_insn "mve_vqshrntq_n_<supf><mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
-	 VQSHRNTQ_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqshrnt.<supf>%#<V_sz_elem>	%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-])
-
 ;;
 ;; [vqshrunbq_n_s])
 ;;
@@ -4296,70 +4258,6 @@ (define_insn "mve_vrndxq_m_f<mode>"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-;;
-;; [vrshrnbq_n_s, vrshrnbq_n_u])
-;;
-(define_insn "mve_vrshrnbq_n_<supf><mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
-	 VRSHRNBQ_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vrshrnb.i%#<V_sz_elem>	%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vrshrntq_n_u, vrshrntq_n_s])
-;;
-(define_insn "mve_vrshrntq_n_<supf><mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
-	 VRSHRNTQ_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vrshrnt.i%#<V_sz_elem>	%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vshrnbq_n_u, vshrnbq_n_s])
-;;
-(define_insn "mve_vshrnbq_n_<supf><mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
-	 VSHRNBQ_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vshrnb.i%#<V_sz_elem>	%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vshrntq_n_s, vshrntq_n_u])
-;;
-(define_insn "mve_vshrntq_n_<supf><mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-				 (match_operand:MVE_5 2 "s_register_operand" "w")
-				 (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
-	 VSHRNTQ_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vshrnt.i%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-])
-
 ;;
 ;; [vcvtmq_m_s, vcvtmq_m_u])
 ;;
@@ -4992,70 +4890,26 @@ (define_insn "mve_vmlaldavaxq_p_<supf><mode>"
    (set_attr "length""8")])
 
 ;;
-;; [vqrshrnbq_m_n_u, vqrshrnbq_m_n_s])
-;;
-(define_insn "mve_vqrshrnbq_m_n_<supf><mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VQRSHRNBQ_M_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vqrshrnbt.<supf>%#<V_sz_elem>	%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vqrshrntq_m_n_s, vqrshrntq_m_n_u])
-;;
-(define_insn "mve_vqrshrntq_m_n_<supf><mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VQRSHRNTQ_M_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vqrshrntt.<supf>%#<V_sz_elem>	%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vqshrnbq_m_n_u, vqshrnbq_m_n_s])
-;;
-(define_insn "mve_vqshrnbq_m_n_<supf><mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VQSHRNBQ_M_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\n\tvqshrnbt.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vqshrntq_m_n_s, vqshrntq_m_n_u])
+;; [vqrshrnbq_m_n_u, vqrshrnbq_m_n_s]
+;; [vqrshrntq_m_n_s, vqrshrntq_m_n_u]
+;; [vqshrnbq_m_n_u, vqshrnbq_m_n_s]
+;; [vqshrntq_m_n_s, vqshrntq_m_n_u]
+;; [vrshrnbq_m_n_u, vrshrnbq_m_n_s]
+;; [vrshrntq_m_n_u, vrshrntq_m_n_s]
+;; [vshrnbq_m_n_s, vshrnbq_m_n_u]
+;; [vshrntq_m_n_s, vshrntq_m_n_u]
 ;;
-(define_insn "mve_vqshrntq_m_n_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
   [
    (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
 	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VQSHRNTQ_M_N))
+				 (match_operand:MVE_5 2 "s_register_operand" "w")
+				 (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
+				 (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
+	 MVE_SHRN_M_N))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vqshrntt.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
+  "vpst\;<mve_insn>t.<isu>%#<V_sz_elem>\t%q0, %q2, %3"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
@@ -5076,40 +4930,6 @@ (define_insn "mve_vrmlaldavhaq_p_sv4si"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-;;
-;; [vrshrnbq_m_n_u, vrshrnbq_m_n_s])
-;;
-(define_insn "mve_vrshrnbq_m_n_<supf><mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VRSHRNBQ_M_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vrshrnbt.i%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vrshrntq_m_n_u, vrshrntq_m_n_s])
-;;
-(define_insn "mve_vrshrntq_m_n_<supf><mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VRSHRNTQ_M_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vrshrntt.i%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
 ;;
 ;; [vshllbq_m_n_u, vshllbq_m_n_s])
 ;;
@@ -5144,40 +4964,6 @@ (define_insn "mve_vshlltq_m_n_<supf><mode>"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-;;
-;; [vshrnbq_m_n_s, vshrnbq_m_n_u])
-;;
-(define_insn "mve_vshrnbq_m_n_<supf><mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VSHRNBQ_M_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vshrnbt.i%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vshrntq_m_n_s, vshrntq_m_n_u])
-;;
-(define_insn "mve_vshrntq_m_n_<supf><mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VSHRNTQ_M_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vshrntt.i%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
 ;;
 ;; [vmlsldavaq_p_s])
 ;;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 46+ messages in thread

* [PATCH 17/23] arm: [MVE intrinsics] rework vshrnbq vshrntq vrshrnbq vrshrntq vqshrnbq vqshrntq vqrshrnbq vqrshrntq
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (14 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 16/23] arm: [MVE intrinsics] factorize vshrntq vshrnbq vrshrnbq vrshrntq vqshrnbq vqshrntq vqrshrnbq vqrshrntq Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 11:02   ` Kyrylo Tkachov
  2023-05-05  8:39 ` [PATCH 18/23] arm: [MVE intrinsics] add binary_rshift_narrow_unsigned shape Christophe Lyon
                   ` (6 subsequent siblings)
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vshrnbq, vshrntq, vrshrnbq, vrshrntq, vqshrnbq, vqshrntq,
vqrshrnbq and vqrshrntq using the new MVE builtins framework.
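
For context, here is a minimal usage sketch of two of the intrinsics
being migrated (not part of the patch; the function names are
illustrative, and it assumes an MVE target, e.g.
-march=armv8.1-m.main+mve):

  #include <arm_mve.h>

  /* Shift each 16-bit lane of B right by 3 and write the narrowed
     bytes into the bottom (even-numbered) lanes of A; the other
     lanes of A are left untouched.  */
  int8x16_t
  shrnb_example (int8x16_t a, int16x8_t b)
  {
    return vshrnbq (a, b, 3);
  }

  /* Predicated form: predicated-off lanes keep the value already in
     A, so these intrinsics carry no separate "inactive" argument
     (hence the has_inactive_argument change below).  */
  int8x16_t
  shrnb_m_example (int8x16_t a, int16x8_t b, mve_pred16_t p)
  {
    return vshrnbq_m (a, b, 3, p);
  }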

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (FUNCTION_ONLY_N_NO_F): New;
	expansion sketched in the note after this ChangeLog.
	(vshrnbq, vshrntq, vrshrnbq, vrshrntq, vqshrnbq, vqshrntq)
	(vqrshrnbq, vqrshrntq): New.
	* config/arm/arm-mve-builtins-base.def (vshrnbq, vshrntq)
	(vrshrnbq, vrshrntq, vqshrnbq, vqshrntq, vqrshrnbq, vqrshrntq):
	New.
	* config/arm/arm-mve-builtins-base.h (vshrnbq, vshrntq, vrshrnbq)
	(vrshrntq, vqshrnbq, vqshrntq, vqrshrnbq, vqrshrntq): New.
	* config/arm/arm-mve-builtins.cc
	(function_instance::has_inactive_argument): Handle vshrnbq,
	vshrntq, vrshrnbq, vrshrntq, vqshrnbq, vqshrntq, vqrshrnbq,
	vqrshrntq.
	* config/arm/arm_mve.h (vshrnbq): Remove.
	(vshrntq): Remove.
	(vshrnbq_m): Remove.
	(vshrntq_m): Remove.
	(vshrnbq_n_s16): Remove.
	(vshrntq_n_s16): Remove.
	(vshrnbq_n_u16): Remove.
	(vshrntq_n_u16): Remove.
	(vshrnbq_n_s32): Remove.
	(vshrntq_n_s32): Remove.
	(vshrnbq_n_u32): Remove.
	(vshrntq_n_u32): Remove.
	(vshrnbq_m_n_s32): Remove.
	(vshrnbq_m_n_s16): Remove.
	(vshrnbq_m_n_u32): Remove.
	(vshrnbq_m_n_u16): Remove.
	(vshrntq_m_n_s32): Remove.
	(vshrntq_m_n_s16): Remove.
	(vshrntq_m_n_u32): Remove.
	(vshrntq_m_n_u16): Remove.
	(__arm_vshrnbq_n_s16): Remove.
	(__arm_vshrntq_n_s16): Remove.
	(__arm_vshrnbq_n_u16): Remove.
	(__arm_vshrntq_n_u16): Remove.
	(__arm_vshrnbq_n_s32): Remove.
	(__arm_vshrntq_n_s32): Remove.
	(__arm_vshrnbq_n_u32): Remove.
	(__arm_vshrntq_n_u32): Remove.
	(__arm_vshrnbq_m_n_s32): Remove.
	(__arm_vshrnbq_m_n_s16): Remove.
	(__arm_vshrnbq_m_n_u32): Remove.
	(__arm_vshrnbq_m_n_u16): Remove.
	(__arm_vshrntq_m_n_s32): Remove.
	(__arm_vshrntq_m_n_s16): Remove.
	(__arm_vshrntq_m_n_u32): Remove.
	(__arm_vshrntq_m_n_u16): Remove.
	(__arm_vshrnbq): Remove.
	(__arm_vshrntq): Remove.
	(__arm_vshrnbq_m): Remove.
	(__arm_vshrntq_m): Remove.
	(vrshrnbq): Remove.
	(vrshrntq): Remove.
	(vrshrnbq_m): Remove.
	(vrshrntq_m): Remove.
	(vrshrnbq_n_s16): Remove.
	(vrshrntq_n_s16): Remove.
	(vrshrnbq_n_u16): Remove.
	(vrshrntq_n_u16): Remove.
	(vrshrnbq_n_s32): Remove.
	(vrshrntq_n_s32): Remove.
	(vrshrnbq_n_u32): Remove.
	(vrshrntq_n_u32): Remove.
	(vrshrnbq_m_n_s32): Remove.
	(vrshrnbq_m_n_s16): Remove.
	(vrshrnbq_m_n_u32): Remove.
	(vrshrnbq_m_n_u16): Remove.
	(vrshrntq_m_n_s32): Remove.
	(vrshrntq_m_n_s16): Remove.
	(vrshrntq_m_n_u32): Remove.
	(vrshrntq_m_n_u16): Remove.
	(__arm_vrshrnbq_n_s16): Remove.
	(__arm_vrshrntq_n_s16): Remove.
	(__arm_vrshrnbq_n_u16): Remove.
	(__arm_vrshrntq_n_u16): Remove.
	(__arm_vrshrnbq_n_s32): Remove.
	(__arm_vrshrntq_n_s32): Remove.
	(__arm_vrshrnbq_n_u32): Remove.
	(__arm_vrshrntq_n_u32): Remove.
	(__arm_vrshrnbq_m_n_s32): Remove.
	(__arm_vrshrnbq_m_n_s16): Remove.
	(__arm_vrshrnbq_m_n_u32): Remove.
	(__arm_vrshrnbq_m_n_u16): Remove.
	(__arm_vrshrntq_m_n_s32): Remove.
	(__arm_vrshrntq_m_n_s16): Remove.
	(__arm_vrshrntq_m_n_u32): Remove.
	(__arm_vrshrntq_m_n_u16): Remove.
	(__arm_vrshrnbq): Remove.
	(__arm_vrshrntq): Remove.
	(__arm_vrshrnbq_m): Remove.
	(__arm_vrshrntq_m): Remove.
	(vqshrnbq): Remove.
	(vqshrntq): Remove.
	(vqshrnbq_m): Remove.
	(vqshrntq_m): Remove.
	(vqshrnbq_n_s16): Remove.
	(vqshrntq_n_s16): Remove.
	(vqshrnbq_n_u16): Remove.
	(vqshrntq_n_u16): Remove.
	(vqshrnbq_n_s32): Remove.
	(vqshrntq_n_s32): Remove.
	(vqshrnbq_n_u32): Remove.
	(vqshrntq_n_u32): Remove.
	(vqshrnbq_m_n_s32): Remove.
	(vqshrnbq_m_n_s16): Remove.
	(vqshrnbq_m_n_u32): Remove.
	(vqshrnbq_m_n_u16): Remove.
	(vqshrntq_m_n_s32): Remove.
	(vqshrntq_m_n_s16): Remove.
	(vqshrntq_m_n_u32): Remove.
	(vqshrntq_m_n_u16): Remove.
	(__arm_vqshrnbq_n_s16): Remove.
	(__arm_vqshrntq_n_s16): Remove.
	(__arm_vqshrnbq_n_u16): Remove.
	(__arm_vqshrntq_n_u16): Remove.
	(__arm_vqshrnbq_n_s32): Remove.
	(__arm_vqshrntq_n_s32): Remove.
	(__arm_vqshrnbq_n_u32): Remove.
	(__arm_vqshrntq_n_u32): Remove.
	(__arm_vqshrnbq_m_n_s32): Remove.
	(__arm_vqshrnbq_m_n_s16): Remove.
	(__arm_vqshrnbq_m_n_u32): Remove.
	(__arm_vqshrnbq_m_n_u16): Remove.
	(__arm_vqshrntq_m_n_s32): Remove.
	(__arm_vqshrntq_m_n_s16): Remove.
	(__arm_vqshrntq_m_n_u32): Remove.
	(__arm_vqshrntq_m_n_u16): Remove.
	(__arm_vqshrnbq): Remove.
	(__arm_vqshrntq): Remove.
	(__arm_vqshrnbq_m): Remove.
	(__arm_vqshrntq_m): Remove.
	(vqrshrnbq): Remove.
	(vqrshrntq): Remove.
	(vqrshrnbq_m): Remove.
	(vqrshrntq_m): Remove.
	(vqrshrnbq_n_s16): Remove.
	(vqrshrnbq_n_u16): Remove.
	(vqrshrnbq_n_s32): Remove.
	(vqrshrnbq_n_u32): Remove.
	(vqrshrntq_n_s16): Remove.
	(vqrshrntq_n_u16): Remove.
	(vqrshrntq_n_s32): Remove.
	(vqrshrntq_n_u32): Remove.
	(vqrshrnbq_m_n_s32): Remove.
	(vqrshrnbq_m_n_s16): Remove.
	(vqrshrnbq_m_n_u32): Remove.
	(vqrshrnbq_m_n_u16): Remove.
	(vqrshrntq_m_n_s32): Remove.
	(vqrshrntq_m_n_s16): Remove.
	(vqrshrntq_m_n_u32): Remove.
	(vqrshrntq_m_n_u16): Remove.
	(__arm_vqrshrnbq_n_s16): Remove.
	(__arm_vqrshrnbq_n_u16): Remove.
	(__arm_vqrshrnbq_n_s32): Remove.
	(__arm_vqrshrnbq_n_u32): Remove.
	(__arm_vqrshrntq_n_s16): Remove.
	(__arm_vqrshrntq_n_u16): Remove.
	(__arm_vqrshrntq_n_s32): Remove.
	(__arm_vqrshrntq_n_u32): Remove.
	(__arm_vqrshrnbq_m_n_s32): Remove.
	(__arm_vqrshrnbq_m_n_s16): Remove.
	(__arm_vqrshrnbq_m_n_u32): Remove.
	(__arm_vqrshrnbq_m_n_u16): Remove.
	(__arm_vqrshrntq_m_n_s32): Remove.
	(__arm_vqrshrntq_m_n_s16): Remove.
	(__arm_vqrshrntq_m_n_u32): Remove.
	(__arm_vqrshrntq_m_n_u16): Remove.
	(__arm_vqrshrnbq): Remove.
	(__arm_vqrshrntq): Remove.
	(__arm_vqrshrnbq_m): Remove.
	(__arm_vqrshrntq_m): Remove.
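
Note on FUNCTION_ONLY_N_NO_F: the helper is a thin wrapper around the
existing FUNCTION macro.  For vshrnbq, for example, it expands to
(a sketch, not part of the patch):

  FUNCTION (vshrnbq, unspec_mve_function_exact_insn,
	    (-1, -1, -1,
	     VSHRNBQ_N_S, VSHRNBQ_N_U, -1,
	     -1, -1, -1,
	     VSHRNBQ_M_N_S, VSHRNBQ_M_N_U, -1))

The twelve slots list the unspec codes for the {none, _n, _m, _m_n}
variants, each in {s, u, f} order; only the _n and _m_n signed/unsigned
slots are populated here, which is what "only _n version, no
floating-point" in the comment means.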
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   17 +
 gcc/config/arm/arm-mve-builtins-base.def |    8 +
 gcc/config/arm/arm-mve-builtins-base.h   |    8 +
 gcc/config/arm/arm-mve-builtins.cc       |   11 +-
 gcc/config/arm/arm_mve.h                 | 1196 +---------------------
 5 files changed, 65 insertions(+), 1175 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 1839d5cb1a5..c95abe70239 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -175,6 +175,15 @@ namespace arm_mve {
     UNSPEC##_M_S, UNSPEC##_M_U, UNSPEC##_M_F,				\
     -1, -1, -1))
 
+  /* Helper for builtins with only unspec codes, _m predicated
+     overrides, only _n version, no floating-point.  */
+#define FUNCTION_ONLY_N_NO_F(NAME, UNSPEC) FUNCTION			\
+  (NAME, unspec_mve_function_exact_insn,				\
+   (-1, -1, -1,								\
+    UNSPEC##_N_S, UNSPEC##_N_U, -1,					\
+    -1, -1, -1,								\
+    UNSPEC##_M_N_S, UNSPEC##_M_N_U, -1))
+
 FUNCTION_WITHOUT_N (vabdq, VABDQ)
 FUNCTION_WITH_RTX_M_N (vaddq, PLUS, VADDQ)
 FUNCTION_WITH_RTX_M (vandq, AND, VANDQ)
@@ -192,12 +201,20 @@ FUNCTION_WITH_M_N_NO_U_F (vqdmulhq, VQDMULHQ)
 FUNCTION_WITH_M_N_NO_F (vqrshlq, VQRSHLQ)
 FUNCTION_WITH_M_N_NO_U_F (vqrdmulhq, VQRDMULHQ)
 FUNCTION_WITH_M_N_R (vqshlq, VQSHLQ)
+FUNCTION_ONLY_N_NO_F (vqrshrnbq, VQRSHRNBQ)
+FUNCTION_ONLY_N_NO_F (vqrshrntq, VQRSHRNTQ)
+FUNCTION_ONLY_N_NO_F (vqshrnbq, VQSHRNBQ)
+FUNCTION_ONLY_N_NO_F (vqshrntq, VQSHRNTQ)
 FUNCTION_WITH_M_N_NO_F (vqsubq, VQSUBQ)
 FUNCTION (vreinterpretq, vreinterpretq_impl,)
 FUNCTION_WITHOUT_N_NO_F (vrhaddq, VRHADDQ)
 FUNCTION_WITHOUT_N_NO_F (vrmulhq, VRMULHQ)
 FUNCTION_WITH_M_N_NO_F (vrshlq, VRSHLQ)
+FUNCTION_ONLY_N_NO_F (vrshrnbq, VRSHRNBQ)
+FUNCTION_ONLY_N_NO_F (vrshrntq, VRSHRNTQ)
 FUNCTION_WITH_M_N_R (vshlq, VSHLQ)
+FUNCTION_ONLY_N_NO_F (vshrnbq, VSHRNBQ)
+FUNCTION_ONLY_N_NO_F (vshrntq, VSHRNTQ)
 FUNCTION_WITH_RTX_M_N (vsubq, MINUS, VSUBQ)
 FUNCTION (vuninitializedq, vuninitializedq_impl,)
 
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 3b42bf46e81..3dd40086663 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -34,15 +34,23 @@ DEF_MVE_FUNCTION (vqaddq, binary_opt_n, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vqdmulhq, binary_opt_n, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqrdmulhq, binary_opt_n, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqrshlq, binary_round_lshift, all_integer, m_or_none)
+DEF_MVE_FUNCTION (vqrshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
+DEF_MVE_FUNCTION (vqrshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vqshlq, binary_lshift, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vqshlq, binary_lshift_r, all_integer, m_or_none)
+DEF_MVE_FUNCTION (vqshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
+DEF_MVE_FUNCTION (vqshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vqsubq, binary_opt_n, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vreinterpretq, unary_convert, reinterpret_integer, none)
 DEF_MVE_FUNCTION (vrhaddq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vrmulhq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vrshlq, binary_round_lshift, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vrshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
+DEF_MVE_FUNCTION (vrshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vshlq, binary_lshift, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vshlq, binary_lshift_r, all_integer, m_or_none) // "_r" forms do not support the "x" predicate
+DEF_MVE_FUNCTION (vshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
+DEF_MVE_FUNCTION (vshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vuninitializedq, inherent, all_integer_with_64, none)
 #undef REQUIRES_FLOAT
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 81d10f4a8f4..9e11ac83681 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -39,13 +39,21 @@ extern const function_base *const vqaddq;
 extern const function_base *const vqdmulhq;
 extern const function_base *const vqrdmulhq;
 extern const function_base *const vqrshlq;
+extern const function_base *const vqrshrnbq;
+extern const function_base *const vqrshrntq;
 extern const function_base *const vqshlq;
+extern const function_base *const vqshrnbq;
+extern const function_base *const vqshrntq;
 extern const function_base *const vqsubq;
 extern const function_base *const vreinterpretq;
 extern const function_base *const vrhaddq;
 extern const function_base *const vrmulhq;
 extern const function_base *const vrshlq;
+extern const function_base *const vrshrnbq;
+extern const function_base *const vrshrntq;
 extern const function_base *const vshlq;
+extern const function_base *const vshrnbq;
+extern const function_base *const vshrntq;
 extern const function_base *const vsubq;
 extern const function_base *const vuninitializedq;
 
diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index c25b1be9903..667bbc58483 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -672,7 +672,16 @@ function_instance::has_inactive_argument () const
   if (mode_suffix_id == MODE_r
       || (base == functions::vorrq && mode_suffix_id == MODE_n)
       || (base == functions::vqrshlq && mode_suffix_id == MODE_n)
-      || (base == functions::vrshlq && mode_suffix_id == MODE_n))
+      || base == functions::vqrshrnbq
+      || base == functions::vqrshrntq
+      || base == functions::vqshrnbq
+      || base == functions::vqshrntq
+      || (base == functions::vrshlq && mode_suffix_id == MODE_n)
+      || base == functions::vrshrnbq
+      || base == functions::vrshrntq
+      || base == functions::vshrnbq
+      || base == functions::vshrntq
+      )
     return false;
 
   return true;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 5fbea52c8ef..ed7852e2460 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -113,7 +113,6 @@
 #define vrmlaldavhxq(__a, __b) __arm_vrmlaldavhxq(__a, __b)
 #define vabavq(__a, __b, __c) __arm_vabavq(__a, __b, __c)
 #define vbicq_m_n(__a, __imm, __p) __arm_vbicq_m_n(__a, __imm, __p)
-#define vqrshrnbq(__a, __b, __imm) __arm_vqrshrnbq(__a, __b, __imm)
 #define vqrshrunbq(__a, __b, __imm) __arm_vqrshrunbq(__a, __b, __imm)
 #define vrmlaldavhaq(__a, __b, __c) __arm_vrmlaldavhaq(__a, __b, __c)
 #define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
@@ -176,13 +175,6 @@
 #define vrmlaldavhxq_p(__a, __b, __p) __arm_vrmlaldavhxq_p(__a, __b, __p)
 #define vrmlsldavhq_p(__a, __b, __p) __arm_vrmlsldavhq_p(__a, __b, __p)
 #define vrmlsldavhxq_p(__a, __b, __p) __arm_vrmlsldavhxq_p(__a, __b, __p)
-#define vqrshrntq(__a, __b, __imm) __arm_vqrshrntq(__a, __b, __imm)
-#define vqshrnbq(__a, __b, __imm) __arm_vqshrnbq(__a, __b, __imm)
-#define vqshrntq(__a, __b, __imm) __arm_vqshrntq(__a, __b, __imm)
-#define vrshrnbq(__a, __b, __imm) __arm_vrshrnbq(__a, __b, __imm)
-#define vrshrntq(__a, __b, __imm) __arm_vrshrntq(__a, __b, __imm)
-#define vshrnbq(__a, __b, __imm) __arm_vshrnbq(__a, __b, __imm)
-#define vshrntq(__a, __b, __imm) __arm_vshrntq(__a, __b, __imm)
 #define vmlaldavaq(__a, __b, __c) __arm_vmlaldavaq(__a, __b, __c)
 #define vmlaldavaxq(__a, __b, __c) __arm_vmlaldavaxq(__a, __b, __c)
 #define vmlsldavaq(__a, __b, __c) __arm_vmlsldavaq(__a, __b, __c)
@@ -244,24 +236,16 @@
 #define vmulltq_poly_m(__inactive, __a, __b, __p) __arm_vmulltq_poly_m(__inactive, __a, __b, __p)
 #define vqdmullbq_m(__inactive, __a, __b, __p) __arm_vqdmullbq_m(__inactive, __a, __b, __p)
 #define vqdmulltq_m(__inactive, __a, __b, __p) __arm_vqdmulltq_m(__inactive, __a, __b, __p)
-#define vqrshrnbq_m(__a, __b, __imm, __p) __arm_vqrshrnbq_m(__a, __b, __imm, __p)
-#define vqrshrntq_m(__a, __b, __imm, __p) __arm_vqrshrntq_m(__a, __b, __imm, __p)
 #define vqrshrunbq_m(__a, __b, __imm, __p) __arm_vqrshrunbq_m(__a, __b, __imm, __p)
 #define vqrshruntq_m(__a, __b, __imm, __p) __arm_vqrshruntq_m(__a, __b, __imm, __p)
-#define vqshrnbq_m(__a, __b, __imm, __p) __arm_vqshrnbq_m(__a, __b, __imm, __p)
-#define vqshrntq_m(__a, __b, __imm, __p) __arm_vqshrntq_m(__a, __b, __imm, __p)
 #define vqshrunbq_m(__a, __b, __imm, __p) __arm_vqshrunbq_m(__a, __b, __imm, __p)
 #define vqshruntq_m(__a, __b, __imm, __p) __arm_vqshruntq_m(__a, __b, __imm, __p)
 #define vrmlaldavhaq_p(__a, __b, __c, __p) __arm_vrmlaldavhaq_p(__a, __b, __c, __p)
 #define vrmlaldavhaxq_p(__a, __b, __c, __p) __arm_vrmlaldavhaxq_p(__a, __b, __c, __p)
 #define vrmlsldavhaq_p(__a, __b, __c, __p) __arm_vrmlsldavhaq_p(__a, __b, __c, __p)
 #define vrmlsldavhaxq_p(__a, __b, __c, __p) __arm_vrmlsldavhaxq_p(__a, __b, __c, __p)
-#define vrshrnbq_m(__a, __b, __imm, __p) __arm_vrshrnbq_m(__a, __b, __imm, __p)
-#define vrshrntq_m(__a, __b, __imm, __p) __arm_vrshrntq_m(__a, __b, __imm, __p)
 #define vshllbq_m(__inactive, __a, __imm, __p) __arm_vshllbq_m(__inactive, __a, __imm, __p)
 #define vshlltq_m(__inactive, __a, __imm, __p) __arm_vshlltq_m(__inactive, __a, __imm, __p)
-#define vshrnbq_m(__a, __b, __imm, __p) __arm_vshrnbq_m(__a, __b, __imm, __p)
-#define vshrntq_m(__a, __b, __imm, __p) __arm_vshrntq_m(__a, __b, __imm, __p)
 #define vstrbq_scatter_offset(__base, __offset, __value) __arm_vstrbq_scatter_offset(__base, __offset, __value)
 #define vstrbq(__addr, __value) __arm_vstrbq(__addr, __value)
 #define vstrwq_scatter_base(__addr, __offset, __value) __arm_vstrwq_scatter_base(__addr, __offset, __value)
@@ -905,10 +889,6 @@
 #define vcvtq_m_f16_u16(__inactive, __a, __p) __arm_vcvtq_m_f16_u16(__inactive, __a, __p)
 #define vcvtq_m_f32_s32(__inactive, __a, __p) __arm_vcvtq_m_f32_s32(__inactive, __a, __p)
 #define vcvtq_m_f32_u32(__inactive, __a, __p) __arm_vcvtq_m_f32_u32(__inactive, __a, __p)
-#define vqrshrnbq_n_s16(__a, __b,  __imm) __arm_vqrshrnbq_n_s16(__a, __b,  __imm)
-#define vqrshrnbq_n_u16(__a, __b,  __imm) __arm_vqrshrnbq_n_u16(__a, __b,  __imm)
-#define vqrshrnbq_n_s32(__a, __b,  __imm) __arm_vqrshrnbq_n_s32(__a, __b,  __imm)
-#define vqrshrnbq_n_u32(__a, __b,  __imm) __arm_vqrshrnbq_n_u32(__a, __b,  __imm)
 #define vqrshrunbq_n_s16(__a, __b,  __imm) __arm_vqrshrunbq_n_s16(__a, __b,  __imm)
 #define vqrshrunbq_n_s32(__a, __b,  __imm) __arm_vqrshrunbq_n_s32(__a, __b,  __imm)
 #define vrmlaldavhaq_s32(__a, __b, __c) __arm_vrmlaldavhaq_s32(__a, __b, __c)
@@ -1167,13 +1147,6 @@
 #define vrev16q_m_u8(__inactive, __a, __p) __arm_vrev16q_m_u8(__inactive, __a, __p)
 #define vrmlaldavhq_p_u32(__a, __b, __p) __arm_vrmlaldavhq_p_u32(__a, __b, __p)
 #define vmvnq_m_n_s16(__inactive,  __imm, __p) __arm_vmvnq_m_n_s16(__inactive,  __imm, __p)
-#define vqrshrntq_n_s16(__a, __b,  __imm) __arm_vqrshrntq_n_s16(__a, __b,  __imm)
-#define vqshrnbq_n_s16(__a, __b,  __imm) __arm_vqshrnbq_n_s16(__a, __b,  __imm)
-#define vqshrntq_n_s16(__a, __b,  __imm) __arm_vqshrntq_n_s16(__a, __b,  __imm)
-#define vrshrnbq_n_s16(__a, __b,  __imm) __arm_vrshrnbq_n_s16(__a, __b,  __imm)
-#define vrshrntq_n_s16(__a, __b,  __imm) __arm_vrshrntq_n_s16(__a, __b,  __imm)
-#define vshrnbq_n_s16(__a, __b,  __imm) __arm_vshrnbq_n_s16(__a, __b,  __imm)
-#define vshrntq_n_s16(__a, __b,  __imm) __arm_vshrntq_n_s16(__a, __b,  __imm)
 #define vcmlaq_f16(__a, __b, __c) __arm_vcmlaq_f16(__a, __b, __c)
 #define vcmlaq_rot180_f16(__a, __b, __c) __arm_vcmlaq_rot180_f16(__a, __b, __c)
 #define vcmlaq_rot270_f16(__a, __b, __c) __arm_vcmlaq_rot270_f16(__a, __b, __c)
@@ -1239,13 +1212,6 @@
 #define vcvtq_m_u16_f16(__inactive, __a, __p) __arm_vcvtq_m_u16_f16(__inactive, __a, __p)
 #define vqmovunbq_m_s16(__a, __b, __p) __arm_vqmovunbq_m_s16(__a, __b, __p)
 #define vqmovuntq_m_s16(__a, __b, __p) __arm_vqmovuntq_m_s16(__a, __b, __p)
-#define vqrshrntq_n_u16(__a, __b,  __imm) __arm_vqrshrntq_n_u16(__a, __b,  __imm)
-#define vqshrnbq_n_u16(__a, __b,  __imm) __arm_vqshrnbq_n_u16(__a, __b,  __imm)
-#define vqshrntq_n_u16(__a, __b,  __imm) __arm_vqshrntq_n_u16(__a, __b,  __imm)
-#define vrshrnbq_n_u16(__a, __b,  __imm) __arm_vrshrnbq_n_u16(__a, __b,  __imm)
-#define vrshrntq_n_u16(__a, __b,  __imm) __arm_vrshrntq_n_u16(__a, __b,  __imm)
-#define vshrnbq_n_u16(__a, __b,  __imm) __arm_vshrnbq_n_u16(__a, __b,  __imm)
-#define vshrntq_n_u16(__a, __b,  __imm) __arm_vshrntq_n_u16(__a, __b,  __imm)
 #define vmlaldavaq_u16(__a, __b, __c) __arm_vmlaldavaq_u16(__a, __b, __c)
 #define vmlaldavq_p_u16(__a, __b, __p) __arm_vmlaldavq_p_u16(__a, __b, __p)
 #define vmovlbq_m_u8(__inactive, __a, __p) __arm_vmovlbq_m_u8(__inactive, __a, __p)
@@ -1256,13 +1222,6 @@
 #define vqmovntq_m_u16(__a, __b, __p) __arm_vqmovntq_m_u16(__a, __b, __p)
 #define vrev32q_m_u8(__inactive, __a, __p) __arm_vrev32q_m_u8(__inactive, __a, __p)
 #define vmvnq_m_n_s32(__inactive,  __imm, __p) __arm_vmvnq_m_n_s32(__inactive,  __imm, __p)
-#define vqrshrntq_n_s32(__a, __b,  __imm) __arm_vqrshrntq_n_s32(__a, __b,  __imm)
-#define vqshrnbq_n_s32(__a, __b,  __imm) __arm_vqshrnbq_n_s32(__a, __b,  __imm)
-#define vqshrntq_n_s32(__a, __b,  __imm) __arm_vqshrntq_n_s32(__a, __b,  __imm)
-#define vrshrnbq_n_s32(__a, __b,  __imm) __arm_vrshrnbq_n_s32(__a, __b,  __imm)
-#define vrshrntq_n_s32(__a, __b,  __imm) __arm_vrshrntq_n_s32(__a, __b,  __imm)
-#define vshrnbq_n_s32(__a, __b,  __imm) __arm_vshrnbq_n_s32(__a, __b,  __imm)
-#define vshrntq_n_s32(__a, __b,  __imm) __arm_vshrntq_n_s32(__a, __b,  __imm)
 #define vcmlaq_f32(__a, __b, __c) __arm_vcmlaq_f32(__a, __b, __c)
 #define vcmlaq_rot180_f32(__a, __b, __c) __arm_vcmlaq_rot180_f32(__a, __b, __c)
 #define vcmlaq_rot270_f32(__a, __b, __c) __arm_vcmlaq_rot270_f32(__a, __b, __c)
@@ -1328,13 +1287,6 @@
 #define vcvtq_m_u32_f32(__inactive, __a, __p) __arm_vcvtq_m_u32_f32(__inactive, __a, __p)
 #define vqmovunbq_m_s32(__a, __b, __p) __arm_vqmovunbq_m_s32(__a, __b, __p)
 #define vqmovuntq_m_s32(__a, __b, __p) __arm_vqmovuntq_m_s32(__a, __b, __p)
-#define vqrshrntq_n_u32(__a, __b,  __imm) __arm_vqrshrntq_n_u32(__a, __b,  __imm)
-#define vqshrnbq_n_u32(__a, __b,  __imm) __arm_vqshrnbq_n_u32(__a, __b,  __imm)
-#define vqshrntq_n_u32(__a, __b,  __imm) __arm_vqshrntq_n_u32(__a, __b,  __imm)
-#define vrshrnbq_n_u32(__a, __b,  __imm) __arm_vrshrnbq_n_u32(__a, __b,  __imm)
-#define vrshrntq_n_u32(__a, __b,  __imm) __arm_vrshrntq_n_u32(__a, __b,  __imm)
-#define vshrnbq_n_u32(__a, __b,  __imm) __arm_vshrnbq_n_u32(__a, __b,  __imm)
-#define vshrntq_n_u32(__a, __b,  __imm) __arm_vshrntq_n_u32(__a, __b,  __imm)
 #define vmlaldavaq_u32(__a, __b, __c) __arm_vmlaldavaq_u32(__a, __b, __c)
 #define vmlaldavq_p_u32(__a, __b, __p) __arm_vmlaldavq_p_u32(__a, __b, __p)
 #define vmovlbq_m_u16(__inactive, __a, __p) __arm_vmovlbq_m_u16(__inactive, __a, __p)
@@ -1514,26 +1466,10 @@
 #define vqdmulltq_m_n_s16(__inactive, __a, __b, __p) __arm_vqdmulltq_m_n_s16(__inactive, __a, __b, __p)
 #define vqdmulltq_m_s32(__inactive, __a, __b, __p) __arm_vqdmulltq_m_s32(__inactive, __a, __b, __p)
 #define vqdmulltq_m_s16(__inactive, __a, __b, __p) __arm_vqdmulltq_m_s16(__inactive, __a, __b, __p)
-#define vqrshrnbq_m_n_s32(__a, __b,  __imm, __p) __arm_vqrshrnbq_m_n_s32(__a, __b,  __imm, __p)
-#define vqrshrnbq_m_n_s16(__a, __b,  __imm, __p) __arm_vqrshrnbq_m_n_s16(__a, __b,  __imm, __p)
-#define vqrshrnbq_m_n_u32(__a, __b,  __imm, __p) __arm_vqrshrnbq_m_n_u32(__a, __b,  __imm, __p)
-#define vqrshrnbq_m_n_u16(__a, __b,  __imm, __p) __arm_vqrshrnbq_m_n_u16(__a, __b,  __imm, __p)
-#define vqrshrntq_m_n_s32(__a, __b,  __imm, __p) __arm_vqrshrntq_m_n_s32(__a, __b,  __imm, __p)
-#define vqrshrntq_m_n_s16(__a, __b,  __imm, __p) __arm_vqrshrntq_m_n_s16(__a, __b,  __imm, __p)
-#define vqrshrntq_m_n_u32(__a, __b,  __imm, __p) __arm_vqrshrntq_m_n_u32(__a, __b,  __imm, __p)
-#define vqrshrntq_m_n_u16(__a, __b,  __imm, __p) __arm_vqrshrntq_m_n_u16(__a, __b,  __imm, __p)
 #define vqrshrunbq_m_n_s32(__a, __b,  __imm, __p) __arm_vqrshrunbq_m_n_s32(__a, __b,  __imm, __p)
 #define vqrshrunbq_m_n_s16(__a, __b,  __imm, __p) __arm_vqrshrunbq_m_n_s16(__a, __b,  __imm, __p)
 #define vqrshruntq_m_n_s32(__a, __b,  __imm, __p) __arm_vqrshruntq_m_n_s32(__a, __b,  __imm, __p)
 #define vqrshruntq_m_n_s16(__a, __b,  __imm, __p) __arm_vqrshruntq_m_n_s16(__a, __b,  __imm, __p)
-#define vqshrnbq_m_n_s32(__a, __b,  __imm, __p) __arm_vqshrnbq_m_n_s32(__a, __b,  __imm, __p)
-#define vqshrnbq_m_n_s16(__a, __b,  __imm, __p) __arm_vqshrnbq_m_n_s16(__a, __b,  __imm, __p)
-#define vqshrnbq_m_n_u32(__a, __b,  __imm, __p) __arm_vqshrnbq_m_n_u32(__a, __b,  __imm, __p)
-#define vqshrnbq_m_n_u16(__a, __b,  __imm, __p) __arm_vqshrnbq_m_n_u16(__a, __b,  __imm, __p)
-#define vqshrntq_m_n_s32(__a, __b,  __imm, __p) __arm_vqshrntq_m_n_s32(__a, __b,  __imm, __p)
-#define vqshrntq_m_n_s16(__a, __b,  __imm, __p) __arm_vqshrntq_m_n_s16(__a, __b,  __imm, __p)
-#define vqshrntq_m_n_u32(__a, __b,  __imm, __p) __arm_vqshrntq_m_n_u32(__a, __b,  __imm, __p)
-#define vqshrntq_m_n_u16(__a, __b,  __imm, __p) __arm_vqshrntq_m_n_u16(__a, __b,  __imm, __p)
 #define vqshrunbq_m_n_s32(__a, __b,  __imm, __p) __arm_vqshrunbq_m_n_s32(__a, __b,  __imm, __p)
 #define vqshrunbq_m_n_s16(__a, __b,  __imm, __p) __arm_vqshrunbq_m_n_s16(__a, __b,  __imm, __p)
 #define vqshruntq_m_n_s32(__a, __b,  __imm, __p) __arm_vqshruntq_m_n_s32(__a, __b,  __imm, __p)
@@ -1543,14 +1479,6 @@
 #define vrmlaldavhaxq_p_s32(__a, __b, __c, __p) __arm_vrmlaldavhaxq_p_s32(__a, __b, __c, __p)
 #define vrmlsldavhaq_p_s32(__a, __b, __c, __p) __arm_vrmlsldavhaq_p_s32(__a, __b, __c, __p)
 #define vrmlsldavhaxq_p_s32(__a, __b, __c, __p) __arm_vrmlsldavhaxq_p_s32(__a, __b, __c, __p)
-#define vrshrnbq_m_n_s32(__a, __b,  __imm, __p) __arm_vrshrnbq_m_n_s32(__a, __b,  __imm, __p)
-#define vrshrnbq_m_n_s16(__a, __b,  __imm, __p) __arm_vrshrnbq_m_n_s16(__a, __b,  __imm, __p)
-#define vrshrnbq_m_n_u32(__a, __b,  __imm, __p) __arm_vrshrnbq_m_n_u32(__a, __b,  __imm, __p)
-#define vrshrnbq_m_n_u16(__a, __b,  __imm, __p) __arm_vrshrnbq_m_n_u16(__a, __b,  __imm, __p)
-#define vrshrntq_m_n_s32(__a, __b,  __imm, __p) __arm_vrshrntq_m_n_s32(__a, __b,  __imm, __p)
-#define vrshrntq_m_n_s16(__a, __b,  __imm, __p) __arm_vrshrntq_m_n_s16(__a, __b,  __imm, __p)
-#define vrshrntq_m_n_u32(__a, __b,  __imm, __p) __arm_vrshrntq_m_n_u32(__a, __b,  __imm, __p)
-#define vrshrntq_m_n_u16(__a, __b,  __imm, __p) __arm_vrshrntq_m_n_u16(__a, __b,  __imm, __p)
 #define vshllbq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vshllbq_m_n_s8(__inactive, __a,  __imm, __p)
 #define vshllbq_m_n_s16(__inactive, __a,  __imm, __p) __arm_vshllbq_m_n_s16(__inactive, __a,  __imm, __p)
 #define vshllbq_m_n_u8(__inactive, __a,  __imm, __p) __arm_vshllbq_m_n_u8(__inactive, __a,  __imm, __p)
@@ -1559,14 +1487,6 @@
 #define vshlltq_m_n_s16(__inactive, __a,  __imm, __p) __arm_vshlltq_m_n_s16(__inactive, __a,  __imm, __p)
 #define vshlltq_m_n_u8(__inactive, __a,  __imm, __p) __arm_vshlltq_m_n_u8(__inactive, __a,  __imm, __p)
 #define vshlltq_m_n_u16(__inactive, __a,  __imm, __p) __arm_vshlltq_m_n_u16(__inactive, __a,  __imm, __p)
-#define vshrnbq_m_n_s32(__a, __b,  __imm, __p) __arm_vshrnbq_m_n_s32(__a, __b,  __imm, __p)
-#define vshrnbq_m_n_s16(__a, __b,  __imm, __p) __arm_vshrnbq_m_n_s16(__a, __b,  __imm, __p)
-#define vshrnbq_m_n_u32(__a, __b,  __imm, __p) __arm_vshrnbq_m_n_u32(__a, __b,  __imm, __p)
-#define vshrnbq_m_n_u16(__a, __b,  __imm, __p) __arm_vshrnbq_m_n_u16(__a, __b,  __imm, __p)
-#define vshrntq_m_n_s32(__a, __b,  __imm, __p) __arm_vshrntq_m_n_s32(__a, __b,  __imm, __p)
-#define vshrntq_m_n_s16(__a, __b,  __imm, __p) __arm_vshrntq_m_n_s16(__a, __b,  __imm, __p)
-#define vshrntq_m_n_u32(__a, __b,  __imm, __p) __arm_vshrntq_m_n_u32(__a, __b,  __imm, __p)
-#define vshrntq_m_n_u16(__a, __b,  __imm, __p) __arm_vshrntq_m_n_u16(__a, __b,  __imm, __p)
 #define vbicq_m_f32(__inactive, __a, __b, __p) __arm_vbicq_m_f32(__inactive, __a, __b, __p)
 #define vbicq_m_f16(__inactive, __a, __b, __p) __arm_vbicq_m_f16(__inactive, __a, __b, __p)
 #define vbrsrq_m_n_f32(__inactive, __a, __b, __p) __arm_vbrsrq_m_n_f32(__inactive, __a, __b, __p)
@@ -4525,34 +4445,6 @@ __arm_vbicq_m_n_u32 (uint32x4_t __a, const int __imm, mve_pred16_t __p)
   return __builtin_mve_vbicq_m_n_uv4si (__a, __imm, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrnbq_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vqrshrnbq_n_sv8hi (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrnbq_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vqrshrnbq_n_uv8hi (__a, __b, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrnbq_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vqrshrnbq_n_sv4si (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrnbq_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vqrshrnbq_n_uv4si (__a, __b, __imm);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrshrunbq_n_s16 (uint8x16_t __a, int16x8_t __b, const int __imm)
@@ -6316,55 +6208,6 @@ __arm_vmvnq_m_n_s16 (int16x8_t __inactive, const int __imm, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_n_sv8hi (__inactive, __imm, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrntq_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vqrshrntq_n_sv8hi (__a, __b, __imm);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrnbq_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vqshrnbq_n_sv8hi (__a, __b, __imm);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrntq_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vqshrntq_n_sv8hi (__a, __b, __imm);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrnbq_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vrshrnbq_n_sv8hi (__a, __b, __imm);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrntq_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vrshrntq_n_sv8hi (__a, __b, __imm);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrnbq_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vshrnbq_n_sv8hi (__a, __b, __imm);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrntq_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vshrntq_n_sv8hi (__a, __b, __imm);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlaldavaq_s16 (int64_t __a, int16x8_t __b, int16x8_t __c)
@@ -6512,55 +6355,6 @@ __arm_vqmovuntq_m_s16 (uint8x16_t __a, int16x8_t __b, mve_pred16_t __p)
   return __builtin_mve_vqmovuntq_m_sv8hi (__a, __b, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrntq_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vqrshrntq_n_uv8hi (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrnbq_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vqshrnbq_n_uv8hi (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrntq_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vqshrntq_n_uv8hi (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrnbq_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vrshrnbq_n_uv8hi (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrntq_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vrshrntq_n_uv8hi (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrnbq_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vshrnbq_n_uv8hi (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrntq_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vshrntq_n_uv8hi (__a, __b, __imm);
-}
-
 __extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlaldavaq_u16 (uint64_t __a, uint16x8_t __b, uint16x8_t __c)
@@ -6631,55 +6425,6 @@ __arm_vmvnq_m_n_s32 (int32x4_t __inactive, const int __imm, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_n_sv4si (__inactive, __imm, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrntq_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vqrshrntq_n_sv4si (__a, __b, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrnbq_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vqshrnbq_n_sv4si (__a, __b, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrntq_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vqshrntq_n_sv4si (__a, __b, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrnbq_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vrshrnbq_n_sv4si (__a, __b, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrntq_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vrshrntq_n_sv4si (__a, __b, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrnbq_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vshrnbq_n_sv4si (__a, __b, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrntq_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vshrntq_n_sv4si (__a, __b, __imm);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlaldavaq_s32 (int64_t __a, int32x4_t __b, int32x4_t __c)
@@ -6827,55 +6572,6 @@ __arm_vqmovuntq_m_s32 (uint16x8_t __a, int32x4_t __b, mve_pred16_t __p)
   return __builtin_mve_vqmovuntq_m_sv4si (__a, __b, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrntq_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vqrshrntq_n_uv4si (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrnbq_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vqshrnbq_n_uv4si (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrntq_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vqshrntq_n_uv4si (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrnbq_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vrshrnbq_n_uv4si (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrntq_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vrshrntq_n_uv4si (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrnbq_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vshrnbq_n_uv4si (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrntq_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vshrntq_n_uv4si (__a, __b, __imm);
-}
-
 __extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlaldavaq_u32 (uint64_t __a, uint32x4_t __b, uint32x4_t __c)
@@ -8101,62 +7797,6 @@ __arm_vqdmulltq_m_s16 (int32x4_t __inactive, int16x8_t __a, int16x8_t __b, mve_p
   return __builtin_mve_vqdmulltq_m_sv8hi (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrnbq_m_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshrnbq_m_n_sv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrnbq_m_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshrnbq_m_n_sv8hi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrnbq_m_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshrnbq_m_n_uv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrnbq_m_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshrnbq_m_n_uv8hi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrntq_m_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshrntq_m_n_sv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrntq_m_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshrntq_m_n_sv8hi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrntq_m_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshrntq_m_n_uv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrntq_m_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshrntq_m_n_uv8hi (__a, __b, __imm, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrshrunbq_m_n_s32 (uint16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
@@ -8185,62 +7825,6 @@ __arm_vqrshruntq_m_n_s16 (uint8x16_t __a, int16x8_t __b, const int __imm, mve_pr
   return __builtin_mve_vqrshruntq_m_n_sv8hi (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrnbq_m_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshrnbq_m_n_sv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrnbq_m_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshrnbq_m_n_sv8hi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrnbq_m_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshrnbq_m_n_uv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrnbq_m_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshrnbq_m_n_uv8hi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrntq_m_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshrntq_m_n_sv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrntq_m_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshrntq_m_n_sv8hi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrntq_m_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshrntq_m_n_uv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrntq_m_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshrntq_m_n_uv8hi (__a, __b, __imm, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshrunbq_m_n_s32 (uint16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
@@ -8304,62 +7888,6 @@ __arm_vrmlsldavhaxq_p_s32 (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16
   return __builtin_mve_vrmlsldavhaxq_p_sv4si (__a, __b, __c, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrnbq_m_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrnbq_m_n_sv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrnbq_m_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrnbq_m_n_sv8hi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrnbq_m_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrnbq_m_n_uv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrnbq_m_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrnbq_m_n_uv8hi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrntq_m_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrntq_m_n_sv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrntq_m_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrntq_m_n_sv8hi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrntq_m_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrntq_m_n_uv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrntq_m_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrntq_m_n_uv8hi (__a, __b, __imm, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshllbq_m_n_s8 (int16x8_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
@@ -8416,62 +7944,6 @@ __arm_vshlltq_m_n_u16 (uint32x4_t __inactive, uint16x8_t __a, const int __imm, m
   return __builtin_mve_vshlltq_m_n_uv8hi (__inactive, __a, __imm, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrnbq_m_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrnbq_m_n_sv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrnbq_m_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrnbq_m_n_sv8hi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrnbq_m_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrnbq_m_n_uv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrnbq_m_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrnbq_m_n_uv8hi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrntq_m_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrntq_m_n_sv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrntq_m_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrntq_m_n_sv8hi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrntq_m_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrntq_m_n_uv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrntq_m_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrntq_m_n_uv8hi (__a, __b, __imm, __p);
-}
-
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vstrbq_scatter_offset_s8 (int8_t * __base, uint8x16_t __offset, int8x16_t __value)
@@ -16926,34 +16398,6 @@ __arm_vbicq_m_n (uint32x4_t __a, const int __imm, mve_pred16_t __p)
  return __arm_vbicq_m_n_u32 (__a, __imm, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrnbq (int8x16_t __a, int16x8_t __b, const int __imm)
-{
- return __arm_vqrshrnbq_n_s16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrnbq (uint8x16_t __a, uint16x8_t __b, const int __imm)
-{
- return __arm_vqrshrnbq_n_u16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrnbq (int16x8_t __a, int32x4_t __b, const int __imm)
-{
- return __arm_vqrshrnbq_n_s32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrnbq (uint16x8_t __a, uint32x4_t __b, const int __imm)
-{
- return __arm_vqrshrnbq_n_u32 (__a, __b, __imm);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrshrunbq (uint8x16_t __a, int16x8_t __b, const int __imm)
@@ -18704,55 +18148,6 @@ __arm_vmvnq_m (int16x8_t __inactive, const int __imm, mve_pred16_t __p)
  return __arm_vmvnq_m_n_s16 (__inactive, __imm, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrntq (int8x16_t __a, int16x8_t __b, const int __imm)
-{
- return __arm_vqrshrntq_n_s16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrnbq (int8x16_t __a, int16x8_t __b, const int __imm)
-{
- return __arm_vqshrnbq_n_s16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrntq (int8x16_t __a, int16x8_t __b, const int __imm)
-{
- return __arm_vqshrntq_n_s16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrnbq (int8x16_t __a, int16x8_t __b, const int __imm)
-{
- return __arm_vrshrnbq_n_s16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrntq (int8x16_t __a, int16x8_t __b, const int __imm)
-{
- return __arm_vrshrntq_n_s16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrnbq (int8x16_t __a, int16x8_t __b, const int __imm)
-{
- return __arm_vshrnbq_n_s16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrntq (int8x16_t __a, int16x8_t __b, const int __imm)
-{
- return __arm_vshrntq_n_s16 (__a, __b, __imm);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlaldavaq (int64_t __a, int16x8_t __b, int16x8_t __c)
@@ -18900,55 +18295,6 @@ __arm_vqmovuntq_m (uint8x16_t __a, int16x8_t __b, mve_pred16_t __p)
  return __arm_vqmovuntq_m_s16 (__a, __b, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrntq (uint8x16_t __a, uint16x8_t __b, const int __imm)
-{
- return __arm_vqrshrntq_n_u16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrnbq (uint8x16_t __a, uint16x8_t __b, const int __imm)
-{
- return __arm_vqshrnbq_n_u16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrntq (uint8x16_t __a, uint16x8_t __b, const int __imm)
-{
- return __arm_vqshrntq_n_u16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrnbq (uint8x16_t __a, uint16x8_t __b, const int __imm)
-{
- return __arm_vrshrnbq_n_u16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrntq (uint8x16_t __a, uint16x8_t __b, const int __imm)
-{
- return __arm_vrshrntq_n_u16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrnbq (uint8x16_t __a, uint16x8_t __b, const int __imm)
-{
- return __arm_vshrnbq_n_u16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrntq (uint8x16_t __a, uint16x8_t __b, const int __imm)
-{
- return __arm_vshrntq_n_u16 (__a, __b, __imm);
-}
-
 __extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlaldavaq (uint64_t __a, uint16x8_t __b, uint16x8_t __c)
@@ -19019,55 +18365,6 @@ __arm_vmvnq_m (int32x4_t __inactive, const int __imm, mve_pred16_t __p)
  return __arm_vmvnq_m_n_s32 (__inactive, __imm, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrntq (int16x8_t __a, int32x4_t __b, const int __imm)
-{
- return __arm_vqrshrntq_n_s32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrnbq (int16x8_t __a, int32x4_t __b, const int __imm)
-{
- return __arm_vqshrnbq_n_s32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrntq (int16x8_t __a, int32x4_t __b, const int __imm)
-{
- return __arm_vqshrntq_n_s32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrnbq (int16x8_t __a, int32x4_t __b, const int __imm)
-{
- return __arm_vrshrnbq_n_s32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrntq (int16x8_t __a, int32x4_t __b, const int __imm)
-{
- return __arm_vrshrntq_n_s32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrnbq (int16x8_t __a, int32x4_t __b, const int __imm)
-{
- return __arm_vshrnbq_n_s32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrntq (int16x8_t __a, int32x4_t __b, const int __imm)
-{
- return __arm_vshrntq_n_s32 (__a, __b, __imm);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlaldavaq (int64_t __a, int32x4_t __b, int32x4_t __c)
@@ -19152,116 +18449,67 @@ __arm_vmovntq_m (int16x8_t __a, int32x4_t __b, mve_pred16_t __p)
  return __arm_vmovntq_m_s32 (__a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqmovnbq_m (int16x8_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vqmovnbq_m_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqmovntq_m (int16x8_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vqmovntq_m_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vrev32q_m_s16 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
-{
- return __arm_vmvnq_m_n_u32 (__inactive, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshruntq (uint16x8_t __a, int32x4_t __b, const int __imm)
-{
- return __arm_vqrshruntq_n_s32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrunbq (uint16x8_t __a, int32x4_t __b, const int __imm)
-{
- return __arm_vqshrunbq_n_s32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshruntq (uint16x8_t __a, int32x4_t __b, const int __imm)
-{
- return __arm_vqshruntq_n_s32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqmovunbq_m (uint16x8_t __a, int32x4_t __b, mve_pred16_t __p)
+__arm_vqmovnbq_m (int16x8_t __a, int32x4_t __b, mve_pred16_t __p)
 {
- return __arm_vqmovunbq_m_s32 (__a, __b, __p);
+ return __arm_vqmovnbq_m_s32 (__a, __b, __p);
 }
 
-__extension__ extern __inline uint16x8_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqmovuntq_m (uint16x8_t __a, int32x4_t __b, mve_pred16_t __p)
+__arm_vqmovntq_m (int16x8_t __a, int32x4_t __b, mve_pred16_t __p)
 {
- return __arm_vqmovuntq_m_s32 (__a, __b, __p);
+ return __arm_vqmovntq_m_s32 (__a, __b, __p);
 }
 
-__extension__ extern __inline uint16x8_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrntq (uint16x8_t __a, uint32x4_t __b, const int __imm)
+__arm_vrev32q_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
 {
- return __arm_vqrshrntq_n_u32 (__a, __b, __imm);
+ return __arm_vrev32q_m_s16 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline uint16x8_t
+__extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrnbq (uint16x8_t __a, uint32x4_t __b, const int __imm)
+__arm_vmvnq_m (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
 {
- return __arm_vqshrnbq_n_u32 (__a, __b, __imm);
+ return __arm_vmvnq_m_n_u32 (__inactive, __imm, __p);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrntq (uint16x8_t __a, uint32x4_t __b, const int __imm)
+__arm_vqrshruntq (uint16x8_t __a, int32x4_t __b, const int __imm)
 {
- return __arm_vqshrntq_n_u32 (__a, __b, __imm);
+ return __arm_vqrshruntq_n_s32 (__a, __b, __imm);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrnbq (uint16x8_t __a, uint32x4_t __b, const int __imm)
+__arm_vqshrunbq (uint16x8_t __a, int32x4_t __b, const int __imm)
 {
- return __arm_vrshrnbq_n_u32 (__a, __b, __imm);
+ return __arm_vqshrunbq_n_s32 (__a, __b, __imm);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrntq (uint16x8_t __a, uint32x4_t __b, const int __imm)
+__arm_vqshruntq (uint16x8_t __a, int32x4_t __b, const int __imm)
 {
- return __arm_vrshrntq_n_u32 (__a, __b, __imm);
+ return __arm_vqshruntq_n_s32 (__a, __b, __imm);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrnbq (uint16x8_t __a, uint32x4_t __b, const int __imm)
+__arm_vqmovunbq_m (uint16x8_t __a, int32x4_t __b, mve_pred16_t __p)
 {
- return __arm_vshrnbq_n_u32 (__a, __b, __imm);
+ return __arm_vqmovunbq_m_s32 (__a, __b, __p);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrntq (uint16x8_t __a, uint32x4_t __b, const int __imm)
+__arm_vqmovuntq_m (uint16x8_t __a, int32x4_t __b, mve_pred16_t __p)
 {
- return __arm_vshrntq_n_u32 (__a, __b, __imm);
+ return __arm_vqmovuntq_m_s32 (__a, __b, __p);
 }
 
 __extension__ extern __inline uint64_t
@@ -20489,62 +19737,6 @@ __arm_vqdmulltq_m (int32x4_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred1
  return __arm_vqdmulltq_m_s16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrnbq_m (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqrshrnbq_m_n_s32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrnbq_m (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqrshrnbq_m_n_s16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrnbq_m (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqrshrnbq_m_n_u32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrnbq_m (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqrshrnbq_m_n_u16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrntq_m (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqrshrntq_m_n_s32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrntq_m (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqrshrntq_m_n_s16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrntq_m (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqrshrntq_m_n_u32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrntq_m (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqrshrntq_m_n_u16 (__a, __b, __imm, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrshrunbq_m (uint16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
@@ -20573,62 +19765,6 @@ __arm_vqrshruntq_m (uint8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t
  return __arm_vqrshruntq_m_n_s16 (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrnbq_m (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshrnbq_m_n_s32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrnbq_m (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshrnbq_m_n_s16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrnbq_m (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshrnbq_m_n_u32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrnbq_m (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshrnbq_m_n_u16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrntq_m (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshrntq_m_n_s32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrntq_m (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshrntq_m_n_s16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrntq_m (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshrntq_m_n_u32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrntq_m (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshrntq_m_n_u16 (__a, __b, __imm, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshrunbq_m (uint16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
@@ -20692,62 +19828,6 @@ __arm_vrmlsldavhaxq_p (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t _
  return __arm_vrmlsldavhaxq_p_s32 (__a, __b, __c, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrnbq_m (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrnbq_m_n_s32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrnbq_m (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrnbq_m_n_s16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrnbq_m (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrnbq_m_n_u32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrnbq_m (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrnbq_m_n_u16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrntq_m (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrntq_m_n_s32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrntq_m (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrntq_m_n_s16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrntq_m (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrntq_m_n_u32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrntq_m (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrntq_m_n_u16 (__a, __b, __imm, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshllbq_m (int16x8_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
@@ -20804,62 +19884,6 @@ __arm_vshlltq_m (uint32x4_t __inactive, uint16x8_t __a, const int __imm, mve_pre
  return __arm_vshlltq_m_n_u16 (__inactive, __a, __imm, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrnbq_m (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrnbq_m_n_s32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrnbq_m (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrnbq_m_n_s16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrnbq_m (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrnbq_m_n_u32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrnbq_m (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrnbq_m_n_u16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrntq_m (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrntq_m_n_s32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrntq_m (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrntq_m_n_s16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrntq_m (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrntq_m_n_u32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrntq_m (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrntq_m_n_u16 (__a, __b, __imm, __p);
-}
-
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vstrbq_scatter_offset (int8_t * __base, uint8x16_t __offset, int8x16_t __value)
@@ -26775,14 +25799,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vbicq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbicq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
 
-#define __arm_vqrshrnbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshrnbq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshrnbq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqrshrnbq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqrshrnbq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vqrshrunbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -27006,14 +26022,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t]: __arm_vmovltq_m_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t]: __arm_vmovltq_m_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), p2));})
 
-#define __arm_vshrnbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vshrnbq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vshrnbq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vshrnbq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vshrnbq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vcvtaq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -27350,14 +26358,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(p1, double)), \
   int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(p1, double)));})
 
-#define __arm_vrshrnbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vrshrnbq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vrshrnbq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vrshrnbq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vrshrnbq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vrev16q_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -27370,22 +26370,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshruntq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshruntq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
 
-#define __arm_vqshrnbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshrnbq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshrnbq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqshrnbq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqshrnbq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
-#define __arm_vqshrntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vqrshruntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -27420,14 +26404,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqmovuntq_m_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqmovuntq_m_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
 
-#define __arm_vqrshrntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqrshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqrshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vqrshruntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -28568,14 +27544,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vbicq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbicq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
 
-#define __arm_vqrshrnbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshrnbq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshrnbq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqrshrnbq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqrshrnbq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vqrshrunbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -28885,22 +27853,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vmovntq_m_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vmovntq_m_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
-#define __arm_vshrnbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vshrnbq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vshrnbq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vshrnbq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vshrnbq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
-#define __arm_vrshrnbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vrshrnbq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vrshrnbq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vrshrnbq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vrshrnbq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vrev32q_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -28921,36 +27873,12 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrev16q_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
   int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vrev16q_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2));})
 
-#define __arm_vqshrntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vqrshruntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshruntq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshruntq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
 
-#define __arm_vqrshrntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqrshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqrshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
-#define __arm_vqshrnbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshrnbq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshrnbq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqshrnbq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqshrnbq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vqmovuntq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -29474,22 +28402,6 @@ extern void *__ARM_undef;
 
 #endif /* MVE Integer.  */
 
-#define __arm_vshrntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
-
-#define __arm_vrshrntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vrshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vrshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vrshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vrshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
 
 #define __arm_vmvnq_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
@@ -29798,22 +28710,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t]: __arm_vshllbq_m_n_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t]: __arm_vshllbq_m_n_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3));})
 
-#define __arm_vshrntq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vshrntq_m_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vshrntq_m_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vshrntq_m_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vshrntq_m_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
-
-#define __arm_vshrnbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vshrnbq_m_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vshrnbq_m_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vshrnbq_m_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vshrnbq_m_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
-
 #define __arm_vshlltq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -29822,14 +28718,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t]: __arm_vshlltq_m_n_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t]: __arm_vshlltq_m_n_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3));})
 
-#define __arm_vrshrntq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vrshrntq_m_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vrshrntq_m_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vrshrntq_m_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vrshrntq_m_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
-
 #define __arm_vqshruntq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -29842,22 +28730,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshrunbq_m_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshrunbq_m_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3));})
 
-#define __arm_vqrshrnbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshrnbq_m_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshrnbq_m_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqrshrnbq_m_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqrshrnbq_m_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
-
-#define __arm_vqrshrntq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshrntq_m_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshrntq_m_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqrshrntq_m_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqrshrntq_m_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
-
 #define __arm_vqrshrunbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -29870,30 +28742,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshruntq_m_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshruntq_m_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3));})
 
-#define __arm_vqshrnbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshrnbq_m_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshrnbq_m_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqshrnbq_m_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqshrnbq_m_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
-
-#define __arm_vqshrntq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshrntq_m_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshrntq_m_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqshrntq_m_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqshrntq_m_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
-
-#define __arm_vrshrnbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vrshrnbq_m_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vrshrnbq_m_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vrshrnbq_m_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vrshrnbq_m_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
-
 #define __arm_vmlaldavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
-- 
2.34.1



* [PATCH 18/23] arm: [MVE intrinsics] add binary_rshift_narrow_unsigned shape
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (15 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 17/23] arm: [MVE intrinsics] rework vshrnbq vshrntq " Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 11:03   ` Kyrylo Tkachov
  2023-05-05  8:39 ` [PATCH 19/23] arm: [MVE intrinsics] factorize vqrshrunb vqrshrunt vqshrunb vqshrunt Christophe Lyon
                   ` (5 subsequent siblings)
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the binary_rshift_narrow_unsigned shape description,
for saturating shift-right-and-narrow operations that take a signed
input vector and produce an unsigned result (e.g. vqshrunbq).
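
For illustration (not part of the patch), here is a minimal sketch of
the kind of call this shape resolves, assuming a toolchain built with
this series and MVE enabled (e.g. -march=armv8.1-m.main+mve):

  #include <arm_mve.h>

  /* Hypothetical helper, not from the patch.  */
  uint8x16_t
  narrow_bottom_u8 (uint8x16_t a, int16x8_t b)
  {
    /* The overloaded form resolves to __arm_vqshrunbq_n_s16:
       argument 1 fixes the wide type (s16), argument 0 must be its
       unsigned half-width counterpart (u8), and the immediate must
       be in the [1..8] range.  */
    return vqshrunbq (a, b, 4);
  }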

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc
	(binary_rshift_narrow_unsigned): New.
	* config/arm/arm-mve-builtins-shapes.h
	(binary_rshift_narrow_unsigned): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 48 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 49 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 88934e1ca15..e3bf586565c 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -664,6 +664,54 @@ struct binary_rshift_narrow_def : public overloaded_base<0>
 };
 SHAPE (binary_rshift_narrow)
 
+/* <uT0:half>_t vfoo[_n_t0](<uT0:half>_t, <T0>_t, const int)
+
+   Vector saturating (rounding) shift right and narrow, with unsigned result.
+   Check that 'imm' is in the [1..#bits/2] range.
+
+   Example: vqshrunbq.
+   uint8x16_t [__arm_]vqshrunbq[_n_s16](uint8x16_t a, int16x8_t b, const int imm)
+   uint8x16_t [__arm_]vqshrunbq_m[_n_s16](uint8x16_t a, int16x8_t b, const int imm, mve_pred16_t p)  */
+struct binary_rshift_narrow_unsigned_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
+    build_all (b, "vhu0,vhu0,v0,ss32", group, MODE_n, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    type_suffix_index type;
+    if (!r.check_gp_argument (3, i, nargs)
+	|| (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES
+	|| !r.require_integer_immediate (i))
+      return error_mark_node;
+
+    type_suffix_index narrow_suffix
+      = find_type_suffix (TYPE_unsigned,
+			  type_suffixes[type].element_bits / 2);
+
+    if (!r.require_matching_vector_type (0, narrow_suffix))
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    unsigned int bits = c.type_suffix (0).element_bits;
+    return c.require_immediate_range (2, 1, bits / 2);
+  }
+
+};
+SHAPE (binary_rshift_narrow_unsigned)
+
 /* <T0>xN_t vfoo[_t0](uint64_t, uint64_t)
 
    where there are N arguments in total.
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index d72686d187b..ca1c1017e8e 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -41,6 +41,7 @@ namespace arm_mve
     extern const function_shape *const binary_orrq;
     extern const function_shape *const binary_round_lshift;
     extern const function_shape *const binary_rshift_narrow;
+    extern const function_shape *const binary_rshift_narrow_unsigned;
     extern const function_shape *const create;
     extern const function_shape *const inherent;
     extern const function_shape *const unary_convert;
-- 
2.34.1



* [PATCH 19/23] arm: [MVE intrinsics] factorize vqrshrunb vqrshrunt vqshrunb vqshrunt
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (16 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 18/23] arm: [MVE intrinsics] add binary_rshift_narrow_unsigned shape Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 11:04   ` Kyrylo Tkachov
  2023-05-05  8:39 ` [PATCH 20/23] arm: [MVE intrinsics] rework vqrshrunbq vqrshruntq vqshrunbq vqshruntq Christophe Lyon
                   ` (4 subsequent siblings)
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vqrshrunb, vqrshrunt, vqshrunb, vqshrunt so that they use
the existing MVE_SHRN_N and MVE_SHRN_M_N patterns instead of
dedicated define_insns.
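
As a rough sketch (not the verbatim mve.md source), the removed
define_insns collapse onto shared patterns along these lines, with
<mve_insn>, <isu> and <supf> supplying the per-unspec variation:

  ;; Sketch of the shared non-predicated pattern; the real definition
  ;; already exists in mve.md and this patch only extends its
  ;; iterators and attributes.
  (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
    [(set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
          (unspec:<V_narrow_pack>
            [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
             (match_operand:MVE_5 2 "s_register_operand" "w")
             (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
           MVE_SHRN_N))]
    "TARGET_HAVE_MVE"
    "<mve_insn>.<isu>%#<V_sz_elem>\t%q0, %q2, %3"
    [(set_attr "type" "mve_move")])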

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MVE_SHRN_N): Add VQRSHRUNBQ,
	VQRSHRUNTQ, VQSHRUNBQ, VQSHRUNTQ.
	(MVE_SHRN_M_N): Likewise.
	(mve_insn): Add vqrshrunb, vqrshrunt, vqshrunb, vqshrunt.
	(isu): Add VQRSHRUNBQ, VQRSHRUNTQ, VQSHRUNBQ, VQSHRUNTQ.
	(supf): Likewise.
	* config/arm/mve.md (mve_vqrshrunbq_n_s<mode>): Remove.
	(mve_vqrshruntq_n_s<mode>): Remove.
	(mve_vqshrunbq_n_s<mode>): Remove.
	(mve_vqshruntq_n_s<mode>): Remove.
	(mve_vqrshrunbq_m_n_s<mode>): Remove.
	(mve_vqrshruntq_m_n_s<mode>): Remove.
	(mve_vqshrunbq_m_n_s<mode>): Remove.
	(mve_vqshruntq_m_n_s<mode>): Remove.
---
 gcc/config/arm/iterators.md |  32 +++++++++
 gcc/config/arm/mve.md       | 140 +++---------------------------------
 2 files changed, 40 insertions(+), 132 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index d64c924a513..583206dac9e 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -471,8 +471,12 @@ (define_int_iterator MVE_RSHIFT_N   [
 (define_int_iterator MVE_SHRN_N [
 		     VQRSHRNBQ_N_S VQRSHRNBQ_N_U
 		     VQRSHRNTQ_N_S VQRSHRNTQ_N_U
+		     VQRSHRUNBQ_N_S
+		     VQRSHRUNTQ_N_S
 		     VQSHRNBQ_N_S VQSHRNBQ_N_U
 		     VQSHRNTQ_N_S VQSHRNTQ_N_U
+		     VQSHRUNBQ_N_S
+		     VQSHRUNTQ_N_S
 		     VRSHRNBQ_N_S VRSHRNBQ_N_U
 		     VRSHRNTQ_N_S VRSHRNTQ_N_U
 		     VSHRNBQ_N_S VSHRNBQ_N_U
@@ -482,8 +486,12 @@ (define_int_iterator MVE_SHRN_N [
 (define_int_iterator MVE_SHRN_M_N [
 		     VQRSHRNBQ_M_N_S VQRSHRNBQ_M_N_U
 		     VQRSHRNTQ_M_N_S VQRSHRNTQ_M_N_U
+		     VQRSHRUNBQ_M_N_S
+		     VQRSHRUNTQ_M_N_S
 		     VQSHRNBQ_M_N_S VQSHRNBQ_M_N_U
 		     VQSHRNTQ_M_N_S VQSHRNTQ_M_N_U
+		     VQSHRUNBQ_M_N_S
+		     VQSHRUNTQ_M_N_S
 		     VRSHRNBQ_M_N_S VRSHRNBQ_M_N_U
 		     VRSHRNTQ_M_N_S VRSHRNTQ_M_N_U
 		     VSHRNBQ_M_N_S VSHRNBQ_M_N_U
@@ -594,6 +602,10 @@ (define_int_attr mve_insn [
 		 (VQRSHRNBQ_N_S "vqrshrnb") (VQRSHRNBQ_N_U "vqrshrnb")
 		 (VQRSHRNTQ_M_N_S "vqrshrnt") (VQRSHRNTQ_M_N_U "vqrshrnt")
 		 (VQRSHRNTQ_N_S "vqrshrnt") (VQRSHRNTQ_N_U "vqrshrnt")
+		 (VQRSHRUNBQ_M_N_S "vqrshrunb")
+		 (VQRSHRUNBQ_N_S "vqrshrunb")
+		 (VQRSHRUNTQ_M_N_S "vqrshrunt")
+		 (VQRSHRUNTQ_N_S "vqrshrunt")
 		 (VQSHLQ_M_N_S "vqshl") (VQSHLQ_M_N_U "vqshl")
 		 (VQSHLQ_M_R_S "vqshl") (VQSHLQ_M_R_U "vqshl")
 		 (VQSHLQ_M_S "vqshl") (VQSHLQ_M_U "vqshl")
@@ -604,6 +616,10 @@ (define_int_attr mve_insn [
 		 (VQSHRNBQ_N_S "vqshrnb") (VQSHRNBQ_N_U "vqshrnb")
 		 (VQSHRNTQ_M_N_S "vqshrnt") (VQSHRNTQ_M_N_U "vqshrnt")
 		 (VQSHRNTQ_N_S "vqshrnt") (VQSHRNTQ_N_U "vqshrnt")
+		 (VQSHRUNBQ_M_N_S "vqshrunb")
+		 (VQSHRUNBQ_N_S "vqshrunb")
+		 (VQSHRUNTQ_M_N_S "vqshrunt")
+		 (VQSHRUNTQ_N_S "vqshrunt")
 		 (VQSUBQ_M_N_S "vqsub") (VQSUBQ_M_N_U "vqsub")
 		 (VQSUBQ_M_S "vqsub") (VQSUBQ_M_U "vqsub")
 		 (VQSUBQ_N_S "vqsub") (VQSUBQ_N_U "vqsub")
@@ -640,10 +656,18 @@ (define_int_attr isu    [
 		 (VQRSHRNBQ_N_S "s") (VQRSHRNBQ_N_U "u")
 		 (VQRSHRNTQ_M_N_S "s") (VQRSHRNTQ_M_N_U "u")
 		 (VQRSHRNTQ_N_S "s") (VQRSHRNTQ_N_U "u")
+		 (VQRSHRUNBQ_M_N_S "s")
+		 (VQRSHRUNBQ_N_S "s")
+		 (VQRSHRUNTQ_M_N_S "s")
+		 (VQRSHRUNTQ_N_S "s")
 		 (VQSHRNBQ_M_N_S "s") (VQSHRNBQ_M_N_U "u")
 		 (VQSHRNBQ_N_S "s") (VQSHRNBQ_N_U "u")
 		 (VQSHRNTQ_M_N_S "s") (VQSHRNTQ_M_N_U "u")
 		 (VQSHRNTQ_N_S "s") (VQSHRNTQ_N_U "u")
+		 (VQSHRUNBQ_M_N_S "s")
+		 (VQSHRUNBQ_N_S "s")
+		 (VQSHRUNTQ_M_N_S "s")
+		 (VQSHRUNTQ_N_S "s")
 		 (VRSHRNBQ_M_N_S "i") (VRSHRNBQ_M_N_U "i")
 		 (VRSHRNBQ_N_S "i") (VRSHRNBQ_N_U "i")
 		 (VRSHRNTQ_M_N_S "i") (VRSHRNTQ_M_N_U "i")
@@ -1816,6 +1840,14 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VQRDMULHQ_M_N_S "s")
 		       (VQDMULHQ_S "s")
 		       (VQRDMULHQ_S "s")
+		       (VQRSHRUNBQ_M_N_S "s")
+		       (VQRSHRUNBQ_N_S "s")
+		       (VQRSHRUNTQ_M_N_S "s")
+		       (VQRSHRUNTQ_N_S "s")
+		       (VQSHRUNBQ_M_N_S "s")
+		       (VQSHRUNBQ_N_S "s")
+		       (VQSHRUNTQ_M_N_S "s")
+		       (VQSHRUNTQ_N_S "s")
 		       ])
 
 ;; Both kinds of return insn.
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index d64a075c7bb..20ce7ecb3d6 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -2166,8 +2166,12 @@ (define_insn "mve_vcvtq_m_to_f_<supf><mode>"
 ;;
 ;; [vqrshrnbq_n_u, vqrshrnbq_n_s]
 ;; [vqrshrntq_n_u, vqrshrntq_n_s]
+;; [vqrshrunbq_n_s]
+;; [vqrshruntq_n_s]
 ;; [vqshrnbq_n_u, vqshrnbq_n_s]
 ;; [vqshrntq_n_u, vqshrntq_n_s]
+;; [vqshrunbq_n_s]
+;; [vqshruntq_n_s]
 ;; [vrshrnbq_n_s, vrshrnbq_n_u]
 ;; [vrshrntq_n_u, vrshrntq_n_s]
 ;; [vshrnbq_n_u, vshrnbq_n_s]
@@ -2186,22 +2190,6 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
   [(set_attr "type" "mve_move")
 ])
 
-;;
-;; [vqrshrunbq_n_s])
-;;
-(define_insn "mve_vqrshrunbq_n_s<mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-				 (match_operand:MVE_5 2 "s_register_operand" "w")
-				 (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
-	 VQRSHRUNBQ_N_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqrshrunb.s%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-])
-
 ;;
 ;; [vrmlaldavhaq_s vrmlaldavhaq_u])
 ;;
@@ -4002,54 +3990,6 @@ (define_insn "mve_vqmovuntq_m_s<mode>"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-;;
-;; [vqrshruntq_n_s])
-;;
-(define_insn "mve_vqrshruntq_n_s<mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
-	 VQRSHRUNTQ_N_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqrshrunt.s%#<V_sz_elem>	%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vqshrunbq_n_s])
-;;
-(define_insn "mve_vqshrunbq_n_s<mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
-	 VQSHRUNBQ_N_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqshrunb.s%#<V_sz_elem>	%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vqshruntq_n_s])
-;;
-(define_insn "mve_vqshruntq_n_s<mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
-	 VQSHRUNTQ_N_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqshrunt.s%#<V_sz_elem>	%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-])
-
 ;;
 ;; [vrev32q_m_f])
 ;;
@@ -4892,8 +4832,12 @@ (define_insn "mve_vmlaldavaxq_p_<supf><mode>"
 ;;
 ;; [vqrshrnbq_m_n_u, vqrshrnbq_m_n_s]
 ;; [vqrshrntq_m_n_s, vqrshrntq_m_n_u]
+;; [vqrshrunbq_m_n_s]
+;; [vqrshruntq_m_n_s]
 ;; [vqshrnbq_m_n_u, vqshrnbq_m_n_s]
 ;; [vqshrntq_m_n_s, vqshrntq_m_n_u]
+;; [vqshrunbq_m_n_s]
+;; [vqshruntq_m_n_s]
 ;; [vrshrnbq_m_n_u, vrshrnbq_m_n_s]
 ;; [vrshrntq_m_n_u, vrshrntq_m_n_s]
 ;; [vshrnbq_m_n_s, vshrnbq_m_n_u]
@@ -5100,74 +5044,6 @@ (define_insn "mve_vqdmulltq_m_s<mode>"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-;;
-;; [vqrshrunbq_m_n_s])
-;;
-(define_insn "mve_vqrshrunbq_m_n_s<mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VQRSHRUNBQ_M_N_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vqrshrunbt.s%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vqrshruntq_m_n_s])
-;;
-(define_insn "mve_vqrshruntq_m_n_s<mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VQRSHRUNTQ_M_N_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vqrshruntt.s%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vqshrunbq_m_n_s])
-;;
-(define_insn "mve_vqshrunbq_m_n_s<mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VQSHRUNBQ_M_N_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vqshrunbt.s%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vqshruntq_m_n_s])
-;;
-(define_insn "mve_vqshruntq_m_n_s<mode>"
-  [
-   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
-	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VQSHRUNTQ_M_N_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vqshruntt.s%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
 ;;
 ;; [vrmlaldavhaq_p_u])
 ;;
-- 
2.34.1



* [PATCH 20/23] arm: [MVE intrinsics] rework vqrshrunbq vqrshruntq vqshrunbq vqshruntq
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (17 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 19/23] arm: [MVE intrinsics] factorize vqrshrunb vqrshrunt vqshrunb vqshrunt Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 11:05   ` Kyrylo Tkachov
  2023-05-05  8:39 ` [PATCH 21/23] arm: [MVE intrinsics] add binary_rshift shape Christophe Lyon
                   ` (3 subsequent siblings)
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vqrshrunbq, vqrshruntq, vqshrunbq, vqshruntq using the new
MVE builtins framework, removing the now-redundant inline
implementations and _Generic dispatch macros from arm_mve.h.
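
For illustration (not part of the patch), a sketch showing that the
user-facing overloads are unchanged by the rework:

  #include <arm_mve.h>

  /* Hypothetical check, not from the patch.  */
  uint8x16_t
  narrow_top_sel (uint8x16_t a, int16x8_t b, mve_pred16_t p)
  {
    /* Resolves to __arm_vqrshruntq_m_n_s16; inactive elements keep
       the corresponding bytes of 'a'.  */
    return vqrshruntq_m (a, b, 3, p);
  }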

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (FUNCTION_ONLY_N_NO_U_F): New.
	(vqshrunbq, vqshruntq, vqrshrunbq, vqrshruntq): New.
	* config/arm/arm-mve-builtins-base.def (vqshrunbq, vqshruntq)
	(vqrshrunbq, vqrshruntq): New.
	* config/arm/arm-mve-builtins-base.h (vqshrunbq, vqshruntq)
	(vqrshrunbq, vqrshruntq): New.
	* config/arm/arm-mve-builtins.cc
	(function_instance::has_inactive_argument): Handle vqshrunbq,
	vqshruntq, vqrshrunbq, vqrshruntq.
	* config/arm/arm_mve.h (vqrshrunbq): Remove.
	(vqrshruntq): Remove.
	(vqrshrunbq_m): Remove.
	(vqrshruntq_m): Remove.
	(vqrshrunbq_n_s16): Remove.
	(vqrshrunbq_n_s32): Remove.
	(vqrshruntq_n_s16): Remove.
	(vqrshruntq_n_s32): Remove.
	(vqrshrunbq_m_n_s32): Remove.
	(vqrshrunbq_m_n_s16): Remove.
	(vqrshruntq_m_n_s32): Remove.
	(vqrshruntq_m_n_s16): Remove.
	(__arm_vqrshrunbq_n_s16): Remove.
	(__arm_vqrshrunbq_n_s32): Remove.
	(__arm_vqrshruntq_n_s16): Remove.
	(__arm_vqrshruntq_n_s32): Remove.
	(__arm_vqrshrunbq_m_n_s32): Remove.
	(__arm_vqrshrunbq_m_n_s16): Remove.
	(__arm_vqrshruntq_m_n_s32): Remove.
	(__arm_vqrshruntq_m_n_s16): Remove.
	(__arm_vqrshrunbq): Remove.
	(__arm_vqrshruntq): Remove.
	(__arm_vqrshrunbq_m): Remove.
	(__arm_vqrshruntq_m): Remove.
	(vqshrunbq): Remove.
	(vqshruntq): Remove.
	(vqshrunbq_m): Remove.
	(vqshruntq_m): Remove.
	(vqshrunbq_n_s16): Remove.
	(vqshruntq_n_s16): Remove.
	(vqshrunbq_n_s32): Remove.
	(vqshruntq_n_s32): Remove.
	(vqshrunbq_m_n_s32): Remove.
	(vqshrunbq_m_n_s16): Remove.
	(vqshruntq_m_n_s32): Remove.
	(vqshruntq_m_n_s16): Remove.
	(__arm_vqshrunbq_n_s16): Remove.
	(__arm_vqshruntq_n_s16): Remove.
	(__arm_vqshrunbq_n_s32): Remove.
	(__arm_vqshruntq_n_s32): Remove.
	(__arm_vqshrunbq_m_n_s32): Remove.
	(__arm_vqshrunbq_m_n_s16): Remove.
	(__arm_vqshruntq_m_n_s32): Remove.
	(__arm_vqshruntq_m_n_s16): Remove.
	(__arm_vqshrunbq): Remove.
	(__arm_vqshruntq): Remove.
	(__arm_vqshrunbq_m): Remove.
	(__arm_vqshruntq_m): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |  13 +
 gcc/config/arm/arm-mve-builtins-base.def |   4 +
 gcc/config/arm/arm-mve-builtins-base.h   |   4 +
 gcc/config/arm/arm-mve-builtins.cc       |   4 +
 gcc/config/arm/arm_mve.h                 | 320 -----------------------
 5 files changed, 25 insertions(+), 320 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index c95abe70239..e7d2e0abffc 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -184,6 +184,15 @@ namespace arm_mve {
     -1, -1, -1,								\
     UNSPEC##_M_N_S, UNSPEC##_M_N_U, -1))
 
+  /* Helper for builtins with only unspec codes, _m predicated
+     overrides, only _n version, no unsigned, no floating-point.  */
+#define FUNCTION_ONLY_N_NO_U_F(NAME, UNSPEC) FUNCTION			\
+  (NAME, unspec_mve_function_exact_insn,				\
+   (-1, -1, -1,								\
+    UNSPEC##_N_S, -1, -1,						\
+    -1, -1, -1,								\
+    UNSPEC##_M_N_S, -1, -1))
+
 FUNCTION_WITHOUT_N (vabdq, VABDQ)
 FUNCTION_WITH_RTX_M_N (vaddq, PLUS, VADDQ)
 FUNCTION_WITH_RTX_M (vandq, AND, VANDQ)
@@ -203,8 +212,12 @@ FUNCTION_WITH_M_N_NO_U_F (vqrdmulhq, VQRDMULHQ)
 FUNCTION_WITH_M_N_R (vqshlq, VQSHLQ)
 FUNCTION_ONLY_N_NO_F (vqrshrnbq, VQRSHRNBQ)
 FUNCTION_ONLY_N_NO_F (vqrshrntq, VQRSHRNTQ)
+FUNCTION_ONLY_N_NO_U_F (vqrshrunbq, VQRSHRUNBQ)
+FUNCTION_ONLY_N_NO_U_F (vqrshruntq, VQRSHRUNTQ)
 FUNCTION_ONLY_N_NO_F (vqshrnbq, VQSHRNBQ)
 FUNCTION_ONLY_N_NO_F (vqshrntq, VQSHRNTQ)
+FUNCTION_ONLY_N_NO_U_F (vqshrunbq, VQSHRUNBQ)
+FUNCTION_ONLY_N_NO_U_F (vqshruntq, VQSHRUNTQ)
 FUNCTION_WITH_M_N_NO_F (vqsubq, VQSUBQ)
 FUNCTION (vreinterpretq, vreinterpretq_impl,)
 FUNCTION_WITHOUT_N_NO_F (vrhaddq, VRHADDQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 3dd40086663..50cb2d055e9 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -36,10 +36,14 @@ DEF_MVE_FUNCTION (vqrdmulhq, binary_opt_n, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqrshlq, binary_round_lshift, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vqrshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vqrshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
+DEF_MVE_FUNCTION (vqrshrunbq, binary_rshift_narrow_unsigned, signed_16_32, m_or_none)
+DEF_MVE_FUNCTION (vqrshruntq, binary_rshift_narrow_unsigned, signed_16_32, m_or_none)
 DEF_MVE_FUNCTION (vqshlq, binary_lshift, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vqshlq, binary_lshift_r, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vqshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vqshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
+DEF_MVE_FUNCTION (vqshrunbq, binary_rshift_narrow_unsigned, signed_16_32, m_or_none)
+DEF_MVE_FUNCTION (vqshruntq, binary_rshift_narrow_unsigned, signed_16_32, m_or_none)
 DEF_MVE_FUNCTION (vqsubq, binary_opt_n, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vreinterpretq, unary_convert, reinterpret_integer, none)
 DEF_MVE_FUNCTION (vrhaddq, binary, all_integer, mx_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 9e11ac83681..fcac772bc5b 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -41,9 +41,13 @@ extern const function_base *const vqrdmulhq;
 extern const function_base *const vqrshlq;
 extern const function_base *const vqrshrnbq;
 extern const function_base *const vqrshrntq;
+extern const function_base *const vqrshrunbq;
+extern const function_base *const vqrshruntq;
 extern const function_base *const vqshlq;
 extern const function_base *const vqshrnbq;
 extern const function_base *const vqshrntq;
+extern const function_base *const vqshrunbq;
+extern const function_base *const vqshruntq;
 extern const function_base *const vqsubq;
 extern const function_base *const vreinterpretq;
 extern const function_base *const vrhaddq;
diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index 667bbc58483..4fc6160a794 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -674,8 +674,12 @@ function_instance::has_inactive_argument () const
       || (base == functions::vqrshlq && mode_suffix_id == MODE_n)
       || base == functions::vqrshrnbq
       || base == functions::vqrshrntq
+      || base == functions::vqrshrunbq
+      || base == functions::vqrshruntq
       || base == functions::vqshrnbq
       || base == functions::vqshrntq
+      || base == functions::vqshrunbq
+      || base == functions::vqshruntq
       || (base == functions::vrshlq && mode_suffix_id == MODE_n)
       || base == functions::vrshrnbq
       || base == functions::vrshrntq
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index ed7852e2460..b2701f1135d 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -113,7 +113,6 @@
 #define vrmlaldavhxq(__a, __b) __arm_vrmlaldavhxq(__a, __b)
 #define vabavq(__a, __b, __c) __arm_vabavq(__a, __b, __c)
 #define vbicq_m_n(__a, __imm, __p) __arm_vbicq_m_n(__a, __imm, __p)
-#define vqrshrunbq(__a, __b, __imm) __arm_vqrshrunbq(__a, __b, __imm)
 #define vrmlaldavhaq(__a, __b, __c) __arm_vrmlaldavhaq(__a, __b, __c)
 #define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
 #define vpselq(__a, __b, __p) __arm_vpselq(__a, __b, __p)
@@ -190,9 +189,6 @@
 #define vqmovnbq_m(__a, __b, __p) __arm_vqmovnbq_m(__a, __b, __p)
 #define vqmovntq_m(__a, __b, __p) __arm_vqmovntq_m(__a, __b, __p)
 #define vrev32q_m(__inactive, __a, __p) __arm_vrev32q_m(__inactive, __a, __p)
-#define vqrshruntq(__a, __b, __imm) __arm_vqrshruntq(__a, __b, __imm)
-#define vqshrunbq(__a, __b, __imm) __arm_vqshrunbq(__a, __b, __imm)
-#define vqshruntq(__a, __b, __imm) __arm_vqshruntq(__a, __b, __imm)
 #define vqmovunbq_m(__a, __b, __p) __arm_vqmovunbq_m(__a, __b, __p)
 #define vqmovuntq_m(__a, __b, __p) __arm_vqmovuntq_m(__a, __b, __p)
 #define vsriq_m(__a, __b, __imm, __p) __arm_vsriq_m(__a, __b, __imm, __p)
@@ -236,10 +232,6 @@
 #define vmulltq_poly_m(__inactive, __a, __b, __p) __arm_vmulltq_poly_m(__inactive, __a, __b, __p)
 #define vqdmullbq_m(__inactive, __a, __b, __p) __arm_vqdmullbq_m(__inactive, __a, __b, __p)
 #define vqdmulltq_m(__inactive, __a, __b, __p) __arm_vqdmulltq_m(__inactive, __a, __b, __p)
-#define vqrshrunbq_m(__a, __b, __imm, __p) __arm_vqrshrunbq_m(__a, __b, __imm, __p)
-#define vqrshruntq_m(__a, __b, __imm, __p) __arm_vqrshruntq_m(__a, __b, __imm, __p)
-#define vqshrunbq_m(__a, __b, __imm, __p) __arm_vqshrunbq_m(__a, __b, __imm, __p)
-#define vqshruntq_m(__a, __b, __imm, __p) __arm_vqshruntq_m(__a, __b, __imm, __p)
 #define vrmlaldavhaq_p(__a, __b, __c, __p) __arm_vrmlaldavhaq_p(__a, __b, __c, __p)
 #define vrmlaldavhaxq_p(__a, __b, __c, __p) __arm_vrmlaldavhaxq_p(__a, __b, __c, __p)
 #define vrmlsldavhaq_p(__a, __b, __c, __p) __arm_vrmlsldavhaq_p(__a, __b, __c, __p)
@@ -889,8 +881,6 @@
 #define vcvtq_m_f16_u16(__inactive, __a, __p) __arm_vcvtq_m_f16_u16(__inactive, __a, __p)
 #define vcvtq_m_f32_s32(__inactive, __a, __p) __arm_vcvtq_m_f32_s32(__inactive, __a, __p)
 #define vcvtq_m_f32_u32(__inactive, __a, __p) __arm_vcvtq_m_f32_u32(__inactive, __a, __p)
-#define vqrshrunbq_n_s16(__a, __b,  __imm) __arm_vqrshrunbq_n_s16(__a, __b,  __imm)
-#define vqrshrunbq_n_s32(__a, __b,  __imm) __arm_vqrshrunbq_n_s32(__a, __b,  __imm)
 #define vrmlaldavhaq_s32(__a, __b, __c) __arm_vrmlaldavhaq_s32(__a, __b, __c)
 #define vrmlaldavhaq_u32(__a, __b, __c) __arm_vrmlaldavhaq_u32(__a, __b, __c)
 #define vshlcq_s8(__a,  __b,  __imm) __arm_vshlcq_s8(__a,  __b,  __imm)
@@ -1203,9 +1193,6 @@
 #define vcmpneq_m_f16(__a, __b, __p) __arm_vcmpneq_m_f16(__a, __b, __p)
 #define vcmpneq_m_n_f16(__a, __b, __p) __arm_vcmpneq_m_n_f16(__a, __b, __p)
 #define vmvnq_m_n_u16(__inactive,  __imm, __p) __arm_vmvnq_m_n_u16(__inactive,  __imm, __p)
-#define vqrshruntq_n_s16(__a, __b,  __imm) __arm_vqrshruntq_n_s16(__a, __b,  __imm)
-#define vqshrunbq_n_s16(__a, __b,  __imm) __arm_vqshrunbq_n_s16(__a, __b,  __imm)
-#define vqshruntq_n_s16(__a, __b,  __imm) __arm_vqshruntq_n_s16(__a, __b,  __imm)
 #define vcvtmq_m_u16_f16(__inactive, __a, __p) __arm_vcvtmq_m_u16_f16(__inactive, __a, __p)
 #define vcvtnq_m_u16_f16(__inactive, __a, __p) __arm_vcvtnq_m_u16_f16(__inactive, __a, __p)
 #define vcvtpq_m_u16_f16(__inactive, __a, __p) __arm_vcvtpq_m_u16_f16(__inactive, __a, __p)
@@ -1278,9 +1265,6 @@
 #define vcmpneq_m_f32(__a, __b, __p) __arm_vcmpneq_m_f32(__a, __b, __p)
 #define vcmpneq_m_n_f32(__a, __b, __p) __arm_vcmpneq_m_n_f32(__a, __b, __p)
 #define vmvnq_m_n_u32(__inactive,  __imm, __p) __arm_vmvnq_m_n_u32(__inactive,  __imm, __p)
-#define vqrshruntq_n_s32(__a, __b,  __imm) __arm_vqrshruntq_n_s32(__a, __b,  __imm)
-#define vqshrunbq_n_s32(__a, __b,  __imm) __arm_vqshrunbq_n_s32(__a, __b,  __imm)
-#define vqshruntq_n_s32(__a, __b,  __imm) __arm_vqshruntq_n_s32(__a, __b,  __imm)
 #define vcvtmq_m_u32_f32(__inactive, __a, __p) __arm_vcvtmq_m_u32_f32(__inactive, __a, __p)
 #define vcvtnq_m_u32_f32(__inactive, __a, __p) __arm_vcvtnq_m_u32_f32(__inactive, __a, __p)
 #define vcvtpq_m_u32_f32(__inactive, __a, __p) __arm_vcvtpq_m_u32_f32(__inactive, __a, __p)
@@ -1466,14 +1450,6 @@
 #define vqdmulltq_m_n_s16(__inactive, __a, __b, __p) __arm_vqdmulltq_m_n_s16(__inactive, __a, __b, __p)
 #define vqdmulltq_m_s32(__inactive, __a, __b, __p) __arm_vqdmulltq_m_s32(__inactive, __a, __b, __p)
 #define vqdmulltq_m_s16(__inactive, __a, __b, __p) __arm_vqdmulltq_m_s16(__inactive, __a, __b, __p)
-#define vqrshrunbq_m_n_s32(__a, __b,  __imm, __p) __arm_vqrshrunbq_m_n_s32(__a, __b,  __imm, __p)
-#define vqrshrunbq_m_n_s16(__a, __b,  __imm, __p) __arm_vqrshrunbq_m_n_s16(__a, __b,  __imm, __p)
-#define vqrshruntq_m_n_s32(__a, __b,  __imm, __p) __arm_vqrshruntq_m_n_s32(__a, __b,  __imm, __p)
-#define vqrshruntq_m_n_s16(__a, __b,  __imm, __p) __arm_vqrshruntq_m_n_s16(__a, __b,  __imm, __p)
-#define vqshrunbq_m_n_s32(__a, __b,  __imm, __p) __arm_vqshrunbq_m_n_s32(__a, __b,  __imm, __p)
-#define vqshrunbq_m_n_s16(__a, __b,  __imm, __p) __arm_vqshrunbq_m_n_s16(__a, __b,  __imm, __p)
-#define vqshruntq_m_n_s32(__a, __b,  __imm, __p) __arm_vqshruntq_m_n_s32(__a, __b,  __imm, __p)
-#define vqshruntq_m_n_s16(__a, __b,  __imm, __p) __arm_vqshruntq_m_n_s16(__a, __b,  __imm, __p)
 #define vrmlaldavhaq_p_s32(__a, __b, __c, __p) __arm_vrmlaldavhaq_p_s32(__a, __b, __c, __p)
 #define vrmlaldavhaq_p_u32(__a, __b, __c, __p) __arm_vrmlaldavhaq_p_u32(__a, __b, __c, __p)
 #define vrmlaldavhaxq_p_s32(__a, __b, __c, __p) __arm_vrmlaldavhaxq_p_s32(__a, __b, __c, __p)
@@ -4445,20 +4421,6 @@ __arm_vbicq_m_n_u32 (uint32x4_t __a, const int __imm, mve_pred16_t __p)
   return __builtin_mve_vbicq_m_n_uv4si (__a, __imm, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrunbq_n_s16 (uint8x16_t __a, int16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vqrshrunbq_n_sv8hi (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrunbq_n_s32 (uint16x8_t __a, int32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vqrshrunbq_n_sv4si (__a, __b, __imm);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrmlaldavhaq_s32 (int64_t __a, int32x4_t __b, int32x4_t __c)
@@ -6320,27 +6282,6 @@ __arm_vmvnq_m_n_u16 (uint16x8_t __inactive, const int __imm, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_n_uv8hi (__inactive, __imm, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshruntq_n_s16 (uint8x16_t __a, int16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vqrshruntq_n_sv8hi (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrunbq_n_s16 (uint8x16_t __a, int16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vqshrunbq_n_sv8hi (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshruntq_n_s16 (uint8x16_t __a, int16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vqshruntq_n_sv8hi (__a, __b, __imm);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqmovunbq_m_s16 (uint8x16_t __a, int16x8_t __b, mve_pred16_t __p)
@@ -6537,27 +6478,6 @@ __arm_vmvnq_m_n_u32 (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_n_uv4si (__inactive, __imm, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshruntq_n_s32 (uint16x8_t __a, int32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vqrshruntq_n_sv4si (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrunbq_n_s32 (uint16x8_t __a, int32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vqshrunbq_n_sv4si (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshruntq_n_s32 (uint16x8_t __a, int32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vqshruntq_n_sv4si (__a, __b, __imm);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqmovunbq_m_s32 (uint16x8_t __a, int32x4_t __b, mve_pred16_t __p)
@@ -7797,62 +7717,6 @@ __arm_vqdmulltq_m_s16 (int32x4_t __inactive, int16x8_t __a, int16x8_t __b, mve_p
   return __builtin_mve_vqdmulltq_m_sv8hi (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrunbq_m_n_s32 (uint16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshrunbq_m_n_sv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrunbq_m_n_s16 (uint8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshrunbq_m_n_sv8hi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshruntq_m_n_s32 (uint16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshruntq_m_n_sv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshruntq_m_n_s16 (uint8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrshruntq_m_n_sv8hi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrunbq_m_n_s32 (uint16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshrunbq_m_n_sv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrunbq_m_n_s16 (uint8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshrunbq_m_n_sv8hi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshruntq_m_n_s32 (uint16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshruntq_m_n_sv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshruntq_m_n_s16 (uint8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshruntq_m_n_sv8hi (__a, __b, __imm, __p);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrmlaldavhaq_p_s32 (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
@@ -16398,20 +16262,6 @@ __arm_vbicq_m_n (uint32x4_t __a, const int __imm, mve_pred16_t __p)
  return __arm_vbicq_m_n_u32 (__a, __imm, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrunbq (uint8x16_t __a, int16x8_t __b, const int __imm)
-{
- return __arm_vqrshrunbq_n_s16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrunbq (uint16x8_t __a, int32x4_t __b, const int __imm)
-{
- return __arm_vqrshrunbq_n_s32 (__a, __b, __imm);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrmlaldavhaq (int64_t __a, int32x4_t __b, int32x4_t __c)
@@ -18260,27 +18110,6 @@ __arm_vmvnq_m (uint16x8_t __inactive, const int __imm, mve_pred16_t __p)
  return __arm_vmvnq_m_n_u16 (__inactive, __imm, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshruntq (uint8x16_t __a, int16x8_t __b, const int __imm)
-{
- return __arm_vqrshruntq_n_s16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrunbq (uint8x16_t __a, int16x8_t __b, const int __imm)
-{
- return __arm_vqshrunbq_n_s16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshruntq (uint8x16_t __a, int16x8_t __b, const int __imm)
-{
- return __arm_vqshruntq_n_s16 (__a, __b, __imm);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqmovunbq_m (uint8x16_t __a, int16x8_t __b, mve_pred16_t __p)
@@ -18477,27 +18306,6 @@ __arm_vmvnq_m (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
  return __arm_vmvnq_m_n_u32 (__inactive, __imm, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshruntq (uint16x8_t __a, int32x4_t __b, const int __imm)
-{
- return __arm_vqrshruntq_n_s32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrunbq (uint16x8_t __a, int32x4_t __b, const int __imm)
-{
- return __arm_vqshrunbq_n_s32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshruntq (uint16x8_t __a, int32x4_t __b, const int __imm)
-{
- return __arm_vqshruntq_n_s32 (__a, __b, __imm);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqmovunbq_m (uint16x8_t __a, int32x4_t __b, mve_pred16_t __p)
@@ -19737,62 +19545,6 @@ __arm_vqdmulltq_m (int32x4_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred1
  return __arm_vqdmulltq_m_s16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrunbq_m (uint16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqrshrunbq_m_n_s32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshrunbq_m (uint8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqrshrunbq_m_n_s16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshruntq_m (uint16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqrshruntq_m_n_s32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrshruntq_m (uint8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqrshruntq_m_n_s16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrunbq_m (uint16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshrunbq_m_n_s32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshrunbq_m (uint8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshrunbq_m_n_s16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshruntq_m (uint16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshruntq_m_n_s32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshruntq_m (uint8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshruntq_m_n_s16 (__a, __b, __imm, __p);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrmlaldavhaq_p (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
@@ -25799,12 +25551,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vbicq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbicq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
 
-#define __arm_vqrshrunbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshrunbq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshrunbq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
-
 #define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
@@ -26364,18 +26110,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrev16q_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
   int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vrev16q_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2));})
 
-#define __arm_vqshruntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshruntq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshruntq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
-
-#define __arm_vqrshruntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshruntq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshruntq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
-
 #define __arm_vqmovnbq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -26404,12 +26138,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqmovuntq_m_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqmovuntq_m_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
 
-#define __arm_vqrshruntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshruntq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshruntq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
-
 #define __arm_vnegq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -27544,12 +27272,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vbicq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbicq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
 
-#define __arm_vqrshrunbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshrunbq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshrunbq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
-
 #define __arm_vqrdmlsdhq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -27861,24 +27583,12 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vrev32q_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vrev32q_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2));})
 
-#define __arm_vqshruntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshruntq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshruntq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
-
 #define __arm_vrev16q_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrev16q_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
   int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vrev16q_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2));})
 
-#define __arm_vqrshruntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshruntq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshruntq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
-
 #define __arm_vqmovuntq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -28718,30 +28428,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t]: __arm_vshlltq_m_n_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t]: __arm_vshlltq_m_n_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3));})
 
-#define __arm_vqshruntq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshruntq_m_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshruntq_m_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3));})
-
-#define __arm_vqshrunbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshrunbq_m_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshrunbq_m_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3));})
-
-#define __arm_vqrshrunbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshrunbq_m_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshrunbq_m_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3));})
-
-#define __arm_vqrshruntq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshruntq_m_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshruntq_m_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3));})
-
 #define __arm_vmlaldavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -28831,12 +28517,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vmvnq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce1(__p1, int) , p2), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmvnq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce1(__p1, int) , p2));})
 
-#define __arm_vqshrunbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshrunbq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshrunbq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
-
 #define __arm_vqshluq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 46+ messages in thread

* [PATCH 21/23] arm: [MVE intrinsics] add binary_rshift shape
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (18 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 20/23] arm: [MVE intrinsics] rework vqrshrunbq vqrshruntq vqshrunbq vqshruntq Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 11:05   ` Kyrylo Tkachov
  2023-05-05  8:39 ` [PATCH 22/23] arm: [MVE intrinsics] factorize vshrq vrshrq Christophe Lyon
                   ` (2 subsequent siblings)
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the binary_rshift shape description.
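
Once an intrinsic uses this shape (vrshrq does, later in the series), the
check hook enforces the immediate range at compile time.  A hedged sketch
of what it accepts and rejects (function name mine):

  #include <arm_mve.h>

  int8x16_t
  shift_example (int8x16_t a)
  {
    /* OK: for s8 elements the shift amount must be in [1..8].  */
    return vrshrq (a, 8);
    /* vrshrq (a, 9) or vrshrq (a, 0) would be diagnosed at compile time.  */
  }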

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (binary_rshift): New.
	* config/arm/arm-mve-builtins-shapes.h (binary_rshift): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 36 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 37 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index e3bf586565c..7078f7d7220 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -365,6 +365,42 @@ struct binary_def : public overloaded_base<0>
 };
 SHAPE (binary)
 
+/* <T0>_t vfoo[_n_t0](<T0>_t, const int)
+
+   Shape for vector shift right operations that take a vector first
+   argument and an integer, and produce a vector.
+
+   Check that 'imm' is in the [1..#bits] range.
+
+   Example: vrshrq.
+   int8x16_t [__arm_]vrshrq[_n_s8](int8x16_t a, const int imm)
+   int8x16_t [__arm_]vrshrq_m[_n_s8](int8x16_t inactive, int8x16_t a, const int imm, mve_pred16_t p)
+   int8x16_t [__arm_]vrshrq_x[_n_s8](int8x16_t a, const int imm, mve_pred16_t p)  */
+struct binary_rshift_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
+    build_all (b, "v0,v0,ss32", group, MODE_n, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    return r.resolve_uniform (1, 1);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    unsigned int bits = c.type_suffix (0).element_bits;
+    return c.require_immediate_range (1, 1, bits);
+  }
+};
+SHAPE (binary_rshift)
+
 /* <T0>_t vfoo[_t0](<T0>_t, <T0>_t)
    <T0>_t vfoo[_n_t0](<T0>_t, <S0>_t)
 
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index ca1c1017e8e..09e00b69e63 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -40,6 +40,7 @@ namespace arm_mve
     extern const function_shape *const binary_opt_n;
     extern const function_shape *const binary_orrq;
     extern const function_shape *const binary_round_lshift;
+    extern const function_shape *const binary_rshift;
     extern const function_shape *const binary_rshift_narrow;
     extern const function_shape *const binary_rshift_narrow_unsigned;
     extern const function_shape *const create;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 46+ messages in thread

* [PATCH 22/23] arm: [MVE intrinsics] factorize vshrq vrshrq
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (19 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 21/23] arm: [MVE intrinsics] add binary_rshift shape Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 11:06   ` Kyrylo Tkachov
  2023-05-05  8:39 ` [PATCH 23/23] arm: [MVE intrinsics] rework vshrq vrshrq Christophe Lyon
  2023-05-05  9:55 ` [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Kyrylo Tkachov
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vshrq and vrshrq so that they use the same patterns.
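
The merge is mechanical: both intrinsics now expand through a single
pattern, with the mnemonic selected by the iterator.  A quick sanity
check one might compile on an MVE target (the expected mnemonics in the
comments are my assumption, e.g. with -march=armv8.1-m.main+mve):

  #include <arm_mve.h>

  int8x16_t plain (int8x16_t a) { return vshrq_n_s8 (a, 3); }   /* vshr.s8  */
  int8x16_t round (int8x16_t a) { return vrshrq_n_s8 (a, 3); }  /* vrshr.s8 */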

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MVE_VSHRQ_M_N, MVE_VSHRQ_N): New.
	(mve_insn): Add vrshr, vshr.
	* config/arm/mve.md (mve_vshrq_n_<supf><mode>)
	(mve_vrshrq_n_<supf><mode>): Merge into ...
	(@mve_<mve_insn>q_n_<supf><mode>): ... this.
	(mve_vrshrq_m_n_<supf><mode>, mve_vshrq_m_n_<supf><mode>): Merge
	into ...
	(@mve_<mve_insn>q_m_n_<supf><mode>): ... this.
---
 gcc/config/arm/iterators.md | 14 +++++++++++
 gcc/config/arm/mve.md       | 46 +++++++------------------------------
 2 files changed, 22 insertions(+), 38 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 583206dac9e..53873704174 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -408,6 +408,16 @@ (define_int_iterator MVE_INT_N_BINARY   [
 		     VSUBQ_N_S VSUBQ_N_U
 		     ])
 
+(define_int_iterator MVE_VSHRQ_M_N [
+		     VRSHRQ_M_N_S VRSHRQ_M_N_U
+		     VSHRQ_M_N_S VSHRQ_M_N_U
+		     ])
+
+(define_int_iterator MVE_VSHRQ_N [
+		     VRSHRQ_N_S VRSHRQ_N_U
+		     VSHRQ_N_S VSHRQ_N_U
+		     ])
+
 (define_int_iterator MVE_INT_SU_N_BINARY   [
 		     VHADDQ_N_S VHADDQ_N_U
 		     VHSUBQ_N_S VHSUBQ_N_U
@@ -636,6 +646,8 @@ (define_int_attr mve_insn [
 		 (VRSHRNBQ_N_S "vrshrnb") (VRSHRNBQ_N_U "vrshrnb")
 		 (VRSHRNTQ_M_N_S "vrshrnt") (VRSHRNTQ_M_N_U "vrshrnt")
 		 (VRSHRNTQ_N_S "vrshrnt") (VRSHRNTQ_N_U "vrshrnt")
+		 (VRSHRQ_M_N_S "vrshr") (VRSHRQ_M_N_U "vrshr")
+		 (VRSHRQ_N_S "vrshr") (VRSHRQ_N_U "vrshr")
 		 (VSHLQ_M_N_S "vshl") (VSHLQ_M_N_U "vshl")
 		 (VSHLQ_M_R_S "vshl") (VSHLQ_M_R_U "vshl")
 		 (VSHLQ_M_S "vshl") (VSHLQ_M_U "vshl")
@@ -646,6 +658,8 @@ (define_int_attr mve_insn [
 		 (VSHRNBQ_N_S "vshrnb") (VSHRNBQ_N_U "vshrnb")
 		 (VSHRNTQ_M_N_S "vshrnt") (VSHRNTQ_M_N_U "vshrnt")
 		 (VSHRNTQ_N_S "vshrnt") (VSHRNTQ_N_U "vshrnt")
+		 (VSHRQ_M_N_S "vshr") (VSHRQ_M_N_U "vshr")
+		 (VSHRQ_N_S "vshr") (VSHRQ_N_U "vshr")
 		 (VSUBQ_M_N_S "vsub") (VSUBQ_M_N_U "vsub") (VSUBQ_M_N_F "vsub")
 		 (VSUBQ_M_S "vsub") (VSUBQ_M_U "vsub") (VSUBQ_M_F "vsub")
 		 (VSUBQ_N_S "vsub") (VSUBQ_N_U "vsub") (VSUBQ_N_F "vsub")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 20ce7ecb3d6..b5c89fd4105 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -728,18 +728,19 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
    (set_attr "length""8")])
 
 ;;
-;; [vshrq_n_s, vshrq_n_u])
+;; [vrshrq_n_s, vrshrq_n_u]
+;; [vshrq_n_s, vshrq_n_u]
 ;;
 ;; Version that takes an immediate as operand 2.
-(define_insn "mve_vshrq_n_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
 		       (match_operand:SI 2 "<MVE_pred2>" "<MVE_constraint2>")]
-	 VSHRQ_N))
+	 MVE_VSHRQ_N))
   ]
   "TARGET_HAVE_MVE"
-  "vshr.<supf><V_sz_elem>\t%q0, %q1, %2"
+  "<mve_insn>.<supf><V_sz_elem>\t%q0, %q1, %2"
   [(set_attr "type" "mve_move")
 ])
 
@@ -1401,21 +1402,6 @@ (define_insn "mve_vqshluq_n_s<mode>"
   [(set_attr "type" "mve_move")
 ])
 
-;;
-;; [vrshrq_n_s, vrshrq_n_u])
-;;
-(define_insn "mve_vrshrq_n_<supf><mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:SI 2 "<MVE_pred2>" "<MVE_constraint2>")]
-	 VRSHRQ_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vrshr.<supf>%#<V_sz_elem>\t%q0, %q1, %2"
-  [(set_attr "type" "mve_move")
-])
-
 ;;
 ;; [vabdq_f]
 ;;
@@ -4661,35 +4647,19 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
 
 ;;
 ;; [vrshrq_m_n_s, vrshrq_m_n_u])
-;;
-(define_insn "mve_vrshrq_m_n_<supf><mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:SI 3 "<MVE_pred2>" "<MVE_constraint2>")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VRSHRQ_M_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vrshrt.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
 ;; [vshrq_m_n_s, vshrq_m_n_u])
 ;;
-(define_insn "mve_vshrq_m_n_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
 		       (match_operand:MVE_2 2 "s_register_operand" "w")
 		       (match_operand:SI 3 "<MVE_pred2>" "<MVE_constraint2>")
 		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VSHRQ_M_N))
+	 MVE_VSHRQ_M_N))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vshrt.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
+  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 46+ messages in thread

* [PATCH 23/23] arm: [MVE intrinsics] rework vshrq vrshrq
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (20 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 22/23] arm: [MVE intrinsics] factorize vshrq vrshrq Christophe Lyon
@ 2023-05-05  8:39 ` Christophe Lyon
  2023-05-05 11:07   ` Kyrylo Tkachov
  2023-05-05  9:55 ` [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Kyrylo Tkachov
  22 siblings, 1 reply; 46+ messages in thread
From: Christophe Lyon @ 2023-05-05  8:39 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vshrq and vrshrq using the new MVE builtins framework.
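
Behaviour is unchanged; only the plumbing moves from hand-written
arm_mve.h overloads to the framework.  A minimal sketch of the predicated
forms now routed through it (function name mine):

  #include <arm_mve.h>

  int16x8_t
  shr_example (int16x8_t inactive, int16x8_t a, mve_pred16_t p)
  {
    int16x8_t m = vshrq_m (inactive, a, 2, p);  /* false lanes from inactive */
    int16x8_t x = vrshrq_x (a, 2, p);           /* false lanes undefined */
    return vaddq (m, x);
  }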

2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vrshrq, vshrq): New.
	* config/arm/arm-mve-builtins-base.def (vrshrq, vshrq): New.
	* config/arm/arm-mve-builtins-base.h (vrshrq, vshrq): New.
	* config/arm/arm_mve.h (vshrq): Remove.
	(vrshrq): Remove.
	(vrshrq_m): Remove.
	(vshrq_m): Remove.
	(vrshrq_x): Remove.
	(vshrq_x): Remove.
	(vshrq_n_s8): Remove.
	(vshrq_n_s16): Remove.
	(vshrq_n_s32): Remove.
	(vshrq_n_u8): Remove.
	(vshrq_n_u16): Remove.
	(vshrq_n_u32): Remove.
	(vrshrq_n_u8): Remove.
	(vrshrq_n_s8): Remove.
	(vrshrq_n_u16): Remove.
	(vrshrq_n_s16): Remove.
	(vrshrq_n_u32): Remove.
	(vrshrq_n_s32): Remove.
	(vrshrq_m_n_s8): Remove.
	(vrshrq_m_n_s32): Remove.
	(vrshrq_m_n_s16): Remove.
	(vrshrq_m_n_u8): Remove.
	(vrshrq_m_n_u32): Remove.
	(vrshrq_m_n_u16): Remove.
	(vshrq_m_n_s8): Remove.
	(vshrq_m_n_s32): Remove.
	(vshrq_m_n_s16): Remove.
	(vshrq_m_n_u8): Remove.
	(vshrq_m_n_u32): Remove.
	(vshrq_m_n_u16): Remove.
	(vrshrq_x_n_s8): Remove.
	(vrshrq_x_n_s16): Remove.
	(vrshrq_x_n_s32): Remove.
	(vrshrq_x_n_u8): Remove.
	(vrshrq_x_n_u16): Remove.
	(vrshrq_x_n_u32): Remove.
	(vshrq_x_n_s8): Remove.
	(vshrq_x_n_s16): Remove.
	(vshrq_x_n_s32): Remove.
	(vshrq_x_n_u8): Remove.
	(vshrq_x_n_u16): Remove.
	(vshrq_x_n_u32): Remove.
	(__arm_vshrq_n_s8): Remove.
	(__arm_vshrq_n_s16): Remove.
	(__arm_vshrq_n_s32): Remove.
	(__arm_vshrq_n_u8): Remove.
	(__arm_vshrq_n_u16): Remove.
	(__arm_vshrq_n_u32): Remove.
	(__arm_vrshrq_n_u8): Remove.
	(__arm_vrshrq_n_s8): Remove.
	(__arm_vrshrq_n_u16): Remove.
	(__arm_vrshrq_n_s16): Remove.
	(__arm_vrshrq_n_u32): Remove.
	(__arm_vrshrq_n_s32): Remove.
	(__arm_vrshrq_m_n_s8): Remove.
	(__arm_vrshrq_m_n_s32): Remove.
	(__arm_vrshrq_m_n_s16): Remove.
	(__arm_vrshrq_m_n_u8): Remove.
	(__arm_vrshrq_m_n_u32): Remove.
	(__arm_vrshrq_m_n_u16): Remove.
	(__arm_vshrq_m_n_s8): Remove.
	(__arm_vshrq_m_n_s32): Remove.
	(__arm_vshrq_m_n_s16): Remove.
	(__arm_vshrq_m_n_u8): Remove.
	(__arm_vshrq_m_n_u32): Remove.
	(__arm_vshrq_m_n_u16): Remove.
	(__arm_vrshrq_x_n_s8): Remove.
	(__arm_vrshrq_x_n_s16): Remove.
	(__arm_vrshrq_x_n_s32): Remove.
	(__arm_vrshrq_x_n_u8): Remove.
	(__arm_vrshrq_x_n_u16): Remove.
	(__arm_vrshrq_x_n_u32): Remove.
	(__arm_vshrq_x_n_s8): Remove.
	(__arm_vshrq_x_n_s16): Remove.
	(__arm_vshrq_x_n_s32): Remove.
	(__arm_vshrq_x_n_u8): Remove.
	(__arm_vshrq_x_n_u16): Remove.
	(__arm_vshrq_x_n_u32): Remove.
	(__arm_vshrq): Remove.
	(__arm_vrshrq): Remove.
	(__arm_vrshrq_m): Remove.
	(__arm_vshrq_m): Remove.
	(__arm_vrshrq_x): Remove.
	(__arm_vshrq_x): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   2 +
 gcc/config/arm/arm-mve-builtins-base.def |   2 +
 gcc/config/arm/arm-mve-builtins-base.h   |   2 +
 gcc/config/arm/arm_mve.h                 | 628 -----------------------
 4 files changed, 6 insertions(+), 628 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index e7d2e0abffc..bb585a3921f 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -225,9 +225,11 @@ FUNCTION_WITHOUT_N_NO_F (vrmulhq, VRMULHQ)
 FUNCTION_WITH_M_N_NO_F (vrshlq, VRSHLQ)
 FUNCTION_ONLY_N_NO_F (vrshrnbq, VRSHRNBQ)
 FUNCTION_ONLY_N_NO_F (vrshrntq, VRSHRNTQ)
+FUNCTION_ONLY_N_NO_F (vrshrq, VRSHRQ)
 FUNCTION_WITH_M_N_R (vshlq, VSHLQ)
 FUNCTION_ONLY_N_NO_F (vshrnbq, VSHRNBQ)
 FUNCTION_ONLY_N_NO_F (vshrntq, VSHRNTQ)
+FUNCTION_ONLY_N_NO_F (vshrq, VSHRQ)
 FUNCTION_WITH_RTX_M_N (vsubq, MINUS, VSUBQ)
 FUNCTION (vuninitializedq, vuninitializedq_impl,)
 
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 50cb2d055e9..33c95c02396 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -51,10 +51,12 @@ DEF_MVE_FUNCTION (vrmulhq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vrshlq, binary_round_lshift, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vrshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vrshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
+DEF_MVE_FUNCTION (vrshrq, binary_rshift, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vshlq, binary_lshift, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vshlq, binary_lshift_r, all_integer, m_or_none) // "_r" forms do not support the "x" predicate
 DEF_MVE_FUNCTION (vshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
+DEF_MVE_FUNCTION (vshrq, binary_rshift, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vuninitializedq, inherent, all_integer_with_64, none)
 #undef REQUIRES_FLOAT
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index fcac772bc5b..2a230f5f34d 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -55,9 +55,11 @@ extern const function_base *const vrmulhq;
 extern const function_base *const vrshlq;
 extern const function_base *const vrshrnbq;
 extern const function_base *const vrshrntq;
+extern const function_base *const vrshrq;
 extern const function_base *const vshlq;
 extern const function_base *const vshrnbq;
 extern const function_base *const vshrntq;
+extern const function_base *const vshrq;
 extern const function_base *const vsubq;
 extern const function_base *const vuninitializedq;
 
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index b2701f1135d..89de7e0e46b 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -57,7 +57,6 @@
 #define vrev64q(__a) __arm_vrev64q(__a)
 #define vqabsq(__a) __arm_vqabsq(__a)
 #define vqnegq(__a) __arm_vqnegq(__a)
-#define vshrq(__a, __imm) __arm_vshrq(__a, __imm)
 #define vaddlvq_p(__a, __p) __arm_vaddlvq_p(__a, __p)
 #define vcmpneq(__a, __b) __arm_vcmpneq(__a, __b)
 #define vornq(__a, __b) __arm_vornq(__a, __b)
@@ -79,7 +78,6 @@
 #define vmaxavq(__a, __b) __arm_vmaxavq(__a, __b)
 #define vmaxaq(__a, __b) __arm_vmaxaq(__a, __b)
 #define vbrsrq(__a, __b) __arm_vbrsrq(__a, __b)
-#define vrshrq(__a, __imm) __arm_vrshrq(__a, __imm)
 #define vcmpltq(__a, __b) __arm_vcmpltq(__a, __b)
 #define vcmpleq(__a, __b) __arm_vcmpleq(__a, __b)
 #define vcmpgtq(__a, __b) __arm_vcmpgtq(__a, __b)
@@ -221,8 +219,6 @@
 #define vqrdmlashq_m(__a, __b, __c, __p) __arm_vqrdmlashq_m(__a, __b, __c, __p)
 #define vqrdmlsdhq_m(__inactive, __a, __b, __p) __arm_vqrdmlsdhq_m(__inactive, __a, __b, __p)
 #define vqrdmlsdhxq_m(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m(__inactive, __a, __b, __p)
-#define vrshrq_m(__inactive, __a, __imm, __p) __arm_vrshrq_m(__inactive, __a, __imm, __p)
-#define vshrq_m(__inactive, __a, __imm, __p) __arm_vshrq_m(__inactive, __a, __imm, __p)
 #define vsliq_m(__a, __b, __imm, __p) __arm_vsliq_m(__a, __b, __imm, __p)
 #define vmlaldavaq_p(__a, __b, __c, __p) __arm_vmlaldavaq_p(__a, __b, __c, __p)
 #define vmlaldavaxq_p(__a, __b, __c, __p) __arm_vmlaldavaxq_p(__a, __b, __c, __p)
@@ -334,8 +330,6 @@
 #define vrev64q_x(__a, __p) __arm_vrev64q_x(__a, __p)
 #define vshllbq_x(__a, __imm, __p) __arm_vshllbq_x(__a, __imm, __p)
 #define vshlltq_x(__a, __imm, __p) __arm_vshlltq_x(__a, __imm, __p)
-#define vrshrq_x(__a, __imm, __p) __arm_vrshrq_x(__a, __imm, __p)
-#define vshrq_x(__a, __imm, __p) __arm_vshrq_x(__a, __imm, __p)
 #define vadciq(__a, __b, __carry_out) __arm_vadciq(__a, __b, __carry_out)
 #define vadciq_m(__inactive, __a, __b, __carry_out, __p) __arm_vadciq_m(__inactive, __a, __b, __carry_out, __p)
 #define vadcq(__a, __b, __carry) __arm_vadcq(__a, __b, __carry)
@@ -565,12 +559,6 @@
 #define vcvtq_n_s32_f32(__a,  __imm6) __arm_vcvtq_n_s32_f32(__a,  __imm6)
 #define vcvtq_n_u16_f16(__a,  __imm6) __arm_vcvtq_n_u16_f16(__a,  __imm6)
 #define vcvtq_n_u32_f32(__a,  __imm6) __arm_vcvtq_n_u32_f32(__a,  __imm6)
-#define vshrq_n_s8(__a,  __imm) __arm_vshrq_n_s8(__a,  __imm)
-#define vshrq_n_s16(__a,  __imm) __arm_vshrq_n_s16(__a,  __imm)
-#define vshrq_n_s32(__a,  __imm) __arm_vshrq_n_s32(__a,  __imm)
-#define vshrq_n_u8(__a,  __imm) __arm_vshrq_n_u8(__a,  __imm)
-#define vshrq_n_u16(__a,  __imm) __arm_vshrq_n_u16(__a,  __imm)
-#define vshrq_n_u32(__a,  __imm) __arm_vshrq_n_u32(__a,  __imm)
 #define vaddlvq_p_s32(__a, __p) __arm_vaddlvq_p_s32(__a, __p)
 #define vaddlvq_p_u32(__a, __p) __arm_vaddlvq_p_u32(__a, __p)
 #define vcmpneq_s8(__a, __b) __arm_vcmpneq_s8(__a, __b)
@@ -602,7 +590,6 @@
 #define vmaxavq_s8(__a, __b) __arm_vmaxavq_s8(__a, __b)
 #define vmaxaq_s8(__a, __b) __arm_vmaxaq_s8(__a, __b)
 #define vbrsrq_n_u8(__a, __b) __arm_vbrsrq_n_u8(__a, __b)
-#define vrshrq_n_u8(__a,  __imm) __arm_vrshrq_n_u8(__a,  __imm)
 #define vcmpneq_n_s8(__a, __b) __arm_vcmpneq_n_s8(__a, __b)
 #define vcmpltq_s8(__a, __b) __arm_vcmpltq_s8(__a, __b)
 #define vcmpltq_n_s8(__a, __b) __arm_vcmpltq_n_s8(__a, __b)
@@ -632,7 +619,6 @@
 #define vbrsrq_n_s8(__a, __b) __arm_vbrsrq_n_s8(__a, __b)
 #define vbicq_s8(__a, __b) __arm_vbicq_s8(__a, __b)
 #define vaddvaq_s8(__a, __b) __arm_vaddvaq_s8(__a, __b)
-#define vrshrq_n_s8(__a,  __imm) __arm_vrshrq_n_s8(__a,  __imm)
 #define vornq_u16(__a, __b) __arm_vornq_u16(__a, __b)
 #define vmulltq_int_u16(__a, __b) __arm_vmulltq_int_u16(__a, __b)
 #define vmullbq_int_u16(__a, __b) __arm_vmullbq_int_u16(__a, __b)
@@ -656,7 +642,6 @@
 #define vmaxavq_s16(__a, __b) __arm_vmaxavq_s16(__a, __b)
 #define vmaxaq_s16(__a, __b) __arm_vmaxaq_s16(__a, __b)
 #define vbrsrq_n_u16(__a, __b) __arm_vbrsrq_n_u16(__a, __b)
-#define vrshrq_n_u16(__a,  __imm) __arm_vrshrq_n_u16(__a,  __imm)
 #define vcmpneq_n_s16(__a, __b) __arm_vcmpneq_n_s16(__a, __b)
 #define vcmpltq_s16(__a, __b) __arm_vcmpltq_s16(__a, __b)
 #define vcmpltq_n_s16(__a, __b) __arm_vcmpltq_n_s16(__a, __b)
@@ -686,7 +671,6 @@
 #define vbrsrq_n_s16(__a, __b) __arm_vbrsrq_n_s16(__a, __b)
 #define vbicq_s16(__a, __b) __arm_vbicq_s16(__a, __b)
 #define vaddvaq_s16(__a, __b) __arm_vaddvaq_s16(__a, __b)
-#define vrshrq_n_s16(__a,  __imm) __arm_vrshrq_n_s16(__a,  __imm)
 #define vornq_u32(__a, __b) __arm_vornq_u32(__a, __b)
 #define vmulltq_int_u32(__a, __b) __arm_vmulltq_int_u32(__a, __b)
 #define vmullbq_int_u32(__a, __b) __arm_vmullbq_int_u32(__a, __b)
@@ -710,7 +694,6 @@
 #define vmaxavq_s32(__a, __b) __arm_vmaxavq_s32(__a, __b)
 #define vmaxaq_s32(__a, __b) __arm_vmaxaq_s32(__a, __b)
 #define vbrsrq_n_u32(__a, __b) __arm_vbrsrq_n_u32(__a, __b)
-#define vrshrq_n_u32(__a,  __imm) __arm_vrshrq_n_u32(__a,  __imm)
 #define vcmpneq_n_s32(__a, __b) __arm_vcmpneq_n_s32(__a, __b)
 #define vcmpltq_s32(__a, __b) __arm_vcmpltq_s32(__a, __b)
 #define vcmpltq_n_s32(__a, __b) __arm_vcmpltq_n_s32(__a, __b)
@@ -740,7 +723,6 @@
 #define vbrsrq_n_s32(__a, __b) __arm_vbrsrq_n_s32(__a, __b)
 #define vbicq_s32(__a, __b) __arm_vbicq_s32(__a, __b)
 #define vaddvaq_s32(__a, __b) __arm_vaddvaq_s32(__a, __b)
-#define vrshrq_n_s32(__a,  __imm) __arm_vrshrq_n_s32(__a,  __imm)
 #define vqmovntq_u16(__a, __b) __arm_vqmovntq_u16(__a, __b)
 #define vqmovnbq_u16(__a, __b) __arm_vqmovnbq_u16(__a, __b)
 #define vmulltq_poly_p8(__a, __b) __arm_vmulltq_poly_p8(__a, __b)
@@ -1410,18 +1392,6 @@
 #define vqrdmlsdhxq_m_s8(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m_s8(__inactive, __a, __b, __p)
 #define vqrdmlsdhxq_m_s32(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m_s32(__inactive, __a, __b, __p)
 #define vqrdmlsdhxq_m_s16(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m_s16(__inactive, __a, __b, __p)
-#define vrshrq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_s8(__inactive, __a,  __imm, __p)
-#define vrshrq_m_n_s32(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_s32(__inactive, __a,  __imm, __p)
-#define vrshrq_m_n_s16(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_s16(__inactive, __a,  __imm, __p)
-#define vrshrq_m_n_u8(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_u8(__inactive, __a,  __imm, __p)
-#define vrshrq_m_n_u32(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_u32(__inactive, __a,  __imm, __p)
-#define vrshrq_m_n_u16(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_u16(__inactive, __a,  __imm, __p)
-#define vshrq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vshrq_m_n_s8(__inactive, __a,  __imm, __p)
-#define vshrq_m_n_s32(__inactive, __a,  __imm, __p) __arm_vshrq_m_n_s32(__inactive, __a,  __imm, __p)
-#define vshrq_m_n_s16(__inactive, __a,  __imm, __p) __arm_vshrq_m_n_s16(__inactive, __a,  __imm, __p)
-#define vshrq_m_n_u8(__inactive, __a,  __imm, __p) __arm_vshrq_m_n_u8(__inactive, __a,  __imm, __p)
-#define vshrq_m_n_u32(__inactive, __a,  __imm, __p) __arm_vshrq_m_n_u32(__inactive, __a,  __imm, __p)
-#define vshrq_m_n_u16(__inactive, __a,  __imm, __p) __arm_vshrq_m_n_u16(__inactive, __a,  __imm, __p)
 #define vsliq_m_n_s8(__a, __b,  __imm, __p) __arm_vsliq_m_n_s8(__a, __b,  __imm, __p)
 #define vsliq_m_n_s32(__a, __b,  __imm, __p) __arm_vsliq_m_n_s32(__a, __b,  __imm, __p)
 #define vsliq_m_n_s16(__a, __b,  __imm, __p) __arm_vsliq_m_n_s16(__a, __b,  __imm, __p)
@@ -1914,18 +1884,6 @@
 #define vshlltq_x_n_s16(__a,  __imm, __p) __arm_vshlltq_x_n_s16(__a,  __imm, __p)
 #define vshlltq_x_n_u8(__a,  __imm, __p) __arm_vshlltq_x_n_u8(__a,  __imm, __p)
 #define vshlltq_x_n_u16(__a,  __imm, __p) __arm_vshlltq_x_n_u16(__a,  __imm, __p)
-#define vrshrq_x_n_s8(__a,  __imm, __p) __arm_vrshrq_x_n_s8(__a,  __imm, __p)
-#define vrshrq_x_n_s16(__a,  __imm, __p) __arm_vrshrq_x_n_s16(__a,  __imm, __p)
-#define vrshrq_x_n_s32(__a,  __imm, __p) __arm_vrshrq_x_n_s32(__a,  __imm, __p)
-#define vrshrq_x_n_u8(__a,  __imm, __p) __arm_vrshrq_x_n_u8(__a,  __imm, __p)
-#define vrshrq_x_n_u16(__a,  __imm, __p) __arm_vrshrq_x_n_u16(__a,  __imm, __p)
-#define vrshrq_x_n_u32(__a,  __imm, __p) __arm_vrshrq_x_n_u32(__a,  __imm, __p)
-#define vshrq_x_n_s8(__a,  __imm, __p) __arm_vshrq_x_n_s8(__a,  __imm, __p)
-#define vshrq_x_n_s16(__a,  __imm, __p) __arm_vshrq_x_n_s16(__a,  __imm, __p)
-#define vshrq_x_n_s32(__a,  __imm, __p) __arm_vshrq_x_n_s32(__a,  __imm, __p)
-#define vshrq_x_n_u8(__a,  __imm, __p) __arm_vshrq_x_n_u8(__a,  __imm, __p)
-#define vshrq_x_n_u16(__a,  __imm, __p) __arm_vshrq_x_n_u16(__a,  __imm, __p)
-#define vshrq_x_n_u32(__a,  __imm, __p) __arm_vshrq_x_n_u32(__a,  __imm, __p)
 #define vdupq_x_n_f16(__a, __p) __arm_vdupq_x_n_f16(__a, __p)
 #define vdupq_x_n_f32(__a, __p) __arm_vdupq_x_n_f32(__a, __p)
 #define vminnmq_x_f16(__a, __b, __p) __arm_vminnmq_x_f16(__a, __b, __p)
@@ -2659,47 +2617,6 @@ __arm_vpnot (mve_pred16_t __a)
   return __builtin_mve_vpnotv16bi (__a);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_n_s8 (int8x16_t __a, const int __imm)
-{
-  return __builtin_mve_vshrq_n_sv16qi (__a, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_n_s16 (int16x8_t __a, const int __imm)
-{
-  return __builtin_mve_vshrq_n_sv8hi (__a, __imm);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_n_s32 (int32x4_t __a, const int __imm)
-{
-  return __builtin_mve_vshrq_n_sv4si (__a, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_n_u8 (uint8x16_t __a, const int __imm)
-{
-  return __builtin_mve_vshrq_n_uv16qi (__a, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_n_u16 (uint16x8_t __a, const int __imm)
-{
-  return __builtin_mve_vshrq_n_uv8hi (__a, __imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_n_u32 (uint32x4_t __a, const int __imm)
-{
-  return __builtin_mve_vshrq_n_uv4si (__a, __imm);
-}
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddlvq_p_s32 (int32x4_t __a, mve_pred16_t __p)
@@ -2919,13 +2836,6 @@ __arm_vbrsrq_n_u8 (uint8x16_t __a, int32_t __b)
   return __builtin_mve_vbrsrq_n_uv16qi (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_n_u8 (uint8x16_t __a, const int __imm)
-{
-  return __builtin_mve_vrshrq_n_uv16qi (__a, __imm);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq_n_s8 (int8x16_t __a, int8_t __b)
@@ -3129,13 +3039,6 @@ __arm_vaddvaq_s8 (int32_t __a, int8x16_t __b)
   return __builtin_mve_vaddvaq_sv16qi (__a, __b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_n_s8 (int8x16_t __a, const int __imm)
-{
-  return __builtin_mve_vrshrq_n_sv16qi (__a, __imm);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_u16 (uint16x8_t __a, uint16x8_t __b)
@@ -3299,13 +3202,6 @@ __arm_vbrsrq_n_u16 (uint16x8_t __a, int32_t __b)
   return __builtin_mve_vbrsrq_n_uv8hi (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_n_u16 (uint16x8_t __a, const int __imm)
-{
-  return __builtin_mve_vrshrq_n_uv8hi (__a, __imm);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq_n_s16 (int16x8_t __a, int16_t __b)
@@ -3509,13 +3405,6 @@ __arm_vaddvaq_s16 (int32_t __a, int16x8_t __b)
   return __builtin_mve_vaddvaq_sv8hi (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_n_s16 (int16x8_t __a, const int __imm)
-{
-  return __builtin_mve_vrshrq_n_sv8hi (__a, __imm);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_u32 (uint32x4_t __a, uint32x4_t __b)
@@ -3679,13 +3568,6 @@ __arm_vbrsrq_n_u32 (uint32x4_t __a, int32_t __b)
   return __builtin_mve_vbrsrq_n_uv4si (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_n_u32 (uint32x4_t __a, const int __imm)
-{
-  return __builtin_mve_vrshrq_n_uv4si (__a, __imm);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq_n_s32 (int32x4_t __a, int32_t __b)
@@ -3889,13 +3771,6 @@ __arm_vaddvaq_s32 (int32_t __a, int32x4_t __b)
   return __builtin_mve_vaddvaq_sv4si (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_n_s32 (int32x4_t __a, const int __imm)
-{
-  return __builtin_mve_vrshrq_n_sv4si (__a, __imm);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqmovntq_u16 (uint8x16_t __a, uint16x8_t __b)
@@ -7437,90 +7312,6 @@ __arm_vqrdmlsdhxq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve
   return __builtin_mve_vqrdmlsdhxq_m_sv8hi (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrq_m_n_sv16qi (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrq_m_n_sv4si (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrq_m_n_sv8hi (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_m_n_u8 (uint8x16_t __inactive, uint8x16_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrq_m_n_uv16qi (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_m_n_u32 (uint32x4_t __inactive, uint32x4_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrq_m_n_uv4si (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrq_m_n_uv8hi (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrq_m_n_sv16qi (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrq_m_n_sv4si (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrq_m_n_sv8hi (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_m_n_u8 (uint8x16_t __inactive, uint8x16_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrq_m_n_uv16qi (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_m_n_u32 (uint32x4_t __inactive, uint32x4_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrq_m_n_uv4si (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrq_m_n_uv8hi (__inactive, __a, __imm, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsliq_m_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -10496,90 +10287,6 @@ __arm_vshlltq_x_n_u16 (uint16x8_t __a, const int __imm, mve_pred16_t __p)
   return __builtin_mve_vshlltq_m_n_uv8hi (__arm_vuninitializedq_u32 (), __a, __imm, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_x_n_s8 (int8x16_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrq_m_n_sv16qi (__arm_vuninitializedq_s8 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_x_n_s16 (int16x8_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrq_m_n_sv8hi (__arm_vuninitializedq_s16 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_x_n_s32 (int32x4_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrq_m_n_sv4si (__arm_vuninitializedq_s32 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_x_n_u8 (uint8x16_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrq_m_n_uv16qi (__arm_vuninitializedq_u8 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_x_n_u16 (uint16x8_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrq_m_n_uv8hi (__arm_vuninitializedq_u16 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_x_n_u32 (uint32x4_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vrshrq_m_n_uv4si (__arm_vuninitializedq_u32 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_x_n_s8 (int8x16_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrq_m_n_sv16qi (__arm_vuninitializedq_s8 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_x_n_s16 (int16x8_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrq_m_n_sv8hi (__arm_vuninitializedq_s16 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_x_n_s32 (int32x4_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrq_m_n_sv4si (__arm_vuninitializedq_s32 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_x_n_u8 (uint8x16_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrq_m_n_uv16qi (__arm_vuninitializedq_u8 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_x_n_u16 (uint16x8_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrq_m_n_uv8hi (__arm_vuninitializedq_u16 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_x_n_u32 (uint32x4_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vshrq_m_n_uv4si (__arm_vuninitializedq_u32 (), __a, __imm, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vadciq_s32 (int32x4_t __a, int32x4_t __b, unsigned * __carry_out)
@@ -14533,48 +14240,6 @@ __arm_vaddlvq (uint32x4_t __a)
  return __arm_vaddlvq_u32 (__a);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq (int8x16_t __a, const int __imm)
-{
- return __arm_vshrq_n_s8 (__a, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq (int16x8_t __a, const int __imm)
-{
- return __arm_vshrq_n_s16 (__a, __imm);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq (int32x4_t __a, const int __imm)
-{
- return __arm_vshrq_n_s32 (__a, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq (uint8x16_t __a, const int __imm)
-{
- return __arm_vshrq_n_u8 (__a, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq (uint16x8_t __a, const int __imm)
-{
- return __arm_vshrq_n_u16 (__a, __imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq (uint32x4_t __a, const int __imm)
-{
- return __arm_vshrq_n_u32 (__a, __imm);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddlvq_p (int32x4_t __a, mve_pred16_t __p)
@@ -14792,13 +14457,6 @@ __arm_vbrsrq (uint8x16_t __a, int32_t __b)
  return __arm_vbrsrq_n_u8 (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq (uint8x16_t __a, const int __imm)
-{
- return __arm_vrshrq_n_u8 (__a, __imm);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq (int8x16_t __a, int8_t __b)
@@ -15002,13 +14660,6 @@ __arm_vaddvaq (int32_t __a, int8x16_t __b)
  return __arm_vaddvaq_s8 (__a, __b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq (int8x16_t __a, const int __imm)
-{
- return __arm_vrshrq_n_s8 (__a, __imm);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (uint16x8_t __a, uint16x8_t __b)
@@ -15170,13 +14821,6 @@ __arm_vbrsrq (uint16x8_t __a, int32_t __b)
  return __arm_vbrsrq_n_u16 (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq (uint16x8_t __a, const int __imm)
-{
- return __arm_vrshrq_n_u16 (__a, __imm);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq (int16x8_t __a, int16_t __b)
@@ -15380,13 +15024,6 @@ __arm_vaddvaq (int32_t __a, int16x8_t __b)
  return __arm_vaddvaq_s16 (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq (int16x8_t __a, const int __imm)
-{
- return __arm_vrshrq_n_s16 (__a, __imm);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (uint32x4_t __a, uint32x4_t __b)
@@ -15548,13 +15185,6 @@ __arm_vbrsrq (uint32x4_t __a, int32_t __b)
  return __arm_vbrsrq_n_u32 (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq (uint32x4_t __a, const int __imm)
-{
- return __arm_vrshrq_n_u32 (__a, __imm);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmpneq (int32x4_t __a, int32_t __b)
@@ -15758,13 +15388,6 @@ __arm_vaddvaq (int32_t __a, int32x4_t __b)
  return __arm_vaddvaq_s32 (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq (int32x4_t __a, const int __imm)
-{
- return __arm_vrshrq_n_s32 (__a, __imm);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqmovntq (uint8x16_t __a, uint16x8_t __b)
@@ -19265,90 +18888,6 @@ __arm_vqrdmlsdhxq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pre
  return __arm_vqrdmlsdhxq_m_s16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_m (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrq_m_n_s8 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_m (int32x4_t __inactive, int32x4_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrq_m_n_s32 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_m (int16x8_t __inactive, int16x8_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrq_m_n_s16 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_m (uint8x16_t __inactive, uint8x16_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrq_m_n_u8 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_m (uint32x4_t __inactive, uint32x4_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrq_m_n_u32 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_m (uint16x8_t __inactive, uint16x8_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrq_m_n_u16 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_m (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrq_m_n_s8 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_m (int32x4_t __inactive, int32x4_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrq_m_n_s32 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_m (int16x8_t __inactive, int16x8_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrq_m_n_s16 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_m (uint8x16_t __inactive, uint8x16_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrq_m_n_u8 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_m (uint32x4_t __inactive, uint32x4_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrq_m_n_u32 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_m (uint16x8_t __inactive, uint16x8_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrq_m_n_u16 (__inactive, __a, __imm, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsliq_m (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -21827,90 +21366,6 @@ __arm_vshlltq_x (uint16x8_t __a, const int __imm, mve_pred16_t __p)
  return __arm_vshlltq_x_n_u16 (__a, __imm, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_x (int8x16_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrq_x_n_s8 (__a, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_x (int16x8_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrq_x_n_s16 (__a, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_x (int32x4_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrq_x_n_s32 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_x (uint8x16_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrq_x_n_u8 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_x (uint16x8_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrq_x_n_u16 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrshrq_x (uint32x4_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vrshrq_x_n_u32 (__a, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_x (int8x16_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrq_x_n_s8 (__a, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_x (int16x8_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrq_x_n_s16 (__a, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_x (int32x4_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrq_x_n_s32 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_x (uint8x16_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrq_x_n_u8 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_x (uint16x8_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrq_x_n_u16 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshrq_x (uint32x4_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshrq_x_n_u32 (__a, __imm, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vadciq (int32x4_t __a, int32x4_t __b, unsigned * __carry_out)
@@ -25121,15 +24576,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vcvtq_f16_u16 (__ARM_mve_coerce(__p0, uint16x8_t)), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vcvtq_f32_u32 (__ARM_mve_coerce(__p0, uint32x4_t)));})
 
-#define __arm_vshrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshrq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshrq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshrq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
-
 #define __arm_vcvtq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int16x8_t]: __arm_vcvtq_n_f16_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
@@ -25394,24 +24840,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshllbq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshllbq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1));})
 
-#define __arm_vrshrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrshrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrshrq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vrshrq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrshrq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrshrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrshrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
-
-#define __arm_vrshrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrshrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrshrq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vrshrq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrshrq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrshrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrshrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
-
 #define __arm_vqshluq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshluq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
@@ -26935,15 +26363,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t]: __arm_vqnegq_s16 (__ARM_mve_coerce(__p0, int16x8_t)), \
   int (*)[__ARM_mve_type_int32x4_t]: __arm_vqnegq_s32 (__ARM_mve_coerce(__p0, int32x4_t)));})
 
-#define __arm_vshrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshrq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshrq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshrq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
-
 #define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -26966,15 +26385,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshluq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
   int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshluq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1));})
 
-#define __arm_vrshrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrshrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrshrq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vrshrq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrshrq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrshrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrshrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
-
 #define __arm_vornq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -28128,15 +27538,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vrev16q_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2), \
   int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrev16q_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2));})
 
-#define __arm_vrshrq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrshrq_x_n_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrshrq_x_n_s16 (__ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vrshrq_x_n_s32 (__ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrshrq_x_n_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrshrq_x_n_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrshrq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
-
 #define __arm_vshllbq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vshllbq_x_n_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2, p3), \
@@ -28211,15 +27612,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int_n]: __arm_vddupq_x_n_u32 ((uint32_t) __p1, p2, p3), \
   int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vddupq_x_wb_u32 (__ARM_mve_coerce(__p1, uint32_t *), p2, p3));})
 
-#define __arm_vshrq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshrq_x_n_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshrq_x_n_s16 (__ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshrq_x_n_s32 (__ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshrq_x_n_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshrq_x_n_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshrq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
-
 #define __arm_vhcaddq_rot270_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
@@ -28366,26 +27758,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int), p3), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int), p3));})
 
-#define __arm_vrshrq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrshrq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t),  p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrshrq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t),  p2, p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrshrq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),  p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vrshrq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t),  p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vrshrq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),  p2, p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrshrq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),  p2, p3));})
-
-#define __arm_vshrq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshrq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t),  p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshrq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t),  p2, p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshrq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),  p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vshrq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t),  p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vshrq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),  p2, p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vshrq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),  p2, p3));})
-
 #define __arm_vsliq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-- 
2.34.1
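
(For context: the vshrq/vrshrq entries removed above remain available to users, since this series reimplements them through the new builtins framework. Below is a minimal usage sketch, assuming an MVE-enabled toolchain; the wrapper function name is illustrative only, not from the patch:)

#include <arm_mve.h>

int8x16_t
shift_demo (int8x16_t a, mve_pred16_t p)
{
  int8x16_t s = vshrq (a, 3);    /* plain shift right by immediate */
  int8x16_t r = vrshrq (s, 3);   /* rounding shift right by immediate */
  return vrshrq_x (r, 2, p);     /* predicated "_x" variant; inactive lanes undefined */
}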



* RE: [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape
  2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
                   ` (21 preceding siblings ...)
  2023-05-05  8:39 ` [PATCH 23/23] arm: [MVE intrinsics] rework vshrq vrshrq Christophe Lyon
@ 2023-05-05  9:55 ` Kyrylo Tkachov
  22 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05  9:55 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape
> 
> This patch adds the binary_round_lshift shape description.
> 

Ok.
I expect the series to be mostly okay given that it follows the schemes introduced in the previous series, but I'll review each patch individually anyway to make sure.
Thanks again for working on this.
Kyrill

> 2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins-shapes.cc (binary_round_lshift): New.
> 	* config/arm/arm-mve-builtins-shapes.h (binary_round_lshift): New.
> ---
>  gcc/config/arm/arm-mve-builtins-shapes.cc | 61 +++++++++++++++++++++++
>  gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
>  2 files changed, 62 insertions(+)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-
> mve-builtins-shapes.cc
> index 5e6681c784a..28a2d66ddd1 100644
> --- a/gcc/config/arm/arm-mve-builtins-shapes.cc
> +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
> @@ -458,6 +458,67 @@ struct binary_orrq_def : public overloaded_base<0>
>  };
>  SHAPE (binary_orrq)
> 
> +/* <T0>_t vfoo[t0](<T0>_t, <T0>_t)
> +   <T0>_t vfoo[_n_t0](<T0>_t, int32_t)
> +
> +   Shape for rounding shift left operations.
> +
> +   Example: vrshlq.
> +   int8x16_t [__arm_]vrshlq[_n_s8](int8x16_t a, int32_t b)
> +   int8x16_t [__arm_]vrshlq_m_n[_s8](int8x16_t a, int32_t b, mve_pred16_t
> p)
> +   int8x16_t [__arm_]vrshlq[_s8](int8x16_t a, int8x16_t b)
> +   int8x16_t [__arm_]vrshlq_m[_s8](int8x16_t inactive, int8x16_t a, int8x16_t
> b, mve_pred16_t p)
> +   int8x16_t [__arm_]vrshlq_x[_s8](int8x16_t a, int8x16_t b, mve_pred16_t p)
> */
> +struct binary_round_lshift_def : public overloaded_base<0>
> +{
> +  bool
> +  explicit_mode_suffix_p (enum predication_index pred, enum
> mode_suffix_index mode) const override
> +  {
> +    return ((mode == MODE_n)
> +	    && (pred == PRED_m));
> +  }
> +
> +  bool
> +  skip_overload_p (enum predication_index pred, enum mode_suffix_index
> mode) const override
> +  {
> +    switch (mode)
> +      {
> +      case MODE_none:
> +	return false;
> +
> +	/* For MODE_n, share the overloaded instance with MODE_none,
> except for PRED_m.  */
> +      case MODE_n:
> +	return pred != PRED_m;
> +
> +      default:
> +	gcc_unreachable ();
> +      }
> +  }
> +
> +  void
> +  build (function_builder &b, const function_group_info &group,
> +	 bool preserve_user_namespace) const override
> +  {
> +    b.add_overloaded_functions (group, MODE_none,
> preserve_user_namespace);
> +    b.add_overloaded_functions (group, MODE_n,
> preserve_user_namespace);
> +    build_all (b, "v0,v0,vs0", group, MODE_none, preserve_user_namespace);
> +    build_all (b, "v0,v0,ss32", group, MODE_n, preserve_user_namespace,
> false, preds_m_or_none);
> +  }
> +
> +  tree
> +  resolve (function_resolver &r) const override
> +  {
> +    unsigned int i, nargs;
> +    type_suffix_index type;
> +    if (!r.check_gp_argument (2, i, nargs)
> +	|| (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES)
> +      return error_mark_node;
> +
> +    return r.finish_opt_n_resolution (i, 0, type, TYPE_signed);
> +  }
> +};
> +SHAPE (binary_round_lshift)
> +
>  /* <T0>xN_t vfoo[_t0](uint64_t, uint64_t)
> 
>     where there are N arguments in total.
> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-
> mve-builtins-shapes.h
> index 3305d12877a..cef081aa8ec 100644
> --- a/gcc/config/arm/arm-mve-builtins-shapes.h
> +++ b/gcc/config/arm/arm-mve-builtins-shapes.h
> @@ -37,6 +37,7 @@ namespace arm_mve
>      extern const function_shape *const binary;
>      extern const function_shape *const binary_opt_n;
>      extern const function_shape *const binary_orrq;
> +    extern const function_shape *const binary_round_lshift;
>      extern const function_shape *const create;
>      extern const function_shape *const inherent;
>      extern const function_shape *const unary_convert;
> --
> 2.34.1
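
(As an illustration of the two call forms this shape describes, here is a minimal user-side sketch; it assumes an MVE-enabled toolchain, and the wrapper function name is illustrative only:)

#include <arm_mve.h>

int8x16_t
rshl_demo (int8x16_t a, int8x16_t vb, int32_t sb, mve_pred16_t p)
{
  int8x16_t v = vrshlq (a, vb);   /* vector form: per-lane shift amounts */
  int8x16_t s = vrshlq (a, sb);   /* scalar "_n" form: one shift amount for all lanes */
  return vrshlq_m (s, v, vb, p);  /* predicated form with explicit inactive value */
}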



* RE: [PATCH 02/23] arm: [MVE intrinsics] factorize vqrshlq vrshlq
  2023-05-05  8:39 ` [PATCH 02/23] arm: [MVE intrinsics] factorize vqrshlq vrshlq Christophe Lyon
@ 2023-05-05  9:58   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05  9:58 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 02/23] arm: [MVE intrinsics] factorize vqrshlq vrshlq
> 
> Factorize vqrshlq, vrshlq so that they use the same pattern.

Ok.
Thanks,
Kyrill

> 
> 2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/iterators.md (MVE_RSHIFT_M_N, MVE_RSHIFT_N): New.
> 	(mve_insn): Add vqrshl, vrshl.
> 	* config/arm/mve.md (mve_vqrshlq_n_<supf><mode>)
> 	(mve_vrshlq_n_<supf><mode>): Merge into ...
> 	(@mve_<mve_insn>q_n_<supf><mode>): ... this.
> 	(mve_vqrshlq_m_n_<supf><mode>,
> mve_vrshlq_m_n_<supf><mode>): Merge
> 	into ...
> 	(@mve_<mve_insn>q_m_n_<supf><mode>): ... this.
> ---
>  gcc/config/arm/iterators.md | 14 +++++++++++
>  gcc/config/arm/mve.md       | 49 ++++++++-----------------------------
>  2 files changed, 24 insertions(+), 39 deletions(-)
> 
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index 593be83e0be..e7622fe752a 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -435,6 +435,16 @@ (define_int_iterator MVE_INT_N_BINARY_LOGIC   [
>  		     VORRQ_N_S VORRQ_N_U
>  		     ])
> 
> +(define_int_iterator MVE_RSHIFT_M_N   [
> +		     VQRSHLQ_M_N_S VQRSHLQ_M_N_U
> +		     VRSHLQ_M_N_S VRSHLQ_M_N_U
> +		     ])
> +
> +(define_int_iterator MVE_RSHIFT_N   [
> +		     VQRSHLQ_N_S VQRSHLQ_N_U
> +		     VRSHLQ_N_S VRSHLQ_N_U
> +		     ])
> +
>  (define_int_iterator MVE_FP_M_BINARY   [
>  		     VADDQ_M_F
>  		     VMULQ_M_F
> @@ -526,7 +536,9 @@ (define_int_attr mve_insn [
>  		 (VQRDMULHQ_M_S "vqrdmulh")
>  		 (VQRDMULHQ_N_S "vqrdmulh")
>  		 (VQRDMULHQ_S "vqrdmulh")
> +		 (VQRSHLQ_M_N_S "vqrshl") (VQRSHLQ_M_N_U "vqrshl")
>  		 (VQRSHLQ_M_S "vqrshl") (VQRSHLQ_M_U "vqrshl")
> +		 (VQRSHLQ_N_S "vqrshl") (VQRSHLQ_N_U "vqrshl")
>  		 (VQRSHLQ_S "vqrshl") (VQRSHLQ_U "vqrshl")
>  		 (VQSHLQ_M_S "vqshl") (VQSHLQ_M_U "vqshl")
>  		 (VQSHLQ_S "vqshl") (VQSHLQ_U "vqshl")
> @@ -538,7 +550,9 @@ (define_int_attr mve_insn [
>  		 (VRHADDQ_S "vrhadd") (VRHADDQ_U "vrhadd")
>  		 (VRMULHQ_M_S "vrmulh") (VRMULHQ_M_U "vrmulh")
>  		 (VRMULHQ_S "vrmulh") (VRMULHQ_U "vrmulh")
> +		 (VRSHLQ_M_N_S "vrshl") (VRSHLQ_M_N_U "vrshl")
>  		 (VRSHLQ_M_S "vrshl") (VRSHLQ_M_U "vrshl")
> +		 (VRSHLQ_N_S "vrshl") (VRSHLQ_N_U "vrshl")
>  		 (VRSHLQ_S "vrshl") (VRSHLQ_U "vrshl")
>  		 (VSHLQ_M_S "vshl") (VSHLQ_M_U "vshl")
>  		 (VSUBQ_M_N_S "vsub") (VSUBQ_M_N_U "vsub")
> (VSUBQ_M_N_F "vsub")
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 6b88fdb8a7a..0d3343b6e29 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -1373,17 +1373,18 @@ (define_expand "mve_vorrq_u<mode>"
>  )
> 
>  ;;
> -;; [vqrshlq_n_s, vqrshlq_n_u])
> +;; [vqrshlq_n_s, vqrshlq_n_u]
> +;; [vrshlq_n_u, vrshlq_n_s]
>  ;;
> -(define_insn "mve_vqrshlq_n_<supf><mode>"
> +(define_insn "@mve_<mve_insn>q_n_<supf><mode>"
>    [
>     (set (match_operand:MVE_2 0 "s_register_operand" "=w")
>  	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
>  		       (match_operand:SI 2 "s_register_operand" "r")]
> -	 VQRSHLQ_N))
> +	 MVE_RSHIFT_N))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vqrshl.<supf>%#<V_sz_elem>\t%q0, %2"
> +  "<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %2"
>    [(set_attr "type" "mve_move")
>  ])
> 
> @@ -1432,21 +1433,6 @@ (define_insn "mve_vqshluq_n_s<mode>"
>    [(set_attr "type" "mve_move")
>  ])
> 
> -;;
> -;; [vrshlq_n_u, vrshlq_n_s])
> -;;
> -(define_insn "mve_vrshlq_n_<supf><mode>"
> -  [
> -   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> -	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
> -		       (match_operand:SI 2 "s_register_operand" "r")]
> -	 VRSHLQ_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vrshl.<supf>%#<V_sz_elem>\t%q0, %2"
> -  [(set_attr "type" "mve_move")
> -])
> -
>  ;;
>  ;; [vrshrq_n_s, vrshrq_n_u])
>  ;;
> @@ -3098,18 +3084,19 @@ (define_insn "mve_vqrdmlsdhxq_s<mode>"
>  ])
> 
>  ;;
> -;; [vqrshlq_m_n_s, vqrshlq_m_n_u])
> +;; [vqrshlq_m_n_s, vqrshlq_m_n_u]
> +;; [vrshlq_m_n_s, vrshlq_m_n_u]
>  ;;
> -(define_insn "mve_vqrshlq_m_n_<supf><mode>"
> +(define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
>    [
>     (set (match_operand:MVE_2 0 "s_register_operand" "=w")
>  	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
>  		       (match_operand:SI 2 "s_register_operand" "r")
>  		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VQRSHLQ_M_N))
> +	 MVE_RSHIFT_M_N))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vpst\;vqrshlt.<supf>%#<V_sz_elem>	%q0, %2"
> +  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %2"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> 
> @@ -3145,22 +3132,6 @@ (define_insn "mve_vrev64q_m_<supf><mode>"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> 
> -;;
> -;; [vrshlq_m_n_s, vrshlq_m_n_u])
> -;;
> -(define_insn "mve_vrshlq_m_n_<supf><mode>"
> -  [
> -   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> -	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
> -		       (match_operand:SI 2 "s_register_operand" "r")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VRSHLQ_M_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vrshlt.<supf>%#<V_sz_elem>\t%q0, %2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
>  ;;
>  ;; [vshlq_m_r_u, vshlq_m_r_s])
>  ;;
> --
> 2.34.1
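
(The two intrinsics merged here take identical operands and differ only in overflow behavior, which is why one pattern can serve both; a small sketch, assuming an MVE-enabled toolchain and an illustrative wrapper name:)

#include <arm_mve.h>

uint16x8_t
rounding_shifts (uint16x8_t a, int32_t n)
{
  uint16x8_t r = vrshlq (a, n);   /* rounding shift left, result wraps */
  uint16x8_t q = vqrshlq (a, n);  /* rounding shift left, result saturates */
  return veorq (r, q);            /* nonzero lanes show where saturation occurred */
}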



* RE: [PATCH 03/23] arm: [MVE intrinsics] rework vrshlq vqrshlq
  2023-05-05  8:39 ` [PATCH 03/23] arm: [MVE intrinsics] rework vrshlq vqrshlq Christophe Lyon
@ 2023-05-05  9:59   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05  9:59 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 03/23] arm: [MVE intrinsics] rework vrshlq vqrshlq
> 
> Implement vrshlq, vqrshlq using the new MVE builtins framework.

Ok.
Thanks,
Kyrill
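
(One detail worth noting in the hunks quoted below: has_inactive_argument now returns false for the scalar-shift "_m_n" forms, so they take no separate inactive vector; the first operand supplies the values of the inactive lanes. A minimal sketch, assuming an MVE-enabled toolchain; the wrapper function name is illustrative only:)

#include <arm_mve.h>

int16x8_t
m_forms (int16x8_t a, int16x8_t vb, int32_t sb, mve_pred16_t p)
{
  /* Vector form: explicit inactive value as the first argument.  */
  int16x8_t v = vqrshlq_m (a, a, vb, p);
  /* Scalar "_m_n" form: no inactive argument; inactive lanes keep a's values.  */
  int16x8_t s = vqrshlq_m_n (a, sb, p);
  return vaddq (v, s);
}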

> 
> 2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins-base.cc (vqrshlq, vrshlq): New.
> 	* config/arm/arm-mve-builtins-base.def (vqrshlq, vrshlq): New.
> 	* config/arm/arm-mve-builtins-base.h (vqrshlq, vrshlq): New.
> 	* config/arm/arm-mve-builtins.cc (has_inactive_argument): Handle
> 	vqrshlq, vrshlq.
> 	* config/arm/arm_mve.h (vrshlq): Remove.
> 	(vrshlq_m_n): Remove.
> 	(vrshlq_m): Remove.
> 	(vrshlq_x): Remove.
> 	(vrshlq_u8): Remove.
> 	(vrshlq_n_u8): Remove.
> 	(vrshlq_s8): Remove.
> 	(vrshlq_n_s8): Remove.
> 	(vrshlq_u16): Remove.
> 	(vrshlq_n_u16): Remove.
> 	(vrshlq_s16): Remove.
> 	(vrshlq_n_s16): Remove.
> 	(vrshlq_u32): Remove.
> 	(vrshlq_n_u32): Remove.
> 	(vrshlq_s32): Remove.
> 	(vrshlq_n_s32): Remove.
> 	(vrshlq_m_n_u8): Remove.
> 	(vrshlq_m_n_s8): Remove.
> 	(vrshlq_m_n_u16): Remove.
> 	(vrshlq_m_n_s16): Remove.
> 	(vrshlq_m_n_u32): Remove.
> 	(vrshlq_m_n_s32): Remove.
> 	(vrshlq_m_s8): Remove.
> 	(vrshlq_m_s32): Remove.
> 	(vrshlq_m_s16): Remove.
> 	(vrshlq_m_u8): Remove.
> 	(vrshlq_m_u32): Remove.
> 	(vrshlq_m_u16): Remove.
> 	(vrshlq_x_s8): Remove.
> 	(vrshlq_x_s16): Remove.
> 	(vrshlq_x_s32): Remove.
> 	(vrshlq_x_u8): Remove.
> 	(vrshlq_x_u16): Remove.
> 	(vrshlq_x_u32): Remove.
> 	(__arm_vrshlq_u8): Remove.
> 	(__arm_vrshlq_n_u8): Remove.
> 	(__arm_vrshlq_s8): Remove.
> 	(__arm_vrshlq_n_s8): Remove.
> 	(__arm_vrshlq_u16): Remove.
> 	(__arm_vrshlq_n_u16): Remove.
> 	(__arm_vrshlq_s16): Remove.
> 	(__arm_vrshlq_n_s16): Remove.
> 	(__arm_vrshlq_u32): Remove.
> 	(__arm_vrshlq_n_u32): Remove.
> 	(__arm_vrshlq_s32): Remove.
> 	(__arm_vrshlq_n_s32): Remove.
> 	(__arm_vrshlq_m_n_u8): Remove.
> 	(__arm_vrshlq_m_n_s8): Remove.
> 	(__arm_vrshlq_m_n_u16): Remove.
> 	(__arm_vrshlq_m_n_s16): Remove.
> 	(__arm_vrshlq_m_n_u32): Remove.
> 	(__arm_vrshlq_m_n_s32): Remove.
> 	(__arm_vrshlq_m_s8): Remove.
> 	(__arm_vrshlq_m_s32): Remove.
> 	(__arm_vrshlq_m_s16): Remove.
> 	(__arm_vrshlq_m_u8): Remove.
> 	(__arm_vrshlq_m_u32): Remove.
> 	(__arm_vrshlq_m_u16): Remove.
> 	(__arm_vrshlq_x_s8): Remove.
> 	(__arm_vrshlq_x_s16): Remove.
> 	(__arm_vrshlq_x_s32): Remove.
> 	(__arm_vrshlq_x_u8): Remove.
> 	(__arm_vrshlq_x_u16): Remove.
> 	(__arm_vrshlq_x_u32): Remove.
> 	(__arm_vrshlq): Remove.
> 	(__arm_vrshlq_m_n): Remove.
> 	(__arm_vrshlq_m): Remove.
> 	(__arm_vrshlq_x): Remove.
> 	(vqrshlq): Remove.
> 	(vqrshlq_m_n): Remove.
> 	(vqrshlq_m): Remove.
> 	(vqrshlq_u8): Remove.
> 	(vqrshlq_n_u8): Remove.
> 	(vqrshlq_s8): Remove.
> 	(vqrshlq_n_s8): Remove.
> 	(vqrshlq_u16): Remove.
> 	(vqrshlq_n_u16): Remove.
> 	(vqrshlq_s16): Remove.
> 	(vqrshlq_n_s16): Remove.
> 	(vqrshlq_u32): Remove.
> 	(vqrshlq_n_u32): Remove.
> 	(vqrshlq_s32): Remove.
> 	(vqrshlq_n_s32): Remove.
> 	(vqrshlq_m_n_u8): Remove.
> 	(vqrshlq_m_n_s8): Remove.
> 	(vqrshlq_m_n_u16): Remove.
> 	(vqrshlq_m_n_s16): Remove.
> 	(vqrshlq_m_n_u32): Remove.
> 	(vqrshlq_m_n_s32): Remove.
> 	(vqrshlq_m_s8): Remove.
> 	(vqrshlq_m_s32): Remove.
> 	(vqrshlq_m_s16): Remove.
> 	(vqrshlq_m_u8): Remove.
> 	(vqrshlq_m_u32): Remove.
> 	(vqrshlq_m_u16): Remove.
> 	(__arm_vqrshlq_u8): Remove.
> 	(__arm_vqrshlq_n_u8): Remove.
> 	(__arm_vqrshlq_s8): Remove.
> 	(__arm_vqrshlq_n_s8): Remove.
> 	(__arm_vqrshlq_u16): Remove.
> 	(__arm_vqrshlq_n_u16): Remove.
> 	(__arm_vqrshlq_s16): Remove.
> 	(__arm_vqrshlq_n_s16): Remove.
> 	(__arm_vqrshlq_u32): Remove.
> 	(__arm_vqrshlq_n_u32): Remove.
> 	(__arm_vqrshlq_s32): Remove.
> 	(__arm_vqrshlq_n_s32): Remove.
> 	(__arm_vqrshlq_m_n_u8): Remove.
> 	(__arm_vqrshlq_m_n_s8): Remove.
> 	(__arm_vqrshlq_m_n_u16): Remove.
> 	(__arm_vqrshlq_m_n_s16): Remove.
> 	(__arm_vqrshlq_m_n_u32): Remove.
> 	(__arm_vqrshlq_m_n_s32): Remove.
> 	(__arm_vqrshlq_m_s8): Remove.
> 	(__arm_vqrshlq_m_s32): Remove.
> 	(__arm_vqrshlq_m_s16): Remove.
> 	(__arm_vqrshlq_m_u8): Remove.
> 	(__arm_vqrshlq_m_u32): Remove.
> 	(__arm_vqrshlq_m_u16): Remove.
> 	(__arm_vqrshlq): Remove.
> 	(__arm_vqrshlq_m_n): Remove.
> 	(__arm_vqrshlq_m): Remove.
> ---
>  gcc/config/arm/arm-mve-builtins-base.cc  |   2 +
>  gcc/config/arm/arm-mve-builtins-base.def |   2 +
>  gcc/config/arm/arm-mve-builtins-base.h   |   2 +
>  gcc/config/arm/arm-mve-builtins.cc       |   4 +-
>  gcc/config/arm/arm_mve.h                 | 969 +----------------------
>  5 files changed, 18 insertions(+), 961 deletions(-)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-
> mve-builtins-base.cc
> index de0cdb4229b..f5e48519b19 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.cc
> +++ b/gcc/config/arm/arm-mve-builtins-base.cc
> @@ -157,10 +157,12 @@ FUNCTION_WITH_RTX_M_N (vmulq, MULT,
> VMULQ)
>  FUNCTION_WITH_RTX_M_N_NO_N_F (vorrq, IOR, VORRQ)
>  FUNCTION_WITH_M_N_NO_F (vqaddq, VQADDQ)
>  FUNCTION_WITH_M_N_NO_U_F (vqdmulhq, VQDMULHQ)
> +FUNCTION_WITH_M_N_NO_F (vqrshlq, VQRSHLQ)
>  FUNCTION_WITH_M_N_NO_F (vqsubq, VQSUBQ)
>  FUNCTION (vreinterpretq, vreinterpretq_impl,)
>  FUNCTION_WITHOUT_N_NO_F (vrhaddq, VRHADDQ)
>  FUNCTION_WITHOUT_N_NO_F (vrmulhq, VRMULHQ)
> +FUNCTION_WITH_M_N_NO_F (vrshlq, VRSHLQ)
>  FUNCTION_WITH_RTX_M_N (vsubq, MINUS, VSUBQ)
>  FUNCTION (vuninitializedq, vuninitializedq_impl,)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-
> mve-builtins-base.def
> index d256f3ebb2d..e6dc2b00aaa 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.def
> +++ b/gcc/config/arm/arm-mve-builtins-base.def
> @@ -29,10 +29,12 @@ DEF_MVE_FUNCTION (vmulq, binary_opt_n,
> all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vorrq, binary_orrq, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vqaddq, binary_opt_n, all_integer, m_or_none)
>  DEF_MVE_FUNCTION (vqdmulhq, binary_opt_n, all_signed, m_or_none)
> +DEF_MVE_FUNCTION (vqrshlq, binary_round_lshift, all_integer, m_or_none)
>  DEF_MVE_FUNCTION (vqsubq, binary_opt_n, all_integer, m_or_none)
>  DEF_MVE_FUNCTION (vreinterpretq, unary_convert, reinterpret_integer,
> none)
>  DEF_MVE_FUNCTION (vrhaddq, binary, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vrmulhq, binary, all_integer, mx_or_none)
> +DEF_MVE_FUNCTION (vrshlq, binary_round_lshift, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vuninitializedq, inherent, all_integer_with_64, none)
>  #undef REQUIRES_FLOAT
> diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-
> mve-builtins-base.h
> index d64cb5e1dec..31ba3fece82 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.h
> +++ b/gcc/config/arm/arm-mve-builtins-base.h
> @@ -34,10 +34,12 @@ extern const function_base *const vmulq;
>  extern const function_base *const vorrq;
>  extern const function_base *const vqaddq;
>  extern const function_base *const vqdmulhq;
> +extern const function_base *const vqrshlq;
>  extern const function_base *const vqsubq;
>  extern const function_base *const vreinterpretq;
>  extern const function_base *const vrhaddq;
>  extern const function_base *const vrmulhq;
> +extern const function_base *const vrshlq;
>  extern const function_base *const vsubq;
>  extern const function_base *const vuninitializedq;
> 
> diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-
> builtins.cc
> index 0708d4fa94a..91b3ae71f94 100644
> --- a/gcc/config/arm/arm-mve-builtins.cc
> +++ b/gcc/config/arm/arm-mve-builtins.cc
> @@ -669,7 +669,9 @@ function_instance::has_inactive_argument () const
>    if (pred != PRED_m)
>      return false;
> 
> -  if (base == functions::vorrq && mode_suffix_id == MODE_n)
> +  if ((base == functions::vorrq && mode_suffix_id == MODE_n)
> +      || (base == functions::vqrshlq && mode_suffix_id == MODE_n)
> +      || (base == functions::vrshlq && mode_suffix_id == MODE_n))
>      return false;
> 
>    return true;
> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> index 9c5d14794a1..636945d6ef0 100644
> --- a/gcc/config/arm/arm_mve.h
> +++ b/gcc/config/arm/arm_mve.h
> @@ -79,10 +79,8 @@
>  #define vaddvaq(__a, __b) __arm_vaddvaq(__a, __b)
>  #define vabdq(__a, __b) __arm_vabdq(__a, __b)
>  #define vshlq_r(__a, __b) __arm_vshlq_r(__a, __b)
> -#define vrshlq(__a, __b) __arm_vrshlq(__a, __b)
>  #define vqshlq(__a, __b) __arm_vqshlq(__a, __b)
>  #define vqshlq_r(__a, __b) __arm_vqshlq_r(__a, __b)
> -#define vqrshlq(__a, __b) __arm_vqrshlq(__a, __b)
>  #define vminavq(__a, __b) __arm_vminavq(__a, __b)
>  #define vminaq(__a, __b) __arm_vminaq(__a, __b)
>  #define vmaxavq(__a, __b) __arm_vmaxavq(__a, __b)
> @@ -153,9 +151,7 @@
>  #define vsriq(__a, __b, __imm) __arm_vsriq(__a, __b, __imm)
>  #define vsliq(__a, __b, __imm) __arm_vsliq(__a, __b, __imm)
>  #define vshlq_m_r(__a, __b, __p) __arm_vshlq_m_r(__a, __b, __p)
> -#define vrshlq_m_n(__a, __b, __p) __arm_vrshlq_m_n(__a, __b, __p)
>  #define vqshlq_m_r(__a, __b, __p) __arm_vqshlq_m_r(__a, __b, __p)
> -#define vqrshlq_m_n(__a, __b, __p) __arm_vqrshlq_m_n(__a, __b, __p)
>  #define vminavq_p(__a, __b, __p) __arm_vminavq_p(__a, __b, __p)
>  #define vminaq_m(__a, __b, __p) __arm_vminaq_m(__a, __b, __p)
>  #define vmaxavq_p(__a, __b, __p) __arm_vmaxavq_p(__a, __b, __p)
> @@ -254,10 +250,8 @@
>  #define vqrdmlsdhq_m(__inactive, __a, __b, __p)
> __arm_vqrdmlsdhq_m(__inactive, __a, __b, __p)
>  #define vqrdmlsdhxq_m(__inactive, __a, __b, __p)
> __arm_vqrdmlsdhxq_m(__inactive, __a, __b, __p)
>  #define vqrdmulhq_m(__inactive, __a, __b, __p)
> __arm_vqrdmulhq_m(__inactive, __a, __b, __p)
> -#define vqrshlq_m(__inactive, __a, __b, __p) __arm_vqrshlq_m(__inactive,
> __a, __b, __p)
>  #define vqshlq_m_n(__inactive, __a, __imm, __p)
> __arm_vqshlq_m_n(__inactive, __a, __imm, __p)
>  #define vqshlq_m(__inactive, __a, __b, __p) __arm_vqshlq_m(__inactive,
> __a, __b, __p)
> -#define vrshlq_m(__inactive, __a, __b, __p) __arm_vrshlq_m(__inactive,
> __a, __b, __p)
>  #define vrshrq_m(__inactive, __a, __imm, __p) __arm_vrshrq_m(__inactive,
> __a, __imm, __p)
>  #define vshlq_m_n(__inactive, __a, __imm, __p)
> __arm_vshlq_m_n(__inactive, __a, __imm, __p)
>  #define vshrq_m(__inactive, __a, __imm, __p) __arm_vshrq_m(__inactive,
> __a, __imm, __p)
> @@ -385,7 +379,6 @@
>  #define vrev16q_x(__a, __p) __arm_vrev16q_x(__a, __p)
>  #define vrev32q_x(__a, __p) __arm_vrev32q_x(__a, __p)
>  #define vrev64q_x(__a, __p) __arm_vrev64q_x(__a, __p)
> -#define vrshlq_x(__a, __b, __p) __arm_vrshlq_x(__a, __b, __p)
>  #define vshllbq_x(__a, __imm, __p) __arm_vshllbq_x(__a, __imm, __p)
>  #define vshlltq_x(__a, __imm, __p) __arm_vshlltq_x(__a, __imm, __p)
>  #define vshlq_x(__a, __b, __p) __arm_vshlq_x(__a, __b, __p)
> @@ -663,12 +656,8 @@
>  #define vaddvaq_u8(__a, __b) __arm_vaddvaq_u8(__a, __b)
>  #define vabdq_u8(__a, __b) __arm_vabdq_u8(__a, __b)
>  #define vshlq_r_u8(__a, __b) __arm_vshlq_r_u8(__a, __b)
> -#define vrshlq_u8(__a, __b) __arm_vrshlq_u8(__a, __b)
> -#define vrshlq_n_u8(__a, __b) __arm_vrshlq_n_u8(__a, __b)
>  #define vqshlq_u8(__a, __b) __arm_vqshlq_u8(__a, __b)
>  #define vqshlq_r_u8(__a, __b) __arm_vqshlq_r_u8(__a, __b)
> -#define vqrshlq_u8(__a, __b) __arm_vqrshlq_u8(__a, __b)
> -#define vqrshlq_n_u8(__a, __b) __arm_vqrshlq_n_u8(__a, __b)
>  #define vminavq_s8(__a, __b) __arm_vminavq_s8(__a, __b)
>  #define vminaq_s8(__a, __b) __arm_vminaq_s8(__a, __b)
>  #define vmaxavq_s8(__a, __b) __arm_vmaxavq_s8(__a, __b)
> @@ -691,12 +680,8 @@
>  #define vqshluq_n_s8(__a,  __imm) __arm_vqshluq_n_s8(__a,  __imm)
>  #define vaddvq_p_s8(__a, __p) __arm_vaddvq_p_s8(__a, __p)
>  #define vshlq_r_s8(__a, __b) __arm_vshlq_r_s8(__a, __b)
> -#define vrshlq_s8(__a, __b) __arm_vrshlq_s8(__a, __b)
> -#define vrshlq_n_s8(__a, __b) __arm_vrshlq_n_s8(__a, __b)
>  #define vqshlq_s8(__a, __b) __arm_vqshlq_s8(__a, __b)
>  #define vqshlq_r_s8(__a, __b) __arm_vqshlq_r_s8(__a, __b)
> -#define vqrshlq_s8(__a, __b) __arm_vqrshlq_s8(__a, __b)
> -#define vqrshlq_n_s8(__a, __b) __arm_vqrshlq_n_s8(__a, __b)
>  #define vqrdmulhq_s8(__a, __b) __arm_vqrdmulhq_s8(__a, __b)
>  #define vqrdmulhq_n_s8(__a, __b) __arm_vqrdmulhq_n_s8(__a, __b)
>  #define vornq_s8(__a, __b) __arm_vornq_s8(__a, __b)
> @@ -743,12 +728,8 @@
>  #define vaddvaq_u16(__a, __b) __arm_vaddvaq_u16(__a, __b)
>  #define vabdq_u16(__a, __b) __arm_vabdq_u16(__a, __b)
>  #define vshlq_r_u16(__a, __b) __arm_vshlq_r_u16(__a, __b)
> -#define vrshlq_u16(__a, __b) __arm_vrshlq_u16(__a, __b)
> -#define vrshlq_n_u16(__a, __b) __arm_vrshlq_n_u16(__a, __b)
>  #define vqshlq_u16(__a, __b) __arm_vqshlq_u16(__a, __b)
>  #define vqshlq_r_u16(__a, __b) __arm_vqshlq_r_u16(__a, __b)
> -#define vqrshlq_u16(__a, __b) __arm_vqrshlq_u16(__a, __b)
> -#define vqrshlq_n_u16(__a, __b) __arm_vqrshlq_n_u16(__a, __b)
>  #define vminavq_s16(__a, __b) __arm_vminavq_s16(__a, __b)
>  #define vminaq_s16(__a, __b) __arm_vminaq_s16(__a, __b)
>  #define vmaxavq_s16(__a, __b) __arm_vmaxavq_s16(__a, __b)
> @@ -771,12 +752,8 @@
>  #define vqshluq_n_s16(__a,  __imm) __arm_vqshluq_n_s16(__a,  __imm)
>  #define vaddvq_p_s16(__a, __p) __arm_vaddvq_p_s16(__a, __p)
>  #define vshlq_r_s16(__a, __b) __arm_vshlq_r_s16(__a, __b)
> -#define vrshlq_s16(__a, __b) __arm_vrshlq_s16(__a, __b)
> -#define vrshlq_n_s16(__a, __b) __arm_vrshlq_n_s16(__a, __b)
>  #define vqshlq_s16(__a, __b) __arm_vqshlq_s16(__a, __b)
>  #define vqshlq_r_s16(__a, __b) __arm_vqshlq_r_s16(__a, __b)
> -#define vqrshlq_s16(__a, __b) __arm_vqrshlq_s16(__a, __b)
> -#define vqrshlq_n_s16(__a, __b) __arm_vqrshlq_n_s16(__a, __b)
>  #define vqrdmulhq_s16(__a, __b) __arm_vqrdmulhq_s16(__a, __b)
>  #define vqrdmulhq_n_s16(__a, __b) __arm_vqrdmulhq_n_s16(__a, __b)
>  #define vornq_s16(__a, __b) __arm_vornq_s16(__a, __b)
> @@ -823,12 +800,8 @@
>  #define vaddvaq_u32(__a, __b) __arm_vaddvaq_u32(__a, __b)
>  #define vabdq_u32(__a, __b) __arm_vabdq_u32(__a, __b)
>  #define vshlq_r_u32(__a, __b) __arm_vshlq_r_u32(__a, __b)
> -#define vrshlq_u32(__a, __b) __arm_vrshlq_u32(__a, __b)
> -#define vrshlq_n_u32(__a, __b) __arm_vrshlq_n_u32(__a, __b)
>  #define vqshlq_u32(__a, __b) __arm_vqshlq_u32(__a, __b)
>  #define vqshlq_r_u32(__a, __b) __arm_vqshlq_r_u32(__a, __b)
> -#define vqrshlq_u32(__a, __b) __arm_vqrshlq_u32(__a, __b)
> -#define vqrshlq_n_u32(__a, __b) __arm_vqrshlq_n_u32(__a, __b)
>  #define vminavq_s32(__a, __b) __arm_vminavq_s32(__a, __b)
>  #define vminaq_s32(__a, __b) __arm_vminaq_s32(__a, __b)
>  #define vmaxavq_s32(__a, __b) __arm_vmaxavq_s32(__a, __b)
> @@ -851,12 +824,8 @@
>  #define vqshluq_n_s32(__a,  __imm) __arm_vqshluq_n_s32(__a,  __imm)
>  #define vaddvq_p_s32(__a, __p) __arm_vaddvq_p_s32(__a, __p)
>  #define vshlq_r_s32(__a, __b) __arm_vshlq_r_s32(__a, __b)
> -#define vrshlq_s32(__a, __b) __arm_vrshlq_s32(__a, __b)
> -#define vrshlq_n_s32(__a, __b) __arm_vrshlq_n_s32(__a, __b)
>  #define vqshlq_s32(__a, __b) __arm_vqshlq_s32(__a, __b)
>  #define vqshlq_r_s32(__a, __b) __arm_vqshlq_r_s32(__a, __b)
> -#define vqrshlq_s32(__a, __b) __arm_vqrshlq_s32(__a, __b)
> -#define vqrshlq_n_s32(__a, __b) __arm_vqrshlq_n_s32(__a, __b)
>  #define vqrdmulhq_s32(__a, __b) __arm_vqrdmulhq_s32(__a, __b)
>  #define vqrdmulhq_n_s32(__a, __b) __arm_vqrdmulhq_n_s32(__a, __b)
>  #define vornq_s32(__a, __b) __arm_vornq_s32(__a, __b)
> @@ -1064,9 +1033,7 @@
>  #define vsriq_n_u8(__a, __b,  __imm) __arm_vsriq_n_u8(__a, __b,  __imm)
>  #define vsliq_n_u8(__a, __b,  __imm) __arm_vsliq_n_u8(__a, __b,  __imm)
>  #define vshlq_m_r_u8(__a, __b, __p) __arm_vshlq_m_r_u8(__a, __b, __p)
> -#define vrshlq_m_n_u8(__a, __b, __p) __arm_vrshlq_m_n_u8(__a, __b, __p)
>  #define vqshlq_m_r_u8(__a, __b, __p) __arm_vqshlq_m_r_u8(__a, __b, __p)
> -#define vqrshlq_m_n_u8(__a, __b, __p) __arm_vqrshlq_m_n_u8(__a, __b, __p)
>  #define vminavq_p_s8(__a, __b, __p) __arm_vminavq_p_s8(__a, __b, __p)
>  #define vminaq_m_s8(__a, __b, __p) __arm_vminaq_m_s8(__a, __b, __p)
>  #define vmaxavq_p_s8(__a, __b, __p) __arm_vmaxavq_p_s8(__a, __b, __p)
> @@ -1084,10 +1051,8 @@
>  #define vcmpeqq_m_s8(__a, __b, __p) __arm_vcmpeqq_m_s8(__a, __b, __p)
>  #define vcmpeqq_m_n_s8(__a, __b, __p) __arm_vcmpeqq_m_n_s8(__a, __b, __p)
>  #define vshlq_m_r_s8(__a, __b, __p) __arm_vshlq_m_r_s8(__a, __b, __p)
> -#define vrshlq_m_n_s8(__a, __b, __p) __arm_vrshlq_m_n_s8(__a, __b, __p)
>  #define vrev64q_m_s8(__inactive, __a, __p) __arm_vrev64q_m_s8(__inactive, __a, __p)
>  #define vqshlq_m_r_s8(__a, __b, __p) __arm_vqshlq_m_r_s8(__a, __b, __p)
> -#define vqrshlq_m_n_s8(__a, __b, __p) __arm_vqrshlq_m_n_s8(__a, __b, __p)
>  #define vqnegq_m_s8(__inactive, __a, __p) __arm_vqnegq_m_s8(__inactive, __a, __p)
>  #define vqabsq_m_s8(__inactive, __a, __p) __arm_vqabsq_m_s8(__inactive, __a, __p)
>  #define vnegq_m_s8(__inactive, __a, __p) __arm_vnegq_m_s8(__inactive, __a, __p)
> @@ -1147,9 +1112,7 @@
>  #define vsriq_n_u16(__a, __b,  __imm) __arm_vsriq_n_u16(__a, __b,  __imm)
>  #define vsliq_n_u16(__a, __b,  __imm) __arm_vsliq_n_u16(__a, __b,  __imm)
>  #define vshlq_m_r_u16(__a, __b, __p) __arm_vshlq_m_r_u16(__a, __b, __p)
> -#define vrshlq_m_n_u16(__a, __b, __p) __arm_vrshlq_m_n_u16(__a, __b, __p)
>  #define vqshlq_m_r_u16(__a, __b, __p) __arm_vqshlq_m_r_u16(__a, __b, __p)
> -#define vqrshlq_m_n_u16(__a, __b, __p) __arm_vqrshlq_m_n_u16(__a, __b, __p)
>  #define vminavq_p_s16(__a, __b, __p) __arm_vminavq_p_s16(__a, __b, __p)
>  #define vminaq_m_s16(__a, __b, __p) __arm_vminaq_m_s16(__a, __b, __p)
>  #define vmaxavq_p_s16(__a, __b, __p) __arm_vmaxavq_p_s16(__a, __b, __p)
> @@ -1167,10 +1130,8 @@
>  #define vcmpeqq_m_s16(__a, __b, __p) __arm_vcmpeqq_m_s16(__a, __b, __p)
>  #define vcmpeqq_m_n_s16(__a, __b, __p) __arm_vcmpeqq_m_n_s16(__a, __b, __p)
>  #define vshlq_m_r_s16(__a, __b, __p) __arm_vshlq_m_r_s16(__a, __b, __p)
> -#define vrshlq_m_n_s16(__a, __b, __p) __arm_vrshlq_m_n_s16(__a, __b, __p)
>  #define vrev64q_m_s16(__inactive, __a, __p) __arm_vrev64q_m_s16(__inactive, __a, __p)
>  #define vqshlq_m_r_s16(__a, __b, __p) __arm_vqshlq_m_r_s16(__a, __b, __p)
> -#define vqrshlq_m_n_s16(__a, __b, __p) __arm_vqrshlq_m_n_s16(__a, __b, __p)
>  #define vqnegq_m_s16(__inactive, __a, __p) __arm_vqnegq_m_s16(__inactive, __a, __p)
>  #define vqabsq_m_s16(__inactive, __a, __p) __arm_vqabsq_m_s16(__inactive, __a, __p)
>  #define vnegq_m_s16(__inactive, __a, __p) __arm_vnegq_m_s16(__inactive, __a, __p)
> @@ -1230,9 +1191,7 @@
>  #define vsriq_n_u32(__a, __b,  __imm) __arm_vsriq_n_u32(__a, __b,  __imm)
>  #define vsliq_n_u32(__a, __b,  __imm) __arm_vsliq_n_u32(__a, __b,  __imm)
>  #define vshlq_m_r_u32(__a, __b, __p) __arm_vshlq_m_r_u32(__a, __b, __p)
> -#define vrshlq_m_n_u32(__a, __b, __p) __arm_vrshlq_m_n_u32(__a, __b, __p)
>  #define vqshlq_m_r_u32(__a, __b, __p) __arm_vqshlq_m_r_u32(__a, __b, __p)
> -#define vqrshlq_m_n_u32(__a, __b, __p) __arm_vqrshlq_m_n_u32(__a, __b, __p)
>  #define vminavq_p_s32(__a, __b, __p) __arm_vminavq_p_s32(__a, __b, __p)
>  #define vminaq_m_s32(__a, __b, __p) __arm_vminaq_m_s32(__a, __b, __p)
>  #define vmaxavq_p_s32(__a, __b, __p) __arm_vmaxavq_p_s32(__a, __b, __p)
> @@ -1250,10 +1209,8 @@
>  #define vcmpeqq_m_s32(__a, __b, __p) __arm_vcmpeqq_m_s32(__a, __b, __p)
>  #define vcmpeqq_m_n_s32(__a, __b, __p) __arm_vcmpeqq_m_n_s32(__a, __b, __p)
>  #define vshlq_m_r_s32(__a, __b, __p) __arm_vshlq_m_r_s32(__a, __b, __p)
> -#define vrshlq_m_n_s32(__a, __b, __p) __arm_vrshlq_m_n_s32(__a, __b, __p)
>  #define vrev64q_m_s32(__inactive, __a, __p) __arm_vrev64q_m_s32(__inactive, __a, __p)
>  #define vqshlq_m_r_s32(__a, __b, __p) __arm_vqshlq_m_r_s32(__a, __b, __p)
> -#define vqrshlq_m_n_s32(__a, __b, __p) __arm_vqrshlq_m_n_s32(__a, __b, __p)
>  #define vqnegq_m_s32(__inactive, __a, __p) __arm_vqnegq_m_s32(__inactive, __a, __p)
>  #define vqabsq_m_s32(__inactive, __a, __p) __arm_vqabsq_m_s32(__inactive, __a, __p)
>  #define vnegq_m_s32(__inactive, __a, __p) __arm_vnegq_m_s32(__inactive, __a, __p)
> @@ -1646,12 +1603,6 @@
>  #define vqrdmulhq_m_s8(__inactive, __a, __b, __p) __arm_vqrdmulhq_m_s8(__inactive, __a, __b, __p)
>  #define vqrdmulhq_m_s32(__inactive, __a, __b, __p) __arm_vqrdmulhq_m_s32(__inactive, __a, __b, __p)
>  #define vqrdmulhq_m_s16(__inactive, __a, __b, __p) __arm_vqrdmulhq_m_s16(__inactive, __a, __b, __p)
> -#define vqrshlq_m_s8(__inactive, __a, __b, __p) __arm_vqrshlq_m_s8(__inactive, __a, __b, __p)
> -#define vqrshlq_m_s32(__inactive, __a, __b, __p) __arm_vqrshlq_m_s32(__inactive, __a, __b, __p)
> -#define vqrshlq_m_s16(__inactive, __a, __b, __p) __arm_vqrshlq_m_s16(__inactive, __a, __b, __p)
> -#define vqrshlq_m_u8(__inactive, __a, __b, __p) __arm_vqrshlq_m_u8(__inactive, __a, __b, __p)
> -#define vqrshlq_m_u32(__inactive, __a, __b, __p) __arm_vqrshlq_m_u32(__inactive, __a, __b, __p)
> -#define vqrshlq_m_u16(__inactive, __a, __b, __p) __arm_vqrshlq_m_u16(__inactive, __a, __b, __p)
>  #define vqshlq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vqshlq_m_n_s8(__inactive, __a,  __imm, __p)
>  #define vqshlq_m_n_s32(__inactive, __a,  __imm, __p) __arm_vqshlq_m_n_s32(__inactive, __a,  __imm, __p)
>  #define vqshlq_m_n_s16(__inactive, __a,  __imm, __p) __arm_vqshlq_m_n_s16(__inactive, __a,  __imm, __p)
> @@ -1664,12 +1615,6 @@
>  #define vqshlq_m_u8(__inactive, __a, __b, __p) __arm_vqshlq_m_u8(__inactive, __a, __b, __p)
>  #define vqshlq_m_u32(__inactive, __a, __b, __p) __arm_vqshlq_m_u32(__inactive, __a, __b, __p)
>  #define vqshlq_m_u16(__inactive, __a, __b, __p) __arm_vqshlq_m_u16(__inactive, __a, __b, __p)
> -#define vrshlq_m_s8(__inactive, __a, __b, __p) __arm_vrshlq_m_s8(__inactive, __a, __b, __p)
> -#define vrshlq_m_s32(__inactive, __a, __b, __p) __arm_vrshlq_m_s32(__inactive, __a, __b, __p)
> -#define vrshlq_m_s16(__inactive, __a, __b, __p) __arm_vrshlq_m_s16(__inactive, __a, __b, __p)
> -#define vrshlq_m_u8(__inactive, __a, __b, __p) __arm_vrshlq_m_u8(__inactive, __a, __b, __p)
> -#define vrshlq_m_u32(__inactive, __a, __b, __p) __arm_vrshlq_m_u32(__inactive, __a, __b, __p)
> -#define vrshlq_m_u16(__inactive, __a, __b, __p) __arm_vrshlq_m_u16(__inactive, __a, __b, __p)
>  #define vrshrq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_s8(__inactive, __a,  __imm, __p)
>  #define vrshrq_m_n_s32(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_s32(__inactive, __a,  __imm, __p)
>  #define vrshrq_m_n_s16(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_s16(__inactive, __a,  __imm, __p)
> @@ -2232,12 +2177,6 @@
>  #define vrev64q_x_u8(__a, __p) __arm_vrev64q_x_u8(__a, __p)
>  #define vrev64q_x_u16(__a, __p) __arm_vrev64q_x_u16(__a, __p)
>  #define vrev64q_x_u32(__a, __p) __arm_vrev64q_x_u32(__a, __p)
> -#define vrshlq_x_s8(__a, __b, __p) __arm_vrshlq_x_s8(__a, __b, __p)
> -#define vrshlq_x_s16(__a, __b, __p) __arm_vrshlq_x_s16(__a, __b, __p)
> -#define vrshlq_x_s32(__a, __b, __p) __arm_vrshlq_x_s32(__a, __b, __p)
> -#define vrshlq_x_u8(__a, __b, __p) __arm_vrshlq_x_u8(__a, __b, __p)
> -#define vrshlq_x_u16(__a, __b, __p) __arm_vrshlq_x_u16(__a, __b, __p)
> -#define vrshlq_x_u32(__a, __b, __p) __arm_vrshlq_x_u32(__a, __b, __p)
>  #define vshllbq_x_n_s8(__a,  __imm, __p) __arm_vshllbq_x_n_s8(__a,  __imm, __p)
>  #define vshllbq_x_n_s16(__a,  __imm, __p) __arm_vshllbq_x_n_s16(__a,  __imm, __p)
>  #define vshllbq_x_n_u8(__a,  __imm, __p) __arm_vshllbq_x_n_u8(__a,  __imm, __p)
> @@ -3300,20 +3239,6 @@ __arm_vshlq_r_u8 (uint8x16_t __a, int32_t __b)
>    return __builtin_mve_vshlq_r_uv16qi (__a, __b);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
> -{
> -  return __builtin_mve_vrshlq_uv16qi (__a, __b);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_n_u8 (uint8x16_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vrshlq_n_uv16qi (__a, __b);
> -}
> -
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
> @@ -3328,20 +3253,6 @@ __arm_vqshlq_r_u8 (uint8x16_t __a, int32_t __b)
>    return __builtin_mve_vqshlq_r_uv16qi (__a, __b);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
> -{
> -  return __builtin_mve_vqrshlq_uv16qi (__a, __b);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_n_u8 (uint8x16_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vqrshlq_n_uv16qi (__a, __b);
> -}
> -
>  __extension__ extern __inline uint8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq_s8 (uint8_t __a, int8x16_t __b)
> @@ -3496,20 +3407,6 @@ __arm_vshlq_r_s8 (int8x16_t __a, int32_t __b)
>    return __builtin_mve_vshlq_r_sv16qi (__a, __b);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_s8 (int8x16_t __a, int8x16_t __b)
> -{
> -  return __builtin_mve_vrshlq_sv16qi (__a, __b);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_n_s8 (int8x16_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vrshlq_n_sv16qi (__a, __b);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq_s8 (int8x16_t __a, int8x16_t __b)
> @@ -3524,20 +3421,6 @@ __arm_vqshlq_r_s8 (int8x16_t __a, int32_t __b)
>    return __builtin_mve_vqshlq_r_sv16qi (__a, __b);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
> -{
> -  return __builtin_mve_vqrshlq_sv16qi (__a, __b);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_n_s8 (int8x16_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vqrshlq_n_sv16qi (__a, __b);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqrdmulhq_s8 (int8x16_t __a, int8x16_t __b)
> @@ -3862,20 +3745,6 @@ __arm_vshlq_r_u16 (uint16x8_t __a, int32_t __b)
>    return __builtin_mve_vshlq_r_uv8hi (__a, __b);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
> -{
> -  return __builtin_mve_vrshlq_uv8hi (__a, __b);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_n_u16 (uint16x8_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vrshlq_n_uv8hi (__a, __b);
> -}
> -
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
> @@ -3890,20 +3759,6 @@ __arm_vqshlq_r_u16 (uint16x8_t __a, int32_t __b)
>    return __builtin_mve_vqshlq_r_uv8hi (__a, __b);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
> -{
> -  return __builtin_mve_vqrshlq_uv8hi (__a, __b);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_n_u16 (uint16x8_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vqrshlq_n_uv8hi (__a, __b);
> -}
> -
>  __extension__ extern __inline uint16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq_s16 (uint16_t __a, int16x8_t __b)
> @@ -4058,20 +3913,6 @@ __arm_vshlq_r_s16 (int16x8_t __a, int32_t __b)
>    return __builtin_mve_vshlq_r_sv8hi (__a, __b);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_s16 (int16x8_t __a, int16x8_t __b)
> -{
> -  return __builtin_mve_vrshlq_sv8hi (__a, __b);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_n_s16 (int16x8_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vrshlq_n_sv8hi (__a, __b);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq_s16 (int16x8_t __a, int16x8_t __b)
> @@ -4086,20 +3927,6 @@ __arm_vqshlq_r_s16 (int16x8_t __a, int32_t __b)
>    return __builtin_mve_vqshlq_r_sv8hi (__a, __b);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
> -{
> -  return __builtin_mve_vqrshlq_sv8hi (__a, __b);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_n_s16 (int16x8_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vqrshlq_n_sv8hi (__a, __b);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
> @@ -4424,20 +4251,6 @@ __arm_vshlq_r_u32 (uint32x4_t __a, int32_t __b)
>    return __builtin_mve_vshlq_r_uv4si (__a, __b);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
> -{
> -  return __builtin_mve_vrshlq_uv4si (__a, __b);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_n_u32 (uint32x4_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vrshlq_n_uv4si (__a, __b);
> -}
> -
>  __extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
> @@ -4452,20 +4265,6 @@ __arm_vqshlq_r_u32 (uint32x4_t __a, int32_t __b)
>    return __builtin_mve_vqshlq_r_uv4si (__a, __b);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
> -{
> -  return __builtin_mve_vqrshlq_uv4si (__a, __b);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_n_u32 (uint32x4_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vqrshlq_n_uv4si (__a, __b);
> -}
> -
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq_s32 (uint32_t __a, int32x4_t __b)
> @@ -4620,20 +4419,6 @@ __arm_vshlq_r_s32 (int32x4_t __a, int32_t __b)
>    return __builtin_mve_vshlq_r_sv4si (__a, __b);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_s32 (int32x4_t __a, int32x4_t __b)
> -{
> -  return __builtin_mve_vrshlq_sv4si (__a, __b);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_n_s32 (int32x4_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vrshlq_n_sv4si (__a, __b);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq_s32 (int32x4_t __a, int32x4_t __b)
> @@ -4648,20 +4433,6 @@ __arm_vqshlq_r_s32 (int32x4_t __a, int32_t __b)
>    return __builtin_mve_vqshlq_r_sv4si (__a, __b);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
> -{
> -  return __builtin_mve_vqrshlq_sv4si (__a, __b);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_n_s32 (int32x4_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vqrshlq_n_sv4si (__a, __b);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
> @@ -5633,13 +5404,6 @@ __arm_vshlq_m_r_u8 (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
>    return __builtin_mve_vshlq_m_r_uv16qi (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m_n_u8 (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshlq_m_n_uv16qi (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq_m_r_u8 (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
> @@ -5647,13 +5411,6 @@ __arm_vqshlq_m_r_u8 (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
>    return __builtin_mve_vqshlq_m_r_uv16qi (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m_n_u8 (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshlq_m_n_uv16qi (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq_p_s8 (uint8_t __a, int8x16_t __b, mve_pred16_t __p)
> @@ -5773,13 +5530,6 @@ __arm_vshlq_m_r_s8 (int8x16_t __a, int32_t __b, mve_pred16_t __p)
>    return __builtin_mve_vshlq_m_r_sv16qi (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m_n_s8 (int8x16_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshlq_m_n_sv16qi (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrev64q_m_s8 (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
> @@ -5794,13 +5544,6 @@ __arm_vqshlq_m_r_s8 (int8x16_t __a, int32_t __b, mve_pred16_t __p)
>    return __builtin_mve_vqshlq_m_r_sv16qi (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m_n_s8 (int8x16_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshlq_m_n_sv16qi (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqnegq_m_s8 (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
> @@ -6215,13 +5958,6 @@ __arm_vshlq_m_r_u16 (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
>    return __builtin_mve_vshlq_m_r_uv8hi (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m_n_u16 (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshlq_m_n_uv8hi (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq_m_r_u16 (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
> @@ -6229,13 +5965,6 @@ __arm_vqshlq_m_r_u16 (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
>    return __builtin_mve_vqshlq_m_r_uv8hi (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m_n_u16 (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshlq_m_n_uv8hi (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq_p_s16 (uint16_t __a, int16x8_t __b, mve_pred16_t __p)
> @@ -6355,13 +6084,6 @@ __arm_vshlq_m_r_s16 (int16x8_t __a, int32_t __b, mve_pred16_t __p)
>    return __builtin_mve_vshlq_m_r_sv8hi (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m_n_s16 (int16x8_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshlq_m_n_sv8hi (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrev64q_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
> @@ -6376,13 +6098,6 @@ __arm_vqshlq_m_r_s16 (int16x8_t __a, int32_t __b, mve_pred16_t __p)
>    return __builtin_mve_vqshlq_m_r_sv8hi (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m_n_s16 (int16x8_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshlq_m_n_sv8hi (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqnegq_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
> @@ -6796,13 +6511,6 @@ __arm_vshlq_m_r_u32 (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
>    return __builtin_mve_vshlq_m_r_uv4si (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m_n_u32 (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshlq_m_n_uv4si (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq_m_r_u32 (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
> @@ -6810,13 +6518,6 @@ __arm_vqshlq_m_r_u32 (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
>    return __builtin_mve_vqshlq_m_r_uv4si (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m_n_u32 (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshlq_m_n_uv4si (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq_p_s32 (uint32_t __a, int32x4_t __b, mve_pred16_t __p)
> @@ -6936,13 +6637,6 @@ __arm_vshlq_m_r_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
>    return __builtin_mve_vshlq_m_r_sv4si (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m_n_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshlq_m_n_sv4si (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrev64q_m_s32 (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
> @@ -6957,13 +6651,6 @@ __arm_vqshlq_m_r_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
>    return __builtin_mve_vqshlq_m_r_sv4si (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m_n_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshlq_m_n_sv4si (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqnegq_m_s32 (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
> @@ -9029,48 +8716,6 @@ __arm_vqrdmulhq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_p
>    return __builtin_mve_vqrdmulhq_m_sv8hi (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshlq_m_sv16qi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshlq_m_sv4si (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshlq_m_sv8hi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshlq_m_uv16qi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshlq_m_uv4si (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshlq_m_uv8hi (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
> @@ -9155,48 +8800,6 @@ __arm_vqshlq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pr
>    return __builtin_mve_vqshlq_m_uv8hi (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshlq_m_sv16qi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshlq_m_sv4si (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshlq_m_sv8hi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshlq_m_uv16qi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshlq_m_uv4si (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshlq_m_uv8hi (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrshrq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
> @@ -12648,48 +12251,6 @@ __arm_vrev64q_x_u32 (uint32x4_t __a, mve_pred16_t __p)
>    return __builtin_mve_vrev64q_m_uv4si (__arm_vuninitializedq_u32 (), __a, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_x_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshlq_m_sv16qi (__arm_vuninitializedq_s8 (), __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_x_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshlq_m_sv8hi (__arm_vuninitializedq_s16 (), __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_x_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshlq_m_sv4si (__arm_vuninitializedq_s32 (), __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_x_u8 (uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshlq_m_uv16qi (__arm_vuninitializedq_u8 (), __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_x_u16 (uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshlq_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_x_u32 (uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshlq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vshllbq_x_n_s8 (int8x16_t __a, const int __imm, mve_pred16_t __p)
> @@ -17203,20 +16764,6 @@ __arm_vshlq_r (uint8x16_t __a, int32_t __b)
>   return __arm_vshlq_r_u8 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq (uint8x16_t __a, int8x16_t __b)
> -{
> - return __arm_vrshlq_u8 (__a, __b);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq (uint8x16_t __a, int32_t __b)
> -{
> - return __arm_vrshlq_n_u8 (__a, __b);
> -}
> -
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq (uint8x16_t __a, int8x16_t __b)
> @@ -17231,20 +16778,6 @@ __arm_vqshlq_r (uint8x16_t __a, int32_t __b)
>   return __arm_vqshlq_r_u8 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq (uint8x16_t __a, int8x16_t __b)
> -{
> - return __arm_vqrshlq_u8 (__a, __b);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq (uint8x16_t __a, int32_t __b)
> -{
> - return __arm_vqrshlq_n_u8 (__a, __b);
> -}
> -
>  __extension__ extern __inline uint8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq (uint8_t __a, int8x16_t __b)
> @@ -17399,20 +16932,6 @@ __arm_vshlq_r (int8x16_t __a, int32_t __b)
>   return __arm_vshlq_r_s8 (__a, __b);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq (int8x16_t __a, int8x16_t __b)
> -{
> - return __arm_vrshlq_s8 (__a, __b);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq (int8x16_t __a, int32_t __b)
> -{
> - return __arm_vrshlq_n_s8 (__a, __b);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq (int8x16_t __a, int8x16_t __b)
> @@ -17427,20 +16946,6 @@ __arm_vqshlq_r (int8x16_t __a, int32_t __b)
>   return __arm_vqshlq_r_s8 (__a, __b);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq (int8x16_t __a, int8x16_t __b)
> -{
> - return __arm_vqrshlq_s8 (__a, __b);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq (int8x16_t __a, int32_t __b)
> -{
> - return __arm_vqrshlq_n_s8 (__a, __b);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqrdmulhq (int8x16_t __a, int8x16_t __b)
> @@ -17746,63 +17251,35 @@ __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vaddvaq (uint32_t __a, uint16x8_t __b)
>  {
> - return __arm_vaddvaq_u16 (__a, __b);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq (uint16x8_t __a, uint16x8_t __b)
> -{
> - return __arm_vabdq_u16 (__a, __b);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_r (uint16x8_t __a, int32_t __b)
> -{
> - return __arm_vshlq_r_u16 (__a, __b);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq (uint16x8_t __a, int16x8_t __b)
> -{
> - return __arm_vrshlq_u16 (__a, __b);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq (uint16x8_t __a, int32_t __b)
> -{
> - return __arm_vrshlq_n_u16 (__a, __b);
> + return __arm_vaddvaq_u16 (__a, __b);
>  }
> 
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq (uint16x8_t __a, int16x8_t __b)
> +__arm_vabdq (uint16x8_t __a, uint16x8_t __b)
>  {
> - return __arm_vqshlq_u16 (__a, __b);
> + return __arm_vabdq_u16 (__a, __b);
>  }
> 
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_r (uint16x8_t __a, int32_t __b)
> +__arm_vshlq_r (uint16x8_t __a, int32_t __b)
>  {
> - return __arm_vqshlq_r_u16 (__a, __b);
> + return __arm_vshlq_r_u16 (__a, __b);
>  }
> 
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq (uint16x8_t __a, int16x8_t __b)
> +__arm_vqshlq (uint16x8_t __a, int16x8_t __b)
>  {
> - return __arm_vqrshlq_u16 (__a, __b);
> + return __arm_vqshlq_u16 (__a, __b);
>  }
> 
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq (uint16x8_t __a, int32_t __b)
> +__arm_vqshlq_r (uint16x8_t __a, int32_t __b)
>  {
> - return __arm_vqrshlq_n_u16 (__a, __b);
> + return __arm_vqshlq_r_u16 (__a, __b);
>  }
> 
>  __extension__ extern __inline uint16_t
> @@ -17959,20 +17436,6 @@ __arm_vshlq_r (int16x8_t __a, int32_t __b)
>   return __arm_vshlq_r_s16 (__a, __b);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq (int16x8_t __a, int16x8_t __b)
> -{
> - return __arm_vrshlq_s16 (__a, __b);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq (int16x8_t __a, int32_t __b)
> -{
> - return __arm_vrshlq_n_s16 (__a, __b);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq (int16x8_t __a, int16x8_t __b)
> @@ -17987,20 +17450,6 @@ __arm_vqshlq_r (int16x8_t __a, int32_t __b)
>   return __arm_vqshlq_r_s16 (__a, __b);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq (int16x8_t __a, int16x8_t __b)
> -{
> - return __arm_vqrshlq_s16 (__a, __b);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq (int16x8_t __a, int32_t __b)
> -{
> - return __arm_vqrshlq_n_s16 (__a, __b);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqrdmulhq (int16x8_t __a, int16x8_t __b)
> @@ -18323,20 +17772,6 @@ __arm_vshlq_r (uint32x4_t __a, int32_t __b)
>   return __arm_vshlq_r_u32 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq (uint32x4_t __a, int32x4_t __b)
> -{
> - return __arm_vrshlq_u32 (__a, __b);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq (uint32x4_t __a, int32_t __b)
> -{
> - return __arm_vrshlq_n_u32 (__a, __b);
> -}
> -
>  __extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq (uint32x4_t __a, int32x4_t __b)
> @@ -18351,20 +17786,6 @@ __arm_vqshlq_r (uint32x4_t __a, int32_t __b)
>   return __arm_vqshlq_r_u32 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq (uint32x4_t __a, int32x4_t __b)
> -{
> - return __arm_vqrshlq_u32 (__a, __b);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq (uint32x4_t __a, int32_t __b)
> -{
> - return __arm_vqrshlq_n_u32 (__a, __b);
> -}
> -
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq (uint32_t __a, int32x4_t __b)
> @@ -18519,20 +17940,6 @@ __arm_vshlq_r (int32x4_t __a, int32_t __b)
>   return __arm_vshlq_r_s32 (__a, __b);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq (int32x4_t __a, int32x4_t __b)
> -{
> - return __arm_vrshlq_s32 (__a, __b);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq (int32x4_t __a, int32_t __b)
> -{
> - return __arm_vrshlq_n_s32 (__a, __b);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq (int32x4_t __a, int32x4_t __b)
> @@ -18547,20 +17954,6 @@ __arm_vqshlq_r (int32x4_t __a, int32_t __b)
>   return __arm_vqshlq_r_s32 (__a, __b);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq (int32x4_t __a, int32x4_t __b)
> -{
> - return __arm_vqrshlq_s32 (__a, __b);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq (int32x4_t __a, int32_t __b)
> -{
> - return __arm_vqrshlq_n_s32 (__a, __b);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqrdmulhq (int32x4_t __a, int32x4_t __b)
> @@ -19492,13 +18885,6 @@ __arm_vshlq_m_r (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
>   return __arm_vshlq_m_r_u8 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m_n (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vrshlq_m_n_u8 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq_m_r (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
> @@ -19506,13 +18892,6 @@ __arm_vqshlq_m_r (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
>   return __arm_vqshlq_m_r_u8 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m_n (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqrshlq_m_n_u8 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq_p (uint8_t __a, int8x16_t __b, mve_pred16_t __p)
> @@ -19632,13 +19011,6 @@ __arm_vshlq_m_r (int8x16_t __a, int32_t __b, mve_pred16_t __p)
>   return __arm_vshlq_m_r_s8 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m_n (int8x16_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vrshlq_m_n_s8 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrev64q_m (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
> @@ -19653,13 +19025,6 @@ __arm_vqshlq_m_r (int8x16_t __a, int32_t __b, mve_pred16_t __p)
>   return __arm_vqshlq_m_r_s8 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m_n (int8x16_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqrshlq_m_n_s8 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqnegq_m (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
> @@ -20073,13 +19438,6 @@ __arm_vshlq_m_r (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
>   return __arm_vshlq_m_r_u16 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m_n (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vrshlq_m_n_u16 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq_m_r (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
> @@ -20087,13 +19445,6 @@ __arm_vqshlq_m_r (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
>   return __arm_vqshlq_m_r_u16 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m_n (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqrshlq_m_n_u16 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq_p (uint16_t __a, int16x8_t __b, mve_pred16_t __p)
> @@ -20213,13 +19564,6 @@ __arm_vshlq_m_r (int16x8_t __a, int32_t __b, mve_pred16_t __p)
>   return __arm_vshlq_m_r_s16 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m_n (int16x8_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vrshlq_m_n_s16 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrev64q_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
> @@ -20234,13 +19578,6 @@ __arm_vqshlq_m_r (int16x8_t __a, int32_t __b, mve_pred16_t __p)
>   return __arm_vqshlq_m_r_s16 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m_n (int16x8_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqrshlq_m_n_s16 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqnegq_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
> @@ -20654,13 +19991,6 @@ __arm_vshlq_m_r (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
>   return __arm_vshlq_m_r_u32 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m_n (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vrshlq_m_n_u32 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq_m_r (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
> @@ -20668,13 +19998,6 @@ __arm_vqshlq_m_r (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
>   return __arm_vqshlq_m_r_u32 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m_n (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqrshlq_m_n_u32 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq_p (uint32_t __a, int32x4_t __b, mve_pred16_t __p)
> @@ -20794,13 +20117,6 @@ __arm_vshlq_m_r (int32x4_t __a, int32_t __b, mve_pred16_t __p)
>   return __arm_vshlq_m_r_s32 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m_n (int32x4_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vrshlq_m_n_s32 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrev64q_m (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
> @@ -20815,13 +20131,6 @@ __arm_vqshlq_m_r (int32x4_t __a, int32_t __b, mve_pred16_t __p)
>   return __arm_vqshlq_m_r_s32 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m_n (int32x4_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqrshlq_m_n_s32 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqnegq_m (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
> @@ -22887,48 +22196,6 @@ __arm_vqrdmulhq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred1
>   return __arm_vqrdmulhq_m_s16 (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqrshlq_m_s8 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqrshlq_m_s32 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqrshlq_m_s16 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m (uint8x16_t __inactive, uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqrshlq_m_u8 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m (uint32x4_t __inactive, uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqrshlq_m_u32 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshlq_m (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqrshlq_m_u16 (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq_m_n (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
> @@ -23013,48 +22280,6 @@ __arm_vqshlq_m (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pred16
>   return __arm_vqshlq_m_u16 (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vrshlq_m_s8 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vrshlq_m_s32 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vrshlq_m_s16 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m (uint8x16_t __inactive, uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vrshlq_m_u8 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m (uint32x4_t __inactive, uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vrshlq_m_u32 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_m (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vrshlq_m_u16 (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrshrq_m (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
> @@ -26009,48 +25234,6 @@ __arm_vrev64q_x (uint32x4_t __a, mve_pred16_t __p)
>   return __arm_vrev64q_x_u32 (__a, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_x (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vrshlq_x_s8 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_x (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vrshlq_x_s16 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_x (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vrshlq_x_s32 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_x (uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vrshlq_x_u8 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_x (uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vrshlq_x_u16 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshlq_x (uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vrshlq_x_u32 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vshllbq_x (int8x16_t __a, const int __imm, mve_pred16_t __p)
> @@ -29858,22 +29041,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrshrq_n_u16
> (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
>    int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrshrq_n_u32
> (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> 
> -#define __arm_vrshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
> __arm_vrshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
> __arm_vrshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
> __arm_vrshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]:
> __arm_vrshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]:
> __arm_vrshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]:
> __arm_vrshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vrshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vrshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vrshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vrshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vrshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vrshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t)));})
> -
>  #define __arm_vqshluq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
>    int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshluq_n_s8
> (__ARM_mve_coerce(__p0, int8x16_t), p1), \
> @@ -29908,22 +29075,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_n_u16
> (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
>    int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_n_u32
> (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> 
> -#define __arm_vqrshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vqrshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vqrshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vqrshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vqrshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vqrshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vqrshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
> __arm_vqrshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
> __arm_vqrshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
> __arm_vqrshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]:
> __arm_vqrshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]:
> __arm_vqrshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]:
> __arm_vqrshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce3(p1, int)));})
> -
>  #define __arm_vqrdmulhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> @@ -30181,16 +29332,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_m_r_u16
> (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
>    int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_m_r_u32
> (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
> 
> -#define __arm_vrshlq_m_n(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrshlq_m_n_s8
> (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrshlq_m_n_s16
> (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vrshlq_m_n_s32
> (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrshlq_m_n_u8
> (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrshlq_m_n_u16
> (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrshlq_m_n_u32
> (__ARM_mve_coerce(__p0, uint32x4_t), __p1, p2));})
> -
>  #define __arm_vqshlq_m_r(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
>    int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshlq_m_r_s8
> (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
> @@ -30200,15 +29341,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_m_r_u16
> (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
>    int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_m_r_u32
> (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
> 
> -#define __arm_vqrshlq_m_n(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vqrshlq_m_n_s8
> (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vqrshlq_m_n_s16
> (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vqrshlq_m_n_s32
> (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vqrshlq_m_n_u8
> (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqrshlq_m_n_u16
> (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqrshlq_m_n_u32
> (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
> -
>  #define __arm_vqrdmlsdhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    __typeof(p2) __p2 = (p2); \
> @@ -31649,22 +30781,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_r_u16
> (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
>    int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_r_u32
> (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> 
> -#define __arm_vrshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
> __arm_vrshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
> __arm_vrshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
> __arm_vrshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]:
> __arm_vrshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]:
> __arm_vrshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]:
> __arm_vrshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vrshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vrshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vrshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vrshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vrshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vrshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t)));})
> -
>  #define __arm_vqshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> @@ -31717,22 +30833,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_n_u16
> (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
>    int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_n_u32
> (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> 
> -#define __arm_vqrshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vqrshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vqrshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vqrshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vqrshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vqrshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vqrshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
> __arm_vqrshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
> __arm_vqrshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
> __arm_vqrshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]:
> __arm_vqrshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]:
> __arm_vqrshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]:
> __arm_vqrshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce3(p1, int)));})
> -
>  #define __arm_vqrdmulhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> @@ -32100,15 +31200,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmlsdhxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
>    int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmlsdhxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
> 
> -#define __arm_vqrshlq_m_n(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vqrshlq_m_n_s8
> (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vqrshlq_m_n_s16
> (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vqrshlq_m_n_s32
> (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vqrshlq_m_n_u8
> (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqrshlq_m_n_u16
> (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqrshlq_m_n_u32
> (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
> -
>  #define __arm_vqshlq_m_r(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
>    int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshlq_m_r_s8
> (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
> @@ -32128,16 +31219,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
> __arm_vrev64q_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, uint16x8_t), p2), \
>    int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
> __arm_vrev64q_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> 
> -#define __arm_vrshlq_m_n(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrshlq_m_n_s8
> (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrshlq_m_n_s16
> (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vrshlq_m_n_s32
> (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrshlq_m_n_u8
> (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrshlq_m_n_u16
> (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrshlq_m_n_u32
> (__ARM_mve_coerce(__p0, uint32x4_t), __p1, p2));})
> -
>  #define __arm_vshlq_m_r(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
>    int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_m_r_s8
> (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
> @@ -33076,16 +32157,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vshlq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t), p3), \
>    int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vshlq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t), p3));})
> 
> -#define __arm_vrshlq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> -  __typeof(p2) __p2 = (p2); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0,
> \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vrshlq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vrshlq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vrshlq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t), p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vrshlq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vrshlq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vrshlq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t), p3));})
> -
>  #define __arm_vrshrq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
>    int (*)[__ARM_mve_type_int8x16_t]: __arm_vrshrq_x_n_s8
> (__ARM_mve_coerce(__p1, int8x16_t), p2, p3), \
> @@ -33333,17 +32404,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int), p3), \
>    int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int), p3));})
> 
> -#define __arm_vqrshlq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  __typeof(p2) __p2 = (p2); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrshlq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrshlq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrshlq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrshlq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrshlq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrshlq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
> -
>  #define __arm_vqshlq_m_n(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> @@ -33365,17 +32425,6 @@ extern void *__ARM_undef;
>    int
> (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_m
> ve_type_int16x8_t]: __arm_vqshlq_m_u16 (__ARM_mve_coerce(__p0,
> uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t), p3), \
>    int
> (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_m
> ve_type_int32x4_t]: __arm_vqshlq_m_u32 (__ARM_mve_coerce(__p0,
> uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t), p3));})
> 
> -#define __arm_vrshlq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  __typeof(p2) __p2 = (p2); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrshlq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrshlq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrshlq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrshlq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrshlq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrshlq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
> -
>  #define __arm_vrshrq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 04/23] arm: [MVE intrinsics] factorize vqshlq vshlq
  2023-05-05  8:39 ` [PATCH 04/23] arm: [MVE intrinsics] factorize vqshlq vshlq Christophe Lyon
@ 2023-05-05 10:00   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 10:00 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 04/23] arm: [MVE intrinsics] factorize vqshlq vshlq
> 
> Factorize vqshlq and vshlq so that they use the same pattern.
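
As a reminder of the semantics being merged here, both intrinsics shift each
lane by a per-lane signed amount (negative counts shift right); vqshlq
additionally saturates on overflow.  A minimal usage sketch, illustrative
only: the intrinsic names are the usual arm_mve.h ones, and compile flags
such as -march=armv8.1-m.main+mve are an assumption, not taken from this
patch:

    #include <arm_mve.h>

    /* Element-wise shift; out-of-range results wrap.  */
    int8x16_t shift_plain (int8x16_t a, int8x16_t b)
    {
      return vshlq_s8 (a, b);
    }

    /* Element-wise shift; out-of-range results saturate.  */
    int8x16_t shift_saturating (int8x16_t a, int8x16_t b)
    {
      return vqshlq_s8 (a, b);
    }

Each should assemble to a single vshl.s8 or vqshl.s8 instruction, which is
why one pattern parameterized by <mve_insn> can serve both.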

Ok.
Thanks,
Kyrill

> 
> 2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/iterators.md (MVE_SHIFT_M_R, MVE_SHIFT_M_N)
> 	(MVE_SHIFT_N, MVE_SHIFT_R): New.
> 	(mve_insn): Add vqshl, vshl.
> 	* config/arm/mve.md (mve_vqshlq_n_<supf><mode>)
> 	(mve_vshlq_n_<supf><mode>): Merge into ...
> 	(@mve_<mve_insn>q_n_<supf><mode>): ... this.
> 	(mve_vqshlq_r_<supf><mode>, mve_vshlq_r_<supf><mode>): Merge
> into
> 	...
> 	(@mve_<mve_insn>q_r_<supf><mode>): ... this.
> 	(mve_vqshlq_m_r_<supf><mode>, mve_vshlq_m_r_<supf><mode>):
> Merge
> 	into ...
> 	(@mve_<mve_insn>q_m_r_<supf><mode>): ... this.
> 	(mve_vqshlq_m_n_<supf><mode>,
> mve_vshlq_m_n_<supf><mode>): Merge
> 	into ...
> 	(@mve_<mve_insn>q_m_n_<supf><mode>): ... this.
> 	* config/arm/vec-common.md (mve_vshlq_<supf><mode>):
> Transform
> 	into ...
> 	(@mve_<mve_insn>q_<supf><mode>): ... this.
> ---
>  gcc/config/arm/iterators.md  | 29 +++++++++++
>  gcc/config/arm/mve.md        | 99 ++++++++----------------------------
>  gcc/config/arm/vec-common.md |  4 +-
>  3 files changed, 51 insertions(+), 81 deletions(-)
> 
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index e7622fe752a..c53b42a86e9 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -435,6 +435,26 @@ (define_int_iterator MVE_INT_N_BINARY_LOGIC   [
>  		     VORRQ_N_S VORRQ_N_U
>  		     ])
> 
> +(define_int_iterator MVE_SHIFT_M_R   [
> +		     VQSHLQ_M_R_S VQSHLQ_M_R_U
> +		     VSHLQ_M_R_S VSHLQ_M_R_U
> +		     ])
> +
> +(define_int_iterator MVE_SHIFT_M_N   [
> +		     VQSHLQ_M_N_S VQSHLQ_M_N_U
> +		     VSHLQ_M_N_S VSHLQ_M_N_U
> +		     ])
> +
> +(define_int_iterator MVE_SHIFT_N   [
> +		     VQSHLQ_N_S VQSHLQ_N_U
> +		     VSHLQ_N_S VSHLQ_N_U
> +		     ])
> +
> +(define_int_iterator MVE_SHIFT_R   [
> +		     VQSHLQ_R_S VQSHLQ_R_U
> +		     VSHLQ_R_S VSHLQ_R_U
> +		     ])
> +
>  (define_int_iterator MVE_RSHIFT_M_N   [
>  		     VQRSHLQ_M_N_S VQRSHLQ_M_N_U
>  		     VRSHLQ_M_N_S VRSHLQ_M_N_U
> @@ -540,7 +560,11 @@ (define_int_attr mve_insn [
>  		 (VQRSHLQ_M_S "vqrshl") (VQRSHLQ_M_U "vqrshl")
>  		 (VQRSHLQ_N_S "vqrshl") (VQRSHLQ_N_U "vqrshl")
>  		 (VQRSHLQ_S "vqrshl") (VQRSHLQ_U "vqrshl")
> +		 (VQSHLQ_M_N_S "vqshl") (VQSHLQ_M_N_U "vqshl")
> +		 (VQSHLQ_M_R_S "vqshl") (VQSHLQ_M_R_U "vqshl")
>  		 (VQSHLQ_M_S "vqshl") (VQSHLQ_M_U "vqshl")
> +		 (VQSHLQ_N_S "vqshl") (VQSHLQ_N_U "vqshl")
> +		 (VQSHLQ_R_S "vqshl") (VQSHLQ_R_U "vqshl")
>  		 (VQSHLQ_S "vqshl") (VQSHLQ_U "vqshl")
>  		 (VQSUBQ_M_N_S "vqsub") (VQSUBQ_M_N_U "vqsub")
>  		 (VQSUBQ_M_S "vqsub") (VQSUBQ_M_U "vqsub")
> @@ -554,7 +578,12 @@ (define_int_attr mve_insn [
>  		 (VRSHLQ_M_S "vrshl") (VRSHLQ_M_U "vrshl")
>  		 (VRSHLQ_N_S "vrshl") (VRSHLQ_N_U "vrshl")
>  		 (VRSHLQ_S "vrshl") (VRSHLQ_U "vrshl")
> +		 (VSHLQ_M_N_S "vshl") (VSHLQ_M_N_U "vshl")
> +		 (VSHLQ_M_R_S "vshl") (VSHLQ_M_R_U "vshl")
>  		 (VSHLQ_M_S "vshl") (VSHLQ_M_U "vshl")
> +		 (VSHLQ_N_S "vshl") (VSHLQ_N_U "vshl")
> +		 (VSHLQ_R_S "vshl") (VSHLQ_R_U "vshl")
> +		 (VSHLQ_S "vshl") (VSHLQ_U "vshl")
>  		 (VSUBQ_M_N_S "vsub") (VSUBQ_M_N_U "vsub")
> (VSUBQ_M_N_F "vsub")
>  		 (VSUBQ_M_S "vsub") (VSUBQ_M_U "vsub") (VSUBQ_M_F
> "vsub")
>  		 (VSUBQ_N_S "vsub") (VSUBQ_N_U "vsub") (VSUBQ_N_F
> "vsub")
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 0d3343b6e29..fb1076aef73 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -1389,32 +1389,34 @@ (define_insn
> "@mve_<mve_insn>q_n_<supf><mode>"
>  ])
> 
>  ;;
> -;; [vqshlq_n_s, vqshlq_n_u])
> +;; [vqshlq_n_s, vqshlq_n_u]
> +;; [vshlq_n_u, vshlq_n_s]
>  ;;
> -(define_insn "mve_vqshlq_n_<supf><mode>"
> +(define_insn "@mve_<mve_insn>q_n_<supf><mode>"
>    [
>     (set (match_operand:MVE_2 0 "s_register_operand" "=w")
>  	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand"
> "w")
>  		       (match_operand:SI 2 "immediate_operand" "i")]
> -	 VQSHLQ_N))
> +	 MVE_SHIFT_N))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vqshl.<supf>%#<V_sz_elem>\t%q0, %q1, %2"
> +  "<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1, %2"
>    [(set_attr "type" "mve_move")
>  ])
> 
>  ;;
> -;; [vqshlq_r_u, vqshlq_r_s])
> +;; [vqshlq_r_u, vqshlq_r_s]
> +;; [vshlq_r_s, vshlq_r_u]
>  ;;
> -(define_insn "mve_vqshlq_r_<supf><mode>"
> +(define_insn "@mve_<mve_insn>q_r_<supf><mode>"
>    [
>     (set (match_operand:MVE_2 0 "s_register_operand" "=w")
>  	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
>  		       (match_operand:SI 2 "s_register_operand" "r")]
> -	 VQSHLQ_R))
> +	 MVE_SHIFT_R))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vqshl.<supf>%#<V_sz_elem>\t%q0, %2"
> +  "<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %2"
>    [(set_attr "type" "mve_move")
>  ])
> 
> @@ -1448,36 +1450,6 @@ (define_insn "mve_vrshrq_n_<supf><mode>"
>    [(set_attr "type" "mve_move")
>  ])
> 
> -;;
> -;; [vshlq_n_u, vshlq_n_s])
> -;;
> -(define_insn "mve_vshlq_n_<supf><mode>"
> -  [
> -   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> -	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand"
> "w")
> -		       (match_operand:SI 2 "immediate_operand" "i")]
> -	 VSHLQ_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vshl.<supf>%#<V_sz_elem>\t%q0, %q1, %2"
> -  [(set_attr "type" "mve_move")
> -])
> -
> -;;
> -;; [vshlq_r_s, vshlq_r_u])
> -;;
> -(define_insn "mve_vshlq_r_<supf><mode>"
> -  [
> -   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> -	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
> -		       (match_operand:SI 2 "s_register_operand" "r")]
> -	 VSHLQ_R))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vshl.<supf>%#<V_sz_elem>\t%q0, %2"
> -  [(set_attr "type" "mve_move")
> -])
> -
>  ;;
>  ;; [vabdq_f])
>  ;;
> @@ -3101,18 +3073,19 @@ (define_insn
> "@mve_<mve_insn>q_m_n_<supf><mode>"
>     (set_attr "length""8")])
> 
>  ;;
> -;; [vqshlq_m_r_u, vqshlq_m_r_s])
> +;; [vqshlq_m_r_u, vqshlq_m_r_s]
> +;; [vshlq_m_r_u, vshlq_m_r_s]
>  ;;
> -(define_insn "mve_vqshlq_m_r_<supf><mode>"
> +(define_insn "@mve_<mve_insn>q_m_r_<supf><mode>"
>    [
>     (set (match_operand:MVE_2 0 "s_register_operand" "=w")
>  	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
>  		       (match_operand:SI 2 "s_register_operand" "r")
>  		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VQSHLQ_M_R))
> +	 MVE_SHIFT_M_R))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vpst\;vqshlt.<supf>%#<V_sz_elem>\t%q0, %2"
> +  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %2"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> 
> @@ -3132,22 +3105,6 @@ (define_insn "mve_vrev64q_m_<supf><mode>"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> 
> -;;
> -;; [vshlq_m_r_u, vshlq_m_r_s])
> -;;
> -(define_insn "mve_vshlq_m_r_<supf><mode>"
> -  [
> -   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> -	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
> -		       (match_operand:SI 2 "s_register_operand" "r")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VSHLQ_M_R))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vshlt.<supf>%#<V_sz_elem>\t%q0, %2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
>  ;;
>  ;; [vsliq_n_u, vsliq_n_s])
>  ;;
> @@ -4881,19 +4838,20 @@ (define_insn "mve_vornq_m_<supf><mode>"
>     (set_attr "length""8")])
> 
>  ;;
> -;; [vqshlq_m_n_s, vqshlq_m_n_u])
> +;; [vqshlq_m_n_s, vqshlq_m_n_u]
> +;; [vshlq_m_n_s, vshlq_m_n_u]
>  ;;
> -(define_insn "mve_vqshlq_m_n_<supf><mode>"
> +(define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
>    [
>     (set (match_operand:MVE_2 0 "s_register_operand" "=w")
>  	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
>  		       (match_operand:MVE_2 2 "s_register_operand" "w")
>  		       (match_operand:SI 3 "immediate_operand" "i")
>  		       (match_operand:<MVE_VPRED> 4
> "vpr_register_operand" "Up")]
> -	 VQSHLQ_M_N))
> +	 MVE_SHIFT_M_N))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vpst\;vqshlt.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
> +  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> 
> @@ -4914,23 +4872,6 @@ (define_insn "mve_vrshrq_m_n_<supf><mode>"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> 
> -;;
> -;; [vshlq_m_n_s, vshlq_m_n_u])
> -;;
> -(define_insn "mve_vshlq_m_n_<supf><mode>"
> -  [
> -   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> -	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
> -		       (match_operand:MVE_2 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "immediate_operand" "i")
> -		       (match_operand:<MVE_VPRED> 4
> "vpr_register_operand" "Up")]
> -	 VSHLQ_M_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vshlt.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
>  ;;
>  ;; [vshrq_m_n_s, vshrq_m_n_u])
>  ;;
> diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
> index 918338ca5c0..b5fc86fdf28 100644
> --- a/gcc/config/arm/vec-common.md
> +++ b/gcc/config/arm/vec-common.md
> @@ -357,14 +357,14 @@ (define_expand "@movmisalign<mode>"
>      }
>  })
> 
> -(define_insn "mve_vshlq_<supf><mode>"
> +(define_insn "@mve_<mve_insn>q_<supf><mode>"
>    [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
>  	(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand"
> "w,w")
>  		       (match_operand:VDQIW 2 "imm_lshift_or_reg_neon"
> "w,Ds")]
>  	 VSHLQ))]
>    "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT"
>    "@
> -   vshl.<supf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
> +   <mve_insn>.<supf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
>     * return neon_output_shift_immediate (\"vshl\", 'i', &operands[2],
> <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), true);"
>    [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
>  )
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 05/23] arm: [MVE intrinsics] rework vqrdmulhq
  2023-05-05  8:39 ` [PATCH 05/23] arm: [MVE intrinsics] rework vqrdmulhq Christophe Lyon
@ 2023-05-05 10:01   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 10:01 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 05/23] arm: [MVE intrinsics] rework vqrdmulhq
> 
> Implement vqrdmulhq using the new MVE builtins framework.
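
For the record, vqrdmulhq is the saturating rounding doubling multiply
returning the high half.  A scalar model of one 16-bit lane, as I read the
usual VQRDMULH definition (illustrative sketch, not taken from this patch;
assumes arithmetic right shift of negative values):

    #include <stdint.h>

    static int16_t qrdmulh_s16 (int16_t a, int16_t b)
    {
      /* Double the product and add the rounding constant; 64-bit
	 arithmetic avoids overflow when a == b == INT16_MIN.  */
      int64_t prod = 2 * (int64_t) a * (int64_t) b + (1 << 15);
      int64_t res = prod >> 16;	/* keep the high half */
      if (res > INT16_MAX)	/* only reachable when a == b == INT16_MIN */
	res = INT16_MAX;	/* saturate */
      return (int16_t) res;
    }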

Ok.
Thanks,
Kyrill

> 
> 2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins-base.cc (vqrdmulhq): New.
> 	* config/arm/arm-mve-builtins-base.def (vqrdmulhq): New.
> 	* config/arm/arm-mve-builtins-base.h (vqrdmulhq): New.
> 	* config/arm/arm_mve.h (vqrdmulhq): Remove.
> 	(vqrdmulhq_m): Remove.
> 	(vqrdmulhq_s8): Remove.
> 	(vqrdmulhq_n_s8): Remove.
> 	(vqrdmulhq_s16): Remove.
> 	(vqrdmulhq_n_s16): Remove.
> 	(vqrdmulhq_s32): Remove.
> 	(vqrdmulhq_n_s32): Remove.
> 	(vqrdmulhq_m_n_s8): Remove.
> 	(vqrdmulhq_m_n_s32): Remove.
> 	(vqrdmulhq_m_n_s16): Remove.
> 	(vqrdmulhq_m_s8): Remove.
> 	(vqrdmulhq_m_s32): Remove.
> 	(vqrdmulhq_m_s16): Remove.
> 	(__arm_vqrdmulhq_s8): Remove.
> 	(__arm_vqrdmulhq_n_s8): Remove.
> 	(__arm_vqrdmulhq_s16): Remove.
> 	(__arm_vqrdmulhq_n_s16): Remove.
> 	(__arm_vqrdmulhq_s32): Remove.
> 	(__arm_vqrdmulhq_n_s32): Remove.
> 	(__arm_vqrdmulhq_m_n_s8): Remove.
> 	(__arm_vqrdmulhq_m_n_s32): Remove.
> 	(__arm_vqrdmulhq_m_n_s16): Remove.
> 	(__arm_vqrdmulhq_m_s8): Remove.
> 	(__arm_vqrdmulhq_m_s32): Remove.
> 	(__arm_vqrdmulhq_m_s16): Remove.
> 	(__arm_vqrdmulhq): Remove.
> 	(__arm_vqrdmulhq_m): Remove.
> ---
>  gcc/config/arm/arm-mve-builtins-base.cc  |   1 +
>  gcc/config/arm/arm-mve-builtins-base.def |   1 +
>  gcc/config/arm/arm-mve-builtins-base.h   |   1 +
>  gcc/config/arm/arm_mve.h                 | 213 -----------------------
>  4 files changed, 3 insertions(+), 213 deletions(-)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
> index f5e48519b19..8c125657c67 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.cc
> +++ b/gcc/config/arm/arm-mve-builtins-base.cc
> @@ -158,6 +158,7 @@ FUNCTION_WITH_RTX_M_N_NO_N_F (vorrq, IOR,
> VORRQ)
>  FUNCTION_WITH_M_N_NO_F (vqaddq, VQADDQ)
>  FUNCTION_WITH_M_N_NO_U_F (vqdmulhq, VQDMULHQ)
>  FUNCTION_WITH_M_N_NO_F (vqrshlq, VQRSHLQ)
> +FUNCTION_WITH_M_N_NO_U_F (vqrdmulhq, VQRDMULHQ)
>  FUNCTION_WITH_M_N_NO_F (vqsubq, VQSUBQ)
>  FUNCTION (vreinterpretq, vreinterpretq_impl,)
>  FUNCTION_WITHOUT_N_NO_F (vrhaddq, VRHADDQ)
> diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
> index e6dc2b00aaa..5b9966341ce 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.def
> +++ b/gcc/config/arm/arm-mve-builtins-base.def
> @@ -29,6 +29,7 @@ DEF_MVE_FUNCTION (vmulq, binary_opt_n, all_integer,
> mx_or_none)
>  DEF_MVE_FUNCTION (vorrq, binary_orrq, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vqaddq, binary_opt_n, all_integer, m_or_none)
>  DEF_MVE_FUNCTION (vqdmulhq, binary_opt_n, all_signed, m_or_none)
> +DEF_MVE_FUNCTION (vqrdmulhq, binary_opt_n, all_signed, m_or_none)
>  DEF_MVE_FUNCTION (vqrshlq, binary_round_lshift, all_integer, m_or_none)
>  DEF_MVE_FUNCTION (vqsubq, binary_opt_n, all_integer, m_or_none)
>  DEF_MVE_FUNCTION (vreinterpretq, unary_convert, reinterpret_integer,
> none)
> diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
> index 31ba3fece82..eeb747d52ad 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.h
> +++ b/gcc/config/arm/arm-mve-builtins-base.h
> @@ -34,6 +34,7 @@ extern const function_base *const vmulq;
>  extern const function_base *const vorrq;
>  extern const function_base *const vqaddq;
>  extern const function_base *const vqdmulhq;
> +extern const function_base *const vqrdmulhq;
>  extern const function_base *const vqrshlq;
>  extern const function_base *const vqsubq;
>  extern const function_base *const vreinterpretq;
> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> index 636945d6ef0..44b383dbe08 100644
> --- a/gcc/config/arm/arm_mve.h
> +++ b/gcc/config/arm/arm_mve.h
> @@ -94,7 +94,6 @@
>  #define vcmpgtq(__a, __b) __arm_vcmpgtq(__a, __b)
>  #define vcmpgeq(__a, __b) __arm_vcmpgeq(__a, __b)
>  #define vqshluq(__a, __imm) __arm_vqshluq(__a, __imm)
> -#define vqrdmulhq(__a, __b) __arm_vqrdmulhq(__a, __b)
>  #define vmlsdavxq(__a, __b) __arm_vmlsdavxq(__a, __b)
>  #define vmlsdavq(__a, __b) __arm_vmlsdavq(__a, __b)
>  #define vmladavxq(__a, __b) __arm_vmladavxq(__a, __b)
> @@ -249,7 +248,6 @@
>  #define vqrdmlashq_m(__a, __b, __c, __p) __arm_vqrdmlashq_m(__a, __b,
> __c, __p)
>  #define vqrdmlsdhq_m(__inactive, __a, __b, __p)
> __arm_vqrdmlsdhq_m(__inactive, __a, __b, __p)
>  #define vqrdmlsdhxq_m(__inactive, __a, __b, __p)
> __arm_vqrdmlsdhxq_m(__inactive, __a, __b, __p)
> -#define vqrdmulhq_m(__inactive, __a, __b, __p)
> __arm_vqrdmulhq_m(__inactive, __a, __b, __p)
>  #define vqshlq_m_n(__inactive, __a, __imm, __p)
> __arm_vqshlq_m_n(__inactive, __a, __imm, __p)
>  #define vqshlq_m(__inactive, __a, __b, __p) __arm_vqshlq_m(__inactive,
> __a, __b, __p)
>  #define vrshrq_m(__inactive, __a, __imm, __p) __arm_vrshrq_m(__inactive,
> __a, __imm, __p)
> @@ -682,8 +680,6 @@
>  #define vshlq_r_s8(__a, __b) __arm_vshlq_r_s8(__a, __b)
>  #define vqshlq_s8(__a, __b) __arm_vqshlq_s8(__a, __b)
>  #define vqshlq_r_s8(__a, __b) __arm_vqshlq_r_s8(__a, __b)
> -#define vqrdmulhq_s8(__a, __b) __arm_vqrdmulhq_s8(__a, __b)
> -#define vqrdmulhq_n_s8(__a, __b) __arm_vqrdmulhq_n_s8(__a, __b)
>  #define vornq_s8(__a, __b) __arm_vornq_s8(__a, __b)
>  #define vmulltq_int_s8(__a, __b) __arm_vmulltq_int_s8(__a, __b)
>  #define vmullbq_int_s8(__a, __b) __arm_vmullbq_int_s8(__a, __b)
> @@ -754,8 +750,6 @@
>  #define vshlq_r_s16(__a, __b) __arm_vshlq_r_s16(__a, __b)
>  #define vqshlq_s16(__a, __b) __arm_vqshlq_s16(__a, __b)
>  #define vqshlq_r_s16(__a, __b) __arm_vqshlq_r_s16(__a, __b)
> -#define vqrdmulhq_s16(__a, __b) __arm_vqrdmulhq_s16(__a, __b)
> -#define vqrdmulhq_n_s16(__a, __b) __arm_vqrdmulhq_n_s16(__a, __b)
>  #define vornq_s16(__a, __b) __arm_vornq_s16(__a, __b)
>  #define vmulltq_int_s16(__a, __b) __arm_vmulltq_int_s16(__a, __b)
>  #define vmullbq_int_s16(__a, __b) __arm_vmullbq_int_s16(__a, __b)
> @@ -826,8 +820,6 @@
>  #define vshlq_r_s32(__a, __b) __arm_vshlq_r_s32(__a, __b)
>  #define vqshlq_s32(__a, __b) __arm_vqshlq_s32(__a, __b)
>  #define vqshlq_r_s32(__a, __b) __arm_vqshlq_r_s32(__a, __b)
> -#define vqrdmulhq_s32(__a, __b) __arm_vqrdmulhq_s32(__a, __b)
> -#define vqrdmulhq_n_s32(__a, __b) __arm_vqrdmulhq_n_s32(__a, __b)
>  #define vornq_s32(__a, __b) __arm_vornq_s32(__a, __b)
>  #define vmulltq_int_s32(__a, __b) __arm_vmulltq_int_s32(__a, __b)
>  #define vmullbq_int_s32(__a, __b) __arm_vmullbq_int_s32(__a, __b)
> @@ -1597,12 +1589,6 @@
>  #define vqrdmlsdhxq_m_s8(__inactive, __a, __b, __p)
> __arm_vqrdmlsdhxq_m_s8(__inactive, __a, __b, __p)
>  #define vqrdmlsdhxq_m_s32(__inactive, __a, __b, __p)
> __arm_vqrdmlsdhxq_m_s32(__inactive, __a, __b, __p)
>  #define vqrdmlsdhxq_m_s16(__inactive, __a, __b, __p)
> __arm_vqrdmlsdhxq_m_s16(__inactive, __a, __b, __p)
> -#define vqrdmulhq_m_n_s8(__inactive, __a, __b, __p)
> __arm_vqrdmulhq_m_n_s8(__inactive, __a, __b, __p)
> -#define vqrdmulhq_m_n_s32(__inactive, __a, __b, __p)
> __arm_vqrdmulhq_m_n_s32(__inactive, __a, __b, __p)
> -#define vqrdmulhq_m_n_s16(__inactive, __a, __b, __p)
> __arm_vqrdmulhq_m_n_s16(__inactive, __a, __b, __p)
> -#define vqrdmulhq_m_s8(__inactive, __a, __b, __p)
> __arm_vqrdmulhq_m_s8(__inactive, __a, __b, __p)
> -#define vqrdmulhq_m_s32(__inactive, __a, __b, __p)
> __arm_vqrdmulhq_m_s32(__inactive, __a, __b, __p)
> -#define vqrdmulhq_m_s16(__inactive, __a, __b, __p)
> __arm_vqrdmulhq_m_s16(__inactive, __a, __b, __p)
>  #define vqshlq_m_n_s8(__inactive, __a,  __imm, __p)
> __arm_vqshlq_m_n_s8(__inactive, __a,  __imm, __p)
>  #define vqshlq_m_n_s32(__inactive, __a,  __imm, __p)
> __arm_vqshlq_m_n_s32(__inactive, __a,  __imm, __p)
>  #define vqshlq_m_n_s16(__inactive, __a,  __imm, __p)
> __arm_vqshlq_m_n_s16(__inactive, __a,  __imm, __p)
> @@ -3421,20 +3407,6 @@ __arm_vqshlq_r_s8 (int8x16_t __a, int32_t __b)
>    return __builtin_mve_vqshlq_r_sv16qi (__a, __b);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq_s8 (int8x16_t __a, int8x16_t __b)
> -{
> -  return __builtin_mve_vqrdmulhq_sv16qi (__a, __b);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq_n_s8 (int8x16_t __a, int8_t __b)
> -{
> -  return __builtin_mve_vqrdmulhq_n_sv16qi (__a, __b);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq_s8 (int8x16_t __a, int8x16_t __b)
> @@ -3927,20 +3899,6 @@ __arm_vqshlq_r_s16 (int16x8_t __a, int32_t __b)
>    return __builtin_mve_vqshlq_r_sv8hi (__a, __b);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
> -{
> -  return __builtin_mve_vqrdmulhq_sv8hi (__a, __b);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq_n_s16 (int16x8_t __a, int16_t __b)
> -{
> -  return __builtin_mve_vqrdmulhq_n_sv8hi (__a, __b);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq_s16 (int16x8_t __a, int16x8_t __b)
> @@ -4433,20 +4391,6 @@ __arm_vqshlq_r_s32 (int32x4_t __a, int32_t __b)
>    return __builtin_mve_vqshlq_r_sv4si (__a, __b);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
> -{
> -  return __builtin_mve_vqrdmulhq_sv4si (__a, __b);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vqrdmulhq_n_sv4si (__a, __b);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq_s32 (int32x4_t __a, int32x4_t __b)
> @@ -8674,48 +8618,6 @@ __arm_vqrdmlsdhxq_m_s16 (int16x8_t __inactive,
> int16x8_t __a, int16x8_t __b, mve
>    return __builtin_mve_vqrdmlsdhxq_m_sv8hi (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, int8_t __b,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrdmulhq_m_n_sv16qi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, int32_t
> __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrdmulhq_m_n_sv4si (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, int16_t
> __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrdmulhq_m_n_sv8hi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrdmulhq_m_sv16qi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t
> __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrdmulhq_m_sv4si (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t
> __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrdmulhq_m_sv8hi (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, const int __imm,
> mve_pred16_t __p)
> @@ -16946,20 +16848,6 @@ __arm_vqshlq_r (int8x16_t __a, int32_t __b)
>   return __arm_vqshlq_r_s8 (__a, __b);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq (int8x16_t __a, int8x16_t __b)
> -{
> - return __arm_vqrdmulhq_s8 (__a, __b);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq (int8x16_t __a, int8_t __b)
> -{
> - return __arm_vqrdmulhq_n_s8 (__a, __b);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq (int8x16_t __a, int8x16_t __b)
> @@ -17450,20 +17338,6 @@ __arm_vqshlq_r (int16x8_t __a, int32_t __b)
>   return __arm_vqshlq_r_s16 (__a, __b);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq (int16x8_t __a, int16x8_t __b)
> -{
> - return __arm_vqrdmulhq_s16 (__a, __b);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq (int16x8_t __a, int16_t __b)
> -{
> - return __arm_vqrdmulhq_n_s16 (__a, __b);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq (int16x8_t __a, int16x8_t __b)
> @@ -17954,20 +17828,6 @@ __arm_vqshlq_r (int32x4_t __a, int32_t __b)
>   return __arm_vqshlq_r_s32 (__a, __b);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq (int32x4_t __a, int32x4_t __b)
> -{
> - return __arm_vqrdmulhq_s32 (__a, __b);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq (int32x4_t __a, int32_t __b)
> -{
> - return __arm_vqrdmulhq_n_s32 (__a, __b);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq (int32x4_t __a, int32x4_t __b)
> @@ -22154,48 +22014,6 @@ __arm_vqrdmlsdhxq_m (int16x8_t __inactive,
> int16x8_t __a, int16x8_t __b, mve_pre
>   return __arm_vqrdmlsdhxq_m_s16 (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq_m (int8x16_t __inactive, int8x16_t __a, int8_t __b,
> mve_pred16_t __p)
> -{
> - return __arm_vqrdmulhq_m_n_s8 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq_m (int32x4_t __inactive, int32x4_t __a, int32_t __b,
> mve_pred16_t __p)
> -{
> - return __arm_vqrdmulhq_m_n_s32 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq_m (int16x8_t __inactive, int16x8_t __a, int16_t __b,
> mve_pred16_t __p)
> -{
> - return __arm_vqrdmulhq_m_n_s16 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b,
> mve_pred16_t __p)
> -{
> - return __arm_vqrdmulhq_m_s8 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b,
> mve_pred16_t __p)
> -{
> - return __arm_vqrdmulhq_m_s32 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrdmulhq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b,
> mve_pred16_t __p)
> -{
> - return __arm_vqrdmulhq_m_s16 (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshlq_m_n (int8x16_t __inactive, int8x16_t __a, const int __imm,
> mve_pred16_t __p)
> @@ -29075,16 +28893,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_n_u16
> (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
>    int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_n_u32
> (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> 
> -#define __arm_vqrdmulhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vqrdmulhq_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vqrdmulhq_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vqrdmulhq_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
> __arm_vqrdmulhq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
> __arm_vqrdmulhq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
> __arm_vqrdmulhq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce3(p1, int)));})
> -
>  #define __arm_vmlaldavxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> @@ -30833,16 +30641,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_n_u16
> (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
>    int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_n_u32
> (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> 
> -#define __arm_vqrdmulhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vqrdmulhq_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vqrdmulhq_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vqrdmulhq_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]:
> __arm_vqrdmulhq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]:
> __arm_vqrdmulhq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce3(p1, int)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]:
> __arm_vqrdmulhq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce3(p1, int)));})
> -
>  #define __arm_vornq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> @@ -32455,17 +32253,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
> __arm_vsliq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, uint16x8_t),  p2, p3), \
>    int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
> __arm_vsliq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce(__p1, uint32x4_t),  p2, p3));})
> 
> -#define __arm_vqrdmulhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  __typeof(p2) __p2 = (p2); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmulhq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmulhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmulhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmulhq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int), p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmulhq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int), p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmulhq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int), p3));})
> -
>  #define __arm_vqrdmlsdhxq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    __typeof(p2) __p2 = (p2); \
> --
> 2.34.1



* RE: [PATCH 06/23] arm: [MVE intrinsics] factorize vabdq
  2023-05-05  8:39 ` [PATCH 06/23] arm: [MVE intrinsics] factorize vabdq Christophe Lyon
@ 2023-05-05 10:48   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 10:48 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 06/23] arm: [MVE intrinsics] factorize vabdq
> 
> 2022-09-08  Christophe Lyon <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/iterators.md (MVE_FP_M_BINARY): Add vabdq.
> 	(MVE_FP_VABDQ_ONLY): New.
> 	(mve_insn): Add vabd.
> 	* config/arm/mve.md (mve_vabdq_f<mode>): Move into ...
> 	(@mve_<mve_insn>q_f<mode>): ... this.
> 	(mve_vabdq_m_f<mode>): Remove.

Ok.
Thanks,
Kyrill

> ---
>  gcc/config/arm/iterators.md |  9 +++++++--
>  gcc/config/arm/mve.md       | 25 +++++--------------------
>  2 files changed, 12 insertions(+), 22 deletions(-)
> 
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index c53b42a86e9..3133642ea82 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -466,6 +466,7 @@ (define_int_iterator MVE_RSHIFT_N   [
>  		     ])
> 
>  (define_int_iterator MVE_FP_M_BINARY   [
> +		     VABDQ_M_F
>  		     VADDQ_M_F
>  		     VMULQ_M_F
>  		     VSUBQ_M_F
> @@ -490,6 +491,10 @@ (define_int_iterator MVE_FP_N_BINARY   [
>  		     VSUBQ_N_F
>  		     ])
> 
> +(define_int_iterator MVE_FP_VABDQ_ONLY [
> +		     VABDQ_F
> +		     ])
> +
>  (define_int_iterator MVE_FP_CREATE_ONLY [
>  		     VCREATEQ_F
>  		     ])
> @@ -501,8 +506,8 @@ (define_code_attr mve_addsubmul [
>  		 ])
> 
>  (define_int_attr mve_insn [
> -		 (VABDQ_M_S "vabd") (VABDQ_M_U "vabd")
> -		 (VABDQ_S "vabd") (VABDQ_U "vabd")
> +		 (VABDQ_M_S "vabd") (VABDQ_M_U "vabd") (VABDQ_M_F
> "vabd")
> +		 (VABDQ_S "vabd") (VABDQ_U "vabd") (VABDQ_F "vabd")
>  		 (VADDQ_M_N_S "vadd") (VADDQ_M_N_U "vadd")
> (VADDQ_M_N_F "vadd")
>  		 (VADDQ_M_S "vadd") (VADDQ_M_U "vadd") (VADDQ_M_F
> "vadd")
>  		 (VADDQ_N_S "vadd") (VADDQ_N_U "vadd") (VADDQ_N_F
> "vadd")
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index fb1076aef73..c8cb4e430ac 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -1451,17 +1451,17 @@ (define_insn "mve_vrshrq_n_<supf><mode>"
>  ])
> 
>  ;;
> -;; [vabdq_f])
> +;; [vabdq_f]
>  ;;
> -(define_insn "mve_vabdq_f<mode>"
> +(define_insn "@mve_<mve_insn>q_f<mode>"
>    [
>     (set (match_operand:MVE_0 0 "s_register_operand" "=w")
>  	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand"
> "w")
>  		       (match_operand:MVE_0 2 "s_register_operand" "w")]
> -	 VABDQ_F))
> +	 MVE_FP_VABDQ_ONLY))
>    ]
>    "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> -  "vabd.f%#<V_sz_elem>	%q0, %q1, %q2"
> +  "<mve_insn>.f%#<V_sz_elem>\t%q0, %q1, %q2"
>    [(set_attr "type" "mve_move")
>  ])
> 
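(Hedged side note: with <mve_insn> mapping VABDQ_F to "vabd", the
parameterized pattern emits the same instruction as the old named one;
only the pattern name changes.  A user-level sketch, assuming an
MVE-enabled target, e.g. -mcpu=cortex-m55:)

    #include <arm_mve.h>

    float32x4_t
    fdiff (float32x4_t a, float32x4_t b)
    {
      return vabdq_f32 (a, b);   /* expected to assemble to vabd.f32 */
    }
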
> @@ -5483,24 +5483,9 @@ (define_insn "mve_vrmlsldavhaxq_p_sv4si"
>    "vpst\;vrmlsldavhaxt.s32\t%Q0, %R0, %q2, %q3"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> -;;
> -;; [vabdq_m_f])
> -;;
> -(define_insn "mve_vabdq_m_f<mode>"
> -  [
> -   (set (match_operand:MVE_0 0 "s_register_operand" "=w")
> -	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
> -		       (match_operand:MVE_0 2 "s_register_operand" "w")
> -		       (match_operand:MVE_0 3 "s_register_operand" "w")
> -		       (match_operand:<MVE_VPRED> 4
> "vpr_register_operand" "Up")]
> -	 VABDQ_M_F))
> -  ]
> -  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> -  "vpst\;vabdt.f%#<V_sz_elem>	%q0, %q2, %q3"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> 
>  ;;
> +;; [vabdq_m_f]
>  ;; [vaddq_m_f]
>  ;; [vsubq_m_f]
>  ;; [vmulq_m_f]
> --
> 2.34.1



* RE: [PATCH 07/23] arm: [MVE intrinsics] rework vabdq
  2023-05-05  8:39 ` [PATCH 07/23] arm: [MVE intrinsics] rework vabdq Christophe Lyon
@ 2023-05-05 10:49   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 10:49 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 07/23] arm: [MVE intrinsics] rework vabdq
> 
> Implement vabdq using the new MVE builtins framework.
> 

Ok.
Thanks,
Kyrill

> 2022-09-08  Christophe Lyon <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins-base.cc (FUNCTION_WITHOUT_N):
> New.
> 	(vabdq): New.
> 	* config/arm/arm-mve-builtins-base.def (vabdq): New.
> 	* config/arm/arm-mve-builtins-base.h (vabdq): New.
> 	* config/arm/arm_mve.h (vabdq): Remove.
> 	(vabdq_m): Remove.
> 	(vabdq_x): Remove.
> 	(vabdq_u8): Remove.
> 	(vabdq_s8): Remove.
> 	(vabdq_u16): Remove.
> 	(vabdq_s16): Remove.
> 	(vabdq_u32): Remove.
> 	(vabdq_s32): Remove.
> 	(vabdq_f16): Remove.
> 	(vabdq_f32): Remove.
> 	(vabdq_m_s8): Remove.
> 	(vabdq_m_s32): Remove.
> 	(vabdq_m_s16): Remove.
> 	(vabdq_m_u8): Remove.
> 	(vabdq_m_u32): Remove.
> 	(vabdq_m_u16): Remove.
> 	(vabdq_m_f32): Remove.
> 	(vabdq_m_f16): Remove.
> 	(vabdq_x_s8): Remove.
> 	(vabdq_x_s16): Remove.
> 	(vabdq_x_s32): Remove.
> 	(vabdq_x_u8): Remove.
> 	(vabdq_x_u16): Remove.
> 	(vabdq_x_u32): Remove.
> 	(vabdq_x_f16): Remove.
> 	(vabdq_x_f32): Remove.
> 	(__arm_vabdq_u8): Remove.
> 	(__arm_vabdq_s8): Remove.
> 	(__arm_vabdq_u16): Remove.
> 	(__arm_vabdq_s16): Remove.
> 	(__arm_vabdq_u32): Remove.
> 	(__arm_vabdq_s32): Remove.
> 	(__arm_vabdq_m_s8): Remove.
> 	(__arm_vabdq_m_s32): Remove.
> 	(__arm_vabdq_m_s16): Remove.
> 	(__arm_vabdq_m_u8): Remove.
> 	(__arm_vabdq_m_u32): Remove.
> 	(__arm_vabdq_m_u16): Remove.
> 	(__arm_vabdq_x_s8): Remove.
> 	(__arm_vabdq_x_s16): Remove.
> 	(__arm_vabdq_x_s32): Remove.
> 	(__arm_vabdq_x_u8): Remove.
> 	(__arm_vabdq_x_u16): Remove.
> 	(__arm_vabdq_x_u32): Remove.
> 	(__arm_vabdq_f16): Remove.
> 	(__arm_vabdq_f32): Remove.
> 	(__arm_vabdq_m_f32): Remove.
> 	(__arm_vabdq_m_f16): Remove.
> 	(__arm_vabdq_x_f16): Remove.
> 	(__arm_vabdq_x_f32): Remove.
> 	(__arm_vabdq): Remove.
> 	(__arm_vabdq_m): Remove.
> 	(__arm_vabdq_x): Remove.
> ---
>  gcc/config/arm/arm-mve-builtins-base.cc  |  10 +
>  gcc/config/arm/arm-mve-builtins-base.def |   2 +
>  gcc/config/arm/arm-mve-builtins-base.h   |   1 +
>  gcc/config/arm/arm_mve.h                 | 431 -----------------------
>  4 files changed, 13 insertions(+), 431 deletions(-)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-
> mve-builtins-base.cc
> index 8c125657c67..a74119db917 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.cc
> +++ b/gcc/config/arm/arm-mve-builtins-base.cc
> @@ -146,6 +146,16 @@ namespace arm_mve {
>      UNSPEC##_M_S, -1, -1,						\
>      UNSPEC##_M_N_S, -1, -1))
> 
> +  /* Helper for builtins with only unspec codes, _m predicated
> +     overrides, but no _n version.  */
> +#define FUNCTION_WITHOUT_N(NAME, UNSPEC) FUNCTION			\
> +  (NAME, unspec_mve_function_exact_insn,				\
> +   (UNSPEC##_S, UNSPEC##_U, UNSPEC##_F,				\
> +    -1, -1, -1,							\
> +    UNSPEC##_M_S, UNSPEC##_M_U, UNSPEC##_M_F,			\
> +    -1, -1, -1))
> +
> +FUNCTION_WITHOUT_N (vabdq, VABDQ)
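(Expansion sketch of the line above, per the macro just defined; the -1
slots mark the _n and _m_n variants that vabdq does not have:)

    FUNCTION (vabdq, unspec_mve_function_exact_insn,
              (VABDQ_S, VABDQ_U, VABDQ_F,
               -1, -1, -1,
               VABDQ_M_S, VABDQ_M_U, VABDQ_M_F,
               -1, -1, -1))
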
>  FUNCTION_WITH_RTX_M_N (vaddq, PLUS, VADDQ)
>  FUNCTION_WITH_RTX_M (vandq, AND, VANDQ)
>  FUNCTION_WITHOUT_M_N (vcreateq, VCREATEQ)
> diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-
> mve-builtins-base.def
> index 5b9966341ce..9230837fd43 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.def
> +++ b/gcc/config/arm/arm-mve-builtins-base.def
> @@ -18,6 +18,7 @@
>     <http://www.gnu.org/licenses/>.  */
> 
>  #define REQUIRES_FLOAT false
> +DEF_MVE_FUNCTION (vabdq, binary, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vaddq, binary_opt_n, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vandq, binary, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vcreateq, create, all_integer_with_64, none)
> @@ -41,6 +42,7 @@ DEF_MVE_FUNCTION (vuninitializedq, inherent,
> all_integer_with_64, none)
>  #undef REQUIRES_FLOAT
> 
>  #define REQUIRES_FLOAT true
> +DEF_MVE_FUNCTION (vabdq, binary, all_float, mx_or_none)
>  DEF_MVE_FUNCTION (vaddq, binary_opt_n, all_float, mx_or_none)
>  DEF_MVE_FUNCTION (vandq, binary, all_float, mx_or_none)
>  DEF_MVE_FUNCTION (vcreateq, create, all_float, none)
> diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-
> mve-builtins-base.h
> index eeb747d52ad..d9d45d1925a 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.h
> +++ b/gcc/config/arm/arm-mve-builtins-base.h
> @@ -23,6 +23,7 @@
>  namespace arm_mve {
>  namespace functions {
> 
> +extern const function_base *const vabdq;
>  extern const function_base *const vaddq;
>  extern const function_base *const vandq;
>  extern const function_base *const vcreateq;
> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> index 44b383dbe08..175d9955c33 100644
> --- a/gcc/config/arm/arm_mve.h
> +++ b/gcc/config/arm/arm_mve.h
> @@ -77,7 +77,6 @@
>  #define vbicq(__a, __b) __arm_vbicq(__a, __b)
>  #define vaddvq_p(__a, __p) __arm_vaddvq_p(__a, __p)
>  #define vaddvaq(__a, __b) __arm_vaddvaq(__a, __b)
> -#define vabdq(__a, __b) __arm_vabdq(__a, __b)
>  #define vshlq_r(__a, __b) __arm_vshlq_r(__a, __b)
>  #define vqshlq(__a, __b) __arm_vqshlq(__a, __b)
>  #define vqshlq_r(__a, __b) __arm_vqshlq_r(__a, __b)
> @@ -218,7 +217,6 @@
>  #define vqshluq_m(__inactive, __a, __imm, __p)
> __arm_vqshluq_m(__inactive, __a, __imm, __p)
>  #define vabavq_p(__a, __b, __c, __p) __arm_vabavq_p(__a, __b, __c, __p)
>  #define vshlq_m(__inactive, __a, __b, __p) __arm_vshlq_m(__inactive, __a,
> __b, __p)
> -#define vabdq_m(__inactive, __a, __b, __p) __arm_vabdq_m(__inactive,
> __a, __b, __p)
>  #define vbicq_m(__inactive, __a, __b, __p) __arm_vbicq_m(__inactive, __a,
> __b, __p)
>  #define vbrsrq_m(__inactive, __a, __b, __p) __arm_vbrsrq_m(__inactive,
> __a, __b, __p)
>  #define vcaddq_rot270_m(__inactive, __a, __b, __p)
> __arm_vcaddq_rot270_m(__inactive, __a, __b, __p)
> @@ -355,7 +353,6 @@
>  #define viwdupq_x_u32(__a, __b, __imm, __p) __arm_viwdupq_x_u32(__a,
> __b, __imm, __p)
>  #define vminq_x(__a, __b, __p) __arm_vminq_x(__a, __b, __p)
>  #define vmaxq_x(__a, __b, __p) __arm_vmaxq_x(__a, __b, __p)
> -#define vabdq_x(__a, __b, __p) __arm_vabdq_x(__a, __b, __p)
>  #define vabsq_x(__a, __p) __arm_vabsq_x(__a, __p)
>  #define vclsq_x(__a, __p) __arm_vclsq_x(__a, __p)
>  #define vclzq_x(__a, __p) __arm_vclzq_x(__a, __p)
> @@ -652,7 +649,6 @@
>  #define vbicq_u8(__a, __b) __arm_vbicq_u8(__a, __b)
>  #define vaddvq_p_u8(__a, __p) __arm_vaddvq_p_u8(__a, __p)
>  #define vaddvaq_u8(__a, __b) __arm_vaddvaq_u8(__a, __b)
> -#define vabdq_u8(__a, __b) __arm_vabdq_u8(__a, __b)
>  #define vshlq_r_u8(__a, __b) __arm_vshlq_r_u8(__a, __b)
>  #define vqshlq_u8(__a, __b) __arm_vqshlq_u8(__a, __b)
>  #define vqshlq_r_u8(__a, __b) __arm_vqshlq_r_u8(__a, __b)
> @@ -698,7 +694,6 @@
>  #define vbrsrq_n_s8(__a, __b) __arm_vbrsrq_n_s8(__a, __b)
>  #define vbicq_s8(__a, __b) __arm_vbicq_s8(__a, __b)
>  #define vaddvaq_s8(__a, __b) __arm_vaddvaq_s8(__a, __b)
> -#define vabdq_s8(__a, __b) __arm_vabdq_s8(__a, __b)
>  #define vshlq_n_s8(__a,  __imm) __arm_vshlq_n_s8(__a,  __imm)
>  #define vrshrq_n_s8(__a,  __imm) __arm_vrshrq_n_s8(__a,  __imm)
>  #define vqshlq_n_s8(__a,  __imm) __arm_vqshlq_n_s8(__a,  __imm)
> @@ -722,7 +717,6 @@
>  #define vbicq_u16(__a, __b) __arm_vbicq_u16(__a, __b)
>  #define vaddvq_p_u16(__a, __p) __arm_vaddvq_p_u16(__a, __p)
>  #define vaddvaq_u16(__a, __b) __arm_vaddvaq_u16(__a, __b)
> -#define vabdq_u16(__a, __b) __arm_vabdq_u16(__a, __b)
>  #define vshlq_r_u16(__a, __b) __arm_vshlq_r_u16(__a, __b)
>  #define vqshlq_u16(__a, __b) __arm_vqshlq_u16(__a, __b)
>  #define vqshlq_r_u16(__a, __b) __arm_vqshlq_r_u16(__a, __b)
> @@ -768,7 +762,6 @@
>  #define vbrsrq_n_s16(__a, __b) __arm_vbrsrq_n_s16(__a, __b)
>  #define vbicq_s16(__a, __b) __arm_vbicq_s16(__a, __b)
>  #define vaddvaq_s16(__a, __b) __arm_vaddvaq_s16(__a, __b)
> -#define vabdq_s16(__a, __b) __arm_vabdq_s16(__a, __b)
>  #define vshlq_n_s16(__a,  __imm) __arm_vshlq_n_s16(__a,  __imm)
>  #define vrshrq_n_s16(__a,  __imm) __arm_vrshrq_n_s16(__a,  __imm)
>  #define vqshlq_n_s16(__a,  __imm) __arm_vqshlq_n_s16(__a,  __imm)
> @@ -792,7 +785,6 @@
>  #define vbicq_u32(__a, __b) __arm_vbicq_u32(__a, __b)
>  #define vaddvq_p_u32(__a, __p) __arm_vaddvq_p_u32(__a, __p)
>  #define vaddvaq_u32(__a, __b) __arm_vaddvaq_u32(__a, __b)
> -#define vabdq_u32(__a, __b) __arm_vabdq_u32(__a, __b)
>  #define vshlq_r_u32(__a, __b) __arm_vshlq_r_u32(__a, __b)
>  #define vqshlq_u32(__a, __b) __arm_vqshlq_u32(__a, __b)
>  #define vqshlq_r_u32(__a, __b) __arm_vqshlq_r_u32(__a, __b)
> @@ -838,7 +830,6 @@
>  #define vbrsrq_n_s32(__a, __b) __arm_vbrsrq_n_s32(__a, __b)
>  #define vbicq_s32(__a, __b) __arm_vbicq_s32(__a, __b)
>  #define vaddvaq_s32(__a, __b) __arm_vaddvaq_s32(__a, __b)
> -#define vabdq_s32(__a, __b) __arm_vabdq_s32(__a, __b)
>  #define vshlq_n_s32(__a,  __imm) __arm_vshlq_n_s32(__a,  __imm)
>  #define vrshrq_n_s32(__a,  __imm) __arm_vrshrq_n_s32(__a,  __imm)
>  #define vqshlq_n_s32(__a,  __imm) __arm_vqshlq_n_s32(__a,  __imm)
> @@ -894,7 +885,6 @@
>  #define vcaddq_rot90_f16(__a, __b) __arm_vcaddq_rot90_f16(__a, __b)
>  #define vcaddq_rot270_f16(__a, __b) __arm_vcaddq_rot270_f16(__a, __b)
>  #define vbicq_f16(__a, __b) __arm_vbicq_f16(__a, __b)
> -#define vabdq_f16(__a, __b) __arm_vabdq_f16(__a, __b)
>  #define vshlltq_n_s8(__a,  __imm) __arm_vshlltq_n_s8(__a,  __imm)
>  #define vshllbq_n_s8(__a,  __imm) __arm_vshllbq_n_s8(__a,  __imm)
>  #define vbicq_n_s16(__a,  __imm) __arm_vbicq_n_s16(__a,  __imm)
> @@ -950,7 +940,6 @@
>  #define vcaddq_rot90_f32(__a, __b) __arm_vcaddq_rot90_f32(__a, __b)
>  #define vcaddq_rot270_f32(__a, __b) __arm_vcaddq_rot270_f32(__a, __b)
>  #define vbicq_f32(__a, __b) __arm_vbicq_f32(__a, __b)
> -#define vabdq_f32(__a, __b) __arm_vabdq_f32(__a, __b)
>  #define vshlltq_n_s16(__a,  __imm) __arm_vshlltq_n_s16(__a,  __imm)
>  #define vshllbq_n_s16(__a,  __imm) __arm_vshllbq_n_s16(__a,  __imm)
>  #define vbicq_n_s32(__a,  __imm) __arm_vbicq_n_s32(__a,  __imm)
> @@ -1460,12 +1449,6 @@
>  #define vshlq_m_u32(__inactive, __a, __b, __p)
> __arm_vshlq_m_u32(__inactive, __a, __b, __p)
>  #define vabavq_p_u32(__a, __b, __c, __p) __arm_vabavq_p_u32(__a, __b,
> __c, __p)
>  #define vshlq_m_s32(__inactive, __a, __b, __p)
> __arm_vshlq_m_s32(__inactive, __a, __b, __p)
> -#define vabdq_m_s8(__inactive, __a, __b, __p)
> __arm_vabdq_m_s8(__inactive, __a, __b, __p)
> -#define vabdq_m_s32(__inactive, __a, __b, __p)
> __arm_vabdq_m_s32(__inactive, __a, __b, __p)
> -#define vabdq_m_s16(__inactive, __a, __b, __p)
> __arm_vabdq_m_s16(__inactive, __a, __b, __p)
> -#define vabdq_m_u8(__inactive, __a, __b, __p)
> __arm_vabdq_m_u8(__inactive, __a, __b, __p)
> -#define vabdq_m_u32(__inactive, __a, __b, __p)
> __arm_vabdq_m_u32(__inactive, __a, __b, __p)
> -#define vabdq_m_u16(__inactive, __a, __b, __p)
> __arm_vabdq_m_u16(__inactive, __a, __b, __p)
>  #define vbicq_m_s8(__inactive, __a, __b, __p)
> __arm_vbicq_m_s8(__inactive, __a, __b, __p)
>  #define vbicq_m_s32(__inactive, __a, __b, __p)
> __arm_vbicq_m_s32(__inactive, __a, __b, __p)
>  #define vbicq_m_s16(__inactive, __a, __b, __p)
> __arm_vbicq_m_s16(__inactive, __a, __b, __p)
> @@ -1700,8 +1683,6 @@
>  #define vshrntq_m_n_s16(__a, __b,  __imm, __p)
> __arm_vshrntq_m_n_s16(__a, __b,  __imm, __p)
>  #define vshrntq_m_n_u32(__a, __b,  __imm, __p)
> __arm_vshrntq_m_n_u32(__a, __b,  __imm, __p)
>  #define vshrntq_m_n_u16(__a, __b,  __imm, __p)
> __arm_vshrntq_m_n_u16(__a, __b,  __imm, __p)
> -#define vabdq_m_f32(__inactive, __a, __b, __p)
> __arm_vabdq_m_f32(__inactive, __a, __b, __p)
> -#define vabdq_m_f16(__inactive, __a, __b, __p)
> __arm_vabdq_m_f16(__inactive, __a, __b, __p)
>  #define vbicq_m_f32(__inactive, __a, __b, __p)
> __arm_vbicq_m_f32(__inactive, __a, __b, __p)
>  #define vbicq_m_f16(__inactive, __a, __b, __p)
> __arm_vbicq_m_f16(__inactive, __a, __b, __p)
>  #define vbrsrq_m_n_f32(__inactive, __a, __b, __p)
> __arm_vbrsrq_m_n_f32(__inactive, __a, __b, __p)
> @@ -2060,12 +2041,6 @@
>  #define vmaxq_x_u8(__a, __b, __p) __arm_vmaxq_x_u8(__a, __b, __p)
>  #define vmaxq_x_u16(__a, __b, __p) __arm_vmaxq_x_u16(__a, __b, __p)
>  #define vmaxq_x_u32(__a, __b, __p) __arm_vmaxq_x_u32(__a, __b, __p)
> -#define vabdq_x_s8(__a, __b, __p) __arm_vabdq_x_s8(__a, __b, __p)
> -#define vabdq_x_s16(__a, __b, __p) __arm_vabdq_x_s16(__a, __b, __p)
> -#define vabdq_x_s32(__a, __b, __p) __arm_vabdq_x_s32(__a, __b, __p)
> -#define vabdq_x_u8(__a, __b, __p) __arm_vabdq_x_u8(__a, __b, __p)
> -#define vabdq_x_u16(__a, __b, __p) __arm_vabdq_x_u16(__a, __b, __p)
> -#define vabdq_x_u32(__a, __b, __p) __arm_vabdq_x_u32(__a, __b, __p)
>  #define vabsq_x_s8(__a, __p) __arm_vabsq_x_s8(__a, __p)
>  #define vabsq_x_s16(__a, __p) __arm_vabsq_x_s16(__a, __p)
>  #define vabsq_x_s32(__a, __p) __arm_vabsq_x_s32(__a, __p)
> @@ -2201,8 +2176,6 @@
>  #define vminnmq_x_f32(__a, __b, __p) __arm_vminnmq_x_f32(__a, __b,
> __p)
>  #define vmaxnmq_x_f16(__a, __b, __p) __arm_vmaxnmq_x_f16(__a, __b,
> __p)
>  #define vmaxnmq_x_f32(__a, __b, __p) __arm_vmaxnmq_x_f32(__a, __b,
> __p)
> -#define vabdq_x_f16(__a, __b, __p) __arm_vabdq_x_f16(__a, __b, __p)
> -#define vabdq_x_f32(__a, __b, __p) __arm_vabdq_x_f32(__a, __b, __p)
>  #define vabsq_x_f16(__a, __p) __arm_vabsq_x_f16(__a, __p)
>  #define vabsq_x_f32(__a, __p) __arm_vabsq_x_f32(__a, __p)
>  #define vnegq_x_f16(__a, __p) __arm_vnegq_x_f16(__a, __p)
> @@ -3211,13 +3184,6 @@ __arm_vaddvaq_u8 (uint32_t __a, uint8x16_t
> __b)
>    return __builtin_mve_vaddvaq_uv16qi (__a, __b);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_u8 (uint8x16_t __a, uint8x16_t __b)
> -{
> -  return __builtin_mve_vabdq_uv16qi (__a, __b);
> -}
> -
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vshlq_r_u8 (uint8x16_t __a, int32_t __b)
> @@ -3533,13 +3499,6 @@ __arm_vaddvaq_s8 (int32_t __a, int8x16_t __b)
>    return __builtin_mve_vaddvaq_sv16qi (__a, __b);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_s8 (int8x16_t __a, int8x16_t __b)
> -{
> -  return __builtin_mve_vabdq_sv16qi (__a, __b);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vshlq_n_s8 (int8x16_t __a, const int __imm)
> @@ -3703,13 +3662,6 @@ __arm_vaddvaq_u16 (uint32_t __a, uint16x8_t
> __b)
>    return __builtin_mve_vaddvaq_uv8hi (__a, __b);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_u16 (uint16x8_t __a, uint16x8_t __b)
> -{
> -  return __builtin_mve_vabdq_uv8hi (__a, __b);
> -}
> -
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vshlq_r_u16 (uint16x8_t __a, int32_t __b)
> @@ -4025,13 +3977,6 @@ __arm_vaddvaq_s16 (int32_t __a, int16x8_t __b)
>    return __builtin_mve_vaddvaq_sv8hi (__a, __b);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_s16 (int16x8_t __a, int16x8_t __b)
> -{
> -  return __builtin_mve_vabdq_sv8hi (__a, __b);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vshlq_n_s16 (int16x8_t __a, const int __imm)
> @@ -4195,13 +4140,6 @@ __arm_vaddvaq_u32 (uint32_t __a, uint32x4_t
> __b)
>    return __builtin_mve_vaddvaq_uv4si (__a, __b);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_u32 (uint32x4_t __a, uint32x4_t __b)
> -{
> -  return __builtin_mve_vabdq_uv4si (__a, __b);
> -}
> -
>  __extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vshlq_r_u32 (uint32x4_t __a, int32_t __b)
> @@ -4517,13 +4455,6 @@ __arm_vaddvaq_s32 (int32_t __a, int32x4_t __b)
>    return __builtin_mve_vaddvaq_sv4si (__a, __b);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_s32 (int32x4_t __a, int32x4_t __b)
> -{
> -  return __builtin_mve_vabdq_sv4si (__a, __b);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vshlq_n_s32 (int32x4_t __a, const int __imm)
> @@ -7715,48 +7646,6 @@ __arm_vshlq_m_s32 (int32x4_t __inactive,
> int32x4_t __a, int32x4_t __b, mve_pred1
>    return __builtin_mve_vshlq_m_sv4si (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vabdq_m_sv16qi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vabdq_m_sv4si (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vabdq_m_sv8hi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, uint8x16_t __b,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vabdq_m_uv16qi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t
> __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vabdq_m_uv4si (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t
> __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vabdq_m_uv8hi (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vbicq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b,
> mve_pred16_t __p)
> @@ -11432,48 +11321,6 @@ __arm_vmaxq_x_u32 (uint32x4_t __a,
> uint32x4_t __b, mve_pred16_t __p)
>    return __builtin_mve_vmaxq_m_uv4si (__arm_vuninitializedq_u32 (), __a,
> __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_x_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vabdq_m_sv16qi (__arm_vuninitializedq_s8 (), __a,
> __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_x_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vabdq_m_sv8hi (__arm_vuninitializedq_s16 (), __a,
> __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_x_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vabdq_m_sv4si (__arm_vuninitializedq_s32 (), __a,
> __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_x_u8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vabdq_m_uv16qi (__arm_vuninitializedq_u8 (), __a,
> __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_x_u16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vabdq_m_uv8hi (__arm_vuninitializedq_u16 (), __a,
> __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_x_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vabdq_m_uv4si (__arm_vuninitializedq_u32 (), __a,
> __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vabsq_x_s8 (int8x16_t __a, mve_pred16_t __p)
> @@ -13692,13 +13539,6 @@ __arm_vbicq_f16 (float16x8_t __a, float16x8_t
> __b)
>    return __builtin_mve_vbicq_fv8hf (__a, __b);
>  }
> 
> -__extension__ extern __inline float16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_f16 (float16x8_t __a, float16x8_t __b)
> -{
> -  return __builtin_mve_vabdq_fv8hf (__a, __b);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq_n_f32 (float32x4_t __a, float32_t __b)
> @@ -13895,13 +13735,6 @@ __arm_vbicq_f32 (float32x4_t __a, float32x4_t
> __b)
>    return __builtin_mve_vbicq_fv4sf (__a, __b);
>  }
> 
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_f32 (float32x4_t __a, float32x4_t __b)
> -{
> -  return __builtin_mve_vabdq_fv4sf (__a, __b);
> -}
> -
>  __extension__ extern __inline float16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcvttq_f16_f32 (float16x8_t __a, float32x4_t __b)
> @@ -14666,20 +14499,6 @@ __arm_vcvtq_m_n_f32_s32 (float32x4_t
> __inactive, int32x4_t __a, const int __imm6
>    return __builtin_mve_vcvtq_m_n_to_f_sv4sf (__inactive, __a, __imm6,
> __p);
>  }
> 
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t
> __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vabdq_m_fv4sf (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline float16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t
> __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vabdq_m_fv8hf (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vbicq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t
> __b, mve_pred16_t __p)
> @@ -15274,20 +15093,6 @@ __arm_vmaxnmq_x_f32 (float32x4_t __a,
> float32x4_t __b, mve_pred16_t __p)
>    return __builtin_mve_vmaxnmq_m_fv4sf (__arm_vuninitializedq_f32 (),
> __a, __b, __p);
>  }
> 
> -__extension__ extern __inline float16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_x_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vabdq_m_fv8hf (__arm_vuninitializedq_f16 (), __a,
> __b, __p);
> -}
> -
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_x_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vabdq_m_fv4sf (__arm_vuninitializedq_f32 (), __a,
> __b, __p);
> -}
> -
>  __extension__ extern __inline float16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vabsq_x_f16 (float16x8_t __a, mve_pred16_t __p)
> @@ -16652,13 +16457,6 @@ __arm_vaddvaq (uint32_t __a, uint8x16_t __b)
>   return __arm_vaddvaq_u8 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq (uint8x16_t __a, uint8x16_t __b)
> -{
> - return __arm_vabdq_u8 (__a, __b);
> -}
> -
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vshlq_r (uint8x16_t __a, int32_t __b)
> @@ -16974,13 +16772,6 @@ __arm_vaddvaq (int32_t __a, int8x16_t __b)
>   return __arm_vaddvaq_s8 (__a, __b);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq (int8x16_t __a, int8x16_t __b)
> -{
> - return __arm_vabdq_s8 (__a, __b);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vshlq_n (int8x16_t __a, const int __imm)
> @@ -17142,13 +16933,6 @@ __arm_vaddvaq (uint32_t __a, uint16x8_t __b)
>   return __arm_vaddvaq_u16 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq (uint16x8_t __a, uint16x8_t __b)
> -{
> - return __arm_vabdq_u16 (__a, __b);
> -}
> -
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vshlq_r (uint16x8_t __a, int32_t __b)
> @@ -17464,13 +17248,6 @@ __arm_vaddvaq (int32_t __a, int16x8_t __b)
>   return __arm_vaddvaq_s16 (__a, __b);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq (int16x8_t __a, int16x8_t __b)
> -{
> - return __arm_vabdq_s16 (__a, __b);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vshlq_n (int16x8_t __a, const int __imm)
> @@ -17632,13 +17409,6 @@ __arm_vaddvaq (uint32_t __a, uint32x4_t __b)
>   return __arm_vaddvaq_u32 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq (uint32x4_t __a, uint32x4_t __b)
> -{
> - return __arm_vabdq_u32 (__a, __b);
> -}
> -
>  __extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vshlq_r (uint32x4_t __a, int32_t __b)
> @@ -17954,13 +17724,6 @@ __arm_vaddvaq (int32_t __a, int32x4_t __b)
>   return __arm_vaddvaq_s32 (__a, __b);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq (int32x4_t __a, int32x4_t __b)
> -{
> - return __arm_vabdq_s32 (__a, __b);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vshlq_n (int32x4_t __a, const int __imm)
> @@ -21111,48 +20874,6 @@ __arm_vshlq_m (int32x4_t __inactive, int32x4_t
> __a, int32x4_t __b, mve_pred16_t
>   return __arm_vshlq_m_s32 (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b,
> mve_pred16_t __p)
> -{
> - return __arm_vabdq_m_s8 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b,
> mve_pred16_t __p)
> -{
> - return __arm_vabdq_m_s32 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b,
> mve_pred16_t __p)
> -{
> - return __arm_vabdq_m_s16 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_m (uint8x16_t __inactive, uint8x16_t __a, uint8x16_t __b,
> mve_pred16_t __p)
> -{
> - return __arm_vabdq_m_u8 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_m (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b,
> mve_pred16_t __p)
> -{
> - return __arm_vabdq_m_u32 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_m (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b,
> mve_pred16_t __p)
> -{
> - return __arm_vabdq_m_u16 (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vbicq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b,
> mve_pred16_t __p)
> @@ -24359,48 +24080,6 @@ __arm_vmaxq_x (uint32x4_t __a, uint32x4_t
> __b, mve_pred16_t __p)
>   return __arm_vmaxq_x_u32 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_x (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vabdq_x_s8 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_x (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vabdq_x_s16 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_x (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vabdq_x_s32 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_x (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vabdq_x_u8 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_x (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vabdq_x_u16 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_x (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vabdq_x_u32 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vabsq_x (int8x16_t __a, mve_pred16_t __p)
> @@ -26195,13 +25874,6 @@ __arm_vbicq (float16x8_t __a, float16x8_t __b)
>   return __arm_vbicq_f16 (__a, __b);
>  }
> 
> -__extension__ extern __inline float16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq (float16x8_t __a, float16x8_t __b)
> -{
> - return __arm_vabdq_f16 (__a, __b);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq (float32x4_t __a, float32_t __b)
> @@ -26398,13 +26070,6 @@ __arm_vbicq (float32x4_t __a, float32x4_t __b)
>   return __arm_vbicq_f32 (__a, __b);
>  }
> 
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq (float32x4_t __a, float32x4_t __b)
> -{
> - return __arm_vabdq_f32 (__a, __b);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpeqq_m (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
> @@ -27154,20 +26819,6 @@ __arm_vcvtq_m_n (float32x4_t __inactive,
> int32x4_t __a, const int __imm6, mve_pr
>   return __arm_vcvtq_m_n_f32_s32 (__inactive, __a, __imm6, __p);
>  }
> 
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_m (float32x4_t __inactive, float32x4_t __a, float32x4_t __b,
> mve_pred16_t __p)
> -{
> - return __arm_vabdq_m_f32 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline float16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_m (float16x8_t __inactive, float16x8_t __a, float16x8_t __b,
> mve_pred16_t __p)
> -{
> - return __arm_vabdq_m_f16 (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vbicq_m (float32x4_t __inactive, float32x4_t __a, float32x4_t __b,
> mve_pred16_t __p)
> @@ -27686,20 +27337,6 @@ __arm_vmaxnmq_x (float32x4_t __a,
> float32x4_t __b, mve_pred16_t __p)
>   return __arm_vmaxnmq_x_f32 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline float16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_x (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vabdq_x_f16 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vabdq_x (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vabdq_x_f32 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline float16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vabsq_x (float16x8_t __a, mve_pred16_t __p)
> @@ -28554,18 +28191,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vcvtq_n_f16_u16
> (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
>    int (*)[__ARM_mve_type_uint32x4_t]: __arm_vcvtq_n_f32_u32
> (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> 
> -#define __arm_vabdq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vabdq_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vabdq_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vabdq_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
> __arm_vabdq_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, uint8x16_t)), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
> __arm_vabdq_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
> __arm_vabdq_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce(__p1, uint32x4_t)), \
> -  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]:
> __arm_vabdq_f16 (__ARM_mve_coerce(__p0, float16x8_t),
> __ARM_mve_coerce(__p1, float16x8_t)), \
> -  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]:
> __arm_vabdq_f32 (__ARM_mve_coerce(__p0, float32x4_t),
> __ARM_mve_coerce(__p1, float32x4_t)));})
> -
>  #define __arm_vbicq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> @@ -29746,19 +29371,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]:
> __arm_vcmpgeq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t),
> __ARM_mve_coerce(__p1, float16x8_t), p2), \
>    int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]:
> __arm_vcmpgeq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t),
> __ARM_mve_coerce(__p1, float32x4_t), p2));})
> 
> -#define __arm_vabdq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  __typeof(p2) __p2 = (p2); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vabdq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vabdq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vabdq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vabdq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vabdq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vabdq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
> -  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vabdq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
> -  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vabdq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
> -
>  #define __arm_vbicq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    __typeof(p2) __p2 = (p2); \
> @@ -30228,18 +29840,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint32x4_t]:
> __arm_vstrwq_scatter_base_wb_p_u32 (p0, p1, __ARM_mve_coerce(__p2,
> uint32x4_t), p3), \
>    int (*)[__ARM_mve_type_float32x4_t]:
> __arm_vstrwq_scatter_base_wb_p_f32 (p0, p1, __ARM_mve_coerce(__p2,
> float32x4_t), p3));})
> 
> -#define __arm_vabdq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> -  __typeof(p2) __p2 = (p2); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0,
> \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vabdq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vabdq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vabdq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t), p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
> __arm_vabdq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t),
> __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
> __arm_vabdq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
> __arm_vabdq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, uint32x4_t), p3), \
> -  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]:
> __arm_vabdq_x_f16 (__ARM_mve_coerce(__p1, float16x8_t),
> __ARM_mve_coerce(__p2, float16x8_t), p3), \
> -  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]:
> __arm_vabdq_x_f32 (__ARM_mve_coerce(__p1, float32x4_t),
> __ARM_mve_coerce(__p2, float32x4_t), p3));})
> -
>  #define __arm_vabsq_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
>    int (*)[__ARM_mve_type_int8x16_t]: __arm_vabsq_x_s8
> (__ARM_mve_coerce(__p1, int8x16_t), p2), \
> @@ -30762,16 +30362,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
> __arm_vbicq_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
>    int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
> __arm_vbicq_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce(__p1, uint32x4_t)));})
> 
> -#define __arm_vabdq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vabdq_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vabdq_s16 (__ARM_mve_coerce(__p0, int16x8_t),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vabdq_s32 (__ARM_mve_coerce(__p0, int32x4_t),
> __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
> __arm_vabdq_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, uint8x16_t)), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
> __arm_vabdq_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
> __arm_vabdq_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce(__p1, uint32x4_t)));})
> -
>  #define __arm_vcmpeqq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> @@ -31416,17 +31006,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
> __arm_vabavq_p_u16(__p0, __ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, uint16x8_t), p3), \
>    int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
> __arm_vabavq_p_u32(__p0, __ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> 
> -#define __arm_vabdq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  __typeof(p2) __p2 = (p2); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vabdq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vabdq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vabdq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vabdq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vabdq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vabdq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> -
>  #define __arm_vbicq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    __typeof(p2) __p2 = (p2); \
> @@ -31834,16 +31413,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrev64q_x_u16
> (__ARM_mve_coerce(__p1, uint16x8_t), p2), \
>    int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrev64q_x_u32
> (__ARM_mve_coerce(__p1, uint32x4_t), p2));})
> 
> -#define __arm_vabdq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> -  __typeof(p2) __p2 = (p2); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0,
> \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vabdq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]:
> __arm_vabdq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]:
> __arm_vabdq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2, int32x4_t), p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
> __arm_vabdq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t),
> __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
> __arm_vabdq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]:
> __arm_vabdq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> -
>  #define __arm_vbicq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
>    __typeof(p2) __p2 = (p2); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0,
> \
> --
> 2.34.1



* RE: [PATCH 08/23] arm: [MVE intrinsics] add binary_lshift shape
  2023-05-05  8:39 ` [PATCH 08/23] arm: [MVE intrinsics] add binary_lshift shape Christophe Lyon
@ 2023-05-05 10:51   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 10:51 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 08/23] arm: [MVE intrinsics] add binary_lshift shape
> 
> This patch adds the binary_lshift shape description.

Ok.
Thanks,
Kyrill

> 
> 2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins-shapes.cc (binary_lshift): New.
> 	* config/arm/arm-mve-builtins-shapes.h (binary_lshift): New.
> ---
>  gcc/config/arm/arm-mve-builtins-shapes.cc | 57 +++++++++++++++++++++++
>  gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
>  2 files changed, 58 insertions(+)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-
> mve-builtins-shapes.cc
> index 28a2d66ddd1..e5093c3f29d 100644
> --- a/gcc/config/arm/arm-mve-builtins-shapes.cc
> +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
> @@ -519,6 +519,63 @@ struct binary_round_lshift_def : public
> overloaded_base<0>
>  };
>  SHAPE (binary_round_lshift)
> 
> +/* <T0>_t vfoo[_t0](<T0>_t, <T0>_t)
> +   <T0>_t vfoo_n[_t0](<T0>_t, const int)
> +
> +   i.e. the standard shape for left shift operations that operate on
> +   vector types.
> +
> +   For the MODE_n versions, check that 'imm' is in the [0..#bits-1] range.
> +
> +   Example: vshlq.
> +   int8x16_t [__arm_]vshlq[_s8](int8x16_t a, int8x16_t b)
> +   int8x16_t [__arm_]vshlq_m[_s8](int8x16_t inactive, int8x16_t a, int8x16_t
> b, mve_pred16_t p)
> +   int8x16_t [__arm_]vshlq_x[_s8](int8x16_t a, int8x16_t b, mve_pred16_t p)
> +   int8x16_t [__arm_]vshlq_n[_s8](int8x16_t a, const int imm)
> +   int8x16_t [__arm_]vshlq_m_n[_s8](int8x16_t inactive, int8x16_t a, const
> int imm, mve_pred16_t p)
> +   int8x16_t [__arm_]vshlq_x_n[_s8](int8x16_t a, const int imm,
> mve_pred16_t p)  */
> +struct binary_lshift_def : public overloaded_base<0>
> +{
> +  bool
> +  explicit_mode_suffix_p (enum predication_index, enum
> mode_suffix_index) const override
> +  {
> +    return true;
> +  }
> +
> +  void
> +  build (function_builder &b, const function_group_info &group,
> +	 bool preserve_user_namespace) const override
> +  {
> +    b.add_overloaded_functions (group, MODE_none,
> preserve_user_namespace);
> +    b.add_overloaded_functions (group, MODE_n,
> preserve_user_namespace);
> +    build_all (b, "v0,v0,vs0", group, MODE_none, preserve_user_namespace);
> +    build_all (b, "v0,v0,ss32", group, MODE_n, preserve_user_namespace);
> +  }
> +
> +  tree
> +  resolve (function_resolver &r) const override
> +  {
> +    unsigned int i, nargs;
> +    type_suffix_index type;
> +    if (!r.check_gp_argument (2, i, nargs)
> +	|| (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES)
> +      return error_mark_node;
> +
> +    return r.finish_opt_n_resolution (i, 0, type, TYPE_signed);
> +  }
> +
> +  bool
> +  check (function_checker &c) const override
> +  {
> +    if (c.mode_suffix_id != MODE_n)
> +      return true;
> +
> +    unsigned int bits = c.type_suffix (0).element_bits;
> +    return c.require_immediate_range (1, 0, bits - 1);
> +  }
> +};
> +SHAPE (binary_lshift)
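(A minimal sketch of the two overloaded forms this shape accepts, assuming
an MVE-enabled target; the immediate form is range-checked as described in
the comment above:)

    #include <arm_mve.h>

    int8x16_t
    shifts (int8x16_t a, int8x16_t b)
    {
      int8x16_t v = vshlq (a, b);   /* per-lane shift amounts */
      return vshlq_n (v, 3);        /* imm must be in [0..7] for s8 */
    }

(vshlq_n (v, 8) would be rejected by the require_immediate_range check.)
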
> +
>  /* <T0>xN_t vfoo[_t0](uint64_t, uint64_t)
> 
>     where there are N arguments in total.
> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-
> mve-builtins-shapes.h
> index cef081aa8ec..e472862ceef 100644
> --- a/gcc/config/arm/arm-mve-builtins-shapes.h
> +++ b/gcc/config/arm/arm-mve-builtins-shapes.h
> @@ -35,6 +35,7 @@ namespace arm_mve
>    {
> 
>      extern const function_shape *const binary;
> +    extern const function_shape *const binary_lshift;
>      extern const function_shape *const binary_opt_n;
>      extern const function_shape *const binary_orrq;
>      extern const function_shape *const binary_round_lshift;
> --
> 2.34.1



* RE: [PATCH 09/23] arm: [MVE intrinsics] add support for MODE_r
  2023-05-05  8:39 ` [PATCH 09/23] arm: [MVE intrinsics] add support for MODE_r Christophe Lyon
@ 2023-05-05 10:55   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 10:55 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 09/23] arm: [MVE intrinsics] add support for MODE_r
> 

This is missing a description of what MODE_r is.
I've deduced what it is from looking at the next 3 patches in the series, but I think this patch should have at least a one-sentence summary.
Therefore OK, with such a summary added to the cover letter.
Thanks,
Kyrill
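
For reference, a hedged sketch of the kind of intrinsic MODE_r covers, as
deduced from the _r forms in the later patches (a scalar operand held in a
register rather than encoded as an immediate):

    #include <arm_mve.h>

    int8x16_t
    shift_by (int8x16_t a, int32_t n)
    {
      return vshlq_r (a, n);   /* _r form: run-time shift amount */
    }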

> 2022-09-08  Christophe Lyon <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins.cc (has_inactive_argument)
> 	(finish_opt_n_resolution): Handle MODE_r.
> 	* config/arm/arm-mve-builtins.def (r): New mode.
> ---
>  gcc/config/arm/arm-mve-builtins.cc  | 8 ++++++--
>  gcc/config/arm/arm-mve-builtins.def | 1 +
>  2 files changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-
> builtins.cc
> index 91b3ae71f94..c25b1be9903 100644
> --- a/gcc/config/arm/arm-mve-builtins.cc
> +++ b/gcc/config/arm/arm-mve-builtins.cc
> @@ -669,7 +669,8 @@ function_instance::has_inactive_argument () const
>    if (pred != PRED_m)
>      return false;
> 
> -  if ((base == functions::vorrq && mode_suffix_id == MODE_n)
> +  if (mode_suffix_id == MODE_r
> +      || (base == functions::vorrq && mode_suffix_id == MODE_n)
>        || (base == functions::vqrshlq && mode_suffix_id == MODE_n)
>        || (base == functions::vrshlq && mode_suffix_id == MODE_n))
>      return false;
> @@ -1522,7 +1523,10 @@ finish_opt_n_resolution (unsigned int argno, unsigned int first_argno,
>  {
>    if (inferred_type == NUM_TYPE_SUFFIXES)
>      inferred_type = first_type;
> -  tree scalar_form = lookup_form (MODE_n, inferred_type);
> +  mode_suffix_index scalar_mode = MODE_n;
> +  if (mode_suffix_id == MODE_r)
> +    scalar_mode = MODE_r;
> +  tree scalar_form = lookup_form (scalar_mode, inferred_type);
> 
>    /* Allow the final argument to be scalar, if an _n form exists.  */
>    if (scalar_argument_p (argno))
> diff --git a/gcc/config/arm/arm-mve-builtins.def b/gcc/config/arm/arm-mve-builtins.def
> index 49d07364fa2..e3f37876210 100644
> --- a/gcc/config/arm/arm-mve-builtins.def
> +++ b/gcc/config/arm/arm-mve-builtins.def
> @@ -35,6 +35,7 @@
> 
>  DEF_MVE_MODE (n, none, none, none)
>  DEF_MVE_MODE (offset, none, none, bytes)
> +DEF_MVE_MODE (r, none, none, none)
> 
>  #define REQUIRES_FLOAT false
>  DEF_MVE_TYPE (mve_pred16_t, boolean_type_node)
> --
> 2.34.1
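
Since the question came up: in this series MODE_r denotes the _r forms,
whose shift amount is a single runtime scalar held in a general-purpose
register, as opposed to an immediate (_n) or a vector of per-lane
amounts.  A minimal sketch of the user-level difference, assuming the
vshlq forms that later patches in the series move onto this
infrastructure:

  #include <arm_mve.h>

  int8x16_t shift_all (int8x16_t a, int32_t n)
  {
    /* _r form: one runtime amount applied to every lane;
       a negative n shifts right.  */
    return vshlq_r_s8 (a, n);
  }

  int8x16_t shift_per_lane (int8x16_t a, int8x16_t n)
  {
    /* Vector form: lane i is shifted by n[i].  */
    return vshlq_s8 (a, n);
  }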


^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 10/23] arm: [MVE intrinsics] add binary_lshift_r shape
  2023-05-05  8:39 ` [PATCH 10/23] arm: [MVE intrinsics] add binary_lshift_r shape Christophe Lyon
@ 2023-05-05 10:56   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 10:56 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 10/23] arm: [MVE intrinsics] add binary_lshift_r shape
> 
> This patch adds the binary_lshift_r shape description.

Ok.
Thanks,
Kyrill

> 
> 2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins-shapes.cc (binary_lshift_r): New.
> 	* config/arm/arm-mve-builtins-shapes.h (binary_lshift_r): New.
> ---
>  gcc/config/arm/arm-mve-builtins-shapes.cc | 41 +++++++++++++++++++++++
>  gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
>  2 files changed, 42 insertions(+)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
> index e5093c3f29d..4ecb612ece5 100644
> --- a/gcc/config/arm/arm-mve-builtins-shapes.cc
> +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
> @@ -576,6 +576,47 @@ struct binary_lshift_def : public overloaded_base<0>
>  };
>  SHAPE (binary_lshift)
> 
> +/* Used with the above form, but only for the MODE_r case which does
> +   not always support the same set of predicates as MODE_none and
> +   MODE_n.  For vqshlq they are the same, but for vshlq they are not.
> +
> +   <T0>_t vfoo_r[_t0](<T0>_t, int32_t)
> +
> +   i.e. the standard shape for shift operations that operate on
> +   vector types.
> +   Example: vshlq.
> +   int8x16_t [__arm_]vshlq_r[_s8](int8x16_t a, int32_t b)
> +   int8x16_t [__arm_]vshlq_m_r[_s8](int8x16_t a, int32_t b, mve_pred16_t p)  */
> +struct binary_lshift_r_def : public overloaded_base<0>
> +{
> +  bool
> +  explicit_mode_suffix_p (enum predication_index, enum mode_suffix_index) const override
> +  {
> +    return true;
> +  }
> +
> +  void
> +  build (function_builder &b, const function_group_info &group,
> +	 bool preserve_user_namespace) const override
> +  {
> +    b.add_overloaded_functions (group, MODE_r, preserve_user_namespace);
> +    build_all (b, "v0,v0,ss32", group, MODE_r, preserve_user_namespace, false, preds_m_or_none);
> +  }
> +
> +  tree
> +  resolve (function_resolver &r) const override
> +  {
> +    unsigned int i, nargs;
> +    type_suffix_index type;
> +    if (!r.check_gp_argument (2, i, nargs)
> +	|| (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES)
> +      return error_mark_node;
> +
> +    return r.finish_opt_n_resolution (i, 0, type, TYPE_signed);
> +  }
> +};
> +SHAPE (binary_lshift_r)
> +
>  /* <T0>xN_t vfoo[_t0](uint64_t, uint64_t)
> 
>     where there are N arguments in total.
> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
> index e472862ceef..25d9b60a670 100644
> --- a/gcc/config/arm/arm-mve-builtins-shapes.h
> +++ b/gcc/config/arm/arm-mve-builtins-shapes.h
> @@ -36,6 +36,7 @@ namespace arm_mve
> 
>      extern const function_shape *const binary;
>      extern const function_shape *const binary_lshift;
> +    extern const function_shape *const binary_lshift_r;
>      extern const function_shape *const binary_opt_n;
>      extern const function_shape *const binary_orrq;
>      extern const function_shape *const binary_round_lshift;
> --
> 2.34.1
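
A minimal sketch of the predicate asymmetry described in the comment
above, using the vshlq _r forms reworked later in the series: the _r
variant exists unpredicated and with the "m" predicate, but there is no
"x"-predicated _r form.

  #include <arm_mve.h>

  int8x16_t f (int8x16_t a, int32_t n, mve_pred16_t p)
  {
    int8x16_t t = vshlq_r_s8 (a, n);	/* unpredicated _r form */
    /* _m_r form: lanes with a false predicate bit keep their
       value from t.  There is no vshlq_x_r_s8.  */
    return vshlq_m_r_s8 (t, n, p);
  }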


^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 11/23] arm: [MVE intrinsics] add unspec_mve_function_exact_insn_vshl
  2023-05-05  8:39 ` [PATCH 11/23] arm: [MVE intrinsics] add unspec_mve_function_exact_insn_vshl Christophe Lyon
@ 2023-05-05 10:56   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 10:56 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 11/23] arm: [MVE intrinsics] add
> unspec_mve_function_exact_insn_vshl
> 
> Introduce a function_base subclass that will be used to build the vshl
> intrinsics.  They are special because they have to handle MODE_r.
> 

Ok.
Thanks,
Kyrill

> 2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins-functions.h (class
> 	unspec_mve_function_exact_insn_vshl): New.
> ---
>  gcc/config/arm/arm-mve-builtins-functions.h | 150 ++++++++++++++++++++
>  1 file changed, 150 insertions(+)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-functions.h b/gcc/config/arm/arm-mve-builtins-functions.h
> index 5abf913d182..533fd1159c6 100644
> --- a/gcc/config/arm/arm-mve-builtins-functions.h
> +++ b/gcc/config/arm/arm-mve-builtins-functions.h
> @@ -376,6 +376,156 @@ public:
>    }
>  };
> 
> +/* Map the function directly to CODE (UNSPEC, M) for vshl-like
> +   builtins.  The difference from unspec_mve_function_exact_insn is
> +   that this function handles MODE_r and the related unspecs.  */
> +class unspec_mve_function_exact_insn_vshl : public function_base
> +{
> +public:
> +  CONSTEXPR unspec_mve_function_exact_insn_vshl (int unspec_for_sint,
> +						 int unspec_for_uint,
> +						 int unspec_for_n_sint,
> +						 int unspec_for_n_uint,
> +						 int unspec_for_m_sint,
> +						 int unspec_for_m_uint,
> +						 int unspec_for_m_n_sint,
> +						 int unspec_for_m_n_uint,
> +						 int unspec_for_m_r_sint,
> +						 int unspec_for_m_r_uint,
> +						 int unspec_for_r_sint,
> +						 int unspec_for_r_uint)
> +    : m_unspec_for_sint (unspec_for_sint),
> +      m_unspec_for_uint (unspec_for_uint),
> +      m_unspec_for_n_sint (unspec_for_n_sint),
> +      m_unspec_for_n_uint (unspec_for_n_uint),
> +      m_unspec_for_m_sint (unspec_for_m_sint),
> +      m_unspec_for_m_uint (unspec_for_m_uint),
> +      m_unspec_for_m_n_sint (unspec_for_m_n_sint),
> +      m_unspec_for_m_n_uint (unspec_for_m_n_uint),
> +      m_unspec_for_m_r_sint (unspec_for_m_r_sint),
> +      m_unspec_for_m_r_uint (unspec_for_m_r_uint),
> +      m_unspec_for_r_sint (unspec_for_r_sint),
> +      m_unspec_for_r_uint (unspec_for_r_uint)
> +  {}
> +
> +  /* The unspec codes associated with signed-integer and unsigned-integer
> +     operations respectively.  They cover the cases with the _n or _r
> +     suffixes, and/or the _m predicate.  */
> +  int m_unspec_for_sint;
> +  int m_unspec_for_uint;
> +  int m_unspec_for_n_sint;
> +  int m_unspec_for_n_uint;
> +  int m_unspec_for_m_sint;
> +  int m_unspec_for_m_uint;
> +  int m_unspec_for_m_n_sint;
> +  int m_unspec_for_m_n_uint;
> +  int m_unspec_for_m_r_sint;
> +  int m_unspec_for_m_r_uint;
> +  int m_unspec_for_r_sint;
> +  int m_unspec_for_r_uint;
> +
> +  rtx
> +  expand (function_expander &e) const override
> +  {
> +    insn_code code;
> +    switch (e.pred)
> +      {
> +      case PRED_none:
> +	switch (e.mode_suffix_id)
> +	  {
> +	  case MODE_none:
> +	    /* No predicate, no suffix.  */
> +	    if (e.type_suffix (0).unsigned_p)
> +	      code = code_for_mve_q (m_unspec_for_uint, m_unspec_for_uint, e.vector_mode (0));
> +	    else
> +	      code = code_for_mve_q (m_unspec_for_sint, m_unspec_for_sint, e.vector_mode (0));
> +	    break;
> +
> +	  case MODE_n:
> +	    /* No predicate, _n suffix.  */
> +	    if (e.type_suffix (0).unsigned_p)
> +	      code = code_for_mve_q_n (m_unspec_for_n_uint, m_unspec_for_n_uint, e.vector_mode (0));
> +	    else
> +	      code = code_for_mve_q_n (m_unspec_for_n_sint, m_unspec_for_n_sint, e.vector_mode (0));
> +	    break;
> +
> +	  case MODE_r:
> +	    /* No predicate, _r suffix.  */
> +	    if (e.type_suffix (0).unsigned_p)
> +	      code = code_for_mve_q_r (m_unspec_for_r_uint, m_unspec_for_r_uint, e.vector_mode (0));
> +	    else
> +	      code = code_for_mve_q_r (m_unspec_for_r_sint, m_unspec_for_r_sint, e.vector_mode (0));
> +	    break;
> +
> +	  default:
> +	    gcc_unreachable ();
> +	  }
> +	return e.use_exact_insn (code);
> +
> +      case PRED_m:
> +	switch (e.mode_suffix_id)
> +	  {
> +	  case MODE_none:
> +	    /* No suffix, "m" predicate.  */
> +	    if (e.type_suffix (0).unsigned_p)
> +	      code = code_for_mve_q_m (m_unspec_for_m_uint, m_unspec_for_m_uint, e.vector_mode (0));
> +	    else
> +	      code = code_for_mve_q_m (m_unspec_for_m_sint, m_unspec_for_m_sint, e.vector_mode (0));
> +	    break;
> +
> +	  case MODE_n:
> +	    /* _n suffix, "m" predicate.  */
> +	    if (e.type_suffix (0).unsigned_p)
> +	      code = code_for_mve_q_m_n (m_unspec_for_m_n_uint, m_unspec_for_m_n_uint, e.vector_mode (0));
> +	    else
> +	      code = code_for_mve_q_m_n (m_unspec_for_m_n_sint, m_unspec_for_m_n_sint, e.vector_mode (0));
> +	    break;
> +
> +	  case MODE_r:
> +	    /* _r suffix, "m" predicate.  */
> +	    if (e.type_suffix (0).unsigned_p)
> +	      code = code_for_mve_q_m_r (m_unspec_for_m_r_uint, m_unspec_for_m_r_uint, e.vector_mode (0));
> +	    else
> +	      code = code_for_mve_q_m_r (m_unspec_for_m_r_sint, m_unspec_for_m_r_sint, e.vector_mode (0));
> +	    break;
> +
> +	  default:
> +	    gcc_unreachable ();
> +	  }
> +	return e.use_cond_insn (code, 0);
> +
> +      case PRED_x:
> +	switch (e.mode_suffix_id)
> +	  {
> +	  case MODE_none:
> +	    /* No suffix, "x" predicate.  */
> +	    if (e.type_suffix (0).unsigned_p)
> +	      code = code_for_mve_q_m (m_unspec_for_m_uint, m_unspec_for_m_uint, e.vector_mode (0));
> +	    else
> +	      code = code_for_mve_q_m (m_unspec_for_m_sint, m_unspec_for_m_sint, e.vector_mode (0));
> +	    break;
> +
> +	  case MODE_n:
> +	    /* _n suffix, "x" predicate.  */
> +	    if (e.type_suffix (0).unsigned_p)
> +	      code = code_for_mve_q_m_n (m_unspec_for_m_n_uint, m_unspec_for_m_n_uint, e.vector_mode (0));
> +	    else
> +	      code = code_for_mve_q_m_n (m_unspec_for_m_n_sint, m_unspec_for_m_n_sint, e.vector_mode (0));
> +	    break;
> +
> +	  default:
> +	    gcc_unreachable ();
> +	  }
> +	return e.use_pred_x_insn (code);
> +
> +      default:
> +	gcc_unreachable ();
> +      }
> +
> +    gcc_unreachable ();
> +  }
> +};
> +
>  } /* end namespace arm_mve */
> 
>  /* Declare the global function base NAME, creating it from an instance
> --
> 2.34.1
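
To make the twelve constructor slots concrete, this is how vshlq ends up
wired in PATCH 12 of this series, with its FUNCTION_WITH_M_N_R helper
expanded by hand:

  FUNCTION (vshlq, unspec_mve_function_exact_insn_vshl,
	    (VSHLQ_S, VSHLQ_U,			/* no suffix, no predicate */
	     VSHLQ_N_S, VSHLQ_N_U,		/* _n (immediate) */
	     VSHLQ_M_S, VSHLQ_M_U,		/* _m */
	     VSHLQ_M_N_S, VSHLQ_M_N_U,		/* _m with _n */
	     VSHLQ_M_R_S, VSHLQ_M_R_U,		/* _m with _r */
	     VSHLQ_R_S, VSHLQ_R_U))		/* _r (scalar register) */

expand () then selects code_for_mve_q, code_for_mve_q_n or
code_for_mve_q_r (and their _m variants) from these slots according to
e.pred and e.mode_suffix_id.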


^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 12/23] arm: [MVE intrinsics] rework vqshlq vshlq
  2023-05-05  8:39 ` [PATCH 12/23] arm: [MVE intrinsics] rework vqshlq vshlq Christophe Lyon
@ 2023-05-05 10:58   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 10:58 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 12/23] arm: [MVE intrinsics] rework vqshlq vshlq
> 
> Implement vqshlq, vshlq using the new MVE builtins framework.
> 

Ok.
Thanks,
Kyrill
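
The rework is intended to be transparent at the source level: the
overloaded names resolve exactly as before.  A minimal sketch, relying
on the binary_lshift resolver added earlier in the series; the function
name is illustrative only:

  #include <arm_mve.h>

  int16x8_t h (int16x8_t a, int16x8_t b)
  {
    int16x8_t v = vqshlq (a, b);	/* resolves to vqshlq_s16 */
    return vqshlq (v, 3);		/* scalar second argument resolves
					   to vqshlq_n_s16 */
  }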

> 2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins-base.cc (FUNCTION_WITH_M_N_R): New.
> 	(vqshlq, vshlq): New.
> 	* config/arm/arm-mve-builtins-base.def (vqshlq, vshlq): New.
> 	* config/arm/arm-mve-builtins-base.h (vqshlq, vshlq): New.
> 	* config/arm/arm_mve.h (vshlq): Remove.
> 	(vshlq_r): Remove.
> 	(vshlq_n): Remove.
> 	(vshlq_m_r): Remove.
> 	(vshlq_m): Remove.
> 	(vshlq_m_n): Remove.
> 	(vshlq_x): Remove.
> 	(vshlq_x_n): Remove.
> 	(vshlq_s8): Remove.
> 	(vshlq_s16): Remove.
> 	(vshlq_s32): Remove.
> 	(vshlq_u8): Remove.
> 	(vshlq_u16): Remove.
> 	(vshlq_u32): Remove.
> 	(vshlq_r_u8): Remove.
> 	(vshlq_n_u8): Remove.
> 	(vshlq_r_s8): Remove.
> 	(vshlq_n_s8): Remove.
> 	(vshlq_r_u16): Remove.
> 	(vshlq_n_u16): Remove.
> 	(vshlq_r_s16): Remove.
> 	(vshlq_n_s16): Remove.
> 	(vshlq_r_u32): Remove.
> 	(vshlq_n_u32): Remove.
> 	(vshlq_r_s32): Remove.
> 	(vshlq_n_s32): Remove.
> 	(vshlq_m_r_u8): Remove.
> 	(vshlq_m_r_s8): Remove.
> 	(vshlq_m_r_u16): Remove.
> 	(vshlq_m_r_s16): Remove.
> 	(vshlq_m_r_u32): Remove.
> 	(vshlq_m_r_s32): Remove.
> 	(vshlq_m_u8): Remove.
> 	(vshlq_m_s8): Remove.
> 	(vshlq_m_u16): Remove.
> 	(vshlq_m_s16): Remove.
> 	(vshlq_m_u32): Remove.
> 	(vshlq_m_s32): Remove.
> 	(vshlq_m_n_s8): Remove.
> 	(vshlq_m_n_s32): Remove.
> 	(vshlq_m_n_s16): Remove.
> 	(vshlq_m_n_u8): Remove.
> 	(vshlq_m_n_u32): Remove.
> 	(vshlq_m_n_u16): Remove.
> 	(vshlq_x_s8): Remove.
> 	(vshlq_x_s16): Remove.
> 	(vshlq_x_s32): Remove.
> 	(vshlq_x_u8): Remove.
> 	(vshlq_x_u16): Remove.
> 	(vshlq_x_u32): Remove.
> 	(vshlq_x_n_s8): Remove.
> 	(vshlq_x_n_s16): Remove.
> 	(vshlq_x_n_s32): Remove.
> 	(vshlq_x_n_u8): Remove.
> 	(vshlq_x_n_u16): Remove.
> 	(vshlq_x_n_u32): Remove.
> 	(__arm_vshlq_s8): Remove.
> 	(__arm_vshlq_s16): Remove.
> 	(__arm_vshlq_s32): Remove.
> 	(__arm_vshlq_u8): Remove.
> 	(__arm_vshlq_u16): Remove.
> 	(__arm_vshlq_u32): Remove.
> 	(__arm_vshlq_r_u8): Remove.
> 	(__arm_vshlq_n_u8): Remove.
> 	(__arm_vshlq_r_s8): Remove.
> 	(__arm_vshlq_n_s8): Remove.
> 	(__arm_vshlq_r_u16): Remove.
> 	(__arm_vshlq_n_u16): Remove.
> 	(__arm_vshlq_r_s16): Remove.
> 	(__arm_vshlq_n_s16): Remove.
> 	(__arm_vshlq_r_u32): Remove.
> 	(__arm_vshlq_n_u32): Remove.
> 	(__arm_vshlq_r_s32): Remove.
> 	(__arm_vshlq_n_s32): Remove.
> 	(__arm_vshlq_m_r_u8): Remove.
> 	(__arm_vshlq_m_r_s8): Remove.
> 	(__arm_vshlq_m_r_u16): Remove.
> 	(__arm_vshlq_m_r_s16): Remove.
> 	(__arm_vshlq_m_r_u32): Remove.
> 	(__arm_vshlq_m_r_s32): Remove.
> 	(__arm_vshlq_m_u8): Remove.
> 	(__arm_vshlq_m_s8): Remove.
> 	(__arm_vshlq_m_u16): Remove.
> 	(__arm_vshlq_m_s16): Remove.
> 	(__arm_vshlq_m_u32): Remove.
> 	(__arm_vshlq_m_s32): Remove.
> 	(__arm_vshlq_m_n_s8): Remove.
> 	(__arm_vshlq_m_n_s32): Remove.
> 	(__arm_vshlq_m_n_s16): Remove.
> 	(__arm_vshlq_m_n_u8): Remove.
> 	(__arm_vshlq_m_n_u32): Remove.
> 	(__arm_vshlq_m_n_u16): Remove.
> 	(__arm_vshlq_x_s8): Remove.
> 	(__arm_vshlq_x_s16): Remove.
> 	(__arm_vshlq_x_s32): Remove.
> 	(__arm_vshlq_x_u8): Remove.
> 	(__arm_vshlq_x_u16): Remove.
> 	(__arm_vshlq_x_u32): Remove.
> 	(__arm_vshlq_x_n_s8): Remove.
> 	(__arm_vshlq_x_n_s16): Remove.
> 	(__arm_vshlq_x_n_s32): Remove.
> 	(__arm_vshlq_x_n_u8): Remove.
> 	(__arm_vshlq_x_n_u16): Remove.
> 	(__arm_vshlq_x_n_u32): Remove.
> 	(__arm_vshlq): Remove.
> 	(__arm_vshlq_r): Remove.
> 	(__arm_vshlq_n): Remove.
> 	(__arm_vshlq_m_r): Remove.
> 	(__arm_vshlq_m): Remove.
> 	(__arm_vshlq_m_n): Remove.
> 	(__arm_vshlq_x): Remove.
> 	(__arm_vshlq_x_n): Remove.
> 	(vqshlq): Remove.
> 	(vqshlq_r): Remove.
> 	(vqshlq_n): Remove.
> 	(vqshlq_m_r): Remove.
> 	(vqshlq_m_n): Remove.
> 	(vqshlq_m): Remove.
> 	(vqshlq_u8): Remove.
> 	(vqshlq_r_u8): Remove.
> 	(vqshlq_n_u8): Remove.
> 	(vqshlq_s8): Remove.
> 	(vqshlq_r_s8): Remove.
> 	(vqshlq_n_s8): Remove.
> 	(vqshlq_u16): Remove.
> 	(vqshlq_r_u16): Remove.
> 	(vqshlq_n_u16): Remove.
> 	(vqshlq_s16): Remove.
> 	(vqshlq_r_s16): Remove.
> 	(vqshlq_n_s16): Remove.
> 	(vqshlq_u32): Remove.
> 	(vqshlq_r_u32): Remove.
> 	(vqshlq_n_u32): Remove.
> 	(vqshlq_s32): Remove.
> 	(vqshlq_r_s32): Remove.
> 	(vqshlq_n_s32): Remove.
> 	(vqshlq_m_r_u8): Remove.
> 	(vqshlq_m_r_s8): Remove.
> 	(vqshlq_m_r_u16): Remove.
> 	(vqshlq_m_r_s16): Remove.
> 	(vqshlq_m_r_u32): Remove.
> 	(vqshlq_m_r_s32): Remove.
> 	(vqshlq_m_n_s8): Remove.
> 	(vqshlq_m_n_s32): Remove.
> 	(vqshlq_m_n_s16): Remove.
> 	(vqshlq_m_n_u8): Remove.
> 	(vqshlq_m_n_u32): Remove.
> 	(vqshlq_m_n_u16): Remove.
> 	(vqshlq_m_s8): Remove.
> 	(vqshlq_m_s32): Remove.
> 	(vqshlq_m_s16): Remove.
> 	(vqshlq_m_u8): Remove.
> 	(vqshlq_m_u32): Remove.
> 	(vqshlq_m_u16): Remove.
> 	(__arm_vqshlq_u8): Remove.
> 	(__arm_vqshlq_r_u8): Remove.
> 	(__arm_vqshlq_n_u8): Remove.
> 	(__arm_vqshlq_s8): Remove.
> 	(__arm_vqshlq_r_s8): Remove.
> 	(__arm_vqshlq_n_s8): Remove.
> 	(__arm_vqshlq_u16): Remove.
> 	(__arm_vqshlq_r_u16): Remove.
> 	(__arm_vqshlq_n_u16): Remove.
> 	(__arm_vqshlq_s16): Remove.
> 	(__arm_vqshlq_r_s16): Remove.
> 	(__arm_vqshlq_n_s16): Remove.
> 	(__arm_vqshlq_u32): Remove.
> 	(__arm_vqshlq_r_u32): Remove.
> 	(__arm_vqshlq_n_u32): Remove.
> 	(__arm_vqshlq_s32): Remove.
> 	(__arm_vqshlq_r_s32): Remove.
> 	(__arm_vqshlq_n_s32): Remove.
> 	(__arm_vqshlq_m_r_u8): Remove.
> 	(__arm_vqshlq_m_r_s8): Remove.
> 	(__arm_vqshlq_m_r_u16): Remove.
> 	(__arm_vqshlq_m_r_s16): Remove.
> 	(__arm_vqshlq_m_r_u32): Remove.
> 	(__arm_vqshlq_m_r_s32): Remove.
> 	(__arm_vqshlq_m_n_s8): Remove.
> 	(__arm_vqshlq_m_n_s32): Remove.
> 	(__arm_vqshlq_m_n_s16): Remove.
> 	(__arm_vqshlq_m_n_u8): Remove.
> 	(__arm_vqshlq_m_n_u32): Remove.
> 	(__arm_vqshlq_m_n_u16): Remove.
> 	(__arm_vqshlq_m_s8): Remove.
> 	(__arm_vqshlq_m_s32): Remove.
> 	(__arm_vqshlq_m_s16): Remove.
> 	(__arm_vqshlq_m_u8): Remove.
> 	(__arm_vqshlq_m_u32): Remove.
> 	(__arm_vqshlq_m_u16): Remove.
> 	(__arm_vqshlq): Remove.
> 	(__arm_vqshlq_r): Remove.
> 	(__arm_vqshlq_n): Remove.
> 	(__arm_vqshlq_m_r): Remove.
> 	(__arm_vqshlq_m_n): Remove.
> 	(__arm_vqshlq_m): Remove.
> ---
>  gcc/config/arm/arm-mve-builtins-base.cc  |   13 +
>  gcc/config/arm/arm-mve-builtins-base.def |    4 +
>  gcc/config/arm/arm-mve-builtins-base.h   |    2 +
>  gcc/config/arm/arm_mve.h                 | 1552 +---------------------
>  4 files changed, 49 insertions(+), 1522 deletions(-)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
> index a74119db917..4bebf86f784 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.cc
> +++ b/gcc/config/arm/arm-mve-builtins-base.cc
> @@ -128,6 +128,17 @@ namespace arm_mve {
>      UNSPEC##_M_S, UNSPEC##_M_U, -1,					\
>      UNSPEC##_M_N_S, UNSPEC##_M_N_U, -1))
> 
> +  /* Helper for vshl builtins with only unspec codes, _m predicated
> +     and _n and _r overrides.  */
> +#define FUNCTION_WITH_M_N_R(NAME, UNSPEC) FUNCTION			\
> +  (NAME, unspec_mve_function_exact_insn_vshl,				\
> +   (UNSPEC##_S, UNSPEC##_U,						\
> +    UNSPEC##_N_S, UNSPEC##_N_U,						\
> +    UNSPEC##_M_S, UNSPEC##_M_U,						\
> +    UNSPEC##_M_N_S, UNSPEC##_M_N_U,					\
> +    UNSPEC##_M_R_S, UNSPEC##_M_R_U,					\
> +    UNSPEC##_R_S, UNSPEC##_R_U))
> +
>    /* Helper for builtins with only unspec codes, _m predicated
>       overrides, no _n and no floating-point version.  */
>  #define FUNCTION_WITHOUT_N_NO_F(NAME, UNSPEC) FUNCTION		\
> @@ -169,11 +180,13 @@ FUNCTION_WITH_M_N_NO_F (vqaddq, VQADDQ)
>  FUNCTION_WITH_M_N_NO_U_F (vqdmulhq, VQDMULHQ)
>  FUNCTION_WITH_M_N_NO_F (vqrshlq, VQRSHLQ)
>  FUNCTION_WITH_M_N_NO_U_F (vqrdmulhq, VQRDMULHQ)
> +FUNCTION_WITH_M_N_R (vqshlq, VQSHLQ)
>  FUNCTION_WITH_M_N_NO_F (vqsubq, VQSUBQ)
>  FUNCTION (vreinterpretq, vreinterpretq_impl,)
>  FUNCTION_WITHOUT_N_NO_F (vrhaddq, VRHADDQ)
>  FUNCTION_WITHOUT_N_NO_F (vrmulhq, VRMULHQ)
>  FUNCTION_WITH_M_N_NO_F (vrshlq, VRSHLQ)
> +FUNCTION_WITH_M_N_R (vshlq, VSHLQ)
>  FUNCTION_WITH_RTX_M_N (vsubq, MINUS, VSUBQ)
>  FUNCTION (vuninitializedq, vuninitializedq_impl,)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
> index 9230837fd43..f2e40cda2af 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.def
> +++ b/gcc/config/arm/arm-mve-builtins-base.def
> @@ -32,11 +32,15 @@ DEF_MVE_FUNCTION (vqaddq, binary_opt_n, all_integer, m_or_none)
>  DEF_MVE_FUNCTION (vqdmulhq, binary_opt_n, all_signed, m_or_none)
>  DEF_MVE_FUNCTION (vqrdmulhq, binary_opt_n, all_signed, m_or_none)
>  DEF_MVE_FUNCTION (vqrshlq, binary_round_lshift, all_integer, m_or_none)
> +DEF_MVE_FUNCTION (vqshlq, binary_lshift, all_integer, m_or_none)
> +DEF_MVE_FUNCTION (vqshlq, binary_lshift_r, all_integer, m_or_none)
>  DEF_MVE_FUNCTION (vqsubq, binary_opt_n, all_integer, m_or_none)
>  DEF_MVE_FUNCTION (vreinterpretq, unary_convert, reinterpret_integer, none)
>  DEF_MVE_FUNCTION (vrhaddq, binary, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vrmulhq, binary, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vrshlq, binary_round_lshift, all_integer, mx_or_none)
> +DEF_MVE_FUNCTION (vshlq, binary_lshift, all_integer, mx_or_none)
> +DEF_MVE_FUNCTION (vshlq, binary_lshift_r, all_integer, m_or_none) // "_r" forms do not support the "x" predicate
>  DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vuninitializedq, inherent, all_integer_with_64, none)
>  #undef REQUIRES_FLOAT
> diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
> index d9d45d1925a..5b62de6a922 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.h
> +++ b/gcc/config/arm/arm-mve-builtins-base.h
> @@ -37,11 +37,13 @@ extern const function_base *const vqaddq;
>  extern const function_base *const vqdmulhq;
>  extern const function_base *const vqrdmulhq;
>  extern const function_base *const vqrshlq;
> +extern const function_base *const vqshlq;
>  extern const function_base *const vqsubq;
>  extern const function_base *const vreinterpretq;
>  extern const function_base *const vrhaddq;
>  extern const function_base *const vrmulhq;
>  extern const function_base *const vrshlq;
> +extern const function_base *const vshlq;
>  extern const function_base *const vsubq;
>  extern const function_base *const vuninitializedq;
> 
> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> index 175d9955c33..ad67dcfd024 100644
> --- a/gcc/config/arm/arm_mve.h
> +++ b/gcc/config/arm/arm_mve.h
> @@ -60,7 +60,6 @@
>  #define vshrq(__a, __imm) __arm_vshrq(__a, __imm)
>  #define vaddlvq_p(__a, __p) __arm_vaddlvq_p(__a, __p)
>  #define vcmpneq(__a, __b) __arm_vcmpneq(__a, __b)
> -#define vshlq(__a, __b) __arm_vshlq(__a, __b)
>  #define vornq(__a, __b) __arm_vornq(__a, __b)
>  #define vmulltq_int(__a, __b) __arm_vmulltq_int(__a, __b)
>  #define vmullbq_int(__a, __b) __arm_vmullbq_int(__a, __b)
> @@ -77,17 +76,12 @@
>  #define vbicq(__a, __b) __arm_vbicq(__a, __b)
>  #define vaddvq_p(__a, __p) __arm_vaddvq_p(__a, __p)
>  #define vaddvaq(__a, __b) __arm_vaddvaq(__a, __b)
> -#define vshlq_r(__a, __b) __arm_vshlq_r(__a, __b)
> -#define vqshlq(__a, __b) __arm_vqshlq(__a, __b)
> -#define vqshlq_r(__a, __b) __arm_vqshlq_r(__a, __b)
>  #define vminavq(__a, __b) __arm_vminavq(__a, __b)
>  #define vminaq(__a, __b) __arm_vminaq(__a, __b)
>  #define vmaxavq(__a, __b) __arm_vmaxavq(__a, __b)
>  #define vmaxaq(__a, __b) __arm_vmaxaq(__a, __b)
>  #define vbrsrq(__a, __b) __arm_vbrsrq(__a, __b)
> -#define vshlq_n(__a, __imm) __arm_vshlq_n(__a, __imm)
>  #define vrshrq(__a, __imm) __arm_vrshrq(__a, __imm)
> -#define vqshlq_n(__a, __imm) __arm_vqshlq_n(__a, __imm)
>  #define vcmpltq(__a, __b) __arm_vcmpltq(__a, __b)
>  #define vcmpleq(__a, __b) __arm_vcmpleq(__a, __b)
>  #define vcmpgtq(__a, __b) __arm_vcmpgtq(__a, __b)
> @@ -148,8 +142,6 @@
>  #define vaddvaq_p(__a, __b, __p) __arm_vaddvaq_p(__a, __b, __p)
>  #define vsriq(__a, __b, __imm) __arm_vsriq(__a, __b, __imm)
>  #define vsliq(__a, __b, __imm) __arm_vsliq(__a, __b, __imm)
> -#define vshlq_m_r(__a, __b, __p) __arm_vshlq_m_r(__a, __b, __p)
> -#define vqshlq_m_r(__a, __b, __p) __arm_vqshlq_m_r(__a, __b, __p)
>  #define vminavq_p(__a, __b, __p) __arm_vminavq_p(__a, __b, __p)
>  #define vminaq_m(__a, __b, __p) __arm_vminaq_m(__a, __b, __p)
>  #define vmaxavq_p(__a, __b, __p) __arm_vmaxavq_p(__a, __b, __p)
> @@ -216,7 +208,6 @@
>  #define vsriq_m(__a, __b, __imm, __p) __arm_vsriq_m(__a, __b, __imm,
> __p)
>  #define vqshluq_m(__inactive, __a, __imm, __p)
> __arm_vqshluq_m(__inactive, __a, __imm, __p)
>  #define vabavq_p(__a, __b, __c, __p) __arm_vabavq_p(__a, __b, __c, __p)
> -#define vshlq_m(__inactive, __a, __b, __p) __arm_vshlq_m(__inactive, __a,
> __b, __p)
>  #define vbicq_m(__inactive, __a, __b, __p) __arm_vbicq_m(__inactive, __a,
> __b, __p)
>  #define vbrsrq_m(__inactive, __a, __b, __p) __arm_vbrsrq_m(__inactive,
> __a, __b, __p)
>  #define vcaddq_rot270_m(__inactive, __a, __b, __p)
> __arm_vcaddq_rot270_m(__inactive, __a, __b, __p)
> @@ -246,10 +237,7 @@
>  #define vqrdmlashq_m(__a, __b, __c, __p) __arm_vqrdmlashq_m(__a, __b,
> __c, __p)
>  #define vqrdmlsdhq_m(__inactive, __a, __b, __p)
> __arm_vqrdmlsdhq_m(__inactive, __a, __b, __p)
>  #define vqrdmlsdhxq_m(__inactive, __a, __b, __p)
> __arm_vqrdmlsdhxq_m(__inactive, __a, __b, __p)
> -#define vqshlq_m_n(__inactive, __a, __imm, __p)
> __arm_vqshlq_m_n(__inactive, __a, __imm, __p)
> -#define vqshlq_m(__inactive, __a, __b, __p) __arm_vqshlq_m(__inactive,
> __a, __b, __p)
>  #define vrshrq_m(__inactive, __a, __imm, __p) __arm_vrshrq_m(__inactive,
> __a, __imm, __p)
> -#define vshlq_m_n(__inactive, __a, __imm, __p)
> __arm_vshlq_m_n(__inactive, __a, __imm, __p)
>  #define vshrq_m(__inactive, __a, __imm, __p) __arm_vshrq_m(__inactive,
> __a, __imm, __p)
>  #define vsliq_m(__a, __b, __imm, __p) __arm_vsliq_m(__a, __b, __imm,
> __p)
>  #define vmlaldavaq_p(__a, __b, __c, __p) __arm_vmlaldavaq_p(__a, __b,
> __c, __p)
> @@ -376,8 +364,6 @@
>  #define vrev64q_x(__a, __p) __arm_vrev64q_x(__a, __p)
>  #define vshllbq_x(__a, __imm, __p) __arm_vshllbq_x(__a, __imm, __p)
>  #define vshlltq_x(__a, __imm, __p) __arm_vshlltq_x(__a, __imm, __p)
> -#define vshlq_x(__a, __b, __p) __arm_vshlq_x(__a, __b, __p)
> -#define vshlq_x_n(__a, __imm, __p) __arm_vshlq_x_n(__a, __imm, __p)
>  #define vrshrq_x(__a, __imm, __p) __arm_vrshrq_x(__a, __imm, __p)
>  #define vshrq_x(__a, __imm, __p) __arm_vshrq_x(__a, __imm, __p)
>  #define vadciq(__a, __b, __carry_out) __arm_vadciq(__a, __b, __carry_out)
> @@ -623,12 +609,6 @@
>  #define vcmpneq_u8(__a, __b) __arm_vcmpneq_u8(__a, __b)
>  #define vcmpneq_u16(__a, __b) __arm_vcmpneq_u16(__a, __b)
>  #define vcmpneq_u32(__a, __b) __arm_vcmpneq_u32(__a, __b)
> -#define vshlq_s8(__a, __b) __arm_vshlq_s8(__a, __b)
> -#define vshlq_s16(__a, __b) __arm_vshlq_s16(__a, __b)
> -#define vshlq_s32(__a, __b) __arm_vshlq_s32(__a, __b)
> -#define vshlq_u8(__a, __b) __arm_vshlq_u8(__a, __b)
> -#define vshlq_u16(__a, __b) __arm_vshlq_u16(__a, __b)
> -#define vshlq_u32(__a, __b) __arm_vshlq_u32(__a, __b)
>  #define vornq_u8(__a, __b) __arm_vornq_u8(__a, __b)
>  #define vmulltq_int_u8(__a, __b) __arm_vmulltq_int_u8(__a, __b)
>  #define vmullbq_int_u8(__a, __b) __arm_vmullbq_int_u8(__a, __b)
> @@ -649,17 +629,12 @@
>  #define vbicq_u8(__a, __b) __arm_vbicq_u8(__a, __b)
>  #define vaddvq_p_u8(__a, __p) __arm_vaddvq_p_u8(__a, __p)
>  #define vaddvaq_u8(__a, __b) __arm_vaddvaq_u8(__a, __b)
> -#define vshlq_r_u8(__a, __b) __arm_vshlq_r_u8(__a, __b)
> -#define vqshlq_u8(__a, __b) __arm_vqshlq_u8(__a, __b)
> -#define vqshlq_r_u8(__a, __b) __arm_vqshlq_r_u8(__a, __b)
>  #define vminavq_s8(__a, __b) __arm_vminavq_s8(__a, __b)
>  #define vminaq_s8(__a, __b) __arm_vminaq_s8(__a, __b)
>  #define vmaxavq_s8(__a, __b) __arm_vmaxavq_s8(__a, __b)
>  #define vmaxaq_s8(__a, __b) __arm_vmaxaq_s8(__a, __b)
>  #define vbrsrq_n_u8(__a, __b) __arm_vbrsrq_n_u8(__a, __b)
> -#define vshlq_n_u8(__a,  __imm) __arm_vshlq_n_u8(__a,  __imm)
>  #define vrshrq_n_u8(__a,  __imm) __arm_vrshrq_n_u8(__a,  __imm)
> -#define vqshlq_n_u8(__a,  __imm) __arm_vqshlq_n_u8(__a,  __imm)
>  #define vcmpneq_n_s8(__a, __b) __arm_vcmpneq_n_s8(__a, __b)
>  #define vcmpltq_s8(__a, __b) __arm_vcmpltq_s8(__a, __b)
>  #define vcmpltq_n_s8(__a, __b) __arm_vcmpltq_n_s8(__a, __b)
> @@ -673,9 +648,6 @@
>  #define vcmpeqq_n_s8(__a, __b) __arm_vcmpeqq_n_s8(__a, __b)
>  #define vqshluq_n_s8(__a,  __imm) __arm_vqshluq_n_s8(__a,  __imm)
>  #define vaddvq_p_s8(__a, __p) __arm_vaddvq_p_s8(__a, __p)
> -#define vshlq_r_s8(__a, __b) __arm_vshlq_r_s8(__a, __b)
> -#define vqshlq_s8(__a, __b) __arm_vqshlq_s8(__a, __b)
> -#define vqshlq_r_s8(__a, __b) __arm_vqshlq_r_s8(__a, __b)
>  #define vornq_s8(__a, __b) __arm_vornq_s8(__a, __b)
>  #define vmulltq_int_s8(__a, __b) __arm_vmulltq_int_s8(__a, __b)
>  #define vmullbq_int_s8(__a, __b) __arm_vmullbq_int_s8(__a, __b)
> @@ -694,9 +666,7 @@
>  #define vbrsrq_n_s8(__a, __b) __arm_vbrsrq_n_s8(__a, __b)
>  #define vbicq_s8(__a, __b) __arm_vbicq_s8(__a, __b)
>  #define vaddvaq_s8(__a, __b) __arm_vaddvaq_s8(__a, __b)
> -#define vshlq_n_s8(__a,  __imm) __arm_vshlq_n_s8(__a,  __imm)
>  #define vrshrq_n_s8(__a,  __imm) __arm_vrshrq_n_s8(__a,  __imm)
> -#define vqshlq_n_s8(__a,  __imm) __arm_vqshlq_n_s8(__a,  __imm)
>  #define vornq_u16(__a, __b) __arm_vornq_u16(__a, __b)
>  #define vmulltq_int_u16(__a, __b) __arm_vmulltq_int_u16(__a, __b)
>  #define vmullbq_int_u16(__a, __b) __arm_vmullbq_int_u16(__a, __b)
> @@ -717,17 +687,12 @@
>  #define vbicq_u16(__a, __b) __arm_vbicq_u16(__a, __b)
>  #define vaddvq_p_u16(__a, __p) __arm_vaddvq_p_u16(__a, __p)
>  #define vaddvaq_u16(__a, __b) __arm_vaddvaq_u16(__a, __b)
> -#define vshlq_r_u16(__a, __b) __arm_vshlq_r_u16(__a, __b)
> -#define vqshlq_u16(__a, __b) __arm_vqshlq_u16(__a, __b)
> -#define vqshlq_r_u16(__a, __b) __arm_vqshlq_r_u16(__a, __b)
>  #define vminavq_s16(__a, __b) __arm_vminavq_s16(__a, __b)
>  #define vminaq_s16(__a, __b) __arm_vminaq_s16(__a, __b)
>  #define vmaxavq_s16(__a, __b) __arm_vmaxavq_s16(__a, __b)
>  #define vmaxaq_s16(__a, __b) __arm_vmaxaq_s16(__a, __b)
>  #define vbrsrq_n_u16(__a, __b) __arm_vbrsrq_n_u16(__a, __b)
> -#define vshlq_n_u16(__a,  __imm) __arm_vshlq_n_u16(__a,  __imm)
>  #define vrshrq_n_u16(__a,  __imm) __arm_vrshrq_n_u16(__a,  __imm)
> -#define vqshlq_n_u16(__a,  __imm) __arm_vqshlq_n_u16(__a,  __imm)
>  #define vcmpneq_n_s16(__a, __b) __arm_vcmpneq_n_s16(__a, __b)
>  #define vcmpltq_s16(__a, __b) __arm_vcmpltq_s16(__a, __b)
>  #define vcmpltq_n_s16(__a, __b) __arm_vcmpltq_n_s16(__a, __b)
> @@ -741,9 +706,6 @@
>  #define vcmpeqq_n_s16(__a, __b) __arm_vcmpeqq_n_s16(__a, __b)
>  #define vqshluq_n_s16(__a,  __imm) __arm_vqshluq_n_s16(__a,  __imm)
>  #define vaddvq_p_s16(__a, __p) __arm_vaddvq_p_s16(__a, __p)
> -#define vshlq_r_s16(__a, __b) __arm_vshlq_r_s16(__a, __b)
> -#define vqshlq_s16(__a, __b) __arm_vqshlq_s16(__a, __b)
> -#define vqshlq_r_s16(__a, __b) __arm_vqshlq_r_s16(__a, __b)
>  #define vornq_s16(__a, __b) __arm_vornq_s16(__a, __b)
>  #define vmulltq_int_s16(__a, __b) __arm_vmulltq_int_s16(__a, __b)
>  #define vmullbq_int_s16(__a, __b) __arm_vmullbq_int_s16(__a, __b)
> @@ -762,9 +724,7 @@
>  #define vbrsrq_n_s16(__a, __b) __arm_vbrsrq_n_s16(__a, __b)
>  #define vbicq_s16(__a, __b) __arm_vbicq_s16(__a, __b)
>  #define vaddvaq_s16(__a, __b) __arm_vaddvaq_s16(__a, __b)
> -#define vshlq_n_s16(__a,  __imm) __arm_vshlq_n_s16(__a,  __imm)
>  #define vrshrq_n_s16(__a,  __imm) __arm_vrshrq_n_s16(__a,  __imm)
> -#define vqshlq_n_s16(__a,  __imm) __arm_vqshlq_n_s16(__a,  __imm)
>  #define vornq_u32(__a, __b) __arm_vornq_u32(__a, __b)
>  #define vmulltq_int_u32(__a, __b) __arm_vmulltq_int_u32(__a, __b)
>  #define vmullbq_int_u32(__a, __b) __arm_vmullbq_int_u32(__a, __b)
> @@ -785,17 +745,12 @@
>  #define vbicq_u32(__a, __b) __arm_vbicq_u32(__a, __b)
>  #define vaddvq_p_u32(__a, __p) __arm_vaddvq_p_u32(__a, __p)
>  #define vaddvaq_u32(__a, __b) __arm_vaddvaq_u32(__a, __b)
> -#define vshlq_r_u32(__a, __b) __arm_vshlq_r_u32(__a, __b)
> -#define vqshlq_u32(__a, __b) __arm_vqshlq_u32(__a, __b)
> -#define vqshlq_r_u32(__a, __b) __arm_vqshlq_r_u32(__a, __b)
>  #define vminavq_s32(__a, __b) __arm_vminavq_s32(__a, __b)
>  #define vminaq_s32(__a, __b) __arm_vminaq_s32(__a, __b)
>  #define vmaxavq_s32(__a, __b) __arm_vmaxavq_s32(__a, __b)
>  #define vmaxaq_s32(__a, __b) __arm_vmaxaq_s32(__a, __b)
>  #define vbrsrq_n_u32(__a, __b) __arm_vbrsrq_n_u32(__a, __b)
> -#define vshlq_n_u32(__a,  __imm) __arm_vshlq_n_u32(__a,  __imm)
>  #define vrshrq_n_u32(__a,  __imm) __arm_vrshrq_n_u32(__a,  __imm)
> -#define vqshlq_n_u32(__a,  __imm) __arm_vqshlq_n_u32(__a,  __imm)
>  #define vcmpneq_n_s32(__a, __b) __arm_vcmpneq_n_s32(__a, __b)
>  #define vcmpltq_s32(__a, __b) __arm_vcmpltq_s32(__a, __b)
>  #define vcmpltq_n_s32(__a, __b) __arm_vcmpltq_n_s32(__a, __b)
> @@ -809,9 +764,6 @@
>  #define vcmpeqq_n_s32(__a, __b) __arm_vcmpeqq_n_s32(__a, __b)
>  #define vqshluq_n_s32(__a,  __imm) __arm_vqshluq_n_s32(__a,  __imm)
>  #define vaddvq_p_s32(__a, __p) __arm_vaddvq_p_s32(__a, __p)
> -#define vshlq_r_s32(__a, __b) __arm_vshlq_r_s32(__a, __b)
> -#define vqshlq_s32(__a, __b) __arm_vqshlq_s32(__a, __b)
> -#define vqshlq_r_s32(__a, __b) __arm_vqshlq_r_s32(__a, __b)
>  #define vornq_s32(__a, __b) __arm_vornq_s32(__a, __b)
>  #define vmulltq_int_s32(__a, __b) __arm_vmulltq_int_s32(__a, __b)
>  #define vmullbq_int_s32(__a, __b) __arm_vmullbq_int_s32(__a, __b)
> @@ -830,9 +782,7 @@
>  #define vbrsrq_n_s32(__a, __b) __arm_vbrsrq_n_s32(__a, __b)
>  #define vbicq_s32(__a, __b) __arm_vbicq_s32(__a, __b)
>  #define vaddvaq_s32(__a, __b) __arm_vaddvaq_s32(__a, __b)
> -#define vshlq_n_s32(__a,  __imm) __arm_vshlq_n_s32(__a,  __imm)
>  #define vrshrq_n_s32(__a,  __imm) __arm_vrshrq_n_s32(__a,  __imm)
> -#define vqshlq_n_s32(__a,  __imm) __arm_vqshlq_n_s32(__a,  __imm)
>  #define vqmovntq_u16(__a, __b) __arm_vqmovntq_u16(__a, __b)
>  #define vqmovnbq_u16(__a, __b) __arm_vqmovnbq_u16(__a, __b)
>  #define vmulltq_poly_p8(__a, __b) __arm_vmulltq_poly_p8(__a, __b)
> @@ -1013,8 +963,6 @@
>  #define vaddvaq_p_u8(__a, __b, __p) __arm_vaddvaq_p_u8(__a, __b, __p)
>  #define vsriq_n_u8(__a, __b,  __imm) __arm_vsriq_n_u8(__a, __b,  __imm)
>  #define vsliq_n_u8(__a, __b,  __imm) __arm_vsliq_n_u8(__a, __b,  __imm)
> -#define vshlq_m_r_u8(__a, __b, __p) __arm_vshlq_m_r_u8(__a, __b, __p)
> -#define vqshlq_m_r_u8(__a, __b, __p) __arm_vqshlq_m_r_u8(__a, __b,
> __p)
>  #define vminavq_p_s8(__a, __b, __p) __arm_vminavq_p_s8(__a, __b, __p)
>  #define vminaq_m_s8(__a, __b, __p) __arm_vminaq_m_s8(__a, __b, __p)
>  #define vmaxavq_p_s8(__a, __b, __p) __arm_vmaxavq_p_s8(__a, __b, __p)
> @@ -1031,9 +979,7 @@
>  #define vcmpgeq_m_n_s8(__a, __b, __p) __arm_vcmpgeq_m_n_s8(__a,
> __b, __p)
>  #define vcmpeqq_m_s8(__a, __b, __p) __arm_vcmpeqq_m_s8(__a, __b,
> __p)
>  #define vcmpeqq_m_n_s8(__a, __b, __p) __arm_vcmpeqq_m_n_s8(__a,
> __b, __p)
> -#define vshlq_m_r_s8(__a, __b, __p) __arm_vshlq_m_r_s8(__a, __b, __p)
>  #define vrev64q_m_s8(__inactive, __a, __p)
> __arm_vrev64q_m_s8(__inactive, __a, __p)
> -#define vqshlq_m_r_s8(__a, __b, __p) __arm_vqshlq_m_r_s8(__a, __b, __p)
>  #define vqnegq_m_s8(__inactive, __a, __p) __arm_vqnegq_m_s8(__inactive,
> __a, __p)
>  #define vqabsq_m_s8(__inactive, __a, __p) __arm_vqabsq_m_s8(__inactive,
> __a, __p)
>  #define vnegq_m_s8(__inactive, __a, __p) __arm_vnegq_m_s8(__inactive,
> __a, __p)
> @@ -1092,8 +1038,6 @@
>  #define vaddvaq_p_u16(__a, __b, __p) __arm_vaddvaq_p_u16(__a, __b,
> __p)
>  #define vsriq_n_u16(__a, __b,  __imm) __arm_vsriq_n_u16(__a, __b,
> __imm)
>  #define vsliq_n_u16(__a, __b,  __imm) __arm_vsliq_n_u16(__a, __b,
> __imm)
> -#define vshlq_m_r_u16(__a, __b, __p) __arm_vshlq_m_r_u16(__a, __b,
> __p)
> -#define vqshlq_m_r_u16(__a, __b, __p) __arm_vqshlq_m_r_u16(__a, __b,
> __p)
>  #define vminavq_p_s16(__a, __b, __p) __arm_vminavq_p_s16(__a, __b,
> __p)
>  #define vminaq_m_s16(__a, __b, __p) __arm_vminaq_m_s16(__a, __b, __p)
>  #define vmaxavq_p_s16(__a, __b, __p) __arm_vmaxavq_p_s16(__a, __b,
> __p)
> @@ -1110,9 +1054,7 @@
>  #define vcmpgeq_m_n_s16(__a, __b, __p) __arm_vcmpgeq_m_n_s16(__a,
> __b, __p)
>  #define vcmpeqq_m_s16(__a, __b, __p) __arm_vcmpeqq_m_s16(__a, __b,
> __p)
>  #define vcmpeqq_m_n_s16(__a, __b, __p) __arm_vcmpeqq_m_n_s16(__a,
> __b, __p)
> -#define vshlq_m_r_s16(__a, __b, __p) __arm_vshlq_m_r_s16(__a, __b, __p)
>  #define vrev64q_m_s16(__inactive, __a, __p)
> __arm_vrev64q_m_s16(__inactive, __a, __p)
> -#define vqshlq_m_r_s16(__a, __b, __p) __arm_vqshlq_m_r_s16(__a, __b,
> __p)
>  #define vqnegq_m_s16(__inactive, __a, __p)
> __arm_vqnegq_m_s16(__inactive, __a, __p)
>  #define vqabsq_m_s16(__inactive, __a, __p)
> __arm_vqabsq_m_s16(__inactive, __a, __p)
>  #define vnegq_m_s16(__inactive, __a, __p) __arm_vnegq_m_s16(__inactive,
> __a, __p)
> @@ -1171,8 +1113,6 @@
>  #define vaddvaq_p_u32(__a, __b, __p) __arm_vaddvaq_p_u32(__a, __b,
> __p)
>  #define vsriq_n_u32(__a, __b,  __imm) __arm_vsriq_n_u32(__a, __b,
> __imm)
>  #define vsliq_n_u32(__a, __b,  __imm) __arm_vsliq_n_u32(__a, __b,
> __imm)
> -#define vshlq_m_r_u32(__a, __b, __p) __arm_vshlq_m_r_u32(__a, __b,
> __p)
> -#define vqshlq_m_r_u32(__a, __b, __p) __arm_vqshlq_m_r_u32(__a, __b,
> __p)
>  #define vminavq_p_s32(__a, __b, __p) __arm_vminavq_p_s32(__a, __b,
> __p)
>  #define vminaq_m_s32(__a, __b, __p) __arm_vminaq_m_s32(__a, __b, __p)
>  #define vmaxavq_p_s32(__a, __b, __p) __arm_vmaxavq_p_s32(__a, __b,
> __p)
> @@ -1189,9 +1129,7 @@
>  #define vcmpgeq_m_n_s32(__a, __b, __p) __arm_vcmpgeq_m_n_s32(__a,
> __b, __p)
>  #define vcmpeqq_m_s32(__a, __b, __p) __arm_vcmpeqq_m_s32(__a, __b,
> __p)
>  #define vcmpeqq_m_n_s32(__a, __b, __p) __arm_vcmpeqq_m_n_s32(__a,
> __b, __p)
> -#define vshlq_m_r_s32(__a, __b, __p) __arm_vshlq_m_r_s32(__a, __b, __p)
>  #define vrev64q_m_s32(__inactive, __a, __p)
> __arm_vrev64q_m_s32(__inactive, __a, __p)
> -#define vqshlq_m_r_s32(__a, __b, __p) __arm_vqshlq_m_r_s32(__a, __b,
> __p)
>  #define vqnegq_m_s32(__inactive, __a, __p)
> __arm_vqnegq_m_s32(__inactive, __a, __p)
>  #define vqabsq_m_s32(__inactive, __a, __p)
> __arm_vqabsq_m_s32(__inactive, __a, __p)
>  #define vnegq_m_s32(__inactive, __a, __p) __arm_vnegq_m_s32(__inactive,
> __a, __p)
> @@ -1429,26 +1367,20 @@
>  #define vqshluq_m_n_s8(__inactive, __a,  __imm, __p)
> __arm_vqshluq_m_n_s8(__inactive, __a,  __imm, __p)
>  #define vabavq_p_s8(__a, __b, __c, __p) __arm_vabavq_p_s8(__a, __b, __c,
> __p)
>  #define vsriq_m_n_u8(__a, __b,  __imm, __p) __arm_vsriq_m_n_u8(__a,
> __b,  __imm, __p)
> -#define vshlq_m_u8(__inactive, __a, __b, __p)
> __arm_vshlq_m_u8(__inactive, __a, __b, __p)
>  #define vabavq_p_u8(__a, __b, __c, __p) __arm_vabavq_p_u8(__a, __b, __c,
> __p)
> -#define vshlq_m_s8(__inactive, __a, __b, __p)
> __arm_vshlq_m_s8(__inactive, __a, __b, __p)
>  #define vcvtq_m_n_f16_s16(__inactive, __a,  __imm6, __p)
> __arm_vcvtq_m_n_f16_s16(__inactive, __a,  __imm6, __p)
>  #define vsriq_m_n_s16(__a, __b,  __imm, __p) __arm_vsriq_m_n_s16(__a,
> __b,  __imm, __p)
>  #define vcvtq_m_n_f32_u32(__inactive, __a,  __imm6, __p)
> __arm_vcvtq_m_n_f32_u32(__inactive, __a,  __imm6, __p)
>  #define vqshluq_m_n_s16(__inactive, __a,  __imm, __p)
> __arm_vqshluq_m_n_s16(__inactive, __a,  __imm, __p)
>  #define vabavq_p_s16(__a, __b, __c, __p) __arm_vabavq_p_s16(__a, __b,
> __c, __p)
>  #define vsriq_m_n_u16(__a, __b,  __imm, __p) __arm_vsriq_m_n_u16(__a,
> __b,  __imm, __p)
> -#define vshlq_m_u16(__inactive, __a, __b, __p)
> __arm_vshlq_m_u16(__inactive, __a, __b, __p)
>  #define vabavq_p_u16(__a, __b, __c, __p) __arm_vabavq_p_u16(__a, __b,
> __c, __p)
> -#define vshlq_m_s16(__inactive, __a, __b, __p)
> __arm_vshlq_m_s16(__inactive, __a, __b, __p)
>  #define vcvtq_m_n_f32_s32(__inactive, __a,  __imm6, __p)
> __arm_vcvtq_m_n_f32_s32(__inactive, __a,  __imm6, __p)
>  #define vsriq_m_n_s32(__a, __b,  __imm, __p) __arm_vsriq_m_n_s32(__a,
> __b,  __imm, __p)
>  #define vqshluq_m_n_s32(__inactive, __a,  __imm, __p)
> __arm_vqshluq_m_n_s32(__inactive, __a,  __imm, __p)
>  #define vabavq_p_s32(__a, __b, __c, __p) __arm_vabavq_p_s32(__a, __b,
> __c, __p)
>  #define vsriq_m_n_u32(__a, __b,  __imm, __p) __arm_vsriq_m_n_u32(__a,
> __b,  __imm, __p)
> -#define vshlq_m_u32(__inactive, __a, __b, __p)
> __arm_vshlq_m_u32(__inactive, __a, __b, __p)
>  #define vabavq_p_u32(__a, __b, __c, __p) __arm_vabavq_p_u32(__a, __b,
> __c, __p)
> -#define vshlq_m_s32(__inactive, __a, __b, __p)
> __arm_vshlq_m_s32(__inactive, __a, __b, __p)
>  #define vbicq_m_s8(__inactive, __a, __b, __p)
> __arm_vbicq_m_s8(__inactive, __a, __b, __p)
>  #define vbicq_m_s32(__inactive, __a, __b, __p)
> __arm_vbicq_m_s32(__inactive, __a, __b, __p)
>  #define vbicq_m_s16(__inactive, __a, __b, __p)
> __arm_vbicq_m_s16(__inactive, __a, __b, __p)
> @@ -1572,30 +1504,12 @@
>  #define vqrdmlsdhxq_m_s8(__inactive, __a, __b, __p)
> __arm_vqrdmlsdhxq_m_s8(__inactive, __a, __b, __p)
>  #define vqrdmlsdhxq_m_s32(__inactive, __a, __b, __p)
> __arm_vqrdmlsdhxq_m_s32(__inactive, __a, __b, __p)
>  #define vqrdmlsdhxq_m_s16(__inactive, __a, __b, __p)
> __arm_vqrdmlsdhxq_m_s16(__inactive, __a, __b, __p)
> -#define vqshlq_m_n_s8(__inactive, __a,  __imm, __p)
> __arm_vqshlq_m_n_s8(__inactive, __a,  __imm, __p)
> -#define vqshlq_m_n_s32(__inactive, __a,  __imm, __p)
> __arm_vqshlq_m_n_s32(__inactive, __a,  __imm, __p)
> -#define vqshlq_m_n_s16(__inactive, __a,  __imm, __p)
> __arm_vqshlq_m_n_s16(__inactive, __a,  __imm, __p)
> -#define vqshlq_m_n_u8(__inactive, __a,  __imm, __p)
> __arm_vqshlq_m_n_u8(__inactive, __a,  __imm, __p)
> -#define vqshlq_m_n_u32(__inactive, __a,  __imm, __p)
> __arm_vqshlq_m_n_u32(__inactive, __a,  __imm, __p)
> -#define vqshlq_m_n_u16(__inactive, __a,  __imm, __p)
> __arm_vqshlq_m_n_u16(__inactive, __a,  __imm, __p)
> -#define vqshlq_m_s8(__inactive, __a, __b, __p)
> __arm_vqshlq_m_s8(__inactive, __a, __b, __p)
> -#define vqshlq_m_s32(__inactive, __a, __b, __p)
> __arm_vqshlq_m_s32(__inactive, __a, __b, __p)
> -#define vqshlq_m_s16(__inactive, __a, __b, __p)
> __arm_vqshlq_m_s16(__inactive, __a, __b, __p)
> -#define vqshlq_m_u8(__inactive, __a, __b, __p)
> __arm_vqshlq_m_u8(__inactive, __a, __b, __p)
> -#define vqshlq_m_u32(__inactive, __a, __b, __p)
> __arm_vqshlq_m_u32(__inactive, __a, __b, __p)
> -#define vqshlq_m_u16(__inactive, __a, __b, __p)
> __arm_vqshlq_m_u16(__inactive, __a, __b, __p)
>  #define vrshrq_m_n_s8(__inactive, __a,  __imm, __p)
> __arm_vrshrq_m_n_s8(__inactive, __a,  __imm, __p)
>  #define vrshrq_m_n_s32(__inactive, __a,  __imm, __p)
> __arm_vrshrq_m_n_s32(__inactive, __a,  __imm, __p)
>  #define vrshrq_m_n_s16(__inactive, __a,  __imm, __p)
> __arm_vrshrq_m_n_s16(__inactive, __a,  __imm, __p)
>  #define vrshrq_m_n_u8(__inactive, __a,  __imm, __p)
> __arm_vrshrq_m_n_u8(__inactive, __a,  __imm, __p)
>  #define vrshrq_m_n_u32(__inactive, __a,  __imm, __p)
> __arm_vrshrq_m_n_u32(__inactive, __a,  __imm, __p)
>  #define vrshrq_m_n_u16(__inactive, __a,  __imm, __p)
> __arm_vrshrq_m_n_u16(__inactive, __a,  __imm, __p)
> -#define vshlq_m_n_s8(__inactive, __a,  __imm, __p)
> __arm_vshlq_m_n_s8(__inactive, __a,  __imm, __p)
> -#define vshlq_m_n_s32(__inactive, __a,  __imm, __p)
> __arm_vshlq_m_n_s32(__inactive, __a,  __imm, __p)
> -#define vshlq_m_n_s16(__inactive, __a,  __imm, __p)
> __arm_vshlq_m_n_s16(__inactive, __a,  __imm, __p)
> -#define vshlq_m_n_u8(__inactive, __a,  __imm, __p)
> __arm_vshlq_m_n_u8(__inactive, __a,  __imm, __p)
> -#define vshlq_m_n_u32(__inactive, __a,  __imm, __p)
> __arm_vshlq_m_n_u32(__inactive, __a,  __imm, __p)
> -#define vshlq_m_n_u16(__inactive, __a,  __imm, __p)
> __arm_vshlq_m_n_u16(__inactive, __a,  __imm, __p)
>  #define vshrq_m_n_s8(__inactive, __a,  __imm, __p)
> __arm_vshrq_m_n_s8(__inactive, __a,  __imm, __p)
>  #define vshrq_m_n_s32(__inactive, __a,  __imm, __p)
> __arm_vshrq_m_n_s32(__inactive, __a,  __imm, __p)
>  #define vshrq_m_n_s16(__inactive, __a,  __imm, __p)
> __arm_vshrq_m_n_s16(__inactive, __a,  __imm, __p)
> @@ -2146,18 +2060,6 @@
>  #define vshlltq_x_n_s16(__a,  __imm, __p) __arm_vshlltq_x_n_s16(__a,
> __imm, __p)
>  #define vshlltq_x_n_u8(__a,  __imm, __p) __arm_vshlltq_x_n_u8(__a,
> __imm, __p)
>  #define vshlltq_x_n_u16(__a,  __imm, __p) __arm_vshlltq_x_n_u16(__a,
> __imm, __p)
> -#define vshlq_x_s8(__a, __b, __p) __arm_vshlq_x_s8(__a, __b, __p)
> -#define vshlq_x_s16(__a, __b, __p) __arm_vshlq_x_s16(__a, __b, __p)
> -#define vshlq_x_s32(__a, __b, __p) __arm_vshlq_x_s32(__a, __b, __p)
> -#define vshlq_x_u8(__a, __b, __p) __arm_vshlq_x_u8(__a, __b, __p)
> -#define vshlq_x_u16(__a, __b, __p) __arm_vshlq_x_u16(__a, __b, __p)
> -#define vshlq_x_u32(__a, __b, __p) __arm_vshlq_x_u32(__a, __b, __p)
> -#define vshlq_x_n_s8(__a,  __imm, __p) __arm_vshlq_x_n_s8(__a,  __imm,
> __p)
> -#define vshlq_x_n_s16(__a,  __imm, __p) __arm_vshlq_x_n_s16(__a,
> __imm, __p)
> -#define vshlq_x_n_s32(__a,  __imm, __p) __arm_vshlq_x_n_s32(__a,
> __imm, __p)
> -#define vshlq_x_n_u8(__a,  __imm, __p) __arm_vshlq_x_n_u8(__a,  __imm,
> __p)
> -#define vshlq_x_n_u16(__a,  __imm, __p) __arm_vshlq_x_n_u16(__a,
> __imm, __p)
> -#define vshlq_x_n_u32(__a,  __imm, __p) __arm_vshlq_x_n_u32(__a,
> __imm, __p)
>  #define vrshrq_x_n_s8(__a,  __imm, __p) __arm_vrshrq_x_n_s8(__a,
> __imm, __p)
>  #define vrshrq_x_n_s16(__a,  __imm, __p) __arm_vrshrq_x_n_s16(__a,
> __imm, __p)
>  #define vrshrq_x_n_s32(__a,  __imm, __p) __arm_vrshrq_x_n_s32(__a,
> __imm, __p)
> @@ -3000,48 +2902,6 @@ __arm_vcmpneq_u32 (uint32x4_t __a, uint32x4_t
> __b)
>    return __builtin_mve_vcmpneq_v4si ((int32x4_t)__a, (int32x4_t)__b);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_s8 (int8x16_t __a, int8x16_t __b)
> -{
> -  return __builtin_mve_vshlq_sv16qi (__a, __b);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_s16 (int16x8_t __a, int16x8_t __b)
> -{
> -  return __builtin_mve_vshlq_sv8hi (__a, __b);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_s32 (int32x4_t __a, int32x4_t __b)
> -{
> -  return __builtin_mve_vshlq_sv4si (__a, __b);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_u8 (uint8x16_t __a, int8x16_t __b)
> -{
> -  return __builtin_mve_vshlq_uv16qi (__a, __b);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_u16 (uint16x8_t __a, int16x8_t __b)
> -{
> -  return __builtin_mve_vshlq_uv8hi (__a, __b);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_u32 (uint32x4_t __a, int32x4_t __b)
> -{
> -  return __builtin_mve_vshlq_uv4si (__a, __b);
> -}
> -
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq_u8 (uint8x16_t __a, uint8x16_t __b)
> @@ -3184,27 +3044,6 @@ __arm_vaddvaq_u8 (uint32_t __a, uint8x16_t
> __b)
>    return __builtin_mve_vaddvaq_uv16qi (__a, __b);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_r_u8 (uint8x16_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vshlq_r_uv16qi (__a, __b);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
> -{
> -  return __builtin_mve_vqshlq_uv16qi (__a, __b);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_r_u8 (uint8x16_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vqshlq_r_uv16qi (__a, __b);
> -}
> -
>  __extension__ extern __inline uint8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq_s8 (uint8_t __a, int8x16_t __b)
> @@ -3240,13 +3079,6 @@ __arm_vbrsrq_n_u8 (uint8x16_t __a, int32_t __b)
>    return __builtin_mve_vbrsrq_n_uv16qi (__a, __b);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_n_u8 (uint8x16_t __a, const int __imm)
> -{
> -  return __builtin_mve_vshlq_n_uv16qi (__a, __imm);
> -}
> -
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrshrq_n_u8 (uint8x16_t __a, const int __imm)
> @@ -3254,13 +3086,6 @@ __arm_vrshrq_n_u8 (uint8x16_t __a, const int
> __imm)
>    return __builtin_mve_vrshrq_n_uv16qi (__a, __imm);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_n_u8 (uint8x16_t __a, const int __imm)
> -{
> -  return __builtin_mve_vqshlq_n_uv16qi (__a, __imm);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq_n_s8 (int8x16_t __a, int8_t __b)
> @@ -3352,27 +3177,6 @@ __arm_vaddvq_p_s8 (int8x16_t __a,
> mve_pred16_t __p)
>    return __builtin_mve_vaddvq_p_sv16qi (__a, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_r_s8 (int8x16_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vshlq_r_sv16qi (__a, __b);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_s8 (int8x16_t __a, int8x16_t __b)
> -{
> -  return __builtin_mve_vqshlq_sv16qi (__a, __b);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_r_s8 (int8x16_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vqshlq_r_sv16qi (__a, __b);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq_s8 (int8x16_t __a, int8x16_t __b)
> @@ -3499,13 +3303,6 @@ __arm_vaddvaq_s8 (int32_t __a, int8x16_t __b)
>    return __builtin_mve_vaddvaq_sv16qi (__a, __b);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_n_s8 (int8x16_t __a, const int __imm)
> -{
> -  return __builtin_mve_vshlq_n_sv16qi (__a, __imm);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrshrq_n_s8 (int8x16_t __a, const int __imm)
> @@ -3513,13 +3310,6 @@ __arm_vrshrq_n_s8 (int8x16_t __a, const int
> __imm)
>    return __builtin_mve_vrshrq_n_sv16qi (__a, __imm);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_n_s8 (int8x16_t __a, const int __imm)
> -{
> -  return __builtin_mve_vqshlq_n_sv16qi (__a, __imm);
> -}
> -
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq_u16 (uint16x8_t __a, uint16x8_t __b)
> @@ -3662,27 +3452,6 @@ __arm_vaddvaq_u16 (uint32_t __a, uint16x8_t
> __b)
>    return __builtin_mve_vaddvaq_uv8hi (__a, __b);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_r_u16 (uint16x8_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vshlq_r_uv8hi (__a, __b);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
> -{
> -  return __builtin_mve_vqshlq_uv8hi (__a, __b);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_r_u16 (uint16x8_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vqshlq_r_uv8hi (__a, __b);
> -}
> -
>  __extension__ extern __inline uint16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq_s16 (uint16_t __a, int16x8_t __b)
> @@ -3718,13 +3487,6 @@ __arm_vbrsrq_n_u16 (uint16x8_t __a, int32_t
> __b)
>    return __builtin_mve_vbrsrq_n_uv8hi (__a, __b);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_n_u16 (uint16x8_t __a, const int __imm)
> -{
> -  return __builtin_mve_vshlq_n_uv8hi (__a, __imm);
> -}
> -
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrshrq_n_u16 (uint16x8_t __a, const int __imm)
> @@ -3732,13 +3494,6 @@ __arm_vrshrq_n_u16 (uint16x8_t __a, const int
> __imm)
>    return __builtin_mve_vrshrq_n_uv8hi (__a, __imm);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_n_u16 (uint16x8_t __a, const int __imm)
> -{
> -  return __builtin_mve_vqshlq_n_uv8hi (__a, __imm);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq_n_s16 (int16x8_t __a, int16_t __b)
> @@ -3830,27 +3585,6 @@ __arm_vaddvq_p_s16 (int16x8_t __a,
> mve_pred16_t __p)
>    return __builtin_mve_vaddvq_p_sv8hi (__a, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_r_s16 (int16x8_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vshlq_r_sv8hi (__a, __b);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_s16 (int16x8_t __a, int16x8_t __b)
> -{
> -  return __builtin_mve_vqshlq_sv8hi (__a, __b);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_r_s16 (int16x8_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vqshlq_r_sv8hi (__a, __b);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq_s16 (int16x8_t __a, int16x8_t __b)
> @@ -3977,13 +3711,6 @@ __arm_vaddvaq_s16 (int32_t __a, int16x8_t __b)
>    return __builtin_mve_vaddvaq_sv8hi (__a, __b);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_n_s16 (int16x8_t __a, const int __imm)
> -{
> -  return __builtin_mve_vshlq_n_sv8hi (__a, __imm);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrshrq_n_s16 (int16x8_t __a, const int __imm)
> @@ -3991,13 +3718,6 @@ __arm_vrshrq_n_s16 (int16x8_t __a, const int
> __imm)
>    return __builtin_mve_vrshrq_n_sv8hi (__a, __imm);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_n_s16 (int16x8_t __a, const int __imm)
> -{
> -  return __builtin_mve_vqshlq_n_sv8hi (__a, __imm);
> -}
> -
>  __extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq_u32 (uint32x4_t __a, uint32x4_t __b)
> @@ -4140,27 +3860,6 @@ __arm_vaddvaq_u32 (uint32_t __a, uint32x4_t
> __b)
>    return __builtin_mve_vaddvaq_uv4si (__a, __b);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_r_u32 (uint32x4_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vshlq_r_uv4si (__a, __b);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
> -{
> -  return __builtin_mve_vqshlq_uv4si (__a, __b);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_r_u32 (uint32x4_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vqshlq_r_uv4si (__a, __b);
> -}
> -
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq_s32 (uint32_t __a, int32x4_t __b)
> @@ -4196,13 +3895,6 @@ __arm_vbrsrq_n_u32 (uint32x4_t __a, int32_t __b)
>    return __builtin_mve_vbrsrq_n_uv4si (__a, __b);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_n_u32 (uint32x4_t __a, const int __imm)
> -{
> -  return __builtin_mve_vshlq_n_uv4si (__a, __imm);
> -}
> -
>  __extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrshrq_n_u32 (uint32x4_t __a, const int __imm)
> @@ -4210,13 +3902,6 @@ __arm_vrshrq_n_u32 (uint32x4_t __a, const int __imm)
>    return __builtin_mve_vrshrq_n_uv4si (__a, __imm);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_n_u32 (uint32x4_t __a, const int __imm)
> -{
> -  return __builtin_mve_vqshlq_n_uv4si (__a, __imm);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq_n_s32 (int32x4_t __a, int32_t __b)
> @@ -4308,27 +3993,6 @@ __arm_vaddvq_p_s32 (int32x4_t __a, mve_pred16_t __p)
>    return __builtin_mve_vaddvq_p_sv4si (__a, __p);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_r_s32 (int32x4_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vshlq_r_sv4si (__a, __b);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_s32 (int32x4_t __a, int32x4_t __b)
> -{
> -  return __builtin_mve_vqshlq_sv4si (__a, __b);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_r_s32 (int32x4_t __a, int32_t __b)
> -{
> -  return __builtin_mve_vqshlq_r_sv4si (__a, __b);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq_s32 (int32x4_t __a, int32x4_t __b)
> @@ -4455,13 +4119,6 @@ __arm_vaddvaq_s32 (int32_t __a, int32x4_t __b)
>    return __builtin_mve_vaddvaq_sv4si (__a, __b);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_n_s32 (int32x4_t __a, const int __imm)
> -{
> -  return __builtin_mve_vshlq_n_sv4si (__a, __imm);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrshrq_n_s32 (int32x4_t __a, const int __imm)
> @@ -4469,13 +4126,6 @@ __arm_vrshrq_n_s32 (int32x4_t __a, const int __imm)
>    return __builtin_mve_vrshrq_n_sv4si (__a, __imm);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_n_s32 (int32x4_t __a, const int __imm)
> -{
> -  return __builtin_mve_vqshlq_n_sv4si (__a, __imm);
> -}
> -
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqmovntq_u16 (uint8x16_t __a, uint16x8_t __b)
> @@ -5272,20 +4922,6 @@ __arm_vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm)
>    return __builtin_mve_vsliq_n_uv16qi (__a, __b, __imm);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_r_u8 (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlq_m_r_uv16qi (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_r_u8 (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshlq_m_r_uv16qi (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq_p_s8 (uint8_t __a, int8x16_t __b, mve_pred16_t __p)
> @@ -5398,13 +5034,6 @@ __arm_vcmpeqq_m_n_s8 (int8x16_t __a, int8_t __b, mve_pred16_t __p)
>    return __builtin_mve_vcmpeqq_m_n_sv16qi (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_r_s8 (int8x16_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlq_m_r_sv16qi (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrev64q_m_s8 (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
> @@ -5412,13 +5041,6 @@ __arm_vrev64q_m_s8 (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
>    return __builtin_mve_vrev64q_m_sv16qi (__inactive, __a, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_r_s8 (int8x16_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshlq_m_r_sv16qi (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqnegq_m_s8 (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
> @@ -5826,20 +5448,6 @@ __arm_vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm)
>    return __builtin_mve_vsliq_n_uv8hi (__a, __b, __imm);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_r_u16 (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlq_m_r_uv8hi (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_r_u16 (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshlq_m_r_uv8hi (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq_p_s16 (uint16_t __a, int16x8_t __b, mve_pred16_t __p)
> @@ -5952,13 +5560,6 @@ __arm_vcmpeqq_m_n_s16 (int16x8_t __a, int16_t __b, mve_pred16_t __p)
>    return __builtin_mve_vcmpeqq_m_n_sv8hi (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_r_s16 (int16x8_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlq_m_r_sv8hi (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrev64q_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
> @@ -5966,13 +5567,6 @@ __arm_vrev64q_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
>    return __builtin_mve_vrev64q_m_sv8hi (__inactive, __a, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_r_s16 (int16x8_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshlq_m_r_sv8hi (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqnegq_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
> @@ -6379,20 +5973,6 @@ __arm_vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm)
>    return __builtin_mve_vsliq_n_uv4si (__a, __b, __imm);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_r_u32 (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlq_m_r_uv4si (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_r_u32 (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshlq_m_r_uv4si (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq_p_s32 (uint32_t __a, int32x4_t __b, mve_pred16_t __p)
> @@ -6505,13 +6085,6 @@ __arm_vcmpeqq_m_n_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
>    return __builtin_mve_vcmpeqq_m_n_sv4si (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_r_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlq_m_r_sv4si (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrev64q_m_s32 (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
> @@ -6519,13 +6092,6 @@ __arm_vrev64q_m_s32 (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
>    return __builtin_mve_vrev64q_m_sv4si (__inactive, __a, __p);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_r_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshlq_m_r_sv4si (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqnegq_m_s32 (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
> @@ -7527,13 +7093,6 @@ __arm_vsriq_m_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm, mve_pred16_
>    return __builtin_mve_vsriq_m_n_uv16qi (__a, __b, __imm, __p);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlq_m_uv16qi (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vabavq_p_u8 (uint32_t __a, uint8x16_t __b, uint8x16_t __c, mve_pred16_t __p)
> @@ -7541,13 +7100,6 @@ __arm_vabavq_p_u8 (uint32_t __a, uint8x16_t __b, uint8x16_t __c, mve_pred16_t __
>    return __builtin_mve_vabavq_p_uv16qi (__a, __b, __c, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlq_m_sv16qi (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vsriq_m_n_s16 (int16x8_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
> @@ -7576,13 +7128,6 @@ __arm_vsriq_m_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm, mve_pred16
>    return __builtin_mve_vsriq_m_n_uv8hi (__a, __b, __imm, __p);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlq_m_uv8hi (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vabavq_p_u16 (uint32_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t __p)
> @@ -7590,13 +7135,6 @@ __arm_vabavq_p_u16 (uint32_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t _
>    return __builtin_mve_vabavq_p_uv8hi (__a, __b, __c, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlq_m_sv8hi (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vsriq_m_n_s32 (int32x4_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
> @@ -7625,13 +7163,6 @@ __arm_vsriq_m_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm, mve_pred16
>    return __builtin_mve_vsriq_m_n_uv4si (__a, __b, __imm, __p);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlq_m_uv4si (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vabavq_p_u32 (uint32_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p)
> @@ -7639,13 +7170,6 @@ __arm_vabavq_p_u32 (uint32_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t _
>    return __builtin_mve_vabavq_p_uv4si (__a, __b, __c, __p);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlq_m_sv4si (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vbicq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> @@ -8507,90 +8031,6 @@ __arm_vqrdmlsdhxq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve
>    return __builtin_mve_vqrdmlsdhxq_m_sv8hi (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshlq_m_n_sv16qi (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshlq_m_n_sv4si (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshlq_m_n_sv8hi (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_n_u8 (uint8x16_t __inactive, uint8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshlq_m_n_uv16qi (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_n_u32 (uint32x4_t __inactive, uint32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshlq_m_n_uv4si (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshlq_m_n_uv8hi (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshlq_m_sv16qi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshlq_m_sv4si (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshlq_m_sv8hi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshlq_m_uv16qi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshlq_m_uv4si (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshlq_m_uv8hi (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrshrq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
> @@ -8633,48 +8073,6 @@ __arm_vrshrq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, const int __imm, mv
>    return __builtin_mve_vrshrq_m_n_uv8hi (__inactive, __a, __imm, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlq_m_n_sv16qi (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlq_m_n_sv4si (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlq_m_n_sv8hi (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_n_u8 (uint8x16_t __inactive, uint8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlq_m_n_uv16qi (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_n_u32 (uint32x4_t __inactive, uint32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlq_m_n_uv4si (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlq_m_n_uv8hi (__inactive, __a, __imm, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vshrq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
> @@ -11981,163 +11379,79 @@ __arm_vrev64q_x_s32 (int32x4_t __a, mve_pred16_t __p)
> 
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrev64q_x_u8 (uint8x16_t __a, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrev64q_m_uv16qi (__arm_vuninitializedq_u8 (), __a, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrev64q_x_u16 (uint16x8_t __a, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrev64q_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrev64q_x_u32 (uint32x4_t __a, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrev64q_m_uv4si (__arm_vuninitializedq_u32 (), __a, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshllbq_x_n_s8 (int8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshllbq_m_n_sv16qi (__arm_vuninitializedq_s16 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshllbq_x_n_s16 (int16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshllbq_m_n_sv8hi (__arm_vuninitializedq_s32 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshllbq_x_n_u8 (uint8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshllbq_m_n_uv16qi (__arm_vuninitializedq_u16 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshllbq_x_n_u16 (uint16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshllbq_m_n_uv8hi (__arm_vuninitializedq_u32 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlltq_x_n_s8 (int8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlltq_m_n_sv16qi (__arm_vuninitializedq_s16 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlltq_x_n_s16 (int16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlltq_m_n_sv8hi (__arm_vuninitializedq_s32 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlltq_x_n_u8 (uint8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlltq_m_n_uv16qi (__arm_vuninitializedq_u16 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlltq_x_n_u16 (uint16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlltq_m_n_uv8hi (__arm_vuninitializedq_u32 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> +__arm_vrev64q_x_u8 (uint8x16_t __a, mve_pred16_t __p)
>  {
> -  return __builtin_mve_vshlq_m_sv16qi (__arm_vuninitializedq_s8 (), __a, __b, __p);
> +  return __builtin_mve_vrev64q_m_uv16qi (__arm_vuninitializedq_u8 (), __a, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> +__extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> +__arm_vrev64q_x_u16 (uint16x8_t __a, mve_pred16_t __p)
>  {
> -  return __builtin_mve_vshlq_m_sv8hi (__arm_vuninitializedq_s16 (), __a, __b, __p);
> +  return __builtin_mve_vrev64q_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __p);
>  }
> 
> -__extension__ extern __inline int32x4_t
> +__extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> +__arm_vrev64q_x_u32 (uint32x4_t __a, mve_pred16_t __p)
>  {
> -  return __builtin_mve_vshlq_m_sv4si (__arm_vuninitializedq_s32 (), __a, __b, __p);
> +  return __builtin_mve_vrev64q_m_uv4si (__arm_vuninitializedq_u32 (), __a, __p);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> +__extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x_u8 (uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> +__arm_vshllbq_x_n_s8 (int8x16_t __a, const int __imm, mve_pred16_t __p)
>  {
> -  return __builtin_mve_vshlq_m_uv16qi (__arm_vuninitializedq_u8 (), __a, __b, __p);
> +  return __builtin_mve_vshllbq_m_n_sv16qi (__arm_vuninitializedq_s16 (), __a, __imm, __p);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> +__extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x_u16 (uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> +__arm_vshllbq_x_n_s16 (int16x8_t __a, const int __imm, mve_pred16_t __p)
>  {
> -  return __builtin_mve_vshlq_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __b, __p);
> +  return __builtin_mve_vshllbq_m_n_sv8hi (__arm_vuninitializedq_s32 (), __a, __imm, __p);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> +__extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x_u32 (uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> +__arm_vshllbq_x_n_u8 (uint8x16_t __a, const int __imm, mve_pred16_t __p)
>  {
> -  return __builtin_mve_vshlq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __b, __p);
> +  return __builtin_mve_vshllbq_m_n_uv16qi (__arm_vuninitializedq_u16 (), __a, __imm, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> +__extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x_n_s8 (int8x16_t __a, const int __imm, mve_pred16_t __p)
> +__arm_vshllbq_x_n_u16 (uint16x8_t __a, const int __imm, mve_pred16_t __p)
>  {
> -  return __builtin_mve_vshlq_m_n_sv16qi (__arm_vuninitializedq_s8 (), __a, __imm, __p);
> +  return __builtin_mve_vshllbq_m_n_uv8hi (__arm_vuninitializedq_u32 (), __a, __imm, __p);
>  }
> 
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x_n_s16 (int16x8_t __a, const int __imm, mve_pred16_t __p)
> +__arm_vshlltq_x_n_s8 (int8x16_t __a, const int __imm, mve_pred16_t __p)
>  {
> -  return __builtin_mve_vshlq_m_n_sv8hi (__arm_vuninitializedq_s16 (), __a, __imm, __p);
> +  return __builtin_mve_vshlltq_m_n_sv16qi (__arm_vuninitializedq_s16 (), __a, __imm, __p);
>  }
> 
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x_n_s32 (int32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshlq_m_n_sv4si (__arm_vuninitializedq_s32 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x_n_u8 (uint8x16_t __a, const int __imm, mve_pred16_t __p)
> +__arm_vshlltq_x_n_s16 (int16x8_t __a, const int __imm, mve_pred16_t __p)
>  {
> -  return __builtin_mve_vshlq_m_n_uv16qi (__arm_vuninitializedq_u8 (), __a, __imm, __p);
> +  return __builtin_mve_vshlltq_m_n_sv8hi (__arm_vuninitializedq_s32 (), __a, __imm, __p);
>  }
> 
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x_n_u16 (uint16x8_t __a, const int __imm, mve_pred16_t __p)
> +__arm_vshlltq_x_n_u8 (uint8x16_t __a, const int __imm, mve_pred16_t __p)
>  {
> -  return __builtin_mve_vshlq_m_n_uv8hi (__arm_vuninitializedq_u16 (), __a, __imm, __p);
> +  return __builtin_mve_vshlltq_m_n_uv16qi (__arm_vuninitializedq_u16 (), __a, __imm, __p);
>  }
> 
>  __extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x_n_u32 (uint32x4_t __a, const int __imm, mve_pred16_t __p)
> +__arm_vshlltq_x_n_u16 (uint16x8_t __a, const int __imm, mve_pred16_t __p)
>  {
> -  return __builtin_mve_vshlq_m_n_uv4si (__arm_vuninitializedq_u32 (), __a, __imm, __p);
> +  return __builtin_mve_vshlltq_m_n_uv8hi (__arm_vuninitializedq_u32 (), __a, __imm, __p);
>  }
> 
>  __extension__ extern __inline int8x16_t
> @@ -16275,48 +15589,6 @@ __arm_vcmpneq (uint32x4_t __a, uint32x4_t __b)
>   return __arm_vcmpneq_u32 (__a, __b);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq (int8x16_t __a, int8x16_t __b)
> -{
> - return __arm_vshlq_s8 (__a, __b);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq (int16x8_t __a, int16x8_t __b)
> -{
> - return __arm_vshlq_s16 (__a, __b);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq (int32x4_t __a, int32x4_t __b)
> -{
> - return __arm_vshlq_s32 (__a, __b);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq (uint8x16_t __a, int8x16_t __b)
> -{
> - return __arm_vshlq_u8 (__a, __b);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq (uint16x8_t __a, int16x8_t __b)
> -{
> - return __arm_vshlq_u16 (__a, __b);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq (uint32x4_t __a, int32x4_t __b)
> -{
> - return __arm_vshlq_u32 (__a, __b);
> -}
> -
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq (uint8x16_t __a, uint8x16_t __b)
> @@ -16457,27 +15729,6 @@ __arm_vaddvaq (uint32_t __a, uint8x16_t __b)
>   return __arm_vaddvaq_u8 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_r (uint8x16_t __a, int32_t __b)
> -{
> - return __arm_vshlq_r_u8 (__a, __b);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq (uint8x16_t __a, int8x16_t __b)
> -{
> - return __arm_vqshlq_u8 (__a, __b);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_r (uint8x16_t __a, int32_t __b)
> -{
> - return __arm_vqshlq_r_u8 (__a, __b);
> -}
> -
>  __extension__ extern __inline uint8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq (uint8_t __a, int8x16_t __b)
> @@ -16513,13 +15764,6 @@ __arm_vbrsrq (uint8x16_t __a, int32_t __b)
>   return __arm_vbrsrq_n_u8 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_n (uint8x16_t __a, const int __imm)
> -{
> - return __arm_vshlq_n_u8 (__a, __imm);
> -}
> -
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrshrq (uint8x16_t __a, const int __imm)
> @@ -16527,13 +15771,6 @@ __arm_vrshrq (uint8x16_t __a, const int __imm)
>   return __arm_vrshrq_n_u8 (__a, __imm);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_n (uint8x16_t __a, const int __imm)
> -{
> - return __arm_vqshlq_n_u8 (__a, __imm);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq (int8x16_t __a, int8_t __b)
> @@ -16625,27 +15862,6 @@ __arm_vaddvq_p (int8x16_t __a, mve_pred16_t __p)
>   return __arm_vaddvq_p_s8 (__a, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_r (int8x16_t __a, int32_t __b)
> -{
> - return __arm_vshlq_r_s8 (__a, __b);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq (int8x16_t __a, int8x16_t __b)
> -{
> - return __arm_vqshlq_s8 (__a, __b);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_r (int8x16_t __a, int32_t __b)
> -{
> - return __arm_vqshlq_r_s8 (__a, __b);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq (int8x16_t __a, int8x16_t __b)
> @@ -16772,13 +15988,6 @@ __arm_vaddvaq (int32_t __a, int8x16_t __b)
>   return __arm_vaddvaq_s8 (__a, __b);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_n (int8x16_t __a, const int __imm)
> -{
> - return __arm_vshlq_n_s8 (__a, __imm);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrshrq (int8x16_t __a, const int __imm)
> @@ -16786,13 +15995,6 @@ __arm_vrshrq (int8x16_t __a, const int __imm)
>   return __arm_vrshrq_n_s8 (__a, __imm);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_n (int8x16_t __a, const int __imm)
> -{
> - return __arm_vqshlq_n_s8 (__a, __imm);
> -}
> -
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq (uint16x8_t __a, uint16x8_t __b)
> @@ -16933,27 +16135,6 @@ __arm_vaddvaq (uint32_t __a, uint16x8_t __b)
>   return __arm_vaddvaq_u16 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_r (uint16x8_t __a, int32_t __b)
> -{
> - return __arm_vshlq_r_u16 (__a, __b);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq (uint16x8_t __a, int16x8_t __b)
> -{
> - return __arm_vqshlq_u16 (__a, __b);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_r (uint16x8_t __a, int32_t __b)
> -{
> - return __arm_vqshlq_r_u16 (__a, __b);
> -}
> -
>  __extension__ extern __inline uint16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq (uint16_t __a, int16x8_t __b)
> @@ -16989,13 +16170,6 @@ __arm_vbrsrq (uint16x8_t __a, int32_t __b)
>   return __arm_vbrsrq_n_u16 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_n (uint16x8_t __a, const int __imm)
> -{
> - return __arm_vshlq_n_u16 (__a, __imm);
> -}
> -
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrshrq (uint16x8_t __a, const int __imm)
> @@ -17003,13 +16177,6 @@ __arm_vrshrq (uint16x8_t __a, const int __imm)
>   return __arm_vrshrq_n_u16 (__a, __imm);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_n (uint16x8_t __a, const int __imm)
> -{
> - return __arm_vqshlq_n_u16 (__a, __imm);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq (int16x8_t __a, int16_t __b)
> @@ -17101,27 +16268,6 @@ __arm_vaddvq_p (int16x8_t __a, mve_pred16_t __p)
>   return __arm_vaddvq_p_s16 (__a, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_r (int16x8_t __a, int32_t __b)
> -{
> - return __arm_vshlq_r_s16 (__a, __b);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq (int16x8_t __a, int16x8_t __b)
> -{
> - return __arm_vqshlq_s16 (__a, __b);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_r (int16x8_t __a, int32_t __b)
> -{
> - return __arm_vqshlq_r_s16 (__a, __b);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq (int16x8_t __a, int16x8_t __b)
> @@ -17248,13 +16394,6 @@ __arm_vaddvaq (int32_t __a, int16x8_t __b)
>   return __arm_vaddvaq_s16 (__a, __b);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_n (int16x8_t __a, const int __imm)
> -{
> - return __arm_vshlq_n_s16 (__a, __imm);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrshrq (int16x8_t __a, const int __imm)
> @@ -17262,13 +16401,6 @@ __arm_vrshrq (int16x8_t __a, const int __imm)
>   return __arm_vrshrq_n_s16 (__a, __imm);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_n (int16x8_t __a, const int __imm)
> -{
> - return __arm_vqshlq_n_s16 (__a, __imm);
> -}
> -
>  __extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq (uint32x4_t __a, uint32x4_t __b)
> @@ -17409,27 +16541,6 @@ __arm_vaddvaq (uint32_t __a, uint32x4_t __b)
>   return __arm_vaddvaq_u32 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_r (uint32x4_t __a, int32_t __b)
> -{
> - return __arm_vshlq_r_u32 (__a, __b);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq (uint32x4_t __a, int32x4_t __b)
> -{
> - return __arm_vqshlq_u32 (__a, __b);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_r (uint32x4_t __a, int32_t __b)
> -{
> - return __arm_vqshlq_r_u32 (__a, __b);
> -}
> -
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq (uint32_t __a, int32x4_t __b)
> @@ -17465,13 +16576,6 @@ __arm_vbrsrq (uint32x4_t __a, int32_t __b)
>   return __arm_vbrsrq_n_u32 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_n (uint32x4_t __a, const int __imm)
> -{
> - return __arm_vshlq_n_u32 (__a, __imm);
> -}
> -
>  __extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrshrq (uint32x4_t __a, const int __imm)
> @@ -17479,13 +16583,6 @@ __arm_vrshrq (uint32x4_t __a, const int __imm)
>   return __arm_vrshrq_n_u32 (__a, __imm);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_n (uint32x4_t __a, const int __imm)
> -{
> - return __arm_vqshlq_n_u32 (__a, __imm);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq (int32x4_t __a, int32_t __b)
> @@ -17577,27 +16674,6 @@ __arm_vaddvq_p (int32x4_t __a, mve_pred16_t __p)
>   return __arm_vaddvq_p_s32 (__a, __p);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_r (int32x4_t __a, int32_t __b)
> -{
> - return __arm_vshlq_r_s32 (__a, __b);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq (int32x4_t __a, int32x4_t __b)
> -{
> - return __arm_vqshlq_s32 (__a, __b);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_r (int32x4_t __a, int32_t __b)
> -{
> - return __arm_vqshlq_r_s32 (__a, __b);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq (int32x4_t __a, int32x4_t __b)
> @@ -17724,13 +16800,6 @@ __arm_vaddvaq (int32_t __a, int32x4_t __b)
>   return __arm_vaddvaq_s32 (__a, __b);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_n (int32x4_t __a, const int __imm)
> -{
> - return __arm_vshlq_n_s32 (__a, __imm);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrshrq (int32x4_t __a, const int __imm)
> @@ -17738,13 +16807,6 @@ __arm_vrshrq (int32x4_t __a, const int __imm)
>   return __arm_vrshrq_n_s32 (__a, __imm);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_n (int32x4_t __a, const int __imm)
> -{
> - return __arm_vqshlq_n_s32 (__a, __imm);
> -}
> -
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqmovntq (uint8x16_t __a, uint16x8_t __b)
> @@ -18501,20 +17563,6 @@ __arm_vsliq (uint8x16_t __a, uint8x16_t __b, const int __imm)
>   return __arm_vsliq_n_u8 (__a, __b, __imm);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_r (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vshlq_m_r_u8 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_r (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqshlq_m_r_u8 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq_p (uint8_t __a, int8x16_t __b, mve_pred16_t __p)
> @@ -18627,13 +17675,6 @@ __arm_vcmpeqq_m (int8x16_t __a, int8_t __b, mve_pred16_t __p)
>   return __arm_vcmpeqq_m_n_s8 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_r (int8x16_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vshlq_m_r_s8 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrev64q_m (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
> @@ -18641,13 +17682,6 @@ __arm_vrev64q_m (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
>   return __arm_vrev64q_m_s8 (__inactive, __a, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_r (int8x16_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqshlq_m_r_s8 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqnegq_m (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
> @@ -19054,20 +18088,6 @@ __arm_vsliq (uint16x8_t __a, uint16x8_t __b, const int __imm)
>   return __arm_vsliq_n_u16 (__a, __b, __imm);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_r (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vshlq_m_r_u16 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_r (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqshlq_m_r_u16 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq_p (uint16_t __a, int16x8_t __b, mve_pred16_t __p)
> @@ -19175,16 +18195,9 @@ __arm_vcmpeqq_m (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> 
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vcmpeqq_m (int16x8_t __a, int16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vcmpeqq_m_n_s16 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_r (int16x8_t __a, int32_t __b, mve_pred16_t __p)
> +__arm_vcmpeqq_m (int16x8_t __a, int16_t __b, mve_pred16_t __p)
>  {
> - return __arm_vshlq_m_r_s16 (__a, __b, __p);
> + return __arm_vcmpeqq_m_n_s16 (__a, __b, __p);
>  }
> 
>  __extension__ extern __inline int16x8_t
> @@ -19194,13 +18207,6 @@ __arm_vrev64q_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
>   return __arm_vrev64q_m_s16 (__inactive, __a, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_r (int16x8_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqshlq_m_r_s16 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqnegq_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
> @@ -19607,20 +18613,6 @@ __arm_vsliq (uint32x4_t __a, uint32x4_t __b, const int __imm)
>   return __arm_vsliq_n_u32 (__a, __b, __imm);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_r (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vshlq_m_r_u32 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_r (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqshlq_m_r_u32 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vminavq_p (uint32_t __a, int32x4_t __b, mve_pred16_t __p)
> @@ -19733,13 +18725,6 @@ __arm_vcmpeqq_m (int32x4_t __a, int32_t __b, mve_pred16_t __p)
>   return __arm_vcmpeqq_m_n_s32 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_r (int32x4_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vshlq_m_r_s32 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrev64q_m (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
> @@ -19747,13 +18732,6 @@ __arm_vrev64q_m (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
>   return __arm_vrev64q_m_s32 (__inactive, __a, __p);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_r (int32x4_t __a, int32_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqshlq_m_r_s32 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqnegq_m (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
> @@ -20755,13 +19733,6 @@ __arm_vsriq_m (uint8x16_t __a, uint8x16_t __b, const int __imm, mve_pred16_t __p
>   return __arm_vsriq_m_n_u8 (__a, __b, __imm, __p);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m (uint8x16_t __inactive, uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vshlq_m_u8 (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vabavq_p (uint32_t __a, uint8x16_t __b, uint8x16_t __c, mve_pred16_t __p)
> @@ -20769,13 +19740,6 @@ __arm_vabavq_p (uint32_t __a, uint8x16_t __b, uint8x16_t __c, mve_pred16_t __p)
>   return __arm_vabavq_p_u8 (__a, __b, __c, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vshlq_m_s8 (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vsriq_m (int16x8_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
> @@ -20804,13 +19768,6 @@ __arm_vsriq_m (uint16x8_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p
>   return __arm_vsriq_m_n_u16 (__a, __b, __imm, __p);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vshlq_m_u16 (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vabavq_p (uint32_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t __p)
> @@ -20818,13 +19775,6 @@ __arm_vabavq_p (uint32_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t __p)
>   return __arm_vabavq_p_u16 (__a, __b, __c, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vshlq_m_s16 (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vsriq_m (int32x4_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
> @@ -20853,13 +19803,6 @@ __arm_vsriq_m (uint32x4_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p
>   return __arm_vsriq_m_n_u32 (__a, __b, __imm, __p);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m (uint32x4_t __inactive, uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vshlq_m_u32 (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vabavq_p (uint32_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p)
> @@ -20867,13 +19810,6 @@ __arm_vabavq_p (uint32_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p)
>   return __arm_vabavq_p_u32 (__a, __b, __c, __p);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vshlq_m_s32 (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vbicq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> @@ -21735,90 +20671,6 @@ __arm_vqrdmlsdhxq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pre
>   return __arm_vqrdmlsdhxq_m_s16 (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_n (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqshlq_m_n_s8 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_n (int32x4_t __inactive, int32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqshlq_m_n_s32 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_n (int16x8_t __inactive, int16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqshlq_m_n_s16 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_n (uint8x16_t __inactive, uint8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqshlq_m_n_u8 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_n (uint32x4_t __inactive, uint32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqshlq_m_n_u32 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m_n (uint16x8_t __inactive, uint16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqshlq_m_n_u16 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqshlq_m_s8 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqshlq_m_s32 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqshlq_m_s16 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m (uint8x16_t __inactive, uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqshlq_m_u8 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m (uint32x4_t __inactive, uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqshlq_m_u32 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshlq_m (uint16x8_t __inactive, uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqshlq_m_u16 (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrshrq_m (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
> @@ -21861,48 +20713,6 @@ __arm_vrshrq_m (uint16x8_t __inactive, uint16x8_t __a, const int __imm, mve_pred
>   return __arm_vrshrq_m_n_u16 (__inactive, __a, __imm, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_n (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshlq_m_n_s8 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_n (int32x4_t __inactive, int32x4_t __a, const int __imm,
> mve_pred16_t __p)
> -{
> - return __arm_vshlq_m_n_s32 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_n (int16x8_t __inactive, int16x8_t __a, const int __imm,
> mve_pred16_t __p)
> -{
> - return __arm_vshlq_m_n_s16 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_n (uint8x16_t __inactive, uint8x16_t __a, const int __imm,
> mve_pred16_t __p)
> -{
> - return __arm_vshlq_m_n_u8 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_n (uint32x4_t __inactive, uint32x4_t __a, const int __imm,
> mve_pred16_t __p)
> -{
> - return __arm_vshlq_m_n_u32 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_m_n (uint16x8_t __inactive, uint16x8_t __a, const int __imm,
> mve_pred16_t __p)
> -{
> - return __arm_vshlq_m_n_u16 (__inactive, __a, __imm, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vshrq_m (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
> @@ -24787,90 +23597,6 @@ __arm_vshlltq_x (uint16x8_t __a, const int __imm, mve_pred16_t __p)
>   return __arm_vshlltq_x_n_u16 (__a, __imm, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vshlq_x_s8 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vshlq_x_s16 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vshlq_x_s32 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x (uint8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vshlq_x_u8 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x (uint16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vshlq_x_u16 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x (uint32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vshlq_x_u32 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x_n (int8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshlq_x_n_s8 (__a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x_n (int16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshlq_x_n_s16 (__a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x_n (int32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshlq_x_n_s32 (__a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x_n (uint8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshlq_x_n_u8 (__a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x_n (uint16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshlq_x_n_u16 (__a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlq_x_n (uint32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshlq_x_n_u32 (__a, __imm, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrshrq_x (int8x16_t __a, const int __imm, mve_pred16_t __p)
> @@ -28165,16 +26891,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vcvtq_f16_u16 (__ARM_mve_coerce(__p0, uint16x8_t)), \
>    int (*)[__ARM_mve_type_uint32x4_t]: __arm_vcvtq_f32_u32 (__ARM_mve_coerce(__p0, uint32x4_t)));})
> 
> -#define __arm_vshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
> -
>  #define __arm_vshrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
>    int (*)[__ARM_mve_type_int8x16_t]: __arm_vshrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
> @@ -28434,24 +27150,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmvq_f16 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \
>    int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmvq_f32 (__ARM_mve_coerce2(p0, double), __ARM_mve_coerce(__p1, float32x4_t)));})
> 
> -#define __arm_vshlq_r(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlq_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlq_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlq_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> -
> -#define __arm_vshlq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> -
>  #define __arm_vshlltq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
>    int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlltq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
> @@ -28490,34 +27188,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshluq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
>    int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshluq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1));})
> 
> -#define __arm_vqshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
> -
> -#define __arm_vqshlq_r(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshlq_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshlq_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshlq_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vqshlq_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> -
> -#define __arm_vqshlq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vqshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> -
>  #define __arm_vmlaldavxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -28756,24 +27426,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsliq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
>    int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsliq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> 
> -#define __arm_vshlq_m_r(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_m_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlq_m_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlq_m_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlq_m_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_m_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_m_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
> -
> -#define __arm_vqshlq_m_r(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshlq_m_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshlq_m_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshlq_m_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vqshlq_m_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_m_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_m_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
> -
>  #define __arm_vqrdmlsdhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    __typeof(p2) __p2 = (p2); \
> @@ -30170,44 +28822,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpneq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
>    int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpneq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
> 
> -#define __arm_vshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
> -
> -#define __arm_vshlq_r(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlq_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlq_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlq_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> -
> -#define __arm_vqshlq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqshlq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshlq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshlq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqshlq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshlq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshlq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
> -
> -#define __arm_vqshlq_r(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshlq_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshlq_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshlq_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vqshlq_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> -
>  #define __arm_vqshluq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
>    int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshluq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
> @@ -30223,24 +28837,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrshrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
>    int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrshrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> 
> -#define __arm_vshlq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> -
> -#define __arm_vqshlq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshlq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshlq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshlq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vqshlq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> -
>  #define __arm_vornq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -30588,15 +29184,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmlsdhxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
>    int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmlsdhxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
> 
> -#define __arm_vqshlq_m_r(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshlq_m_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshlq_m_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshlq_m_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vqshlq_m_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vqshlq_m_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vqshlq_m_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
> -
>  #define __arm_vrev64q_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -30607,15 +29194,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vrev64q_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
>    int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrev64q_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> 
> -#define __arm_vshlq_m_r(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_m_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlq_m_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlq_m_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlq_m_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_m_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_m_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
> -
>  #define __arm_vsliq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -31514,16 +30092,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_int8x16_t]: __arm_vrev16q_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2), \
>    int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrev16q_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2));})
> 
> -#define __arm_vshlq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> -  __typeof(p2) __p2 = (p2); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
> -
>  #define __arm_vrshrq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
>    int (*)[__ARM_mve_type_int8x16_t]: __arm_vrshrq_x_n_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2, p3), \
> @@ -31547,15 +30115,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlltq_x_n_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \
>    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlltq_x_n_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2, p3));})
> 
> -#define __arm_vshlq_x_n(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_x_n_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2, p3), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlq_x_n_s16 (__ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlq_x_n_s32 (__ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlq_x_n_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_x_n_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
> -
>  #define __arm_vdwdupq_x_u8(p1,p2,p3,p4) ({ __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
>    int (*)[__ARM_mve_type_int_n]: __arm_vdwdupq_x_n_u8 ((uint32_t) __p1, p2, p3, p4), \
> @@ -31771,27 +30330,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int), p3), \
>    int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int), p3));})
> 
> -#define __arm_vqshlq_m_n(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqshlq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshlq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshlq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vqshlq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vqshlq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vqshlq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),  p2, p3));})
> -
> -#define __arm_vqshlq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  __typeof(p2) __p2 = (p2); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqshlq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshlq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshlq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqshlq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshlq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshlq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
> -
>  #define __arm_vrshrq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -32044,36 +30582,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshluq_m_n_s16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
>    int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshluq_m_n_s32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3));})
> 
> -#define __arm_vshlq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  __typeof(p2) __p2 = (p2); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
> -
> -#define __arm_vshlq_m_n(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshlq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshlq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshlq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vshlq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vshlq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vshlq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),  p2, p3));})
> -
> -#define __arm_vshlq_m_r(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlq_m_r_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlq_m_r_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlq_m_r_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlq_m_r_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlq_m_r_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlq_m_r_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
> -
>  #define __arm_vsriq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> --
> 2.34.1
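
[Editor's note: the __arm_vshlq/__arm_vqshlq macros deleted above used C11
_Generic to give the intrinsics their type-generic spelling; the new
framework moves that overload resolution into the compiler.  A minimal,
self-contained sketch of the _Generic technique follows.  The names
my_vshlq, shl_s8 and shl_u8 are hypothetical stand-ins, not arm_mve.h code.

  #include <stdint.h>

  /* Hypothetical scalar stand-ins for the type-specific intrinsics.  */
  static inline int8_t shl_s8 (int8_t a, int8_t b) { return a << b; }
  static inline uint8_t shl_u8 (uint8_t a, int8_t b) { return a << b; }

  /* _Generic selects an overload from the first argument's type, which
     is how __arm_vshlq dispatched to __arm_vshlq_s8, __arm_vshlq_u8, ...
     (the real macros also coerced the arguments via __ARM_mve_coerce).  */
  #define my_vshlq(a, b)                   \
    _Generic ((a),                         \
              int8_t:  shl_s8,             \
              uint8_t: shl_u8) ((a), (b))

  int main (void)
  {
    int8_t x = my_vshlq ((int8_t) 3, (int8_t) 2);   /* shl_s8 -> 12 */
    uint8_t y = my_vshlq ((uint8_t) 3, (int8_t) 2); /* shl_u8 -> 12 */
    return !(x == 12 && y == 12);
  }
]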


^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 13/23] arm: [MVE intrinsics] factorize vmaxq vminq
  2023-05-05  8:39 ` [PATCH 13/23] arm: [MVE intrinsics] factorize vmaxq vminq Christophe Lyon
@ 2023-05-05 10:58   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 10:58 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 13/23] arm: [MVE intrinsics] factorize vmaxq vminq
> 
> Factorize vmaxq and vminq so that they use the same pattern.

Ok.
Thanks,
Kyrill

> 
> 2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/iterators.md (MAX_MIN_SU): New.
> 	(max_min_su_str): New.
> 	(max_min_supf): New.
> 	* config/arm/mve.md (mve_vmaxq_s<mode>, mve_vmaxq_u<mode>)
> 	(mve_vminq_s<mode>, mve_vminq_u<mode>): Merge into ...
> 	(mve_<max_min_su_str>q_<max_min_supf><mode>): ... this.
> ---
>  gcc/config/arm/iterators.md | 11 ++++++++++
>  gcc/config/arm/mve.md       | 44 +++++--------------------------------
>  2 files changed, 16 insertions(+), 39 deletions(-)
> 
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index 3133642ea82..9ff61e0573b 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -330,6 +330,9 @@ (define_code_iterator FCVT [unsigned_float float])
>  ;; Saturating addition, subtraction
>  (define_code_iterator SSPLUSMINUS [ss_plus ss_minus])
> 
> +;; Max/Min iterator, to factorize MVE patterns
> +(define_code_iterator MAX_MIN_SU [smax umax smin umin])
> +
>  ;; MVE integer binary operations.
>  (define_code_iterator MVE_INT_BINARY_RTX [plus minus mult])
> 
> @@ -1271,6 +1274,14 @@ (define_code_attr float_sup [(unsigned_float "u")
> (float "s")])
> 
>  (define_code_attr float_SUP [(unsigned_float "U") (float "S")])
> 
> +;; max/min for MVE
> +(define_code_attr max_min_su_str [(smax "vmax") (umax "vmax") (smin
> "vmin") (umin "vmin")])
> +
> +(define_code_attr max_min_supf [
> +		 (smax "s") (umax "u")
> +		 (smin "s") (umin "u")
> +		 ])
> +
>  ;;----------------------------------------------------------------------------
>  ;; Int attributes
>  ;;----------------------------------------------------------------------------
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index c8cb4e430ac..44409b40e5f 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -1106,29 +1106,20 @@ (define_insn "mve_vmaxavq_s<mode>"
>  ])
> 
>  ;;
> -;; [vmaxq_u, vmaxq_s])
> +;; [vmaxq_u, vmaxq_s]
> +;; [vminq_s, vminq_u]
>  ;;
> -(define_insn "mve_vmaxq_s<mode>"
> +(define_insn "mve_<max_min_su_str>q_<max_min_supf><mode>"
>    [
>     (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> -	(smax:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
> +	(MAX_MIN_SU:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
>  		    (match_operand:MVE_2 2 "s_register_operand" "w")))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vmax.%#<V_s_elem>\t%q0, %q1, %q2"
> +  "<max_min_su_str>.<max_min_supf>%#<V_sz_elem>\t%q0, %q1, %q2"
>    [(set_attr "type" "mve_move")
>  ])
> 
> -(define_insn "mve_vmaxq_u<mode>"
> -  [
> -   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> -	(umax:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
> -		    (match_operand:MVE_2 2 "s_register_operand" "w")))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vmax.%#<V_u_elem>\t%q0, %q1, %q2"
> -  [(set_attr "type" "mve_move")
> -])
> 
>  ;;
>  ;; [vmaxvq_u, vmaxvq_s])
> @@ -1175,31 +1166,6 @@ (define_insn "mve_vminavq_s<mode>"
>    [(set_attr "type" "mve_move")
>  ])
> 
> -;;
> -;; [vminq_s, vminq_u])
> -;;
> -(define_insn "mve_vminq_s<mode>"
> -  [
> -   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> -	(smin:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
> -		    (match_operand:MVE_2 2 "s_register_operand" "w")))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vmin.%#<V_s_elem>\t%q0, %q1, %q2"
> -  [(set_attr "type" "mve_move")
> -])
> -
> -(define_insn "mve_vminq_u<mode>"
> -  [
> -   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> -	(umin:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
> -		    (match_operand:MVE_2 2 "s_register_operand" "w")))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vmin.%#<V_u_elem>\t%q0, %q1, %q2"
> -  [(set_attr "type" "mve_move")
> -])
> -
>  ;;
>  ;; [vminvq_u, vminvq_s])
>  ;;
> --
> 2.34.1
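
[Editor's note: the four instructions the merged pattern emits (vmax.s8,
vmax.u8, vmin.s8, vmin.u8, plus the 16- and 32-bit forms) differ only in
signedness and in the direction of the comparison, which is exactly what
the MAX_MIN_SU iterator and the two code attributes parameterize.  A
scalar reference model of the per-lane semantics, with hypothetical names;
the vector instructions apply this to every lane of a 128-bit register:

  #include <stdint.h>

  /* One lane of vmaxq_s8 / vmax.s8: signed maximum.  */
  static inline int8_t lane_vmax_s8 (int8_t a, int8_t b)
  { return a > b ? a : b; }

  /* One lane of vminq_u8 / vmin.u8: unsigned minimum.  */
  static inline uint8_t lane_vmin_u8 (uint8_t a, uint8_t b)
  { return a < b ? a : b; }
]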


^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 14/23] arm: [MVE intrinsics] rework vmaxq vminq
  2023-05-05  8:39 ` [PATCH 14/23] arm: [MVE intrinsics] rework " Christophe Lyon
@ 2023-05-05 10:59   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 10:59 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 14/23] arm: [MVE intrinsics] rework vmaxq vminq
> 
> Implement vmaxq and vminq using the new MVE builtins framework.

Ok.
Thanks,
Kyrill
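
[Editor's note: the rework quoted below is source-compatible; the
overloaded intrinsics resolve as before, now through the builtins
framework rather than the _Generic macros.  A minimal usage sketch,
assuming an MVE-enabled toolchain (e.g. -mcpu=cortex-m55):

  #include <arm_mve.h>

  /* vmaxq/vminq resolve to the type-specific _s8 forms here.  */
  int8x16_t clamp_s8 (int8x16_t x, int8x16_t lo, int8x16_t hi)
  {
    return vminq (vmaxq (x, lo), hi);
  }

  /* The _x variant computes only the lanes selected by the predicate;
     the remaining lanes hold "don't care" values.  */
  uint8x16_t max_x_u8 (uint8x16_t a, uint8x16_t b, mve_pred16_t p)
  {
    return vmaxq_x (a, b, p);
  }
]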

> 
> 2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins-base.cc (FUNCTION_WITH_RTX_M_NO_F): New.
> 	(vmaxq, vminq): New.
> 	* config/arm/arm-mve-builtins-base.def (vmaxq, vminq): New.
> 	* config/arm/arm-mve-builtins-base.h (vmaxq, vminq): New.
> 	* config/arm/arm_mve.h (vminq): Remove.
> 	(vmaxq): Remove.
> 	(vmaxq_m): Remove.
> 	(vminq_m): Remove.
> 	(vminq_x): Remove.
> 	(vmaxq_x): Remove.
> 	(vminq_u8): Remove.
> 	(vmaxq_u8): Remove.
> 	(vminq_s8): Remove.
> 	(vmaxq_s8): Remove.
> 	(vminq_u16): Remove.
> 	(vmaxq_u16): Remove.
> 	(vminq_s16): Remove.
> 	(vmaxq_s16): Remove.
> 	(vminq_u32): Remove.
> 	(vmaxq_u32): Remove.
> 	(vminq_s32): Remove.
> 	(vmaxq_s32): Remove.
> 	(vmaxq_m_s8): Remove.
> 	(vmaxq_m_s32): Remove.
> 	(vmaxq_m_s16): Remove.
> 	(vmaxq_m_u8): Remove.
> 	(vmaxq_m_u32): Remove.
> 	(vmaxq_m_u16): Remove.
> 	(vminq_m_s8): Remove.
> 	(vminq_m_s32): Remove.
> 	(vminq_m_s16): Remove.
> 	(vminq_m_u8): Remove.
> 	(vminq_m_u32): Remove.
> 	(vminq_m_u16): Remove.
> 	(vminq_x_s8): Remove.
> 	(vminq_x_s16): Remove.
> 	(vminq_x_s32): Remove.
> 	(vminq_x_u8): Remove.
> 	(vminq_x_u16): Remove.
> 	(vminq_x_u32): Remove.
> 	(vmaxq_x_s8): Remove.
> 	(vmaxq_x_s16): Remove.
> 	(vmaxq_x_s32): Remove.
> 	(vmaxq_x_u8): Remove.
> 	(vmaxq_x_u16): Remove.
> 	(vmaxq_x_u32): Remove.
> 	(__arm_vminq_u8): Remove.
> 	(__arm_vmaxq_u8): Remove.
> 	(__arm_vminq_s8): Remove.
> 	(__arm_vmaxq_s8): Remove.
> 	(__arm_vminq_u16): Remove.
> 	(__arm_vmaxq_u16): Remove.
> 	(__arm_vminq_s16): Remove.
> 	(__arm_vmaxq_s16): Remove.
> 	(__arm_vminq_u32): Remove.
> 	(__arm_vmaxq_u32): Remove.
> 	(__arm_vminq_s32): Remove.
> 	(__arm_vmaxq_s32): Remove.
> 	(__arm_vmaxq_m_s8): Remove.
> 	(__arm_vmaxq_m_s32): Remove.
> 	(__arm_vmaxq_m_s16): Remove.
> 	(__arm_vmaxq_m_u8): Remove.
> 	(__arm_vmaxq_m_u32): Remove.
> 	(__arm_vmaxq_m_u16): Remove.
> 	(__arm_vminq_m_s8): Remove.
> 	(__arm_vminq_m_s32): Remove.
> 	(__arm_vminq_m_s16): Remove.
> 	(__arm_vminq_m_u8): Remove.
> 	(__arm_vminq_m_u32): Remove.
> 	(__arm_vminq_m_u16): Remove.
> 	(__arm_vminq_x_s8): Remove.
> 	(__arm_vminq_x_s16): Remove.
> 	(__arm_vminq_x_s32): Remove.
> 	(__arm_vminq_x_u8): Remove.
> 	(__arm_vminq_x_u16): Remove.
> 	(__arm_vminq_x_u32): Remove.
> 	(__arm_vmaxq_x_s8): Remove.
> 	(__arm_vmaxq_x_s16): Remove.
> 	(__arm_vmaxq_x_s32): Remove.
> 	(__arm_vmaxq_x_u8): Remove.
> 	(__arm_vmaxq_x_u16): Remove.
> 	(__arm_vmaxq_x_u32): Remove.
> 	(__arm_vminq): Remove.
> 	(__arm_vmaxq): Remove.
> 	(__arm_vmaxq_m): Remove.
> 	(__arm_vminq_m): Remove.
> 	(__arm_vminq_x): Remove.
> 	(__arm_vmaxq_x): Remove.
> ---
>  gcc/config/arm/arm-mve-builtins-base.cc  |  11 +
>  gcc/config/arm/arm-mve-builtins-base.def |   2 +
>  gcc/config/arm/arm-mve-builtins-base.h   |   2 +
>  gcc/config/arm/arm_mve.h                 | 628 -----------------------
>  4 files changed, 15 insertions(+), 628 deletions(-)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-
> mve-builtins-base.cc
> index 4bebf86f784..1839d5cb1a5 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.cc
> +++ b/gcc/config/arm/arm-mve-builtins-base.cc
> @@ -110,6 +110,15 @@ namespace arm_mve {
>      UNSPEC##_M_S, UNSPEC##_M_U, UNSPEC##_M_F,				\
>      UNSPEC##_M_N_S, UNSPEC##_M_N_U, -1))
> 
> +  /* Helper for builtins with RTX codes, _m predicated override, but
> +     no floating-point versions.  */
> +#define FUNCTION_WITH_RTX_M_NO_F(NAME, RTX_S, RTX_U, UNSPEC) FUNCTION	\
> +  (NAME, unspec_based_mve_function_exact_insn,				\
> +   (RTX_S, RTX_U, UNKNOWN,						\
> +    -1, -1, -1,								\
> +    UNSPEC##_M_S, UNSPEC##_M_U, -1,					\
> +    -1, -1, -1))
> +
>    /* Helper for builtins without RTX codes, no _m predicated and no _n
>       overrides.  */
>  #define FUNCTION_WITHOUT_M_N(NAME, UNSPEC) FUNCTION			\
> @@ -173,6 +182,8 @@ FUNCTION_WITHOUT_M_N (vcreateq, VCREATEQ)
>  FUNCTION_WITH_RTX_M (veorq, XOR, VEORQ)
>  FUNCTION_WITH_M_N_NO_F (vhaddq, VHADDQ)
>  FUNCTION_WITH_M_N_NO_F (vhsubq, VHSUBQ)
> +FUNCTION_WITH_RTX_M_NO_F (vmaxq, SMAX, UMAX, VMAXQ)
> +FUNCTION_WITH_RTX_M_NO_F (vminq, SMIN, UMIN, VMINQ)
>  FUNCTION_WITHOUT_N_NO_F (vmulhq, VMULHQ)
>  FUNCTION_WITH_RTX_M_N (vmulq, MULT, VMULQ)
>  FUNCTION_WITH_RTX_M_N_NO_N_F (vorrq, IOR, VORRQ)
> diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-
> mve-builtins-base.def
> index f2e40cda2af..3b42bf46e81 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.def
> +++ b/gcc/config/arm/arm-mve-builtins-base.def
> @@ -25,6 +25,8 @@ DEF_MVE_FUNCTION (vcreateq, create, all_integer_with_64, none)
>  DEF_MVE_FUNCTION (veorq, binary, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vhaddq, binary_opt_n, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vhsubq, binary_opt_n, all_integer, mx_or_none)
> +DEF_MVE_FUNCTION (vmaxq, binary, all_integer, mx_or_none)
> +DEF_MVE_FUNCTION (vminq, binary, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vmulhq, binary, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vmulq, binary_opt_n, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vorrq, binary_orrq, all_integer, mx_or_none)
> diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-
> mve-builtins-base.h
> index 5b62de6a922..81d10f4a8f4 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.h
> +++ b/gcc/config/arm/arm-mve-builtins-base.h
> @@ -30,6 +30,8 @@ extern const function_base *const vcreateq;
>  extern const function_base *const veorq;
>  extern const function_base *const vhaddq;
>  extern const function_base *const vhsubq;
> +extern const function_base *const vmaxq;
> +extern const function_base *const vminq;
>  extern const function_base *const vmulhq;
>  extern const function_base *const vmulq;
>  extern const function_base *const vorrq;
> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> index ad67dcfd024..5fbea52c8ef 100644
> --- a/gcc/config/arm/arm_mve.h
> +++ b/gcc/config/arm/arm_mve.h
> @@ -65,9 +65,7 @@
>  #define vmullbq_int(__a, __b) __arm_vmullbq_int(__a, __b)
>  #define vmladavq(__a, __b) __arm_vmladavq(__a, __b)
>  #define vminvq(__a, __b) __arm_vminvq(__a, __b)
> -#define vminq(__a, __b) __arm_vminq(__a, __b)
>  #define vmaxvq(__a, __b) __arm_vmaxvq(__a, __b)
> -#define vmaxq(__a, __b) __arm_vmaxq(__a, __b)
>  #define vcmphiq(__a, __b) __arm_vcmphiq(__a, __b)
>  #define vcmpeqq(__a, __b) __arm_vcmpeqq(__a, __b)
>  #define vcmpcsq(__a, __b) __arm_vcmpcsq(__a, __b)
> @@ -214,8 +212,6 @@
>  #define vcaddq_rot90_m(__inactive, __a, __b, __p) __arm_vcaddq_rot90_m(__inactive, __a, __b, __p)
>  #define vhcaddq_rot270_m(__inactive, __a, __b, __p) __arm_vhcaddq_rot270_m(__inactive, __a, __b, __p)
>  #define vhcaddq_rot90_m(__inactive, __a, __b, __p) __arm_vhcaddq_rot90_m(__inactive, __a, __b, __p)
> -#define vmaxq_m(__inactive, __a, __b, __p) __arm_vmaxq_m(__inactive, __a, __b, __p)
> -#define vminq_m(__inactive, __a, __b, __p) __arm_vminq_m(__inactive, __a, __b, __p)
>  #define vmladavaq_p(__a, __b, __c, __p) __arm_vmladavaq_p(__a, __b, __c, __p)
>  #define vmladavaxq_p(__a, __b, __c, __p) __arm_vmladavaxq_p(__a, __b, __c, __p)
>  #define vmlaq_m(__a, __b, __c, __p) __arm_vmlaq_m(__a, __b, __c, __p)
> @@ -339,8 +335,6 @@
>  #define viwdupq_x_u8(__a, __b, __imm, __p) __arm_viwdupq_x_u8(__a, __b, __imm, __p)
>  #define viwdupq_x_u16(__a, __b, __imm, __p) __arm_viwdupq_x_u16(__a, __b, __imm, __p)
>  #define viwdupq_x_u32(__a, __b, __imm, __p) __arm_viwdupq_x_u32(__a, __b, __imm, __p)
> -#define vminq_x(__a, __b, __p) __arm_vminq_x(__a, __b, __p)
> -#define vmaxq_x(__a, __b, __p) __arm_vmaxq_x(__a, __b, __p)
>  #define vabsq_x(__a, __p) __arm_vabsq_x(__a, __p)
>  #define vclsq_x(__a, __p) __arm_vclsq_x(__a, __p)
>  #define vclzq_x(__a, __p) __arm_vclzq_x(__a, __p)
> @@ -614,9 +608,7 @@
>  #define vmullbq_int_u8(__a, __b) __arm_vmullbq_int_u8(__a, __b)
>  #define vmladavq_u8(__a, __b) __arm_vmladavq_u8(__a, __b)
>  #define vminvq_u8(__a, __b) __arm_vminvq_u8(__a, __b)
> -#define vminq_u8(__a, __b) __arm_vminq_u8(__a, __b)
>  #define vmaxvq_u8(__a, __b) __arm_vmaxvq_u8(__a, __b)
> -#define vmaxq_u8(__a, __b) __arm_vmaxq_u8(__a, __b)
>  #define vcmpneq_n_u8(__a, __b) __arm_vcmpneq_n_u8(__a, __b)
>  #define vcmphiq_u8(__a, __b) __arm_vcmphiq_u8(__a, __b)
>  #define vcmphiq_n_u8(__a, __b) __arm_vcmphiq_n_u8(__a, __b)
> @@ -656,9 +648,7 @@
>  #define vmladavxq_s8(__a, __b) __arm_vmladavxq_s8(__a, __b)
>  #define vmladavq_s8(__a, __b) __arm_vmladavq_s8(__a, __b)
>  #define vminvq_s8(__a, __b) __arm_vminvq_s8(__a, __b)
> -#define vminq_s8(__a, __b) __arm_vminq_s8(__a, __b)
>  #define vmaxvq_s8(__a, __b) __arm_vmaxvq_s8(__a, __b)
> -#define vmaxq_s8(__a, __b) __arm_vmaxq_s8(__a, __b)
>  #define vhcaddq_rot90_s8(__a, __b) __arm_vhcaddq_rot90_s8(__a, __b)
>  #define vhcaddq_rot270_s8(__a, __b) __arm_vhcaddq_rot270_s8(__a, __b)
>  #define vcaddq_rot90_s8(__a, __b) __arm_vcaddq_rot90_s8(__a, __b)
> @@ -672,9 +662,7 @@
>  #define vmullbq_int_u16(__a, __b) __arm_vmullbq_int_u16(__a, __b)
>  #define vmladavq_u16(__a, __b) __arm_vmladavq_u16(__a, __b)
>  #define vminvq_u16(__a, __b) __arm_vminvq_u16(__a, __b)
> -#define vminq_u16(__a, __b) __arm_vminq_u16(__a, __b)
>  #define vmaxvq_u16(__a, __b) __arm_vmaxvq_u16(__a, __b)
> -#define vmaxq_u16(__a, __b) __arm_vmaxq_u16(__a, __b)
>  #define vcmpneq_n_u16(__a, __b) __arm_vcmpneq_n_u16(__a, __b)
>  #define vcmphiq_u16(__a, __b) __arm_vcmphiq_u16(__a, __b)
>  #define vcmphiq_n_u16(__a, __b) __arm_vcmphiq_n_u16(__a, __b)
> @@ -714,9 +702,7 @@
>  #define vmladavxq_s16(__a, __b) __arm_vmladavxq_s16(__a, __b)
>  #define vmladavq_s16(__a, __b) __arm_vmladavq_s16(__a, __b)
>  #define vminvq_s16(__a, __b) __arm_vminvq_s16(__a, __b)
> -#define vminq_s16(__a, __b) __arm_vminq_s16(__a, __b)
>  #define vmaxvq_s16(__a, __b) __arm_vmaxvq_s16(__a, __b)
> -#define vmaxq_s16(__a, __b) __arm_vmaxq_s16(__a, __b)
>  #define vhcaddq_rot90_s16(__a, __b) __arm_vhcaddq_rot90_s16(__a, __b)
>  #define vhcaddq_rot270_s16(__a, __b) __arm_vhcaddq_rot270_s16(__a, __b)
>  #define vcaddq_rot90_s16(__a, __b) __arm_vcaddq_rot90_s16(__a, __b)
> @@ -730,9 +716,7 @@
>  #define vmullbq_int_u32(__a, __b) __arm_vmullbq_int_u32(__a, __b)
>  #define vmladavq_u32(__a, __b) __arm_vmladavq_u32(__a, __b)
>  #define vminvq_u32(__a, __b) __arm_vminvq_u32(__a, __b)
> -#define vminq_u32(__a, __b) __arm_vminq_u32(__a, __b)
>  #define vmaxvq_u32(__a, __b) __arm_vmaxvq_u32(__a, __b)
> -#define vmaxq_u32(__a, __b) __arm_vmaxq_u32(__a, __b)
>  #define vcmpneq_n_u32(__a, __b) __arm_vcmpneq_n_u32(__a, __b)
>  #define vcmphiq_u32(__a, __b) __arm_vcmphiq_u32(__a, __b)
>  #define vcmphiq_n_u32(__a, __b) __arm_vcmphiq_n_u32(__a, __b)
> @@ -772,9 +756,7 @@
>  #define vmladavxq_s32(__a, __b) __arm_vmladavxq_s32(__a, __b)
>  #define vmladavq_s32(__a, __b) __arm_vmladavq_s32(__a, __b)
>  #define vminvq_s32(__a, __b) __arm_vminvq_s32(__a, __b)
> -#define vminq_s32(__a, __b) __arm_vminq_s32(__a, __b)
>  #define vmaxvq_s32(__a, __b) __arm_vmaxvq_s32(__a, __b)
> -#define vmaxq_s32(__a, __b) __arm_vmaxq_s32(__a, __b)
>  #define vhcaddq_rot90_s32(__a, __b) __arm_vhcaddq_rot90_s32(__a, __b)
>  #define vhcaddq_rot270_s32(__a, __b) __arm_vhcaddq_rot270_s32(__a, __b)
>  #define vcaddq_rot90_s32(__a, __b) __arm_vcaddq_rot90_s32(__a, __b)
> @@ -1411,18 +1393,6 @@
>  #define vhcaddq_rot90_m_s8(__inactive, __a, __b, __p) __arm_vhcaddq_rot90_m_s8(__inactive, __a, __b, __p)
>  #define vhcaddq_rot90_m_s32(__inactive, __a, __b, __p) __arm_vhcaddq_rot90_m_s32(__inactive, __a, __b, __p)
>  #define vhcaddq_rot90_m_s16(__inactive, __a, __b, __p) __arm_vhcaddq_rot90_m_s16(__inactive, __a, __b, __p)
> -#define vmaxq_m_s8(__inactive, __a, __b, __p) __arm_vmaxq_m_s8(__inactive, __a, __b, __p)
> -#define vmaxq_m_s32(__inactive, __a, __b, __p) __arm_vmaxq_m_s32(__inactive, __a, __b, __p)
> -#define vmaxq_m_s16(__inactive, __a, __b, __p) __arm_vmaxq_m_s16(__inactive, __a, __b, __p)
> -#define vmaxq_m_u8(__inactive, __a, __b, __p) __arm_vmaxq_m_u8(__inactive, __a, __b, __p)
> -#define vmaxq_m_u32(__inactive, __a, __b, __p) __arm_vmaxq_m_u32(__inactive, __a, __b, __p)
> -#define vmaxq_m_u16(__inactive, __a, __b, __p) __arm_vmaxq_m_u16(__inactive, __a, __b, __p)
> -#define vminq_m_s8(__inactive, __a, __b, __p) __arm_vminq_m_s8(__inactive, __a, __b, __p)
> -#define vminq_m_s32(__inactive, __a, __b, __p) __arm_vminq_m_s32(__inactive, __a, __b, __p)
> -#define vminq_m_s16(__inactive, __a, __b, __p) __arm_vminq_m_s16(__inactive, __a, __b, __p)
> -#define vminq_m_u8(__inactive, __a, __b, __p) __arm_vminq_m_u8(__inactive, __a, __b, __p)
> -#define vminq_m_u32(__inactive, __a, __b, __p) __arm_vminq_m_u32(__inactive, __a, __b, __p)
> -#define vminq_m_u16(__inactive, __a, __b, __p) __arm_vminq_m_u16(__inactive, __a, __b, __p)
>  #define vmladavaq_p_s8(__a, __b, __c, __p) __arm_vmladavaq_p_s8(__a, __b, __c, __p)
>  #define vmladavaq_p_s32(__a, __b, __c, __p) __arm_vmladavaq_p_s32(__a, __b, __c, __p)
>  #define vmladavaq_p_s16(__a, __b, __c, __p) __arm_vmladavaq_p_s16(__a, __b, __c, __p)
> @@ -1943,18 +1913,6 @@
>  #define vdupq_x_n_u8(__a, __p) __arm_vdupq_x_n_u8(__a, __p)
>  #define vdupq_x_n_u16(__a, __p) __arm_vdupq_x_n_u16(__a, __p)
>  #define vdupq_x_n_u32(__a, __p) __arm_vdupq_x_n_u32(__a, __p)
> -#define vminq_x_s8(__a, __b, __p) __arm_vminq_x_s8(__a, __b, __p)
> -#define vminq_x_s16(__a, __b, __p) __arm_vminq_x_s16(__a, __b, __p)
> -#define vminq_x_s32(__a, __b, __p) __arm_vminq_x_s32(__a, __b, __p)
> -#define vminq_x_u8(__a, __b, __p) __arm_vminq_x_u8(__a, __b, __p)
> -#define vminq_x_u16(__a, __b, __p) __arm_vminq_x_u16(__a, __b, __p)
> -#define vminq_x_u32(__a, __b, __p) __arm_vminq_x_u32(__a, __b, __p)
> -#define vmaxq_x_s8(__a, __b, __p) __arm_vmaxq_x_s8(__a, __b, __p)
> -#define vmaxq_x_s16(__a, __b, __p) __arm_vmaxq_x_s16(__a, __b, __p)
> -#define vmaxq_x_s32(__a, __b, __p) __arm_vmaxq_x_s32(__a, __b, __p)
> -#define vmaxq_x_u8(__a, __b, __p) __arm_vmaxq_x_u8(__a, __b, __p)
> -#define vmaxq_x_u16(__a, __b, __p) __arm_vmaxq_x_u16(__a, __b, __p)
> -#define vmaxq_x_u32(__a, __b, __p) __arm_vmaxq_x_u32(__a, __b, __p)
>  #define vabsq_x_s8(__a, __p) __arm_vabsq_x_s8(__a, __p)
>  #define vabsq_x_s16(__a, __p) __arm_vabsq_x_s16(__a, __p)
>  #define vabsq_x_s32(__a, __p) __arm_vabsq_x_s32(__a, __p)
> @@ -2937,13 +2895,6 @@ __arm_vminvq_u8 (uint8_t __a, uint8x16_t __b)
>    return __builtin_mve_vminvq_uv16qi (__a, __b);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_u8 (uint8x16_t __a, uint8x16_t __b)
> -{
> -  return __builtin_mve_vminq_uv16qi (__a, __b);
> -}
> -
>  __extension__ extern __inline uint8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmaxvq_u8 (uint8_t __a, uint8x16_t __b)
> @@ -2951,13 +2902,6 @@ __arm_vmaxvq_u8 (uint8_t __a, uint8x16_t __b)
>    return __builtin_mve_vmaxvq_uv16qi (__a, __b);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
> -{
> -  return __builtin_mve_vmaxq_uv16qi (__a, __b);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq_n_u8 (uint8x16_t __a, uint8_t __b)
> @@ -3233,13 +3177,6 @@ __arm_vminvq_s8 (int8_t __a, int8x16_t __b)
>    return __builtin_mve_vminvq_sv16qi (__a, __b);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_s8 (int8x16_t __a, int8x16_t __b)
> -{
> -  return __builtin_mve_vminq_sv16qi (__a, __b);
> -}
> -
>  __extension__ extern __inline int8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmaxvq_s8 (int8_t __a, int8x16_t __b)
> @@ -3247,13 +3184,6 @@ __arm_vmaxvq_s8 (int8_t __a, int8x16_t __b)
>    return __builtin_mve_vmaxvq_sv16qi (__a, __b);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_s8 (int8x16_t __a, int8x16_t __b)
> -{
> -  return __builtin_mve_vmaxq_sv16qi (__a, __b);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vhcaddq_rot90_s8 (int8x16_t __a, int8x16_t __b)
> @@ -3345,13 +3275,6 @@ __arm_vminvq_u16 (uint16_t __a, uint16x8_t __b)
>    return __builtin_mve_vminvq_uv8hi (__a, __b);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_u16 (uint16x8_t __a, uint16x8_t __b)
> -{
> -  return __builtin_mve_vminq_uv8hi (__a, __b);
> -}
> -
>  __extension__ extern __inline uint16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmaxvq_u16 (uint16_t __a, uint16x8_t __b)
> @@ -3359,13 +3282,6 @@ __arm_vmaxvq_u16 (uint16_t __a, uint16x8_t __b)
>    return __builtin_mve_vmaxvq_uv8hi (__a, __b);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
> -{
> -  return __builtin_mve_vmaxq_uv8hi (__a, __b);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq_n_u16 (uint16x8_t __a, uint16_t __b)
> @@ -3641,13 +3557,6 @@ __arm_vminvq_s16 (int16_t __a, int16x8_t __b)
>    return __builtin_mve_vminvq_sv8hi (__a, __b);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_s16 (int16x8_t __a, int16x8_t __b)
> -{
> -  return __builtin_mve_vminq_sv8hi (__a, __b);
> -}
> -
>  __extension__ extern __inline int16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmaxvq_s16 (int16_t __a, int16x8_t __b)
> @@ -3655,13 +3564,6 @@ __arm_vmaxvq_s16 (int16_t __a, int16x8_t __b)
>    return __builtin_mve_vmaxvq_sv8hi (__a, __b);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_s16 (int16x8_t __a, int16x8_t __b)
> -{
> -  return __builtin_mve_vmaxq_sv8hi (__a, __b);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vhcaddq_rot90_s16 (int16x8_t __a, int16x8_t __b)
> @@ -3753,13 +3655,6 @@ __arm_vminvq_u32 (uint32_t __a, uint32x4_t __b)
>    return __builtin_mve_vminvq_uv4si (__a, __b);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_u32 (uint32x4_t __a, uint32x4_t __b)
> -{
> -  return __builtin_mve_vminq_uv4si (__a, __b);
> -}
> -
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmaxvq_u32 (uint32_t __a, uint32x4_t __b)
> @@ -3767,13 +3662,6 @@ __arm_vmaxvq_u32 (uint32_t __a, uint32x4_t __b)
>    return __builtin_mve_vmaxvq_uv4si (__a, __b);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
> -{
> -  return __builtin_mve_vmaxq_uv4si (__a, __b);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq_n_u32 (uint32x4_t __a, uint32_t __b)
> @@ -4049,13 +3937,6 @@ __arm_vminvq_s32 (int32_t __a, int32x4_t __b)
>    return __builtin_mve_vminvq_sv4si (__a, __b);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_s32 (int32x4_t __a, int32x4_t __b)
> -{
> -  return __builtin_mve_vminq_sv4si (__a, __b);
> -}
> -
>  __extension__ extern __inline int32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmaxvq_s32 (int32_t __a, int32x4_t __b)
> @@ -4063,13 +3944,6 @@ __arm_vmaxvq_s32 (int32_t __a, int32x4_t __b)
>    return __builtin_mve_vmaxvq_sv4si (__a, __b);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_s32 (int32x4_t __a, int32x4_t __b)
> -{
> -  return __builtin_mve_vmaxq_sv4si (__a, __b);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vhcaddq_rot90_s32 (int32x4_t __a, int32x4_t __b)
> @@ -7380,90 +7254,6 @@ __arm_vhcaddq_rot90_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, m
>    return __builtin_mve_vhcaddq_rot90_m_sv8hi (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vmaxq_m_sv16qi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vmaxq_m_sv4si (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vmaxq_m_sv8hi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vmaxq_m_uv16qi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vmaxq_m_uv4si (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vmaxq_m_uv8hi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vminq_m_sv16qi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vminq_m_sv4si (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vminq_m_sv8hi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vminq_m_uv16qi (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vminq_m_uv4si (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vminq_m_uv8hi (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmladavaq_p_s8 (int32_t __a, int8x16_t __b, int8x16_t __c, mve_pred16_t __p)
> @@ -10635,90 +10425,6 @@ __arm_vdupq_x_n_u32 (uint32_t __a, mve_pred16_t __p)
>    return __builtin_mve_vdupq_m_n_uv4si (__arm_vuninitializedq_u32 (), __a, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_x_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vminq_m_sv16qi (__arm_vuninitializedq_s8 (), __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_x_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vminq_m_sv8hi (__arm_vuninitializedq_s16 (), __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_x_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vminq_m_sv4si (__arm_vuninitializedq_s32 (), __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_x_u8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vminq_m_uv16qi (__arm_vuninitializedq_u8 (), __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_x_u16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vminq_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_x_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vminq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_x_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vmaxq_m_sv16qi (__arm_vuninitializedq_s8 (), __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_x_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vmaxq_m_sv8hi (__arm_vuninitializedq_s16 (), __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_x_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vmaxq_m_sv4si (__arm_vuninitializedq_s32 (), __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_x_u8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vmaxq_m_uv16qi (__arm_vuninitializedq_u8 (), __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_x_u16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vmaxq_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_x_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vmaxq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vabsq_x_s8 (int8x16_t __a, mve_pred16_t __p)
> @@ -15624,13 +15330,6 @@ __arm_vminvq (uint8_t __a, uint8x16_t __b)
>   return __arm_vminvq_u8 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq (uint8x16_t __a, uint8x16_t __b)
> -{
> - return __arm_vminq_u8 (__a, __b);
> -}
> -
>  __extension__ extern __inline uint8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmaxvq (uint8_t __a, uint8x16_t __b)
> @@ -15638,13 +15337,6 @@ __arm_vmaxvq (uint8_t __a, uint8x16_t __b)
>   return __arm_vmaxvq_u8 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq (uint8x16_t __a, uint8x16_t __b)
> -{
> - return __arm_vmaxq_u8 (__a, __b);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq (uint8x16_t __a, uint8_t __b)
> @@ -15918,13 +15610,6 @@ __arm_vminvq (int8_t __a, int8x16_t __b)
>   return __arm_vminvq_s8 (__a, __b);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq (int8x16_t __a, int8x16_t __b)
> -{
> - return __arm_vminq_s8 (__a, __b);
> -}
> -
>  __extension__ extern __inline int8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmaxvq (int8_t __a, int8x16_t __b)
> @@ -15932,13 +15617,6 @@ __arm_vmaxvq (int8_t __a, int8x16_t __b)
>   return __arm_vmaxvq_s8 (__a, __b);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq (int8x16_t __a, int8x16_t __b)
> -{
> - return __arm_vmaxq_s8 (__a, __b);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vhcaddq_rot90 (int8x16_t __a, int8x16_t __b)
> @@ -16030,13 +15708,6 @@ __arm_vminvq (uint16_t __a, uint16x8_t __b)
>   return __arm_vminvq_u16 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq (uint16x8_t __a, uint16x8_t __b)
> -{
> - return __arm_vminq_u16 (__a, __b);
> -}
> -
>  __extension__ extern __inline uint16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmaxvq (uint16_t __a, uint16x8_t __b)
> @@ -16044,13 +15715,6 @@ __arm_vmaxvq (uint16_t __a, uint16x8_t __b)
>   return __arm_vmaxvq_u16 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq (uint16x8_t __a, uint16x8_t __b)
> -{
> - return __arm_vmaxq_u16 (__a, __b);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq (uint16x8_t __a, uint16_t __b)
> @@ -16324,13 +15988,6 @@ __arm_vminvq (int16_t __a, int16x8_t __b)
>   return __arm_vminvq_s16 (__a, __b);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq (int16x8_t __a, int16x8_t __b)
> -{
> - return __arm_vminq_s16 (__a, __b);
> -}
> -
>  __extension__ extern __inline int16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmaxvq (int16_t __a, int16x8_t __b)
> @@ -16338,13 +15995,6 @@ __arm_vmaxvq (int16_t __a, int16x8_t __b)
>   return __arm_vmaxvq_s16 (__a, __b);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq (int16x8_t __a, int16x8_t __b)
> -{
> - return __arm_vmaxq_s16 (__a, __b);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vhcaddq_rot90 (int16x8_t __a, int16x8_t __b)
> @@ -16436,13 +16086,6 @@ __arm_vminvq (uint32_t __a, uint32x4_t __b)
>   return __arm_vminvq_u32 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq (uint32x4_t __a, uint32x4_t __b)
> -{
> - return __arm_vminq_u32 (__a, __b);
> -}
> -
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmaxvq (uint32_t __a, uint32x4_t __b)
> @@ -16450,13 +16093,6 @@ __arm_vmaxvq (uint32_t __a, uint32x4_t __b)
>   return __arm_vmaxvq_u32 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq (uint32x4_t __a, uint32x4_t __b)
> -{
> - return __arm_vmaxq_u32 (__a, __b);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq (uint32x4_t __a, uint32_t __b)
> @@ -16730,13 +16366,6 @@ __arm_vminvq (int32_t __a, int32x4_t __b)
>   return __arm_vminvq_s32 (__a, __b);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq (int32x4_t __a, int32x4_t __b)
> -{
> - return __arm_vminq_s32 (__a, __b);
> -}
> -
>  __extension__ extern __inline int32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmaxvq (int32_t __a, int32x4_t __b)
> @@ -16744,13 +16373,6 @@ __arm_vmaxvq (int32_t __a, int32x4_t __b)
>   return __arm_vmaxvq_s32 (__a, __b);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq (int32x4_t __a, int32x4_t __b)
> -{
> - return __arm_vmaxq_s32 (__a, __b);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vhcaddq_rot90 (int32x4_t __a, int32x4_t __b)
> @@ -20020,90 +19642,6 @@ __arm_vhcaddq_rot90_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_p
>   return __arm_vhcaddq_rot90_m_s16 (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vmaxq_m_s8 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vmaxq_m_s32 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vmaxq_m_s16 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_m (uint8x16_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vmaxq_m_u8 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_m (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vmaxq_m_u32 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_m (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vmaxq_m_u16 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vminq_m_s8 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vminq_m_s32 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vminq_m_s16 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_m (uint8x16_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vminq_m_u8 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_m (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vminq_m_u32 (__inactive, __a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_m (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vminq_m_u16 (__inactive, __a, __b, __p);
> -}
> -
>  __extension__ extern __inline int32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmladavaq_p (int32_t __a, int8x16_t __b, int8x16_t __c, mve_pred16_t __p)
> @@ -22806,90 +22344,6 @@ __arm_viwdupq_x_u32 (uint32_t *__a, uint32_t __b, const int __imm, mve_pred16_t
>   return __arm_viwdupq_x_wb_u32 (__a, __b, __imm, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_x (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vminq_x_s8 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_x (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vminq_x_s16 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_x (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vminq_x_s32 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_x (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vminq_x_u8 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_x (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vminq_x_u16 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vminq_x (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vminq_x_u32 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_x (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vmaxq_x_s8 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_x (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vmaxq_x_s16 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_x (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vmaxq_x_s32 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_x (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
> -{
> - return __arm_vmaxq_x_u8 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_x (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
> -{
> - return __arm_vmaxq_x_u16 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmaxq_x (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vmaxq_x_u32 (__a, __b, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vabsq_x (int8x16_t __a, mve_pred16_t __p)
> @@ -27274,16 +26728,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhcaddq_rot90_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
>    int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhcaddq_rot90_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
> 
> -#define __arm_vminq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vminq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vminq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vminq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vminq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
> -
>  #define __arm_vminaq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -27291,16 +26735,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminaq_s16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
>    int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminaq_s32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
> 
> -#define __arm_vmaxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmaxq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmaxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmaxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmaxq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmaxq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmaxq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
> -
>  #define __arm_vmaxaq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -28867,16 +28301,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_int_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
>    int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmullbq_int_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
> 
> -#define __arm_vminq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vminq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vminq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vminq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vminq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
> -
>  #define __arm_vminaq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -28884,16 +28308,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminaq_s16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
>    int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminaq_s32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
> 
> -#define __arm_vmaxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmaxq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmaxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmaxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmaxq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmaxq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmaxq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
> -
>  #define __arm_vmaxaq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> @@ -30608,28 +30022,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhcaddq_rot90_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
>    int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhcaddq_rot90_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
> 
> -#define __arm_vmaxq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  __typeof(p2) __p2 = (p2); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmaxq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmaxq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmaxq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmaxq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmaxq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmaxq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> -
> -#define __arm_vminq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  __typeof(p2) __p2 = (p2); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vminq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vminq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vminq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vminq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> -
>  #define __arm_vmlaq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    __typeof(p2) __p2 = (p2); \
> @@ -31068,26 +30460,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminavq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \
>    int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminavq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2));})
> 
> -#define __arm_vmaxq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> -  __typeof(p2) __p2 = (p2); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmaxq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmaxq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmaxq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmaxq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmaxq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmaxq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> -
> -#define __arm_vminq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> -  __typeof(p2) __p2 = (p2); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vminq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vminq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vminq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vminq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vminq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vminq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
> -
>  #define __arm_vminvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> --
> 2.34.1
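
[A usage sketch for context -- illustrative only, not part of the patch, and
assuming a toolchain built with this series and -march=armv8.1-m.main+mve:
user code calling the overloaded forms is unchanged, since the wrappers
removed from arm_mve.h above are now provided by the builtins framework.]

  #include <arm_mve.h>

  /* Overload resolution follows the _Generic tables removed above.  */
  int8x16_t f (int8x16_t a, int8x16_t b)
  {
    return vmaxq (a, b);                 /* resolves to vmaxq_s8 */
  }

  uint16x8_t g (uint16x8_t inactive, uint16x8_t a, uint16x8_t b,
                mve_pred16_t p)
  {
    return vminq_m (inactive, a, b, p);  /* resolves to vminq_m_u16 */
  }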



* RE: [PATCH 15/23] arm: [MVE intrinsics] add binary_rshift_narrow shape
  2023-05-05  8:39 ` [PATCH 15/23] arm: [MVE intrinsics] add binary_rshift_narrow shape Christophe Lyon
@ 2023-05-05 11:00   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 11:00 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 15/23] arm: [MVE intrinsics] add binary_rshift_narrow shape
> 
> This patch adds the binary_rshift_narrow shape description.

Ok.
Thanks,
Kyrill

> 
> 2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins-shapes.cc (binary_rshift_narrow):
> 	New.
> 	* config/arm/arm-mve-builtins-shapes.h (binary_rshift_narrow):
> New.
> ---
>  gcc/config/arm/arm-mve-builtins-shapes.cc | 47 +++++++++++++++++++++++
>  gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
>  2 files changed, 48 insertions(+)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
> index 4ecb612ece5..88934e1ca15 100644
> --- a/gcc/config/arm/arm-mve-builtins-shapes.cc
> +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
> @@ -617,6 +617,53 @@ struct binary_lshift_r_def : public overloaded_base<0>
>  };
>  SHAPE (binary_lshift_r)
> 
> +/* <T0:half>_t vfoo[_n_t0](<T0:half>_t, <T0>_t, const int)
> +
> +   Narrowing right shifts.
> +   Check that 'imm' is in the [1..#bits/2] range.
> +
> +   Example: vqrshrnbq.
> +   int8x16_t [__arm_]vqrshrnbq[_n_s16](int8x16_t a, int16x8_t b, const int imm)
> +   int8x16_t [__arm_]vqrshrnbq_m[_n_s16](int8x16_t a, int16x8_t b, const int imm, mve_pred16_t p)  */
> +struct binary_rshift_narrow_def : public overloaded_base<0>
> +{
> +  void
> +  build (function_builder &b, const function_group_info &group,
> +	 bool preserve_user_namespace) const override
> +  {
> +    b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
> +    build_all (b, "vh0,vh0,v0,ss32", group, MODE_n, preserve_user_namespace);
> +  }
> +
> +  tree
> +  resolve (function_resolver &r) const override
> +  {
> +    unsigned int i, nargs;
> +    type_suffix_index type;
> +    if (!r.check_gp_argument (3, i, nargs)
> +	|| (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES
> +	|| !r.require_integer_immediate (i))
> +      return error_mark_node;
> +
> +    type_suffix_index narrow_suffix
> +      = find_type_suffix (type_suffixes[type].tclass,
> +			  type_suffixes[type].element_bits / 2);
> +
> +    if (!r.require_matching_vector_type (0, narrow_suffix))
> +      return error_mark_node;
> +
> +    return r.resolve_to (r.mode_suffix_id, type);
> +  }
> +
> +  bool
> +  check (function_checker &c) const override
> +  {
> +    unsigned int bits = c.type_suffix (0).element_bits;
> +    return c.require_immediate_range (2, 1, bits / 2);
> +  }
> +};
> +SHAPE (binary_rshift_narrow)
> +
>  /* <T0>xN_t vfoo[_t0](uint64_t, uint64_t)
> 
>     where there are N arguments in total.
> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
> index 25d9b60a670..d72686d187b 100644
> --- a/gcc/config/arm/arm-mve-builtins-shapes.h
> +++ b/gcc/config/arm/arm-mve-builtins-shapes.h
> @@ -40,6 +40,7 @@ namespace arm_mve
>      extern const function_shape *const binary_opt_n;
>      extern const function_shape *const binary_orrq;
>      extern const function_shape *const binary_round_lshift;
> +    extern const function_shape *const binary_rshift_narrow;
>      extern const function_shape *const create;
>      extern const function_shape *const inherent;
>      extern const function_shape *const unary_convert;
> --
> 2.34.1
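
[Usage sketch of the binary_rshift_narrow shape -- illustrative only, not
part of the patch; assumes -march=armv8.1-m.main+mve.  The wide input 'b'
is shifted right and narrowed into the bottom halves of 'a'; for the _s16
suffix the immediate must be in [1..8] (element_bits / 2), which the new
check () hook rejects at compile time when out of range.]

  #include <arm_mve.h>

  int8x16_t h (int8x16_t a, int16x8_t b)
  {
    return vqrshrnbq (a, b, 4);   /* resolves to vqrshrnbq_n_s16 */
  }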



* RE: [PATCH 16/23] arm: [MVE intrinsics] factorize vshrntq vshrnbq vrshrnbq vrshrntq vqshrnbq vqshrntq vqrshrnbq vqrshrntq
  2023-05-05  8:39 ` [PATCH 16/23] arm: [MVE intrinsics] factorize vshrntq vshrnbq vrshrnbq vrshrntq vqshrnbq vqshrntq vqrshrnbq vqrshrntq Christophe Lyon
@ 2023-05-05 11:00   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 11:00 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 16/23] arm: [MVE intrinsics] factorize vshrntq vshrnbq
> vrshrnbq vrshrntq vqshrnbq vqshrntq vqrshrnbq vqrshrntq
> 
> Factorize vqshrnbq, vqshrntq, vqrshrnbq, vqrshrntq, vshrntq, vshrnbq,
> vrshrnbq and vrshrntq so that they use the same pattern.
> 
> Introduce <isu> iterator for *shrn* so that we can use the same
> pattern despite the different "s", "u" and "i" suffixes.

Ok.
Thanks,
Kyrill

> 
> 2022-09-08  Christophe Lyon <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/iterators.md (MVE_SHRN_N, MVE_SHRN_M_N): New.
> 	(mve_insn): Add vqrshrnb, vqrshrnt, vqshrnb, vqshrnt, vrshrnb,
> 	vrshrnt, vshrnb, vshrnt.
> 	(isu): New.
> 	* config/arm/mve.md (mve_vqrshrnbq_n_<supf><mode>)
> 	(mve_vqrshrntq_n_<supf><mode>, mve_vqshrnbq_n_<supf><mode>)
> 	(mve_vqshrntq_n_<supf><mode>, mve_vrshrnbq_n_<supf><mode>)
> 	(mve_vrshrntq_n_<supf><mode>, mve_vshrnbq_n_<supf><mode>)
> 	(mve_vshrntq_n_<supf><mode>): Merge into ...
> 	(@mve_<mve_insn>q_n_<supf><mode>): ... this.
> 	(mve_vqrshrnbq_m_n_<supf><mode>, mve_vqrshrntq_m_n_<supf><mode>)
> 	(mve_vqshrnbq_m_n_<supf><mode>, mve_vqshrntq_m_n_<supf><mode>)
> 	(mve_vrshrnbq_m_n_<supf><mode>, mve_vrshrntq_m_n_<supf><mode>)
> 	(mve_vshrnbq_m_n_<supf><mode>, mve_vshrntq_m_n_<supf><mode>):
> 	Merge into ...
> 	(@mve_<mve_insn>q_m_n_<supf><mode>): ... this.
> ---
>  gcc/config/arm/iterators.md |  57 ++++++++
>  gcc/config/arm/mve.md       | 270 ++++--------------------------------
>  2 files changed, 85 insertions(+), 242 deletions(-)
> 
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index 9ff61e0573b..d64c924a513 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -468,6 +468,28 @@ (define_int_iterator MVE_RSHIFT_N   [
>  		     VRSHLQ_N_S VRSHLQ_N_U
>  		     ])
> 
> +(define_int_iterator MVE_SHRN_N [
> +		     VQRSHRNBQ_N_S VQRSHRNBQ_N_U
> +		     VQRSHRNTQ_N_S VQRSHRNTQ_N_U
> +		     VQSHRNBQ_N_S VQSHRNBQ_N_U
> +		     VQSHRNTQ_N_S VQSHRNTQ_N_U
> +		     VRSHRNBQ_N_S VRSHRNBQ_N_U
> +		     VRSHRNTQ_N_S VRSHRNTQ_N_U
> +		     VSHRNBQ_N_S VSHRNBQ_N_U
> +		     VSHRNTQ_N_S VSHRNTQ_N_U
> +		     ])
> +
> +(define_int_iterator MVE_SHRN_M_N [
> +		     VQRSHRNBQ_M_N_S VQRSHRNBQ_M_N_U
> +		     VQRSHRNTQ_M_N_S VQRSHRNTQ_M_N_U
> +		     VQSHRNBQ_M_N_S VQSHRNBQ_M_N_U
> +		     VQSHRNTQ_M_N_S VQSHRNTQ_M_N_U
> +		     VRSHRNBQ_M_N_S VRSHRNBQ_M_N_U
> +		     VRSHRNTQ_M_N_S VRSHRNTQ_M_N_U
> +		     VSHRNBQ_M_N_S VSHRNBQ_M_N_U
> +		     VSHRNTQ_M_N_S VSHRNTQ_M_N_U
> +		     ])
> +
>  (define_int_iterator MVE_FP_M_BINARY   [
>  		     VABDQ_M_F
>  		     VADDQ_M_F
> @@ -568,12 +590,20 @@ (define_int_attr mve_insn [
>  		 (VQRSHLQ_M_S "vqrshl") (VQRSHLQ_M_U "vqrshl")
>  		 (VQRSHLQ_N_S "vqrshl") (VQRSHLQ_N_U "vqrshl")
>  		 (VQRSHLQ_S "vqrshl") (VQRSHLQ_U "vqrshl")
> +		 (VQRSHRNBQ_M_N_S "vqrshrnb") (VQRSHRNBQ_M_N_U "vqrshrnb")
> +		 (VQRSHRNBQ_N_S "vqrshrnb") (VQRSHRNBQ_N_U "vqrshrnb")
> +		 (VQRSHRNTQ_M_N_S "vqrshrnt") (VQRSHRNTQ_M_N_U "vqrshrnt")
> +		 (VQRSHRNTQ_N_S "vqrshrnt") (VQRSHRNTQ_N_U "vqrshrnt")
>  		 (VQSHLQ_M_N_S "vqshl") (VQSHLQ_M_N_U "vqshl")
>  		 (VQSHLQ_M_R_S "vqshl") (VQSHLQ_M_R_U "vqshl")
>  		 (VQSHLQ_M_S "vqshl") (VQSHLQ_M_U "vqshl")
>  		 (VQSHLQ_N_S "vqshl") (VQSHLQ_N_U "vqshl")
>  		 (VQSHLQ_R_S "vqshl") (VQSHLQ_R_U "vqshl")
>  		 (VQSHLQ_S "vqshl") (VQSHLQ_U "vqshl")
> +		 (VQSHRNBQ_M_N_S "vqshrnb") (VQSHRNBQ_M_N_U "vqshrnb")
> +		 (VQSHRNBQ_N_S "vqshrnb") (VQSHRNBQ_N_U "vqshrnb")
> +		 (VQSHRNTQ_M_N_S "vqshrnt") (VQSHRNTQ_M_N_U "vqshrnt")
> +		 (VQSHRNTQ_N_S "vqshrnt") (VQSHRNTQ_N_U "vqshrnt")
>  		 (VQSUBQ_M_N_S "vqsub") (VQSUBQ_M_N_U "vqsub")
>  		 (VQSUBQ_M_S "vqsub") (VQSUBQ_M_U "vqsub")
>  		 (VQSUBQ_N_S "vqsub") (VQSUBQ_N_U "vqsub")
> @@ -586,17 +616,44 @@ (define_int_attr mve_insn [
>  		 (VRSHLQ_M_S "vrshl") (VRSHLQ_M_U "vrshl")
>  		 (VRSHLQ_N_S "vrshl") (VRSHLQ_N_U "vrshl")
>  		 (VRSHLQ_S "vrshl") (VRSHLQ_U "vrshl")
> +		 (VRSHRNBQ_M_N_S "vrshrnb") (VRSHRNBQ_M_N_U "vrshrnb")
> +		 (VRSHRNBQ_N_S "vrshrnb") (VRSHRNBQ_N_U "vrshrnb")
> +		 (VRSHRNTQ_M_N_S "vrshrnt") (VRSHRNTQ_M_N_U "vrshrnt")
> +		 (VRSHRNTQ_N_S "vrshrnt") (VRSHRNTQ_N_U "vrshrnt")
>  		 (VSHLQ_M_N_S "vshl") (VSHLQ_M_N_U "vshl")
>  		 (VSHLQ_M_R_S "vshl") (VSHLQ_M_R_U "vshl")
>  		 (VSHLQ_M_S "vshl") (VSHLQ_M_U "vshl")
>  		 (VSHLQ_N_S "vshl") (VSHLQ_N_U "vshl")
>  		 (VSHLQ_R_S "vshl") (VSHLQ_R_U "vshl")
>  		 (VSHLQ_S "vshl") (VSHLQ_U "vshl")
> +		 (VSHRNBQ_M_N_S "vshrnb") (VSHRNBQ_M_N_U "vshrnb")
> +		 (VSHRNBQ_N_S "vshrnb") (VSHRNBQ_N_U "vshrnb")
> +		 (VSHRNTQ_M_N_S "vshrnt") (VSHRNTQ_M_N_U "vshrnt")
> +		 (VSHRNTQ_N_S "vshrnt") (VSHRNTQ_N_U "vshrnt")
>  		 (VSUBQ_M_N_S "vsub") (VSUBQ_M_N_U "vsub")
>  		 (VSUBQ_M_N_S "vsub") (VSUBQ_M_N_U "vsub") (VSUBQ_M_N_F "vsub")
>  		 (VSUBQ_M_S "vsub") (VSUBQ_M_U "vsub") (VSUBQ_M_F "vsub")
>  		 (VSUBQ_N_S "vsub") (VSUBQ_N_U "vsub") (VSUBQ_N_F "vsub")
> 
> +(define_int_attr isu    [
> +		 (VQRSHRNBQ_M_N_S "s") (VQRSHRNBQ_M_N_U "u")
> +		 (VQRSHRNBQ_N_S "s") (VQRSHRNBQ_N_U "u")
> +		 (VQRSHRNTQ_M_N_S "s") (VQRSHRNTQ_M_N_U "u")
> +		 (VQRSHRNTQ_N_S "s") (VQRSHRNTQ_N_U "u")
> +		 (VQSHRNBQ_M_N_S "s") (VQSHRNBQ_M_N_U "u")
> +		 (VQSHRNBQ_N_S "s") (VQSHRNBQ_N_U "u")
> +		 (VQSHRNTQ_M_N_S "s") (VQSHRNTQ_M_N_U "u")
> +		 (VQSHRNTQ_N_S "s") (VQSHRNTQ_N_U "u")
> +		 (VRSHRNBQ_M_N_S "i") (VRSHRNBQ_M_N_U "i")
> +		 (VRSHRNBQ_N_S "i") (VRSHRNBQ_N_U "i")
> +		 (VRSHRNTQ_M_N_S "i") (VRSHRNTQ_M_N_U "i")
> +		 (VRSHRNTQ_N_S "i") (VRSHRNTQ_N_U "i")
> +		 (VSHRNBQ_M_N_S "i") (VSHRNBQ_M_N_U "i")
> +		 (VSHRNBQ_N_S "i") (VSHRNBQ_N_U "i")
> +		 (VSHRNTQ_M_N_S "i") (VSHRNTQ_M_N_U "i")
> +		 (VSHRNTQ_N_S "i") (VSHRNTQ_N_U "i")
> +		 ])
> +
>  ;; plus and minus are the only SHIFTABLE_OPS for which Thumb2 allows
>  ;; a stack pointer operand.  The minus operation is a candidate for an rsub
>  ;; and hence only plus is supported.
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 44409b40e5f..d64a075c7bb 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -2162,21 +2162,30 @@ (define_insn "mve_vcvtq_m_to_f_<supf><mode>"
>    "vpst\;vcvtt.f%#<V_sz_elem>.<supf>%#<V_sz_elem>	 %q0, %q2"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> +
>  ;;
> -;; [vqrshrnbq_n_u, vqrshrnbq_n_s])
> +;; [vqrshrnbq_n_u, vqrshrnbq_n_s]
> +;; [vqrshrntq_n_u, vqrshrntq_n_s]
> +;; [vqshrnbq_n_u, vqshrnbq_n_s]
> +;; [vqshrntq_n_u, vqshrntq_n_s]
> +;; [vrshrnbq_n_s, vrshrnbq_n_u]
> +;; [vrshrntq_n_u, vrshrntq_n_s]
> +;; [vshrnbq_n_u, vshrnbq_n_s]
> +;; [vshrntq_n_s, vshrntq_n_u]
>  ;;
> -(define_insn "mve_vqrshrnbq_n_<supf><mode>"
> +(define_insn "@mve_<mve_insn>q_n_<supf><mode>"
>    [
>     (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
>  	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
>  				 (match_operand:MVE_5 2 "s_register_operand" "w")
>  				 (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
> -	 VQRSHRNBQ_N))
> +	 MVE_SHRN_N))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vqrshrnb.<supf>%#<V_sz_elem>	%q0, %q2, %3"
> +  "<mve_insn>.<isu>%#<V_sz_elem>\t%q0, %q2, %3"
>    [(set_attr "type" "mve_move")
>  ])
> +
>  ;;
>  ;; [vqrshrunbq_n_s])
>  ;;
> @@ -2192,6 +2201,7 @@ (define_insn "mve_vqrshrunbq_n_s<mode>"
>    "vqrshrunb.s%#<V_sz_elem>\t%q0, %q2, %3"
>    [(set_attr "type" "mve_move")
>  ])
> +
>  ;;
>  ;; [vrmlaldavhaq_s vrmlaldavhaq_u])
>  ;;
> @@ -3992,22 +4002,6 @@ (define_insn "mve_vqmovuntq_m_s<mode>"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> 
> -;;
> -;; [vqrshrntq_n_u, vqrshrntq_n_s])
> -;;
> -(define_insn "mve_vqrshrntq_n_<supf><mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
> -	 VQRSHRNTQ_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vqrshrnt.<supf>%#<V_sz_elem>	%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -])
> -
>  ;;
>  ;; [vqrshruntq_n_s])
>  ;;
> @@ -4024,38 +4018,6 @@ (define_insn "mve_vqrshruntq_n_s<mode>"
>    [(set_attr "type" "mve_move")
>  ])
> 
> -;;
> -;; [vqshrnbq_n_u, vqshrnbq_n_s])
> -;;
> -(define_insn "mve_vqshrnbq_n_<supf><mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
> -	 VQSHRNBQ_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vqshrnb.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -])
> -
> -;;
> -;; [vqshrntq_n_u, vqshrntq_n_s])
> -;;
> -(define_insn "mve_vqshrntq_n_<supf><mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
> -	 VQSHRNTQ_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vqshrnt.<supf>%#<V_sz_elem>	%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -])
> -
>  ;;
>  ;; [vqshrunbq_n_s])
>  ;;
> @@ -4296,70 +4258,6 @@ (define_insn "mve_vrndxq_m_f<mode>"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> 
> -;;
> -;; [vrshrnbq_n_s, vrshrnbq_n_u])
> -;;
> -(define_insn "mve_vrshrnbq_n_<supf><mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
> -	 VRSHRNBQ_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vrshrnb.i%#<V_sz_elem>	%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -])
> -
> -;;
> -;; [vrshrntq_n_u, vrshrntq_n_s])
> -;;
> -(define_insn "mve_vrshrntq_n_<supf><mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
> -	 VRSHRNTQ_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vrshrnt.i%#<V_sz_elem>	%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -])
> -
> -;;
> -;; [vshrnbq_n_u, vshrnbq_n_s])
> -;;
> -(define_insn "mve_vshrnbq_n_<supf><mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
> -	 VSHRNBQ_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vshrnb.i%#<V_sz_elem>	%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -])
> -
> -;;
> -;; [vshrntq_n_s, vshrntq_n_u])
> -;;
> -(define_insn "mve_vshrntq_n_<supf><mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -				 (match_operand:MVE_5 2 "s_register_operand" "w")
> -				 (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
> -	 VSHRNTQ_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vshrnt.i%#<V_sz_elem>\t%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -])
> -
>  ;;
>  ;; [vcvtmq_m_s, vcvtmq_m_u])
>  ;;
> @@ -4992,70 +4890,26 @@ (define_insn "mve_vmlaldavaxq_p_<supf><mode>"
>     (set_attr "length""8")])
> 
>  ;;
> -;; [vqrshrnbq_m_n_u, vqrshrnbq_m_n_s])
> -;;
> -(define_insn "mve_vqrshrnbq_m_n_<supf><mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
> -		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
> -	 VQRSHRNBQ_M_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vqrshrnbt.<supf>%#<V_sz_elem>	%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vqrshrntq_m_n_s, vqrshrntq_m_n_u])
> -;;
> -(define_insn "mve_vqrshrntq_m_n_<supf><mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
> -		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
> -	 VQRSHRNTQ_M_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vqrshrntt.<supf>%#<V_sz_elem>	%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vqshrnbq_m_n_u, vqshrnbq_m_n_s])
> -;;
> -(define_insn "mve_vqshrnbq_m_n_<supf><mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
> -		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
> -	 VQSHRNBQ_M_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\n\tvqshrnbt.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vqshrntq_m_n_s, vqshrntq_m_n_u])
> +;; [vqrshrnbq_m_n_u, vqrshrnbq_m_n_s]
> +;; [vqrshrntq_m_n_s, vqrshrntq_m_n_u]
> +;; [vqshrnbq_m_n_u, vqshrnbq_m_n_s]
> +;; [vqshrntq_m_n_s, vqshrntq_m_n_u]
> +;; [vrshrnbq_m_n_u, vrshrnbq_m_n_s]
> +;; [vrshrntq_m_n_u, vrshrntq_m_n_s]
> +;; [vshrnbq_m_n_s, vshrnbq_m_n_u]
> +;; [vshrntq_m_n_s, vshrntq_m_n_u]
>  ;;
> -(define_insn "mve_vqshrntq_m_n_<supf><mode>"
> +(define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
>    [
>     (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
>  	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
> -		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
> -	 VQSHRNTQ_M_N))
> +				 (match_operand:MVE_5 2 "s_register_operand" "w")
> +				 (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
> +				 (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
> +	 MVE_SHRN_M_N))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vpst\;vqshrntt.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
> +  "vpst\;<mve_insn>t.<isu>%#<V_sz_elem>\t%q0, %q2, %3"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> 
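(For readers: the eight per-intrinsic patterns above collapse into this single
insn via the MVE_SHRN_M_N iterator.  As a sketch -- the instance name and the
<mve_insn>/<isu> substitutions are assumed from the usual iterator
conventions, not verified here -- the signed V8HI instance
"mve_vqrshrnbq_m_n_sv8hi" would emit, for an immediate of 3:

  vpst
  vqrshrnbt.s16	q0, q2, #3

which matches what the removed mve_vqrshrnbq_m_n_<supf><mode> pattern
produced.)
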
> @@ -5076,40 +4930,6 @@ (define_insn "mve_vrmlaldavhaq_p_sv4si"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> 
> -;;
> -;; [vrshrnbq_m_n_u, vrshrnbq_m_n_s])
> -;;
> -(define_insn "mve_vrshrnbq_m_n_<supf><mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
> -		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
> -	 VRSHRNBQ_M_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vrshrnbt.i%#<V_sz_elem>\t%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vrshrntq_m_n_u, vrshrntq_m_n_s])
> -;;
> -(define_insn "mve_vrshrntq_m_n_<supf><mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
> -		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
> -	 VRSHRNTQ_M_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vrshrntt.i%#<V_sz_elem>\t%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
>  ;;
>  ;; [vshllbq_m_n_u, vshllbq_m_n_s])
>  ;;
> @@ -5144,40 +4964,6 @@ (define_insn "mve_vshlltq_m_n_<supf><mode>"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> 
> -;;
> -;; [vshrnbq_m_n_s, vshrnbq_m_n_u])
> -;;
> -(define_insn "mve_vshrnbq_m_n_<supf><mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
> -		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
> -	 VSHRNBQ_M_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vshrnbt.i%#<V_sz_elem>\t%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vshrntq_m_n_s, vshrntq_m_n_u])
> -;;
> -(define_insn "mve_vshrntq_m_n_<supf><mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
> -		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
> -	 VSHRNTQ_M_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vshrntt.i%#<V_sz_elem>\t%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
>  ;;
>  ;; [vmlsldavaq_p_s])
>  ;;
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 17/23] arm: [MVE intrinsics] rework vshrnbq vshrntq vrshrnbq vrshrntq vqshrnbq vqshrntq vqrshrnbq vqrshrntq
  2023-05-05  8:39 ` [PATCH 17/23] arm: [MVE intrinsics] rework vshrnbq vshrntq " Christophe Lyon
@ 2023-05-05 11:02   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 11:02 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 17/23] arm: [MVE intrinsics] rework vshrnbq vshrntq
> vrshrnbq vrshrntq vqshrnbq vqshrntq vqrshrnbq vqrshrntq
> 
> Implement vshrnbq, vshrntq, vrshrnbq, vrshrntq, vqshrnbq, vqshrntq,
> vqrshrnbq, vqrshrntq using the new MVE builtins framework.
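
[For context, the user-visible API is unchanged by this rework; a minimal
sketch of the intrinsics being migrated, with signatures taken from the
arm_mve.h hunks below (requires an MVE target, e.g.
-march=armv8.1-m.main+mve):

  #include <arm_mve.h>

  int8x16_t
  narrow (int8x16_t acc, int16x8_t a, mve_pred16_t p)
  {
    acc = vshrnbq (acc, a, 3);         /* plain shift, bottom lanes */
    acc = vrshrntq (acc, a, 3);        /* rounding shift, top lanes */
    acc = vqrshrnbq_m (acc, a, 3, p);  /* saturating+rounding, predicated */
    return acc;
  }
]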

Ok with a style nit...

> 
> 2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins-base.cc (FUNCTION_ONLY_N_NO_F):
> New.
> 	(vshrnbq, vshrntq, vrshrnbq, vrshrntq, vqshrnbq, vqshrntq)
> 	(vqrshrnbq, vqrshrntq): New.
> 	* config/arm/arm-mve-builtins-base.def (vshrnbq, vshrntq)
> 	(vrshrnbq, vrshrntq, vqshrnbq, vqshrntq, vqrshrnbq, vqrshrntq):
> 	New.
> 	* config/arm/arm-mve-builtins-base.h (vshrnbq, vshrntq, vrshrnbq)
> 	(vrshrntq, vqshrnbq, vqshrntq, vqrshrnbq, vqrshrntq): New.
> 	* config/arm/arm-mve-builtins.cc
> 	(function_instance::has_inactive_argument): Handle vshrnbq,
> 	vshrntq, vrshrnbq, vrshrntq, vqshrnbq, vqshrntq, vqrshrnbq,
> 	vqrshrntq.
> 	* config/arm/arm_mve.h (vshrnbq): Remove.
> 	(vshrntq): Remove.
> 	(vshrnbq_m): Remove.
> 	(vshrntq_m): Remove.
> 	(vshrnbq_n_s16): Remove.
> 	(vshrntq_n_s16): Remove.
> 	(vshrnbq_n_u16): Remove.
> 	(vshrntq_n_u16): Remove.
> 	(vshrnbq_n_s32): Remove.
> 	(vshrntq_n_s32): Remove.
> 	(vshrnbq_n_u32): Remove.
> 	(vshrntq_n_u32): Remove.
> 	(vshrnbq_m_n_s32): Remove.
> 	(vshrnbq_m_n_s16): Remove.
> 	(vshrnbq_m_n_u32): Remove.
> 	(vshrnbq_m_n_u16): Remove.
> 	(vshrntq_m_n_s32): Remove.
> 	(vshrntq_m_n_s16): Remove.
> 	(vshrntq_m_n_u32): Remove.
> 	(vshrntq_m_n_u16): Remove.
> 	(__arm_vshrnbq_n_s16): Remove.
> 	(__arm_vshrntq_n_s16): Remove.
> 	(__arm_vshrnbq_n_u16): Remove.
> 	(__arm_vshrntq_n_u16): Remove.
> 	(__arm_vshrnbq_n_s32): Remove.
> 	(__arm_vshrntq_n_s32): Remove.
> 	(__arm_vshrnbq_n_u32): Remove.
> 	(__arm_vshrntq_n_u32): Remove.
> 	(__arm_vshrnbq_m_n_s32): Remove.
> 	(__arm_vshrnbq_m_n_s16): Remove.
> 	(__arm_vshrnbq_m_n_u32): Remove.
> 	(__arm_vshrnbq_m_n_u16): Remove.
> 	(__arm_vshrntq_m_n_s32): Remove.
> 	(__arm_vshrntq_m_n_s16): Remove.
> 	(__arm_vshrntq_m_n_u32): Remove.
> 	(__arm_vshrntq_m_n_u16): Remove.
> 	(__arm_vshrnbq): Remove.
> 	(__arm_vshrntq): Remove.
> 	(__arm_vshrnbq_m): Remove.
> 	(__arm_vshrntq_m): Remove.
> 	(vrshrnbq): Remove.
> 	(vrshrntq): Remove.
> 	(vrshrnbq_m): Remove.
> 	(vrshrntq_m): Remove.
> 	(vrshrnbq_n_s16): Remove.
> 	(vrshrntq_n_s16): Remove.
> 	(vrshrnbq_n_u16): Remove.
> 	(vrshrntq_n_u16): Remove.
> 	(vrshrnbq_n_s32): Remove.
> 	(vrshrntq_n_s32): Remove.
> 	(vrshrnbq_n_u32): Remove.
> 	(vrshrntq_n_u32): Remove.
> 	(vrshrnbq_m_n_s32): Remove.
> 	(vrshrnbq_m_n_s16): Remove.
> 	(vrshrnbq_m_n_u32): Remove.
> 	(vrshrnbq_m_n_u16): Remove.
> 	(vrshrntq_m_n_s32): Remove.
> 	(vrshrntq_m_n_s16): Remove.
> 	(vrshrntq_m_n_u32): Remove.
> 	(vrshrntq_m_n_u16): Remove.
> 	(__arm_vrshrnbq_n_s16): Remove.
> 	(__arm_vrshrntq_n_s16): Remove.
> 	(__arm_vrshrnbq_n_u16): Remove.
> 	(__arm_vrshrntq_n_u16): Remove.
> 	(__arm_vrshrnbq_n_s32): Remove.
> 	(__arm_vrshrntq_n_s32): Remove.
> 	(__arm_vrshrnbq_n_u32): Remove.
> 	(__arm_vrshrntq_n_u32): Remove.
> 	(__arm_vrshrnbq_m_n_s32): Remove.
> 	(__arm_vrshrnbq_m_n_s16): Remove.
> 	(__arm_vrshrnbq_m_n_u32): Remove.
> 	(__arm_vrshrnbq_m_n_u16): Remove.
> 	(__arm_vrshrntq_m_n_s32): Remove.
> 	(__arm_vrshrntq_m_n_s16): Remove.
> 	(__arm_vrshrntq_m_n_u32): Remove.
> 	(__arm_vrshrntq_m_n_u16): Remove.
> 	(__arm_vrshrnbq): Remove.
> 	(__arm_vrshrntq): Remove.
> 	(__arm_vrshrnbq_m): Remove.
> 	(__arm_vrshrntq_m): Remove.
> 	(vqshrnbq): Remove.
> 	(vqshrntq): Remove.
> 	(vqshrnbq_m): Remove.
> 	(vqshrntq_m): Remove.
> 	(vqshrnbq_n_s16): Remove.
> 	(vqshrntq_n_s16): Remove.
> 	(vqshrnbq_n_u16): Remove.
> 	(vqshrntq_n_u16): Remove.
> 	(vqshrnbq_n_s32): Remove.
> 	(vqshrntq_n_s32): Remove.
> 	(vqshrnbq_n_u32): Remove.
> 	(vqshrntq_n_u32): Remove.
> 	(vqshrnbq_m_n_s32): Remove.
> 	(vqshrnbq_m_n_s16): Remove.
> 	(vqshrnbq_m_n_u32): Remove.
> 	(vqshrnbq_m_n_u16): Remove.
> 	(vqshrntq_m_n_s32): Remove.
> 	(vqshrntq_m_n_s16): Remove.
> 	(vqshrntq_m_n_u32): Remove.
> 	(vqshrntq_m_n_u16): Remove.
> 	(__arm_vqshrnbq_n_s16): Remove.
> 	(__arm_vqshrntq_n_s16): Remove.
> 	(__arm_vqshrnbq_n_u16): Remove.
> 	(__arm_vqshrntq_n_u16): Remove.
> 	(__arm_vqshrnbq_n_s32): Remove.
> 	(__arm_vqshrntq_n_s32): Remove.
> 	(__arm_vqshrnbq_n_u32): Remove.
> 	(__arm_vqshrntq_n_u32): Remove.
> 	(__arm_vqshrnbq_m_n_s32): Remove.
> 	(__arm_vqshrnbq_m_n_s16): Remove.
> 	(__arm_vqshrnbq_m_n_u32): Remove.
> 	(__arm_vqshrnbq_m_n_u16): Remove.
> 	(__arm_vqshrntq_m_n_s32): Remove.
> 	(__arm_vqshrntq_m_n_s16): Remove.
> 	(__arm_vqshrntq_m_n_u32): Remove.
> 	(__arm_vqshrntq_m_n_u16): Remove.
> 	(__arm_vqshrnbq): Remove.
> 	(__arm_vqshrntq): Remove.
> 	(__arm_vqshrnbq_m): Remove.
> 	(__arm_vqshrntq_m): Remove.
> 	(vqrshrnbq): Remove.
> 	(vqrshrntq): Remove.
> 	(vqrshrnbq_m): Remove.
> 	(vqrshrntq_m): Remove.
> 	(vqrshrnbq_n_s16): Remove.
> 	(vqrshrnbq_n_u16): Remove.
> 	(vqrshrnbq_n_s32): Remove.
> 	(vqrshrnbq_n_u32): Remove.
> 	(vqrshrntq_n_s16): Remove.
> 	(vqrshrntq_n_u16): Remove.
> 	(vqrshrntq_n_s32): Remove.
> 	(vqrshrntq_n_u32): Remove.
> 	(vqrshrnbq_m_n_s32): Remove.
> 	(vqrshrnbq_m_n_s16): Remove.
> 	(vqrshrnbq_m_n_u32): Remove.
> 	(vqrshrnbq_m_n_u16): Remove.
> 	(vqrshrntq_m_n_s32): Remove.
> 	(vqrshrntq_m_n_s16): Remove.
> 	(vqrshrntq_m_n_u32): Remove.
> 	(vqrshrntq_m_n_u16): Remove.
> 	(__arm_vqrshrnbq_n_s16): Remove.
> 	(__arm_vqrshrnbq_n_u16): Remove.
> 	(__arm_vqrshrnbq_n_s32): Remove.
> 	(__arm_vqrshrnbq_n_u32): Remove.
> 	(__arm_vqrshrntq_n_s16): Remove.
> 	(__arm_vqrshrntq_n_u16): Remove.
> 	(__arm_vqrshrntq_n_s32): Remove.
> 	(__arm_vqrshrntq_n_u32): Remove.
> 	(__arm_vqrshrnbq_m_n_s32): Remove.
> 	(__arm_vqrshrnbq_m_n_s16): Remove.
> 	(__arm_vqrshrnbq_m_n_u32): Remove.
> 	(__arm_vqrshrnbq_m_n_u16): Remove.
> 	(__arm_vqrshrntq_m_n_s32): Remove.
> 	(__arm_vqrshrntq_m_n_s16): Remove.
> 	(__arm_vqrshrntq_m_n_u32): Remove.
> 	(__arm_vqrshrntq_m_n_u16): Remove.
> 	(__arm_vqrshrnbq): Remove.
> 	(__arm_vqrshrntq): Remove.
> 	(__arm_vqrshrnbq_m): Remove.
> 	(__arm_vqrshrntq_m): Remove.
> ---
>  gcc/config/arm/arm-mve-builtins-base.cc  |   17 +
>  gcc/config/arm/arm-mve-builtins-base.def |    8 +
>  gcc/config/arm/arm-mve-builtins-base.h   |    8 +
>  gcc/config/arm/arm-mve-builtins.cc       |   11 +-
>  gcc/config/arm/arm_mve.h                 | 1196 +---------------------
>  5 files changed, 65 insertions(+), 1175 deletions(-)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
> index 1839d5cb1a5..c95abe70239 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.cc
> +++ b/gcc/config/arm/arm-mve-builtins-base.cc
> @@ -175,6 +175,15 @@ namespace arm_mve {
>      UNSPEC##_M_S, UNSPEC##_M_U, UNSPEC##_M_F,			\
>      -1, -1, -1))
> 
> +  /* Helper for builtins with only unspec codes, _m predicated
> +     overrides, only _n version, no floating-point.  */
> +#define FUNCTION_ONLY_N_NO_F(NAME, UNSPEC) FUNCTION		\
> +  (NAME, unspec_mve_function_exact_insn,				\
> +   (-1, -1, -1,								\
> +    UNSPEC##_N_S, UNSPEC##_N_U, -1,					\
> +    -1, -1, -1,								\
> +    UNSPEC##_M_N_S, UNSPEC##_M_N_U, -1))
> +
>  FUNCTION_WITHOUT_N (vabdq, VABDQ)
>  FUNCTION_WITH_RTX_M_N (vaddq, PLUS, VADDQ)
>  FUNCTION_WITH_RTX_M (vandq, AND, VANDQ)
> @@ -192,12 +201,20 @@ FUNCTION_WITH_M_N_NO_U_F (vqdmulhq, VQDMULHQ)
>  FUNCTION_WITH_M_N_NO_F (vqrshlq, VQRSHLQ)
>  FUNCTION_WITH_M_N_NO_U_F (vqrdmulhq, VQRDMULHQ)
>  FUNCTION_WITH_M_N_R (vqshlq, VQSHLQ)
> +FUNCTION_ONLY_N_NO_F (vqrshrnbq, VQRSHRNBQ)
> +FUNCTION_ONLY_N_NO_F (vqrshrntq, VQRSHRNTQ)
> +FUNCTION_ONLY_N_NO_F (vqshrnbq, VQSHRNBQ)
> +FUNCTION_ONLY_N_NO_F (vqshrntq, VQSHRNTQ)
>  FUNCTION_WITH_M_N_NO_F (vqsubq, VQSUBQ)
>  FUNCTION (vreinterpretq, vreinterpretq_impl,)
>  FUNCTION_WITHOUT_N_NO_F (vrhaddq, VRHADDQ)
>  FUNCTION_WITHOUT_N_NO_F (vrmulhq, VRMULHQ)
>  FUNCTION_WITH_M_N_NO_F (vrshlq, VRSHLQ)
> +FUNCTION_ONLY_N_NO_F (vrshrnbq, VRSHRNBQ)
> +FUNCTION_ONLY_N_NO_F (vrshrntq, VRSHRNTQ)
>  FUNCTION_WITH_M_N_R (vshlq, VSHLQ)
> +FUNCTION_ONLY_N_NO_F (vshrnbq, VSHRNBQ)
> +FUNCTION_ONLY_N_NO_F (vshrntq, VSHRNTQ)
>  FUNCTION_WITH_RTX_M_N (vsubq, MINUS, VSUBQ)
>  FUNCTION (vuninitializedq, vuninitializedq_impl,)
> 
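(As a sketch of what the new helper produces -- this is a plain macro
expansion of the FUNCTION_ONLY_N_NO_F definition above; the meaning of each
slot is an assumption based on its "only _n version, no floating-point"
comment:

  FUNCTION (vshrnbq, unspec_mve_function_exact_insn,
	    (-1, -1, -1,
	     VSHRNBQ_N_S, VSHRNBQ_N_U, -1,
	     -1, -1, -1,
	     VSHRNBQ_M_N_S, VSHRNBQ_M_N_U, -1))

i.e. only the _n and predicated _m_n forms, signed and unsigned, map to
unspecs; every other variant is disabled with -1.)
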
> diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
> index 3b42bf46e81..3dd40086663 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.def
> +++ b/gcc/config/arm/arm-mve-builtins-base.def
> @@ -34,15 +34,23 @@ DEF_MVE_FUNCTION (vqaddq, binary_opt_n, all_integer, m_or_none)
>  DEF_MVE_FUNCTION (vqdmulhq, binary_opt_n, all_signed, m_or_none)
>  DEF_MVE_FUNCTION (vqrdmulhq, binary_opt_n, all_signed, m_or_none)
>  DEF_MVE_FUNCTION (vqrshlq, binary_round_lshift, all_integer, m_or_none)
> +DEF_MVE_FUNCTION (vqrshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
> +DEF_MVE_FUNCTION (vqrshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
>  DEF_MVE_FUNCTION (vqshlq, binary_lshift, all_integer, m_or_none)
>  DEF_MVE_FUNCTION (vqshlq, binary_lshift_r, all_integer, m_or_none)
> +DEF_MVE_FUNCTION (vqshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
> +DEF_MVE_FUNCTION (vqshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
>  DEF_MVE_FUNCTION (vqsubq, binary_opt_n, all_integer, m_or_none)
>  DEF_MVE_FUNCTION (vreinterpretq, unary_convert, reinterpret_integer, none)
>  DEF_MVE_FUNCTION (vrhaddq, binary, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vrmulhq, binary, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vrshlq, binary_round_lshift, all_integer, mx_or_none)
> +DEF_MVE_FUNCTION (vrshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
> +DEF_MVE_FUNCTION (vrshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
>  DEF_MVE_FUNCTION (vshlq, binary_lshift, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vshlq, binary_lshift_r, all_integer, m_or_none) // "_r" forms do not support the "x" predicate
> +DEF_MVE_FUNCTION (vshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
> +DEF_MVE_FUNCTION (vshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
>  DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vuninitializedq, inherent, all_integer_with_64, none)
>  #undef REQUIRES_FLOAT
> diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
> index 81d10f4a8f4..9e11ac83681 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.h
> +++ b/gcc/config/arm/arm-mve-builtins-base.h
> @@ -39,13 +39,21 @@ extern const function_base *const vqaddq;
>  extern const function_base *const vqdmulhq;
>  extern const function_base *const vqrdmulhq;
>  extern const function_base *const vqrshlq;
> +extern const function_base *const vqrshrnbq;
> +extern const function_base *const vqrshrntq;
>  extern const function_base *const vqshlq;
> +extern const function_base *const vqshrnbq;
> +extern const function_base *const vqshrntq;
>  extern const function_base *const vqsubq;
>  extern const function_base *const vreinterpretq;
>  extern const function_base *const vrhaddq;
>  extern const function_base *const vrmulhq;
>  extern const function_base *const vrshlq;
> +extern const function_base *const vrshrnbq;
> +extern const function_base *const vrshrntq;
>  extern const function_base *const vshlq;
> +extern const function_base *const vshrnbq;
> +extern const function_base *const vshrntq;
>  extern const function_base *const vsubq;
>  extern const function_base *const vuninitializedq;
> 
> diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
> index c25b1be9903..667bbc58483 100644
> --- a/gcc/config/arm/arm-mve-builtins.cc
> +++ b/gcc/config/arm/arm-mve-builtins.cc
> @@ -672,7 +672,16 @@ function_instance::has_inactive_argument () const
>    if (mode_suffix_id == MODE_r
>        || (base == functions::vorrq && mode_suffix_id == MODE_n)
>        || (base == functions::vqrshlq && mode_suffix_id == MODE_n)
> -      || (base == functions::vrshlq && mode_suffix_id == MODE_n))
> +      || base == functions::vqrshrnbq
> +      || base == functions::vqrshrntq
> +      || base == functions::vqshrnbq
> +      || base == functions::vqshrntq
> +      || (base == functions::vrshlq && mode_suffix_id == MODE_n)
> +      || base == functions::vrshrnbq
> +      || base == functions::vrshrntq
> +      || base == functions::vshrnbq
> +      || base == functions::vshrntq
> +      )

... The ')' should be on the previous line.
Thanks,
Kyrill
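
Concretely, the condition with the nit fixed (a whitespace-only change to
the hunk quoted above):

  if (mode_suffix_id == MODE_r
      || (base == functions::vorrq && mode_suffix_id == MODE_n)
      || (base == functions::vqrshlq && mode_suffix_id == MODE_n)
      || base == functions::vqrshrnbq
      || base == functions::vqrshrntq
      || base == functions::vqshrnbq
      || base == functions::vqshrntq
      || (base == functions::vrshlq && mode_suffix_id == MODE_n)
      || base == functions::vrshrnbq
      || base == functions::vrshrntq
      || base == functions::vshrnbq
      || base == functions::vshrntq)
    return false;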

>      return false;
> 
>    return true;
> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> index 5fbea52c8ef..ed7852e2460 100644
> --- a/gcc/config/arm/arm_mve.h
> +++ b/gcc/config/arm/arm_mve.h
> @@ -113,7 +113,6 @@
>  #define vrmlaldavhxq(__a, __b) __arm_vrmlaldavhxq(__a, __b)
>  #define vabavq(__a, __b, __c) __arm_vabavq(__a, __b, __c)
>  #define vbicq_m_n(__a, __imm, __p) __arm_vbicq_m_n(__a, __imm, __p)
> -#define vqrshrnbq(__a, __b, __imm) __arm_vqrshrnbq(__a, __b, __imm)
>  #define vqrshrunbq(__a, __b, __imm) __arm_vqrshrunbq(__a, __b, __imm)
>  #define vrmlaldavhaq(__a, __b, __c) __arm_vrmlaldavhaq(__a, __b, __c)
>  #define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
> @@ -176,13 +175,6 @@
>  #define vrmlaldavhxq_p(__a, __b, __p) __arm_vrmlaldavhxq_p(__a, __b,
> __p)
>  #define vrmlsldavhq_p(__a, __b, __p) __arm_vrmlsldavhq_p(__a, __b, __p)
>  #define vrmlsldavhxq_p(__a, __b, __p) __arm_vrmlsldavhxq_p(__a, __b,
> __p)
> -#define vqrshrntq(__a, __b, __imm) __arm_vqrshrntq(__a, __b, __imm)
> -#define vqshrnbq(__a, __b, __imm) __arm_vqshrnbq(__a, __b, __imm)
> -#define vqshrntq(__a, __b, __imm) __arm_vqshrntq(__a, __b, __imm)
> -#define vrshrnbq(__a, __b, __imm) __arm_vrshrnbq(__a, __b, __imm)
> -#define vrshrntq(__a, __b, __imm) __arm_vrshrntq(__a, __b, __imm)
> -#define vshrnbq(__a, __b, __imm) __arm_vshrnbq(__a, __b, __imm)
> -#define vshrntq(__a, __b, __imm) __arm_vshrntq(__a, __b, __imm)
>  #define vmlaldavaq(__a, __b, __c) __arm_vmlaldavaq(__a, __b, __c)
>  #define vmlaldavaxq(__a, __b, __c) __arm_vmlaldavaxq(__a, __b, __c)
>  #define vmlsldavaq(__a, __b, __c) __arm_vmlsldavaq(__a, __b, __c)
> @@ -244,24 +236,16 @@
>  #define vmulltq_poly_m(__inactive, __a, __b, __p)
> __arm_vmulltq_poly_m(__inactive, __a, __b, __p)
>  #define vqdmullbq_m(__inactive, __a, __b, __p)
> __arm_vqdmullbq_m(__inactive, __a, __b, __p)
>  #define vqdmulltq_m(__inactive, __a, __b, __p)
> __arm_vqdmulltq_m(__inactive, __a, __b, __p)
> -#define vqrshrnbq_m(__a, __b, __imm, __p) __arm_vqrshrnbq_m(__a, __b,
> __imm, __p)
> -#define vqrshrntq_m(__a, __b, __imm, __p) __arm_vqrshrntq_m(__a, __b,
> __imm, __p)
>  #define vqrshrunbq_m(__a, __b, __imm, __p) __arm_vqrshrunbq_m(__a,
> __b, __imm, __p)
>  #define vqrshruntq_m(__a, __b, __imm, __p) __arm_vqrshruntq_m(__a,
> __b, __imm, __p)
> -#define vqshrnbq_m(__a, __b, __imm, __p) __arm_vqshrnbq_m(__a, __b,
> __imm, __p)
> -#define vqshrntq_m(__a, __b, __imm, __p) __arm_vqshrntq_m(__a, __b,
> __imm, __p)
>  #define vqshrunbq_m(__a, __b, __imm, __p) __arm_vqshrunbq_m(__a,
> __b, __imm, __p)
>  #define vqshruntq_m(__a, __b, __imm, __p) __arm_vqshruntq_m(__a, __b,
> __imm, __p)
>  #define vrmlaldavhaq_p(__a, __b, __c, __p) __arm_vrmlaldavhaq_p(__a,
> __b, __c, __p)
>  #define vrmlaldavhaxq_p(__a, __b, __c, __p) __arm_vrmlaldavhaxq_p(__a,
> __b, __c, __p)
>  #define vrmlsldavhaq_p(__a, __b, __c, __p) __arm_vrmlsldavhaq_p(__a,
> __b, __c, __p)
>  #define vrmlsldavhaxq_p(__a, __b, __c, __p) __arm_vrmlsldavhaxq_p(__a,
> __b, __c, __p)
> -#define vrshrnbq_m(__a, __b, __imm, __p) __arm_vrshrnbq_m(__a, __b,
> __imm, __p)
> -#define vrshrntq_m(__a, __b, __imm, __p) __arm_vrshrntq_m(__a, __b,
> __imm, __p)
>  #define vshllbq_m(__inactive, __a, __imm, __p)
> __arm_vshllbq_m(__inactive, __a, __imm, __p)
>  #define vshlltq_m(__inactive, __a, __imm, __p) __arm_vshlltq_m(__inactive,
> __a, __imm, __p)
> -#define vshrnbq_m(__a, __b, __imm, __p) __arm_vshrnbq_m(__a, __b,
> __imm, __p)
> -#define vshrntq_m(__a, __b, __imm, __p) __arm_vshrntq_m(__a, __b,
> __imm, __p)
>  #define vstrbq_scatter_offset(__base, __offset, __value)
> __arm_vstrbq_scatter_offset(__base, __offset, __value)
>  #define vstrbq(__addr, __value) __arm_vstrbq(__addr, __value)
>  #define vstrwq_scatter_base(__addr, __offset, __value)
> __arm_vstrwq_scatter_base(__addr, __offset, __value)
> @@ -905,10 +889,6 @@
>  #define vcvtq_m_f16_u16(__inactive, __a, __p)
> __arm_vcvtq_m_f16_u16(__inactive, __a, __p)
>  #define vcvtq_m_f32_s32(__inactive, __a, __p)
> __arm_vcvtq_m_f32_s32(__inactive, __a, __p)
>  #define vcvtq_m_f32_u32(__inactive, __a, __p)
> __arm_vcvtq_m_f32_u32(__inactive, __a, __p)
> -#define vqrshrnbq_n_s16(__a, __b,  __imm) __arm_vqrshrnbq_n_s16(__a,
> __b,  __imm)
> -#define vqrshrnbq_n_u16(__a, __b,  __imm) __arm_vqrshrnbq_n_u16(__a,
> __b,  __imm)
> -#define vqrshrnbq_n_s32(__a, __b,  __imm) __arm_vqrshrnbq_n_s32(__a,
> __b,  __imm)
> -#define vqrshrnbq_n_u32(__a, __b,  __imm) __arm_vqrshrnbq_n_u32(__a,
> __b,  __imm)
>  #define vqrshrunbq_n_s16(__a, __b,  __imm)
> __arm_vqrshrunbq_n_s16(__a, __b,  __imm)
>  #define vqrshrunbq_n_s32(__a, __b,  __imm)
> __arm_vqrshrunbq_n_s32(__a, __b,  __imm)
>  #define vrmlaldavhaq_s32(__a, __b, __c) __arm_vrmlaldavhaq_s32(__a,
> __b, __c)
> @@ -1167,13 +1147,6 @@
>  #define vrev16q_m_u8(__inactive, __a, __p)
> __arm_vrev16q_m_u8(__inactive, __a, __p)
>  #define vrmlaldavhq_p_u32(__a, __b, __p) __arm_vrmlaldavhq_p_u32(__a,
> __b, __p)
>  #define vmvnq_m_n_s16(__inactive,  __imm, __p)
> __arm_vmvnq_m_n_s16(__inactive,  __imm, __p)
> -#define vqrshrntq_n_s16(__a, __b,  __imm) __arm_vqrshrntq_n_s16(__a,
> __b,  __imm)
> -#define vqshrnbq_n_s16(__a, __b,  __imm) __arm_vqshrnbq_n_s16(__a,
> __b,  __imm)
> -#define vqshrntq_n_s16(__a, __b,  __imm) __arm_vqshrntq_n_s16(__a,
> __b,  __imm)
> -#define vrshrnbq_n_s16(__a, __b,  __imm) __arm_vrshrnbq_n_s16(__a,
> __b,  __imm)
> -#define vrshrntq_n_s16(__a, __b,  __imm) __arm_vrshrntq_n_s16(__a, __b,
> __imm)
> -#define vshrnbq_n_s16(__a, __b,  __imm) __arm_vshrnbq_n_s16(__a, __b,
> __imm)
> -#define vshrntq_n_s16(__a, __b,  __imm) __arm_vshrntq_n_s16(__a, __b,
> __imm)
>  #define vcmlaq_f16(__a, __b, __c) __arm_vcmlaq_f16(__a, __b, __c)
>  #define vcmlaq_rot180_f16(__a, __b, __c) __arm_vcmlaq_rot180_f16(__a,
> __b, __c)
>  #define vcmlaq_rot270_f16(__a, __b, __c) __arm_vcmlaq_rot270_f16(__a,
> __b, __c)
> @@ -1239,13 +1212,6 @@
>  #define vcvtq_m_u16_f16(__inactive, __a, __p)
> __arm_vcvtq_m_u16_f16(__inactive, __a, __p)
>  #define vqmovunbq_m_s16(__a, __b, __p) __arm_vqmovunbq_m_s16(__a,
> __b, __p)
>  #define vqmovuntq_m_s16(__a, __b, __p) __arm_vqmovuntq_m_s16(__a,
> __b, __p)
> -#define vqrshrntq_n_u16(__a, __b,  __imm) __arm_vqrshrntq_n_u16(__a,
> __b,  __imm)
> -#define vqshrnbq_n_u16(__a, __b,  __imm) __arm_vqshrnbq_n_u16(__a,
> __b,  __imm)
> -#define vqshrntq_n_u16(__a, __b,  __imm) __arm_vqshrntq_n_u16(__a,
> __b,  __imm)
> -#define vrshrnbq_n_u16(__a, __b,  __imm) __arm_vrshrnbq_n_u16(__a,
> __b,  __imm)
> -#define vrshrntq_n_u16(__a, __b,  __imm) __arm_vrshrntq_n_u16(__a, __b,
> __imm)
> -#define vshrnbq_n_u16(__a, __b,  __imm) __arm_vshrnbq_n_u16(__a, __b,
> __imm)
> -#define vshrntq_n_u16(__a, __b,  __imm) __arm_vshrntq_n_u16(__a, __b,
> __imm)
>  #define vmlaldavaq_u16(__a, __b, __c) __arm_vmlaldavaq_u16(__a, __b,
> __c)
>  #define vmlaldavq_p_u16(__a, __b, __p) __arm_vmlaldavq_p_u16(__a, __b,
> __p)
>  #define vmovlbq_m_u8(__inactive, __a, __p)
> __arm_vmovlbq_m_u8(__inactive, __a, __p)
> @@ -1256,13 +1222,6 @@
>  #define vqmovntq_m_u16(__a, __b, __p) __arm_vqmovntq_m_u16(__a,
> __b, __p)
>  #define vrev32q_m_u8(__inactive, __a, __p)
> __arm_vrev32q_m_u8(__inactive, __a, __p)
>  #define vmvnq_m_n_s32(__inactive,  __imm, __p)
> __arm_vmvnq_m_n_s32(__inactive,  __imm, __p)
> -#define vqrshrntq_n_s32(__a, __b,  __imm) __arm_vqrshrntq_n_s32(__a,
> __b,  __imm)
> -#define vqshrnbq_n_s32(__a, __b,  __imm) __arm_vqshrnbq_n_s32(__a,
> __b,  __imm)
> -#define vqshrntq_n_s32(__a, __b,  __imm) __arm_vqshrntq_n_s32(__a,
> __b,  __imm)
> -#define vrshrnbq_n_s32(__a, __b,  __imm) __arm_vrshrnbq_n_s32(__a,
> __b,  __imm)
> -#define vrshrntq_n_s32(__a, __b,  __imm) __arm_vrshrntq_n_s32(__a, __b,
> __imm)
> -#define vshrnbq_n_s32(__a, __b,  __imm) __arm_vshrnbq_n_s32(__a, __b,
> __imm)
> -#define vshrntq_n_s32(__a, __b,  __imm) __arm_vshrntq_n_s32(__a, __b,
> __imm)
>  #define vcmlaq_f32(__a, __b, __c) __arm_vcmlaq_f32(__a, __b, __c)
>  #define vcmlaq_rot180_f32(__a, __b, __c) __arm_vcmlaq_rot180_f32(__a,
> __b, __c)
>  #define vcmlaq_rot270_f32(__a, __b, __c) __arm_vcmlaq_rot270_f32(__a,
> __b, __c)
> @@ -1328,13 +1287,6 @@
>  #define vcvtq_m_u32_f32(__inactive, __a, __p)
> __arm_vcvtq_m_u32_f32(__inactive, __a, __p)
>  #define vqmovunbq_m_s32(__a, __b, __p) __arm_vqmovunbq_m_s32(__a,
> __b, __p)
>  #define vqmovuntq_m_s32(__a, __b, __p) __arm_vqmovuntq_m_s32(__a,
> __b, __p)
> -#define vqrshrntq_n_u32(__a, __b,  __imm) __arm_vqrshrntq_n_u32(__a,
> __b,  __imm)
> -#define vqshrnbq_n_u32(__a, __b,  __imm) __arm_vqshrnbq_n_u32(__a,
> __b,  __imm)
> -#define vqshrntq_n_u32(__a, __b,  __imm) __arm_vqshrntq_n_u32(__a,
> __b,  __imm)
> -#define vrshrnbq_n_u32(__a, __b,  __imm) __arm_vrshrnbq_n_u32(__a,
> __b,  __imm)
> -#define vrshrntq_n_u32(__a, __b,  __imm) __arm_vrshrntq_n_u32(__a, __b,
> __imm)
> -#define vshrnbq_n_u32(__a, __b,  __imm) __arm_vshrnbq_n_u32(__a, __b,
> __imm)
> -#define vshrntq_n_u32(__a, __b,  __imm) __arm_vshrntq_n_u32(__a, __b,
> __imm)
>  #define vmlaldavaq_u32(__a, __b, __c) __arm_vmlaldavaq_u32(__a, __b,
> __c)
>  #define vmlaldavq_p_u32(__a, __b, __p) __arm_vmlaldavq_p_u32(__a, __b,
> __p)
>  #define vmovlbq_m_u16(__inactive, __a, __p)
> __arm_vmovlbq_m_u16(__inactive, __a, __p)
> @@ -1514,26 +1466,10 @@
>  #define vqdmulltq_m_n_s16(__inactive, __a, __b, __p)
> __arm_vqdmulltq_m_n_s16(__inactive, __a, __b, __p)
>  #define vqdmulltq_m_s32(__inactive, __a, __b, __p)
> __arm_vqdmulltq_m_s32(__inactive, __a, __b, __p)
>  #define vqdmulltq_m_s16(__inactive, __a, __b, __p)
> __arm_vqdmulltq_m_s16(__inactive, __a, __b, __p)
> -#define vqrshrnbq_m_n_s32(__a, __b,  __imm, __p)
> __arm_vqrshrnbq_m_n_s32(__a, __b,  __imm, __p)
> -#define vqrshrnbq_m_n_s16(__a, __b,  __imm, __p)
> __arm_vqrshrnbq_m_n_s16(__a, __b,  __imm, __p)
> -#define vqrshrnbq_m_n_u32(__a, __b,  __imm, __p)
> __arm_vqrshrnbq_m_n_u32(__a, __b,  __imm, __p)
> -#define vqrshrnbq_m_n_u16(__a, __b,  __imm, __p)
> __arm_vqrshrnbq_m_n_u16(__a, __b,  __imm, __p)
> -#define vqrshrntq_m_n_s32(__a, __b,  __imm, __p)
> __arm_vqrshrntq_m_n_s32(__a, __b,  __imm, __p)
> -#define vqrshrntq_m_n_s16(__a, __b,  __imm, __p)
> __arm_vqrshrntq_m_n_s16(__a, __b,  __imm, __p)
> -#define vqrshrntq_m_n_u32(__a, __b,  __imm, __p)
> __arm_vqrshrntq_m_n_u32(__a, __b,  __imm, __p)
> -#define vqrshrntq_m_n_u16(__a, __b,  __imm, __p)
> __arm_vqrshrntq_m_n_u16(__a, __b,  __imm, __p)
>  #define vqrshrunbq_m_n_s32(__a, __b,  __imm, __p)
> __arm_vqrshrunbq_m_n_s32(__a, __b,  __imm, __p)
>  #define vqrshrunbq_m_n_s16(__a, __b,  __imm, __p)
> __arm_vqrshrunbq_m_n_s16(__a, __b,  __imm, __p)
>  #define vqrshruntq_m_n_s32(__a, __b,  __imm, __p)
> __arm_vqrshruntq_m_n_s32(__a, __b,  __imm, __p)
>  #define vqrshruntq_m_n_s16(__a, __b,  __imm, __p)
> __arm_vqrshruntq_m_n_s16(__a, __b,  __imm, __p)
> -#define vqshrnbq_m_n_s32(__a, __b,  __imm, __p)
> __arm_vqshrnbq_m_n_s32(__a, __b,  __imm, __p)
> -#define vqshrnbq_m_n_s16(__a, __b,  __imm, __p)
> __arm_vqshrnbq_m_n_s16(__a, __b,  __imm, __p)
> -#define vqshrnbq_m_n_u32(__a, __b,  __imm, __p)
> __arm_vqshrnbq_m_n_u32(__a, __b,  __imm, __p)
> -#define vqshrnbq_m_n_u16(__a, __b,  __imm, __p)
> __arm_vqshrnbq_m_n_u16(__a, __b,  __imm, __p)
> -#define vqshrntq_m_n_s32(__a, __b,  __imm, __p)
> __arm_vqshrntq_m_n_s32(__a, __b,  __imm, __p)
> -#define vqshrntq_m_n_s16(__a, __b,  __imm, __p)
> __arm_vqshrntq_m_n_s16(__a, __b,  __imm, __p)
> -#define vqshrntq_m_n_u32(__a, __b,  __imm, __p)
> __arm_vqshrntq_m_n_u32(__a, __b,  __imm, __p)
> -#define vqshrntq_m_n_u16(__a, __b,  __imm, __p)
> __arm_vqshrntq_m_n_u16(__a, __b,  __imm, __p)
>  #define vqshrunbq_m_n_s32(__a, __b,  __imm, __p)
> __arm_vqshrunbq_m_n_s32(__a, __b,  __imm, __p)
>  #define vqshrunbq_m_n_s16(__a, __b,  __imm, __p)
> __arm_vqshrunbq_m_n_s16(__a, __b,  __imm, __p)
>  #define vqshruntq_m_n_s32(__a, __b,  __imm, __p)
> __arm_vqshruntq_m_n_s32(__a, __b,  __imm, __p)
> @@ -1543,14 +1479,6 @@
>  #define vrmlaldavhaxq_p_s32(__a, __b, __c, __p)
> __arm_vrmlaldavhaxq_p_s32(__a, __b, __c, __p)
>  #define vrmlsldavhaq_p_s32(__a, __b, __c, __p)
> __arm_vrmlsldavhaq_p_s32(__a, __b, __c, __p)
>  #define vrmlsldavhaxq_p_s32(__a, __b, __c, __p)
> __arm_vrmlsldavhaxq_p_s32(__a, __b, __c, __p)
> -#define vrshrnbq_m_n_s32(__a, __b,  __imm, __p)
> __arm_vrshrnbq_m_n_s32(__a, __b,  __imm, __p)
> -#define vrshrnbq_m_n_s16(__a, __b,  __imm, __p)
> __arm_vrshrnbq_m_n_s16(__a, __b,  __imm, __p)
> -#define vrshrnbq_m_n_u32(__a, __b,  __imm, __p)
> __arm_vrshrnbq_m_n_u32(__a, __b,  __imm, __p)
> -#define vrshrnbq_m_n_u16(__a, __b,  __imm, __p)
> __arm_vrshrnbq_m_n_u16(__a, __b,  __imm, __p)
> -#define vrshrntq_m_n_s32(__a, __b,  __imm, __p)
> __arm_vrshrntq_m_n_s32(__a, __b,  __imm, __p)
> -#define vrshrntq_m_n_s16(__a, __b,  __imm, __p)
> __arm_vrshrntq_m_n_s16(__a, __b,  __imm, __p)
> -#define vrshrntq_m_n_u32(__a, __b,  __imm, __p)
> __arm_vrshrntq_m_n_u32(__a, __b,  __imm, __p)
> -#define vrshrntq_m_n_u16(__a, __b,  __imm, __p)
> __arm_vrshrntq_m_n_u16(__a, __b,  __imm, __p)
>  #define vshllbq_m_n_s8(__inactive, __a,  __imm, __p)
> __arm_vshllbq_m_n_s8(__inactive, __a,  __imm, __p)
>  #define vshllbq_m_n_s16(__inactive, __a,  __imm, __p)
> __arm_vshllbq_m_n_s16(__inactive, __a,  __imm, __p)
>  #define vshllbq_m_n_u8(__inactive, __a,  __imm, __p)
> __arm_vshllbq_m_n_u8(__inactive, __a,  __imm, __p)
> @@ -1559,14 +1487,6 @@
>  #define vshlltq_m_n_s16(__inactive, __a,  __imm, __p)
> __arm_vshlltq_m_n_s16(__inactive, __a,  __imm, __p)
>  #define vshlltq_m_n_u8(__inactive, __a,  __imm, __p)
> __arm_vshlltq_m_n_u8(__inactive, __a,  __imm, __p)
>  #define vshlltq_m_n_u16(__inactive, __a,  __imm, __p)
> __arm_vshlltq_m_n_u16(__inactive, __a,  __imm, __p)
> -#define vshrnbq_m_n_s32(__a, __b,  __imm, __p)
> __arm_vshrnbq_m_n_s32(__a, __b,  __imm, __p)
> -#define vshrnbq_m_n_s16(__a, __b,  __imm, __p)
> __arm_vshrnbq_m_n_s16(__a, __b,  __imm, __p)
> -#define vshrnbq_m_n_u32(__a, __b,  __imm, __p)
> __arm_vshrnbq_m_n_u32(__a, __b,  __imm, __p)
> -#define vshrnbq_m_n_u16(__a, __b,  __imm, __p)
> __arm_vshrnbq_m_n_u16(__a, __b,  __imm, __p)
> -#define vshrntq_m_n_s32(__a, __b,  __imm, __p)
> __arm_vshrntq_m_n_s32(__a, __b,  __imm, __p)
> -#define vshrntq_m_n_s16(__a, __b,  __imm, __p)
> __arm_vshrntq_m_n_s16(__a, __b,  __imm, __p)
> -#define vshrntq_m_n_u32(__a, __b,  __imm, __p)
> __arm_vshrntq_m_n_u32(__a, __b,  __imm, __p)
> -#define vshrntq_m_n_u16(__a, __b,  __imm, __p)
> __arm_vshrntq_m_n_u16(__a, __b,  __imm, __p)
>  #define vbicq_m_f32(__inactive, __a, __b, __p)
> __arm_vbicq_m_f32(__inactive, __a, __b, __p)
>  #define vbicq_m_f16(__inactive, __a, __b, __p)
> __arm_vbicq_m_f16(__inactive, __a, __b, __p)
>  #define vbrsrq_m_n_f32(__inactive, __a, __b, __p)
> __arm_vbrsrq_m_n_f32(__inactive, __a, __b, __p)
> @@ -4525,34 +4445,6 @@ __arm_vbicq_m_n_u32 (uint32x4_t __a, const int
> __imm, mve_pred16_t __p)
>    return __builtin_mve_vbicq_m_n_uv4si (__a, __imm, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrnbq_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqrshrnbq_n_sv8hi (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrnbq_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqrshrnbq_n_uv8hi (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrnbq_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqrshrnbq_n_sv4si (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrnbq_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqrshrnbq_n_uv4si (__a, __b, __imm);
> -}
> -
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqrshrunbq_n_s16 (uint8x16_t __a, int16x8_t __b, const int __imm)
> @@ -6316,55 +6208,6 @@ __arm_vmvnq_m_n_s16 (int16x8_t __inactive,
> const int __imm, mve_pred16_t __p)
>    return __builtin_mve_vmvnq_m_n_sv8hi (__inactive, __imm, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrntq_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqrshrntq_n_sv8hi (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrnbq_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqshrnbq_n_sv8hi (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrntq_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqshrntq_n_sv8hi (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrnbq_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vrshrnbq_n_sv8hi (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrntq_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vrshrntq_n_sv8hi (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrnbq_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vshrnbq_n_sv8hi (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrntq_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vshrntq_n_sv8hi (__a, __b, __imm);
> -}
> -
>  __extension__ extern __inline int64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmlaldavaq_s16 (int64_t __a, int16x8_t __b, int16x8_t __c)
> @@ -6512,55 +6355,6 @@ __arm_vqmovuntq_m_s16 (uint8x16_t __a,
> int16x8_t __b, mve_pred16_t __p)
>    return __builtin_mve_vqmovuntq_m_sv8hi (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrntq_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqrshrntq_n_uv8hi (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrnbq_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqshrnbq_n_uv8hi (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrntq_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqshrntq_n_uv8hi (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrnbq_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vrshrnbq_n_uv8hi (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrntq_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vrshrntq_n_uv8hi (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrnbq_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vshrnbq_n_uv8hi (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrntq_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vshrntq_n_uv8hi (__a, __b, __imm);
> -}
> -
>  __extension__ extern __inline uint64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmlaldavaq_u16 (uint64_t __a, uint16x8_t __b, uint16x8_t __c)
> @@ -6631,55 +6425,6 @@ __arm_vmvnq_m_n_s32 (int32x4_t __inactive,
> const int __imm, mve_pred16_t __p)
>    return __builtin_mve_vmvnq_m_n_sv4si (__inactive, __imm, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrntq_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqrshrntq_n_sv4si (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrnbq_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqshrnbq_n_sv4si (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrntq_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqshrntq_n_sv4si (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrnbq_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vrshrnbq_n_sv4si (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrntq_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vrshrntq_n_sv4si (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrnbq_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vshrnbq_n_sv4si (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrntq_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vshrntq_n_sv4si (__a, __b, __imm);
> -}
> -
>  __extension__ extern __inline int64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmlaldavaq_s32 (int64_t __a, int32x4_t __b, int32x4_t __c)
> @@ -6827,55 +6572,6 @@ __arm_vqmovuntq_m_s32 (uint16x8_t __a,
> int32x4_t __b, mve_pred16_t __p)
>    return __builtin_mve_vqmovuntq_m_sv4si (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrntq_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqrshrntq_n_uv4si (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrnbq_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqshrnbq_n_uv4si (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrntq_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqshrntq_n_uv4si (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrnbq_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vrshrnbq_n_uv4si (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrntq_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vrshrntq_n_uv4si (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrnbq_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vshrnbq_n_uv4si (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrntq_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vshrntq_n_uv4si (__a, __b, __imm);
> -}
> -
>  __extension__ extern __inline uint64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmlaldavaq_u32 (uint64_t __a, uint32x4_t __b, uint32x4_t __c)
> @@ -8101,62 +7797,6 @@ __arm_vqdmulltq_m_s16 (int32x4_t __inactive,
> int16x8_t __a, int16x8_t __b, mve_p
>    return __builtin_mve_vqdmulltq_m_sv8hi (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrnbq_m_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshrnbq_m_n_sv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrnbq_m_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshrnbq_m_n_sv8hi (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrnbq_m_n_u32 (uint16x8_t __a, uint32x4_t __b, const int
> __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshrnbq_m_n_uv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrnbq_m_n_u16 (uint8x16_t __a, uint16x8_t __b, const int
> __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshrnbq_m_n_uv8hi (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrntq_m_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshrntq_m_n_sv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrntq_m_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshrntq_m_n_sv8hi (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrntq_m_n_u32 (uint16x8_t __a, uint32x4_t __b, const int
> __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshrntq_m_n_uv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrntq_m_n_u16 (uint8x16_t __a, uint16x8_t __b, const int
> __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshrntq_m_n_uv8hi (__a, __b, __imm, __p);
> -}
> -
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqrshrunbq_m_n_s32 (uint16x8_t __a, int32x4_t __b, const int
> __imm, mve_pred16_t __p)
> @@ -8185,62 +7825,6 @@ __arm_vqrshruntq_m_n_s16 (uint8x16_t __a,
> int16x8_t __b, const int __imm, mve_pr
>    return __builtin_mve_vqrshruntq_m_n_sv8hi (__a, __b, __imm, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrnbq_m_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshrnbq_m_n_sv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrnbq_m_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshrnbq_m_n_sv8hi (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrnbq_m_n_u32 (uint16x8_t __a, uint32x4_t __b, const int
> __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshrnbq_m_n_uv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrnbq_m_n_u16 (uint8x16_t __a, uint16x8_t __b, const int
> __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshrnbq_m_n_uv8hi (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrntq_m_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshrntq_m_n_sv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrntq_m_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshrntq_m_n_sv8hi (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrntq_m_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshrntq_m_n_uv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrntq_m_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshrntq_m_n_uv8hi (__a, __b, __imm, __p);
> -}
> -
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshrunbq_m_n_s32 (uint16x8_t __a, int32x4_t __b, const int
> __imm, mve_pred16_t __p)
> @@ -8304,62 +7888,6 @@ __arm_vrmlsldavhaxq_p_s32 (int64_t __a,
> int32x4_t __b, int32x4_t __c, mve_pred16
>    return __builtin_mve_vrmlsldavhaxq_p_sv4si (__a, __b, __c, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrnbq_m_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrnbq_m_n_sv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrnbq_m_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrnbq_m_n_sv8hi (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrnbq_m_n_u32 (uint16x8_t __a, uint32x4_t __b, const int
> __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrnbq_m_n_uv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrnbq_m_n_u16 (uint8x16_t __a, uint16x8_t __b, const int
> __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrnbq_m_n_uv8hi (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrntq_m_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrntq_m_n_sv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrntq_m_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrntq_m_n_sv8hi (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrntq_m_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrntq_m_n_uv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrntq_m_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrntq_m_n_uv8hi (__a, __b, __imm, __p);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vshllbq_m_n_s8 (int16x8_t __inactive, int8x16_t __a, const int
> __imm, mve_pred16_t __p)
> @@ -8416,62 +7944,6 @@ __arm_vshlltq_m_n_u16 (uint32x4_t __inactive,
> uint16x8_t __a, const int __imm, m
>    return __builtin_mve_vshlltq_m_n_uv8hi (__inactive, __a, __imm, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrnbq_m_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrnbq_m_n_sv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrnbq_m_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrnbq_m_n_sv8hi (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrnbq_m_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrnbq_m_n_uv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrnbq_m_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrnbq_m_n_uv8hi (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrntq_m_n_s32 (int16x8_t __a, int32x4_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrntq_m_n_sv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrntq_m_n_s16 (int8x16_t __a, int16x8_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrntq_m_n_sv8hi (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrntq_m_n_u32 (uint16x8_t __a, uint32x4_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrntq_m_n_uv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrntq_m_n_u16 (uint8x16_t __a, uint16x8_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrntq_m_n_uv8hi (__a, __b, __imm, __p);
> -}
> -
>  __extension__ extern __inline void
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vstrbq_scatter_offset_s8 (int8_t * __base, uint8x16_t __offset,
> int8x16_t __value)
> @@ -16926,34 +16398,6 @@ __arm_vbicq_m_n (uint32x4_t __a, const int
> __imm, mve_pred16_t __p)
>   return __arm_vbicq_m_n_u32 (__a, __imm, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrnbq (int8x16_t __a, int16x8_t __b, const int __imm)
> -{
> - return __arm_vqrshrnbq_n_s16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrnbq (uint8x16_t __a, uint16x8_t __b, const int __imm)
> -{
> - return __arm_vqrshrnbq_n_u16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrnbq (int16x8_t __a, int32x4_t __b, const int __imm)
> -{
> - return __arm_vqrshrnbq_n_s32 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrnbq (uint16x8_t __a, uint32x4_t __b, const int __imm)
> -{
> - return __arm_vqrshrnbq_n_u32 (__a, __b, __imm);
> -}
> -
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqrshrunbq (uint8x16_t __a, int16x8_t __b, const int __imm)
> @@ -18704,55 +18148,6 @@ __arm_vmvnq_m (int16x8_t __inactive, const
> int __imm, mve_pred16_t __p)
>   return __arm_vmvnq_m_n_s16 (__inactive, __imm, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrntq (int8x16_t __a, int16x8_t __b, const int __imm)
> -{
> - return __arm_vqrshrntq_n_s16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrnbq (int8x16_t __a, int16x8_t __b, const int __imm)
> -{
> - return __arm_vqshrnbq_n_s16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrntq (int8x16_t __a, int16x8_t __b, const int __imm)
> -{
> - return __arm_vqshrntq_n_s16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrnbq (int8x16_t __a, int16x8_t __b, const int __imm)
> -{
> - return __arm_vrshrnbq_n_s16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrntq (int8x16_t __a, int16x8_t __b, const int __imm)
> -{
> - return __arm_vrshrntq_n_s16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrnbq (int8x16_t __a, int16x8_t __b, const int __imm)
> -{
> - return __arm_vshrnbq_n_s16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrntq (int8x16_t __a, int16x8_t __b, const int __imm)
> -{
> - return __arm_vshrntq_n_s16 (__a, __b, __imm);
> -}
> -
>  __extension__ extern __inline int64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmlaldavaq (int64_t __a, int16x8_t __b, int16x8_t __c)
> @@ -18900,55 +18295,6 @@ __arm_vqmovuntq_m (uint8x16_t __a,
> int16x8_t __b, mve_pred16_t __p)
>   return __arm_vqmovuntq_m_s16 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrntq (uint8x16_t __a, uint16x8_t __b, const int __imm)
> -{
> - return __arm_vqrshrntq_n_u16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrnbq (uint8x16_t __a, uint16x8_t __b, const int __imm)
> -{
> - return __arm_vqshrnbq_n_u16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrntq (uint8x16_t __a, uint16x8_t __b, const int __imm)
> -{
> - return __arm_vqshrntq_n_u16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrnbq (uint8x16_t __a, uint16x8_t __b, const int __imm)
> -{
> - return __arm_vrshrnbq_n_u16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrntq (uint8x16_t __a, uint16x8_t __b, const int __imm)
> -{
> - return __arm_vrshrntq_n_u16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrnbq (uint8x16_t __a, uint16x8_t __b, const int __imm)
> -{
> - return __arm_vshrnbq_n_u16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrntq (uint8x16_t __a, uint16x8_t __b, const int __imm)
> -{
> - return __arm_vshrntq_n_u16 (__a, __b, __imm);
> -}
> -
>  __extension__ extern __inline uint64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmlaldavaq (uint64_t __a, uint16x8_t __b, uint16x8_t __c)
> @@ -19019,55 +18365,6 @@ __arm_vmvnq_m (int32x4_t __inactive, const int __imm, mve_pred16_t __p)
>   return __arm_vmvnq_m_n_s32 (__inactive, __imm, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrntq (int16x8_t __a, int32x4_t __b, const int __imm)
> -{
> - return __arm_vqrshrntq_n_s32 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrnbq (int16x8_t __a, int32x4_t __b, const int __imm)
> -{
> - return __arm_vqshrnbq_n_s32 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrntq (int16x8_t __a, int32x4_t __b, const int __imm)
> -{
> - return __arm_vqshrntq_n_s32 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrnbq (int16x8_t __a, int32x4_t __b, const int __imm)
> -{
> - return __arm_vrshrnbq_n_s32 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrntq (int16x8_t __a, int32x4_t __b, const int __imm)
> -{
> - return __arm_vrshrntq_n_s32 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrnbq (int16x8_t __a, int32x4_t __b, const int __imm)
> -{
> - return __arm_vshrnbq_n_s32 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrntq (int16x8_t __a, int32x4_t __b, const int __imm)
> -{
> - return __arm_vshrntq_n_s32 (__a, __b, __imm);
> -}
> -
>  __extension__ extern __inline int64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vmlaldavaq (int64_t __a, int32x4_t __b, int32x4_t __c)
> @@ -19152,116 +18449,67 @@ __arm_vmovntq_m (int16x8_t __a, int32x4_t __b, mve_pred16_t __p)
>   return __arm_vmovntq_m_s32 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqmovnbq_m (int16x8_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqmovnbq_m_s32 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqmovntq_m (int16x8_t __a, int32x4_t __b, mve_pred16_t __p)
> -{
> - return __arm_vqmovntq_m_s32 (__a, __b, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrev32q_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
> -{
> - return __arm_vrev32q_m_s16 (__inactive, __a, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vmvnq_m (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vmvnq_m_n_u32 (__inactive, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshruntq (uint16x8_t __a, int32x4_t __b, const int __imm)
> -{
> - return __arm_vqrshruntq_n_s32 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrunbq (uint16x8_t __a, int32x4_t __b, const int __imm)
> -{
> - return __arm_vqshrunbq_n_s32 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshruntq (uint16x8_t __a, int32x4_t __b, const int __imm)
> -{
> - return __arm_vqshruntq_n_s32 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> +__extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqmovunbq_m (uint16x8_t __a, int32x4_t __b, mve_pred16_t __p)
> +__arm_vqmovnbq_m (int16x8_t __a, int32x4_t __b, mve_pred16_t __p)
>  {
> - return __arm_vqmovunbq_m_s32 (__a, __b, __p);
> + return __arm_vqmovnbq_m_s32 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> +__extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqmovuntq_m (uint16x8_t __a, int32x4_t __b, mve_pred16_t __p)
> +__arm_vqmovntq_m (int16x8_t __a, int32x4_t __b, mve_pred16_t __p)
>  {
> - return __arm_vqmovuntq_m_s32 (__a, __b, __p);
> + return __arm_vqmovntq_m_s32 (__a, __b, __p);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> +__extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrntq (uint16x8_t __a, uint32x4_t __b, const int __imm)
> +__arm_vrev32q_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
>  {
> - return __arm_vqrshrntq_n_u32 (__a, __b, __imm);
> + return __arm_vrev32q_m_s16 (__inactive, __a, __p);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> +__extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrnbq (uint16x8_t __a, uint32x4_t __b, const int __imm)
> +__arm_vmvnq_m (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
>  {
> - return __arm_vqshrnbq_n_u32 (__a, __b, __imm);
> + return __arm_vmvnq_m_n_u32 (__inactive, __imm, __p);
>  }
> 
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrntq (uint16x8_t __a, uint32x4_t __b, const int __imm)
> +__arm_vqrshruntq (uint16x8_t __a, int32x4_t __b, const int __imm)
>  {
> - return __arm_vqshrntq_n_u32 (__a, __b, __imm);
> + return __arm_vqrshruntq_n_s32 (__a, __b, __imm);
>  }
> 
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrnbq (uint16x8_t __a, uint32x4_t __b, const int __imm)
> +__arm_vqshrunbq (uint16x8_t __a, int32x4_t __b, const int __imm)
>  {
> - return __arm_vrshrnbq_n_u32 (__a, __b, __imm);
> + return __arm_vqshrunbq_n_s32 (__a, __b, __imm);
>  }
> 
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrntq (uint16x8_t __a, uint32x4_t __b, const int __imm)
> +__arm_vqshruntq (uint16x8_t __a, int32x4_t __b, const int __imm)
>  {
> - return __arm_vrshrntq_n_u32 (__a, __b, __imm);
> + return __arm_vqshruntq_n_s32 (__a, __b, __imm);
>  }
> 
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrnbq (uint16x8_t __a, uint32x4_t __b, const int __imm)
> +__arm_vqmovunbq_m (uint16x8_t __a, int32x4_t __b, mve_pred16_t __p)
>  {
> - return __arm_vshrnbq_n_u32 (__a, __b, __imm);
> + return __arm_vqmovunbq_m_s32 (__a, __b, __p);
>  }
> 
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrntq (uint16x8_t __a, uint32x4_t __b, const int __imm)
> +__arm_vqmovuntq_m (uint16x8_t __a, int32x4_t __b, mve_pred16_t __p)
>  {
> - return __arm_vshrntq_n_u32 (__a, __b, __imm);
> + return __arm_vqmovuntq_m_s32 (__a, __b, __p);
>  }
> 
>  __extension__ extern __inline uint64_t
> @@ -20489,62 +19737,6 @@ __arm_vqdmulltq_m (int32x4_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
>   return __arm_vqdmulltq_m_s16 (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrnbq_m (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqrshrnbq_m_n_s32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrnbq_m (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqrshrnbq_m_n_s16 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrnbq_m (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqrshrnbq_m_n_u32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrnbq_m (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqrshrnbq_m_n_u16 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrntq_m (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqrshrntq_m_n_s32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrntq_m (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqrshrntq_m_n_s16 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrntq_m (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqrshrntq_m_n_u32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrntq_m (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqrshrntq_m_n_u16 (__a, __b, __imm, __p);
> -}
> -
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqrshrunbq_m (uint16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
> @@ -20573,62 +19765,6 @@ __arm_vqrshruntq_m (uint8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
>   return __arm_vqrshruntq_m_n_s16 (__a, __b, __imm, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrnbq_m (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqshrnbq_m_n_s32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrnbq_m (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqshrnbq_m_n_s16 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrnbq_m (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqshrnbq_m_n_u32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrnbq_m (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqshrnbq_m_n_u16 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrntq_m (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqshrntq_m_n_s32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrntq_m (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqshrntq_m_n_s16 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrntq_m (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqshrntq_m_n_u32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrntq_m (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vqshrntq_m_n_u16 (__a, __b, __imm, __p);
> -}
> -
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqshrunbq_m (uint16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
> @@ -20692,62 +19828,6 @@ __arm_vrmlsldavhaxq_p (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
>   return __arm_vrmlsldavhaxq_p_s32 (__a, __b, __c, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrnbq_m (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrnbq_m_n_s32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrnbq_m (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrnbq_m_n_s16 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrnbq_m (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrnbq_m_n_u32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrnbq_m (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrnbq_m_n_u16 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrntq_m (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrntq_m_n_s32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrntq_m (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrntq_m_n_s16 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrntq_m (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrntq_m_n_u32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrntq_m (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrntq_m_n_u16 (__a, __b, __imm, __p);
> -}
> -
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vshllbq_m (int16x8_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
> @@ -20804,62 +19884,6 @@ __arm_vshlltq_m (uint32x4_t __inactive, uint16x8_t __a, const int __imm, mve_pred16_t __p)
>   return __arm_vshlltq_m_n_u16 (__inactive, __a, __imm, __p);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrnbq_m (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrnbq_m_n_s32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrnbq_m (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrnbq_m_n_s16 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrnbq_m (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrnbq_m_n_u32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrnbq_m (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrnbq_m_n_u16 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrntq_m (int16x8_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrntq_m_n_s32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrntq_m (int8x16_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrntq_m_n_s16 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrntq_m (uint16x8_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrntq_m_n_u32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrntq_m (uint8x16_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrntq_m_n_u16 (__a, __b, __imm, __p);
> -}
> -
>  __extension__ extern __inline void
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vstrbq_scatter_offset (int8_t * __base, uint8x16_t __offset, int8x16_t __value)
> @@ -26775,14 +25799,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vbicq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
>    int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbicq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
> 
> -#define __arm_vqrshrnbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshrnbq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshrnbq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqrshrnbq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqrshrnbq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> -
>  #define __arm_vqrshrunbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -27006,14 +26022,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t]: __arm_vmovltq_m_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
>    int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t]: __arm_vmovltq_m_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), p2));})
> 
> -#define __arm_vshrnbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vshrnbq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vshrnbq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vshrnbq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vshrnbq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> -
>  #define __arm_vcvtaq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -27350,14 +26358,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(p1, double)), \
>    int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(p1, double)));})
> 
> -#define __arm_vrshrnbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vrshrnbq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vrshrnbq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vrshrnbq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vrshrnbq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> -
>  #define __arm_vrev16q_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -27370,22 +26370,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshruntq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshruntq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
> 
> -#define __arm_vqshrnbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshrnbq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshrnbq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqshrnbq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqshrnbq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> -
> -#define __arm_vqshrntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> -
>  #define __arm_vqrshruntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -27420,14 +26404,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqmovuntq_m_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqmovuntq_m_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
> 
> -#define __arm_vqrshrntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqrshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqrshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> -
>  #define __arm_vqrshruntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -28568,14 +27544,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vbicq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
>    int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbicq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
> 
> -#define __arm_vqrshrnbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshrnbq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshrnbq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqrshrnbq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqrshrnbq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> -
>  #define __arm_vqrshrunbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -28885,22 +27853,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vmovntq_m_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vmovntq_m_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> 
> -#define __arm_vshrnbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vshrnbq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vshrnbq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vshrnbq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vshrnbq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> -
> -#define __arm_vrshrnbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vrshrnbq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vrshrnbq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vrshrnbq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vrshrnbq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> -
>  #define __arm_vrev32q_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -28921,36 +27873,12 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrev16q_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
>    int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vrev16q_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2));})
> 
> -#define __arm_vqshrntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> -
>  #define __arm_vqrshruntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
>    int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshruntq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshruntq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
> 
> -#define __arm_vqrshrntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqrshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqrshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> -
> -#define __arm_vqshrnbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshrnbq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshrnbq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqshrnbq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqshrnbq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> -
>  #define __arm_vqmovuntq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -29474,22 +28402,6 @@ extern void *__ARM_undef;
> 
>  #endif /* MVE Integer.  */
> 
> -#define __arm_vshrntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> -
> -
> -#define __arm_vrshrntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vrshrntq_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vrshrntq_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vrshrntq_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vrshrntq_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
> 
> 
>  #define __arm_vmvnq_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
> @@ -29798,22 +28710,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t]: __arm_vshllbq_m_n_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \
>    int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t]: __arm_vshllbq_m_n_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3));})
> 
> -#define __arm_vshrntq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vshrntq_m_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vshrntq_m_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vshrntq_m_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vshrntq_m_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
> -
> -#define __arm_vshrnbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vshrnbq_m_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vshrnbq_m_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vshrnbq_m_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vshrnbq_m_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
> -
>  #define __arm_vshlltq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -29822,14 +28718,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t]: __arm_vshlltq_m_n_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \
>    int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t]: __arm_vshlltq_m_n_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3));})
> 
> -#define __arm_vrshrntq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vrshrntq_m_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vrshrntq_m_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vrshrntq_m_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vrshrntq_m_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
> -
>  #define __arm_vqshruntq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -29842,22 +28730,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshrunbq_m_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshrunbq_m_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3));})
> 
> -#define __arm_vqrshrnbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshrnbq_m_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshrnbq_m_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqrshrnbq_m_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqrshrnbq_m_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
> -
> -#define __arm_vqrshrntq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshrntq_m_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshrntq_m_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqrshrntq_m_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqrshrntq_m_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
> -
>  #define __arm_vqrshrunbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -29870,30 +28742,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqrshruntq_m_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqrshruntq_m_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3));})
> 
> -#define __arm_vqshrnbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshrnbq_m_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshrnbq_m_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqshrnbq_m_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqshrnbq_m_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
> -
> -#define __arm_vqshrntq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vqshrntq_m_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vqshrntq_m_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vqshrntq_m_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vqshrntq_m_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
> -
> -#define __arm_vrshrnbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int16x8_t]: __arm_vrshrnbq_m_n_s16 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int32x4_t]: __arm_vrshrnbq_m_n_s32 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint16x8_t]: __arm_vrshrnbq_m_n_u16 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32x4_t]: __arm_vrshrnbq_m_n_u32 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
> -
>  #define __arm_vmlaldavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    __typeof(p2) __p2 = (p2); \
> --
> 2.34.1



* RE: [PATCH 18/23] arm: [MVE intrinsics] add binary_rshift_narrow_unsigned shape
  2023-05-05  8:39 ` [PATCH 18/23] arm: [MVE intrinsics] add binary_rshift_narrow_unsigned shape Christophe Lyon
@ 2023-05-05 11:03   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 11:03 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 18/23] arm: [MVE intrinsics] add
> binary_rshift_narrow_unsigned shape
> 
> This patch adds the binary_rshift_narrow_unsigned shape description.

Ok.
Thanks,
Kyrill

> 
> 2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins-shapes.cc
> 	(binary_rshift_narrow_unsigned): New.
> 	* config/arm/arm-mve-builtins-shapes.h
> 	(binary_rshift_narrow_unsigned): New.
> ---
>  gcc/config/arm/arm-mve-builtins-shapes.cc | 48 +++++++++++++++++++++++
>  gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
>  2 files changed, 49 insertions(+)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
> index 88934e1ca15..e3bf586565c 100644
> --- a/gcc/config/arm/arm-mve-builtins-shapes.cc
> +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
> @@ -664,6 +664,54 @@ struct binary_rshift_narrow_def : public overloaded_base<0>
>  };
>  SHAPE (binary_rshift_narrow)
> 
> +/* <uT0:half>_t vfoo[_n_t0](<uT0:half>_t, <T0>_t, const int)
> +
> +   Vector saturating rounding shift right and narrow.
> +   Check that 'imm' is in the [1..#bits/2] range.
> +
> +   Example: vqshrunbq.
> +   uint8x16_t [__arm_]vqshrunbq[_n_s16](uint8x16_t a, int16x8_t b, const int imm)
> +   uint8x16_t [__arm_]vqshrunbq_m[_n_s16](uint8x16_t a, int16x8_t b, const int imm, mve_pred16_t p)  */
> +struct binary_rshift_narrow_unsigned_def : public overloaded_base<0>
> +{
> +  void
> +  build (function_builder &b, const function_group_info &group,
> +	 bool preserve_user_namespace) const override
> +  {
> +    b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
> +    build_all (b, "vhu0,vhu0,v0,ss32", group, MODE_n, preserve_user_namespace);
> +  }
> +
> +  tree
> +  resolve (function_resolver &r) const override
> +  {
> +    unsigned int i, nargs;
> +    type_suffix_index type;
> +    if (!r.check_gp_argument (3, i, nargs)
> +	|| (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES
> +	|| !r.require_integer_immediate (i))
> +      return error_mark_node;
> +
> +    type_suffix_index narrow_suffix
> +      = find_type_suffix (TYPE_unsigned,
> +			  type_suffixes[type].element_bits / 2);
> +
> +    if (!r.require_matching_vector_type (0, narrow_suffix))
> +      return error_mark_node;
> +
> +    return r.resolve_to (r.mode_suffix_id, type);
> +  }
> +
> +  bool
> +  check (function_checker &c) const override
> +  {
> +    unsigned int bits = c.type_suffix (0).element_bits;
> +    return c.require_immediate_range (2, 1, bits / 2);
> +  }
> +
> +};
> +SHAPE (binary_rshift_narrow_unsigned)
> +
>  /* <T0>xN_t vfoo[_t0](uint64_t, uint64_t)
> 
>     where there are N arguments in total.
> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
> index d72686d187b..ca1c1017e8e 100644
> --- a/gcc/config/arm/arm-mve-builtins-shapes.h
> +++ b/gcc/config/arm/arm-mve-builtins-shapes.h
> @@ -41,6 +41,7 @@ namespace arm_mve
>      extern const function_shape *const binary_orrq;
>      extern const function_shape *const binary_round_lshift;
>      extern const function_shape *const binary_rshift_narrow;
> +    extern const function_shape *const binary_rshift_narrow_unsigned;
>      extern const function_shape *const create;
>      extern const function_shape *const inherent;
>      extern const function_shape *const unary_convert;
> --
> 2.34.1
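
To make the shape concrete, here is a minimal usage sketch (not taken
from the patch; it assumes an MVE-enabled toolchain such as
arm-none-eabi-gcc with -march=armv8.1-m.main+mve, and the shift amount
4 is an arbitrary in-range value):

  #include <arm_mve.h>

  /* Narrow each signed 16-bit lane of b to unsigned 8 bits with
     saturation after shifting right by 4, writing the results into
     the bottom (even-numbered) byte lanes of a; the other byte lanes
     keep their values from a.  The check () hook above rejects shift
     amounts outside [1..8] for the _s16 variants at compile time.  */
  uint8x16_t
  narrow_bottom (uint8x16_t a, int16x8_t b)
  {
    return vqshrunbq (a, b, 4);       /* resolves to vqshrunbq_n_s16 */
  }

  /* Predicated form: lanes whose predicate bits are clear are copied
     from a instead.  */
  uint8x16_t
  narrow_bottom_m (uint8x16_t a, int16x8_t b, mve_pred16_t p)
  {
    return vqshrunbq_m (a, b, 4, p);  /* vqshrunbq_m_n_s16 */
  }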



* RE: [PATCH 19/23] arm: [MVE intrinsics] factorize vqrshrunb vqrshrunt vqshrunb vqshrunt
  2023-05-05  8:39 ` [PATCH 19/23] arm: [MVE intrinsics] factorize vqrshrunb vqrshrunt vqshrunb vqshrunt Christophe Lyon
@ 2023-05-05 11:04   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 11:04 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 19/23] arm: [MVE intrinsics] factorize vqrshrunb vqrshrunt
> vqshrunb vqshrunt
> 
> Factorize vqrshrunb, vqrshrunt, vqshrunb, vqshrunt so that they use
> existing patterns.

Ok.
Thanks,
Kyrill
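
As a minimal sketch of what gets factorized here (assuming an
MVE-enabled toolchain, e.g. -march=armv8.1-m.main+mve; the shift amount
3 and the chaining are arbitrary), all four intrinsics now expand
through the shared @mve_<mve_insn>q_n_<supf><mode> and
@mve_<mve_insn>q_m_n_<supf><mode> patterns:

  #include <arm_mve.h>

  /* Each call narrows signed 16-bit lanes to saturated unsigned 8-bit
     lanes; 'b'/'t' selects the bottom/top byte lanes of the result,
     'r' adds rounding, and the _m form is predicated.  */
  uint8x16_t
  shrun_all (uint8x16_t a, int16x8_t b, mve_pred16_t p)
  {
    a = vqshrunbq (a, b, 3);          /* vqshrunb.s16 */
    a = vqshruntq (a, b, 3);          /* vqshrunt.s16 */
    a = vqrshrunbq_m (a, b, 3, p);    /* vpst; vqrshrunbt.s16 */
    return vqrshruntq (a, b, 3);      /* vqrshrunt.s16 */
  }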

> 
> 2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/iterators.md (MVE_SHRN_N): Add VQRSHRUNBQ,
> 	VQRSHRUNTQ, VQSHRUNBQ, VQSHRUNTQ.
> 	(MVE_SHRN_M_N): Likewise.
> 	(mve_insn): Add vqrshrunb, vqrshrunt, vqshrunb, vqshrunt.
> 	(isu): Add VQRSHRUNBQ, VQRSHRUNTQ, VQSHRUNBQ, VQSHRUNTQ.
> 	(supf): Likewise.
> 	* config/arm/mve.md (mve_vqrshrunbq_n_s<mode>): Remove.
> 	(mve_vqrshruntq_n_s<mode>): Remove.
> 	(mve_vqshrunbq_n_s<mode>): Remove.
> 	(mve_vqshruntq_n_s<mode>): Remove.
> 	(mve_vqrshrunbq_m_n_s<mode>): Remove.
> 	(mve_vqrshruntq_m_n_s<mode>): Remove.
> 	(mve_vqshrunbq_m_n_s<mode>): Remove.
> 	(mve_vqshruntq_m_n_s<mode>): Remove.
> ---
>  gcc/config/arm/iterators.md |  32 +++++++++
>  gcc/config/arm/mve.md       | 140 +++---------------------------------
>  2 files changed, 40 insertions(+), 132 deletions(-)
> 
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index d64c924a513..583206dac9e 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -471,8 +471,12 @@ (define_int_iterator MVE_RSHIFT_N   [
>  (define_int_iterator MVE_SHRN_N [
>  		     VQRSHRNBQ_N_S VQRSHRNBQ_N_U
>  		     VQRSHRNTQ_N_S VQRSHRNTQ_N_U
> +		     VQRSHRUNBQ_N_S
> +		     VQRSHRUNTQ_N_S
>  		     VQSHRNBQ_N_S VQSHRNBQ_N_U
>  		     VQSHRNTQ_N_S VQSHRNTQ_N_U
> +		     VQSHRUNBQ_N_S
> +		     VQSHRUNTQ_N_S
>  		     VRSHRNBQ_N_S VRSHRNBQ_N_U
>  		     VRSHRNTQ_N_S VRSHRNTQ_N_U
>  		     VSHRNBQ_N_S VSHRNBQ_N_U
> @@ -482,8 +486,12 @@ (define_int_iterator MVE_SHRN_N [
>  (define_int_iterator MVE_SHRN_M_N [
>  		     VQRSHRNBQ_M_N_S VQRSHRNBQ_M_N_U
>  		     VQRSHRNTQ_M_N_S VQRSHRNTQ_M_N_U
> +		     VQRSHRUNBQ_M_N_S
> +		     VQRSHRUNTQ_M_N_S
>  		     VQSHRNBQ_M_N_S VQSHRNBQ_M_N_U
>  		     VQSHRNTQ_M_N_S VQSHRNTQ_M_N_U
> +		     VQSHRUNBQ_M_N_S
> +		     VQSHRUNTQ_M_N_S
>  		     VRSHRNBQ_M_N_S VRSHRNBQ_M_N_U
>  		     VRSHRNTQ_M_N_S VRSHRNTQ_M_N_U
>  		     VSHRNBQ_M_N_S VSHRNBQ_M_N_U
> @@ -594,6 +602,10 @@ (define_int_attr mve_insn [
>  		 (VQRSHRNBQ_N_S "vqrshrnb") (VQRSHRNBQ_N_U "vqrshrnb")
>  		 (VQRSHRNTQ_M_N_S "vqrshrnt") (VQRSHRNTQ_M_N_U "vqrshrnt")
>  		 (VQRSHRNTQ_N_S "vqrshrnt") (VQRSHRNTQ_N_U "vqrshrnt")
> +		 (VQRSHRUNBQ_M_N_S "vqrshrunb")
> +		 (VQRSHRUNBQ_N_S "vqrshrunb")
> +		 (VQRSHRUNTQ_M_N_S "vqrshrunt")
> +		 (VQRSHRUNTQ_N_S "vqrshrunt")
>  		 (VQSHLQ_M_N_S "vqshl") (VQSHLQ_M_N_U "vqshl")
>  		 (VQSHLQ_M_R_S "vqshl") (VQSHLQ_M_R_U "vqshl")
>  		 (VQSHLQ_M_S "vqshl") (VQSHLQ_M_U "vqshl")
> @@ -604,6 +616,10 @@ (define_int_attr mve_insn [
>  		 (VQSHRNBQ_N_S "vqshrnb") (VQSHRNBQ_N_U "vqshrnb")
>  		 (VQSHRNTQ_M_N_S "vqshrnt") (VQSHRNTQ_M_N_U "vqshrnt")
>  		 (VQSHRNTQ_N_S "vqshrnt") (VQSHRNTQ_N_U "vqshrnt")
> +		 (VQSHRUNBQ_M_N_S "vqshrunb")
> +		 (VQSHRUNBQ_N_S "vqshrunb")
> +		 (VQSHRUNTQ_M_N_S "vqshrunt")
> +		 (VQSHRUNTQ_N_S "vqshrunt")
>  		 (VQSUBQ_M_N_S "vqsub") (VQSUBQ_M_N_U "vqsub")
>  		 (VQSUBQ_M_S "vqsub") (VQSUBQ_M_U "vqsub")
>  		 (VQSUBQ_N_S "vqsub") (VQSUBQ_N_U "vqsub")
> @@ -640,10 +656,18 @@ (define_int_attr isu    [
>  		 (VQRSHRNBQ_N_S "s") (VQRSHRNBQ_N_U "u")
>  		 (VQRSHRNTQ_M_N_S "s") (VQRSHRNTQ_M_N_U "u")
>  		 (VQRSHRNTQ_N_S "s") (VQRSHRNTQ_N_U "u")
> +		 (VQRSHRUNBQ_M_N_S "s")
> +		 (VQRSHRUNBQ_N_S "s")
> +		 (VQRSHRUNTQ_M_N_S "s")
> +		 (VQRSHRUNTQ_N_S "s")
>  		 (VQSHRNBQ_M_N_S "s") (VQSHRNBQ_M_N_U "u")
>  		 (VQSHRNBQ_N_S "s") (VQSHRNBQ_N_U "u")
>  		 (VQSHRNTQ_M_N_S "s") (VQSHRNTQ_M_N_U "u")
>  		 (VQSHRNTQ_N_S "s") (VQSHRNTQ_N_U "u")
> +		 (VQSHRUNBQ_M_N_S "s")
> +		 (VQSHRUNBQ_N_S "s")
> +		 (VQSHRUNTQ_M_N_S "s")
> +		 (VQSHRUNTQ_N_S "s")
>  		 (VRSHRNBQ_M_N_S "i") (VRSHRNBQ_M_N_U "i")
>  		 (VRSHRNBQ_N_S "i") (VRSHRNBQ_N_U "i")
>  		 (VRSHRNTQ_M_N_S "i") (VRSHRNTQ_M_N_U "i")
> @@ -1816,6 +1840,14 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
>  		       (VQRDMULHQ_M_N_S "s")
>  		       (VQDMULHQ_S "s")
>  		       (VQRDMULHQ_S "s")
> +		       (VQRSHRUNBQ_M_N_S "s")
> +		       (VQRSHRUNBQ_N_S "s")
> +		       (VQRSHRUNTQ_M_N_S "s")
> +		       (VQRSHRUNTQ_N_S "s")
> +		       (VQSHRUNBQ_M_N_S "s")
> +		       (VQSHRUNBQ_N_S "s")
> +		       (VQSHRUNTQ_M_N_S "s")
> +		       (VQSHRUNTQ_N_S "s")
>  		       ])
> 
>  ;; Both kinds of return insn.
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index d64a075c7bb..20ce7ecb3d6 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -2166,8 +2166,12 @@ (define_insn "mve_vcvtq_m_to_f_<supf><mode>"
>  ;;
>  ;; [vqrshrnbq_n_u, vqrshrnbq_n_s]
>  ;; [vqrshrntq_n_u, vqrshrntq_n_s]
> +;; [vqrshrunbq_n_s]
> +;; [vqrshruntq_n_s]
>  ;; [vqshrnbq_n_u, vqshrnbq_n_s]
>  ;; [vqshrntq_n_u, vqshrntq_n_s]
> +;; [vqshrunbq_n_s]
> +;; [vqshruntq_n_s]
>  ;; [vrshrnbq_n_s, vrshrnbq_n_u]
>  ;; [vrshrntq_n_u, vrshrntq_n_s]
>  ;; [vshrnbq_n_u, vshrnbq_n_s]
> @@ -2186,22 +2190,6 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
>    [(set_attr "type" "mve_move")
>  ])
> 
> -;;
> -;; [vqrshrunbq_n_s])
> -;;
> -(define_insn "mve_vqrshrunbq_n_s<mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -				 (match_operand:MVE_5 2 "s_register_operand" "w")
> -				 (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
> -	 VQRSHRUNBQ_N_S))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vqrshrunb.s%#<V_sz_elem>\t%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -])
> -
>  ;;
>  ;; [vrmlaldavhaq_s vrmlaldavhaq_u])
>  ;;
> @@ -4002,54 +3990,6 @@ (define_insn "mve_vqmovuntq_m_s<mode>"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> 
> -;;
> -;; [vqrshruntq_n_s])
> -;;
> -(define_insn "mve_vqrshruntq_n_s<mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
> -	 VQRSHRUNTQ_N_S))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vqrshrunt.s%#<V_sz_elem>	%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -])
> -
> -;;
> -;; [vqshrunbq_n_s])
> -;;
> -(define_insn "mve_vqshrunbq_n_s<mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
> -	 VQSHRUNBQ_N_S))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vqshrunb.s%#<V_sz_elem>	%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -])
> -
> -;;
> -;; [vqshruntq_n_s])
> -;;
> -(define_insn "mve_vqshruntq_n_s<mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")]
> -	 VQSHRUNTQ_N_S))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vqshrunt.s%#<V_sz_elem>	%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -])
> -
>  ;;
>  ;; [vrev32q_m_f])
>  ;;
> @@ -4892,8 +4832,12 @@ (define_insn "mve_vmlaldavaxq_p_<supf><mode>"
>  ;;
>  ;; [vqrshrnbq_m_n_u, vqrshrnbq_m_n_s]
>  ;; [vqrshrntq_m_n_s, vqrshrntq_m_n_u]
> +;; [vqrshrunbq_m_n_s]
> +;; [vqrshruntq_m_n_s]
>  ;; [vqshrnbq_m_n_u, vqshrnbq_m_n_s]
>  ;; [vqshrntq_m_n_s, vqshrntq_m_n_u]
> +;; [vqshrunbq_m_n_s]
> +;; [vqshruntq_m_n_s]
>  ;; [vrshrnbq_m_n_u, vrshrnbq_m_n_s]
>  ;; [vrshrntq_m_n_u, vrshrntq_m_n_s]
>  ;; [vshrnbq_m_n_s, vshrnbq_m_n_u]
> @@ -5100,74 +5044,6 @@ (define_insn "mve_vqdmulltq_m_s<mode>"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> 
> -;;
> -;; [vqrshrunbq_m_n_s])
> -;;
> -(define_insn "mve_vqrshrunbq_m_n_s<mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
> -		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
> -	 VQRSHRUNBQ_M_N_S))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vqrshrunbt.s%#<V_sz_elem>\t%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vqrshruntq_m_n_s])
> -;;
> -(define_insn "mve_vqrshruntq_m_n_s<mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
> -		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
> -	 VQRSHRUNTQ_M_N_S))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vqrshruntt.s%#<V_sz_elem>\t%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vqshrunbq_m_n_s])
> -;;
> -(define_insn "mve_vqshrunbq_m_n_s<mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
> -		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
> -	 VQSHRUNBQ_M_N_S))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vqshrunbt.s%#<V_sz_elem>\t%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vqshruntq_m_n_s])
> -;;
> -(define_insn "mve_vqshruntq_m_n_s<mode>"
> -  [
> -   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
> -	(unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0")
> -		       (match_operand:MVE_5 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>")
> -		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
> -	 VQSHRUNTQ_M_N_S))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vqshruntt.s%#<V_sz_elem>\t%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
>  ;;
>  ;; [vrmlaldavhaq_p_u])
>  ;;
> --
> 2.34.1
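
As a quick sanity check on the factorization above (my own sketch, not
part of the patch): all four unsigned-saturating narrowing shifts now go
through the shared @mve_<mve_insn>q_n_<supf><mode> pattern, so something
like

  #include <arm_mve.h>

  /* Hypothetical example: should still emit vqrshrunb.s16 q0, q1, #1.  */
  uint8x16_t
  f (uint8x16_t a, int16x8_t b)
  {
    return vqrshrunbq_n_s16 (a, b, 1);
  }

continues to assemble exactly as before; only the insn pattern serving
it has changed.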


^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 20/23] arm: [MVE intrinsics] rework vqrshrunbq vqrshruntq vqshrunbq vqshruntq
  2023-05-05  8:39 ` [PATCH 20/23] arm: [MVE intrinsics] rework vqrshrunbq vqrshruntq vqshrunbq vqshruntq Christophe Lyon
@ 2023-05-05 11:05   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 11:05 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 20/23] arm: [MVE intrinsics] rework vqrshrunbq vqrshruntq
> vqshrunbq vqshruntq
> 
> Implement vqrshrunbq, vqrshruntq, vqshrunbq, vqshruntq using the new
> MVE builtins framework.

Ok.
Thanks,
Kyrill
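
For anyone following along, a minimal usage sketch (mine, not from the
patch; the signatures are unchanged and come from arm_mve.h):

  #include <arm_mve.h>

  /* vqshrunbq narrows each signed 16-bit lane of B by an immediate
     shift, saturates to unsigned 8-bit, and writes the results into
     the even-numbered bytes of A.  */
  uint8x16_t
  narrow_even_lanes (uint8x16_t a, int16x8_t b)
  {
    return vqshrunbq (a, b, 4);   /* resolves to vqshrunbq_n_s16 */
  }

After the rework, the overload above is resolved by the shapes framework
(binary_rshift_narrow_unsigned) instead of the _Generic tables removed
below.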

> 
> 2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins-base.cc (FUNCTION_ONLY_N_NO_U_F): New.
> 	(vqshrunbq, vqshruntq, vqrshrunbq, vqrshruntq): New.
> 	* config/arm/arm-mve-builtins-base.def (vqshrunbq, vqshruntq)
> 	(vqrshrunbq, vqrshruntq): New.
> 	* config/arm/arm-mve-builtins-base.h (vqshrunbq, vqshruntq)
> 	(vqrshrunbq, vqrshruntq): New.
> 	* config/arm/arm-mve-builtins.cc
> 	(function_instance::has_inactive_argument): Handle vqshrunbq,
> 	vqshruntq, vqrshrunbq, vqrshruntq.
> 	* config/arm/arm_mve.h (vqrshrunbq): Remove.
> 	(vqrshruntq): Remove.
> 	(vqrshrunbq_m): Remove.
> 	(vqrshruntq_m): Remove.
> 	(vqrshrunbq_n_s16): Remove.
> 	(vqrshrunbq_n_s32): Remove.
> 	(vqrshruntq_n_s16): Remove.
> 	(vqrshruntq_n_s32): Remove.
> 	(vqrshrunbq_m_n_s32): Remove.
> 	(vqrshrunbq_m_n_s16): Remove.
> 	(vqrshruntq_m_n_s32): Remove.
> 	(vqrshruntq_m_n_s16): Remove.
> 	(__arm_vqrshrunbq_n_s16): Remove.
> 	(__arm_vqrshrunbq_n_s32): Remove.
> 	(__arm_vqrshruntq_n_s16): Remove.
> 	(__arm_vqrshruntq_n_s32): Remove.
> 	(__arm_vqrshrunbq_m_n_s32): Remove.
> 	(__arm_vqrshrunbq_m_n_s16): Remove.
> 	(__arm_vqrshruntq_m_n_s32): Remove.
> 	(__arm_vqrshruntq_m_n_s16): Remove.
> 	(__arm_vqrshrunbq): Remove.
> 	(__arm_vqrshruntq): Remove.
> 	(__arm_vqrshrunbq_m): Remove.
> 	(__arm_vqrshruntq_m): Remove.
> 	(vqshrunbq): Remove.
> 	(vqshruntq): Remove.
> 	(vqshrunbq_m): Remove.
> 	(vqshruntq_m): Remove.
> 	(vqshrunbq_n_s16): Remove.
> 	(vqshruntq_n_s16): Remove.
> 	(vqshrunbq_n_s32): Remove.
> 	(vqshruntq_n_s32): Remove.
> 	(vqshrunbq_m_n_s32): Remove.
> 	(vqshrunbq_m_n_s16): Remove.
> 	(vqshruntq_m_n_s32): Remove.
> 	(vqshruntq_m_n_s16): Remove.
> 	(__arm_vqshrunbq_n_s16): Remove.
> 	(__arm_vqshruntq_n_s16): Remove.
> 	(__arm_vqshrunbq_n_s32): Remove.
> 	(__arm_vqshruntq_n_s32): Remove.
> 	(__arm_vqshrunbq_m_n_s32): Remove.
> 	(__arm_vqshrunbq_m_n_s16): Remove.
> 	(__arm_vqshruntq_m_n_s32): Remove.
> 	(__arm_vqshruntq_m_n_s16): Remove.
> 	(__arm_vqshrunbq): Remove.
> 	(__arm_vqshruntq): Remove.
> 	(__arm_vqshrunbq_m): Remove.
> 	(__arm_vqshruntq_m): Remove.
> ---
>  gcc/config/arm/arm-mve-builtins-base.cc  |  13 +
>  gcc/config/arm/arm-mve-builtins-base.def |   4 +
>  gcc/config/arm/arm-mve-builtins-base.h   |   4 +
>  gcc/config/arm/arm-mve-builtins.cc       |   4 +
>  gcc/config/arm/arm_mve.h                 | 320 -----------------------
>  5 files changed, 25 insertions(+), 320 deletions(-)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
> index c95abe70239..e7d2e0abffc 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.cc
> +++ b/gcc/config/arm/arm-mve-builtins-base.cc
> @@ -184,6 +184,15 @@ namespace arm_mve {
>      -1, -1, -1,								\
>      UNSPEC##_M_N_S, UNSPEC##_M_N_U, -1))
> 
> +  /* Helper for builtins with only unspec codes, _m predicated
> +     overrides, only _n version, no unsigned, no floating-point.  */
> +#define FUNCTION_ONLY_N_NO_U_F(NAME, UNSPEC) FUNCTION			\
> +  (NAME, unspec_mve_function_exact_insn,				\
> +   (-1, -1, -1,								\
> +    UNSPEC##_N_S, -1, -1,						\
> +    -1, -1, -1,								\
> +    UNSPEC##_M_N_S, -1, -1))
> +
>  FUNCTION_WITHOUT_N (vabdq, VABDQ)
>  FUNCTION_WITH_RTX_M_N (vaddq, PLUS, VADDQ)
>  FUNCTION_WITH_RTX_M (vandq, AND, VANDQ)
> @@ -203,8 +212,12 @@ FUNCTION_WITH_M_N_NO_U_F (vqrdmulhq, VQRDMULHQ)
>  FUNCTION_WITH_M_N_R (vqshlq, VQSHLQ)
>  FUNCTION_ONLY_N_NO_F (vqrshrnbq, VQRSHRNBQ)
>  FUNCTION_ONLY_N_NO_F (vqrshrntq, VQRSHRNTQ)
> +FUNCTION_ONLY_N_NO_U_F (vqrshrunbq, VQRSHRUNBQ)
> +FUNCTION_ONLY_N_NO_U_F (vqrshruntq, VQRSHRUNTQ)
>  FUNCTION_ONLY_N_NO_F (vqshrnbq, VQSHRNBQ)
>  FUNCTION_ONLY_N_NO_F (vqshrntq, VQSHRNTQ)
> +FUNCTION_ONLY_N_NO_U_F (vqshrunbq, VQSHRUNBQ)
> +FUNCTION_ONLY_N_NO_U_F (vqshruntq, VQSHRUNTQ)
>  FUNCTION_WITH_M_N_NO_F (vqsubq, VQSUBQ)
>  FUNCTION (vreinterpretq, vreinterpretq_impl,)
>  FUNCTION_WITHOUT_N_NO_F (vrhaddq, VRHADDQ)
> diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
> index 3dd40086663..50cb2d055e9 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.def
> +++ b/gcc/config/arm/arm-mve-builtins-base.def
> @@ -36,10 +36,14 @@ DEF_MVE_FUNCTION (vqrdmulhq, binary_opt_n, all_signed, m_or_none)
>  DEF_MVE_FUNCTION (vqrshlq, binary_round_lshift, all_integer, m_or_none)
>  DEF_MVE_FUNCTION (vqrshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
>  DEF_MVE_FUNCTION (vqrshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
> +DEF_MVE_FUNCTION (vqrshrunbq, binary_rshift_narrow_unsigned, signed_16_32, m_or_none)
> +DEF_MVE_FUNCTION (vqrshruntq, binary_rshift_narrow_unsigned, signed_16_32, m_or_none)
>  DEF_MVE_FUNCTION (vqshlq, binary_lshift, all_integer, m_or_none)
>  DEF_MVE_FUNCTION (vqshlq, binary_lshift_r, all_integer, m_or_none)
>  DEF_MVE_FUNCTION (vqshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
>  DEF_MVE_FUNCTION (vqshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
> +DEF_MVE_FUNCTION (vqshrunbq, binary_rshift_narrow_unsigned, signed_16_32, m_or_none)
> +DEF_MVE_FUNCTION (vqshruntq, binary_rshift_narrow_unsigned, signed_16_32, m_or_none)
>  DEF_MVE_FUNCTION (vqsubq, binary_opt_n, all_integer, m_or_none)
>  DEF_MVE_FUNCTION (vreinterpretq, unary_convert, reinterpret_integer, none)
>  DEF_MVE_FUNCTION (vrhaddq, binary, all_integer, mx_or_none)
> diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
> index 9e11ac83681..fcac772bc5b 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.h
> +++ b/gcc/config/arm/arm-mve-builtins-base.h
> @@ -41,9 +41,13 @@ extern const function_base *const vqrdmulhq;
>  extern const function_base *const vqrshlq;
>  extern const function_base *const vqrshrnbq;
>  extern const function_base *const vqrshrntq;
> +extern const function_base *const vqrshrunbq;
> +extern const function_base *const vqrshruntq;
>  extern const function_base *const vqshlq;
>  extern const function_base *const vqshrnbq;
>  extern const function_base *const vqshrntq;
> +extern const function_base *const vqshrunbq;
> +extern const function_base *const vqshruntq;
>  extern const function_base *const vqsubq;
>  extern const function_base *const vreinterpretq;
>  extern const function_base *const vrhaddq;
> diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
> index 667bbc58483..4fc6160a794 100644
> --- a/gcc/config/arm/arm-mve-builtins.cc
> +++ b/gcc/config/arm/arm-mve-builtins.cc
> @@ -674,8 +674,12 @@ function_instance::has_inactive_argument () const
>        || (base == functions::vqrshlq && mode_suffix_id == MODE_n)
>        || base == functions::vqrshrnbq
>        || base == functions::vqrshrntq
> +      || base == functions::vqrshrunbq
> +      || base == functions::vqrshruntq
>        || base == functions::vqshrnbq
>        || base == functions::vqshrntq
> +      || base == functions::vqshrunbq
> +      || base == functions::vqshruntq
>        || (base == functions::vrshlq && mode_suffix_id == MODE_n)
>        || base == functions::vrshrnbq
>        || base == functions::vrshrntq
> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> index ed7852e2460..b2701f1135d 100644
> --- a/gcc/config/arm/arm_mve.h
> +++ b/gcc/config/arm/arm_mve.h
> @@ -113,7 +113,6 @@
>  #define vrmlaldavhxq(__a, __b) __arm_vrmlaldavhxq(__a, __b)
>  #define vabavq(__a, __b, __c) __arm_vabavq(__a, __b, __c)
>  #define vbicq_m_n(__a, __imm, __p) __arm_vbicq_m_n(__a, __imm, __p)
> -#define vqrshrunbq(__a, __b, __imm) __arm_vqrshrunbq(__a, __b, __imm)
>  #define vrmlaldavhaq(__a, __b, __c) __arm_vrmlaldavhaq(__a, __b, __c)
>  #define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
>  #define vpselq(__a, __b, __p) __arm_vpselq(__a, __b, __p)
> @@ -190,9 +189,6 @@
>  #define vqmovnbq_m(__a, __b, __p) __arm_vqmovnbq_m(__a, __b, __p)
>  #define vqmovntq_m(__a, __b, __p) __arm_vqmovntq_m(__a, __b, __p)
>  #define vrev32q_m(__inactive, __a, __p) __arm_vrev32q_m(__inactive, __a,
> __p)
> -#define vqrshruntq(__a, __b, __imm) __arm_vqrshruntq(__a, __b, __imm)
> -#define vqshrunbq(__a, __b, __imm) __arm_vqshrunbq(__a, __b, __imm)
> -#define vqshruntq(__a, __b, __imm) __arm_vqshruntq(__a, __b, __imm)
>  #define vqmovunbq_m(__a, __b, __p) __arm_vqmovunbq_m(__a, __b, __p)
>  #define vqmovuntq_m(__a, __b, __p) __arm_vqmovuntq_m(__a, __b, __p)
>  #define vsriq_m(__a, __b, __imm, __p) __arm_vsriq_m(__a, __b, __imm,
> __p)
> @@ -236,10 +232,6 @@
>  #define vmulltq_poly_m(__inactive, __a, __b, __p)
> __arm_vmulltq_poly_m(__inactive, __a, __b, __p)
>  #define vqdmullbq_m(__inactive, __a, __b, __p)
> __arm_vqdmullbq_m(__inactive, __a, __b, __p)
>  #define vqdmulltq_m(__inactive, __a, __b, __p)
> __arm_vqdmulltq_m(__inactive, __a, __b, __p)
> -#define vqrshrunbq_m(__a, __b, __imm, __p) __arm_vqrshrunbq_m(__a,
> __b, __imm, __p)
> -#define vqrshruntq_m(__a, __b, __imm, __p) __arm_vqrshruntq_m(__a,
> __b, __imm, __p)
> -#define vqshrunbq_m(__a, __b, __imm, __p) __arm_vqshrunbq_m(__a,
> __b, __imm, __p)
> -#define vqshruntq_m(__a, __b, __imm, __p) __arm_vqshruntq_m(__a, __b,
> __imm, __p)
>  #define vrmlaldavhaq_p(__a, __b, __c, __p) __arm_vrmlaldavhaq_p(__a,
> __b, __c, __p)
>  #define vrmlaldavhaxq_p(__a, __b, __c, __p) __arm_vrmlaldavhaxq_p(__a,
> __b, __c, __p)
>  #define vrmlsldavhaq_p(__a, __b, __c, __p) __arm_vrmlsldavhaq_p(__a,
> __b, __c, __p)
> @@ -889,8 +881,6 @@
>  #define vcvtq_m_f16_u16(__inactive, __a, __p)
> __arm_vcvtq_m_f16_u16(__inactive, __a, __p)
>  #define vcvtq_m_f32_s32(__inactive, __a, __p)
> __arm_vcvtq_m_f32_s32(__inactive, __a, __p)
>  #define vcvtq_m_f32_u32(__inactive, __a, __p)
> __arm_vcvtq_m_f32_u32(__inactive, __a, __p)
> -#define vqrshrunbq_n_s16(__a, __b,  __imm)
> __arm_vqrshrunbq_n_s16(__a, __b,  __imm)
> -#define vqrshrunbq_n_s32(__a, __b,  __imm)
> __arm_vqrshrunbq_n_s32(__a, __b,  __imm)
>  #define vrmlaldavhaq_s32(__a, __b, __c) __arm_vrmlaldavhaq_s32(__a,
> __b, __c)
>  #define vrmlaldavhaq_u32(__a, __b, __c) __arm_vrmlaldavhaq_u32(__a,
> __b, __c)
>  #define vshlcq_s8(__a,  __b,  __imm) __arm_vshlcq_s8(__a,  __b,  __imm)
> @@ -1203,9 +1193,6 @@
>  #define vcmpneq_m_f16(__a, __b, __p) __arm_vcmpneq_m_f16(__a, __b,
> __p)
>  #define vcmpneq_m_n_f16(__a, __b, __p) __arm_vcmpneq_m_n_f16(__a,
> __b, __p)
>  #define vmvnq_m_n_u16(__inactive,  __imm, __p)
> __arm_vmvnq_m_n_u16(__inactive,  __imm, __p)
> -#define vqrshruntq_n_s16(__a, __b,  __imm) __arm_vqrshruntq_n_s16(__a,
> __b,  __imm)
> -#define vqshrunbq_n_s16(__a, __b,  __imm) __arm_vqshrunbq_n_s16(__a,
> __b,  __imm)
> -#define vqshruntq_n_s16(__a, __b,  __imm) __arm_vqshruntq_n_s16(__a,
> __b,  __imm)
>  #define vcvtmq_m_u16_f16(__inactive, __a, __p)
> __arm_vcvtmq_m_u16_f16(__inactive, __a, __p)
>  #define vcvtnq_m_u16_f16(__inactive, __a, __p)
> __arm_vcvtnq_m_u16_f16(__inactive, __a, __p)
>  #define vcvtpq_m_u16_f16(__inactive, __a, __p)
> __arm_vcvtpq_m_u16_f16(__inactive, __a, __p)
> @@ -1278,9 +1265,6 @@
>  #define vcmpneq_m_f32(__a, __b, __p) __arm_vcmpneq_m_f32(__a, __b,
> __p)
>  #define vcmpneq_m_n_f32(__a, __b, __p) __arm_vcmpneq_m_n_f32(__a,
> __b, __p)
>  #define vmvnq_m_n_u32(__inactive,  __imm, __p)
> __arm_vmvnq_m_n_u32(__inactive,  __imm, __p)
> -#define vqrshruntq_n_s32(__a, __b,  __imm) __arm_vqrshruntq_n_s32(__a,
> __b,  __imm)
> -#define vqshrunbq_n_s32(__a, __b,  __imm) __arm_vqshrunbq_n_s32(__a,
> __b,  __imm)
> -#define vqshruntq_n_s32(__a, __b,  __imm) __arm_vqshruntq_n_s32(__a,
> __b,  __imm)
>  #define vcvtmq_m_u32_f32(__inactive, __a, __p)
> __arm_vcvtmq_m_u32_f32(__inactive, __a, __p)
>  #define vcvtnq_m_u32_f32(__inactive, __a, __p)
> __arm_vcvtnq_m_u32_f32(__inactive, __a, __p)
>  #define vcvtpq_m_u32_f32(__inactive, __a, __p)
> __arm_vcvtpq_m_u32_f32(__inactive, __a, __p)
> @@ -1466,14 +1450,6 @@
>  #define vqdmulltq_m_n_s16(__inactive, __a, __b, __p)
> __arm_vqdmulltq_m_n_s16(__inactive, __a, __b, __p)
>  #define vqdmulltq_m_s32(__inactive, __a, __b, __p)
> __arm_vqdmulltq_m_s32(__inactive, __a, __b, __p)
>  #define vqdmulltq_m_s16(__inactive, __a, __b, __p)
> __arm_vqdmulltq_m_s16(__inactive, __a, __b, __p)
> -#define vqrshrunbq_m_n_s32(__a, __b,  __imm, __p)
> __arm_vqrshrunbq_m_n_s32(__a, __b,  __imm, __p)
> -#define vqrshrunbq_m_n_s16(__a, __b,  __imm, __p)
> __arm_vqrshrunbq_m_n_s16(__a, __b,  __imm, __p)
> -#define vqrshruntq_m_n_s32(__a, __b,  __imm, __p)
> __arm_vqrshruntq_m_n_s32(__a, __b,  __imm, __p)
> -#define vqrshruntq_m_n_s16(__a, __b,  __imm, __p)
> __arm_vqrshruntq_m_n_s16(__a, __b,  __imm, __p)
> -#define vqshrunbq_m_n_s32(__a, __b,  __imm, __p)
> __arm_vqshrunbq_m_n_s32(__a, __b,  __imm, __p)
> -#define vqshrunbq_m_n_s16(__a, __b,  __imm, __p)
> __arm_vqshrunbq_m_n_s16(__a, __b,  __imm, __p)
> -#define vqshruntq_m_n_s32(__a, __b,  __imm, __p)
> __arm_vqshruntq_m_n_s32(__a, __b,  __imm, __p)
> -#define vqshruntq_m_n_s16(__a, __b,  __imm, __p)
> __arm_vqshruntq_m_n_s16(__a, __b,  __imm, __p)
>  #define vrmlaldavhaq_p_s32(__a, __b, __c, __p)
> __arm_vrmlaldavhaq_p_s32(__a, __b, __c, __p)
>  #define vrmlaldavhaq_p_u32(__a, __b, __c, __p)
> __arm_vrmlaldavhaq_p_u32(__a, __b, __c, __p)
>  #define vrmlaldavhaxq_p_s32(__a, __b, __c, __p)
> __arm_vrmlaldavhaxq_p_s32(__a, __b, __c, __p)
> @@ -4445,20 +4421,6 @@ __arm_vbicq_m_n_u32 (uint32x4_t __a, const int __imm, mve_pred16_t __p)
>    return __builtin_mve_vbicq_m_n_uv4si (__a, __imm, __p);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrunbq_n_s16 (uint8x16_t __a, int16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqrshrunbq_n_sv8hi (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrunbq_n_s32 (uint16x8_t __a, int32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqrshrunbq_n_sv4si (__a, __b, __imm);
> -}
> -
>  __extension__ extern __inline int64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrmlaldavhaq_s32 (int64_t __a, int32x4_t __b, int32x4_t __c)
> @@ -6320,27 +6282,6 @@ __arm_vmvnq_m_n_u16 (uint16x8_t __inactive, const int __imm, mve_pred16_t __p)
>    return __builtin_mve_vmvnq_m_n_uv8hi (__inactive, __imm, __p);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshruntq_n_s16 (uint8x16_t __a, int16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqrshruntq_n_sv8hi (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrunbq_n_s16 (uint8x16_t __a, int16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqshrunbq_n_sv8hi (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshruntq_n_s16 (uint8x16_t __a, int16x8_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqshruntq_n_sv8hi (__a, __b, __imm);
> -}
> -
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqmovunbq_m_s16 (uint8x16_t __a, int16x8_t __b, mve_pred16_t
> __p)
> @@ -6537,27 +6478,6 @@ __arm_vmvnq_m_n_u32 (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
>    return __builtin_mve_vmvnq_m_n_uv4si (__inactive, __imm, __p);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshruntq_n_s32 (uint16x8_t __a, int32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqrshruntq_n_sv4si (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrunbq_n_s32 (uint16x8_t __a, int32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqshrunbq_n_sv4si (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshruntq_n_s32 (uint16x8_t __a, int32x4_t __b, const int __imm)
> -{
> -  return __builtin_mve_vqshruntq_n_sv4si (__a, __b, __imm);
> -}
> -
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqmovunbq_m_s32 (uint16x8_t __a, int32x4_t __b, mve_pred16_t
> __p)
> @@ -7797,62 +7717,6 @@ __arm_vqdmulltq_m_s16 (int32x4_t __inactive, int16x8_t __a, int16x8_t __b, mve_p
>    return __builtin_mve_vqdmulltq_m_sv8hi (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrunbq_m_n_s32 (uint16x8_t __a, int32x4_t __b, const int
> __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshrunbq_m_n_sv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrunbq_m_n_s16 (uint8x16_t __a, int16x8_t __b, const int
> __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshrunbq_m_n_sv8hi (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshruntq_m_n_s32 (uint16x8_t __a, int32x4_t __b, const int
> __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshruntq_m_n_sv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshruntq_m_n_s16 (uint8x16_t __a, int16x8_t __b, const int
> __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqrshruntq_m_n_sv8hi (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrunbq_m_n_s32 (uint16x8_t __a, int32x4_t __b, const int
> __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshrunbq_m_n_sv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrunbq_m_n_s16 (uint8x16_t __a, int16x8_t __b, const int
> __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshrunbq_m_n_sv8hi (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshruntq_m_n_s32 (uint16x8_t __a, int32x4_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshruntq_m_n_sv4si (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshruntq_m_n_s16 (uint8x16_t __a, int16x8_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> -  return __builtin_mve_vqshruntq_m_n_sv8hi (__a, __b, __imm, __p);
> -}
> -
>  __extension__ extern __inline int64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrmlaldavhaq_p_s32 (int64_t __a, int32x4_t __b, int32x4_t __c,
> mve_pred16_t __p)
> @@ -16398,20 +16262,6 @@ __arm_vbicq_m_n (uint32x4_t __a, const int __imm, mve_pred16_t __p)
>   return __arm_vbicq_m_n_u32 (__a, __imm, __p);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrunbq (uint8x16_t __a, int16x8_t __b, const int __imm)
> -{
> - return __arm_vqrshrunbq_n_s16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrunbq (uint16x8_t __a, int32x4_t __b, const int __imm)
> -{
> - return __arm_vqrshrunbq_n_s32 (__a, __b, __imm);
> -}
> -
>  __extension__ extern __inline int64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrmlaldavhaq (int64_t __a, int32x4_t __b, int32x4_t __c)
> @@ -18260,27 +18110,6 @@ __arm_vmvnq_m (uint16x8_t __inactive, const int __imm, mve_pred16_t __p)
>   return __arm_vmvnq_m_n_u16 (__inactive, __imm, __p);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshruntq (uint8x16_t __a, int16x8_t __b, const int __imm)
> -{
> - return __arm_vqrshruntq_n_s16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrunbq (uint8x16_t __a, int16x8_t __b, const int __imm)
> -{
> - return __arm_vqshrunbq_n_s16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshruntq (uint8x16_t __a, int16x8_t __b, const int __imm)
> -{
> - return __arm_vqshruntq_n_s16 (__a, __b, __imm);
> -}
> -
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqmovunbq_m (uint8x16_t __a, int16x8_t __b, mve_pred16_t __p)
> @@ -18477,27 +18306,6 @@ __arm_vmvnq_m (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
>   return __arm_vmvnq_m_n_u32 (__inactive, __imm, __p);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshruntq (uint16x8_t __a, int32x4_t __b, const int __imm)
> -{
> - return __arm_vqrshruntq_n_s32 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrunbq (uint16x8_t __a, int32x4_t __b, const int __imm)
> -{
> - return __arm_vqshrunbq_n_s32 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshruntq (uint16x8_t __a, int32x4_t __b, const int __imm)
> -{
> - return __arm_vqshruntq_n_s32 (__a, __b, __imm);
> -}
> -
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqmovunbq_m (uint16x8_t __a, int32x4_t __b, mve_pred16_t __p)
> @@ -19737,62 +19545,6 @@ __arm_vqdmulltq_m (int32x4_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred1
>   return __arm_vqdmulltq_m_s16 (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrunbq_m (uint16x8_t __a, int32x4_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> - return __arm_vqrshrunbq_m_n_s32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshrunbq_m (uint8x16_t __a, int16x8_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> - return __arm_vqrshrunbq_m_n_s16 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshruntq_m (uint16x8_t __a, int32x4_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> - return __arm_vqrshruntq_m_n_s32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqrshruntq_m (uint8x16_t __a, int16x8_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> - return __arm_vqrshruntq_m_n_s16 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrunbq_m (uint16x8_t __a, int32x4_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> - return __arm_vqshrunbq_m_n_s32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshrunbq_m (uint8x16_t __a, int16x8_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> - return __arm_vqshrunbq_m_n_s16 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshruntq_m (uint16x8_t __a, int32x4_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> - return __arm_vqshruntq_m_n_s32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vqshruntq_m (uint8x16_t __a, int16x8_t __b, const int __imm,
> mve_pred16_t __p)
> -{
> - return __arm_vqshruntq_m_n_s16 (__a, __b, __imm, __p);
> -}
> -
>  __extension__ extern __inline int64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vrmlaldavhaq_p (int64_t __a, int32x4_t __b, int32x4_t __c,
> mve_pred16_t __p)
> @@ -25799,12 +25551,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vbicq_m_n_u16
> (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
>    int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbicq_m_n_u32
> (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
> 
> -#define __arm_vqrshrunbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]:
> __arm_vqrshrunbq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]:
> __arm_vqrshrunbq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2));})
> -
>  #define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
>    int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8
> (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
> @@ -26364,18 +26110,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vrev16q_m_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t), p2), \
>    int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
> __arm_vrev16q_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, uint8x16_t), p2));})
> 
> -#define __arm_vqshruntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]:
> __arm_vqshruntq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]:
> __arm_vqshruntq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2));})
> -
> -#define __arm_vqrshruntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]:
> __arm_vqrshruntq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]:
> __arm_vqrshruntq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2));})
> -
>  #define __arm_vqmovnbq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> @@ -26404,12 +26138,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]:
> __arm_vqmovuntq_m_s16 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]:
> __arm_vqmovuntq_m_s32 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2));})
> 
> -#define __arm_vqrshruntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]:
> __arm_vqrshruntq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]:
> __arm_vqrshruntq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2));})
> -
>  #define __arm_vnegq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> @@ -27544,12 +27272,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vbicq_m_n_u16
> (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
>    int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbicq_m_n_u32
> (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
> 
> -#define __arm_vqrshrunbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]:
> __arm_vqrshrunbq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]:
> __arm_vqrshrunbq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2));})
> -
>  #define __arm_vqrdmlsdhq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    __typeof(p2) __p2 = (p2); \
> @@ -27861,24 +27583,12 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
> __arm_vrev32q_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, uint8x16_t), p2), \
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]:
> __arm_vrev32q_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, uint16x8_t), p2));})
> 
> -#define __arm_vqshruntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]:
> __arm_vqshruntq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]:
> __arm_vqshruntq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2));})
> -
>  #define __arm_vrev16q_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
>    int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]:
> __arm_vrev16q_m_s8 (__ARM_mve_coerce(__p0, int8x16_t),
> __ARM_mve_coerce(__p1, int8x16_t), p2), \
>    int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]:
> __arm_vrev16q_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, uint8x16_t), p2));})
> 
> -#define __arm_vqrshruntq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]:
> __arm_vqrshruntq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]:
> __arm_vqrshruntq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2));})
> -
>  #define __arm_vqmovuntq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> @@ -28718,30 +28428,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t]:
> __arm_vshlltq_m_n_u8 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \
>    int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t]:
> __arm_vshlltq_m_n_u16 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce(__p1, uint16x8_t), p2, p3));})
> 
> -#define __arm_vqshruntq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]:
> __arm_vqshruntq_m_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]:
> __arm_vqshruntq_m_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2, p3));})
> -
> -#define __arm_vqshrunbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]:
> __arm_vqshrunbq_m_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]:
> __arm_vqshrunbq_m_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2, p3));})
> -
> -#define __arm_vqrshrunbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]:
> __arm_vqrshrunbq_m_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]:
> __arm_vqrshrunbq_m_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2, p3));})
> -
> -#define __arm_vqrshruntq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]:
> __arm_vqrshruntq_m_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]:
> __arm_vqrshruntq_m_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2, p3));})
> -
>  #define __arm_vmlaldavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    __typeof(p2) __p2 = (p2); \
> @@ -28831,12 +28517,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]:
> __arm_vmvnq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce1(__p1, int) , p2), \
>    int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]:
> __arm_vmvnq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t),
> __ARM_mve_coerce1(__p1, int) , p2));})
> 
> -#define __arm_vqshrunbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int16x8_t]:
> __arm_vqshrunbq_n_s16 (__ARM_mve_coerce(__p0, uint8x16_t),
> __ARM_mve_coerce(__p1, int16x8_t), p2), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int32x4_t]:
> __arm_vqshrunbq_n_s32 (__ARM_mve_coerce(__p0, uint16x8_t),
> __ARM_mve_coerce(__p1, int32x4_t), p2));})
> -
>  #define __arm_vqshluq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0,
> \
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 21/23] arm: [MVE intrinsics] add binary_rshift shape
  2023-05-05  8:39 ` [PATCH 21/23] arm: [MVE intrinsics] add binary_rshift shape Christophe Lyon
@ 2023-05-05 11:05   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 11:05 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 21/23] arm: [MVE intrinsics] add binary_rshift shape
> 
> This patch adds the binary_rshift shape description.
> 

Ok.
Thanks,
Kyrill
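
Illustration of the new range check (my own sketch, using the shape's
own example intrinsic):

  #include <arm_mve.h>

  /* binary_rshift requires the immediate to be in [1, element bits],
     so 1..16 for 16-bit lanes.  */
  int16x8_t
  g (int16x8_t a)
  {
    return vrshrq (a, 3);   /* OK: resolves to vrshrq_n_s16 */
    /* vrshrq (a, 17) would be rejected by require_immediate_range.  */
  }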

> 2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins-shapes.cc (binary_rshift): New.
> 	* config/arm/arm-mve-builtins-shapes.h (binary_rshift): New.
> ---
>  gcc/config/arm/arm-mve-builtins-shapes.cc | 36 +++++++++++++++++++++++
>  gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
>  2 files changed, 37 insertions(+)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
> index e3bf586565c..7078f7d7220 100644
> --- a/gcc/config/arm/arm-mve-builtins-shapes.cc
> +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
> @@ -365,6 +365,42 @@ struct binary_def : public overloaded_base<0>
>  };
>  SHAPE (binary)
> 
> +/* <T0>_t vfoo[_n_t0](<T0>_t, const int)
> +
> +   Shape for vector shift right operations that take a vector first
> +   argument and an integer, and produce a vector.
> +
> +   Check that 'imm' is in the [1..#bits] range.
> +
> +   Example: vrshrq.
> +   int8x16_t [__arm_]vrshrq[_n_s8](int8x16_t a, const int imm)
> +   int8x16_t [__arm_]vrshrq_m[_n_s8](int8x16_t inactive, int8x16_t a, const int imm, mve_pred16_t p)
> +   int8x16_t [__arm_]vrshrq_x[_n_s8](int8x16_t a, const int imm, mve_pred16_t p)  */
> +struct binary_rshift_def : public overloaded_base<0>
> +{
> +  void
> +  build (function_builder &b, const function_group_info &group,
> +	 bool preserve_user_namespace) const override
> +  {
> +    b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
> +    build_all (b, "v0,v0,ss32", group, MODE_n, preserve_user_namespace);
> +  }
> +
> +  tree
> +  resolve (function_resolver &r) const override
> +  {
> +    return r.resolve_uniform (1, 1);
> +  }
> +
> +  bool
> +  check (function_checker &c) const override
> +  {
> +    unsigned int bits = c.type_suffix (0).element_bits;
> +    return c.require_immediate_range (1, 1, bits);
> +  }
> +};
> +SHAPE (binary_rshift)
> +
>  /* <T0>_t vfoo[_t0](<T0>_t, <T0>_t)
>     <T0>_t vfoo[_n_t0](<T0>_t, <S0>_t)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
> index ca1c1017e8e..09e00b69e63 100644
> --- a/gcc/config/arm/arm-mve-builtins-shapes.h
> +++ b/gcc/config/arm/arm-mve-builtins-shapes.h
> @@ -40,6 +40,7 @@ namespace arm_mve
>      extern const function_shape *const binary_opt_n;
>      extern const function_shape *const binary_orrq;
>      extern const function_shape *const binary_round_lshift;
> +    extern const function_shape *const binary_rshift;
>      extern const function_shape *const binary_rshift_narrow;
>      extern const function_shape *const binary_rshift_narrow_unsigned;
>      extern const function_shape *const create;
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 22/23] arm: [MVE intrinsics] factorize vshrq vrshrq
  2023-05-05  8:39 ` [PATCH 22/23] arm: [MVE intrinsics] factorize vshrq vrshrq Christophe Lyon
@ 2023-05-05 11:06   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 11:06 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 22/23] arm: [MVE intrinsics] factorize vshrq vrshrq
> 
> Factorize vshrq and vrshrq so that they use the same pattern.

Ok.
Thanks,
Kyrill
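
To make the merge concrete (my own sketch, not from the patch): both
intrinsics now expand through the same define_insn, with only the
mve_insn attribute selecting the mnemonic:

  #include <arm_mve.h>

  int16x8_t  h1 (int16x8_t a)  { return vshrq_n_s16 (a, 3); }  /* vshr.s16  */
  uint8x16_t h2 (uint8x16_t a) { return vrshrq_n_u8 (a, 3); }  /* vrshr.u8  */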

> 
> 2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/iterators.md (MVE_VSHRQ_M_N, MVE_VSHRQ_N): New.
> 	(mve_insn): Add vrshr, vshr.
> 	* config/arm/mve.md (mve_vshrq_n_<supf><mode>)
> 	(mve_vrshrq_n_<supf><mode>): Merge into ...
> 	(@mve_<mve_insn>q_n_<supf><mode>): ... this.
> 	(mve_vrshrq_m_n_<supf><mode>, mve_vshrq_m_n_<supf><mode>):
> Merge
> 	into ...
> 	(@mve_<mve_insn>q_m_n_<supf><mode>): ... this.
> ---
>  gcc/config/arm/iterators.md | 14 +++++++++++
>  gcc/config/arm/mve.md       | 46 +++++++------------------------------
>  2 files changed, 22 insertions(+), 38 deletions(-)
> 
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index 583206dac9e..53873704174 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -408,6 +408,16 @@ (define_int_iterator MVE_INT_N_BINARY   [
>  		     VSUBQ_N_S VSUBQ_N_U
>  		     ])
> 
> +(define_int_iterator MVE_VSHRQ_M_N [
> +		     VRSHRQ_M_N_S VRSHRQ_M_N_U
> +		     VSHRQ_M_N_S VSHRQ_M_N_U
> +		     ])
> +
> +(define_int_iterator MVE_VSHRQ_N [
> +		     VRSHRQ_N_S VRSHRQ_N_U
> +		     VSHRQ_N_S VSHRQ_N_U
> +		     ])
> +
>  (define_int_iterator MVE_INT_SU_N_BINARY   [
>  		     VHADDQ_N_S VHADDQ_N_U
>  		     VHSUBQ_N_S VHSUBQ_N_U
> @@ -636,6 +646,8 @@ (define_int_attr mve_insn [
>  		 (VRSHRNBQ_N_S "vrshrnb") (VRSHRNBQ_N_U "vrshrnb")
>  		 (VRSHRNTQ_M_N_S "vrshrnt") (VRSHRNTQ_M_N_U "vrshrnt")
>  		 (VRSHRNTQ_N_S "vrshrnt") (VRSHRNTQ_N_U "vrshrnt")
> +		 (VRSHRQ_M_N_S "vrshr") (VRSHRQ_M_N_U "vrshr")
> +		 (VRSHRQ_N_S "vrshr") (VRSHRQ_N_U "vrshr")
>  		 (VSHLQ_M_N_S "vshl") (VSHLQ_M_N_U "vshl")
>  		 (VSHLQ_M_R_S "vshl") (VSHLQ_M_R_U "vshl")
>  		 (VSHLQ_M_S "vshl") (VSHLQ_M_U "vshl")
> @@ -646,6 +658,8 @@ (define_int_attr mve_insn [
>  		 (VSHRNBQ_N_S "vshrnb") (VSHRNBQ_N_U "vshrnb")
>  		 (VSHRNTQ_M_N_S "vshrnt") (VSHRNTQ_M_N_U "vshrnt")
>  		 (VSHRNTQ_N_S "vshrnt") (VSHRNTQ_N_U "vshrnt")
> +		 (VSHRQ_M_N_S "vshr") (VSHRQ_M_N_U "vshr")
> +		 (VSHRQ_N_S "vshr") (VSHRQ_N_U "vshr")
>  		 (VSUBQ_M_N_S "vsub") (VSUBQ_M_N_U "vsub") (VSUBQ_M_N_F "vsub")
>  		 (VSUBQ_M_S "vsub") (VSUBQ_M_U "vsub") (VSUBQ_M_F "vsub")
>  		 (VSUBQ_N_S "vsub") (VSUBQ_N_U "vsub") (VSUBQ_N_F "vsub")
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 20ce7ecb3d6..b5c89fd4105 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -728,18 +728,19 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
>     (set_attr "length""8")])
> 
>  ;;
> -;; [vshrq_n_s, vshrq_n_u])
> +;; [vrshrq_n_s, vrshrq_n_u]
> +;; [vshrq_n_s, vshrq_n_u]
>  ;;
>  ;; Version that takes an immediate as operand 2.
> -(define_insn "mve_vshrq_n_<supf><mode>"
> +(define_insn "@mve_<mve_insn>q_n_<supf><mode>"
>    [
>     (set (match_operand:MVE_2 0 "s_register_operand" "=w")
>  	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
>  		       (match_operand:SI 2 "<MVE_pred2>" "<MVE_constraint2>")]
> -	 VSHRQ_N))
> +	 MVE_VSHRQ_N))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vshr.<supf><V_sz_elem>\t%q0, %q1, %2"
> +  "<mve_insn>.<supf><V_sz_elem>\t%q0, %q1, %2"
>    [(set_attr "type" "mve_move")
>  ])
> 
> @@ -1401,21 +1402,6 @@ (define_insn "mve_vqshluq_n_s<mode>"
>    [(set_attr "type" "mve_move")
>  ])
> 
> -;;
> -;; [vrshrq_n_s, vrshrq_n_u])
> -;;
> -(define_insn "mve_vrshrq_n_<supf><mode>"
> -  [
> -   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> -	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
> -		       (match_operand:SI 2 "<MVE_pred2>" "<MVE_constraint2>")]
> -	 VRSHRQ_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vrshr.<supf>%#<V_sz_elem>\t%q0, %q1, %2"
> -  [(set_attr "type" "mve_move")
> -])
> -
>  ;;
>  ;; [vabdq_f]
>  ;;
> @@ -4661,35 +4647,19 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
> 
>  ;;
>  ;; [vrshrq_m_n_s, vrshrq_m_n_u])
> -;;
> -(define_insn "mve_vrshrq_m_n_<supf><mode>"
> -  [
> -   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> -	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
> -		       (match_operand:MVE_2 2 "s_register_operand" "w")
> -		       (match_operand:SI 3 "<MVE_pred2>" "<MVE_constraint2>")
> -		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
> -	 VRSHRQ_M_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vrshrt.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
>  ;; [vshrq_m_n_s, vshrq_m_n_u])
>  ;;
> -(define_insn "mve_vshrq_m_n_<supf><mode>"
> +(define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
>    [
>     (set (match_operand:MVE_2 0 "s_register_operand" "=w")
>  	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
>  		       (match_operand:MVE_2 2 "s_register_operand" "w")
>  		       (match_operand:SI 3 "<MVE_pred2>" "<MVE_constraint2>")
>  		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
> -	 VSHRQ_M_N))
> +	 MVE_VSHRQ_M_N))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vpst\;vshrt.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
> +  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> 
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 46+ messages in thread

* RE: [PATCH 23/23] arm: [MVE intrinsics] rework vshrq vrshrq
  2023-05-05  8:39 ` [PATCH 23/23] arm: [MVE intrinsics] rework vshrq vrshrq Christophe Lyon
@ 2023-05-05 11:07   ` Kyrylo Tkachov
  0 siblings, 0 replies; 46+ messages in thread
From: Kyrylo Tkachov @ 2023-05-05 11:07 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 5, 2023 9:40 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 23/23] arm: [MVE intrinsics] rework vshrq vrshrq
> 
> Implement vshrq and vrshrq using the new MVE builtins framework.

Ok.
Looking forward to more of the transition!
Kyrill
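
A final usage sketch (mine, not from the patch): the reworked vshrq and
vrshrq keep their _m/_x predicated overloads via the mx_or_none entries
added below.

  #include <arm_mve.h>

  /* _x form: inactive lanes take unspecified values.  */
  uint32x4_t
  shr_x (uint32x4_t a, mve_pred16_t p)
  {
    return vshrq_x (a, 2, p);   /* resolves to vshrq_x_n_u32 */
  }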

> 
> 2022-09-08  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins-base.cc (vrshrq, vshrq): New.
> 	* config/arm/arm-mve-builtins-base.def (vrshrq, vshrq): New.
> 	* config/arm/arm-mve-builtins-base.h (vrshrq, vshrq): New.
> 	* config/arm/arm_mve.h (vshrq): Remove.
> 	(vrshrq): Remove.
> 	(vrshrq_m): Remove.
> 	(vshrq_m): Remove.
> 	(vrshrq_x): Remove.
> 	(vshrq_x): Remove.
> 	(vshrq_n_s8): Remove.
> 	(vshrq_n_s16): Remove.
> 	(vshrq_n_s32): Remove.
> 	(vshrq_n_u8): Remove.
> 	(vshrq_n_u16): Remove.
> 	(vshrq_n_u32): Remove.
> 	(vrshrq_n_u8): Remove.
> 	(vrshrq_n_s8): Remove.
> 	(vrshrq_n_u16): Remove.
> 	(vrshrq_n_s16): Remove.
> 	(vrshrq_n_u32): Remove.
> 	(vrshrq_n_s32): Remove.
> 	(vrshrq_m_n_s8): Remove.
> 	(vrshrq_m_n_s32): Remove.
> 	(vrshrq_m_n_s16): Remove.
> 	(vrshrq_m_n_u8): Remove.
> 	(vrshrq_m_n_u32): Remove.
> 	(vrshrq_m_n_u16): Remove.
> 	(vshrq_m_n_s8): Remove.
> 	(vshrq_m_n_s32): Remove.
> 	(vshrq_m_n_s16): Remove.
> 	(vshrq_m_n_u8): Remove.
> 	(vshrq_m_n_u32): Remove.
> 	(vshrq_m_n_u16): Remove.
> 	(vrshrq_x_n_s8): Remove.
> 	(vrshrq_x_n_s16): Remove.
> 	(vrshrq_x_n_s32): Remove.
> 	(vrshrq_x_n_u8): Remove.
> 	(vrshrq_x_n_u16): Remove.
> 	(vrshrq_x_n_u32): Remove.
> 	(vshrq_x_n_s8): Remove.
> 	(vshrq_x_n_s16): Remove.
> 	(vshrq_x_n_s32): Remove.
> 	(vshrq_x_n_u8): Remove.
> 	(vshrq_x_n_u16): Remove.
> 	(vshrq_x_n_u32): Remove.
> 	(__arm_vshrq_n_s8): Remove.
> 	(__arm_vshrq_n_s16): Remove.
> 	(__arm_vshrq_n_s32): Remove.
> 	(__arm_vshrq_n_u8): Remove.
> 	(__arm_vshrq_n_u16): Remove.
> 	(__arm_vshrq_n_u32): Remove.
> 	(__arm_vrshrq_n_u8): Remove.
> 	(__arm_vrshrq_n_s8): Remove.
> 	(__arm_vrshrq_n_u16): Remove.
> 	(__arm_vrshrq_n_s16): Remove.
> 	(__arm_vrshrq_n_u32): Remove.
> 	(__arm_vrshrq_n_s32): Remove.
> 	(__arm_vrshrq_m_n_s8): Remove.
> 	(__arm_vrshrq_m_n_s32): Remove.
> 	(__arm_vrshrq_m_n_s16): Remove.
> 	(__arm_vrshrq_m_n_u8): Remove.
> 	(__arm_vrshrq_m_n_u32): Remove.
> 	(__arm_vrshrq_m_n_u16): Remove.
> 	(__arm_vshrq_m_n_s8): Remove.
> 	(__arm_vshrq_m_n_s32): Remove.
> 	(__arm_vshrq_m_n_s16): Remove.
> 	(__arm_vshrq_m_n_u8): Remove.
> 	(__arm_vshrq_m_n_u32): Remove.
> 	(__arm_vshrq_m_n_u16): Remove.
> 	(__arm_vrshrq_x_n_s8): Remove.
> 	(__arm_vrshrq_x_n_s16): Remove.
> 	(__arm_vrshrq_x_n_s32): Remove.
> 	(__arm_vrshrq_x_n_u8): Remove.
> 	(__arm_vrshrq_x_n_u16): Remove.
> 	(__arm_vrshrq_x_n_u32): Remove.
> 	(__arm_vshrq_x_n_s8): Remove.
> 	(__arm_vshrq_x_n_s16): Remove.
> 	(__arm_vshrq_x_n_s32): Remove.
> 	(__arm_vshrq_x_n_u8): Remove.
> 	(__arm_vshrq_x_n_u16): Remove.
> 	(__arm_vshrq_x_n_u32): Remove.
> 	(__arm_vshrq): Remove.
> 	(__arm_vrshrq): Remove.
> 	(__arm_vrshrq_m): Remove.
> 	(__arm_vshrq_m): Remove.
> 	(__arm_vrshrq_x): Remove.
> 	(__arm_vshrq_x): Remove.
> ---
>  gcc/config/arm/arm-mve-builtins-base.cc  |   2 +
>  gcc/config/arm/arm-mve-builtins-base.def |   2 +
>  gcc/config/arm/arm-mve-builtins-base.h   |   2 +
>  gcc/config/arm/arm_mve.h                 | 628 -----------------------
>  4 files changed, 6 insertions(+), 628 deletions(-)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
> index e7d2e0abffc..bb585a3921f 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.cc
> +++ b/gcc/config/arm/arm-mve-builtins-base.cc
> @@ -225,9 +225,11 @@ FUNCTION_WITHOUT_N_NO_F (vrmulhq, VRMULHQ)
>  FUNCTION_WITH_M_N_NO_F (vrshlq, VRSHLQ)
>  FUNCTION_ONLY_N_NO_F (vrshrnbq, VRSHRNBQ)
>  FUNCTION_ONLY_N_NO_F (vrshrntq, VRSHRNTQ)
> +FUNCTION_ONLY_N_NO_F (vrshrq, VRSHRQ)
>  FUNCTION_WITH_M_N_R (vshlq, VSHLQ)
>  FUNCTION_ONLY_N_NO_F (vshrnbq, VSHRNBQ)
>  FUNCTION_ONLY_N_NO_F (vshrntq, VSHRNTQ)
> +FUNCTION_ONLY_N_NO_F (vshrq, VSHRQ)
>  FUNCTION_WITH_RTX_M_N (vsubq, MINUS, VSUBQ)
>  FUNCTION (vuninitializedq, vuninitializedq_impl,)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
> index 50cb2d055e9..33c95c02396 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.def
> +++ b/gcc/config/arm/arm-mve-builtins-base.def
> @@ -51,10 +51,12 @@ DEF_MVE_FUNCTION (vrmulhq, binary, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vrshlq, binary_round_lshift, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vrshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
>  DEF_MVE_FUNCTION (vrshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
> +DEF_MVE_FUNCTION (vrshrq, binary_rshift, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vshlq, binary_lshift, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vshlq, binary_lshift_r, all_integer, m_or_none) // "_r" forms do not support the "x" predicate
>  DEF_MVE_FUNCTION (vshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
>  DEF_MVE_FUNCTION (vshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
> +DEF_MVE_FUNCTION (vshrq, binary_rshift, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_integer, mx_or_none)
>  DEF_MVE_FUNCTION (vuninitializedq, inherent, all_integer_with_64, none)
>  #undef REQUIRES_FLOAT
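
Each DEF_MVE_FUNCTION line binds a function to a shape, a type set and a
predication set: binary_rshift supplies the
<T0>_t vfoo[_n_t0](<T0>_t, const int) prototype, all_integer expands it
over the s8/s16/s32/u8/u16/u32 suffixes, and mx_or_none generates the
plain, _m and _x forms.  A sketch of how the overloads then resolve for
a uint16x8_t argument (variable names are illustrative):

  #include <arm_mve.h>

  uint16x8_t resolve_demo (uint16x8_t v, uint16x8_t inactive,
                           mve_pred16_t p)
  {
    uint16x8_t r0 = vrshrq (v, 3);                /* vrshrq_n_u16 */
    uint16x8_t r1 = vrshrq_m (inactive, v, 3, p); /* vrshrq_m_n_u16 */
    uint16x8_t r2 = vrshrq_x (v, 3, p);           /* vrshrq_x_n_u16 */
    return vaddq (vaddq (r0, r1), r2);
  }
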
> diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
> index fcac772bc5b..2a230f5f34d 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.h
> +++ b/gcc/config/arm/arm-mve-builtins-base.h
> @@ -55,9 +55,11 @@ extern const function_base *const vrmulhq;
>  extern const function_base *const vrshlq;
>  extern const function_base *const vrshrnbq;
>  extern const function_base *const vrshrntq;
> +extern const function_base *const vrshrq;
>  extern const function_base *const vshlq;
>  extern const function_base *const vshrnbq;
>  extern const function_base *const vshrntq;
> +extern const function_base *const vshrq;
>  extern const function_base *const vsubq;
>  extern const function_base *const vuninitializedq;
> 
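
The three files follow the framework's usual split: base.h declares the
function_base object, base.cc defines it via the FUNCTION_* macros, and
base.def ties it to a shape, type set and predication set.  A
self-contained toy model of the extern-const-pointer wiring, with
simplified types for illustration only (the real classes live in the
arm-mve-builtins headers):

  #include <stdio.h>

  struct function_base { const char *insn; };

  /* base.cc equivalent: the object itself, with internal linkage.  */
  static const struct function_base vrshrq_obj = { "vrshr" };

  /* base.h equivalent: the constant pointer other files link against.  */
  const struct function_base *const vrshrq = &vrshrq_obj;

  int main (void) { printf ("%s\n", vrshrq->insn); return 0; }
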
> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> index b2701f1135d..89de7e0e46b 100644
> --- a/gcc/config/arm/arm_mve.h
> +++ b/gcc/config/arm/arm_mve.h
> @@ -57,7 +57,6 @@
>  #define vrev64q(__a) __arm_vrev64q(__a)
>  #define vqabsq(__a) __arm_vqabsq(__a)
>  #define vqnegq(__a) __arm_vqnegq(__a)
> -#define vshrq(__a, __imm) __arm_vshrq(__a, __imm)
>  #define vaddlvq_p(__a, __p) __arm_vaddlvq_p(__a, __p)
>  #define vcmpneq(__a, __b) __arm_vcmpneq(__a, __b)
>  #define vornq(__a, __b) __arm_vornq(__a, __b)
> @@ -79,7 +78,6 @@
>  #define vmaxavq(__a, __b) __arm_vmaxavq(__a, __b)
>  #define vmaxaq(__a, __b) __arm_vmaxaq(__a, __b)
>  #define vbrsrq(__a, __b) __arm_vbrsrq(__a, __b)
> -#define vrshrq(__a, __imm) __arm_vrshrq(__a, __imm)
>  #define vcmpltq(__a, __b) __arm_vcmpltq(__a, __b)
>  #define vcmpleq(__a, __b) __arm_vcmpleq(__a, __b)
>  #define vcmpgtq(__a, __b) __arm_vcmpgtq(__a, __b)
> @@ -221,8 +219,6 @@
>  #define vqrdmlashq_m(__a, __b, __c, __p) __arm_vqrdmlashq_m(__a, __b, __c, __p)
>  #define vqrdmlsdhq_m(__inactive, __a, __b, __p) __arm_vqrdmlsdhq_m(__inactive, __a, __b, __p)
>  #define vqrdmlsdhxq_m(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m(__inactive, __a, __b, __p)
> -#define vrshrq_m(__inactive, __a, __imm, __p) __arm_vrshrq_m(__inactive, __a, __imm, __p)
> -#define vshrq_m(__inactive, __a, __imm, __p) __arm_vshrq_m(__inactive, __a, __imm, __p)
>  #define vsliq_m(__a, __b, __imm, __p) __arm_vsliq_m(__a, __b, __imm, __p)
>  #define vmlaldavaq_p(__a, __b, __c, __p) __arm_vmlaldavaq_p(__a, __b, __c, __p)
>  #define vmlaldavaxq_p(__a, __b, __c, __p) __arm_vmlaldavaxq_p(__a, __b, __c, __p)
> @@ -334,8 +330,6 @@
>  #define vrev64q_x(__a, __p) __arm_vrev64q_x(__a, __p)
>  #define vshllbq_x(__a, __imm, __p) __arm_vshllbq_x(__a, __imm, __p)
>  #define vshlltq_x(__a, __imm, __p) __arm_vshlltq_x(__a, __imm, __p)
> -#define vrshrq_x(__a, __imm, __p) __arm_vrshrq_x(__a, __imm, __p)
> -#define vshrq_x(__a, __imm, __p) __arm_vshrq_x(__a, __imm, __p)
>  #define vadciq(__a, __b, __carry_out) __arm_vadciq(__a, __b, __carry_out)
>  #define vadciq_m(__inactive, __a, __b, __carry_out, __p) __arm_vadciq_m(__inactive, __a, __b, __carry_out, __p)
>  #define vadcq(__a, __b, __carry) __arm_vadcq(__a, __b, __carry)
> @@ -565,12 +559,6 @@
>  #define vcvtq_n_s32_f32(__a,  __imm6) __arm_vcvtq_n_s32_f32(__a,  __imm6)
>  #define vcvtq_n_u16_f16(__a,  __imm6) __arm_vcvtq_n_u16_f16(__a,  __imm6)
>  #define vcvtq_n_u32_f32(__a,  __imm6) __arm_vcvtq_n_u32_f32(__a,  __imm6)
> -#define vshrq_n_s8(__a,  __imm) __arm_vshrq_n_s8(__a,  __imm)
> -#define vshrq_n_s16(__a,  __imm) __arm_vshrq_n_s16(__a,  __imm)
> -#define vshrq_n_s32(__a,  __imm) __arm_vshrq_n_s32(__a,  __imm)
> -#define vshrq_n_u8(__a,  __imm) __arm_vshrq_n_u8(__a,  __imm)
> -#define vshrq_n_u16(__a,  __imm) __arm_vshrq_n_u16(__a,  __imm)
> -#define vshrq_n_u32(__a,  __imm) __arm_vshrq_n_u32(__a,  __imm)
>  #define vaddlvq_p_s32(__a, __p) __arm_vaddlvq_p_s32(__a, __p)
>  #define vaddlvq_p_u32(__a, __p) __arm_vaddlvq_p_u32(__a, __p)
>  #define vcmpneq_s8(__a, __b) __arm_vcmpneq_s8(__a, __b)
> @@ -602,7 +590,6 @@
>  #define vmaxavq_s8(__a, __b) __arm_vmaxavq_s8(__a, __b)
>  #define vmaxaq_s8(__a, __b) __arm_vmaxaq_s8(__a, __b)
>  #define vbrsrq_n_u8(__a, __b) __arm_vbrsrq_n_u8(__a, __b)
> -#define vrshrq_n_u8(__a,  __imm) __arm_vrshrq_n_u8(__a,  __imm)
>  #define vcmpneq_n_s8(__a, __b) __arm_vcmpneq_n_s8(__a, __b)
>  #define vcmpltq_s8(__a, __b) __arm_vcmpltq_s8(__a, __b)
>  #define vcmpltq_n_s8(__a, __b) __arm_vcmpltq_n_s8(__a, __b)
> @@ -632,7 +619,6 @@
>  #define vbrsrq_n_s8(__a, __b) __arm_vbrsrq_n_s8(__a, __b)
>  #define vbicq_s8(__a, __b) __arm_vbicq_s8(__a, __b)
>  #define vaddvaq_s8(__a, __b) __arm_vaddvaq_s8(__a, __b)
> -#define vrshrq_n_s8(__a,  __imm) __arm_vrshrq_n_s8(__a,  __imm)
>  #define vornq_u16(__a, __b) __arm_vornq_u16(__a, __b)
>  #define vmulltq_int_u16(__a, __b) __arm_vmulltq_int_u16(__a, __b)
>  #define vmullbq_int_u16(__a, __b) __arm_vmullbq_int_u16(__a, __b)
> @@ -656,7 +642,6 @@
>  #define vmaxavq_s16(__a, __b) __arm_vmaxavq_s16(__a, __b)
>  #define vmaxaq_s16(__a, __b) __arm_vmaxaq_s16(__a, __b)
>  #define vbrsrq_n_u16(__a, __b) __arm_vbrsrq_n_u16(__a, __b)
> -#define vrshrq_n_u16(__a,  __imm) __arm_vrshrq_n_u16(__a,  __imm)
>  #define vcmpneq_n_s16(__a, __b) __arm_vcmpneq_n_s16(__a, __b)
>  #define vcmpltq_s16(__a, __b) __arm_vcmpltq_s16(__a, __b)
>  #define vcmpltq_n_s16(__a, __b) __arm_vcmpltq_n_s16(__a, __b)
> @@ -686,7 +671,6 @@
>  #define vbrsrq_n_s16(__a, __b) __arm_vbrsrq_n_s16(__a, __b)
>  #define vbicq_s16(__a, __b) __arm_vbicq_s16(__a, __b)
>  #define vaddvaq_s16(__a, __b) __arm_vaddvaq_s16(__a, __b)
> -#define vrshrq_n_s16(__a,  __imm) __arm_vrshrq_n_s16(__a,  __imm)
>  #define vornq_u32(__a, __b) __arm_vornq_u32(__a, __b)
>  #define vmulltq_int_u32(__a, __b) __arm_vmulltq_int_u32(__a, __b)
>  #define vmullbq_int_u32(__a, __b) __arm_vmullbq_int_u32(__a, __b)
> @@ -710,7 +694,6 @@
>  #define vmaxavq_s32(__a, __b) __arm_vmaxavq_s32(__a, __b)
>  #define vmaxaq_s32(__a, __b) __arm_vmaxaq_s32(__a, __b)
>  #define vbrsrq_n_u32(__a, __b) __arm_vbrsrq_n_u32(__a, __b)
> -#define vrshrq_n_u32(__a,  __imm) __arm_vrshrq_n_u32(__a,  __imm)
>  #define vcmpneq_n_s32(__a, __b) __arm_vcmpneq_n_s32(__a, __b)
>  #define vcmpltq_s32(__a, __b) __arm_vcmpltq_s32(__a, __b)
>  #define vcmpltq_n_s32(__a, __b) __arm_vcmpltq_n_s32(__a, __b)
> @@ -740,7 +723,6 @@
>  #define vbrsrq_n_s32(__a, __b) __arm_vbrsrq_n_s32(__a, __b)
>  #define vbicq_s32(__a, __b) __arm_vbicq_s32(__a, __b)
>  #define vaddvaq_s32(__a, __b) __arm_vaddvaq_s32(__a, __b)
> -#define vrshrq_n_s32(__a,  __imm) __arm_vrshrq_n_s32(__a,  __imm)
>  #define vqmovntq_u16(__a, __b) __arm_vqmovntq_u16(__a, __b)
>  #define vqmovnbq_u16(__a, __b) __arm_vqmovnbq_u16(__a, __b)
>  #define vmulltq_poly_p8(__a, __b) __arm_vmulltq_poly_p8(__a, __b)
> @@ -1410,18 +1392,6 @@
>  #define vqrdmlsdhxq_m_s8(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m_s8(__inactive, __a, __b, __p)
>  #define vqrdmlsdhxq_m_s32(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m_s32(__inactive, __a, __b, __p)
>  #define vqrdmlsdhxq_m_s16(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m_s16(__inactive, __a, __b, __p)
> -#define vrshrq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_s8(__inactive, __a,  __imm, __p)
> -#define vrshrq_m_n_s32(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_s32(__inactive, __a,  __imm, __p)
> -#define vrshrq_m_n_s16(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_s16(__inactive, __a,  __imm, __p)
> -#define vrshrq_m_n_u8(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_u8(__inactive, __a,  __imm, __p)
> -#define vrshrq_m_n_u32(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_u32(__inactive, __a,  __imm, __p)
> -#define vrshrq_m_n_u16(__inactive, __a,  __imm, __p) __arm_vrshrq_m_n_u16(__inactive, __a,  __imm, __p)
> -#define vshrq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vshrq_m_n_s8(__inactive, __a,  __imm, __p)
> -#define vshrq_m_n_s32(__inactive, __a,  __imm, __p) __arm_vshrq_m_n_s32(__inactive, __a,  __imm, __p)
> -#define vshrq_m_n_s16(__inactive, __a,  __imm, __p) __arm_vshrq_m_n_s16(__inactive, __a,  __imm, __p)
> -#define vshrq_m_n_u8(__inactive, __a,  __imm, __p) __arm_vshrq_m_n_u8(__inactive, __a,  __imm, __p)
> -#define vshrq_m_n_u32(__inactive, __a,  __imm, __p) __arm_vshrq_m_n_u32(__inactive, __a,  __imm, __p)
> -#define vshrq_m_n_u16(__inactive, __a,  __imm, __p) __arm_vshrq_m_n_u16(__inactive, __a,  __imm, __p)
>  #define vsliq_m_n_s8(__a, __b,  __imm, __p) __arm_vsliq_m_n_s8(__a, __b,  __imm, __p)
>  #define vsliq_m_n_s32(__a, __b,  __imm, __p) __arm_vsliq_m_n_s32(__a, __b,  __imm, __p)
>  #define vsliq_m_n_s16(__a, __b,  __imm, __p) __arm_vsliq_m_n_s16(__a, __b,  __imm, __p)
> @@ -1914,18 +1884,6 @@
>  #define vshlltq_x_n_s16(__a,  __imm, __p) __arm_vshlltq_x_n_s16(__a,  __imm, __p)
>  #define vshlltq_x_n_u8(__a,  __imm, __p) __arm_vshlltq_x_n_u8(__a,  __imm, __p)
>  #define vshlltq_x_n_u16(__a,  __imm, __p) __arm_vshlltq_x_n_u16(__a,  __imm, __p)
> -#define vrshrq_x_n_s8(__a,  __imm, __p) __arm_vrshrq_x_n_s8(__a,  __imm, __p)
> -#define vrshrq_x_n_s16(__a,  __imm, __p) __arm_vrshrq_x_n_s16(__a,  __imm, __p)
> -#define vrshrq_x_n_s32(__a,  __imm, __p) __arm_vrshrq_x_n_s32(__a,  __imm, __p)
> -#define vrshrq_x_n_u8(__a,  __imm, __p) __arm_vrshrq_x_n_u8(__a,  __imm, __p)
> -#define vrshrq_x_n_u16(__a,  __imm, __p) __arm_vrshrq_x_n_u16(__a,  __imm, __p)
> -#define vrshrq_x_n_u32(__a,  __imm, __p) __arm_vrshrq_x_n_u32(__a,  __imm, __p)
> -#define vshrq_x_n_s8(__a,  __imm, __p) __arm_vshrq_x_n_s8(__a,  __imm, __p)
> -#define vshrq_x_n_s16(__a,  __imm, __p) __arm_vshrq_x_n_s16(__a,  __imm, __p)
> -#define vshrq_x_n_s32(__a,  __imm, __p) __arm_vshrq_x_n_s32(__a,  __imm, __p)
> -#define vshrq_x_n_u8(__a,  __imm, __p) __arm_vshrq_x_n_u8(__a,  __imm, __p)
> -#define vshrq_x_n_u16(__a,  __imm, __p) __arm_vshrq_x_n_u16(__a,  __imm, __p)
> -#define vshrq_x_n_u32(__a,  __imm, __p) __arm_vshrq_x_n_u32(__a,  __imm, __p)
>  #define vdupq_x_n_f16(__a, __p) __arm_vdupq_x_n_f16(__a, __p)
>  #define vdupq_x_n_f32(__a, __p) __arm_vdupq_x_n_f32(__a, __p)
>  #define vminnmq_x_f16(__a, __b, __p) __arm_vminnmq_x_f16(__a, __b, __p)
> @@ -2659,47 +2617,6 @@ __arm_vpnot (mve_pred16_t __a)
>    return __builtin_mve_vpnotv16bi (__a);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_n_s8 (int8x16_t __a, const int __imm)
> -{
> -  return __builtin_mve_vshrq_n_sv16qi (__a, __imm);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_n_s16 (int16x8_t __a, const int __imm)
> -{
> -  return __builtin_mve_vshrq_n_sv8hi (__a, __imm);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_n_s32 (int32x4_t __a, const int __imm)
> -{
> -  return __builtin_mve_vshrq_n_sv4si (__a, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_n_u8 (uint8x16_t __a, const int __imm)
> -{
> -  return __builtin_mve_vshrq_n_uv16qi (__a, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_n_u16 (uint16x8_t __a, const int __imm)
> -{
> -  return __builtin_mve_vshrq_n_uv8hi (__a, __imm);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_n_u32 (uint32x4_t __a, const int __imm)
> -{
> -  return __builtin_mve_vshrq_n_uv4si (__a, __imm);
> -}
>  __extension__ extern __inline int64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vaddlvq_p_s32 (int32x4_t __a, mve_pred16_t __p)
> @@ -2919,13 +2836,6 @@ __arm_vbrsrq_n_u8 (uint8x16_t __a, int32_t __b)
>    return __builtin_mve_vbrsrq_n_uv16qi (__a, __b);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_n_u8 (uint8x16_t __a, const int __imm)
> -{
> -  return __builtin_mve_vrshrq_n_uv16qi (__a, __imm);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq_n_s8 (int8x16_t __a, int8_t __b)
> @@ -3129,13 +3039,6 @@ __arm_vaddvaq_s8 (int32_t __a, int8x16_t __b)
>    return __builtin_mve_vaddvaq_sv16qi (__a, __b);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_n_s8 (int8x16_t __a, const int __imm)
> -{
> -  return __builtin_mve_vrshrq_n_sv16qi (__a, __imm);
> -}
> -
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq_u16 (uint16x8_t __a, uint16x8_t __b)
> @@ -3299,13 +3202,6 @@ __arm_vbrsrq_n_u16 (uint16x8_t __a, int32_t __b)
>    return __builtin_mve_vbrsrq_n_uv8hi (__a, __b);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_n_u16 (uint16x8_t __a, const int __imm)
> -{
> -  return __builtin_mve_vrshrq_n_uv8hi (__a, __imm);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq_n_s16 (int16x8_t __a, int16_t __b)
> @@ -3509,13 +3405,6 @@ __arm_vaddvaq_s16 (int32_t __a, int16x8_t __b)
>    return __builtin_mve_vaddvaq_sv8hi (__a, __b);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_n_s16 (int16x8_t __a, const int __imm)
> -{
> -  return __builtin_mve_vrshrq_n_sv8hi (__a, __imm);
> -}
> -
>  __extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq_u32 (uint32x4_t __a, uint32x4_t __b)
> @@ -3679,13 +3568,6 @@ __arm_vbrsrq_n_u32 (uint32x4_t __a, int32_t __b)
>    return __builtin_mve_vbrsrq_n_uv4si (__a, __b);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_n_u32 (uint32x4_t __a, const int __imm)
> -{
> -  return __builtin_mve_vrshrq_n_uv4si (__a, __imm);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq_n_s32 (int32x4_t __a, int32_t __b)
> @@ -3889,13 +3771,6 @@ __arm_vaddvaq_s32 (int32_t __a, int32x4_t __b)
>    return __builtin_mve_vaddvaq_sv4si (__a, __b);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_n_s32 (int32x4_t __a, const int __imm)
> -{
> -  return __builtin_mve_vrshrq_n_sv4si (__a, __imm);
> -}
> -
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqmovntq_u16 (uint8x16_t __a, uint16x8_t __b)
> @@ -7437,90 +7312,6 @@ __arm_vqrdmlsdhxq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
>    return __builtin_mve_vqrdmlsdhxq_m_sv8hi (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrq_m_n_sv16qi (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrq_m_n_sv4si (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrq_m_n_sv8hi (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_m_n_u8 (uint8x16_t __inactive, uint8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrq_m_n_uv16qi (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_m_n_u32 (uint32x4_t __inactive, uint32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrq_m_n_uv4si (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrq_m_n_uv8hi (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrq_m_n_sv16qi (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrq_m_n_sv4si (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrq_m_n_sv8hi (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_m_n_u8 (uint8x16_t __inactive, uint8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrq_m_n_uv16qi (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_m_n_u32 (uint32x4_t __inactive, uint32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrq_m_n_uv4si (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrq_m_n_uv8hi (__inactive, __a, __imm, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vsliq_m_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
> @@ -10496,90 +10287,6 @@ __arm_vshlltq_x_n_u16 (uint16x8_t __a, const int __imm, mve_pred16_t __p)
>    return __builtin_mve_vshlltq_m_n_uv8hi (__arm_vuninitializedq_u32 (), __a, __imm, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_x_n_s8 (int8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrq_m_n_sv16qi (__arm_vuninitializedq_s8 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_x_n_s16 (int16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrq_m_n_sv8hi (__arm_vuninitializedq_s16 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_x_n_s32 (int32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrq_m_n_sv4si (__arm_vuninitializedq_s32 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_x_n_u8 (uint8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrq_m_n_uv16qi (__arm_vuninitializedq_u8 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_x_n_u16 (uint16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrq_m_n_uv8hi (__arm_vuninitializedq_u16 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_x_n_u32 (uint32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vrshrq_m_n_uv4si (__arm_vuninitializedq_u32 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_x_n_s8 (int8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrq_m_n_sv16qi (__arm_vuninitializedq_s8 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_x_n_s16 (int16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrq_m_n_sv8hi (__arm_vuninitializedq_s16 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_x_n_s32 (int32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrq_m_n_sv4si (__arm_vuninitializedq_s32 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_x_n_u8 (uint8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrq_m_n_uv16qi (__arm_vuninitializedq_u8 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_x_n_u16 (uint16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrq_m_n_uv8hi (__arm_vuninitializedq_u16 (), __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_x_n_u32 (uint32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> -  return __builtin_mve_vshrq_m_n_uv4si (__arm_vuninitializedq_u32 (), __a, __imm, __p);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vadciq_s32 (int32x4_t __a, int32x4_t __b, unsigned * __carry_out)
> @@ -14533,48 +14240,6 @@ __arm_vaddlvq (uint32x4_t __a)
>   return __arm_vaddlvq_u32 (__a);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq (int8x16_t __a, const int __imm)
> -{
> - return __arm_vshrq_n_s8 (__a, __imm);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq (int16x8_t __a, const int __imm)
> -{
> - return __arm_vshrq_n_s16 (__a, __imm);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq (int32x4_t __a, const int __imm)
> -{
> - return __arm_vshrq_n_s32 (__a, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq (uint8x16_t __a, const int __imm)
> -{
> - return __arm_vshrq_n_u8 (__a, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq (uint16x8_t __a, const int __imm)
> -{
> - return __arm_vshrq_n_u16 (__a, __imm);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq (uint32x4_t __a, const int __imm)
> -{
> - return __arm_vshrq_n_u32 (__a, __imm);
> -}
> -
>  __extension__ extern __inline int64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vaddlvq_p (int32x4_t __a, mve_pred16_t __p)
> @@ -14792,13 +14457,6 @@ __arm_vbrsrq (uint8x16_t __a, int32_t __b)
>   return __arm_vbrsrq_n_u8 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq (uint8x16_t __a, const int __imm)
> -{
> - return __arm_vrshrq_n_u8 (__a, __imm);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq (int8x16_t __a, int8_t __b)
> @@ -15002,13 +14660,6 @@ __arm_vaddvaq (int32_t __a, int8x16_t __b)
>   return __arm_vaddvaq_s8 (__a, __b);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq (int8x16_t __a, const int __imm)
> -{
> - return __arm_vrshrq_n_s8 (__a, __imm);
> -}
> -
>  __extension__ extern __inline uint16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq (uint16x8_t __a, uint16x8_t __b)
> @@ -15170,13 +14821,6 @@ __arm_vbrsrq (uint16x8_t __a, int32_t __b)
>   return __arm_vbrsrq_n_u16 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq (uint16x8_t __a, const int __imm)
> -{
> - return __arm_vrshrq_n_u16 (__a, __imm);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq (int16x8_t __a, int16_t __b)
> @@ -15380,13 +15024,6 @@ __arm_vaddvaq (int32_t __a, int16x8_t __b)
>   return __arm_vaddvaq_s16 (__a, __b);
>  }
> 
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq (int16x8_t __a, const int __imm)
> -{
> - return __arm_vrshrq_n_s16 (__a, __imm);
> -}
> -
>  __extension__ extern __inline uint32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vornq (uint32x4_t __a, uint32x4_t __b)
> @@ -15548,13 +15185,6 @@ __arm_vbrsrq (uint32x4_t __a, int32_t __b)
>   return __arm_vbrsrq_n_u32 (__a, __b);
>  }
> 
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq (uint32x4_t __a, const int __imm)
> -{
> - return __arm_vrshrq_n_u32 (__a, __imm);
> -}
> -
>  __extension__ extern __inline mve_pred16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vcmpneq (int32x4_t __a, int32_t __b)
> @@ -15758,13 +15388,6 @@ __arm_vaddvaq (int32_t __a, int32x4_t __b)
>   return __arm_vaddvaq_s32 (__a, __b);
>  }
> 
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq (int32x4_t __a, const int __imm)
> -{
> - return __arm_vrshrq_n_s32 (__a, __imm);
> -}
> -
>  __extension__ extern __inline uint8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqmovntq (uint8x16_t __a, uint16x8_t __b)
> @@ -19265,90 +18888,6 @@ __arm_vqrdmlsdhxq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
>   return __arm_vqrdmlsdhxq_m_s16 (__inactive, __a, __b, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_m (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrq_m_n_s8 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_m (int32x4_t __inactive, int32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrq_m_n_s32 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_m (int16x8_t __inactive, int16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrq_m_n_s16 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_m (uint8x16_t __inactive, uint8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrq_m_n_u8 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_m (uint32x4_t __inactive, uint32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrq_m_n_u32 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_m (uint16x8_t __inactive, uint16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrq_m_n_u16 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_m (int8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrq_m_n_s8 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_m (int32x4_t __inactive, int32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrq_m_n_s32 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_m (int16x8_t __inactive, int16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrq_m_n_s16 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_m (uint8x16_t __inactive, uint8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrq_m_n_u8 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_m (uint32x4_t __inactive, uint32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrq_m_n_u32 (__inactive, __a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_m (uint16x8_t __inactive, uint16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrq_m_n_u16 (__inactive, __a, __imm, __p);
> -}
> -
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vsliq_m (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
> @@ -21827,90 +21366,6 @@ __arm_vshlltq_x (uint16x8_t __a, const int __imm, mve_pred16_t __p)
>   return __arm_vshlltq_x_n_u16 (__a, __imm, __p);
>  }
> 
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_x (int8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrq_x_n_s8 (__a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_x (int16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrq_x_n_s16 (__a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_x (int32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrq_x_n_s32 (__a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_x (uint8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrq_x_n_u8 (__a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_x (uint16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrq_x_n_u16 (__a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vrshrq_x (uint32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vrshrq_x_n_u32 (__a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_x (int8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrq_x_n_s8 (__a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_x (int16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrq_x_n_s16 (__a, __imm, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_x (int32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrq_x_n_s32 (__a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_x (uint8x16_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrq_x_n_u8 (__a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_x (uint16x8_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrq_x_n_u16 (__a, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshrq_x (uint32x4_t __a, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshrq_x_n_u32 (__a, __imm, __p);
> -}
> -
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vadciq (int32x4_t __a, int32x4_t __b, unsigned * __carry_out)
> @@ -25121,15 +24576,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vcvtq_f16_u16 (__ARM_mve_coerce(__p0, uint16x8_t)), \
>    int (*)[__ARM_mve_type_uint32x4_t]: __arm_vcvtq_f32_u32 (__ARM_mve_coerce(__p0, uint32x4_t)));})
> 
> -#define __arm_vshrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshrq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshrq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshrq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> -
>  #define __arm_vcvtq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
>    int (*)[__ARM_mve_type_int16x8_t]: __arm_vcvtq_n_f16_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
> @@ -25394,24 +24840,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshllbq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
>    int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshllbq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1));})
> 
> -#define __arm_vrshrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrshrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrshrq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vrshrq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrshrq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrshrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrshrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> -
> -#define __arm_vrshrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrshrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrshrq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vrshrq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrshrq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrshrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrshrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> -
>  #define __arm_vqshluq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
>    int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshluq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
> @@ -26935,15 +26363,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_int16x8_t]: __arm_vqnegq_s16 (__ARM_mve_coerce(__p0, int16x8_t)), \
>    int (*)[__ARM_mve_type_int32x4_t]: __arm_vqnegq_s32 (__ARM_mve_coerce(__p0, int32x4_t)));})
> 
> -#define __arm_vshrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshrq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshrq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshrq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> -
>  #define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -26966,15 +26385,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshluq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
>    int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshluq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1));})
> 
> -#define __arm_vrshrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrshrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrshrq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vrshrq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrshrq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrshrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrshrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
> -
>  #define __arm_vornq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> @@ -28128,15 +27538,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_int8x16_t]: __arm_vrev16q_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2), \
>    int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrev16q_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2));})
> 
> -#define __arm_vrshrq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrshrq_x_n_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2, p3), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrshrq_x_n_s16 (__ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vrshrq_x_n_s32 (__ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrshrq_x_n_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrshrq_x_n_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrshrq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
> -
>  #define __arm_vshllbq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
>    int (*)[__ARM_mve_type_int8x16_t]: __arm_vshllbq_x_n_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2, p3), \
> @@ -28211,15 +27612,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_int_n]: __arm_vddupq_x_n_u32 ((uint32_t) __p1, p2, p3), \
>    int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vddupq_x_wb_u32 (__ARM_mve_coerce(__p1, uint32_t *), p2, p3));})
> 
> -#define __arm_vshrq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t]: __arm_vshrq_x_n_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2, p3), \
> -  int (*)[__ARM_mve_type_int16x8_t]: __arm_vshrq_x_n_s16 (__ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_int32x4_t]: __arm_vshrq_x_n_s32 (__ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshrq_x_n_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshrq_x_n_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshrq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
> -
>  #define __arm_vhcaddq_rot270_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
>    __typeof(p2) __p2 = (p2); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
> @@ -28366,26 +27758,6 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int), p3), \
>    int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int), p3));})
> 
> -#define __arm_vrshrq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrshrq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrshrq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrshrq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vrshrq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vrshrq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrshrq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),  p2, p3));})
> -
> -#define __arm_vshrq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> -  __typeof(p1) __p1 = (p1); \
> -  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> -  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vshrq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vshrq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vshrq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vshrq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vshrq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),  p2, p3), \
> -  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vshrq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),  p2, p3));})
> -
>  #define __arm_vsliq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
> --
> 2.34.1



end of thread, other threads:[~2023-05-05 11:07 UTC | newest]

Thread overview: 46+ messages
-- links below jump to the message on this page --
2023-05-05  8:39 [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Christophe Lyon
2023-05-05  8:39 ` [PATCH 02/23] arm: [MVE intrinsics] factorize vqrshlq vrshlq Christophe Lyon
2023-05-05  9:58   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 03/23] arm: [MVE intrinsics] rework vrshlq vqrshlq Christophe Lyon
2023-05-05  9:59   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 04/23] arm: [MVE intrinsics] factorize vqshlq vshlq Christophe Lyon
2023-05-05 10:00   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 05/23] arm: [MVE intrinsics] rework vqrdmulhq Christophe Lyon
2023-05-05 10:01   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 06/23] arm: [MVE intrinsics] factorize vabdq Christophe Lyon
2023-05-05 10:48   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 07/23] arm: [MVE intrinsics] rework vabdq Christophe Lyon
2023-05-05 10:49   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 08/23] arm: [MVE intrinsics] add binary_lshift shape Christophe Lyon
2023-05-05 10:51   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 09/23] arm: [MVE intrinsics] add support for MODE_r Christophe Lyon
2023-05-05 10:55   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 10/23] arm: [MVE intrinsics] add binary_lshift_r shape Christophe Lyon
2023-05-05 10:56   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 11/23] arm: [MVE intrinsics] add unspec_mve_function_exact_insn_vshl Christophe Lyon
2023-05-05 10:56   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 12/23] arm: [MVE intrinsics] rework vqshlq vshlq Christophe Lyon
2023-05-05 10:58   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 13/23] arm: [MVE intrinsics] factorize vmaxq vminq Christophe Lyon
2023-05-05 10:58   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 14/23] arm: [MVE intrinsics] rework " Christophe Lyon
2023-05-05 10:59   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 15/23] arm: [MVE intrinsics] add binary_rshift_narrow shape Christophe Lyon
2023-05-05 11:00   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 16/23] arm: [MVE intrinsics] factorize vshrntq vshrnbq vrshrnbq vrshrntq vqshrnbq vqshrntq vqrshrnbq vqrshrntq Christophe Lyon
2023-05-05 11:00   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 17/23] arm: [MVE intrinsics] rework vshrnbq vshrntq " Christophe Lyon
2023-05-05 11:02   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 18/23] arm: [MVE intrinsics] add binary_rshift_narrow_unsigned shape Christophe Lyon
2023-05-05 11:03   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 19/23] arm: [MVE intrinsics] factorize vqrshrunb vqrshrunt vqshrunb vqshrunt Christophe Lyon
2023-05-05 11:04   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 20/23] arm: [MVE intrinsics] rework vqrshrunbq vqrshruntq vqshrunbq vqshruntq Christophe Lyon
2023-05-05 11:05   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 21/23] arm: [MVE intrinsics] add binary_rshift shape Christophe Lyon
2023-05-05 11:05   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 22/23] arm: [MVE intrinsics] factorize vsrhrq vrshrq Christophe Lyon
2023-05-05 11:06   ` Kyrylo Tkachov
2023-05-05  8:39 ` [PATCH 23/23] arm: [MVE intrinsics] rework vshrq vrshrq Christophe Lyon
2023-05-05 11:07   ` Kyrylo Tkachov
2023-05-05  9:55 ` [PATCH 01/23] arm: [MVE intrinsics] add binary_round_lshift shape Kyrylo Tkachov
