public inbox for gcc-patches@gcc.gnu.org
* [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq
From: Christophe Lyon @ 2023-05-11 12:18 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vaddlvaq builtins so that they use parameterized names.
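
For reference, a minimal usage sketch of the intrinsics behind these
patterns (function names are illustrative; assumes an MVE-enabled
target such as -march=armv8.1-m.main+mve):

  #include <arm_mve.h>

  int64_t acc_s32 (int64_t a, int32x4_t b)
  {
    return vaddlvaq_s32 (a, b);         /* vaddlva.s32 */
  }

  int64_t acc_p_s32 (int64_t a, int32x4_t b, mve_pred16_t p)
  {
    return vaddlvaq_p_s32 (a, b, p);    /* vpst; vaddlvat.s32 */
  }

The rename only parameterizes the pattern names: the instantiated insns
(mve_vaddlvaq_sv4si, mve_vaddlvaq_uv4si and their _p counterparts) and
the emitted assembly are unchanged.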

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (mve_insn): Add vaddlva.
	* config/arm/mve.md (mve_vaddlvaq_<supf>v4si): Rename into ...
	(@mve_<mve_insn>q_<supf>v4si): ... this.
	(mve_vaddlvaq_p_<supf>v4si): Rename into ...
	(@mve_<mve_insn>q_p_<supf>v4si): ... this.
---
 gcc/config/arm/iterators.md | 2 ++
 gcc/config/arm/mve.md       | 8 ++++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 2f6de937ef7..ff146afd913 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -759,6 +759,8 @@ (define_int_attr mve_insn [
 		 (VABDQ_S "vabd") (VABDQ_U "vabd") (VABDQ_F "vabd")
 		 (VABSQ_M_F "vabs")
 		 (VABSQ_M_S "vabs")
+		 (VADDLVAQ_P_S "vaddlva") (VADDLVAQ_P_U "vaddlva")
+		 (VADDLVAQ_S "vaddlva") (VADDLVAQ_U "vaddlva")
 		 (VADDLVQ_P_S "vaddlv") (VADDLVQ_P_U "vaddlv")
 		 (VADDLVQ_S "vaddlv") (VADDLVQ_U "vaddlv")
 		 (VADDQ_M_N_S "vadd") (VADDQ_M_N_U "vadd") (VADDQ_M_N_F "vadd")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index f5cb8ef48ef..b548eced4f5 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1222,7 +1222,7 @@ (define_insn "@mve_<mve_insn>q_f<mode>"
 ;;
 ;; [vaddlvaq_s vaddlvaq_u])
 ;;
-(define_insn "mve_vaddlvaq_<supf>v4si"
+(define_insn "@mve_<mve_insn>q_<supf>v4si"
   [
    (set (match_operand:DI 0 "s_register_operand" "=r")
 	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
@@ -1230,7 +1230,7 @@ (define_insn "mve_vaddlvaq_<supf>v4si"
 	 VADDLVAQ))
   ]
   "TARGET_HAVE_MVE"
-  "vaddlva.<supf>32\t%Q0, %R0, %q2"
+  "<mve_insn>.<supf>32\t%Q0, %R0, %q2"
   [(set_attr "type" "mve_move")
 ])
 
@@ -2534,7 +2534,7 @@ (define_insn "@mve_<mve_insn>q_m_f<mode>"
 ;;
 ;; [vaddlvaq_p_s vaddlvaq_p_u])
 ;;
-(define_insn "mve_vaddlvaq_p_<supf>v4si"
+(define_insn "@mve_<mve_insn>q_p_<supf>v4si"
   [
    (set (match_operand:DI 0 "s_register_operand" "=r")
 	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
@@ -2543,7 +2543,7 @@ (define_insn "mve_vaddlvaq_p_<supf>v4si"
 	 VADDLVAQ_P))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vaddlvat.<supf>32\t%Q0, %R0, %q2"
+  "vpst\;<mve_insn>t.<supf>32\t%Q0, %R0, %q2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 ;;
-- 
2.34.1


* [PATCH 02/24] arm: [MVE intrinsics] add unary_widen_acc shape
From: Christophe Lyon @ 2023-05-11 12:18 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the unary_widen_acc shape description.
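
As a sketch of what this shape enables once an intrinsic is bound to it
(vaddlvaq is the example used in the comment below), the overloaded form
resolves from the vector argument's type.  Function names here are
illustrative:

  #include <arm_mve.h>

  int64_t f (int64_t a, int32x4_t b, mve_pred16_t p)
  {
    return vaddlvaq_p (a, b, p);        /* resolves to vaddlvaq_p_s32 */
  }

  uint64_t g (uint64_t a, uint32x4_t b)
  {
    return vaddlvaq (a, b);             /* resolves to vaddlvaq_u32 */
  }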

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (unary_widen_acc): New.
	* config/arm/arm-mve-builtins-shapes.h (unary_widen_acc): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 34 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 35 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index ae73fc6b1b7..a7faf8299cb 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1282,6 +1282,40 @@ struct unary_widen_def : public overloaded_base<0>
 };
 SHAPE (unary_widen)
 
+/* <S0:twice>_t vfoo[_<t0>](<S0:twice>_t, <T0>_t)
+
+   i.e. a version of "unary" in which the source elements are half the
+   size of the destination scalar and accumulator, but have the same
+   type class.
+
+   Example: vaddlvaq.
+   int64_t [__arm_]vaddlvaq[_s32](int64_t a, int32x4_t b)
+   int64_t [__arm_]vaddlvaq_p[_s32](int64_t a, int32x4_t b, mve_pred16_t p)  */
+struct unary_widen_acc_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+    build_all (b, "sw0,sw0,v0", group, MODE_none, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    type_suffix_index type;
+    if (!r.check_gp_argument (2, i, nargs)
+	|| !r.require_derived_scalar_type (0, r.SAME_TYPE_CLASS)
+	|| (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES)
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+};
+SHAPE (unary_widen_acc)
+
 } /* end namespace arm_mve */
 
 #undef SHAPE
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index 5a8d9fe2b2d..46cc26ef918 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -59,6 +59,7 @@ namespace arm_mve
     extern const function_shape *const unary_int32_acc;
     extern const function_shape *const unary_n;
     extern const function_shape *const unary_widen;
+    extern const function_shape *const unary_widen_acc;
 
   } /* end namespace arm_mve::shapes */
 } /* end namespace arm_mve */
-- 
2.34.1


* [PATCH 03/24] arm: [MVE intrinsics] rework vaddlvaq
From: Christophe Lyon @ 2023-05-11 12:18 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vaddlvaq using the new MVE builtins framework.
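
The single DEF_MVE_FUNCTION entry (unary_widen_acc shape, integer_32
types, p_or_none predication) now provides vaddlvaq, vaddlvaq_p and
their _s32/_u32 forms, so the user-visible API is unchanged.  A quick
sketch (function names are illustrative):

  #include <arm_mve.h>

  int64_t f (int64_t a, int32x4_t b)
  {
    return vaddlvaq (a, b);             /* previously dispatched by the
                                           __arm_vaddlvaq _Generic macro */
  }

  uint64_t g (uint64_t a, uint32x4_t b, mve_pred16_t p)
  {
    return vaddlvaq_p (a, b, p);        /* previously __arm_vaddlvaq_p */
  }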

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vaddlvaq): New.
	* config/arm/arm-mve-builtins-base.def (vaddlvaq): New.
	* config/arm/arm-mve-builtins-base.h (vaddlvaq): New.
	* config/arm/arm_mve.h (vaddlvaq): Remove.
	(vaddlvaq_p): Remove.
	(vaddlvaq_u32): Remove.
	(vaddlvaq_s32): Remove.
	(vaddlvaq_p_s32): Remove.
	(vaddlvaq_p_u32): Remove.
	(__arm_vaddlvaq_u32): Remove.
	(__arm_vaddlvaq_s32): Remove.
	(__arm_vaddlvaq_p_s32): Remove.
	(__arm_vaddlvaq_p_u32): Remove.
	(__arm_vaddlvaq): Remove.
	(__arm_vaddlvaq_p): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |  1 +
 gcc/config/arm/arm-mve-builtins-base.def |  1 +
 gcc/config/arm/arm-mve-builtins-base.h   |  1 +
 gcc/config/arm/arm_mve.h                 | 74 ------------------------
 4 files changed, 3 insertions(+), 74 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 2dec15ac0b1..070a41c2d89 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -244,6 +244,7 @@ namespace arm_mve {
 FUNCTION_WITHOUT_N (vabdq, VABDQ)
 FUNCTION (vabsq, unspec_based_mve_function_exact_insn, (ABS, ABS, ABS, -1, -1, -1, VABSQ_M_S, -1, VABSQ_M_F, -1, -1, -1))
 FUNCTION_WITH_RTX_M_N (vaddq, PLUS, VADDQ)
+FUNCTION_PRED_P_S_U (vaddlvaq, VADDLVAQ)
 FUNCTION_PRED_P_S_U (vaddlvq, VADDLVQ)
 FUNCTION_PRED_P_S_U (vaddvq, VADDVQ)
 FUNCTION_PRED_P_S_U (vaddvaq, VADDVAQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index b0de5af1013..62d2050b86d 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -20,6 +20,7 @@
 #define REQUIRES_FLOAT false
 DEF_MVE_FUNCTION (vabdq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vabsq, unary, all_signed, mx_or_none)
+DEF_MVE_FUNCTION (vaddlvaq, unary_widen_acc, integer_32, p_or_none)
 DEF_MVE_FUNCTION (vaddlvq, unary_acc, integer_32, p_or_none)
 DEF_MVE_FUNCTION (vaddq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vaddvaq, unary_int32_acc, all_integer, p_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index fa2e97fd461..59754a03977 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -25,6 +25,7 @@ namespace functions {
 
 extern const function_base *const vabdq;
 extern const function_base *const vabsq;
+extern const function_base *const vaddlvaq;
 extern const function_base *const vaddlvq;
 extern const function_base *const vaddq;
 extern const function_base *const vaddvaq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index c0891b7592a..8b61593c6b0 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -66,7 +66,6 @@
 #define vmlsldavq(__a, __b) __arm_vmlsldavq(__a, __b)
 #define vmlaldavxq(__a, __b) __arm_vmlaldavxq(__a, __b)
 #define vrmlaldavhq(__a, __b) __arm_vrmlaldavhq(__a, __b)
-#define vaddlvaq(__a, __b) __arm_vaddlvaq(__a, __b)
 #define vrmlsldavhxq(__a, __b) __arm_vrmlsldavhxq(__a, __b)
 #define vrmlsldavhq(__a, __b) __arm_vrmlsldavhq(__a, __b)
 #define vrmlaldavhxq(__a, __b) __arm_vrmlaldavhxq(__a, __b)
@@ -103,7 +102,6 @@
 #define vrmlaldavhaxq(__a, __b, __c) __arm_vrmlaldavhaxq(__a, __b, __c)
 #define vrmlsldavhaq(__a, __b, __c) __arm_vrmlsldavhaq(__a, __b, __c)
 #define vrmlsldavhaxq(__a, __b, __c) __arm_vrmlsldavhaxq(__a, __b, __c)
-#define vaddlvaq_p(__a, __b, __p) __arm_vaddlvaq_p(__a, __b, __p)
 #define vrmlaldavhq_p(__a, __b, __p) __arm_vrmlaldavhq_p(__a, __b, __p)
 #define vrmlaldavhxq_p(__a, __b, __p) __arm_vrmlaldavhxq_p(__a, __b, __p)
 #define vrmlsldavhq_p(__a, __b, __p) __arm_vrmlsldavhq_p(__a, __b, __p)
@@ -474,14 +472,12 @@
 #define vctp64q_m(__a, __p) __arm_vctp64q_m(__a, __p)
 #define vctp32q_m(__a, __p) __arm_vctp32q_m(__a, __p)
 #define vctp16q_m(__a, __p) __arm_vctp16q_m(__a, __p)
-#define vaddlvaq_u32(__a, __b) __arm_vaddlvaq_u32(__a, __b)
 #define vrmlsldavhxq_s32(__a, __b) __arm_vrmlsldavhxq_s32(__a, __b)
 #define vrmlsldavhq_s32(__a, __b) __arm_vrmlsldavhq_s32(__a, __b)
 #define vrmlaldavhxq_s32(__a, __b) __arm_vrmlaldavhxq_s32(__a, __b)
 #define vrmlaldavhq_s32(__a, __b) __arm_vrmlaldavhq_s32(__a, __b)
 #define vcvttq_f16_f32(__a, __b) __arm_vcvttq_f16_f32(__a, __b)
 #define vcvtbq_f16_f32(__a, __b) __arm_vcvtbq_f16_f32(__a, __b)
-#define vaddlvaq_s32(__a, __b) __arm_vaddlvaq_s32(__a, __b)
 #define vabavq_s8(__a, __b, __c) __arm_vabavq_s8(__a, __b, __c)
 #define vabavq_s16(__a, __b, __c) __arm_vabavq_s16(__a, __b, __c)
 #define vabavq_s32(__a, __b, __c) __arm_vabavq_s32(__a, __b, __c)
@@ -615,7 +611,6 @@
 #define vrmlaldavhaxq_s32(__a, __b, __c) __arm_vrmlaldavhaxq_s32(__a, __b, __c)
 #define vrmlsldavhaq_s32(__a, __b, __c) __arm_vrmlsldavhaq_s32(__a, __b, __c)
 #define vrmlsldavhaxq_s32(__a, __b, __c) __arm_vrmlsldavhaxq_s32(__a, __b, __c)
-#define vaddlvaq_p_s32(__a, __b, __p) __arm_vaddlvaq_p_s32(__a, __b, __p)
 #define vcvtbq_m_f16_f32(__a, __b, __p) __arm_vcvtbq_m_f16_f32(__a, __b, __p)
 #define vcvtbq_m_f32_f16(__inactive, __a, __p) __arm_vcvtbq_m_f32_f16(__inactive, __a, __p)
 #define vcvttq_m_f16_f32(__a, __b, __p) __arm_vcvttq_m_f16_f32(__a, __b, __p)
@@ -624,7 +619,6 @@
 #define vrmlaldavhxq_p_s32(__a, __b, __p) __arm_vrmlaldavhxq_p_s32(__a, __b, __p)
 #define vrmlsldavhq_p_s32(__a, __b, __p) __arm_vrmlsldavhq_p_s32(__a, __b, __p)
 #define vrmlsldavhxq_p_s32(__a, __b, __p) __arm_vrmlsldavhxq_p_s32(__a, __b, __p)
-#define vaddlvaq_p_u32(__a, __b, __p) __arm_vaddlvaq_p_u32(__a, __b, __p)
 #define vrmlaldavhq_p_u32(__a, __b, __p) __arm_vrmlaldavhq_p_u32(__a, __b, __p)
 #define vmvnq_m_n_s16(__inactive,  __imm, __p) __arm_vmvnq_m_n_s16(__inactive,  __imm, __p)
 #define vcmlaq_f16(__a, __b, __c) __arm_vcmlaq_f16(__a, __b, __c)
@@ -2253,13 +2247,6 @@ __arm_vctp16q_m (uint32_t __a, mve_pred16_t __p)
   return __builtin_mve_vctp16q_mv8bi (__a, __p);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddlvaq_u32 (uint64_t __a, uint32x4_t __b)
-{
-  return __builtin_mve_vaddlvaq_uv4si (__a, __b);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrmlsldavhxq_s32 (int32x4_t __a, int32x4_t __b)
@@ -2288,13 +2275,6 @@ __arm_vrmlaldavhq_s32 (int32x4_t __a, int32x4_t __b)
   return __builtin_mve_vrmlaldavhq_sv4si (__a, __b);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddlvaq_s32 (int64_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vaddlvaq_sv4si (__a, __b);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vabavq_s8 (uint32_t __a, int8x16_t __b, int8x16_t __c)
@@ -3182,13 +3162,6 @@ __arm_vrmlsldavhaxq_s32 (int64_t __a, int32x4_t __b, int32x4_t __c)
   return __builtin_mve_vrmlsldavhaxq_sv4si (__a, __b, __c);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddlvaq_p_s32 (int64_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vaddlvaq_p_sv4si (__a, __b, __p);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrmlaldavhq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
@@ -3217,13 +3190,6 @@ __arm_vrmlsldavhxq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
   return __builtin_mve_vrmlsldavhxq_p_sv4si (__a, __b, __p);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddlvaq_p_u32 (uint64_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vaddlvaq_p_uv4si (__a, __b, __p);
-}
-
 __extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrmlaldavhq_p_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
@@ -10047,13 +10013,6 @@ __arm_vrmlaldavhq (uint32x4_t __a, uint32x4_t __b)
  return __arm_vrmlaldavhq_u32 (__a, __b);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddlvaq (uint64_t __a, uint32x4_t __b)
-{
- return __arm_vaddlvaq_u32 (__a, __b);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrmlsldavhxq (int32x4_t __a, int32x4_t __b)
@@ -10082,13 +10041,6 @@ __arm_vrmlaldavhq (int32x4_t __a, int32x4_t __b)
  return __arm_vrmlaldavhq_s32 (__a, __b);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddlvaq (int64_t __a, int32x4_t __b)
-{
- return __arm_vaddlvaq_s32 (__a, __b);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vabavq (uint32_t __a, int8x16_t __b, int8x16_t __c)
@@ -10964,13 +10916,6 @@ __arm_vrmlsldavhaxq (int64_t __a, int32x4_t __b, int32x4_t __c)
  return __arm_vrmlsldavhaxq_s32 (__a, __b, __c);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddlvaq_p (int64_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vaddlvaq_p_s32 (__a, __b, __p);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrmlaldavhq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
@@ -10999,13 +10944,6 @@ __arm_vrmlsldavhxq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
  return __arm_vrmlsldavhxq_p_s32 (__a, __b, __p);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddlvaq_p (uint64_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vaddlvaq_p_u32 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrmlaldavhq_p (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
@@ -18698,18 +18636,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vabavq_p_u16(__p0, __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vabavq_p_u32(__p0, __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
 
-#define __arm_vaddlvaq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vaddlvaq_s32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vaddlvaq_u32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
-#define __arm_vaddlvaq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vaddlvaq_p_s32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vaddlvaq_p_u32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vmladavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
-- 
2.34.1


* [PATCH 04/24] arm: [MVE intrinsics] add binary_acc_int32 shape
From: Christophe Lyon @ 2023-05-11 12:18 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the binary_acc_int32 shape description.
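
As a sketch, these are the calls the shape describes (vmladavq is the
example used in the comment below; function names are illustrative):

  #include <arm_mve.h>

  int32_t dot_s16 (int16x8_t m1, int16x8_t m2)
  {
    return vmladavq (m1, m2);           /* signed inputs -> int32_t */
  }

  uint32_t dot_u16 (uint16x8_t m1, uint16x8_t m2, mve_pred16_t p)
  {
    return vmladavq_p (m1, m2, p);      /* unsigned inputs -> uint32_t */
  }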

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (binary_acc_int32): New.
	* config/arm/arm-mve-builtins-shapes.h (binary_acc_int32): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 27 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 28 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index a7faf8299cb..e491c810b40 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -365,6 +365,33 @@ struct binary_def : public overloaded_base<0>
 };
 SHAPE (binary)
 
+/* <[u]int32>_t vfoo[_<t0>](<T0>_t, <T0>_t)
+
+   i.e. the shape for binary operations that operate on a pair of
+   vectors and produce an int32_t or a uint32_t depending on the
+   signedness of the input elements.
+
+   Example: vmladavq.
+   int32_t [__arm_]vmladavq[_s16](int16x8_t m1, int16x8_t m2)
+   int32_t [__arm_]vmladavq_p[_s16](int16x8_t m1, int16x8_t m2, mve_pred16_t p)  */
+struct binary_acc_int32_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+    build_all (b, "sx32,v0,v0", group, MODE_none, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    return r.resolve_uniform (2);
+  }
+};
+SHAPE (binary_acc_int32)
+
 /* <T0>_t vfoo[_n_t0](<T0>_t, const int)
 
    Shape for vector shift right operations that take a vector first
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index 46cc26ef918..9e877c9591a 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -37,6 +37,7 @@ namespace arm_mve
     extern const function_shape *const binary;
     extern const function_shape *const binary_lshift;
     extern const function_shape *const binary_lshift_r;
+    extern const function_shape *const binary_acc_int32;
     extern const function_shape *const binary_maxamina;
     extern const function_shape *const binary_maxavminav;
     extern const function_shape *const binary_maxvminv;
-- 
2.34.1


* [PATCH 05/24] arm: [MVE intrinsics] factorize vmladav vmladavx vmlsdav vmlsdavx vmladava vmladavax vmlsdava vmlsdavax
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vmladav, vmladavx, vmlsdav, vmlsdavx, vmladava, vmladavax,
vmlsdava, vmlsdavax builtins so that they use the same parameterized
names.
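
The merged patterns keep emitting the same instructions; only the insn
names become parameterized.  A quick sketch of the corresponding
intrinsics (function names are illustrative; assumes an MVE-enabled
target):

  #include <arm_mve.h>

  int32_t dav (int16x8_t a, int16x8_t b)
  {
    return vmlsdavq_s16 (a, b);         /* vmlsdav.s16 */
  }

  int32_t davx_p (int16x8_t a, int16x8_t b, mve_pred16_t p)
  {
    return vmlsdavxq_p_s16 (a, b, p);   /* vpst; vmlsdavxt.s16 */
  }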

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MVE_VMLxDAVQ, MVE_VMLxDAVQ_P)
	(MVE_VMLxDAVAQ, MVE_VMLxDAVAQ_P): New.
	(mve_insn): Add vmladava, vmladavax, vmladav, vmladavx, vmlsdava,
	vmlsdavax, vmlsdav, vmlsdavx.
	(supf): Add VMLADAVAXQ_P_S, VMLADAVAXQ_S, VMLADAVXQ_P_S,
	VMLADAVXQ_S, VMLSDAVAQ_P_S, VMLSDAVAQ_S, VMLSDAVAXQ_P_S,
	VMLSDAVAXQ_S, VMLSDAVQ_P_S, VMLSDAVQ_S, VMLSDAVXQ_P_S,
	VMLSDAVXQ_S.
	* config/arm/mve.md (mve_vmladavq_<supf><mode>)
	(mve_vmladavxq_s<mode>, mve_vmlsdavq_s<mode>)
	(mve_vmlsdavxq_s<mode>): Merge into ...
	(@mve_<mve_insn>q_<supf><mode>): ... this.
	(mve_vmlsdavaq_s<mode>, mve_vmladavaxq_s<mode>)
	(mve_vmlsdavaxq_s<mode>, mve_vmladavaq_<supf><mode>): Merge into
	...
	(@mve_<mve_insn>q_<supf><mode>): ... this.
	(mve_vmladavq_p_<supf><mode>, mve_vmladavxq_p_s<mode>)
	(mve_vmlsdavq_p_s<mode>, mve_vmlsdavxq_p_s<mode>): Merge into ...
	(@mve_<mve_insn>q_p_<supf><mode>): ... this.
	(mve_vmladavaq_p_<supf><mode>, mve_vmladavaxq_p_s<mode>)
	(mve_vmlsdavaq_p_s<mode>, mve_vmlsdavaxq_p_s<mode>): Merge into
	...
	(@mve_<mve_insn>q_p_<supf><mode>): ... this.
---
 gcc/config/arm/iterators.md |  56 +++++++++
 gcc/config/arm/mve.md       | 236 +++++-------------------------------
 2 files changed, 84 insertions(+), 208 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index ff146afd913..68f5314041b 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -699,6 +699,34 @@ (define_int_iterator MVE_VMAXAVMINAQ_M [
 		     VMINAQ_M_S
 		     ])
 
+(define_int_iterator MVE_VMLxDAVQ [
+		     VMLADAVQ_S VMLADAVQ_U
+		     VMLADAVXQ_S
+		     VMLSDAVQ_S
+		     VMLSDAVXQ_S
+		     ])
+
+(define_int_iterator MVE_VMLxDAVQ_P [
+		     VMLADAVQ_P_S VMLADAVQ_P_U
+		     VMLADAVXQ_P_S
+		     VMLSDAVQ_P_S
+		     VMLSDAVXQ_P_S
+		     ])
+
+(define_int_iterator MVE_VMLxDAVAQ [
+		     VMLADAVAQ_S VMLADAVAQ_U
+		     VMLSDAVAXQ_S
+		     VMLSDAVAQ_S
+		     VMLADAVAXQ_S
+		     ])
+
+(define_int_iterator MVE_VMLxDAVAQ_P [
+		     VMLADAVAQ_P_S VMLADAVAQ_P_U
+		     VMLSDAVAXQ_P_S
+		     VMLSDAVAQ_P_S
+		     VMLADAVAXQ_P_S
+		     ])
+
 (define_int_iterator MVE_MOVN [
 		     VMOVNBQ_S VMOVNBQ_U
 		     VMOVNTQ_S VMOVNTQ_U
@@ -817,8 +845,24 @@ (define_int_attr mve_insn [
 		 (VMINQ_M_S "vmin") (VMINQ_M_U "vmin")
 		 (VMINVQ_P_S "vminv") (VMINVQ_P_U "vminv")
 		 (VMINVQ_S "vminv") (VMINVQ_U "vminv")
+		 (VMLADAVAQ_P_S "vmladava") (VMLADAVAQ_P_U "vmladava")
+		 (VMLADAVAQ_S "vmladava") (VMLADAVAQ_U "vmladava")
+		 (VMLADAVAXQ_P_S "vmladavax")
+		 (VMLADAVAXQ_S "vmladavax")
+		 (VMLADAVQ_P_S "vmladav") (VMLADAVQ_P_U "vmladav")
+		 (VMLADAVQ_S "vmladav") (VMLADAVQ_U "vmladav")
+		 (VMLADAVXQ_P_S "vmladavx")
+		 (VMLADAVXQ_S "vmladavx")
 		 (VMLAQ_M_N_S "vmla") (VMLAQ_M_N_U "vmla")
 		 (VMLASQ_M_N_S "vmlas") (VMLASQ_M_N_U "vmlas")
+		 (VMLSDAVAQ_P_S "vmlsdava")
+		 (VMLSDAVAQ_S "vmlsdava")
+		 (VMLSDAVAXQ_P_S "vmlsdavax")
+		 (VMLSDAVAXQ_S "vmlsdavax")
+		 (VMLSDAVQ_P_S "vmlsdav")
+		 (VMLSDAVQ_S "vmlsdav")
+		 (VMLSDAVXQ_P_S "vmlsdavx")
+		 (VMLSDAVXQ_S "vmlsdavx")
 		 (VMOVLBQ_M_S "vmovlb") (VMOVLBQ_M_U "vmovlb")
 		 (VMOVLBQ_S "vmovlb") (VMOVLBQ_U "vmovlb")
 		 (VMOVLTQ_M_S "vmovlt") (VMOVLTQ_M_U "vmovlt")
@@ -2237,6 +2281,18 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VCMPLTQ_M_S "s")
 		       (VCMPNEQ_M_N_S "s") (VCMPNEQ_M_N_U "u")
 		       (VCMPNEQ_M_S "s") (VCMPNEQ_M_U "u")
+		       (VMLADAVAXQ_P_S "s")
+		       (VMLADAVAXQ_S "s")
+		       (VMLADAVXQ_P_S "s")
+		       (VMLADAVXQ_S "s")
+		       (VMLSDAVAQ_P_S "s")
+		       (VMLSDAVAQ_S "s")
+		       (VMLSDAVAXQ_P_S "s")
+		       (VMLSDAVAXQ_S "s")
+		       (VMLSDAVQ_P_S "s")
+		       (VMLSDAVQ_S "s")
+		       (VMLSDAVXQ_P_S "s")
+		       (VMLSDAVXQ_S "s")
 		       ])
 
 ;; Both kinds of return insn.
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index b548eced4f5..f95525db583 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -985,62 +985,20 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
 ])
 
 ;;
-;; [vmladavq_u, vmladavq_s])
+;; [vmladavq_u, vmladavq_s]
+;; [vmladavxq_s]
+;; [vmlsdavq_s]
+;; [vmlsdavxq_s]
 ;;
-(define_insn "mve_vmladavq_<supf><mode>"
-  [
-   (set (match_operand:SI 0 "s_register_operand" "=Te")
-	(unspec:SI [(match_operand:MVE_2 1 "s_register_operand" "w")
-		    (match_operand:MVE_2 2 "s_register_operand" "w")]
-	 VMLADAVQ))
-  ]
-  "TARGET_HAVE_MVE"
-  "vmladav.<supf>%#<V_sz_elem>\t%0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vmladavxq_s])
-;;
-(define_insn "mve_vmladavxq_s<mode>"
-  [
-   (set (match_operand:SI 0 "s_register_operand" "=Te")
-	(unspec:SI [(match_operand:MVE_2 1 "s_register_operand" "w")
-		    (match_operand:MVE_2 2 "s_register_operand" "w")]
-	 VMLADAVXQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vmladavx.s%#<V_sz_elem>\t%0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vmlsdavq_s])
-;;
-(define_insn "mve_vmlsdavq_s<mode>"
-  [
-   (set (match_operand:SI 0 "s_register_operand" "=Te")
-	(unspec:SI [(match_operand:MVE_2 1 "s_register_operand" "w")
-		    (match_operand:MVE_2 2 "s_register_operand" "w")]
-	 VMLSDAVQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vmlsdav.s%#<V_sz_elem>\t%0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vmlsdavxq_s])
-;;
-(define_insn "mve_vmlsdavxq_s<mode>"
+(define_insn "@mve_<mve_insn>q_<supf><mode>"
   [
    (set (match_operand:SI 0 "s_register_operand" "=Te")
 	(unspec:SI [(match_operand:MVE_2 1 "s_register_operand" "w")
 		    (match_operand:MVE_2 2 "s_register_operand" "w")]
-	 VMLSDAVXQ_S))
+	 MVE_VMLxDAVQ))
   ]
   "TARGET_HAVE_MVE"
-  "vmlsdavx.s%#<V_sz_elem>\t%0, %q1, %q2"
+  "<mve_insn>.<supf>%#<V_sz_elem>\t%0, %q1, %q2"
   [(set_attr "type" "mve_move")
 ])
 
@@ -2043,50 +2001,40 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
    (set_attr "length""8")])
 
 ;;
-;; [vmladavaq_u, vmladavaq_s])
+;; [vmladavaq_u, vmladavaq_s]
+;; [vmladavaxq_s]
+;; [vmlsdavaq_s]
+;; [vmlsdavaxq_s]
 ;;
-(define_insn "mve_vmladavaq_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_<supf><mode>"
   [
    (set (match_operand:SI 0 "s_register_operand" "=Te")
 	(unspec:SI [(match_operand:SI 1 "s_register_operand" "0")
 		       (match_operand:MVE_2 2 "s_register_operand" "w")
 		       (match_operand:MVE_2 3 "s_register_operand" "w")]
-	 VMLADAVAQ))
+	 MVE_VMLxDAVAQ))
   ]
   "TARGET_HAVE_MVE"
-  "vmladava.<supf>%#<V_sz_elem>	%0, %q2, %q3"
+  "<mve_insn>.<supf>%#<V_sz_elem>\t%0, %q2, %q3"
   [(set_attr "type" "mve_move")
 ])
 
 ;;
-;; [vmladavq_p_u, vmladavq_p_s])
-;;
-(define_insn "mve_vmladavq_p_<supf><mode>"
-  [
-   (set (match_operand:SI 0 "s_register_operand" "=Te")
-	(unspec:SI [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VMLADAVQ_P))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vmladavt.<supf>%#<V_sz_elem>\t%0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vmladavxq_p_s])
+;; [vmladavq_p_u, vmladavq_p_s]
+;; [vmladavxq_p_s]
+;; [vmlsdavq_p_s]
+;; [vmlsdavxq_p_s]
 ;;
-(define_insn "mve_vmladavxq_p_s<mode>"
+(define_insn "@mve_<mve_insn>q_p_<supf><mode>"
   [
    (set (match_operand:SI 0 "s_register_operand" "=Te")
 	(unspec:SI [(match_operand:MVE_2 1 "s_register_operand" "w")
 		       (match_operand:MVE_2 2 "s_register_operand" "w")
 		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VMLADAVXQ_P_S))
+	 MVE_VMLxDAVQ_P))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vmladavxt.s%#<V_sz_elem>\t%0, %q1, %q2"
+  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%0, %q1, %q2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
@@ -2122,38 +2070,6 @@ (define_insn "mve_vmlasq_n_<supf><mode>"
   [(set_attr "type" "mve_move")
 ])
 
-;;
-;; [vmlsdavq_p_s])
-;;
-(define_insn "mve_vmlsdavq_p_s<mode>"
-  [
-   (set (match_operand:SI 0 "s_register_operand" "=Te")
-	(unspec:SI [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VMLSDAVQ_P_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vmlsdavt.s%#<V_sz_elem>	%0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vmlsdavxq_p_s])
-;;
-(define_insn "mve_vmlsdavxq_p_s<mode>"
-  [
-   (set (match_operand:SI 0 "s_register_operand" "=Te")
-	(unspec:SI [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VMLSDAVXQ_P_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vmlsdavxt.s%#<V_sz_elem>	%0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
 ;;
 ;; [vmvnq_m_s, vmvnq_m_u])
 ;;
@@ -2460,54 +2376,6 @@ (define_insn "mve_vqdmladhq_s<mode>"
   [(set_attr "type" "mve_move")
 ])
 
-;;
-;; [vmlsdavaxq_s])
-;;
-(define_insn "mve_vmlsdavaxq_s<mode>"
-  [
-   (set (match_operand:SI 0 "s_register_operand" "=Te")
-	(unspec:SI [(match_operand:SI 1 "s_register_operand" "0")
-		    (match_operand:MVE_2 2 "s_register_operand" "w")
-		    (match_operand:MVE_2 3 "s_register_operand" "w")]
-	 VMLSDAVAXQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vmlsdavax.s%#<V_sz_elem>\t%0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vmlsdavaq_s])
-;;
-(define_insn "mve_vmlsdavaq_s<mode>"
-  [
-   (set (match_operand:SI 0 "s_register_operand" "=Te")
-	(unspec:SI [(match_operand:SI 1 "s_register_operand" "0")
-		    (match_operand:MVE_2 2 "s_register_operand" "w")
-		    (match_operand:MVE_2 3 "s_register_operand" "w")]
-	 VMLSDAVAQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vmlsdava.s%#<V_sz_elem>\t%0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vmladavaxq_s])
-;;
-(define_insn "mve_vmladavaxq_s<mode>"
-  [
-   (set (match_operand:SI 0 "s_register_operand" "=Te")
-	(unspec:SI [(match_operand:SI 1 "s_register_operand" "0")
-		    (match_operand:MVE_2 2 "s_register_operand" "w")
-		    (match_operand:MVE_2 3 "s_register_operand" "w")]
-	 VMLADAVAXQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vmladavax.s%#<V_sz_elem>\t%0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-])
-
 ;;
 ;; [vabsq_m_f]
 ;; [vnegq_m_f]
@@ -3483,19 +3351,22 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
 
 ;;
 ;;
-;; [vmladavaq_p_u, vmladavaq_p_s])
+;; [vmladavaq_p_u, vmladavaq_p_s]
+;; [vmladavaxq_p_s]
+;; [vmlsdavaq_p_s]
+;; [vmlsdavaxq_p_s]
 ;;
-(define_insn "mve_vmladavaq_p_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_p_<supf><mode>"
   [
    (set (match_operand:SI 0 "s_register_operand" "=Te")
 	(unspec:SI [(match_operand:SI 1 "s_register_operand" "0")
 		    (match_operand:MVE_2 2 "s_register_operand" "w")
 		    (match_operand:MVE_2 3 "s_register_operand" "w")
 		    (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VMLADAVAQ_P))
+	 MVE_VMLxDAVAQ_P))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vmladavat.<supf>%#<V_sz_elem>	%0, %q2, %q3"
+  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%0, %q2, %q3"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
@@ -3637,57 +3508,6 @@ (define_insn "mve_vhcaddq_rot90_m_s<mode>"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-;;
-;; [vmladavaxq_p_s])
-;;
-(define_insn "mve_vmladavaxq_p_s<mode>"
-  [
-   (set (match_operand:SI 0 "s_register_operand" "=Te")
-	(unspec:SI [(match_operand:SI 1 "s_register_operand" "0")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:MVE_2 3 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VMLADAVAXQ_P_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vmladavaxt.s%#<V_sz_elem>\t%0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vmlsdavaq_p_s])
-;;
-(define_insn "mve_vmlsdavaq_p_s<mode>"
-  [
-   (set (match_operand:SI 0 "s_register_operand" "=Te")
-	(unspec:SI [(match_operand:SI 1 "s_register_operand" "0")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:MVE_2 3 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VMLSDAVAQ_P_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vmlsdavat.s%#<V_sz_elem>\t%0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vmlsdavaxq_p_s])
-;;
-(define_insn "mve_vmlsdavaxq_p_s<mode>"
-  [
-   (set (match_operand:SI 0 "s_register_operand" "=Te")
-	(unspec:SI [(match_operand:SI 1 "s_register_operand" "0")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:MVE_2 3 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VMLSDAVAXQ_P_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vmlsdavaxt.s%#<V_sz_elem>\t%0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
 ;;
 ;; [vmlaldavaq_p_u, vmlaldavaq_p_s])
 ;;
-- 
2.34.1


* [PATCH 06/24] arm: [MVE intrinsics] rework vmladavq vmladavxq vmlsdavq vmlsdavxq
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vmladavq, vmladavxq, vmlsdavq, vmlsdavxq using the new MVE
builtins framework.
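
The four new DEF_MVE_FUNCTION entries (binary_acc_int32 shape,
p_or_none predication) provide all the forms removed from arm_mve.h
below, so existing user code keeps resolving as before.  A quick sketch
(function names are illustrative):

  #include <arm_mve.h>

  int32_t f (int8x16_t a, int8x16_t b)
  {
    return vmladavxq (a, b);            /* resolves to vmladavxq_s8 */
  }

  uint32_t g (uint16x8_t a, uint16x8_t b, mve_pred16_t p)
  {
    return vmladavq_p (a, b, p);        /* resolves to vmladavq_p_u16 */
  }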

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vmladavq, vmladavxq)
	(vmlsdavq, vmlsdavxq): New.
	* config/arm/arm-mve-builtins-base.def (vmladavq, vmladavxq)
	(vmlsdavq, vmlsdavxq): New.
	* config/arm/arm-mve-builtins-base.h (vmladavq, vmladavxq)
	(vmlsdavq, vmlsdavxq): New.
	* config/arm/arm_mve.h (vmladavq): Remove.
	(vmlsdavxq): Remove.
	(vmlsdavq): Remove.
	(vmladavxq): Remove.
	(vmladavq_p): Remove.
	(vmlsdavxq_p): Remove.
	(vmlsdavq_p): Remove.
	(vmladavxq_p): Remove.
	(vmladavq_u8): Remove.
	(vmlsdavxq_s8): Remove.
	(vmlsdavq_s8): Remove.
	(vmladavxq_s8): Remove.
	(vmladavq_s8): Remove.
	(vmladavq_u16): Remove.
	(vmlsdavxq_s16): Remove.
	(vmlsdavq_s16): Remove.
	(vmladavxq_s16): Remove.
	(vmladavq_s16): Remove.
	(vmladavq_u32): Remove.
	(vmlsdavxq_s32): Remove.
	(vmlsdavq_s32): Remove.
	(vmladavxq_s32): Remove.
	(vmladavq_s32): Remove.
	(vmladavq_p_u8): Remove.
	(vmlsdavxq_p_s8): Remove.
	(vmlsdavq_p_s8): Remove.
	(vmladavxq_p_s8): Remove.
	(vmladavq_p_s8): Remove.
	(vmladavq_p_u16): Remove.
	(vmlsdavxq_p_s16): Remove.
	(vmlsdavq_p_s16): Remove.
	(vmladavxq_p_s16): Remove.
	(vmladavq_p_s16): Remove.
	(vmladavq_p_u32): Remove.
	(vmlsdavxq_p_s32): Remove.
	(vmlsdavq_p_s32): Remove.
	(vmladavxq_p_s32): Remove.
	(vmladavq_p_s32): Remove.
	(__arm_vmladavq_u8): Remove.
	(__arm_vmlsdavxq_s8): Remove.
	(__arm_vmlsdavq_s8): Remove.
	(__arm_vmladavxq_s8): Remove.
	(__arm_vmladavq_s8): Remove.
	(__arm_vmladavq_u16): Remove.
	(__arm_vmlsdavxq_s16): Remove.
	(__arm_vmlsdavq_s16): Remove.
	(__arm_vmladavxq_s16): Remove.
	(__arm_vmladavq_s16): Remove.
	(__arm_vmladavq_u32): Remove.
	(__arm_vmlsdavxq_s32): Remove.
	(__arm_vmlsdavq_s32): Remove.
	(__arm_vmladavxq_s32): Remove.
	(__arm_vmladavq_s32): Remove.
	(__arm_vmladavq_p_u8): Remove.
	(__arm_vmlsdavxq_p_s8): Remove.
	(__arm_vmlsdavq_p_s8): Remove.
	(__arm_vmladavxq_p_s8): Remove.
	(__arm_vmladavq_p_s8): Remove.
	(__arm_vmladavq_p_u16): Remove.
	(__arm_vmlsdavxq_p_s16): Remove.
	(__arm_vmlsdavq_p_s16): Remove.
	(__arm_vmladavxq_p_s16): Remove.
	(__arm_vmladavq_p_s16): Remove.
	(__arm_vmladavq_p_u32): Remove.
	(__arm_vmlsdavxq_p_s32): Remove.
	(__arm_vmlsdavq_p_s32): Remove.
	(__arm_vmladavxq_p_s32): Remove.
	(__arm_vmladavq_p_s32): Remove.
	(__arm_vmladavq): Remove.
	(__arm_vmlsdavxq): Remove.
	(__arm_vmlsdavq): Remove.
	(__arm_vmladavxq): Remove.
	(__arm_vmladavq_p): Remove.
	(__arm_vmlsdavxq_p): Remove.
	(__arm_vmlsdavq_p): Remove.
	(__arm_vmladavxq_p): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   4 +
 gcc/config/arm/arm-mve-builtins-base.def |   4 +
 gcc/config/arm/arm-mve-builtins-base.h   |   4 +
 gcc/config/arm/arm_mve.h                 | 523 -----------------------
 4 files changed, 12 insertions(+), 523 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 070a41c2d89..69af6f9139e 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -280,6 +280,10 @@ FUNCTION (vminnmq, unspec_based_mve_function_exact_insn, (UNKNOWN, UNKNOWN, SMIN
 FUNCTION_PRED_P_F (vminnmvq, VMINNMVQ)
 FUNCTION_WITH_RTX_M_NO_F (vminq, SMIN, UMIN, VMINQ)
 FUNCTION_PRED_P_S_U (vminvq, VMINVQ)
+FUNCTION_PRED_P_S_U (vmladavq, VMLADAVQ)
+FUNCTION_PRED_P_S (vmladavxq, VMLADAVXQ)
+FUNCTION_PRED_P_S (vmlsdavq, VMLSDAVQ)
+FUNCTION_PRED_P_S (vmlsdavxq, VMLSDAVXQ)
 FUNCTION_WITHOUT_N_NO_F (vmovlbq, VMOVLBQ)
 FUNCTION_WITHOUT_N_NO_F (vmovltq, VMOVLTQ)
 FUNCTION_WITHOUT_N_NO_F (vmovnbq, VMOVNBQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 62d2050b86d..40d462fc7d2 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -49,6 +49,10 @@ DEF_MVE_FUNCTION (vminaq, binary_maxamina, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vminavq, binary_maxavminav, all_signed, p_or_none)
 DEF_MVE_FUNCTION (vminq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vminvq, binary_maxvminv, all_integer, p_or_none)
+DEF_MVE_FUNCTION (vmladavq, binary_acc_int32, all_integer, p_or_none)
+DEF_MVE_FUNCTION (vmladavxq, binary_acc_int32, all_signed, p_or_none)
+DEF_MVE_FUNCTION (vmlsdavq, binary_acc_int32, all_integer, p_or_none)
+DEF_MVE_FUNCTION (vmlsdavxq, binary_acc_int32, all_signed, p_or_none)
 DEF_MVE_FUNCTION (vmovlbq, unary_widen, integer_8_16, mx_or_none)
 DEF_MVE_FUNCTION (vmovltq, unary_widen, integer_8_16, mx_or_none)
 DEF_MVE_FUNCTION (vmovnbq, binary_move_narrow, integer_16_32, m_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 59754a03977..e3c69a81a45 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -62,6 +62,10 @@ extern const function_base *const vminnmq;
 extern const function_base *const vminnmvq;
 extern const function_base *const vminq;
 extern const function_base *const vminvq;
+extern const function_base *const vmladavq;
+extern const function_base *const vmladavxq;
+extern const function_base *const vmlsdavq;
+extern const function_base *const vmlsdavxq;
 extern const function_base *const vmovlbq;
 extern const function_base *const vmovltq;
 extern const function_base *const vmovnbq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 8b61593c6b0..a739db4e4cc 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -46,15 +46,11 @@
 #define vornq(__a, __b) __arm_vornq(__a, __b)
 #define vmulltq_int(__a, __b) __arm_vmulltq_int(__a, __b)
 #define vmullbq_int(__a, __b) __arm_vmullbq_int(__a, __b)
-#define vmladavq(__a, __b) __arm_vmladavq(__a, __b)
 #define vcaddq_rot90(__a, __b) __arm_vcaddq_rot90(__a, __b)
 #define vcaddq_rot270(__a, __b) __arm_vcaddq_rot270(__a, __b)
 #define vbicq(__a, __b) __arm_vbicq(__a, __b)
 #define vbrsrq(__a, __b) __arm_vbrsrq(__a, __b)
 #define vqshluq(__a, __imm) __arm_vqshluq(__a, __imm)
-#define vmlsdavxq(__a, __b) __arm_vmlsdavxq(__a, __b)
-#define vmlsdavq(__a, __b) __arm_vmlsdavq(__a, __b)
-#define vmladavxq(__a, __b) __arm_vmladavxq(__a, __b)
 #define vhcaddq_rot90(__a, __b) __arm_vhcaddq_rot90(__a, __b)
 #define vhcaddq_rot270(__a, __b) __arm_vhcaddq_rot270(__a, __b)
 #define vmulltq_poly(__a, __b) __arm_vmulltq_poly(__a, __b)
@@ -81,13 +77,9 @@
 #define vmvnq_m(__inactive, __a, __p) __arm_vmvnq_m(__inactive, __a, __p)
 #define vmlasq(__a, __b, __c) __arm_vmlasq(__a, __b, __c)
 #define vmlaq(__a, __b, __c) __arm_vmlaq(__a, __b, __c)
-#define vmladavq_p(__a, __b, __p) __arm_vmladavq_p(__a, __b, __p)
 #define vmladavaq(__a, __b, __c) __arm_vmladavaq(__a, __b, __c)
 #define vsriq(__a, __b, __imm) __arm_vsriq(__a, __b, __imm)
 #define vsliq(__a, __b, __imm) __arm_vsliq(__a, __b, __imm)
-#define vmlsdavxq_p(__a, __b, __p) __arm_vmlsdavxq_p(__a, __b, __p)
-#define vmlsdavq_p(__a, __b, __p) __arm_vmlsdavq_p(__a, __b, __p)
-#define vmladavxq_p(__a, __b, __p) __arm_vmladavxq_p(__a, __b, __p)
 #define vqrdmlsdhxq(__inactive, __a, __b) __arm_vqrdmlsdhxq(__inactive, __a, __b)
 #define vqrdmlsdhq(__inactive, __a, __b) __arm_vqrdmlsdhq(__inactive, __a, __b)
 #define vqrdmladhxq(__inactive, __a, __b) __arm_vqrdmladhxq(__inactive, __a, __b)
@@ -362,7 +354,6 @@
 #define vornq_u8(__a, __b) __arm_vornq_u8(__a, __b)
 #define vmulltq_int_u8(__a, __b) __arm_vmulltq_int_u8(__a, __b)
 #define vmullbq_int_u8(__a, __b) __arm_vmullbq_int_u8(__a, __b)
-#define vmladavq_u8(__a, __b) __arm_vmladavq_u8(__a, __b)
 #define vcaddq_rot90_u8(__a, __b) __arm_vcaddq_rot90_u8(__a, __b)
 #define vcaddq_rot270_u8(__a, __b) __arm_vcaddq_rot270_u8(__a, __b)
 #define vbicq_u8(__a, __b) __arm_vbicq_u8(__a, __b)
@@ -371,10 +362,6 @@
 #define vornq_s8(__a, __b) __arm_vornq_s8(__a, __b)
 #define vmulltq_int_s8(__a, __b) __arm_vmulltq_int_s8(__a, __b)
 #define vmullbq_int_s8(__a, __b) __arm_vmullbq_int_s8(__a, __b)
-#define vmlsdavxq_s8(__a, __b) __arm_vmlsdavxq_s8(__a, __b)
-#define vmlsdavq_s8(__a, __b) __arm_vmlsdavq_s8(__a, __b)
-#define vmladavxq_s8(__a, __b) __arm_vmladavxq_s8(__a, __b)
-#define vmladavq_s8(__a, __b) __arm_vmladavq_s8(__a, __b)
 #define vhcaddq_rot90_s8(__a, __b) __arm_vhcaddq_rot90_s8(__a, __b)
 #define vhcaddq_rot270_s8(__a, __b) __arm_vhcaddq_rot270_s8(__a, __b)
 #define vcaddq_rot90_s8(__a, __b) __arm_vcaddq_rot90_s8(__a, __b)
@@ -384,7 +371,6 @@
 #define vornq_u16(__a, __b) __arm_vornq_u16(__a, __b)
 #define vmulltq_int_u16(__a, __b) __arm_vmulltq_int_u16(__a, __b)
 #define vmullbq_int_u16(__a, __b) __arm_vmullbq_int_u16(__a, __b)
-#define vmladavq_u16(__a, __b) __arm_vmladavq_u16(__a, __b)
 #define vcaddq_rot90_u16(__a, __b) __arm_vcaddq_rot90_u16(__a, __b)
 #define vcaddq_rot270_u16(__a, __b) __arm_vcaddq_rot270_u16(__a, __b)
 #define vbicq_u16(__a, __b) __arm_vbicq_u16(__a, __b)
@@ -393,10 +379,6 @@
 #define vornq_s16(__a, __b) __arm_vornq_s16(__a, __b)
 #define vmulltq_int_s16(__a, __b) __arm_vmulltq_int_s16(__a, __b)
 #define vmullbq_int_s16(__a, __b) __arm_vmullbq_int_s16(__a, __b)
-#define vmlsdavxq_s16(__a, __b) __arm_vmlsdavxq_s16(__a, __b)
-#define vmlsdavq_s16(__a, __b) __arm_vmlsdavq_s16(__a, __b)
-#define vmladavxq_s16(__a, __b) __arm_vmladavxq_s16(__a, __b)
-#define vmladavq_s16(__a, __b) __arm_vmladavq_s16(__a, __b)
 #define vhcaddq_rot90_s16(__a, __b) __arm_vhcaddq_rot90_s16(__a, __b)
 #define vhcaddq_rot270_s16(__a, __b) __arm_vhcaddq_rot270_s16(__a, __b)
 #define vcaddq_rot90_s16(__a, __b) __arm_vcaddq_rot90_s16(__a, __b)
@@ -406,7 +388,6 @@
 #define vornq_u32(__a, __b) __arm_vornq_u32(__a, __b)
 #define vmulltq_int_u32(__a, __b) __arm_vmulltq_int_u32(__a, __b)
 #define vmullbq_int_u32(__a, __b) __arm_vmullbq_int_u32(__a, __b)
-#define vmladavq_u32(__a, __b) __arm_vmladavq_u32(__a, __b)
 #define vcaddq_rot90_u32(__a, __b) __arm_vcaddq_rot90_u32(__a, __b)
 #define vcaddq_rot270_u32(__a, __b) __arm_vcaddq_rot270_u32(__a, __b)
 #define vbicq_u32(__a, __b) __arm_vbicq_u32(__a, __b)
@@ -415,10 +396,6 @@
 #define vornq_s32(__a, __b) __arm_vornq_s32(__a, __b)
 #define vmulltq_int_s32(__a, __b) __arm_vmulltq_int_s32(__a, __b)
 #define vmullbq_int_s32(__a, __b) __arm_vmullbq_int_s32(__a, __b)
-#define vmlsdavxq_s32(__a, __b) __arm_vmlsdavxq_s32(__a, __b)
-#define vmlsdavq_s32(__a, __b) __arm_vmlsdavq_s32(__a, __b)
-#define vmladavxq_s32(__a, __b) __arm_vmladavxq_s32(__a, __b)
-#define vmladavq_s32(__a, __b) __arm_vmladavq_s32(__a, __b)
 #define vhcaddq_rot90_s32(__a, __b) __arm_vhcaddq_rot90_s32(__a, __b)
 #define vhcaddq_rot270_s32(__a, __b) __arm_vhcaddq_rot270_s32(__a, __b)
 #define vcaddq_rot90_s32(__a, __b) __arm_vcaddq_rot90_s32(__a, __b)
@@ -509,15 +486,10 @@
 #define vmvnq_m_u8(__inactive, __a, __p) __arm_vmvnq_m_u8(__inactive, __a, __p)
 #define vmlasq_n_u8(__a, __b, __c) __arm_vmlasq_n_u8(__a, __b, __c)
 #define vmlaq_n_u8(__a, __b, __c) __arm_vmlaq_n_u8(__a, __b, __c)
-#define vmladavq_p_u8(__a, __b, __p) __arm_vmladavq_p_u8(__a, __b, __p)
 #define vmladavaq_u8(__a, __b, __c) __arm_vmladavaq_u8(__a, __b, __c)
 #define vsriq_n_u8(__a, __b,  __imm) __arm_vsriq_n_u8(__a, __b,  __imm)
 #define vsliq_n_u8(__a, __b,  __imm) __arm_vsliq_n_u8(__a, __b,  __imm)
 #define vmvnq_m_s8(__inactive, __a, __p) __arm_vmvnq_m_s8(__inactive, __a, __p)
-#define vmlsdavxq_p_s8(__a, __b, __p) __arm_vmlsdavxq_p_s8(__a, __b, __p)
-#define vmlsdavq_p_s8(__a, __b, __p) __arm_vmlsdavq_p_s8(__a, __b, __p)
-#define vmladavxq_p_s8(__a, __b, __p) __arm_vmladavxq_p_s8(__a, __b, __p)
-#define vmladavq_p_s8(__a, __b, __p) __arm_vmladavq_p_s8(__a, __b, __p)
 #define vqrdmlsdhxq_s8(__inactive, __a, __b) __arm_vqrdmlsdhxq_s8(__inactive, __a, __b)
 #define vqrdmlsdhq_s8(__inactive, __a, __b) __arm_vqrdmlsdhq_s8(__inactive, __a, __b)
 #define vqrdmlashq_n_s8(__a, __b, __c) __arm_vqrdmlashq_n_s8(__a, __b, __c)
@@ -543,15 +515,10 @@
 #define vmvnq_m_u16(__inactive, __a, __p) __arm_vmvnq_m_u16(__inactive, __a, __p)
 #define vmlasq_n_u16(__a, __b, __c) __arm_vmlasq_n_u16(__a, __b, __c)
 #define vmlaq_n_u16(__a, __b, __c) __arm_vmlaq_n_u16(__a, __b, __c)
-#define vmladavq_p_u16(__a, __b, __p) __arm_vmladavq_p_u16(__a, __b, __p)
 #define vmladavaq_u16(__a, __b, __c) __arm_vmladavaq_u16(__a, __b, __c)
 #define vsriq_n_u16(__a, __b,  __imm) __arm_vsriq_n_u16(__a, __b,  __imm)
 #define vsliq_n_u16(__a, __b,  __imm) __arm_vsliq_n_u16(__a, __b,  __imm)
 #define vmvnq_m_s16(__inactive, __a, __p) __arm_vmvnq_m_s16(__inactive, __a, __p)
-#define vmlsdavxq_p_s16(__a, __b, __p) __arm_vmlsdavxq_p_s16(__a, __b, __p)
-#define vmlsdavq_p_s16(__a, __b, __p) __arm_vmlsdavq_p_s16(__a, __b, __p)
-#define vmladavxq_p_s16(__a, __b, __p) __arm_vmladavxq_p_s16(__a, __b, __p)
-#define vmladavq_p_s16(__a, __b, __p) __arm_vmladavq_p_s16(__a, __b, __p)
 #define vqrdmlsdhxq_s16(__inactive, __a, __b) __arm_vqrdmlsdhxq_s16(__inactive, __a, __b)
 #define vqrdmlsdhq_s16(__inactive, __a, __b) __arm_vqrdmlsdhq_s16(__inactive, __a, __b)
 #define vqrdmlashq_n_s16(__a, __b, __c) __arm_vqrdmlashq_n_s16(__a, __b, __c)
@@ -577,15 +544,10 @@
 #define vmvnq_m_u32(__inactive, __a, __p) __arm_vmvnq_m_u32(__inactive, __a, __p)
 #define vmlasq_n_u32(__a, __b, __c) __arm_vmlasq_n_u32(__a, __b, __c)
 #define vmlaq_n_u32(__a, __b, __c) __arm_vmlaq_n_u32(__a, __b, __c)
-#define vmladavq_p_u32(__a, __b, __p) __arm_vmladavq_p_u32(__a, __b, __p)
 #define vmladavaq_u32(__a, __b, __c) __arm_vmladavaq_u32(__a, __b, __c)
 #define vsriq_n_u32(__a, __b,  __imm) __arm_vsriq_n_u32(__a, __b,  __imm)
 #define vsliq_n_u32(__a, __b,  __imm) __arm_vsliq_n_u32(__a, __b,  __imm)
 #define vmvnq_m_s32(__inactive, __a, __p) __arm_vmvnq_m_s32(__inactive, __a, __p)
-#define vmlsdavxq_p_s32(__a, __b, __p) __arm_vmlsdavxq_p_s32(__a, __b, __p)
-#define vmlsdavq_p_s32(__a, __b, __p) __arm_vmlsdavq_p_s32(__a, __b, __p)
-#define vmladavxq_p_s32(__a, __b, __p) __arm_vmladavxq_p_s32(__a, __b, __p)
-#define vmladavq_p_s32(__a, __b, __p) __arm_vmladavq_p_s32(__a, __b, __p)
 #define vqrdmlsdhxq_s32(__inactive, __a, __b) __arm_vqrdmlsdhxq_s32(__inactive, __a, __b)
 #define vqrdmlsdhq_s32(__inactive, __a, __b) __arm_vqrdmlsdhq_s32(__inactive, __a, __b)
 #define vqrdmlashq_n_s32(__a, __b, __c) __arm_vqrdmlashq_n_s32(__a, __b, __c)
@@ -1583,13 +1545,6 @@ __arm_vmullbq_int_u8 (uint8x16_t __a, uint8x16_t __b)
   return __builtin_mve_vmullbq_int_uv16qi (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq_u8 (uint8x16_t __a, uint8x16_t __b)
-{
-  return __builtin_mve_vmladavq_uv16qi (__a, __b);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90_u8 (uint8x16_t __a, uint8x16_t __b)
@@ -1648,34 +1603,6 @@ __arm_vmullbq_int_s8 (int8x16_t __a, int8x16_t __b)
   return __builtin_mve_vmullbq_int_sv16qi (__a, __b);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavxq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vmlsdavxq_sv16qi (__a, __b);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vmlsdavq_sv16qi (__a, __b);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavxq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vmladavxq_sv16qi (__a, __b);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vmladavq_sv16qi (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vhcaddq_rot90_s8 (int8x16_t __a, int8x16_t __b)
@@ -1739,13 +1666,6 @@ __arm_vmullbq_int_u16 (uint16x8_t __a, uint16x8_t __b)
   return __builtin_mve_vmullbq_int_uv8hi (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq_u16 (uint16x8_t __a, uint16x8_t __b)
-{
-  return __builtin_mve_vmladavq_uv8hi (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90_u16 (uint16x8_t __a, uint16x8_t __b)
@@ -1804,34 +1724,6 @@ __arm_vmullbq_int_s16 (int16x8_t __a, int16x8_t __b)
   return __builtin_mve_vmullbq_int_sv8hi (__a, __b);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavxq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vmlsdavxq_sv8hi (__a, __b);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vmlsdavq_sv8hi (__a, __b);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavxq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vmladavxq_sv8hi (__a, __b);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vmladavq_sv8hi (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vhcaddq_rot90_s16 (int16x8_t __a, int16x8_t __b)
@@ -1895,13 +1787,6 @@ __arm_vmullbq_int_u32 (uint32x4_t __a, uint32x4_t __b)
   return __builtin_mve_vmullbq_int_uv4si (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq_u32 (uint32x4_t __a, uint32x4_t __b)
-{
-  return __builtin_mve_vmladavq_uv4si (__a, __b);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90_u32 (uint32x4_t __a, uint32x4_t __b)
@@ -1960,34 +1845,6 @@ __arm_vmullbq_int_s32 (int32x4_t __a, int32x4_t __b)
   return __builtin_mve_vmullbq_int_sv4si (__a, __b);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavxq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vmlsdavxq_sv4si (__a, __b);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vmlsdavq_sv4si (__a, __b);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavxq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vmladavxq_sv4si (__a, __b);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vmladavq_sv4si (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vhcaddq_rot90_s32 (int32x4_t __a, int32x4_t __b)
@@ -2448,13 +2305,6 @@ __arm_vmlaq_n_u8 (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
   return __builtin_mve_vmlaq_n_uv16qi (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq_p_u8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavq_p_uv16qi (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmladavaq_u8 (uint32_t __a, uint8x16_t __b, uint8x16_t __c)
@@ -2483,34 +2333,6 @@ __arm_vmvnq_m_s8 (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_sv16qi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavxq_p_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsdavxq_p_sv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavq_p_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsdavq_p_sv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavxq_p_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavxq_p_sv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq_p_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavq_p_sv16qi (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlsdhxq_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
@@ -2686,13 +2508,6 @@ __arm_vmlaq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
   return __builtin_mve_vmlaq_n_uv8hi (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq_p_u16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavq_p_uv8hi (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmladavaq_u16 (uint32_t __a, uint16x8_t __b, uint16x8_t __c)
@@ -2721,34 +2536,6 @@ __arm_vmvnq_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_sv8hi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavxq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsdavxq_p_sv8hi (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsdavq_p_sv8hi (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavxq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavxq_p_sv8hi (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavq_p_sv8hi (__a, __b, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlsdhxq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
@@ -2924,13 +2711,6 @@ __arm_vmlaq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
   return __builtin_mve_vmlaq_n_uv4si (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq_p_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavq_p_uv4si (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmladavaq_u32 (uint32_t __a, uint32x4_t __b, uint32x4_t __c)
@@ -2959,34 +2739,6 @@ __arm_vmvnq_m_s32 (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_sv4si (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavxq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsdavxq_p_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsdavq_p_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavxq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavxq_p_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavq_p_sv4si (__a, __b, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlsdhxq_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
@@ -9383,13 +9135,6 @@ __arm_vmullbq_int (uint8x16_t __a, uint8x16_t __b)
  return __arm_vmullbq_int_u8 (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq (uint8x16_t __a, uint8x16_t __b)
-{
- return __arm_vmladavq_u8 (__a, __b);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90 (uint8x16_t __a, uint8x16_t __b)
@@ -9446,34 +9191,6 @@ __arm_vmullbq_int (int8x16_t __a, int8x16_t __b)
  return __arm_vmullbq_int_s8 (__a, __b);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavxq (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vmlsdavxq_s8 (__a, __b);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavq (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vmlsdavq_s8 (__a, __b);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavxq (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vmladavxq_s8 (__a, __b);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vmladavq_s8 (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vhcaddq_rot90 (int8x16_t __a, int8x16_t __b)
@@ -9537,13 +9254,6 @@ __arm_vmullbq_int (uint16x8_t __a, uint16x8_t __b)
  return __arm_vmullbq_int_u16 (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq (uint16x8_t __a, uint16x8_t __b)
-{
- return __arm_vmladavq_u16 (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90 (uint16x8_t __a, uint16x8_t __b)
@@ -9600,34 +9310,6 @@ __arm_vmullbq_int (int16x8_t __a, int16x8_t __b)
  return __arm_vmullbq_int_s16 (__a, __b);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavxq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vmlsdavxq_s16 (__a, __b);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vmlsdavq_s16 (__a, __b);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavxq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vmladavxq_s16 (__a, __b);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vmladavq_s16 (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vhcaddq_rot90 (int16x8_t __a, int16x8_t __b)
@@ -9691,13 +9373,6 @@ __arm_vmullbq_int (uint32x4_t __a, uint32x4_t __b)
  return __arm_vmullbq_int_u32 (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vmladavq_u32 (__a, __b);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90 (uint32x4_t __a, uint32x4_t __b)
@@ -9754,34 +9429,6 @@ __arm_vmullbq_int (int32x4_t __a, int32x4_t __b)
  return __arm_vmullbq_int_s32 (__a, __b);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavxq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vmlsdavxq_s32 (__a, __b);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vmlsdavq_s32 (__a, __b);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavxq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vmladavxq_s32 (__a, __b);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vmladavq_s32 (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vhcaddq_rot90 (int32x4_t __a, int32x4_t __b)
@@ -10202,13 +9849,6 @@ __arm_vmlaq (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
  return __arm_vmlaq_n_u8 (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq_p (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmladavq_p_u8 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmladavaq (uint32_t __a, uint8x16_t __b, uint8x16_t __c)
@@ -10237,34 +9877,6 @@ __arm_vmvnq_m (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
  return __arm_vmvnq_m_s8 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavxq_p (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmlsdavxq_p_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavq_p (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmlsdavq_p_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavxq_p (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmladavxq_p_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq_p (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmladavq_p_s8 (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlsdhxq (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
@@ -10440,13 +10052,6 @@ __arm_vmlaq (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
  return __arm_vmlaq_n_u16 (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq_p (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmladavq_p_u16 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmladavaq (uint32_t __a, uint16x8_t __b, uint16x8_t __c)
@@ -10475,34 +10080,6 @@ __arm_vmvnq_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
  return __arm_vmvnq_m_s16 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavxq_p (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmlsdavxq_p_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavq_p (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmlsdavq_p_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavxq_p (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmladavxq_p_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq_p (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmladavq_p_s16 (__a, __b, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlsdhxq (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
@@ -10678,13 +10255,6 @@ __arm_vmlaq (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
  return __arm_vmlaq_n_u32 (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq_p (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmladavq_p_u32 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmladavaq (uint32_t __a, uint32x4_t __b, uint32x4_t __c)
@@ -10713,34 +10283,6 @@ __arm_vmvnq_m (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
  return __arm_vmvnq_m_s32 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavxq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmlsdavxq_p_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmlsdavq_p_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavxq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmladavxq_p_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmladavq_p_s32 (__a, __b, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlsdhxq (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
@@ -18460,20 +18002,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaxq_s16(p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaxq_s32(p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
 
-#define __arm_vmlsdavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
-
-#define __arm_vmlsdavxq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavxq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavxq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
-
 #define __arm_vmlsdavaq_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
@@ -18669,43 +18197,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmladavaxq_u16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \
   int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmladavaxq_u32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
 
-#define __arm_vmladavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmladavq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmladavq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmladavq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmladavq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmladavq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmladavq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
-#define __arm_vmladavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmladavq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmladavq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmladavq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmladavq_p_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmladavq_p_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmladavq_p_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
-#define __arm_vmladavxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmladavxq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmladavxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmladavxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmladavxq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmladavxq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmladavxq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
-#define __arm_vmladavxq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmladavxq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmladavxq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmladavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
-
 #define __arm_vmlaldavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -18744,20 +18235,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavxq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
 
-#define __arm_vmlsdavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
-
-#define __arm_vmlsdavxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavxq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
-
 #define __arm_vmlsldavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 07/24] arm: [MVE intrinsics] add binary_acca_int32 shape
  2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
                   ` (4 preceding siblings ...)
  2023-05-11 12:19 ` [PATCH 06/24] arm: [MVE intrinsics] rework vmladavq vmladavxq vmlsdavq vmlsdavxq Christophe Lyon
@ 2023-05-11 12:19 ` Christophe Lyon
  2023-05-11 12:19 ` [PATCH 08/24] arm: [MVE intrinsics] rework vmladavaq vmladavaxq vmlsdavaq vmlsdavaxq Christophe Lyon
                   ` (17 subsequent siblings)
  23 siblings, 0 replies; 25+ messages in thread
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the binary_acca_int32 shape description.
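
For illustration only (not part of the patch), a minimal sketch of the user-level calls this shape models.  The function names acc_s16 and acc_u8 are made up; the sketch assumes an MVE-enabled target (e.g. -march=armv8.1-m.main+mve), that arm_mve.h is included, and that __ARM_MVE_PRESERVE_USER_NAMESPACE is not defined so the unprefixed intrinsic names are available:

  #include <arm_mve.h>

  /* acc plus the sum of m1[i] * m2[i]; resolves to __arm_vmladavaq_s16.  */
  int32_t acc_s16 (int32_t acc, int16x8_t m1, int16x8_t m2)
  {
    return vmladavaq (acc, m1, m2);
  }

  /* Predicated, unsigned variant; resolves to __arm_vmladavaq_p_u8.  */
  uint32_t acc_u8 (uint32_t acc, uint8x16_t m1, uint8x16_t m2,
                   mve_pred16_t p)
  {
    return vmladavaq_p (acc, m1, m2, p);
  }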

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc	(binary_acca_int32): New.
	* config/arm/arm-mve-builtins-shapes.h	(binary_acca_int32): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 37 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 38 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index e491c810b40..ceb13230da6 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -392,6 +392,43 @@ struct binary_acc_int32_def : public overloaded_base<0>
 };
 SHAPE (binary_acc_int32)
 
+/* <[u]int32>_t vfoo[_<t0>]([u]int32_t, <T0>_t, <T0>_t)
+
+   Example: vmladavaq.
+   int32_t [__arm_]vmladavaq[_s16](int32_t add, int16x8_t m1, int16x8_t m2)
+   int32_t [__arm_]vmladavaq_p[_s16](int32_t add, int16x8_t m1, int16x8_t m2, mve_pred16_t p)  */
+struct binary_acca_int32_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+    build_all (b, "sx32,sx32,v0,v0", group, MODE_none, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    type_suffix_index type;
+    if (!r.check_gp_argument (3, i, nargs)
+	|| (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES)
+      return error_mark_node;
+
+    unsigned int last_arg = i;
+    for (i = 1; i < last_arg; i++)
+      if (!r.require_matching_vector_type (i, type))
+	return error_mark_node;
+
+    if (!r.require_integer_immediate (0))
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+};
+SHAPE (binary_acca_int32)
+
 /* <T0>_t vfoo[_n_t0](<T0>_t, const int)
 
    Shape for vector shift right operations that take a vector first
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index 9e877c9591a..7f68d41efe6 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -38,6 +38,7 @@ namespace arm_mve
     extern const function_shape *const binary_lshift;
     extern const function_shape *const binary_lshift_r;
     extern const function_shape *const binary_acc_int32;
+    extern const function_shape *const binary_acca_int32;
     extern const function_shape *const binary_maxamina;
     extern const function_shape *const binary_maxavminav;
     extern const function_shape *const binary_maxvminv;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 08/24] arm: [MVE intrinsics] rework vmladavaq vmladavaxq vmlsdavaq vmlsdavaxq
  2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
                   ` (5 preceding siblings ...)
  2023-05-11 12:19 ` [PATCH 07/24] arm: [MVE intrinsics] add binary_acca_int32 shape Christophe Lyon
@ 2023-05-11 12:19 ` Christophe Lyon
  2023-05-11 12:19 ` [PATCH 09/24] arm: [MVE intrinsics] factorize vabavq Christophe Lyon
                   ` (16 subsequent siblings)
  23 siblings, 0 replies; 25+ messages in thread
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vmladavaq, vmladavaxq, vmlsdavaq, vmlsdavaxq using the new
MVE builtins framework.
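
As an illustration only (not part of the patch), user code such as the sketch below keeps compiling unchanged; only the overload resolution moves from the removed _Generic macros and inline wrappers in arm_mve.h to the C++ builtins framework.  The function name dot_diff is made up, and the sketch assumes an MVE-enabled target with arm_mve.h included:

  #include <arm_mve.h>

  int32_t dot_diff (int32_t acc, int16x8_t a, int16x8_t b, mve_pred16_t p)
  {
    acc = vmladavaxq (acc, a, b);      /* resolves to __arm_vmladavaxq_s16 */
    acc = vmlsdavaq_p (acc, a, b, p);  /* resolves to __arm_vmlsdavaq_p_s16 */
    return acc;
  }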

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vmladavaxq, vmladavaq)
	(vmlsdavaq, vmlsdavaxq): New.
	* config/arm/arm-mve-builtins-base.def (vmladavaxq, vmladavaq)
	(vmlsdavaq, vmlsdavaxq): New.
	* config/arm/arm-mve-builtins-base.h (vmladavaxq, vmladavaq)
	(vmlsdavaq, vmlsdavaxq): New.
	* config/arm/arm_mve.h (vmladavaq): Remove.
	(vmlsdavaxq): Remove.
	(vmlsdavaq): Remove.
	(vmladavaxq): Remove.
	(vmladavaq_p): Remove.
	(vmladavaxq_p): Remove.
	(vmlsdavaq_p): Remove.
	(vmlsdavaxq_p): Remove.
	(vmladavaq_u8): Remove.
	(vmlsdavaxq_s8): Remove.
	(vmlsdavaq_s8): Remove.
	(vmladavaxq_s8): Remove.
	(vmladavaq_s8): Remove.
	(vmladavaq_u16): Remove.
	(vmlsdavaxq_s16): Remove.
	(vmlsdavaq_s16): Remove.
	(vmladavaxq_s16): Remove.
	(vmladavaq_s16): Remove.
	(vmladavaq_u32): Remove.
	(vmlsdavaxq_s32): Remove.
	(vmlsdavaq_s32): Remove.
	(vmladavaxq_s32): Remove.
	(vmladavaq_s32): Remove.
	(vmladavaq_p_s8): Remove.
	(vmladavaq_p_s32): Remove.
	(vmladavaq_p_s16): Remove.
	(vmladavaq_p_u8): Remove.
	(vmladavaq_p_u32): Remove.
	(vmladavaq_p_u16): Remove.
	(vmladavaxq_p_s8): Remove.
	(vmladavaxq_p_s32): Remove.
	(vmladavaxq_p_s16): Remove.
	(vmlsdavaq_p_s8): Remove.
	(vmlsdavaq_p_s32): Remove.
	(vmlsdavaq_p_s16): Remove.
	(vmlsdavaxq_p_s8): Remove.
	(vmlsdavaxq_p_s32): Remove.
	(vmlsdavaxq_p_s16): Remove.
	(__arm_vmladavaq_u8): Remove.
	(__arm_vmlsdavaxq_s8): Remove.
	(__arm_vmlsdavaq_s8): Remove.
	(__arm_vmladavaxq_s8): Remove.
	(__arm_vmladavaq_s8): Remove.
	(__arm_vmladavaq_u16): Remove.
	(__arm_vmlsdavaxq_s16): Remove.
	(__arm_vmlsdavaq_s16): Remove.
	(__arm_vmladavaxq_s16): Remove.
	(__arm_vmladavaq_s16): Remove.
	(__arm_vmladavaq_u32): Remove.
	(__arm_vmlsdavaxq_s32): Remove.
	(__arm_vmlsdavaq_s32): Remove.
	(__arm_vmladavaxq_s32): Remove.
	(__arm_vmladavaq_s32): Remove.
	(__arm_vmladavaq_p_s8): Remove.
	(__arm_vmladavaq_p_s32): Remove.
	(__arm_vmladavaq_p_s16): Remove.
	(__arm_vmladavaq_p_u8): Remove.
	(__arm_vmladavaq_p_u32): Remove.
	(__arm_vmladavaq_p_u16): Remove.
	(__arm_vmladavaxq_p_s8): Remove.
	(__arm_vmladavaxq_p_s32): Remove.
	(__arm_vmladavaxq_p_s16): Remove.
	(__arm_vmlsdavaq_p_s8): Remove.
	(__arm_vmlsdavaq_p_s32): Remove.
	(__arm_vmlsdavaq_p_s16): Remove.
	(__arm_vmlsdavaxq_p_s8): Remove.
	(__arm_vmlsdavaxq_p_s32): Remove.
	(__arm_vmlsdavaxq_p_s16): Remove.
	(__arm_vmladavaq): Remove.
	(__arm_vmlsdavaxq): Remove.
	(__arm_vmlsdavaq): Remove.
	(__arm_vmladavaxq): Remove.
	(__arm_vmladavaq_p): Remove.
	(__arm_vmladavaxq_p): Remove.
	(__arm_vmlsdavaq_p): Remove.
	(__arm_vmlsdavaxq_p): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   4 +
 gcc/config/arm/arm-mve-builtins-base.def |   4 +
 gcc/config/arm/arm-mve-builtins-base.h   |   4 +
 gcc/config/arm/arm_mve.h                 | 538 -----------------------
 4 files changed, 12 insertions(+), 538 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 69af6f9139e..8a5ab990337 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -280,8 +280,12 @@ FUNCTION (vminnmq, unspec_based_mve_function_exact_insn, (UNKNOWN, UNKNOWN, SMIN
 FUNCTION_PRED_P_F (vminnmvq, VMINNMVQ)
 FUNCTION_WITH_RTX_M_NO_F (vminq, SMIN, UMIN, VMINQ)
 FUNCTION_PRED_P_S_U (vminvq, VMINVQ)
+FUNCTION_PRED_P_S (vmladavaxq, VMLADAVAXQ)
+FUNCTION_PRED_P_S_U (vmladavaq, VMLADAVAQ)
 FUNCTION_PRED_P_S_U (vmladavq, VMLADAVQ)
 FUNCTION_PRED_P_S (vmladavxq, VMLADAVXQ)
+FUNCTION_PRED_P_S (vmlsdavaq, VMLSDAVAQ)
+FUNCTION_PRED_P_S (vmlsdavaxq, VMLSDAVAXQ)
 FUNCTION_PRED_P_S (vmlsdavq, VMLSDAVQ)
 FUNCTION_PRED_P_S (vmlsdavxq, VMLSDAVXQ)
 FUNCTION_WITHOUT_N_NO_F (vmovlbq, VMOVLBQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 40d462fc7d2..cf0ed4b58df 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -49,8 +49,12 @@ DEF_MVE_FUNCTION (vminaq, binary_maxamina, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vminavq, binary_maxavminav, all_signed, p_or_none)
 DEF_MVE_FUNCTION (vminq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vminvq, binary_maxvminv, all_integer, p_or_none)
+DEF_MVE_FUNCTION (vmladavaq, binary_acca_int32, all_integer, p_or_none)
+DEF_MVE_FUNCTION (vmladavaxq, binary_acca_int32, all_signed, p_or_none)
 DEF_MVE_FUNCTION (vmladavq, binary_acc_int32, all_integer, p_or_none)
 DEF_MVE_FUNCTION (vmladavxq, binary_acc_int32, all_signed, p_or_none)
+DEF_MVE_FUNCTION (vmlsdavaq, binary_acca_int32, all_signed, p_or_none)
+DEF_MVE_FUNCTION (vmlsdavaxq, binary_acca_int32, all_signed, p_or_none)
 DEF_MVE_FUNCTION (vmlsdavq, binary_acc_int32, all_integer, p_or_none)
 DEF_MVE_FUNCTION (vmlsdavxq, binary_acc_int32, all_signed, p_or_none)
 DEF_MVE_FUNCTION (vmovlbq, unary_widen, integer_8_16, mx_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index e3c69a81a45..4f09bebf1cb 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -62,8 +62,12 @@ extern const function_base *const vminnmq;
 extern const function_base *const vminnmvq;
 extern const function_base *const vminq;
 extern const function_base *const vminvq;
+extern const function_base *const vmladavaq;
+extern const function_base *const vmladavaxq;
 extern const function_base *const vmladavq;
 extern const function_base *const vmladavxq;
+extern const function_base *const vmlsdavaq;
+extern const function_base *const vmlsdavaxq;
 extern const function_base *const vmlsdavq;
 extern const function_base *const vmlsdavxq;
 extern const function_base *const vmovlbq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index a739db4e4cc..86fa7fcf789 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -77,7 +77,6 @@
 #define vmvnq_m(__inactive, __a, __p) __arm_vmvnq_m(__inactive, __a, __p)
 #define vmlasq(__a, __b, __c) __arm_vmlasq(__a, __b, __c)
 #define vmlaq(__a, __b, __c) __arm_vmlaq(__a, __b, __c)
-#define vmladavaq(__a, __b, __c) __arm_vmladavaq(__a, __b, __c)
 #define vsriq(__a, __b, __imm) __arm_vsriq(__a, __b, __imm)
 #define vsliq(__a, __b, __imm) __arm_vsliq(__a, __b, __imm)
 #define vqrdmlsdhxq(__inactive, __a, __b) __arm_vqrdmlsdhxq(__inactive, __a, __b)
@@ -88,9 +87,6 @@
 #define vqdmlsdhq(__inactive, __a, __b) __arm_vqdmlsdhq(__inactive, __a, __b)
 #define vqdmladhxq(__inactive, __a, __b) __arm_vqdmladhxq(__inactive, __a, __b)
 #define vqdmladhq(__inactive, __a, __b) __arm_vqdmladhq(__inactive, __a, __b)
-#define vmlsdavaxq(__a, __b, __c) __arm_vmlsdavaxq(__a, __b, __c)
-#define vmlsdavaq(__a, __b, __c) __arm_vmlsdavaq(__a, __b, __c)
-#define vmladavaxq(__a, __b, __c) __arm_vmladavaxq(__a, __b, __c)
 #define vrmlaldavhaxq(__a, __b, __c) __arm_vrmlaldavhaxq(__a, __b, __c)
 #define vrmlsldavhaq(__a, __b, __c) __arm_vrmlsldavhaq(__a, __b, __c)
 #define vrmlsldavhaxq(__a, __b, __c) __arm_vrmlsldavhaxq(__a, __b, __c)
@@ -115,12 +111,8 @@
 #define vcaddq_rot90_m(__inactive, __a, __b, __p) __arm_vcaddq_rot90_m(__inactive, __a, __b, __p)
 #define vhcaddq_rot270_m(__inactive, __a, __b, __p) __arm_vhcaddq_rot270_m(__inactive, __a, __b, __p)
 #define vhcaddq_rot90_m(__inactive, __a, __b, __p) __arm_vhcaddq_rot90_m(__inactive, __a, __b, __p)
-#define vmladavaq_p(__a, __b, __c, __p) __arm_vmladavaq_p(__a, __b, __c, __p)
-#define vmladavaxq_p(__a, __b, __c, __p) __arm_vmladavaxq_p(__a, __b, __c, __p)
 #define vmlaq_m(__a, __b, __c, __p) __arm_vmlaq_m(__a, __b, __c, __p)
 #define vmlasq_m(__a, __b, __c, __p) __arm_vmlasq_m(__a, __b, __c, __p)
-#define vmlsdavaq_p(__a, __b, __c, __p) __arm_vmlsdavaq_p(__a, __b, __c, __p)
-#define vmlsdavaxq_p(__a, __b, __c, __p) __arm_vmlsdavaxq_p(__a, __b, __c, __p)
 #define vmullbq_int_m(__inactive, __a, __b, __p) __arm_vmullbq_int_m(__inactive, __a, __b, __p)
 #define vmulltq_int_m(__inactive, __a, __b, __p) __arm_vmulltq_int_m(__inactive, __a, __b, __p)
 #define vornq_m(__inactive, __a, __b, __p) __arm_vornq_m(__inactive, __a, __b, __p)
@@ -486,7 +478,6 @@
 #define vmvnq_m_u8(__inactive, __a, __p) __arm_vmvnq_m_u8(__inactive, __a, __p)
 #define vmlasq_n_u8(__a, __b, __c) __arm_vmlasq_n_u8(__a, __b, __c)
 #define vmlaq_n_u8(__a, __b, __c) __arm_vmlaq_n_u8(__a, __b, __c)
-#define vmladavaq_u8(__a, __b, __c) __arm_vmladavaq_u8(__a, __b, __c)
 #define vsriq_n_u8(__a, __b,  __imm) __arm_vsriq_n_u8(__a, __b,  __imm)
 #define vsliq_n_u8(__a, __b,  __imm) __arm_vsliq_n_u8(__a, __b,  __imm)
 #define vmvnq_m_s8(__inactive, __a, __p) __arm_vmvnq_m_s8(__inactive, __a, __p)
@@ -502,12 +493,8 @@
 #define vqdmlashq_n_s8(__a, __b, __c) __arm_vqdmlashq_n_s8(__a, __b, __c)
 #define vqdmladhxq_s8(__inactive, __a, __b) __arm_vqdmladhxq_s8(__inactive, __a, __b)
 #define vqdmladhq_s8(__inactive, __a, __b) __arm_vqdmladhq_s8(__inactive, __a, __b)
-#define vmlsdavaxq_s8(__a, __b, __c) __arm_vmlsdavaxq_s8(__a, __b, __c)
-#define vmlsdavaq_s8(__a, __b, __c) __arm_vmlsdavaq_s8(__a, __b, __c)
 #define vmlasq_n_s8(__a, __b, __c) __arm_vmlasq_n_s8(__a, __b, __c)
 #define vmlaq_n_s8(__a, __b, __c) __arm_vmlaq_n_s8(__a, __b, __c)
-#define vmladavaxq_s8(__a, __b, __c) __arm_vmladavaxq_s8(__a, __b, __c)
-#define vmladavaq_s8(__a, __b, __c) __arm_vmladavaq_s8(__a, __b, __c)
 #define vsriq_n_s8(__a, __b,  __imm) __arm_vsriq_n_s8(__a, __b,  __imm)
 #define vsliq_n_s8(__a, __b,  __imm) __arm_vsliq_n_s8(__a, __b,  __imm)
 #define vpselq_u16(__a, __b, __p) __arm_vpselq_u16(__a, __b, __p)
@@ -515,7 +502,6 @@
 #define vmvnq_m_u16(__inactive, __a, __p) __arm_vmvnq_m_u16(__inactive, __a, __p)
 #define vmlasq_n_u16(__a, __b, __c) __arm_vmlasq_n_u16(__a, __b, __c)
 #define vmlaq_n_u16(__a, __b, __c) __arm_vmlaq_n_u16(__a, __b, __c)
-#define vmladavaq_u16(__a, __b, __c) __arm_vmladavaq_u16(__a, __b, __c)
 #define vsriq_n_u16(__a, __b,  __imm) __arm_vsriq_n_u16(__a, __b,  __imm)
 #define vsliq_n_u16(__a, __b,  __imm) __arm_vsliq_n_u16(__a, __b,  __imm)
 #define vmvnq_m_s16(__inactive, __a, __p) __arm_vmvnq_m_s16(__inactive, __a, __p)
@@ -531,12 +517,8 @@
 #define vqdmlahq_n_s16(__a, __b, __c) __arm_vqdmlahq_n_s16(__a, __b, __c)
 #define vqdmladhxq_s16(__inactive, __a, __b) __arm_vqdmladhxq_s16(__inactive, __a, __b)
 #define vqdmladhq_s16(__inactive, __a, __b) __arm_vqdmladhq_s16(__inactive, __a, __b)
-#define vmlsdavaxq_s16(__a, __b, __c) __arm_vmlsdavaxq_s16(__a, __b, __c)
-#define vmlsdavaq_s16(__a, __b, __c) __arm_vmlsdavaq_s16(__a, __b, __c)
 #define vmlasq_n_s16(__a, __b, __c) __arm_vmlasq_n_s16(__a, __b, __c)
 #define vmlaq_n_s16(__a, __b, __c) __arm_vmlaq_n_s16(__a, __b, __c)
-#define vmladavaxq_s16(__a, __b, __c) __arm_vmladavaxq_s16(__a, __b, __c)
-#define vmladavaq_s16(__a, __b, __c) __arm_vmladavaq_s16(__a, __b, __c)
 #define vsriq_n_s16(__a, __b,  __imm) __arm_vsriq_n_s16(__a, __b,  __imm)
 #define vsliq_n_s16(__a, __b,  __imm) __arm_vsliq_n_s16(__a, __b,  __imm)
 #define vpselq_u32(__a, __b, __p) __arm_vpselq_u32(__a, __b, __p)
@@ -544,7 +526,6 @@
 #define vmvnq_m_u32(__inactive, __a, __p) __arm_vmvnq_m_u32(__inactive, __a, __p)
 #define vmlasq_n_u32(__a, __b, __c) __arm_vmlasq_n_u32(__a, __b, __c)
 #define vmlaq_n_u32(__a, __b, __c) __arm_vmlaq_n_u32(__a, __b, __c)
-#define vmladavaq_u32(__a, __b, __c) __arm_vmladavaq_u32(__a, __b, __c)
 #define vsriq_n_u32(__a, __b,  __imm) __arm_vsriq_n_u32(__a, __b,  __imm)
 #define vsliq_n_u32(__a, __b,  __imm) __arm_vsliq_n_u32(__a, __b,  __imm)
 #define vmvnq_m_s32(__inactive, __a, __p) __arm_vmvnq_m_s32(__inactive, __a, __p)
@@ -560,12 +541,8 @@
 #define vqdmlahq_n_s32(__a, __b, __c) __arm_vqdmlahq_n_s32(__a, __b, __c)
 #define vqdmladhxq_s32(__inactive, __a, __b) __arm_vqdmladhxq_s32(__inactive, __a, __b)
 #define vqdmladhq_s32(__inactive, __a, __b) __arm_vqdmladhq_s32(__inactive, __a, __b)
-#define vmlsdavaxq_s32(__a, __b, __c) __arm_vmlsdavaxq_s32(__a, __b, __c)
-#define vmlsdavaq_s32(__a, __b, __c) __arm_vmlsdavaq_s32(__a, __b, __c)
 #define vmlasq_n_s32(__a, __b, __c) __arm_vmlasq_n_s32(__a, __b, __c)
 #define vmlaq_n_s32(__a, __b, __c) __arm_vmlaq_n_s32(__a, __b, __c)
-#define vmladavaxq_s32(__a, __b, __c) __arm_vmladavaxq_s32(__a, __b, __c)
-#define vmladavaq_s32(__a, __b, __c) __arm_vmladavaq_s32(__a, __b, __c)
 #define vsriq_n_s32(__a, __b,  __imm) __arm_vsriq_n_s32(__a, __b,  __imm)
 #define vsliq_n_s32(__a, __b,  __imm) __arm_vsliq_n_s32(__a, __b,  __imm)
 #define vpselq_u64(__a, __b, __p) __arm_vpselq_u64(__a, __b, __p)
@@ -689,15 +666,6 @@
 #define vhcaddq_rot90_m_s8(__inactive, __a, __b, __p) __arm_vhcaddq_rot90_m_s8(__inactive, __a, __b, __p)
 #define vhcaddq_rot90_m_s32(__inactive, __a, __b, __p) __arm_vhcaddq_rot90_m_s32(__inactive, __a, __b, __p)
 #define vhcaddq_rot90_m_s16(__inactive, __a, __b, __p) __arm_vhcaddq_rot90_m_s16(__inactive, __a, __b, __p)
-#define vmladavaq_p_s8(__a, __b, __c, __p) __arm_vmladavaq_p_s8(__a, __b, __c, __p)
-#define vmladavaq_p_s32(__a, __b, __c, __p) __arm_vmladavaq_p_s32(__a, __b, __c, __p)
-#define vmladavaq_p_s16(__a, __b, __c, __p) __arm_vmladavaq_p_s16(__a, __b, __c, __p)
-#define vmladavaq_p_u8(__a, __b, __c, __p) __arm_vmladavaq_p_u8(__a, __b, __c, __p)
-#define vmladavaq_p_u32(__a, __b, __c, __p) __arm_vmladavaq_p_u32(__a, __b, __c, __p)
-#define vmladavaq_p_u16(__a, __b, __c, __p) __arm_vmladavaq_p_u16(__a, __b, __c, __p)
-#define vmladavaxq_p_s8(__a, __b, __c, __p) __arm_vmladavaxq_p_s8(__a, __b, __c, __p)
-#define vmladavaxq_p_s32(__a, __b, __c, __p) __arm_vmladavaxq_p_s32(__a, __b, __c, __p)
-#define vmladavaxq_p_s16(__a, __b, __c, __p) __arm_vmladavaxq_p_s16(__a, __b, __c, __p)
 #define vmlaq_m_n_s8(__a, __b, __c, __p) __arm_vmlaq_m_n_s8(__a, __b, __c, __p)
 #define vmlaq_m_n_s32(__a, __b, __c, __p) __arm_vmlaq_m_n_s32(__a, __b, __c, __p)
 #define vmlaq_m_n_s16(__a, __b, __c, __p) __arm_vmlaq_m_n_s16(__a, __b, __c, __p)
@@ -710,12 +678,6 @@
 #define vmlasq_m_n_u8(__a, __b, __c, __p) __arm_vmlasq_m_n_u8(__a, __b, __c, __p)
 #define vmlasq_m_n_u32(__a, __b, __c, __p) __arm_vmlasq_m_n_u32(__a, __b, __c, __p)
 #define vmlasq_m_n_u16(__a, __b, __c, __p) __arm_vmlasq_m_n_u16(__a, __b, __c, __p)
-#define vmlsdavaq_p_s8(__a, __b, __c, __p) __arm_vmlsdavaq_p_s8(__a, __b, __c, __p)
-#define vmlsdavaq_p_s32(__a, __b, __c, __p) __arm_vmlsdavaq_p_s32(__a, __b, __c, __p)
-#define vmlsdavaq_p_s16(__a, __b, __c, __p) __arm_vmlsdavaq_p_s16(__a, __b, __c, __p)
-#define vmlsdavaxq_p_s8(__a, __b, __c, __p) __arm_vmlsdavaxq_p_s8(__a, __b, __c, __p)
-#define vmlsdavaxq_p_s32(__a, __b, __c, __p) __arm_vmlsdavaxq_p_s32(__a, __b, __c, __p)
-#define vmlsdavaxq_p_s16(__a, __b, __c, __p) __arm_vmlsdavaxq_p_s16(__a, __b, __c, __p)
 #define vmullbq_int_m_s8(__inactive, __a, __b, __p) __arm_vmullbq_int_m_s8(__inactive, __a, __b, __p)
 #define vmullbq_int_m_s32(__inactive, __a, __b, __p) __arm_vmullbq_int_m_s32(__inactive, __a, __b, __p)
 #define vmullbq_int_m_s16(__inactive, __a, __b, __p) __arm_vmullbq_int_m_s16(__inactive, __a, __b, __p)
@@ -2305,13 +2267,6 @@ __arm_vmlaq_n_u8 (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
   return __builtin_mve_vmlaq_n_uv16qi (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq_u8 (uint32_t __a, uint8x16_t __b, uint8x16_t __c)
-{
-  return __builtin_mve_vmladavaq_uv16qi (__a, __b, __c);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm)
@@ -2417,20 +2372,6 @@ __arm_vqdmladhq_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
   return __builtin_mve_vqdmladhq_sv16qi (__inactive, __a, __b);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaxq_s8 (int32_t __a, int8x16_t __b, int8x16_t __c)
-{
-  return __builtin_mve_vmlsdavaxq_sv16qi (__a, __b, __c);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaq_s8 (int32_t __a, int8x16_t __b, int8x16_t __c)
-{
-  return __builtin_mve_vmlsdavaq_sv16qi (__a, __b, __c);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlasq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
@@ -2445,20 +2386,6 @@ __arm_vmlaq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
   return __builtin_mve_vmlaq_n_sv16qi (__a, __b, __c);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaxq_s8 (int32_t __a, int8x16_t __b, int8x16_t __c)
-{
-  return __builtin_mve_vmladavaxq_sv16qi (__a, __b, __c);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq_s8 (int32_t __a, int8x16_t __b, int8x16_t __c)
-{
-  return __builtin_mve_vmladavaq_sv16qi (__a, __b, __c);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm)
@@ -2508,13 +2435,6 @@ __arm_vmlaq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
   return __builtin_mve_vmlaq_n_uv8hi (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq_u16 (uint32_t __a, uint16x8_t __b, uint16x8_t __c)
-{
-  return __builtin_mve_vmladavaq_uv8hi (__a, __b, __c);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm)
@@ -2620,20 +2540,6 @@ __arm_vqdmladhq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
   return __builtin_mve_vqdmladhq_sv8hi (__inactive, __a, __b);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaxq_s16 (int32_t __a, int16x8_t __b, int16x8_t __c)
-{
-  return __builtin_mve_vmlsdavaxq_sv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaq_s16 (int32_t __a, int16x8_t __b, int16x8_t __c)
-{
-  return __builtin_mve_vmlsdavaq_sv8hi (__a, __b, __c);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlasq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
@@ -2648,20 +2554,6 @@ __arm_vmlaq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
   return __builtin_mve_vmlaq_n_sv8hi (__a, __b, __c);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaxq_s16 (int32_t __a, int16x8_t __b, int16x8_t __c)
-{
-  return __builtin_mve_vmladavaxq_sv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq_s16 (int32_t __a, int16x8_t __b, int16x8_t __c)
-{
-  return __builtin_mve_vmladavaq_sv8hi (__a, __b, __c);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __imm)
@@ -2711,13 +2603,6 @@ __arm_vmlaq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
   return __builtin_mve_vmlaq_n_uv4si (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq_u32 (uint32_t __a, uint32x4_t __b, uint32x4_t __c)
-{
-  return __builtin_mve_vmladavaq_uv4si (__a, __b, __c);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm)
@@ -2823,20 +2708,6 @@ __arm_vqdmladhq_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
   return __builtin_mve_vqdmladhq_sv4si (__inactive, __a, __b);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaxq_s32 (int32_t __a, int32x4_t __b, int32x4_t __c)
-{
-  return __builtin_mve_vmlsdavaxq_sv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaq_s32 (int32_t __a, int32x4_t __b, int32x4_t __c)
-{
-  return __builtin_mve_vmlsdavaq_sv4si (__a, __b, __c);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlasq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
@@ -2851,20 +2722,6 @@ __arm_vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
   return __builtin_mve_vmlaq_n_sv4si (__a, __b, __c);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaxq_s32 (int32_t __a, int32x4_t __b, int32x4_t __c)
-{
-  return __builtin_mve_vmladavaxq_sv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq_s32 (int32_t __a, int32x4_t __b, int32x4_t __c)
-{
-  return __builtin_mve_vmladavaq_sv4si (__a, __b, __c);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __imm)
@@ -3432,69 +3289,6 @@ __arm_vhcaddq_rot90_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, m
   return __builtin_mve_vhcaddq_rot90_m_sv8hi (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq_p_s8 (int32_t __a, int8x16_t __b, int8x16_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavaq_p_sv16qi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq_p_s32 (int32_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavaq_p_sv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq_p_s16 (int32_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavaq_p_sv8hi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq_p_u8 (uint32_t __a, uint8x16_t __b, uint8x16_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavaq_p_uv16qi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq_p_u32 (uint32_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavaq_p_uv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq_p_u16 (uint32_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavaq_p_uv8hi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaxq_p_s8 (int32_t __a, int8x16_t __b, int8x16_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavaxq_p_sv16qi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaxq_p_s32 (int32_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavaxq_p_sv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaxq_p_s16 (int32_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavaxq_p_sv8hi (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlaq_m_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
@@ -3579,48 +3373,6 @@ __arm_vmlasq_m_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c, mve_pred16_t
   return __builtin_mve_vmlasq_m_n_uv8hi (__a, __b, __c, __p);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaq_p_s8 (int32_t __a, int8x16_t __b, int8x16_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsdavaq_p_sv16qi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaq_p_s32 (int32_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsdavaq_p_sv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaq_p_s16 (int32_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsdavaq_p_sv8hi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaxq_p_s8 (int32_t __a, int8x16_t __b, int8x16_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsdavaxq_p_sv16qi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaxq_p_s32 (int32_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsdavaxq_p_sv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaxq_p_s16 (int32_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsdavaxq_p_sv8hi (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmullbq_int_m_s8 (int16x8_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -9849,13 +9601,6 @@ __arm_vmlaq (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
  return __arm_vmlaq_n_u8 (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq (uint32_t __a, uint8x16_t __b, uint8x16_t __c)
-{
- return __arm_vmladavaq_u8 (__a, __b, __c);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (uint8x16_t __a, uint8x16_t __b, const int __imm)
@@ -9961,20 +9706,6 @@ __arm_vqdmladhq (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
  return __arm_vqdmladhq_s8 (__inactive, __a, __b);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaxq (int32_t __a, int8x16_t __b, int8x16_t __c)
-{
- return __arm_vmlsdavaxq_s8 (__a, __b, __c);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaq (int32_t __a, int8x16_t __b, int8x16_t __c)
-{
- return __arm_vmlsdavaq_s8 (__a, __b, __c);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlasq (int8x16_t __a, int8x16_t __b, int8_t __c)
@@ -9989,20 +9720,6 @@ __arm_vmlaq (int8x16_t __a, int8x16_t __b, int8_t __c)
  return __arm_vmlaq_n_s8 (__a, __b, __c);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaxq (int32_t __a, int8x16_t __b, int8x16_t __c)
-{
- return __arm_vmladavaxq_s8 (__a, __b, __c);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq (int32_t __a, int8x16_t __b, int8x16_t __c)
-{
- return __arm_vmladavaq_s8 (__a, __b, __c);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (int8x16_t __a, int8x16_t __b, const int __imm)
@@ -10052,13 +9769,6 @@ __arm_vmlaq (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
  return __arm_vmlaq_n_u16 (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq (uint32_t __a, uint16x8_t __b, uint16x8_t __c)
-{
- return __arm_vmladavaq_u16 (__a, __b, __c);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (uint16x8_t __a, uint16x8_t __b, const int __imm)
@@ -10164,20 +9874,6 @@ __arm_vqdmladhq (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
  return __arm_vqdmladhq_s16 (__inactive, __a, __b);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaxq (int32_t __a, int16x8_t __b, int16x8_t __c)
-{
- return __arm_vmlsdavaxq_s16 (__a, __b, __c);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaq (int32_t __a, int16x8_t __b, int16x8_t __c)
-{
- return __arm_vmlsdavaq_s16 (__a, __b, __c);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlasq (int16x8_t __a, int16x8_t __b, int16_t __c)
@@ -10192,20 +9888,6 @@ __arm_vmlaq (int16x8_t __a, int16x8_t __b, int16_t __c)
  return __arm_vmlaq_n_s16 (__a, __b, __c);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaxq (int32_t __a, int16x8_t __b, int16x8_t __c)
-{
- return __arm_vmladavaxq_s16 (__a, __b, __c);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq (int32_t __a, int16x8_t __b, int16x8_t __c)
-{
- return __arm_vmladavaq_s16 (__a, __b, __c);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (int16x8_t __a, int16x8_t __b, const int __imm)
@@ -10255,13 +9937,6 @@ __arm_vmlaq (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
  return __arm_vmlaq_n_u32 (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq (uint32_t __a, uint32x4_t __b, uint32x4_t __c)
-{
- return __arm_vmladavaq_u32 (__a, __b, __c);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (uint32x4_t __a, uint32x4_t __b, const int __imm)
@@ -10367,20 +10042,6 @@ __arm_vqdmladhq (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
  return __arm_vqdmladhq_s32 (__inactive, __a, __b);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaxq (int32_t __a, int32x4_t __b, int32x4_t __c)
-{
- return __arm_vmlsdavaxq_s32 (__a, __b, __c);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaq (int32_t __a, int32x4_t __b, int32x4_t __c)
-{
- return __arm_vmlsdavaq_s32 (__a, __b, __c);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlasq (int32x4_t __a, int32x4_t __b, int32_t __c)
@@ -10395,20 +10056,6 @@ __arm_vmlaq (int32x4_t __a, int32x4_t __b, int32_t __c)
  return __arm_vmlaq_n_s32 (__a, __b, __c);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaxq (int32_t __a, int32x4_t __b, int32x4_t __c)
-{
- return __arm_vmladavaxq_s32 (__a, __b, __c);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq (int32_t __a, int32x4_t __b, int32x4_t __c)
-{
- return __arm_vmladavaq_s32 (__a, __b, __c);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (int32x4_t __a, int32x4_t __b, const int __imm)
@@ -10976,69 +10623,6 @@ __arm_vhcaddq_rot90_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_p
  return __arm_vhcaddq_rot90_m_s16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq_p (int32_t __a, int8x16_t __b, int8x16_t __c, mve_pred16_t __p)
-{
- return __arm_vmladavaq_p_s8 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq_p (int32_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vmladavaq_p_s32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq_p (int32_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vmladavaq_p_s16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq_p (uint32_t __a, uint8x16_t __b, uint8x16_t __c, mve_pred16_t __p)
-{
- return __arm_vmladavaq_p_u8 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq_p (uint32_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vmladavaq_p_u32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaq_p (uint32_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vmladavaq_p_u16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaxq_p (int32_t __a, int8x16_t __b, int8x16_t __c, mve_pred16_t __p)
-{
- return __arm_vmladavaxq_p_s8 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaxq_p (int32_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vmladavaxq_p_s32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavaxq_p (int32_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vmladavaxq_p_s16 (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlaq_m (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
@@ -11123,48 +10707,6 @@ __arm_vmlasq_m (uint16x8_t __a, uint16x8_t __b, uint16_t __c, mve_pred16_t __p)
  return __arm_vmlasq_m_n_u16 (__a, __b, __c, __p);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaq_p (int32_t __a, int8x16_t __b, int8x16_t __c, mve_pred16_t __p)
-{
- return __arm_vmlsdavaq_p_s8 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaq_p (int32_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vmlsdavaq_p_s32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaq_p (int32_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vmlsdavaq_p_s16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaxq_p (int32_t __a, int8x16_t __b, int8x16_t __c, mve_pred16_t __p)
-{
- return __arm_vmlsdavaxq_p_s8 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaxq_p (int32_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vmlsdavaxq_p_s32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavaxq_p (int32_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vmlsdavaxq_p_s16 (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmullbq_int_m (int16x8_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -17155,17 +16697,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcaddq_rot90_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcaddq_rot90_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
 
-#define __arm_vmladavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmladavaq_p_s8 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmladavaq_p_s16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmladavaq_p_s32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmladavaq_p_u8 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmladavaq_p_u16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmladavaq_p_u32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
 #define __arm_vornq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -17981,42 +17512,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmladhxq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmladhxq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
 
-#define __arm_vmlsdavaxq_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaxq_p_s8 (p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaxq_p_s16 (p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaxq_p_s32 (p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
-#define __arm_vmlsdavaq(p0,p1,p2) ({  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaq_s8(p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaq_s16(p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaq_s32(p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
-#define __arm_vmlsdavaxq(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaxq_s8(p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaxq_s16(p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaxq_s32(p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
-#define __arm_vmlsdavaq_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmlsdavaq_p_s8(p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsdavaq_p_s16(p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsdavaq_p_s32(p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
-#define __arm_vmladavaxq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmladavaxq_p_s8 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmladavaxq_p_s16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmladavaxq_p_s32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
 #define __arm_vmullbq_poly_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -18164,39 +17659,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vabavq_p_u16(__p0, __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vabavq_p_u32(__p0, __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
 
-#define __arm_vmladavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmladavaq_s8 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmladavaq_s16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmladavaq_s32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmladavaq_u8 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmladavaq_u16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmladavaq_u32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
-
-#define __arm_vmladavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmladavaq_p_s8 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmladavaq_p_s16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmladavaq_p_s32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmladavaq_p_u8 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmladavaq_p_u16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmladavaq_p_u32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
-#define __arm_vmladavaxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmladavaxq_s8 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmladavaxq_s16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmladavaxq_s32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmladavaxq_u8 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmladavaxq_u16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmladavaxq_u32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
-
 #define __arm_vmlaldavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 09/24] arm: [MVE intrinsics] factorize vabavq
  2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
                   ` (6 preceding siblings ...)
  2023-05-11 12:19 ` [PATCH 08/24] arm: [MVE intrinsics] rework vmladavaq vmladavaxq vmlsdavaq vmlsdavaxq Christophe Lyon
@ 2023-05-11 12:19 ` Christophe Lyon
  2023-05-11 12:19 ` [PATCH 10/24] arm: [MVE intrinsics] rework vabavq Christophe Lyon
                   ` (15 subsequent siblings)
  23 siblings, 0 replies; 25+ messages in thread
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vabavq builtins so that they use parameterized names.

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (mve_insn): Add vabav.
	* config/arm/mve.md (mve_vabavq_<supf><mode>): Rename into ...
	(@mve_<mve_insn>q_<supf><mode>): ... this.
	(mve_vabavq_p_<supf><mode>): Rename into ...
	(@mve_<mve_insn>q_p_<supf><mode>): ... this.
---
 gcc/config/arm/iterators.md | 2 ++
 gcc/config/arm/mve.md       | 8 ++++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 68f5314041b..cafb62a574e 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -783,6 +783,8 @@ (define_int_attr mve_cmp_op1 [
 		 ])
 
 (define_int_attr mve_insn [
+		 (VABAVQ_P_S "vabav") (VABAVQ_P_U "vabav")
+		 (VABAVQ_S "vabav") (VABAVQ_U "vabav")
 		 (VABDQ_M_S "vabd") (VABDQ_M_U "vabd") (VABDQ_M_F "vabd")
 		 (VABDQ_S "vabd") (VABDQ_U "vabd") (VABDQ_F "vabd")
 		 (VABSQ_M_F "vabs")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index f95525db583..df7829bc183 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1807,7 +1807,7 @@ (define_insn "mve_vrmlaldavhaq_<supf>v4si"
 ;;
 ;; [vabavq_s, vabavq_u])
 ;;
-(define_insn "mve_vabavq_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_<supf><mode>"
   [
    (set (match_operand:SI 0 "s_register_operand" "=r")
 	(unspec:SI [(match_operand:SI 1 "s_register_operand" "0")
@@ -1816,7 +1816,7 @@ (define_insn "mve_vabavq_<supf><mode>"
 	 VABAVQ))
   ]
   "TARGET_HAVE_MVE"
-  "vabav.<supf>%#<V_sz_elem>\t%0, %q2, %q3"
+  "<mve_insn>.<supf>%#<V_sz_elem>\t%0, %q2, %q3"
   [(set_attr "type" "mve_move")
 ])
 
@@ -3107,7 +3107,7 @@ (define_insn "mve_vrmlsldavhaq_sv4si"
 ;;
 ;; [vabavq_p_s, vabavq_p_u])
 ;;
-(define_insn "mve_vabavq_p_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_p_<supf><mode>"
   [
    (set (match_operand:SI 0 "s_register_operand" "=r")
 	(unspec:SI [(match_operand:SI 1 "s_register_operand" "0")
@@ -3117,7 +3117,7 @@ (define_insn "mve_vabavq_p_<supf><mode>"
 	 VABAVQ_P))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vabavt.<supf>%#<V_sz_elem>\t%0, %q2, %q3"
+  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%0, %q2, %q3"
   [(set_attr "type" "mve_move")
    (set_attr "length" "8")])
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 10/24] arm: [MVE intrinsics] rework vabavq
  2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
                   ` (7 preceding siblings ...)
  2023-05-11 12:19 ` [PATCH 09/24] arm: [MVE intrinsics] factorize vabavq Christophe Lyon
@ 2023-05-11 12:19 ` Christophe Lyon
  2023-05-11 12:19 ` [PATCH 11/24] arm: [MVE intrinsics] add binary_acc_int64 shape Christophe Lyon
                   ` (14 subsequent siblings)
  23 siblings, 0 replies; 25+ messages in thread
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vabavq using the new MVE builtins framework.
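
The user-facing intrinsics keep their existing types and behaviour; only
the implementation moves to the framework.  As a minimal usage sketch
(illustrative only, not part of the patch; the function and variable
names are invented), assuming a target with MVE enabled:

  #include <arm_mve.h>

  uint32_t
  sad_acc (uint32_t acc, int8x16_t a, int8x16_t b, mve_pred16_t p)
  {
    /* Accumulate absolute differences of all lanes into a scalar.  */
    acc = vabavq (acc, a, b);
    /* Same operation, but only the lanes selected by P contribute.  */
    return vabavq_p (acc, a, b, p);
  }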

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vabavq): New.
	* config/arm/arm-mve-builtins-base.def (vabavq): New.
	* config/arm/arm-mve-builtins-base.h (vabavq): New.
	* config/arm/arm_mve.h (vabavq): Remove.
	(vabavq_p): Remove.
	(vabavq_s8): Remove.
	(vabavq_s16): Remove.
	(vabavq_s32): Remove.
	(vabavq_u8): Remove.
	(vabavq_u16): Remove.
	(vabavq_u32): Remove.
	(vabavq_p_s8): Remove.
	(vabavq_p_u8): Remove.
	(vabavq_p_s16): Remove.
	(vabavq_p_u16): Remove.
	(vabavq_p_s32): Remove.
	(vabavq_p_u32): Remove.
	(__arm_vabavq_s8): Remove.
	(__arm_vabavq_s16): Remove.
	(__arm_vabavq_s32): Remove.
	(__arm_vabavq_u8): Remove.
	(__arm_vabavq_u16): Remove.
	(__arm_vabavq_u32): Remove.
	(__arm_vabavq_p_s8): Remove.
	(__arm_vabavq_p_u8): Remove.
	(__arm_vabavq_p_s16): Remove.
	(__arm_vabavq_p_u16): Remove.
	(__arm_vabavq_p_s32): Remove.
	(__arm_vabavq_p_u32): Remove.
	(__arm_vabavq): Remove.
	(__arm_vabavq_p): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   1 +
 gcc/config/arm/arm-mve-builtins-base.def |   1 +
 gcc/config/arm/arm-mve-builtins-base.h   |   1 +
 gcc/config/arm/arm_mve.h                 | 215 -----------------------
 4 files changed, 3 insertions(+), 215 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 8a5ab990337..a81cf4cba5e 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -241,6 +241,7 @@ namespace arm_mve {
    (-1, -1, UNSPEC##_F,							\
     -1, -1, UNSPEC##_P_F))
 
+FUNCTION_PRED_P_S_U (vabavq, VABAVQ)
 FUNCTION_WITHOUT_N (vabdq, VABDQ)
 FUNCTION (vabsq, unspec_based_mve_function_exact_insn, (ABS, ABS, ABS, -1, -1, -1, VABSQ_M_S, -1, VABSQ_M_F, -1, -1, -1))
 FUNCTION_WITH_RTX_M_N (vaddq, PLUS, VADDQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index cf0ed4b58df..934f45bc220 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -18,6 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #define REQUIRES_FLOAT false
+DEF_MVE_FUNCTION (vabavq, binary_acca_int32, all_integer, p_or_none)
 DEF_MVE_FUNCTION (vabdq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vabsq, unary, all_signed, mx_or_none)
 DEF_MVE_FUNCTION (vaddlvaq, unary_widen_acc, integer_32, p_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 4f09bebf1cb..1d29a940200 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -23,6 +23,7 @@
 namespace arm_mve {
 namespace functions {
 
+extern const function_base *const vabavq;
 extern const function_base *const vabdq;
 extern const function_base *const vabsq;
 extern const function_base *const vaddlvaq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 86fa7fcf789..f8afe19e86e 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -65,7 +65,6 @@
 #define vrmlsldavhxq(__a, __b) __arm_vrmlsldavhxq(__a, __b)
 #define vrmlsldavhq(__a, __b) __arm_vrmlsldavhq(__a, __b)
 #define vrmlaldavhxq(__a, __b) __arm_vrmlaldavhxq(__a, __b)
-#define vabavq(__a, __b, __c) __arm_vabavq(__a, __b, __c)
 #define vbicq_m_n(__a, __imm, __p) __arm_vbicq_m_n(__a, __imm, __p)
 #define vrmlaldavhaq(__a, __b, __c) __arm_vrmlaldavhaq(__a, __b, __c)
 #define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
@@ -104,7 +103,6 @@
 #define vmlsldavxq_p(__a, __b, __p) __arm_vmlsldavxq_p(__a, __b, __p)
 #define vsriq_m(__a, __b, __imm, __p) __arm_vsriq_m(__a, __b, __imm, __p)
 #define vqshluq_m(__inactive, __a, __imm, __p) __arm_vqshluq_m(__inactive, __a, __imm, __p)
-#define vabavq_p(__a, __b, __c, __p) __arm_vabavq_p(__a, __b, __c, __p)
 #define vbicq_m(__inactive, __a, __b, __p) __arm_vbicq_m(__inactive, __a, __b, __p)
 #define vbrsrq_m(__inactive, __a, __b, __p) __arm_vbrsrq_m(__inactive, __a, __b, __p)
 #define vcaddq_rot270_m(__inactive, __a, __b, __p) __arm_vcaddq_rot270_m(__inactive, __a, __b, __p)
@@ -447,9 +445,6 @@
 #define vrmlaldavhq_s32(__a, __b) __arm_vrmlaldavhq_s32(__a, __b)
 #define vcvttq_f16_f32(__a, __b) __arm_vcvttq_f16_f32(__a, __b)
 #define vcvtbq_f16_f32(__a, __b) __arm_vcvtbq_f16_f32(__a, __b)
-#define vabavq_s8(__a, __b, __c) __arm_vabavq_s8(__a, __b, __c)
-#define vabavq_s16(__a, __b, __c) __arm_vabavq_s16(__a, __b, __c)
-#define vabavq_s32(__a, __b, __c) __arm_vabavq_s32(__a, __b, __c)
 #define vbicq_m_n_s16(__a,  __imm, __p) __arm_vbicq_m_n_s16(__a,  __imm, __p)
 #define vbicq_m_n_s32(__a,  __imm, __p) __arm_vbicq_m_n_s32(__a,  __imm, __p)
 #define vbicq_m_n_u16(__a,  __imm, __p) __arm_vbicq_m_n_u16(__a,  __imm, __p)
@@ -470,9 +465,6 @@
 #define vshlcq_u16(__a,  __b,  __imm) __arm_vshlcq_u16(__a,  __b,  __imm)
 #define vshlcq_s32(__a,  __b,  __imm) __arm_vshlcq_s32(__a,  __b,  __imm)
 #define vshlcq_u32(__a,  __b,  __imm) __arm_vshlcq_u32(__a,  __b,  __imm)
-#define vabavq_u8(__a, __b, __c) __arm_vabavq_u8(__a, __b, __c)
-#define vabavq_u16(__a, __b, __c) __arm_vabavq_u16(__a, __b, __c)
-#define vabavq_u32(__a, __b, __c) __arm_vabavq_u32(__a, __b, __c)
 #define vpselq_u8(__a, __b, __p) __arm_vpselq_u8(__a, __b, __p)
 #define vpselq_s8(__a, __b, __p) __arm_vpselq_s8(__a, __b, __p)
 #define vmvnq_m_u8(__inactive, __a, __p) __arm_vmvnq_m_u8(__inactive, __a, __p)
@@ -620,22 +612,16 @@
 #define vsriq_m_n_s8(__a, __b,  __imm, __p) __arm_vsriq_m_n_s8(__a, __b,  __imm, __p)
 #define vcvtq_m_n_f16_u16(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_f16_u16(__inactive, __a,  __imm6, __p)
 #define vqshluq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vqshluq_m_n_s8(__inactive, __a,  __imm, __p)
-#define vabavq_p_s8(__a, __b, __c, __p) __arm_vabavq_p_s8(__a, __b, __c, __p)
 #define vsriq_m_n_u8(__a, __b,  __imm, __p) __arm_vsriq_m_n_u8(__a, __b,  __imm, __p)
-#define vabavq_p_u8(__a, __b, __c, __p) __arm_vabavq_p_u8(__a, __b, __c, __p)
 #define vcvtq_m_n_f16_s16(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_f16_s16(__inactive, __a,  __imm6, __p)
 #define vsriq_m_n_s16(__a, __b,  __imm, __p) __arm_vsriq_m_n_s16(__a, __b,  __imm, __p)
 #define vcvtq_m_n_f32_u32(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_f32_u32(__inactive, __a,  __imm6, __p)
 #define vqshluq_m_n_s16(__inactive, __a,  __imm, __p) __arm_vqshluq_m_n_s16(__inactive, __a,  __imm, __p)
-#define vabavq_p_s16(__a, __b, __c, __p) __arm_vabavq_p_s16(__a, __b, __c, __p)
 #define vsriq_m_n_u16(__a, __b,  __imm, __p) __arm_vsriq_m_n_u16(__a, __b,  __imm, __p)
-#define vabavq_p_u16(__a, __b, __c, __p) __arm_vabavq_p_u16(__a, __b, __c, __p)
 #define vcvtq_m_n_f32_s32(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_f32_s32(__inactive, __a,  __imm6, __p)
 #define vsriq_m_n_s32(__a, __b,  __imm, __p) __arm_vsriq_m_n_s32(__a, __b,  __imm, __p)
 #define vqshluq_m_n_s32(__inactive, __a,  __imm, __p) __arm_vqshluq_m_n_s32(__inactive, __a,  __imm, __p)
-#define vabavq_p_s32(__a, __b, __c, __p) __arm_vabavq_p_s32(__a, __b, __c, __p)
 #define vsriq_m_n_u32(__a, __b,  __imm, __p) __arm_vsriq_m_n_u32(__a, __b,  __imm, __p)
-#define vabavq_p_u32(__a, __b, __c, __p) __arm_vabavq_p_u32(__a, __b, __c, __p)
 #define vbicq_m_s8(__inactive, __a, __b, __p) __arm_vbicq_m_s8(__inactive, __a, __b, __p)
 #define vbicq_m_s32(__inactive, __a, __b, __p) __arm_vbicq_m_s32(__inactive, __a, __b, __p)
 #define vbicq_m_s16(__inactive, __a, __b, __p) __arm_vbicq_m_s16(__inactive, __a, __b, __p)
@@ -2094,48 +2080,6 @@ __arm_vrmlaldavhq_s32 (int32x4_t __a, int32x4_t __b)
   return __builtin_mve_vrmlaldavhq_sv4si (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq_s8 (uint32_t __a, int8x16_t __b, int8x16_t __c)
-{
-  return __builtin_mve_vabavq_sv16qi (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq_s16 (uint32_t __a, int16x8_t __b, int16x8_t __c)
-{
-  return __builtin_mve_vabavq_sv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq_s32 (uint32_t __a, int32x4_t __b, int32x4_t __c)
-{
-  return __builtin_mve_vabavq_sv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq_u8 (uint32_t __a, uint8x16_t __b, uint8x16_t __c)
-{
-  return __builtin_mve_vabavq_uv16qi(__a, __b, __c);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq_u16 (uint32_t __a, uint16x8_t __b, uint16x8_t __c)
-{
-  return __builtin_mve_vabavq_uv8hi(__a, __b, __c);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq_u32 (uint32_t __a, uint32x4_t __b, uint32x4_t __c)
-{
-  return __builtin_mve_vabavq_uv4si(__a, __b, __c);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_m_n_s16 (int16x8_t __a, const int __imm, mve_pred16_t __p)
@@ -2988,13 +2932,6 @@ __arm_vqshluq_m_n_s8 (uint8x16_t __inactive, int8x16_t __a, const int __imm, mve
   return __builtin_mve_vqshluq_m_n_sv16qi (__inactive, __a, __imm, __p);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq_p_s8 (uint32_t __a, int8x16_t __b, int8x16_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vabavq_p_sv16qi (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -3002,13 +2939,6 @@ __arm_vsriq_m_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm, mve_pred16_
   return __builtin_mve_vsriq_m_n_uv16qi (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq_p_u8 (uint32_t __a, uint8x16_t __b, uint8x16_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vabavq_p_uv16qi (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m_n_s16 (int16x8_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
@@ -3023,13 +2953,6 @@ __arm_vqshluq_m_n_s16 (uint16x8_t __inactive, int16x8_t __a, const int __imm, mv
   return __builtin_mve_vqshluq_m_n_sv8hi (__inactive, __a, __imm, __p);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq_p_s16 (uint32_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vabavq_p_sv8hi (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
@@ -3037,13 +2960,6 @@ __arm_vsriq_m_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm, mve_pred16
   return __builtin_mve_vsriq_m_n_uv8hi (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq_p_u16 (uint32_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vabavq_p_uv8hi (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m_n_s32 (int32x4_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
@@ -3058,13 +2974,6 @@ __arm_vqshluq_m_n_s32 (uint32x4_t __inactive, int32x4_t __a, const int __imm, mv
   return __builtin_mve_vqshluq_m_n_sv4si (__inactive, __a, __imm, __p);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq_p_s32 (uint32_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vabavq_p_sv4si (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
@@ -3072,13 +2981,6 @@ __arm_vsriq_m_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm, mve_pred16
   return __builtin_mve_vsriq_m_n_uv4si (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq_p_u32 (uint32_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vabavq_p_uv4si (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -9440,48 +9342,6 @@ __arm_vrmlaldavhq (int32x4_t __a, int32x4_t __b)
  return __arm_vrmlaldavhq_s32 (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq (uint32_t __a, int8x16_t __b, int8x16_t __c)
-{
- return __arm_vabavq_s8 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq (uint32_t __a, int16x8_t __b, int16x8_t __c)
-{
- return __arm_vabavq_s16 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq (uint32_t __a, int32x4_t __b, int32x4_t __c)
-{
- return __arm_vabavq_s32 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq (uint32_t __a, uint8x16_t __b, uint8x16_t __c)
-{
- return __arm_vabavq_u8 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq (uint32_t __a, uint16x8_t __b, uint16x8_t __c)
-{
- return __arm_vabavq_u16 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq (uint32_t __a, uint32x4_t __b, uint32x4_t __c)
-{
- return __arm_vabavq_u32 (__a, __b, __c);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_m_n (int16x8_t __a, const int __imm, mve_pred16_t __p)
@@ -10322,13 +10182,6 @@ __arm_vqshluq_m (uint8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred
  return __arm_vqshluq_m_n_s8 (__inactive, __a, __imm, __p);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq_p (uint32_t __a, int8x16_t __b, int8x16_t __c, mve_pred16_t __p)
-{
- return __arm_vabavq_p_s8 (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m (uint8x16_t __a, uint8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -10336,13 +10189,6 @@ __arm_vsriq_m (uint8x16_t __a, uint8x16_t __b, const int __imm, mve_pred16_t __p
  return __arm_vsriq_m_n_u8 (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq_p (uint32_t __a, uint8x16_t __b, uint8x16_t __c, mve_pred16_t __p)
-{
- return __arm_vabavq_p_u8 (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m (int16x8_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
@@ -10357,13 +10203,6 @@ __arm_vqshluq_m (uint16x8_t __inactive, int16x8_t __a, const int __imm, mve_pred
  return __arm_vqshluq_m_n_s16 (__inactive, __a, __imm, __p);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq_p (uint32_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vabavq_p_s16 (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m (uint16x8_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
@@ -10371,13 +10210,6 @@ __arm_vsriq_m (uint16x8_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p
  return __arm_vsriq_m_n_u16 (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq_p (uint32_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vabavq_p_u16 (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m (int32x4_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
@@ -10392,13 +10224,6 @@ __arm_vqshluq_m (uint32x4_t __inactive, int32x4_t __a, const int __imm, mve_pred
  return __arm_vqshluq_m_n_s32 (__inactive, __a, __imm, __p);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq_p (uint32_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vabavq_p_s32 (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m (uint32x4_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
@@ -10406,13 +10231,6 @@ __arm_vsriq_m (uint32x4_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p
  return __arm_vsriq_m_n_u32 (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vabavq_p (uint32_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vabavq_p_u32 (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -16642,17 +16460,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmladhq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmladhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
 
-#define __arm_vabavq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vabavq_p_s8(__p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vabavq_p_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vabavq_p_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vabavq_p_u8(__p0, __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vabavq_p_u16(__p0, __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vabavq_p_u32(__p0, __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
 #define __arm_vbicq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -17637,28 +17444,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2, p3), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2, p3));})
 
-#define __arm_vabavq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vabavq_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vabavq_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vabavq_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vabavq_u8 (__p0, __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vabavq_u16 (__p0, __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vabavq_u32 (__p0, __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
-
-#define __arm_vabavq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vabavq_p_s8(__p0, __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vabavq_p_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vabavq_p_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vabavq_p_u8(__p0, __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vabavq_p_u16(__p0, __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vabavq_p_u32(__p0, __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
 #define __arm_vmlaldavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 11/24] arm: [MVE intrinsics] add binary_acc_int64 shape
  2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
                   ` (8 preceding siblings ...)
  2023-05-11 12:19 ` [PATCH 10/24] arm: [MVE intrinsics] rework vabavq Christophe Lyon
@ 2023-05-11 12:19 ` Christophe Lyon
  2023-05-11 12:19 ` [PATCH 12/24] arm: [MVE intrinsics] factorize vmlaldavq vmlaldavxq vmlsldavq vmlsldavxq Christophe Lyon
                   ` (13 subsequent siblings)
  23 siblings, 0 replies; 25+ messages in thread
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the binary_acc_int64 shape description.
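
Intrinsics using this shape take two vectors of the same element type
and return a 64-bit scalar accumulator.  A sketch of what a call to
such an intrinsic looks like at the source level (illustrative only;
the function name is invented), assuming MVE is enabled:

  #include <arm_mve.h>

  int64_t
  dot_wide (int16x8_t a, int16x8_t b, mve_pred16_t p)
  {
    /* Widening multiply-accumulate across all lanes.  */
    int64_t d = vmlaldavq (a, b);
    /* Predicated variant, covered by the _p form of the shape.  */
    return d + vmlaldavq_p (a, b, p);
  }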

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (binary_acc_int64): New.
	* config/arm/arm-mve-builtins-shapes.h (binary_acc_int64): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 23 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 24 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index ceb13230da6..f1c3844953a 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -392,6 +392,29 @@ struct binary_acc_int32_def : public overloaded_base<0>
 };
 SHAPE (binary_acc_int32)
 
+/* <[u]int64>_t vfoo[_<t0>](<T0>_t, <T0>_t)
+
+   Example: vmlaldavq.
+   int64_t [__arm_]vmlaldavq[_s16](int16x8_t m1, int16x8_t m2)
+   int64_t [__arm_]vmlaldavq_p[_s16](int16x8_t m1, int16x8_t m2, mve_pred16_t p)  */
+struct binary_acc_int64_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+    build_all (b, "sx64,v0,v0", group, MODE_none, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    return r.resolve_uniform (2);
+  }
+};
+SHAPE (binary_acc_int64)
+
 /* <[u]int32>_t vfoo[_<t0>]([u]int32_t, <T0>_t, <T0>_t)
 
    Example: vmladavaq.
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index 7f68d41efe6..73e82d2fd7a 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -38,6 +38,7 @@ namespace arm_mve
     extern const function_shape *const binary_lshift;
     extern const function_shape *const binary_lshift_r;
     extern const function_shape *const binary_acc_int32;
+    extern const function_shape *const binary_acc_int64;
     extern const function_shape *const binary_acca_int32;
     extern const function_shape *const binary_maxamina;
     extern const function_shape *const binary_maxavminav;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 12/24] arm: [MVE intrinsics] factorize vmlaldavq vmlaldavxq vmlsldavq vmlsldavxq
  2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
                   ` (9 preceding siblings ...)
  2023-05-11 12:19 ` [PATCH 11/24] arm: [MVE intrinsics] add binary_acc_int64 shape Christophe Lyon
@ 2023-05-11 12:19 ` Christophe Lyon
  2023-05-11 12:19 ` [PATCH 13/24] arm: [MVE intrinsics] rework " Christophe Lyon
                   ` (12 subsequent siblings)
  23 siblings, 0 replies; 25+ messages in thread
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vmlaldavq, vmlaldavxq, vmlsldavq, vmlsldavxq builtins so
that they use parameterized names.

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MVE_VMLxLDAVxQ, MVE_VMLxLDAVxQ_P): New.
	(mve_insn): Add vmlaldav, vmlaldavx, vmlsldav, vmlsldavx.
	(supf): Add VMLALDAVXQ_S, VMLSLDAVQ_S, VMLSLDAVXQ_S,
	VMLALDAVXQ_P_S, VMLSLDAVQ_P_S, VMLSLDAVXQ_P_S.
	* config/arm/mve.md (mve_vmlaldavq_<supf><mode>)
	(mve_vmlaldavxq_s<mode>, mve_vmlsldavq_s<mode>)
	(mve_vmlsldavxq_s<mode>): Merge into ...
	(@mve_<mve_insn>q_<supf><mode>): ... this.
	(mve_vmlaldavq_p_<supf><mode>, mve_vmlaldavxq_p_s<mode>)
	(mve_vmlsldavq_p_s<mode>, mve_vmlsldavxq_p_s<mode>): Merge into
	...
	(@mve_<mve_insn>q_p_<supf><mode>): ... this.
---
 gcc/config/arm/iterators.md |  28 +++++++++
 gcc/config/arm/mve.md       | 114 +++++-------------------------------
 2 files changed, 42 insertions(+), 100 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index cafb62a574e..227ba52aed5 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -727,6 +727,20 @@ (define_int_iterator MVE_VMLxDAVAQ_P [
 		     VMLADAVAXQ_P_S
 		     ])
 
+(define_int_iterator MVE_VMLxLDAVxQ [
+		     VMLALDAVQ_S VMLALDAVQ_U
+		     VMLALDAVXQ_S
+		     VMLSLDAVQ_S
+		     VMLSLDAVXQ_S
+		     ])
+
+(define_int_iterator MVE_VMLxLDAVxQ_P [
+		     VMLALDAVQ_P_S VMLALDAVQ_P_U
+		     VMLALDAVXQ_P_S
+		     VMLSLDAVQ_P_S
+		     VMLSLDAVXQ_P_S
+		     ])
+
 (define_int_iterator MVE_MOVN [
 		     VMOVNBQ_S VMOVNBQ_U
 		     VMOVNTQ_S VMOVNTQ_U
@@ -855,6 +869,10 @@ (define_int_attr mve_insn [
 		 (VMLADAVQ_S "vmladav") (VMLADAVQ_U "vmladav")
 		 (VMLADAVXQ_P_S "vmladavx")
 		 (VMLADAVXQ_S "vmladavx")
+		 (VMLALDAVQ_P_S "vmlaldav") (VMLALDAVQ_P_U "vmlaldav")
+		 (VMLALDAVQ_S "vmlaldav") (VMLALDAVQ_U "vmlaldav")
+		 (VMLALDAVXQ_P_S "vmlaldavx")
+		 (VMLALDAVXQ_S "vmlaldavx")
 		 (VMLAQ_M_N_S "vmla") (VMLAQ_M_N_U "vmla")
 		 (VMLASQ_M_N_S "vmlas") (VMLASQ_M_N_U "vmlas")
 		 (VMLSDAVAQ_P_S "vmlsdava")
@@ -865,6 +883,10 @@ (define_int_attr mve_insn [
 		 (VMLSDAVQ_S "vmlsdav")
 		 (VMLSDAVXQ_P_S "vmlsdavx")
 		 (VMLSDAVXQ_S "vmlsdavx")
+		 (VMLSLDAVQ_P_S "vmlsldav")
+		 (VMLSLDAVQ_S "vmlsldav")
+		 (VMLSLDAVXQ_P_S "vmlsldavx")
+		 (VMLSLDAVXQ_S "vmlsldavx")
 		 (VMOVLBQ_M_S "vmovlb") (VMOVLBQ_M_U "vmovlb")
 		 (VMOVLBQ_S "vmovlb") (VMOVLBQ_U "vmovlb")
 		 (VMOVLTQ_M_S "vmovlt") (VMOVLTQ_M_U "vmovlt")
@@ -2295,6 +2317,12 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VMLSDAVQ_S "s")
 		       (VMLSDAVXQ_P_S "s")
 		       (VMLSDAVXQ_S "s")
+		       (VMLALDAVXQ_S "s")
+		       (VMLSLDAVQ_S "s")
+		       (VMLSLDAVXQ_S "s")
+		       (VMLALDAVXQ_P_S "s")
+		       (VMLSLDAVQ_P_S "s")
+		       (VMLSLDAVXQ_P_S "s")
 		       ])
 
 ;; Both kinds of return insn.
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index df7829bc183..584e6129ea5 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1405,62 +1405,20 @@ (define_insn "@mve_<max_min_f_str>q_f<mode>"
 ])
 
 ;;
-;; [vmlaldavq_u, vmlaldavq_s])
+;; [vmlaldavq_u, vmlaldavq_s]
+;; [vmlaldavxq_s]
+;; [vmlsldavq_s]
+;; [vmlsldavxq_s]
 ;;
-(define_insn "mve_vmlaldavq_<supf><mode>"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:MVE_5 1 "s_register_operand" "w")
-		    (match_operand:MVE_5 2 "s_register_operand" "w")]
-	 VMLALDAVQ))
-  ]
-  "TARGET_HAVE_MVE"
-  "vmlaldav.<supf>%#<V_sz_elem>	%Q0, %R0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vmlaldavxq_s])
-;;
-(define_insn "mve_vmlaldavxq_s<mode>"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:MVE_5 1 "s_register_operand" "w")
-		    (match_operand:MVE_5 2 "s_register_operand" "w")]
-	 VMLALDAVXQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vmlaldavx.s%#<V_sz_elem> %Q0, %R0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vmlsldavq_s])
-;;
-(define_insn "mve_vmlsldavq_s<mode>"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:MVE_5 1 "s_register_operand" "w")
-		    (match_operand:MVE_5 2 "s_register_operand" "w")]
-	 VMLSLDAVQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vmlsldav.s%#<V_sz_elem> %Q0, %R0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vmlsldavxq_s])
-;;
-(define_insn "mve_vmlsldavxq_s<mode>"
+(define_insn "@mve_<mve_insn>q_<supf><mode>"
   [
    (set (match_operand:DI 0 "s_register_operand" "=r")
 	(unspec:DI [(match_operand:MVE_5 1 "s_register_operand" "w")
 		    (match_operand:MVE_5 2 "s_register_operand" "w")]
-	 VMLSLDAVXQ_S))
+	 MVE_VMLxLDAVxQ))
   ]
   "TARGET_HAVE_MVE"
-  "vmlsldavx.s%#<V_sz_elem> %Q0, %R0, %q1, %q2"
+  "<mve_insn>.<supf>%#<V_sz_elem>\t%Q0, %R0, %q1, %q2"
   [(set_attr "type" "mve_move")
 ])
 
@@ -2666,37 +2624,25 @@ (define_insn "mve_vmlaldavaxq_s<mode>"
 ])
 
 ;;
-;; [vmlaldavq_p_u, vmlaldavq_p_s])
+;; [vmlaldavq_p_u, vmlaldavq_p_s]
+;; [vmlaldavxq_p_s]
+;; [vmlsldavq_p_s]
+;; [vmlsldavxq_p_s]
 ;;
-(define_insn "mve_vmlaldavq_p_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_p_<supf><mode>"
   [
    (set (match_operand:DI 0 "s_register_operand" "=r")
 	(unspec:DI [(match_operand:MVE_5 1 "s_register_operand" "w")
 		       (match_operand:MVE_5 2 "s_register_operand" "w")
 		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VMLALDAVQ_P))
+	 MVE_VMLxLDAVxQ_P))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vmlaldavt.<supf>%#<V_sz_elem> %Q0, %R0, %q1, %q2"
+  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%Q0, %R0, %q1, %q2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
 ;;
-;; [vmlaldavxq_p_s])
-;;
-(define_insn "mve_vmlaldavxq_p_s<mode>"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:MVE_5 1 "s_register_operand" "w")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VMLALDAVXQ_P_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vmlaldavxt.s%#<V_sz_elem>\t%Q0, %R0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-;;
 ;; [vmlsldavaq_s])
 ;;
 (define_insn "mve_vmlsldavaq_s<mode>"
@@ -2728,38 +2674,6 @@ (define_insn "mve_vmlsldavaxq_s<mode>"
   [(set_attr "type" "mve_move")
 ])
 
-;;
-;; [vmlsldavq_p_s])
-;;
-(define_insn "mve_vmlsldavq_p_s<mode>"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:MVE_5 1 "s_register_operand" "w")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VMLSLDAVQ_P_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vmlsldavt.s%#<V_sz_elem> %Q0, %R0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vmlsldavxq_p_s])
-;;
-(define_insn "mve_vmlsldavxq_p_s<mode>"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:MVE_5 1 "s_register_operand" "w")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VMLSLDAVXQ_P_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vmlsldavxt.s%#<V_sz_elem> %Q0, %R0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
 ;;
 ;; [vmovlbq_m_u, vmovlbq_m_s])
 ;; [vmovltq_m_u, vmovltq_m_s])
-- 
2.34.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 13/24] arm: [MVE intrinsics] rework vmlaldavq vmlaldavxq vmlsldavq vmlsldavxq
  2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
                   ` (10 preceding siblings ...)
  2023-05-11 12:19 ` [PATCH 12/24] arm: [MVE intrinsics] factorize vmlaldavq vmlaldavxq vmlsldavq vmlsldavxq Christophe Lyon
@ 2023-05-11 12:19 ` Christophe Lyon
  2023-05-11 12:19 ` [PATCH 14/24] arm: [MVE intrinsics] factorize vrmlaldavhq vrmlaldavhxq vrmlsldavhq vrmlsldavhxq Christophe Lyon
                   ` (11 subsequent siblings)
  23 siblings, 0 replies; 25+ messages in thread
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vmlaldavq, vmlaldavxq, vmlsldavq, vmlsldavxq using the new
MVE builtins framework.
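
For reference, a minimal usage sketch, not part of the patch: the
polymorphic intrinsics keep the prototypes of the inline wrappers that
are removed below.  The helper function names are made up, and the
example assumes MVE is enabled (e.g. -march=armv8.1-m.main+mve
-mfloat-abi=hard):

  #include <arm_mve.h>

  /* 64-bit accumulating reductions; expected to expand to
     vmlaldav.s16 and vpst;vmlsldavxt.s16 respectively.  */
  int64_t
  dot_s16 (int16x8_t a, int16x8_t b)
  {
    return vmlaldavq (a, b);
  }

  int64_t
  dot_s16_pred (int16x8_t a, int16x8_t b, mve_pred16_t p)
  {
    return vmlsldavxq_p (a, b, p);
  }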

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vmlaldavq, vmlaldavxq)
	(vmlsldavq, vmlsldavxq): New.
	* config/arm/arm-mve-builtins-base.def (vmlaldavq, vmlaldavxq)
	(vmlsldavq, vmlsldavxq): New.
	* config/arm/arm-mve-builtins-base.h (vmlaldavq, vmlaldavxq)
	(vmlsldavq, vmlsldavxq): New.
	* config/arm/arm_mve.h (vmlaldavq): Remove.
	(vmlsldavxq): Remove.
	(vmlsldavq): Remove.
	(vmlaldavxq): Remove.
	(vmlaldavq_p): Remove.
	(vmlaldavxq_p): Remove.
	(vmlsldavq_p): Remove.
	(vmlsldavxq_p): Remove.
	(vmlaldavq_u16): Remove.
	(vmlsldavxq_s16): Remove.
	(vmlsldavq_s16): Remove.
	(vmlaldavxq_s16): Remove.
	(vmlaldavq_s16): Remove.
	(vmlaldavq_u32): Remove.
	(vmlsldavxq_s32): Remove.
	(vmlsldavq_s32): Remove.
	(vmlaldavxq_s32): Remove.
	(vmlaldavq_s32): Remove.
	(vmlaldavq_p_s16): Remove.
	(vmlaldavxq_p_s16): Remove.
	(vmlsldavq_p_s16): Remove.
	(vmlsldavxq_p_s16): Remove.
	(vmlaldavq_p_u16): Remove.
	(vmlaldavq_p_s32): Remove.
	(vmlaldavxq_p_s32): Remove.
	(vmlsldavq_p_s32): Remove.
	(vmlsldavxq_p_s32): Remove.
	(vmlaldavq_p_u32): Remove.
	(__arm_vmlaldavq_u16): Remove.
	(__arm_vmlsldavxq_s16): Remove.
	(__arm_vmlsldavq_s16): Remove.
	(__arm_vmlaldavxq_s16): Remove.
	(__arm_vmlaldavq_s16): Remove.
	(__arm_vmlaldavq_u32): Remove.
	(__arm_vmlsldavxq_s32): Remove.
	(__arm_vmlsldavq_s32): Remove.
	(__arm_vmlaldavxq_s32): Remove.
	(__arm_vmlaldavq_s32): Remove.
	(__arm_vmlaldavq_p_s16): Remove.
	(__arm_vmlaldavxq_p_s16): Remove.
	(__arm_vmlsldavq_p_s16): Remove.
	(__arm_vmlsldavxq_p_s16): Remove.
	(__arm_vmlaldavq_p_u16): Remove.
	(__arm_vmlaldavq_p_s32): Remove.
	(__arm_vmlaldavxq_p_s32): Remove.
	(__arm_vmlsldavq_p_s32): Remove.
	(__arm_vmlsldavxq_p_s32): Remove.
	(__arm_vmlaldavq_p_u32): Remove.
	(__arm_vmlaldavq): Remove.
	(__arm_vmlsldavxq): Remove.
	(__arm_vmlsldavq): Remove.
	(__arm_vmlaldavxq): Remove.
	(__arm_vmlaldavq_p): Remove.
	(__arm_vmlaldavxq_p): Remove.
	(__arm_vmlsldavq_p): Remove.
	(__arm_vmlsldavxq_p): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   4 +
 gcc/config/arm/arm-mve-builtins-base.def |   4 +
 gcc/config/arm/arm-mve-builtins-base.h   |   4 +
 gcc/config/arm/arm_mve.h                 | 366 -----------------------
 4 files changed, 12 insertions(+), 366 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index a81cf4cba5e..af1a2c9942a 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -285,10 +285,14 @@ FUNCTION_PRED_P_S (vmladavaxq, VMLADAVAXQ)
 FUNCTION_PRED_P_S_U (vmladavaq, VMLADAVAQ)
 FUNCTION_PRED_P_S_U (vmladavq, VMLADAVQ)
 FUNCTION_PRED_P_S (vmladavxq, VMLADAVXQ)
+FUNCTION_PRED_P_S_U (vmlaldavq, VMLALDAVQ)
+FUNCTION_PRED_P_S (vmlaldavxq, VMLALDAVXQ)
 FUNCTION_PRED_P_S (vmlsdavaq, VMLSDAVAQ)
 FUNCTION_PRED_P_S (vmlsdavaxq, VMLSDAVAXQ)
 FUNCTION_PRED_P_S (vmlsdavq, VMLSDAVQ)
 FUNCTION_PRED_P_S (vmlsdavxq, VMLSDAVXQ)
+FUNCTION_PRED_P_S (vmlsldavq, VMLSLDAVQ)
+FUNCTION_PRED_P_S (vmlsldavxq, VMLSLDAVXQ)
 FUNCTION_WITHOUT_N_NO_F (vmovlbq, VMOVLBQ)
 FUNCTION_WITHOUT_N_NO_F (vmovltq, VMOVLTQ)
 FUNCTION_WITHOUT_N_NO_F (vmovnbq, VMOVNBQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 934f45bc220..f7f353b34a7 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -54,10 +54,14 @@ DEF_MVE_FUNCTION (vmladavaq, binary_acca_int32, all_integer, p_or_none)
 DEF_MVE_FUNCTION (vmladavaxq, binary_acca_int32, all_signed, p_or_none)
 DEF_MVE_FUNCTION (vmladavq, binary_acc_int32, all_integer, p_or_none)
 DEF_MVE_FUNCTION (vmladavxq, binary_acc_int32, all_signed, p_or_none)
+DEF_MVE_FUNCTION (vmlaldavq, binary_acc_int64, integer_16_32, p_or_none)
+DEF_MVE_FUNCTION (vmlaldavxq, binary_acc_int64, signed_16_32, p_or_none)
 DEF_MVE_FUNCTION (vmlsdavaq, binary_acca_int32, all_signed, p_or_none)
 DEF_MVE_FUNCTION (vmlsdavaxq, binary_acca_int32, all_signed, p_or_none)
 DEF_MVE_FUNCTION (vmlsdavq, binary_acc_int32, all_integer, p_or_none)
 DEF_MVE_FUNCTION (vmlsdavxq, binary_acc_int32, all_signed, p_or_none)
+DEF_MVE_FUNCTION (vmlsldavq, binary_acc_int64, signed_16_32, p_or_none)
+DEF_MVE_FUNCTION (vmlsldavxq, binary_acc_int64, signed_16_32, p_or_none)
 DEF_MVE_FUNCTION (vmovlbq, unary_widen, integer_8_16, mx_or_none)
 DEF_MVE_FUNCTION (vmovltq, unary_widen, integer_8_16, mx_or_none)
 DEF_MVE_FUNCTION (vmovnbq, binary_move_narrow, integer_16_32, m_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 1d29a940200..08d07a7c6d5 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -67,10 +67,14 @@ extern const function_base *const vmladavaq;
 extern const function_base *const vmladavaxq;
 extern const function_base *const vmladavq;
 extern const function_base *const vmladavxq;
+extern const function_base *const vmlaldavq;
+extern const function_base *const vmlaldavxq;
 extern const function_base *const vmlsdavaq;
 extern const function_base *const vmlsdavaxq;
 extern const function_base *const vmlsdavq;
 extern const function_base *const vmlsdavxq;
+extern const function_base *const vmlsldavq;
+extern const function_base *const vmlsldavxq;
 extern const function_base *const vmovlbq;
 extern const function_base *const vmovltq;
 extern const function_base *const vmovnbq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index f8afe19e86e..50e9ecbfc85 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -55,12 +55,8 @@
 #define vhcaddq_rot270(__a, __b) __arm_vhcaddq_rot270(__a, __b)
 #define vmulltq_poly(__a, __b) __arm_vmulltq_poly(__a, __b)
 #define vmullbq_poly(__a, __b) __arm_vmullbq_poly(__a, __b)
-#define vmlaldavq(__a, __b) __arm_vmlaldavq(__a, __b)
 #define vqdmulltq(__a, __b) __arm_vqdmulltq(__a, __b)
 #define vqdmullbq(__a, __b) __arm_vqdmullbq(__a, __b)
-#define vmlsldavxq(__a, __b) __arm_vmlsldavxq(__a, __b)
-#define vmlsldavq(__a, __b) __arm_vmlsldavq(__a, __b)
-#define vmlaldavxq(__a, __b) __arm_vmlaldavxq(__a, __b)
 #define vrmlaldavhq(__a, __b) __arm_vrmlaldavhq(__a, __b)
 #define vrmlsldavhxq(__a, __b) __arm_vrmlsldavhxq(__a, __b)
 #define vrmlsldavhq(__a, __b) __arm_vrmlsldavhq(__a, __b)
@@ -97,10 +93,6 @@
 #define vmlaldavaxq(__a, __b, __c) __arm_vmlaldavaxq(__a, __b, __c)
 #define vmlsldavaq(__a, __b, __c) __arm_vmlsldavaq(__a, __b, __c)
 #define vmlsldavaxq(__a, __b, __c) __arm_vmlsldavaxq(__a, __b, __c)
-#define vmlaldavq_p(__a, __b, __p) __arm_vmlaldavq_p(__a, __b, __p)
-#define vmlaldavxq_p(__a, __b, __p) __arm_vmlaldavxq_p(__a, __b, __p)
-#define vmlsldavq_p(__a, __b, __p) __arm_vmlsldavq_p(__a, __b, __p)
-#define vmlsldavxq_p(__a, __b, __p) __arm_vmlsldavxq_p(__a, __b, __p)
 #define vsriq_m(__a, __b, __imm, __p) __arm_vsriq_m(__a, __b, __imm, __p)
 #define vqshluq_m(__inactive, __a, __imm, __p) __arm_vqshluq_m(__inactive, __a, __imm, __p)
 #define vbicq_m(__inactive, __a, __b, __p) __arm_vbicq_m(__inactive, __a, __b, __p)
@@ -394,17 +386,12 @@
 #define vbicq_s32(__a, __b) __arm_vbicq_s32(__a, __b)
 #define vmulltq_poly_p8(__a, __b) __arm_vmulltq_poly_p8(__a, __b)
 #define vmullbq_poly_p8(__a, __b) __arm_vmullbq_poly_p8(__a, __b)
-#define vmlaldavq_u16(__a, __b) __arm_vmlaldavq_u16(__a, __b)
 #define vbicq_n_u16(__a,  __imm) __arm_vbicq_n_u16(__a,  __imm)
 #define vqdmulltq_s16(__a, __b) __arm_vqdmulltq_s16(__a, __b)
 #define vqdmulltq_n_s16(__a, __b) __arm_vqdmulltq_n_s16(__a, __b)
 #define vqdmullbq_s16(__a, __b) __arm_vqdmullbq_s16(__a, __b)
 #define vqdmullbq_n_s16(__a, __b) __arm_vqdmullbq_n_s16(__a, __b)
 #define vornq_f16(__a, __b) __arm_vornq_f16(__a, __b)
-#define vmlsldavxq_s16(__a, __b) __arm_vmlsldavxq_s16(__a, __b)
-#define vmlsldavq_s16(__a, __b) __arm_vmlsldavq_s16(__a, __b)
-#define vmlaldavxq_s16(__a, __b) __arm_vmlaldavxq_s16(__a, __b)
-#define vmlaldavq_s16(__a, __b) __arm_vmlaldavq_s16(__a, __b)
 #define vcmulq_rot90_f16(__a, __b) __arm_vcmulq_rot90_f16(__a, __b)
 #define vcmulq_rot270_f16(__a, __b) __arm_vcmulq_rot270_f16(__a, __b)
 #define vcmulq_rot180_f16(__a, __b) __arm_vcmulq_rot180_f16(__a, __b)
@@ -415,17 +402,12 @@
 #define vbicq_n_s16(__a,  __imm) __arm_vbicq_n_s16(__a,  __imm)
 #define vmulltq_poly_p16(__a, __b) __arm_vmulltq_poly_p16(__a, __b)
 #define vmullbq_poly_p16(__a, __b) __arm_vmullbq_poly_p16(__a, __b)
-#define vmlaldavq_u32(__a, __b) __arm_vmlaldavq_u32(__a, __b)
 #define vbicq_n_u32(__a,  __imm) __arm_vbicq_n_u32(__a,  __imm)
 #define vqdmulltq_s32(__a, __b) __arm_vqdmulltq_s32(__a, __b)
 #define vqdmulltq_n_s32(__a, __b) __arm_vqdmulltq_n_s32(__a, __b)
 #define vqdmullbq_s32(__a, __b) __arm_vqdmullbq_s32(__a, __b)
 #define vqdmullbq_n_s32(__a, __b) __arm_vqdmullbq_n_s32(__a, __b)
 #define vornq_f32(__a, __b) __arm_vornq_f32(__a, __b)
-#define vmlsldavxq_s32(__a, __b) __arm_vmlsldavxq_s32(__a, __b)
-#define vmlsldavq_s32(__a, __b) __arm_vmlsldavq_s32(__a, __b)
-#define vmlaldavxq_s32(__a, __b) __arm_vmlaldavxq_s32(__a, __b)
-#define vmlaldavq_s32(__a, __b) __arm_vmlaldavq_s32(__a, __b)
 #define vcmulq_rot90_f32(__a, __b) __arm_vcmulq_rot90_f32(__a, __b)
 #define vcmulq_rot270_f32(__a, __b) __arm_vcmulq_rot270_f32(__a, __b)
 #define vcmulq_rot180_f32(__a, __b) __arm_vcmulq_rot180_f32(__a, __b)
@@ -568,10 +550,6 @@
 #define vcvtnq_m_s16_f16(__inactive, __a, __p) __arm_vcvtnq_m_s16_f16(__inactive, __a, __p)
 #define vcvtpq_m_s16_f16(__inactive, __a, __p) __arm_vcvtpq_m_s16_f16(__inactive, __a, __p)
 #define vcvtq_m_s16_f16(__inactive, __a, __p) __arm_vcvtq_m_s16_f16(__inactive, __a, __p)
-#define vmlaldavq_p_s16(__a, __b, __p) __arm_vmlaldavq_p_s16(__a, __b, __p)
-#define vmlaldavxq_p_s16(__a, __b, __p) __arm_vmlaldavxq_p_s16(__a, __b, __p)
-#define vmlsldavq_p_s16(__a, __b, __p) __arm_vmlsldavq_p_s16(__a, __b, __p)
-#define vmlsldavxq_p_s16(__a, __b, __p) __arm_vmlsldavxq_p_s16(__a, __b, __p)
 #define vpselq_f16(__a, __b, __p) __arm_vpselq_f16(__a, __b, __p)
 #define vmvnq_m_n_u16(__inactive,  __imm, __p) __arm_vmvnq_m_n_u16(__inactive,  __imm, __p)
 #define vcvtmq_m_u16_f16(__inactive, __a, __p) __arm_vcvtmq_m_u16_f16(__inactive, __a, __p)
@@ -579,7 +557,6 @@
 #define vcvtpq_m_u16_f16(__inactive, __a, __p) __arm_vcvtpq_m_u16_f16(__inactive, __a, __p)
 #define vcvtq_m_u16_f16(__inactive, __a, __p) __arm_vcvtq_m_u16_f16(__inactive, __a, __p)
 #define vmlaldavaq_u16(__a, __b, __c) __arm_vmlaldavaq_u16(__a, __b, __c)
-#define vmlaldavq_p_u16(__a, __b, __p) __arm_vmlaldavq_p_u16(__a, __b, __p)
 #define vmvnq_m_n_s32(__inactive,  __imm, __p) __arm_vmvnq_m_n_s32(__inactive,  __imm, __p)
 #define vcmlaq_f32(__a, __b, __c) __arm_vcmlaq_f32(__a, __b, __c)
 #define vcmlaq_rot180_f32(__a, __b, __c) __arm_vcmlaq_rot180_f32(__a, __b, __c)
@@ -597,10 +574,6 @@
 #define vcvtnq_m_s32_f32(__inactive, __a, __p) __arm_vcvtnq_m_s32_f32(__inactive, __a, __p)
 #define vcvtpq_m_s32_f32(__inactive, __a, __p) __arm_vcvtpq_m_s32_f32(__inactive, __a, __p)
 #define vcvtq_m_s32_f32(__inactive, __a, __p) __arm_vcvtq_m_s32_f32(__inactive, __a, __p)
-#define vmlaldavq_p_s32(__a, __b, __p) __arm_vmlaldavq_p_s32(__a, __b, __p)
-#define vmlaldavxq_p_s32(__a, __b, __p) __arm_vmlaldavxq_p_s32(__a, __b, __p)
-#define vmlsldavq_p_s32(__a, __b, __p) __arm_vmlsldavq_p_s32(__a, __b, __p)
-#define vmlsldavxq_p_s32(__a, __b, __p) __arm_vmlsldavxq_p_s32(__a, __b, __p)
 #define vpselq_f32(__a, __b, __p) __arm_vpselq_f32(__a, __b, __p)
 #define vmvnq_m_n_u32(__inactive,  __imm, __p) __arm_vmvnq_m_n_u32(__inactive,  __imm, __p)
 #define vcvtmq_m_u32_f32(__inactive, __a, __p) __arm_vcvtmq_m_u32_f32(__inactive, __a, __p)
@@ -608,7 +581,6 @@
 #define vcvtpq_m_u32_f32(__inactive, __a, __p) __arm_vcvtpq_m_u32_f32(__inactive, __a, __p)
 #define vcvtq_m_u32_f32(__inactive, __a, __p) __arm_vcvtq_m_u32_f32(__inactive, __a, __p)
 #define vmlaldavaq_u32(__a, __b, __c) __arm_vmlaldavaq_u32(__a, __b, __c)
-#define vmlaldavq_p_u32(__a, __b, __p) __arm_vmlaldavq_p_u32(__a, __b, __p)
 #define vsriq_m_n_s8(__a, __b,  __imm, __p) __arm_vsriq_m_n_s8(__a, __b,  __imm, __p)
 #define vcvtq_m_n_f16_u16(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_f16_u16(__inactive, __a,  __imm6, __p)
 #define vqshluq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vqshluq_m_n_s8(__inactive, __a,  __imm, __p)
@@ -1849,13 +1821,6 @@ __arm_vmullbq_poly_p8 (uint8x16_t __a, uint8x16_t __b)
   return __builtin_mve_vmullbq_poly_pv16qi (__a, __b);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavq_u16 (uint16x8_t __a, uint16x8_t __b)
-{
-  return __builtin_mve_vmlaldavq_uv8hi (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_n_u16 (uint16x8_t __a, const int __imm)
@@ -1891,34 +1856,6 @@ __arm_vqdmullbq_n_s16 (int16x8_t __a, int16_t __b)
   return __builtin_mve_vqdmullbq_n_sv8hi (__a, __b);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavxq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vmlsldavxq_sv8hi (__a, __b);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vmlsldavq_sv8hi (__a, __b);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavxq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vmlaldavxq_sv8hi (__a, __b);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vmlaldavq_sv8hi (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_n_s16 (int16x8_t __a, const int __imm)
@@ -1940,13 +1877,6 @@ __arm_vmullbq_poly_p16 (uint16x8_t __a, uint16x8_t __b)
   return __builtin_mve_vmullbq_poly_pv8hi (__a, __b);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavq_u32 (uint32x4_t __a, uint32x4_t __b)
-{
-  return __builtin_mve_vmlaldavq_uv4si (__a, __b);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_n_u32 (uint32x4_t __a, const int __imm)
@@ -1982,34 +1912,6 @@ __arm_vqdmullbq_n_s32 (int32x4_t __a, int32_t __b)
   return __builtin_mve_vqdmullbq_n_sv4si (__a, __b);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavxq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vmlsldavxq_sv4si (__a, __b);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vmlsldavq_sv4si (__a, __b);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavxq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vmlaldavxq_sv4si (__a, __b);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vmlaldavq_sv4si (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_n_s32 (int32x4_t __a, const int __imm)
@@ -2785,34 +2687,6 @@ __arm_vmlsldavaxq_s16 (int64_t __a, int16x8_t __b, int16x8_t __c)
   return __builtin_mve_vmlsldavaxq_sv8hi (__a, __b, __c);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaldavq_p_sv8hi (__a, __b, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavxq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaldavxq_p_sv8hi (__a, __b, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsldavq_p_sv8hi (__a, __b, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavxq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsldavxq_p_sv8hi (__a, __b, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_n_u16 (uint16x8_t __inactive, const int __imm, mve_pred16_t __p)
@@ -2827,13 +2701,6 @@ __arm_vmlaldavaq_u16 (uint64_t __a, uint16x8_t __b, uint16x8_t __c)
   return __builtin_mve_vmlaldavaq_uv8hi (__a, __b, __c);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavq_p_u16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaldavq_p_uv8hi (__a, __b, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_n_s32 (int32x4_t __inactive, const int __imm, mve_pred16_t __p)
@@ -2869,34 +2736,6 @@ __arm_vmlsldavaxq_s32 (int64_t __a, int32x4_t __b, int32x4_t __c)
   return __builtin_mve_vmlsldavaxq_sv4si (__a, __b, __c);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaldavq_p_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavxq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaldavxq_p_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsldavq_p_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavxq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsldavxq_p_sv4si (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_n_u32 (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
@@ -2911,13 +2750,6 @@ __arm_vmlaldavaq_u32 (uint64_t __a, uint32x4_t __b, uint32x4_t __c)
   return __builtin_mve_vmlaldavaq_uv4si (__a, __b, __c);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavq_p_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaldavq_p_uv4si (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -9139,13 +8971,6 @@ __arm_vmullbq_poly (uint8x16_t __a, uint8x16_t __b)
  return __arm_vmullbq_poly_p8 (__a, __b);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavq (uint16x8_t __a, uint16x8_t __b)
-{
- return __arm_vmlaldavq_u16 (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq (uint16x8_t __a, const int __imm)
@@ -9181,34 +9006,6 @@ __arm_vqdmullbq (int16x8_t __a, int16_t __b)
  return __arm_vqdmullbq_n_s16 (__a, __b);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavxq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vmlsldavxq_s16 (__a, __b);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vmlsldavq_s16 (__a, __b);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavxq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vmlaldavxq_s16 (__a, __b);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vmlaldavq_s16 (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq (int16x8_t __a, const int __imm)
@@ -9230,13 +9027,6 @@ __arm_vmullbq_poly (uint16x8_t __a, uint16x8_t __b)
  return __arm_vmullbq_poly_p16 (__a, __b);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavq (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vmlaldavq_u32 (__a, __b);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq (uint32x4_t __a, const int __imm)
@@ -9272,34 +9062,6 @@ __arm_vqdmullbq (int32x4_t __a, int32_t __b)
  return __arm_vqdmullbq_n_s32 (__a, __b);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavxq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vmlsldavxq_s32 (__a, __b);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vmlsldavq_s32 (__a, __b);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavxq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vmlaldavxq_s32 (__a, __b);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vmlaldavq_s32 (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq (int32x4_t __a, const int __imm)
@@ -10035,34 +9797,6 @@ __arm_vmlsldavaxq (int64_t __a, int16x8_t __b, int16x8_t __c)
  return __arm_vmlsldavaxq_s16 (__a, __b, __c);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavq_p (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmlaldavq_p_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavxq_p (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmlaldavxq_p_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavq_p (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmlsldavq_p_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavxq_p (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmlsldavxq_p_s16 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (uint16x8_t __inactive, const int __imm, mve_pred16_t __p)
@@ -10077,13 +9811,6 @@ __arm_vmlaldavaq (uint64_t __a, uint16x8_t __b, uint16x8_t __c)
  return __arm_vmlaldavaq_u16 (__a, __b, __c);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavq_p (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmlaldavq_p_u16 (__a, __b, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (int32x4_t __inactive, const int __imm, mve_pred16_t __p)
@@ -10119,34 +9846,6 @@ __arm_vmlsldavaxq (int64_t __a, int32x4_t __b, int32x4_t __c)
  return __arm_vmlsldavaxq_s32 (__a, __b, __c);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmlaldavq_p_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavxq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmlaldavxq_p_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmlsldavq_p_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavxq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmlsldavxq_p_s32 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
@@ -10161,13 +9860,6 @@ __arm_vmlaldavaq (uint64_t __a, uint32x4_t __b, uint32x4_t __c)
  return __arm_vmlaldavaq_u32 (__a, __b, __c);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavq_p (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmlaldavq_p_u32 (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -15203,12 +14895,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshluq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
   int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshluq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1));})
 
-#define __arm_vmlaldavxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
-
 #define __arm_vqdmulltq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -16272,12 +15958,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_poly_p8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_poly_p16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)));})
 
-#define __arm_vmlaldavxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
-
 #define __arm_vqdmulltq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -17460,28 +17140,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavaxq_s16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
   int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavaxq_s32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
 
-#define __arm_vmlaldavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmlaldavq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmlaldavq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
-#define __arm_vmlaldavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmlaldavq_p_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmlaldavq_p_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
-#define __arm_vmlaldavxq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavxq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
-
 #define __arm_vmlsldavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -17496,30 +17154,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsldavaxq_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsldavaxq_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
 
-#define __arm_vmlsldavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsldavq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsldavq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
-
-#define __arm_vmlsldavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsldavq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsldavq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
-
-#define __arm_vmlsldavxq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsldavxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsldavxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
-
-#define __arm_vmlsldavxq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsldavxq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsldavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
-
 #define __arm_vmullbq_int_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-- 
2.34.1


* [PATCH 14/24] arm: [MVE intrinsics] factorize vrmlaldavhq vrmlaldavhxq vrmlsldavhq vrmlsldavhxq
  2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
                   ` (11 preceding siblings ...)
  2023-05-11 12:19 ` [PATCH 13/24] arm: [MVE intrinsics] rework " Christophe Lyon
@ 2023-05-11 12:19 ` Christophe Lyon
  2023-05-11 12:19 ` [PATCH 15/24] arm: [MVE intrinsics] rework " Christophe Lyon
                   ` (10 subsequent siblings)
  23 siblings, 0 replies; 25+ messages in thread
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vrmlaldavhq, vrmlaldavhxq, vrmlsldavhq, vrmlsldavhxq
builtins so that they use the same parameterized names.
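
The user-visible intrinsics are not affected by this factorization: the
merged @mve_<mve_insn>q_<supf>v4si and @mve_<mve_insn>q_p_<supf>v4si
patterns emit the same instructions as the patterns they replace.  As an
illustrative sketch only (helper names made up, MVE assumed enabled):

  #include <arm_mve.h>

  /* Still expected to emit vrmlaldavh.u32 and vpst;vrmlsldavhxt.s32.  */
  uint64_t
  acc_u32 (uint32x4_t a, uint32x4_t b)
  {
    return vrmlaldavhq (a, b);
  }

  int64_t
  acc_s32_pred (int32x4_t a, int32x4_t b, mve_pred16_t p)
  {
    return vrmlsldavhxq_p (a, b, p);
  }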

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MVE_VRMLxLDAVxQ, MVE_VRMLxLDAVHxQ_P):
	New.
	(mve_insn): Add vrmlaldavh, vrmlaldavhx, vrmlsldavh, vrmlsldavhx.
	(supf): Add VRMLALDAVHXQ_P_S, VRMLALDAVHXQ_S, VRMLSLDAVHQ_P_S,
	VRMLSLDAVHQ_S, VRMLSLDAVHXQ_P_S, VRMLSLDAVHXQ_S.
	* config/arm/mve.md (mve_vrmlaldavhxq_sv4si)
	(mve_vrmlsldavhq_sv4si, mve_vrmlsldavhxq_sv4si)
	(mve_vrmlaldavhq_<supf>v4si): Merge into ...
	(@mve_<mve_insn>q_<supf>v4si): ... this.
	(mve_vrmlaldavhxq_p_sv4si, mve_vrmlsldavhq_p_sv4si)
	(mve_vrmlsldavhxq_p_sv4si, mve_vrmlaldavhq_p_<supf>v4si): Merge
	into ...
	(@mve_<mve_insn>q_p_<supf>v4si): ... this.
---
 gcc/config/arm/iterators.md |  28 +++++++++
 gcc/config/arm/mve.md       | 117 +++++-------------------------------
 2 files changed, 43 insertions(+), 102 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 227ba52aed5..729127d8586 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -741,6 +741,20 @@ (define_int_iterator MVE_VMLxLDAVxQ_P [
 		     VMLSLDAVXQ_P_S
 		     ])
 
+(define_int_iterator MVE_VRMLxLDAVxQ [
+		     VRMLALDAVHQ_S VRMLALDAVHQ_U
+		     VRMLALDAVHXQ_S
+		     VRMLSLDAVHQ_S
+		     VRMLSLDAVHXQ_S
+		     ])
+
+(define_int_iterator MVE_VRMLxLDAVHxQ_P [
+		     VRMLALDAVHQ_P_S VRMLALDAVHQ_P_U
+		     VRMLALDAVHXQ_P_S
+		     VRMLSLDAVHQ_P_S
+		     VRMLSLDAVHXQ_P_S
+		     ])
+
 (define_int_iterator MVE_MOVN [
 		     VMOVNBQ_S VMOVNBQ_U
 		     VMOVNTQ_S VMOVNTQ_U
@@ -979,6 +993,14 @@ (define_int_attr mve_insn [
 		 (VREV64Q_S "vrev64") (VREV64Q_U "vrev64") (VREV64Q_F "vrev64")
 		 (VRHADDQ_M_S "vrhadd") (VRHADDQ_M_U "vrhadd")
 		 (VRHADDQ_S "vrhadd") (VRHADDQ_U "vrhadd")
+		 (VRMLALDAVHQ_P_S "vrmlaldavh") (VRMLALDAVHQ_P_U "vrmlaldavh")
+		 (VRMLALDAVHQ_S "vrmlaldavh") (VRMLALDAVHQ_U "vrmlaldavh")
+		 (VRMLALDAVHXQ_P_S "vrmlaldavhx")
+		 (VRMLALDAVHXQ_S "vrmlaldavhx")
+		 (VRMLSLDAVHQ_P_S "vrmlsldavh")
+		 (VRMLSLDAVHQ_S "vrmlsldavh")
+		 (VRMLSLDAVHXQ_P_S "vrmlsldavhx")
+		 (VRMLSLDAVHXQ_S "vrmlsldavhx")
 		 (VRMULHQ_M_S "vrmulh") (VRMULHQ_M_U "vrmulh")
 		 (VRMULHQ_S "vrmulh") (VRMULHQ_U "vrmulh")
 		 (VRNDAQ_F "vrnda") (VRNDAQ_M_F "vrnda")
@@ -2323,6 +2345,12 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VMLALDAVXQ_P_S "s")
 		       (VMLSLDAVQ_P_S "s")
 		       (VMLSLDAVXQ_P_S "s")
+		       (VRMLALDAVHXQ_P_S "s")
+		       (VRMLALDAVHXQ_S "s")
+		       (VRMLSLDAVHQ_P_S "s")
+		       (VRMLSLDAVHQ_S "s")
+		       (VRMLSLDAVHXQ_P_S "s")
+		       (VRMLSLDAVHXQ_S "s")
 		       ])
 
 ;; Both kinds of return insn.
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 584e6129ea5..e2259aa48e9 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1563,47 +1563,20 @@ (define_insn "mve_vqdmulltq_s<mode>"
 ])
 
 ;;
-;; [vrmlaldavhxq_s])
+;; [vrmlaldavhq_u vrmlaldavhq_s]
+;; [vrmlaldavhxq_s]
+;; [vrmlsldavhq_s]
+;; [vrmlsldavhxq_s]
 ;;
-(define_insn "mve_vrmlaldavhxq_sv4si"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w")
-		    (match_operand:V4SI 2 "s_register_operand" "w")]
-	 VRMLALDAVHXQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vrmlaldavhx.s32 %Q0, %R0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vrmlsldavhq_s])
-;;
-(define_insn "mve_vrmlsldavhq_sv4si"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w")
-		    (match_operand:V4SI 2 "s_register_operand" "w")]
-	 VRMLSLDAVHQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vrmlsldavh.s32\t%Q0, %R0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vrmlsldavhxq_s])
-;;
-(define_insn "mve_vrmlsldavhxq_sv4si"
+(define_insn "@mve_<mve_insn>q_<supf>v4si"
   [
    (set (match_operand:DI 0 "s_register_operand" "=r")
 	(unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w")
 		    (match_operand:V4SI 2 "s_register_operand" "w")]
-	 VRMLSLDAVHXQ_S))
+	 MVE_VRMLxLDAVxQ))
   ]
   "TARGET_HAVE_MVE"
-  "vrmlsldavhx.s32\t%Q0, %R0, %q1, %q2"
+  "<mve_insn>.<supf>32\t%Q0, %R0, %q1, %q2"
   [(set_attr "type" "mve_move")
 ])
 
@@ -1653,21 +1626,6 @@ (define_insn "mve_vmullbq_poly_p<mode>"
   [(set_attr "type" "mve_move")
 ])
 
-;;
-;; [vrmlaldavhq_u vrmlaldavhq_s])
-;;
-(define_insn "mve_vrmlaldavhq_<supf>v4si"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w")
-		    (match_operand:V4SI 2 "s_register_operand" "w")]
-	 VRMLALDAVHQ))
-  ]
-  "TARGET_HAVE_MVE"
-  "vrmlaldavh.<supf>32\t%Q0, %R0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-])
-
 ;;
 ;; [vcmpeqq_m_f]
 ;; [vcmpgeq_m_f]
@@ -2826,18 +2784,21 @@ (define_insn "mve_vrmlaldavhaxq_sv4si"
 ])
 
 ;;
-;; [vrmlaldavhxq_p_s])
+;; [vrmlaldavhq_p_u vrmlaldavhq_p_s]
+;; [vrmlaldavhxq_p_s]
+;; [vrmlsldavhq_p_s]
+;; [vrmlsldavhxq_p_s]
 ;;
-(define_insn "mve_vrmlaldavhxq_p_sv4si"
+(define_insn "@mve_<mve_insn>q_p_<supf>v4si"
   [
    (set (match_operand:DI 0 "s_register_operand" "=r")
 	(unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w")
 		       (match_operand:V4SI 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VRMLALDAVHXQ_P_S))
+		       (match_operand:V4BI 3 "vpr_register_operand" "Up")]
+	 MVE_VRMLxLDAVHxQ_P))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vrmlaldavhxt.s32 %Q0, %R0, %q1, %q2"
+  "vpst\;<mve_insn>t.<supf>32\t%Q0, %R0, %q1, %q2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
@@ -2857,38 +2818,6 @@ (define_insn "mve_vrmlsldavhaxq_sv4si"
   [(set_attr "type" "mve_move")
 ])
 
-;;
-;; [vrmlsldavhq_p_s])
-;;
-(define_insn "mve_vrmlsldavhq_p_sv4si"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w")
-		       (match_operand:V4SI 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VRMLSLDAVHQ_P_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vrmlsldavht.s32 %Q0, %R0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vrmlsldavhxq_p_s])
-;;
-(define_insn "mve_vrmlsldavhxq_p_sv4si"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w")
-		       (match_operand:V4SI 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VRMLSLDAVHXQ_P_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vrmlsldavhxt.s32 %Q0, %R0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
 ;;
 ;; [vcvtmq_m_s, vcvtmq_m_u])
 ;;
@@ -2986,22 +2915,6 @@ (define_insn "mve_vcvtq_m_from_f_<supf><mode>"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-;;
-;; [vrmlaldavhq_p_u vrmlaldavhq_p_s])
-;;
-(define_insn "mve_vrmlaldavhq_p_<supf>v4si"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w")
-		    (match_operand:V4SI 2 "s_register_operand" "w")
-		    (match_operand:V4BI 3 "vpr_register_operand" "Up")]
-	 VRMLALDAVHQ_P))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vrmlaldavht.<supf>32 %Q0, %R0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
 ;;
 ;; [vrmlsldavhaq_s])
 ;;
-- 
2.34.1


* [PATCH 15/24] arm: [MVE intrinsics] rework vrmlaldavhq vrmlaldavhxq vrmlsldavhq vrmlsldavhxq
  2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
                   ` (12 preceding siblings ...)
  2023-05-11 12:19 ` [PATCH 14/24] arm: [MVE intrinsics] factorize vrmlaldavhq vrmlaldavhxq vrmlsldavhq vrmlsldavhxq Christophe Lyon
@ 2023-05-11 12:19 ` Christophe Lyon
  2023-05-11 12:19 ` [PATCH 16/24] arm: [MVE intrinsics] add binary_acca_int64 shape Christophe Lyon
                   ` (9 subsequent siblings)
  23 siblings, 0 replies; 25+ messages in thread
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vrmlaldavhq, vrmlaldavhxq, vrmlsldavhq, vrmlsldavhxq using
the new MVE builtins framework.
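
User-facing behaviour is intended to be unchanged: both the polymorphic
and the explicitly-typed forms remain available, now resolved by the
builtins framework rather than by the arm_mve.h macros removed below.
A hedged sketch (the function names are made up, MVE assumed enabled):

  #include <arm_mve.h>

  int64_t
  red_s32 (int32x4_t a, int32x4_t b, mve_pred16_t p)
  {
    int64_t t = vrmlaldavhxq (a, b);     /* resolves to vrmlaldavhxq_s32 */
    return t + vrmlsldavhq_p (a, b, p);  /* predicated, signed only */
  }

  uint64_t
  red_u32 (uint32x4_t a, uint32x4_t b)
  {
    return vrmlaldavhq_u32 (a, b);       /* explicitly-typed form */
  }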

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vrmlaldavhq, vrmlaldavhxq)
	(vrmlsldavhq, vrmlsldavhxq): New.
	* config/arm/arm-mve-builtins-base.def (vrmlaldavhq, vrmlaldavhxq)
	(vrmlsldavhq, vrmlsldavhxq): New.
	* config/arm/arm-mve-builtins-base.h (vrmlaldavhq, vrmlaldavhxq)
	(vrmlsldavhq, vrmlsldavhxq): New.
	* config/arm/arm-mve-builtins-functions.h
	(unspec_mve_function_exact_insn_pred_p): Handle vrmlaldavhq,
	vrmlaldavhxq, vrmlsldavhq, vrmlsldavhxq.
	* config/arm/arm_mve.h (vrmlaldavhq): Remove.
	(vrmlsldavhxq): Remove.
	(vrmlsldavhq): Remove.
	(vrmlaldavhxq): Remove.
	(vrmlaldavhq_p): Remove.
	(vrmlaldavhxq_p): Remove.
	(vrmlsldavhq_p): Remove.
	(vrmlsldavhxq_p): Remove.
	(vrmlaldavhq_u32): Remove.
	(vrmlsldavhxq_s32): Remove.
	(vrmlsldavhq_s32): Remove.
	(vrmlaldavhxq_s32): Remove.
	(vrmlaldavhq_s32): Remove.
	(vrmlaldavhq_p_s32): Remove.
	(vrmlaldavhxq_p_s32): Remove.
	(vrmlsldavhq_p_s32): Remove.
	(vrmlsldavhxq_p_s32): Remove.
	(vrmlaldavhq_p_u32): Remove.
	(__arm_vrmlaldavhq_u32): Remove.
	(__arm_vrmlsldavhxq_s32): Remove.
	(__arm_vrmlsldavhq_s32): Remove.
	(__arm_vrmlaldavhxq_s32): Remove.
	(__arm_vrmlaldavhq_s32): Remove.
	(__arm_vrmlaldavhq_p_s32): Remove.
	(__arm_vrmlaldavhxq_p_s32): Remove.
	(__arm_vrmlsldavhq_p_s32): Remove.
	(__arm_vrmlsldavhxq_p_s32): Remove.
	(__arm_vrmlaldavhq_p_u32): Remove.
	(__arm_vrmlaldavhq): Remove.
	(__arm_vrmlsldavhxq): Remove.
	(__arm_vrmlsldavhq): Remove.
	(__arm_vrmlaldavhxq): Remove.
	(__arm_vrmlaldavhq_p): Remove.
	(__arm_vrmlaldavhxq_p): Remove.
	(__arm_vrmlsldavhq_p): Remove.
	(__arm_vrmlsldavhxq_p): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc     |   4 +
 gcc/config/arm/arm-mve-builtins-base.def    |   4 +
 gcc/config/arm/arm-mve-builtins-base.h      |   4 +
 gcc/config/arm/arm-mve-builtins-functions.h |   8 +-
 gcc/config/arm/arm_mve.h                    | 182 --------------------
 5 files changed, 18 insertions(+), 184 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index af1a2c9942a..142ba9357a1 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -326,6 +326,10 @@ FUNCTION_WITHOUT_N_NO_F (vrev16q, VREV16Q)
 FUNCTION_WITHOUT_N (vrev32q, VREV32Q)
 FUNCTION_WITHOUT_N (vrev64q, VREV64Q)
 FUNCTION_WITHOUT_N_NO_F (vrhaddq, VRHADDQ)
+FUNCTION_PRED_P_S_U (vrmlaldavhq, VRMLALDAVHQ)
+FUNCTION_PRED_P_S (vrmlaldavhxq, VRMLALDAVHXQ)
+FUNCTION_PRED_P_S (vrmlsldavhq, VRMLSLDAVHQ)
+FUNCTION_PRED_P_S (vrmlsldavhxq, VRMLSLDAVHXQ)
 FUNCTION_WITHOUT_N_NO_F (vrmulhq, VRMULHQ)
 FUNCTION_ONLY_F (vrndq, VRNDQ)
 FUNCTION_ONLY_F (vrndaq, VRNDAQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index f7f353b34a7..1dd3ad3489b 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -96,6 +96,10 @@ DEF_MVE_FUNCTION (vrev16q, unary, integer_8, mx_or_none)
 DEF_MVE_FUNCTION (vrev32q, unary, integer_8_16, mx_or_none)
 DEF_MVE_FUNCTION (vrev64q, unary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vrhaddq, binary, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vrmlaldavhq, binary_acc_int64, integer_32, p_or_none)
+DEF_MVE_FUNCTION (vrmlaldavhxq, binary_acc_int64, signed_32, p_or_none)
+DEF_MVE_FUNCTION (vrmlsldavhq, binary_acc_int64, signed_32, p_or_none)
+DEF_MVE_FUNCTION (vrmlsldavhxq, binary_acc_int64, signed_32, p_or_none)
 DEF_MVE_FUNCTION (vrmulhq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vrshlq, binary_round_lshift, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vrshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 08d07a7c6d5..9604991b168 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -108,6 +108,10 @@ extern const function_base *const vrev16q;
 extern const function_base *const vrev32q;
 extern const function_base *const vrev64q;
 extern const function_base *const vrhaddq;
+extern const function_base *const vrmlaldavhq;
+extern const function_base *const vrmlaldavhxq;
+extern const function_base *const vrmlsldavhq;
+extern const function_base *const vrmlsldavhxq;
 extern const function_base *const vrmulhq;
 extern const function_base *const vrndaq;
 extern const function_base *const vrndmq;
diff --git a/gcc/config/arm/arm-mve-builtins-functions.h b/gcc/config/arm/arm-mve-builtins-functions.h
index ea926e42b81..77a6269f0da 100644
--- a/gcc/config/arm/arm-mve-builtins-functions.h
+++ b/gcc/config/arm/arm-mve-builtins-functions.h
@@ -409,8 +409,12 @@ public:
   {
     insn_code code;
 
-    if ((m_unspec_for_sint == VADDLVQ_S)
-	|| m_unspec_for_sint == VADDLVAQ_S)
+    if (m_unspec_for_sint == VADDLVQ_S
+	|| m_unspec_for_sint == VADDLVAQ_S
+	|| m_unspec_for_sint == VRMLALDAVHQ_S
+	|| m_unspec_for_sint == VRMLALDAVHXQ_S
+	|| m_unspec_for_sint == VRMLSLDAVHQ_S
+	|| m_unspec_for_sint == VRMLSLDAVHXQ_S)
       {
 	switch (e.pred)
 	  {
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 50e9ecbfc85..e662c0c4677 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -57,10 +57,6 @@
 #define vmullbq_poly(__a, __b) __arm_vmullbq_poly(__a, __b)
 #define vqdmulltq(__a, __b) __arm_vqdmulltq(__a, __b)
 #define vqdmullbq(__a, __b) __arm_vqdmullbq(__a, __b)
-#define vrmlaldavhq(__a, __b) __arm_vrmlaldavhq(__a, __b)
-#define vrmlsldavhxq(__a, __b) __arm_vrmlsldavhxq(__a, __b)
-#define vrmlsldavhq(__a, __b) __arm_vrmlsldavhq(__a, __b)
-#define vrmlaldavhxq(__a, __b) __arm_vrmlaldavhxq(__a, __b)
 #define vbicq_m_n(__a, __imm, __p) __arm_vbicq_m_n(__a, __imm, __p)
 #define vrmlaldavhaq(__a, __b, __c) __arm_vrmlaldavhaq(__a, __b, __c)
 #define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
@@ -85,10 +81,6 @@
 #define vrmlaldavhaxq(__a, __b, __c) __arm_vrmlaldavhaxq(__a, __b, __c)
 #define vrmlsldavhaq(__a, __b, __c) __arm_vrmlsldavhaq(__a, __b, __c)
 #define vrmlsldavhaxq(__a, __b, __c) __arm_vrmlsldavhaxq(__a, __b, __c)
-#define vrmlaldavhq_p(__a, __b, __p) __arm_vrmlaldavhq_p(__a, __b, __p)
-#define vrmlaldavhxq_p(__a, __b, __p) __arm_vrmlaldavhxq_p(__a, __b, __p)
-#define vrmlsldavhq_p(__a, __b, __p) __arm_vrmlsldavhq_p(__a, __b, __p)
-#define vrmlsldavhxq_p(__a, __b, __p) __arm_vrmlsldavhxq_p(__a, __b, __p)
 #define vmlaldavaq(__a, __b, __c) __arm_vmlaldavaq(__a, __b, __c)
 #define vmlaldavaxq(__a, __b, __c) __arm_vmlaldavaxq(__a, __b, __c)
 #define vmlsldavaq(__a, __b, __c) __arm_vmlsldavaq(__a, __b, __c)
@@ -416,15 +408,10 @@
 #define vcaddq_rot270_f32(__a, __b) __arm_vcaddq_rot270_f32(__a, __b)
 #define vbicq_f32(__a, __b) __arm_vbicq_f32(__a, __b)
 #define vbicq_n_s32(__a,  __imm) __arm_vbicq_n_s32(__a,  __imm)
-#define vrmlaldavhq_u32(__a, __b) __arm_vrmlaldavhq_u32(__a, __b)
 #define vctp8q_m(__a, __p) __arm_vctp8q_m(__a, __p)
 #define vctp64q_m(__a, __p) __arm_vctp64q_m(__a, __p)
 #define vctp32q_m(__a, __p) __arm_vctp32q_m(__a, __p)
 #define vctp16q_m(__a, __p) __arm_vctp16q_m(__a, __p)
-#define vrmlsldavhxq_s32(__a, __b) __arm_vrmlsldavhxq_s32(__a, __b)
-#define vrmlsldavhq_s32(__a, __b) __arm_vrmlsldavhq_s32(__a, __b)
-#define vrmlaldavhxq_s32(__a, __b) __arm_vrmlaldavhxq_s32(__a, __b)
-#define vrmlaldavhq_s32(__a, __b) __arm_vrmlaldavhq_s32(__a, __b)
 #define vcvttq_f16_f32(__a, __b) __arm_vcvttq_f16_f32(__a, __b)
 #define vcvtbq_f16_f32(__a, __b) __arm_vcvtbq_f16_f32(__a, __b)
 #define vbicq_m_n_s16(__a,  __imm, __p) __arm_vbicq_m_n_s16(__a,  __imm, __p)
@@ -528,11 +515,6 @@
 #define vcvtbq_m_f32_f16(__inactive, __a, __p) __arm_vcvtbq_m_f32_f16(__inactive, __a, __p)
 #define vcvttq_m_f16_f32(__a, __b, __p) __arm_vcvttq_m_f16_f32(__a, __b, __p)
 #define vcvttq_m_f32_f16(__inactive, __a, __p) __arm_vcvttq_m_f32_f16(__inactive, __a, __p)
-#define vrmlaldavhq_p_s32(__a, __b, __p) __arm_vrmlaldavhq_p_s32(__a, __b, __p)
-#define vrmlaldavhxq_p_s32(__a, __b, __p) __arm_vrmlaldavhxq_p_s32(__a, __b, __p)
-#define vrmlsldavhq_p_s32(__a, __b, __p) __arm_vrmlsldavhq_p_s32(__a, __b, __p)
-#define vrmlsldavhxq_p_s32(__a, __b, __p) __arm_vrmlsldavhxq_p_s32(__a, __b, __p)
-#define vrmlaldavhq_p_u32(__a, __b, __p) __arm_vrmlaldavhq_p_u32(__a, __b, __p)
 #define vmvnq_m_n_s16(__inactive,  __imm, __p) __arm_vmvnq_m_n_s16(__inactive,  __imm, __p)
 #define vcmlaq_f16(__a, __b, __c) __arm_vcmlaq_f16(__a, __b, __c)
 #define vcmlaq_rot180_f16(__a, __b, __c) __arm_vcmlaq_rot180_f16(__a, __b, __c)
@@ -1919,13 +1901,6 @@ __arm_vbicq_n_s32 (int32x4_t __a, const int __imm)
   return __builtin_mve_vbicq_n_sv4si (__a, __imm);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhq_u32 (uint32x4_t __a, uint32x4_t __b)
-{
-  return __builtin_mve_vrmlaldavhq_uv4si (__a, __b);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vctp8q_m (uint32_t __a, mve_pred16_t __p)
@@ -1954,34 +1929,6 @@ __arm_vctp16q_m (uint32_t __a, mve_pred16_t __p)
   return __builtin_mve_vctp16q_mv8bi (__a, __p);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlsldavhxq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vrmlsldavhxq_sv4si (__a, __b);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlsldavhq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vrmlsldavhq_sv4si (__a, __b);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhxq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vrmlaldavhxq_sv4si (__a, __b);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vrmlaldavhq_sv4si (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_m_n_s16 (int16x8_t __a, const int __imm, mve_pred16_t __p)
@@ -2617,41 +2564,6 @@ __arm_vrmlsldavhaxq_s32 (int64_t __a, int32x4_t __b, int32x4_t __c)
   return __builtin_mve_vrmlsldavhaxq_sv4si (__a, __b, __c);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrmlaldavhq_p_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhxq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrmlaldavhxq_p_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlsldavhq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrmlsldavhq_p_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlsldavhxq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrmlsldavhxq_p_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhq_p_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vrmlaldavhq_p_uv4si (__a, __b, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_n_s16 (int16x8_t __inactive, const int __imm, mve_pred16_t __p)
@@ -9069,41 +8981,6 @@ __arm_vbicq (int32x4_t __a, const int __imm)
  return __arm_vbicq_n_s32 (__a, __imm);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhq (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vrmlaldavhq_u32 (__a, __b);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlsldavhxq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vrmlsldavhxq_s32 (__a, __b);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlsldavhq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vrmlsldavhq_s32 (__a, __b);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhxq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vrmlaldavhxq_s32 (__a, __b);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vrmlaldavhq_s32 (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_m_n (int16x8_t __a, const int __imm, mve_pred16_t __p)
@@ -9727,41 +9604,6 @@ __arm_vrmlsldavhaxq (int64_t __a, int32x4_t __b, int32x4_t __c)
  return __arm_vrmlsldavhaxq_s32 (__a, __b, __c);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vrmlaldavhq_p_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhxq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vrmlaldavhxq_p_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlsldavhq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vrmlsldavhq_p_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlsldavhxq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vrmlsldavhxq_p_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhq_p (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vrmlaldavhq_p_u32 (__a, __b, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (int16x8_t __inactive, const int __imm, mve_pred16_t __p)
@@ -17188,34 +17030,10 @@ extern void *__ARM_undef;
 
 #define __arm_vrmlaldavhaxq(p0,p1,p2) __arm_vrmlaldavhaxq_s32(p0,p1,p2)
 
-#define __arm_vrmlaldavhq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrmlaldavhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrmlaldavhq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
-#define __arm_vrmlaldavhq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrmlaldavhq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrmlaldavhq_p_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
-#define __arm_vrmlaldavhxq(p0,p1) __arm_vrmlaldavhxq_s32(p0,p1)
-
-#define __arm_vrmlaldavhxq_p(p0,p1,p2) __arm_vrmlaldavhxq_p_s32(p0,p1,p2)
-
 #define __arm_vrmlsldavhaq(p0,p1,p2) __arm_vrmlsldavhaq_s32(p0,p1,p2)
 
 #define __arm_vrmlsldavhaxq(p0,p1,p2) __arm_vrmlsldavhaxq_s32(p0,p1,p2)
 
-#define __arm_vrmlsldavhq(p0,p1) __arm_vrmlsldavhq_s32(p0,p1)
-
-#define __arm_vrmlsldavhq_p(p0,p1,p2) __arm_vrmlsldavhq_p_s32(p0,p1,p2)
-
-#define __arm_vrmlsldavhxq(p0,p1) __arm_vrmlsldavhxq_s32(p0,p1)
-
-#define __arm_vrmlsldavhxq_p(p0,p1,p2) __arm_vrmlsldavhxq_p_s32(p0,p1,p2)
-
 #define __arm_vstrbq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vstrbq_s8 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t)), \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 16/24] arm: [MVE intrinsics] add binary_acca_int64 shape
  2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
                   ` (13 preceding siblings ...)
  2023-05-11 12:19 ` [PATCH 15/24] arm: [MVE intrinsics] rework " Christophe Lyon
@ 2023-05-11 12:19 ` Christophe Lyon
  2023-05-11 12:19 ` [PATCH 17/24] arm: [MVE intrinsics] factorize vmlaldavaq vmlaldavaxq vmlsldavaq vmlsldavaxq Christophe Lyon
                   ` (8 subsequent siblings)
  23 siblings, 0 replies; 25+ messages in thread
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the binary_acca_int64 shape description.
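
For illustration only (not part of the patch), a call that this shape
is meant to resolve could look as follows, assuming an MVE-enabled
target (e.g. -march=armv8.1-m.main+mve); the function name is made up:

  #include <arm_mve.h>

  int64_t
  acc_dot_s16 (int64_t acc, int16x8_t m1, int16x8_t m2)
  {
    /* (int64_t, int16x8_t, int16x8_t) -> int64_t, matching the
       binary_acca_int64 prototype added below.  */
    return vmlaldavaq (acc, m1, m2);
  }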

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (binary_acca_int64): New.
	* config/arm/arm-mve-builtins-shapes.h (binary_acca_int64): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 37 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 38 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index f1c3844953a..af770fd3e39 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -452,6 +452,43 @@ struct binary_acca_int32_def : public overloaded_base<0>
 };
 SHAPE (binary_acca_int32)
 
+/* [u]int64_t vfoo[_<t0>]([u]int64_t, <T0>_t, <T0>_t)
+
+   Example: vmlaldavaq.
+   int64_t [__arm_]vmlaldavaq[_s16](int64_t add, int16x8_t m1, int16x8_t m2)
+   int64_t [__arm_]vmlaldavaq_p[_s16](int64_t add, int16x8_t m1, int16x8_t m2, mve_pred16_t p)  */
+struct binary_acca_int64_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+    build_all (b, "sx64,sx64,v0,v0", group, MODE_none, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    type_suffix_index type;
+    if (!r.check_gp_argument (3, i, nargs)
+	|| (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES)
+      return error_mark_node;
+
+    unsigned int last_arg = i;
+    for (i = 1; i < last_arg; i++)
+      if (!r.require_matching_vector_type (i, type))
+	return error_mark_node;
+
+    if (!r.require_integer_immediate (0))
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+};
+SHAPE (binary_acca_int64)
+
 /* <T0>_t vfoo[_n_t0](<T0>_t, const int)
 
    Shape for vector shift right operations that take a vector first
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index 73e82d2fd7a..1c4254122bc 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -40,6 +40,7 @@ namespace arm_mve
     extern const function_shape *const binary_acc_int32;
     extern const function_shape *const binary_acc_int64;
     extern const function_shape *const binary_acca_int32;
+    extern const function_shape *const binary_acca_int64;
     extern const function_shape *const binary_maxamina;
     extern const function_shape *const binary_maxavminav;
     extern const function_shape *const binary_maxvminv;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 17/24] arm: [MVE intrinsics] factorize vmlaldavaq vmlaldavaxq vmlsldavaq vmlsldavaxq
  2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
                   ` (14 preceding siblings ...)
  2023-05-11 12:19 ` [PATCH 16/24] arm: [MVE intrinsics] add binary_acca_int64 shape Christophe Lyon
@ 2023-05-11 12:19 ` Christophe Lyon
  2023-05-11 12:19 ` [PATCH 18/24] arm: [MVE intrinsics] rework " Christophe Lyon
                   ` (7 subsequent siblings)
  23 siblings, 0 replies; 25+ messages in thread
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vmlaldavaq, vmlaldavaxq, vmlsldavaq, vmlsldavaxq builtins so
that they use the same parameterized names.
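
As an illustration only (not part of the patch), the merged pattern is
expected to keep emitting the same mnemonics, which now come from the
<mve_insn> attribute.  For instance, on an MVE-enabled target
(function name made up):

  #include <arm_mve.h>

  int64_t
  dot_x_s16 (int64_t acc, int16x8_t m1, int16x8_t m2)
  {
    /* Should still assemble to a vmlaldavax.s16 instruction.  */
    return vmlaldavaxq_s16 (acc, m1, m2);
  }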

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MVE_VMLxLDAVAxQ, MVE_VMLxLDAVAxQ_P):
	New.
	(mve_insn): Add vmlaldava, vmlaldavax, vmlsldava, vmlsldavax.
	(supf): Add VMLALDAVAXQ_P_S, VMLALDAVAXQ_S, VMLSLDAVAQ_P_S,
	VMLSLDAVAQ_S, VMLSLDAVAXQ_P_S, VMLSLDAVAXQ_S.
	* config/arm/mve.md (mve_vmlaldavaq_<supf><mode>)
	(mve_vmlsldavaq_s<mode>, mve_vmlsldavaxq_s<mode>)
	(mve_vmlaldavaxq_s<mode>): Merge into ...
	(@mve_<mve_insn>q_<supf><mode>): ... this.
	(mve_vmlaldavaq_p_<supf><mode>, mve_vmlaldavaxq_p_<supf><mode>)
	(mve_vmlsldavaq_p_s<mode>, mve_vmlsldavaxq_p_s<mode>): Merge into
	...
	(@mve_<mve_insn>q_p_<supf><mode>): ... this.
---
 gcc/config/arm/iterators.md |  28 +++++++++
 gcc/config/arm/mve.md       | 121 +++++-------------------------------
 2 files changed, 42 insertions(+), 107 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 729127d8586..7a88bc91182 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -741,6 +741,20 @@ (define_int_iterator MVE_VMLxLDAVxQ_P [
 		     VMLSLDAVXQ_P_S
 		     ])
 
+(define_int_iterator MVE_VMLxLDAVAxQ [
+		     VMLALDAVAQ_S VMLALDAVAQ_U
+		     VMLALDAVAXQ_S
+		     VMLSLDAVAQ_S
+		     VMLSLDAVAXQ_S
+		     ])
+
+(define_int_iterator MVE_VMLxLDAVAxQ_P [
+		     VMLALDAVAQ_P_S VMLALDAVAQ_P_U
+		     VMLALDAVAXQ_P_S
+		     VMLSLDAVAQ_P_S
+		     VMLSLDAVAXQ_P_S
+		     ])
+
 (define_int_iterator MVE_VRMLxLDAVxQ [
 		     VRMLALDAVHQ_S VRMLALDAVHQ_U
 		     VRMLALDAVHXQ_S
@@ -883,6 +897,10 @@ (define_int_attr mve_insn [
 		 (VMLADAVQ_S "vmladav") (VMLADAVQ_U "vmladav")
 		 (VMLADAVXQ_P_S "vmladavx")
 		 (VMLADAVXQ_S "vmladavx")
+		 (VMLALDAVAQ_P_S "vmlaldava") (VMLALDAVAQ_P_U "vmlaldava")
+		 (VMLALDAVAQ_S "vmlaldava") (VMLALDAVAQ_U "vmlaldava")
+		 (VMLALDAVAXQ_P_S "vmlaldavax")
+		 (VMLALDAVAXQ_S "vmlaldavax")
 		 (VMLALDAVQ_P_S "vmlaldav") (VMLALDAVQ_P_U "vmlaldav")
 		 (VMLALDAVQ_S "vmlaldav") (VMLALDAVQ_U "vmlaldav")
 		 (VMLALDAVXQ_P_S "vmlaldavx")
@@ -897,6 +915,10 @@ (define_int_attr mve_insn [
 		 (VMLSDAVQ_S "vmlsdav")
 		 (VMLSDAVXQ_P_S "vmlsdavx")
 		 (VMLSDAVXQ_S "vmlsdavx")
+		 (VMLSLDAVAQ_P_S "vmlsldava")
+		 (VMLSLDAVAQ_S "vmlsldava")
+		 (VMLSLDAVAXQ_P_S "vmlsldavax")
+		 (VMLSLDAVAXQ_S "vmlsldavax")
 		 (VMLSLDAVQ_P_S "vmlsldav")
 		 (VMLSLDAVQ_S "vmlsldav")
 		 (VMLSLDAVXQ_P_S "vmlsldavx")
@@ -2351,6 +2373,12 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VRMLSLDAVHQ_S "s")
 		       (VRMLSLDAVHXQ_P_S "s")
 		       (VRMLSLDAVHXQ_S "s")
+		       (VMLALDAVAXQ_P_S "s")
+		       (VMLALDAVAXQ_S "s")
+		       (VMLSLDAVAQ_P_S "s")
+		       (VMLSLDAVAQ_S "s")
+		       (VMLSLDAVAXQ_P_S "s")
+		       (VMLSLDAVAXQ_S "s")
 		       ])
 
 ;; Both kinds of return insn.
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index e2259aa48e9..c6fd634b5c0 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -2550,34 +2550,21 @@ (define_insn "@mve_<mve_insn>q_p_f<mode>"
    (set_attr "length""8")])
 
 ;;
-;; [vmlaldavaq_s, vmlaldavaq_u])
+;; [vmlaldavaq_s, vmlaldavaq_u]
+;; [vmlaldavaxq_s]
+;; [vmlsldavaq_s]
+;; [vmlsldavaxq_s]
 ;;
-(define_insn "mve_vmlaldavaq_<supf><mode>"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:MVE_5 3 "s_register_operand" "w")]
-	 VMLALDAVAQ))
-  ]
-  "TARGET_HAVE_MVE"
-  "vmlaldava.<supf>%#<V_sz_elem>\t%Q0, %R0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vmlaldavaxq_s])
-;;
-(define_insn "mve_vmlaldavaxq_s<mode>"
+(define_insn "@mve_<mve_insn>q_<supf><mode>"
   [
    (set (match_operand:DI 0 "s_register_operand" "=r")
 	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
 		       (match_operand:MVE_5 2 "s_register_operand" "w")
 		       (match_operand:MVE_5 3 "s_register_operand" "w")]
-	 VMLALDAVAXQ_S))
+	 MVE_VMLxLDAVAxQ))
   ]
   "TARGET_HAVE_MVE"
-  "vmlaldavax.s%#<V_sz_elem>\t%Q0, %R0, %q2, %q3"
+  "<mve_insn>.<supf>%#<V_sz_elem>\t%Q0, %R0, %q2, %q3"
   [(set_attr "type" "mve_move")
 ])
 
@@ -2600,38 +2587,6 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-;;
-;; [vmlsldavaq_s])
-;;
-(define_insn "mve_vmlsldavaq_s<mode>"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:MVE_5 3 "s_register_operand" "w")]
-	 VMLSLDAVAQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vmlsldava.s%#<V_sz_elem> %Q0, %R0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vmlsldavaxq_s])
-;;
-(define_insn "mve_vmlsldavaxq_s<mode>"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:MVE_5 3 "s_register_operand" "w")]
-	 VMLSLDAVAXQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vmlsldavax.s%#<V_sz_elem> %Q0, %R0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-])
-
 ;;
 ;; [vmovlbq_m_u, vmovlbq_m_s])
 ;; [vmovltq_m_u, vmovltq_m_s])
@@ -3336,36 +3291,22 @@ (define_insn "mve_vhcaddq_rot90_m_s<mode>"
    (set_attr "length""8")])
 
 ;;
-;; [vmlaldavaq_p_u, vmlaldavaq_p_s])
+;; [vmlaldavaq_p_u, vmlaldavaq_p_s]
+;; [vmlaldavaxq_p_s]
+;; [vmlsldavaq_p_s]
+;; [vmlsldavaxq_p_s]
 ;;
-(define_insn "mve_vmlaldavaq_p_<supf><mode>"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:MVE_5 3 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VMLALDAVAQ_P))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vmlaldavat.<supf>%#<V_sz_elem>	%Q0, %R0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vmlaldavaxq_p_s])
-;;
-(define_insn "mve_vmlaldavaxq_p_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_p_<supf><mode>"
   [
    (set (match_operand:DI 0 "s_register_operand" "=r")
 	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
 		       (match_operand:MVE_5 2 "s_register_operand" "w")
 		       (match_operand:MVE_5 3 "s_register_operand" "w")
 		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VMLALDAVAXQ_P))
+	 MVE_VMLxLDAVAxQ_P))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vmlaldavaxt.<supf>%#<V_sz_elem>\t%Q0, %R0, %q2, %q3"
+  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%Q0, %R0, %q2, %q3"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
@@ -3432,40 +3373,6 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-;;
-;; [vmlsldavaq_p_s])
-;;
-(define_insn "mve_vmlsldavaq_p_s<mode>"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:MVE_5 3 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VMLSLDAVAQ_P_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vmlsldavat.s%#<V_sz_elem>\t%Q0, %R0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vmlsldavaxq_p_s])
-;;
-(define_insn "mve_vmlsldavaxq_p_s<mode>"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:MVE_5 3 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VMLSLDAVAXQ_P_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vmlsldavaxt.s%#<V_sz_elem>\t%Q0, %R0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
 ;;
 ;; [vmullbq_poly_m_p])
 ;;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 18/24] arm: [MVE intrinsics] rework vmlaldavaq vmlaldavaxq vmlsldavaq vmlsldavaxq
  2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
                   ` (15 preceding siblings ...)
  2023-05-11 12:19 ` [PATCH 17/24] arm: [MVE intrinsics] factorize vmlaldavaq vmlaldavaxq vmlsldavaq vmlsldavaxq Christophe Lyon
@ 2023-05-11 12:19 ` Christophe Lyon
  2023-05-11 12:19 ` [PATCH 19/24] arm: [MVE intrinsics] add ternary shape Christophe Lyon
                   ` (6 subsequent siblings)
  23 siblings, 0 replies; 25+ messages in thread
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vmlaldavaq, vmlaldavaxq, vmlsldavaq, vmlsldavaxq using the
new MVE builtins framework.
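
For illustration only (not part of the patch), both the overloaded and
the predicated forms are expected to keep resolving as before, now
through the C++ framework rather than the _Generic macros removed
below (function name made up, MVE-enabled target assumed):

  #include <arm_mve.h>

  int64_t
  dot_p_s32 (int64_t acc, int32x4_t m1, int32x4_t m2, mve_pred16_t p)
  {
    /* Overload resolved to vmlaldavaq_p_s32; expected to emit
       vpst; vmlaldavat.s32.  */
    return vmlaldavaq_p (acc, m1, m2, p);
  }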

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vmlaldavaq, vmlaldavaxq)
	(vmlsldavaq, vmlsldavaxq): New.
	* config/arm/arm-mve-builtins-base.def (vmlaldavaq, vmlaldavaxq)
	(vmlsldavaq, vmlsldavaxq): New.
	* config/arm/arm-mve-builtins-base.h (vmlaldavaq, vmlaldavaxq)
	(vmlsldavaq, vmlsldavaxq): New.
	* config/arm/arm_mve.h (vmlaldavaq): Remove.
	(vmlaldavaxq): Remove.
	(vmlsldavaq): Remove.
	(vmlsldavaxq): Remove.
	(vmlaldavaq_p): Remove.
	(vmlaldavaxq_p): Remove.
	(vmlsldavaq_p): Remove.
	(vmlsldavaxq_p): Remove.
	(vmlaldavaq_s16): Remove.
	(vmlaldavaxq_s16): Remove.
	(vmlsldavaq_s16): Remove.
	(vmlsldavaxq_s16): Remove.
	(vmlaldavaq_u16): Remove.
	(vmlaldavaq_s32): Remove.
	(vmlaldavaxq_s32): Remove.
	(vmlsldavaq_s32): Remove.
	(vmlsldavaxq_s32): Remove.
	(vmlaldavaq_u32): Remove.
	(vmlaldavaq_p_s32): Remove.
	(vmlaldavaq_p_s16): Remove.
	(vmlaldavaq_p_u32): Remove.
	(vmlaldavaq_p_u16): Remove.
	(vmlaldavaxq_p_s32): Remove.
	(vmlaldavaxq_p_s16): Remove.
	(vmlsldavaq_p_s32): Remove.
	(vmlsldavaq_p_s16): Remove.
	(vmlsldavaxq_p_s32): Remove.
	(vmlsldavaxq_p_s16): Remove.
	(__arm_vmlaldavaq_s16): Remove.
	(__arm_vmlaldavaxq_s16): Remove.
	(__arm_vmlsldavaq_s16): Remove.
	(__arm_vmlsldavaxq_s16): Remove.
	(__arm_vmlaldavaq_u16): Remove.
	(__arm_vmlaldavaq_s32): Remove.
	(__arm_vmlaldavaxq_s32): Remove.
	(__arm_vmlsldavaq_s32): Remove.
	(__arm_vmlsldavaxq_s32): Remove.
	(__arm_vmlaldavaq_u32): Remove.
	(__arm_vmlaldavaq_p_s32): Remove.
	(__arm_vmlaldavaq_p_s16): Remove.
	(__arm_vmlaldavaq_p_u32): Remove.
	(__arm_vmlaldavaq_p_u16): Remove.
	(__arm_vmlaldavaxq_p_s32): Remove.
	(__arm_vmlaldavaxq_p_s16): Remove.
	(__arm_vmlsldavaq_p_s32): Remove.
	(__arm_vmlsldavaq_p_s16): Remove.
	(__arm_vmlsldavaxq_p_s32): Remove.
	(__arm_vmlsldavaxq_p_s16): Remove.
	(__arm_vmlaldavaq): Remove.
	(__arm_vmlaldavaxq): Remove.
	(__arm_vmlsldavaq): Remove.
	(__arm_vmlsldavaxq): Remove.
	(__arm_vmlaldavaq_p): Remove.
	(__arm_vmlaldavaxq_p): Remove.
	(__arm_vmlsldavaq_p): Remove.
	(__arm_vmlsldavaxq_p): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   4 +
 gcc/config/arm/arm-mve-builtins-base.def |   4 +
 gcc/config/arm/arm-mve-builtins-base.h   |   4 +
 gcc/config/arm/arm_mve.h                 | 368 -----------------------
 4 files changed, 12 insertions(+), 368 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 142ba9357a1..2b0c800013c 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -285,12 +285,16 @@ FUNCTION_PRED_P_S (vmladavaxq, VMLADAVAXQ)
 FUNCTION_PRED_P_S_U (vmladavaq, VMLADAVAQ)
 FUNCTION_PRED_P_S_U (vmladavq, VMLADAVQ)
 FUNCTION_PRED_P_S (vmladavxq, VMLADAVXQ)
+FUNCTION_PRED_P_S_U (vmlaldavaq, VMLALDAVAQ)
+FUNCTION_PRED_P_S (vmlaldavaxq, VMLALDAVAXQ)
 FUNCTION_PRED_P_S_U (vmlaldavq, VMLALDAVQ)
 FUNCTION_PRED_P_S (vmlaldavxq, VMLALDAVXQ)
 FUNCTION_PRED_P_S (vmlsdavaq, VMLSDAVAQ)
 FUNCTION_PRED_P_S (vmlsdavaxq, VMLSDAVAXQ)
 FUNCTION_PRED_P_S (vmlsdavq, VMLSDAVQ)
 FUNCTION_PRED_P_S (vmlsdavxq, VMLSDAVXQ)
+FUNCTION_PRED_P_S (vmlsldavaq, VMLSLDAVAQ)
+FUNCTION_PRED_P_S (vmlsldavaxq, VMLSLDAVAXQ)
 FUNCTION_PRED_P_S (vmlsldavq, VMLSLDAVQ)
 FUNCTION_PRED_P_S (vmlsldavxq, VMLSLDAVXQ)
 FUNCTION_WITHOUT_N_NO_F (vmovlbq, VMOVLBQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 1dd3ad3489b..d61badb99d9 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -54,12 +54,16 @@ DEF_MVE_FUNCTION (vmladavaq, binary_acca_int32, all_integer, p_or_none)
 DEF_MVE_FUNCTION (vmladavaxq, binary_acca_int32, all_signed, p_or_none)
 DEF_MVE_FUNCTION (vmladavq, binary_acc_int32, all_integer, p_or_none)
 DEF_MVE_FUNCTION (vmladavxq, binary_acc_int32, all_signed, p_or_none)
+DEF_MVE_FUNCTION (vmlaldavaq, binary_acca_int64, integer_16_32, p_or_none)
+DEF_MVE_FUNCTION (vmlaldavaxq, binary_acca_int64, signed_16_32, p_or_none)
 DEF_MVE_FUNCTION (vmlaldavq, binary_acc_int64, integer_16_32, p_or_none)
 DEF_MVE_FUNCTION (vmlaldavxq, binary_acc_int64, signed_16_32, p_or_none)
 DEF_MVE_FUNCTION (vmlsdavaq, binary_acca_int32, all_signed, p_or_none)
 DEF_MVE_FUNCTION (vmlsdavaxq, binary_acca_int32, all_signed, p_or_none)
 DEF_MVE_FUNCTION (vmlsdavq, binary_acc_int32, all_integer, p_or_none)
 DEF_MVE_FUNCTION (vmlsdavxq, binary_acc_int32, all_signed, p_or_none)
+DEF_MVE_FUNCTION (vmlsldavaq, binary_acca_int64, signed_16_32, p_or_none)
+DEF_MVE_FUNCTION (vmlsldavaxq, binary_acca_int64, signed_16_32, p_or_none)
 DEF_MVE_FUNCTION (vmlsldavq, binary_acc_int64, signed_16_32, p_or_none)
 DEF_MVE_FUNCTION (vmlsldavxq, binary_acc_int64, signed_16_32, p_or_none)
 DEF_MVE_FUNCTION (vmovlbq, unary_widen, integer_8_16, mx_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 9604991b168..47bb8712fe5 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -67,12 +67,16 @@ extern const function_base *const vmladavaq;
 extern const function_base *const vmladavaxq;
 extern const function_base *const vmladavq;
 extern const function_base *const vmladavxq;
+extern const function_base *const vmlaldavaq;
+extern const function_base *const vmlaldavaxq;
 extern const function_base *const vmlaldavq;
 extern const function_base *const vmlaldavxq;
 extern const function_base *const vmlsdavaq;
 extern const function_base *const vmlsdavaxq;
 extern const function_base *const vmlsdavq;
 extern const function_base *const vmlsdavxq;
+extern const function_base *const vmlsldavaq;
+extern const function_base *const vmlsldavaxq;
 extern const function_base *const vmlsldavq;
 extern const function_base *const vmlsldavxq;
 extern const function_base *const vmovlbq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index e662c0c4677..506ac3371e4 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -81,10 +81,6 @@
 #define vrmlaldavhaxq(__a, __b, __c) __arm_vrmlaldavhaxq(__a, __b, __c)
 #define vrmlsldavhaq(__a, __b, __c) __arm_vrmlsldavhaq(__a, __b, __c)
 #define vrmlsldavhaxq(__a, __b, __c) __arm_vrmlsldavhaxq(__a, __b, __c)
-#define vmlaldavaq(__a, __b, __c) __arm_vmlaldavaq(__a, __b, __c)
-#define vmlaldavaxq(__a, __b, __c) __arm_vmlaldavaxq(__a, __b, __c)
-#define vmlsldavaq(__a, __b, __c) __arm_vmlsldavaq(__a, __b, __c)
-#define vmlsldavaxq(__a, __b, __c) __arm_vmlsldavaxq(__a, __b, __c)
 #define vsriq_m(__a, __b, __imm, __p) __arm_vsriq_m(__a, __b, __imm, __p)
 #define vqshluq_m(__inactive, __a, __imm, __p) __arm_vqshluq_m(__inactive, __a, __imm, __p)
 #define vbicq_m(__inactive, __a, __b, __p) __arm_vbicq_m(__inactive, __a, __b, __p)
@@ -111,10 +107,6 @@
 #define vqrdmlsdhq_m(__inactive, __a, __b, __p) __arm_vqrdmlsdhq_m(__inactive, __a, __b, __p)
 #define vqrdmlsdhxq_m(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m(__inactive, __a, __b, __p)
 #define vsliq_m(__a, __b, __imm, __p) __arm_vsliq_m(__a, __b, __imm, __p)
-#define vmlaldavaq_p(__a, __b, __c, __p) __arm_vmlaldavaq_p(__a, __b, __c, __p)
-#define vmlaldavaxq_p(__a, __b, __c, __p) __arm_vmlaldavaxq_p(__a, __b, __c, __p)
-#define vmlsldavaq_p(__a, __b, __c, __p) __arm_vmlsldavaq_p(__a, __b, __c, __p)
-#define vmlsldavaxq_p(__a, __b, __c, __p) __arm_vmlsldavaxq_p(__a, __b, __c, __p)
 #define vmullbq_poly_m(__inactive, __a, __b, __p) __arm_vmullbq_poly_m(__inactive, __a, __b, __p)
 #define vmulltq_poly_m(__inactive, __a, __b, __p) __arm_vmulltq_poly_m(__inactive, __a, __b, __p)
 #define vqdmullbq_m(__inactive, __a, __b, __p) __arm_vqdmullbq_m(__inactive, __a, __b, __p)
@@ -524,10 +516,6 @@
 #define vfmaq_n_f16(__a, __b, __c) __arm_vfmaq_n_f16(__a, __b, __c)
 #define vfmasq_n_f16(__a, __b, __c) __arm_vfmasq_n_f16(__a, __b, __c)
 #define vfmsq_f16(__a, __b, __c) __arm_vfmsq_f16(__a, __b, __c)
-#define vmlaldavaq_s16(__a, __b, __c) __arm_vmlaldavaq_s16(__a, __b, __c)
-#define vmlaldavaxq_s16(__a, __b, __c) __arm_vmlaldavaxq_s16(__a, __b, __c)
-#define vmlsldavaq_s16(__a, __b, __c) __arm_vmlsldavaq_s16(__a, __b, __c)
-#define vmlsldavaxq_s16(__a, __b, __c) __arm_vmlsldavaxq_s16(__a, __b, __c)
 #define vcvtmq_m_s16_f16(__inactive, __a, __p) __arm_vcvtmq_m_s16_f16(__inactive, __a, __p)
 #define vcvtnq_m_s16_f16(__inactive, __a, __p) __arm_vcvtnq_m_s16_f16(__inactive, __a, __p)
 #define vcvtpq_m_s16_f16(__inactive, __a, __p) __arm_vcvtpq_m_s16_f16(__inactive, __a, __p)
@@ -538,7 +526,6 @@
 #define vcvtnq_m_u16_f16(__inactive, __a, __p) __arm_vcvtnq_m_u16_f16(__inactive, __a, __p)
 #define vcvtpq_m_u16_f16(__inactive, __a, __p) __arm_vcvtpq_m_u16_f16(__inactive, __a, __p)
 #define vcvtq_m_u16_f16(__inactive, __a, __p) __arm_vcvtq_m_u16_f16(__inactive, __a, __p)
-#define vmlaldavaq_u16(__a, __b, __c) __arm_vmlaldavaq_u16(__a, __b, __c)
 #define vmvnq_m_n_s32(__inactive,  __imm, __p) __arm_vmvnq_m_n_s32(__inactive,  __imm, __p)
 #define vcmlaq_f32(__a, __b, __c) __arm_vcmlaq_f32(__a, __b, __c)
 #define vcmlaq_rot180_f32(__a, __b, __c) __arm_vcmlaq_rot180_f32(__a, __b, __c)
@@ -548,10 +535,6 @@
 #define vfmaq_n_f32(__a, __b, __c) __arm_vfmaq_n_f32(__a, __b, __c)
 #define vfmasq_n_f32(__a, __b, __c) __arm_vfmasq_n_f32(__a, __b, __c)
 #define vfmsq_f32(__a, __b, __c) __arm_vfmsq_f32(__a, __b, __c)
-#define vmlaldavaq_s32(__a, __b, __c) __arm_vmlaldavaq_s32(__a, __b, __c)
-#define vmlaldavaxq_s32(__a, __b, __c) __arm_vmlaldavaxq_s32(__a, __b, __c)
-#define vmlsldavaq_s32(__a, __b, __c) __arm_vmlsldavaq_s32(__a, __b, __c)
-#define vmlsldavaxq_s32(__a, __b, __c) __arm_vmlsldavaxq_s32(__a, __b, __c)
 #define vcvtmq_m_s32_f32(__inactive, __a, __p) __arm_vcvtmq_m_s32_f32(__inactive, __a, __p)
 #define vcvtnq_m_s32_f32(__inactive, __a, __p) __arm_vcvtnq_m_s32_f32(__inactive, __a, __p)
 #define vcvtpq_m_s32_f32(__inactive, __a, __p) __arm_vcvtpq_m_s32_f32(__inactive, __a, __p)
@@ -562,7 +545,6 @@
 #define vcvtnq_m_u32_f32(__inactive, __a, __p) __arm_vcvtnq_m_u32_f32(__inactive, __a, __p)
 #define vcvtpq_m_u32_f32(__inactive, __a, __p) __arm_vcvtpq_m_u32_f32(__inactive, __a, __p)
 #define vcvtq_m_u32_f32(__inactive, __a, __p) __arm_vcvtq_m_u32_f32(__inactive, __a, __p)
-#define vmlaldavaq_u32(__a, __b, __c) __arm_vmlaldavaq_u32(__a, __b, __c)
 #define vsriq_m_n_s8(__a, __b,  __imm, __p) __arm_vsriq_m_n_s8(__a, __b,  __imm, __p)
 #define vcvtq_m_n_f16_u16(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_f16_u16(__inactive, __a,  __imm6, __p)
 #define vqshluq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vqshluq_m_n_s8(__inactive, __a,  __imm, __p)
@@ -678,16 +660,6 @@
 #define vsliq_m_n_u8(__a, __b,  __imm, __p) __arm_vsliq_m_n_u8(__a, __b,  __imm, __p)
 #define vsliq_m_n_u32(__a, __b,  __imm, __p) __arm_vsliq_m_n_u32(__a, __b,  __imm, __p)
 #define vsliq_m_n_u16(__a, __b,  __imm, __p) __arm_vsliq_m_n_u16(__a, __b,  __imm, __p)
-#define vmlaldavaq_p_s32(__a, __b, __c, __p) __arm_vmlaldavaq_p_s32(__a, __b, __c, __p)
-#define vmlaldavaq_p_s16(__a, __b, __c, __p) __arm_vmlaldavaq_p_s16(__a, __b, __c, __p)
-#define vmlaldavaq_p_u32(__a, __b, __c, __p) __arm_vmlaldavaq_p_u32(__a, __b, __c, __p)
-#define vmlaldavaq_p_u16(__a, __b, __c, __p) __arm_vmlaldavaq_p_u16(__a, __b, __c, __p)
-#define vmlaldavaxq_p_s32(__a, __b, __c, __p) __arm_vmlaldavaxq_p_s32(__a, __b, __c, __p)
-#define vmlaldavaxq_p_s16(__a, __b, __c, __p) __arm_vmlaldavaxq_p_s16(__a, __b, __c, __p)
-#define vmlsldavaq_p_s32(__a, __b, __c, __p) __arm_vmlsldavaq_p_s32(__a, __b, __c, __p)
-#define vmlsldavaq_p_s16(__a, __b, __c, __p) __arm_vmlsldavaq_p_s16(__a, __b, __c, __p)
-#define vmlsldavaxq_p_s32(__a, __b, __c, __p) __arm_vmlsldavaxq_p_s32(__a, __b, __c, __p)
-#define vmlsldavaxq_p_s16(__a, __b, __c, __p) __arm_vmlsldavaxq_p_s16(__a, __b, __c, __p)
 #define vmullbq_poly_m_p8(__inactive, __a, __b, __p) __arm_vmullbq_poly_m_p8(__inactive, __a, __b, __p)
 #define vmullbq_poly_m_p16(__inactive, __a, __b, __p) __arm_vmullbq_poly_m_p16(__inactive, __a, __b, __p)
 #define vmulltq_poly_m_p8(__inactive, __a, __b, __p) __arm_vmulltq_poly_m_p8(__inactive, __a, __b, __p)
@@ -2571,34 +2543,6 @@ __arm_vmvnq_m_n_s16 (int16x8_t __inactive, const int __imm, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_n_sv8hi (__inactive, __imm, __p);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaq_s16 (int64_t __a, int16x8_t __b, int16x8_t __c)
-{
-  return __builtin_mve_vmlaldavaq_sv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaxq_s16 (int64_t __a, int16x8_t __b, int16x8_t __c)
-{
-  return __builtin_mve_vmlaldavaxq_sv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavaq_s16 (int64_t __a, int16x8_t __b, int16x8_t __c)
-{
-  return __builtin_mve_vmlsldavaq_sv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavaxq_s16 (int64_t __a, int16x8_t __b, int16x8_t __c)
-{
-  return __builtin_mve_vmlsldavaxq_sv8hi (__a, __b, __c);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_n_u16 (uint16x8_t __inactive, const int __imm, mve_pred16_t __p)
@@ -2606,13 +2550,6 @@ __arm_vmvnq_m_n_u16 (uint16x8_t __inactive, const int __imm, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_n_uv8hi (__inactive, __imm, __p);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaq_u16 (uint64_t __a, uint16x8_t __b, uint16x8_t __c)
-{
-  return __builtin_mve_vmlaldavaq_uv8hi (__a, __b, __c);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_n_s32 (int32x4_t __inactive, const int __imm, mve_pred16_t __p)
@@ -2620,34 +2557,6 @@ __arm_vmvnq_m_n_s32 (int32x4_t __inactive, const int __imm, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_n_sv4si (__inactive, __imm, __p);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaq_s32 (int64_t __a, int32x4_t __b, int32x4_t __c)
-{
-  return __builtin_mve_vmlaldavaq_sv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaxq_s32 (int64_t __a, int32x4_t __b, int32x4_t __c)
-{
-  return __builtin_mve_vmlaldavaxq_sv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavaq_s32 (int64_t __a, int32x4_t __b, int32x4_t __c)
-{
-  return __builtin_mve_vmlsldavaq_sv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavaxq_s32 (int64_t __a, int32x4_t __b, int32x4_t __c)
-{
-  return __builtin_mve_vmlsldavaxq_sv4si (__a, __b, __c);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_n_u32 (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
@@ -2655,13 +2564,6 @@ __arm_vmvnq_m_n_u32 (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_n_uv4si (__inactive, __imm, __p);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaq_u32 (uint64_t __a, uint32x4_t __b, uint32x4_t __c)
-{
-  return __builtin_mve_vmlaldavaq_uv4si (__a, __b, __c);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -3439,76 +3341,6 @@ __arm_vsliq_m_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm, mve_pred16
   return __builtin_mve_vsliq_m_n_uv8hi (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaq_p_s32 (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaldavaq_p_sv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaq_p_s16 (int64_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaldavaq_p_sv8hi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaq_p_u32 (uint64_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaldavaq_p_uv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaq_p_u16 (uint64_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaldavaq_p_uv8hi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaxq_p_s32 (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaldavaxq_p_sv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaxq_p_s16 (int64_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaldavaxq_p_sv8hi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavaq_p_s32 (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsldavaq_p_sv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavaq_p_s16 (int64_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsldavaq_p_sv8hi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavaxq_p_s32 (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsldavaxq_p_sv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavaxq_p_s16 (int64_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsldavaxq_p_sv8hi (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmullbq_poly_m_p8 (uint16x8_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
@@ -9611,34 +9443,6 @@ __arm_vmvnq_m (int16x8_t __inactive, const int __imm, mve_pred16_t __p)
  return __arm_vmvnq_m_n_s16 (__inactive, __imm, __p);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaq (int64_t __a, int16x8_t __b, int16x8_t __c)
-{
- return __arm_vmlaldavaq_s16 (__a, __b, __c);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaxq (int64_t __a, int16x8_t __b, int16x8_t __c)
-{
- return __arm_vmlaldavaxq_s16 (__a, __b, __c);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavaq (int64_t __a, int16x8_t __b, int16x8_t __c)
-{
- return __arm_vmlsldavaq_s16 (__a, __b, __c);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavaxq (int64_t __a, int16x8_t __b, int16x8_t __c)
-{
- return __arm_vmlsldavaxq_s16 (__a, __b, __c);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (uint16x8_t __inactive, const int __imm, mve_pred16_t __p)
@@ -9646,13 +9450,6 @@ __arm_vmvnq_m (uint16x8_t __inactive, const int __imm, mve_pred16_t __p)
  return __arm_vmvnq_m_n_u16 (__inactive, __imm, __p);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaq (uint64_t __a, uint16x8_t __b, uint16x8_t __c)
-{
- return __arm_vmlaldavaq_u16 (__a, __b, __c);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (int32x4_t __inactive, const int __imm, mve_pred16_t __p)
@@ -9660,34 +9457,6 @@ __arm_vmvnq_m (int32x4_t __inactive, const int __imm, mve_pred16_t __p)
  return __arm_vmvnq_m_n_s32 (__inactive, __imm, __p);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaq (int64_t __a, int32x4_t __b, int32x4_t __c)
-{
- return __arm_vmlaldavaq_s32 (__a, __b, __c);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaxq (int64_t __a, int32x4_t __b, int32x4_t __c)
-{
- return __arm_vmlaldavaxq_s32 (__a, __b, __c);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavaq (int64_t __a, int32x4_t __b, int32x4_t __c)
-{
- return __arm_vmlsldavaq_s32 (__a, __b, __c);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavaxq (int64_t __a, int32x4_t __b, int32x4_t __c)
-{
- return __arm_vmlsldavaxq_s32 (__a, __b, __c);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
@@ -9695,13 +9464,6 @@ __arm_vmvnq_m (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
  return __arm_vmvnq_m_n_u32 (__inactive, __imm, __p);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaq (uint64_t __a, uint32x4_t __b, uint32x4_t __c)
-{
- return __arm_vmlaldavaq_u32 (__a, __b, __c);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -10479,76 +10241,6 @@ __arm_vsliq_m (uint16x8_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p
  return __arm_vsliq_m_n_u16 (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaq_p (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vmlaldavaq_p_s32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaq_p (int64_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vmlaldavaq_p_s16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaq_p (uint64_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vmlaldavaq_p_u32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaq_p (uint64_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vmlaldavaq_p_u16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaxq_p (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vmlaldavaxq_p_s32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaxq_p (int64_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vmlaldavaxq_p_s16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavaq_p (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vmlsldavaq_p_s32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavaq_p (int64_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vmlsldavaq_p_s16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavaxq_p (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vmlsldavaxq_p_s32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsldavaxq_p (int64_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vmlsldavaxq_p_s16 (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmullbq_poly_m (uint16x8_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
@@ -16633,36 +16325,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmlsdhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmlsdhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
 
-#define __arm_vmlaldavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavaq_p_s16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavaq_p_s32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmlaldavaq_p_u16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmlaldavaq_p_u32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
-#define __arm_vmlaldavaxq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavaxq_p_s16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavaxq_p_s32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
-#define __arm_vmlsldavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsldavaq_p_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsldavaq_p_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
-#define __arm_vmlsldavaxq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsldavaxq_p_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsldavaxq_p_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
 #define __arm_vrmlaldavhaxq_p(p0,p1,p2,p3) __arm_vrmlaldavhaxq_p_s32(p0,p1,p2,p3)
 
 #define __arm_vrmlsldavhaq_p(p0,p1,p2,p3) __arm_vrmlsldavhaq_p_s32(p0,p1,p2,p3)
@@ -16966,36 +16628,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2, p3), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2, p3));})
 
-#define __arm_vmlaldavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavaq_s16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavaq_s32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmlaldavaq_u16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmlaldavaq_u32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
-
-#define __arm_vmlaldavaxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavaxq_s16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavaxq_s32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
-#define __arm_vmlsldavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsldavaq_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsldavaq_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
-#define __arm_vmlsldavaxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsldavaxq_s16(__p0, __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsldavaxq_s32(__p0, __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
 #define __arm_vmullbq_int_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 19/24] arm: [MVE intrinsics] add ternary shape
  2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
                   ` (16 preceding siblings ...)
  2023-05-11 12:19 ` [PATCH 18/24] arm: [MVE intrinsics] rework " Christophe Lyon
@ 2023-05-11 12:19 ` Christophe Lyon
  2023-05-11 12:19 ` [PATCH 20/24] arm: [MVE intrinsics] factorize vqdmladhq vqdmladhxq vqdmlsdhq vqdmlsdhxq vqrdmladhq vqrdmladhxq vqrdmlsdhq vqrdmlsdhxq Christophe Lyon
                   ` (5 subsequent siblings)
  23 siblings, 0 replies; 25+ messages in thread
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the ternary shape description.
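
For illustration only (not part of the patch), a call matching this
shape could look as follows (function name made up, MVE-enabled target
assumed):

  #include <arm_mve.h>

  int8x16_t
  tern_s8 (int8x16_t inactive, int8x16_t a, int8x16_t b)
  {
    /* All three arguments and the result share the same vector type,
       as per the ternary prototype added below.  */
    return vqrdmlsdhxq (inactive, a, b);
  }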

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (ternary): New.
	* config/arm/arm-mve-builtins-shapes.h (ternary): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 26 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 27 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index af770fd3e39..4455a253579 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1163,6 +1163,32 @@ struct inherent_def : public nonoverloaded_base
 };
 SHAPE (inherent)
 
+/* <T0>_t vfoo[_t0](<T0>_t, <T0>_t, <T0>_t)
+
+   i.e. the standard shape for ternary operations that operate on
+   uniform types.
+
+   Example: vqrdmlsdhxq.
+   int8x16_t [__arm_]vqrdmlsdhxq[_s8](int8x16_t inactive, int8x16_t a, int8x16_t b)
+   int8x16_t [__arm_]vqrdmlsdhxq_m[_s8](int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)  */
+struct ternary_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+    build_all (b, "v0,v0,v0,v0", group, MODE_none, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    return r.resolve_uniform_opt_n (3);
+  }
+};
+SHAPE (ternary)
+
 /* <T0>_t vfoo[_t0](<T0>_t)
 
    i.e. the standard shape for unary operations that operate on
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index 1c4254122bc..b3ddd0a9e8d 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -56,6 +56,7 @@ namespace arm_mve
     extern const function_shape *const cmp;
     extern const function_shape *const create;
     extern const function_shape *const inherent;
+    extern const function_shape *const ternary;
     extern const function_shape *const unary;
     extern const function_shape *const unary_acc;
     extern const function_shape *const unary_convert;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 20/24] arm: [MVE intrinsics] factorize vqdmladhq vqdmladhxq vqdmlsdhq vqdmlsdhxq vqrdmladhq vqrdmladhxq vqrdmlsdhq vqrdmlsdhxq
  2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
                   ` (17 preceding siblings ...)
  2023-05-11 12:19 ` [PATCH 19/24] arm: [MVE intrinsics] add ternary shape Christophe Lyon
@ 2023-05-11 12:19 ` Christophe Lyon
  2023-05-11 12:19 ` [PATCH 21/24] arm: [MVE intrinsics] rework vqrdmladhq vqrdmladhxq vqrdmlsdhq vqrdmlsdhxq vqdmladhq vqdmladhxq vqdmlsdhq vqdmlsdhxq Christophe Lyon
                   ` (4 subsequent siblings)
  23 siblings, 0 replies; 25+ messages in thread
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vqdmladhq, vqdmladhxq, vqdmlsdhq, vqdmlsdhxq, vqrdmladhq,
vqrdmladhxq, vqrdmlsdhq, vqrdmlsdhxq builtins so that they use the
same parameterized names.
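
As an illustration only (not part of the patch), the merged insn keeps
the per-unspec mnemonic, e.g. (function name made up, MVE-enabled
target assumed):

  #include <arm_mve.h>

  int16x8_t
  dmladh_s16 (int16x8_t inactive, int16x8_t a, int16x8_t b)
  {
    /* Expected to assemble to a vqdmladh.s16 instruction.  */
    return vqdmladhq_s16 (inactive, a, b);
  }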

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MVE_VQxDMLxDHxQ_S): New.
	(mve_insn): Add vqdmladh, vqdmladhx, vqdmlsdh, vqdmlsdhx,
	vqrdmladh, vqrdmladhx, vqrdmlsdh, vqrdmlsdhx.
	(supf): Add VQDMLADHQ_S, VQDMLADHXQ_S, VQDMLSDHQ_S, VQDMLSDHXQ_S,
	VQRDMLADHQ_S, VQRDMLADHXQ_S, VQRDMLSDHQ_S, VQRDMLSDHXQ_S.
	* config/arm/mve.md (mve_vqrdmladhq_s<mode>)
	(mve_vqrdmladhxq_s<mode>, mve_vqrdmlsdhq_s<mode>)
	(mve_vqrdmlsdhxq_s<mode>, mve_vqdmlsdhxq_s<mode>)
	(mve_vqdmlsdhq_s<mode>, mve_vqdmladhxq_s<mode>)
	(mve_vqdmladhq_s<mode>): Merge into ...
	(@mve_<mve_insn>q_<supf><mode>): ... this.
---
 gcc/config/arm/iterators.md |  27 ++++++++
 gcc/config/arm/mve.md       | 127 ++++--------------------------------
 2 files changed, 38 insertions(+), 116 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 7a88bc91182..c23ca7361c1 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -755,6 +755,17 @@ (define_int_iterator MVE_VMLxLDAVAxQ_P [
 		     VMLSLDAVAXQ_P_S
 		     ])
 
+(define_int_iterator MVE_VQxDMLxDHxQ_S [
+		     VQDMLADHQ_S
+		     VQDMLADHXQ_S
+		     VQDMLSDHQ_S
+		     VQDMLSDHXQ_S
+		     VQRDMLADHQ_S
+		     VQRDMLADHXQ_S
+		     VQRDMLSDHQ_S
+		     VQRDMLSDHXQ_S
+		     ])
+
 (define_int_iterator MVE_VRMLxLDAVxQ [
 		     VRMLALDAVHQ_S VRMLALDAVHQ_U
 		     VRMLALDAVHXQ_S
@@ -948,11 +959,15 @@ (define_int_attr mve_insn [
 		 (VQADDQ_N_S "vqadd") (VQADDQ_N_U "vqadd")
 		 (VQADDQ_S "vqadd") (VQADDQ_U "vqadd")
 		 (VQDMLADHQ_M_S "vqdmladh")
+		 (VQDMLADHQ_S "vqdmladh")
 		 (VQDMLADHXQ_M_S "vqdmladhx")
+		 (VQDMLADHXQ_S "vqdmladhx")
 		 (VQDMLAHQ_M_N_S "vqdmlah")
 		 (VQDMLASHQ_M_N_S "vqdmlash")
 		 (VQDMLSDHQ_M_S "vqdmlsdh")
+		 (VQDMLSDHQ_S "vqdmlsdh")
 		 (VQDMLSDHXQ_M_S "vqdmlsdhx")
+		 (VQDMLSDHXQ_S "vqdmlsdhx")
 		 (VQDMULHQ_M_N_S "vqdmulh")
 		 (VQDMULHQ_M_S "vqdmulh")
 		 (VQDMULHQ_N_S "vqdmulh")
@@ -968,11 +983,15 @@ (define_int_attr mve_insn [
 		 (VQNEGQ_M_S "vqneg")
 		 (VQNEGQ_S "vqneg")
 		 (VQRDMLADHQ_M_S "vqrdmladh")
+		 (VQRDMLADHQ_S "vqrdmladh")
 		 (VQRDMLADHXQ_M_S "vqrdmladhx")
+		 (VQRDMLADHXQ_S "vqrdmladhx")
 		 (VQRDMLAHQ_M_N_S "vqrdmlah")
 		 (VQRDMLASHQ_M_N_S "vqrdmlash")
 		 (VQRDMLSDHQ_M_S "vqrdmlsdh")
+		 (VQRDMLSDHQ_S "vqrdmlsdh")
 		 (VQRDMLSDHXQ_M_S "vqrdmlsdhx")
+		 (VQRDMLSDHXQ_S "vqrdmlsdhx")
 		 (VQRDMULHQ_M_N_S "vqrdmulh")
 		 (VQRDMULHQ_M_S "vqrdmulh")
 		 (VQRDMULHQ_N_S "vqrdmulh")
@@ -2379,6 +2398,14 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VMLSLDAVAQ_S "s")
 		       (VMLSLDAVAXQ_P_S "s")
 		       (VMLSLDAVAXQ_S "s")
+		       (VQDMLADHQ_S "s")
+		       (VQDMLADHXQ_S "s")
+		       (VQDMLSDHQ_S "s")
+		       (VQDMLSDHXQ_S "s")
+		       (VQRDMLADHQ_S "s")
+		       (VQRDMLADHXQ_S "s")
+		       (VQRDMLSDHQ_S "s")
+		       (VQRDMLSDHXQ_S "s")
 		       ])
 
 ;; Both kinds of return insn.
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index c6fd634b5c0..bf4d18455fe 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -2051,34 +2051,25 @@ (define_insn "mve_vqdmlashq_n_<supf><mode>"
 ])
 
 ;;
-;; [vqrdmladhq_s])
+;; [vqdmladhq_s]
+;; [vqdmladhxq_s]
+;; [vqdmlsdhq_s]
+;; [vqdmlsdhxq_s]
+;; [vqrdmladhq_s]
+;; [vqrdmladhxq_s]
+;; [vqrdmlsdhq_s]
+;; [vqrdmlsdhxq_s]
 ;;
-(define_insn "mve_vqrdmladhq_s<mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:MVE_2 3 "s_register_operand" "w")]
-	 VQRDMLADHQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqrdmladh.s%#<V_sz_elem>\t%q0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vqrdmladhxq_s])
-;;
-(define_insn "mve_vqrdmladhxq_s<mode>"
+(define_insn "@mve_<mve_insn>q_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
 		       (match_operand:MVE_2 2 "s_register_operand" "w")
 		       (match_operand:MVE_2 3 "s_register_operand" "w")]
-	 VQRDMLADHXQ_S))
+	 MVE_VQxDMLxDHxQ_S))
   ]
   "TARGET_HAVE_MVE"
-  "vqrdmladhx.s%#<V_sz_elem>\t%q0, %q2, %q3"
+  "<mve_insn>.s%#<V_sz_elem>\t%q0, %q2, %q3"
   [(set_attr "type" "mve_move")
 ])
 
@@ -2114,38 +2105,6 @@ (define_insn "mve_vqrdmlashq_n_<supf><mode>"
   [(set_attr "type" "mve_move")
 ])
 
-;;
-;; [vqrdmlsdhq_s])
-;;
-(define_insn "mve_vqrdmlsdhq_s<mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:MVE_2 3 "s_register_operand" "w")]
-	 VQRDMLSDHQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqrdmlsdh.s%#<V_sz_elem>\t%q0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vqrdmlsdhxq_s])
-;;
-(define_insn "mve_vqrdmlsdhxq_s<mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:MVE_2 3 "s_register_operand" "w")]
-	 VQRDMLSDHXQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqrdmlsdhx.s%#<V_sz_elem>\t%q0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-])
-
 ;;
 ;; [vqrshlq_m_n_s, vqrshlq_m_n_u]
 ;; [vrshlq_m_n_s, vrshlq_m_n_u]
@@ -2228,70 +2187,6 @@ (define_insn "mve_vsriq_n_<supf><mode>"
   [(set_attr "type" "mve_move")
 ])
 
-;;
-;; [vqdmlsdhxq_s])
-;;
-(define_insn "mve_vqdmlsdhxq_s<mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:MVE_2 3 "s_register_operand" "w")]
-	 VQDMLSDHXQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqdmlsdhx.s%#<V_sz_elem>\t%q0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vqdmlsdhq_s])
-;;
-(define_insn "mve_vqdmlsdhq_s<mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:MVE_2 3 "s_register_operand" "w")]
-	 VQDMLSDHQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqdmlsdh.s%#<V_sz_elem>\t%q0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vqdmladhxq_s])
-;;
-(define_insn "mve_vqdmladhxq_s<mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:MVE_2 3 "s_register_operand" "w")]
-	 VQDMLADHXQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqdmladhx.s%#<V_sz_elem>\t%q0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vqdmladhq_s])
-;;
-(define_insn "mve_vqdmladhq_s<mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:MVE_2 3 "s_register_operand" "w")]
-	 VQDMLADHQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqdmladh.s%#<V_sz_elem>\t%q0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-])
-
 ;;
 ;; [vabsq_m_f]
 ;; [vnegq_m_f]
-- 
2.34.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 21/24] arm: [MVE intrinsics] rework vqrdmladhq vqrdmladhxq vqrdmlsdhq vqrdmlsdhxq vqdmladhq vqdmladhxq vqdmlsdhq vqdmlsdhxq
  2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
                   ` (18 preceding siblings ...)
  2023-05-11 12:19 ` [PATCH 20/24] arm: [MVE intrinsics] factorize vqdmladhq vqdmladhxq vqdmlsdhq vqdmlsdhxq vqrdmladhq vqrdmladhxq vqrdmlsdhq vqrdmlsdhxq Christophe Lyon
@ 2023-05-11 12:19 ` Christophe Lyon
  2023-05-11 12:19 ` [PATCH 22/24] arm: [MVE intrinsics] add ternary_n shape Christophe Lyon
                   ` (3 subsequent siblings)
  23 siblings, 0 replies; 25+ messages in thread
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vqrdmladhq, vqrdmladhxq, vqrdmlsdhq, vqrdmlsdhxq, vqdmladhq,
vqdmladhxq, vqdmlsdhq and vqdmlsdhxq using the new MVE builtins
framework.
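
As an illustration (not part of the patch), here is a minimal usage
sketch, assuming an MVE-enabled target (e.g. -O2
-march=armv8.1-m.main+mve -mfloat-abi=hard); the function name and the
choice of the s32 variant are arbitrary.  The unpredicated and
predicated forms keep their existing signatures after the move to the
new framework:

  #include <arm_mve.h>

  int32x4_t
  foo (int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p)
  {
    /* Unpredicated form, expected to emit vqrdmladh.s32.  */
    int32x4_t r = vqrdmladhq (inactive, a, b);
    /* Predicated form taking an inactive argument, expected to emit
       vpst followed by vqrdmladht.s32.  */
    return vqrdmladhq_m (r, a, b, p);
  }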

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vqdmladhq, vqdmladhxq)
	(vqdmlsdhq, vqdmlsdhxq, vqrdmladhq, vqrdmladhxq, vqrdmlsdhq)
	(vqrdmlsdhxq): New.
	* config/arm/arm-mve-builtins-base.def (vqdmladhq, vqdmladhxq)
	(vqdmlsdhq, vqdmlsdhxq, vqrdmladhq, vqrdmladhxq, vqrdmlsdhq)
	(vqrdmlsdhxq): New.
	* config/arm/arm-mve-builtins-base.h (vqdmladhq, vqdmladhxq)
	(vqdmlsdhq, vqdmlsdhxq, vqrdmladhq, vqrdmladhxq, vqrdmlsdhq)
	(vqrdmlsdhxq): New.
	* config/arm/arm-mve-builtins.cc
	(function_instance::has_inactive_argument): Handle vqrdmladhq,
	vqrdmladhxq, vqrdmlsdhq, vqrdmlsdhxq, vqdmladhq, vqdmladhxq,
	vqdmlsdhq, vqdmlsdhxq.
	* config/arm/arm_mve.h (vqrdmlsdhxq): Remove.
	(vqrdmlsdhq): Remove.
	(vqrdmladhxq): Remove.
	(vqrdmladhq): Remove.
	(vqdmlsdhxq): Remove.
	(vqdmlsdhq): Remove.
	(vqdmladhxq): Remove.
	(vqdmladhq): Remove.
	(vqdmladhq_m): Remove.
	(vqdmladhxq_m): Remove.
	(vqdmlsdhq_m): Remove.
	(vqdmlsdhxq_m): Remove.
	(vqrdmladhq_m): Remove.
	(vqrdmladhxq_m): Remove.
	(vqrdmlsdhq_m): Remove.
	(vqrdmlsdhxq_m): Remove.
	(vqrdmlsdhxq_s8): Remove.
	(vqrdmlsdhq_s8): Remove.
	(vqrdmladhxq_s8): Remove.
	(vqrdmladhq_s8): Remove.
	(vqdmlsdhxq_s8): Remove.
	(vqdmlsdhq_s8): Remove.
	(vqdmladhxq_s8): Remove.
	(vqdmladhq_s8): Remove.
	(vqrdmlsdhxq_s16): Remove.
	(vqrdmlsdhq_s16): Remove.
	(vqrdmladhxq_s16): Remove.
	(vqrdmladhq_s16): Remove.
	(vqdmlsdhxq_s16): Remove.
	(vqdmlsdhq_s16): Remove.
	(vqdmladhxq_s16): Remove.
	(vqdmladhq_s16): Remove.
	(vqrdmlsdhxq_s32): Remove.
	(vqrdmlsdhq_s32): Remove.
	(vqrdmladhxq_s32): Remove.
	(vqrdmladhq_s32): Remove.
	(vqdmlsdhxq_s32): Remove.
	(vqdmlsdhq_s32): Remove.
	(vqdmladhxq_s32): Remove.
	(vqdmladhq_s32): Remove.
	(vqdmladhq_m_s8): Remove.
	(vqdmladhq_m_s32): Remove.
	(vqdmladhq_m_s16): Remove.
	(vqdmladhxq_m_s8): Remove.
	(vqdmladhxq_m_s32): Remove.
	(vqdmladhxq_m_s16): Remove.
	(vqdmlsdhq_m_s8): Remove.
	(vqdmlsdhq_m_s32): Remove.
	(vqdmlsdhq_m_s16): Remove.
	(vqdmlsdhxq_m_s8): Remove.
	(vqdmlsdhxq_m_s32): Remove.
	(vqdmlsdhxq_m_s16): Remove.
	(vqrdmladhq_m_s8): Remove.
	(vqrdmladhq_m_s32): Remove.
	(vqrdmladhq_m_s16): Remove.
	(vqrdmladhxq_m_s8): Remove.
	(vqrdmladhxq_m_s32): Remove.
	(vqrdmladhxq_m_s16): Remove.
	(vqrdmlsdhq_m_s8): Remove.
	(vqrdmlsdhq_m_s32): Remove.
	(vqrdmlsdhq_m_s16): Remove.
	(vqrdmlsdhxq_m_s8): Remove.
	(vqrdmlsdhxq_m_s32): Remove.
	(vqrdmlsdhxq_m_s16): Remove.
	(__arm_vqrdmlsdhxq_s8): Remove.
	(__arm_vqrdmlsdhq_s8): Remove.
	(__arm_vqrdmladhxq_s8): Remove.
	(__arm_vqrdmladhq_s8): Remove.
	(__arm_vqdmlsdhxq_s8): Remove.
	(__arm_vqdmlsdhq_s8): Remove.
	(__arm_vqdmladhxq_s8): Remove.
	(__arm_vqdmladhq_s8): Remove.
	(__arm_vqrdmlsdhxq_s16): Remove.
	(__arm_vqrdmlsdhq_s16): Remove.
	(__arm_vqrdmladhxq_s16): Remove.
	(__arm_vqrdmladhq_s16): Remove.
	(__arm_vqdmlsdhxq_s16): Remove.
	(__arm_vqdmlsdhq_s16): Remove.
	(__arm_vqdmladhxq_s16): Remove.
	(__arm_vqdmladhq_s16): Remove.
	(__arm_vqrdmlsdhxq_s32): Remove.
	(__arm_vqrdmlsdhq_s32): Remove.
	(__arm_vqrdmladhxq_s32): Remove.
	(__arm_vqrdmladhq_s32): Remove.
	(__arm_vqdmlsdhxq_s32): Remove.
	(__arm_vqdmlsdhq_s32): Remove.
	(__arm_vqdmladhxq_s32): Remove.
	(__arm_vqdmladhq_s32): Remove.
	(__arm_vqdmladhq_m_s8): Remove.
	(__arm_vqdmladhq_m_s32): Remove.
	(__arm_vqdmladhq_m_s16): Remove.
	(__arm_vqdmladhxq_m_s8): Remove.
	(__arm_vqdmladhxq_m_s32): Remove.
	(__arm_vqdmladhxq_m_s16): Remove.
	(__arm_vqdmlsdhq_m_s8): Remove.
	(__arm_vqdmlsdhq_m_s32): Remove.
	(__arm_vqdmlsdhq_m_s16): Remove.
	(__arm_vqdmlsdhxq_m_s8): Remove.
	(__arm_vqdmlsdhxq_m_s32): Remove.
	(__arm_vqdmlsdhxq_m_s16): Remove.
	(__arm_vqrdmladhq_m_s8): Remove.
	(__arm_vqrdmladhq_m_s32): Remove.
	(__arm_vqrdmladhq_m_s16): Remove.
	(__arm_vqrdmladhxq_m_s8): Remove.
	(__arm_vqrdmladhxq_m_s32): Remove.
	(__arm_vqrdmladhxq_m_s16): Remove.
	(__arm_vqrdmlsdhq_m_s8): Remove.
	(__arm_vqrdmlsdhq_m_s32): Remove.
	(__arm_vqrdmlsdhq_m_s16): Remove.
	(__arm_vqrdmlsdhxq_m_s8): Remove.
	(__arm_vqrdmlsdhxq_m_s32): Remove.
	(__arm_vqrdmlsdhxq_m_s16): Remove.
	(__arm_vqrdmlsdhxq): Remove.
	(__arm_vqrdmlsdhq): Remove.
	(__arm_vqrdmladhxq): Remove.
	(__arm_vqrdmladhq): Remove.
	(__arm_vqdmlsdhxq): Remove.
	(__arm_vqdmlsdhq): Remove.
	(__arm_vqdmladhxq): Remove.
	(__arm_vqdmladhq): Remove.
	(__arm_vqdmladhq_m): Remove.
	(__arm_vqdmladhxq_m): Remove.
	(__arm_vqdmlsdhq_m): Remove.
	(__arm_vqdmlsdhxq_m): Remove.
	(__arm_vqrdmladhq_m): Remove.
	(__arm_vqrdmladhxq_m): Remove.
	(__arm_vqrdmlsdhq_m): Remove.
	(__arm_vqrdmlsdhxq_m): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   8 +
 gcc/config/arm/arm-mve-builtins-base.def |   8 +
 gcc/config/arm/arm-mve-builtins-base.h   |   8 +
 gcc/config/arm/arm-mve-builtins.cc       |   8 +
 gcc/config/arm/arm_mve.h                 | 928 -----------------------
 5 files changed, 32 insertions(+), 928 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 2b0c800013c..95fa2d27049 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -311,7 +311,15 @@ FUNCTION_WITHOUT_N_NO_F (vqmovnbq, VQMOVNBQ)
 FUNCTION_WITHOUT_N_NO_F (vqmovntq, VQMOVNTQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqmovunbq, VQMOVUNBQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqmovuntq, VQMOVUNTQ)
+FUNCTION_WITHOUT_N_NO_U_F (vqdmladhq, VQDMLADHQ)
+FUNCTION_WITHOUT_N_NO_U_F (vqdmladhxq, VQDMLADHXQ)
+FUNCTION_WITHOUT_N_NO_U_F (vqdmlsdhq, VQDMLSDHQ)
+FUNCTION_WITHOUT_N_NO_U_F (vqdmlsdhxq, VQDMLSDHXQ)
 FUNCTION_WITH_M_N_NO_U_F (vqdmulhq, VQDMULHQ)
+FUNCTION_WITHOUT_N_NO_U_F (vqrdmladhq, VQRDMLADHQ)
+FUNCTION_WITHOUT_N_NO_U_F (vqrdmladhxq, VQRDMLADHXQ)
+FUNCTION_WITHOUT_N_NO_U_F (vqrdmlsdhq, VQRDMLSDHQ)
+FUNCTION_WITHOUT_N_NO_U_F (vqrdmlsdhxq, VQRDMLSDHXQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqnegq, VQNEGQ)
 FUNCTION_WITH_M_N_NO_F (vqrshlq, VQRSHLQ)
 FUNCTION_WITH_M_N_NO_U_F (vqrdmulhq, VQRDMULHQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index d61badb99d9..5c06ebc15cc 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -76,12 +76,20 @@ DEF_MVE_FUNCTION (vnegq, unary, all_signed, mx_or_none)
 DEF_MVE_FUNCTION (vorrq, binary_orrq, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vqabsq, unary, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqaddq, binary_opt_n, all_integer, m_or_none)
+DEF_MVE_FUNCTION (vqdmladhq, ternary, all_signed, m_or_none)
+DEF_MVE_FUNCTION (vqdmladhxq, ternary, all_signed, m_or_none)
+DEF_MVE_FUNCTION (vqdmlsdhq, ternary, all_signed, m_or_none)
+DEF_MVE_FUNCTION (vqdmlsdhxq, ternary, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqdmulhq, binary_opt_n, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqmovnbq, binary_move_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vqmovntq, binary_move_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vqmovunbq, binary_move_narrow_unsigned, signed_16_32, m_or_none)
 DEF_MVE_FUNCTION (vqmovuntq, binary_move_narrow_unsigned, signed_16_32, m_or_none)
 DEF_MVE_FUNCTION (vqnegq, unary, all_signed, m_or_none)
+DEF_MVE_FUNCTION (vqrdmladhq, ternary, all_signed, m_or_none)
+DEF_MVE_FUNCTION (vqrdmladhxq, ternary, all_signed, m_or_none)
+DEF_MVE_FUNCTION (vqrdmlsdhq, ternary, all_signed, m_or_none)
+DEF_MVE_FUNCTION (vqrdmlsdhxq, ternary, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqrdmulhq, binary_opt_n, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqrshlq, binary_round_lshift, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vqrshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 47bb8712fe5..194cb170958 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -89,12 +89,20 @@ extern const function_base *const vnegq;
 extern const function_base *const vorrq;
 extern const function_base *const vqabsq;
 extern const function_base *const vqaddq;
+extern const function_base *const vqdmladhq;
+extern const function_base *const vqdmladhxq;
+extern const function_base *const vqdmlsdhq;
+extern const function_base *const vqdmlsdhxq;
 extern const function_base *const vqdmulhq;
 extern const function_base *const vqmovnbq;
 extern const function_base *const vqmovntq;
 extern const function_base *const vqmovunbq;
 extern const function_base *const vqmovuntq;
 extern const function_base *const vqnegq;
+extern const function_base *const vqrdmladhq;
+extern const function_base *const vqrdmladhxq;
+extern const function_base *const vqrdmlsdhq;
+extern const function_base *const vqrdmlsdhxq;
 extern const function_base *const vqrdmulhq;
 extern const function_base *const vqrshlq;
 extern const function_base *const vqrshrnbq;
diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index 59cfaf6e5b1..2095d96b593 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -689,6 +689,14 @@ function_instance::has_inactive_argument () const
       || base == functions::vqmovunbq
       || base == functions::vqmovuntq
       || (base == functions::vorrq && mode_suffix_id == MODE_n)
+      || base == functions::vqdmladhq
+      || base == functions::vqdmladhxq
+      || base == functions::vqdmlsdhq
+      || base == functions::vqdmlsdhxq
+      || base == functions::vqrdmladhq
+      || base == functions::vqrdmladhxq
+      || base == functions::vqrdmlsdhq
+      || base == functions::vqrdmlsdhxq
       || (base == functions::vqrshlq && mode_suffix_id == MODE_n)
       || base == functions::vqrshrnbq
       || base == functions::vqrshrntq
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 506ac3371e4..bb503037cc3 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -70,14 +70,6 @@
 #define vmlaq(__a, __b, __c) __arm_vmlaq(__a, __b, __c)
 #define vsriq(__a, __b, __imm) __arm_vsriq(__a, __b, __imm)
 #define vsliq(__a, __b, __imm) __arm_vsliq(__a, __b, __imm)
-#define vqrdmlsdhxq(__inactive, __a, __b) __arm_vqrdmlsdhxq(__inactive, __a, __b)
-#define vqrdmlsdhq(__inactive, __a, __b) __arm_vqrdmlsdhq(__inactive, __a, __b)
-#define vqrdmladhxq(__inactive, __a, __b) __arm_vqrdmladhxq(__inactive, __a, __b)
-#define vqrdmladhq(__inactive, __a, __b) __arm_vqrdmladhq(__inactive, __a, __b)
-#define vqdmlsdhxq(__inactive, __a, __b) __arm_vqdmlsdhxq(__inactive, __a, __b)
-#define vqdmlsdhq(__inactive, __a, __b) __arm_vqdmlsdhq(__inactive, __a, __b)
-#define vqdmladhxq(__inactive, __a, __b) __arm_vqdmladhxq(__inactive, __a, __b)
-#define vqdmladhq(__inactive, __a, __b) __arm_vqdmladhq(__inactive, __a, __b)
 #define vrmlaldavhaxq(__a, __b, __c) __arm_vrmlaldavhaxq(__a, __b, __c)
 #define vrmlsldavhaq(__a, __b, __c) __arm_vrmlsldavhaq(__a, __b, __c)
 #define vrmlsldavhaxq(__a, __b, __c) __arm_vrmlsldavhaxq(__a, __b, __c)
@@ -94,18 +86,10 @@
 #define vmullbq_int_m(__inactive, __a, __b, __p) __arm_vmullbq_int_m(__inactive, __a, __b, __p)
 #define vmulltq_int_m(__inactive, __a, __b, __p) __arm_vmulltq_int_m(__inactive, __a, __b, __p)
 #define vornq_m(__inactive, __a, __b, __p) __arm_vornq_m(__inactive, __a, __b, __p)
-#define vqdmladhq_m(__inactive, __a, __b, __p) __arm_vqdmladhq_m(__inactive, __a, __b, __p)
 #define vqdmlashq_m(__a, __b, __c, __p) __arm_vqdmlashq_m(__a, __b, __c, __p)
-#define vqdmladhxq_m(__inactive, __a, __b, __p) __arm_vqdmladhxq_m(__inactive, __a, __b, __p)
 #define vqdmlahq_m(__a, __b, __c, __p) __arm_vqdmlahq_m(__a, __b, __c, __p)
-#define vqdmlsdhq_m(__inactive, __a, __b, __p) __arm_vqdmlsdhq_m(__inactive, __a, __b, __p)
-#define vqdmlsdhxq_m(__inactive, __a, __b, __p) __arm_vqdmlsdhxq_m(__inactive, __a, __b, __p)
-#define vqrdmladhq_m(__inactive, __a, __b, __p) __arm_vqrdmladhq_m(__inactive, __a, __b, __p)
-#define vqrdmladhxq_m(__inactive, __a, __b, __p) __arm_vqrdmladhxq_m(__inactive, __a, __b, __p)
 #define vqrdmlahq_m(__a, __b, __c, __p) __arm_vqrdmlahq_m(__a, __b, __c, __p)
 #define vqrdmlashq_m(__a, __b, __c, __p) __arm_vqrdmlashq_m(__a, __b, __c, __p)
-#define vqrdmlsdhq_m(__inactive, __a, __b, __p) __arm_vqrdmlsdhq_m(__inactive, __a, __b, __p)
-#define vqrdmlsdhxq_m(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m(__inactive, __a, __b, __p)
 #define vsliq_m(__a, __b, __imm, __p) __arm_vsliq_m(__a, __b, __imm, __p)
 #define vmullbq_poly_m(__inactive, __a, __b, __p) __arm_vmullbq_poly_m(__inactive, __a, __b, __p)
 #define vmulltq_poly_m(__inactive, __a, __b, __p) __arm_vmulltq_poly_m(__inactive, __a, __b, __p)
@@ -434,18 +418,10 @@
 #define vsriq_n_u8(__a, __b,  __imm) __arm_vsriq_n_u8(__a, __b,  __imm)
 #define vsliq_n_u8(__a, __b,  __imm) __arm_vsliq_n_u8(__a, __b,  __imm)
 #define vmvnq_m_s8(__inactive, __a, __p) __arm_vmvnq_m_s8(__inactive, __a, __p)
-#define vqrdmlsdhxq_s8(__inactive, __a, __b) __arm_vqrdmlsdhxq_s8(__inactive, __a, __b)
-#define vqrdmlsdhq_s8(__inactive, __a, __b) __arm_vqrdmlsdhq_s8(__inactive, __a, __b)
 #define vqrdmlashq_n_s8(__a, __b, __c) __arm_vqrdmlashq_n_s8(__a, __b, __c)
 #define vqrdmlahq_n_s8(__a, __b, __c) __arm_vqrdmlahq_n_s8(__a, __b, __c)
-#define vqrdmladhxq_s8(__inactive, __a, __b) __arm_vqrdmladhxq_s8(__inactive, __a, __b)
-#define vqrdmladhq_s8(__inactive, __a, __b) __arm_vqrdmladhq_s8(__inactive, __a, __b)
-#define vqdmlsdhxq_s8(__inactive, __a, __b) __arm_vqdmlsdhxq_s8(__inactive, __a, __b)
-#define vqdmlsdhq_s8(__inactive, __a, __b) __arm_vqdmlsdhq_s8(__inactive, __a, __b)
 #define vqdmlahq_n_s8(__a, __b, __c) __arm_vqdmlahq_n_s8(__a, __b, __c)
 #define vqdmlashq_n_s8(__a, __b, __c) __arm_vqdmlashq_n_s8(__a, __b, __c)
-#define vqdmladhxq_s8(__inactive, __a, __b) __arm_vqdmladhxq_s8(__inactive, __a, __b)
-#define vqdmladhq_s8(__inactive, __a, __b) __arm_vqdmladhq_s8(__inactive, __a, __b)
 #define vmlasq_n_s8(__a, __b, __c) __arm_vmlasq_n_s8(__a, __b, __c)
 #define vmlaq_n_s8(__a, __b, __c) __arm_vmlaq_n_s8(__a, __b, __c)
 #define vsriq_n_s8(__a, __b,  __imm) __arm_vsriq_n_s8(__a, __b,  __imm)
@@ -458,18 +434,10 @@
 #define vsriq_n_u16(__a, __b,  __imm) __arm_vsriq_n_u16(__a, __b,  __imm)
 #define vsliq_n_u16(__a, __b,  __imm) __arm_vsliq_n_u16(__a, __b,  __imm)
 #define vmvnq_m_s16(__inactive, __a, __p) __arm_vmvnq_m_s16(__inactive, __a, __p)
-#define vqrdmlsdhxq_s16(__inactive, __a, __b) __arm_vqrdmlsdhxq_s16(__inactive, __a, __b)
-#define vqrdmlsdhq_s16(__inactive, __a, __b) __arm_vqrdmlsdhq_s16(__inactive, __a, __b)
 #define vqrdmlashq_n_s16(__a, __b, __c) __arm_vqrdmlashq_n_s16(__a, __b, __c)
 #define vqrdmlahq_n_s16(__a, __b, __c) __arm_vqrdmlahq_n_s16(__a, __b, __c)
-#define vqrdmladhxq_s16(__inactive, __a, __b) __arm_vqrdmladhxq_s16(__inactive, __a, __b)
-#define vqrdmladhq_s16(__inactive, __a, __b) __arm_vqrdmladhq_s16(__inactive, __a, __b)
-#define vqdmlsdhxq_s16(__inactive, __a, __b) __arm_vqdmlsdhxq_s16(__inactive, __a, __b)
-#define vqdmlsdhq_s16(__inactive, __a, __b) __arm_vqdmlsdhq_s16(__inactive, __a, __b)
 #define vqdmlashq_n_s16(__a, __b, __c) __arm_vqdmlashq_n_s16(__a, __b, __c)
 #define vqdmlahq_n_s16(__a, __b, __c) __arm_vqdmlahq_n_s16(__a, __b, __c)
-#define vqdmladhxq_s16(__inactive, __a, __b) __arm_vqdmladhxq_s16(__inactive, __a, __b)
-#define vqdmladhq_s16(__inactive, __a, __b) __arm_vqdmladhq_s16(__inactive, __a, __b)
 #define vmlasq_n_s16(__a, __b, __c) __arm_vmlasq_n_s16(__a, __b, __c)
 #define vmlaq_n_s16(__a, __b, __c) __arm_vmlaq_n_s16(__a, __b, __c)
 #define vsriq_n_s16(__a, __b,  __imm) __arm_vsriq_n_s16(__a, __b,  __imm)
@@ -482,18 +450,10 @@
 #define vsriq_n_u32(__a, __b,  __imm) __arm_vsriq_n_u32(__a, __b,  __imm)
 #define vsliq_n_u32(__a, __b,  __imm) __arm_vsliq_n_u32(__a, __b,  __imm)
 #define vmvnq_m_s32(__inactive, __a, __p) __arm_vmvnq_m_s32(__inactive, __a, __p)
-#define vqrdmlsdhxq_s32(__inactive, __a, __b) __arm_vqrdmlsdhxq_s32(__inactive, __a, __b)
-#define vqrdmlsdhq_s32(__inactive, __a, __b) __arm_vqrdmlsdhq_s32(__inactive, __a, __b)
 #define vqrdmlashq_n_s32(__a, __b, __c) __arm_vqrdmlashq_n_s32(__a, __b, __c)
 #define vqrdmlahq_n_s32(__a, __b, __c) __arm_vqrdmlahq_n_s32(__a, __b, __c)
-#define vqrdmladhxq_s32(__inactive, __a, __b) __arm_vqrdmladhxq_s32(__inactive, __a, __b)
-#define vqrdmladhq_s32(__inactive, __a, __b) __arm_vqrdmladhq_s32(__inactive, __a, __b)
-#define vqdmlsdhxq_s32(__inactive, __a, __b) __arm_vqdmlsdhxq_s32(__inactive, __a, __b)
-#define vqdmlsdhq_s32(__inactive, __a, __b) __arm_vqdmlsdhq_s32(__inactive, __a, __b)
 #define vqdmlashq_n_s32(__a, __b, __c) __arm_vqdmlashq_n_s32(__a, __b, __c)
 #define vqdmlahq_n_s32(__a, __b, __c) __arm_vqdmlahq_n_s32(__a, __b, __c)
-#define vqdmladhxq_s32(__inactive, __a, __b) __arm_vqdmladhxq_s32(__inactive, __a, __b)
-#define vqdmladhq_s32(__inactive, __a, __b) __arm_vqdmladhq_s32(__inactive, __a, __b)
 #define vmlasq_n_s32(__a, __b, __c) __arm_vmlasq_n_s32(__a, __b, __c)
 #define vmlaq_n_s32(__a, __b, __c) __arm_vmlaq_n_s32(__a, __b, __c)
 #define vsriq_n_s32(__a, __b,  __imm) __arm_vsriq_n_s32(__a, __b,  __imm)
@@ -618,42 +578,18 @@
 #define vornq_m_u8(__inactive, __a, __b, __p) __arm_vornq_m_u8(__inactive, __a, __b, __p)
 #define vornq_m_u32(__inactive, __a, __b, __p) __arm_vornq_m_u32(__inactive, __a, __b, __p)
 #define vornq_m_u16(__inactive, __a, __b, __p) __arm_vornq_m_u16(__inactive, __a, __b, __p)
-#define vqdmladhq_m_s8(__inactive, __a, __b, __p) __arm_vqdmladhq_m_s8(__inactive, __a, __b, __p)
-#define vqdmladhq_m_s32(__inactive, __a, __b, __p) __arm_vqdmladhq_m_s32(__inactive, __a, __b, __p)
-#define vqdmladhq_m_s16(__inactive, __a, __b, __p) __arm_vqdmladhq_m_s16(__inactive, __a, __b, __p)
-#define vqdmladhxq_m_s8(__inactive, __a, __b, __p) __arm_vqdmladhxq_m_s8(__inactive, __a, __b, __p)
-#define vqdmladhxq_m_s32(__inactive, __a, __b, __p) __arm_vqdmladhxq_m_s32(__inactive, __a, __b, __p)
-#define vqdmladhxq_m_s16(__inactive, __a, __b, __p) __arm_vqdmladhxq_m_s16(__inactive, __a, __b, __p)
 #define vqdmlashq_m_n_s8(__a, __b, __c, __p) __arm_vqdmlashq_m_n_s8(__a, __b, __c, __p)
 #define vqdmlashq_m_n_s32(__a, __b, __c, __p) __arm_vqdmlashq_m_n_s32(__a, __b, __c, __p)
 #define vqdmlashq_m_n_s16(__a, __b, __c, __p) __arm_vqdmlashq_m_n_s16(__a, __b, __c, __p)
 #define vqdmlahq_m_n_s8(__a, __b, __c, __p) __arm_vqdmlahq_m_n_s8(__a, __b, __c, __p)
 #define vqdmlahq_m_n_s32(__a, __b, __c, __p) __arm_vqdmlahq_m_n_s32(__a, __b, __c, __p)
 #define vqdmlahq_m_n_s16(__a, __b, __c, __p) __arm_vqdmlahq_m_n_s16(__a, __b, __c, __p)
-#define vqdmlsdhq_m_s8(__inactive, __a, __b, __p) __arm_vqdmlsdhq_m_s8(__inactive, __a, __b, __p)
-#define vqdmlsdhq_m_s32(__inactive, __a, __b, __p) __arm_vqdmlsdhq_m_s32(__inactive, __a, __b, __p)
-#define vqdmlsdhq_m_s16(__inactive, __a, __b, __p) __arm_vqdmlsdhq_m_s16(__inactive, __a, __b, __p)
-#define vqdmlsdhxq_m_s8(__inactive, __a, __b, __p) __arm_vqdmlsdhxq_m_s8(__inactive, __a, __b, __p)
-#define vqdmlsdhxq_m_s32(__inactive, __a, __b, __p) __arm_vqdmlsdhxq_m_s32(__inactive, __a, __b, __p)
-#define vqdmlsdhxq_m_s16(__inactive, __a, __b, __p) __arm_vqdmlsdhxq_m_s16(__inactive, __a, __b, __p)
-#define vqrdmladhq_m_s8(__inactive, __a, __b, __p) __arm_vqrdmladhq_m_s8(__inactive, __a, __b, __p)
-#define vqrdmladhq_m_s32(__inactive, __a, __b, __p) __arm_vqrdmladhq_m_s32(__inactive, __a, __b, __p)
-#define vqrdmladhq_m_s16(__inactive, __a, __b, __p) __arm_vqrdmladhq_m_s16(__inactive, __a, __b, __p)
-#define vqrdmladhxq_m_s8(__inactive, __a, __b, __p) __arm_vqrdmladhxq_m_s8(__inactive, __a, __b, __p)
-#define vqrdmladhxq_m_s32(__inactive, __a, __b, __p) __arm_vqrdmladhxq_m_s32(__inactive, __a, __b, __p)
-#define vqrdmladhxq_m_s16(__inactive, __a, __b, __p) __arm_vqrdmladhxq_m_s16(__inactive, __a, __b, __p)
 #define vqrdmlahq_m_n_s8(__a, __b, __c, __p) __arm_vqrdmlahq_m_n_s8(__a, __b, __c, __p)
 #define vqrdmlahq_m_n_s32(__a, __b, __c, __p) __arm_vqrdmlahq_m_n_s32(__a, __b, __c, __p)
 #define vqrdmlahq_m_n_s16(__a, __b, __c, __p) __arm_vqrdmlahq_m_n_s16(__a, __b, __c, __p)
 #define vqrdmlashq_m_n_s8(__a, __b, __c, __p) __arm_vqrdmlashq_m_n_s8(__a, __b, __c, __p)
 #define vqrdmlashq_m_n_s32(__a, __b, __c, __p) __arm_vqrdmlashq_m_n_s32(__a, __b, __c, __p)
 #define vqrdmlashq_m_n_s16(__a, __b, __c, __p) __arm_vqrdmlashq_m_n_s16(__a, __b, __c, __p)
-#define vqrdmlsdhq_m_s8(__inactive, __a, __b, __p) __arm_vqrdmlsdhq_m_s8(__inactive, __a, __b, __p)
-#define vqrdmlsdhq_m_s32(__inactive, __a, __b, __p) __arm_vqrdmlsdhq_m_s32(__inactive, __a, __b, __p)
-#define vqrdmlsdhq_m_s16(__inactive, __a, __b, __p) __arm_vqrdmlsdhq_m_s16(__inactive, __a, __b, __p)
-#define vqrdmlsdhxq_m_s8(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m_s8(__inactive, __a, __b, __p)
-#define vqrdmlsdhxq_m_s32(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m_s32(__inactive, __a, __b, __p)
-#define vqrdmlsdhxq_m_s16(__inactive, __a, __b, __p) __arm_vqrdmlsdhxq_m_s16(__inactive, __a, __b, __p)
 #define vsliq_m_n_s8(__a, __b,  __imm, __p) __arm_vsliq_m_n_s8(__a, __b,  __imm, __p)
 #define vsliq_m_n_s32(__a, __b,  __imm, __p) __arm_vsliq_m_n_s32(__a, __b,  __imm, __p)
 #define vsliq_m_n_s16(__a, __b,  __imm, __p) __arm_vsliq_m_n_s16(__a, __b,  __imm, __p)
@@ -2053,20 +1989,6 @@ __arm_vmvnq_m_s8 (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_sv16qi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhxq_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vqrdmlsdhxq_sv16qi (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhq_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vqrdmlsdhq_sv16qi (__inactive, __a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlashq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
@@ -2088,34 +2010,6 @@ __arm_vqrdmlahq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
   return __builtin_mve_vqrdmlahq_n_sv16qi (__a, __b, __c);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhxq_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vqrdmladhxq_sv16qi (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhq_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vqrdmladhq_sv16qi (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhxq_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vqdmlsdhxq_sv16qi (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhq_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vqdmlsdhq_sv16qi (__inactive, __a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqdmlahq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
@@ -2123,20 +2017,6 @@ __arm_vqdmlahq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
   return __builtin_mve_vqdmlahq_n_sv16qi (__a, __b, __c);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhxq_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vqdmladhxq_sv16qi (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhq_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vqdmladhq_sv16qi (__inactive, __a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlasq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
@@ -2221,20 +2101,6 @@ __arm_vmvnq_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_sv8hi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhxq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqrdmlsdhxq_sv8hi (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqrdmlsdhq_sv8hi (__inactive, __a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlashq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
@@ -2256,34 +2122,6 @@ __arm_vqrdmlahq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
   return __builtin_mve_vqrdmlahq_n_sv8hi (__a, __b, __c);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhxq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqrdmladhxq_sv8hi (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqrdmladhq_sv8hi (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhxq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqdmlsdhxq_sv8hi (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqdmlsdhq_sv8hi (__inactive, __a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqdmlahq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
@@ -2291,20 +2129,6 @@ __arm_vqdmlahq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
   return __builtin_mve_vqdmlahq_n_sv8hi (__a, __b, __c);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhxq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqdmladhxq_sv8hi (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqdmladhq_sv8hi (__inactive, __a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlasq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
@@ -2389,20 +2213,6 @@ __arm_vmvnq_m_s32 (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_sv4si (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhxq_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vqrdmlsdhxq_sv4si (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhq_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vqrdmlsdhq_sv4si (__inactive, __a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlashq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
@@ -2424,34 +2234,6 @@ __arm_vqrdmlahq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
   return __builtin_mve_vqrdmlahq_n_sv4si (__a, __b, __c);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhxq_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vqrdmladhxq_sv4si (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhq_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vqrdmladhq_sv4si (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhxq_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vqdmlsdhxq_sv4si (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhq_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vqdmlsdhq_sv4si (__inactive, __a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqdmlahq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
@@ -2459,20 +2241,6 @@ __arm_vqdmlahq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
   return __builtin_mve_vqdmlahq_n_sv4si (__a, __b, __c);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhxq_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vqdmladhxq_sv4si (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhq_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vqdmladhq_sv4si (__inactive, __a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlasq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
@@ -3047,48 +2815,6 @@ __arm_vornq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pr
   return __builtin_mve_vornq_m_uv8hi (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmladhq_m_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmladhq_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmladhq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhxq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmladhxq_m_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhxq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmladhxq_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhxq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmladhxq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqdmlahq_m_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
@@ -3110,90 +2836,6 @@ __arm_vqdmlahq_m_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t
   return __builtin_mve_vqdmlahq_m_n_sv8hi (__a, __b, __c, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmlsdhq_m_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmlsdhq_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmlsdhq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhxq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmlsdhxq_m_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhxq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmlsdhxq_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhxq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmlsdhxq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmladhq_m_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmladhq_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmladhq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhxq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmladhxq_m_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhxq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmladhxq_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhxq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmladhxq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlahq_m_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
@@ -3257,48 +2899,6 @@ __arm_vqdmlashq_m_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t
   return __builtin_mve_vqdmlashq_m_n_sv4si (__a, __b, __c, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmlsdhq_m_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmlsdhq_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmlsdhq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhxq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmlsdhxq_m_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhxq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmlsdhxq_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhxq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmlsdhxq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsliq_m_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -8953,20 +8553,6 @@ __arm_vmvnq_m (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
  return __arm_vmvnq_m_s8 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhxq (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
-{
- return __arm_vqrdmlsdhxq_s8 (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhq (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
-{
- return __arm_vqrdmlsdhq_s8 (__inactive, __a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlashq (int8x16_t __a, int8x16_t __b, int8_t __c)
@@ -8988,34 +8574,6 @@ __arm_vqrdmlahq (int8x16_t __a, int8x16_t __b, int8_t __c)
  return __arm_vqrdmlahq_n_s8 (__a, __b, __c);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhxq (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
-{
- return __arm_vqrdmladhxq_s8 (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhq (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
-{
- return __arm_vqrdmladhq_s8 (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhxq (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
-{
- return __arm_vqdmlsdhxq_s8 (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhq (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
-{
- return __arm_vqdmlsdhq_s8 (__inactive, __a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqdmlahq (int8x16_t __a, int8x16_t __b, int8_t __c)
@@ -9023,20 +8581,6 @@ __arm_vqdmlahq (int8x16_t __a, int8x16_t __b, int8_t __c)
  return __arm_vqdmlahq_n_s8 (__a, __b, __c);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhxq (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
-{
- return __arm_vqdmladhxq_s8 (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhq (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
-{
- return __arm_vqdmladhq_s8 (__inactive, __a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlasq (int8x16_t __a, int8x16_t __b, int8_t __c)
@@ -9121,20 +8665,6 @@ __arm_vmvnq_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
  return __arm_vmvnq_m_s16 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhxq (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
- return __arm_vqrdmlsdhxq_s16 (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhq (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
- return __arm_vqrdmlsdhq_s16 (__inactive, __a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlashq (int16x8_t __a, int16x8_t __b, int16_t __c)
@@ -9156,34 +8686,6 @@ __arm_vqrdmlahq (int16x8_t __a, int16x8_t __b, int16_t __c)
  return __arm_vqrdmlahq_n_s16 (__a, __b, __c);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhxq (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
- return __arm_vqrdmladhxq_s16 (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhq (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
- return __arm_vqrdmladhq_s16 (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhxq (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
- return __arm_vqdmlsdhxq_s16 (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhq (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
- return __arm_vqdmlsdhq_s16 (__inactive, __a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqdmlahq (int16x8_t __a, int16x8_t __b, int16_t __c)
@@ -9191,20 +8693,6 @@ __arm_vqdmlahq (int16x8_t __a, int16x8_t __b, int16_t __c)
  return __arm_vqdmlahq_n_s16 (__a, __b, __c);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhxq (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
- return __arm_vqdmladhxq_s16 (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhq (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
- return __arm_vqdmladhq_s16 (__inactive, __a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlasq (int16x8_t __a, int16x8_t __b, int16_t __c)
@@ -9289,20 +8777,6 @@ __arm_vmvnq_m (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
  return __arm_vmvnq_m_s32 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhxq (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
-{
- return __arm_vqrdmlsdhxq_s32 (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhq (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
-{
- return __arm_vqrdmlsdhq_s32 (__inactive, __a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlashq (int32x4_t __a, int32x4_t __b, int32_t __c)
@@ -9324,34 +8798,6 @@ __arm_vqrdmlahq (int32x4_t __a, int32x4_t __b, int32_t __c)
  return __arm_vqrdmlahq_n_s32 (__a, __b, __c);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhxq (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
-{
- return __arm_vqrdmladhxq_s32 (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhq (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
-{
- return __arm_vqrdmladhq_s32 (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhxq (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
-{
- return __arm_vqdmlsdhxq_s32 (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhq (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
-{
- return __arm_vqdmlsdhq_s32 (__inactive, __a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqdmlahq (int32x4_t __a, int32x4_t __b, int32_t __c)
@@ -9359,20 +8805,6 @@ __arm_vqdmlahq (int32x4_t __a, int32x4_t __b, int32_t __c)
  return __arm_vqdmlahq_n_s32 (__a, __b, __c);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhxq (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
-{
- return __arm_vqdmladhxq_s32 (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhq (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
-{
- return __arm_vqdmladhq_s32 (__inactive, __a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlasq (int32x4_t __a, int32x4_t __b, int32_t __c)
@@ -9947,48 +9379,6 @@ __arm_vornq_m (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16
  return __arm_vornq_m_u16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmladhq_m_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmladhq_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmladhq_m_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhxq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmladhxq_m_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhxq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmladhxq_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhxq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmladhxq_m_s16 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqdmlahq_m (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
@@ -10010,90 +9400,6 @@ __arm_vqdmlahq_m (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p)
  return __arm_vqdmlahq_m_n_s16 (__a, __b, __c, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmlsdhq_m_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmlsdhq_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmlsdhq_m_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhxq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmlsdhxq_m_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhxq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmlsdhxq_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhxq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmlsdhxq_m_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vqrdmladhq_m_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vqrdmladhq_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vqrdmladhq_m_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhxq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vqrdmladhxq_m_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhxq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vqrdmladhxq_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhxq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vqrdmladhxq_m_s16 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlahq_m (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
@@ -10157,48 +9463,6 @@ __arm_vqdmlashq_m (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t __p)
  return __arm_vqdmlashq_m_n_s32 (__a, __b, __c, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vqrdmlsdhq_m_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vqrdmlsdhq_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vqrdmlsdhq_m_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhxq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vqrdmlsdhxq_m_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhxq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vqrdmlsdhxq_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhxq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vqrdmlsdhxq_m_s16 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsliq_m (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -14538,22 +13802,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsliq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsliq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
-#define __arm_vqrdmlsdhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmlsdhxq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmlsdhxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmlsdhxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
-#define __arm_vqrdmlsdhq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmlsdhq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmlsdhq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmlsdhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
 #define __arm_vqrdmlashq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -14597,54 +13845,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int)), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int)));})
 
-#define __arm_vqrdmladhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmladhxq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmladhxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmladhxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
-#define __arm_vqrdmladhq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmladhq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmladhq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmladhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
-#define __arm_vqdmlsdhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqdmlsdhxq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmlsdhxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmlsdhxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
-#define __arm_vqdmlsdhq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqdmlsdhq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmlsdhq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmlsdhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
-#define __arm_vqdmladhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqdmladhxq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmladhxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmladhxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
-#define __arm_vqdmladhq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqdmladhq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmladhq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmladhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
 #define __arm_vcvtaq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -15524,22 +14724,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vbicq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbicq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
 
-#define __arm_vqrdmlsdhq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmlsdhq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmlsdhq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmlsdhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
-#define __arm_vqrdmlsdhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmlsdhxq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmlsdhxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmlsdhxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
 #define __arm_vsliq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -15584,30 +14768,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int)), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int)));})
 
-#define __arm_vqrdmladhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmladhxq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmladhxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmladhxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
-#define __arm_vqrdmladhq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmladhq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmladhq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmladhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
-#define __arm_vqdmlsdhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqdmlsdhxq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmlsdhxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmlsdhxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
 #define __arm_vmlaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -15650,30 +14810,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int)), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int)));})
 
-#define __arm_vqdmlsdhq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqdmlsdhq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmlsdhq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmlsdhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
-#define __arm_vqdmladhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqdmladhxq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmladhxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmladhxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
-#define __arm_vqdmladhq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqdmladhq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmladhq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmladhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
-
 #define __arm_vbicq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -16309,60 +15445,12 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsliq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),  p2, p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsliq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),  p2, p3));})
 
-#define __arm_vqrdmlsdhxq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmlsdhxq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmlsdhxq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmlsdhxq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
-#define __arm_vqrdmlsdhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmlsdhq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmlsdhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmlsdhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
 #define __arm_vrmlaldavhaxq_p(p0,p1,p2,p3) __arm_vrmlaldavhaxq_p_s32(p0,p1,p2,p3)
 
 #define __arm_vrmlsldavhaq_p(p0,p1,p2,p3) __arm_vrmlsldavhaq_p_s32(p0,p1,p2,p3)
 
 #define __arm_vrmlsldavhaxq_p(p0,p1,p2,p3) __arm_vrmlsldavhaxq_p_s32(p0,p1,p2,p3)
 
-#define __arm_vqdmladhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqdmladhq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmladhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmladhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
-#define __arm_vqdmladhxq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqdmladhxq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmladhxq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmladhxq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
-#define __arm_vqdmlsdhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqdmlsdhq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmlsdhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmlsdhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
-#define __arm_vqdmlsdhxq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqdmlsdhxq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmlsdhxq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmlsdhxq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
 #define __arm_vmvnq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -16487,22 +15575,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmulltq_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
   int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmulltq_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
 
-#define __arm_vqrdmladhq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmladhq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmladhq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmladhq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
-#define __arm_vqrdmladhxq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqrdmladhxq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmladhxq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmladhxq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
 #define __arm_vmullbq_poly_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 22/24] arm: [MVE intrinsics] add ternary_n shape
  2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
                   ` (19 preceding siblings ...)
  2023-05-11 12:19 ` [PATCH 21/24] arm: [MVE intrinsics] rework vqrdmladhq vqrdmladhxq vqrdmlsdhq vqrdmlsdhxq vqdmladhq vqdmladhxq vqdmlsdhq vqdmlsdhxq Christophe Lyon
@ 2023-05-11 12:19 ` Christophe Lyon
  2023-05-11 12:19 ` [PATCH 23/24] arm: [MVE intrinsics] factorize vmlaq_n vmlasq_n vqdmlahq_n vqdmlashq_n vqrdmlahq_n vqrdmlashq_n Christophe Lyon
                   ` (2 subsequent siblings)
  23 siblings, 0 replies; 25+ messages in thread
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the ternary_n shape description.
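
Not part of the patch: a minimal usage sketch of the overloads this shape
describes, based on the vmlaq example quoted in the comment added below.
The wrapper name add_mul_s8 and the target options (e.g. an
-march=armv8.1-m.main+mve toolchain) are assumptions for the example only;
the overloaded calls are expected to resolve as annotated.

  #include <arm_mve.h>

  /* Two vector arguments of the destination type plus a scalar of the
     element type, so the resolver is expected to pick vmlaq_n_s8 and
     vmlaq_m_n_s8 here.  */
  int8x16_t
  add_mul_s8 (int8x16_t add, int8x16_t m1, int8_t m2, mve_pred16_t p)
  {
    int8x16_t t = vmlaq (add, m1, m2);
    return vmlaq_m (t, m1, m2, p);
  }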

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (ternary_n): New.
	* config/arm/arm-mve-builtins-shapes.h (ternary_n): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 27 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 28 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 4455a253579..5a299a272f5 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1189,6 +1189,33 @@ struct ternary_def : public overloaded_base<0>
 };
 SHAPE (ternary)
 
+/* <T0>_t vfoo[_n_t0](<T0>_t, <T0>_t, <S0>_t)
+
+   i.e. the standard shape for ternary operations that operate on a
+   pair of vectors of the same type as the destination, and take a
+   third scalar argument of the same type as the vector elements.
+
+   Example: vmlaq.
+   int8x16_t [__arm_]vmlaq[_n_s8](int8x16_t add, int8x16_t m1, int8_t m2)
+   int8x16_t [__arm_]vmlaq_m[_n_s8](int8x16_t add, int8x16_t m1, int8_t m2, mve_pred16_t p)  */
+struct ternary_n_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
+    build_all (b, "v0,v0,v0,s0", group, MODE_n, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    return r.resolve_uniform (2, 1);
+  }
+};
+SHAPE (ternary_n)
+
 /* <T0>_t vfoo[_t0](<T0>_t)
 
    i.e. the standard shape for unary operations that operate on
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index b3ddd0a9e8d..a28cd6a1547 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -57,6 +57,7 @@ namespace arm_mve
     extern const function_shape *const create;
     extern const function_shape *const inherent;
     extern const function_shape *const ternary;
+    extern const function_shape *const ternary_n;
     extern const function_shape *const unary;
     extern const function_shape *const unary_acc;
     extern const function_shape *const unary_convert;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 23/24] arm: [MVE intrinsics] factorize vmlaq_n vmlasq_n vqdmlahq_n vqdmlashq_n vqrdmlahq_n vqrdmlashq_n
  2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
                   ` (20 preceding siblings ...)
  2023-05-11 12:19 ` [PATCH 22/24] arm: [MVE intrinsics] add ternary_n shape Christophe Lyon
@ 2023-05-11 12:19 ` Christophe Lyon
  2023-05-11 12:19 ` [PATCH 24/24] arm: [MVE intrinsics] rework vmlaq vmlasq vqdmlahq vqdmlashq vqrdmlahq vqrdmlashq Christophe Lyon
  2023-05-11 16:38 ` [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Kyrylo Tkachov
  23 siblings, 0 replies; 25+ messages in thread
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vmlaq_n, vmlasq_n, vqdmlahq_n, vqdmlashq_n, vqrdmlahq_n,
vqrdmlashq_n builtins so that they use the same parameterized names.
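
Not part of the patch: the merged insn pattern is meant to emit exactly the
same instructions as the six patterns it replaces.  An illustrative sketch
at the source level (the function name foo is an assumption and the
register allocation in the comment is only an expectation):

  #include <arm_mve.h>

  int16x8_t
  foo (int16x8_t a, int16x8_t b, int16_t c)
  {
    /* Expected to still assemble to something like
       "vqdmlah.s16 q0, q1, r0", now produced by the single
       parameterized @mve_<mve_insn>q_n_<supf><mode> pattern.  */
    return vqdmlahq_n_s16 (a, b, c);
  }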

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MVE_VMLxQ_N): New.
	(mve_insn): Add vmla, vmlas, vqdmlah, vqdmlash, vqrdmlah,
	vqrdmlash.
	(supf): Add VQDMLAHQ_N_S, VQDMLASHQ_N_S, VQRDMLAHQ_N_S,
	VQRDMLASHQ_N_S.
	* config/arm/mve.md (mve_vmlaq_n_<supf><mode>)
	(mve_vmlasq_n_<supf><mode>, mve_vqdmlahq_n_<supf><mode>)
	(mve_vqdmlashq_n_<supf><mode>, mve_vqrdmlahq_n_<supf><mode>)
	(mve_vqrdmlashq_n_<supf><mode>): Merge into ...
	(@mve_<mve_insn>q_n_<supf><mode>): ... this.
---
 gcc/config/arm/iterators.md | 19 ++++++++
 gcc/config/arm/mve.md       | 93 ++++---------------------------------
 2 files changed, 28 insertions(+), 84 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index c23ca7361c1..abd904da11e 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -699,6 +699,15 @@ (define_int_iterator MVE_VMAXAVMINAQ_M [
 		     VMINAQ_M_S
 		     ])
 
+(define_int_iterator MVE_VMLxQ_N [
+		     VMLAQ_N_S VMLAQ_N_U
+		     VMLASQ_N_S VMLASQ_N_U
+		     VQDMLAHQ_N_S
+		     VQDMLASHQ_N_S
+		     VQRDMLAHQ_N_S
+		     VQRDMLASHQ_N_S
+		     ])
+
 (define_int_iterator MVE_VMLxDAVQ [
 		     VMLADAVQ_S VMLADAVQ_U
 		     VMLADAVXQ_S
@@ -917,7 +926,9 @@ (define_int_attr mve_insn [
 		 (VMLALDAVXQ_P_S "vmlaldavx")
 		 (VMLALDAVXQ_S "vmlaldavx")
 		 (VMLAQ_M_N_S "vmla") (VMLAQ_M_N_U "vmla")
+		 (VMLAQ_N_S "vmla") (VMLAQ_N_U "vmla")
 		 (VMLASQ_M_N_S "vmlas") (VMLASQ_M_N_U "vmlas")
+		 (VMLASQ_N_S "vmlas") (VMLASQ_N_U "vmlas")
 		 (VMLSDAVAQ_P_S "vmlsdava")
 		 (VMLSDAVAQ_S "vmlsdava")
 		 (VMLSDAVAXQ_P_S "vmlsdavax")
@@ -963,7 +974,9 @@ (define_int_attr mve_insn [
 		 (VQDMLADHXQ_M_S "vqdmladhx")
 		 (VQDMLADHXQ_S "vqdmladhx")
 		 (VQDMLAHQ_M_N_S "vqdmlah")
+		 (VQDMLAHQ_N_S "vqdmlah")
 		 (VQDMLASHQ_M_N_S "vqdmlash")
+		 (VQDMLASHQ_N_S "vqdmlash")
 		 (VQDMLSDHQ_M_S "vqdmlsdh")
 		 (VQDMLSDHQ_S "vqdmlsdh")
 		 (VQDMLSDHXQ_M_S "vqdmlsdhx")
@@ -987,7 +1000,9 @@ (define_int_attr mve_insn [
 		 (VQRDMLADHXQ_M_S "vqrdmladhx")
 		 (VQRDMLADHXQ_S "vqrdmladhx")
 		 (VQRDMLAHQ_M_N_S "vqrdmlah")
+		 (VQRDMLAHQ_N_S "vqrdmlah")
 		 (VQRDMLASHQ_M_N_S "vqrdmlash")
+		 (VQRDMLASHQ_N_S "vqrdmlash")
 		 (VQRDMLSDHQ_M_S "vqrdmlsdh")
 		 (VQRDMLSDHQ_S "vqrdmlsdh")
 		 (VQRDMLSDHXQ_M_S "vqrdmlsdhx")
@@ -2406,6 +2421,10 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VQRDMLADHXQ_S "s")
 		       (VQRDMLSDHQ_S "s")
 		       (VQRDMLSDHXQ_S "s")
+		       (VQDMLAHQ_N_S "s")
+		       (VQDMLASHQ_N_S "s")
+		       (VQRDMLAHQ_N_S "s")
+		       (VQRDMLASHQ_N_S "s")
 		       ])
 
 ;; Both kinds of return insn.
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index bf4d18455fe..14634cbf333 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1955,34 +1955,23 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
    (set_attr "length""8")])
 
 ;;
-;; [vmlaq_n_u, vmlaq_n_s])
+;; [vmlaq_n_u, vmlaq_n_s]
+;; [vmlasq_n_u, vmlasq_n_s]
+;; [vqdmlahq_n_s]
+;; [vqdmlashq_n_s]
+;; [vqrdmlahq_n_s]
+;; [vqrdmlashq_n_s]
 ;;
-(define_insn "mve_vmlaq_n_<supf><mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:<V_elem> 3 "s_register_operand" "r")]
-	 VMLAQ_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vmla.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vmlasq_n_u, vmlasq_n_s])
-;;
-(define_insn "mve_vmlasq_n_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
 		       (match_operand:MVE_2 2 "s_register_operand" "w")
 		       (match_operand:<V_elem> 3 "s_register_operand" "r")]
-	 VMLASQ_N))
+	 MVE_VMLxQ_N))
   ]
   "TARGET_HAVE_MVE"
-  "vmlas.<supf>%#<V_sz_elem>	%q0, %q2, %3"
+  "<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
   [(set_attr "type" "mve_move")
 ])
 
@@ -2018,38 +2007,6 @@ (define_insn "@mve_vpselq_<supf><mode>"
   [(set_attr "type" "mve_move")
 ])
 
-;;
-;; [vqdmlahq_n_s])
-;;
-(define_insn "mve_vqdmlahq_n_<supf><mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:<V_elem> 3 "s_register_operand" "r")]
-	 VQDMLAHQ_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqdmlah.s%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vqdmlashq_n_s])
-;;
-(define_insn "mve_vqdmlashq_n_<supf><mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:<V_elem> 3 "s_register_operand" "r")]
-	 VQDMLASHQ_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqdmlash.s%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-])
-
 ;;
 ;; [vqdmladhq_s]
 ;; [vqdmladhxq_s]
@@ -2073,38 +2030,6 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
   [(set_attr "type" "mve_move")
 ])
 
-;;
-;; [vqrdmlahq_n_s])
-;;
-(define_insn "mve_vqrdmlahq_n_<supf><mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:<V_elem> 3 "s_register_operand" "r")]
-	 VQRDMLAHQ_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqrdmlah.s%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vqrdmlashq_n_s])
-;;
-(define_insn "mve_vqrdmlashq_n_<supf><mode>"
-  [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:<V_elem> 3 "s_register_operand" "r")]
-	 VQRDMLASHQ_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqrdmlash.s%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-])
-
 ;;
 ;; [vqrshlq_m_n_s, vqrshlq_m_n_u]
 ;; [vrshlq_m_n_s, vrshlq_m_n_u]
-- 
2.34.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 24/24] arm: [MVE intrinsics] rework vmlaq vmlasq vqdmlahq vqdmlashq vqrdmlahq vqrdmlashq
  2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
                   ` (21 preceding siblings ...)
  2023-05-11 12:19 ` [PATCH 23/24] arm: [MVE intrinsics] factorize vmlaq_n vmlasq_n vqdmlahq_n vqdmlashq_n vqrdmlahq_n vqrdmlashq_n Christophe Lyon
@ 2023-05-11 12:19 ` Christophe Lyon
  2023-05-11 16:38 ` [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Kyrylo Tkachov
  23 siblings, 0 replies; 25+ messages in thread
From: Christophe Lyon @ 2023-05-11 12:19 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vmlaq, vmlasq, vqdmlahq, vqdmlashq, vqrdmlahq, vqrdmlashq
using the new MVE builtins framework.
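
Not part of the patch: user-visible behaviour is intended to be unchanged;
the overloaded spellings keep resolving to the same typed intrinsics, only
the resolution now goes through the new framework instead of the removed
_Generic/#define machinery.  A minimal sketch (the function name f is an
assumption):

  #include <arm_mve.h>

  uint8x16_t
  f (uint8x16_t a, uint8x16_t b, uint8_t c, mve_pred16_t p)
  {
    a = vmlaq (a, b, c);          /* resolves to vmlaq_n_u8 */
    return vmlaq_m (a, b, c, p);  /* resolves to vmlaq_m_n_u8 */
  }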

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vmlaq, vmlasq, vqdmlahq)
	(vqdmlashq, vqrdmlahq, vqrdmlashq): New.
	* config/arm/arm-mve-builtins-base.def (vmlaq, vmlasq, vqdmlahq)
	(vqdmlashq, vqrdmlahq, vqrdmlashq): New.
	* config/arm/arm-mve-builtins-base.h (vmlaq, vmlasq, vqdmlahq)
	(vqdmlashq, vqrdmlahq, vqrdmlashq): New.
	* config/arm/arm-mve-builtins.cc
	(function_instance::has_inactive_argument): Handle vmlaq, vmlasq,
	vqdmlahq, vqdmlashq, vqrdmlahq, vqrdmlashq.
	* config/arm/arm_mve.h (vqrdmlashq): Remove.
	(vqrdmlahq): Remove.
	(vqdmlashq): Remove.
	(vqdmlahq): Remove.
	(vmlasq): Remove.
	(vmlaq): Remove.
	(vmlaq_m): Remove.
	(vmlasq_m): Remove.
	(vqdmlashq_m): Remove.
	(vqdmlahq_m): Remove.
	(vqrdmlahq_m): Remove.
	(vqrdmlashq_m): Remove.
	(vmlasq_n_u8): Remove.
	(vmlaq_n_u8): Remove.
	(vqrdmlashq_n_s8): Remove.
	(vqrdmlahq_n_s8): Remove.
	(vqdmlahq_n_s8): Remove.
	(vqdmlashq_n_s8): Remove.
	(vmlasq_n_s8): Remove.
	(vmlaq_n_s8): Remove.
	(vmlasq_n_u16): Remove.
	(vmlaq_n_u16): Remove.
	(vqrdmlashq_n_s16): Remove.
	(vqrdmlahq_n_s16): Remove.
	(vqdmlashq_n_s16): Remove.
	(vqdmlahq_n_s16): Remove.
	(vmlasq_n_s16): Remove.
	(vmlaq_n_s16): Remove.
	(vmlasq_n_u32): Remove.
	(vmlaq_n_u32): Remove.
	(vqrdmlashq_n_s32): Remove.
	(vqrdmlahq_n_s32): Remove.
	(vqdmlashq_n_s32): Remove.
	(vqdmlahq_n_s32): Remove.
	(vmlasq_n_s32): Remove.
	(vmlaq_n_s32): Remove.
	(vmlaq_m_n_s8): Remove.
	(vmlaq_m_n_s32): Remove.
	(vmlaq_m_n_s16): Remove.
	(vmlaq_m_n_u8): Remove.
	(vmlaq_m_n_u32): Remove.
	(vmlaq_m_n_u16): Remove.
	(vmlasq_m_n_s8): Remove.
	(vmlasq_m_n_s32): Remove.
	(vmlasq_m_n_s16): Remove.
	(vmlasq_m_n_u8): Remove.
	(vmlasq_m_n_u32): Remove.
	(vmlasq_m_n_u16): Remove.
	(vqdmlashq_m_n_s8): Remove.
	(vqdmlashq_m_n_s32): Remove.
	(vqdmlashq_m_n_s16): Remove.
	(vqdmlahq_m_n_s8): Remove.
	(vqdmlahq_m_n_s32): Remove.
	(vqdmlahq_m_n_s16): Remove.
	(vqrdmlahq_m_n_s8): Remove.
	(vqrdmlahq_m_n_s32): Remove.
	(vqrdmlahq_m_n_s16): Remove.
	(vqrdmlashq_m_n_s8): Remove.
	(vqrdmlashq_m_n_s32): Remove.
	(vqrdmlashq_m_n_s16): Remove.
	(__arm_vmlasq_n_u8): Remove.
	(__arm_vmlaq_n_u8): Remove.
	(__arm_vqrdmlashq_n_s8): Remove.
	(__arm_vqdmlashq_n_s8): Remove.
	(__arm_vqrdmlahq_n_s8): Remove.
	(__arm_vqdmlahq_n_s8): Remove.
	(__arm_vmlasq_n_s8): Remove.
	(__arm_vmlaq_n_s8): Remove.
	(__arm_vmlasq_n_u16): Remove.
	(__arm_vmlaq_n_u16): Remove.
	(__arm_vqrdmlashq_n_s16): Remove.
	(__arm_vqdmlashq_n_s16): Remove.
	(__arm_vqrdmlahq_n_s16): Remove.
	(__arm_vqdmlahq_n_s16): Remove.
	(__arm_vmlasq_n_s16): Remove.
	(__arm_vmlaq_n_s16): Remove.
	(__arm_vmlasq_n_u32): Remove.
	(__arm_vmlaq_n_u32): Remove.
	(__arm_vqrdmlashq_n_s32): Remove.
	(__arm_vqdmlashq_n_s32): Remove.
	(__arm_vqrdmlahq_n_s32): Remove.
	(__arm_vqdmlahq_n_s32): Remove.
	(__arm_vmlasq_n_s32): Remove.
	(__arm_vmlaq_n_s32): Remove.
	(__arm_vmlaq_m_n_s8): Remove.
	(__arm_vmlaq_m_n_s32): Remove.
	(__arm_vmlaq_m_n_s16): Remove.
	(__arm_vmlaq_m_n_u8): Remove.
	(__arm_vmlaq_m_n_u32): Remove.
	(__arm_vmlaq_m_n_u16): Remove.
	(__arm_vmlasq_m_n_s8): Remove.
	(__arm_vmlasq_m_n_s32): Remove.
	(__arm_vmlasq_m_n_s16): Remove.
	(__arm_vmlasq_m_n_u8): Remove.
	(__arm_vmlasq_m_n_u32): Remove.
	(__arm_vmlasq_m_n_u16): Remove.
	(__arm_vqdmlahq_m_n_s8): Remove.
	(__arm_vqdmlahq_m_n_s32): Remove.
	(__arm_vqdmlahq_m_n_s16): Remove.
	(__arm_vqrdmlahq_m_n_s8): Remove.
	(__arm_vqrdmlahq_m_n_s32): Remove.
	(__arm_vqrdmlahq_m_n_s16): Remove.
	(__arm_vqrdmlashq_m_n_s8): Remove.
	(__arm_vqrdmlashq_m_n_s32): Remove.
	(__arm_vqrdmlashq_m_n_s16): Remove.
	(__arm_vqdmlashq_m_n_s8): Remove.
	(__arm_vqdmlashq_m_n_s16): Remove.
	(__arm_vqdmlashq_m_n_s32): Remove.
	(__arm_vmlasq): Remove.
	(__arm_vmlaq): Remove.
	(__arm_vqrdmlashq): Remove.
	(__arm_vqdmlashq): Remove.
	(__arm_vqrdmlahq): Remove.
	(__arm_vqdmlahq): Remove.
	(__arm_vmlaq_m): Remove.
	(__arm_vmlasq_m): Remove.
	(__arm_vqdmlahq_m): Remove.
	(__arm_vqrdmlahq_m): Remove.
	(__arm_vqrdmlashq_m): Remove.
	(__arm_vqdmlashq_m): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   6 +
 gcc/config/arm/arm-mve-builtins-base.def |   6 +
 gcc/config/arm/arm-mve-builtins-base.h   |   6 +
 gcc/config/arm/arm-mve-builtins.cc       |   6 +
 gcc/config/arm/arm_mve.h                 | 934 +----------------------
 5 files changed, 44 insertions(+), 914 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 95fa2d27049..ca2fb67a07c 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -289,6 +289,8 @@ FUNCTION_PRED_P_S_U (vmlaldavaq, VMLALDAVAQ)
 FUNCTION_PRED_P_S (vmlaldavaxq, VMLALDAVAXQ)
 FUNCTION_PRED_P_S_U (vmlaldavq, VMLALDAVQ)
 FUNCTION_PRED_P_S (vmlaldavxq, VMLALDAVXQ)
+FUNCTION_ONLY_N_NO_F (vmlaq, VMLAQ)
+FUNCTION_ONLY_N_NO_F (vmlasq, VMLASQ)
 FUNCTION_PRED_P_S (vmlsdavaq, VMLSDAVAQ)
 FUNCTION_PRED_P_S (vmlsdavaxq, VMLSDAVAXQ)
 FUNCTION_PRED_P_S (vmlsdavq, VMLSDAVQ)
@@ -313,11 +315,15 @@ FUNCTION_WITHOUT_N_NO_U_F (vqmovunbq, VQMOVUNBQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqmovuntq, VQMOVUNTQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqdmladhq, VQDMLADHQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqdmladhxq, VQDMLADHXQ)
+FUNCTION_ONLY_N_NO_U_F (vqdmlahq, VQDMLAHQ)
+FUNCTION_ONLY_N_NO_U_F (vqdmlashq, VQDMLASHQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqdmlsdhq, VQDMLSDHQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqdmlsdhxq, VQDMLSDHXQ)
 FUNCTION_WITH_M_N_NO_U_F (vqdmulhq, VQDMULHQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqrdmladhq, VQRDMLADHQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqrdmladhxq, VQRDMLADHXQ)
+FUNCTION_ONLY_N_NO_U_F (vqrdmlahq, VQRDMLAHQ)
+FUNCTION_ONLY_N_NO_U_F (vqrdmlashq, VQRDMLASHQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqrdmlsdhq, VQRDMLSDHQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqrdmlsdhxq, VQRDMLSDHXQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqnegq, VQNEGQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 5c06ebc15cc..601384d5a95 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -58,6 +58,8 @@ DEF_MVE_FUNCTION (vmlaldavaq, binary_acca_int64, integer_16_32, p_or_none)
 DEF_MVE_FUNCTION (vmlaldavaxq, binary_acca_int64, signed_16_32, p_or_none)
 DEF_MVE_FUNCTION (vmlaldavq, binary_acc_int64, integer_16_32, p_or_none)
 DEF_MVE_FUNCTION (vmlaldavxq, binary_acc_int64, signed_16_32, p_or_none)
+DEF_MVE_FUNCTION (vmlaq, ternary_n, all_integer, m_or_none)
+DEF_MVE_FUNCTION (vmlasq, ternary_n, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vmlsdavaq, binary_acca_int32, all_signed, p_or_none)
 DEF_MVE_FUNCTION (vmlsdavaxq, binary_acca_int32, all_signed, p_or_none)
 DEF_MVE_FUNCTION (vmlsdavq, binary_acc_int32, all_integer, p_or_none)
@@ -78,6 +80,8 @@ DEF_MVE_FUNCTION (vqabsq, unary, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqaddq, binary_opt_n, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vqdmladhq, ternary, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqdmladhxq, ternary, all_signed, m_or_none)
+DEF_MVE_FUNCTION (vqdmlahq, ternary_n, all_signed, m_or_none)
+DEF_MVE_FUNCTION (vqdmlashq, ternary_n, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqdmlsdhq, ternary, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqdmlsdhxq, ternary, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqdmulhq, binary_opt_n, all_signed, m_or_none)
@@ -88,6 +92,8 @@ DEF_MVE_FUNCTION (vqmovuntq, binary_move_narrow_unsigned, signed_16_32, m_or_non
 DEF_MVE_FUNCTION (vqnegq, unary, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqrdmladhq, ternary, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqrdmladhxq, ternary, all_signed, m_or_none)
+DEF_MVE_FUNCTION (vqrdmlahq, ternary_n, all_signed, m_or_none)
+DEF_MVE_FUNCTION (vqrdmlashq, ternary_n, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqrdmlsdhq, ternary, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqrdmlsdhxq, ternary, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqrdmulhq, binary_opt_n, all_signed, m_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 194cb170958..574ed97a4b3 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -71,6 +71,8 @@ extern const function_base *const vmlaldavaq;
 extern const function_base *const vmlaldavaxq;
 extern const function_base *const vmlaldavq;
 extern const function_base *const vmlaldavxq;
+extern const function_base *const vmlaq;
+extern const function_base *const vmlasq;
 extern const function_base *const vmlsdavaq;
 extern const function_base *const vmlsdavaxq;
 extern const function_base *const vmlsdavq;
@@ -91,6 +93,8 @@ extern const function_base *const vqabsq;
 extern const function_base *const vqaddq;
 extern const function_base *const vqdmladhq;
 extern const function_base *const vqdmladhxq;
+extern const function_base *const vqdmlahq;
+extern const function_base *const vqdmlashq;
 extern const function_base *const vqdmlsdhq;
 extern const function_base *const vqdmlsdhxq;
 extern const function_base *const vqdmulhq;
@@ -101,6 +105,8 @@ extern const function_base *const vqmovuntq;
 extern const function_base *const vqnegq;
 extern const function_base *const vqrdmladhq;
 extern const function_base *const vqrdmladhxq;
+extern const function_base *const vqrdmlahq;
+extern const function_base *const vqrdmlashq;
 extern const function_base *const vqrdmlsdhq;
 extern const function_base *const vqrdmlsdhxq;
 extern const function_base *const vqrdmulhq;
diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index 2095d96b593..c157a3ec8a3 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -682,6 +682,8 @@ function_instance::has_inactive_argument () const
       || base == functions::vmaxnmaq
       || base == functions::vminaq
       || base == functions::vminnmaq
+      || base == functions::vmlaq
+      || base == functions::vmlasq
       || base == functions::vmovnbq
       || base == functions::vmovntq
       || base == functions::vqmovnbq
@@ -691,10 +693,14 @@ function_instance::has_inactive_argument () const
       || (base == functions::vorrq && mode_suffix_id == MODE_n)
       || base == functions::vqdmladhq
       || base == functions::vqdmladhxq
+      || base == functions::vqdmlahq
+      || base == functions::vqdmlashq
       || base == functions::vqdmlsdhq
       || base == functions::vqdmlsdhxq
       || base == functions::vqrdmladhq
       || base == functions::vqrdmladhxq
+      || base == functions::vqrdmlahq
+      || base == functions::vqrdmlashq
       || base == functions::vqrdmlsdhq
       || base == functions::vqrdmlsdhxq
       || (base == functions::vqrshlq && mode_suffix_id == MODE_n)
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index bb503037cc3..09b9564ed48 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -61,13 +61,7 @@
 #define vrmlaldavhaq(__a, __b, __c) __arm_vrmlaldavhaq(__a, __b, __c)
 #define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
 #define vpselq(__a, __b, __p) __arm_vpselq(__a, __b, __p)
-#define vqrdmlashq(__a, __b, __c) __arm_vqrdmlashq(__a, __b, __c)
-#define vqrdmlahq(__a, __b, __c) __arm_vqrdmlahq(__a, __b, __c)
-#define vqdmlashq(__a, __b, __c) __arm_vqdmlashq(__a, __b, __c)
-#define vqdmlahq(__a, __b, __c) __arm_vqdmlahq(__a, __b, __c)
 #define vmvnq_m(__inactive, __a, __p) __arm_vmvnq_m(__inactive, __a, __p)
-#define vmlasq(__a, __b, __c) __arm_vmlasq(__a, __b, __c)
-#define vmlaq(__a, __b, __c) __arm_vmlaq(__a, __b, __c)
 #define vsriq(__a, __b, __imm) __arm_vsriq(__a, __b, __imm)
 #define vsliq(__a, __b, __imm) __arm_vsliq(__a, __b, __imm)
 #define vrmlaldavhaxq(__a, __b, __c) __arm_vrmlaldavhaxq(__a, __b, __c)
@@ -81,15 +75,9 @@
 #define vcaddq_rot90_m(__inactive, __a, __b, __p) __arm_vcaddq_rot90_m(__inactive, __a, __b, __p)
 #define vhcaddq_rot270_m(__inactive, __a, __b, __p) __arm_vhcaddq_rot270_m(__inactive, __a, __b, __p)
 #define vhcaddq_rot90_m(__inactive, __a, __b, __p) __arm_vhcaddq_rot90_m(__inactive, __a, __b, __p)
-#define vmlaq_m(__a, __b, __c, __p) __arm_vmlaq_m(__a, __b, __c, __p)
-#define vmlasq_m(__a, __b, __c, __p) __arm_vmlasq_m(__a, __b, __c, __p)
 #define vmullbq_int_m(__inactive, __a, __b, __p) __arm_vmullbq_int_m(__inactive, __a, __b, __p)
 #define vmulltq_int_m(__inactive, __a, __b, __p) __arm_vmulltq_int_m(__inactive, __a, __b, __p)
 #define vornq_m(__inactive, __a, __b, __p) __arm_vornq_m(__inactive, __a, __b, __p)
-#define vqdmlashq_m(__a, __b, __c, __p) __arm_vqdmlashq_m(__a, __b, __c, __p)
-#define vqdmlahq_m(__a, __b, __c, __p) __arm_vqdmlahq_m(__a, __b, __c, __p)
-#define vqrdmlahq_m(__a, __b, __c, __p) __arm_vqrdmlahq_m(__a, __b, __c, __p)
-#define vqrdmlashq_m(__a, __b, __c, __p) __arm_vqrdmlashq_m(__a, __b, __c, __p)
 #define vsliq_m(__a, __b, __imm, __p) __arm_vsliq_m(__a, __b, __imm, __p)
 #define vmullbq_poly_m(__inactive, __a, __b, __p) __arm_vmullbq_poly_m(__inactive, __a, __b, __p)
 #define vmulltq_poly_m(__inactive, __a, __b, __p) __arm_vmulltq_poly_m(__inactive, __a, __b, __p)
@@ -413,49 +401,25 @@
 #define vpselq_u8(__a, __b, __p) __arm_vpselq_u8(__a, __b, __p)
 #define vpselq_s8(__a, __b, __p) __arm_vpselq_s8(__a, __b, __p)
 #define vmvnq_m_u8(__inactive, __a, __p) __arm_vmvnq_m_u8(__inactive, __a, __p)
-#define vmlasq_n_u8(__a, __b, __c) __arm_vmlasq_n_u8(__a, __b, __c)
-#define vmlaq_n_u8(__a, __b, __c) __arm_vmlaq_n_u8(__a, __b, __c)
 #define vsriq_n_u8(__a, __b,  __imm) __arm_vsriq_n_u8(__a, __b,  __imm)
 #define vsliq_n_u8(__a, __b,  __imm) __arm_vsliq_n_u8(__a, __b,  __imm)
 #define vmvnq_m_s8(__inactive, __a, __p) __arm_vmvnq_m_s8(__inactive, __a, __p)
-#define vqrdmlashq_n_s8(__a, __b, __c) __arm_vqrdmlashq_n_s8(__a, __b, __c)
-#define vqrdmlahq_n_s8(__a, __b, __c) __arm_vqrdmlahq_n_s8(__a, __b, __c)
-#define vqdmlahq_n_s8(__a, __b, __c) __arm_vqdmlahq_n_s8(__a, __b, __c)
-#define vqdmlashq_n_s8(__a, __b, __c) __arm_vqdmlashq_n_s8(__a, __b, __c)
-#define vmlasq_n_s8(__a, __b, __c) __arm_vmlasq_n_s8(__a, __b, __c)
-#define vmlaq_n_s8(__a, __b, __c) __arm_vmlaq_n_s8(__a, __b, __c)
 #define vsriq_n_s8(__a, __b,  __imm) __arm_vsriq_n_s8(__a, __b,  __imm)
 #define vsliq_n_s8(__a, __b,  __imm) __arm_vsliq_n_s8(__a, __b,  __imm)
 #define vpselq_u16(__a, __b, __p) __arm_vpselq_u16(__a, __b, __p)
 #define vpselq_s16(__a, __b, __p) __arm_vpselq_s16(__a, __b, __p)
 #define vmvnq_m_u16(__inactive, __a, __p) __arm_vmvnq_m_u16(__inactive, __a, __p)
-#define vmlasq_n_u16(__a, __b, __c) __arm_vmlasq_n_u16(__a, __b, __c)
-#define vmlaq_n_u16(__a, __b, __c) __arm_vmlaq_n_u16(__a, __b, __c)
 #define vsriq_n_u16(__a, __b,  __imm) __arm_vsriq_n_u16(__a, __b,  __imm)
 #define vsliq_n_u16(__a, __b,  __imm) __arm_vsliq_n_u16(__a, __b,  __imm)
 #define vmvnq_m_s16(__inactive, __a, __p) __arm_vmvnq_m_s16(__inactive, __a, __p)
-#define vqrdmlashq_n_s16(__a, __b, __c) __arm_vqrdmlashq_n_s16(__a, __b, __c)
-#define vqrdmlahq_n_s16(__a, __b, __c) __arm_vqrdmlahq_n_s16(__a, __b, __c)
-#define vqdmlashq_n_s16(__a, __b, __c) __arm_vqdmlashq_n_s16(__a, __b, __c)
-#define vqdmlahq_n_s16(__a, __b, __c) __arm_vqdmlahq_n_s16(__a, __b, __c)
-#define vmlasq_n_s16(__a, __b, __c) __arm_vmlasq_n_s16(__a, __b, __c)
-#define vmlaq_n_s16(__a, __b, __c) __arm_vmlaq_n_s16(__a, __b, __c)
 #define vsriq_n_s16(__a, __b,  __imm) __arm_vsriq_n_s16(__a, __b,  __imm)
 #define vsliq_n_s16(__a, __b,  __imm) __arm_vsliq_n_s16(__a, __b,  __imm)
 #define vpselq_u32(__a, __b, __p) __arm_vpselq_u32(__a, __b, __p)
 #define vpselq_s32(__a, __b, __p) __arm_vpselq_s32(__a, __b, __p)
 #define vmvnq_m_u32(__inactive, __a, __p) __arm_vmvnq_m_u32(__inactive, __a, __p)
-#define vmlasq_n_u32(__a, __b, __c) __arm_vmlasq_n_u32(__a, __b, __c)
-#define vmlaq_n_u32(__a, __b, __c) __arm_vmlaq_n_u32(__a, __b, __c)
 #define vsriq_n_u32(__a, __b,  __imm) __arm_vsriq_n_u32(__a, __b,  __imm)
 #define vsliq_n_u32(__a, __b,  __imm) __arm_vsliq_n_u32(__a, __b,  __imm)
 #define vmvnq_m_s32(__inactive, __a, __p) __arm_vmvnq_m_s32(__inactive, __a, __p)
-#define vqrdmlashq_n_s32(__a, __b, __c) __arm_vqrdmlashq_n_s32(__a, __b, __c)
-#define vqrdmlahq_n_s32(__a, __b, __c) __arm_vqrdmlahq_n_s32(__a, __b, __c)
-#define vqdmlashq_n_s32(__a, __b, __c) __arm_vqdmlashq_n_s32(__a, __b, __c)
-#define vqdmlahq_n_s32(__a, __b, __c) __arm_vqdmlahq_n_s32(__a, __b, __c)
-#define vmlasq_n_s32(__a, __b, __c) __arm_vmlasq_n_s32(__a, __b, __c)
-#define vmlaq_n_s32(__a, __b, __c) __arm_vmlaq_n_s32(__a, __b, __c)
 #define vsriq_n_s32(__a, __b,  __imm) __arm_vsriq_n_s32(__a, __b,  __imm)
 #define vsliq_n_s32(__a, __b,  __imm) __arm_vsliq_n_s32(__a, __b,  __imm)
 #define vpselq_u64(__a, __b, __p) __arm_vpselq_u64(__a, __b, __p)
@@ -548,18 +512,6 @@
 #define vhcaddq_rot90_m_s8(__inactive, __a, __b, __p) __arm_vhcaddq_rot90_m_s8(__inactive, __a, __b, __p)
 #define vhcaddq_rot90_m_s32(__inactive, __a, __b, __p) __arm_vhcaddq_rot90_m_s32(__inactive, __a, __b, __p)
 #define vhcaddq_rot90_m_s16(__inactive, __a, __b, __p) __arm_vhcaddq_rot90_m_s16(__inactive, __a, __b, __p)
-#define vmlaq_m_n_s8(__a, __b, __c, __p) __arm_vmlaq_m_n_s8(__a, __b, __c, __p)
-#define vmlaq_m_n_s32(__a, __b, __c, __p) __arm_vmlaq_m_n_s32(__a, __b, __c, __p)
-#define vmlaq_m_n_s16(__a, __b, __c, __p) __arm_vmlaq_m_n_s16(__a, __b, __c, __p)
-#define vmlaq_m_n_u8(__a, __b, __c, __p) __arm_vmlaq_m_n_u8(__a, __b, __c, __p)
-#define vmlaq_m_n_u32(__a, __b, __c, __p) __arm_vmlaq_m_n_u32(__a, __b, __c, __p)
-#define vmlaq_m_n_u16(__a, __b, __c, __p) __arm_vmlaq_m_n_u16(__a, __b, __c, __p)
-#define vmlasq_m_n_s8(__a, __b, __c, __p) __arm_vmlasq_m_n_s8(__a, __b, __c, __p)
-#define vmlasq_m_n_s32(__a, __b, __c, __p) __arm_vmlasq_m_n_s32(__a, __b, __c, __p)
-#define vmlasq_m_n_s16(__a, __b, __c, __p) __arm_vmlasq_m_n_s16(__a, __b, __c, __p)
-#define vmlasq_m_n_u8(__a, __b, __c, __p) __arm_vmlasq_m_n_u8(__a, __b, __c, __p)
-#define vmlasq_m_n_u32(__a, __b, __c, __p) __arm_vmlasq_m_n_u32(__a, __b, __c, __p)
-#define vmlasq_m_n_u16(__a, __b, __c, __p) __arm_vmlasq_m_n_u16(__a, __b, __c, __p)
 #define vmullbq_int_m_s8(__inactive, __a, __b, __p) __arm_vmullbq_int_m_s8(__inactive, __a, __b, __p)
 #define vmullbq_int_m_s32(__inactive, __a, __b, __p) __arm_vmullbq_int_m_s32(__inactive, __a, __b, __p)
 #define vmullbq_int_m_s16(__inactive, __a, __b, __p) __arm_vmullbq_int_m_s16(__inactive, __a, __b, __p)
@@ -578,18 +530,6 @@
 #define vornq_m_u8(__inactive, __a, __b, __p) __arm_vornq_m_u8(__inactive, __a, __b, __p)
 #define vornq_m_u32(__inactive, __a, __b, __p) __arm_vornq_m_u32(__inactive, __a, __b, __p)
 #define vornq_m_u16(__inactive, __a, __b, __p) __arm_vornq_m_u16(__inactive, __a, __b, __p)
-#define vqdmlashq_m_n_s8(__a, __b, __c, __p) __arm_vqdmlashq_m_n_s8(__a, __b, __c, __p)
-#define vqdmlashq_m_n_s32(__a, __b, __c, __p) __arm_vqdmlashq_m_n_s32(__a, __b, __c, __p)
-#define vqdmlashq_m_n_s16(__a, __b, __c, __p) __arm_vqdmlashq_m_n_s16(__a, __b, __c, __p)
-#define vqdmlahq_m_n_s8(__a, __b, __c, __p) __arm_vqdmlahq_m_n_s8(__a, __b, __c, __p)
-#define vqdmlahq_m_n_s32(__a, __b, __c, __p) __arm_vqdmlahq_m_n_s32(__a, __b, __c, __p)
-#define vqdmlahq_m_n_s16(__a, __b, __c, __p) __arm_vqdmlahq_m_n_s16(__a, __b, __c, __p)
-#define vqrdmlahq_m_n_s8(__a, __b, __c, __p) __arm_vqrdmlahq_m_n_s8(__a, __b, __c, __p)
-#define vqrdmlahq_m_n_s32(__a, __b, __c, __p) __arm_vqrdmlahq_m_n_s32(__a, __b, __c, __p)
-#define vqrdmlahq_m_n_s16(__a, __b, __c, __p) __arm_vqrdmlahq_m_n_s16(__a, __b, __c, __p)
-#define vqrdmlashq_m_n_s8(__a, __b, __c, __p) __arm_vqrdmlashq_m_n_s8(__a, __b, __c, __p)
-#define vqrdmlashq_m_n_s32(__a, __b, __c, __p) __arm_vqrdmlashq_m_n_s32(__a, __b, __c, __p)
-#define vqrdmlashq_m_n_s16(__a, __b, __c, __p) __arm_vqrdmlashq_m_n_s16(__a, __b, __c, __p)
 #define vsliq_m_n_s8(__a, __b,  __imm, __p) __arm_vsliq_m_n_s8(__a, __b,  __imm, __p)
 #define vsliq_m_n_s32(__a, __b,  __imm, __p) __arm_vsliq_m_n_s32(__a, __b,  __imm, __p)
 #define vsliq_m_n_s16(__a, __b,  __imm, __p) __arm_vsliq_m_n_s16(__a, __b,  __imm, __p)
@@ -1954,20 +1894,6 @@ __arm_vmvnq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_uv16qi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq_n_u8 (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
-{
-  return __builtin_mve_vmlasq_n_uv16qi (__a, __b, __c);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq_n_u8 (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
-{
-  return __builtin_mve_vmlaq_n_uv16qi (__a, __b, __c);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm)
@@ -1989,48 +1915,6 @@ __arm_vmvnq_m_s8 (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_sv16qi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
-{
-  return __builtin_mve_vqrdmlashq_n_sv16qi (__a, __b, __c);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlashq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
-{
-  return __builtin_mve_vqdmlashq_n_sv16qi (__a, __b, __c);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
-{
-  return __builtin_mve_vqrdmlahq_n_sv16qi (__a, __b, __c);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
-{
-  return __builtin_mve_vqdmlahq_n_sv16qi (__a, __b, __c);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
-{
-  return __builtin_mve_vmlasq_n_sv16qi (__a, __b, __c);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
-{
-  return __builtin_mve_vmlaq_n_sv16qi (__a, __b, __c);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm)
@@ -2066,20 +1950,6 @@ __arm_vmvnq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_uv8hi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
-{
-  return __builtin_mve_vmlasq_n_uv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
-{
-  return __builtin_mve_vmlaq_n_uv8hi (__a, __b, __c);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm)
@@ -2101,48 +1971,6 @@ __arm_vmvnq_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_sv8hi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
-{
-  return __builtin_mve_vqrdmlashq_n_sv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlashq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
-{
-  return __builtin_mve_vqdmlashq_n_sv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
-{
-  return __builtin_mve_vqrdmlahq_n_sv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
-{
-  return __builtin_mve_vqdmlahq_n_sv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
-{
-  return __builtin_mve_vmlasq_n_sv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
-{
-  return __builtin_mve_vmlaq_n_sv8hi (__a, __b, __c);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __imm)
@@ -2178,20 +2006,6 @@ __arm_vmvnq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_uv4si (__inactive, __a, __p);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
-{
-  return __builtin_mve_vmlasq_n_uv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
-{
-  return __builtin_mve_vmlaq_n_uv4si (__a, __b, __c);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm)
@@ -2213,48 +2027,6 @@ __arm_vmvnq_m_s32 (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
   return __builtin_mve_vmvnq_m_sv4si (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
-{
-  return __builtin_mve_vqrdmlashq_n_sv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlashq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
-{
-  return __builtin_mve_vqdmlashq_n_sv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
-{
-  return __builtin_mve_vqrdmlahq_n_sv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
-{
-  return __builtin_mve_vqdmlahq_n_sv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
-{
-  return __builtin_mve_vmlasq_n_sv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
-{
-  return __builtin_mve_vmlaq_n_sv4si (__a, __b, __c);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __imm)
@@ -2605,90 +2377,6 @@ __arm_vhcaddq_rot90_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, m
   return __builtin_mve_vhcaddq_rot90_m_sv8hi (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq_m_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaq_m_n_sv16qi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq_m_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaq_m_n_sv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq_m_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaq_m_n_sv8hi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq_m_n_u8 (uint8x16_t __a, uint8x16_t __b, uint8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaq_m_n_uv16qi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq_m_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaq_m_n_uv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq_m_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaq_m_n_uv8hi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq_m_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlasq_m_n_sv16qi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq_m_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlasq_m_n_sv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq_m_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlasq_m_n_sv8hi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq_m_n_u8 (uint8x16_t __a, uint8x16_t __b, uint8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlasq_m_n_uv16qi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq_m_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlasq_m_n_uv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq_m_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlasq_m_n_uv8hi (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmullbq_int_m_s8 (int16x8_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -2815,90 +2503,6 @@ __arm_vornq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pr
   return __builtin_mve_vornq_m_uv8hi (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq_m_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmlahq_m_n_sv16qi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq_m_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmlahq_m_n_sv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq_m_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmlahq_m_n_sv8hi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq_m_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmlahq_m_n_sv16qi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq_m_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmlahq_m_n_sv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq_m_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmlahq_m_n_sv8hi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq_m_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmlashq_m_n_sv16qi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq_m_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmlashq_m_n_sv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq_m_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vqrdmlashq_m_n_sv8hi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlashq_m_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmlashq_m_n_sv16qi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlashq_m_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmlashq_m_n_sv8hi (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlashq_m_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmlashq_m_n_sv4si (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsliq_m_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -8518,20 +8122,6 @@ __arm_vmvnq_m (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
  return __arm_vmvnq_m_u8 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
-{
- return __arm_vmlasq_n_u8 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
-{
- return __arm_vmlaq_n_u8 (__a, __b, __c);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (uint8x16_t __a, uint8x16_t __b, const int __imm)
@@ -8553,158 +8143,60 @@ __arm_vmvnq_m (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
  return __arm_vmvnq_m_s8 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq (int8x16_t __a, int8x16_t __b, int8_t __c)
-{
- return __arm_vqrdmlashq_n_s8 (__a, __b, __c);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlashq (int8x16_t __a, int8x16_t __b, int8_t __c)
-{
- return __arm_vqdmlashq_n_s8 (__a, __b, __c);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq (int8x16_t __a, int8x16_t __b, int8_t __c)
-{
- return __arm_vqrdmlahq_n_s8 (__a, __b, __c);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq (int8x16_t __a, int8x16_t __b, int8_t __c)
-{
- return __arm_vqdmlahq_n_s8 (__a, __b, __c);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq (int8x16_t __a, int8x16_t __b, int8_t __c)
-{
- return __arm_vmlasq_n_s8 (__a, __b, __c);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq (int8x16_t __a, int8x16_t __b, int8_t __c)
-{
- return __arm_vmlaq_n_s8 (__a, __b, __c);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (int8x16_t __a, int8x16_t __b, const int __imm)
 {
- return __arm_vsriq_n_s8 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq (int8x16_t __a, int8x16_t __b, const int __imm)
-{
- return __arm_vsliq_n_s8 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vpselq_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vpselq_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vmvnq_m_u16 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
-{
- return __arm_vmlasq_n_u16 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
-{
- return __arm_vmlaq_n_u16 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq (uint16x8_t __a, uint16x8_t __b, const int __imm)
-{
- return __arm_vsriq_n_u16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq (uint16x8_t __a, uint16x8_t __b, const int __imm)
-{
- return __arm_vsliq_n_u16 (__a, __b, __imm);
+ return __arm_vsriq_n_s8 (__a, __b, __imm);
 }
 
-__extension__ extern __inline int16x8_t
+__extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
+__arm_vsliq (int8x16_t __a, int8x16_t __b, const int __imm)
 {
- return __arm_vmvnq_m_s16 (__inactive, __a, __p);
+ return __arm_vsliq_n_s8 (__a, __b, __imm);
 }
 
-__extension__ extern __inline int16x8_t
+__extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq (int16x8_t __a, int16x8_t __b, int16_t __c)
+__arm_vpselq (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
 {
- return __arm_vqrdmlashq_n_s16 (__a, __b, __c);
+ return __arm_vpselq_u16 (__a, __b, __p);
 }
 
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlashq (int16x8_t __a, int16x8_t __b, int16_t __c)
+__arm_vpselq (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
 {
- return __arm_vqdmlashq_n_s16 (__a, __b, __c);
+ return __arm_vpselq_s16 (__a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
+__extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq (int16x8_t __a, int16x8_t __b, int16_t __c)
+__arm_vmvnq_m (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
 {
- return __arm_vqrdmlahq_n_s16 (__a, __b, __c);
+ return __arm_vmvnq_m_u16 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int16x8_t
+__extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq (int16x8_t __a, int16x8_t __b, int16_t __c)
+__arm_vsriq (uint16x8_t __a, uint16x8_t __b, const int __imm)
 {
- return __arm_vqdmlahq_n_s16 (__a, __b, __c);
+ return __arm_vsriq_n_u16 (__a, __b, __imm);
 }
 
-__extension__ extern __inline int16x8_t
+__extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq (int16x8_t __a, int16x8_t __b, int16_t __c)
+__arm_vsliq (uint16x8_t __a, uint16x8_t __b, const int __imm)
 {
- return __arm_vmlasq_n_s16 (__a, __b, __c);
+ return __arm_vsliq_n_u16 (__a, __b, __imm);
 }
 
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq (int16x8_t __a, int16x8_t __b, int16_t __c)
+__arm_vmvnq_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
 {
- return __arm_vmlaq_n_s16 (__a, __b, __c);
+ return __arm_vmvnq_m_s16 (__inactive, __a, __p);
 }
 
 __extension__ extern __inline int16x8_t
@@ -8742,20 +8234,6 @@ __arm_vmvnq_m (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p)
  return __arm_vmvnq_m_u32 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
-{
- return __arm_vmlasq_n_u32 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
-{
- return __arm_vmlaq_n_u32 (__a, __b, __c);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (uint32x4_t __a, uint32x4_t __b, const int __imm)
@@ -8777,48 +8255,6 @@ __arm_vmvnq_m (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
  return __arm_vmvnq_m_s32 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq (int32x4_t __a, int32x4_t __b, int32_t __c)
-{
- return __arm_vqrdmlashq_n_s32 (__a, __b, __c);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlashq (int32x4_t __a, int32x4_t __b, int32_t __c)
-{
- return __arm_vqdmlashq_n_s32 (__a, __b, __c);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq (int32x4_t __a, int32x4_t __b, int32_t __c)
-{
- return __arm_vqrdmlahq_n_s32 (__a, __b, __c);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq (int32x4_t __a, int32x4_t __b, int32_t __c)
-{
- return __arm_vqdmlahq_n_s32 (__a, __b, __c);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq (int32x4_t __a, int32x4_t __b, int32_t __c)
-{
- return __arm_vmlasq_n_s32 (__a, __b, __c);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq (int32x4_t __a, int32x4_t __b, int32_t __c)
-{
- return __arm_vmlaq_n_s32 (__a, __b, __c);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (int32x4_t __a, int32x4_t __b, const int __imm)
@@ -9169,90 +8605,6 @@ __arm_vhcaddq_rot90_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_p
  return __arm_vhcaddq_rot90_m_s16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq_m (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
-{
- return __arm_vmlaq_m_n_s8 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq_m (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t __p)
-{
- return __arm_vmlaq_m_n_s32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq_m (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p)
-{
- return __arm_vmlaq_m_n_s16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq_m (uint8x16_t __a, uint8x16_t __b, uint8_t __c, mve_pred16_t __p)
-{
- return __arm_vmlaq_m_n_u8 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq_m (uint32x4_t __a, uint32x4_t __b, uint32_t __c, mve_pred16_t __p)
-{
- return __arm_vmlaq_m_n_u32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaq_m (uint16x8_t __a, uint16x8_t __b, uint16_t __c, mve_pred16_t __p)
-{
- return __arm_vmlaq_m_n_u16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq_m (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
-{
- return __arm_vmlasq_m_n_s8 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq_m (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t __p)
-{
- return __arm_vmlasq_m_n_s32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq_m (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p)
-{
- return __arm_vmlasq_m_n_s16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq_m (uint8x16_t __a, uint8x16_t __b, uint8_t __c, mve_pred16_t __p)
-{
- return __arm_vmlasq_m_n_u8 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq_m (uint32x4_t __a, uint32x4_t __b, uint32_t __c, mve_pred16_t __p)
-{
- return __arm_vmlasq_m_n_u32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlasq_m (uint16x8_t __a, uint16x8_t __b, uint16_t __c, mve_pred16_t __p)
-{
- return __arm_vmlasq_m_n_u16 (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmullbq_int_m (int16x8_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -9379,90 +8731,6 @@ __arm_vornq_m (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16
  return __arm_vornq_m_u16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq_m (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
-{
- return __arm_vqdmlahq_m_n_s8 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq_m (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t __p)
-{
- return __arm_vqdmlahq_m_n_s32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq_m (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p)
-{
- return __arm_vqdmlahq_m_n_s16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq_m (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
-{
- return __arm_vqrdmlahq_m_n_s8 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq_m (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t __p)
-{
- return __arm_vqrdmlahq_m_n_s32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq_m (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p)
-{
- return __arm_vqrdmlahq_m_n_s16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq_m (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
-{
- return __arm_vqrdmlashq_m_n_s8 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq_m (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t __p)
-{
- return __arm_vqrdmlashq_m_n_s32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq_m (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p)
-{
- return __arm_vqrdmlashq_m_n_s16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlashq_m (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
-{
- return __arm_vqdmlashq_m_n_s8 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlashq_m (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p)
-{
- return __arm_vqdmlashq_m_n_s16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlashq_m (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t __p)
-{
- return __arm_vqdmlashq_m_n_s32 (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsliq_m (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -13771,17 +13039,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
 
-#define __arm_vmlaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vmlaq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vmlaq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vmlaq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vmlaq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vmlaq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmlaq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce3(p2, int)));})
-
 #define __arm_vsriq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -13802,49 +13059,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsliq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsliq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
-#define __arm_vqrdmlashq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int)), \
-	    int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int)));})
-
-#define __arm_vqdmlashq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int)), \
-	    int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int)));})
-
-#define __arm_vqrdmlahq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int)));})
-
-#define __arm_vmlasq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vmlasq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vmlasq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vmlasq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vmlasq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vmlasq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmlasq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce3(p2, int)));})
-
-#define __arm_vqdmlahq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int)));})
-
 #define __arm_vcvtaq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -14744,52 +13958,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsriq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsriq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
-#define __arm_vqrdmlashq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int)));})
-
-#define __arm_vqdmlashq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int)));})
-
-#define __arm_vqrdmlahq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int)));})
-
-#define __arm_vmlaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vmlaq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vmlaq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vmlaq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vmlaq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vmlaq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmlaq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce3(p2, int)));})
-
-#define __arm_vmlasq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vmlasq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vmlasq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vmlasq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vmlasq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vmlasq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmlasq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce3(p2, int)));})
-
 #define __arm_vpselq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -14802,14 +13970,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vpselq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
   int (*)[__ARM_mve_type_uint64x2_t][__ARM_mve_type_uint64x2_t]: __arm_vpselq_u64 (__ARM_mve_coerce(__p0, uint64x2_t), __ARM_mve_coerce(__p1, uint64x2_t), p2));})
 
-#define __arm_vqdmlahq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int)));})
-
 #define __arm_vbicq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -15411,30 +14571,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_u16 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
   int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_u32 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
-#define __arm_vqrdmlahq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int), p3));})
-
-#define __arm_vqrdmlashq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int), p3));})
-
-#define __arm_vqdmlashq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int), p3));})
-
 #define __arm_vsliq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -15498,28 +14634,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vhcaddq_rot90_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vhcaddq_rot90_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
 
-#define __arm_vmlaq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vmlaq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vmlaq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vmlaq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vmlaq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vmlaq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmlaq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce3(p2, int), p3));})
-
-#define __arm_vmlasq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vmlasq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vmlasq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vmlasq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vmlasq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vmlasq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmlasq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce3(p2, int), p3));})
-
 #define __arm_vmullbq_int_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -15549,14 +14663,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_poly_m_p8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_poly_m_p16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));})
 
-#define __arm_vqdmlahq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int), p3));})
-
 #define __arm_vqdmullbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq
  2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
                   ` (22 preceding siblings ...)
  2023-05-11 12:19 ` [PATCH 24/24] arm: [MVE intrinsics] rework vmlaq vmlasq vqdmlahq vqdmlashq vqrdmlahq vqrdmlashq Christophe Lyon
@ 2023-05-11 16:38 ` Kyrylo Tkachov
  23 siblings, 0 replies; 25+ messages in thread
From: Kyrylo Tkachov @ 2023-05-11 16:38 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Thursday, May 11, 2023 1:19 PM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq
> 
> Factorize vaddlvaq builtins so that they use parameterized names.

This series is ok (the changes look quite regular throughout).
Thanks,
Kyrill

> 
> 2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/iterators.md (mve_insn): Add vaddlva.
> 	* config/arm/mve.md (mve_vaddlvaq_<supf>v4si): Rename into ...
> 	(@mve_<mve_insn>q_<supf>v4si): ... this.
> 	(mve_vaddlvaq_p_<supf>v4si): Rename into ...
> 	(@mve_<mve_insn>q_p_<supf>v4si): ... this.
> ---
>  gcc/config/arm/iterators.md | 2 ++
>  gcc/config/arm/mve.md       | 8 ++++----
>  2 files changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index 2f6de937ef7..ff146afd913 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -759,6 +759,8 @@ (define_int_attr mve_insn [
>  		 (VABDQ_S "vabd") (VABDQ_U "vabd") (VABDQ_F "vabd")
>  		 (VABSQ_M_F "vabs")
>  		 (VABSQ_M_S "vabs")
> +		 (VADDLVAQ_P_S "vaddlva") (VADDLVAQ_P_U "vaddlva")
> +		 (VADDLVAQ_S "vaddlva") (VADDLVAQ_U "vaddlva")
>  		 (VADDLVQ_P_S "vaddlv") (VADDLVQ_P_U "vaddlv")
>  		 (VADDLVQ_S "vaddlv") (VADDLVQ_U "vaddlv")
>  		 (VADDQ_M_N_S "vadd") (VADDQ_M_N_U "vadd") (VADDQ_M_N_F "vadd")
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index f5cb8ef48ef..b548eced4f5 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -1222,7 +1222,7 @@ (define_insn "@mve_<mve_insn>q_f<mode>"
>  ;;
>  ;; [vaddlvaq_s vaddlvaq_u])
>  ;;
> -(define_insn "mve_vaddlvaq_<supf>v4si"
> +(define_insn "@mve_<mve_insn>q_<supf>v4si"
>    [
>     (set (match_operand:DI 0 "s_register_operand" "=r")
>  	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
> @@ -1230,7 +1230,7 @@ (define_insn "mve_vaddlvaq_<supf>v4si"
>  	 VADDLVAQ))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vaddlva.<supf>32\t%Q0, %R0, %q2"
> +  "<mve_insn>.<supf>32\t%Q0, %R0, %q2"
>    [(set_attr "type" "mve_move")
>  ])
> 
> @@ -2534,7 +2534,7 @@ (define_insn "@mve_<mve_insn>q_m_f<mode>"
>  ;;
>  ;; [vaddlvaq_p_s vaddlvaq_p_u])
>  ;;
> -(define_insn "mve_vaddlvaq_p_<supf>v4si"
> +(define_insn "@mve_<mve_insn>q_p_<supf>v4si"
>    [
>     (set (match_operand:DI 0 "s_register_operand" "=r")
>  	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
> @@ -2543,7 +2543,7 @@ (define_insn "mve_vaddlvaq_p_<supf>v4si"
>  	 VADDLVAQ_P))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vpst\;vaddlvat.<supf>32\t%Q0, %R0, %q2"
> +  "vpst\;<mve_insn>t.<supf>32\t%Q0, %R0, %q2"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
>  ;;
> --
> 2.34.1
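
For context, the renamed patterns implement the vaddlvaq intrinsics:
vaddlvaq adds all four 32-bit lanes of a vector, widened to 64 bits, onto a
64-bit accumulator, and vaddlvaq_p does the same under a predicate (emitted
as vpst followed by vaddlvat).  A minimal usage sketch in ordinary ACLE code
(not part of the patch; helper names are illustrative and an MVE-enabled
target is assumed):

  #include <arm_mve.h>

  /* Sketch: both calls are arm_mve.h overloads; the first maps to the
     vaddlva pattern renamed above, the second to the predicated
     vpst + vaddlvat pattern.  */
  int64_t
  sum_all (int64_t acc, int32x4_t v)
  {
    return vaddlvaq (acc, v);        /* __arm_vaddlvaq_s32 */
  }

  int64_t
  sum_active (int64_t acc, int32x4_t v, mve_pred16_t p)
  {
    return vaddlvaq_p (acc, v, p);   /* __arm_vaddlvaq_p_s32 */
  }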


^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2023-05-11 16:38 UTC | newest]

Thread overview: 25+ messages
2023-05-11 12:18 [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Christophe Lyon
2023-05-11 12:18 ` [PATCH 02/24] arm: [MVE intrinsics] add unary_widen_acc shape Christophe Lyon
2023-05-11 12:18 ` [PATCH 03/24] arm: [MVE intrinsics] rework vaddlvaq Christophe Lyon
2023-05-11 12:18 ` [PATCH 04/24] arm: [MVE intrinsics] add binary_acc_int32 shape Christophe Lyon
2023-05-11 12:19 ` [PATCH 05/24] arm: [MVE intrinsics] factorize vmladav vmladavx vmlsdav vmlsdavx vmladava vmladavax vmlsdava vmlsdavax Christophe Lyon
2023-05-11 12:19 ` [PATCH 06/24] arm: [MVE intrinsics] rework vmladavq vmladavxq vmlsdavq vmlsdavxq Christophe Lyon
2023-05-11 12:19 ` [PATCH 07/24] arm: [MVE intrinsics] add binary_acca_int32 shape Christophe Lyon
2023-05-11 12:19 ` [PATCH 08/24] arm: [MVE intrinsics] rework vmladavaq vmladavaxq vmlsdavaq vmlsdavaxq Christophe Lyon
2023-05-11 12:19 ` [PATCH 09/24] arm: [MVE intrinsics] factorize vabavq Christophe Lyon
2023-05-11 12:19 ` [PATCH 10/24] arm: [MVE intrinsics] rework vabavq Christophe Lyon
2023-05-11 12:19 ` [PATCH 11/24] arm: [MVE intrinsics] add binary_acc_int64 shape Christophe Lyon
2023-05-11 12:19 ` [PATCH 12/24] arm: [MVE intrinsics] factorize vmlaldavq vmlaldavxq vmlsldavq vmlsldavxq Christophe Lyon
2023-05-11 12:19 ` [PATCH 13/24] arm: [MVE intrinsics] rework " Christophe Lyon
2023-05-11 12:19 ` [PATCH 14/24] arm: [MVE intrinsics] factorize vrmlaldavhq vrmlaldavhxq vrmlsldavhq vrmlsldavhxq Christophe Lyon
2023-05-11 12:19 ` [PATCH 15/24] arm: [MVE intrinsics] rework " Christophe Lyon
2023-05-11 12:19 ` [PATCH 16/24] arm: [MVE intrinsics] add binary_acca_int64 shape Christophe Lyon
2023-05-11 12:19 ` [PATCH 17/24] arm: [MVE intrinsics] factorize vmlaldavaq vmlaldavaxq vmlsldavaq vmlsldavaxq Christophe Lyon
2023-05-11 12:19 ` [PATCH 18/24] arm: [MVE intrinsics] rework " Christophe Lyon
2023-05-11 12:19 ` [PATCH 19/24] arm: [MVE intrinsics] add ternary shape Christophe Lyon
2023-05-11 12:19 ` [PATCH 20/24] arm: [MVE intrinsics] factorize vqdmladhq vqdmladhxq vqdmlsdhq vqdmlsdhxq vqrdmladhq vqrdmladhxq vqrdmlsdhq vqrdmlsdhxq Christophe Lyon
2023-05-11 12:19 ` [PATCH 21/24] arm: [MVE intrinsics] rework vqrdmladhq vqrdmladhxq vqrdmlsdhq vqrdmlsdhxq vqdmladhq vqdmladhxq vqdmlsdhq vqdmlsdhxq Christophe Lyon
2023-05-11 12:19 ` [PATCH 22/24] arm: [MVE intrinsics] add ternary_n shape Christophe Lyon
2023-05-11 12:19 ` [PATCH 23/24] arm: [MVE intrinsics] factorize vmlaq_n vmlasq_n vqdmlahq_n vqdmlashq_n vqrdmlahq_n vqrdmlashq_n Christophe Lyon
2023-05-11 12:19 ` [PATCH 24/24] arm: [MVE intrinsics] rework vmlaq vmlasq vqdmlahq vqdmlashq vqrdmlahq vqrdmlashq Christophe Lyon
2023-05-11 16:38 ` [PATCH 01/24] arm: [MVE intrinsics] factorize vaddlvaq Kyrylo Tkachov
