public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v2] arm: [MVE] Add vqdmlashq intrinsics
@ 2020-10-06 15:59 Christophe Lyon
  2020-10-06 15:59 ` [PATCH v2] arm: [MVE] Remove illegal intrinsics Christophe Lyon
  2020-10-07 10:34 ` [PATCH v2] arm: [MVE] Add vqdmlashq intrinsics Kyrylo Tkachov
  0 siblings, 2 replies; 4+ messages in thread
From: Christophe Lyon @ 2020-10-06 15:59 UTC (permalink / raw)
  To: gcc-patches

This patch adds:
vqdmlashq_m_n_s16
vqdmlashq_m_n_s32
vqdmlashq_m_n_s8
vqdmlashq_n_s16
vqdmlashq_n_s32
vqdmlashq_n_s8

v2: rebased after Srinath's reorganization patch

2020-10-05  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	PR target/96914
	* config/arm/arm_mve.h (vqdmlashq, vqdmlashq_m): Define.
	* config/arm/arm_mve_builtins.def (vqdmlashq_n_s)
	(vqdmlashq_m_n_s): New.
	* config/arm/unspecs.md (VQDMLASHQ_N_S, VQDMLASHQ_M_N_S): New
	unspecs.
	* config/arm/iterators.md (VQDMLASHQ_N_S, VQDMLASHQ_M_N_S): New
	attributes.
	(VQDMLASHQ_N): New iterator.
	* config/arm/mve.md (mve_vqdmlashq_n_<supf><mode>)
	(mve_vqdmlashq_m_n_s<mode>): New patterns.

	gcc/testsuite/
	PR target/96914
	* gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s16.c: New test.
	* gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s32.c: New test.
	* gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s8.c: New test.
	* gcc.target/arm/mve/intrinsics/vqdmlashq_n_s16.c: New test.
	* gcc.target/arm/mve/intrinsics/vqdmlashq_n_s32.c: New test.
	* gcc.target/arm/mve/intrinsics/vqdmlashq_n_s8.c: New test.
---
 gcc/config/arm/arm_mve.h                           | 116 +++++++++++++++++++++
 gcc/config/arm/arm_mve_builtins.def                |   2 +
 gcc/config/arm/iterators.md                        |   3 +
 gcc/config/arm/mve.md                              |  33 ++++++
 gcc/config/arm/unspecs.md                          |   2 +
 .../arm/mve/intrinsics/vqdmlashq_m_n_s16.c         |  23 ++++
 .../arm/mve/intrinsics/vqdmlashq_m_n_s32.c         |  23 ++++
 .../arm/mve/intrinsics/vqdmlashq_m_n_s8.c          |  23 ++++
 .../arm/mve/intrinsics/vqdmlashq_n_s16.c           |  21 ++++
 .../arm/mve/intrinsics/vqdmlashq_n_s32.c           |  21 ++++
 .../gcc.target/arm/mve/intrinsics/vqdmlashq_n_s8.c |  21 ++++
 11 files changed, 288 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s16.c
 create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s32.c
 create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s8.c
 create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s16.c
 create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s32.c
 create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s8.c

diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index d9bfb203..7626ad1 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -141,6 +141,7 @@
 #define vrev64q_m(__inactive, __a, __p) __arm_vrev64q_m(__inactive, __a, __p)
 #define vqrdmlashq(__a, __b, __c) __arm_vqrdmlashq(__a, __b, __c)
 #define vqrdmlahq(__a, __b, __c) __arm_vqrdmlahq(__a, __b, __c)
+#define vqdmlashq(__a, __b, __c) __arm_vqdmlashq(__a, __b, __c)
 #define vqdmlahq(__a, __b, __c) __arm_vqdmlahq(__a, __b, __c)
 #define vmvnq_m(__inactive, __a, __p) __arm_vmvnq_m(__inactive, __a, __p)
 #define vmlasq(__a, __b, __c) __arm_vmlasq(__a, __b, __c)
@@ -260,6 +261,7 @@
 #define vorrq_m(__inactive, __a, __b, __p) __arm_vorrq_m(__inactive, __a, __b, __p)
 #define vqaddq_m(__inactive, __a, __b, __p) __arm_vqaddq_m(__inactive, __a, __b, __p)
 #define vqdmladhq_m(__inactive, __a, __b, __p) __arm_vqdmladhq_m(__inactive, __a, __b, __p)
+#define vqdmlashq_m(__a, __b, __c, __p) __arm_vqdmlashq_m(__a, __b, __c, __p)
 #define vqdmladhxq_m(__inactive, __a, __b, __p) __arm_vqdmladhxq_m(__inactive, __a, __b, __p)
 #define vqdmlahq_m(__a, __b, __c, __p) __arm_vqdmlahq_m(__a, __b, __c, __p)
 #define vqdmlsdhq_m(__inactive, __a, __b, __p) __arm_vqdmlsdhq_m(__inactive, __a, __b, __p)
@@ -1307,6 +1309,7 @@
 #define vqdmlsdhxq_s8(__inactive, __a, __b) __arm_vqdmlsdhxq_s8(__inactive, __a, __b)
 #define vqdmlsdhq_s8(__inactive, __a, __b) __arm_vqdmlsdhq_s8(__inactive, __a, __b)
 #define vqdmlahq_n_s8(__a, __b, __c) __arm_vqdmlahq_n_s8(__a, __b, __c)
+#define vqdmlashq_n_s8(__a, __b, __c) __arm_vqdmlashq_n_s8(__a, __b, __c)
 #define vqdmladhxq_s8(__inactive, __a, __b) __arm_vqdmladhxq_s8(__inactive, __a, __b)
 #define vqdmladhq_s8(__inactive, __a, __b) __arm_vqdmladhq_s8(__inactive, __a, __b)
 #define vmlsdavaxq_s8(__a, __b, __c) __arm_vmlsdavaxq_s8(__a, __b, __c)
@@ -1391,6 +1394,7 @@
 #define vqrdmladhq_s16(__inactive, __a, __b) __arm_vqrdmladhq_s16(__inactive, __a, __b)
 #define vqdmlsdhxq_s16(__inactive, __a, __b) __arm_vqdmlsdhxq_s16(__inactive, __a, __b)
 #define vqdmlsdhq_s16(__inactive, __a, __b) __arm_vqdmlsdhq_s16(__inactive, __a, __b)
+#define vqdmlashq_n_s16(__a, __b, __c) __arm_vqdmlashq_n_s16(__a, __b, __c)
 #define vqdmlahq_n_s16(__a, __b, __c) __arm_vqdmlahq_n_s16(__a, __b, __c)
 #define vqdmladhxq_s16(__inactive, __a, __b) __arm_vqdmladhxq_s16(__inactive, __a, __b)
 #define vqdmladhq_s16(__inactive, __a, __b) __arm_vqdmladhq_s16(__inactive, __a, __b)
@@ -1476,6 +1480,7 @@
 #define vqrdmladhq_s32(__inactive, __a, __b) __arm_vqrdmladhq_s32(__inactive, __a, __b)
 #define vqdmlsdhxq_s32(__inactive, __a, __b) __arm_vqdmlsdhxq_s32(__inactive, __a, __b)
 #define vqdmlsdhq_s32(__inactive, __a, __b) __arm_vqdmlsdhq_s32(__inactive, __a, __b)
+#define vqdmlashq_n_s32(__a, __b, __c) __arm_vqdmlashq_n_s32(__a, __b, __c)
 #define vqdmlahq_n_s32(__a, __b, __c) __arm_vqdmlahq_n_s32(__a, __b, __c)
 #define vqdmladhxq_s32(__inactive, __a, __b) __arm_vqdmladhxq_s32(__inactive, __a, __b)
 #define vqdmladhq_s32(__inactive, __a, __b) __arm_vqdmladhq_s32(__inactive, __a, __b)
@@ -1902,6 +1907,9 @@
 #define vqdmladhxq_m_s8(__inactive, __a, __b, __p) __arm_vqdmladhxq_m_s8(__inactive, __a, __b, __p)
 #define vqdmladhxq_m_s32(__inactive, __a, __b, __p) __arm_vqdmladhxq_m_s32(__inactive, __a, __b, __p)
 #define vqdmladhxq_m_s16(__inactive, __a, __b, __p) __arm_vqdmladhxq_m_s16(__inactive, __a, __b, __p)
+#define vqdmlashq_m_n_s8(__a, __b, __c, __p) __arm_vqdmlashq_m_n_s8(__a, __b, __c, __p)
+#define vqdmlashq_m_n_s32(__a, __b, __c, __p) __arm_vqdmlashq_m_n_s32(__a, __b, __c, __p)
+#define vqdmlashq_m_n_s16(__a, __b, __c, __p) __arm_vqdmlashq_m_n_s16(__a, __b, __c, __p)
 #define vqdmlahq_m_n_s8(__a, __b, __c, __p) __arm_vqdmlahq_m_n_s8(__a, __b, __c, __p)
 #define vqdmlahq_m_n_s32(__a, __b, __c, __p) __arm_vqdmlahq_m_n_s32(__a, __b, __c, __p)
 #define vqdmlahq_m_n_s16(__a, __b, __c, __p) __arm_vqdmlahq_m_n_s16(__a, __b, __c, __p)
@@ -7425,6 +7433,13 @@ __arm_vqrdmlashq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
 
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
+{
+  return __builtin_mve_vqdmlashq_n_sv16qi (__a, __b, __c);
+}
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlahq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
 {
   return __builtin_mve_vqrdmlahq_n_sv16qi (__a, __b, __c);
@@ -8020,6 +8035,13 @@ __arm_vqrdmlashq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
 
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
+{
+  return __builtin_mve_vqdmlashq_n_sv8hi (__a, __b, __c);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlahq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
 {
   return __builtin_mve_vqrdmlahq_n_sv8hi (__a, __b, __c);
@@ -8615,6 +8637,13 @@ __arm_vqrdmlashq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
 
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
+{
+  return __builtin_mve_vqdmlashq_n_sv4si (__a, __b, __c);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlahq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
 {
   return __builtin_mve_vqrdmlahq_n_sv4si (__a, __b, __c);
@@ -11142,6 +11171,27 @@ __arm_vqrdmlashq_m_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_
 
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq_m_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
+{
+  return __builtin_mve_vqdmlashq_m_n_sv16qi (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq_m_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p)
+{
+  return __builtin_mve_vqdmlashq_m_n_sv8hi (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq_m_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t __p)
+{
+  return __builtin_mve_vqdmlashq_m_n_sv4si (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlsdhq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
 {
   return __builtin_mve_vqrdmlsdhq_m_sv16qi (__inactive, __a, __b, __p);
@@ -24212,6 +24262,13 @@ __arm_vqrdmlashq (int8x16_t __a, int8x16_t __b, int8_t __c)
 
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq (int8x16_t __a, int8x16_t __b, int8_t __c)
+{
+ return __arm_vqdmlashq_n_s8 (__a, __b, __c);
+}
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlahq (int8x16_t __a, int8x16_t __b, int8_t __c)
 {
  return __arm_vqrdmlahq_n_s8 (__a, __b, __c);
@@ -24807,6 +24864,13 @@ __arm_vqrdmlashq (int16x8_t __a, int16x8_t __b, int16_t __c)
 
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq (int16x8_t __a, int16x8_t __b, int16_t __c)
+{
+ return __arm_vqdmlashq_n_s16 (__a, __b, __c);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlahq (int16x8_t __a, int16x8_t __b, int16_t __c)
 {
  return __arm_vqrdmlahq_n_s16 (__a, __b, __c);
@@ -25402,6 +25466,13 @@ __arm_vqrdmlashq (int32x4_t __a, int32x4_t __b, int32_t __c)
 
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq (int32x4_t __a, int32x4_t __b, int32_t __c)
+{
+ return __arm_vqdmlashq_n_s32 (__a, __b, __c);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlahq (int32x4_t __a, int32x4_t __b, int32_t __c)
 {
  return __arm_vqrdmlahq_n_s32 (__a, __b, __c);
@@ -27929,6 +28000,27 @@ __arm_vqrdmlashq_m (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p)
 
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq_m (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p)
+{
+ return __arm_vqdmlashq_m_n_s8 (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq_m (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p)
+{
+ return __arm_vqdmlashq_m_n_s16 (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vqdmlashq_m (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t __p)
+{
+ return __arm_vqdmlashq_m_n_s32 (__a, __b, __c, __p);
+}
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlsdhq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
 {
  return __arm_vqrdmlsdhq_m_s8 (__inactive, __a, __b, __p);
@@ -36798,6 +36890,14 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));})
 
+#define __arm_vqdmlashq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
+  __typeof(p1) __p1 = (p1); \
+  __typeof(p2) __p2 = (p2); \
+  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \
+  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \
+	    int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));})
+
 #define __arm_vqrdmlahq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -39270,6 +39370,14 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));})
 
+#define __arm_vqdmlashq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
+  __typeof(p1) __p1 = (p1); \
+  __typeof(p2) __p2 = (p2); \
+  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \
+  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \
+  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));})
+
 #define __arm_vqrdmlahq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -40811,6 +40919,14 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3));})
 
+#define __arm_vqdmlashq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
+  __typeof(p1) __p1 = (p1); \
+  __typeof(p2) __p2 = (p2); \
+  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
+  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \
+  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \
+  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3));})
+
 #define __arm_vqrshlq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def
index 753e40a..9f3ecfe 100644
--- a/gcc/config/arm/arm_mve_builtins.def
+++ b/gcc/config/arm/arm_mve_builtins.def
@@ -384,6 +384,7 @@ VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqrdmladhq_s, v16qi, v8hi, v4si)
 VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlsdhxq_s, v16qi, v8hi, v4si)
 VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlsdhq_s, v16qi, v8hi, v4si)
 VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlahq_n_s, v16qi, v8hi, v4si)
+VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlashq_n_s, v16qi, v8hi, v4si)
 VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmladhxq_s, v16qi, v8hi, v4si)
 VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmladhq_s, v16qi, v8hi, v4si)
 VAR3 (TERNOP_NONE_NONE_NONE_NONE, vmlsdavaxq_s, v16qi, v8hi, v4si)
@@ -574,6 +575,7 @@ VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmulhq_m_n_s, v16qi, v8hi, v4si)
 VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlsdhxq_m_s, v16qi, v8hi, v4si)
 VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlsdhq_m_s, v16qi, v8hi, v4si)
 VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlahq_m_n_s, v16qi, v8hi, v4si)
+VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlashq_m_n_s, v16qi, v8hi, v4si)
 VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmladhxq_m_s, v16qi, v8hi, v4si)
 VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmladhq_m_s, v16qi, v8hi, v4si)
 VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqaddq_m_s, v16qi, v8hi, v4si)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 7f8c235..0dbf1b2 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -1285,6 +1285,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VMLAQ_N_U "u") (VMLASQ_N_S "s") (VMLASQ_N_U "u")
 		       (VMVNQ_M_S "s") (VMVNQ_M_U "u") (VPSELQ_S "s")
 		       (VPSELQ_U "u") (VQDMLAHQ_N_S "s") (VQDMLAHQ_N_U "u")
+		       (VQDMLASHQ_N_S "s")
 		       (VQRDMLAHQ_N_S "s") (VQRDMLAHQ_N_U "u")
 		       (VQRDMLASHQ_N_S "s") (VQRDMLASHQ_N_U "u")
 		       (VQRSHLQ_M_N_S "s") (VQRSHLQ_M_N_U "u")
@@ -1326,6 +1327,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VMULQ_M_S "s") (VQSHLQ_M_N_U "u") (VSLIQ_M_N_U "u")
 		       (VMLADAVAQ_P_S "s") (VQRSHLQ_M_U "u")
 		       (VMULLBQ_INT_M_U "u") (VSHLQ_M_N_U "u") (VQSUBQ_M_U "u")
+		       (VQDMLASHQ_M_N_S "s")
 		       (VQRDMLASHQ_M_N_U "u") (VRSHRQ_M_N_S "s")
 		       (VORNQ_M_S "s") (VCADDQ_ROT270_M_S "s") (VRHADDQ_M_U "u")
 		       (VRSHRQ_M_N_U "u") (VMLASQ_M_N_U "u") (VHSUBQ_M_U "u")
@@ -1577,6 +1579,7 @@ (define_int_iterator VMLASQ_N [VMLASQ_N_S VMLASQ_N_U])
 (define_int_iterator VMVNQ_M [VMVNQ_M_S VMVNQ_M_U])
 (define_int_iterator VPSELQ [VPSELQ_S VPSELQ_U])
 (define_int_iterator VQDMLAHQ_N [VQDMLAHQ_N_S VQDMLAHQ_N_U])
+(define_int_iterator VQDMLASHQ_N [VQDMLASHQ_N_S])
 (define_int_iterator VQRDMLAHQ_N [VQRDMLAHQ_N_S VQRDMLAHQ_N_U])
 (define_int_iterator VQRDMLASHQ_N [VQRDMLASHQ_N_S VQRDMLASHQ_N_U])
 (define_int_iterator VQRSHLQ_M_N [VQRSHLQ_M_N_S VQRSHLQ_M_N_U])
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 4322adf..d406ab1 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -3678,6 +3678,22 @@ (define_insn "mve_vqdmlahq_n_<supf><mode>"
 ])
 
 ;;
+;; [vqdmlashq_n_s])
+;;
+(define_insn "mve_vqdmlashq_n_<supf><mode>"
+  [
+   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
+		       (match_operand:MVE_2 2 "s_register_operand" "w")
+		       (match_operand:<V_elem> 3 "s_register_operand" "r")]
+	 VQDMLASHQ_N))
+  ]
+  "TARGET_HAVE_MVE"
+  "vqdmlash.s%#<V_sz_elem>\t%q0, %q2, %3"
+  [(set_attr "type" "mve_move")
+])
+
+;;
 ;; [vqnegq_m_s])
 ;;
 (define_insn "mve_vqnegq_m_s<mode>"
@@ -5904,6 +5920,23 @@ (define_insn "mve_vqdmlahq_m_n_s<mode>"
    (set_attr "length""8")])
 
 ;;
+;; [vqdmlashq_m_n_s])
+;;
+(define_insn "mve_vqdmlashq_m_n_s<mode>"
+  [
+   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
+		       (match_operand:MVE_2 2 "s_register_operand" "w")
+		       (match_operand:<V_elem> 3 "s_register_operand" "r")
+		       (match_operand:HI 4 "vpr_register_operand" "Up")]
+	 VQDMLASHQ_M_N_S))
+  ]
+  "TARGET_HAVE_MVE"
+  "vpst\;vqdmlasht.s%#<V_sz_elem>\t%q0, %q2, %3"
+  [(set_attr "type" "mve_move")
+   (set_attr "length""8")])
+
+;;
 ;; [vqrdmlahq_m_n_s])
 ;;
 (define_insn "mve_vqrdmlahq_m_n_s<mode>"
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index caee18a..a98ac09 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -877,6 +877,7 @@ (define_c_enum "unspec" [
   VQABSQ_M_S
   VQDMLAHQ_N_S
   VQDMLAHQ_N_U
+  VQDMLASHQ_N_S
   VQNEGQ_M_S
   VQRDMLADHQ_S
   VQRDMLADHXQ_S
@@ -1069,6 +1070,7 @@ (define_c_enum "unspec" [
   VRHADDQ_M_S
   VMULQ_M_S
   VMULQ_M_U
+  VQDMLASHQ_M_N_S
   VQRDMLASHQ_M_N_S
   VRSHLQ_M_S
   VRSHLQ_M_U
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s16.c
new file mode 100644
index 0000000..7c2e5cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s16.c
@@ -0,0 +1,23 @@
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+int16x8_t
+foo (int16x8_t a, int16x8_t b, int16_t c, mve_pred16_t p)
+{
+  return vqdmlashq_m_n_s16 (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vqdmlasht.s16"  }  } */
+
+int16x8_t
+foo1 (int16x8_t a, int16x8_t b, int16_t c, mve_pred16_t p)
+{
+  return vqdmlashq_m (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vqdmlasht.s16"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s32.c
new file mode 100644
index 0000000..cea9d9b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s32.c
@@ -0,0 +1,23 @@
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+int32x4_t
+foo (int32x4_t a, int32x4_t b, int32_t c, mve_pred16_t p)
+{
+  return vqdmlashq_m_n_s32 (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vqdmlasht.s32"  }  } */
+
+int32x4_t
+foo1 (int32x4_t a, int32x4_t b, int32_t c, mve_pred16_t p)
+{
+  return vqdmlashq_m (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vqdmlasht.s32"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s8.c
new file mode 100644
index 0000000..83ee258
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s8.c
@@ -0,0 +1,23 @@
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+int8x16_t
+foo (int8x16_t a, int8x16_t b, int8_t c, mve_pred16_t p)
+{
+  return vqdmlashq_m_n_s8 (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vqdmlasht.s8"  }  } */
+
+int8x16_t
+foo1 (int8x16_t a, int8x16_t b, int8_t c, mve_pred16_t p)
+{
+  return vqdmlashq_m (a, b, c, p);
+}
+
+/* { dg-final { scan-assembler "vpst" } } */
+/* { dg-final { scan-assembler "vqdmlasht.s8"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s16.c
new file mode 100644
index 0000000..c71a61c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s16.c
@@ -0,0 +1,21 @@
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+int16x8_t
+foo (int16x8_t a, int16x8_t b, int16_t c)
+{
+  return vqdmlashq_n_s16 (a, b, c);
+}
+
+/* { dg-final { scan-assembler "vqdmlash.s16"  }  } */
+
+int16x8_t
+foo1 (int16x8_t a, int16x8_t b, int16_t c)
+{
+  return vqdmlashq (a, b, c);
+}
+
+/* { dg-final { scan-assembler "vqdmlash.s16"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s32.c
new file mode 100644
index 0000000..61f6c66
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s32.c
@@ -0,0 +1,21 @@
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+int32x4_t
+foo (int32x4_t a, int32x4_t b, int32_t c)
+{
+  return vqdmlashq_n_s32 (a, b, c);
+}
+
+/* { dg-final { scan-assembler "vqdmlash.s32"  }  } */
+
+int32x4_t
+foo1 (int32x4_t a, int32x4_t b, int32_t c)
+{
+  return vqdmlashq (a, b, c);
+}
+
+/* { dg-final { scan-assembler "vqdmlash.s32"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s8.c
new file mode 100644
index 0000000..a078928
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s8.c
@@ -0,0 +1,21 @@
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+
+#include "arm_mve.h"
+
+int8x16_t
+foo (int8x16_t a, int8x16_t b, int8_t c)
+{
+  return vqdmlashq_n_s8 (a, b, c);
+}
+
+/* { dg-final { scan-assembler "vqdmlash.s8"  }  } */
+
+int8x16_t
+foo1 (int8x16_t a, int8x16_t b, int8_t c)
+{
+  return vqdmlashq (a, b, c);
+}
+
+/* { dg-final { scan-assembler "vqdmlash.s8"  }  } */
-- 
2.7.4


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v2] arm: [MVE] Remove illegal intrinsics
  2020-10-06 15:59 [PATCH v2] arm: [MVE] Add vqdmlashq intrinsics Christophe Lyon
@ 2020-10-06 15:59 ` Christophe Lyon
       [not found]   ` <DB7PR08MB3002AAC33F48C8D8BAA376E7930D0@DB7PR08MB3002.eurprd08.prod.outlook.com>
  2020-10-07 10:34 ` [PATCH v2] arm: [MVE] Add vqdmlashq intrinsics Kyrylo Tkachov
  1 sibling, 1 reply; 4+ messages in thread
From: Christophe Lyon @ 2020-10-06 15:59 UTC (permalink / raw)
  To: gcc-patches

A few MVE intrinsics had an unsigned variant implemented even though
they are not supported by the hardware.  This patch removes them:
__arm_vqrdmlashq_n_u8
__arm_vqrdmlahq_n_u8
__arm_vqdmlahq_n_u8
__arm_vqrdmlashq_n_u16
__arm_vqrdmlahq_n_u16
__arm_vqdmlahq_n_u16
__arm_vqrdmlashq_n_u32
__arm_vqrdmlahq_n_u32
__arm_vqdmlahq_n_u32
__arm_vmlaldavaxq_p_u32
__arm_vmlaldavaxq_p_u16

v2: rebased after Srinath's reorganization patch

2020-10-06  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	PR target/96914
	* config/arm/arm_mve.h (vqrdmlashq_n_u8, vqrdmlashq_n_u16)
	(vqrdmlashq_n_u32, vqrdmlahq_n_u8, vqrdmlahq_n_u16)
	(vqrdmlahq_n_u32, vqdmlahq_n_u8, vqdmlahq_n_u16, vqdmlahq_n_u32)
	(vmlaldavaxq_p_u16, vmlaldavaxq_p_u32): Remove.
	* config/arm/arm_mve_builtins.def (vqrdmlashq_n_u, vqrdmlahq_n_u)
	(vqdmlahq_n_u, vmlaldavaxq_p_u): Remove.
	* config/arm/unspecs.md (VQDMLAHQ_N_U, VQRDMLAHQ_N_U)
	(VQRDMLASHQ_N_U)
	(VMLALDAVAXQ_P_U): Remove unspecs.
	* config/arm/iterators.md (VQDMLAHQ_N_U, VQRDMLAHQ_N_U)
	(VQRDMLASHQ_N_U, VMLALDAVAXQ_P_U): Remove attributes.
	(VQDMLAHQ_N, VQRDMLAHQ_N, VQRDMLASHQ_N, VMLALDAVAXQ_P): Remove
	unsigned variants from iterators.
	* config/arm/mve.md (mve_vqdmlahq_n_<supf><mode>)
	(mve_vqrdmlahq_n_<supf><mode>)
	(mve_vqrdmlashq_n_<supf><mode>, mve_vmlaldavaxq_p_<supf><mode>):
	Update comment.

	gcc/testsuite/
	PR target/96914
	* gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u16.c: Remove.
	* gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u32.c: Remove.
	* gcc.target/arm/mve/intrinsics/vqdmlahq_n_u16.c: Remove.
	* gcc.target/arm/mve/intrinsics/vqdmlahq_n_u32.c: Remove.
	* gcc.target/arm/mve/intrinsics/vqdmlahq_n_u8.c: Remove.
	* gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u16.c: Remove.
	* gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u32.c: Remove.
	* gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u8.c: Remove.
	* gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u16.c: Remove.
	* gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u32.c: Remove.
	* gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u8.c: Remove.
---
 gcc/config/arm/arm_mve.h                           | 199 +--------------------
 gcc/config/arm/arm_mve_builtins.def                |   4 -
 gcc/config/arm/iterators.md                        |  16 +-
 gcc/config/arm/mve.md                              |   8 +-
 gcc/config/arm/unspecs.md                          |   4 -
 .../arm/mve/intrinsics/vmlaldavaxq_p_u16.c         |  21 ---
 .../arm/mve/intrinsics/vmlaldavaxq_p_u32.c         |  21 ---
 .../gcc.target/arm/mve/intrinsics/vqdmlahq_n_u16.c |  21 ---
 .../gcc.target/arm/mve/intrinsics/vqdmlahq_n_u32.c |  21 ---
 .../gcc.target/arm/mve/intrinsics/vqdmlahq_n_u8.c  |  21 ---
 .../arm/mve/intrinsics/vqrdmlahq_n_u16.c           |  21 ---
 .../arm/mve/intrinsics/vqrdmlahq_n_u32.c           |  21 ---
 .../gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u8.c |  21 ---
 .../arm/mve/intrinsics/vqrdmlashq_n_u16.c          |  21 ---
 .../arm/mve/intrinsics/vqrdmlashq_n_u32.c          |  21 ---
 .../arm/mve/intrinsics/vqrdmlashq_n_u8.c           |  21 ---
 16 files changed, 19 insertions(+), 443 deletions(-)
 delete mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u16.c
 delete mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u32.c
 delete mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u16.c
 delete mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u32.c
 delete mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u8.c
 delete mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u16.c
 delete mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u32.c
 delete mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u8.c
 delete mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u16.c
 delete mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u32.c
 delete mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u8.c

diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 7626ad1..ccdac67 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -1237,9 +1237,6 @@
 #define vpselq_u8(__a, __b, __p) __arm_vpselq_u8(__a, __b, __p)
 #define vpselq_s8(__a, __b, __p) __arm_vpselq_s8(__a, __b, __p)
 #define vrev64q_m_u8(__inactive, __a, __p) __arm_vrev64q_m_u8(__inactive, __a, __p)
-#define vqrdmlashq_n_u8(__a, __b, __c) __arm_vqrdmlashq_n_u8(__a, __b, __c)
-#define vqrdmlahq_n_u8(__a, __b, __c) __arm_vqrdmlahq_n_u8(__a, __b, __c)
-#define vqdmlahq_n_u8(__a, __b, __c) __arm_vqdmlahq_n_u8(__a, __b, __c)
 #define vmvnq_m_u8(__inactive, __a, __p) __arm_vmvnq_m_u8(__inactive, __a, __p)
 #define vmlasq_n_u8(__a, __b, __c) __arm_vmlasq_n_u8(__a, __b, __c)
 #define vmlaq_n_u8(__a, __b, __c) __arm_vmlaq_n_u8(__a, __b, __c)
@@ -1323,9 +1320,6 @@
 #define vpselq_u16(__a, __b, __p) __arm_vpselq_u16(__a, __b, __p)
 #define vpselq_s16(__a, __b, __p) __arm_vpselq_s16(__a, __b, __p)
 #define vrev64q_m_u16(__inactive, __a, __p) __arm_vrev64q_m_u16(__inactive, __a, __p)
-#define vqrdmlashq_n_u16(__a, __b, __c) __arm_vqrdmlashq_n_u16(__a, __b, __c)
-#define vqrdmlahq_n_u16(__a, __b, __c) __arm_vqrdmlahq_n_u16(__a, __b, __c)
-#define vqdmlahq_n_u16(__a, __b, __c) __arm_vqdmlahq_n_u16(__a, __b, __c)
 #define vmvnq_m_u16(__inactive, __a, __p) __arm_vmvnq_m_u16(__inactive, __a, __p)
 #define vmlasq_n_u16(__a, __b, __c) __arm_vmlasq_n_u16(__a, __b, __c)
 #define vmlaq_n_u16(__a, __b, __c) __arm_vmlaq_n_u16(__a, __b, __c)
@@ -1409,9 +1403,6 @@
 #define vpselq_u32(__a, __b, __p) __arm_vpselq_u32(__a, __b, __p)
 #define vpselq_s32(__a, __b, __p) __arm_vpselq_s32(__a, __b, __p)
 #define vrev64q_m_u32(__inactive, __a, __p) __arm_vrev64q_m_u32(__inactive, __a, __p)
-#define vqrdmlashq_n_u32(__a, __b, __c) __arm_vqrdmlashq_n_u32(__a, __b, __c)
-#define vqrdmlahq_n_u32(__a, __b, __c) __arm_vqrdmlahq_n_u32(__a, __b, __c)
-#define vqdmlahq_n_u32(__a, __b, __c) __arm_vqdmlahq_n_u32(__a, __b, __c)
 #define vmvnq_m_u32(__inactive, __a, __p) __arm_vmvnq_m_u32(__inactive, __a, __p)
 #define vmlasq_n_u32(__a, __b, __c) __arm_vmlasq_n_u32(__a, __b, __c)
 #define vmlaq_n_u32(__a, __b, __c) __arm_vmlaq_n_u32(__a, __b, __c)
@@ -2033,8 +2024,6 @@
 #define vmlaldavaq_p_u16(__a, __b, __c, __p) __arm_vmlaldavaq_p_u16(__a, __b, __c, __p)
 #define vmlaldavaxq_p_s32(__a, __b, __c, __p) __arm_vmlaldavaxq_p_s32(__a, __b, __c, __p)
 #define vmlaldavaxq_p_s16(__a, __b, __c, __p) __arm_vmlaldavaxq_p_s16(__a, __b, __c, __p)
-#define vmlaldavaxq_p_u32(__a, __b, __c, __p) __arm_vmlaldavaxq_p_u32(__a, __b, __c, __p)
-#define vmlaldavaxq_p_u16(__a, __b, __c, __p) __arm_vmlaldavaxq_p_u16(__a, __b, __c, __p)
 #define vmlsldavaq_p_s32(__a, __b, __c, __p) __arm_vmlsldavaq_p_s32(__a, __b, __c, __p)
 #define vmlsldavaq_p_s16(__a, __b, __c, __p) __arm_vmlsldavaq_p_s16(__a, __b, __c, __p)
 #define vmlsldavaxq_p_s32(__a, __b, __c, __p) __arm_vmlsldavaxq_p_s32(__a, __b, __c, __p)
@@ -6970,27 +6959,6 @@ __arm_vrev64q_m_u8 (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
 
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq_n_u8 (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
-{
-  return __builtin_mve_vqrdmlashq_n_uv16qi (__a, __b, __c);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq_n_u8 (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
-{
-  return __builtin_mve_vqrdmlahq_n_uv16qi (__a, __b, __c);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq_n_u8 (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
-{
-  return __builtin_mve_vqdmlahq_n_uv16qi (__a, __b, __c);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
 {
   return __builtin_mve_vmvnq_m_uv16qi (__inactive, __a, __p);
@@ -7573,27 +7541,6 @@ __arm_vrev64q_m_u16 (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
-{
-  return __builtin_mve_vqrdmlashq_n_uv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
-{
-  return __builtin_mve_vqrdmlahq_n_uv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
-{
-  return __builtin_mve_vqdmlahq_n_uv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
 {
   return __builtin_mve_vmvnq_m_uv8hi (__inactive, __a, __p);
@@ -8175,27 +8122,6 @@ __arm_vrev64q_m_u32 (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p)
 
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
-{
-  return __builtin_mve_vqrdmlashq_n_uv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
-{
-  return __builtin_mve_vqrdmlahq_n_uv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
-{
-  return __builtin_mve_vqdmlahq_n_uv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p)
 {
   return __builtin_mve_vmvnq_m_uv4si (__inactive, __a, __p);
@@ -11862,20 +11788,6 @@ __arm_vmlaldavaxq_p_s16 (int64_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t
   return __builtin_mve_vmlaldavaxq_p_sv8hi (__a, __b, __c, __p);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaxq_p_u32 (uint64_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaldavaxq_p_uv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaxq_p_u16 (uint64_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlaldavaxq_p_uv8hi (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlsldavaq_p_s32 (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
@@ -23800,27 +23712,6 @@ __arm_vrev64q_m (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
 
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
-{
- return __arm_vqrdmlashq_n_u8 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
-{
- return __arm_vqrdmlahq_n_u8 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
-{
- return __arm_vqdmlahq_n_u8 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
 {
  return __arm_vmvnq_m_u8 (__inactive, __a, __p);
@@ -24402,27 +24293,6 @@ __arm_vrev64q_m (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
-{
- return __arm_vqrdmlashq_n_u16 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
-{
- return __arm_vqrdmlahq_n_u16 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
-{
- return __arm_vqdmlahq_n_u16 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
 {
  return __arm_vmvnq_m_u16 (__inactive, __a, __p);
@@ -25004,27 +24874,6 @@ __arm_vrev64q_m (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p)
 
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
-{
- return __arm_vqrdmlashq_n_u32 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
-{
- return __arm_vqrdmlahq_n_u32 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
-{
- return __arm_vqdmlahq_n_u32 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p)
 {
  return __arm_vmvnq_m_u32 (__inactive, __a, __p);
@@ -28691,20 +28540,6 @@ __arm_vmlaldavaxq_p (int64_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p
  return __arm_vmlaldavaxq_p_s16 (__a, __b, __c, __p);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaxq_p (uint64_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vmlaldavaxq_p_u32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlaldavaxq_p (uint64_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vmlaldavaxq_p_u16 (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmlsldavaq_p (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
@@ -36885,10 +36720,7 @@ extern void *__ARM_undef;
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
   int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));})
+  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));})
 
 #define __arm_vqdmlashq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
@@ -36904,10 +36736,7 @@ extern void *__ARM_undef;
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
   int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));})
+  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));})
 
 #define __arm_vmlasq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
@@ -36926,10 +36755,7 @@ extern void *__ARM_undef;
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
   int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));})
+  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));})
 
 #define __arm_vqrdmladhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
@@ -39365,10 +39191,7 @@ extern void *__ARM_undef;
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
   int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));})
+  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));})
 
 #define __arm_vqdmlashq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
@@ -39384,10 +39207,7 @@ extern void *__ARM_undef;
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
   int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));})
+  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));})
 
 #define __arm_vqrdmladhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
@@ -39518,10 +39338,7 @@ extern void *__ARM_undef;
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
   int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));})
+  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));})
 
 #define __arm_vqdmlsdhq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
@@ -41184,9 +41001,7 @@ extern void *__ARM_undef;
   __typeof(p2) __p2 = (p2); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
   int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavaxq_p_s16 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavaxq_p_s32 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmlaldavaxq_p_u16 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmlaldavaxq_p_u32 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
+  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavaxq_p_s32 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
 
 #define __arm_vmlsldavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def
index 9f3ecfe..ac92818 100644
--- a/gcc/config/arm/arm_mve_builtins.def
+++ b/gcc/config/arm/arm_mve_builtins.def
@@ -312,9 +312,6 @@ VAR3 (TERNOP_NONE_NONE_UNONE_IMM, vshlcq_vec_s, v16qi, v8hi, v4si)
 VAR4 (TERNOP_UNONE_UNONE_UNONE_UNONE, vpselq_u, v16qi, v8hi, v4si, v2di)
 VAR4 (TERNOP_NONE_NONE_NONE_UNONE, vpselq_s, v16qi, v8hi, v4si, v2di)
 VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vrev64q_m_u, v16qi, v8hi, v4si)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vqrdmlashq_n_u, v16qi, v8hi, v4si)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vqrdmlahq_n_u, v16qi, v8hi, v4si)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vqdmlahq_n_u, v16qi, v8hi, v4si)
 VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmvnq_m_u, v16qi, v8hi, v4si)
 VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmlasq_n_u, v16qi, v8hi, v4si)
 VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmlaq_n_u, v16qi, v8hi, v4si)
@@ -617,7 +614,6 @@ VAR3 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vrshrq_m_n_s, v16qi, v8hi, v4si)
 VAR3 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vqshlq_m_n_s, v16qi, v8hi, v4si)
 VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmulltq_poly_m_p, v16qi, v8hi)
 VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmullbq_poly_m_p, v16qi, v8hi)
-VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmlaldavaxq_p_u, v8hi, v4si)
 VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmlaldavaq_p_u, v8hi, v4si)
 VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshrntq_m_n_u, v8hi, v4si)
 VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshrnbq_m_n_u, v8hi, v4si)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 0dbf1b2..f934872 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -1284,10 +1284,10 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VMLADAVQ_P_S "s") (VMLADAVQ_P_U "u") (VMLAQ_N_S "s")
 		       (VMLAQ_N_U "u") (VMLASQ_N_S "s") (VMLASQ_N_U "u")
 		       (VMVNQ_M_S "s") (VMVNQ_M_U "u") (VPSELQ_S "s")
-		       (VPSELQ_U "u") (VQDMLAHQ_N_S "s") (VQDMLAHQ_N_U "u")
+		       (VPSELQ_U "u") (VQDMLAHQ_N_S "s")
 		       (VQDMLASHQ_N_S "s")
-		       (VQRDMLAHQ_N_S "s") (VQRDMLAHQ_N_U "u")
-		       (VQRDMLASHQ_N_S "s") (VQRDMLASHQ_N_U "u")
+		       (VQRDMLAHQ_N_S "s")
+		       (VQRDMLASHQ_N_S "s")
 		       (VQRSHLQ_M_N_S "s") (VQRSHLQ_M_N_U "u")
 		       (VQSHLQ_M_R_S "s") (VQSHLQ_M_R_U "u") (VSRIQ_N_S "s")
 		       (VREV64Q_M_S "s") (VREV64Q_M_U "u") (VSRIQ_N_U "u")
@@ -1360,7 +1360,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VQSHRNBQ_M_N_S "s") (VQSHRNBQ_M_N_U "u")
 		       (VQRSHRNTQ_M_N_S "s") (VQRSHRNTQ_M_N_U "u")
 		       (VQRSHRNBQ_M_N_S "s") (VQRSHRNBQ_M_N_U "u")
-		       (VMLALDAVAXQ_P_S "s") (VMLALDAVAXQ_P_U "u")
+		       (VMLALDAVAXQ_P_S "s")
 		       (VMLALDAVAQ_P_S "s") (VMLALDAVAQ_P_U "u")
 		       (VSTRWQSB_S "s") (VSTRWQSB_U "u") (VSTRBQSO_S "s")
 		       (VSTRBQSO_U "u") (VSTRBQ_S "s") (VSTRBQ_U "u")
@@ -1578,10 +1578,10 @@ (define_int_iterator VMLAQ_N [VMLAQ_N_S VMLAQ_N_U])
 (define_int_iterator VMLASQ_N [VMLASQ_N_S VMLASQ_N_U])
 (define_int_iterator VMVNQ_M [VMVNQ_M_S VMVNQ_M_U])
 (define_int_iterator VPSELQ [VPSELQ_S VPSELQ_U])
-(define_int_iterator VQDMLAHQ_N [VQDMLAHQ_N_S VQDMLAHQ_N_U])
+(define_int_iterator VQDMLAHQ_N [VQDMLAHQ_N_S])
 (define_int_iterator VQDMLASHQ_N [VQDMLASHQ_N_S])
-(define_int_iterator VQRDMLAHQ_N [VQRDMLAHQ_N_S VQRDMLAHQ_N_U])
-(define_int_iterator VQRDMLASHQ_N [VQRDMLASHQ_N_S VQRDMLASHQ_N_U])
+(define_int_iterator VQRDMLAHQ_N [VQRDMLAHQ_N_S])
+(define_int_iterator VQRDMLASHQ_N [VQRDMLASHQ_N_S])
 (define_int_iterator VQRSHLQ_M_N [VQRSHLQ_M_N_S VQRSHLQ_M_N_U])
 (define_int_iterator VQSHLQ_M_R [VQSHLQ_M_R_S VQSHLQ_M_R_U])
 (define_int_iterator VREV64Q_M [VREV64Q_M_S VREV64Q_M_U])
@@ -1663,7 +1663,7 @@ (define_int_iterator VABDQ_M [VABDQ_M_S VABDQ_M_U])
 (define_int_iterator VMLAQ_M_N [VMLAQ_M_N_S VMLAQ_M_N_U])
 (define_int_iterator VQSHLQ_M_N [VQSHLQ_M_N_S VQSHLQ_M_N_U])
 (define_int_iterator VMLALDAVAQ_P [VMLALDAVAQ_P_U VMLALDAVAQ_P_S])
-(define_int_iterator VMLALDAVAXQ_P [VMLALDAVAXQ_P_U VMLALDAVAXQ_P_S])
+(define_int_iterator VMLALDAVAXQ_P [VMLALDAVAXQ_P_S])
 (define_int_iterator VQRSHRNBQ_M_N [VQRSHRNBQ_M_N_U VQRSHRNBQ_M_N_S])
 (define_int_iterator VQRSHRNTQ_M_N [VQRSHRNTQ_M_N_S VQRSHRNTQ_M_N_U])
 (define_int_iterator VQSHRNBQ_M_N [VQSHRNBQ_M_N_U VQSHRNBQ_M_N_S])
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index d406ab1..0d77601 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -3662,7 +3662,7 @@ (define_insn "mve_vqabsq_m_s<mode>"
    (set_attr "length""8")])
 
 ;;
-;; [vqdmlahq_n_s, vqdmlahq_n_u])
+;; [vqdmlahq_n_s])
 ;;
 (define_insn "mve_vqdmlahq_n_<supf><mode>"
   [
@@ -3742,7 +3742,7 @@ (define_insn "mve_vqrdmladhxq_s<mode>"
 ])
 
 ;;
-;; [vqrdmlahq_n_s, vqrdmlahq_n_u])
+;; [vqrdmlahq_n_s])
 ;;
 (define_insn "mve_vqrdmlahq_n_<supf><mode>"
   [
@@ -3758,7 +3758,7 @@ (define_insn "mve_vqrdmlahq_n_<supf><mode>"
 ])
 
 ;;
-;; [vqrdmlashq_n_s, vqrdmlashq_n_u])
+;; [vqrdmlashq_n_s])
 ;;
 (define_insn "mve_vqrdmlashq_n_<supf><mode>"
   [
@@ -6498,7 +6498,7 @@ (define_insn "mve_vmlaldavaq_p_<supf><mode>"
    (set_attr "length""8")])
 
 ;;
-;; [vmlaldavaxq_p_u, vmlaldavaxq_p_s])
+;; [vmlaldavaxq_p_s])
 ;;
 (define_insn "mve_vmlaldavaxq_p_<supf><mode>"
   [
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index a98ac09..a3844e9 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -876,15 +876,12 @@ (define_c_enum "unspec" [
   VPSELQ_U
   VQABSQ_M_S
   VQDMLAHQ_N_S
-  VQDMLAHQ_N_U
   VQDMLASHQ_N_S
   VQNEGQ_M_S
   VQRDMLADHQ_S
   VQRDMLADHXQ_S
   VQRDMLAHQ_N_S
-  VQRDMLAHQ_N_U
   VQRDMLASHQ_N_S
-  VQRDMLASHQ_N_U
   VQRDMLSDHQ_S
   VQRDMLSDHXQ_S
   VQRSHLQ_M_N_S
@@ -1149,7 +1146,6 @@ (define_c_enum "unspec" [
   VQDMLADHQ_M_S
   VMLALDAVAQ_P_U
   VMLALDAVAQ_P_S
-  VMLALDAVAXQ_P_U
   VQRSHRNBQ_M_N_U
   VQRSHRNBQ_M_N_S
   VQRSHRNTQ_M_N_S
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u16.c
deleted file mode 100644
index 704d0e7..0000000
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u16.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* { dg-require-effective-target arm_v8_1m_mve_ok } */
-/* { dg-add-options arm_v8_1m_mve } */
-/* { dg-additional-options "-O2" } */
-
-#include "arm_mve.h"
-
-uint64_t
-foo (uint64_t a, uint16x8_t b, uint16x8_t c, mve_pred16_t p)
-{
-  return vmlaldavaxq_p_u16 (a, b, c, p);
-}
-
-/* { dg-final { scan-assembler "vmlaldavaxt.u16"  }  } */
-
-uint64_t
-foo1 (uint64_t a, uint16x8_t b, uint16x8_t c, mve_pred16_t p)
-{
-  return vmlaldavaxq_p (a, b, c, p);
-}
-
-/* { dg-final { scan-assembler "vmlaldavaxt.u16"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u32.c
deleted file mode 100644
index 84dca18..0000000
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u32.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* { dg-require-effective-target arm_v8_1m_mve_ok } */
-/* { dg-add-options arm_v8_1m_mve } */
-/* { dg-additional-options "-O2" } */
-
-#include "arm_mve.h"
-
-uint64_t
-foo (uint64_t a, uint32x4_t b, uint32x4_t c, mve_pred16_t p)
-{
-  return vmlaldavaxq_p_u32 (a, b, c, p);
-}
-
-/* { dg-final { scan-assembler "vmlaldavaxt.u32"  }  } */
-
-uint64_t
-foo1 (uint64_t a, uint32x4_t b, uint32x4_t c, mve_pred16_t p)
-{
-  return vmlaldavaxq_p (a, b, c, p);
-}
-
-/* { dg-final { scan-assembler "vmlaldavaxt.u32"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u16.c
deleted file mode 100644
index 67ebb79..0000000
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u16.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* { dg-require-effective-target arm_v8_1m_mve_ok } */
-/* { dg-add-options arm_v8_1m_mve } */
-/* { dg-additional-options "-O2" } */
-
-#include "arm_mve.h"
-
-uint16x8_t
-foo (uint16x8_t a, uint16x8_t b, uint16_t c)
-{
-  return vqdmlahq_n_u16 (a, b, c);
-}
-
-/* { dg-final { scan-assembler "vqdmlah.s16"  }  } */
-
-uint16x8_t
-foo1 (uint16x8_t a, uint16x8_t b, uint16_t c)
-{
-  return vqdmlahq (a, b, c);
-}
-
-/* { dg-final { scan-assembler "vqdmlah.s16"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u32.c
deleted file mode 100644
index d82bca1..0000000
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u32.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* { dg-require-effective-target arm_v8_1m_mve_ok } */
-/* { dg-add-options arm_v8_1m_mve } */
-/* { dg-additional-options "-O2" } */
-
-#include "arm_mve.h"
-
-uint32x4_t
-foo (uint32x4_t a, uint32x4_t b, uint32_t c)
-{
-  return vqdmlahq_n_u32 (a, b, c);
-}
-
-/* { dg-final { scan-assembler "vqdmlah.s32"  }  } */
-
-uint32x4_t
-foo1 (uint32x4_t a, uint32x4_t b, uint32_t c)
-{
-  return vqdmlahq (a, b, c);
-}
-
-/* { dg-final { scan-assembler "vqdmlah.s32"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u8.c
deleted file mode 100644
index 4b4c246..0000000
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u8.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* { dg-require-effective-target arm_v8_1m_mve_ok } */
-/* { dg-add-options arm_v8_1m_mve } */
-/* { dg-additional-options "-O2" } */
-
-#include "arm_mve.h"
-
-uint8x16_t
-foo (uint8x16_t a, uint8x16_t b, uint8_t c)
-{
-  return vqdmlahq_n_u8 (a, b, c);
-}
-
-/* { dg-final { scan-assembler "vqdmlah.s8"  }  } */
-
-uint8x16_t
-foo1 (uint8x16_t a, uint8x16_t b, uint8_t c)
-{
-  return vqdmlahq (a, b, c);
-}
-
-/* { dg-final { scan-assembler "vqdmlah.s8"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u16.c
deleted file mode 100644
index 6fe4f77..0000000
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u16.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* { dg-require-effective-target arm_v8_1m_mve_ok } */
-/* { dg-add-options arm_v8_1m_mve } */
-/* { dg-additional-options "-O2" } */
-
-#include "arm_mve.h"
-
-uint16x8_t
-foo (uint16x8_t a, uint16x8_t b, uint16_t c)
-{
-  return vqrdmlahq_n_u16 (a, b, c);
-}
-
-/* { dg-final { scan-assembler "vqrdmlah.s16"  }  } */
-
-uint16x8_t
-foo1 (uint16x8_t a, uint16x8_t b, uint16_t c)
-{
-  return vqrdmlahq (a, b, c);
-}
-
-/* { dg-final { scan-assembler "vqrdmlah.s16"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u32.c
deleted file mode 100644
index 8205403..0000000
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u32.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* { dg-require-effective-target arm_v8_1m_mve_ok } */
-/* { dg-add-options arm_v8_1m_mve } */
-/* { dg-additional-options "-O2" } */
-
-#include "arm_mve.h"
-
-uint32x4_t
-foo (uint32x4_t a, uint32x4_t b, uint32_t c)
-{
-  return vqrdmlahq_n_u32 (a, b, c);
-}
-
-/* { dg-final { scan-assembler "vqrdmlah.s32"  }  } */
-
-uint32x4_t
-foo1 (uint32x4_t a, uint32x4_t b, uint32_t c)
-{
-  return vqrdmlahq (a, b, c);
-}
-
-/* { dg-final { scan-assembler "vqrdmlah.s32"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u8.c
deleted file mode 100644
index ed5c3a0..0000000
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u8.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* { dg-require-effective-target arm_v8_1m_mve_ok } */
-/* { dg-add-options arm_v8_1m_mve } */
-/* { dg-additional-options "-O2" } */
-
-#include "arm_mve.h"
-
-uint8x16_t
-foo (uint8x16_t a, uint8x16_t b, uint8_t c)
-{
-  return vqrdmlahq_n_u8 (a, b, c);
-}
-
-/* { dg-final { scan-assembler "vqrdmlah.s8"  }  } */
-
-uint8x16_t
-foo1 (uint8x16_t a, uint8x16_t b, uint8_t c)
-{
-  return vqrdmlahq (a, b, c);
-}
-
-/* { dg-final { scan-assembler "vqrdmlah.s8"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u16.c
deleted file mode 100644
index 6c5f280..0000000
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u16.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* { dg-require-effective-target arm_v8_1m_mve_ok } */
-/* { dg-add-options arm_v8_1m_mve } */
-/* { dg-additional-options "-O2" } */
-
-#include "arm_mve.h"
-
-uint16x8_t
-foo (uint16x8_t a, uint16x8_t b, uint16_t c)
-{
-  return vqrdmlashq_n_u16 (a, b, c);
-}
-
-/* { dg-final { scan-assembler "vqrdmlash.s16"  }  } */
-
-uint16x8_t
-foo1 (uint16x8_t a, uint16x8_t b, uint16_t c)
-{
-  return vqrdmlashq (a, b, c);
-}
-
-/* { dg-final { scan-assembler "vqrdmlash.s16"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u32.c
deleted file mode 100644
index daf520d..0000000
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u32.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* { dg-require-effective-target arm_v8_1m_mve_ok } */
-/* { dg-add-options arm_v8_1m_mve } */
-/* { dg-additional-options "-O2" } */
-
-#include "arm_mve.h"
-
-uint32x4_t
-foo (uint32x4_t a, uint32x4_t b, uint32_t c)
-{
-  return vqrdmlashq_n_u32 (a, b, c);
-}
-
-/* { dg-final { scan-assembler "vqrdmlash.s32"  }  } */
-
-uint32x4_t
-foo1 (uint32x4_t a, uint32x4_t b, uint32_t c)
-{
-  return vqrdmlashq (a, b, c);
-}
-
-/* { dg-final { scan-assembler "vqrdmlash.s32"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u8.c
deleted file mode 100644
index 59b0e39..0000000
--- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u8.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* { dg-require-effective-target arm_v8_1m_mve_ok } */
-/* { dg-add-options arm_v8_1m_mve } */
-/* { dg-additional-options "-O2" } */
-
-#include "arm_mve.h"
-
-uint8x16_t
-foo (uint8x16_t a, uint8x16_t b, uint8_t c)
-{
-  return vqrdmlashq_n_u8 (a, b, c);
-}
-
-/* { dg-final { scan-assembler "vqrdmlash.s8"  }  } */
-
-uint8x16_t
-foo1 (uint8x16_t a, uint8x16_t b, uint8_t c)
-{
-  return vqrdmlashq (a, b, c);
-}
-
-/* { dg-final { scan-assembler "vqrdmlash.s8"  }  } */
-- 
2.7.4


^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH v2] arm: [MVE] Remove illegal intrinsics
       [not found]   ` <DB7PR08MB3002AAC33F48C8D8BAA376E7930D0@DB7PR08MB3002.eurprd08.prod.outlook.com>
@ 2020-10-06 16:13     ` Kyrylo Tkachov
  0 siblings, 0 replies; 4+ messages in thread
From: Kyrylo Tkachov @ 2020-10-06 16:13 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: gcc-patches

With gcc-patches on too.
Not sure why the reply-all function fails for your address....
Kyrill

> -----Original Message-----
> From: Kyrylo Tkachov
> Sent: 06 October 2020 17:13
> To: Christophe Lyon <christophe.lyon@linaro.org>
> Subject: RE: [PATCH v2] arm: [MVE] Remove illegal intrinsics
> 
> 
> 
> > -----Original Message-----
> > From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of
> > Christophe Lyon via Gcc-patches
> > Sent: 06 October 2020 16:59
> > To: gcc-patches@gcc.gnu.org
> > Subject: [PATCH v2] arm: [MVE] Remove illegal intrinsics
> >
> > A few MVE intrinsics had an unsigned variant implemented while they are
> > not supported by the hardware.  This patch removes them:
> > __arm_vqrdmlashq_n_u8
> > __arm_vqrdmlahq_n_u8
> > __arm_vqdmlahq_n_u8
> > __arm_vqrdmlashq_n_u16
> > __arm_vqrdmlahq_n_u16
> > __arm_vqdmlahq_n_u16
> > __arm_vqrdmlashq_n_u32
> > __arm_vqrdmlahq_n_u32
> > __arm_vqdmlahq_n_u32
> > __arm_vmlaldavaxq_p_u32
> > __arm_vmlaldavaxq_p_u16
> >
> > v2: rebased after Srinath's reorganization patch
> 
> Ok.
> Thanks,
> Kyrill
> 
> >
> > 2020-10-06  Christophe Lyon  <christophe.lyon@linaro.org>
> >
> > 	gcc/
> > 	PR target/96914
> > 	* config/arm/arm_mve.h (vqrdmlashq_n_u8, vqrdmlashq_n_u16)
> > 	(vqrdmlashq_n_u32, vqrdmlahq_n_u8, vqrdmlahq_n_u16)
> > 	(vqrdmlahq_n_u32, vqdmlahq_n_u8, vqdmlahq_n_u16,
> > vqdmlahq_n_u32)
> > 	(vmlaldavaxq_p_u16, vmlaldavaxq_p_u32): Remove.
> > 	* config/arm/arm_mve_builtins.def (vqrdmlashq_n_u,
> > vqrdmlahq_n_u)
> > 	(vqdmlahq_n_u, vmlaldavaxq_p_u): Remove.
> > 	* config/arm/unspecs.md (VQDMLAHQ_N_U, VQRDMLAHQ_N_U)
> > 	(VQRDMLASHQ_N_U)
> > 	(VMLALDAVAXQ_P_U): Remove unspecs.
> > 	* config/arm/iterators.md (VQDMLAHQ_N_U, VQRDMLAHQ_N_U)
> > 	(VQRDMLASHQ_N_U, VMLALDAVAXQ_P_U): Remove attributes.
> > 	(VQDMLAHQ_N, VQRDMLAHQ_N, VQRDMLASHQ_N,
> > VMLALDAVAXQ_P): Remove
> > 	unsigned variants from iterators.
> > 	* config/arm/mve.md (mve_vqdmlahq_n_<supf><mode>)
> > 	(mve_vqrdmlahq_n_<supf><mode>)
> > 	(mve_vqrdmlashq_n_<supf><mode>,
> > mve_vmlaldavaxq_p_<supf><mode>):
> > 	Update comment.
> >
> > 	gcc/testsuite/
> > 	PR target/96914
> > 	* gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u16.c: Remove.
> > 	* gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u32.c: Remove.
> > 	* gcc.target/arm/mve/intrinsics/vqdmlahq_n_u16.c: Remove.
> > 	* gcc.target/arm/mve/intrinsics/vqdmlahq_n_u32.c: Remove.
> > 	* gcc.target/arm/mve/intrinsics/vqdmlahq_n_u8.c: Remove.
> > 	* gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u16.c: Remove.
> > 	* gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u32.c: Remove.
> > 	* gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u8.c: Remove.
> > 	* gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u16.c: Remove.
> > 	* gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u32.c: Remove.
> > 	* gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u8.c: Remove.
> > ---
> >  gcc/config/arm/arm_mve.h                           | 199 +--------------------
> >  gcc/config/arm/arm_mve_builtins.def                |   4 -
> >  gcc/config/arm/iterators.md                        |  16 +-
> >  gcc/config/arm/mve.md                              |   8 +-
> >  gcc/config/arm/unspecs.md                          |   4 -
> >  .../arm/mve/intrinsics/vmlaldavaxq_p_u16.c         |  21 ---
> >  .../arm/mve/intrinsics/vmlaldavaxq_p_u32.c         |  21 ---
> >  .../gcc.target/arm/mve/intrinsics/vqdmlahq_n_u16.c |  21 ---
> >  .../gcc.target/arm/mve/intrinsics/vqdmlahq_n_u32.c |  21 ---
> >  .../gcc.target/arm/mve/intrinsics/vqdmlahq_n_u8.c  |  21 ---
> >  .../arm/mve/intrinsics/vqrdmlahq_n_u16.c           |  21 ---
> >  .../arm/mve/intrinsics/vqrdmlahq_n_u32.c           |  21 ---
> >  .../gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u8.c |  21 ---
> >  .../arm/mve/intrinsics/vqrdmlashq_n_u16.c          |  21 ---
> >  .../arm/mve/intrinsics/vqrdmlashq_n_u32.c          |  21 ---
> >  .../arm/mve/intrinsics/vqrdmlashq_n_u8.c           |  21 ---
> >  16 files changed, 19 insertions(+), 443 deletions(-)
> >  delete mode 100644
> > gcc/testsuite/gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u16.c
> >  delete mode 100644
> > gcc/testsuite/gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u32.c
> >  delete mode 100644
> > gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u16.c
> >  delete mode 100644
> > gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u32.c
> >  delete mode 100644
> > gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u8.c
> >  delete mode 100644
> > gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u16.c
> >  delete mode 100644
> > gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u32.c
> >  delete mode 100644
> > gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u8.c
> >  delete mode 100644
> > gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u16.c
> >  delete mode 100644
> > gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u32.c
> >  delete mode 100644
> > gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u8.c
> >
> > diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> > index 7626ad1..ccdac67 100644
> > --- a/gcc/config/arm/arm_mve.h
> > +++ b/gcc/config/arm/arm_mve.h
> > @@ -1237,9 +1237,6 @@
> >  #define vpselq_u8(__a, __b, __p) __arm_vpselq_u8(__a, __b, __p)
> >  #define vpselq_s8(__a, __b, __p) __arm_vpselq_s8(__a, __b, __p)
> >  #define vrev64q_m_u8(__inactive, __a, __p)
> > __arm_vrev64q_m_u8(__inactive, __a, __p)
> > -#define vqrdmlashq_n_u8(__a, __b, __c) __arm_vqrdmlashq_n_u8(__a,
> __b,
> > __c)
> > -#define vqrdmlahq_n_u8(__a, __b, __c) __arm_vqrdmlahq_n_u8(__a, __b,
> > __c)
> > -#define vqdmlahq_n_u8(__a, __b, __c) __arm_vqdmlahq_n_u8(__a, __b,
> > __c)
> >  #define vmvnq_m_u8(__inactive, __a, __p)
> __arm_vmvnq_m_u8(__inactive,
> > __a, __p)
> >  #define vmlasq_n_u8(__a, __b, __c) __arm_vmlasq_n_u8(__a, __b, __c)
> >  #define vmlaq_n_u8(__a, __b, __c) __arm_vmlaq_n_u8(__a, __b, __c)
> > @@ -1323,9 +1320,6 @@
> >  #define vpselq_u16(__a, __b, __p) __arm_vpselq_u16(__a, __b, __p)
> >  #define vpselq_s16(__a, __b, __p) __arm_vpselq_s16(__a, __b, __p)
> >  #define vrev64q_m_u16(__inactive, __a, __p)
> > __arm_vrev64q_m_u16(__inactive, __a, __p)
> > -#define vqrdmlashq_n_u16(__a, __b, __c) __arm_vqrdmlashq_n_u16(__a,
> > __b, __c)
> > -#define vqrdmlahq_n_u16(__a, __b, __c) __arm_vqrdmlahq_n_u16(__a,
> __b,
> > __c)
> > -#define vqdmlahq_n_u16(__a, __b, __c) __arm_vqdmlahq_n_u16(__a,
> __b,
> > __c)
> >  #define vmvnq_m_u16(__inactive, __a, __p)
> > __arm_vmvnq_m_u16(__inactive, __a, __p)
> >  #define vmlasq_n_u16(__a, __b, __c) __arm_vmlasq_n_u16(__a, __b, __c)
> >  #define vmlaq_n_u16(__a, __b, __c) __arm_vmlaq_n_u16(__a, __b, __c)
> > @@ -1409,9 +1403,6 @@
> >  #define vpselq_u32(__a, __b, __p) __arm_vpselq_u32(__a, __b, __p)
> >  #define vpselq_s32(__a, __b, __p) __arm_vpselq_s32(__a, __b, __p)
> >  #define vrev64q_m_u32(__inactive, __a, __p)
> > __arm_vrev64q_m_u32(__inactive, __a, __p)
> > -#define vqrdmlashq_n_u32(__a, __b, __c) __arm_vqrdmlashq_n_u32(__a,
> > __b, __c)
> > -#define vqrdmlahq_n_u32(__a, __b, __c) __arm_vqrdmlahq_n_u32(__a,
> __b,
> > __c)
> > -#define vqdmlahq_n_u32(__a, __b, __c) __arm_vqdmlahq_n_u32(__a,
> __b,
> > __c)
> >  #define vmvnq_m_u32(__inactive, __a, __p)
> > __arm_vmvnq_m_u32(__inactive, __a, __p)
> >  #define vmlasq_n_u32(__a, __b, __c) __arm_vmlasq_n_u32(__a, __b, __c)
> >  #define vmlaq_n_u32(__a, __b, __c) __arm_vmlaq_n_u32(__a, __b, __c)
> > @@ -2033,8 +2024,6 @@
> >  #define vmlaldavaq_p_u16(__a, __b, __c, __p)
> > __arm_vmlaldavaq_p_u16(__a, __b, __c, __p)
> >  #define vmlaldavaxq_p_s32(__a, __b, __c, __p)
> > __arm_vmlaldavaxq_p_s32(__a, __b, __c, __p)
> >  #define vmlaldavaxq_p_s16(__a, __b, __c, __p)
> > __arm_vmlaldavaxq_p_s16(__a, __b, __c, __p)
> > -#define vmlaldavaxq_p_u32(__a, __b, __c, __p)
> > __arm_vmlaldavaxq_p_u32(__a, __b, __c, __p)
> > -#define vmlaldavaxq_p_u16(__a, __b, __c, __p)
> > __arm_vmlaldavaxq_p_u16(__a, __b, __c, __p)
> >  #define vmlsldavaq_p_s32(__a, __b, __c, __p)
> > __arm_vmlsldavaq_p_s32(__a, __b, __c, __p)
> >  #define vmlsldavaq_p_s16(__a, __b, __c, __p)
> > __arm_vmlsldavaq_p_s16(__a, __b, __c, __p)
> >  #define vmlsldavaxq_p_s32(__a, __b, __c, __p)
> > __arm_vmlsldavaxq_p_s32(__a, __b, __c, __p)
> > @@ -6970,27 +6959,6 @@ __arm_vrev64q_m_u8 (uint8x16_t __inactive,
> > uint8x16_t __a, mve_pred16_t __p)
> >
> >  __extension__ extern __inline uint8x16_t
> >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vqrdmlashq_n_u8 (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
> > -{
> > -  return __builtin_mve_vqrdmlashq_n_uv16qi (__a, __b, __c);
> > -}
> > -
> > -__extension__ extern __inline uint8x16_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vqrdmlahq_n_u8 (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
> > -{
> > -  return __builtin_mve_vqrdmlahq_n_uv16qi (__a, __b, __c);
> > -}
> > -
> > -__extension__ extern __inline uint8x16_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vqdmlahq_n_u8 (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
> > -{
> > -  return __builtin_mve_vqdmlahq_n_uv16qi (__a, __b, __c);
> > -}
> > -
> > -__extension__ extern __inline uint8x16_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> >  __arm_vmvnq_m_u8 (uint8x16_t __inactive, uint8x16_t __a,
> mve_pred16_t
> > __p)
> >  {
> >    return __builtin_mve_vmvnq_m_uv16qi (__inactive, __a, __p);
> > @@ -7573,27 +7541,6 @@ __arm_vrev64q_m_u16 (uint16x8_t __inactive,
> > uint16x8_t __a, mve_pred16_t __p)
> >
> >  __extension__ extern __inline uint16x8_t
> >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vqrdmlashq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
> > -{
> > -  return __builtin_mve_vqrdmlashq_n_uv8hi (__a, __b, __c);
> > -}
> > -
> > -__extension__ extern __inline uint16x8_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vqrdmlahq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
> > -{
> > -  return __builtin_mve_vqrdmlahq_n_uv8hi (__a, __b, __c);
> > -}
> > -
> > -__extension__ extern __inline uint16x8_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vqdmlahq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
> > -{
> > -  return __builtin_mve_vqdmlahq_n_uv8hi (__a, __b, __c);
> > -}
> > -
> > -__extension__ extern __inline uint16x8_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> >  __arm_vmvnq_m_u16 (uint16x8_t __inactive, uint16x8_t __a,
> > mve_pred16_t __p)
> >  {
> >    return __builtin_mve_vmvnq_m_uv8hi (__inactive, __a, __p);
> > @@ -8175,27 +8122,6 @@ __arm_vrev64q_m_u32 (uint32x4_t __inactive,
> > uint32x4_t __a, mve_pred16_t __p)
> >
> >  __extension__ extern __inline uint32x4_t
> >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vqrdmlashq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
> > -{
> > -  return __builtin_mve_vqrdmlashq_n_uv4si (__a, __b, __c);
> > -}
> > -
> > -__extension__ extern __inline uint32x4_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vqrdmlahq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
> > -{
> > -  return __builtin_mve_vqrdmlahq_n_uv4si (__a, __b, __c);
> > -}
> > -
> > -__extension__ extern __inline uint32x4_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vqdmlahq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
> > -{
> > -  return __builtin_mve_vqdmlahq_n_uv4si (__a, __b, __c);
> > -}
> > -
> > -__extension__ extern __inline uint32x4_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> >  __arm_vmvnq_m_u32 (uint32x4_t __inactive, uint32x4_t __a,
> > mve_pred16_t __p)
> >  {
> >    return __builtin_mve_vmvnq_m_uv4si (__inactive, __a, __p);
> > @@ -11862,20 +11788,6 @@ __arm_vmlaldavaxq_p_s16 (int64_t __a,
> > int16x8_t __b, int16x8_t __c, mve_pred16_t
> >    return __builtin_mve_vmlaldavaxq_p_sv8hi (__a, __b, __c, __p);
> >  }
> >
> > -__extension__ extern __inline uint64_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vmlaldavaxq_p_u32 (uint64_t __a, uint32x4_t __b, uint32x4_t __c,
> > mve_pred16_t __p)
> > -{
> > -  return __builtin_mve_vmlaldavaxq_p_uv4si (__a, __b, __c, __p);
> > -}
> > -
> > -__extension__ extern __inline uint64_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vmlaldavaxq_p_u16 (uint64_t __a, uint16x8_t __b, uint16x8_t __c,
> > mve_pred16_t __p)
> > -{
> > -  return __builtin_mve_vmlaldavaxq_p_uv8hi (__a, __b, __c, __p);
> > -}
> > -
> >  __extension__ extern __inline int64_t
> >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> >  __arm_vmlsldavaq_p_s32 (int64_t __a, int32x4_t __b, int32x4_t __c,
> > mve_pred16_t __p)
> > @@ -23800,27 +23712,6 @@ __arm_vrev64q_m (uint8x16_t __inactive,
> > uint8x16_t __a, mve_pred16_t __p)
> >
> >  __extension__ extern __inline uint8x16_t
> >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vqrdmlashq (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
> > -{
> > - return __arm_vqrdmlashq_n_u8 (__a, __b, __c);
> > -}
> > -
> > -__extension__ extern __inline uint8x16_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vqrdmlahq (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
> > -{
> > - return __arm_vqrdmlahq_n_u8 (__a, __b, __c);
> > -}
> > -
> > -__extension__ extern __inline uint8x16_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vqdmlahq (uint8x16_t __a, uint8x16_t __b, uint8_t __c)
> > -{
> > - return __arm_vqdmlahq_n_u8 (__a, __b, __c);
> > -}
> > -
> > -__extension__ extern __inline uint8x16_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> >  __arm_vmvnq_m (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t
> __p)
> >  {
> >   return __arm_vmvnq_m_u8 (__inactive, __a, __p);
> > @@ -24402,27 +24293,6 @@ __arm_vrev64q_m (uint16x8_t __inactive,
> > uint16x8_t __a, mve_pred16_t __p)
> >
> >  __extension__ extern __inline uint16x8_t
> >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vqrdmlashq (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
> > -{
> > - return __arm_vqrdmlashq_n_u16 (__a, __b, __c);
> > -}
> > -
> > -__extension__ extern __inline uint16x8_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vqrdmlahq (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
> > -{
> > - return __arm_vqrdmlahq_n_u16 (__a, __b, __c);
> > -}
> > -
> > -__extension__ extern __inline uint16x8_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vqdmlahq (uint16x8_t __a, uint16x8_t __b, uint16_t __c)
> > -{
> > - return __arm_vqdmlahq_n_u16 (__a, __b, __c);
> > -}
> > -
> > -__extension__ extern __inline uint16x8_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> >  __arm_vmvnq_m (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t
> __p)
> >  {
> >   return __arm_vmvnq_m_u16 (__inactive, __a, __p);
> > @@ -25004,27 +24874,6 @@ __arm_vrev64q_m (uint32x4_t __inactive,
> > uint32x4_t __a, mve_pred16_t __p)
> >
> >  __extension__ extern __inline uint32x4_t
> >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vqrdmlashq (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
> > -{
> > - return __arm_vqrdmlashq_n_u32 (__a, __b, __c);
> > -}
> > -
> > -__extension__ extern __inline uint32x4_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vqrdmlahq (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
> > -{
> > - return __arm_vqrdmlahq_n_u32 (__a, __b, __c);
> > -}
> > -
> > -__extension__ extern __inline uint32x4_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vqdmlahq (uint32x4_t __a, uint32x4_t __b, uint32_t __c)
> > -{
> > - return __arm_vqdmlahq_n_u32 (__a, __b, __c);
> > -}
> > -
> > -__extension__ extern __inline uint32x4_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> >  __arm_vmvnq_m (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t
> __p)
> >  {
> >   return __arm_vmvnq_m_u32 (__inactive, __a, __p);
> > @@ -28691,20 +28540,6 @@ __arm_vmlaldavaxq_p (int64_t __a,
> int16x8_t
> > __b, int16x8_t __c, mve_pred16_t __p
> >   return __arm_vmlaldavaxq_p_s16 (__a, __b, __c, __p);
> >  }
> >
> > -__extension__ extern __inline uint64_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vmlaldavaxq_p (uint64_t __a, uint32x4_t __b, uint32x4_t __c,
> > mve_pred16_t __p)
> > -{
> > - return __arm_vmlaldavaxq_p_u32 (__a, __b, __c, __p);
> > -}
> > -
> > -__extension__ extern __inline uint64_t
> > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> > -__arm_vmlaldavaxq_p (uint64_t __a, uint16x8_t __b, uint16x8_t __c,
> > mve_pred16_t __p)
> > -{
> > - return __arm_vmlaldavaxq_p_u16 (__a, __b, __c, __p);
> > -}
> > -
> >  __extension__ extern __inline int64_t
> >  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> >  __arm_vmlsldavaq_p (int64_t __a, int32x4_t __b, int32x4_t __c,
> > mve_pred16_t __p)
> > @@ -36885,10 +36720,7 @@ extern void *__ARM_undef;
> >    _Generic( (int
> >
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
> > eid(__p2)])0, \
> >    int
> >
> (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mv
> > e_type_int_n]: __arm_vqrdmlashq_n_s8 (__ARM_mve_coerce(__p0,
> > int8x16_t), __ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2,
> > int8_t)), \
> >    int
> >
> (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mv
> > e_type_int_n]: __arm_vqrdmlashq_n_s16 (__ARM_mve_coerce(__p0,
> > int16x8_t), __ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2,
> > int16_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv
> > e_type_int_n]: __arm_vqrdmlashq_n_s32 (__ARM_mve_coerce(__p0,
> > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2,
> > int32_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_
> > mve_type_int_n]: __arm_vqrdmlashq_n_u8 (__ARM_mve_coerce(__p0,
> > uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t),
> > __ARM_mve_coerce(__p2, uint8_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_
> > mve_type_int_n]: __arm_vqrdmlashq_n_u16 (__ARM_mve_coerce(__p0,
> > uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),
> > __ARM_mve_coerce(__p2, uint16_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_
> > mve_type_int_n]: __arm_vqrdmlashq_n_u32 (__ARM_mve_coerce(__p0,
> > uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),
> > __ARM_mve_coerce(__p2, uint32_t)));})
> > +	    int
> >
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv
> > e_type_int_n]: __arm_vqrdmlashq_n_s32 (__ARM_mve_coerce(__p0,
> > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2,
> > int32_t)));})
> >
> >  #define __arm_vqdmlashq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> >    __typeof(p1) __p1 = (p1); \
> > @@ -36904,10 +36736,7 @@ extern void *__ARM_undef;
> >    _Generic( (int
> >
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
> > eid(__p2)])0, \
> >    int
> >
> (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mv
> > e_type_int_n]: __arm_vqrdmlahq_n_s8 (__ARM_mve_coerce(__p0,
> > int8x16_t), __ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2,
> > int8_t)), \
> >    int
> >
> (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mv
> > e_type_int_n]: __arm_vqrdmlahq_n_s16 (__ARM_mve_coerce(__p0,
> > int16x8_t), __ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2,
> > int16_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv
> > e_type_int_n]: __arm_vqrdmlahq_n_s32 (__ARM_mve_coerce(__p0,
> > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2,
> > int32_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_
> > mve_type_int_n]: __arm_vqrdmlahq_n_u8 (__ARM_mve_coerce(__p0,
> > uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t),
> > __ARM_mve_coerce(__p2, uint8_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_
> > mve_type_int_n]: __arm_vqrdmlahq_n_u16 (__ARM_mve_coerce(__p0,
> > uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),
> > __ARM_mve_coerce(__p2, uint16_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_
> > mve_type_int_n]: __arm_vqrdmlahq_n_u32 (__ARM_mve_coerce(__p0,
> > uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),
> > __ARM_mve_coerce(__p2, uint32_t)));})
> > +  int
> >
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv
> > e_type_int_n]: __arm_vqrdmlahq_n_s32 (__ARM_mve_coerce(__p0,
> > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2,
> > int32_t)));})
> >
> >  #define __arm_vmlasq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> >    __typeof(p1) __p1 = (p1); \
> > @@ -36926,10 +36755,7 @@ extern void *__ARM_undef;
> >    _Generic( (int
> >
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
> > eid(__p2)])0, \
> >    int
> >
> (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mv
> > e_type_int_n]: __arm_vqdmlahq_n_s8 (__ARM_mve_coerce(__p0,
> > int8x16_t), __ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2,
> > int8_t)), \
> >    int
> >
> (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mv
> > e_type_int_n]: __arm_vqdmlahq_n_s16 (__ARM_mve_coerce(__p0,
> > int16x8_t), __ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2,
> > int16_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv
> > e_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0,
> > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2,
> > int32_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_
> > mve_type_int_n]: __arm_vqdmlahq_n_u8 (__ARM_mve_coerce(__p0,
> > uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t),
> > __ARM_mve_coerce(__p2, uint8_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_
> > mve_type_int_n]: __arm_vqdmlahq_n_u16 (__ARM_mve_coerce(__p0,
> > uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),
> > __ARM_mve_coerce(__p2, uint16_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_
> > mve_type_int_n]: __arm_vqdmlahq_n_u32 (__ARM_mve_coerce(__p0,
> > uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),
> > __ARM_mve_coerce(__p2, uint32_t)));})
> > +  int
> >
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv
> > e_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0,
> > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2,
> > int32_t)));})
> >
> >  #define __arm_vqrdmladhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> >    __typeof(p1) __p1 = (p1); \
> > @@ -39365,10 +39191,7 @@ extern void *__ARM_undef;
> >    _Generic( (int
> >
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
> > eid(__p2)])0, \
> >    int
> >
> (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mv
> > e_type_int_n]: __arm_vqrdmlashq_n_s8 (__ARM_mve_coerce(__p0,
> > int8x16_t), __ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2,
> > int8_t)), \
> >    int
> >
> (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mv
> > e_type_int_n]: __arm_vqrdmlashq_n_s16 (__ARM_mve_coerce(__p0,
> > int16x8_t), __ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2,
> > int16_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv
> > e_type_int_n]: __arm_vqrdmlashq_n_s32 (__ARM_mve_coerce(__p0,
> > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2,
> > int32_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_
> > mve_type_int_n]: __arm_vqrdmlashq_n_u8 (__ARM_mve_coerce(__p0,
> > uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t),
> > __ARM_mve_coerce(__p2, uint8_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_
> > mve_type_int_n]: __arm_vqrdmlashq_n_u16 (__ARM_mve_coerce(__p0,
> > uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),
> > __ARM_mve_coerce(__p2, uint16_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_
> > mve_type_int_n]: __arm_vqrdmlashq_n_u32 (__ARM_mve_coerce(__p0,
> > uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),
> > __ARM_mve_coerce(__p2, uint32_t)));})
> > +  int
> >
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv
> > e_type_int_n]: __arm_vqrdmlashq_n_s32 (__ARM_mve_coerce(__p0,
> > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2,
> > int32_t)));})
> >
> >  #define __arm_vqdmlashq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> >    __typeof(p1) __p1 = (p1); \
> > @@ -39384,10 +39207,7 @@ extern void *__ARM_undef;
> >    _Generic( (int
> >
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
> > eid(__p2)])0, \
> >    int
> >
> (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mv
> > e_type_int_n]: __arm_vqrdmlahq_n_s8 (__ARM_mve_coerce(__p0,
> > int8x16_t), __ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2,
> > int8_t)), \
> >    int
> >
> (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mv
> > e_type_int_n]: __arm_vqrdmlahq_n_s16 (__ARM_mve_coerce(__p0,
> > int16x8_t), __ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2,
> > int16_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv
> > e_type_int_n]: __arm_vqrdmlahq_n_s32 (__ARM_mve_coerce(__p0,
> > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2,
> > int32_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_
> > mve_type_int_n]: __arm_vqrdmlahq_n_u8 (__ARM_mve_coerce(__p0,
> > uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t),
> > __ARM_mve_coerce(__p2, uint8_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_
> > mve_type_int_n]: __arm_vqrdmlahq_n_u16 (__ARM_mve_coerce(__p0,
> > uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),
> > __ARM_mve_coerce(__p2, uint16_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_
> > mve_type_int_n]: __arm_vqrdmlahq_n_u32 (__ARM_mve_coerce(__p0,
> > uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),
> > __ARM_mve_coerce(__p2, uint32_t)));})
> > +  int
> >
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv
> > e_type_int_n]: __arm_vqrdmlahq_n_s32 (__ARM_mve_coerce(__p0,
> > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2,
> > int32_t)));})
> >
> >  #define __arm_vqrdmladhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> >    __typeof(p1) __p1 = (p1); \
> > @@ -39518,10 +39338,7 @@ extern void *__ARM_undef;
> >    _Generic( (int
> >
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
> > eid(__p2)])0, \
> >    int
> >
> (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mv
> > e_type_int_n]: __arm_vqdmlahq_n_s8 (__ARM_mve_coerce(__p0,
> > int8x16_t), __ARM_mve_coerce(__p1, int8x16_t),
> __ARM_mve_coerce(__p2,
> > int8_t)), \
> >    int
> >
> (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mv
> > e_type_int_n]: __arm_vqdmlahq_n_s16 (__ARM_mve_coerce(__p0,
> > int16x8_t), __ARM_mve_coerce(__p1, int16x8_t),
> __ARM_mve_coerce(__p2,
> > int16_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv
> > e_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0,
> > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2,
> > int32_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_
> > mve_type_int_n]: __arm_vqdmlahq_n_u8 (__ARM_mve_coerce(__p0,
> > uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t),
> > __ARM_mve_coerce(__p2, uint8_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_
> > mve_type_int_n]: __arm_vqdmlahq_n_u16 (__ARM_mve_coerce(__p0,
> > uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),
> > __ARM_mve_coerce(__p2, uint16_t)), \
> > -  int
> >
> (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_
> > mve_type_int_n]: __arm_vqdmlahq_n_u32 (__ARM_mve_coerce(__p0,
> > uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),
> > __ARM_mve_coerce(__p2, uint32_t)));})
> > +  int
> >
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv
> > e_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0,
> > int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),
> __ARM_mve_coerce(__p2,
> > int32_t)));})
> >
> >  #define __arm_vqdmlsdhq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> >    __typeof(p1) __p1 = (p1); \
> > @@ -41184,9 +41001,7 @@ extern void *__ARM_undef;
> >    __typeof(p2) __p2 = (p2); \
> >    _Generic( (int
> >
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
> > eid(__p2)])0, \
> >    int
> >
> (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t][__ARM_mve_ty
> > pe_int16x8_t]: __arm_vmlaldavaxq_p_s16 (__ARM_mve_coerce(__p0,
> > int64_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2,
> > int16x8_t), p3), \
> > -  int
> >
> (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_ty
> > pe_int32x4_t]: __arm_vmlaldavaxq_p_s32 (__ARM_mve_coerce(__p0,
> > int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2,
> > int32x4_t), p3), \
> > -  int
> >
> (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t][__ARM_mve_t
> > ype_uint16x8_t]: __arm_vmlaldavaxq_p_u16 (__ARM_mve_coerce(__p0,
> > uint64_t), __ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2,
> > uint16x8_t), p3), \
> > -  int
> >
> (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_t
> > ype_uint32x4_t]: __arm_vmlaldavaxq_p_u32 (__ARM_mve_coerce(__p0,
> > uint64_t), __ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2,
> > uint32x4_t), p3));})
> > +  int
> >
> (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_ty
> > pe_int32x4_t]: __arm_vmlaldavaxq_p_s32 (__ARM_mve_coerce(__p0,
> > int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2,
> > int32x4_t), p3));})
> >
> >  #define __arm_vmlsldavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> >    __typeof(p1) __p1 = (p1); \
> > diff --git a/gcc/config/arm/arm_mve_builtins.def
> > b/gcc/config/arm/arm_mve_builtins.def
> > index 9f3ecfe..ac92818 100644
> > --- a/gcc/config/arm/arm_mve_builtins.def
> > +++ b/gcc/config/arm/arm_mve_builtins.def
> > @@ -312,9 +312,6 @@ VAR3 (TERNOP_NONE_NONE_UNONE_IMM,
> > vshlcq_vec_s, v16qi, v8hi, v4si)
> >  VAR4 (TERNOP_UNONE_UNONE_UNONE_UNONE, vpselq_u, v16qi, v8hi,
> > v4si, v2di)
> >  VAR4 (TERNOP_NONE_NONE_NONE_UNONE, vpselq_s, v16qi, v8hi, v4si,
> > v2di)
> >  VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vrev64q_m_u, v16qi,
> > v8hi, v4si)
> > -VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vqrdmlashq_n_u,
> v16qi,
> > v8hi, v4si)
> > -VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vqrdmlahq_n_u, v16qi,
> > v8hi, v4si)
> > -VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vqdmlahq_n_u, v16qi,
> > v8hi, v4si)
> >  VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmvnq_m_u, v16qi,
> v8hi,
> > v4si)
> >  VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmlasq_n_u, v16qi,
> v8hi,
> > v4si)
> >  VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmlaq_n_u, v16qi,
> v8hi,
> > v4si)
> > @@ -617,7 +614,6 @@ VAR3
> (QUADOP_NONE_NONE_NONE_IMM_UNONE,
> > vrshrq_m_n_s, v16qi, v8hi, v4si)
> >  VAR3 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vqshlq_m_n_s, v16qi,
> > v8hi, v4si)
> >  VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE,
> > vmulltq_poly_m_p, v16qi, v8hi)
> >  VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE,
> > vmullbq_poly_m_p, v16qi, v8hi)
> > -VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE,
> > vmlaldavaxq_p_u, v8hi, v4si)
> >  VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE,
> > vmlaldavaq_p_u, v8hi, v4si)
> >  VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshrntq_m_n_u,
> > v8hi, v4si)
> >  VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE,
> vshrnbq_m_n_u,
> > v8hi, v4si)
> > diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> > index 0dbf1b2..f934872 100644
> > --- a/gcc/config/arm/iterators.md
> > +++ b/gcc/config/arm/iterators.md
> > @@ -1284,10 +1284,10 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s")
> > (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
> >  		       (VMLADAVQ_P_S "s") (VMLADAVQ_P_U "u")
> > (VMLAQ_N_S "s")
> >  		       (VMLAQ_N_U "u") (VMLASQ_N_S "s") (VMLASQ_N_U "u")
> >  		       (VMVNQ_M_S "s") (VMVNQ_M_U "u") (VPSELQ_S "s")
> > -		       (VPSELQ_U "u") (VQDMLAHQ_N_S "s")
> > (VQDMLAHQ_N_U "u")
> > +		       (VPSELQ_U "u") (VQDMLAHQ_N_S "s")
> >  		       (VQDMLASHQ_N_S "s")
> > -		       (VQRDMLAHQ_N_S "s") (VQRDMLAHQ_N_U "u")
> > -		       (VQRDMLASHQ_N_S "s") (VQRDMLASHQ_N_U "u")
> > +		       (VQRDMLAHQ_N_S "s")
> > +		       (VQRDMLASHQ_N_S "s")
> >  		       (VQRSHLQ_M_N_S "s") (VQRSHLQ_M_N_U "u")
> >  		       (VQSHLQ_M_R_S "s") (VQSHLQ_M_R_U "u") (VSRIQ_N_S
> > "s")
> >  		       (VREV64Q_M_S "s") (VREV64Q_M_U "u") (VSRIQ_N_U
> > "u")
> > @@ -1360,7 +1360,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s")
> > (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
> >  		       (VQSHRNBQ_M_N_S "s") (VQSHRNBQ_M_N_U "u")
> >  		       (VQRSHRNTQ_M_N_S "s") (VQRSHRNTQ_M_N_U "u")
> >  		       (VQRSHRNBQ_M_N_S "s") (VQRSHRNBQ_M_N_U "u")
> > -		       (VMLALDAVAXQ_P_S "s") (VMLALDAVAXQ_P_U "u")
> > +		       (VMLALDAVAXQ_P_S "s")
> >  		       (VMLALDAVAQ_P_S "s") (VMLALDAVAQ_P_U "u")
> >  		       (VSTRWQSB_S "s") (VSTRWQSB_U "u") (VSTRBQSO_S "s")
> >  		       (VSTRBQSO_U "u") (VSTRBQ_S "s") (VSTRBQ_U "u")
> > @@ -1578,10 +1578,10 @@ (define_int_iterator VMLAQ_N [VMLAQ_N_S
> > VMLAQ_N_U])
> >  (define_int_iterator VMLASQ_N [VMLASQ_N_S VMLASQ_N_U])
> >  (define_int_iterator VMVNQ_M [VMVNQ_M_S VMVNQ_M_U])
> >  (define_int_iterator VPSELQ [VPSELQ_S VPSELQ_U])
> > -(define_int_iterator VQDMLAHQ_N [VQDMLAHQ_N_S VQDMLAHQ_N_U])
> > +(define_int_iterator VQDMLAHQ_N [VQDMLAHQ_N_S])
> >  (define_int_iterator VQDMLASHQ_N [VQDMLASHQ_N_S])
> > -(define_int_iterator VQRDMLAHQ_N [VQRDMLAHQ_N_S
> > VQRDMLAHQ_N_U])
> > -(define_int_iterator VQRDMLASHQ_N [VQRDMLASHQ_N_S
> > VQRDMLASHQ_N_U])
> > +(define_int_iterator VQRDMLAHQ_N [VQRDMLAHQ_N_S])
> > +(define_int_iterator VQRDMLASHQ_N [VQRDMLASHQ_N_S])
> >  (define_int_iterator VQRSHLQ_M_N [VQRSHLQ_M_N_S
> VQRSHLQ_M_N_U])
> >  (define_int_iterator VQSHLQ_M_R [VQSHLQ_M_R_S VQSHLQ_M_R_U])
> >  (define_int_iterator VREV64Q_M [VREV64Q_M_S VREV64Q_M_U])
> > @@ -1663,7 +1663,7 @@ (define_int_iterator VABDQ_M [VABDQ_M_S
> > VABDQ_M_U])
> >  (define_int_iterator VMLAQ_M_N [VMLAQ_M_N_S VMLAQ_M_N_U])
> >  (define_int_iterator VQSHLQ_M_N [VQSHLQ_M_N_S VQSHLQ_M_N_U])
> >  (define_int_iterator VMLALDAVAQ_P [VMLALDAVAQ_P_U
> > VMLALDAVAQ_P_S])
> > -(define_int_iterator VMLALDAVAXQ_P [VMLALDAVAXQ_P_U
> > VMLALDAVAXQ_P_S])
> > +(define_int_iterator VMLALDAVAXQ_P [VMLALDAVAXQ_P_S])
> >  (define_int_iterator VQRSHRNBQ_M_N [VQRSHRNBQ_M_N_U
> > VQRSHRNBQ_M_N_S])
> >  (define_int_iterator VQRSHRNTQ_M_N [VQRSHRNTQ_M_N_S
> > VQRSHRNTQ_M_N_U])
> >  (define_int_iterator VQSHRNBQ_M_N [VQSHRNBQ_M_N_U
> > VQSHRNBQ_M_N_S])
> > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> > index d406ab1..0d77601 100644
> > --- a/gcc/config/arm/mve.md
> > +++ b/gcc/config/arm/mve.md
> > @@ -3662,7 +3662,7 @@ (define_insn "mve_vqabsq_m_s<mode>"
> >     (set_attr "length""8")])
> >
> >  ;;
> > -;; [vqdmlahq_n_s, vqdmlahq_n_u])
> > +;; [vqdmlahq_n_s])
> >  ;;
> >  (define_insn "mve_vqdmlahq_n_<supf><mode>"
> >    [
> > @@ -3742,7 +3742,7 @@ (define_insn "mve_vqrdmladhxq_s<mode>"
> >  ])
> >
> >  ;;
> > -;; [vqrdmlahq_n_s, vqrdmlahq_n_u])
> > +;; [vqrdmlahq_n_s])
> >  ;;
> >  (define_insn "mve_vqrdmlahq_n_<supf><mode>"
> >    [
> > @@ -3758,7 +3758,7 @@ (define_insn
> "mve_vqrdmlahq_n_<supf><mode>"
> >  ])
> >
> >  ;;
> > -;; [vqrdmlashq_n_s, vqrdmlashq_n_u])
> > +;; [vqrdmlashq_n_s])
> >  ;;
> >  (define_insn "mve_vqrdmlashq_n_<supf><mode>"
> >    [
> > @@ -6498,7 +6498,7 @@ (define_insn
> "mve_vmlaldavaq_p_<supf><mode>"
> >     (set_attr "length""8")])
> >
> >  ;;
> > -;; [vmlaldavaxq_p_u, vmlaldavaxq_p_s])
> > +;; [vmlaldavaxq_p_s])
> >  ;;
> >  (define_insn "mve_vmlaldavaxq_p_<supf><mode>"
> >    [
> > diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
> > index a98ac09..a3844e9 100644
> > --- a/gcc/config/arm/unspecs.md
> > +++ b/gcc/config/arm/unspecs.md
> > @@ -876,15 +876,12 @@ (define_c_enum "unspec" [
> >    VPSELQ_U
> >    VQABSQ_M_S
> >    VQDMLAHQ_N_S
> > -  VQDMLAHQ_N_U
> >    VQDMLASHQ_N_S
> >    VQNEGQ_M_S
> >    VQRDMLADHQ_S
> >    VQRDMLADHXQ_S
> >    VQRDMLAHQ_N_S
> > -  VQRDMLAHQ_N_U
> >    VQRDMLASHQ_N_S
> > -  VQRDMLASHQ_N_U
> >    VQRDMLSDHQ_S
> >    VQRDMLSDHXQ_S
> >    VQRSHLQ_M_N_S
> > @@ -1149,7 +1146,6 @@ (define_c_enum "unspec" [
> >    VQDMLADHQ_M_S
> >    VMLALDAVAQ_P_U
> >    VMLALDAVAQ_P_S
> > -  VMLALDAVAXQ_P_U
> >    VQRSHRNBQ_M_N_U
> >    VQRSHRNBQ_M_N_S
> >    VQRSHRNTQ_M_N_S
> > diff --git
> > a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u16.c
> > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u16.c
> > deleted file mode 100644
> > index 704d0e7..0000000
> > --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u16.c
> > +++ /dev/null
> > @@ -1,21 +0,0 @@
> > -/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> > -/* { dg-add-options arm_v8_1m_mve } */
> > -/* { dg-additional-options "-O2" } */
> > -
> > -#include "arm_mve.h"
> > -
> > -uint64_t
> > -foo (uint64_t a, uint16x8_t b, uint16x8_t c, mve_pred16_t p)
> > -{
> > -  return vmlaldavaxq_p_u16 (a, b, c, p);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vmlaldavaxt.u16"  }  } */
> > -
> > -uint64_t
> > -foo1 (uint64_t a, uint16x8_t b, uint16x8_t c, mve_pred16_t p)
> > -{
> > -  return vmlaldavaxq_p (a, b, c, p);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vmlaldavaxt.u16"  }  } */
> > diff --git
> > a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u32.c
> > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u32.c
> > deleted file mode 100644
> > index 84dca18..0000000
> > --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmlaldavaxq_p_u32.c
> > +++ /dev/null
> > @@ -1,21 +0,0 @@
> > -/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> > -/* { dg-add-options arm_v8_1m_mve } */
> > -/* { dg-additional-options "-O2" } */
> > -
> > -#include "arm_mve.h"
> > -
> > -uint64_t
> > -foo (uint64_t a, uint32x4_t b, uint32x4_t c, mve_pred16_t p)
> > -{
> > -  return vmlaldavaxq_p_u32 (a, b, c, p);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vmlaldavaxt.u32"  }  } */
> > -
> > -uint64_t
> > -foo1 (uint64_t a, uint32x4_t b, uint32x4_t c, mve_pred16_t p)
> > -{
> > -  return vmlaldavaxq_p (a, b, c, p);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vmlaldavaxt.u32"  }  } */
> > diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u16.c
> > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u16.c
> > deleted file mode 100644
> > index 67ebb79..0000000
> > --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u16.c
> > +++ /dev/null
> > @@ -1,21 +0,0 @@
> > -/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> > -/* { dg-add-options arm_v8_1m_mve } */
> > -/* { dg-additional-options "-O2" } */
> > -
> > -#include "arm_mve.h"
> > -
> > -uint16x8_t
> > -foo (uint16x8_t a, uint16x8_t b, uint16_t c)
> > -{
> > -  return vqdmlahq_n_u16 (a, b, c);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vqdmlah.s16"  }  } */
> > -
> > -uint16x8_t
> > -foo1 (uint16x8_t a, uint16x8_t b, uint16_t c)
> > -{
> > -  return vqdmlahq (a, b, c);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vqdmlah.s16"  }  } */
> > diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u32.c
> > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u32.c
> > deleted file mode 100644
> > index d82bca1..0000000
> > --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u32.c
> > +++ /dev/null
> > @@ -1,21 +0,0 @@
> > -/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> > -/* { dg-add-options arm_v8_1m_mve } */
> > -/* { dg-additional-options "-O2" } */
> > -
> > -#include "arm_mve.h"
> > -
> > -uint32x4_t
> > -foo (uint32x4_t a, uint32x4_t b, uint32_t c)
> > -{
> > -  return vqdmlahq_n_u32 (a, b, c);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vqdmlah.s32"  }  } */
> > -
> > -uint32x4_t
> > -foo1 (uint32x4_t a, uint32x4_t b, uint32_t c)
> > -{
> > -  return vqdmlahq (a, b, c);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vqdmlah.s32"  }  } */
> > diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u8.c
> > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u8.c
> > deleted file mode 100644
> > index 4b4c246..0000000
> > --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlahq_n_u8.c
> > +++ /dev/null
> > @@ -1,21 +0,0 @@
> > -/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> > -/* { dg-add-options arm_v8_1m_mve } */
> > -/* { dg-additional-options "-O2" } */
> > -
> > -#include "arm_mve.h"
> > -
> > -uint8x16_t
> > -foo (uint8x16_t a, uint8x16_t b, uint8_t c)
> > -{
> > -  return vqdmlahq_n_u8 (a, b, c);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vqdmlah.s8"  }  } */
> > -
> > -uint8x16_t
> > -foo1 (uint8x16_t a, uint8x16_t b, uint8_t c)
> > -{
> > -  return vqdmlahq (a, b, c);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vqdmlah.s8"  }  } */
> > diff --git
> a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u16.c
> > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u16.c
> > deleted file mode 100644
> > index 6fe4f77..0000000
> > --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u16.c
> > +++ /dev/null
> > @@ -1,21 +0,0 @@
> > -/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> > -/* { dg-add-options arm_v8_1m_mve } */
> > -/* { dg-additional-options "-O2" } */
> > -
> > -#include "arm_mve.h"
> > -
> > -uint16x8_t
> > -foo (uint16x8_t a, uint16x8_t b, uint16_t c)
> > -{
> > -  return vqrdmlahq_n_u16 (a, b, c);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vqrdmlah.s16"  }  } */
> > -
> > -uint16x8_t
> > -foo1 (uint16x8_t a, uint16x8_t b, uint16_t c)
> > -{
> > -  return vqrdmlahq (a, b, c);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vqrdmlah.s16"  }  } */
> > diff --git
> a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u32.c
> > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u32.c
> > deleted file mode 100644
> > index 8205403..0000000
> > --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u32.c
> > +++ /dev/null
> > @@ -1,21 +0,0 @@
> > -/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> > -/* { dg-add-options arm_v8_1m_mve } */
> > -/* { dg-additional-options "-O2" } */
> > -
> > -#include "arm_mve.h"
> > -
> > -uint32x4_t
> > -foo (uint32x4_t a, uint32x4_t b, uint32_t c)
> > -{
> > -  return vqrdmlahq_n_u32 (a, b, c);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vqrdmlah.s32"  }  } */
> > -
> > -uint32x4_t
> > -foo1 (uint32x4_t a, uint32x4_t b, uint32_t c)
> > -{
> > -  return vqrdmlahq (a, b, c);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vqrdmlah.s32"  }  } */
> > diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u8.c
> > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u8.c
> > deleted file mode 100644
> > index ed5c3a0..0000000
> > --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlahq_n_u8.c
> > +++ /dev/null
> > @@ -1,21 +0,0 @@
> > -/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> > -/* { dg-add-options arm_v8_1m_mve } */
> > -/* { dg-additional-options "-O2" } */
> > -
> > -#include "arm_mve.h"
> > -
> > -uint8x16_t
> > -foo (uint8x16_t a, uint8x16_t b, uint8_t c)
> > -{
> > -  return vqrdmlahq_n_u8 (a, b, c);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vqrdmlah.s8"  }  } */
> > -
> > -uint8x16_t
> > -foo1 (uint8x16_t a, uint8x16_t b, uint8_t c)
> > -{
> > -  return vqrdmlahq (a, b, c);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vqrdmlah.s8"  }  } */
> > diff --git
> a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u16.c
> > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u16.c
> > deleted file mode 100644
> > index 6c5f280..0000000
> > --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u16.c
> > +++ /dev/null
> > @@ -1,21 +0,0 @@
> > -/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> > -/* { dg-add-options arm_v8_1m_mve } */
> > -/* { dg-additional-options "-O2" } */
> > -
> > -#include "arm_mve.h"
> > -
> > -uint16x8_t
> > -foo (uint16x8_t a, uint16x8_t b, uint16_t c)
> > -{
> > -  return vqrdmlashq_n_u16 (a, b, c);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vqrdmlash.s16"  }  } */
> > -
> > -uint16x8_t
> > -foo1 (uint16x8_t a, uint16x8_t b, uint16_t c)
> > -{
> > -  return vqrdmlashq (a, b, c);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vqrdmlash.s16"  }  } */
> > diff --git
> a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u32.c
> > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u32.c
> > deleted file mode 100644
> > index daf520d..0000000
> > --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u32.c
> > +++ /dev/null
> > @@ -1,21 +0,0 @@
> > -/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> > -/* { dg-add-options arm_v8_1m_mve } */
> > -/* { dg-additional-options "-O2" } */
> > -
> > -#include "arm_mve.h"
> > -
> > -uint32x4_t
> > -foo (uint32x4_t a, uint32x4_t b, uint32_t c)
> > -{
> > -  return vqrdmlashq_n_u32 (a, b, c);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vqrdmlash.s32"  }  } */
> > -
> > -uint32x4_t
> > -foo1 (uint32x4_t a, uint32x4_t b, uint32_t c)
> > -{
> > -  return vqrdmlashq (a, b, c);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vqrdmlash.s32"  }  } */
> > diff --git
> a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u8.c
> > b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u8.c
> > deleted file mode 100644
> > index 59b0e39..0000000
> > --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqrdmlashq_n_u8.c
> > +++ /dev/null
> > @@ -1,21 +0,0 @@
> > -/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> > -/* { dg-add-options arm_v8_1m_mve } */
> > -/* { dg-additional-options "-O2" } */
> > -
> > -#include "arm_mve.h"
> > -
> > -uint8x16_t
> > -foo (uint8x16_t a, uint8x16_t b, uint8_t c)
> > -{
> > -  return vqrdmlashq_n_u8 (a, b, c);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vqrdmlash.s8"  }  } */
> > -
> > -uint8x16_t
> > -foo1 (uint8x16_t a, uint8x16_t b, uint8_t c)
> > -{
> > -  return vqrdmlashq (a, b, c);
> > -}
> > -
> > -/* { dg-final { scan-assembler "vqrdmlash.s8"  }  } */
> > --
> > 2.7.4


^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH v2] arm: [MVE] Add vqdmlashq intrinsics
  2020-10-06 15:59 [PATCH v2] arm: [MVE] Add vqdmlashq intrinsics Christophe Lyon
  2020-10-06 15:59 ` [PATCH v2] arm: [MVE] Remove illegal intrinsics Christophe Lyon
@ 2020-10-07 10:34 ` Kyrylo Tkachov
  1 sibling, 0 replies; 4+ messages in thread
From: Kyrylo Tkachov @ 2020-10-07 10:34 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: gcc-patches

Hi Christophe

> -----Original Message-----
> From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of
> Christophe Lyon via Gcc-patches
> Sent: 06 October 2020 16:59
> To: gcc-patches@gcc.gnu.org
> Subject: [PATCH v2] arm: [MVE] Add vqdmlashq intrinsics
> 
> This patch adds:
> vqdmlashq_m_n_s16
> vqdmlashq_m_n_s32
> vqdmlashq_m_n_s8
> vqdmlashq_n_s16
> vqdmlashq_n_s32
> vqdmlashq_n_s8
> 
> v2: rebased after Srinath's reorganization patch
> 

Ok.
Thanks,
Kyrill

> 2020-10-05  Christophe Lyon  <christophe.lyon@linaro.org>
> 
> 	gcc/
> 	PR target/96914
> 	* config/arm/arm_mve.h (vqdmlashq, vqdmlashq_m): Define.
> 	* config/arm/arm_mve_builtins.def (vqdmlashq_n_s)
> 	(vqdmlashq_m_n_s): New.
> 	* config/arm/unspecs.md (VQDMLASHQ_N_S, VQDMLASHQ_M_N_S):
> New
> 	unspecs.
> 	* config/arm/iterators.md (VQDMLASHQ_N_S,
> VQDMLASHQ_M_N_S): New
> 	attributes.
> 	(VQDMLASHQ_N): New iterator.
> 	* config/arm/mve.md (mve_vqdmlashq_n_, mve_vqdmlashq_m_n_s):
> New
> 	patterns.
> 
> 	gcc/testsuite/
> 	PR target/96914
> 	* gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s16.c: New test.
> 	* gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s32.c: New test.
> 	* gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s8.c: New test.
> 	* gcc.target/arm/mve/intrinsics/vqdmlashq_n_s16.c: New test.
> 	* gcc.target/arm/mve/intrinsics/vqdmlashq_n_s32.c: New test.
> 	* gcc.target/arm/mve/intrinsics/vqdmlashq_n_s8.c: New test.
> ---
>  gcc/config/arm/arm_mve.h                           | 116 +++++++++++++++++++++
>  gcc/config/arm/arm_mve_builtins.def                |   2 +
>  gcc/config/arm/iterators.md                        |   3 +
>  gcc/config/arm/mve.md                              |  33 ++++++
>  gcc/config/arm/unspecs.md                          |   2 +
>  .../arm/mve/intrinsics/vqdmlashq_m_n_s16.c         |  23 ++++
>  .../arm/mve/intrinsics/vqdmlashq_m_n_s32.c         |  23 ++++
>  .../arm/mve/intrinsics/vqdmlashq_m_n_s8.c          |  23 ++++
>  .../arm/mve/intrinsics/vqdmlashq_n_s16.c           |  21 ++++
>  .../arm/mve/intrinsics/vqdmlashq_n_s32.c           |  21 ++++
>  .../gcc.target/arm/mve/intrinsics/vqdmlashq_n_s8.c |  21 ++++
>  11 files changed, 288 insertions(+)
>  create mode 100644
> gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s16.c
>  create mode 100644
> gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s32.c
>  create mode 100644
> gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s8.c
>  create mode 100644
> gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s16.c
>  create mode 100644
> gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s32.c
>  create mode 100644
> gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s8.c
> 
> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> index d9bfb203..7626ad1 100644
> --- a/gcc/config/arm/arm_mve.h
> +++ b/gcc/config/arm/arm_mve.h
> @@ -141,6 +141,7 @@
>  #define vrev64q_m(__inactive, __a, __p) __arm_vrev64q_m(__inactive, __a,
> __p)
>  #define vqrdmlashq(__a, __b, __c) __arm_vqrdmlashq(__a, __b, __c)
>  #define vqrdmlahq(__a, __b, __c) __arm_vqrdmlahq(__a, __b, __c)
> +#define vqdmlashq(__a, __b, __c) __arm_vqdmlashq(__a, __b, __c)
>  #define vqdmlahq(__a, __b, __c) __arm_vqdmlahq(__a, __b, __c)
>  #define vmvnq_m(__inactive, __a, __p) __arm_vmvnq_m(__inactive, __a,
> __p)
>  #define vmlasq(__a, __b, __c) __arm_vmlasq(__a, __b, __c)
> @@ -260,6 +261,7 @@
>  #define vorrq_m(__inactive, __a, __b, __p) __arm_vorrq_m(__inactive, __a,
> __b, __p)
>  #define vqaddq_m(__inactive, __a, __b, __p) __arm_vqaddq_m(__inactive,
> __a, __b, __p)
>  #define vqdmladhq_m(__inactive, __a, __b, __p)
> __arm_vqdmladhq_m(__inactive, __a, __b, __p)
> +#define vqdmlashq_m(__a, __b, __c, __p) __arm_vqdmlashq_m(__a, __b,
> __c, __p)
>  #define vqdmladhxq_m(__inactive, __a, __b, __p)
> __arm_vqdmladhxq_m(__inactive, __a, __b, __p)
>  #define vqdmlahq_m(__a, __b, __c, __p) __arm_vqdmlahq_m(__a, __b, __c,
> __p)
>  #define vqdmlsdhq_m(__inactive, __a, __b, __p)
> __arm_vqdmlsdhq_m(__inactive, __a, __b, __p)
> @@ -1307,6 +1309,7 @@
>  #define vqdmlsdhxq_s8(__inactive, __a, __b)
> __arm_vqdmlsdhxq_s8(__inactive, __a, __b)
>  #define vqdmlsdhq_s8(__inactive, __a, __b)
> __arm_vqdmlsdhq_s8(__inactive, __a, __b)
>  #define vqdmlahq_n_s8(__a, __b, __c) __arm_vqdmlahq_n_s8(__a, __b, __c)
> +#define vqdmlashq_n_s8(__a, __b, __c) __arm_vqdmlashq_n_s8(__a, __b,
> __c)
>  #define vqdmladhxq_s8(__inactive, __a, __b)
> __arm_vqdmladhxq_s8(__inactive, __a, __b)
>  #define vqdmladhq_s8(__inactive, __a, __b)
> __arm_vqdmladhq_s8(__inactive, __a, __b)
>  #define vmlsdavaxq_s8(__a, __b, __c) __arm_vmlsdavaxq_s8(__a, __b, __c)
> @@ -1391,6 +1394,7 @@
>  #define vqrdmladhq_s16(__inactive, __a, __b)
> __arm_vqrdmladhq_s16(__inactive, __a, __b)
>  #define vqdmlsdhxq_s16(__inactive, __a, __b)
> __arm_vqdmlsdhxq_s16(__inactive, __a, __b)
>  #define vqdmlsdhq_s16(__inactive, __a, __b)
> __arm_vqdmlsdhq_s16(__inactive, __a, __b)
> +#define vqdmlashq_n_s16(__a, __b, __c) __arm_vqdmlashq_n_s16(__a,
> __b, __c)
>  #define vqdmlahq_n_s16(__a, __b, __c) __arm_vqdmlahq_n_s16(__a, __b,
> __c)
>  #define vqdmladhxq_s16(__inactive, __a, __b)
> __arm_vqdmladhxq_s16(__inactive, __a, __b)
>  #define vqdmladhq_s16(__inactive, __a, __b)
> __arm_vqdmladhq_s16(__inactive, __a, __b)
> @@ -1476,6 +1480,7 @@
>  #define vqrdmladhq_s32(__inactive, __a, __b)
> __arm_vqrdmladhq_s32(__inactive, __a, __b)
>  #define vqdmlsdhxq_s32(__inactive, __a, __b)
> __arm_vqdmlsdhxq_s32(__inactive, __a, __b)
>  #define vqdmlsdhq_s32(__inactive, __a, __b)
> __arm_vqdmlsdhq_s32(__inactive, __a, __b)
> +#define vqdmlashq_n_s32(__a, __b, __c) __arm_vqdmlashq_n_s32(__a,
> __b, __c)
>  #define vqdmlahq_n_s32(__a, __b, __c) __arm_vqdmlahq_n_s32(__a, __b,
> __c)
>  #define vqdmladhxq_s32(__inactive, __a, __b)
> __arm_vqdmladhxq_s32(__inactive, __a, __b)
>  #define vqdmladhq_s32(__inactive, __a, __b)
> __arm_vqdmladhq_s32(__inactive, __a, __b)
> @@ -1902,6 +1907,9 @@
>  #define vqdmladhxq_m_s8(__inactive, __a, __b, __p)
> __arm_vqdmladhxq_m_s8(__inactive, __a, __b, __p)
>  #define vqdmladhxq_m_s32(__inactive, __a, __b, __p)
> __arm_vqdmladhxq_m_s32(__inactive, __a, __b, __p)
>  #define vqdmladhxq_m_s16(__inactive, __a, __b, __p)
> __arm_vqdmladhxq_m_s16(__inactive, __a, __b, __p)
> +#define vqdmlashq_m_n_s8(__a, __b, __c, __p)
> __arm_vqdmlashq_m_n_s8(__a, __b, __c, __p)
> +#define vqdmlashq_m_n_s32(__a, __b, __c, __p)
> __arm_vqdmlashq_m_n_s32(__a, __b, __c, __p)
> +#define vqdmlashq_m_n_s16(__a, __b, __c, __p)
> __arm_vqdmlashq_m_n_s16(__a, __b, __c, __p)
>  #define vqdmlahq_m_n_s8(__a, __b, __c, __p)
> __arm_vqdmlahq_m_n_s8(__a, __b, __c, __p)
>  #define vqdmlahq_m_n_s32(__a, __b, __c, __p)
> __arm_vqdmlahq_m_n_s32(__a, __b, __c, __p)
>  #define vqdmlahq_m_n_s16(__a, __b, __c, __p)
> __arm_vqdmlahq_m_n_s16(__a, __b, __c, __p)
> @@ -7425,6 +7433,13 @@ __arm_vqrdmlashq_n_s8 (int8x16_t __a,
> int8x16_t __b, int8_t __c)
> 
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vqdmlashq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
> +{
> +  return __builtin_mve_vqdmlashq_n_sv16qi (__a, __b, __c);
> +}
> +
> +__extension__ extern __inline int8x16_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqrdmlahq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c)
>  {
>    return __builtin_mve_vqrdmlahq_n_sv16qi (__a, __b, __c);
> @@ -8020,6 +8035,13 @@ __arm_vqrdmlashq_n_s16 (int16x8_t __a,
> int16x8_t __b, int16_t __c)
> 
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vqdmlashq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
> +{
> +  return __builtin_mve_vqdmlashq_n_sv8hi (__a, __b, __c);
> +}
> +
> +__extension__ extern __inline int16x8_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqrdmlahq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
>  {
>    return __builtin_mve_vqrdmlahq_n_sv8hi (__a, __b, __c);
> @@ -8615,6 +8637,13 @@ __arm_vqrdmlashq_n_s32 (int32x4_t __a,
> int32x4_t __b, int32_t __c)
> 
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vqdmlashq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
> +{
> +  return __builtin_mve_vqdmlashq_n_sv4si (__a, __b, __c);
> +}
> +
> +__extension__ extern __inline int32x4_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqrdmlahq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c)
>  {
>    return __builtin_mve_vqrdmlahq_n_sv4si (__a, __b, __c);
> @@ -11142,6 +11171,27 @@ __arm_vqrdmlashq_m_n_s16 (int16x8_t __a,
> int16x8_t __b, int16_t __c, mve_pred16_
> 
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vqdmlashq_m_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c,
> mve_pred16_t __p)
> +{
> +  return __builtin_mve_vqdmlashq_m_n_sv16qi (__a, __b, __c, __p);
> +}
> +
> +__extension__ extern __inline int16x8_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vqdmlashq_m_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c,
> mve_pred16_t __p)
> +{
> +  return __builtin_mve_vqdmlashq_m_n_sv8hi (__a, __b, __c, __p);
> +}
> +
> +__extension__ extern __inline int32x4_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vqdmlashq_m_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c,
> mve_pred16_t __p)
> +{
> +  return __builtin_mve_vqdmlashq_m_n_sv4si (__a, __b, __c, __p);
> +}
> +
> +__extension__ extern __inline int8x16_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqrdmlsdhq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t
> __b, mve_pred16_t __p)
>  {
>    return __builtin_mve_vqrdmlsdhq_m_sv16qi (__inactive, __a, __b, __p);
> @@ -24212,6 +24262,13 @@ __arm_vqrdmlashq (int8x16_t __a, int8x16_t
> __b, int8_t __c)
> 
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vqdmlashq (int8x16_t __a, int8x16_t __b, int8_t __c)
> +{
> + return __arm_vqdmlashq_n_s8 (__a, __b, __c);
> +}
> +
> +__extension__ extern __inline int8x16_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqrdmlahq (int8x16_t __a, int8x16_t __b, int8_t __c)
>  {
>   return __arm_vqrdmlahq_n_s8 (__a, __b, __c);
> @@ -24807,6 +24864,13 @@ __arm_vqrdmlashq (int16x8_t __a, int16x8_t
> __b, int16_t __c)
> 
>  __extension__ extern __inline int16x8_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vqdmlashq (int16x8_t __a, int16x8_t __b, int16_t __c)
> +{
> + return __arm_vqdmlashq_n_s16 (__a, __b, __c);
> +}
> +
> +__extension__ extern __inline int16x8_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqrdmlahq (int16x8_t __a, int16x8_t __b, int16_t __c)
>  {
>   return __arm_vqrdmlahq_n_s16 (__a, __b, __c);
> @@ -25402,6 +25466,13 @@ __arm_vqrdmlashq (int32x4_t __a, int32x4_t
> __b, int32_t __c)
> 
>  __extension__ extern __inline int32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vqdmlashq (int32x4_t __a, int32x4_t __b, int32_t __c)
> +{
> + return __arm_vqdmlashq_n_s32 (__a, __b, __c);
> +}
> +
> +__extension__ extern __inline int32x4_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqrdmlahq (int32x4_t __a, int32x4_t __b, int32_t __c)
>  {
>   return __arm_vqrdmlahq_n_s32 (__a, __b, __c);
> @@ -27929,6 +28000,27 @@ __arm_vqrdmlashq_m (int16x8_t __a,
> int16x8_t __b, int16_t __c, mve_pred16_t __p)
> 
>  __extension__ extern __inline int8x16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vqdmlashq_m (int8x16_t __a, int8x16_t __b, int8_t __c,
> mve_pred16_t __p)
> +{
> + return __arm_vqdmlashq_m_n_s8 (__a, __b, __c, __p);
> +}
> +
> +__extension__ extern __inline int16x8_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vqdmlashq_m (int16x8_t __a, int16x8_t __b, int16_t __c,
> mve_pred16_t __p)
> +{
> + return __arm_vqdmlashq_m_n_s16 (__a, __b, __c, __p);
> +}
> +
> +__extension__ extern __inline int32x4_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vqdmlashq_m (int32x4_t __a, int32x4_t __b, int32_t __c,
> mve_pred16_t __p)
> +{
> + return __arm_vqdmlashq_m_n_s32 (__a, __b, __c, __p);
> +}
> +
> +__extension__ extern __inline int8x16_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  __arm_vqrdmlsdhq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b,
> mve_pred16_t __p)
>  {
>   return __arm_vqrdmlsdhq_m_s8 (__inactive, __a, __b, __p);
> @@ -36798,6 +36890,14 @@ extern void *__ARM_undef;
>    int
> (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_
> mve_type_int_n]: __arm_vqrdmlashq_n_u16 (__ARM_mve_coerce(__p0,
> uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, uint16_t)), \
>    int
> (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_
> mve_type_int_n]: __arm_vqrdmlashq_n_u32 (__ARM_mve_coerce(__p0,
> uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, uint32_t)));})
> 
> +#define __arm_vqdmlashq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> +  __typeof(p1) __p1 = (p1); \
> +  __typeof(p2) __p2 = (p2); \
> +  _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
> eid(__p2)])0, \
> +  int
> (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mv
> e_type_int_n]: __arm_vqdmlashq_n_s8 (__ARM_mve_coerce(__p0,
> int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2,
> int8_t)), \
> +  int
> (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mv
> e_type_int_n]: __arm_vqdmlashq_n_s16 (__ARM_mve_coerce(__p0,
> int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2,
> int16_t)), \
> +	    int
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv
> e_type_int_n]: __arm_vqdmlashq_n_s32 (__ARM_mve_coerce(__p0,
> int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2,
> int32_t)));})
> +
>  #define __arm_vqrdmlahq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    __typeof(p2) __p2 = (p2); \
> @@ -39270,6 +39370,14 @@ extern void *__ARM_undef;
>    int
> (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_
> mve_type_int_n]: __arm_vqrdmlashq_n_u16 (__ARM_mve_coerce(__p0,
> uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),
> __ARM_mve_coerce(__p2, uint16_t)), \
>    int
> (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_
> mve_type_int_n]: __arm_vqrdmlashq_n_u32 (__ARM_mve_coerce(__p0,
> uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),
> __ARM_mve_coerce(__p2, uint32_t)));})
> 
> +#define __arm_vqdmlashq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> +  __typeof(p1) __p1 = (p1); \
> +  __typeof(p2) __p2 = (p2); \
> +  _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
> eid(__p2)])0, \
> +  int
> (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mv
> e_type_int_n]: __arm_vqdmlashq_n_s8 (__ARM_mve_coerce(__p0,
> int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2,
> int8_t)), \
> +  int
> (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mv
> e_type_int_n]: __arm_vqdmlashq_n_s16 (__ARM_mve_coerce(__p0,
> int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2,
> int16_t)), \
> +  int
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv
> e_type_int_n]: __arm_vqdmlashq_n_s32 (__ARM_mve_coerce(__p0,
> int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2,
> int32_t)));})
> +
>  #define __arm_vqrdmlahq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    __typeof(p2) __p2 = (p2); \
> @@ -40811,6 +40919,14 @@ extern void *__ARM_undef;
>    int
> (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mv
> e_type_int_n]: __arm_vqrdmlashq_m_n_s16 (__ARM_mve_coerce(__p0,
> int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2,
> int16_t), p3), \
>    int
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv
> e_type_int_n]: __arm_vqrdmlashq_m_n_s32 (__ARM_mve_coerce(__p0,
> int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2,
> int32_t), p3));})
> 
> +#define __arm_vqdmlashq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> +  __typeof(p1) __p1 = (p1); \
> +  __typeof(p2) __p2 = (p2); \
> +  _Generic( (int
> (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typ
> eid(__p2)])0, \
> +  int
> (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mv
> e_type_int_n]: __arm_vqdmlashq_m_n_s8 (__ARM_mve_coerce(__p0,
> int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2,
> int8_t), p3), \
> +  int
> (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mv
> e_type_int_n]: __arm_vqdmlashq_m_n_s16 (__ARM_mve_coerce(__p0,
> int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2,
> int16_t), p3), \
> +  int
> (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mv
> e_type_int_n]: __arm_vqdmlashq_m_n_s32 (__ARM_mve_coerce(__p0,
> int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2,
> int32_t), p3));})
> +
>  #define __arm_vqrshlq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
>    __typeof(p1) __p1 = (p1); \
>    __typeof(p2) __p2 = (p2); \
> diff --git a/gcc/config/arm/arm_mve_builtins.def
> b/gcc/config/arm/arm_mve_builtins.def
> index 753e40a..9f3ecfe 100644
> --- a/gcc/config/arm/arm_mve_builtins.def
> +++ b/gcc/config/arm/arm_mve_builtins.def
> @@ -384,6 +384,7 @@ VAR3 (TERNOP_NONE_NONE_NONE_NONE,
> vqrdmladhq_s, v16qi, v8hi, v4si)
>  VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlsdhxq_s, v16qi, v8hi,
> v4si)
>  VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlsdhq_s, v16qi, v8hi, v4si)
>  VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlahq_n_s, v16qi, v8hi,
> v4si)
> +VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlashq_n_s, v16qi, v8hi,
> v4si)
>  VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmladhxq_s, v16qi, v8hi,
> v4si)
>  VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmladhq_s, v16qi, v8hi, v4si)
>  VAR3 (TERNOP_NONE_NONE_NONE_NONE, vmlsdavaxq_s, v16qi, v8hi, v4si)
> @@ -574,6 +575,7 @@ VAR3
> (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmulhq_m_n_s, v16qi,
> v8hi, v4si)
>  VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlsdhxq_m_s,
> v16qi, v8hi, v4si)
>  VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlsdhq_m_s,
> v16qi, v8hi, v4si)
>  VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlahq_m_n_s,
> v16qi, v8hi, v4si)
> +VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlashq_m_n_s,
> v16qi, v8hi, v4si)
>  VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmladhxq_m_s,
> v16qi, v8hi, v4si)
>  VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmladhq_m_s,
> v16qi, v8hi, v4si)
>  VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqaddq_m_s, v16qi,
> v8hi, v4si)
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index 7f8c235..0dbf1b2 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -1285,6 +1285,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s")
> (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
>  		       (VMLAQ_N_U "u") (VMLASQ_N_S "s") (VMLASQ_N_U "u")
>  		       (VMVNQ_M_S "s") (VMVNQ_M_U "u") (VPSELQ_S "s")
>  		       (VPSELQ_U "u") (VQDMLAHQ_N_S "s")
> (VQDMLAHQ_N_U "u")
> +		       (VQDMLASHQ_N_S "s")
>  		       (VQRDMLAHQ_N_S "s") (VQRDMLAHQ_N_U "u")
>  		       (VQRDMLASHQ_N_S "s") (VQRDMLASHQ_N_U "u")
>  		       (VQRSHLQ_M_N_S "s") (VQRSHLQ_M_N_U "u")
> @@ -1326,6 +1327,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s")
> (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
>  		       (VMULQ_M_S "s") (VQSHLQ_M_N_U "u") (VSLIQ_M_N_U
> "u")
>  		       (VMLADAVAQ_P_S "s") (VQRSHLQ_M_U "u")
>  		       (VMULLBQ_INT_M_U "u") (VSHLQ_M_N_U "u")
> (VQSUBQ_M_U "u")
> +		       (VQDMLASHQ_M_N_S "s")
>  		       (VQRDMLASHQ_M_N_U "u") (VRSHRQ_M_N_S "s")
>  		       (VORNQ_M_S "s") (VCADDQ_ROT270_M_S "s")
> (VRHADDQ_M_U "u")
>  		       (VRSHRQ_M_N_U "u") (VMLASQ_M_N_U "u")
> (VHSUBQ_M_U "u")
> @@ -1577,6 +1579,7 @@ (define_int_iterator VMLASQ_N [VMLASQ_N_S
> VMLASQ_N_U])
>  (define_int_iterator VMVNQ_M [VMVNQ_M_S VMVNQ_M_U])
>  (define_int_iterator VPSELQ [VPSELQ_S VPSELQ_U])
>  (define_int_iterator VQDMLAHQ_N [VQDMLAHQ_N_S VQDMLAHQ_N_U])
> +(define_int_iterator VQDMLASHQ_N [VQDMLASHQ_N_S])
>  (define_int_iterator VQRDMLAHQ_N [VQRDMLAHQ_N_S
> VQRDMLAHQ_N_U])
>  (define_int_iterator VQRDMLASHQ_N [VQRDMLASHQ_N_S
> VQRDMLASHQ_N_U])
>  (define_int_iterator VQRSHLQ_M_N [VQRSHLQ_M_N_S VQRSHLQ_M_N_U])
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 4322adf..d406ab1 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -3678,6 +3678,22 @@ (define_insn "mve_vqdmlahq_n_<supf><mode>"
>  ])
> 
>  ;;
> +;; [vqdmlashq_n_s])
> +;;
> +(define_insn "mve_vqdmlashq_n_<supf><mode>"
> +  [
> +   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> +	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
> +		       (match_operand:MVE_2 2 "s_register_operand" "w")
> +		       (match_operand:<V_elem> 3 "s_register_operand" "r")]
> +	 VQDMLASHQ_N))
> +  ]
> +  "TARGET_HAVE_MVE"
> +  "vqdmlash.s%#<V_sz_elem>\t%q0, %q2, %3"
> +  [(set_attr "type" "mve_move")
> +])
> +
> +;;
>  ;; [vqnegq_m_s])
>  ;;
>  (define_insn "mve_vqnegq_m_s<mode>"
> @@ -5904,6 +5920,23 @@ (define_insn "mve_vqdmlahq_m_n_s<mode>"
>     (set_attr "length""8")])
> 
>  ;;
> +;; [vqdmlashq_m_n_s])
> +;;
> +(define_insn "mve_vqdmlashq_m_n_s<mode>"
> +  [
> +   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> +	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
> +		       (match_operand:MVE_2 2 "s_register_operand" "w")
> +		       (match_operand:<V_elem> 3 "s_register_operand" "r")
> +		       (match_operand:HI 4 "vpr_register_operand" "Up")]
> +	 VQDMLASHQ_M_N_S))
> +  ]
> +  "TARGET_HAVE_MVE"
> +  "vpst\;vqdmlasht.s%#<V_sz_elem>\t%q0, %q2, %3"
> +  [(set_attr "type" "mve_move")
> +   (set_attr "length""8")])
> +
> +;;
>  ;; [vqrdmlahq_m_n_s])
>  ;;
>  (define_insn "mve_vqrdmlahq_m_n_s<mode>"
> diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
> index caee18a..a98ac09 100644
> --- a/gcc/config/arm/unspecs.md
> +++ b/gcc/config/arm/unspecs.md
> @@ -877,6 +877,7 @@ (define_c_enum "unspec" [
>    VQABSQ_M_S
>    VQDMLAHQ_N_S
>    VQDMLAHQ_N_U
> +  VQDMLASHQ_N_S
>    VQNEGQ_M_S
>    VQRDMLADHQ_S
>    VQRDMLADHXQ_S
> @@ -1069,6 +1070,7 @@ (define_c_enum "unspec" [
>    VRHADDQ_M_S
>    VMULQ_M_S
>    VMULQ_M_U
> +  VQDMLASHQ_M_N_S
>    VQRDMLASHQ_M_N_S
>    VRSHLQ_M_S
>    VRSHLQ_M_U
> diff --git
> a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s16.c
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s16.c
> new file mode 100644
> index 0000000..7c2e5cf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s16.c
> @@ -0,0 +1,23 @@
> +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> +/* { dg-add-options arm_v8_1m_mve } */
> +/* { dg-additional-options "-O2" } */
> +
> +#include "arm_mve.h"
> +
> +int16x8_t
> +foo (int16x8_t a, int16x8_t b, int16_t c, mve_pred16_t p)
> +{
> +  return vqdmlashq_m_n_s16 (a, b, c, p);
> +}
> +
> +/* { dg-final { scan-assembler "vpst" } } */
> +/* { dg-final { scan-assembler "vqdmlasht.s16"  }  } */
> +
> +int16x8_t
> +foo1 (int16x8_t a, int16x8_t b, int16_t c, mve_pred16_t p)
> +{
> +  return vqdmlashq_m (a, b, c, p);
> +}
> +
> +/* { dg-final { scan-assembler "vpst" } } */
> +/* { dg-final { scan-assembler "vqdmlasht.s16"  }  } */
> diff --git
> a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s32.c
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s32.c
> new file mode 100644
> index 0000000..cea9d9b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s32.c
> @@ -0,0 +1,23 @@
> +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> +/* { dg-add-options arm_v8_1m_mve } */
> +/* { dg-additional-options "-O2" } */
> +
> +#include "arm_mve.h"
> +
> +int32x4_t
> +foo (int32x4_t a, int32x4_t b, int32_t c, mve_pred16_t p)
> +{
> +  return vqdmlashq_m_n_s32 (a, b, c, p);
> +}
> +
> +/* { dg-final { scan-assembler "vpst" } } */
> +/* { dg-final { scan-assembler "vqdmlasht.s32"  }  } */
> +
> +int32x4_t
> +foo1 (int32x4_t a, int32x4_t b, int32_t c, mve_pred16_t p)
> +{
> +  return vqdmlashq_m (a, b, c, p);
> +}
> +
> +/* { dg-final { scan-assembler "vpst" } } */
> +/* { dg-final { scan-assembler "vqdmlasht.s32"  }  } */
> diff --git
> a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s8.c
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s8.c
> new file mode 100644
> index 0000000..83ee258
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_m_n_s8.c
> @@ -0,0 +1,23 @@
> +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> +/* { dg-add-options arm_v8_1m_mve } */
> +/* { dg-additional-options "-O2" } */
> +
> +#include "arm_mve.h"
> +
> +int8x16_t
> +foo (int8x16_t a, int8x16_t b, int8_t c, mve_pred16_t p)
> +{
> +  return vqdmlashq_m_n_s8 (a, b, c, p);
> +}
> +
> +/* { dg-final { scan-assembler "vpst" } } */
> +/* { dg-final { scan-assembler "vqdmlasht.s8"  }  } */
> +
> +int8x16_t
> +foo1 (int8x16_t a, int8x16_t b, int8_t c, mve_pred16_t p)
> +{
> +  return vqdmlashq_m (a, b, c, p);
> +}
> +
> +/* { dg-final { scan-assembler "vpst" } } */
> +/* { dg-final { scan-assembler "vqdmlasht.s8"  }  } */
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s16.c
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s16.c
> new file mode 100644
> index 0000000..c71a61c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s16.c
> @@ -0,0 +1,21 @@
> +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> +/* { dg-add-options arm_v8_1m_mve } */
> +/* { dg-additional-options "-O2" } */
> +
> +#include "arm_mve.h"
> +
> +int16x8_t
> +foo (int16x8_t a, int16x8_t b, int16_t c)
> +{
> +  return vqdmlashq_n_s16 (a, b, c);
> +}
> +
> +/* { dg-final { scan-assembler "vqdmlash.s16"  }  } */
> +
> +int16x8_t
> +foo1 (int16x8_t a, int16x8_t b, int16_t c)
> +{
> +  return vqdmlashq (a, b, c);
> +}
> +
> +/* { dg-final { scan-assembler "vqdmlash.s16"  }  } */
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s32.c
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s32.c
> new file mode 100644
> index 0000000..61f6c66
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s32.c
> @@ -0,0 +1,21 @@
> +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> +/* { dg-add-options arm_v8_1m_mve } */
> +/* { dg-additional-options "-O2" } */
> +
> +#include "arm_mve.h"
> +
> +int32x4_t
> +foo (int32x4_t a, int32x4_t b, int32_t c)
> +{
> +  return vqdmlashq_n_s32 (a, b, c);
> +}
> +
> +/* { dg-final { scan-assembler "vqdmlash.s32"  }  } */
> +
> +int32x4_t
> +foo1 (int32x4_t a, int32x4_t b, int32_t c)
> +{
> +  return vqdmlashq (a, b, c);
> +}
> +
> +/* { dg-final { scan-assembler "vqdmlash.s32"  }  } */
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s8.c
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s8.c
> new file mode 100644
> index 0000000..a078928
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vqdmlashq_n_s8.c
> @@ -0,0 +1,21 @@
> +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> +/* { dg-add-options arm_v8_1m_mve } */
> +/* { dg-additional-options "-O2" } */
> +
> +#include "arm_mve.h"
> +
> +int8x16_t
> +foo (int8x16_t a, int8x16_t b, int8_t c)
> +{
> +  return vqdmlashq_n_s8 (a, b, c);
> +}
> +
> +/* { dg-final { scan-assembler "vqdmlash.s8"  }  } */
> +
> +int8x16_t
> +foo1 (int8x16_t a, int8x16_t b, int8_t c)
> +{
> +  return vqdmlashq (a, b, c);
> +}
> +
> +/* { dg-final { scan-assembler "vqdmlash.s8"  }  } */
> --
> 2.7.4


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-10-07 10:34 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-06 15:59 [PATCH v2] arm: [MVE] Add vqdmlashq intrinsics Christophe Lyon
2020-10-06 15:59 ` [PATCH v2] arm: [MVE] Remove illegal intrinsics Christophe Lyon
     [not found]   ` <DB7PR08MB3002AAC33F48C8D8BAA376E7930D0@DB7PR08MB3002.eurprd08.prod.outlook.com>
2020-10-06 16:13     ` Kyrylo Tkachov
2020-10-07 10:34 ` [PATCH v2] arm: [MVE] Add vqdmlashq intrinsics Kyrylo Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).