public inbox for gcc-patches@gcc.gnu.org
* [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp
@ 2023-05-10 13:30 Christophe Lyon
From: Christophe Lyon @ 2023-05-10 13:30 UTC
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize the vcmp define_insn patterns so that each group shares a single iterator-based pattern.
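
As a sanity check of the user-visible behaviour, here is a minimal sketch
(the function name is made up for illustration; assumes a target built with
-march=armv8.1-m.main+mve): per the merged "vpst\;vcmpt..." templates below,
the factorized patterns emit the same predicated compare sequence as before.

  #include <arm_mve.h>

  /* Expected output per the merged template: "vpst" followed by
     "vcmpt.u8 cs, q0, q1".  */
  mve_pred16_t
  cmp_cs_m (uint8x16_t a, uint8x16_t b, mve_pred16_t p)
  {
    return vcmpcsq_m_u8 (a, b, p);
  }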

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MVE_CMP_M, MVE_CMP_M_F, MVE_CMP_M_N)
	(MVE_CMP_M_N_F, mve_cmp_op1): New.
	(isu): Add VCMP*.
	(supf): Likewise.
	* config/arm/mve.md (mve_vcmp<mve_cmp_op>q_n_<mode>): Rename into ...
	(@mve_vcmp<mve_cmp_op>q_n_<mode>): ... this.
	(mve_vcmpeqq_m_f<mode>, mve_vcmpgeq_m_f<mode>)
	(mve_vcmpgtq_m_f<mode>, mve_vcmpleq_m_f<mode>)
	(mve_vcmpltq_m_f<mode>, mve_vcmpneq_m_f<mode>): Merge into ...
	(@mve_vcmp<mve_cmp_op1>q_m_f<mode>): ... this.
	(mve_vcmpcsq_m_u<mode>, mve_vcmpeqq_m_<supf><mode>)
	(mve_vcmpgeq_m_s<mode>, mve_vcmpgtq_m_s<mode>)
	(mve_vcmphiq_m_u<mode>, mve_vcmpleq_m_s<mode>)
	(mve_vcmpltq_m_s<mode>, mve_vcmpneq_m_<supf><mode>): Merge into
	...
	(@mve_vcmp<mve_cmp_op1>q_m_<supf><mode>): ... this.
	(mve_vcmpcsq_m_n_u<mode>, mve_vcmpeqq_m_n_<supf><mode>)
	(mve_vcmpgeq_m_n_s<mode>, mve_vcmpgtq_m_n_s<mode>)
	(mve_vcmphiq_m_n_u<mode>, mve_vcmpleq_m_n_s<mode>)
	(mve_vcmpltq_m_n_s<mode>, mve_vcmpneq_m_n_<supf><mode>): Merge
	into ...
	(@mve_vcmp<mve_cmp_op1>q_m_n_<supf><mode>): ... this.
	(mve_vcmpeqq_m_n_f<mode>, mve_vcmpgeq_m_n_f<mode>)
	(mve_vcmpgtq_m_n_f<mode>, mve_vcmpleq_m_n_f<mode>)
	(mve_vcmpltq_m_n_f<mode>, mve_vcmpneq_m_n_f<mode>): Merge into ...
	(@mve_vcmp<mve_cmp_op1>q_m_n_f<mode>): ... this.
---
 gcc/config/arm/iterators.md | 108 ++++++++++
 gcc/config/arm/mve.md       | 414 +++---------------------------------
 2 files changed, 135 insertions(+), 387 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 3c70fd7f56d..ef9fae0412b 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -583,6 +583,47 @@ (define_int_iterator MVE_FP_CREATE_ONLY [
 		     VCREATEQ_F
 		     ])
 
+;; MVE comparison iterators
+(define_int_iterator MVE_CMP_M [
+		     VCMPCSQ_M_U
+		     VCMPEQQ_M_S VCMPEQQ_M_U
+		     VCMPGEQ_M_S
+		     VCMPGTQ_M_S
+		     VCMPHIQ_M_U
+		     VCMPLEQ_M_S
+		     VCMPLTQ_M_S
+		     VCMPNEQ_M_S VCMPNEQ_M_U
+		     ])
+
+(define_int_iterator MVE_CMP_M_F [
+		     VCMPEQQ_M_F
+		     VCMPGEQ_M_F
+		     VCMPGTQ_M_F
+		     VCMPLEQ_M_F
+		     VCMPLTQ_M_F
+		     VCMPNEQ_M_F
+		     ])
+
+(define_int_iterator MVE_CMP_M_N [
+		     VCMPCSQ_M_N_U
+		     VCMPEQQ_M_N_S VCMPEQQ_M_N_U
+		     VCMPGEQ_M_N_S
+		     VCMPGTQ_M_N_S
+		     VCMPHIQ_M_N_U
+		     VCMPLEQ_M_N_S
+		     VCMPLTQ_M_N_S
+		     VCMPNEQ_M_N_S VCMPNEQ_M_N_U
+		     ])
+
+(define_int_iterator MVE_CMP_M_N_F [
+		     VCMPEQQ_M_N_F
+		     VCMPGEQ_M_N_F
+		     VCMPGTQ_M_N_F
+		     VCMPLEQ_M_N_F
+		     VCMPLTQ_M_N_F
+		     VCMPNEQ_M_N_F
+		     ])
+
 (define_int_iterator MVE_VMAXVQ_VMINVQ [
 		     VMAXAVQ_S
 		     VMAXVQ_S VMAXVQ_U
@@ -655,6 +696,37 @@ (define_code_attr mve_addsubmul [
 		 (plus "vadd")
 		 ])
 
+(define_int_attr mve_cmp_op1 [
+		 (VCMPCSQ_M_U "cs")
+		 (VCMPEQQ_M_S "eq") (VCMPEQQ_M_U "eq")
+		 (VCMPGEQ_M_S "ge")
+		 (VCMPGTQ_M_S "gt")
+		 (VCMPHIQ_M_U "hi")
+		 (VCMPLEQ_M_S "le")
+		 (VCMPLTQ_M_S "lt")
+		 (VCMPNEQ_M_S "ne") (VCMPNEQ_M_U "ne")
+		 (VCMPEQQ_M_F "eq")
+		 (VCMPGEQ_M_F "ge")
+		 (VCMPGTQ_M_F "gt")
+		 (VCMPLEQ_M_F "le")
+		 (VCMPLTQ_M_F "lt")
+		 (VCMPNEQ_M_F "ne")
+		 (VCMPCSQ_M_N_U "cs")
+		 (VCMPEQQ_M_N_S "eq") (VCMPEQQ_M_N_U "eq")
+		 (VCMPGEQ_M_N_S "ge")
+		 (VCMPGTQ_M_N_S "gt")
+		 (VCMPHIQ_M_N_U "hi")
+		 (VCMPLEQ_M_N_S "le")
+		 (VCMPLTQ_M_N_S "lt")
+		 (VCMPNEQ_M_N_S "ne") (VCMPNEQ_M_N_U "ne")
+		 (VCMPEQQ_M_N_F "eq")
+		 (VCMPGEQ_M_N_F "ge")
+		 (VCMPGTQ_M_N_F "gt")
+		 (VCMPLEQ_M_N_F "le")
+		 (VCMPLTQ_M_N_F "lt")
+		 (VCMPNEQ_M_N_F "ne")
+		 ])
+
 (define_int_attr mve_insn [
 		 (VABDQ_M_S "vabd") (VABDQ_M_U "vabd") (VABDQ_M_F "vabd")
 		 (VABDQ_S "vabd") (VABDQ_U "vabd") (VABDQ_F "vabd")
@@ -836,6 +908,26 @@ (define_int_attr isu    [
 		 (VCLSQ_M_S "s")
 		 (VCLZQ_M_S "i")
 		 (VCLZQ_M_U "i")
+		 (VCMPCSQ_M_N_U "u")
+		 (VCMPCSQ_M_U "u")
+		 (VCMPEQQ_M_N_S "i")
+		 (VCMPEQQ_M_N_U "i")
+		 (VCMPEQQ_M_S "i")
+		 (VCMPEQQ_M_U "i")
+		 (VCMPGEQ_M_N_S "s")
+		 (VCMPGEQ_M_S "s")
+		 (VCMPGTQ_M_N_S "s")
+		 (VCMPGTQ_M_S "s")
+		 (VCMPHIQ_M_N_U "u")
+		 (VCMPHIQ_M_U "u")
+		 (VCMPLEQ_M_N_S "s")
+		 (VCMPLEQ_M_S "s")
+		 (VCMPLTQ_M_N_S "s")
+		 (VCMPLTQ_M_S "s")
+		 (VCMPNEQ_M_N_S "i")
+		 (VCMPNEQ_M_N_U "i")
+		 (VCMPNEQ_M_S "i")
+		 (VCMPNEQ_M_U "i")
 		 (VMOVNBQ_M_S "i") (VMOVNBQ_M_U "i")
 		 (VMOVNBQ_S "i") (VMOVNBQ_U "i")
 		 (VMOVNTQ_M_S "i") (VMOVNTQ_M_U "i")
@@ -2082,6 +2174,22 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VMAXAQ_M_S "s")
 		       (VMINAQ_S "s")
 		       (VMINAQ_M_S "s")
+		       (VCMPCSQ_M_N_U "u")
+		       (VCMPCSQ_M_U "u")
+		       (VCMPEQQ_M_N_S "s") (VCMPEQQ_M_N_U "u")
+		       (VCMPEQQ_M_S "s") (VCMPEQQ_M_U "u")
+		       (VCMPGEQ_M_N_S "s")
+		       (VCMPGEQ_M_S "s")
+		       (VCMPGTQ_M_N_S "s")
+		       (VCMPGTQ_M_S "s")
+		       (VCMPHIQ_M_N_U "u")
+		       (VCMPHIQ_M_U "u")
+		       (VCMPLEQ_M_N_S "s")
+		       (VCMPLEQ_M_S "s")
+		       (VCMPLTQ_M_N_S "s")
+		       (VCMPLTQ_M_S "s")
+		       (VCMPNEQ_M_N_S "s") (VCMPNEQ_M_N_U "u")
+		       (VCMPNEQ_M_S "s") (VCMPNEQ_M_U "u")
 		       ])
 
 ;; Both kinds of return insn.
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 45bca6d6215..191d1268ad6 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -695,7 +695,7 @@ (define_insn "@mve_vcmp<mve_cmp_op>q_<mode>"
 ;;
 ;; [vcmpcsq_n_, vcmpeqq_n_, vcmpgeq_n_, vcmpgtq_n_, vcmphiq_n_, vcmpleq_n_, vcmpltq_n_, vcmpneq_n_])
 ;;
-(define_insn "mve_vcmp<mve_cmp_op>q_n_<mode>"
+(define_insn "@mve_vcmp<mve_cmp_op>q_n_<mode>"
   [
    (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
 	(MVE_COMPARISONS:<MVE_VPRED>
@@ -1766,18 +1766,23 @@ (define_insn "mve_vrmlaldavhq_<supf>v4si"
 ])
 
 ;;
-;; [vcmpeqq_m_f])
+;; [vcmpeqq_m_f]
+;; [vcmpgeq_m_f]
+;; [vcmpgtq_m_f]
+;; [vcmpleq_m_f]
+;; [vcmpltq_m_f]
+;; [vcmpneq_m_f]
 ;;
-(define_insn "mve_vcmpeqq_m_f<mode>"
+(define_insn "@mve_vcmp<mve_cmp_op1>q_m_f<mode>"
   [
    (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
 	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w")
 		    (match_operand:MVE_0 2 "s_register_operand" "w")
 		    (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPEQQ_M_F))
+	 MVE_CMP_M_F))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vcmpt.f%#<V_sz_elem>	eq, %q1, %q2"
+  "vpst\;vcmpt.f%#<V_sz_elem>\t<mve_cmp_op1>, %q1, %q2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 ;;
@@ -1954,257 +1959,47 @@ (define_insn "mve_vaddvaq_p_<supf><mode>"
 
 ;;
 ;; [vcmpcsq_m_n_u])
-;;
-(define_insn "mve_vcmpcsq_m_n_u<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:<V_elem> 2 "s_register_operand" "r")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPCSQ_M_N_U))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vcmpt.u%#<V_sz_elem>	cs, %q1, %2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vcmpcsq_m_u])
-;;
-(define_insn "mve_vcmpcsq_m_u<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPCSQ_M_U))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vcmpt.u%#<V_sz_elem>	cs, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
 ;; [vcmpeqq_m_n_u, vcmpeqq_m_n_s])
-;;
-(define_insn "mve_vcmpeqq_m_n_<supf><mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:<V_elem> 2 "s_register_operand" "r")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPEQQ_M_N))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vcmpt.i%#<V_sz_elem>	eq, %q1, %2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vcmpeqq_m_u, vcmpeqq_m_s])
-;;
-(define_insn "mve_vcmpeqq_m_<supf><mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPEQQ_M))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vcmpt.i%#<V_sz_elem>	eq, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
 ;; [vcmpgeq_m_n_s])
-;;
-(define_insn "mve_vcmpgeq_m_n_s<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:<V_elem> 2 "s_register_operand" "r")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPGEQ_M_N_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vcmpt.s%#<V_sz_elem>	ge, %q1, %2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vcmpgeq_m_s])
-;;
-(define_insn "mve_vcmpgeq_m_s<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPGEQ_M_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vcmpt.s%#<V_sz_elem>	ge, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
 ;; [vcmpgtq_m_n_s])
-;;
-(define_insn "mve_vcmpgtq_m_n_s<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:<V_elem> 2 "s_register_operand" "r")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPGTQ_M_N_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vcmpt.s%#<V_sz_elem>	gt, %q1, %2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vcmpgtq_m_s])
-;;
-(define_insn "mve_vcmpgtq_m_s<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPGTQ_M_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vcmpt.s%#<V_sz_elem>	gt, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
 ;; [vcmphiq_m_n_u])
-;;
-(define_insn "mve_vcmphiq_m_n_u<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:<V_elem> 2 "s_register_operand" "r")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPHIQ_M_N_U))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vcmpt.u%#<V_sz_elem>	hi, %q1, %2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vcmphiq_m_u])
-;;
-(define_insn "mve_vcmphiq_m_u<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPHIQ_M_U))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vcmpt.u%#<V_sz_elem>	hi, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
 ;; [vcmpleq_m_n_s])
-;;
-(define_insn "mve_vcmpleq_m_n_s<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:<V_elem> 2 "s_register_operand" "r")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPLEQ_M_N_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vcmpt.s%#<V_sz_elem>	le, %q1, %2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vcmpleq_m_s])
-;;
-(define_insn "mve_vcmpleq_m_s<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPLEQ_M_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vcmpt.s%#<V_sz_elem>	le, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
 ;; [vcmpltq_m_n_s])
-;;
-(define_insn "mve_vcmpltq_m_n_s<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:<V_elem> 2 "s_register_operand" "r")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPLTQ_M_N_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vcmpt.s%#<V_sz_elem>	lt, %q1, %2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vcmpltq_m_s])
-;;
-(define_insn "mve_vcmpltq_m_s<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPLTQ_M_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vcmpt.s%#<V_sz_elem>	lt, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
 ;; [vcmpneq_m_n_u, vcmpneq_m_n_s])
 ;;
-(define_insn "mve_vcmpneq_m_n_<supf><mode>"
+(define_insn "@mve_vcmp<mve_cmp_op1>q_m_n_<supf><mode>"
   [
    (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
 	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w")
 		       (match_operand:<V_elem> 2 "s_register_operand" "r")
 		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPNEQ_M_N))
+	 MVE_CMP_M_N))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vcmpt.i%#<V_sz_elem>	ne, %q1, %2"
+  "vpst\;vcmpt.<isu>%#<V_sz_elem>\t<mve_cmp_op1>, %q1, %2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
 ;;
-;; [vcmpneq_m_s, vcmpneq_m_u])
+;; [vcmpcsq_m_u]
+;; [vcmpeqq_m_u, vcmpeqq_m_s]
+;; [vcmpgeq_m_s]
+;; [vcmpgtq_m_s]
+;; [vcmphiq_m_u]
+;; [vcmpleq_m_s]
+;; [vcmpltq_m_s]
+;; [vcmpneq_m_s, vcmpneq_m_u]
 ;;
-(define_insn "mve_vcmpneq_m_<supf><mode>"
+(define_insn "@mve_vcmp<mve_cmp_op1>q_m_<supf><mode>"
   [
    (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
 	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w")
 		       (match_operand:MVE_2 2 "s_register_operand" "w")
 		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPNEQ_M))
+	 MVE_CMP_M))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vcmpt.i%#<V_sz_elem>	ne, %q1, %q2"
+  "vpst\;vcmpt.<isu>%#<V_sz_elem>\t<mve_cmp_op1>, %q1, %q2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
@@ -2785,177 +2580,22 @@ (define_insn "mve_vcmlaq<mve_rot><mode>"
 
 ;;
 ;; [vcmpeqq_m_n_f])
-;;
-(define_insn "mve_vcmpeqq_m_n_f<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w")
-		       (match_operand:<V_elem> 2 "s_register_operand" "r")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPEQQ_M_N_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vcmpt.f%#<V_sz_elem>	eq, %q1, %2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vcmpgeq_m_f])
-;;
-(define_insn "mve_vcmpgeq_m_f<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w")
-		       (match_operand:MVE_0 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPGEQ_M_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vcmpt.f%#<V_sz_elem>	ge, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
 ;; [vcmpgeq_m_n_f])
-;;
-(define_insn "mve_vcmpgeq_m_n_f<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w")
-		       (match_operand:<V_elem> 2 "s_register_operand" "r")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPGEQ_M_N_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vcmpt.f%#<V_sz_elem>	ge, %q1, %2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vcmpgtq_m_f])
-;;
-(define_insn "mve_vcmpgtq_m_f<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w")
-		       (match_operand:MVE_0 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPGTQ_M_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vcmpt.f%#<V_sz_elem>	gt, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
 ;; [vcmpgtq_m_n_f])
-;;
-(define_insn "mve_vcmpgtq_m_n_f<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w")
-		       (match_operand:<V_elem> 2 "s_register_operand" "r")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPGTQ_M_N_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vcmpt.f%#<V_sz_elem>	gt, %q1, %2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vcmpleq_m_f])
-;;
-(define_insn "mve_vcmpleq_m_f<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w")
-		       (match_operand:MVE_0 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPLEQ_M_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vcmpt.f%#<V_sz_elem>	le, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
 ;; [vcmpleq_m_n_f])
-;;
-(define_insn "mve_vcmpleq_m_n_f<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w")
-		       (match_operand:<V_elem> 2 "s_register_operand" "r")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPLEQ_M_N_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vcmpt.f%#<V_sz_elem>	le, %q1, %2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vcmpltq_m_f])
-;;
-(define_insn "mve_vcmpltq_m_f<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w")
-		       (match_operand:MVE_0 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPLTQ_M_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vcmpt.f%#<V_sz_elem>	lt, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
 ;; [vcmpltq_m_n_f])
-;;
-(define_insn "mve_vcmpltq_m_n_f<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w")
-		       (match_operand:<V_elem> 2 "s_register_operand" "r")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPLTQ_M_N_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vcmpt.f%#<V_sz_elem>	lt, %q1, %2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vcmpneq_m_f])
-;;
-(define_insn "mve_vcmpneq_m_f<mode>"
-  [
-   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
-	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w")
-		       (match_operand:MVE_0 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPNEQ_M_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vcmpt.f%#<V_sz_elem>	ne, %q1, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
 ;; [vcmpneq_m_n_f])
 ;;
-(define_insn "mve_vcmpneq_m_n_f<mode>"
+(define_insn "@mve_vcmp<mve_cmp_op1>q_m_n_f<mode>"
   [
    (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
 	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w")
 		       (match_operand:<V_elem> 2 "s_register_operand" "r")
 		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VCMPNEQ_M_N_F))
+	 MVE_CMP_M_N_F))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vcmpt.f%#<V_sz_elem>	ne, %q1, %2"
+  "vpst\;vcmpt.f%#<V_sz_elem>\t<mve_cmp_op1>, %q1, %2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-- 
2.34.1


* [PATCH 02/20] arm: [MVE intrinsics] add cmp shape
From: Christophe Lyon @ 2023-05-10 13:30 UTC
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the cmp shape description.
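
For illustration, a minimal sketch of the overloads this shape generates
(signatures taken from the comment in the patch; function names are made up;
assumes -march=armv8.1-m.main+mve): the type of the second argument selects
the vector variant or the scalar _n variant.

  #include <arm_mve.h>

  /* Resolves to vcmpeqq_s16.  */
  mve_pred16_t cmp_vec (int16x8_t a, int16x8_t b) { return vcmpeqq (a, b); }
  /* Resolves to vcmpeqq_n_s16.  */
  mve_pred16_t cmp_n (int16x8_t a, int16_t b) { return vcmpeqq (a, b); }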

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (cmp): New.
	* config/arm/arm-mve-builtins-shapes.h (cmp): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 27 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 28 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index c9eac80d1e3..ea0112b3e99 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -974,6 +974,33 @@ struct binary_widen_n_def : public overloaded_base<0>
 };
 SHAPE (binary_widen_n)
 
+/* Shape for comparison operations that operate on
+   uniform types.
+
+   Examples: vcmpq.
+   mve_pred16_t [__arm_]vcmpeqq[_s16](int16x8_t a, int16x8_t b)
+   mve_pred16_t [__arm_]vcmpeqq[_n_s16](int16x8_t a, int16_t b)
+   mve_pred16_t [__arm_]vcmpeqq_m[_s16](int16x8_t a, int16x8_t b, mve_pred16_t p)
+   mve_pred16_t [__arm_]vcmpeqq_m[_n_s16](int16x8_t a, int16_t b, mve_pred16_t p)  */
+struct cmp_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
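+    /* "p,v0,v0": predicate result with two vector arguments;
+       "p,v0,s0": predicate result with a vector and a scalar (_n form).  */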
+    build_all (b, "p,v0,v0", group, MODE_none, preserve_user_namespace);
+    build_all (b, "p,v0,s0", group, MODE_n, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    return r.resolve_uniform_opt_n (2);
+  }
+};
+SHAPE (cmp)
+
 /* <T0>xN_t vfoo[_t0](uint64_t, uint64_t)
 
    where there are N arguments in total.
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index 7f582d7375a..59c4dc39c39 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -49,6 +49,7 @@ namespace arm_mve
     extern const function_shape *const binary_rshift_narrow;
     extern const function_shape *const binary_rshift_narrow_unsigned;
     extern const function_shape *const binary_widen_n;
+    extern const function_shape *const cmp;
     extern const function_shape *const create;
     extern const function_shape *const inherent;
     extern const function_shape *const unary;
-- 
2.34.1


* [PATCH 03/20] arm: [MVE intrinsics] rework vcmp
From: Christophe Lyon @ 2023-05-10 13:30 UTC
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vcmp using the new MVE builtins framework.
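
A usage sketch (function name made up; assumes -march=armv8.1-m.main+mve.fp):
unlike most predicated intrinsics, the vcmp*_m forms take no "inactive"
argument, since they produce a predicate rather than a vector; hence the
has_inactive_argument change below.

  #include <arm_mve.h>

  /* Resolves to vcmpgtq_m_n_f16; no inactive vector operand is needed.  */
  mve_pred16_t
  cmp_gt_m (float16x8_t a, float16_t b, mve_pred16_t p)
  {
    return vcmpgtq_m (a, b, p);
  }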

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vcmpeqq, vcmpneq, vcmpgeq)
	(vcmpgtq, vcmpleq, vcmpltq, vcmpcsq, vcmphiq): New.
	* config/arm/arm-mve-builtins-base.def (vcmpeqq, vcmpneq, vcmpgeq)
	(vcmpgtq, vcmpleq, vcmpltq, vcmpcsq, vcmphiq): New.
	* config/arm/arm-mve-builtins-base.h (vcmpeqq, vcmpneq, vcmpgeq)
	(vcmpgtq, vcmpleq, vcmpltq, vcmpcsq, vcmphiq): New.
	* config/arm/arm-mve-builtins-functions.h (class
	unspec_based_mve_function_exact_insn_vcmp): New.
	* config/arm/arm-mve-builtins.cc
	(function_instance::has_inactive_argument): Handle vcmp.
	* config/arm/arm_mve.h (vcmpneq): Remove.
	(vcmphiq): Remove.
	(vcmpeqq): Remove.
	(vcmpcsq): Remove.
	(vcmpltq): Remove.
	(vcmpleq): Remove.
	(vcmpgtq): Remove.
	(vcmpgeq): Remove.
	(vcmpneq_m): Remove.
	(vcmphiq_m): Remove.
	(vcmpeqq_m): Remove.
	(vcmpcsq_m): Remove.
	(vcmpcsq_m_n): Remove.
	(vcmpltq_m): Remove.
	(vcmpleq_m): Remove.
	(vcmpgtq_m): Remove.
	(vcmpgeq_m): Remove.
	(vcmpneq_s8): Remove.
	(vcmpneq_s16): Remove.
	(vcmpneq_s32): Remove.
	(vcmpneq_u8): Remove.
	(vcmpneq_u16): Remove.
	(vcmpneq_u32): Remove.
	(vcmpneq_n_u8): Remove.
	(vcmphiq_u8): Remove.
	(vcmphiq_n_u8): Remove.
	(vcmpeqq_u8): Remove.
	(vcmpeqq_n_u8): Remove.
	(vcmpcsq_u8): Remove.
	(vcmpcsq_n_u8): Remove.
	(vcmpneq_n_s8): Remove.
	(vcmpltq_s8): Remove.
	(vcmpltq_n_s8): Remove.
	(vcmpleq_s8): Remove.
	(vcmpleq_n_s8): Remove.
	(vcmpgtq_s8): Remove.
	(vcmpgtq_n_s8): Remove.
	(vcmpgeq_s8): Remove.
	(vcmpgeq_n_s8): Remove.
	(vcmpeqq_s8): Remove.
	(vcmpeqq_n_s8): Remove.
	(vcmpneq_n_u16): Remove.
	(vcmphiq_u16): Remove.
	(vcmphiq_n_u16): Remove.
	(vcmpeqq_u16): Remove.
	(vcmpeqq_n_u16): Remove.
	(vcmpcsq_u16): Remove.
	(vcmpcsq_n_u16): Remove.
	(vcmpneq_n_s16): Remove.
	(vcmpltq_s16): Remove.
	(vcmpltq_n_s16): Remove.
	(vcmpleq_s16): Remove.
	(vcmpleq_n_s16): Remove.
	(vcmpgtq_s16): Remove.
	(vcmpgtq_n_s16): Remove.
	(vcmpgeq_s16): Remove.
	(vcmpgeq_n_s16): Remove.
	(vcmpeqq_s16): Remove.
	(vcmpeqq_n_s16): Remove.
	(vcmpneq_n_u32): Remove.
	(vcmphiq_u32): Remove.
	(vcmphiq_n_u32): Remove.
	(vcmpeqq_u32): Remove.
	(vcmpeqq_n_u32): Remove.
	(vcmpcsq_u32): Remove.
	(vcmpcsq_n_u32): Remove.
	(vcmpneq_n_s32): Remove.
	(vcmpltq_s32): Remove.
	(vcmpltq_n_s32): Remove.
	(vcmpleq_s32): Remove.
	(vcmpleq_n_s32): Remove.
	(vcmpgtq_s32): Remove.
	(vcmpgtq_n_s32): Remove.
	(vcmpgeq_s32): Remove.
	(vcmpgeq_n_s32): Remove.
	(vcmpeqq_s32): Remove.
	(vcmpeqq_n_s32): Remove.
	(vcmpneq_n_f16): Remove.
	(vcmpneq_f16): Remove.
	(vcmpltq_n_f16): Remove.
	(vcmpltq_f16): Remove.
	(vcmpleq_n_f16): Remove.
	(vcmpleq_f16): Remove.
	(vcmpgtq_n_f16): Remove.
	(vcmpgtq_f16): Remove.
	(vcmpgeq_n_f16): Remove.
	(vcmpgeq_f16): Remove.
	(vcmpeqq_n_f16): Remove.
	(vcmpeqq_f16): Remove.
	(vcmpneq_n_f32): Remove.
	(vcmpneq_f32): Remove.
	(vcmpltq_n_f32): Remove.
	(vcmpltq_f32): Remove.
	(vcmpleq_n_f32): Remove.
	(vcmpleq_f32): Remove.
	(vcmpgtq_n_f32): Remove.
	(vcmpgtq_f32): Remove.
	(vcmpgeq_n_f32): Remove.
	(vcmpgeq_f32): Remove.
	(vcmpeqq_n_f32): Remove.
	(vcmpeqq_f32): Remove.
	(vcmpeqq_m_f16): Remove.
	(vcmpeqq_m_f32): Remove.
	(vcmpneq_m_u8): Remove.
	(vcmpneq_m_n_u8): Remove.
	(vcmphiq_m_u8): Remove.
	(vcmphiq_m_n_u8): Remove.
	(vcmpeqq_m_u8): Remove.
	(vcmpeqq_m_n_u8): Remove.
	(vcmpcsq_m_u8): Remove.
	(vcmpcsq_m_n_u8): Remove.
	(vcmpneq_m_s8): Remove.
	(vcmpneq_m_n_s8): Remove.
	(vcmpltq_m_s8): Remove.
	(vcmpltq_m_n_s8): Remove.
	(vcmpleq_m_s8): Remove.
	(vcmpleq_m_n_s8): Remove.
	(vcmpgtq_m_s8): Remove.
	(vcmpgtq_m_n_s8): Remove.
	(vcmpgeq_m_s8): Remove.
	(vcmpgeq_m_n_s8): Remove.
	(vcmpeqq_m_s8): Remove.
	(vcmpeqq_m_n_s8): Remove.
	(vcmpneq_m_u16): Remove.
	(vcmpneq_m_n_u16): Remove.
	(vcmphiq_m_u16): Remove.
	(vcmphiq_m_n_u16): Remove.
	(vcmpeqq_m_u16): Remove.
	(vcmpeqq_m_n_u16): Remove.
	(vcmpcsq_m_u16): Remove.
	(vcmpcsq_m_n_u16): Remove.
	(vcmpneq_m_s16): Remove.
	(vcmpneq_m_n_s16): Remove.
	(vcmpltq_m_s16): Remove.
	(vcmpltq_m_n_s16): Remove.
	(vcmpleq_m_s16): Remove.
	(vcmpleq_m_n_s16): Remove.
	(vcmpgtq_m_s16): Remove.
	(vcmpgtq_m_n_s16): Remove.
	(vcmpgeq_m_s16): Remove.
	(vcmpgeq_m_n_s16): Remove.
	(vcmpeqq_m_s16): Remove.
	(vcmpeqq_m_n_s16): Remove.
	(vcmpneq_m_u32): Remove.
	(vcmpneq_m_n_u32): Remove.
	(vcmphiq_m_u32): Remove.
	(vcmphiq_m_n_u32): Remove.
	(vcmpeqq_m_u32): Remove.
	(vcmpeqq_m_n_u32): Remove.
	(vcmpcsq_m_u32): Remove.
	(vcmpcsq_m_n_u32): Remove.
	(vcmpneq_m_s32): Remove.
	(vcmpneq_m_n_s32): Remove.
	(vcmpltq_m_s32): Remove.
	(vcmpltq_m_n_s32): Remove.
	(vcmpleq_m_s32): Remove.
	(vcmpleq_m_n_s32): Remove.
	(vcmpgtq_m_s32): Remove.
	(vcmpgtq_m_n_s32): Remove.
	(vcmpgeq_m_s32): Remove.
	(vcmpgeq_m_n_s32): Remove.
	(vcmpeqq_m_s32): Remove.
	(vcmpeqq_m_n_s32): Remove.
	(vcmpeqq_m_n_f16): Remove.
	(vcmpgeq_m_f16): Remove.
	(vcmpgeq_m_n_f16): Remove.
	(vcmpgtq_m_f16): Remove.
	(vcmpgtq_m_n_f16): Remove.
	(vcmpleq_m_f16): Remove.
	(vcmpleq_m_n_f16): Remove.
	(vcmpltq_m_f16): Remove.
	(vcmpltq_m_n_f16): Remove.
	(vcmpneq_m_f16): Remove.
	(vcmpneq_m_n_f16): Remove.
	(vcmpeqq_m_n_f32): Remove.
	(vcmpgeq_m_f32): Remove.
	(vcmpgeq_m_n_f32): Remove.
	(vcmpgtq_m_f32): Remove.
	(vcmpgtq_m_n_f32): Remove.
	(vcmpleq_m_f32): Remove.
	(vcmpleq_m_n_f32): Remove.
	(vcmpltq_m_f32): Remove.
	(vcmpltq_m_n_f32): Remove.
	(vcmpneq_m_f32): Remove.
	(vcmpneq_m_n_f32): Remove.
	(__arm_vcmpneq_s8): Remove.
	(__arm_vcmpneq_s16): Remove.
	(__arm_vcmpneq_s32): Remove.
	(__arm_vcmpneq_u8): Remove.
	(__arm_vcmpneq_u16): Remove.
	(__arm_vcmpneq_u32): Remove.
	(__arm_vcmpneq_n_u8): Remove.
	(__arm_vcmphiq_u8): Remove.
	(__arm_vcmphiq_n_u8): Remove.
	(__arm_vcmpeqq_u8): Remove.
	(__arm_vcmpeqq_n_u8): Remove.
	(__arm_vcmpcsq_u8): Remove.
	(__arm_vcmpcsq_n_u8): Remove.
	(__arm_vcmpneq_n_s8): Remove.
	(__arm_vcmpltq_s8): Remove.
	(__arm_vcmpltq_n_s8): Remove.
	(__arm_vcmpleq_s8): Remove.
	(__arm_vcmpleq_n_s8): Remove.
	(__arm_vcmpgtq_s8): Remove.
	(__arm_vcmpgtq_n_s8): Remove.
	(__arm_vcmpgeq_s8): Remove.
	(__arm_vcmpgeq_n_s8): Remove.
	(__arm_vcmpeqq_s8): Remove.
	(__arm_vcmpeqq_n_s8): Remove.
	(__arm_vcmpneq_n_u16): Remove.
	(__arm_vcmphiq_u16): Remove.
	(__arm_vcmphiq_n_u16): Remove.
	(__arm_vcmpeqq_u16): Remove.
	(__arm_vcmpeqq_n_u16): Remove.
	(__arm_vcmpcsq_u16): Remove.
	(__arm_vcmpcsq_n_u16): Remove.
	(__arm_vcmpneq_n_s16): Remove.
	(__arm_vcmpltq_s16): Remove.
	(__arm_vcmpltq_n_s16): Remove.
	(__arm_vcmpleq_s16): Remove.
	(__arm_vcmpleq_n_s16): Remove.
	(__arm_vcmpgtq_s16): Remove.
	(__arm_vcmpgtq_n_s16): Remove.
	(__arm_vcmpgeq_s16): Remove.
	(__arm_vcmpgeq_n_s16): Remove.
	(__arm_vcmpeqq_s16): Remove.
	(__arm_vcmpeqq_n_s16): Remove.
	(__arm_vcmpneq_n_u32): Remove.
	(__arm_vcmphiq_u32): Remove.
	(__arm_vcmphiq_n_u32): Remove.
	(__arm_vcmpeqq_u32): Remove.
	(__arm_vcmpeqq_n_u32): Remove.
	(__arm_vcmpcsq_u32): Remove.
	(__arm_vcmpcsq_n_u32): Remove.
	(__arm_vcmpneq_n_s32): Remove.
	(__arm_vcmpltq_s32): Remove.
	(__arm_vcmpltq_n_s32): Remove.
	(__arm_vcmpleq_s32): Remove.
	(__arm_vcmpleq_n_s32): Remove.
	(__arm_vcmpgtq_s32): Remove.
	(__arm_vcmpgtq_n_s32): Remove.
	(__arm_vcmpgeq_s32): Remove.
	(__arm_vcmpgeq_n_s32): Remove.
	(__arm_vcmpeqq_s32): Remove.
	(__arm_vcmpeqq_n_s32): Remove.
	(__arm_vcmpneq_m_u8): Remove.
	(__arm_vcmpneq_m_n_u8): Remove.
	(__arm_vcmphiq_m_u8): Remove.
	(__arm_vcmphiq_m_n_u8): Remove.
	(__arm_vcmpeqq_m_u8): Remove.
	(__arm_vcmpeqq_m_n_u8): Remove.
	(__arm_vcmpcsq_m_u8): Remove.
	(__arm_vcmpcsq_m_n_u8): Remove.
	(__arm_vcmpneq_m_s8): Remove.
	(__arm_vcmpneq_m_n_s8): Remove.
	(__arm_vcmpltq_m_s8): Remove.
	(__arm_vcmpltq_m_n_s8): Remove.
	(__arm_vcmpleq_m_s8): Remove.
	(__arm_vcmpleq_m_n_s8): Remove.
	(__arm_vcmpgtq_m_s8): Remove.
	(__arm_vcmpgtq_m_n_s8): Remove.
	(__arm_vcmpgeq_m_s8): Remove.
	(__arm_vcmpgeq_m_n_s8): Remove.
	(__arm_vcmpeqq_m_s8): Remove.
	(__arm_vcmpeqq_m_n_s8): Remove.
	(__arm_vcmpneq_m_u16): Remove.
	(__arm_vcmpneq_m_n_u16): Remove.
	(__arm_vcmphiq_m_u16): Remove.
	(__arm_vcmphiq_m_n_u16): Remove.
	(__arm_vcmpeqq_m_u16): Remove.
	(__arm_vcmpeqq_m_n_u16): Remove.
	(__arm_vcmpcsq_m_u16): Remove.
	(__arm_vcmpcsq_m_n_u16): Remove.
	(__arm_vcmpneq_m_s16): Remove.
	(__arm_vcmpneq_m_n_s16): Remove.
	(__arm_vcmpltq_m_s16): Remove.
	(__arm_vcmpltq_m_n_s16): Remove.
	(__arm_vcmpleq_m_s16): Remove.
	(__arm_vcmpleq_m_n_s16): Remove.
	(__arm_vcmpgtq_m_s16): Remove.
	(__arm_vcmpgtq_m_n_s16): Remove.
	(__arm_vcmpgeq_m_s16): Remove.
	(__arm_vcmpgeq_m_n_s16): Remove.
	(__arm_vcmpeqq_m_s16): Remove.
	(__arm_vcmpeqq_m_n_s16): Remove.
	(__arm_vcmpneq_m_u32): Remove.
	(__arm_vcmpneq_m_n_u32): Remove.
	(__arm_vcmphiq_m_u32): Remove.
	(__arm_vcmphiq_m_n_u32): Remove.
	(__arm_vcmpeqq_m_u32): Remove.
	(__arm_vcmpeqq_m_n_u32): Remove.
	(__arm_vcmpcsq_m_u32): Remove.
	(__arm_vcmpcsq_m_n_u32): Remove.
	(__arm_vcmpneq_m_s32): Remove.
	(__arm_vcmpneq_m_n_s32): Remove.
	(__arm_vcmpltq_m_s32): Remove.
	(__arm_vcmpltq_m_n_s32): Remove.
	(__arm_vcmpleq_m_s32): Remove.
	(__arm_vcmpleq_m_n_s32): Remove.
	(__arm_vcmpgtq_m_s32): Remove.
	(__arm_vcmpgtq_m_n_s32): Remove.
	(__arm_vcmpgeq_m_s32): Remove.
	(__arm_vcmpgeq_m_n_s32): Remove.
	(__arm_vcmpeqq_m_s32): Remove.
	(__arm_vcmpeqq_m_n_s32): Remove.
	(__arm_vcmpneq_n_f16): Remove.
	(__arm_vcmpneq_f16): Remove.
	(__arm_vcmpltq_n_f16): Remove.
	(__arm_vcmpltq_f16): Remove.
	(__arm_vcmpleq_n_f16): Remove.
	(__arm_vcmpleq_f16): Remove.
	(__arm_vcmpgtq_n_f16): Remove.
	(__arm_vcmpgtq_f16): Remove.
	(__arm_vcmpgeq_n_f16): Remove.
	(__arm_vcmpgeq_f16): Remove.
	(__arm_vcmpeqq_n_f16): Remove.
	(__arm_vcmpeqq_f16): Remove.
	(__arm_vcmpneq_n_f32): Remove.
	(__arm_vcmpneq_f32): Remove.
	(__arm_vcmpltq_n_f32): Remove.
	(__arm_vcmpltq_f32): Remove.
	(__arm_vcmpleq_n_f32): Remove.
	(__arm_vcmpleq_f32): Remove.
	(__arm_vcmpgtq_n_f32): Remove.
	(__arm_vcmpgtq_f32): Remove.
	(__arm_vcmpgeq_n_f32): Remove.
	(__arm_vcmpgeq_f32): Remove.
	(__arm_vcmpeqq_n_f32): Remove.
	(__arm_vcmpeqq_f32): Remove.
	(__arm_vcmpeqq_m_f16): Remove.
	(__arm_vcmpeqq_m_f32): Remove.
	(__arm_vcmpeqq_m_n_f16): Remove.
	(__arm_vcmpgeq_m_f16): Remove.
	(__arm_vcmpgeq_m_n_f16): Remove.
	(__arm_vcmpgtq_m_f16): Remove.
	(__arm_vcmpgtq_m_n_f16): Remove.
	(__arm_vcmpleq_m_f16): Remove.
	(__arm_vcmpleq_m_n_f16): Remove.
	(__arm_vcmpltq_m_f16): Remove.
	(__arm_vcmpltq_m_n_f16): Remove.
	(__arm_vcmpneq_m_f16): Remove.
	(__arm_vcmpneq_m_n_f16): Remove.
	(__arm_vcmpeqq_m_n_f32): Remove.
	(__arm_vcmpgeq_m_f32): Remove.
	(__arm_vcmpgeq_m_n_f32): Remove.
	(__arm_vcmpgtq_m_f32): Remove.
	(__arm_vcmpgtq_m_n_f32): Remove.
	(__arm_vcmpleq_m_f32): Remove.
	(__arm_vcmpleq_m_n_f32): Remove.
	(__arm_vcmpltq_m_f32): Remove.
	(__arm_vcmpltq_m_n_f32): Remove.
	(__arm_vcmpneq_m_f32): Remove.
	(__arm_vcmpneq_m_n_f32): Remove.
	(__arm_vcmpneq): Remove.
	(__arm_vcmphiq): Remove.
	(__arm_vcmpeqq): Remove.
	(__arm_vcmpcsq): Remove.
	(__arm_vcmpltq): Remove.
	(__arm_vcmpleq): Remove.
	(__arm_vcmpgtq): Remove.
	(__arm_vcmpgeq): Remove.
	(__arm_vcmpneq_m): Remove.
	(__arm_vcmphiq_m): Remove.
	(__arm_vcmpeqq_m): Remove.
	(__arm_vcmpcsq_m): Remove.
	(__arm_vcmpltq_m): Remove.
	(__arm_vcmpleq_m): Remove.
	(__arm_vcmpgtq_m): Remove.
	(__arm_vcmpgeq_m): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc     |    9 +
 gcc/config/arm/arm-mve-builtins-base.def    |   14 +
 gcc/config/arm/arm-mve-builtins-base.h      |    8 +
 gcc/config/arm/arm-mve-builtins-functions.h |  110 +
 gcc/config/arm/arm-mve-builtins.cc          |    8 +
 gcc/config/arm/arm_mve.h                    | 3298 ++-----------------
 6 files changed, 333 insertions(+), 3114 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index edca0d9ac6c..14870f5b1aa 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -26,6 +26,7 @@
 #include "memmodel.h"
 #include "insn-codes.h"
 #include "optabs.h"
+#include "expr.h"
 #include "basic-block.h"
 #include "function.h"
 #include "gimple.h"
@@ -237,6 +238,14 @@ FUNCTION_WITH_RTX_M_N (vaddq, PLUS, VADDQ)
 FUNCTION_WITH_RTX_M (vandq, AND, VANDQ)
 FUNCTION_WITHOUT_N_NO_U_F (vclsq, VCLSQ)
 FUNCTION (vclzq, unspec_based_mve_function_exact_insn, (CLZ, CLZ, CLZ, -1, -1, -1, VCLZQ_M_S, VCLZQ_M_U, -1, -1, -1 ,-1))
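+/* The first three arguments are the RTX codes for the unpredicated
+   s/u/f forms; the remaining unspecs cover the predicated m and m_n
+   forms.  UNKNOWN marks combinations a given comparison does not
+   support.  */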
+FUNCTION (vcmpeqq, unspec_based_mve_function_exact_insn_vcmp, (EQ, EQ, EQ, VCMPEQQ_M_S, VCMPEQQ_M_U, VCMPEQQ_M_F, VCMPEQQ_M_N_S, VCMPEQQ_M_N_U, VCMPEQQ_M_N_F))
+FUNCTION (vcmpneq, unspec_based_mve_function_exact_insn_vcmp, (NE, NE, NE, VCMPNEQ_M_S, VCMPNEQ_M_U, VCMPNEQ_M_F, VCMPNEQ_M_N_S, VCMPNEQ_M_N_U, VCMPNEQ_M_N_F))
+FUNCTION (vcmpgeq, unspec_based_mve_function_exact_insn_vcmp, (GE, UNKNOWN, GE, VCMPGEQ_M_S, UNKNOWN, VCMPGEQ_M_F, VCMPGEQ_M_N_S, UNKNOWN, VCMPGEQ_M_N_F))
+FUNCTION (vcmpgtq, unspec_based_mve_function_exact_insn_vcmp, (GT, UNKNOWN, GT, VCMPGTQ_M_S, UNKNOWN, VCMPGTQ_M_F, VCMPGTQ_M_N_S, UNKNOWN, VCMPGTQ_M_N_F))
+FUNCTION (vcmpleq, unspec_based_mve_function_exact_insn_vcmp, (LE, UNKNOWN, LE, VCMPLEQ_M_S, UNKNOWN, VCMPLEQ_M_F, VCMPLEQ_M_N_S, UNKNOWN, VCMPLEQ_M_N_F))
+FUNCTION (vcmpltq, unspec_based_mve_function_exact_insn_vcmp, (LT, UNKNOWN, LT, VCMPLTQ_M_S, UNKNOWN, VCMPLTQ_M_F, VCMPLTQ_M_N_S, UNKNOWN, VCMPLTQ_M_N_F))
+FUNCTION (vcmpcsq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GEU, UNKNOWN, UNKNOWN, VCMPCSQ_M_U, UNKNOWN, UNKNOWN, VCMPCSQ_M_N_U, UNKNOWN))
+FUNCTION (vcmphiq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GTU, UNKNOWN, UNKNOWN, VCMPHIQ_M_U, UNKNOWN, UNKNOWN, VCMPHIQ_M_N_U, UNKNOWN))
 FUNCTION_WITHOUT_M_N (vcreateq, VCREATEQ)
 FUNCTION_WITH_RTX_M (veorq, XOR, VEORQ)
 FUNCTION_WITH_M_N_NO_F (vhaddq, VHADDQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 48a07c8d888..f05cecd9160 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -24,6 +24,14 @@ DEF_MVE_FUNCTION (vaddq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vandq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vclsq, unary, all_signed, mx_or_none)
 DEF_MVE_FUNCTION (vclzq, unary, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vcmpcsq, cmp, all_unsigned, m_or_none)
+DEF_MVE_FUNCTION (vcmpeqq, cmp, all_integer, m_or_none)
+DEF_MVE_FUNCTION (vcmpgeq, cmp, all_signed, m_or_none)
+DEF_MVE_FUNCTION (vcmpgtq, cmp, all_signed, m_or_none)
+DEF_MVE_FUNCTION (vcmphiq, cmp, all_unsigned, m_or_none)
+DEF_MVE_FUNCTION (vcmpleq, cmp, all_signed, m_or_none)
+DEF_MVE_FUNCTION (vcmpltq, cmp, all_signed, m_or_none)
+DEF_MVE_FUNCTION (vcmpneq, cmp, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vcreateq, create, all_integer_with_64, none)
 DEF_MVE_FUNCTION (veorq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vhaddq, binary_opt_n, all_integer, mx_or_none)
@@ -86,6 +94,12 @@ DEF_MVE_FUNCTION (vabdq, binary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vabsq, unary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vaddq, binary_opt_n, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vandq, binary, all_float, mx_or_none)
+DEF_MVE_FUNCTION (vcmpeqq, cmp, all_float, m_or_none)
+DEF_MVE_FUNCTION (vcmpgeq, cmp, all_float, m_or_none)
+DEF_MVE_FUNCTION (vcmpgtq, cmp, all_float, m_or_none)
+DEF_MVE_FUNCTION (vcmpleq, cmp, all_float, m_or_none)
+DEF_MVE_FUNCTION (vcmpltq, cmp, all_float, m_or_none)
+DEF_MVE_FUNCTION (vcmpneq, cmp, all_float, m_or_none)
 DEF_MVE_FUNCTION (vcreateq, create, all_float, none)
 DEF_MVE_FUNCTION (veorq, binary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vmaxnmaq, binary, all_float, m_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 31417435f6f..179e1295fb2 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -29,6 +29,14 @@ extern const function_base *const vaddq;
 extern const function_base *const vandq;
 extern const function_base *const vclsq;
 extern const function_base *const vclzq;
+extern const function_base *const vcmpcsq;
+extern const function_base *const vcmpeqq;
+extern const function_base *const vcmpgeq;
+extern const function_base *const vcmpgtq;
+extern const function_base *const vcmphiq;
+extern const function_base *const vcmpleq;
+extern const function_base *const vcmpltq;
+extern const function_base *const vcmpneq;
 extern const function_base *const vcreateq;
 extern const function_base *const veorq;
 extern const function_base *const vhaddq;
diff --git a/gcc/config/arm/arm-mve-builtins-functions.h b/gcc/config/arm/arm-mve-builtins-functions.h
index ddedbb2a8e1..d069990dcab 100644
--- a/gcc/config/arm/arm-mve-builtins-functions.h
+++ b/gcc/config/arm/arm-mve-builtins-functions.h
@@ -590,6 +590,116 @@ public:
   }
 };
 
+/* Map the comparison functions.  */
+class unspec_based_mve_function_exact_insn_vcmp : public unspec_based_mve_function_base
+{
+public:
+  CONSTEXPR unspec_based_mve_function_exact_insn_vcmp (rtx_code code_for_sint,
+						       rtx_code code_for_uint,
+						       rtx_code code_for_fp,
+						       int unspec_for_m_sint,
+						       int unspec_for_m_uint,
+						       int unspec_for_m_fp,
+						       int unspec_for_m_n_sint,
+						       int unspec_for_m_n_uint,
+						       int unspec_for_m_n_fp)
+    : unspec_based_mve_function_base (code_for_sint,
+				      code_for_uint,
+				      code_for_fp,
+				      -1,
+				      -1,
+				      -1,
+				      unspec_for_m_sint,
+				      unspec_for_m_uint,
+				      unspec_for_m_fp,
+				      unspec_for_m_n_sint,
+				      unspec_for_m_n_uint,
+				      unspec_for_m_n_fp)
+  {}
+
+  rtx
+  expand (function_expander &e) const override
+  {
+    machine_mode mode = e.vector_mode (0);
+    insn_code code;
+    rtx target;
+
+    /* No suffix, no predicate, use the right RTX code.  */
+    if (e.pred == PRED_none)
+      {
+	switch (e.mode_suffix_id)
+	  {
+	  case MODE_none:
+	    if (e.type_suffix (0).integer_p)
+	      if (e.type_suffix (0).unsigned_p)
+		code = code_for_mve_vcmpq (m_code_for_uint, mode);
+	      else
+		code = code_for_mve_vcmpq (m_code_for_sint, mode);
+	    else
+	      code = code_for_mve_vcmpq_f (m_code_for_fp, mode);
+	    break;
+
+	  case MODE_n:
+	    if (e.type_suffix (0).integer_p)
+	      if (e.type_suffix (0).unsigned_p)
+		code = code_for_mve_vcmpq_n (m_code_for_uint, mode);
+	      else
+		code = code_for_mve_vcmpq_n (m_code_for_sint, mode);
+	    else
+	      code = code_for_mve_vcmpq_n_f (m_code_for_fp, mode);
+	    break;
+
+	  default:
+	    gcc_unreachable ();
+	  }
+	target = e.use_exact_insn (code);
+      }
+    else
+      {
+	switch (e.pred)
+	  {
+	  case PRED_m:
+	    switch (e.mode_suffix_id)
+	      {
+	      case MODE_none:
+		/* No suffix, "m" predicate.  */
+		if (e.type_suffix (0).integer_p)
+		  if (e.type_suffix (0).unsigned_p)
+		    code = code_for_mve_vcmpq_m (m_unspec_for_m_uint, m_unspec_for_m_uint, mode);
+		  else
+		    code = code_for_mve_vcmpq_m (m_unspec_for_m_sint, m_unspec_for_m_sint, mode);
+		else
+		  code = code_for_mve_vcmpq_m_f (m_unspec_for_m_fp, mode);
+		break;
+
+	      case MODE_n:
+		/* _n suffix, "m" predicate.  */
+		if (e.type_suffix (0).integer_p)
+		  if (e.type_suffix (0).unsigned_p)
+		    code = code_for_mve_vcmpq_m_n (m_unspec_for_m_n_uint, m_unspec_for_m_n_uint, mode);
+		  else
+		    code = code_for_mve_vcmpq_m_n (m_unspec_for_m_n_sint, m_unspec_for_m_n_sint, mode);
+		else
+		  code = code_for_mve_vcmpq_m_n_f (m_unspec_for_m_n_fp, mode);
+		break;
+
+	      default:
+		gcc_unreachable ();
+	      }
+	    target = e.use_cond_insn (code, 0);
+	    break;
+
+	  default:
+	    gcc_unreachable ();
+	  }
+      }
+
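+    /* The compare above produced a boolean-vector predicate; return it
+       as an HImode value to match the mve_pred16_t return type.  */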
+    rtx HItarget = gen_reg_rtx (HImode);
+    emit_move_insn (HItarget, gen_lowpart (HImode, target));
+    return HItarget;
+  }
+};
+
 } /* end namespace arm_mve */
 
 /* Declare the global function base NAME, creating it from an instance
diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index 9dc762c9fc0..59cfaf6e5b1 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -670,6 +670,14 @@ function_instance::has_inactive_argument () const
     return false;
 
   if (mode_suffix_id == MODE_r
+      || base == functions::vcmpeqq
+      || base == functions::vcmpneq
+      || base == functions::vcmpgeq
+      || base == functions::vcmpgtq
+      || base == functions::vcmpleq
+      || base == functions::vcmpltq
+      || base == functions::vcmpcsq
+      || base == functions::vcmphiq
       || base == functions::vmaxaq
       || base == functions::vmaxnmaq
       || base == functions::vminaq
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 373797689cc..3eb8195060b 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -52,24 +52,16 @@
 #define vrev32q(__a) __arm_vrev32q(__a)
 #define vrev64q(__a) __arm_vrev64q(__a)
 #define vaddlvq_p(__a, __p) __arm_vaddlvq_p(__a, __p)
-#define vcmpneq(__a, __b) __arm_vcmpneq(__a, __b)
 #define vornq(__a, __b) __arm_vornq(__a, __b)
 #define vmulltq_int(__a, __b) __arm_vmulltq_int(__a, __b)
 #define vmullbq_int(__a, __b) __arm_vmullbq_int(__a, __b)
 #define vmladavq(__a, __b) __arm_vmladavq(__a, __b)
-#define vcmphiq(__a, __b) __arm_vcmphiq(__a, __b)
-#define vcmpeqq(__a, __b) __arm_vcmpeqq(__a, __b)
-#define vcmpcsq(__a, __b) __arm_vcmpcsq(__a, __b)
 #define vcaddq_rot90(__a, __b) __arm_vcaddq_rot90(__a, __b)
 #define vcaddq_rot270(__a, __b) __arm_vcaddq_rot270(__a, __b)
 #define vbicq(__a, __b) __arm_vbicq(__a, __b)
 #define vaddvq_p(__a, __p) __arm_vaddvq_p(__a, __p)
 #define vaddvaq(__a, __b) __arm_vaddvaq(__a, __b)
 #define vbrsrq(__a, __b) __arm_vbrsrq(__a, __b)
-#define vcmpltq(__a, __b) __arm_vcmpltq(__a, __b)
-#define vcmpleq(__a, __b) __arm_vcmpleq(__a, __b)
-#define vcmpgtq(__a, __b) __arm_vcmpgtq(__a, __b)
-#define vcmpgeq(__a, __b) __arm_vcmpgeq(__a, __b)
 #define vqshluq(__a, __imm) __arm_vqshluq(__a, __imm)
 #define vmlsdavxq(__a, __b) __arm_vmlsdavxq(__a, __b)
 #define vmlsdavq(__a, __b) __arm_vmlsdavq(__a, __b)
@@ -105,18 +97,9 @@
 #define vmladavq_p(__a, __b, __p) __arm_vmladavq_p(__a, __b, __p)
 #define vmladavaq(__a, __b, __c) __arm_vmladavaq(__a, __b, __c)
 #define vdupq_m(__inactive, __a, __p) __arm_vdupq_m(__inactive, __a, __p)
-#define vcmpneq_m(__a, __b, __p) __arm_vcmpneq_m(__a, __b, __p)
-#define vcmphiq_m(__a, __b, __p) __arm_vcmphiq_m(__a, __b, __p)
-#define vcmpeqq_m(__a, __b, __p) __arm_vcmpeqq_m(__a, __b, __p)
-#define vcmpcsq_m(__a, __b, __p) __arm_vcmpcsq_m(__a, __b, __p)
-#define vcmpcsq_m_n(__a, __b, __p) __arm_vcmpcsq_m_n(__a, __b, __p)
 #define vaddvaq_p(__a, __b, __p) __arm_vaddvaq_p(__a, __b, __p)
 #define vsriq(__a, __b, __imm) __arm_vsriq(__a, __b, __imm)
 #define vsliq(__a, __b, __imm) __arm_vsliq(__a, __b, __imm)
-#define vcmpltq_m(__a, __b, __p) __arm_vcmpltq_m(__a, __b, __p)
-#define vcmpleq_m(__a, __b, __p) __arm_vcmpleq_m(__a, __b, __p)
-#define vcmpgtq_m(__a, __b, __p) __arm_vcmpgtq_m(__a, __b, __p)
-#define vcmpgeq_m(__a, __b, __p) __arm_vcmpgeq_m(__a, __b, __p)
 #define vmlsdavxq_p(__a, __b, __p) __arm_vmlsdavxq_p(__a, __b, __p)
 #define vmlsdavq_p(__a, __b, __p) __arm_vmlsdavq_p(__a, __b, __p)
 #define vmladavxq_p(__a, __b, __p) __arm_vmladavxq_p(__a, __b, __p)
@@ -442,40 +425,16 @@
 #define vcvtq_n_u32_f32(__a,  __imm6) __arm_vcvtq_n_u32_f32(__a,  __imm6)
 #define vaddlvq_p_s32(__a, __p) __arm_vaddlvq_p_s32(__a, __p)
 #define vaddlvq_p_u32(__a, __p) __arm_vaddlvq_p_u32(__a, __p)
-#define vcmpneq_s8(__a, __b) __arm_vcmpneq_s8(__a, __b)
-#define vcmpneq_s16(__a, __b) __arm_vcmpneq_s16(__a, __b)
-#define vcmpneq_s32(__a, __b) __arm_vcmpneq_s32(__a, __b)
-#define vcmpneq_u8(__a, __b) __arm_vcmpneq_u8(__a, __b)
-#define vcmpneq_u16(__a, __b) __arm_vcmpneq_u16(__a, __b)
-#define vcmpneq_u32(__a, __b) __arm_vcmpneq_u32(__a, __b)
 #define vornq_u8(__a, __b) __arm_vornq_u8(__a, __b)
 #define vmulltq_int_u8(__a, __b) __arm_vmulltq_int_u8(__a, __b)
 #define vmullbq_int_u8(__a, __b) __arm_vmullbq_int_u8(__a, __b)
 #define vmladavq_u8(__a, __b) __arm_vmladavq_u8(__a, __b)
-#define vcmpneq_n_u8(__a, __b) __arm_vcmpneq_n_u8(__a, __b)
-#define vcmphiq_u8(__a, __b) __arm_vcmphiq_u8(__a, __b)
-#define vcmphiq_n_u8(__a, __b) __arm_vcmphiq_n_u8(__a, __b)
-#define vcmpeqq_u8(__a, __b) __arm_vcmpeqq_u8(__a, __b)
-#define vcmpeqq_n_u8(__a, __b) __arm_vcmpeqq_n_u8(__a, __b)
-#define vcmpcsq_u8(__a, __b) __arm_vcmpcsq_u8(__a, __b)
-#define vcmpcsq_n_u8(__a, __b) __arm_vcmpcsq_n_u8(__a, __b)
 #define vcaddq_rot90_u8(__a, __b) __arm_vcaddq_rot90_u8(__a, __b)
 #define vcaddq_rot270_u8(__a, __b) __arm_vcaddq_rot270_u8(__a, __b)
 #define vbicq_u8(__a, __b) __arm_vbicq_u8(__a, __b)
 #define vaddvq_p_u8(__a, __p) __arm_vaddvq_p_u8(__a, __p)
 #define vaddvaq_u8(__a, __b) __arm_vaddvaq_u8(__a, __b)
 #define vbrsrq_n_u8(__a, __b) __arm_vbrsrq_n_u8(__a, __b)
-#define vcmpneq_n_s8(__a, __b) __arm_vcmpneq_n_s8(__a, __b)
-#define vcmpltq_s8(__a, __b) __arm_vcmpltq_s8(__a, __b)
-#define vcmpltq_n_s8(__a, __b) __arm_vcmpltq_n_s8(__a, __b)
-#define vcmpleq_s8(__a, __b) __arm_vcmpleq_s8(__a, __b)
-#define vcmpleq_n_s8(__a, __b) __arm_vcmpleq_n_s8(__a, __b)
-#define vcmpgtq_s8(__a, __b) __arm_vcmpgtq_s8(__a, __b)
-#define vcmpgtq_n_s8(__a, __b) __arm_vcmpgtq_n_s8(__a, __b)
-#define vcmpgeq_s8(__a, __b) __arm_vcmpgeq_s8(__a, __b)
-#define vcmpgeq_n_s8(__a, __b) __arm_vcmpgeq_n_s8(__a, __b)
-#define vcmpeqq_s8(__a, __b) __arm_vcmpeqq_s8(__a, __b)
-#define vcmpeqq_n_s8(__a, __b) __arm_vcmpeqq_n_s8(__a, __b)
 #define vqshluq_n_s8(__a,  __imm) __arm_vqshluq_n_s8(__a,  __imm)
 #define vaddvq_p_s8(__a, __p) __arm_vaddvq_p_s8(__a, __p)
 #define vornq_s8(__a, __b) __arm_vornq_s8(__a, __b)
@@ -496,30 +455,12 @@
 #define vmulltq_int_u16(__a, __b) __arm_vmulltq_int_u16(__a, __b)
 #define vmullbq_int_u16(__a, __b) __arm_vmullbq_int_u16(__a, __b)
 #define vmladavq_u16(__a, __b) __arm_vmladavq_u16(__a, __b)
-#define vcmpneq_n_u16(__a, __b) __arm_vcmpneq_n_u16(__a, __b)
-#define vcmphiq_u16(__a, __b) __arm_vcmphiq_u16(__a, __b)
-#define vcmphiq_n_u16(__a, __b) __arm_vcmphiq_n_u16(__a, __b)
-#define vcmpeqq_u16(__a, __b) __arm_vcmpeqq_u16(__a, __b)
-#define vcmpeqq_n_u16(__a, __b) __arm_vcmpeqq_n_u16(__a, __b)
-#define vcmpcsq_u16(__a, __b) __arm_vcmpcsq_u16(__a, __b)
-#define vcmpcsq_n_u16(__a, __b) __arm_vcmpcsq_n_u16(__a, __b)
 #define vcaddq_rot90_u16(__a, __b) __arm_vcaddq_rot90_u16(__a, __b)
 #define vcaddq_rot270_u16(__a, __b) __arm_vcaddq_rot270_u16(__a, __b)
 #define vbicq_u16(__a, __b) __arm_vbicq_u16(__a, __b)
 #define vaddvq_p_u16(__a, __p) __arm_vaddvq_p_u16(__a, __p)
 #define vaddvaq_u16(__a, __b) __arm_vaddvaq_u16(__a, __b)
 #define vbrsrq_n_u16(__a, __b) __arm_vbrsrq_n_u16(__a, __b)
-#define vcmpneq_n_s16(__a, __b) __arm_vcmpneq_n_s16(__a, __b)
-#define vcmpltq_s16(__a, __b) __arm_vcmpltq_s16(__a, __b)
-#define vcmpltq_n_s16(__a, __b) __arm_vcmpltq_n_s16(__a, __b)
-#define vcmpleq_s16(__a, __b) __arm_vcmpleq_s16(__a, __b)
-#define vcmpleq_n_s16(__a, __b) __arm_vcmpleq_n_s16(__a, __b)
-#define vcmpgtq_s16(__a, __b) __arm_vcmpgtq_s16(__a, __b)
-#define vcmpgtq_n_s16(__a, __b) __arm_vcmpgtq_n_s16(__a, __b)
-#define vcmpgeq_s16(__a, __b) __arm_vcmpgeq_s16(__a, __b)
-#define vcmpgeq_n_s16(__a, __b) __arm_vcmpgeq_n_s16(__a, __b)
-#define vcmpeqq_s16(__a, __b) __arm_vcmpeqq_s16(__a, __b)
-#define vcmpeqq_n_s16(__a, __b) __arm_vcmpeqq_n_s16(__a, __b)
 #define vqshluq_n_s16(__a,  __imm) __arm_vqshluq_n_s16(__a,  __imm)
 #define vaddvq_p_s16(__a, __p) __arm_vaddvq_p_s16(__a, __p)
 #define vornq_s16(__a, __b) __arm_vornq_s16(__a, __b)
@@ -540,30 +481,12 @@
 #define vmulltq_int_u32(__a, __b) __arm_vmulltq_int_u32(__a, __b)
 #define vmullbq_int_u32(__a, __b) __arm_vmullbq_int_u32(__a, __b)
 #define vmladavq_u32(__a, __b) __arm_vmladavq_u32(__a, __b)
-#define vcmpneq_n_u32(__a, __b) __arm_vcmpneq_n_u32(__a, __b)
-#define vcmphiq_u32(__a, __b) __arm_vcmphiq_u32(__a, __b)
-#define vcmphiq_n_u32(__a, __b) __arm_vcmphiq_n_u32(__a, __b)
-#define vcmpeqq_u32(__a, __b) __arm_vcmpeqq_u32(__a, __b)
-#define vcmpeqq_n_u32(__a, __b) __arm_vcmpeqq_n_u32(__a, __b)
-#define vcmpcsq_u32(__a, __b) __arm_vcmpcsq_u32(__a, __b)
-#define vcmpcsq_n_u32(__a, __b) __arm_vcmpcsq_n_u32(__a, __b)
 #define vcaddq_rot90_u32(__a, __b) __arm_vcaddq_rot90_u32(__a, __b)
 #define vcaddq_rot270_u32(__a, __b) __arm_vcaddq_rot270_u32(__a, __b)
 #define vbicq_u32(__a, __b) __arm_vbicq_u32(__a, __b)
 #define vaddvq_p_u32(__a, __p) __arm_vaddvq_p_u32(__a, __p)
 #define vaddvaq_u32(__a, __b) __arm_vaddvaq_u32(__a, __b)
 #define vbrsrq_n_u32(__a, __b) __arm_vbrsrq_n_u32(__a, __b)
-#define vcmpneq_n_s32(__a, __b) __arm_vcmpneq_n_s32(__a, __b)
-#define vcmpltq_s32(__a, __b) __arm_vcmpltq_s32(__a, __b)
-#define vcmpltq_n_s32(__a, __b) __arm_vcmpltq_n_s32(__a, __b)
-#define vcmpleq_s32(__a, __b) __arm_vcmpleq_s32(__a, __b)
-#define vcmpleq_n_s32(__a, __b) __arm_vcmpleq_n_s32(__a, __b)
-#define vcmpgtq_s32(__a, __b) __arm_vcmpgtq_s32(__a, __b)
-#define vcmpgtq_n_s32(__a, __b) __arm_vcmpgtq_n_s32(__a, __b)
-#define vcmpgeq_s32(__a, __b) __arm_vcmpgeq_s32(__a, __b)
-#define vcmpgeq_n_s32(__a, __b) __arm_vcmpgeq_n_s32(__a, __b)
-#define vcmpeqq_s32(__a, __b) __arm_vcmpeqq_s32(__a, __b)
-#define vcmpeqq_n_s32(__a, __b) __arm_vcmpeqq_n_s32(__a, __b)
 #define vqshluq_n_s32(__a,  __imm) __arm_vqshluq_n_s32(__a,  __imm)
 #define vaddvq_p_s32(__a, __p) __arm_vaddvq_p_s32(__a, __p)
 #define vornq_s32(__a, __b) __arm_vornq_s32(__a, __b)
@@ -584,18 +507,6 @@
 #define vmullbq_poly_p8(__a, __b) __arm_vmullbq_poly_p8(__a, __b)
 #define vmlaldavq_u16(__a, __b) __arm_vmlaldavq_u16(__a, __b)
 #define vbicq_n_u16(__a,  __imm) __arm_vbicq_n_u16(__a,  __imm)
-#define vcmpneq_n_f16(__a, __b) __arm_vcmpneq_n_f16(__a, __b)
-#define vcmpneq_f16(__a, __b) __arm_vcmpneq_f16(__a, __b)
-#define vcmpltq_n_f16(__a, __b) __arm_vcmpltq_n_f16(__a, __b)
-#define vcmpltq_f16(__a, __b) __arm_vcmpltq_f16(__a, __b)
-#define vcmpleq_n_f16(__a, __b) __arm_vcmpleq_n_f16(__a, __b)
-#define vcmpleq_f16(__a, __b) __arm_vcmpleq_f16(__a, __b)
-#define vcmpgtq_n_f16(__a, __b) __arm_vcmpgtq_n_f16(__a, __b)
-#define vcmpgtq_f16(__a, __b) __arm_vcmpgtq_f16(__a, __b)
-#define vcmpgeq_n_f16(__a, __b) __arm_vcmpgeq_n_f16(__a, __b)
-#define vcmpgeq_f16(__a, __b) __arm_vcmpgeq_f16(__a, __b)
-#define vcmpeqq_n_f16(__a, __b) __arm_vcmpeqq_n_f16(__a, __b)
-#define vcmpeqq_f16(__a, __b) __arm_vcmpeqq_f16(__a, __b)
 #define vqdmulltq_s16(__a, __b) __arm_vqdmulltq_s16(__a, __b)
 #define vqdmulltq_n_s16(__a, __b) __arm_vqdmulltq_n_s16(__a, __b)
 #define vqdmullbq_s16(__a, __b) __arm_vqdmullbq_s16(__a, __b)
@@ -617,18 +528,6 @@
 #define vmullbq_poly_p16(__a, __b) __arm_vmullbq_poly_p16(__a, __b)
 #define vmlaldavq_u32(__a, __b) __arm_vmlaldavq_u32(__a, __b)
 #define vbicq_n_u32(__a,  __imm) __arm_vbicq_n_u32(__a,  __imm)
-#define vcmpneq_n_f32(__a, __b) __arm_vcmpneq_n_f32(__a, __b)
-#define vcmpneq_f32(__a, __b) __arm_vcmpneq_f32(__a, __b)
-#define vcmpltq_n_f32(__a, __b) __arm_vcmpltq_n_f32(__a, __b)
-#define vcmpltq_f32(__a, __b) __arm_vcmpltq_f32(__a, __b)
-#define vcmpleq_n_f32(__a, __b) __arm_vcmpleq_n_f32(__a, __b)
-#define vcmpleq_f32(__a, __b) __arm_vcmpleq_f32(__a, __b)
-#define vcmpgtq_n_f32(__a, __b) __arm_vcmpgtq_n_f32(__a, __b)
-#define vcmpgtq_f32(__a, __b) __arm_vcmpgtq_f32(__a, __b)
-#define vcmpgeq_n_f32(__a, __b) __arm_vcmpgeq_n_f32(__a, __b)
-#define vcmpgeq_f32(__a, __b) __arm_vcmpgeq_f32(__a, __b)
-#define vcmpeqq_n_f32(__a, __b) __arm_vcmpeqq_n_f32(__a, __b)
-#define vcmpeqq_f32(__a, __b) __arm_vcmpeqq_f32(__a, __b)
 #define vqdmulltq_s32(__a, __b) __arm_vqdmulltq_s32(__a, __b)
 #define vqdmulltq_n_s32(__a, __b) __arm_vqdmulltq_n_s32(__a, __b)
 #define vqdmullbq_s32(__a, __b) __arm_vqdmullbq_s32(__a, __b)
@@ -666,8 +565,6 @@
 #define vbicq_m_n_s32(__a,  __imm, __p) __arm_vbicq_m_n_s32(__a,  __imm, __p)
 #define vbicq_m_n_u16(__a,  __imm, __p) __arm_vbicq_m_n_u16(__a,  __imm, __p)
 #define vbicq_m_n_u32(__a,  __imm, __p) __arm_vbicq_m_n_u32(__a,  __imm, __p)
-#define vcmpeqq_m_f16(__a, __b, __p) __arm_vcmpeqq_m_f16(__a, __b, __p)
-#define vcmpeqq_m_f32(__a, __b, __p) __arm_vcmpeqq_m_f32(__a, __b, __p)
 #define vcvtaq_m_s16_f16(__inactive, __a, __p) __arm_vcvtaq_m_s16_f16(__inactive, __a, __p)
 #define vcvtaq_m_u16_f16(__inactive, __a, __p) __arm_vcvtaq_m_u16_f16(__inactive, __a, __p)
 #define vcvtaq_m_s32_f32(__inactive, __a, __p) __arm_vcvtaq_m_s32_f32(__inactive, __a, __p)
@@ -696,29 +593,9 @@
 #define vmladavq_p_u8(__a, __b, __p) __arm_vmladavq_p_u8(__a, __b, __p)
 #define vmladavaq_u8(__a, __b, __c) __arm_vmladavaq_u8(__a, __b, __c)
 #define vdupq_m_n_u8(__inactive, __a, __p) __arm_vdupq_m_n_u8(__inactive, __a, __p)
-#define vcmpneq_m_u8(__a, __b, __p) __arm_vcmpneq_m_u8(__a, __b, __p)
-#define vcmpneq_m_n_u8(__a, __b, __p) __arm_vcmpneq_m_n_u8(__a, __b, __p)
-#define vcmphiq_m_u8(__a, __b, __p) __arm_vcmphiq_m_u8(__a, __b, __p)
-#define vcmphiq_m_n_u8(__a, __b, __p) __arm_vcmphiq_m_n_u8(__a, __b, __p)
-#define vcmpeqq_m_u8(__a, __b, __p) __arm_vcmpeqq_m_u8(__a, __b, __p)
-#define vcmpeqq_m_n_u8(__a, __b, __p) __arm_vcmpeqq_m_n_u8(__a, __b, __p)
-#define vcmpcsq_m_u8(__a, __b, __p) __arm_vcmpcsq_m_u8(__a, __b, __p)
-#define vcmpcsq_m_n_u8(__a, __b, __p) __arm_vcmpcsq_m_n_u8(__a, __b, __p)
 #define vaddvaq_p_u8(__a, __b, __p) __arm_vaddvaq_p_u8(__a, __b, __p)
 #define vsriq_n_u8(__a, __b,  __imm) __arm_vsriq_n_u8(__a, __b,  __imm)
 #define vsliq_n_u8(__a, __b,  __imm) __arm_vsliq_n_u8(__a, __b,  __imm)
-#define vcmpneq_m_s8(__a, __b, __p) __arm_vcmpneq_m_s8(__a, __b, __p)
-#define vcmpneq_m_n_s8(__a, __b, __p) __arm_vcmpneq_m_n_s8(__a, __b, __p)
-#define vcmpltq_m_s8(__a, __b, __p) __arm_vcmpltq_m_s8(__a, __b, __p)
-#define vcmpltq_m_n_s8(__a, __b, __p) __arm_vcmpltq_m_n_s8(__a, __b, __p)
-#define vcmpleq_m_s8(__a, __b, __p) __arm_vcmpleq_m_s8(__a, __b, __p)
-#define vcmpleq_m_n_s8(__a, __b, __p) __arm_vcmpleq_m_n_s8(__a, __b, __p)
-#define vcmpgtq_m_s8(__a, __b, __p) __arm_vcmpgtq_m_s8(__a, __b, __p)
-#define vcmpgtq_m_n_s8(__a, __b, __p) __arm_vcmpgtq_m_n_s8(__a, __b, __p)
-#define vcmpgeq_m_s8(__a, __b, __p) __arm_vcmpgeq_m_s8(__a, __b, __p)
-#define vcmpgeq_m_n_s8(__a, __b, __p) __arm_vcmpgeq_m_n_s8(__a, __b, __p)
-#define vcmpeqq_m_s8(__a, __b, __p) __arm_vcmpeqq_m_s8(__a, __b, __p)
-#define vcmpeqq_m_n_s8(__a, __b, __p) __arm_vcmpeqq_m_n_s8(__a, __b, __p)
 #define vrev64q_m_s8(__inactive, __a, __p) __arm_vrev64q_m_s8(__inactive, __a, __p)
 #define vmvnq_m_s8(__inactive, __a, __p) __arm_vmvnq_m_s8(__inactive, __a, __p)
 #define vmlsdavxq_p_s8(__a, __b, __p) __arm_vmlsdavxq_p_s8(__a, __b, __p)
@@ -756,29 +633,9 @@
 #define vmladavq_p_u16(__a, __b, __p) __arm_vmladavq_p_u16(__a, __b, __p)
 #define vmladavaq_u16(__a, __b, __c) __arm_vmladavaq_u16(__a, __b, __c)
 #define vdupq_m_n_u16(__inactive, __a, __p) __arm_vdupq_m_n_u16(__inactive, __a, __p)
-#define vcmpneq_m_u16(__a, __b, __p) __arm_vcmpneq_m_u16(__a, __b, __p)
-#define vcmpneq_m_n_u16(__a, __b, __p) __arm_vcmpneq_m_n_u16(__a, __b, __p)
-#define vcmphiq_m_u16(__a, __b, __p) __arm_vcmphiq_m_u16(__a, __b, __p)
-#define vcmphiq_m_n_u16(__a, __b, __p) __arm_vcmphiq_m_n_u16(__a, __b, __p)
-#define vcmpeqq_m_u16(__a, __b, __p) __arm_vcmpeqq_m_u16(__a, __b, __p)
-#define vcmpeqq_m_n_u16(__a, __b, __p) __arm_vcmpeqq_m_n_u16(__a, __b, __p)
-#define vcmpcsq_m_u16(__a, __b, __p) __arm_vcmpcsq_m_u16(__a, __b, __p)
-#define vcmpcsq_m_n_u16(__a, __b, __p) __arm_vcmpcsq_m_n_u16(__a, __b, __p)
 #define vaddvaq_p_u16(__a, __b, __p) __arm_vaddvaq_p_u16(__a, __b, __p)
 #define vsriq_n_u16(__a, __b,  __imm) __arm_vsriq_n_u16(__a, __b,  __imm)
 #define vsliq_n_u16(__a, __b,  __imm) __arm_vsliq_n_u16(__a, __b,  __imm)
-#define vcmpneq_m_s16(__a, __b, __p) __arm_vcmpneq_m_s16(__a, __b, __p)
-#define vcmpneq_m_n_s16(__a, __b, __p) __arm_vcmpneq_m_n_s16(__a, __b, __p)
-#define vcmpltq_m_s16(__a, __b, __p) __arm_vcmpltq_m_s16(__a, __b, __p)
-#define vcmpltq_m_n_s16(__a, __b, __p) __arm_vcmpltq_m_n_s16(__a, __b, __p)
-#define vcmpleq_m_s16(__a, __b, __p) __arm_vcmpleq_m_s16(__a, __b, __p)
-#define vcmpleq_m_n_s16(__a, __b, __p) __arm_vcmpleq_m_n_s16(__a, __b, __p)
-#define vcmpgtq_m_s16(__a, __b, __p) __arm_vcmpgtq_m_s16(__a, __b, __p)
-#define vcmpgtq_m_n_s16(__a, __b, __p) __arm_vcmpgtq_m_n_s16(__a, __b, __p)
-#define vcmpgeq_m_s16(__a, __b, __p) __arm_vcmpgeq_m_s16(__a, __b, __p)
-#define vcmpgeq_m_n_s16(__a, __b, __p) __arm_vcmpgeq_m_n_s16(__a, __b, __p)
-#define vcmpeqq_m_s16(__a, __b, __p) __arm_vcmpeqq_m_s16(__a, __b, __p)
-#define vcmpeqq_m_n_s16(__a, __b, __p) __arm_vcmpeqq_m_n_s16(__a, __b, __p)
 #define vrev64q_m_s16(__inactive, __a, __p) __arm_vrev64q_m_s16(__inactive, __a, __p)
 #define vmvnq_m_s16(__inactive, __a, __p) __arm_vmvnq_m_s16(__inactive, __a, __p)
 #define vmlsdavxq_p_s16(__a, __b, __p) __arm_vmlsdavxq_p_s16(__a, __b, __p)
@@ -816,29 +673,9 @@
 #define vmladavq_p_u32(__a, __b, __p) __arm_vmladavq_p_u32(__a, __b, __p)
 #define vmladavaq_u32(__a, __b, __c) __arm_vmladavaq_u32(__a, __b, __c)
 #define vdupq_m_n_u32(__inactive, __a, __p) __arm_vdupq_m_n_u32(__inactive, __a, __p)
-#define vcmpneq_m_u32(__a, __b, __p) __arm_vcmpneq_m_u32(__a, __b, __p)
-#define vcmpneq_m_n_u32(__a, __b, __p) __arm_vcmpneq_m_n_u32(__a, __b, __p)
-#define vcmphiq_m_u32(__a, __b, __p) __arm_vcmphiq_m_u32(__a, __b, __p)
-#define vcmphiq_m_n_u32(__a, __b, __p) __arm_vcmphiq_m_n_u32(__a, __b, __p)
-#define vcmpeqq_m_u32(__a, __b, __p) __arm_vcmpeqq_m_u32(__a, __b, __p)
-#define vcmpeqq_m_n_u32(__a, __b, __p) __arm_vcmpeqq_m_n_u32(__a, __b, __p)
-#define vcmpcsq_m_u32(__a, __b, __p) __arm_vcmpcsq_m_u32(__a, __b, __p)
-#define vcmpcsq_m_n_u32(__a, __b, __p) __arm_vcmpcsq_m_n_u32(__a, __b, __p)
 #define vaddvaq_p_u32(__a, __b, __p) __arm_vaddvaq_p_u32(__a, __b, __p)
 #define vsriq_n_u32(__a, __b,  __imm) __arm_vsriq_n_u32(__a, __b,  __imm)
 #define vsliq_n_u32(__a, __b,  __imm) __arm_vsliq_n_u32(__a, __b,  __imm)
-#define vcmpneq_m_s32(__a, __b, __p) __arm_vcmpneq_m_s32(__a, __b, __p)
-#define vcmpneq_m_n_s32(__a, __b, __p) __arm_vcmpneq_m_n_s32(__a, __b, __p)
-#define vcmpltq_m_s32(__a, __b, __p) __arm_vcmpltq_m_s32(__a, __b, __p)
-#define vcmpltq_m_n_s32(__a, __b, __p) __arm_vcmpltq_m_n_s32(__a, __b, __p)
-#define vcmpleq_m_s32(__a, __b, __p) __arm_vcmpleq_m_s32(__a, __b, __p)
-#define vcmpleq_m_n_s32(__a, __b, __p) __arm_vcmpleq_m_n_s32(__a, __b, __p)
-#define vcmpgtq_m_s32(__a, __b, __p) __arm_vcmpgtq_m_s32(__a, __b, __p)
-#define vcmpgtq_m_n_s32(__a, __b, __p) __arm_vcmpgtq_m_n_s32(__a, __b, __p)
-#define vcmpgeq_m_s32(__a, __b, __p) __arm_vcmpgeq_m_s32(__a, __b, __p)
-#define vcmpgeq_m_n_s32(__a, __b, __p) __arm_vcmpgeq_m_n_s32(__a, __b, __p)
-#define vcmpeqq_m_s32(__a, __b, __p) __arm_vcmpeqq_m_s32(__a, __b, __p)
-#define vcmpeqq_m_n_s32(__a, __b, __p) __arm_vcmpeqq_m_n_s32(__a, __b, __p)
 #define vrev64q_m_s32(__inactive, __a, __p) __arm_vrev64q_m_s32(__inactive, __a, __p)
 #define vmvnq_m_s32(__inactive, __a, __p) __arm_vmvnq_m_s32(__inactive, __a, __p)
 #define vmlsdavxq_p_s32(__a, __b, __p) __arm_vmlsdavxq_p_s32(__a, __b, __p)
@@ -913,17 +750,6 @@
 #define vpselq_f16(__a, __b, __p) __arm_vpselq_f16(__a, __b, __p)
 #define vrev32q_m_s8(__inactive, __a, __p) __arm_vrev32q_m_s8(__inactive, __a, __p)
 #define vrev64q_m_f16(__inactive, __a, __p) __arm_vrev64q_m_f16(__inactive, __a, __p)
-#define vcmpeqq_m_n_f16(__a, __b, __p) __arm_vcmpeqq_m_n_f16(__a, __b, __p)
-#define vcmpgeq_m_f16(__a, __b, __p) __arm_vcmpgeq_m_f16(__a, __b, __p)
-#define vcmpgeq_m_n_f16(__a, __b, __p) __arm_vcmpgeq_m_n_f16(__a, __b, __p)
-#define vcmpgtq_m_f16(__a, __b, __p) __arm_vcmpgtq_m_f16(__a, __b, __p)
-#define vcmpgtq_m_n_f16(__a, __b, __p) __arm_vcmpgtq_m_n_f16(__a, __b, __p)
-#define vcmpleq_m_f16(__a, __b, __p) __arm_vcmpleq_m_f16(__a, __b, __p)
-#define vcmpleq_m_n_f16(__a, __b, __p) __arm_vcmpleq_m_n_f16(__a, __b, __p)
-#define vcmpltq_m_f16(__a, __b, __p) __arm_vcmpltq_m_f16(__a, __b, __p)
-#define vcmpltq_m_n_f16(__a, __b, __p) __arm_vcmpltq_m_n_f16(__a, __b, __p)
-#define vcmpneq_m_f16(__a, __b, __p) __arm_vcmpneq_m_f16(__a, __b, __p)
-#define vcmpneq_m_n_f16(__a, __b, __p) __arm_vcmpneq_m_n_f16(__a, __b, __p)
 #define vmvnq_m_n_u16(__inactive,  __imm, __p) __arm_vmvnq_m_n_u16(__inactive,  __imm, __p)
 #define vcvtmq_m_u16_f16(__inactive, __a, __p) __arm_vcvtmq_m_u16_f16(__inactive, __a, __p)
 #define vcvtnq_m_u16_f16(__inactive, __a, __p) __arm_vcvtnq_m_u16_f16(__inactive, __a, __p)
@@ -961,17 +787,6 @@
 #define vpselq_f32(__a, __b, __p) __arm_vpselq_f32(__a, __b, __p)
 #define vrev32q_m_s16(__inactive, __a, __p) __arm_vrev32q_m_s16(__inactive, __a, __p)
 #define vrev64q_m_f32(__inactive, __a, __p) __arm_vrev64q_m_f32(__inactive, __a, __p)
-#define vcmpeqq_m_n_f32(__a, __b, __p) __arm_vcmpeqq_m_n_f32(__a, __b, __p)
-#define vcmpgeq_m_f32(__a, __b, __p) __arm_vcmpgeq_m_f32(__a, __b, __p)
-#define vcmpgeq_m_n_f32(__a, __b, __p) __arm_vcmpgeq_m_n_f32(__a, __b, __p)
-#define vcmpgtq_m_f32(__a, __b, __p) __arm_vcmpgtq_m_f32(__a, __b, __p)
-#define vcmpgtq_m_n_f32(__a, __b, __p) __arm_vcmpgtq_m_n_f32(__a, __b, __p)
-#define vcmpleq_m_f32(__a, __b, __p) __arm_vcmpleq_m_f32(__a, __b, __p)
-#define vcmpleq_m_n_f32(__a, __b, __p) __arm_vcmpleq_m_n_f32(__a, __b, __p)
-#define vcmpltq_m_f32(__a, __b, __p) __arm_vcmpltq_m_f32(__a, __b, __p)
-#define vcmpltq_m_n_f32(__a, __b, __p) __arm_vcmpltq_m_n_f32(__a, __b, __p)
-#define vcmpneq_m_f32(__a, __b, __p) __arm_vcmpneq_m_f32(__a, __b, __p)
-#define vcmpneq_m_n_f32(__a, __b, __p) __arm_vcmpneq_m_n_f32(__a, __b, __p)
 #define vmvnq_m_n_u32(__inactive,  __imm, __p) __arm_vmvnq_m_n_u32(__inactive,  __imm, __p)
 #define vcvtmq_m_u32_f32(__inactive, __a, __p) __arm_vcvtmq_m_u32_f32(__inactive, __a, __p)
 #define vcvtnq_m_u32_f32(__inactive, __a, __p) __arm_vcvtnq_m_u32_f32(__inactive, __a, __p)
@@ -2149,48 +1964,6 @@ __arm_vaddlvq_p_u32 (uint32x4_t __a, mve_pred16_t __p)
   return __builtin_mve_vaddlvq_p_uv4si (__a, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vcmpneq_v16qi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vcmpneq_v8hi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vcmpneq_v4si (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_u8 (uint8x16_t __a, uint8x16_t __b)
-{
-  return __builtin_mve_vcmpneq_v16qi ((int8x16_t)__a, (int8x16_t)__b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_u16 (uint16x8_t __a, uint16x8_t __b)
-{
-  return __builtin_mve_vcmpneq_v8hi ((int16x8_t)__a, (int16x8_t)__b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_u32 (uint32x4_t __a, uint32x4_t __b)
-{
-  return __builtin_mve_vcmpneq_v4si ((int32x4_t)__a, (int32x4_t)__b);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_u8 (uint8x16_t __a, uint8x16_t __b)
@@ -2219,55 +1992,6 @@ __arm_vmladavq_u8 (uint8x16_t __a, uint8x16_t __b)
   return __builtin_mve_vmladavq_uv16qi (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_n_u8 (uint8x16_t __a, uint8_t __b)
-{
-  return __builtin_mve_vcmpneq_n_v16qi ((int8x16_t)__a, (int8_t)__b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq_u8 (uint8x16_t __a, uint8x16_t __b)
-{
-  return __builtin_mve_vcmphiq_v16qi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq_n_u8 (uint8x16_t __a, uint8_t __b)
-{
-  return __builtin_mve_vcmphiq_n_v16qi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_u8 (uint8x16_t __a, uint8x16_t __b)
-{
-  return __builtin_mve_vcmpeqq_v16qi ((int8x16_t)__a, (int8x16_t)__b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_n_u8 (uint8x16_t __a, uint8_t __b)
-{
-  return __builtin_mve_vcmpeqq_n_v16qi ((int8x16_t)__a, (int8_t)__b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq_u8 (uint8x16_t __a, uint8x16_t __b)
-{
-  return __builtin_mve_vcmpcsq_v16qi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq_n_u8 (uint8x16_t __a, uint8_t __b)
-{
-  return __builtin_mve_vcmpcsq_n_v16qi (__a, __b);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90_u8 (uint8x16_t __a, uint8x16_t __b)
@@ -2312,83 +2036,6 @@ __arm_vbrsrq_n_u8 (uint8x16_t __a, int32_t __b)
   return __builtin_mve_vbrsrq_n_uv16qi (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_n_s8 (int8x16_t __a, int8_t __b)
-{
-  return __builtin_mve_vcmpneq_n_v16qi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vcmpltq_v16qi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_n_s8 (int8x16_t __a, int8_t __b)
-{
-  return __builtin_mve_vcmpltq_n_v16qi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vcmpleq_v16qi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_n_s8 (int8x16_t __a, int8_t __b)
-{
-  return __builtin_mve_vcmpleq_n_v16qi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vcmpgtq_v16qi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_n_s8 (int8x16_t __a, int8_t __b)
-{
-  return __builtin_mve_vcmpgtq_n_v16qi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vcmpgeq_v16qi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_n_s8 (int8x16_t __a, int8_t __b)
-{
-  return __builtin_mve_vcmpgeq_n_v16qi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vcmpeqq_v16qi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_n_s8 (int8x16_t __a, int8_t __b)
-{
-  return __builtin_mve_vcmpeqq_n_v16qi (__a, __b);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshluq_n_s8 (int8x16_t __a, const int __imm)
@@ -2529,55 +2176,6 @@ __arm_vmladavq_u16 (uint16x8_t __a, uint16x8_t __b)
   return __builtin_mve_vmladavq_uv8hi (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_n_u16 (uint16x8_t __a, uint16_t __b)
-{
-  return __builtin_mve_vcmpneq_n_v8hi ((int16x8_t)__a, (int16_t)__b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq_u16 (uint16x8_t __a, uint16x8_t __b)
-{
-  return __builtin_mve_vcmphiq_v8hi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq_n_u16 (uint16x8_t __a, uint16_t __b)
-{
-  return __builtin_mve_vcmphiq_n_v8hi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_u16 (uint16x8_t __a, uint16x8_t __b)
-{
-  return __builtin_mve_vcmpeqq_v8hi ((int16x8_t)__a, (int16x8_t)__b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_n_u16 (uint16x8_t __a, uint16_t __b)
-{
-  return __builtin_mve_vcmpeqq_n_v8hi ((int16x8_t)__a, (int16_t)__b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq_u16 (uint16x8_t __a, uint16x8_t __b)
-{
-  return __builtin_mve_vcmpcsq_v8hi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq_n_u16 (uint16x8_t __a, uint16_t __b)
-{
-  return __builtin_mve_vcmpcsq_n_v8hi (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90_u16 (uint16x8_t __a, uint16x8_t __b)
@@ -2622,83 +2220,6 @@ __arm_vbrsrq_n_u16 (uint16x8_t __a, int32_t __b)
   return __builtin_mve_vbrsrq_n_uv8hi (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_n_s16 (int16x8_t __a, int16_t __b)
-{
-  return __builtin_mve_vcmpneq_n_v8hi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vcmpltq_v8hi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_n_s16 (int16x8_t __a, int16_t __b)
-{
-  return __builtin_mve_vcmpltq_n_v8hi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vcmpleq_v8hi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_n_s16 (int16x8_t __a, int16_t __b)
-{
-  return __builtin_mve_vcmpleq_n_v8hi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vcmpgtq_v8hi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_n_s16 (int16x8_t __a, int16_t __b)
-{
-  return __builtin_mve_vcmpgtq_n_v8hi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vcmpgeq_v8hi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_n_s16 (int16x8_t __a, int16_t __b)
-{
-  return __builtin_mve_vcmpgeq_n_v8hi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vcmpeqq_v8hi (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_n_s16 (int16x8_t __a, int16_t __b)
-{
-  return __builtin_mve_vcmpeqq_n_v8hi (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshluq_n_s16 (int16x8_t __a, const int __imm)
@@ -2839,55 +2360,6 @@ __arm_vmladavq_u32 (uint32x4_t __a, uint32x4_t __b)
   return __builtin_mve_vmladavq_uv4si (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_n_u32 (uint32x4_t __a, uint32_t __b)
-{
-  return __builtin_mve_vcmpneq_n_v4si ((int32x4_t)__a, (int32_t)__b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq_u32 (uint32x4_t __a, uint32x4_t __b)
-{
-  return __builtin_mve_vcmphiq_v4si (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq_n_u32 (uint32x4_t __a, uint32_t __b)
-{
-  return __builtin_mve_vcmphiq_n_v4si (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_u32 (uint32x4_t __a, uint32x4_t __b)
-{
-  return __builtin_mve_vcmpeqq_v4si ((int32x4_t)__a, (int32x4_t)__b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_n_u32 (uint32x4_t __a, uint32_t __b)
-{
-  return __builtin_mve_vcmpeqq_n_v4si ((int32x4_t)__a, (int32_t)__b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq_u32 (uint32x4_t __a, uint32x4_t __b)
-{
-  return __builtin_mve_vcmpcsq_v4si (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq_n_u32 (uint32x4_t __a, uint32_t __b)
-{
-  return __builtin_mve_vcmpcsq_n_v4si (__a, __b);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90_u32 (uint32x4_t __a, uint32x4_t __b)
@@ -2932,100 +2404,23 @@ __arm_vbrsrq_n_u32 (uint32x4_t __a, int32_t __b)
   return __builtin_mve_vbrsrq_n_uv4si (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_n_s32 (int32x4_t __a, int32_t __b)
+__arm_vqshluq_n_s32 (int32x4_t __a, const int __imm)
 {
-  return __builtin_mve_vcmpneq_n_v4si (__a, __b);
+  return __builtin_mve_vqshluq_n_sv4si (__a, __imm);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_s32 (int32x4_t __a, int32x4_t __b)
+__arm_vaddvq_p_s32 (int32x4_t __a, mve_pred16_t __p)
 {
-  return __builtin_mve_vcmpltq_v4si (__a, __b);
+  return __builtin_mve_vaddvq_p_sv4si (__a, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_n_s32 (int32x4_t __a, int32_t __b)
-{
-  return __builtin_mve_vcmpltq_n_v4si (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vcmpleq_v4si (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_n_s32 (int32x4_t __a, int32_t __b)
-{
-  return __builtin_mve_vcmpleq_n_v4si (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vcmpgtq_v4si (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_n_s32 (int32x4_t __a, int32_t __b)
-{
-  return __builtin_mve_vcmpgtq_n_v4si (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vcmpgeq_v4si (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_n_s32 (int32x4_t __a, int32_t __b)
-{
-  return __builtin_mve_vcmpgeq_n_v4si (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vcmpeqq_v4si (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_n_s32 (int32x4_t __a, int32_t __b)
-{
-  return __builtin_mve_vcmpeqq_n_v4si (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshluq_n_s32 (int32x4_t __a, const int __imm)
-{
-  return __builtin_mve_vqshluq_n_sv4si (__a, __imm);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_p_s32 (int32x4_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vaddvq_p_sv4si (__a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_s32 (int32x4_t __a, int32x4_t __b)
+__arm_vornq_s32 (int32x4_t __a, int32x4_t __b)
 {
   return __builtin_mve_vornq_sv4si (__a, __b);
 }
@@ -3581,62 +2976,6 @@ __arm_vdupq_m_n_u8 (uint8x16_t __inactive, uint8_t __a, mve_pred16_t __p)
   return __builtin_mve_vdupq_m_n_uv16qi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m_u8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpneq_m_uv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m_n_u8 (uint8x16_t __a, uint8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpneq_m_n_uv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq_m_u8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmphiq_m_uv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq_m_n_u8 (uint8x16_t __a, uint8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmphiq_m_n_uv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m_u8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpeqq_m_uv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m_n_u8 (uint8x16_t __a, uint8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpeqq_m_n_uv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq_m_u8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpcsq_m_uv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq_m_n_u8 (uint8x16_t __a, uint8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpcsq_m_n_uv16qi (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq_p_u8 (uint32_t __a, uint8x16_t __b, mve_pred16_t __p)
@@ -3658,90 +2997,6 @@ __arm_vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm)
   return __builtin_mve_vsliq_n_uv16qi (__a, __b, __imm);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpneq_m_sv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m_n_s8 (int8x16_t __a, int8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpneq_m_n_sv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpltq_m_sv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m_n_s8 (int8x16_t __a, int8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpltq_m_n_sv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpleq_m_sv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m_n_s8 (int8x16_t __a, int8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpleq_m_n_sv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpgtq_m_sv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m_n_s8 (int8x16_t __a, int8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpgtq_m_n_sv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpgeq_m_sv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m_n_s8 (int8x16_t __a, int8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpgeq_m_n_sv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpeqq_m_sv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m_n_s8 (int8x16_t __a, int8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpeqq_m_n_sv16qi (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrev64q_m_s8 (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
@@ -4001,301 +3256,161 @@ __arm_vdupq_m_n_u16 (uint16x8_t __inactive, uint16_t __a, mve_pred16_t __p)
   return __builtin_mve_vdupq_m_n_uv8hi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m_u16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
+__arm_vaddvaq_p_u16 (uint32_t __a, uint16x8_t __b, mve_pred16_t __p)
 {
-  return __builtin_mve_vcmpneq_m_uv8hi (__a, __b, __p);
+  return __builtin_mve_vaddvaq_p_uv8hi (__a, __b, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m_n_u16 (uint16x8_t __a, uint16_t __b, mve_pred16_t __p)
+__arm_vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm)
 {
-  return __builtin_mve_vcmpneq_m_n_uv8hi (__a, __b, __p);
+  return __builtin_mve_vsriq_n_uv8hi (__a, __b, __imm);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq_m_u16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
+__arm_vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm)
 {
-  return __builtin_mve_vcmphiq_m_uv8hi (__a, __b, __p);
+  return __builtin_mve_vsliq_n_uv8hi (__a, __b, __imm);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq_m_n_u16 (uint16x8_t __a, uint16_t __b, mve_pred16_t __p)
+__arm_vrev64q_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
 {
-  return __builtin_mve_vcmphiq_m_n_uv8hi (__a, __b, __p);
+  return __builtin_mve_vrev64q_m_sv8hi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m_u16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
+__arm_vmvnq_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
 {
-  return __builtin_mve_vcmpeqq_m_uv8hi (__a, __b, __p);
+  return __builtin_mve_vmvnq_m_sv8hi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m_n_u16 (uint16x8_t __a, uint16_t __b, mve_pred16_t __p)
+__arm_vmlsdavxq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
 {
-  return __builtin_mve_vcmpeqq_m_n_uv8hi (__a, __b, __p);
+  return __builtin_mve_vmlsdavxq_p_sv8hi (__a, __b, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq_m_u16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
+__arm_vmlsdavq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
 {
-  return __builtin_mve_vcmpcsq_m_uv8hi (__a, __b, __p);
+  return __builtin_mve_vmlsdavq_p_sv8hi (__a, __b, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq_m_n_u16 (uint16x8_t __a, uint16_t __b, mve_pred16_t __p)
+__arm_vmladavxq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
 {
-  return __builtin_mve_vcmpcsq_m_n_uv8hi (__a, __b, __p);
+  return __builtin_mve_vmladavxq_p_sv8hi (__a, __b, __p);
 }
 
-__extension__ extern __inline uint32_t
+__extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_p_u16 (uint32_t __a, uint16x8_t __b, mve_pred16_t __p)
+__arm_vmladavq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
 {
-  return __builtin_mve_vaddvaq_p_uv8hi (__a, __b, __p);
+  return __builtin_mve_vmladavq_p_sv8hi (__a, __b, __p);
 }
 
-__extension__ extern __inline uint16x8_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm)
+__arm_vdupq_m_n_s16 (int16x8_t __inactive, int16_t __a, mve_pred16_t __p)
 {
-  return __builtin_mve_vsriq_n_uv8hi (__a, __b, __imm);
+  return __builtin_mve_vdupq_m_n_sv8hi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline uint16x8_t
+__extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm)
+__arm_vaddvaq_p_s16 (int32_t __a, int16x8_t __b, mve_pred16_t __p)
 {
-  return __builtin_mve_vsliq_n_uv8hi (__a, __b, __imm);
+  return __builtin_mve_vaddvaq_p_sv8hi (__a, __b, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
+__arm_vqrdmlsdhxq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
 {
-  return __builtin_mve_vcmpneq_m_sv8hi (__a, __b, __p);
+  return __builtin_mve_vqrdmlsdhxq_sv8hi (__inactive, __a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m_n_s16 (int16x8_t __a, int16_t __b, mve_pred16_t __p)
+__arm_vqrdmlsdhq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
 {
-  return __builtin_mve_vcmpneq_m_n_sv8hi (__a, __b, __p);
+  return __builtin_mve_vqrdmlsdhq_sv8hi (__inactive, __a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
+__arm_vqrdmlashq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
 {
-  return __builtin_mve_vcmpltq_m_sv8hi (__a, __b, __p);
+  return __builtin_mve_vqrdmlashq_n_sv8hi (__a, __b, __c);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m_n_s16 (int16x8_t __a, int16_t __b, mve_pred16_t __p)
+__arm_vqdmlashq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
 {
-  return __builtin_mve_vcmpltq_m_n_sv8hi (__a, __b, __p);
+  return __builtin_mve_vqdmlashq_n_sv8hi (__a, __b, __c);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
+__arm_vqrdmlahq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
 {
-  return __builtin_mve_vcmpleq_m_sv8hi (__a, __b, __p);
+  return __builtin_mve_vqrdmlahq_n_sv8hi (__a, __b, __c);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m_n_s16 (int16x8_t __a, int16_t __b, mve_pred16_t __p)
+__arm_vqrdmladhxq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
 {
-  return __builtin_mve_vcmpleq_m_n_sv8hi (__a, __b, __p);
+  return __builtin_mve_vqrdmladhxq_sv8hi (__inactive, __a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
+__arm_vqrdmladhq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
 {
-  return __builtin_mve_vcmpgtq_m_sv8hi (__a, __b, __p);
+  return __builtin_mve_vqrdmladhq_sv8hi (__inactive, __a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m_n_s16 (int16x8_t __a, int16_t __b, mve_pred16_t __p)
+__arm_vqdmlsdhxq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
 {
-  return __builtin_mve_vcmpgtq_m_n_sv8hi (__a, __b, __p);
+  return __builtin_mve_vqdmlsdhxq_sv8hi (__inactive, __a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
+__arm_vqdmlsdhq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
 {
-  return __builtin_mve_vcmpgeq_m_sv8hi (__a, __b, __p);
+  return __builtin_mve_vqdmlsdhq_sv8hi (__inactive, __a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m_n_s16 (int16x8_t __a, int16_t __b, mve_pred16_t __p)
+__arm_vqdmlahq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
 {
-  return __builtin_mve_vcmpgeq_m_n_sv8hi (__a, __b, __p);
+  return __builtin_mve_vqdmlahq_n_sv8hi (__a, __b, __c);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
+__arm_vqdmladhxq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
 {
-  return __builtin_mve_vcmpeqq_m_sv8hi (__a, __b, __p);
+  return __builtin_mve_vqdmladhxq_sv8hi (__inactive, __a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m_n_s16 (int16x8_t __a, int16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpeqq_m_n_sv8hi (__a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_sv8hi (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_sv8hi (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavxq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsdavxq_p_sv8hi (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmlsdavq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmlsdavq_p_sv8hi (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavxq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavxq_p_sv8hi (__a, __b, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmladavq_p_sv8hi (__a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_m_n_s16 (int16x8_t __inactive, int16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vdupq_m_n_sv8hi (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_p_s16 (int32_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vaddvaq_p_sv8hi (__a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhxq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqrdmlsdhxq_sv8hi (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlsdhq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqrdmlsdhq_sv8hi (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlashq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
-{
-  return __builtin_mve_vqrdmlashq_n_sv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlashq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
-{
-  return __builtin_mve_vqdmlashq_n_sv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmlahq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
-{
-  return __builtin_mve_vqrdmlahq_n_sv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhxq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqrdmladhxq_sv8hi (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqrdmladhq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqrdmladhq_sv8hi (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhxq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqdmlsdhxq_sv8hi (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlsdhq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqdmlsdhq_sv8hi (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmlahq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c)
-{
-  return __builtin_mve_vqdmlahq_n_sv8hi (__a, __b, __c);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmladhxq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqdmladhxq_sv8hi (__inactive, __a, __b);
-}
-
-__extension__ extern __inline int16x8_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqdmladhq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
 {
@@ -4421,62 +3536,6 @@ __arm_vdupq_m_n_u32 (uint32x4_t __inactive, uint32_t __a, mve_pred16_t __p)
   return __builtin_mve_vdupq_m_n_uv4si (__inactive, __a, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpneq_m_uv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m_n_u32 (uint32x4_t __a, uint32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpneq_m_n_uv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq_m_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmphiq_m_uv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq_m_n_u32 (uint32x4_t __a, uint32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmphiq_m_n_uv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpeqq_m_uv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m_n_u32 (uint32x4_t __a, uint32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpeqq_m_n_uv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq_m_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpcsq_m_uv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq_m_n_u32 (uint32x4_t __a, uint32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpcsq_m_n_uv4si (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq_p_u32 (uint32_t __a, uint32x4_t __b, mve_pred16_t __p)
@@ -4498,90 +3557,6 @@ __arm_vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm)
   return __builtin_mve_vsliq_n_uv4si (__a, __b, __imm);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpneq_m_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m_n_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpneq_m_n_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpltq_m_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m_n_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpltq_m_n_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpleq_m_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m_n_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpleq_m_n_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpgtq_m_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m_n_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpgtq_m_n_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpgeq_m_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m_n_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpgeq_m_n_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpeqq_m_sv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m_n_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpeqq_m_n_sv4si (__a, __b, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrev64q_m_s32 (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
@@ -9775,119 +8750,35 @@ __arm_vcvtq_n_u32_f32 (float32x4_t __a, const int __imm6)
   return __builtin_mve_vcvtq_n_from_f_uv4si (__a, __imm6);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_n_f16 (float16x8_t __a, float16_t __b)
+__arm_vornq_f16 (float16x8_t __a, float16x8_t __b)
 {
-  return __builtin_mve_vcmpneq_n_fv8hf (__a, __b);
+  return __builtin_mve_vornq_fv8hf (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_f16 (float16x8_t __a, float16x8_t __b)
+__arm_vcmulq_rot90_f16 (float16x8_t __a, float16x8_t __b)
 {
-  return __builtin_mve_vcmpneq_fv8hf (__a, __b);
+  return __builtin_mve_vcmulq_rot90v8hf (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_n_f16 (float16x8_t __a, float16_t __b)
+__arm_vcmulq_rot270_f16 (float16x8_t __a, float16x8_t __b)
 {
-  return __builtin_mve_vcmpltq_n_fv8hf (__a, __b);
+  return __builtin_mve_vcmulq_rot270v8hf (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_f16 (float16x8_t __a, float16x8_t __b)
+__arm_vcmulq_rot180_f16 (float16x8_t __a, float16x8_t __b)
 {
-  return __builtin_mve_vcmpltq_fv8hf (__a, __b);
+  return __builtin_mve_vcmulq_rot180v8hf (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_n_f16 (float16x8_t __a, float16_t __b)
-{
-  return __builtin_mve_vcmpleq_n_fv8hf (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_f16 (float16x8_t __a, float16x8_t __b)
-{
-  return __builtin_mve_vcmpleq_fv8hf (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_n_f16 (float16x8_t __a, float16_t __b)
-{
-  return __builtin_mve_vcmpgtq_n_fv8hf (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_f16 (float16x8_t __a, float16x8_t __b)
-{
-  return __builtin_mve_vcmpgtq_fv8hf (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_n_f16 (float16x8_t __a, float16_t __b)
-{
-  return __builtin_mve_vcmpgeq_n_fv8hf (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_f16 (float16x8_t __a, float16x8_t __b)
-{
-  return __builtin_mve_vcmpgeq_fv8hf (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_n_f16 (float16x8_t __a, float16_t __b)
-{
-  return __builtin_mve_vcmpeqq_n_fv8hf (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_f16 (float16x8_t __a, float16x8_t __b)
-{
-  return __builtin_mve_vcmpeqq_fv8hf (__a, __b);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_f16 (float16x8_t __a, float16x8_t __b)
-{
-  return __builtin_mve_vornq_fv8hf (__a, __b);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmulq_rot90_f16 (float16x8_t __a, float16x8_t __b)
-{
-  return __builtin_mve_vcmulq_rot90v8hf (__a, __b);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmulq_rot270_f16 (float16x8_t __a, float16x8_t __b)
-{
-  return __builtin_mve_vcmulq_rot270v8hf (__a, __b);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmulq_rot180_f16 (float16x8_t __a, float16x8_t __b)
-{
-  return __builtin_mve_vcmulq_rot180v8hf (__a, __b);
-}
-
-__extension__ extern __inline float16x8_t
+__extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmulq_f16 (float16x8_t __a, float16x8_t __b)
 {
@@ -9915,90 +8806,6 @@ __arm_vbicq_f16 (float16x8_t __a, float16x8_t __b)
   return __builtin_mve_vbicq_fv8hf (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_n_f32 (float32x4_t __a, float32_t __b)
-{
-  return __builtin_mve_vcmpneq_n_fv4sf (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_f32 (float32x4_t __a, float32x4_t __b)
-{
-  return __builtin_mve_vcmpneq_fv4sf (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_n_f32 (float32x4_t __a, float32_t __b)
-{
-  return __builtin_mve_vcmpltq_n_fv4sf (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_f32 (float32x4_t __a, float32x4_t __b)
-{
-  return __builtin_mve_vcmpltq_fv4sf (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_n_f32 (float32x4_t __a, float32_t __b)
-{
-  return __builtin_mve_vcmpleq_n_fv4sf (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_f32 (float32x4_t __a, float32x4_t __b)
-{
-  return __builtin_mve_vcmpleq_fv4sf (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_n_f32 (float32x4_t __a, float32_t __b)
-{
-  return __builtin_mve_vcmpgtq_n_fv4sf (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_f32 (float32x4_t __a, float32x4_t __b)
-{
-  return __builtin_mve_vcmpgtq_fv4sf (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_n_f32 (float32x4_t __a, float32_t __b)
-{
-  return __builtin_mve_vcmpgeq_n_fv4sf (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_f32 (float32x4_t __a, float32x4_t __b)
-{
-  return __builtin_mve_vcmpgeq_fv4sf (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_n_f32 (float32x4_t __a, float32_t __b)
-{
-  return __builtin_mve_vcmpeqq_n_fv4sf (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_f32 (float32x4_t __a, float32x4_t __b)
-{
-  return __builtin_mve_vcmpeqq_fv4sf (__a, __b);
-}
-
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_f32 (float32x4_t __a, float32x4_t __b)
@@ -10069,20 +8876,6 @@ __arm_vcvtbq_f16_f32 (float16x8_t __a, float32x4_t __b)
   return __builtin_mve_vcvtbq_f16_f32v8hf (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpeqq_m_fv8hf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpeqq_m_fv4sf (__a, __b, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtaq_m_s16_f16 (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
@@ -10280,83 +9073,6 @@ __arm_vrev64q_m_f16 (float16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
   return __builtin_mve_vrev64q_m_fv8hf (__inactive, __a, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m_n_f16 (float16x8_t __a, float16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpeqq_m_n_fv8hf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpgeq_m_fv8hf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m_n_f16 (float16x8_t __a, float16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpgeq_m_n_fv8hf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpgtq_m_fv8hf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m_n_f16 (float16x8_t __a, float16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpgtq_m_n_fv8hf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpleq_m_fv8hf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m_n_f16 (float16x8_t __a, float16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpleq_m_n_fv8hf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpltq_m_fv8hf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m_n_f16 (float16x8_t __a, float16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpltq_m_n_fv8hf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpneq_m_fv8hf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m_n_f16 (float16x8_t __a, float16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpneq_m_n_fv8hf (__a, __b, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m_u16_f16 (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
@@ -10490,83 +9206,6 @@ __arm_vrev64q_m_f32 (float32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
   return __builtin_mve_vrev64q_m_fv4sf (__inactive, __a, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m_n_f32 (float32x4_t __a, float32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpeqq_m_n_fv4sf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpgeq_m_fv4sf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m_n_f32 (float32x4_t __a, float32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpgeq_m_n_fv4sf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpgtq_m_fv4sf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m_n_f32 (float32x4_t __a, float32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpgtq_m_n_fv4sf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpleq_m_fv4sf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m_n_f32 (float32x4_t __a, float32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpleq_m_n_fv4sf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpltq_m_fv4sf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m_n_f32 (float32x4_t __a, float32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpltq_m_n_fv4sf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpneq_m_fv4sf (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m_n_f32 (float32x4_t __a, float32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vcmpneq_m_n_fv4sf (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m_u32_f32 (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
@@ -12000,60 +10639,18 @@ __arm_vaddlvq_p (uint32x4_t __a, mve_pred16_t __p)
  return __arm_vaddlvq_p_u32 (__a, __p);
 }
 
-__extension__ extern __inline int32_t
+__extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq (int8x16_t __a, int8x16_t __b)
+__arm_vornq (uint8x16_t __a, uint8x16_t __b)
 {
- return __arm_vcmpneq_s8 (__a, __b);
+ return __arm_vornq_u8 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq (int16x8_t __a, int16x8_t __b)
+__arm_vmulltq_int (uint8x16_t __a, uint8x16_t __b)
 {
- return __arm_vcmpneq_s16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vcmpneq_s32 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq (uint8x16_t __a, uint8x16_t __b)
-{
- return __arm_vcmpneq_u8 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq (uint16x8_t __a, uint16x8_t __b)
-{
- return __arm_vcmpneq_u16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vcmpneq_u32 (__a, __b);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq (uint8x16_t __a, uint8x16_t __b)
-{
- return __arm_vornq_u8 (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int (uint8x16_t __a, uint8x16_t __b)
-{
- return __arm_vmulltq_int_u8 (__a, __b);
+ return __arm_vmulltq_int_u8 (__a, __b);
 }
 
 __extension__ extern __inline uint16x8_t
@@ -12070,55 +10667,6 @@ __arm_vmladavq (uint8x16_t __a, uint8x16_t __b)
  return __arm_vmladavq_u8 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq (uint8x16_t __a, uint8_t __b)
-{
- return __arm_vcmpneq_n_u8 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq (uint8x16_t __a, uint8x16_t __b)
-{
- return __arm_vcmphiq_u8 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq (uint8x16_t __a, uint8_t __b)
-{
- return __arm_vcmphiq_n_u8 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq (uint8x16_t __a, uint8x16_t __b)
-{
- return __arm_vcmpeqq_u8 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq (uint8x16_t __a, uint8_t __b)
-{
- return __arm_vcmpeqq_n_u8 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq (uint8x16_t __a, uint8x16_t __b)
-{
- return __arm_vcmpcsq_u8 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq (uint8x16_t __a, uint8_t __b)
-{
- return __arm_vcmpcsq_n_u8 (__a, __b);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90 (uint8x16_t __a, uint8x16_t __b)
@@ -12161,83 +10709,6 @@ __arm_vbrsrq (uint8x16_t __a, int32_t __b)
  return __arm_vbrsrq_n_u8 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq (int8x16_t __a, int8_t __b)
-{
- return __arm_vcmpneq_n_s8 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vcmpltq_s8 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq (int8x16_t __a, int8_t __b)
-{
- return __arm_vcmpltq_n_s8 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vcmpleq_s8 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq (int8x16_t __a, int8_t __b)
-{
- return __arm_vcmpleq_n_s8 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vcmpgtq_s8 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq (int8x16_t __a, int8_t __b)
-{
- return __arm_vcmpgtq_n_s8 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vcmpgeq_s8 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq (int8x16_t __a, int8_t __b)
-{
- return __arm_vcmpgeq_n_s8 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vcmpeqq_s8 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq (int8x16_t __a, int8_t __b)
-{
- return __arm_vcmpeqq_n_s8 (__a, __b);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshluq (int8x16_t __a, const int __imm)
@@ -12378,55 +10849,6 @@ __arm_vmladavq (uint16x8_t __a, uint16x8_t __b)
  return __arm_vmladavq_u16 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq (uint16x8_t __a, uint16_t __b)
-{
- return __arm_vcmpneq_n_u16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq (uint16x8_t __a, uint16x8_t __b)
-{
- return __arm_vcmphiq_u16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq (uint16x8_t __a, uint16_t __b)
-{
- return __arm_vcmphiq_n_u16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq (uint16x8_t __a, uint16x8_t __b)
-{
- return __arm_vcmpeqq_u16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq (uint16x8_t __a, uint16_t __b)
-{
- return __arm_vcmpeqq_n_u16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq (uint16x8_t __a, uint16x8_t __b)
-{
- return __arm_vcmpcsq_u16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq (uint16x8_t __a, uint16_t __b)
-{
- return __arm_vcmpcsq_n_u16 (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90 (uint16x8_t __a, uint16x8_t __b)
@@ -12469,83 +10891,6 @@ __arm_vbrsrq (uint16x8_t __a, int32_t __b)
  return __arm_vbrsrq_n_u16 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq (int16x8_t __a, int16_t __b)
-{
- return __arm_vcmpneq_n_s16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vcmpltq_s16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq (int16x8_t __a, int16_t __b)
-{
- return __arm_vcmpltq_n_s16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vcmpleq_s16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq (int16x8_t __a, int16_t __b)
-{
- return __arm_vcmpleq_n_s16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vcmpgtq_s16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq (int16x8_t __a, int16_t __b)
-{
- return __arm_vcmpgtq_n_s16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vcmpgeq_s16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq (int16x8_t __a, int16_t __b)
-{
- return __arm_vcmpgeq_n_s16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vcmpeqq_s16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq (int16x8_t __a, int16_t __b)
-{
- return __arm_vcmpeqq_n_s16 (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqshluq (int16x8_t __a, const int __imm)
@@ -12644,214 +10989,88 @@ __arm_vbrsrq (int16x8_t __a, int32_t __b)
  return __arm_vbrsrq_n_s16 (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vbicq_s16 (__a, __b);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq (int32_t __a, int16x8_t __b)
-{
- return __arm_vaddvaq_s16 (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vornq_u32 (__a, __b);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vmulltq_int_u32 (__a, __b);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vmullbq_int_u32 (__a, __b);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmladavq (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vmladavq_u32 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq (uint32x4_t __a, uint32_t __b)
-{
- return __arm_vcmpneq_n_u32 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vcmphiq_u32 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq (uint32x4_t __a, uint32_t __b)
-{
- return __arm_vcmphiq_n_u32 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vcmpeqq_u32 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq (uint32x4_t __a, uint32_t __b)
-{
- return __arm_vcmpeqq_n_u32 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vcmpcsq_u32 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq (uint32x4_t __a, uint32_t __b)
-{
- return __arm_vcmpcsq_n_u32 (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcaddq_rot90 (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vcaddq_rot90_u32 (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcaddq_rot270 (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vcaddq_rot270_u32 (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vbicq_u32 (__a, __b);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_p (uint32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vaddvq_p_u32 (__a, __p);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq (uint32_t __a, uint32x4_t __b)
-{
- return __arm_vaddvaq_u32 (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
+__extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq (uint32x4_t __a, int32_t __b)
+__arm_vbicq (int16x8_t __a, int16x8_t __b)
 {
- return __arm_vbrsrq_n_u32 (__a, __b);
+ return __arm_vbicq_s16 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq (int32x4_t __a, int32_t __b)
+__arm_vaddvaq (int32_t __a, int16x8_t __b)
 {
- return __arm_vcmpneq_n_s32 (__a, __b);
+ return __arm_vaddvaq_s16 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq (int32x4_t __a, int32x4_t __b)
+__arm_vornq (uint32x4_t __a, uint32x4_t __b)
 {
- return __arm_vcmpltq_s32 (__a, __b);
+ return __arm_vornq_u32 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq (int32x4_t __a, int32_t __b)
+__arm_vmulltq_int (uint32x4_t __a, uint32x4_t __b)
 {
- return __arm_vcmpltq_n_s32 (__a, __b);
+ return __arm_vmulltq_int_u32 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq (int32x4_t __a, int32x4_t __b)
+__arm_vmullbq_int (uint32x4_t __a, uint32x4_t __b)
 {
- return __arm_vcmpleq_s32 (__a, __b);
+ return __arm_vmullbq_int_u32 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq (int32x4_t __a, int32_t __b)
+__arm_vmladavq (uint32x4_t __a, uint32x4_t __b)
 {
- return __arm_vcmpleq_n_s32 (__a, __b);
+ return __arm_vmladavq_u32 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq (int32x4_t __a, int32x4_t __b)
+__arm_vcaddq_rot90 (uint32x4_t __a, uint32x4_t __b)
 {
- return __arm_vcmpgtq_s32 (__a, __b);
+ return __arm_vcaddq_rot90_u32 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq (int32x4_t __a, int32_t __b)
+__arm_vcaddq_rot270 (uint32x4_t __a, uint32x4_t __b)
 {
- return __arm_vcmpgtq_n_s32 (__a, __b);
+ return __arm_vcaddq_rot270_u32 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq (int32x4_t __a, int32x4_t __b)
+__arm_vbicq (uint32x4_t __a, uint32x4_t __b)
 {
- return __arm_vcmpgeq_s32 (__a, __b);
+ return __arm_vbicq_u32 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq (int32x4_t __a, int32_t __b)
+__arm_vaddvq_p (uint32x4_t __a, mve_pred16_t __p)
 {
- return __arm_vcmpgeq_n_s32 (__a, __b);
+ return __arm_vaddvq_p_u32 (__a, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq (int32x4_t __a, int32x4_t __b)
+__arm_vaddvaq (uint32_t __a, uint32x4_t __b)
 {
- return __arm_vcmpeqq_s32 (__a, __b);
+ return __arm_vaddvaq_u32 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq (int32x4_t __a, int32_t __b)
+__arm_vbrsrq (uint32x4_t __a, int32_t __b)
 {
- return __arm_vcmpeqq_n_s32 (__a, __b);
+ return __arm_vbrsrq_n_u32 (__a, __b);
 }
 
 __extension__ extern __inline uint32x4_t
@@ -13386,62 +11605,6 @@ __arm_vdupq_m (uint8x16_t __inactive, uint8_t __a, mve_pred16_t __p)
  return __arm_vdupq_m_n_u8 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpneq_m_u8 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m (uint8x16_t __a, uint8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpneq_m_n_u8 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq_m (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmphiq_m_u8 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq_m (uint8x16_t __a, uint8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmphiq_m_n_u8 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpeqq_m_u8 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m (uint8x16_t __a, uint8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpeqq_m_n_u8 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq_m (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpcsq_m_u8 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq_m (uint8x16_t __a, uint8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpcsq_m_n_u8 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq_p (uint32_t __a, uint8x16_t __b, mve_pred16_t __p)
@@ -13463,90 +11626,6 @@ __arm_vsliq (uint8x16_t __a, uint8x16_t __b, const int __imm)
  return __arm_vsliq_n_u8 (__a, __b, __imm);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpneq_m_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m (int8x16_t __a, int8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpneq_m_n_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpltq_m_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m (int8x16_t __a, int8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpltq_m_n_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpleq_m_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m (int8x16_t __a, int8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpleq_m_n_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpgtq_m_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m (int8x16_t __a, int8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpgtq_m_n_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpgeq_m_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m (int8x16_t __a, int8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpgeq_m_n_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpeqq_m_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m (int8x16_t __a, int8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpeqq_m_n_s8 (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrev64q_m (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
@@ -13806,165 +11885,25 @@ __arm_vdupq_m (uint16x8_t __inactive, uint16_t __a, mve_pred16_t __p)
  return __arm_vdupq_m_n_u16 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpneq_m_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m (uint16x8_t __a, uint16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpneq_m_n_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq_m (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmphiq_m_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq_m (uint16x8_t __a, uint16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmphiq_m_n_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpeqq_m_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m (uint16x8_t __a, uint16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpeqq_m_n_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq_m (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpcsq_m_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq_m (uint16x8_t __a, uint16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpcsq_m_n_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_p (uint32_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vaddvaq_p_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq (uint16x8_t __a, uint16x8_t __b, const int __imm)
-{
- return __arm_vsriq_n_u16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq (uint16x8_t __a, uint16x8_t __b, const int __imm)
-{
- return __arm_vsliq_n_u16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpneq_m_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m (int16x8_t __a, int16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpneq_m_n_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpltq_m_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m (int16x8_t __a, int16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpltq_m_n_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpleq_m_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m (int16x8_t __a, int16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpleq_m_n_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpgtq_m_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m (int16x8_t __a, int16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpgtq_m_n_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpgeq_m_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m (int16x8_t __a, int16_t __b, mve_pred16_t __p)
+__arm_vaddvaq_p (uint32_t __a, uint16x8_t __b, mve_pred16_t __p)
 {
- return __arm_vcmpgeq_m_n_s16 (__a, __b, __p);
+ return __arm_vaddvaq_p_u16 (__a, __b, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
+__arm_vsriq (uint16x8_t __a, uint16x8_t __b, const int __imm)
 {
- return __arm_vcmpeqq_m_s16 (__a, __b, __p);
+ return __arm_vsriq_n_u16 (__a, __b, __imm);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m (int16x8_t __a, int16_t __b, mve_pred16_t __p)
+__arm_vsliq (uint16x8_t __a, uint16x8_t __b, const int __imm)
 {
- return __arm_vcmpeqq_m_n_s16 (__a, __b, __p);
+ return __arm_vsliq_n_u16 (__a, __b, __imm);
 }
 
 __extension__ extern __inline int16x8_t
@@ -14226,62 +12165,6 @@ __arm_vdupq_m (uint32x4_t __inactive, uint32_t __a, mve_pred16_t __p)
  return __arm_vdupq_m_n_u32 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpneq_m_u32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m (uint32x4_t __a, uint32_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpneq_m_n_u32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq_m (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcmphiq_m_u32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmphiq_m (uint32x4_t __a, uint32_t __b, mve_pred16_t __p)
-{
- return __arm_vcmphiq_m_n_u32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpeqq_m_u32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m (uint32x4_t __a, uint32_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpeqq_m_n_u32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq_m (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpcsq_m_u32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpcsq_m (uint32x4_t __a, uint32_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpcsq_m_n_u32 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq_p (uint32_t __a, uint32x4_t __b, mve_pred16_t __p)
@@ -14303,90 +12186,6 @@ __arm_vsliq (uint32x4_t __a, uint32x4_t __b, const int __imm)
  return __arm_vsliq_n_u32 (__a, __b, __imm);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpneq_m_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpneq_m_n_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpltq_m_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpltq_m_n_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpleq_m_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpleq_m_n_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpgtq_m_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpgtq_m_n_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpgeq_m_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpgeq_m_n_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpeqq_m_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpeqq_m_n_s32 (__a, __b, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrev64q_m (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
@@ -18635,280 +16434,112 @@ __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtq (uint16x8_t __a)
 {
- return __arm_vcvtq_f16_u16 (__a);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq (uint32x4_t __a)
-{
- return __arm_vcvtq_f32_u32 (__a);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq (float16x8_t __a, int32_t __b)
-{
- return __arm_vbrsrq_n_f16 (__a, __b);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq (float32x4_t __a, int32_t __b)
-{
- return __arm_vbrsrq_n_f32 (__a, __b);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_n (int16x8_t __a, const int __imm6)
-{
- return __arm_vcvtq_n_f16_s16 (__a, __imm6);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_n (int32x4_t __a, const int __imm6)
-{
- return __arm_vcvtq_n_f32_s32 (__a, __imm6);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_n (uint16x8_t __a, const int __imm6)
-{
- return __arm_vcvtq_n_f16_u16 (__a, __imm6);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_n (uint32x4_t __a, const int __imm6)
-{
- return __arm_vcvtq_n_f32_u32 (__a, __imm6);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq (float16x8_t __a, float16_t __b)
-{
- return __arm_vcmpneq_n_f16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq (float16x8_t __a, float16x8_t __b)
-{
- return __arm_vcmpneq_f16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq (float16x8_t __a, float16_t __b)
-{
- return __arm_vcmpltq_n_f16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq (float16x8_t __a, float16x8_t __b)
-{
- return __arm_vcmpltq_f16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq (float16x8_t __a, float16_t __b)
-{
- return __arm_vcmpleq_n_f16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq (float16x8_t __a, float16x8_t __b)
-{
- return __arm_vcmpleq_f16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq (float16x8_t __a, float16_t __b)
-{
- return __arm_vcmpgtq_n_f16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq (float16x8_t __a, float16x8_t __b)
-{
- return __arm_vcmpgtq_f16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq (float16x8_t __a, float16_t __b)
-{
- return __arm_vcmpgeq_n_f16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq (float16x8_t __a, float16x8_t __b)
-{
- return __arm_vcmpgeq_f16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq (float16x8_t __a, float16_t __b)
-{
- return __arm_vcmpeqq_n_f16 (__a, __b);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq (float16x8_t __a, float16x8_t __b)
-{
- return __arm_vcmpeqq_f16 (__a, __b);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq (float16x8_t __a, float16x8_t __b)
-{
- return __arm_vornq_f16 (__a, __b);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmulq_rot90 (float16x8_t __a, float16x8_t __b)
-{
- return __arm_vcmulq_rot90_f16 (__a, __b);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmulq_rot270 (float16x8_t __a, float16x8_t __b)
-{
- return __arm_vcmulq_rot270_f16 (__a, __b);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmulq_rot180 (float16x8_t __a, float16x8_t __b)
-{
- return __arm_vcmulq_rot180_f16 (__a, __b);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmulq (float16x8_t __a, float16x8_t __b)
-{
- return __arm_vcmulq_f16 (__a, __b);
+ return __arm_vcvtq_f16_u16 (__a);
 }
 
-__extension__ extern __inline float16x8_t
+__extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcaddq_rot90 (float16x8_t __a, float16x8_t __b)
+__arm_vcvtq (uint32x4_t __a)
 {
- return __arm_vcaddq_rot90_f16 (__a, __b);
+ return __arm_vcvtq_f32_u32 (__a);
 }
 
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcaddq_rot270 (float16x8_t __a, float16x8_t __b)
+__arm_vbrsrq (float16x8_t __a, int32_t __b)
 {
- return __arm_vcaddq_rot270_f16 (__a, __b);
+ return __arm_vbrsrq_n_f16 (__a, __b);
 }
 
-__extension__ extern __inline float16x8_t
+__extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq (float16x8_t __a, float16x8_t __b)
+__arm_vbrsrq (float32x4_t __a, int32_t __b)
 {
- return __arm_vbicq_f16 (__a, __b);
+ return __arm_vbrsrq_n_f32 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq (float32x4_t __a, float32_t __b)
+__arm_vcvtq_n (int16x8_t __a, const int __imm6)
 {
- return __arm_vcmpneq_n_f32 (__a, __b);
+ return __arm_vcvtq_n_f16_s16 (__a, __imm6);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq (float32x4_t __a, float32x4_t __b)
+__arm_vcvtq_n (int32x4_t __a, const int __imm6)
 {
- return __arm_vcmpneq_f32 (__a, __b);
+ return __arm_vcvtq_n_f32_s32 (__a, __imm6);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq (float32x4_t __a, float32_t __b)
+__arm_vcvtq_n (uint16x8_t __a, const int __imm6)
 {
- return __arm_vcmpltq_n_f32 (__a, __b);
+ return __arm_vcvtq_n_f16_u16 (__a, __imm6);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq (float32x4_t __a, float32x4_t __b)
+__arm_vcvtq_n (uint32x4_t __a, const int __imm6)
 {
- return __arm_vcmpltq_f32 (__a, __b);
+ return __arm_vcvtq_n_f32_u32 (__a, __imm6);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq (float32x4_t __a, float32_t __b)
+__arm_vornq (float16x8_t __a, float16x8_t __b)
 {
- return __arm_vcmpleq_n_f32 (__a, __b);
+ return __arm_vornq_f16 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq (float32x4_t __a, float32x4_t __b)
+__arm_vcmulq_rot90 (float16x8_t __a, float16x8_t __b)
 {
- return __arm_vcmpleq_f32 (__a, __b);
+ return __arm_vcmulq_rot90_f16 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq (float32x4_t __a, float32_t __b)
+__arm_vcmulq_rot270 (float16x8_t __a, float16x8_t __b)
 {
- return __arm_vcmpgtq_n_f32 (__a, __b);
+ return __arm_vcmulq_rot270_f16 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq (float32x4_t __a, float32x4_t __b)
+__arm_vcmulq_rot180 (float16x8_t __a, float16x8_t __b)
 {
- return __arm_vcmpgtq_f32 (__a, __b);
+ return __arm_vcmulq_rot180_f16 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq (float32x4_t __a, float32_t __b)
+__arm_vcmulq (float16x8_t __a, float16x8_t __b)
 {
- return __arm_vcmpgeq_n_f32 (__a, __b);
+ return __arm_vcmulq_f16 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq (float32x4_t __a, float32x4_t __b)
+__arm_vcaddq_rot90 (float16x8_t __a, float16x8_t __b)
 {
- return __arm_vcmpgeq_f32 (__a, __b);
+ return __arm_vcaddq_rot90_f16 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq (float32x4_t __a, float32_t __b)
+__arm_vcaddq_rot270 (float16x8_t __a, float16x8_t __b)
 {
- return __arm_vcmpeqq_n_f32 (__a, __b);
+ return __arm_vcaddq_rot270_f16 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
+__extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq (float32x4_t __a, float32x4_t __b)
+__arm_vbicq (float16x8_t __a, float16x8_t __b)
 {
- return __arm_vcmpeqq_f32 (__a, __b);
+ return __arm_vbicq_f16 (__a, __b);
 }
 
 __extension__ extern __inline float32x4_t
@@ -18967,20 +16598,6 @@ __arm_vbicq (float32x4_t __a, float32x4_t __b)
  return __arm_vbicq_f32 (__a, __b);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpeqq_m_f16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpeqq_m_f32 (__a, __b, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtaq_m (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
@@ -19177,83 +16794,6 @@ __arm_vrev64q_m (float16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
  return __arm_vrev64q_m_f16 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m (float16x8_t __a, float16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpeqq_m_n_f16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpgeq_m_f16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m (float16x8_t __a, float16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpgeq_m_n_f16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpgtq_m_f16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m (float16x8_t __a, float16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpgtq_m_n_f16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpleq_m_f16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m (float16x8_t __a, float16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpleq_m_n_f16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpltq_m_f16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m (float16x8_t __a, float16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpltq_m_n_f16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpneq_m_f16 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m (float16x8_t __a, float16_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpneq_m_n_f16 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
@@ -19387,83 +16927,6 @@ __arm_vrev64q_m (float32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
  return __arm_vrev64q_m_f32 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpeqq_m (float32x4_t __a, float32_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpeqq_m_n_f32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpgeq_m_f32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgeq_m (float32x4_t __a, float32_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpgeq_m_n_f32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpgtq_m_f32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpgtq_m (float32x4_t __a, float32_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpgtq_m_n_f32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpleq_m_f32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpleq_m (float32x4_t __a, float32_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpleq_m_n_f32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpltq_m_f32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpltq_m (float32x4_t __a, float32_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpltq_m_n_f32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpneq_m_f32 (__a, __b, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcmpneq_m (float32x4_t __a, float32_t __b, mve_pred16_t __p)
-{
- return __arm_vcmpneq_m_n_f32 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
@@ -20672,26 +18135,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcaddq_rot270_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \
   int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcaddq_rot270_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));})
 
-#define __arm_vcmpeqq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(p1, double)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(p1, double)), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpeqq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpeqq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpeqq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmpeqq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpeqq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpeqq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpeqq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpeqq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));})
-
 #define __arm_vcaddq_rot90(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -20704,88 +18147,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcaddq_rot90_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \
   int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcaddq_rot90_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));})
 
-#define __arm_vcmpeqq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpeqq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpeqq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpeqq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmpeqq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpeqq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpeqq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpeqq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpeqq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(p1, double), p2), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(p1, double), p2));})
-
-#define __arm_vcmpgtq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpgtq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpgtq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpgtq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgtq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgtq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(p1, double)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(p1, double)));})
-
-#define __arm_vcmpleq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpleq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpleq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpleq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpleq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpleq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpleq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpleq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpleq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(p1, double)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(p1, double)));})
-
-#define __arm_vcmpltq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpltq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpltq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpltq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpltq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpltq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpltq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpltq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpltq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(p1, double)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(p1, double)));})
-
-#define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(p1, double)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(p1, double)), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpneq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpneq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpneq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmpneq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpneq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpneq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpneq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpneq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));})
-
 #define __arm_vcmulq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -21125,68 +18486,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_rot90_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t)), \
   int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_rot90_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t)));})
 
-#define __arm_vcmpgtq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpgtq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpgtq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpgtq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(p1, double), p2), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(p1, double), p2), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgtq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgtq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
-
-#define __arm_vcmpleq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpleq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpleq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpleq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpleq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpleq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(p1, double), p2), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(p1, double), p2));})
-
-#define __arm_vcmpltq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpltq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpltq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpltq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpltq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpltq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(p1, double), p2), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(p1, double), p2));})
-
-#define __arm_vcmpneq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpneq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpneq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpneq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmpneq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpneq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpneq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpneq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpneq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(p1, double), p2), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(p1, double), p2));})
-
 #define __arm_vcvtbq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -21293,40 +18592,12 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vpselq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
   int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vpselq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
 
-#define __arm_vcmpgeq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpgeq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpgeq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpgeq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgeq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgeq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(p1, double)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(p1, double)));})
-
 #define __arm_vrev16q_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrev16q_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
   int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vrev16q_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2));})
 
-#define __arm_vcmpgeq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpgeq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpgeq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpgeq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(p1, double), p2), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(p1, double), p2), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgeq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgeq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
-
 #define __arm_vbicq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -21990,22 +19261,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrev64q_u16 (__ARM_mve_coerce(__p0, uint16x8_t)), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrev64q_u32 (__ARM_mve_coerce(__p0, uint32x4_t)));})
 
-#define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpneq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpneq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpneq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmpneq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpneq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpneq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
 #define __arm_vqshluq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshluq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
@@ -22099,22 +19354,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vbicq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vbicq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
 
-#define __arm_vcmpeqq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpeqq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpeqq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpeqq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmpeqq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpeqq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpeqq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce3(p1, int)));})
-
 #define __arm_vmulltq_poly(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -22149,62 +19388,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmullbq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmullbq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
 
-#define __arm_vcmpgeq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpgeq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpgeq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpgeq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)));})
-
-#define __arm_vcmpgtq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpgtq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpgtq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpgtq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)));})
-
-#define __arm_vcmpleq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpleq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpleq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpleq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpleq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpleq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpleq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)));})
-
-#define __arm_vcmpltq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpltq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpltq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpltq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpltq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpltq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpltq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)));})
-
-#define __arm_vcmpneq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpneq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpneq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpneq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmpneq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpneq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpneq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
@@ -22214,22 +19397,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
 
-#define __arm_vcmpeqq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpeqq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpeqq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpeqq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmpeqq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpeqq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpeqq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce3(p1, int), p2));})
-
 #define __arm_vbicq_m_n(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int16x8_t]: __arm_vbicq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
@@ -22331,63 +19498,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmlsdhxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmlsdhxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
 
-#define __arm_vcmpgeq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpgeq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpgeq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpgeq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int), p2));})
-
-
-#define __arm_vcmpgtq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpgtq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpgtq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpgtq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int), p2));})
-
-#define __arm_vcmpleq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpleq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpleq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpleq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int), p2));})
-
-#define __arm_vcmpltq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpltq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpltq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpltq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int), p2));})
-
-#define __arm_vcmpneq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpneq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpneq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpneq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmpneq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpneq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpneq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce3(p1, int), p2));})
-
 #define __arm_vdupq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -23667,46 +20777,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vaddvq_p_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vaddvq_p_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
 
-#define __arm_vcmpcsq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmpcsq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpcsq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpcsq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpcsq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpcsq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpcsq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce3(p1, int)));})
-
-#define __arm_vcmpcsq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmpcsq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmpcsq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmpcsq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpcsq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpcsq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpcsq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce3(p1, int), p2));})
-
-#define __arm_vcmphiq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmphiq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmphiq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmphiq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmphiq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmphiq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmphiq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce3(p1, int)));})
-
-#define __arm_vcmphiq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmphiq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmphiq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmphiq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce3(p1, int), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vcmphiq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcmphiq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcmphiq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vmladavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
-- 
2.34.1


* [PATCH 04/20] arm: [MVE intrinsics] factorize vrev16q vrev32q vrev64q
  2023-05-10 13:30 [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp Christophe Lyon
  2023-05-10 13:30 ` [PATCH 02/20] arm: [MVE intrinsics] add cmp shape Christophe Lyon
  2023-05-10 13:30 ` [PATCH 03/20] arm: [MVE intrinsics] rework vcmp Christophe Lyon
@ 2023-05-10 13:30 ` Christophe Lyon
  2023-05-10 13:30 ` [PATCH 05/20] arm: [MVE intrinsics] rework " Christophe Lyon
                   ` (16 subsequent siblings)
  19 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-10 13:30 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vrev16q, vrev32q and vrev64q so that they use generic
builtin names.
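
The '@' prefix on the new pattern names is what makes this
factorization useful: genemit turns each iterator placeholder in the
name into a function argument, so callers can select the insn
programmatically instead of hard-coding one gen_* function per
variant.  A hypothetical sketch of a call site (illustrative only,
following GCC's "Parameterized Names" convention; not code from this
patch):

  /* With the insn renamed to @mve_<mve_insn>q_f<mode>, genemit
     provides overloads taking the iterator values as arguments.  */
  insn_code icode = code_for_mve_q_f (VREV64Q_F, V8HFmode);
  /* ...instead of a hard-coded gen_mve_vrev64q_fv8hf (dst, src).  */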

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MVE_V8HF, MVE_V16QI)
	(MVE_FP_VREV64Q_ONLY, MVE_FP_M_VREV64Q_ONLY, MVE_FP_VREV32Q_ONLY)
	(MVE_FP_M_VREV32Q_ONLY): New iterators.
	(mve_insn): Add vrev16q, vrev32q, vrev64q.
	* config/arm/mve.md (mve_vrev64q_f<mode>): Rename into ...
	(@mve_<mve_insn>q_f<mode>): ... this.
	(mve_vrev32q_fv8hf): Rename into @mve_<mve_insn>q_f<mode>.
	(mve_vrev64q_<supf><mode>): Rename into ...
	(@mve_<mve_insn>q_<supf><mode>): ... this.
	(mve_vrev32q_<supf><mode>): Rename into
	@mve_<mve_insn>q_<supf><mode>.
	(mve_vrev16q_<supf>v16qi): Rename into
	@mve_<mve_insn>q_<supf><mode>.
	(mve_vrev64q_m_<supf><mode>): Rename into
	@mve_<mve_insn>q_m_<supf><mode>.
	(mve_vrev32q_m_fv8hf): Rename into @mve_<mve_insn>q_m_f<mode>.
	(mve_vrev32q_m_<supf><mode>): Rename into
	@mve_<mve_insn>q_m_<supf><mode>.
	(mve_vrev64q_m_f<mode>): Rename into @mve_<mve_insn>q_m_f<mode>.
	(mve_vrev16q_m_<supf>v16qi): Rename into
	@mve_<mve_insn>q_m_<supf><mode>.
---
 gcc/config/arm/iterators.md | 25 +++++++++++++
 gcc/config/arm/mve.md       | 72 ++++++++++++++++++-------------------
 2 files changed, 61 insertions(+), 36 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index ef9fae0412b..878210471c8 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -1,3 +1,4 @@
+
 ;; Code and mode itertator and attribute definitions for the ARM backend
 ;; Copyright (C) 2010-2023 Free Software Foundation, Inc.
 ;; Contributed by ARM Ltd.
@@ -274,6 +275,8 @@ (define_mode_iterator MVE_5 [V8HI V4SI])
 (define_mode_iterator MVE_6 [V8HI V4SI])
 (define_mode_iterator MVE_7 [V16BI V8BI V4BI V2QI])
 (define_mode_iterator MVE_7_HI [HI V16BI V8BI V4BI V2QI])
+(define_mode_iterator MVE_V8HF [V8HF])
+(define_mode_iterator MVE_V16QI [V16QI])
 
 ;;----------------------------------------------------------------------------
 ;; Code iterators
@@ -372,6 +375,22 @@ (define_int_iterator MVE_FP_M_UNARY [
 		     VRNDXQ_M_F
 		     ])
 
+(define_int_iterator MVE_FP_VREV64Q_ONLY [
+		     VREV64Q_F
+		     ])
+
+(define_int_iterator MVE_FP_M_VREV64Q_ONLY [
+		     VREV64Q_M_F
+		     ])
+
+(define_int_iterator MVE_FP_VREV32Q_ONLY [
+		     VREV32Q_F
+		     ])
+
+(define_int_iterator MVE_FP_M_VREV32Q_ONLY [
+		     VREV32Q_M_F
+		     ])
+
 ;; MVE integer binary operations.
 (define_code_iterator MVE_INT_BINARY_RTX [plus minus mult])
 
@@ -862,6 +881,12 @@ (define_int_attr mve_insn [
 		 (VQSUBQ_M_S "vqsub") (VQSUBQ_M_U "vqsub")
 		 (VQSUBQ_N_S "vqsub") (VQSUBQ_N_U "vqsub")
 		 (VQSUBQ_S "vqsub") (VQSUBQ_U "vqsub")
+		 (VREV16Q_M_S "vrev16") (VREV16Q_M_U "vrev16")
+		 (VREV16Q_S "vrev16") (VREV16Q_U "vrev16")
+		 (VREV32Q_M_S "vrev32") (VREV32Q_M_U "vrev32") (VREV32Q_M_F "vrev32")
+		 (VREV32Q_S "vrev32") (VREV32Q_U "vrev32") (VREV32Q_F "vrev32")
+		 (VREV64Q_M_S "vrev64") (VREV64Q_M_U "vrev64") (VREV64Q_M_F "vrev64")
+		 (VREV64Q_S "vrev64") (VREV64Q_U "vrev64") (VREV64Q_F "vrev64")
 		 (VRHADDQ_M_S "vrhadd") (VRHADDQ_M_U "vrhadd")
 		 (VRHADDQ_S "vrhadd") (VRHADDQ_U "vrhadd")
 		 (VRMULHQ_M_S "vrmulh") (VRMULHQ_M_U "vrmulh")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 191d1268ad6..4dfcd6c4280 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -151,14 +151,14 @@ (define_insn "@mve_<mve_insn>q_f<mode>"
 ;;
 ;; [vrev64q_f])
 ;;
-(define_insn "mve_vrev64q_f<mode>"
+(define_insn "@mve_<mve_insn>q_f<mode>"
   [
    (set (match_operand:MVE_0 0 "s_register_operand" "=&w")
 	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")]
-	 VREV64Q_F))
+	 MVE_FP_VREV64Q_ONLY))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vrev64.%#<V_sz_elem> %q0, %q1"
+  "<mve_insn>.%#<V_sz_elem>\t%q0, %q1"
   [(set_attr "type" "mve_move")
 ])
 
@@ -193,14 +193,14 @@ (define_insn "mve_vdupq_n_f<mode>"
 ;;
 ;; [vrev32q_f])
 ;;
-(define_insn "mve_vrev32q_fv8hf"
+(define_insn "@mve_<mve_insn>q_f<mode>"
   [
-   (set (match_operand:V8HF 0 "s_register_operand" "=w")
-	(unspec:V8HF [(match_operand:V8HF 1 "s_register_operand" "w")]
-	 VREV32Q_F))
+   (set (match_operand:MVE_V8HF 0 "s_register_operand" "=w")
+	(unspec:MVE_V8HF [(match_operand:MVE_V8HF 1 "s_register_operand" "w")]
+	 MVE_FP_VREV32Q_ONLY))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vrev32.16 %q0, %q1"
+  "<mve_insn>.<V_sz_elem>\t%q0, %q1"
   [(set_attr "type" "mve_move")
 ])
 ;;
@@ -248,14 +248,14 @@ (define_insn "mve_vcvtq_to_f_<supf><mode>"
 ;;
 ;; [vrev64q_u, vrev64q_s])
 ;;
-(define_insn "mve_vrev64q_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=&w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")]
 	 VREV64Q))
   ]
   "TARGET_HAVE_MVE"
-  "vrev64.%#<V_sz_elem> %q0, %q1"
+  "<mve_insn>.%#<V_sz_elem>\t%q0, %q1"
   [(set_attr "type" "mve_move")
 ])
 
@@ -374,14 +374,14 @@ (define_insn "@mve_vaddvq_<supf><mode>"
 ;;
 ;; [vrev32q_u, vrev32q_s])
 ;;
-(define_insn "mve_vrev32q_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_<supf><mode>"
   [
    (set (match_operand:MVE_3 0 "s_register_operand" "=w")
 	(unspec:MVE_3 [(match_operand:MVE_3 1 "s_register_operand" "w")]
 	 VREV32Q))
   ]
   "TARGET_HAVE_MVE"
-  "vrev32.%#<V_sz_elem>\t%q0, %q1"
+  "<mve_insn>.%#<V_sz_elem>\t%q0, %q1"
   [(set_attr "type" "mve_move")
 ])
 
@@ -486,14 +486,14 @@ (define_insn "mve_vmvnq_n_<supf><mode>"
 ;;
 ;; [vrev16q_u, vrev16q_s])
 ;;
-(define_insn "mve_vrev16q_<supf>v16qi"
+(define_insn "@mve_<mve_insn>q_<supf><mode>"
   [
-   (set (match_operand:V16QI 0 "s_register_operand" "=w")
-	(unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")]
+   (set (match_operand:MVE_V16QI 0 "s_register_operand" "=w")
+	(unspec:MVE_V16QI [(match_operand:MVE_V16QI 1 "s_register_operand" "w")]
 	 VREV16Q))
   ]
   "TARGET_HAVE_MVE"
-  "vrev16.8 %q0, %q1"
+  "<mve_insn>.<V_sz_elem>\t%q0, %q1"
   [(set_attr "type" "mve_move")
 ])
 
@@ -2364,7 +2364,7 @@ (define_insn "@mve_<mve_insn>q_m_r_<supf><mode>"
 ;;
 ;; [vrev64q_m_u, vrev64q_m_s])
 ;;
-(define_insn "mve_vrev64q_m_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_m_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=&w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
@@ -2373,7 +2373,7 @@ (define_insn "mve_vrev64q_m_<supf><mode>"
 	 VREV64Q_M))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vrev64t.%#<V_sz_elem>\t%q0, %q2"
+  "vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
@@ -3008,23 +3008,23 @@ (define_insn "@mve_vpselq_f<mode>"
 ;;
 ;; [vrev32q_m_f])
 ;;
-(define_insn "mve_vrev32q_m_fv8hf"
+(define_insn "@mve_<mve_insn>q_m_f<mode>"
   [
-   (set (match_operand:V8HF 0 "s_register_operand" "=w")
-	(unspec:V8HF [(match_operand:V8HF 1 "s_register_operand" "0")
-		       (match_operand:V8HF 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VREV32Q_M_F))
+   (set (match_operand:MVE_V8HF 0 "s_register_operand" "=w")
+	(unspec:MVE_V8HF [(match_operand:MVE_V8HF 1 "s_register_operand" "0")
+			  (match_operand:MVE_V8HF 2 "s_register_operand" "w")
+			  (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
+	 MVE_FP_M_VREV32Q_ONLY))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vrev32t.16 %q0, %q2"
+  "vpst\;<mve_insn>t.<V_sz_elem>\t%q0, %q2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
 ;;
 ;; [vrev32q_m_s, vrev32q_m_u])
 ;;
-(define_insn "mve_vrev32q_m_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_m_<supf><mode>"
   [
    (set (match_operand:MVE_3 0 "s_register_operand" "=w")
 	(unspec:MVE_3 [(match_operand:MVE_3 1 "s_register_operand" "0")
@@ -3033,23 +3033,23 @@ (define_insn "mve_vrev32q_m_<supf><mode>"
 	 VREV32Q_M))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vrev32t.%#<V_sz_elem>	%q0, %q2"
+  "vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
 ;;
 ;; [vrev64q_m_f])
 ;;
-(define_insn "mve_vrev64q_m_f<mode>"
+(define_insn "@mve_<mve_insn>q_m_f<mode>"
   [
    (set (match_operand:MVE_0 0 "s_register_operand" "=&w")
 	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
 		       (match_operand:MVE_0 2 "s_register_operand" "w")
 		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VREV64Q_M_F))
+	 MVE_FP_M_VREV64Q_ONLY))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vrev64t.%#<V_sz_elem>	%q0, %q2"
+  "vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
@@ -3201,16 +3201,16 @@ (define_insn "mve_vcvtq_m_n_from_f_<supf><mode>"
 ;;
 ;; [vrev16q_m_u, vrev16q_m_s])
 ;;
-(define_insn "mve_vrev16q_m_<supf>v16qi"
+(define_insn "@mve_<mve_insn>q_m_<supf><mode>"
   [
-   (set (match_operand:V16QI 0 "s_register_operand" "=w")
-	(unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "0")
-		       (match_operand:V16QI 2 "s_register_operand" "w")
-		       (match_operand:V16BI 3 "vpr_register_operand" "Up")]
+   (set (match_operand:MVE_V16QI 0 "s_register_operand" "=w")
+	(unspec:MVE_V16QI [(match_operand:MVE_V16QI 1 "s_register_operand" "0")
+			   (match_operand:MVE_V16QI 2 "s_register_operand" "w")
+			   (match_operand:V16BI 3 "vpr_register_operand" "Up")]
 	 VREV16Q_M))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vrev16t.8 %q0, %q2"
+  "vpst\;<mve_insn>t.<V_sz_elem>\t%q0, %q2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-- 
2.34.1


* [PATCH 05/20] arm: [MVE intrinsics] rework vrev16q vrev32q vrev64q
  2023-05-10 13:30 [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp Christophe Lyon
                   ` (2 preceding siblings ...)
  2023-05-10 13:30 ` [PATCH 04/20] arm: [MVE intrinsics] factorize vrev16q vrev32q vrev64q Christophe Lyon
@ 2023-05-10 13:30 ` Christophe Lyon
  2023-05-10 13:30 ` [PATCH 06/20] arm: [MVE intrinsics] factorize vdupq Christophe Lyon
                   ` (15 subsequent siblings)
  19 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-10 13:30 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vrev16q, vrev32q and vrev64q using the new MVE builtins
framework.
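
The user-visible intrinsics are unchanged; only the dispatch moves
from the _Generic tables in arm_mve.h to the C++ framework.  A
minimal usage sketch (assumed example, not taken from the patch):

  #include <arm_mve.h>

  /* vrev64q still resolves to vrev64q_u8 for a uint8x16_t argument;
     the overload is now chosen by the builtins framework rather than
     the removed _Generic macro.  */
  uint8x16_t
  reverse_in_doublewords (uint8x16_t a)
  {
    return vrev64q (a);
  }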

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vrev16q, vrev32q, vrev64q):
	New.
	* config/arm/arm-mve-builtins-base.def (vrev16q, vrev32q)
	(vrev64q): New.
	* config/arm/arm-mve-builtins-base.h (vrev16q, vrev32q)
	(vrev64q): New.
	* config/arm/arm_mve.h (vrev16q): Remove.
	(vrev32q): Remove.
	(vrev64q): Remove.
	(vrev64q_m): Remove.
	(vrev16q_m): Remove.
	(vrev32q_m): Remove.
	(vrev16q_x): Remove.
	(vrev32q_x): Remove.
	(vrev64q_x): Remove.
	(vrev64q_f16): Remove.
	(vrev64q_f32): Remove.
	(vrev32q_f16): Remove.
	(vrev16q_s8): Remove.
	(vrev32q_s8): Remove.
	(vrev32q_s16): Remove.
	(vrev64q_s8): Remove.
	(vrev64q_s16): Remove.
	(vrev64q_s32): Remove.
	(vrev64q_u8): Remove.
	(vrev64q_u16): Remove.
	(vrev64q_u32): Remove.
	(vrev32q_u8): Remove.
	(vrev32q_u16): Remove.
	(vrev16q_u8): Remove.
	(vrev64q_m_u8): Remove.
	(vrev64q_m_s8): Remove.
	(vrev64q_m_u16): Remove.
	(vrev64q_m_s16): Remove.
	(vrev64q_m_u32): Remove.
	(vrev64q_m_s32): Remove.
	(vrev16q_m_s8): Remove.
	(vrev32q_m_f16): Remove.
	(vrev16q_m_u8): Remove.
	(vrev32q_m_s8): Remove.
	(vrev64q_m_f16): Remove.
	(vrev32q_m_u8): Remove.
	(vrev32q_m_s16): Remove.
	(vrev64q_m_f32): Remove.
	(vrev32q_m_u16): Remove.
	(vrev16q_x_s8): Remove.
	(vrev16q_x_u8): Remove.
	(vrev32q_x_s8): Remove.
	(vrev32q_x_s16): Remove.
	(vrev32q_x_u8): Remove.
	(vrev32q_x_u16): Remove.
	(vrev64q_x_s8): Remove.
	(vrev64q_x_s16): Remove.
	(vrev64q_x_s32): Remove.
	(vrev64q_x_u8): Remove.
	(vrev64q_x_u16): Remove.
	(vrev64q_x_u32): Remove.
	(vrev32q_x_f16): Remove.
	(vrev64q_x_f16): Remove.
	(vrev64q_x_f32): Remove.
	(__arm_vrev16q_s8): Remove.
	(__arm_vrev32q_s8): Remove.
	(__arm_vrev32q_s16): Remove.
	(__arm_vrev64q_s8): Remove.
	(__arm_vrev64q_s16): Remove.
	(__arm_vrev64q_s32): Remove.
	(__arm_vrev64q_u8): Remove.
	(__arm_vrev64q_u16): Remove.
	(__arm_vrev64q_u32): Remove.
	(__arm_vrev32q_u8): Remove.
	(__arm_vrev32q_u16): Remove.
	(__arm_vrev16q_u8): Remove.
	(__arm_vrev64q_m_u8): Remove.
	(__arm_vrev64q_m_s8): Remove.
	(__arm_vrev64q_m_u16): Remove.
	(__arm_vrev64q_m_s16): Remove.
	(__arm_vrev64q_m_u32): Remove.
	(__arm_vrev64q_m_s32): Remove.
	(__arm_vrev16q_m_s8): Remove.
	(__arm_vrev16q_m_u8): Remove.
	(__arm_vrev32q_m_s8): Remove.
	(__arm_vrev32q_m_u8): Remove.
	(__arm_vrev32q_m_s16): Remove.
	(__arm_vrev32q_m_u16): Remove.
	(__arm_vrev16q_x_s8): Remove.
	(__arm_vrev16q_x_u8): Remove.
	(__arm_vrev32q_x_s8): Remove.
	(__arm_vrev32q_x_s16): Remove.
	(__arm_vrev32q_x_u8): Remove.
	(__arm_vrev32q_x_u16): Remove.
	(__arm_vrev64q_x_s8): Remove.
	(__arm_vrev64q_x_s16): Remove.
	(__arm_vrev64q_x_s32): Remove.
	(__arm_vrev64q_x_u8): Remove.
	(__arm_vrev64q_x_u16): Remove.
	(__arm_vrev64q_x_u32): Remove.
	(__arm_vrev64q_f16): Remove.
	(__arm_vrev64q_f32): Remove.
	(__arm_vrev32q_f16): Remove.
	(__arm_vrev32q_m_f16): Remove.
	(__arm_vrev64q_m_f16): Remove.
	(__arm_vrev64q_m_f32): Remove.
	(__arm_vrev32q_x_f16): Remove.
	(__arm_vrev64q_x_f16): Remove.
	(__arm_vrev64q_x_f32): Remove.
	(__arm_vrev16q): Remove.
	(__arm_vrev32q): Remove.
	(__arm_vrev64q): Remove.
	(__arm_vrev64q_m): Remove.
	(__arm_vrev16q_m): Remove.
	(__arm_vrev32q_m): Remove.
	(__arm_vrev16q_x): Remove.
	(__arm_vrev32q_x): Remove.
	(__arm_vrev64q_x): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   3 +
 gcc/config/arm/arm-mve-builtins-base.def |   5 +
 gcc/config/arm/arm-mve-builtins-base.h   |   3 +
 gcc/config/arm/arm_mve.h                 | 820 -----------------------
 4 files changed, 11 insertions(+), 820 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 14870f5b1aa..76294ddb7fb 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -293,6 +293,9 @@ FUNCTION_ONLY_N_NO_U_F (vqshrunbq, VQSHRUNBQ)
 FUNCTION_ONLY_N_NO_U_F (vqshruntq, VQSHRUNTQ)
 FUNCTION_WITH_M_N_NO_F (vqsubq, VQSUBQ)
 FUNCTION (vreinterpretq, vreinterpretq_impl,)
+FUNCTION_WITHOUT_N_NO_F (vrev16q, VREV16Q)
+FUNCTION_WITHOUT_N (vrev32q, VREV32Q)
+FUNCTION_WITHOUT_N (vrev64q, VREV64Q)
 FUNCTION_WITHOUT_N_NO_F (vrhaddq, VRHADDQ)
 FUNCTION_WITHOUT_N_NO_F (vrmulhq, VRMULHQ)
 FUNCTION_ONLY_F (vrndq, VRNDQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index f05cecd9160..2602cbf20e3 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -72,6 +72,9 @@ DEF_MVE_FUNCTION (vqshrunbq, binary_rshift_narrow_unsigned, signed_16_32, m_or_n
 DEF_MVE_FUNCTION (vqshruntq, binary_rshift_narrow_unsigned, signed_16_32, m_or_none)
 DEF_MVE_FUNCTION (vqsubq, binary_opt_n, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vreinterpretq, unary_convert, reinterpret_integer, none)
+DEF_MVE_FUNCTION (vrev16q, unary, integer_8, mx_or_none)
+DEF_MVE_FUNCTION (vrev32q, unary, integer_8_16, mx_or_none)
+DEF_MVE_FUNCTION (vrev64q, unary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vrhaddq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vrmulhq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vrshlq, binary_round_lshift, all_integer, mx_or_none)
@@ -114,6 +117,8 @@ DEF_MVE_FUNCTION (vmulq, binary_opt_n, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vnegq, unary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vorrq, binary_orrq, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vreinterpretq, unary_convert, reinterpret_float, none)
+DEF_MVE_FUNCTION (vrev32q, unary, float16, mx_or_none)
+DEF_MVE_FUNCTION (vrev64q, unary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vrndaq, unary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vrndmq, unary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vrndnq, unary, all_float, mx_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 179e1295fb2..eaea466712a 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -84,6 +84,9 @@ extern const function_base *const vqshrunbq;
 extern const function_base *const vqshruntq;
 extern const function_base *const vqsubq;
 extern const function_base *const vreinterpretq;
+extern const function_base *const vrev16q;
+extern const function_base *const vrev32q;
+extern const function_base *const vrev64q;
 extern const function_base *const vrhaddq;
 extern const function_base *const vrmulhq;
 extern const function_base *const vrndaq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 3eb8195060b..3692f600b37 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -48,9 +48,6 @@
 #define vmovlbq(__a) __arm_vmovlbq(__a)
 #define vmovltq(__a) __arm_vmovltq(__a)
 #define vmvnq(__a) __arm_vmvnq(__a)
-#define vrev16q(__a) __arm_vrev16q(__a)
-#define vrev32q(__a) __arm_vrev32q(__a)
-#define vrev64q(__a) __arm_vrev64q(__a)
 #define vaddlvq_p(__a, __p) __arm_vaddlvq_p(__a, __p)
 #define vornq(__a, __b) __arm_vornq(__a, __b)
 #define vmulltq_int(__a, __b) __arm_vmulltq_int(__a, __b)
@@ -86,7 +83,6 @@
 #define vrmlaldavhaq(__a, __b, __c) __arm_vrmlaldavhaq(__a, __b, __c)
 #define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
 #define vpselq(__a, __b, __p) __arm_vpselq(__a, __b, __p)
-#define vrev64q_m(__inactive, __a, __p) __arm_vrev64q_m(__inactive, __a, __p)
 #define vqrdmlashq(__a, __b, __c) __arm_vqrdmlashq(__a, __b, __c)
 #define vqrdmlahq(__a, __b, __c) __arm_vqrdmlahq(__a, __b, __c)
 #define vqdmlashq(__a, __b, __c) __arm_vqdmlashq(__a, __b, __c)
@@ -118,7 +114,6 @@
 #define vrmlsldavhaq(__a, __b, __c) __arm_vrmlsldavhaq(__a, __b, __c)
 #define vrmlsldavhaxq(__a, __b, __c) __arm_vrmlsldavhaxq(__a, __b, __c)
 #define vaddlvaq_p(__a, __b, __p) __arm_vaddlvaq_p(__a, __b, __p)
-#define vrev16q_m(__inactive, __a, __p) __arm_vrev16q_m(__inactive, __a, __p)
 #define vrmlaldavhq_p(__a, __b, __p) __arm_vrmlaldavhq_p(__a, __b, __p)
 #define vrmlaldavhxq_p(__a, __b, __p) __arm_vrmlaldavhxq_p(__a, __b, __p)
 #define vrmlsldavhq_p(__a, __b, __p) __arm_vrmlsldavhq_p(__a, __b, __p)
@@ -133,7 +128,6 @@
 #define vmlsldavxq_p(__a, __b, __p) __arm_vmlsldavxq_p(__a, __b, __p)
 #define vmovlbq_m(__inactive, __a, __p) __arm_vmovlbq_m(__inactive, __a, __p)
 #define vmovltq_m(__inactive, __a, __p) __arm_vmovltq_m(__inactive, __a, __p)
-#define vrev32q_m(__inactive, __a, __p) __arm_vrev32q_m(__inactive, __a, __p)
 #define vsriq_m(__a, __b, __imm, __p) __arm_vsriq_m(__a, __b, __imm, __p)
 #define vqshluq_m(__inactive, __a, __imm, __p) __arm_vqshluq_m(__inactive, __a, __imm, __p)
 #define vabavq_p(__a, __b, __c, __p) __arm_vabavq_p(__a, __b, __c, __p)
@@ -264,9 +258,6 @@
 #define vmovltq_x(__a, __p) __arm_vmovltq_x(__a, __p)
 #define vmvnq_x(__a, __p) __arm_vmvnq_x(__a, __p)
 #define vornq_x(__a, __b, __p) __arm_vornq_x(__a, __b, __p)
-#define vrev16q_x(__a, __p) __arm_vrev16q_x(__a, __p)
-#define vrev32q_x(__a, __p) __arm_vrev32q_x(__a, __p)
-#define vrev64q_x(__a, __p) __arm_vrev64q_x(__a, __p)
 #define vadciq(__a, __b, __carry_out) __arm_vadciq(__a, __b, __carry_out)
 #define vadciq_m(__inactive, __a, __b, __carry_out, __p) __arm_vadciq_m(__inactive, __a, __b, __carry_out, __p)
 #define vadcq(__a, __b, __carry) __arm_vadcq(__a, __b, __carry)
@@ -333,11 +324,8 @@
 #define vst4q_u32( __addr, __value) __arm_vst4q_u32( __addr, __value)
 #define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value)
 #define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value)
-#define vrev64q_f16(__a) __arm_vrev64q_f16(__a)
-#define vrev64q_f32(__a) __arm_vrev64q_f32(__a)
 #define vdupq_n_f16(__a) __arm_vdupq_n_f16(__a)
 #define vdupq_n_f32(__a) __arm_vdupq_n_f32(__a)
-#define vrev32q_f16(__a) __arm_vrev32q_f16(__a)
 #define vcvttq_f32_f16(__a) __arm_vcvttq_f32_f16(__a)
 #define vcvtbq_f32_f16(__a) __arm_vcvtbq_f32_f16(__a)
 #define vcvtq_f16_s16(__a) __arm_vcvtq_f16_s16(__a)
@@ -360,12 +348,6 @@
 #define vmvnq_s32(__a) __arm_vmvnq_s32(__a)
 #define vmvnq_n_s16( __imm) __arm_vmvnq_n_s16( __imm)
 #define vmvnq_n_s32( __imm) __arm_vmvnq_n_s32( __imm)
-#define vrev16q_s8(__a) __arm_vrev16q_s8(__a)
-#define vrev32q_s8(__a) __arm_vrev32q_s8(__a)
-#define vrev32q_s16(__a) __arm_vrev32q_s16(__a)
-#define vrev64q_s8(__a) __arm_vrev64q_s8(__a)
-#define vrev64q_s16(__a) __arm_vrev64q_s16(__a)
-#define vrev64q_s32(__a) __arm_vrev64q_s32(__a)
 #define vcvtaq_s16_f16(__a) __arm_vcvtaq_s16_f16(__a)
 #define vcvtaq_s32_f32(__a) __arm_vcvtaq_s32_f32(__a)
 #define vcvtnq_s16_f16(__a) __arm_vcvtnq_s16_f16(__a)
@@ -376,9 +358,6 @@
 #define vcvtmq_s32_f32(__a) __arm_vcvtmq_s32_f32(__a)
 #define vcvtq_s16_f16(__a) __arm_vcvtq_s16_f16(__a)
 #define vcvtq_s32_f32(__a) __arm_vcvtq_s32_f32(__a)
-#define vrev64q_u8(__a) __arm_vrev64q_u8(__a)
-#define vrev64q_u16(__a) __arm_vrev64q_u16(__a)
-#define vrev64q_u32(__a) __arm_vrev64q_u32(__a)
 #define vmvnq_u8(__a) __arm_vmvnq_u8(__a)
 #define vmvnq_u16(__a) __arm_vmvnq_u16(__a)
 #define vmvnq_u32(__a) __arm_vmvnq_u32(__a)
@@ -388,15 +367,12 @@
 #define vaddvq_u8(__a) __arm_vaddvq_u8(__a)
 #define vaddvq_u16(__a) __arm_vaddvq_u16(__a)
 #define vaddvq_u32(__a) __arm_vaddvq_u32(__a)
-#define vrev32q_u8(__a) __arm_vrev32q_u8(__a)
-#define vrev32q_u16(__a) __arm_vrev32q_u16(__a)
 #define vmovltq_u8(__a) __arm_vmovltq_u8(__a)
 #define vmovltq_u16(__a) __arm_vmovltq_u16(__a)
 #define vmovlbq_u8(__a) __arm_vmovlbq_u8(__a)
 #define vmovlbq_u16(__a) __arm_vmovlbq_u16(__a)
 #define vmvnq_n_u16( __imm) __arm_vmvnq_n_u16( __imm)
 #define vmvnq_n_u32( __imm) __arm_vmvnq_n_u32( __imm)
-#define vrev16q_u8(__a) __arm_vrev16q_u8(__a)
 #define vaddlvq_u32(__a) __arm_vaddlvq_u32(__a)
 #define vcvtq_u16_f16(__a) __arm_vcvtq_u16_f16(__a)
 #define vcvtq_u32_f32(__a) __arm_vcvtq_u32_f32(__a)
@@ -586,7 +562,6 @@
 #define vabavq_u32(__a, __b, __c) __arm_vabavq_u32(__a, __b, __c)
 #define vpselq_u8(__a, __b, __p) __arm_vpselq_u8(__a, __b, __p)
 #define vpselq_s8(__a, __b, __p) __arm_vpselq_s8(__a, __b, __p)
-#define vrev64q_m_u8(__inactive, __a, __p) __arm_vrev64q_m_u8(__inactive, __a, __p)
 #define vmvnq_m_u8(__inactive, __a, __p) __arm_vmvnq_m_u8(__inactive, __a, __p)
 #define vmlasq_n_u8(__a, __b, __c) __arm_vmlasq_n_u8(__a, __b, __c)
 #define vmlaq_n_u8(__a, __b, __c) __arm_vmlaq_n_u8(__a, __b, __c)
@@ -596,7 +571,6 @@
 #define vaddvaq_p_u8(__a, __b, __p) __arm_vaddvaq_p_u8(__a, __b, __p)
 #define vsriq_n_u8(__a, __b,  __imm) __arm_vsriq_n_u8(__a, __b,  __imm)
 #define vsliq_n_u8(__a, __b,  __imm) __arm_vsliq_n_u8(__a, __b,  __imm)
-#define vrev64q_m_s8(__inactive, __a, __p) __arm_vrev64q_m_s8(__inactive, __a, __p)
 #define vmvnq_m_s8(__inactive, __a, __p) __arm_vmvnq_m_s8(__inactive, __a, __p)
 #define vmlsdavxq_p_s8(__a, __b, __p) __arm_vmlsdavxq_p_s8(__a, __b, __p)
 #define vmlsdavq_p_s8(__a, __b, __p) __arm_vmlsdavq_p_s8(__a, __b, __p)
@@ -626,7 +600,6 @@
 #define vsliq_n_s8(__a, __b,  __imm) __arm_vsliq_n_s8(__a, __b,  __imm)
 #define vpselq_u16(__a, __b, __p) __arm_vpselq_u16(__a, __b, __p)
 #define vpselq_s16(__a, __b, __p) __arm_vpselq_s16(__a, __b, __p)
-#define vrev64q_m_u16(__inactive, __a, __p) __arm_vrev64q_m_u16(__inactive, __a, __p)
 #define vmvnq_m_u16(__inactive, __a, __p) __arm_vmvnq_m_u16(__inactive, __a, __p)
 #define vmlasq_n_u16(__a, __b, __c) __arm_vmlasq_n_u16(__a, __b, __c)
 #define vmlaq_n_u16(__a, __b, __c) __arm_vmlaq_n_u16(__a, __b, __c)
@@ -636,7 +609,6 @@
 #define vaddvaq_p_u16(__a, __b, __p) __arm_vaddvaq_p_u16(__a, __b, __p)
 #define vsriq_n_u16(__a, __b,  __imm) __arm_vsriq_n_u16(__a, __b,  __imm)
 #define vsliq_n_u16(__a, __b,  __imm) __arm_vsliq_n_u16(__a, __b,  __imm)
-#define vrev64q_m_s16(__inactive, __a, __p) __arm_vrev64q_m_s16(__inactive, __a, __p)
 #define vmvnq_m_s16(__inactive, __a, __p) __arm_vmvnq_m_s16(__inactive, __a, __p)
 #define vmlsdavxq_p_s16(__a, __b, __p) __arm_vmlsdavxq_p_s16(__a, __b, __p)
 #define vmlsdavq_p_s16(__a, __b, __p) __arm_vmlsdavq_p_s16(__a, __b, __p)
@@ -666,7 +638,6 @@
 #define vsliq_n_s16(__a, __b,  __imm) __arm_vsliq_n_s16(__a, __b,  __imm)
 #define vpselq_u32(__a, __b, __p) __arm_vpselq_u32(__a, __b, __p)
 #define vpselq_s32(__a, __b, __p) __arm_vpselq_s32(__a, __b, __p)
-#define vrev64q_m_u32(__inactive, __a, __p) __arm_vrev64q_m_u32(__inactive, __a, __p)
 #define vmvnq_m_u32(__inactive, __a, __p) __arm_vmvnq_m_u32(__inactive, __a, __p)
 #define vmlasq_n_u32(__a, __b, __c) __arm_vmlasq_n_u32(__a, __b, __c)
 #define vmlaq_n_u32(__a, __b, __c) __arm_vmlaq_n_u32(__a, __b, __c)
@@ -676,7 +647,6 @@
 #define vaddvaq_p_u32(__a, __b, __p) __arm_vaddvaq_p_u32(__a, __b, __p)
 #define vsriq_n_u32(__a, __b,  __imm) __arm_vsriq_n_u32(__a, __b,  __imm)
 #define vsliq_n_u32(__a, __b,  __imm) __arm_vsliq_n_u32(__a, __b,  __imm)
-#define vrev64q_m_s32(__inactive, __a, __p) __arm_vrev64q_m_s32(__inactive, __a, __p)
 #define vmvnq_m_s32(__inactive, __a, __p) __arm_vmvnq_m_s32(__inactive, __a, __p)
 #define vmlsdavxq_p_s32(__a, __b, __p) __arm_vmlsdavxq_p_s32(__a, __b, __p)
 #define vmlsdavq_p_s32(__a, __b, __p) __arm_vmlsdavq_p_s32(__a, __b, __p)
@@ -714,14 +684,11 @@
 #define vcvtbq_m_f32_f16(__inactive, __a, __p) __arm_vcvtbq_m_f32_f16(__inactive, __a, __p)
 #define vcvttq_m_f16_f32(__a, __b, __p) __arm_vcvttq_m_f16_f32(__a, __b, __p)
 #define vcvttq_m_f32_f16(__inactive, __a, __p) __arm_vcvttq_m_f32_f16(__inactive, __a, __p)
-#define vrev16q_m_s8(__inactive, __a, __p) __arm_vrev16q_m_s8(__inactive, __a, __p)
-#define vrev32q_m_f16(__inactive, __a, __p) __arm_vrev32q_m_f16(__inactive, __a, __p)
 #define vrmlaldavhq_p_s32(__a, __b, __p) __arm_vrmlaldavhq_p_s32(__a, __b, __p)
 #define vrmlaldavhxq_p_s32(__a, __b, __p) __arm_vrmlaldavhxq_p_s32(__a, __b, __p)
 #define vrmlsldavhq_p_s32(__a, __b, __p) __arm_vrmlsldavhq_p_s32(__a, __b, __p)
 #define vrmlsldavhxq_p_s32(__a, __b, __p) __arm_vrmlsldavhxq_p_s32(__a, __b, __p)
 #define vaddlvaq_p_u32(__a, __b, __p) __arm_vaddlvaq_p_u32(__a, __b, __p)
-#define vrev16q_m_u8(__inactive, __a, __p) __arm_vrev16q_m_u8(__inactive, __a, __p)
 #define vrmlaldavhq_p_u32(__a, __b, __p) __arm_vrmlaldavhq_p_u32(__a, __b, __p)
 #define vmvnq_m_n_s16(__inactive,  __imm, __p) __arm_vmvnq_m_n_s16(__inactive,  __imm, __p)
 #define vcmlaq_f16(__a, __b, __c) __arm_vcmlaq_f16(__a, __b, __c)
@@ -748,8 +715,6 @@
 #define vmovlbq_m_s8(__inactive, __a, __p) __arm_vmovlbq_m_s8(__inactive, __a, __p)
 #define vmovltq_m_s8(__inactive, __a, __p) __arm_vmovltq_m_s8(__inactive, __a, __p)
 #define vpselq_f16(__a, __b, __p) __arm_vpselq_f16(__a, __b, __p)
-#define vrev32q_m_s8(__inactive, __a, __p) __arm_vrev32q_m_s8(__inactive, __a, __p)
-#define vrev64q_m_f16(__inactive, __a, __p) __arm_vrev64q_m_f16(__inactive, __a, __p)
 #define vmvnq_m_n_u16(__inactive,  __imm, __p) __arm_vmvnq_m_n_u16(__inactive,  __imm, __p)
 #define vcvtmq_m_u16_f16(__inactive, __a, __p) __arm_vcvtmq_m_u16_f16(__inactive, __a, __p)
 #define vcvtnq_m_u16_f16(__inactive, __a, __p) __arm_vcvtnq_m_u16_f16(__inactive, __a, __p)
@@ -759,7 +724,6 @@
 #define vmlaldavq_p_u16(__a, __b, __p) __arm_vmlaldavq_p_u16(__a, __b, __p)
 #define vmovlbq_m_u8(__inactive, __a, __p) __arm_vmovlbq_m_u8(__inactive, __a, __p)
 #define vmovltq_m_u8(__inactive, __a, __p) __arm_vmovltq_m_u8(__inactive, __a, __p)
-#define vrev32q_m_u8(__inactive, __a, __p) __arm_vrev32q_m_u8(__inactive, __a, __p)
 #define vmvnq_m_n_s32(__inactive,  __imm, __p) __arm_vmvnq_m_n_s32(__inactive,  __imm, __p)
 #define vcmlaq_f32(__a, __b, __c) __arm_vcmlaq_f32(__a, __b, __c)
 #define vcmlaq_rot180_f32(__a, __b, __c) __arm_vcmlaq_rot180_f32(__a, __b, __c)
@@ -785,8 +749,6 @@
 #define vmovlbq_m_s16(__inactive, __a, __p) __arm_vmovlbq_m_s16(__inactive, __a, __p)
 #define vmovltq_m_s16(__inactive, __a, __p) __arm_vmovltq_m_s16(__inactive, __a, __p)
 #define vpselq_f32(__a, __b, __p) __arm_vpselq_f32(__a, __b, __p)
-#define vrev32q_m_s16(__inactive, __a, __p) __arm_vrev32q_m_s16(__inactive, __a, __p)
-#define vrev64q_m_f32(__inactive, __a, __p) __arm_vrev64q_m_f32(__inactive, __a, __p)
 #define vmvnq_m_n_u32(__inactive,  __imm, __p) __arm_vmvnq_m_n_u32(__inactive,  __imm, __p)
 #define vcvtmq_m_u32_f32(__inactive, __a, __p) __arm_vcvtmq_m_u32_f32(__inactive, __a, __p)
 #define vcvtnq_m_u32_f32(__inactive, __a, __p) __arm_vcvtnq_m_u32_f32(__inactive, __a, __p)
@@ -796,7 +758,6 @@
 #define vmlaldavq_p_u32(__a, __b, __p) __arm_vmlaldavq_p_u32(__a, __b, __p)
 #define vmovlbq_m_u16(__inactive, __a, __p) __arm_vmovlbq_m_u16(__inactive, __a, __p)
 #define vmovltq_m_u16(__inactive, __a, __p) __arm_vmovltq_m_u16(__inactive, __a, __p)
-#define vrev32q_m_u16(__inactive, __a, __p) __arm_vrev32q_m_u16(__inactive, __a, __p)
 #define vsriq_m_n_s8(__a, __b,  __imm, __p) __arm_vsriq_m_n_s8(__a, __b,  __imm, __p)
 #define vcvtq_m_n_f16_u16(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_f16_u16(__inactive, __a,  __imm6, __p)
 #define vqshluq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vqshluq_m_n_s8(__inactive, __a,  __imm, __p)
@@ -1372,18 +1333,6 @@
 #define vornq_x_u8(__a, __b, __p) __arm_vornq_x_u8(__a, __b, __p)
 #define vornq_x_u16(__a, __b, __p) __arm_vornq_x_u16(__a, __b, __p)
 #define vornq_x_u32(__a, __b, __p) __arm_vornq_x_u32(__a, __b, __p)
-#define vrev16q_x_s8(__a, __p) __arm_vrev16q_x_s8(__a, __p)
-#define vrev16q_x_u8(__a, __p) __arm_vrev16q_x_u8(__a, __p)
-#define vrev32q_x_s8(__a, __p) __arm_vrev32q_x_s8(__a, __p)
-#define vrev32q_x_s16(__a, __p) __arm_vrev32q_x_s16(__a, __p)
-#define vrev32q_x_u8(__a, __p) __arm_vrev32q_x_u8(__a, __p)
-#define vrev32q_x_u16(__a, __p) __arm_vrev32q_x_u16(__a, __p)
-#define vrev64q_x_s8(__a, __p) __arm_vrev64q_x_s8(__a, __p)
-#define vrev64q_x_s16(__a, __p) __arm_vrev64q_x_s16(__a, __p)
-#define vrev64q_x_s32(__a, __p) __arm_vrev64q_x_s32(__a, __p)
-#define vrev64q_x_u8(__a, __p) __arm_vrev64q_x_u8(__a, __p)
-#define vrev64q_x_u16(__a, __p) __arm_vrev64q_x_u16(__a, __p)
-#define vrev64q_x_u32(__a, __p) __arm_vrev64q_x_u32(__a, __p)
 #define vdupq_x_n_f16(__a, __p) __arm_vdupq_x_n_f16(__a, __p)
 #define vdupq_x_n_f32(__a, __p) __arm_vdupq_x_n_f32(__a, __p)
 #define vcaddq_rot90_x_f16(__a, __b, __p) __arm_vcaddq_rot90_x_f16(__a, __b, __p)
@@ -1438,9 +1387,6 @@
 #define vbrsrq_x_n_f32(__a, __b, __p) __arm_vbrsrq_x_n_f32(__a, __b, __p)
 #define vornq_x_f16(__a, __b, __p) __arm_vornq_x_f16(__a, __b, __p)
 #define vornq_x_f32(__a, __b, __p) __arm_vornq_x_f32(__a, __b, __p)
-#define vrev32q_x_f16(__a, __p) __arm_vrev32q_x_f16(__a, __p)
-#define vrev64q_x_f16(__a, __p) __arm_vrev64q_x_f16(__a, __p)
-#define vrev64q_x_f32(__a, __p) __arm_vrev64q_x_f32(__a, __p)
 #define vadciq_s32(__a, __b,  __carry_out) __arm_vadciq_s32(__a, __b,  __carry_out)
 #define vadciq_u32(__a, __b,  __carry_out) __arm_vadciq_u32(__a, __b,  __carry_out)
 #define vadciq_m_s32(__inactive, __a, __b,  __carry_out, __p) __arm_vadciq_m_s32(__inactive, __a, __b,  __carry_out, __p)
@@ -1719,69 +1665,6 @@ __arm_vmvnq_n_s32 (const int32_t __imm)
   return __builtin_mve_vmvnq_n_sv4si (__imm);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev16q_s8 (int8x16_t __a)
-{
-  return __builtin_mve_vrev16q_sv16qi (__a);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_s8 (int8x16_t __a)
-{
-  return __builtin_mve_vrev32q_sv16qi (__a);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_s16 (int16x8_t __a)
-{
-  return __builtin_mve_vrev32q_sv8hi (__a);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_s8 (int8x16_t __a)
-{
-  return __builtin_mve_vrev64q_sv16qi (__a);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_s16 (int16x8_t __a)
-{
-  return __builtin_mve_vrev64q_sv8hi (__a);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_s32 (int32x4_t __a)
-{
-  return __builtin_mve_vrev64q_sv4si (__a);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_u8 (uint8x16_t __a)
-{
-  return __builtin_mve_vrev64q_uv16qi (__a);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_u16 (uint16x8_t __a)
-{
-  return __builtin_mve_vrev64q_uv8hi (__a);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_u32 (uint32x4_t __a)
-{
-  return __builtin_mve_vrev64q_uv4si (__a);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_u8 (uint8x16_t __a)
@@ -1845,20 +1728,6 @@ __arm_vaddvq_u32 (uint32x4_t __a)
   return __builtin_mve_vaddvq_uv4si (__a);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_u8 (uint8x16_t __a)
-{
-  return __builtin_mve_vrev32q_uv16qi (__a);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_u16 (uint16x8_t __a)
-{
-  return __builtin_mve_vrev32q_uv8hi (__a);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmovltq_u8 (uint8x16_t __a)
@@ -1901,13 +1770,6 @@ __arm_vmvnq_n_u32 (const int __imm)
   return __builtin_mve_vmvnq_n_uv4si (__imm);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev16q_u8 (uint8x16_t __a)
-{
-  return __builtin_mve_vrev16q_uv16qi (__a);
-}
-
 __extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddlvq_u32 (uint32x4_t __a)
@@ -2927,13 +2789,6 @@ __arm_vpselq_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
   return __builtin_mve_vpselq_sv16qi (__a, __b, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_m_u8 (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_uv16qi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
@@ -2997,13 +2852,6 @@ __arm_vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm)
   return __builtin_mve_vsliq_n_uv16qi (__a, __b, __imm);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_m_s8 (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_sv16qi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_s8 (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
@@ -3207,13 +3055,6 @@ __arm_vpselq_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
   return __builtin_mve_vpselq_sv8hi (__a, __b, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_m_u16 (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_uv8hi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
@@ -3277,13 +3118,6 @@ __arm_vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm)
   return __builtin_mve_vsliq_n_uv8hi (__a, __b, __imm);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_sv8hi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
@@ -3487,13 +3321,6 @@ __arm_vpselq_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
   return __builtin_mve_vpselq_sv4si (__a, __b, __p);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_m_u32 (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_uv4si (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p)
@@ -3557,13 +3384,6 @@ __arm_vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm)
   return __builtin_mve_vsliq_n_uv4si (__a, __b, __imm);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_m_s32 (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_sv4si (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_s32 (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
@@ -3795,13 +3615,6 @@ __arm_vaddlvaq_p_s32 (int64_t __a, int32x4_t __b, mve_pred16_t __p)
   return __builtin_mve_vaddlvaq_p_sv4si (__a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev16q_m_s8 (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev16q_m_sv16qi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrmlaldavhq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
@@ -3837,13 +3650,6 @@ __arm_vaddlvaq_p_u32 (uint64_t __a, uint32x4_t __b, mve_pred16_t __p)
   return __builtin_mve_vaddlvaq_p_uv4si (__a, __b, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev16q_m_u8 (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev16q_m_uv16qi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrmlaldavhq_p_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
@@ -3928,13 +3734,6 @@ __arm_vmovltq_m_s8 (int16x8_t __inactive, int8x16_t __a, mve_pred16_t __p)
   return __builtin_mve_vmovltq_m_sv16qi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_m_s8 (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev32q_m_sv16qi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_n_u16 (uint16x8_t __inactive, const int __imm, mve_pred16_t __p)
@@ -3970,13 +3769,6 @@ __arm_vmovltq_m_u8 (uint16x8_t __inactive, uint8x16_t __a, mve_pred16_t __p)
   return __builtin_mve_vmovltq_m_uv16qi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_m_u8 (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev32q_m_uv16qi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_n_s32 (int32x4_t __inactive, const int __imm, mve_pred16_t __p)
@@ -4054,13 +3846,6 @@ __arm_vmovltq_m_s16 (int32x4_t __inactive, int16x8_t __a, mve_pred16_t __p)
   return __builtin_mve_vmovltq_m_sv8hi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev32q_m_sv8hi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_n_u32 (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
@@ -4096,13 +3881,6 @@ __arm_vmovltq_m_u16 (uint32x4_t __inactive, uint16x8_t __a, mve_pred16_t __p)
   return __builtin_mve_vmovltq_m_uv8hi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_m_u16 (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev32q_m_uv8hi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -7659,90 +7437,6 @@ __arm_vornq_x_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
   return __builtin_mve_vornq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev16q_x_s8 (int8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev16q_m_sv16qi (__arm_vuninitializedq_s8 (), __a, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev16q_x_u8 (uint8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev16q_m_uv16qi (__arm_vuninitializedq_u8 (), __a, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_x_s8 (int8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev32q_m_sv16qi (__arm_vuninitializedq_s8 (), __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_x_s16 (int16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev32q_m_sv8hi (__arm_vuninitializedq_s16 (), __a, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_x_u8 (uint8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev32q_m_uv16qi (__arm_vuninitializedq_u8 (), __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_x_u16 (uint16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev32q_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_x_s8 (int8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_sv16qi (__arm_vuninitializedq_s8 (), __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_x_s16 (int16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_sv8hi (__arm_vuninitializedq_s16 (), __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_x_s32 (int32x4_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_sv4si (__arm_vuninitializedq_s32 (), __a, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_x_u8 (uint8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_uv16qi (__arm_vuninitializedq_u8 (), __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_x_u16 (uint16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_x_u32 (uint32x4_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_uv4si (__arm_vuninitializedq_u32 (), __a, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vadciq_s32 (int32x4_t __a, int32x4_t __b, unsigned * __carry_out)
@@ -8463,20 +8157,6 @@ __arm_vst4q_f32 (float32_t * __addr, float32x4x4_t __value)
   __builtin_mve_vst4qv4sf (__addr, __rv.__o);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_f16 (float16x8_t __a)
-{
-  return __builtin_mve_vrev64q_fv8hf (__a);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_f32 (float32x4_t __a)
-{
-  return __builtin_mve_vrev64q_fv4sf (__a);
-}
-
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vdupq_n_f16 (float16_t __a)
@@ -8491,13 +8171,6 @@ __arm_vdupq_n_f32 (float32_t __a)
   return __builtin_mve_vdupq_n_fv4sf (__a);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_f16 (float16x8_t __a)
-{
-  return __builtin_mve_vrev32q_fv8hf (__a);
-}
-
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvttq_f32_f16 (float16x8_t __a)
@@ -8961,13 +8634,6 @@ __arm_vcvttq_m_f32_f16 (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __
   return __builtin_mve_vcvttq_m_f32_f16v4sf (__inactive, __a, __p);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_m_f16 (float16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev32q_m_fv8hf (__inactive, __a, __p);
-}
-
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmlaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
@@ -9066,13 +8732,6 @@ __arm_vpselq_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
   return __builtin_mve_vpselq_fv8hf (__a, __b, __p);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_m_f16 (float16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_fv8hf (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m_u16_f16 (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
@@ -9199,13 +8858,6 @@ __arm_vpselq_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
   return __builtin_mve_vpselq_fv4sf (__a, __b, __p);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_m_f32 (float32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_fv4sf (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m_u32_f32 (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
@@ -10164,27 +9816,6 @@ __arm_vornq_x_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
   return __builtin_mve_vornq_m_fv4sf (__arm_vuninitializedq_f32 (), __a, __b, __p);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_x_f16 (float16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev32q_m_fv8hf (__arm_vuninitializedq_f16 (), __a, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_x_f16 (float16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_fv8hf (__arm_vuninitializedq_f16 (), __a, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_x_f32 (float32x4_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vrev64q_m_fv4sf (__arm_vuninitializedq_f32 (), __a, __p);
-}
-
 __extension__ extern __inline float16x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vld4q_f16 (float16_t const * __addr)
@@ -10443,69 +10074,6 @@ __arm_vmvnq (int32x4_t __a)
  return __arm_vmvnq_s32 (__a);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev16q (int8x16_t __a)
-{
- return __arm_vrev16q_s8 (__a);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q (int8x16_t __a)
-{
- return __arm_vrev32q_s8 (__a);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q (int16x8_t __a)
-{
- return __arm_vrev32q_s16 (__a);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q (int8x16_t __a)
-{
- return __arm_vrev64q_s8 (__a);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q (int16x8_t __a)
-{
- return __arm_vrev64q_s16 (__a);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q (int32x4_t __a)
-{
- return __arm_vrev64q_s32 (__a);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q (uint8x16_t __a)
-{
- return __arm_vrev64q_u8 (__a);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q (uint16x8_t __a)
-{
- return __arm_vrev64q_u16 (__a);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q (uint32x4_t __a)
-{
- return __arm_vrev64q_u32 (__a);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq (uint8x16_t __a)
@@ -10569,20 +10137,6 @@ __arm_vaddvq (uint32x4_t __a)
  return __arm_vaddvq_u32 (__a);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q (uint8x16_t __a)
-{
- return __arm_vrev32q_u8 (__a);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q (uint16x8_t __a)
-{
- return __arm_vrev32q_u16 (__a);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmovltq (uint8x16_t __a)
@@ -10611,13 +10165,6 @@ __arm_vmovlbq (uint16x8_t __a)
  return __arm_vmovlbq_u16 (__a);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev16q (uint8x16_t __a)
-{
- return __arm_vrev16q_u8 (__a);
-}
-
 __extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddlvq (uint32x4_t __a)
@@ -11556,13 +11103,6 @@ __arm_vpselq (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
  return __arm_vpselq_s8 (__a, __b, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_m (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vrev64q_m_u8 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
@@ -11626,13 +11166,6 @@ __arm_vsliq (uint8x16_t __a, uint8x16_t __b, const int __imm)
  return __arm_vsliq_n_u8 (__a, __b, __imm);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_m (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vrev64q_m_s8 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
@@ -11836,13 +11369,6 @@ __arm_vpselq (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
  return __arm_vpselq_s16 (__a, __b, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_m (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vrev64q_m_u16 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
@@ -11906,13 +11432,6 @@ __arm_vsliq (uint16x8_t __a, uint16x8_t __b, const int __imm)
  return __arm_vsliq_n_u16 (__a, __b, __imm);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vrev64q_m_s16 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
@@ -12116,13 +11635,6 @@ __arm_vpselq (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
  return __arm_vpselq_s32 (__a, __b, __p);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_m (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vrev64q_m_u32 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p)
@@ -12186,13 +11698,6 @@ __arm_vsliq (uint32x4_t __a, uint32x4_t __b, const int __imm)
  return __arm_vsliq_n_u32 (__a, __b, __imm);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_m (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vrev64q_m_s32 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
@@ -12424,13 +11929,6 @@ __arm_vaddlvaq_p (int64_t __a, int32x4_t __b, mve_pred16_t __p)
  return __arm_vaddlvaq_p_s32 (__a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev16q_m (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vrev16q_m_s8 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrmlaldavhq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
@@ -12466,13 +11964,6 @@ __arm_vaddlvaq_p (uint64_t __a, uint32x4_t __b, mve_pred16_t __p)
  return __arm_vaddlvaq_p_u32 (__a, __b, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev16q_m (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vrev16q_m_u8 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrmlaldavhq_p (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
@@ -12557,13 +12048,6 @@ __arm_vmovltq_m (int16x8_t __inactive, int8x16_t __a, mve_pred16_t __p)
  return __arm_vmovltq_m_s8 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_m (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vrev32q_m_s8 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (uint16x8_t __inactive, const int __imm, mve_pred16_t __p)
@@ -12599,13 +12083,6 @@ __arm_vmovltq_m (uint16x8_t __inactive, uint8x16_t __a, mve_pred16_t __p)
  return __arm_vmovltq_m_u8 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_m (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vrev32q_m_u8 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (int32x4_t __inactive, const int __imm, mve_pred16_t __p)
@@ -12683,13 +12160,6 @@ __arm_vmovltq_m (int32x4_t __inactive, int16x8_t __a, mve_pred16_t __p)
  return __arm_vmovltq_m_s16 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vrev32q_m_s16 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
@@ -12725,13 +12195,6 @@ __arm_vmovltq_m (uint32x4_t __inactive, uint16x8_t __a, mve_pred16_t __p)
  return __arm_vmovltq_m_u16 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_m (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vrev32q_m_u16 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -15791,90 +15254,6 @@ __arm_vornq_x (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
  return __arm_vornq_x_u32 (__a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev16q_x (int8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vrev16q_x_s8 (__a, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev16q_x (uint8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vrev16q_x_u8 (__a, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_x (int8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vrev32q_x_s8 (__a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_x (int16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vrev32q_x_s16 (__a, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_x (uint8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vrev32q_x_u8 (__a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_x (uint16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vrev32q_x_u16 (__a, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_x (int8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vrev64q_x_s8 (__a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_x (int16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vrev64q_x_s16 (__a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_x (int32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vrev64q_x_s32 (__a, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_x (uint8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vrev64q_x_u8 (__a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_x (uint16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vrev64q_x_u16 (__a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_x (uint32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vrev64q_x_u32 (__a, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vadciq (int32x4_t __a, int32x4_t __b, unsigned * __carry_out)
@@ -16367,20 +15746,6 @@ __arm_vst4q (float32_t * __addr, float32x4x4_t __value)
  __arm_vst4q_f32 (__addr, __value);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q (float16x8_t __a)
-{
- return __arm_vrev64q_f16 (__a);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q (float32x4_t __a)
-{
- return __arm_vrev64q_f32 (__a);
-}
-
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vdupq_n (float16_t __a)
@@ -16395,13 +15760,6 @@ __arm_vdupq_n (float32_t __a)
  return __arm_vdupq_n_f32 (__a);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q (float16x8_t __a)
-{
- return __arm_vrev32q_f16 (__a);
-}
-
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvttq_f32 (float16x8_t __a)
@@ -16682,13 +16040,6 @@ __arm_vcvttq_m (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p)
  return __arm_vcvttq_m_f32_f16 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_m (float16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vrev32q_m_f16 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcmlaq (float16x8_t __a, float16x8_t __b, float16x8_t __c)
@@ -16787,13 +16138,6 @@ __arm_vpselq (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
  return __arm_vpselq_f16 (__a, __b, __p);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_m (float16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vrev64q_m_f16 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
@@ -16920,13 +16264,6 @@ __arm_vpselq (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
  return __arm_vpselq_f32 (__a, __b, __p);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_m (float32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vrev64q_m_f32 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
@@ -17627,27 +16964,6 @@ __arm_vornq_x (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
  return __arm_vornq_x_f32 (__a, __b, __p);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev32q_x (float16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vrev32q_x_f16 (__a, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_x (float16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vrev64q_x_f16 (__a, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrev64q_x (float32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vrev64q_x_f32 (__a, __p);
-}
-
 __extension__ extern __inline float16x8x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vld4q (float16_t const * __addr)
@@ -18021,30 +17337,11 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x4_t]: __arm_vst4q_f16 (__ARM_mve_coerce(__p0, float16_t *), __ARM_mve_coerce(__p1, float16x8x4_t)), \
   int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x4_t]: __arm_vst4q_f32 (__ARM_mve_coerce(__p0, float32_t *), __ARM_mve_coerce(__p1, float32x4x4_t)));})
 
-#define __arm_vrev64q(p0) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrev64q_s8 (__ARM_mve_coerce(__p0, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrev64q_s16 (__ARM_mve_coerce(__p0, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vrev64q_s32 (__ARM_mve_coerce(__p0, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrev64q_u8 (__ARM_mve_coerce(__p0, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrev64q_u16 (__ARM_mve_coerce(__p0, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrev64q_u32 (__ARM_mve_coerce(__p0, uint32x4_t)), \
-  int (*)[__ARM_mve_type_float16x8_t]: __arm_vrev64q_f16 (__ARM_mve_coerce(__p0, float16x8_t)), \
-  int (*)[__ARM_mve_type_float32x4_t]: __arm_vrev64q_f32 (__ARM_mve_coerce(__p0, float32x4_t)));})
-
 #define __arm_vdupq_n(p0) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_float16x8_t]: __arm_vdupq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t)), \
   int (*)[__ARM_mve_type_float32x4_t]: __arm_vdupq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t)));})
 
-#define __arm_vrev32q(p0) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrev32q_s8 (__ARM_mve_coerce(__p0, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrev32q_s16 (__ARM_mve_coerce(__p0, int16x8_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrev32q_u8 (__ARM_mve_coerce(__p0, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrev32q_u16 (__ARM_mve_coerce(__p0, uint16x8_t)), \
-  int (*)[__ARM_mve_type_float16x8_t]: __arm_vrev32q_f16 (__ARM_mve_coerce(__p0, float16x8_t)));})
-
 #define __arm_vcvtbq_f32(p0) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_float16x8_t]: __arm_vcvtbq_f32_f16 (__ARM_mve_coerce(__p0, float16x8_t)));})
@@ -18053,11 +17350,6 @@ extern void *__ARM_undef;
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_float16x8_t]: __arm_vcvttq_f32_f16 (__ARM_mve_coerce(__p0, float16x8_t)));})
 
-#define __arm_vrev16q(p0) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrev16q_s8 (__ARM_mve_coerce(__p0, int8x16_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrev16q_u8 (__ARM_mve_coerce(__p0, uint8x16_t)));})
-
 #define __arm_vmvnq(p0) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vmvnq_s8 (__ARM_mve_coerce(__p0, int8x16_t)), \
@@ -18557,27 +17849,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmasq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(p2, double)), \
   int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmasq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(p2, double)));})
 
-#define __arm_vrev64q_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrev64q_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrev64q_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrev64q_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vrev64q_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vrev64q_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrev64q_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vrev64q_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vrev64q_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
-
-#define __arm_vrev32q_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrev32q_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrev32q_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vrev32q_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vrev32q_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vrev32q_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2));})
-
 #define __arm_vpselq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -18592,12 +17863,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vpselq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
   int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vpselq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
 
-#define __arm_vrev16q_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrev16q_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vrev16q_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2));})
-
 #define __arm_vbicq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -19144,25 +18409,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vornq_x_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
   int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vornq_x_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
 
-#define __arm_vrev32q_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrev32q_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrev32q_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrev32q_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrev32q_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_float16x8_t]: __arm_vrev32q_x_f16 (__ARM_mve_coerce(__p1, float16x8_t), p2));})
-
-#define __arm_vrev64q_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrev64q_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrev64q_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vrev64q_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrev64q_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrev64q_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrev64q_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), p2), \
-  int (*)[__ARM_mve_type_float16x8_t]: __arm_vrev64q_x_f16 (__ARM_mve_coerce(__p1, float16x8_t), p2), \
-  int (*)[__ARM_mve_type_float32x4_t]: __arm_vrev64q_x_f32 (__ARM_mve_coerce(__p1, float32x4_t), p2));})
-
 #define __arm_vcmulq_rot90_x(p1,p2,p3)  ({ __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
@@ -19240,27 +18486,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vmvnq_u16 (__ARM_mve_coerce(__p0, uint16x8_t)), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vmvnq_u32 (__ARM_mve_coerce(__p0, uint32x4_t)));})
 
-#define __arm_vrev16q(p0) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrev16q_s8 (__ARM_mve_coerce(__p0, int8x16_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrev16q_u8 (__ARM_mve_coerce(__p0, uint8x16_t)));})
-
-#define __arm_vrev32q(p0) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrev32q_s8 (__ARM_mve_coerce(__p0, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrev32q_s16 (__ARM_mve_coerce(__p0, int16x8_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrev32q_u8 (__ARM_mve_coerce(__p0, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrev32q_u16 (__ARM_mve_coerce(__p0, uint16x8_t)));})
-
-#define __arm_vrev64q(p0) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrev64q_s8 (__ARM_mve_coerce(__p0, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrev64q_s16 (__ARM_mve_coerce(__p0, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vrev64q_s32 (__ARM_mve_coerce(__p0, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrev64q_u8 (__ARM_mve_coerce(__p0, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrev64q_u16 (__ARM_mve_coerce(__p0, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrev64q_u32 (__ARM_mve_coerce(__p0, uint32x4_t)));})
-
 #define __arm_vqshluq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshluq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
@@ -19420,16 +18645,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqrdmlsdhxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqrdmlsdhxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
 
-#define __arm_vrev64q_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrev64q_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrev64q_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrev64q_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vrev64q_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vrev64q_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrev64q_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vsliq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -19582,20 +18797,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t]: __arm_vmovlbq_m_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t]: __arm_vmovlbq_m_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), p2));})
 
-#define __arm_vrev32q_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrev32q_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vrev32q_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vrev32q_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vrev32q_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2));})
-
-#define __arm_vrev16q_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vrev16q_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vrev16q_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2));})
-
 #define __arm_vmovltq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -19994,22 +19195,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vornq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vornq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
 
-#define __arm_vrev32q_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrev32q_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrev32q_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrev32q_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrev32q_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2));})
-
-#define __arm_vrev64q_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrev64q_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrev64q_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vrev64q_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrev64q_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrev64q_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrev64q_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vbicq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
@@ -20090,11 +19275,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vmvnq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vmvnq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
-#define __arm_vrev16q_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrev16q_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrev16q_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2));})
-
 #define __arm_vdwdupq_x_u8(p1,p2,p3,p4) ({ __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_int_n]: __arm_vdwdupq_x_n_u8 ((uint32_t) __p1, p2, p3, p4), \
-- 
2.34.1



* [PATCH 06/20] arm: [MVE intrinsics] factorize vdupq
From: Christophe Lyon @ 2023-05-10 13:30 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize the vdupq builtins so that they use parameterized names.
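
Renaming the patterns with a leading '@' makes genemit produce
parameterized helpers (code_for_*, gen_*) that take the iterator
values as arguments, so a single C++ call site can select the right
variant instead of hard-coding one gen_* function per sign and mode.
As a minimal sketch, assuming a helper of roughly this shape (the
exact signature and operand names are illustrative, not part of this
patch):

  /* Pick the insn that used to be gen_mve_vdupq_n_sv8hi, i.e. the
     VDUPQ_N_S unspec at V8HImode, then emit it.  */
  insn_code icode = code_for_mve_q_n (VDUPQ_N_S, E_V8HImode);
  emit_insn (GEN_FCN (icode) (target, source));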

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MVE_FP_M_N_VDUPQ_ONLY)
	(MVE_FP_N_VDUPQ_ONLY): New.
	(mve_insn): Add vdupq.
	* config/arm/mve.md (mve_vdupq_n_f<mode>): Rename into ...
	(@mve_<mve_insn>q_n_f<mode>): ... this.
	(mve_vdupq_n_<supf><mode>): Rename into ...
	(@mve_<mve_insn>q_n_<supf><mode>): ... this.
	(mve_vdupq_m_n_<supf><mode>): Rename into ...
	(@mve_<mve_insn>q_m_n_<supf><mode>): ... this.
	(mve_vdupq_m_n_f<mode>): Rename into ...
	(@mve_<mve_insn>q_m_n_f<mode>): ... this.
---
 gcc/config/arm/iterators.md | 10 ++++++++++
 gcc/config/arm/mve.md       | 20 ++++++++++----------
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 878210471c8..aff4e7fb814 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -391,6 +391,14 @@ (define_int_iterator MVE_FP_M_VREV32Q_ONLY [
 		     VREV32Q_M_F
 		     ])
 
+(define_int_iterator MVE_FP_M_N_VDUPQ_ONLY [
+		     VDUPQ_M_N_F
+		     ])
+
+(define_int_iterator MVE_FP_N_VDUPQ_ONLY [
+		     VDUPQ_N_F
+		     ])
+
 ;; MVE integer binary operations.
 (define_code_iterator MVE_INT_BINARY_RTX [plus minus mult])
 
@@ -762,6 +770,8 @@ (define_int_attr mve_insn [
 		 (VCLSQ_S "vcls")
 		 (VCLZQ_M_S "vclz") (VCLZQ_M_U "vclz")
 		 (VCREATEQ_S "vcreate") (VCREATEQ_U "vcreate") (VCREATEQ_F "vcreate")
+		 (VDUPQ_M_N_S "vdup") (VDUPQ_M_N_U "vdup") (VDUPQ_M_N_F "vdup")
+		 (VDUPQ_N_S "vdup") (VDUPQ_N_U "vdup") (VDUPQ_N_F "vdup")
 		 (VEORQ_M_S "veor") (VEORQ_M_U "veor") (VEORQ_M_F "veor")
 		 (VHADDQ_M_N_S "vhadd") (VHADDQ_M_N_U "vhadd")
 		 (VHADDQ_M_S "vhadd") (VHADDQ_M_U "vhadd")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 4dfcd6c4280..0c4e4e60bc4 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -179,14 +179,14 @@ (define_insn "mve_v<absneg_str>q_f<mode>"
 ;;
 ;; [vdupq_n_f])
 ;;
-(define_insn "mve_vdupq_n_f<mode>"
+(define_insn "@mve_<mve_insn>q_n_f<mode>"
   [
    (set (match_operand:MVE_0 0 "s_register_operand" "=w")
 	(unspec:MVE_0 [(match_operand:<V_elem> 1 "s_register_operand" "r")]
-	 VDUPQ_N_F))
+	 MVE_FP_N_VDUPQ_ONLY))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vdup.%#<V_sz_elem>\t%q0, %1"
+  "<mve_insn>.%#<V_sz_elem>\t%q0, %1"
   [(set_attr "type" "mve_move")
 ])
 
@@ -310,14 +310,14 @@ (define_expand "mve_vmvnq_s<mode>"
 ;;
 ;; [vdupq_n_u, vdupq_n_s])
 ;;
-(define_insn "mve_vdupq_n_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:<V_elem> 1 "s_register_operand" "r")]
 	 VDUPQ_N))
   ]
   "TARGET_HAVE_MVE"
-  "vdup.%#<V_sz_elem>\t%q0, %1"
+  "<mve_insn>.%#<V_sz_elem>\t%q0, %1"
   [(set_attr "type" "mve_move")
 ])
 
@@ -2006,7 +2006,7 @@ (define_insn "@mve_vcmp<mve_cmp_op1>q_m_<supf><mode>"
 ;;
 ;; [vdupq_m_n_s, vdupq_m_n_u])
 ;;
-(define_insn "mve_vdupq_m_n_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
@@ -2015,7 +2015,7 @@ (define_insn "mve_vdupq_m_n_<supf><mode>"
 	 VDUPQ_M_N))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vdupt.%#<V_sz_elem>\t%q0, %2"
+  "vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
@@ -2666,16 +2666,16 @@ (define_insn "mve_vcvttq_m_f32_f16v4sf"
 ;;
 ;; [vdupq_m_n_f])
 ;;
-(define_insn "mve_vdupq_m_n_f<mode>"
+(define_insn "@mve_<mve_insn>q_m_n_f<mode>"
   [
    (set (match_operand:MVE_0 0 "s_register_operand" "=w")
 	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
 		       (match_operand:<V_elem> 2 "s_register_operand" "r")
 		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VDUPQ_M_N_F))
+	 MVE_FP_M_N_VDUPQ_ONLY))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vdupt.%#<V_sz_elem>\t%q0, %2"
+  "vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-- 
2.34.1



* [PATCH 07/20] arm: [MVE intrinsics] add unary_n shape
From: Christophe Lyon @ 2023-05-10 13:30 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the unary_n shape description.
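
A unary_n intrinsic takes a single scalar and returns a vector; the
shape makes only the _m predicated variant overloadable, so the type
and mode suffixes stay explicit for the unpredicated and _x forms.
As a user-level sketch (function names are illustrative; assumes an
MVE-enabled target, e.g. -march=armv8.1-m.main+mve):

  #include <arm_mve.h>

  int16x8_t
  splat (int16_t a)
  {
    return vdupq_n_s16 (a);	/* Explicit _n_s16 suffixes.  */
  }

  int16x8_t
  splat_m (int16x8_t inactive, int16_t a, mve_pred16_t p)
  {
    /* Overloaded _m form; resolves to vdupq_m_n_s16.  */
    return vdupq_m (inactive, a, p);
  }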

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (unary_n): New.
	* config/arm/arm-mve-builtins-shapes.h (unary_n): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 53 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 54 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index ea0112b3e99..c78683aaba2 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1094,6 +1094,59 @@ struct unary_convert_def : public overloaded_base<1>
 };
 SHAPE (unary_convert)
 
+/* <T0>_t vfoo[_n]_t0(<S0>_t)
+
+   Example: vdupq.
+   int16x8_t [__arm_]vdupq_n_s16(int16_t a)
+   int16x8_t [__arm_]vdupq_m[_n_s16](int16x8_t inactive, int16_t a, mve_pred16_t p)
+   int16x8_t [__arm_]vdupq_x_n_s16(int16_t a, mve_pred16_t p)  */
+struct unary_n_def : public overloaded_base<0>
+{
+  bool
+  explicit_type_suffix_p (unsigned int, enum predication_index pred,
+			  enum mode_suffix_index) const override
+  {
+    return pred != PRED_m;
+  }
+
+  bool
+  explicit_mode_suffix_p (enum predication_index pred,
+			  enum mode_suffix_index mode) const override
+  {
+    return ((mode == MODE_n)
+	    && (pred != PRED_m));
+  }
+
+  bool
+  skip_overload_p (enum predication_index pred, enum mode_suffix_index mode)
+    const override
+  {
+    switch (mode)
+      {
+      case MODE_n:
+	return pred != PRED_m;
+
+      default:
+	gcc_unreachable ();
+      }
+  }
+
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
+    build_all (b, "v0,s0", group, MODE_n, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    return r.resolve_unary_n ();
+  }
+};
+SHAPE (unary_n)
+
 } /* end namespace arm_mve */
 
 #undef SHAPE
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index 59c4dc39c39..a35faec2542 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -54,6 +54,7 @@ namespace arm_mve
     extern const function_shape *const inherent;
     extern const function_shape *const unary;
     extern const function_shape *const unary_convert;
+    extern const function_shape *const unary_n;
 
   } /* end namespace arm_mve::shapes */
 } /* end namespace arm_mve */
-- 
2.34.1



* [PATCH 08/20] arm: [MVE intrinsics] rework vdupq
From: Christophe Lyon @ 2023-05-10 13:30 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vdupq using the new MVE builtins framework.
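
The deleted wrappers in arm_mve.h show the mapping the framework now
performs internally: the _x variants were hand-written as the _m form
with an undefined inactive vector.  As an equivalence sketch (the
function name is illustrative; assumes an MVE-enabled target):

  #include <arm_mve.h>

  /* What __arm_vdupq_x_n_s16 (a, p) used to expand to by hand.  */
  int16x8_t
  vdupq_x_equiv (int16_t a, mve_pred16_t p)
  {
    return vdupq_m (vuninitializedq_s16 (), a, p);
  }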

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (FUNCTION_ONLY_N): New.
	(vdupq): New.
	* config/arm/arm-mve-builtins-base.def (vdupq): New.
	* config/arm/arm-mve-builtins-base.h (vdupq): New.
	* config/arm/arm_mve.h (vdupq_n): Remove.
	(vdupq_m): Remove.
	(vdupq_n_f16): Remove.
	(vdupq_n_f32): Remove.
	(vdupq_n_s8): Remove.
	(vdupq_n_s16): Remove.
	(vdupq_n_s32): Remove.
	(vdupq_n_u8): Remove.
	(vdupq_n_u16): Remove.
	(vdupq_n_u32): Remove.
	(vdupq_m_n_u8): Remove.
	(vdupq_m_n_s8): Remove.
	(vdupq_m_n_u16): Remove.
	(vdupq_m_n_s16): Remove.
	(vdupq_m_n_u32): Remove.
	(vdupq_m_n_s32): Remove.
	(vdupq_m_n_f16): Remove.
	(vdupq_m_n_f32): Remove.
	(vdupq_x_n_s8): Remove.
	(vdupq_x_n_s16): Remove.
	(vdupq_x_n_s32): Remove.
	(vdupq_x_n_u8): Remove.
	(vdupq_x_n_u16): Remove.
	(vdupq_x_n_u32): Remove.
	(vdupq_x_n_f16): Remove.
	(vdupq_x_n_f32): Remove.
	(__arm_vdupq_n_s8): Remove.
	(__arm_vdupq_n_s16): Remove.
	(__arm_vdupq_n_s32): Remove.
	(__arm_vdupq_n_u8): Remove.
	(__arm_vdupq_n_u16): Remove.
	(__arm_vdupq_n_u32): Remove.
	(__arm_vdupq_m_n_u8): Remove.
	(__arm_vdupq_m_n_s8): Remove.
	(__arm_vdupq_m_n_u16): Remove.
	(__arm_vdupq_m_n_s16): Remove.
	(__arm_vdupq_m_n_u32): Remove.
	(__arm_vdupq_m_n_s32): Remove.
	(__arm_vdupq_x_n_s8): Remove.
	(__arm_vdupq_x_n_s16): Remove.
	(__arm_vdupq_x_n_s32): Remove.
	(__arm_vdupq_x_n_u8): Remove.
	(__arm_vdupq_x_n_u16): Remove.
	(__arm_vdupq_x_n_u32): Remove.
	(__arm_vdupq_n_f16): Remove.
	(__arm_vdupq_n_f32): Remove.
	(__arm_vdupq_m_n_f16): Remove.
	(__arm_vdupq_m_n_f32): Remove.
	(__arm_vdupq_x_n_f16): Remove.
	(__arm_vdupq_x_n_f32): Remove.
	(__arm_vdupq_n): Remove.
	(__arm_vdupq_m): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |  10 +
 gcc/config/arm/arm-mve-builtins-base.def |   2 +
 gcc/config/arm/arm-mve-builtins-base.h   |   1 +
 gcc/config/arm/arm_mve.h                 | 333 -----------------------
 4 files changed, 13 insertions(+), 333 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 76294ddb7fb..cb572130c2b 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -176,6 +176,15 @@ namespace arm_mve {
     UNSPEC##_M_S, UNSPEC##_M_U, UNSPEC##_M_F,				\
     -1, -1, -1))
 
+  /* Helper for builtins with only unspec codes, _m predicated
+     overrides, only _n version.  */
+#define FUNCTION_ONLY_N(NAME, UNSPEC) FUNCTION				\
+  (NAME, unspec_mve_function_exact_insn,				\
+   (-1, -1, -1,								\
+    UNSPEC##_N_S, UNSPEC##_N_U, UNSPEC##_N_F,				\
+    -1, -1, -1,								\
+    UNSPEC##_M_N_S, UNSPEC##_M_N_U, UNSPEC##_M_N_F))
+
   /* Helper for builtins with only unspec codes, _m predicated
      overrides, only _n version, no floating-point.  */
 #define FUNCTION_ONLY_N_NO_F(NAME, UNSPEC) FUNCTION			\
@@ -247,6 +256,7 @@ FUNCTION (vcmpltq, unspec_based_mve_function_exact_insn_vcmp, (LT, UNKNOWN, LT,
 FUNCTION (vcmpcsq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GEU, UNKNOWN, UNKNOWN, VCMPCSQ_M_U, UNKNOWN, UNKNOWN, VCMPCSQ_M_N_U, UNKNOWN))
 FUNCTION (vcmphiq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GTU, UNKNOWN, UNKNOWN, VCMPHIQ_M_U, UNKNOWN, UNKNOWN, VCMPHIQ_M_N_U, UNKNOWN))
 FUNCTION_WITHOUT_M_N (vcreateq, VCREATEQ)
+FUNCTION_ONLY_N (vdupq, VDUPQ)
 FUNCTION_WITH_RTX_M (veorq, XOR, VEORQ)
 FUNCTION_WITH_M_N_NO_F (vhaddq, VHADDQ)
 FUNCTION_WITH_M_N_NO_F (vhsubq, VHSUBQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 2602cbf20e3..30e6aa1e1e6 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -33,6 +33,7 @@ DEF_MVE_FUNCTION (vcmpleq, cmp, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vcmpltq, cmp, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vcmpneq, cmp, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vcreateq, create, all_integer_with_64, none)
+DEF_MVE_FUNCTION (vdupq, unary_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (veorq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vhaddq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vhsubq, binary_opt_n, all_integer, mx_or_none)
@@ -104,6 +105,7 @@ DEF_MVE_FUNCTION (vcmpleq, cmp, all_float, m_or_none)
 DEF_MVE_FUNCTION (vcmpltq, cmp, all_float, m_or_none)
 DEF_MVE_FUNCTION (vcmpneq, cmp, all_float, m_or_none)
 DEF_MVE_FUNCTION (vcreateq, create, all_float, none)
+DEF_MVE_FUNCTION (vdupq, unary_n, all_float, mx_or_none)
 DEF_MVE_FUNCTION (veorq, binary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vmaxnmaq, binary, all_float, m_or_none)
 DEF_MVE_FUNCTION (vmaxnmavq, binary_maxvminv, all_float, p_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index eaea466712a..3dc9114045f 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -38,6 +38,7 @@ extern const function_base *const vcmpleq;
 extern const function_base *const vcmpltq;
 extern const function_base *const vcmpneq;
 extern const function_base *const vcreateq;
+extern const function_base *const vdupq;
 extern const function_base *const veorq;
 extern const function_base *const vhaddq;
 extern const function_base *const vhsubq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 3692f600b37..c3d18e4cc6f 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -42,7 +42,6 @@
 
 #ifndef __ARM_MVE_PRESERVE_USER_NAMESPACE
 #define vst4q(__addr, __value) __arm_vst4q(__addr, __value)
-#define vdupq_n(__a) __arm_vdupq_n(__a)
 #define vaddlvq(__a) __arm_vaddlvq(__a)
 #define vaddvq(__a) __arm_vaddvq(__a)
 #define vmovlbq(__a) __arm_vmovlbq(__a)
@@ -92,7 +91,6 @@
 #define vmlaq(__a, __b, __c) __arm_vmlaq(__a, __b, __c)
 #define vmladavq_p(__a, __b, __p) __arm_vmladavq_p(__a, __b, __p)
 #define vmladavaq(__a, __b, __c) __arm_vmladavaq(__a, __b, __c)
-#define vdupq_m(__inactive, __a, __p) __arm_vdupq_m(__inactive, __a, __p)
 #define vaddvaq_p(__a, __b, __p) __arm_vaddvaq_p(__a, __b, __p)
 #define vsriq(__a, __b, __imm) __arm_vsriq(__a, __b, __imm)
 #define vsliq(__a, __b, __imm) __arm_vsliq(__a, __b, __imm)
@@ -324,17 +322,12 @@
 #define vst4q_u32( __addr, __value) __arm_vst4q_u32( __addr, __value)
 #define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value)
 #define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value)
-#define vdupq_n_f16(__a) __arm_vdupq_n_f16(__a)
-#define vdupq_n_f32(__a) __arm_vdupq_n_f32(__a)
 #define vcvttq_f32_f16(__a) __arm_vcvttq_f32_f16(__a)
 #define vcvtbq_f32_f16(__a) __arm_vcvtbq_f32_f16(__a)
 #define vcvtq_f16_s16(__a) __arm_vcvtq_f16_s16(__a)
 #define vcvtq_f32_s32(__a) __arm_vcvtq_f32_s32(__a)
 #define vcvtq_f16_u16(__a) __arm_vcvtq_f16_u16(__a)
 #define vcvtq_f32_u32(__a) __arm_vcvtq_f32_u32(__a)
-#define vdupq_n_s8(__a) __arm_vdupq_n_s8(__a)
-#define vdupq_n_s16(__a) __arm_vdupq_n_s16(__a)
-#define vdupq_n_s32(__a) __arm_vdupq_n_s32(__a)
 #define vaddlvq_s32(__a) __arm_vaddlvq_s32(__a)
 #define vaddvq_s8(__a) __arm_vaddvq_s8(__a)
 #define vaddvq_s16(__a) __arm_vaddvq_s16(__a)
@@ -361,9 +354,6 @@
 #define vmvnq_u8(__a) __arm_vmvnq_u8(__a)
 #define vmvnq_u16(__a) __arm_vmvnq_u16(__a)
 #define vmvnq_u32(__a) __arm_vmvnq_u32(__a)
-#define vdupq_n_u8(__a) __arm_vdupq_n_u8(__a)
-#define vdupq_n_u16(__a) __arm_vdupq_n_u16(__a)
-#define vdupq_n_u32(__a) __arm_vdupq_n_u32(__a)
 #define vaddvq_u8(__a) __arm_vaddvq_u8(__a)
 #define vaddvq_u16(__a) __arm_vaddvq_u16(__a)
 #define vaddvq_u32(__a) __arm_vaddvq_u32(__a)
@@ -567,7 +557,6 @@
 #define vmlaq_n_u8(__a, __b, __c) __arm_vmlaq_n_u8(__a, __b, __c)
 #define vmladavq_p_u8(__a, __b, __p) __arm_vmladavq_p_u8(__a, __b, __p)
 #define vmladavaq_u8(__a, __b, __c) __arm_vmladavaq_u8(__a, __b, __c)
-#define vdupq_m_n_u8(__inactive, __a, __p) __arm_vdupq_m_n_u8(__inactive, __a, __p)
 #define vaddvaq_p_u8(__a, __b, __p) __arm_vaddvaq_p_u8(__a, __b, __p)
 #define vsriq_n_u8(__a, __b,  __imm) __arm_vsriq_n_u8(__a, __b,  __imm)
 #define vsliq_n_u8(__a, __b,  __imm) __arm_vsliq_n_u8(__a, __b,  __imm)
@@ -576,7 +565,6 @@
 #define vmlsdavq_p_s8(__a, __b, __p) __arm_vmlsdavq_p_s8(__a, __b, __p)
 #define vmladavxq_p_s8(__a, __b, __p) __arm_vmladavxq_p_s8(__a, __b, __p)
 #define vmladavq_p_s8(__a, __b, __p) __arm_vmladavq_p_s8(__a, __b, __p)
-#define vdupq_m_n_s8(__inactive, __a, __p) __arm_vdupq_m_n_s8(__inactive, __a, __p)
 #define vaddvaq_p_s8(__a, __b, __p) __arm_vaddvaq_p_s8(__a, __b, __p)
 #define vqrdmlsdhxq_s8(__inactive, __a, __b) __arm_vqrdmlsdhxq_s8(__inactive, __a, __b)
 #define vqrdmlsdhq_s8(__inactive, __a, __b) __arm_vqrdmlsdhq_s8(__inactive, __a, __b)
@@ -605,7 +593,6 @@
 #define vmlaq_n_u16(__a, __b, __c) __arm_vmlaq_n_u16(__a, __b, __c)
 #define vmladavq_p_u16(__a, __b, __p) __arm_vmladavq_p_u16(__a, __b, __p)
 #define vmladavaq_u16(__a, __b, __c) __arm_vmladavaq_u16(__a, __b, __c)
-#define vdupq_m_n_u16(__inactive, __a, __p) __arm_vdupq_m_n_u16(__inactive, __a, __p)
 #define vaddvaq_p_u16(__a, __b, __p) __arm_vaddvaq_p_u16(__a, __b, __p)
 #define vsriq_n_u16(__a, __b,  __imm) __arm_vsriq_n_u16(__a, __b,  __imm)
 #define vsliq_n_u16(__a, __b,  __imm) __arm_vsliq_n_u16(__a, __b,  __imm)
@@ -614,7 +601,6 @@
 #define vmlsdavq_p_s16(__a, __b, __p) __arm_vmlsdavq_p_s16(__a, __b, __p)
 #define vmladavxq_p_s16(__a, __b, __p) __arm_vmladavxq_p_s16(__a, __b, __p)
 #define vmladavq_p_s16(__a, __b, __p) __arm_vmladavq_p_s16(__a, __b, __p)
-#define vdupq_m_n_s16(__inactive, __a, __p) __arm_vdupq_m_n_s16(__inactive, __a, __p)
 #define vaddvaq_p_s16(__a, __b, __p) __arm_vaddvaq_p_s16(__a, __b, __p)
 #define vqrdmlsdhxq_s16(__inactive, __a, __b) __arm_vqrdmlsdhxq_s16(__inactive, __a, __b)
 #define vqrdmlsdhq_s16(__inactive, __a, __b) __arm_vqrdmlsdhq_s16(__inactive, __a, __b)
@@ -643,7 +629,6 @@
 #define vmlaq_n_u32(__a, __b, __c) __arm_vmlaq_n_u32(__a, __b, __c)
 #define vmladavq_p_u32(__a, __b, __p) __arm_vmladavq_p_u32(__a, __b, __p)
 #define vmladavaq_u32(__a, __b, __c) __arm_vmladavaq_u32(__a, __b, __c)
-#define vdupq_m_n_u32(__inactive, __a, __p) __arm_vdupq_m_n_u32(__inactive, __a, __p)
 #define vaddvaq_p_u32(__a, __b, __p) __arm_vaddvaq_p_u32(__a, __b, __p)
 #define vsriq_n_u32(__a, __b,  __imm) __arm_vsriq_n_u32(__a, __b,  __imm)
 #define vsliq_n_u32(__a, __b,  __imm) __arm_vsliq_n_u32(__a, __b,  __imm)
@@ -652,7 +637,6 @@
 #define vmlsdavq_p_s32(__a, __b, __p) __arm_vmlsdavq_p_s32(__a, __b, __p)
 #define vmladavxq_p_s32(__a, __b, __p) __arm_vmladavxq_p_s32(__a, __b, __p)
 #define vmladavq_p_s32(__a, __b, __p) __arm_vmladavq_p_s32(__a, __b, __p)
-#define vdupq_m_n_s32(__inactive, __a, __p) __arm_vdupq_m_n_s32(__inactive, __a, __p)
 #define vaddvaq_p_s32(__a, __b, __p) __arm_vaddvaq_p_s32(__a, __b, __p)
 #define vqrdmlsdhxq_s32(__inactive, __a, __b) __arm_vqrdmlsdhxq_s32(__inactive, __a, __b)
 #define vqrdmlsdhq_s32(__inactive, __a, __b) __arm_vqrdmlsdhq_s32(__inactive, __a, __b)
@@ -707,7 +691,6 @@
 #define vcvtnq_m_s16_f16(__inactive, __a, __p) __arm_vcvtnq_m_s16_f16(__inactive, __a, __p)
 #define vcvtpq_m_s16_f16(__inactive, __a, __p) __arm_vcvtpq_m_s16_f16(__inactive, __a, __p)
 #define vcvtq_m_s16_f16(__inactive, __a, __p) __arm_vcvtq_m_s16_f16(__inactive, __a, __p)
-#define vdupq_m_n_f16(__inactive, __a, __p) __arm_vdupq_m_n_f16(__inactive, __a, __p)
 #define vmlaldavq_p_s16(__a, __b, __p) __arm_vmlaldavq_p_s16(__a, __b, __p)
 #define vmlaldavxq_p_s16(__a, __b, __p) __arm_vmlaldavxq_p_s16(__a, __b, __p)
 #define vmlsldavq_p_s16(__a, __b, __p) __arm_vmlsldavq_p_s16(__a, __b, __p)
@@ -741,7 +724,6 @@
 #define vcvtnq_m_s32_f32(__inactive, __a, __p) __arm_vcvtnq_m_s32_f32(__inactive, __a, __p)
 #define vcvtpq_m_s32_f32(__inactive, __a, __p) __arm_vcvtpq_m_s32_f32(__inactive, __a, __p)
 #define vcvtq_m_s32_f32(__inactive, __a, __p) __arm_vcvtq_m_s32_f32(__inactive, __a, __p)
-#define vdupq_m_n_f32(__inactive, __a, __p) __arm_vdupq_m_n_f32(__inactive, __a, __p)
 #define vmlaldavq_p_s32(__a, __b, __p) __arm_vmlaldavq_p_s32(__a, __b, __p)
 #define vmlaldavxq_p_s32(__a, __b, __p) __arm_vmlaldavxq_p_s32(__a, __b, __p)
 #define vmlsldavq_p_s32(__a, __b, __p) __arm_vmlsldavq_p_s32(__a, __b, __p)
@@ -1257,12 +1239,6 @@
 #define viwdupq_x_wb_u8(__a, __b,  __imm, __p) __arm_viwdupq_x_wb_u8(__a, __b,  __imm, __p)
 #define viwdupq_x_wb_u16(__a, __b,  __imm, __p) __arm_viwdupq_x_wb_u16(__a, __b,  __imm, __p)
 #define viwdupq_x_wb_u32(__a, __b,  __imm, __p) __arm_viwdupq_x_wb_u32(__a, __b,  __imm, __p)
-#define vdupq_x_n_s8(__a, __p) __arm_vdupq_x_n_s8(__a, __p)
-#define vdupq_x_n_s16(__a, __p) __arm_vdupq_x_n_s16(__a, __p)
-#define vdupq_x_n_s32(__a, __p) __arm_vdupq_x_n_s32(__a, __p)
-#define vdupq_x_n_u8(__a, __p) __arm_vdupq_x_n_u8(__a, __p)
-#define vdupq_x_n_u16(__a, __p) __arm_vdupq_x_n_u16(__a, __p)
-#define vdupq_x_n_u32(__a, __p) __arm_vdupq_x_n_u32(__a, __p)
 #define vmullbq_poly_x_p8(__a, __b, __p) __arm_vmullbq_poly_x_p8(__a, __b, __p)
 #define vmullbq_poly_x_p16(__a, __b, __p) __arm_vmullbq_poly_x_p16(__a, __b, __p)
 #define vmullbq_int_x_s8(__a, __b, __p) __arm_vmullbq_int_x_s8(__a, __b, __p)
@@ -1333,8 +1309,6 @@
 #define vornq_x_u8(__a, __b, __p) __arm_vornq_x_u8(__a, __b, __p)
 #define vornq_x_u16(__a, __b, __p) __arm_vornq_x_u16(__a, __b, __p)
 #define vornq_x_u32(__a, __b, __p) __arm_vornq_x_u32(__a, __b, __p)
-#define vdupq_x_n_f16(__a, __p) __arm_vdupq_x_n_f16(__a, __p)
-#define vdupq_x_n_f32(__a, __p) __arm_vdupq_x_n_f32(__a, __p)
 #define vcaddq_rot90_x_f16(__a, __b, __p) __arm_vcaddq_rot90_x_f16(__a, __b, __p)
 #define vcaddq_rot90_x_f32(__a, __b, __p) __arm_vcaddq_rot90_x_f32(__a, __b, __p)
 #define vcaddq_rot270_x_f16(__a, __b, __p) __arm_vcaddq_rot270_x_f16(__a, __b, __p)
@@ -1553,27 +1527,6 @@ __arm_vst4q_u32 (uint32_t * __addr, uint32x4x4_t __value)
   __builtin_mve_vst4qv4si ((__builtin_neon_si *) __addr, __rv.__o);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_n_s8 (int8_t __a)
-{
-  return __builtin_mve_vdupq_n_sv16qi (__a);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_n_s16 (int16_t __a)
-{
-  return __builtin_mve_vdupq_n_sv8hi (__a);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_n_s32 (int32_t __a)
-{
-  return __builtin_mve_vdupq_n_sv4si (__a);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddlvq_s32 (int32x4_t __a)
@@ -1686,27 +1639,6 @@ __arm_vmvnq_u32 (uint32x4_t __a)
   return __builtin_mve_vmvnq_uv4si (__a);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_n_u8 (uint8_t __a)
-{
-  return __builtin_mve_vdupq_n_uv16qi (__a);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_n_u16 (uint16_t __a)
-{
-  return __builtin_mve_vdupq_n_uv8hi (__a);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_n_u32 (uint32_t __a)
-{
-  return __builtin_mve_vdupq_n_uv4si (__a);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvq_u8 (uint8x16_t __a)
@@ -2824,13 +2756,6 @@ __arm_vmladavaq_u8 (uint32_t __a, uint8x16_t __b, uint8x16_t __c)
   return __builtin_mve_vmladavaq_uv16qi (__a, __b, __c);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_m_n_u8 (uint8x16_t __inactive, uint8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vdupq_m_n_uv16qi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq_p_u8 (uint32_t __a, uint8x16_t __b, mve_pred16_t __p)
@@ -2887,13 +2812,6 @@ __arm_vmladavq_p_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
   return __builtin_mve_vmladavq_p_sv16qi (__a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_m_n_s8 (int8x16_t __inactive, int8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vdupq_m_n_sv16qi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq_p_s8 (int32_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -3090,13 +3008,6 @@ __arm_vmladavaq_u16 (uint32_t __a, uint16x8_t __b, uint16x8_t __c)
   return __builtin_mve_vmladavaq_uv8hi (__a, __b, __c);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_m_n_u16 (uint16x8_t __inactive, uint16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vdupq_m_n_uv8hi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq_p_u16 (uint32_t __a, uint16x8_t __b, mve_pred16_t __p)
@@ -3153,13 +3064,6 @@ __arm_vmladavq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
   return __builtin_mve_vmladavq_p_sv8hi (__a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_m_n_s16 (int16x8_t __inactive, int16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vdupq_m_n_sv8hi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq_p_s16 (int32_t __a, int16x8_t __b, mve_pred16_t __p)
@@ -3356,13 +3260,6 @@ __arm_vmladavaq_u32 (uint32_t __a, uint32x4_t __b, uint32x4_t __c)
   return __builtin_mve_vmladavaq_uv4si (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_m_n_u32 (uint32x4_t __inactive, uint32_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vdupq_m_n_uv4si (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq_p_u32 (uint32_t __a, uint32x4_t __b, mve_pred16_t __p)
@@ -3419,13 +3316,6 @@ __arm_vmladavq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
   return __builtin_mve_vmladavq_p_sv4si (__a, __b, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_m_n_s32 (int32x4_t __inactive, int32_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vdupq_m_n_sv4si (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq_p_s32 (int32_t __a, int32x4_t __b, mve_pred16_t __p)
@@ -6905,48 +6795,6 @@ __arm_viwdupq_x_wb_u32 (uint32_t *__a, uint32_t __b, const int __imm, mve_pred16
   return __res;
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_x_n_s8 (int8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vdupq_m_n_sv16qi (__arm_vuninitializedq_s8 (), __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_x_n_s16 (int16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vdupq_m_n_sv8hi (__arm_vuninitializedq_s16 (), __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_x_n_s32 (int32_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vdupq_m_n_sv4si (__arm_vuninitializedq_s32 (), __a, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_x_n_u8 (uint8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vdupq_m_n_uv16qi (__arm_vuninitializedq_u8 (), __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_x_n_u16 (uint16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vdupq_m_n_uv8hi (__arm_vuninitializedq_u16 (), __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_x_n_u32 (uint32_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vdupq_m_n_uv4si (__arm_vuninitializedq_u32 (), __a, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmullbq_poly_x_p8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
@@ -8157,20 +8005,6 @@ __arm_vst4q_f32 (float32_t * __addr, float32x4x4_t __value)
   __builtin_mve_vst4qv4sf (__addr, __rv.__o);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_n_f16 (float16_t __a)
-{
-  return __builtin_mve_vdupq_n_fv8hf (__a);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_n_f32 (float32_t __a)
-{
-  return __builtin_mve_vdupq_n_fv4sf (__a);
-}
-
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvttq_f32_f16 (float16x8_t __a)
@@ -8718,13 +8552,6 @@ __arm_vcvtq_m_s16_f16 (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
   return __builtin_mve_vcvtq_m_from_f_sv8hi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_m_n_f16 (float16x8_t __inactive, float16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vdupq_m_n_fv8hf (__inactive, __a, __p);
-}
-
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vpselq_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
@@ -8844,13 +8671,6 @@ __arm_vcvtq_m_s32_f32 (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
   return __builtin_mve_vcvtq_m_from_f_sv4si (__inactive, __a, __p);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_m_n_f32 (float32x4_t __inactive, float32_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vdupq_m_n_fv4sf (__inactive, __a, __p);
-}
-
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vpselq_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
@@ -9438,20 +9258,6 @@ __arm_vstrwq_scatter_base_wb_p_f32 (uint32x4_t * __addr, const int __offset, flo
   *__addr = __builtin_mve_vstrwq_scatter_base_wb_p_fv4sf (*__addr, __offset, __value, __p);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_x_n_f16 (float16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vdupq_m_n_fv8hf (__arm_vuninitializedq_f16 (), __a, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_x_n_f32 (float32_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vdupq_m_n_fv4sf (__arm_vuninitializedq_f32 (), __a, __p);
-}
-
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90_x_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
@@ -9976,27 +9782,6 @@ __arm_vst4q (uint32_t * __addr, uint32x4x4_t __value)
  __arm_vst4q_u32 (__addr, __value);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_n (int8_t __a)
-{
- return __arm_vdupq_n_s8 (__a);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_n (int16_t __a)
-{
- return __arm_vdupq_n_s16 (__a);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_n (int32_t __a)
-{
- return __arm_vdupq_n_s32 (__a);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddlvq (int32x4_t __a)
@@ -10095,27 +9880,6 @@ __arm_vmvnq (uint32x4_t __a)
  return __arm_vmvnq_u32 (__a);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_n (uint8_t __a)
-{
- return __arm_vdupq_n_u8 (__a);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_n (uint16_t __a)
-{
- return __arm_vdupq_n_u16 (__a);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_n (uint32_t __a)
-{
- return __arm_vdupq_n_u32 (__a);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvq (uint8x16_t __a)
@@ -11138,13 +10902,6 @@ __arm_vmladavaq (uint32_t __a, uint8x16_t __b, uint8x16_t __c)
  return __arm_vmladavaq_u8 (__a, __b, __c);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_m (uint8x16_t __inactive, uint8_t __a, mve_pred16_t __p)
-{
- return __arm_vdupq_m_n_u8 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq_p (uint32_t __a, uint8x16_t __b, mve_pred16_t __p)
@@ -11201,13 +10958,6 @@ __arm_vmladavq_p (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
  return __arm_vmladavq_p_s8 (__a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_m (int8x16_t __inactive, int8_t __a, mve_pred16_t __p)
-{
- return __arm_vdupq_m_n_s8 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq_p (int32_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -11404,13 +11154,6 @@ __arm_vmladavaq (uint32_t __a, uint16x8_t __b, uint16x8_t __c)
  return __arm_vmladavaq_u16 (__a, __b, __c);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_m (uint16x8_t __inactive, uint16_t __a, mve_pred16_t __p)
-{
- return __arm_vdupq_m_n_u16 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq_p (uint32_t __a, uint16x8_t __b, mve_pred16_t __p)
@@ -11467,13 +11210,6 @@ __arm_vmladavq_p (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
  return __arm_vmladavq_p_s16 (__a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_m (int16x8_t __inactive, int16_t __a, mve_pred16_t __p)
-{
- return __arm_vdupq_m_n_s16 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq_p (int32_t __a, int16x8_t __b, mve_pred16_t __p)
@@ -11670,13 +11406,6 @@ __arm_vmladavaq (uint32_t __a, uint32x4_t __b, uint32x4_t __c)
  return __arm_vmladavaq_u32 (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_m (uint32x4_t __inactive, uint32_t __a, mve_pred16_t __p)
-{
- return __arm_vdupq_m_n_u32 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq_p (uint32_t __a, uint32x4_t __b, mve_pred16_t __p)
@@ -11733,13 +11462,6 @@ __arm_vmladavq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
  return __arm_vmladavq_p_s32 (__a, __b, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_m (int32x4_t __inactive, int32_t __a, mve_pred16_t __p)
-{
- return __arm_vdupq_m_n_s32 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq_p (int32_t __a, int32x4_t __b, mve_pred16_t __p)
@@ -15746,20 +15468,6 @@ __arm_vst4q (float32_t * __addr, float32x4x4_t __value)
  __arm_vst4q_f32 (__addr, __value);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_n (float16_t __a)
-{
- return __arm_vdupq_n_f16 (__a);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_n (float32_t __a)
-{
- return __arm_vdupq_n_f32 (__a);
-}
-
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvttq_f32 (float16x8_t __a)
@@ -16124,13 +15832,6 @@ __arm_vcvtq_m (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
  return __arm_vcvtq_m_s16_f16 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_m (float16x8_t __inactive, float16_t __a, mve_pred16_t __p)
-{
- return __arm_vdupq_m_n_f16 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vpselq (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
@@ -16250,13 +15951,6 @@ __arm_vcvtq_m (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
  return __arm_vcvtq_m_s32_f32 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdupq_m (float32x4_t __inactive, float32_t __a, mve_pred16_t __p)
-{
- return __arm_vdupq_m_n_f32 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vpselq (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
@@ -17337,11 +17031,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x4_t]: __arm_vst4q_f16 (__ARM_mve_coerce(__p0, float16_t *), __ARM_mve_coerce(__p1, float16x8x4_t)), \
   int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x4_t]: __arm_vst4q_f32 (__ARM_mve_coerce(__p0, float32_t *), __ARM_mve_coerce(__p1, float32x4x4_t)));})
 
-#define __arm_vdupq_n(p0) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_float16x8_t]: __arm_vdupq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t)), \
-  int (*)[__ARM_mve_type_float32x4_t]: __arm_vdupq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t)));})
-
 #define __arm_vcvtbq_f32(p0) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_float16x8_t]: __arm_vcvtbq_f32_f16 (__ARM_mve_coerce(__p0, float16x8_t)));})
@@ -17814,18 +17503,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtpq_m_u16_f16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtpq_m_u32_f32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
 
-#define __arm_vdupq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vdupq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), (int8_t) __p1, p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vdupq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), (int16_t) __p1, p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vdupq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), (int32_t) __p1, p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vdupq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), (uint8_t) __p1, p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vdupq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), (uint16_t) __p1, p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vdupq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), (uint32_t) __p1, p2), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vdupq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), (float16_t) __p1, p2), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vdupq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), (float32_t) __p1, p2));})
-
 #define __arm_vfmaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -18713,16 +18390,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmlsdhxq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmlsdhxq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
 
-#define __arm_vdupq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vdupq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), (int8_t) __p1, p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vdupq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), (int16_t) __p1, p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vdupq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), (int32_t) __p1, p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vdupq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), (uint8_t) __p1, p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vdupq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), (uint16_t) __p1, p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vdupq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), (uint32_t) __p1, p2));})
-
 #define __arm_vmlaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 09/20] arm: [MVE intrinsics] factorize vaddvq
  2023-05-10 13:30 [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp Christophe Lyon
                   ` (6 preceding siblings ...)
  2023-05-10 13:30 ` [PATCH 08/20] arm: [MVE intrinsics] rework vdupq Christophe Lyon
@ 2023-05-10 13:30 ` Christophe Lyon
  2023-05-10 13:30 ` [PATCH 10/20] arm: [MVE intrinsics] add unary_int32 shape Christophe Lyon
                   ` (11 subsequent siblings)
  19 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-10 13:30 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vaddvq builtins so that they use parameterized names.

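For reference, a minimal scalar sketch of what these patterns compute
(plain portable C, for illustration only; the *_ref names are invented
here, and the per-element flags stand in for the real mve_pred16_t,
which packs predicate bits at byte granularity):

#include <stdint.h>

/* vaddvq_s16: sum all 8 lanes into a 32-bit accumulator.  */
int32_t
vaddvq_s16_ref (const int16_t a[8])
{
  int32_t acc = 0;
  for (int i = 0; i < 8; i++)
    acc += a[i];
  return acc;
}

/* vaddvq_p_s16: only lanes whose flag is set contribute (simplified
   model of the predicated form).  */
int32_t
vaddvq_p_s16_ref (const int16_t a[8], const int active[8])
{
  int32_t acc = 0;
  for (int i = 0; i < 8; i++)
    if (active[i])
      acc += a[i];
  return acc;
}
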
2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (mve_insn): Add vaddv.
	* config/arm/mve.md (@mve_vaddvq_<supf><mode>): Rename into ...
	(@mve_<mve_insn>q_<supf><mode>): ... this.
	(mve_vaddvq_p_<supf><mode>): Rename into ...
	(@mve_<mve_insn>q_p_<supf><mode>): ... this.
	* config/arm/vec-common.md: Use gen_mve_q instead of
	gen_mve_vaddvq.
---
 gcc/config/arm/iterators.md  | 2 ++
 gcc/config/arm/mve.md        | 8 ++++----
 gcc/config/arm/vec-common.md | 2 +-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index aff4e7fb814..46c7ddeda67 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -762,6 +762,8 @@ (define_int_attr mve_insn [
 		 (VADDQ_M_N_S "vadd") (VADDQ_M_N_U "vadd") (VADDQ_M_N_F "vadd")
 		 (VADDQ_M_S "vadd") (VADDQ_M_U "vadd") (VADDQ_M_F "vadd")
 		 (VADDQ_N_S "vadd") (VADDQ_N_U "vadd") (VADDQ_N_F "vadd")
+		 (VADDVQ_P_S "vaddv") (VADDVQ_P_U "vaddv")
+		 (VADDVQ_S "vaddv") (VADDVQ_U "vaddv")
 		 (VANDQ_M_S "vand") (VANDQ_M_U "vand") (VANDQ_M_F "vand")
 		 (VBICQ_M_N_S "vbic") (VBICQ_M_N_U "vbic")
 		 (VBICQ_M_S "vbic") (VBICQ_M_U "vbic") (VBICQ_M_F "vbic")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 0c4e4e60bc4..d772f4d4380 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -360,14 +360,14 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
 ;;
 ;; [vaddvq_s, vaddvq_u])
 ;;
-(define_insn "@mve_vaddvq_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_<supf><mode>"
   [
    (set (match_operand:SI 0 "s_register_operand" "=Te")
 	(unspec:SI [(match_operand:MVE_2 1 "s_register_operand" "w")]
 	 VADDVQ))
   ]
   "TARGET_HAVE_MVE"
-  "vaddv.<supf>%#<V_sz_elem>\t%0, %q1"
+  "<mve_insn>.<supf>%#<V_sz_elem>\t%0, %q1"
   [(set_attr "type" "mve_move")
 ])
 
@@ -773,7 +773,7 @@ (define_insn "mve_vaddvaq_<supf><mode>"
 ;;
 ;; [vaddvq_p_u, vaddvq_p_s])
 ;;
-(define_insn "mve_vaddvq_p_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_p_<supf><mode>"
   [
    (set (match_operand:SI 0 "s_register_operand" "=Te")
 	(unspec:SI [(match_operand:MVE_2 1 "s_register_operand" "w")
@@ -781,7 +781,7 @@ (define_insn "mve_vaddvq_p_<supf><mode>"
 	 VADDVQ_P))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vaddvt.<supf>%#<V_sz_elem>	%0, %q1"
+  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%0, %q1"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 6183c931e36..9af8429968d 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -559,7 +559,7 @@ (define_expand "reduc_plus_scal_<mode>"
       /* vaddv generates a 32 bits accumulator.  */
       rtx op0 = gen_reg_rtx (SImode);
 
-      emit_insn (gen_mve_vaddvq (VADDVQ_S, <MODE>mode, op0, operands[1]));
+      emit_insn (gen_mve_q (VADDVQ_S, VADDVQ_S, <MODE>mode, op0, operands[1]));
       emit_move_insn (operands[0], gen_lowpart (<V_elem>mode, op0));
     }
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 10/20] arm: [MVE intrinsics] add unary_int32 shape
  2023-05-10 13:30 [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp Christophe Lyon
                   ` (7 preceding siblings ...)
  2023-05-10 13:30 ` [PATCH 09/20] arm: [MVE intrinsics] factorize vaddvq Christophe Lyon
@ 2023-05-10 13:30 ` Christophe Lyon
  2023-05-10 13:30 ` [PATCH 11/20] arm: [MVE intrinsics] rework vaddvq Christophe Lyon
                   ` (10 subsequent siblings)
  19 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-10 13:30 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the unary_int32 shape description.

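To make the shape concrete, this is the kind of user code it has to
accept (a sketch, not part of the patch; assumes an MVE target such as
arm-none-eabi-gcc -march=armv8.1-m.main+mve, and the function names
are invented).  The return type follows the signedness of the single
vector argument:

#include <arm_mve.h>

int32_t
sum_s16 (int16x8_t v)
{
  return vaddvq (v);  /* resolves to vaddvq_s16, returns int32_t */
}

uint32_t
sum_u8 (uint8x16_t v)
{
  return vaddvq (v);  /* resolves to vaddvq_u8, returns uint32_t */
}
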
2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (unary_int32): New.
	* config/arm/arm-mve-builtins-shapes.h (unary_int32): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 27 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 28 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index c78683aaba2..0bd91b24147 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1094,6 +1094,33 @@ struct unary_convert_def : public overloaded_base<1>
 };
 SHAPE (unary_convert)
 
+/* [u]int32_t vfoo[_<t0>](<T0>_t)
+
+   i.e. a version of "unary" which generates a scalar of type int32_t
+   or uint32_t depending on the signedness of the elements of the
+   input vector.
+
+   Example: vaddvq.
+   int32_t [__arm_]vaddvq[_s16](int16x8_t a)
+   int32_t [__arm_]vaddvq_p[_s16](int16x8_t a, mve_pred16_t p)  */
+struct unary_int32_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+    build_all (b, "sx32,v0", group, MODE_none, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    return r.resolve_uniform (1);
+  }
+};
+SHAPE (unary_int32)
+
 /* <T0>_t vfoo[_n]_t0(<S0>_t)
 
    Example: vdupq.
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index a35faec2542..f422550559e 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -54,6 +54,7 @@ namespace arm_mve
     extern const function_shape *const inherent;
     extern const function_shape *const unary;
     extern const function_shape *const unary_convert;
+    extern const function_shape *const unary_int32;
     extern const function_shape *const unary_n;
 
   } /* end namespace arm_mve::shapes */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 11/20] arm: [MVE intrinsics] rework vaddvq
  2023-05-10 13:30 [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp Christophe Lyon
                   ` (8 preceding siblings ...)
  2023-05-10 13:30 ` [PATCH 10/20] arm: [MVE intrinsics] add unary_int32 shape Christophe Lyon
@ 2023-05-10 13:30 ` Christophe Lyon
  2023-05-10 13:30 ` [PATCH 12/20] arm: [MVE intrinsics] factorize vaddvaq Christophe Lyon
                   ` (9 subsequent siblings)
  19 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-10 13:30 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vaddvq using the new MVE builtins framework.

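User-visible behaviour is intended to be unchanged: the overloads
previously provided by the _Generic macros removed below are now
resolved by the C++ framework.  A quick smoke-test sketch (MVE target
assumed, names illustrative):

#include <arm_mve.h>

uint32_t
sum_u32 (uint32x4_t v, mve_pred16_t p)
{
  uint32_t all = vaddvq (v);        /* vaddvq_u32 */
  uint32_t part = vaddvq_p (v, p);  /* vaddvq_p_u32 */
  return all - part;
}
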
2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vaddvq): New.
	* config/arm/arm-mve-builtins-base.def (vaddvq): New.
	* config/arm/arm-mve-builtins-base.h (vaddvq): New.
	* config/arm/arm_mve.h (vaddvq): Remove.
	(vaddvq_p): Remove.
	(vaddvq_s8): Remove.
	(vaddvq_s16): Remove.
	(vaddvq_s32): Remove.
	(vaddvq_u8): Remove.
	(vaddvq_u16): Remove.
	(vaddvq_u32): Remove.
	(vaddvq_p_u8): Remove.
	(vaddvq_p_s8): Remove.
	(vaddvq_p_u16): Remove.
	(vaddvq_p_s16): Remove.
	(vaddvq_p_u32): Remove.
	(vaddvq_p_s32): Remove.
	(__arm_vaddvq_s8): Remove.
	(__arm_vaddvq_s16): Remove.
	(__arm_vaddvq_s32): Remove.
	(__arm_vaddvq_u8): Remove.
	(__arm_vaddvq_u16): Remove.
	(__arm_vaddvq_u32): Remove.
	(__arm_vaddvq_p_u8): Remove.
	(__arm_vaddvq_p_s8): Remove.
	(__arm_vaddvq_p_u16): Remove.
	(__arm_vaddvq_p_s16): Remove.
	(__arm_vaddvq_p_u32): Remove.
	(__arm_vaddvq_p_s32): Remove.
	(__arm_vaddvq): Remove.
	(__arm_vaddvq_p): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   1 +
 gcc/config/arm/arm-mve-builtins-base.def |   1 +
 gcc/config/arm/arm-mve-builtins-base.h   |   1 +
 gcc/config/arm/arm_mve.h                 | 200 -----------------------
 4 files changed, 3 insertions(+), 200 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index cb572130c2b..7f90fc65ae2 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -244,6 +244,7 @@ namespace arm_mve {
 FUNCTION_WITHOUT_N (vabdq, VABDQ)
 FUNCTION (vabsq, unspec_based_mve_function_exact_insn, (ABS, ABS, ABS, -1, -1, -1, VABSQ_M_S, -1, VABSQ_M_F, -1, -1, -1))
 FUNCTION_WITH_RTX_M_N (vaddq, PLUS, VADDQ)
+FUNCTION_PRED_P_S_U (vaddvq, VADDVQ)
 FUNCTION_WITH_RTX_M (vandq, AND, VANDQ)
 FUNCTION_WITHOUT_N_NO_U_F (vclsq, VCLSQ)
 FUNCTION (vclzq, unspec_based_mve_function_exact_insn, (CLZ, CLZ, CLZ, -1, -1, -1, VCLZQ_M_S, VCLZQ_M_U, -1, -1, -1 ,-1))
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 30e6aa1e1e6..d32745f334a 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -21,6 +21,7 @@
 DEF_MVE_FUNCTION (vabdq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vabsq, unary, all_signed, mx_or_none)
 DEF_MVE_FUNCTION (vaddq, binary_opt_n, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vaddvq, unary_int32, all_integer, p_or_none)
 DEF_MVE_FUNCTION (vandq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vclsq, unary, all_signed, mx_or_none)
 DEF_MVE_FUNCTION (vclzq, unary, all_integer, mx_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 3dc9114045f..9080542e7e3 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -26,6 +26,7 @@ namespace functions {
 extern const function_base *const vabdq;
 extern const function_base *const vabsq;
 extern const function_base *const vaddq;
+extern const function_base *const vaddvq;
 extern const function_base *const vandq;
 extern const function_base *const vclsq;
 extern const function_base *const vclzq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index c3d18e4cc6f..11f1033deb9 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -43,7 +43,6 @@
 #ifndef __ARM_MVE_PRESERVE_USER_NAMESPACE
 #define vst4q(__addr, __value) __arm_vst4q(__addr, __value)
 #define vaddlvq(__a) __arm_vaddlvq(__a)
-#define vaddvq(__a) __arm_vaddvq(__a)
 #define vmovlbq(__a) __arm_vmovlbq(__a)
 #define vmovltq(__a) __arm_vmovltq(__a)
 #define vmvnq(__a) __arm_vmvnq(__a)
@@ -55,7 +54,6 @@
 #define vcaddq_rot90(__a, __b) __arm_vcaddq_rot90(__a, __b)
 #define vcaddq_rot270(__a, __b) __arm_vcaddq_rot270(__a, __b)
 #define vbicq(__a, __b) __arm_vbicq(__a, __b)
-#define vaddvq_p(__a, __p) __arm_vaddvq_p(__a, __p)
 #define vaddvaq(__a, __b) __arm_vaddvaq(__a, __b)
 #define vbrsrq(__a, __b) __arm_vbrsrq(__a, __b)
 #define vqshluq(__a, __imm) __arm_vqshluq(__a, __imm)
@@ -329,9 +327,6 @@
 #define vcvtq_f16_u16(__a) __arm_vcvtq_f16_u16(__a)
 #define vcvtq_f32_u32(__a) __arm_vcvtq_f32_u32(__a)
 #define vaddlvq_s32(__a) __arm_vaddlvq_s32(__a)
-#define vaddvq_s8(__a) __arm_vaddvq_s8(__a)
-#define vaddvq_s16(__a) __arm_vaddvq_s16(__a)
-#define vaddvq_s32(__a) __arm_vaddvq_s32(__a)
 #define vmovlbq_s8(__a) __arm_vmovlbq_s8(__a)
 #define vmovlbq_s16(__a) __arm_vmovlbq_s16(__a)
 #define vmovltq_s8(__a) __arm_vmovltq_s8(__a)
@@ -354,9 +349,6 @@
 #define vmvnq_u8(__a) __arm_vmvnq_u8(__a)
 #define vmvnq_u16(__a) __arm_vmvnq_u16(__a)
 #define vmvnq_u32(__a) __arm_vmvnq_u32(__a)
-#define vaddvq_u8(__a) __arm_vaddvq_u8(__a)
-#define vaddvq_u16(__a) __arm_vaddvq_u16(__a)
-#define vaddvq_u32(__a) __arm_vaddvq_u32(__a)
 #define vmovltq_u8(__a) __arm_vmovltq_u8(__a)
 #define vmovltq_u16(__a) __arm_vmovltq_u16(__a)
 #define vmovlbq_u8(__a) __arm_vmovlbq_u8(__a)
@@ -398,11 +390,9 @@
 #define vcaddq_rot90_u8(__a, __b) __arm_vcaddq_rot90_u8(__a, __b)
 #define vcaddq_rot270_u8(__a, __b) __arm_vcaddq_rot270_u8(__a, __b)
 #define vbicq_u8(__a, __b) __arm_vbicq_u8(__a, __b)
-#define vaddvq_p_u8(__a, __p) __arm_vaddvq_p_u8(__a, __p)
 #define vaddvaq_u8(__a, __b) __arm_vaddvaq_u8(__a, __b)
 #define vbrsrq_n_u8(__a, __b) __arm_vbrsrq_n_u8(__a, __b)
 #define vqshluq_n_s8(__a,  __imm) __arm_vqshluq_n_s8(__a,  __imm)
-#define vaddvq_p_s8(__a, __p) __arm_vaddvq_p_s8(__a, __p)
 #define vornq_s8(__a, __b) __arm_vornq_s8(__a, __b)
 #define vmulltq_int_s8(__a, __b) __arm_vmulltq_int_s8(__a, __b)
 #define vmullbq_int_s8(__a, __b) __arm_vmullbq_int_s8(__a, __b)
@@ -424,11 +414,9 @@
 #define vcaddq_rot90_u16(__a, __b) __arm_vcaddq_rot90_u16(__a, __b)
 #define vcaddq_rot270_u16(__a, __b) __arm_vcaddq_rot270_u16(__a, __b)
 #define vbicq_u16(__a, __b) __arm_vbicq_u16(__a, __b)
-#define vaddvq_p_u16(__a, __p) __arm_vaddvq_p_u16(__a, __p)
 #define vaddvaq_u16(__a, __b) __arm_vaddvaq_u16(__a, __b)
 #define vbrsrq_n_u16(__a, __b) __arm_vbrsrq_n_u16(__a, __b)
 #define vqshluq_n_s16(__a,  __imm) __arm_vqshluq_n_s16(__a,  __imm)
-#define vaddvq_p_s16(__a, __p) __arm_vaddvq_p_s16(__a, __p)
 #define vornq_s16(__a, __b) __arm_vornq_s16(__a, __b)
 #define vmulltq_int_s16(__a, __b) __arm_vmulltq_int_s16(__a, __b)
 #define vmullbq_int_s16(__a, __b) __arm_vmullbq_int_s16(__a, __b)
@@ -450,11 +438,9 @@
 #define vcaddq_rot90_u32(__a, __b) __arm_vcaddq_rot90_u32(__a, __b)
 #define vcaddq_rot270_u32(__a, __b) __arm_vcaddq_rot270_u32(__a, __b)
 #define vbicq_u32(__a, __b) __arm_vbicq_u32(__a, __b)
-#define vaddvq_p_u32(__a, __p) __arm_vaddvq_p_u32(__a, __p)
 #define vaddvaq_u32(__a, __b) __arm_vaddvaq_u32(__a, __b)
 #define vbrsrq_n_u32(__a, __b) __arm_vbrsrq_n_u32(__a, __b)
 #define vqshluq_n_s32(__a,  __imm) __arm_vqshluq_n_s32(__a,  __imm)
-#define vaddvq_p_s32(__a, __p) __arm_vaddvq_p_s32(__a, __p)
 #define vornq_s32(__a, __b) __arm_vornq_s32(__a, __b)
 #define vmulltq_int_s32(__a, __b) __arm_vmulltq_int_s32(__a, __b)
 #define vmullbq_int_s32(__a, __b) __arm_vmullbq_int_s32(__a, __b)
@@ -1534,27 +1520,6 @@ __arm_vaddlvq_s32 (int32x4_t __a)
   return __builtin_mve_vaddlvq_sv4si (__a);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_s8 (int8x16_t __a)
-{
-  return __builtin_mve_vaddvq_sv16qi (__a);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_s16 (int16x8_t __a)
-{
-  return __builtin_mve_vaddvq_sv8hi (__a);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_s32 (int32x4_t __a)
-{
-  return __builtin_mve_vaddvq_sv4si (__a);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmovlbq_s8 (int8x16_t __a)
@@ -1639,27 +1604,6 @@ __arm_vmvnq_u32 (uint32x4_t __a)
   return __builtin_mve_vmvnq_uv4si (__a);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_u8 (uint8x16_t __a)
-{
-  return __builtin_mve_vaddvq_uv16qi (__a);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_u16 (uint16x8_t __a)
-{
-  return __builtin_mve_vaddvq_uv8hi (__a);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_u32 (uint32x4_t __a)
-{
-  return __builtin_mve_vaddvq_uv4si (__a);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmovltq_u8 (uint8x16_t __a)
@@ -1809,13 +1753,6 @@ __arm_vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
   return __builtin_mve_vbicq_uv16qi (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_p_u8 (uint8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vaddvq_p_uv16qi (__a, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq_u8 (uint32_t __a, uint8x16_t __b)
@@ -1837,13 +1774,6 @@ __arm_vqshluq_n_s8 (int8x16_t __a, const int __imm)
   return __builtin_mve_vqshluq_n_sv16qi (__a, __imm);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_p_s8 (int8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vaddvq_p_sv16qi (__a, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_s8 (int8x16_t __a, int8x16_t __b)
@@ -1993,13 +1923,6 @@ __arm_vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
   return __builtin_mve_vbicq_uv8hi (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_p_u16 (uint16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vaddvq_p_uv8hi (__a, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq_u16 (uint32_t __a, uint16x8_t __b)
@@ -2021,13 +1944,6 @@ __arm_vqshluq_n_s16 (int16x8_t __a, const int __imm)
   return __builtin_mve_vqshluq_n_sv8hi (__a, __imm);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_p_s16 (int16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vaddvq_p_sv8hi (__a, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_s16 (int16x8_t __a, int16x8_t __b)
@@ -2177,13 +2093,6 @@ __arm_vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
   return __builtin_mve_vbicq_uv4si (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_p_u32 (uint32x4_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vaddvq_p_uv4si (__a, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq_u32 (uint32_t __a, uint32x4_t __b)
@@ -2205,13 +2114,6 @@ __arm_vqshluq_n_s32 (int32x4_t __a, const int __imm)
   return __builtin_mve_vqshluq_n_sv4si (__a, __imm);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_p_s32 (int32x4_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vaddvq_p_sv4si (__a, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_s32 (int32x4_t __a, int32x4_t __b)
@@ -9789,27 +9691,6 @@ __arm_vaddlvq (int32x4_t __a)
  return __arm_vaddlvq_s32 (__a);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq (int8x16_t __a)
-{
- return __arm_vaddvq_s8 (__a);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq (int16x8_t __a)
-{
- return __arm_vaddvq_s16 (__a);
-}
-
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq (int32x4_t __a)
-{
- return __arm_vaddvq_s32 (__a);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmovlbq (int8x16_t __a)
@@ -9880,27 +9761,6 @@ __arm_vmvnq (uint32x4_t __a)
  return __arm_vmvnq_u32 (__a);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq (uint8x16_t __a)
-{
- return __arm_vaddvq_u8 (__a);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq (uint16x8_t __a)
-{
- return __arm_vaddvq_u16 (__a);
-}
-
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq (uint32x4_t __a)
-{
- return __arm_vaddvq_u32 (__a);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmovltq (uint8x16_t __a)
@@ -9999,13 +9859,6 @@ __arm_vbicq (uint8x16_t __a, uint8x16_t __b)
  return __arm_vbicq_u8 (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_p (uint8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vaddvq_p_u8 (__a, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq (uint32_t __a, uint8x16_t __b)
@@ -10027,13 +9880,6 @@ __arm_vqshluq (int8x16_t __a, const int __imm)
  return __arm_vqshluq_n_s8 (__a, __imm);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_p (int8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vaddvq_p_s8 (__a, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (int8x16_t __a, int8x16_t __b)
@@ -10181,13 +10027,6 @@ __arm_vbicq (uint16x8_t __a, uint16x8_t __b)
  return __arm_vbicq_u16 (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_p (uint16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vaddvq_p_u16 (__a, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq (uint32_t __a, uint16x8_t __b)
@@ -10209,13 +10048,6 @@ __arm_vqshluq (int16x8_t __a, const int __imm)
  return __arm_vqshluq_n_s16 (__a, __imm);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_p (int16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vaddvq_p_s16 (__a, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (int16x8_t __a, int16x8_t __b)
@@ -10363,13 +10195,6 @@ __arm_vbicq (uint32x4_t __a, uint32x4_t __b)
  return __arm_vbicq_u32 (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_p (uint32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vaddvq_p_u32 (__a, __p);
-}
-
 __extension__ extern __inline uint32_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vaddvaq (uint32_t __a, uint32x4_t __b)
@@ -10391,13 +10216,6 @@ __arm_vqshluq (int32x4_t __a, const int __imm)
  return __arm_vqshluq_n_s32 (__a, __imm);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvq_p (int32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vaddvq_p_s32 (__a, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (int32x4_t __a, int32x4_t __b)
@@ -19606,24 +19424,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vaddvaq_p_u16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
   int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vaddvaq_p_u32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
-#define __arm_vaddvq(p0) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vaddvq_s8 (__ARM_mve_coerce(__p0, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vaddvq_s16 (__ARM_mve_coerce(__p0, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vaddvq_s32 (__ARM_mve_coerce(__p0, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vaddvq_u8 (__ARM_mve_coerce(__p0, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vaddvq_u16 (__ARM_mve_coerce(__p0, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vaddvq_u32 (__ARM_mve_coerce(__p0, uint32x4_t)));})
-
-#define __arm_vaddvq_p(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vaddvq_p_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vaddvq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vaddvq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vaddvq_p_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vaddvq_p_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vaddvq_p_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
-
 #define __arm_vmladavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 12/20] arm: [MVE intrinsics] factorize vaddvaq
  2023-05-10 13:30 [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp Christophe Lyon
                   ` (9 preceding siblings ...)
  2023-05-10 13:30 ` [PATCH 11/20] arm: [MVE intrinsics] rework vaddvq Christophe Lyon
@ 2023-05-10 13:30 ` Christophe Lyon
  2023-05-10 13:30 ` [PATCH 13/20] arm: [MVE intrinsics] add unary_int32_acc shape Christophe Lyon
                   ` (8 subsequent siblings)
  19 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-10 13:30 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vaddvaq builtins so that they use parameterized names.

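As with vaddvq, only the pattern names change; the semantics of the
insns are untouched.  For reference, a scalar sketch of the vaddva
operation (portable C, the _ref name is invented here):

#include <stdint.h>

/* vaddvaq_s8: add the sum of all 16 lanes to an existing 32-bit
   accumulator.  */
int32_t
vaddvaq_s8_ref (int32_t acc, const int8_t b[16])
{
  for (int i = 0; i < 16; i++)
    acc += b[i];
  return acc;
}
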
2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (mve_insn): Add vaddva.
	* config/arm/mve.md (mve_vaddvaq_<supf><mode>): Rename into ...
	(@mve_<mve_insn>q_<supf><mode>): ... this.
	(mve_vaddvaq_p_<supf><mode>): Rename into ...
	(@mve_<mve_insn>q_p_<supf><mode>): ... this.
---
 gcc/config/arm/iterators.md | 2 ++
 gcc/config/arm/mve.md       | 8 ++++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 46c7ddeda67..00123c0a376 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -762,6 +762,8 @@ (define_int_attr mve_insn [
 		 (VADDQ_M_N_S "vadd") (VADDQ_M_N_U "vadd") (VADDQ_M_N_F "vadd")
 		 (VADDQ_M_S "vadd") (VADDQ_M_U "vadd") (VADDQ_M_F "vadd")
 		 (VADDQ_N_S "vadd") (VADDQ_N_U "vadd") (VADDQ_N_F "vadd")
+		 (VADDVAQ_P_S "vaddva") (VADDVAQ_P_U "vaddva")
+		 (VADDVAQ_S "vaddva") (VADDVAQ_U "vaddva")
 		 (VADDVQ_P_S "vaddv") (VADDVQ_P_U "vaddv")
 		 (VADDVQ_S "vaddv") (VADDVQ_U "vaddv")
 		 (VANDQ_M_S "vand") (VANDQ_M_U "vand") (VANDQ_M_F "vand")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index d772f4d4380..1ccbce3c89c 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -758,7 +758,7 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
 ;;
 ;; [vaddvaq_s, vaddvaq_u])
 ;;
-(define_insn "mve_vaddvaq_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_<supf><mode>"
   [
    (set (match_operand:SI 0 "s_register_operand" "=Te")
 	(unspec:SI [(match_operand:SI 1 "s_register_operand" "0")
@@ -766,7 +766,7 @@ (define_insn "mve_vaddvaq_<supf><mode>"
 	 VADDVAQ))
   ]
   "TARGET_HAVE_MVE"
-  "vaddva.<supf>%#<V_sz_elem>\t%0, %q2"
+  "<mve_insn>.<supf>%#<V_sz_elem>\t%0, %q2"
   [(set_attr "type" "mve_move")
 ])
 
@@ -1944,7 +1944,7 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
 ;;
 ;; [vaddvaq_p_u, vaddvaq_p_s])
 ;;
-(define_insn "mve_vaddvaq_p_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_p_<supf><mode>"
   [
    (set (match_operand:SI 0 "s_register_operand" "=Te")
 	(unspec:SI [(match_operand:SI 1 "s_register_operand" "0")
@@ -1953,7 +1953,7 @@ (define_insn "mve_vaddvaq_p_<supf><mode>"
 	 VADDVAQ_P))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vaddvat.<supf>%#<V_sz_elem>	%0, %q2"
+  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%0, %q2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 13/20] arm: [MVE intrinsics] add unary_int32_acc shape
  2023-05-10 13:30 [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp Christophe Lyon
                   ` (10 preceding siblings ...)
  2023-05-10 13:30 ` [PATCH 12/20] arm: [MVE intrinsics] factorize vaddvaq Christophe Lyon
@ 2023-05-10 13:30 ` Christophe Lyon
  2023-05-10 13:30 ` [PATCH 14/20] arm: [MVE intrinsics] rework vaddvaq Christophe Lyon
                   ` (7 subsequent siblings)
  19 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-10 13:30 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the unary_int32_acc shape description.

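Concretely, the shape has to accept calls like these (a sketch, not
part of the patch; MVE target assumed, function name invented), where
the scalar accumulator comes first and the vector's element type
drives overload resolution:

#include <arm_mve.h>

int32_t
accumulate (int32_t acc, int16x8_t v, mve_pred16_t p)
{
  acc = vaddvaq (acc, v);       /* vaddvaq_s16 */
  acc = vaddvaq_p (acc, v, p);  /* vaddvaq_p_s16 */
  return acc;
}
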
2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (unary_int32_acc): New.
	* config/arm/arm-mve-builtins-shapes.h (unary_int32_acc): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 34 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 35 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 0bd91b24147..bff1c3e843b 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1121,6 +1121,40 @@ struct unary_int32_def : public overloaded_base<0>
 };
 SHAPE (unary_int32)
 
+/* [u]int32_t vfoo[_<t0>]([u]int32_t, <T0>_t)
+
+   i.e. a version of "unary" which accumulates into a scalar of type
+   int32_t or uint32_t depending on the signedness of the elements
+   of the input vector.
+
+   Example: vaddvaq.
+   int32_t [__arm_]vaddvaq[_s16](int32_t a, int16x8_t b)
+   int32_t [__arm_]vaddvaq_p[_s16](int32_t a, int16x8_t b, mve_pred16_t p)  */
+struct unary_int32_acc_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+    build_all (b, "sx32,sx32,v0", group, MODE_none, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    type_suffix_index type;
+    if (!r.check_gp_argument (2, i, nargs)
+	|| !r.require_integer_immediate (0)
+	|| (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES)
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+};
+SHAPE (unary_int32_acc)
+
 /* <T0>_t vfoo[_n]_t0(<S0>_t)
 
    Example: vdupq.
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index f422550559e..fc1bacbd4da 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -55,6 +55,7 @@ namespace arm_mve
     extern const function_shape *const unary;
     extern const function_shape *const unary_convert;
     extern const function_shape *const unary_int32;
+    extern const function_shape *const unary_int32_acc;
     extern const function_shape *const unary_n;
 
   } /* end namespace arm_mve::shapes */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 14/20] arm: [MVE intrinsics] rework vaddvaq
  2023-05-10 13:30 [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp Christophe Lyon
                   ` (11 preceding siblings ...)
  2023-05-10 13:30 ` [PATCH 13/20] arm: [MVE intrinsics] add unary_int32_acc shape Christophe Lyon
@ 2023-05-10 13:30 ` Christophe Lyon
  2023-05-10 13:30 ` [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape Christophe Lyon
                   ` (6 subsequent siblings)
  19 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-10 13:30 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vaddvaq using the new MVE builtins framework.

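As for vaddvq, the overloads move from _Generic macros to the C++
resolver with no intended user-visible change.  For example (sketch,
MVE target assumed, name illustrative):

#include <arm_mve.h>

uint32_t
total_u8 (uint32_t acc, uint8x16_t counts)
{
  return vaddvaq (acc, counts);  /* resolves to vaddvaq_u8 */
}
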
2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vaddvaq): New.
	* config/arm/arm-mve-builtins-base.def (vaddvaq): New.
	* config/arm/arm-mve-builtins-base.h (vaddvaq): New.
	* config/arm/arm_mve.h (vaddvaq): Remove.
	(vaddvaq_p): Remove.
	(vaddvaq_u8): Remove.
	(vaddvaq_s8): Remove.
	(vaddvaq_u16): Remove.
	(vaddvaq_s16): Remove.
	(vaddvaq_u32): Remove.
	(vaddvaq_s32): Remove.
	(vaddvaq_p_u8): Remove.
	(vaddvaq_p_s8): Remove.
	(vaddvaq_p_u16): Remove.
	(vaddvaq_p_s16): Remove.
	(vaddvaq_p_u32): Remove.
	(vaddvaq_p_s32): Remove.
	(__arm_vaddvaq_u8): Remove.
	(__arm_vaddvaq_s8): Remove.
	(__arm_vaddvaq_u16): Remove.
	(__arm_vaddvaq_s16): Remove.
	(__arm_vaddvaq_u32): Remove.
	(__arm_vaddvaq_s32): Remove.
	(__arm_vaddvaq_p_u8): Remove.
	(__arm_vaddvaq_p_s8): Remove.
	(__arm_vaddvaq_p_u16): Remove.
	(__arm_vaddvaq_p_s16): Remove.
	(__arm_vaddvaq_p_u32): Remove.
	(__arm_vaddvaq_p_s32): Remove.
	(__arm_vaddvaq): Remove.
	(__arm_vaddvaq_p): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   1 +
 gcc/config/arm/arm-mve-builtins-base.def |   1 +
 gcc/config/arm/arm-mve-builtins-base.h   |   1 +
 gcc/config/arm/arm_mve.h                 | 202 -----------------------
 4 files changed, 3 insertions(+), 202 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 7f90fc65ae2..e87069b0467 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -245,6 +245,7 @@ FUNCTION_WITHOUT_N (vabdq, VABDQ)
 FUNCTION (vabsq, unspec_based_mve_function_exact_insn, (ABS, ABS, ABS, -1, -1, -1, VABSQ_M_S, -1, VABSQ_M_F, -1, -1, -1))
 FUNCTION_WITH_RTX_M_N (vaddq, PLUS, VADDQ)
 FUNCTION_PRED_P_S_U (vaddvq, VADDVQ)
+FUNCTION_PRED_P_S_U (vaddvaq, VADDVAQ)
 FUNCTION_WITH_RTX_M (vandq, AND, VANDQ)
 FUNCTION_WITHOUT_N_NO_U_F (vclsq, VCLSQ)
 FUNCTION (vclzq, unspec_based_mve_function_exact_insn, (CLZ, CLZ, CLZ, -1, -1, -1, VCLZQ_M_S, VCLZQ_M_U, -1, -1, -1 ,-1))
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index d32745f334a..413fe4a1ef0 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -21,6 +21,7 @@
 DEF_MVE_FUNCTION (vabdq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vabsq, unary, all_signed, mx_or_none)
 DEF_MVE_FUNCTION (vaddq, binary_opt_n, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vaddvaq, unary_int32_acc, all_integer, p_or_none)
 DEF_MVE_FUNCTION (vaddvq, unary_int32, all_integer, p_or_none)
 DEF_MVE_FUNCTION (vandq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vclsq, unary, all_signed, mx_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 9080542e7e3..5338b777444 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -26,6 +26,7 @@ namespace functions {
 extern const function_base *const vabdq;
 extern const function_base *const vabsq;
 extern const function_base *const vaddq;
+extern const function_base *const vaddvaq;
 extern const function_base *const vaddvq;
 extern const function_base *const vandq;
 extern const function_base *const vclsq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 11f1033deb9..74783570561 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -54,7 +54,6 @@
 #define vcaddq_rot90(__a, __b) __arm_vcaddq_rot90(__a, __b)
 #define vcaddq_rot270(__a, __b) __arm_vcaddq_rot270(__a, __b)
 #define vbicq(__a, __b) __arm_vbicq(__a, __b)
-#define vaddvaq(__a, __b) __arm_vaddvaq(__a, __b)
 #define vbrsrq(__a, __b) __arm_vbrsrq(__a, __b)
 #define vqshluq(__a, __imm) __arm_vqshluq(__a, __imm)
 #define vmlsdavxq(__a, __b) __arm_vmlsdavxq(__a, __b)
@@ -89,7 +88,6 @@
 #define vmlaq(__a, __b, __c) __arm_vmlaq(__a, __b, __c)
 #define vmladavq_p(__a, __b, __p) __arm_vmladavq_p(__a, __b, __p)
 #define vmladavaq(__a, __b, __c) __arm_vmladavaq(__a, __b, __c)
-#define vaddvaq_p(__a, __b, __p) __arm_vaddvaq_p(__a, __b, __p)
 #define vsriq(__a, __b, __imm) __arm_vsriq(__a, __b, __imm)
 #define vsliq(__a, __b, __imm) __arm_vsliq(__a, __b, __imm)
 #define vmlsdavxq_p(__a, __b, __p) __arm_vmlsdavxq_p(__a, __b, __p)
@@ -390,7 +388,6 @@
 #define vcaddq_rot90_u8(__a, __b) __arm_vcaddq_rot90_u8(__a, __b)
 #define vcaddq_rot270_u8(__a, __b) __arm_vcaddq_rot270_u8(__a, __b)
 #define vbicq_u8(__a, __b) __arm_vbicq_u8(__a, __b)
-#define vaddvaq_u8(__a, __b) __arm_vaddvaq_u8(__a, __b)
 #define vbrsrq_n_u8(__a, __b) __arm_vbrsrq_n_u8(__a, __b)
 #define vqshluq_n_s8(__a,  __imm) __arm_vqshluq_n_s8(__a,  __imm)
 #define vornq_s8(__a, __b) __arm_vornq_s8(__a, __b)
@@ -406,7 +403,6 @@
 #define vcaddq_rot270_s8(__a, __b) __arm_vcaddq_rot270_s8(__a, __b)
 #define vbrsrq_n_s8(__a, __b) __arm_vbrsrq_n_s8(__a, __b)
 #define vbicq_s8(__a, __b) __arm_vbicq_s8(__a, __b)
-#define vaddvaq_s8(__a, __b) __arm_vaddvaq_s8(__a, __b)
 #define vornq_u16(__a, __b) __arm_vornq_u16(__a, __b)
 #define vmulltq_int_u16(__a, __b) __arm_vmulltq_int_u16(__a, __b)
 #define vmullbq_int_u16(__a, __b) __arm_vmullbq_int_u16(__a, __b)
@@ -414,7 +410,6 @@
 #define vcaddq_rot90_u16(__a, __b) __arm_vcaddq_rot90_u16(__a, __b)
 #define vcaddq_rot270_u16(__a, __b) __arm_vcaddq_rot270_u16(__a, __b)
 #define vbicq_u16(__a, __b) __arm_vbicq_u16(__a, __b)
-#define vaddvaq_u16(__a, __b) __arm_vaddvaq_u16(__a, __b)
 #define vbrsrq_n_u16(__a, __b) __arm_vbrsrq_n_u16(__a, __b)
 #define vqshluq_n_s16(__a,  __imm) __arm_vqshluq_n_s16(__a,  __imm)
 #define vornq_s16(__a, __b) __arm_vornq_s16(__a, __b)
@@ -430,7 +425,6 @@
 #define vcaddq_rot270_s16(__a, __b) __arm_vcaddq_rot270_s16(__a, __b)
 #define vbrsrq_n_s16(__a, __b) __arm_vbrsrq_n_s16(__a, __b)
 #define vbicq_s16(__a, __b) __arm_vbicq_s16(__a, __b)
-#define vaddvaq_s16(__a, __b) __arm_vaddvaq_s16(__a, __b)
 #define vornq_u32(__a, __b) __arm_vornq_u32(__a, __b)
 #define vmulltq_int_u32(__a, __b) __arm_vmulltq_int_u32(__a, __b)
 #define vmullbq_int_u32(__a, __b) __arm_vmullbq_int_u32(__a, __b)
@@ -438,7 +432,6 @@
 #define vcaddq_rot90_u32(__a, __b) __arm_vcaddq_rot90_u32(__a, __b)
 #define vcaddq_rot270_u32(__a, __b) __arm_vcaddq_rot270_u32(__a, __b)
 #define vbicq_u32(__a, __b) __arm_vbicq_u32(__a, __b)
-#define vaddvaq_u32(__a, __b) __arm_vaddvaq_u32(__a, __b)
 #define vbrsrq_n_u32(__a, __b) __arm_vbrsrq_n_u32(__a, __b)
 #define vqshluq_n_s32(__a,  __imm) __arm_vqshluq_n_s32(__a,  __imm)
 #define vornq_s32(__a, __b) __arm_vornq_s32(__a, __b)
@@ -454,7 +447,6 @@
 #define vcaddq_rot270_s32(__a, __b) __arm_vcaddq_rot270_s32(__a, __b)
 #define vbrsrq_n_s32(__a, __b) __arm_vbrsrq_n_s32(__a, __b)
 #define vbicq_s32(__a, __b) __arm_vbicq_s32(__a, __b)
-#define vaddvaq_s32(__a, __b) __arm_vaddvaq_s32(__a, __b)
 #define vmulltq_poly_p8(__a, __b) __arm_vmulltq_poly_p8(__a, __b)
 #define vmullbq_poly_p8(__a, __b) __arm_vmullbq_poly_p8(__a, __b)
 #define vmlaldavq_u16(__a, __b) __arm_vmlaldavq_u16(__a, __b)
@@ -543,7 +535,6 @@
 #define vmlaq_n_u8(__a, __b, __c) __arm_vmlaq_n_u8(__a, __b, __c)
 #define vmladavq_p_u8(__a, __b, __p) __arm_vmladavq_p_u8(__a, __b, __p)
 #define vmladavaq_u8(__a, __b, __c) __arm_vmladavaq_u8(__a, __b, __c)
-#define vaddvaq_p_u8(__a, __b, __p) __arm_vaddvaq_p_u8(__a, __b, __p)
 #define vsriq_n_u8(__a, __b,  __imm) __arm_vsriq_n_u8(__a, __b,  __imm)
 #define vsliq_n_u8(__a, __b,  __imm) __arm_vsliq_n_u8(__a, __b,  __imm)
 #define vmvnq_m_s8(__inactive, __a, __p) __arm_vmvnq_m_s8(__inactive, __a, __p)
@@ -551,7 +542,6 @@
 #define vmlsdavq_p_s8(__a, __b, __p) __arm_vmlsdavq_p_s8(__a, __b, __p)
 #define vmladavxq_p_s8(__a, __b, __p) __arm_vmladavxq_p_s8(__a, __b, __p)
 #define vmladavq_p_s8(__a, __b, __p) __arm_vmladavq_p_s8(__a, __b, __p)
-#define vaddvaq_p_s8(__a, __b, __p) __arm_vaddvaq_p_s8(__a, __b, __p)
 #define vqrdmlsdhxq_s8(__inactive, __a, __b) __arm_vqrdmlsdhxq_s8(__inactive, __a, __b)
 #define vqrdmlsdhq_s8(__inactive, __a, __b) __arm_vqrdmlsdhq_s8(__inactive, __a, __b)
 #define vqrdmlashq_n_s8(__a, __b, __c) __arm_vqrdmlashq_n_s8(__a, __b, __c)
@@ -579,7 +569,6 @@
 #define vmlaq_n_u16(__a, __b, __c) __arm_vmlaq_n_u16(__a, __b, __c)
 #define vmladavq_p_u16(__a, __b, __p) __arm_vmladavq_p_u16(__a, __b, __p)
 #define vmladavaq_u16(__a, __b, __c) __arm_vmladavaq_u16(__a, __b, __c)
-#define vaddvaq_p_u16(__a, __b, __p) __arm_vaddvaq_p_u16(__a, __b, __p)
 #define vsriq_n_u16(__a, __b,  __imm) __arm_vsriq_n_u16(__a, __b,  __imm)
 #define vsliq_n_u16(__a, __b,  __imm) __arm_vsliq_n_u16(__a, __b,  __imm)
 #define vmvnq_m_s16(__inactive, __a, __p) __arm_vmvnq_m_s16(__inactive, __a, __p)
@@ -587,7 +576,6 @@
 #define vmlsdavq_p_s16(__a, __b, __p) __arm_vmlsdavq_p_s16(__a, __b, __p)
 #define vmladavxq_p_s16(__a, __b, __p) __arm_vmladavxq_p_s16(__a, __b, __p)
 #define vmladavq_p_s16(__a, __b, __p) __arm_vmladavq_p_s16(__a, __b, __p)
-#define vaddvaq_p_s16(__a, __b, __p) __arm_vaddvaq_p_s16(__a, __b, __p)
 #define vqrdmlsdhxq_s16(__inactive, __a, __b) __arm_vqrdmlsdhxq_s16(__inactive, __a, __b)
 #define vqrdmlsdhq_s16(__inactive, __a, __b) __arm_vqrdmlsdhq_s16(__inactive, __a, __b)
 #define vqrdmlashq_n_s16(__a, __b, __c) __arm_vqrdmlashq_n_s16(__a, __b, __c)
@@ -615,7 +603,6 @@
 #define vmlaq_n_u32(__a, __b, __c) __arm_vmlaq_n_u32(__a, __b, __c)
 #define vmladavq_p_u32(__a, __b, __p) __arm_vmladavq_p_u32(__a, __b, __p)
 #define vmladavaq_u32(__a, __b, __c) __arm_vmladavaq_u32(__a, __b, __c)
-#define vaddvaq_p_u32(__a, __b, __p) __arm_vaddvaq_p_u32(__a, __b, __p)
 #define vsriq_n_u32(__a, __b,  __imm) __arm_vsriq_n_u32(__a, __b,  __imm)
 #define vsliq_n_u32(__a, __b,  __imm) __arm_vsliq_n_u32(__a, __b,  __imm)
 #define vmvnq_m_s32(__inactive, __a, __p) __arm_vmvnq_m_s32(__inactive, __a, __p)
@@ -623,7 +610,6 @@
 #define vmlsdavq_p_s32(__a, __b, __p) __arm_vmlsdavq_p_s32(__a, __b, __p)
 #define vmladavxq_p_s32(__a, __b, __p) __arm_vmladavxq_p_s32(__a, __b, __p)
 #define vmladavq_p_s32(__a, __b, __p) __arm_vmladavq_p_s32(__a, __b, __p)
-#define vaddvaq_p_s32(__a, __b, __p) __arm_vaddvaq_p_s32(__a, __b, __p)
 #define vqrdmlsdhxq_s32(__inactive, __a, __b) __arm_vqrdmlsdhxq_s32(__inactive, __a, __b)
 #define vqrdmlsdhq_s32(__inactive, __a, __b) __arm_vqrdmlsdhq_s32(__inactive, __a, __b)
 #define vqrdmlashq_n_s32(__a, __b, __c) __arm_vqrdmlashq_n_s32(__a, __b, __c)
@@ -1753,13 +1739,6 @@ __arm_vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
   return __builtin_mve_vbicq_uv16qi (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_u8 (uint32_t __a, uint8x16_t __b)
-{
-  return __builtin_mve_vaddvaq_uv16qi (__a, __b);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbrsrq_n_u8 (uint8x16_t __a, int32_t __b)
@@ -1865,13 +1844,6 @@ __arm_vbicq_s8 (int8x16_t __a, int8x16_t __b)
   return __builtin_mve_vbicq_sv16qi (__a, __b);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_s8 (int32_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vaddvaq_sv16qi (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_u16 (uint16x8_t __a, uint16x8_t __b)
@@ -1923,13 +1895,6 @@ __arm_vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
   return __builtin_mve_vbicq_uv8hi (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_u16 (uint32_t __a, uint16x8_t __b)
-{
-  return __builtin_mve_vaddvaq_uv8hi (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbrsrq_n_u16 (uint16x8_t __a, int32_t __b)
@@ -2035,13 +2000,6 @@ __arm_vbicq_s16 (int16x8_t __a, int16x8_t __b)
   return __builtin_mve_vbicq_sv8hi (__a, __b);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_s16 (int32_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vaddvaq_sv8hi (__a, __b);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_u32 (uint32x4_t __a, uint32x4_t __b)
@@ -2093,13 +2051,6 @@ __arm_vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
   return __builtin_mve_vbicq_uv4si (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_u32 (uint32_t __a, uint32x4_t __b)
-{
-  return __builtin_mve_vaddvaq_uv4si (__a, __b);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbrsrq_n_u32 (uint32x4_t __a, int32_t __b)
@@ -2205,13 +2156,6 @@ __arm_vbicq_s32 (int32x4_t __a, int32x4_t __b)
   return __builtin_mve_vbicq_sv4si (__a, __b);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_s32 (int32_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vaddvaq_sv4si (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmulltq_poly_p8 (uint8x16_t __a, uint8x16_t __b)
@@ -2658,13 +2602,6 @@ __arm_vmladavaq_u8 (uint32_t __a, uint8x16_t __b, uint8x16_t __c)
   return __builtin_mve_vmladavaq_uv16qi (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_p_u8 (uint32_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vaddvaq_p_uv16qi (__a, __b, __p);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm)
@@ -2714,13 +2651,6 @@ __arm_vmladavq_p_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
   return __builtin_mve_vmladavq_p_sv16qi (__a, __b, __p);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_p_s8 (int32_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vaddvaq_p_sv16qi (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlsdhxq_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
@@ -2910,13 +2840,6 @@ __arm_vmladavaq_u16 (uint32_t __a, uint16x8_t __b, uint16x8_t __c)
   return __builtin_mve_vmladavaq_uv8hi (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_p_u16 (uint32_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vaddvaq_p_uv8hi (__a, __b, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm)
@@ -2966,13 +2889,6 @@ __arm_vmladavq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
   return __builtin_mve_vmladavq_p_sv8hi (__a, __b, __p);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_p_s16 (int32_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vaddvaq_p_sv8hi (__a, __b, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlsdhxq_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
@@ -3162,13 +3078,6 @@ __arm_vmladavaq_u32 (uint32_t __a, uint32x4_t __b, uint32x4_t __c)
   return __builtin_mve_vmladavaq_uv4si (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_p_u32 (uint32_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vaddvaq_p_uv4si (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm)
@@ -3218,13 +3127,6 @@ __arm_vmladavq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
   return __builtin_mve_vmladavq_p_sv4si (__a, __b, __p);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_p_s32 (int32_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vaddvaq_p_sv4si (__a, __b, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlsdhxq_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
@@ -9859,13 +9761,6 @@ __arm_vbicq (uint8x16_t __a, uint8x16_t __b)
  return __arm_vbicq_u8 (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq (uint32_t __a, uint8x16_t __b)
-{
- return __arm_vaddvaq_u8 (__a, __b);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbrsrq (uint8x16_t __a, int32_t __b)
@@ -9971,13 +9866,6 @@ __arm_vbicq (int8x16_t __a, int8x16_t __b)
  return __arm_vbicq_s8 (__a, __b);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq (int32_t __a, int8x16_t __b)
-{
- return __arm_vaddvaq_s8 (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (uint16x8_t __a, uint16x8_t __b)
@@ -10027,13 +9915,6 @@ __arm_vbicq (uint16x8_t __a, uint16x8_t __b)
  return __arm_vbicq_u16 (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq (uint32_t __a, uint16x8_t __b)
-{
- return __arm_vaddvaq_u16 (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbrsrq (uint16x8_t __a, int32_t __b)
@@ -10139,13 +10020,6 @@ __arm_vbicq (int16x8_t __a, int16x8_t __b)
  return __arm_vbicq_s16 (__a, __b);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq (int32_t __a, int16x8_t __b)
-{
- return __arm_vaddvaq_s16 (__a, __b);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (uint32x4_t __a, uint32x4_t __b)
@@ -10195,13 +10069,6 @@ __arm_vbicq (uint32x4_t __a, uint32x4_t __b)
  return __arm_vbicq_u32 (__a, __b);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq (uint32_t __a, uint32x4_t __b)
-{
- return __arm_vaddvaq_u32 (__a, __b);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbrsrq (uint32x4_t __a, int32_t __b)
@@ -10307,13 +10174,6 @@ __arm_vbicq (int32x4_t __a, int32x4_t __b)
  return __arm_vbicq_s32 (__a, __b);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq (int32_t __a, int32x4_t __b)
-{
- return __arm_vaddvaq_s32 (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmulltq_poly (uint8x16_t __a, uint8x16_t __b)
@@ -10720,13 +10580,6 @@ __arm_vmladavaq (uint32_t __a, uint8x16_t __b, uint8x16_t __c)
  return __arm_vmladavaq_u8 (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_p (uint32_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vaddvaq_p_u8 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (uint8x16_t __a, uint8x16_t __b, const int __imm)
@@ -10776,13 +10629,6 @@ __arm_vmladavq_p (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
  return __arm_vmladavq_p_s8 (__a, __b, __p);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_p (int32_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vaddvaq_p_s8 (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlsdhxq (int8x16_t __inactive, int8x16_t __a, int8x16_t __b)
@@ -10972,13 +10818,6 @@ __arm_vmladavaq (uint32_t __a, uint16x8_t __b, uint16x8_t __c)
  return __arm_vmladavaq_u16 (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_p (uint32_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vaddvaq_p_u16 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (uint16x8_t __a, uint16x8_t __b, const int __imm)
@@ -11028,13 +10867,6 @@ __arm_vmladavq_p (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
  return __arm_vmladavq_p_s16 (__a, __b, __p);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_p (int32_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vaddvaq_p_s16 (__a, __b, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlsdhxq (int16x8_t __inactive, int16x8_t __a, int16x8_t __b)
@@ -11224,13 +11056,6 @@ __arm_vmladavaq (uint32_t __a, uint32x4_t __b, uint32x4_t __c)
  return __arm_vmladavaq_u32 (__a, __b, __c);
 }
 
-__extension__ extern __inline uint32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_p (uint32_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vaddvaq_p_u32 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (uint32x4_t __a, uint32x4_t __b, const int __imm)
@@ -11280,13 +11105,6 @@ __arm_vmladavq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
  return __arm_vmladavq_p_s32 (__a, __b, __p);
 }
 
-__extension__ extern __inline int32_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddvaq_p (int32_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vaddvaq_p_s32 (__a, __b, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vqrdmlsdhxq (int32x4_t __inactive, int32x4_t __a, int32x4_t __b)
@@ -19404,26 +19222,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int32x4_t]: __arm_vaddlvq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vaddlvq_p_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
 
-#define __arm_vaddvaq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vaddvaq_s8 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vaddvaq_s16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vaddvaq_s32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vaddvaq_u8 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vaddvaq_u16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vaddvaq_u32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
-#define __arm_vaddvaq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vaddvaq_p_s8 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vaddvaq_p_s16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vaddvaq_p_s32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vaddvaq_p_u8 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vaddvaq_p_u16 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vaddvaq_p_u32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vmladavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape
  2023-05-10 13:30 [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp Christophe Lyon
                   ` (12 preceding siblings ...)
  2023-05-10 13:30 ` [PATCH 14/20] arm: [MVE intrinsics] rework vaddvaq Christophe Lyon
@ 2023-05-10 13:30 ` Christophe Lyon
  2023-05-10 14:52   ` Kyrylo Tkachov
  2023-05-10 13:30 ` [PATCH 16/20] arm: [MVE intrinsics] factorize vaddlvq Christophe Lyon
                   ` (5 subsequent siblings)
  19 siblings, 1 reply; 28+ messages in thread
From: Christophe Lyon @ 2023-05-10 13:30 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the unary_acc shape description.
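
As an illustration only (this example is not part of the patch),
vaddlvq is the typical user of this shape: the input elements are
32 bits wide and the scalar result is 64 bits wide.

  #include <arm_mve.h>

  int64_t
  sum_lanes (int32x4_t a)
  {
    /* Long add across vector: 32-bit lanes, 64-bit result.  */
    return vaddlvq (a);
  }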

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (unary_acc): New.
	* config/arm/arm-mve-builtins-shapes.h (unary_acc): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 28 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 29 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index bff1c3e843b..e77a0cc20ac 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1066,6 +1066,34 @@ struct unary_def : public overloaded_base<0>
 };
 SHAPE (unary)
 
+/* <S0:twice>_t vfoo[_<t0>](<T0>_t)
+
+   i.e. a version of "unary" in which the source elements are half the
+   size of the destination scalar, but have the same type class.
+
+   Example: vaddlvq.
+   int64_t [__arm_]vaddlvq[_s32](int32x4_t a)
+   int64_t [__arm_]vaddlvq_p[_s32](int32x4_t a, mve_pred16_t p) */
+struct unary_acc_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+    build_all (b, "sw0,v0", group, MODE_none, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    /* FIXME: check that the return value is actually
+       twice as wide as arg 0.  */
+    return r.resolve_unary ();
+  }
+};
+SHAPE (unary_acc)
+
 /* <T0>_t foo_t0[_t1](<T1>_t)
 
    where the target type <t0> must be specified explicitly but the source
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index fc1bacbd4da..c062fe624c4 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -53,6 +53,7 @@ namespace arm_mve
     extern const function_shape *const create;
     extern const function_shape *const inherent;
     extern const function_shape *const unary;
+    extern const function_shape *const unary_acc;
     extern const function_shape *const unary_convert;
     extern const function_shape *const unary_int32;
     extern const function_shape *const unary_int32_acc;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 16/20] arm: [MVE intrinsics] factorize vaddlvq
  2023-05-10 13:30 [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp Christophe Lyon
                   ` (13 preceding siblings ...)
  2023-05-10 13:30 ` [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape Christophe Lyon
@ 2023-05-10 13:30 ` Christophe Lyon
  2023-05-10 13:30 ` [PATCH 17/20] arm: [MVE intrinsics] rework vaddlvq Christophe Lyon
                   ` (4 subsequent siblings)
  19 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-10 13:30 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vaddlvq builtins so that they use parameterized names.
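
As a sketch of what the parameterized (@-prefixed) names buy (the
call below is taken from the later "rework vaddlvq" patch), C++
code can look the insn up through its generated helper instead of
hardcoding the pattern name:

  /* Instantiate @mve_<mve_insn>q_<supf>v4si for VADDLVQ_S, giving
     the insn code of mve_vaddlvq_sv4si.  */
  insn_code code = code_for_mve_q_v4si (VADDLVQ_S, VADDLVQ_S);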

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (mve_insn): Add vaddlv.
	* config/arm/mve.md (mve_vaddlvq_<supf>v4si): Rename into ...
	(@mve_<mve_insn>q_<supf>v4si): ... this.
	(mve_vaddlvq_p_<supf>v4si): Rename into ...
	(@mve_<mve_insn>q_p_<supf>v4si): ... this.
---
 gcc/config/arm/iterators.md | 2 ++
 gcc/config/arm/mve.md       | 8 ++++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 00123c0a376..84dd97249f9 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -759,6 +759,8 @@ (define_int_attr mve_insn [
 		 (VABDQ_S "vabd") (VABDQ_U "vabd") (VABDQ_F "vabd")
 		 (VABSQ_M_F "vabs")
 		 (VABSQ_M_S "vabs")
+		 (VADDLVQ_P_S "vaddlv") (VADDLVQ_P_U "vaddlv")
+		 (VADDLVQ_S "vaddlv") (VADDLVQ_U "vaddlv")
 		 (VADDQ_M_N_S "vadd") (VADDQ_M_N_U "vadd") (VADDQ_M_N_F "vadd")
 		 (VADDQ_M_S "vadd") (VADDQ_M_U "vadd") (VADDQ_M_F "vadd")
 		 (VADDQ_N_S "vadd") (VADDQ_N_U "vadd") (VADDQ_N_F "vadd")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 1ccbce3c89c..c5373fef9a2 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -500,14 +500,14 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
 ;;
 ;; [vaddlvq_s vaddlvq_u])
 ;;
-(define_insn "mve_vaddlvq_<supf>v4si"
+(define_insn "@mve_<mve_insn>q_<supf>v4si"
   [
    (set (match_operand:DI 0 "s_register_operand" "=r")
 	(unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w")]
 	 VADDLVQ))
   ]
   "TARGET_HAVE_MVE"
-  "vaddlv.<supf>32\t%Q0, %R0, %q1"
+  "<mve_insn>.<supf>32\t%Q0, %R0, %q1"
   [(set_attr "type" "mve_move")
 ])
 
@@ -666,7 +666,7 @@ (define_insn "mve_vcvtq_n_from_f_<supf><mode>"
 ;;
 ;; [vaddlvq_p_s])
 ;;
-(define_insn "mve_vaddlvq_p_<supf>v4si"
+(define_insn "@mve_<mve_insn>q_p_<supf>v4si"
   [
    (set (match_operand:DI 0 "s_register_operand" "=r")
 	(unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w")
@@ -674,7 +674,7 @@ (define_insn "mve_vaddlvq_p_<supf>v4si"
 	 VADDLVQ_P))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vaddlvt.<supf>32\t%Q0, %R0, %q1"
+  "vpst\;<mve_insn>t.<supf>32\t%Q0, %R0, %q1"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 17/20] arm: [MVE intrinsics] rework vaddlvq
  2023-05-10 13:30 [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp Christophe Lyon
                   ` (14 preceding siblings ...)
  2023-05-10 13:30 ` [PATCH 16/20] arm: [MVE intrinsics] factorize vaddlvq Christophe Lyon
@ 2023-05-10 13:30 ` Christophe Lyon
  2023-05-10 13:30 ` [PATCH 18/20] arm: [MVE intrinsics] factorize vmovlbq vmovltq Christophe Lyon
                   ` (3 subsequent siblings)
  19 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-10 13:30 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vaddlvq using the new MVE builtins framework.

Since we kept v4si hardcoded in the insn pattern name, we need to
special-case it in unspec_mve_function_exact_insn_pred_p.
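
The user-facing intrinsics are unchanged; for instance (a minimal
usage sketch, not part of the patch):

  #include <arm_mve.h>

  int64_t
  sum_active (int32x4_t a, mve_pred16_t p)
  {
    /* Predicated long add across vector: only the lanes enabled
       by p contribute to the 64-bit sum.  */
    return vaddlvq_p (a, p);
  }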

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vaddlvq): New.
	* config/arm/arm-mve-builtins-base.def (vaddlvq): New.
	* config/arm/arm-mve-builtins-base.h (vaddlvq): New.
	* config/arm/arm-mve-builtins-functions.h
	(unspec_mve_function_exact_insn_pred_p): Handle vaddlvq.
	* config/arm/arm_mve.h (vaddlvq): Remove.
	(vaddlvq_p): Remove.
	(vaddlvq_s32): Remove.
	(vaddlvq_u32): Remove.
	(vaddlvq_p_s32): Remove.
	(vaddlvq_p_u32): Remove.
	(__arm_vaddlvq_s32): Remove.
	(__arm_vaddlvq_u32): Remove.
	(__arm_vaddlvq_p_s32): Remove.
	(__arm_vaddlvq_p_u32): Remove.
	(__arm_vaddlvq): Remove.
	(__arm_vaddlvq_p): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc     |  1 +
 gcc/config/arm/arm-mve-builtins-base.def    |  1 +
 gcc/config/arm/arm-mve-builtins-base.h      |  1 +
 gcc/config/arm/arm-mve-builtins-functions.h | 69 ++++++++++++++------
 gcc/config/arm/arm_mve.h                    | 72 ---------------------
 5 files changed, 51 insertions(+), 93 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index e87069b0467..fdc0ff50b96 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -244,6 +244,7 @@ namespace arm_mve {
 FUNCTION_WITHOUT_N (vabdq, VABDQ)
 FUNCTION (vabsq, unspec_based_mve_function_exact_insn, (ABS, ABS, ABS, -1, -1, -1, VABSQ_M_S, -1, VABSQ_M_F, -1, -1, -1))
 FUNCTION_WITH_RTX_M_N (vaddq, PLUS, VADDQ)
+FUNCTION_PRED_P_S_U (vaddlvq, VADDLVQ)
 FUNCTION_PRED_P_S_U (vaddvq, VADDVQ)
 FUNCTION_PRED_P_S_U (vaddvaq, VADDVAQ)
 FUNCTION_WITH_RTX_M (vandq, AND, VANDQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 413fe4a1ef0..dcfb426a7fb 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -20,6 +20,7 @@
 #define REQUIRES_FLOAT false
 DEF_MVE_FUNCTION (vabdq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vabsq, unary, all_signed, mx_or_none)
+DEF_MVE_FUNCTION (vaddlvq, unary_acc, integer_32, p_or_none)
 DEF_MVE_FUNCTION (vaddq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vaddvaq, unary_int32_acc, all_integer, p_or_none)
 DEF_MVE_FUNCTION (vaddvq, unary_int32, all_integer, p_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 5338b777444..5de70d5e1d4 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -25,6 +25,7 @@ namespace functions {
 
 extern const function_base *const vabdq;
 extern const function_base *const vabsq;
+extern const function_base *const vaddlvq;
 extern const function_base *const vaddq;
 extern const function_base *const vaddvaq;
 extern const function_base *const vaddvq;
diff --git a/gcc/config/arm/arm-mve-builtins-functions.h b/gcc/config/arm/arm-mve-builtins-functions.h
index d069990dcab..ea926e42b81 100644
--- a/gcc/config/arm/arm-mve-builtins-functions.h
+++ b/gcc/config/arm/arm-mve-builtins-functions.h
@@ -408,32 +408,59 @@ public:
   expand (function_expander &e) const override
   {
     insn_code code;
-    switch (e.pred)
+
+    if (m_unspec_for_sint == VADDLVQ_S
+	|| m_unspec_for_sint == VADDLVAQ_S)
       {
-      case PRED_none:
-	if (e.type_suffix (0).integer_p)
-	  if (e.type_suffix (0).unsigned_p)
-	    code = code_for_mve_q (m_unspec_for_uint, m_unspec_for_uint, e.vector_mode (0));
-	  else
-	    code = code_for_mve_q (m_unspec_for_sint, m_unspec_for_sint, e.vector_mode (0));
-	else
-	  code = code_for_mve_q_f (m_unspec_for_fp, e.vector_mode (0));
+	switch (e.pred)
+	  {
+	  case PRED_none:
+	    if (e.type_suffix (0).unsigned_p)
+	      code = code_for_mve_q_v4si (m_unspec_for_uint, m_unspec_for_uint);
+	    else
+	      code = code_for_mve_q_v4si (m_unspec_for_sint, m_unspec_for_sint);
+	    return e.use_exact_insn (code);
 
-	return e.use_exact_insn (code);
+	  case PRED_p:
+	    if (e.type_suffix (0).unsigned_p)
+	      code = code_for_mve_q_p_v4si (m_unspec_for_p_uint, m_unspec_for_p_uint);
+	    else
+	      code = code_for_mve_q_p_v4si (m_unspec_for_p_sint, m_unspec_for_p_sint);
+	    return e.use_exact_insn (code);
 
-      case PRED_p:
-	if (e.type_suffix (0).integer_p)
-	  if (e.type_suffix (0).unsigned_p)
-	    code = code_for_mve_q_p (m_unspec_for_p_uint, m_unspec_for_p_uint, e.vector_mode (0));
-	  else
-	    code = code_for_mve_q_p (m_unspec_for_p_sint, m_unspec_for_p_sint, e.vector_mode (0));
-	else
-	  code = code_for_mve_q_p_f (m_unspec_for_p_fp, e.vector_mode (0));
+	  default:
+	    gcc_unreachable ();
+	  }
+      }
+    else
+      {
+	switch (e.pred)
+	  {
+	  case PRED_none:
+	    if (e.type_suffix (0).integer_p)
+	      if (e.type_suffix (0).unsigned_p)
+		code = code_for_mve_q (m_unspec_for_uint, m_unspec_for_uint, e.vector_mode (0));
+	      else
+		code = code_for_mve_q (m_unspec_for_sint, m_unspec_for_sint, e.vector_mode (0));
+	    else
+	      code = code_for_mve_q_f (m_unspec_for_fp, e.vector_mode (0));
 
-	return e.use_exact_insn (code);
+	    return e.use_exact_insn (code);
 
-      default:
-	gcc_unreachable ();
+	  case PRED_p:
+	    if (e.type_suffix (0).integer_p)
+	      if (e.type_suffix (0).unsigned_p)
+		code = code_for_mve_q_p (m_unspec_for_p_uint, m_unspec_for_p_uint, e.vector_mode (0));
+	      else
+		code = code_for_mve_q_p (m_unspec_for_p_sint, m_unspec_for_p_sint, e.vector_mode (0));
+	    else
+	      code = code_for_mve_q_p_f (m_unspec_for_p_fp, e.vector_mode (0));
+
+	    return e.use_exact_insn (code);
+
+	  default:
+	    gcc_unreachable ();
+	  }
       }
 
     gcc_unreachable ();
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 74783570561..21d7768a732 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -42,11 +42,9 @@
 
 #ifndef __ARM_MVE_PRESERVE_USER_NAMESPACE
 #define vst4q(__addr, __value) __arm_vst4q(__addr, __value)
-#define vaddlvq(__a) __arm_vaddlvq(__a)
 #define vmovlbq(__a) __arm_vmovlbq(__a)
 #define vmovltq(__a) __arm_vmovltq(__a)
 #define vmvnq(__a) __arm_vmvnq(__a)
-#define vaddlvq_p(__a, __p) __arm_vaddlvq_p(__a, __p)
 #define vornq(__a, __b) __arm_vornq(__a, __b)
 #define vmulltq_int(__a, __b) __arm_vmulltq_int(__a, __b)
 #define vmullbq_int(__a, __b) __arm_vmullbq_int(__a, __b)
@@ -324,7 +322,6 @@
 #define vcvtq_f32_s32(__a) __arm_vcvtq_f32_s32(__a)
 #define vcvtq_f16_u16(__a) __arm_vcvtq_f16_u16(__a)
 #define vcvtq_f32_u32(__a) __arm_vcvtq_f32_u32(__a)
-#define vaddlvq_s32(__a) __arm_vaddlvq_s32(__a)
 #define vmovlbq_s8(__a) __arm_vmovlbq_s8(__a)
 #define vmovlbq_s16(__a) __arm_vmovlbq_s16(__a)
 #define vmovltq_s8(__a) __arm_vmovltq_s8(__a)
@@ -353,7 +350,6 @@
 #define vmovlbq_u16(__a) __arm_vmovlbq_u16(__a)
 #define vmvnq_n_u16( __imm) __arm_vmvnq_n_u16( __imm)
 #define vmvnq_n_u32( __imm) __arm_vmvnq_n_u32( __imm)
-#define vaddlvq_u32(__a) __arm_vaddlvq_u32(__a)
 #define vcvtq_u16_f16(__a) __arm_vcvtq_u16_f16(__a)
 #define vcvtq_u32_f32(__a) __arm_vcvtq_u32_f32(__a)
 #define vcvtpq_u16_f16(__a) __arm_vcvtpq_u16_f16(__a)
@@ -379,8 +375,6 @@
 #define vcvtq_n_s32_f32(__a,  __imm6) __arm_vcvtq_n_s32_f32(__a,  __imm6)
 #define vcvtq_n_u16_f16(__a,  __imm6) __arm_vcvtq_n_u16_f16(__a,  __imm6)
 #define vcvtq_n_u32_f32(__a,  __imm6) __arm_vcvtq_n_u32_f32(__a,  __imm6)
-#define vaddlvq_p_s32(__a, __p) __arm_vaddlvq_p_s32(__a, __p)
-#define vaddlvq_p_u32(__a, __p) __arm_vaddlvq_p_u32(__a, __p)
 #define vornq_u8(__a, __b) __arm_vornq_u8(__a, __b)
 #define vmulltq_int_u8(__a, __b) __arm_vmulltq_int_u8(__a, __b)
 #define vmullbq_int_u8(__a, __b) __arm_vmullbq_int_u8(__a, __b)
@@ -1499,13 +1493,6 @@ __arm_vst4q_u32 (uint32_t * __addr, uint32x4x4_t __value)
   __builtin_mve_vst4qv4si ((__builtin_neon_si *) __addr, __rv.__o);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddlvq_s32 (int32x4_t __a)
-{
-  return __builtin_mve_vaddlvq_sv4si (__a);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmovlbq_s8 (int8x16_t __a)
@@ -1632,13 +1619,6 @@ __arm_vmvnq_n_u32 (const int __imm)
   return __builtin_mve_vmvnq_n_uv4si (__imm);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddlvq_u32 (uint32x4_t __a)
-{
-  return __builtin_mve_vaddlvq_uv4si (__a);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vctp16q (uint32_t __a)
@@ -1674,20 +1654,6 @@ __arm_vpnot (mve_pred16_t __a)
   return __builtin_mve_vpnotv16bi (__a);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddlvq_p_s32 (int32x4_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vaddlvq_p_sv4si (__a, __p);
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddlvq_p_u32 (uint32x4_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vaddlvq_p_uv4si (__a, __p);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_u8 (uint8x16_t __a, uint8x16_t __b)
@@ -9586,13 +9552,6 @@ __arm_vst4q (uint32_t * __addr, uint32x4x4_t __value)
  __arm_vst4q_u32 (__addr, __value);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddlvq (int32x4_t __a)
-{
- return __arm_vaddlvq_s32 (__a);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmovlbq (int8x16_t __a)
@@ -9691,27 +9650,6 @@ __arm_vmovlbq (uint16x8_t __a)
  return __arm_vmovlbq_u16 (__a);
 }
 
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddlvq (uint32x4_t __a)
-{
- return __arm_vaddlvq_u32 (__a);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddlvq_p (int32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vaddlvq_p_s32 (__a, __p);
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vaddlvq_p (uint32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vaddlvq_p_u32 (__a, __p);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (uint8x16_t __a, uint8x16_t __b)
@@ -19212,16 +19150,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vaddlvaq_p_s32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int32x4_t), p2), \
   int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vaddlvaq_p_u32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
-#define __arm_vaddlvq(p0) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vaddlvq_s32 (__ARM_mve_coerce(__p0, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vaddlvq_u32 (__ARM_mve_coerce(__p0, uint32x4_t)));})
-
-#define __arm_vaddlvq_p(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vaddlvq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vaddlvq_p_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
-
 #define __arm_vmladavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 18/20] arm: [MVE intrinsics] factorize vmovlbq vmovltq
  2023-05-10 13:30 [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp Christophe Lyon
                   ` (15 preceding siblings ...)
  2023-05-10 13:30 ` [PATCH 17/20] arm: [MVE intrinsics] rework vaddlvq Christophe Lyon
@ 2023-05-10 13:30 ` Christophe Lyon
  2023-05-10 13:30 ` [PATCH 19/20] arm: [MVE intrinsics] add unary_widen shape Christophe Lyon
                   ` (2 subsequent siblings)
  19 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-10 13:30 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vmovlbq, vmovltq builtins so that they use the same
parameterized names.
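
For instance (working through one expansion by hand): with the
VMOVLBQ_S unspec and the V16QI mode, the merged pattern instantiates
as mve_vmovlbq_sv16qi and emits

  vmovlb.s8	%q0, %q1

while VMOVLTQ_U with V8HI yields vmovlt.u16, so the previous pair of
near-identical patterns collapses into one.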

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (mve_insn): Add vmovlb, vmovlt.
	(VMOVLBQ, VMOVLTQ): Merge into ...
	(VMOVLxQ): ... this.
	(VMOVLTQ_M, VMOVLBQ_M): Merge into ...
	(VMOVLxQ_M): ... this.
	* config/arm/mve.md (mve_vmovltq_<supf><mode>)
	(mve_vmovlbq_<supf><mode>): Merge into ...
	(@mve_<mve_insn>q_<supf><mode>): ... this.
	(mve_vmovlbq_m_<supf><mode>, mve_vmovltq_m_<supf><mode>): Merge
	into ...
	(@mve_<mve_insn>q_m_<supf><mode>): ... this.
---
 gcc/config/arm/iterators.md | 10 +++++----
 gcc/config/arm/mve.md       | 44 ++++++++-----------------------------
 2 files changed, 15 insertions(+), 39 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 84dd97249f9..2f6de937ef7 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -817,6 +817,10 @@ (define_int_attr mve_insn [
 		 (VMINVQ_S "vminv") (VMINVQ_U "vminv")
 		 (VMLAQ_M_N_S "vmla") (VMLAQ_M_N_U "vmla")
 		 (VMLASQ_M_N_S "vmlas") (VMLASQ_M_N_U "vmlas")
+		 (VMOVLBQ_M_S "vmovlb") (VMOVLBQ_M_U "vmovlb")
+		 (VMOVLBQ_S "vmovlb") (VMOVLBQ_U "vmovlb")
+		 (VMOVLTQ_M_S "vmovlt") (VMOVLTQ_M_U "vmovlt")
+		 (VMOVLTQ_S "vmovlt") (VMOVLTQ_U "vmovlt")
 		 (VMOVNBQ_M_S "vmovnb") (VMOVNBQ_M_U "vmovnb")
 		 (VMOVNBQ_S "vmovnb") (VMOVNBQ_U "vmovnb")
 		 (VMOVNTQ_M_S "vmovnt") (VMOVNTQ_M_U "vmovnt")
@@ -2318,8 +2322,7 @@ (define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S])
 (define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S])
 (define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S])
 (define_int_iterator VREV32Q [VREV32Q_U VREV32Q_S])
-(define_int_iterator VMOVLBQ [VMOVLBQ_S VMOVLBQ_U])
-(define_int_iterator VMOVLTQ [VMOVLTQ_U VMOVLTQ_S])
+(define_int_iterator VMOVLxQ [VMOVLBQ_S VMOVLBQ_U VMOVLTQ_U VMOVLTQ_S])
 (define_int_iterator VCVTPQ [VCVTPQ_S VCVTPQ_U])
 (define_int_iterator VCVTNQ [VCVTNQ_S VCVTNQ_U])
 (define_int_iterator VCVTMQ [VCVTMQ_S VCVTMQ_U])
@@ -2413,7 +2416,7 @@ (define_int_iterator VSLIQ_N [VSLIQ_N_S VSLIQ_N_U])
 (define_int_iterator VSRIQ_N [VSRIQ_N_S VSRIQ_N_U])
 (define_int_iterator VMLALDAVQ_P [VMLALDAVQ_P_U VMLALDAVQ_P_S])
 (define_int_iterator VQMOVNBQ_M [VQMOVNBQ_M_S VQMOVNBQ_M_U])
-(define_int_iterator VMOVLTQ_M [VMOVLTQ_M_U VMOVLTQ_M_S])
+(define_int_iterator VMOVLxQ_M [VMOVLBQ_M_U VMOVLBQ_M_S VMOVLTQ_M_U VMOVLTQ_M_S])
 (define_int_iterator VMOVNBQ_M [VMOVNBQ_M_U VMOVNBQ_M_S])
 (define_int_iterator VRSHRNTQ_N [VRSHRNTQ_N_U VRSHRNTQ_N_S])
 (define_int_iterator VORRQ_M_N [VORRQ_M_N_S VORRQ_M_N_U])
@@ -2421,7 +2424,6 @@ (define_int_iterator VREV32Q_M [VREV32Q_M_S VREV32Q_M_U])
 (define_int_iterator VREV16Q_M [VREV16Q_M_S VREV16Q_M_U])
 (define_int_iterator VQRSHRNTQ_N [VQRSHRNTQ_N_U VQRSHRNTQ_N_S])
 (define_int_iterator VMOVNTQ_M [VMOVNTQ_M_U VMOVNTQ_M_S])
-(define_int_iterator VMOVLBQ_M [VMOVLBQ_M_U VMOVLBQ_M_S])
 (define_int_iterator VMLALDAVAQ [VMLALDAVAQ_S VMLALDAVAQ_U])
 (define_int_iterator VQSHRNBQ_N [VQSHRNBQ_N_U VQSHRNBQ_N_S])
 (define_int_iterator VSHRNBQ_N [VSHRNBQ_N_U VSHRNBQ_N_S])
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index c5373fef9a2..f5cb8ef48ef 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -386,30 +386,17 @@ (define_insn "@mve_<mve_insn>q_<supf><mode>"
 ])
 
 ;;
-;; [vmovltq_u, vmovltq_s])
+;; [vmovlbq_s, vmovlbq_u]
+;; [vmovltq_u, vmovltq_s]
 ;;
-(define_insn "mve_vmovltq_<supf><mode>"
-  [
-   (set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
-	(unspec:<V_double_width> [(match_operand:MVE_3 1 "s_register_operand" "w")]
-	 VMOVLTQ))
-  ]
-  "TARGET_HAVE_MVE"
-  "vmovlt.<supf>%#<V_sz_elem>   %q0, %q1"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vmovlbq_s, vmovlbq_u])
-;;
-(define_insn "mve_vmovlbq_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_<supf><mode>"
   [
    (set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
 	(unspec:<V_double_width> [(match_operand:MVE_3 1 "s_register_operand" "w")]
-	 VMOVLBQ))
+	 VMOVLxQ))
   ]
   "TARGET_HAVE_MVE"
-  "vmovlb.<supf>%#<V_sz_elem>   %q0, %q1"
+  "<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1"
   [(set_attr "type" "mve_move")
 ])
 
@@ -2904,34 +2891,21 @@ (define_insn "mve_vmlsldavxq_p_s<mode>"
   "vpst\;vmlsldavxt.s%#<V_sz_elem> %Q0, %R0, %q1, %q2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
+
 ;;
 ;; [vmovlbq_m_u, vmovlbq_m_s])
-;;
-(define_insn "mve_vmovlbq_m_<supf><mode>"
-  [
-   (set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
-	(unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
-		       (match_operand:MVE_3 2 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VMOVLBQ_M))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vmovlbt.<supf>%#<V_sz_elem>	%q0, %q2"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-;;
 ;; [vmovltq_m_u, vmovltq_m_s])
 ;;
-(define_insn "mve_vmovltq_m_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_m_<supf><mode>"
   [
    (set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
 	(unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
 		       (match_operand:MVE_3 2 "s_register_operand" "w")
 		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VMOVLTQ_M))
+	 VMOVLxQ_M))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vmovltt.<supf>%#<V_sz_elem>	%q0, %q2"
+  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 19/20] arm: [MVE intrinsics] add unary_widen shape
  2023-05-10 13:30 [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp Christophe Lyon
                   ` (16 preceding siblings ...)
  2023-05-10 13:30 ` [PATCH 18/20] arm: [MVE intrinsics] factorize vmovlbq vmovltq Christophe Lyon
@ 2023-05-10 13:30 ` Christophe Lyon
  2023-05-10 13:30 ` [PATCH 20/20] arm: [MVE intrinsics] rework vmovlbq vmovltq Christophe Lyon
  2023-05-10 16:53 ` [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp Kyrylo Tkachov
  19 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-10 13:30 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the unary_widen shape description.
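
For illustration only (not part of the patch), the predicated form
of an intrinsic with this shape takes an inactive vector of the wide
type, which is why resolve checks argument 0 separately:

  #include <arm_mve.h>

  int16x8_t
  widen_bottom (int16x8_t inactive, int8x16_t a, mve_pred16_t p)
  {
    /* Bottom (even-numbered) elements of a, sign-extended to
       16 bits; lanes disabled by p are taken from inactive.  */
    return vmovlbq_m (inactive, a, p);
  }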

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (unary_widen): New.
	* config/arm/arm-mve-builtins-shapes.h (unary_widen): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 46 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 47 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index e77a0cc20ac..ae73fc6b1b7 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1236,6 +1236,52 @@ struct unary_n_def : public overloaded_base<0>
 };
 SHAPE (unary_n)
 
+/* <T0:twice>_t vfoo[_t0](<T0>_t)
+
+   i.e. a version of "unary" in which the source elements are half the
+   size of the destination, but have the same type class.
+
+   Example: vmovlbq.
+   int32x4_t [__arm_]vmovlbq[_s16](int16x8_t a)
+   int32x4_t [__arm_]vmovlbq_m[_s16](int32x4_t inactive, int16x8_t a, mve_pred16_t p)
+   int32x4_t [__arm_]vmovlbq_x[_s16](int16x8_t a, mve_pred16_t p)  */
+struct unary_widen_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+    build_all (b, "vw0,v0", group, MODE_none, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    type_suffix_index type;
+    tree res;
+    if (!r.check_gp_argument (1, i, nargs)
+	|| (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES)
+      return error_mark_node;
+
+    type_suffix_index wide_suffix
+      = find_type_suffix (type_suffixes[type].tclass,
+			  type_suffixes[type].element_bits * 2);
+
+    /* Check that the inactive argument has the wide type.  */
+    if (r.pred == PRED_m
+	&& r.infer_vector_type (0) != wide_suffix)
+      return r.report_no_such_form (type);
+
+    if ((res = r.lookup_form (r.mode_suffix_id, type)))
+      return res;
+
+    return r.report_no_such_form (type);
+  }
+};
+SHAPE (unary_widen)
+
 } /* end namespace arm_mve */
 
 #undef SHAPE
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index c062fe624c4..5a8d9fe2b2d 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -58,6 +58,7 @@ namespace arm_mve
     extern const function_shape *const unary_int32;
     extern const function_shape *const unary_int32_acc;
     extern const function_shape *const unary_n;
+    extern const function_shape *const unary_widen;
 
   } /* end namespace arm_mve::shapes */
 } /* end namespace arm_mve */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 20/20] arm: [MVE intrinsics] rework vmovlbq vmovltq
  2023-05-10 13:30 [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp Christophe Lyon
                   ` (17 preceding siblings ...)
  2023-05-10 13:30 ` [PATCH 19/20] arm: [MVE intrinsics] add unary_widen shape Christophe Lyon
@ 2023-05-10 13:30 ` Christophe Lyon
  2023-05-10 16:53 ` [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp Kyrylo Tkachov
  19 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-10 13:30 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vmovlbq, vmovltq using the new MVE builtins framework.
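
The overloaded forms resolve as before; for example (a usage sketch,
not part of the patch):

  #include <arm_mve.h>

  uint32x4_t
  widen_top (uint16x8_t a)
  {
    /* Top (odd-numbered) elements of a, zero-extended to 32 bits;
       resolves to vmovltq_u16.  */
    return vmovltq (a);
  }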

2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vmovlbq, vmovltq): New.
	* config/arm/arm-mve-builtins-base.def (vmovlbq, vmovltq): New.
	* config/arm/arm-mve-builtins-base.h (vmovlbq, vmovltq): New.
	* config/arm/arm_mve.h (vmovlbq): Remove.
	(vmovltq): Remove.
	(vmovlbq_m): Remove.
	(vmovltq_m): Remove.
	(vmovlbq_x): Remove.
	(vmovltq_x): Remove.
	(vmovlbq_s8): Remove.
	(vmovlbq_s16): Remove.
	(vmovltq_s8): Remove.
	(vmovltq_s16): Remove.
	(vmovltq_u8): Remove.
	(vmovltq_u16): Remove.
	(vmovlbq_u8): Remove.
	(vmovlbq_u16): Remove.
	(vmovlbq_m_s8): Remove.
	(vmovltq_m_s8): Remove.
	(vmovlbq_m_u8): Remove.
	(vmovltq_m_u8): Remove.
	(vmovlbq_m_s16): Remove.
	(vmovltq_m_s16): Remove.
	(vmovlbq_m_u16): Remove.
	(vmovltq_m_u16): Remove.
	(vmovlbq_x_s8): Remove.
	(vmovlbq_x_s16): Remove.
	(vmovlbq_x_u8): Remove.
	(vmovlbq_x_u16): Remove.
	(vmovltq_x_s8): Remove.
	(vmovltq_x_s16): Remove.
	(vmovltq_x_u8): Remove.
	(vmovltq_x_u16): Remove.
	(__arm_vmovlbq_s8): Remove.
	(__arm_vmovlbq_s16): Remove.
	(__arm_vmovltq_s8): Remove.
	(__arm_vmovltq_s16): Remove.
	(__arm_vmovltq_u8): Remove.
	(__arm_vmovltq_u16): Remove.
	(__arm_vmovlbq_u8): Remove.
	(__arm_vmovlbq_u16): Remove.
	(__arm_vmovlbq_m_s8): Remove.
	(__arm_vmovltq_m_s8): Remove.
	(__arm_vmovlbq_m_u8): Remove.
	(__arm_vmovltq_m_u8): Remove.
	(__arm_vmovlbq_m_s16): Remove.
	(__arm_vmovltq_m_s16): Remove.
	(__arm_vmovlbq_m_u16): Remove.
	(__arm_vmovltq_m_u16): Remove.
	(__arm_vmovlbq_x_s8): Remove.
	(__arm_vmovlbq_x_s16): Remove.
	(__arm_vmovlbq_x_u8): Remove.
	(__arm_vmovlbq_x_u16): Remove.
	(__arm_vmovltq_x_s8): Remove.
	(__arm_vmovltq_x_s16): Remove.
	(__arm_vmovltq_x_u8): Remove.
	(__arm_vmovltq_x_u16): Remove.
	(__arm_vmovlbq): Remove.
	(__arm_vmovltq): Remove.
	(__arm_vmovlbq_m): Remove.
	(__arm_vmovltq_m): Remove.
	(__arm_vmovlbq_x): Remove.
	(__arm_vmovltq_x): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   2 +
 gcc/config/arm/arm-mve-builtins-base.def |   2 +
 gcc/config/arm/arm-mve-builtins-base.h   |   2 +
 gcc/config/arm/arm_mve.h                 | 454 -----------------------
 4 files changed, 6 insertions(+), 454 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index fdc0ff50b96..2dec15ac0b1 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -279,6 +279,8 @@ FUNCTION (vminnmq, unspec_based_mve_function_exact_insn, (UNKNOWN, UNKNOWN, SMIN
 FUNCTION_PRED_P_F (vminnmvq, VMINNMVQ)
 FUNCTION_WITH_RTX_M_NO_F (vminq, SMIN, UMIN, VMINQ)
 FUNCTION_PRED_P_S_U (vminvq, VMINVQ)
+FUNCTION_WITHOUT_N_NO_F (vmovlbq, VMOVLBQ)
+FUNCTION_WITHOUT_N_NO_F (vmovltq, VMOVLTQ)
 FUNCTION_WITHOUT_N_NO_F (vmovnbq, VMOVNBQ)
 FUNCTION_WITHOUT_N_NO_F (vmovntq, VMOVNTQ)
 FUNCTION_WITHOUT_N_NO_F (vmulhq, VMULHQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index dcfb426a7fb..b0de5af1013 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -48,6 +48,8 @@ DEF_MVE_FUNCTION (vminaq, binary_maxamina, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vminavq, binary_maxavminav, all_signed, p_or_none)
 DEF_MVE_FUNCTION (vminq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vminvq, binary_maxvminv, all_integer, p_or_none)
+DEF_MVE_FUNCTION (vmovlbq, unary_widen, integer_8_16, mx_or_none)
+DEF_MVE_FUNCTION (vmovltq, unary_widen, integer_8_16, mx_or_none)
 DEF_MVE_FUNCTION (vmovnbq, binary_move_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vmovntq, binary_move_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vmulhq, binary, all_integer, mx_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 5de70d5e1d4..fa2e97fd461 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -61,6 +61,8 @@ extern const function_base *const vminnmq;
 extern const function_base *const vminnmvq;
 extern const function_base *const vminq;
 extern const function_base *const vminvq;
+extern const function_base *const vmovlbq;
+extern const function_base *const vmovltq;
 extern const function_base *const vmovnbq;
 extern const function_base *const vmovntq;
 extern const function_base *const vmulhq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 21d7768a732..c0891b7592a 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -42,8 +42,6 @@
 
 #ifndef __ARM_MVE_PRESERVE_USER_NAMESPACE
 #define vst4q(__addr, __value) __arm_vst4q(__addr, __value)
-#define vmovlbq(__a) __arm_vmovlbq(__a)
-#define vmovltq(__a) __arm_vmovltq(__a)
 #define vmvnq(__a) __arm_vmvnq(__a)
 #define vornq(__a, __b) __arm_vornq(__a, __b)
 #define vmulltq_int(__a, __b) __arm_vmulltq_int(__a, __b)
@@ -118,8 +116,6 @@
 #define vmlaldavxq_p(__a, __b, __p) __arm_vmlaldavxq_p(__a, __b, __p)
 #define vmlsldavq_p(__a, __b, __p) __arm_vmlsldavq_p(__a, __b, __p)
 #define vmlsldavxq_p(__a, __b, __p) __arm_vmlsldavxq_p(__a, __b, __p)
-#define vmovlbq_m(__inactive, __a, __p) __arm_vmovlbq_m(__inactive, __a, __p)
-#define vmovltq_m(__inactive, __a, __p) __arm_vmovltq_m(__inactive, __a, __p)
 #define vsriq_m(__a, __b, __imm, __p) __arm_vsriq_m(__a, __b, __imm, __p)
 #define vqshluq_m(__inactive, __a, __imm, __p) __arm_vqshluq_m(__inactive, __a, __imm, __p)
 #define vabavq_p(__a, __b, __c, __p) __arm_vabavq_p(__a, __b, __c, __p)
@@ -246,8 +242,6 @@
 #define vhcaddq_rot270_x(__a, __b, __p) __arm_vhcaddq_rot270_x(__a, __b, __p)
 #define vbicq_x(__a, __b, __p) __arm_vbicq_x(__a, __b, __p)
 #define vbrsrq_x(__a, __b, __p) __arm_vbrsrq_x(__a, __b, __p)
-#define vmovlbq_x(__a, __p) __arm_vmovlbq_x(__a, __p)
-#define vmovltq_x(__a, __p) __arm_vmovltq_x(__a, __p)
 #define vmvnq_x(__a, __p) __arm_vmvnq_x(__a, __p)
 #define vornq_x(__a, __b, __p) __arm_vornq_x(__a, __b, __p)
 #define vadciq(__a, __b, __carry_out) __arm_vadciq(__a, __b, __carry_out)
@@ -322,10 +316,6 @@
 #define vcvtq_f32_s32(__a) __arm_vcvtq_f32_s32(__a)
 #define vcvtq_f16_u16(__a) __arm_vcvtq_f16_u16(__a)
 #define vcvtq_f32_u32(__a) __arm_vcvtq_f32_u32(__a)
-#define vmovlbq_s8(__a) __arm_vmovlbq_s8(__a)
-#define vmovlbq_s16(__a) __arm_vmovlbq_s16(__a)
-#define vmovltq_s8(__a) __arm_vmovltq_s8(__a)
-#define vmovltq_s16(__a) __arm_vmovltq_s16(__a)
 #define vmvnq_s8(__a) __arm_vmvnq_s8(__a)
 #define vmvnq_s16(__a) __arm_vmvnq_s16(__a)
 #define vmvnq_s32(__a) __arm_vmvnq_s32(__a)
@@ -344,10 +334,6 @@
 #define vmvnq_u8(__a) __arm_vmvnq_u8(__a)
 #define vmvnq_u16(__a) __arm_vmvnq_u16(__a)
 #define vmvnq_u32(__a) __arm_vmvnq_u32(__a)
-#define vmovltq_u8(__a) __arm_vmovltq_u8(__a)
-#define vmovltq_u16(__a) __arm_vmovltq_u16(__a)
-#define vmovlbq_u8(__a) __arm_vmovlbq_u8(__a)
-#define vmovlbq_u16(__a) __arm_vmovlbq_u16(__a)
 #define vmvnq_n_u16( __imm) __arm_vmvnq_n_u16( __imm)
 #define vmvnq_n_u32( __imm) __arm_vmvnq_n_u32( __imm)
 #define vcvtq_u16_f16(__a) __arm_vcvtq_u16_f16(__a)
@@ -661,8 +647,6 @@
 #define vmlaldavxq_p_s16(__a, __b, __p) __arm_vmlaldavxq_p_s16(__a, __b, __p)
 #define vmlsldavq_p_s16(__a, __b, __p) __arm_vmlsldavq_p_s16(__a, __b, __p)
 #define vmlsldavxq_p_s16(__a, __b, __p) __arm_vmlsldavxq_p_s16(__a, __b, __p)
-#define vmovlbq_m_s8(__inactive, __a, __p) __arm_vmovlbq_m_s8(__inactive, __a, __p)
-#define vmovltq_m_s8(__inactive, __a, __p) __arm_vmovltq_m_s8(__inactive, __a, __p)
 #define vpselq_f16(__a, __b, __p) __arm_vpselq_f16(__a, __b, __p)
 #define vmvnq_m_n_u16(__inactive,  __imm, __p) __arm_vmvnq_m_n_u16(__inactive,  __imm, __p)
 #define vcvtmq_m_u16_f16(__inactive, __a, __p) __arm_vcvtmq_m_u16_f16(__inactive, __a, __p)
@@ -671,8 +655,6 @@
 #define vcvtq_m_u16_f16(__inactive, __a, __p) __arm_vcvtq_m_u16_f16(__inactive, __a, __p)
 #define vmlaldavaq_u16(__a, __b, __c) __arm_vmlaldavaq_u16(__a, __b, __c)
 #define vmlaldavq_p_u16(__a, __b, __p) __arm_vmlaldavq_p_u16(__a, __b, __p)
-#define vmovlbq_m_u8(__inactive, __a, __p) __arm_vmovlbq_m_u8(__inactive, __a, __p)
-#define vmovltq_m_u8(__inactive, __a, __p) __arm_vmovltq_m_u8(__inactive, __a, __p)
 #define vmvnq_m_n_s32(__inactive,  __imm, __p) __arm_vmvnq_m_n_s32(__inactive,  __imm, __p)
 #define vcmlaq_f32(__a, __b, __c) __arm_vcmlaq_f32(__a, __b, __c)
 #define vcmlaq_rot180_f32(__a, __b, __c) __arm_vcmlaq_rot180_f32(__a, __b, __c)
@@ -694,8 +676,6 @@
 #define vmlaldavxq_p_s32(__a, __b, __p) __arm_vmlaldavxq_p_s32(__a, __b, __p)
 #define vmlsldavq_p_s32(__a, __b, __p) __arm_vmlsldavq_p_s32(__a, __b, __p)
 #define vmlsldavxq_p_s32(__a, __b, __p) __arm_vmlsldavxq_p_s32(__a, __b, __p)
-#define vmovlbq_m_s16(__inactive, __a, __p) __arm_vmovlbq_m_s16(__inactive, __a, __p)
-#define vmovltq_m_s16(__inactive, __a, __p) __arm_vmovltq_m_s16(__inactive, __a, __p)
 #define vpselq_f32(__a, __b, __p) __arm_vpselq_f32(__a, __b, __p)
 #define vmvnq_m_n_u32(__inactive,  __imm, __p) __arm_vmvnq_m_n_u32(__inactive,  __imm, __p)
 #define vcvtmq_m_u32_f32(__inactive, __a, __p) __arm_vcvtmq_m_u32_f32(__inactive, __a, __p)
@@ -704,8 +684,6 @@
 #define vcvtq_m_u32_f32(__inactive, __a, __p) __arm_vcvtq_m_u32_f32(__inactive, __a, __p)
 #define vmlaldavaq_u32(__a, __b, __c) __arm_vmlaldavaq_u32(__a, __b, __c)
 #define vmlaldavq_p_u32(__a, __b, __p) __arm_vmlaldavq_p_u32(__a, __b, __p)
-#define vmovlbq_m_u16(__inactive, __a, __p) __arm_vmovlbq_m_u16(__inactive, __a, __p)
-#define vmovltq_m_u16(__inactive, __a, __p) __arm_vmovltq_m_u16(__inactive, __a, __p)
 #define vsriq_m_n_s8(__a, __b,  __imm, __p) __arm_vsriq_m_n_s8(__a, __b,  __imm, __p)
 #define vcvtq_m_n_f16_u16(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_f16_u16(__inactive, __a,  __imm6, __p)
 #define vqshluq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vqshluq_m_n_s8(__inactive, __a,  __imm, __p)
@@ -1251,14 +1229,6 @@
 #define vbrsrq_x_n_u8(__a, __b, __p) __arm_vbrsrq_x_n_u8(__a, __b, __p)
 #define vbrsrq_x_n_u16(__a, __b, __p) __arm_vbrsrq_x_n_u16(__a, __b, __p)
 #define vbrsrq_x_n_u32(__a, __b, __p) __arm_vbrsrq_x_n_u32(__a, __b, __p)
-#define vmovlbq_x_s8(__a, __p) __arm_vmovlbq_x_s8(__a, __p)
-#define vmovlbq_x_s16(__a, __p) __arm_vmovlbq_x_s16(__a, __p)
-#define vmovlbq_x_u8(__a, __p) __arm_vmovlbq_x_u8(__a, __p)
-#define vmovlbq_x_u16(__a, __p) __arm_vmovlbq_x_u16(__a, __p)
-#define vmovltq_x_s8(__a, __p) __arm_vmovltq_x_s8(__a, __p)
-#define vmovltq_x_s16(__a, __p) __arm_vmovltq_x_s16(__a, __p)
-#define vmovltq_x_u8(__a, __p) __arm_vmovltq_x_u8(__a, __p)
-#define vmovltq_x_u16(__a, __p) __arm_vmovltq_x_u16(__a, __p)
 #define vmvnq_x_s8(__a, __p) __arm_vmvnq_x_s8(__a, __p)
 #define vmvnq_x_s16(__a, __p) __arm_vmvnq_x_s16(__a, __p)
 #define vmvnq_x_s32(__a, __p) __arm_vmvnq_x_s32(__a, __p)
@@ -1493,34 +1463,6 @@ __arm_vst4q_u32 (uint32_t * __addr, uint32x4x4_t __value)
   __builtin_mve_vst4qv4si ((__builtin_neon_si *) __addr, __rv.__o);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_s8 (int8x16_t __a)
-{
-  return __builtin_mve_vmovlbq_sv16qi (__a);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_s16 (int16x8_t __a)
-{
-  return __builtin_mve_vmovlbq_sv8hi (__a);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_s8 (int8x16_t __a)
-{
-  return __builtin_mve_vmovltq_sv16qi (__a);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_s16 (int16x8_t __a)
-{
-  return __builtin_mve_vmovltq_sv8hi (__a);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_s8 (int8x16_t __a)
@@ -1577,34 +1519,6 @@ __arm_vmvnq_u32 (uint32x4_t __a)
   return __builtin_mve_vmvnq_uv4si (__a);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_u8 (uint8x16_t __a)
-{
-  return __builtin_mve_vmovltq_uv16qi (__a);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_u16 (uint16x8_t __a)
-{
-  return __builtin_mve_vmovltq_uv8hi (__a);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_u8 (uint8x16_t __a)
-{
-  return __builtin_mve_vmovlbq_uv16qi (__a);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_u16 (uint16x8_t __a)
-{
-  return __builtin_mve_vmovlbq_uv8hi (__a);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_n_u16 (const int __imm)
@@ -3380,20 +3294,6 @@ __arm_vmlsldavxq_p_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
   return __builtin_mve_vmlsldavxq_p_sv8hi (__a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_m_s8 (int16x8_t __inactive, int8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmovlbq_m_sv16qi (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_m_s8 (int16x8_t __inactive, int8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmovltq_m_sv16qi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_n_u16 (uint16x8_t __inactive, const int __imm, mve_pred16_t __p)
@@ -3415,20 +3315,6 @@ __arm_vmlaldavq_p_u16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
   return __builtin_mve_vmlaldavq_p_uv8hi (__a, __b, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_m_u8 (uint16x8_t __inactive, uint8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmovlbq_m_uv16qi (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_m_u8 (uint16x8_t __inactive, uint8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmovltq_m_uv16qi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_n_s32 (int32x4_t __inactive, const int __imm, mve_pred16_t __p)
@@ -3492,20 +3378,6 @@ __arm_vmlsldavxq_p_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
   return __builtin_mve_vmlsldavxq_p_sv4si (__a, __b, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_m_s16 (int32x4_t __inactive, int16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmovlbq_m_sv8hi (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_m_s16 (int32x4_t __inactive, int16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmovltq_m_sv8hi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_n_u32 (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
@@ -3527,20 +3399,6 @@ __arm_vmlaldavq_p_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
   return __builtin_mve_vmlaldavq_p_uv4si (__a, __b, __p);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_m_u16 (uint32x4_t __inactive, uint16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmovlbq_m_uv8hi (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_m_u16 (uint32x4_t __inactive, uint16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmovltq_m_uv8hi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -6887,62 +6745,6 @@ __arm_vbrsrq_x_n_u32 (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
   return __builtin_mve_vbrsrq_m_n_uv4si (__arm_vuninitializedq_u32 (), __a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_x_s8 (int8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmovlbq_m_sv16qi (__arm_vuninitializedq_s16 (), __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_x_s16 (int16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmovlbq_m_sv8hi (__arm_vuninitializedq_s32 (), __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_x_u8 (uint8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmovlbq_m_uv16qi (__arm_vuninitializedq_u16 (), __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_x_u16 (uint16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmovlbq_m_uv8hi (__arm_vuninitializedq_u32 (), __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_x_s8 (int8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmovltq_m_sv16qi (__arm_vuninitializedq_s16 (), __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_x_s16 (int16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmovltq_m_sv8hi (__arm_vuninitializedq_s32 (), __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_x_u8 (uint8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmovltq_m_uv16qi (__arm_vuninitializedq_u16 (), __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_x_u16 (uint16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmovltq_m_uv8hi (__arm_vuninitializedq_u32 (), __a, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_x_s8 (int8x16_t __a, mve_pred16_t __p)
@@ -9552,34 +9354,6 @@ __arm_vst4q (uint32_t * __addr, uint32x4x4_t __value)
  __arm_vst4q_u32 (__addr, __value);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq (int8x16_t __a)
-{
- return __arm_vmovlbq_s8 (__a);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq (int16x8_t __a)
-{
- return __arm_vmovlbq_s16 (__a);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq (int8x16_t __a)
-{
- return __arm_vmovltq_s8 (__a);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq (int16x8_t __a)
-{
- return __arm_vmovltq_s16 (__a);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq (int8x16_t __a)
@@ -9622,34 +9396,6 @@ __arm_vmvnq (uint32x4_t __a)
  return __arm_vmvnq_u32 (__a);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq (uint8x16_t __a)
-{
- return __arm_vmovltq_u8 (__a);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq (uint16x8_t __a)
-{
- return __arm_vmovltq_u16 (__a);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq (uint8x16_t __a)
-{
- return __arm_vmovlbq_u8 (__a);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq (uint16x8_t __a)
-{
- return __arm_vmovlbq_u16 (__a);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (uint8x16_t __a, uint8x16_t __b)
@@ -11330,20 +11076,6 @@ __arm_vmlsldavxq_p (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
  return __arm_vmlsldavxq_p_s16 (__a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_m (int16x8_t __inactive, int8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vmovlbq_m_s8 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_m (int16x8_t __inactive, int8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vmovltq_m_s8 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (uint16x8_t __inactive, const int __imm, mve_pred16_t __p)
@@ -11365,20 +11097,6 @@ __arm_vmlaldavq_p (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
  return __arm_vmlaldavq_p_u16 (__a, __b, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_m (uint16x8_t __inactive, uint8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vmovlbq_m_u8 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_m (uint16x8_t __inactive, uint8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vmovltq_m_u8 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (int32x4_t __inactive, const int __imm, mve_pred16_t __p)
@@ -11442,20 +11160,6 @@ __arm_vmlsldavxq_p (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
  return __arm_vmlsldavxq_p_s32 (__a, __b, __p);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_m (int32x4_t __inactive, int16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vmovlbq_m_s16 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_m (int32x4_t __inactive, int16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vmovltq_m_s16 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
@@ -11477,20 +11181,6 @@ __arm_vmlaldavq_p (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
  return __arm_vmlaldavq_p_u32 (__a, __b, __p);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_m (uint32x4_t __inactive, uint16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vmovlbq_m_u16 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_m (uint32x4_t __inactive, uint16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vmovltq_m_u16 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -14410,62 +14100,6 @@ __arm_vbrsrq_x (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
  return __arm_vbrsrq_x_n_u32 (__a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_x (int8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vmovlbq_x_s8 (__a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_x (int16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vmovlbq_x_s16 (__a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_x (uint8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vmovlbq_x_u8 (__a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovlbq_x (uint16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vmovlbq_x_u16 (__a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_x (int8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vmovltq_x_s8 (__a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_x (int16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vmovltq_x_s16 (__a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_x (uint8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vmovltq_x_u8 (__a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmovltq_x (uint16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vmovltq_x_u16 (__a, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_x (int8x16_t __a, mve_pred16_t __p)
@@ -16622,20 +16256,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vmvnq_u16 (__ARM_mve_coerce(__p0, uint16x8_t)), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vmvnq_u32 (__ARM_mve_coerce(__p0, uint32x4_t)));})
 
-#define __arm_vmovlbq(p0) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vmovlbq_s8 (__ARM_mve_coerce(__p0, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vmovlbq_s16 (__ARM_mve_coerce(__p0, int16x8_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vmovlbq_u8 (__ARM_mve_coerce(__p0, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vmovlbq_u16 (__ARM_mve_coerce(__p0, uint16x8_t)));})
-
-#define __arm_vmovltq(p0) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vmovltq_s8 (__ARM_mve_coerce(__p0, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vmovltq_s16 (__ARM_mve_coerce(__p0, int16x8_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vmovltq_u8 (__ARM_mve_coerce(__p0, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vmovltq_u16 (__ARM_mve_coerce(__p0, uint16x8_t)));})
-
 #define __arm_vcvtq(p0) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int16x8_t]: __arm_vcvtq_f16_s16 (__ARM_mve_coerce(__p0, int16x8_t)), \
@@ -16965,22 +16585,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmladhq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmladhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
 
-#define __arm_vmovlbq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int8x16_t]: __arm_vmovlbq_m_s8 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t]: __arm_vmovlbq_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t]: __arm_vmovlbq_m_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t]: __arm_vmovlbq_m_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), p2));})
-
-#define __arm_vmovltq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int8x16_t]: __arm_vmovltq_m_s8 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t]: __arm_vmovltq_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t]: __arm_vmovltq_m_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t]: __arm_vmovltq_m_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), p2));})
-
 #define __arm_vcvtaq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -17714,20 +17318,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8x4_t]: __arm_vst4q_u16 (__ARM_mve_coerce(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8x4_t)), \
   int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x4_t]: __arm_vst4q_u32 (__ARM_mve_coerce(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4x4_t)));})
 
-#define __arm_vmovlbq(p0) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vmovlbq_s8 (__ARM_mve_coerce(__p0, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vmovlbq_s16 (__ARM_mve_coerce(__p0, int16x8_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vmovlbq_u8 (__ARM_mve_coerce(__p0, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vmovlbq_u16 (__ARM_mve_coerce(__p0, uint16x8_t)));})
-
-#define __arm_vmovltq(p0) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vmovltq_s8 (__ARM_mve_coerce(__p0, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vmovltq_s16 (__ARM_mve_coerce(__p0, int16x8_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vmovltq_u8 (__ARM_mve_coerce(__p0, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vmovltq_u16 (__ARM_mve_coerce(__p0, uint16x8_t)));})
-
 #define __arm_vmvnq(p0) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vmvnq_s8 (__ARM_mve_coerce(__p0, int8x16_t)), \
@@ -18030,22 +17620,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmladhq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t)), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmladhq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)));})
 
-#define __arm_vmovlbq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int8x16_t]: __arm_vmovlbq_m_s8 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t]: __arm_vmovlbq_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t]: __arm_vmovlbq_m_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t]: __arm_vmovlbq_m_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), p2));})
-
-#define __arm_vmovltq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int8x16_t]: __arm_vmovltq_m_s8 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t]: __arm_vmovltq_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t]: __arm_vmovltq_m_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t]: __arm_vmovltq_m_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), p2));})
-
 #define __arm_vabavq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -18380,20 +17954,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcaddq_rot90_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcaddq_rot90_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
 
-#define __arm_vmovlbq_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vmovlbq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vmovlbq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vmovlbq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vmovlbq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2));})
-
-#define __arm_vmovltq_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vmovltq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vmovltq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vmovltq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vmovltq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2));})
-
 #define __arm_vmullbq_int_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
@@ -19310,20 +18870,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlsldavxq_p_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
   int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlsldavxq_p_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2));})
 
-#define __arm_vmovlbq_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vmovlbq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vmovlbq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vmovlbq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vmovlbq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2));})
-
-#define __arm_vmovltq_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vmovltq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vmovltq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vmovltq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vmovltq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2));})
-
 #define __arm_vmullbq_int_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape
  2023-05-10 13:30 ` [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape Christophe Lyon
@ 2023-05-10 14:52   ` Kyrylo Tkachov
  2023-05-11  8:21     ` Christophe Lyon
  0 siblings, 1 reply; 28+ messages in thread
From: Kyrylo Tkachov @ 2023-05-10 14:52 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Wednesday, May 10, 2023 2:31 PM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape
> 
> This patch adds the unary_acc shape description.
> 
> 2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins-shapes.cc (unary_acc): New.
> 	* config/arm/arm-mve-builtins-shapes.h (unary_acc): New.
> ---
>  gcc/config/arm/arm-mve-builtins-shapes.cc | 28 +++++++++++++++++++++++
>  gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
>  2 files changed, 29 insertions(+)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-
> mve-builtins-shapes.cc
> index bff1c3e843b..e77a0cc20ac 100644
> --- a/gcc/config/arm/arm-mve-builtins-shapes.cc
> +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
> @@ -1066,6 +1066,34 @@ struct unary_def : public overloaded_base<0>
>  };
>  SHAPE (unary)
> 
> +/* <S0:twice>_t vfoo[_<t0>](<T0>_t)
> +
> +   i.e. a version of "unary" in which the source elements are half the
> +   size of the destination scalar, but have the same type class.
> +
> +   Example: vaddlvq.
> +   int64_t [__arm_]vaddlvq[_s32](int32x4_t a)
> +   int64_t [__arm_]vaddlvq_p[_s32](int32x4_t a, mve_pred16_t p) */
> +struct unary_acc_def : public overloaded_base<0>
> +{
> +  void
> +  build (function_builder &b, const function_group_info &group,
> +	 bool preserve_user_namespace) const override
> +  {
> +    b.add_overloaded_functions (group, MODE_none,
> preserve_user_namespace);
> +    build_all (b, "sw0,v0", group, MODE_none, preserve_user_namespace);
> +  }
> +
> +  tree
> +  resolve (function_resolver &r) const override
> +  {
> +    /* FIXME: check that the return value is actually
> +       twice as wide as arg 0.  */

Any reason why we can't add that check now?
I'd rather not add new FIXMEs here...
Thanks,
Kyrill

> +    return r.resolve_unary ();
> +  }
> +};
> +SHAPE (unary_acc)
> +
>  /* <T0>_t foo_t0[_t1](<T1>_t)
> 
>     where the target type <t0> must be specified explicitly but the source
> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-
> mve-builtins-shapes.h
> index fc1bacbd4da..c062fe624c4 100644
> --- a/gcc/config/arm/arm-mve-builtins-shapes.h
> +++ b/gcc/config/arm/arm-mve-builtins-shapes.h
> @@ -53,6 +53,7 @@ namespace arm_mve
>      extern const function_shape *const create;
>      extern const function_shape *const inherent;
>      extern const function_shape *const unary;
> +    extern const function_shape *const unary_acc;
>      extern const function_shape *const unary_convert;
>      extern const function_shape *const unary_int32;
>      extern const function_shape *const unary_int32_acc;
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp
  2023-05-10 13:30 [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp Christophe Lyon
                   ` (18 preceding siblings ...)
  2023-05-10 13:30 ` [PATCH 20/20] arm: [MVE intrinsics] rework vmovlbq vmovltq Christophe Lyon
@ 2023-05-10 16:53 ` Kyrylo Tkachov
  19 siblings, 0 replies; 28+ messages in thread
From: Kyrylo Tkachov @ 2023-05-10 16:53 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Wednesday, May 10, 2023 2:30 PM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp
> 
> Factorize vcmp so that they use the same pattern.
> 

Besides my comments on patch 15/20, this series looks good.
So once that patch is resolved, this series is ok for trunk.
Thanks,
Kyrill

> 2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/iterators.md (MVE_CMP_M, MVE_CMP_M_F,
> MVE_CMP_M_N)
> 	(MVE_CMP_M_N_F, mve_cmp_op1): New.
> 	(isu): Add VCMP*
> 	(supf): Likewise.
> 	* config/arm/mve.md (mve_vcmp<mve_cmp_op>q_n_<mode>):
> Rename into ...
> 	(@mve_vcmp<mve_cmp_op>q_n_<mode>): ... this.
> 	(mve_vcmpeqq_m_f<mode>, mve_vcmpgeq_m_f<mode>)
> 	(mve_vcmpgtq_m_f<mode>, mve_vcmpleq_m_f<mode>)
> 	(mve_vcmpltq_m_f<mode>, mve_vcmpneq_m_f<mode>): Merge into
> ...
> 	(@mve_vcmp<mve_cmp_op1>q_m_f<mode>): ... this.
> 	(mve_vcmpcsq_m_u<mode>, mve_vcmpeqq_m_<supf><mode>)
> 	(mve_vcmpgeq_m_s<mode>, mve_vcmpgtq_m_s<mode>)
> 	(mve_vcmphiq_m_u<mode>, mve_vcmpleq_m_s<mode>)
> 	(mve_vcmpltq_m_s<mode>, mve_vcmpneq_m_<supf><mode>):
> Merge into
> 	...
> 	(@mve_vcmp<mve_cmp_op1>q_m_<supf><mode>): ... this.
> 	(mve_vcmpcsq_m_n_u<mode>,
> mve_vcmpeqq_m_n_<supf><mode>)
> 	(mve_vcmpgeq_m_n_s<mode>, mve_vcmpgtq_m_n_s<mode>)
> 	(mve_vcmphiq_m_n_u<mode>, mve_vcmpleq_m_n_s<mode>)
> 	(mve_vcmpltq_m_n_s<mode>, mve_vcmpneq_m_n_<supf><mode>):
> Merge
> 	into ...
> 	(@mve_vcmp<mve_cmp_op1>q_m_n_<supf><mode>): ... this.
> 	(mve_vcmpeqq_m_n_f<mode>, mve_vcmpgeq_m_n_f<mode>)
> 	(mve_vcmpgtq_m_n_f<mode>, mve_vcmpleq_m_n_f<mode>)
> 	(mve_vcmpltq_m_n_f<mode>, mve_vcmpneq_m_n_f<mode>):
> Merge into ...
> 	(@mve_vcmp<mve_cmp_op1>q_m_n_f<mode>): ... this.
> ---
>  gcc/config/arm/iterators.md | 108 ++++++++++
>  gcc/config/arm/mve.md       | 414 +++---------------------------------
>  2 files changed, 135 insertions(+), 387 deletions(-)
> 
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index 3c70fd7f56d..ef9fae0412b 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -583,6 +583,47 @@ (define_int_iterator MVE_FP_CREATE_ONLY [
>  		     VCREATEQ_F
>  		     ])
> 
> +;; MVE comparison iterators
> +(define_int_iterator MVE_CMP_M [
> +		     VCMPCSQ_M_U
> +		     VCMPEQQ_M_S VCMPEQQ_M_U
> +		     VCMPGEQ_M_S
> +		     VCMPGTQ_M_S
> +		     VCMPHIQ_M_U
> +		     VCMPLEQ_M_S
> +		     VCMPLTQ_M_S
> +		     VCMPNEQ_M_S VCMPNEQ_M_U
> +		     ])
> +
> +(define_int_iterator MVE_CMP_M_F [
> +		     VCMPEQQ_M_F
> +		     VCMPGEQ_M_F
> +		     VCMPGTQ_M_F
> +		     VCMPLEQ_M_F
> +		     VCMPLTQ_M_F
> +		     VCMPNEQ_M_F
> +		     ])
> +
> +(define_int_iterator MVE_CMP_M_N [
> +		     VCMPCSQ_M_N_U
> +		     VCMPEQQ_M_N_S VCMPEQQ_M_N_U
> +		     VCMPGEQ_M_N_S
> +		     VCMPGTQ_M_N_S
> +		     VCMPHIQ_M_N_U
> +		     VCMPLEQ_M_N_S
> +		     VCMPLTQ_M_N_S
> +		     VCMPNEQ_M_N_S VCMPNEQ_M_N_U
> +		     ])
> +
> +(define_int_iterator MVE_CMP_M_N_F [
> +		     VCMPEQQ_M_N_F
> +		     VCMPGEQ_M_N_F
> +		     VCMPGTQ_M_N_F
> +		     VCMPLEQ_M_N_F
> +		     VCMPLTQ_M_N_F
> +		     VCMPNEQ_M_N_F
> +		     ])
> +
>  (define_int_iterator MVE_VMAXVQ_VMINVQ [
>  		     VMAXAVQ_S
>  		     VMAXVQ_S VMAXVQ_U
> @@ -655,6 +696,37 @@ (define_code_attr mve_addsubmul [
>  		 (plus "vadd")
>  		 ])
> 
> +(define_int_attr mve_cmp_op1 [
> +		 (VCMPCSQ_M_U "cs")
> +		 (VCMPEQQ_M_S "eq") (VCMPEQQ_M_U "eq")
> +		 (VCMPGEQ_M_S "ge")
> +		 (VCMPGTQ_M_S "gt")
> +		 (VCMPHIQ_M_U "hi")
> +		 (VCMPLEQ_M_S "le")
> +		 (VCMPLTQ_M_S "lt")
> +		 (VCMPNEQ_M_S "ne") (VCMPNEQ_M_U "ne")
> +		 (VCMPEQQ_M_F "eq")
> +		 (VCMPGEQ_M_F "ge")
> +		 (VCMPGTQ_M_F "gt")
> +		 (VCMPLEQ_M_F "le")
> +		 (VCMPLTQ_M_F "lt")
> +		 (VCMPNEQ_M_F "ne")
> +		 (VCMPCSQ_M_N_U "cs")
> +		 (VCMPEQQ_M_N_S "eq") (VCMPEQQ_M_N_U "eq")
> +		 (VCMPGEQ_M_N_S "ge")
> +		 (VCMPGTQ_M_N_S "gt")
> +		 (VCMPHIQ_M_N_U "hi")
> +		 (VCMPLEQ_M_N_S "le")
> +		 (VCMPLTQ_M_N_S "lt")
> +		 (VCMPNEQ_M_N_S "ne") (VCMPNEQ_M_N_U "ne")
> +		 (VCMPEQQ_M_N_F "eq")
> +		 (VCMPGEQ_M_N_F "ge")
> +		 (VCMPGTQ_M_N_F "gt")
> +		 (VCMPLEQ_M_N_F "le")
> +		 (VCMPLTQ_M_N_F "lt")
> +		 (VCMPNEQ_M_N_F "ne")
> +		 ])
> +
>  (define_int_attr mve_insn [
>  		 (VABDQ_M_S "vabd") (VABDQ_M_U "vabd") (VABDQ_M_F
> "vabd")
>  		 (VABDQ_S "vabd") (VABDQ_U "vabd") (VABDQ_F "vabd")
> @@ -836,6 +908,26 @@ (define_int_attr isu    [
>  		 (VCLSQ_M_S "s")
>  		 (VCLZQ_M_S "i")
>  		 (VCLZQ_M_U "i")
> +		 (VCMPCSQ_M_N_U "u")
> +		 (VCMPCSQ_M_U "u")
> +		 (VCMPEQQ_M_N_S "i")
> +		 (VCMPEQQ_M_N_U "i")
> +		 (VCMPEQQ_M_S "i")
> +		 (VCMPEQQ_M_U "i")
> +		 (VCMPGEQ_M_N_S "s")
> +		 (VCMPGEQ_M_S "s")
> +		 (VCMPGTQ_M_N_S "s")
> +		 (VCMPGTQ_M_S "s")
> +		 (VCMPHIQ_M_N_U "u")
> +		 (VCMPHIQ_M_U "u")
> +		 (VCMPLEQ_M_N_S "s")
> +		 (VCMPLEQ_M_S "s")
> +		 (VCMPLTQ_M_N_S "s")
> +		 (VCMPLTQ_M_S "s")
> +		 (VCMPNEQ_M_N_S "i")
> +		 (VCMPNEQ_M_N_U "i")
> +		 (VCMPNEQ_M_S "i")
> +		 (VCMPNEQ_M_U "i")
>  		 (VMOVNBQ_M_S "i") (VMOVNBQ_M_U "i")
>  		 (VMOVNBQ_S "i") (VMOVNBQ_U "i")
>  		 (VMOVNTQ_M_S "i") (VMOVNTQ_M_U "i")
> @@ -2082,6 +2174,22 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s")
> (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
>  		       (VMAXAQ_M_S "s")
>  		       (VMINAQ_S "s")
>  		       (VMINAQ_M_S "s")
> +		       (VCMPCSQ_M_N_U "u")
> +		       (VCMPCSQ_M_U "u")
> +		       (VCMPEQQ_M_N_S "s") (VCMPEQQ_M_N_U "u")
> +		       (VCMPEQQ_M_S "s") (VCMPEQQ_M_U "u")
> +		       (VCMPGEQ_M_N_S "s")
> +		       (VCMPGEQ_M_S "s")
> +		       (VCMPGTQ_M_N_S "s")
> +		       (VCMPGTQ_M_S "s")
> +		       (VCMPHIQ_M_N_U "u")
> +		       (VCMPHIQ_M_U "u")
> +		       (VCMPLEQ_M_N_S "s")
> +		       (VCMPLEQ_M_S "s")
> +		       (VCMPLTQ_M_N_S "s")
> +		       (VCMPLTQ_M_S "s")
> +		       (VCMPNEQ_M_N_S "s") (VCMPNEQ_M_N_U "u")
> +		       (VCMPNEQ_M_S "s") (VCMPNEQ_M_U "u")
>  		       ])
> 
>  ;; Both kinds of return insn.
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 45bca6d6215..191d1268ad6 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -695,7 +695,7 @@ (define_insn
> "@mve_vcmp<mve_cmp_op>q_<mode>"
>  ;;
>  ;; [vcmpcsq_n_, vcmpeqq_n_, vcmpgeq_n_, vcmpgtq_n_, vcmphiq_n_,
> vcmpleq_n_, vcmpltq_n_, vcmpneq_n_])
>  ;;
> -(define_insn "mve_vcmp<mve_cmp_op>q_n_<mode>"
> +(define_insn "@mve_vcmp<mve_cmp_op>q_n_<mode>"
>    [
>     (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
>  	(MVE_COMPARISONS:<MVE_VPRED>
> @@ -1766,18 +1766,23 @@ (define_insn "mve_vrmlaldavhq_<supf>v4si"
>  ])
> 
>  ;;
> -;; [vcmpeqq_m_f])
> +;; [vcmpeqq_m_f]
> +;; [vcmpgeq_m_f]
> +;; [vcmpgtq_m_f]
> +;; [vcmpleq_m_f]
> +;; [vcmpltq_m_f]
> +;; [vcmpneq_m_f]
>  ;;
> -(define_insn "mve_vcmpeqq_m_f<mode>"
> +(define_insn "@mve_vcmp<mve_cmp_op1>q_m_f<mode>"
>    [
>     (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
>  	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1
> "s_register_operand" "w")
>  		    (match_operand:MVE_0 2 "s_register_operand" "w")
>  		    (match_operand:<MVE_VPRED> 3 "vpr_register_operand"
> "Up")]
> -	 VCMPEQQ_M_F))
> +	 MVE_CMP_M_F))
>    ]
>    "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> -  "vpst\;vcmpt.f%#<V_sz_elem>	eq, %q1, %q2"
> +  "vpst\;vcmpt.f%#<V_sz_elem>\t<mve_cmp_op1>, %q1, %q2"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
>  ;;
> @@ -1954,257 +1959,47 @@ (define_insn "mve_vaddvaq_p_<supf><mode>"
> 
>  ;;
>  ;; [vcmpcsq_m_n_u])
> -;;
> -(define_insn "mve_vcmpcsq_m_n_u<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1
> "s_register_operand" "w")
> -		       (match_operand:<V_elem> 2 "s_register_operand" "r")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPCSQ_M_N_U))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vcmpt.u%#<V_sz_elem>	cs, %q1, %2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vcmpcsq_m_u])
> -;;
> -(define_insn "mve_vcmpcsq_m_u<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1
> "s_register_operand" "w")
> -		       (match_operand:MVE_2 2 "s_register_operand" "w")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPCSQ_M_U))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vcmpt.u%#<V_sz_elem>	cs, %q1, %q2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
>  ;; [vcmpeqq_m_n_u, vcmpeqq_m_n_s])
> -;;
> -(define_insn "mve_vcmpeqq_m_n_<supf><mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1
> "s_register_operand" "w")
> -		       (match_operand:<V_elem> 2 "s_register_operand" "r")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPEQQ_M_N))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vcmpt.i%#<V_sz_elem>	eq, %q1, %2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vcmpeqq_m_u, vcmpeqq_m_s])
> -;;
> -(define_insn "mve_vcmpeqq_m_<supf><mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1
> "s_register_operand" "w")
> -		       (match_operand:MVE_2 2 "s_register_operand" "w")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPEQQ_M))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vcmpt.i%#<V_sz_elem>	eq, %q1, %q2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
>  ;; [vcmpgeq_m_n_s])
> -;;
> -(define_insn "mve_vcmpgeq_m_n_s<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1
> "s_register_operand" "w")
> -		       (match_operand:<V_elem> 2 "s_register_operand" "r")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPGEQ_M_N_S))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vcmpt.s%#<V_sz_elem>	ge, %q1, %2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vcmpgeq_m_s])
> -;;
> -(define_insn "mve_vcmpgeq_m_s<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1
> "s_register_operand" "w")
> -		       (match_operand:MVE_2 2 "s_register_operand" "w")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPGEQ_M_S))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vcmpt.s%#<V_sz_elem>	ge, %q1, %q2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
>  ;; [vcmpgtq_m_n_s])
> -;;
> -(define_insn "mve_vcmpgtq_m_n_s<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1
> "s_register_operand" "w")
> -		       (match_operand:<V_elem> 2 "s_register_operand" "r")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPGTQ_M_N_S))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vcmpt.s%#<V_sz_elem>	gt, %q1, %2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vcmpgtq_m_s])
> -;;
> -(define_insn "mve_vcmpgtq_m_s<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1
> "s_register_operand" "w")
> -		       (match_operand:MVE_2 2 "s_register_operand" "w")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPGTQ_M_S))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vcmpt.s%#<V_sz_elem>	gt, %q1, %q2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
>  ;; [vcmphiq_m_n_u])
> -;;
> -(define_insn "mve_vcmphiq_m_n_u<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1
> "s_register_operand" "w")
> -		       (match_operand:<V_elem> 2 "s_register_operand" "r")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPHIQ_M_N_U))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vcmpt.u%#<V_sz_elem>	hi, %q1, %2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vcmphiq_m_u])
> -;;
> -(define_insn "mve_vcmphiq_m_u<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1
> "s_register_operand" "w")
> -		       (match_operand:MVE_2 2 "s_register_operand" "w")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPHIQ_M_U))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vcmpt.u%#<V_sz_elem>	hi, %q1, %q2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
>  ;; [vcmpleq_m_n_s])
> -;;
> -(define_insn "mve_vcmpleq_m_n_s<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1
> "s_register_operand" "w")
> -		       (match_operand:<V_elem> 2 "s_register_operand" "r")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPLEQ_M_N_S))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vcmpt.s%#<V_sz_elem>	le, %q1, %2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vcmpleq_m_s])
> -;;
> -(define_insn "mve_vcmpleq_m_s<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1
> "s_register_operand" "w")
> -		       (match_operand:MVE_2 2 "s_register_operand" "w")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPLEQ_M_S))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vcmpt.s%#<V_sz_elem>	le, %q1, %q2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
>  ;; [vcmpltq_m_n_s])
> -;;
> -(define_insn "mve_vcmpltq_m_n_s<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1
> "s_register_operand" "w")
> -		       (match_operand:<V_elem> 2 "s_register_operand" "r")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPLTQ_M_N_S))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vcmpt.s%#<V_sz_elem>	lt, %q1, %2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vcmpltq_m_s])
> -;;
> -(define_insn "mve_vcmpltq_m_s<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1
> "s_register_operand" "w")
> -		       (match_operand:MVE_2 2 "s_register_operand" "w")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPLTQ_M_S))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vcmpt.s%#<V_sz_elem>	lt, %q1, %q2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
>  ;; [vcmpneq_m_n_u, vcmpneq_m_n_s])
>  ;;
> -(define_insn "mve_vcmpneq_m_n_<supf><mode>"
> +(define_insn "@mve_vcmp<mve_cmp_op1>q_m_n_<supf><mode>"
>    [
>     (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
>  	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1
> "s_register_operand" "w")
>  		       (match_operand:<V_elem> 2 "s_register_operand" "r")
>  		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPNEQ_M_N))
> +	 MVE_CMP_M_N))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vpst\;vcmpt.i%#<V_sz_elem>	ne, %q1, %2"
> +  "vpst\;vcmpt.<isu>%#<V_sz_elem>\t<mve_cmp_op1>, %q1, %2"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> 
>  ;;
> -;; [vcmpneq_m_s, vcmpneq_m_u])
> +;; [vcmpcsq_m_u]
> +;; [vcmpeqq_m_u, vcmpeqq_m_s]
> +;; [vcmpgeq_m_s]
> +;; [vcmpgtq_m_s]
> +;; [vcmphiq_m_u]
> +;; [vcmpleq_m_s]
> +;; [vcmpltq_m_s]
> +;; [vcmpneq_m_s, vcmpneq_m_u]
>  ;;
> -(define_insn "mve_vcmpneq_m_<supf><mode>"
> +(define_insn "@mve_vcmp<mve_cmp_op1>q_m_<supf><mode>"
>    [
>     (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
>  	(unspec:<MVE_VPRED> [(match_operand:MVE_2 1
> "s_register_operand" "w")
>  		       (match_operand:MVE_2 2 "s_register_operand" "w")
>  		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPNEQ_M))
> +	 MVE_CMP_M))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vpst\;vcmpt.i%#<V_sz_elem>	ne, %q1, %q2"
> +  "vpst\;vcmpt.<isu>%#<V_sz_elem>\t<mve_cmp_op1>, %q1, %q2"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> 
> @@ -2785,177 +2580,22 @@ (define_insn "mve_vcmlaq<mve_rot><mode>"
> 
>  ;;
>  ;; [vcmpeqq_m_n_f])
> -;;
> -(define_insn "mve_vcmpeqq_m_n_f<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1
> "s_register_operand" "w")
> -		       (match_operand:<V_elem> 2 "s_register_operand" "r")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPEQQ_M_N_F))
> -  ]
> -  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> -  "vpst\;vcmpt.f%#<V_sz_elem>	eq, %q1, %2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vcmpgeq_m_f])
> -;;
> -(define_insn "mve_vcmpgeq_m_f<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1
> "s_register_operand" "w")
> -		       (match_operand:MVE_0 2 "s_register_operand" "w")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPGEQ_M_F))
> -  ]
> -  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> -  "vpst\;vcmpt.f%#<V_sz_elem>	ge, %q1, %q2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
>  ;; [vcmpgeq_m_n_f])
> -;;
> -(define_insn "mve_vcmpgeq_m_n_f<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1
> "s_register_operand" "w")
> -		       (match_operand:<V_elem> 2 "s_register_operand" "r")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPGEQ_M_N_F))
> -  ]
> -  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> -  "vpst\;vcmpt.f%#<V_sz_elem>	ge, %q1, %2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vcmpgtq_m_f])
> -;;
> -(define_insn "mve_vcmpgtq_m_f<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1
> "s_register_operand" "w")
> -		       (match_operand:MVE_0 2 "s_register_operand" "w")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPGTQ_M_F))
> -  ]
> -  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> -  "vpst\;vcmpt.f%#<V_sz_elem>	gt, %q1, %q2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
>  ;; [vcmpgtq_m_n_f])
> -;;
> -(define_insn "mve_vcmpgtq_m_n_f<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1
> "s_register_operand" "w")
> -		       (match_operand:<V_elem> 2 "s_register_operand" "r")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPGTQ_M_N_F))
> -  ]
> -  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> -  "vpst\;vcmpt.f%#<V_sz_elem>	gt, %q1, %2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vcmpleq_m_f])
> -;;
> -(define_insn "mve_vcmpleq_m_f<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1
> "s_register_operand" "w")
> -		       (match_operand:MVE_0 2 "s_register_operand" "w")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPLEQ_M_F))
> -  ]
> -  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> -  "vpst\;vcmpt.f%#<V_sz_elem>	le, %q1, %q2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
>  ;; [vcmpleq_m_n_f])
> -;;
> -(define_insn "mve_vcmpleq_m_n_f<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1
> "s_register_operand" "w")
> -		       (match_operand:<V_elem> 2 "s_register_operand" "r")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPLEQ_M_N_F))
> -  ]
> -  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> -  "vpst\;vcmpt.f%#<V_sz_elem>	le, %q1, %2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vcmpltq_m_f])
> -;;
> -(define_insn "mve_vcmpltq_m_f<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1
> "s_register_operand" "w")
> -		       (match_operand:MVE_0 2 "s_register_operand" "w")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPLTQ_M_F))
> -  ]
> -  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> -  "vpst\;vcmpt.f%#<V_sz_elem>	lt, %q1, %q2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
>  ;; [vcmpltq_m_n_f])
> -;;
> -(define_insn "mve_vcmpltq_m_n_f<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1
> "s_register_operand" "w")
> -		       (match_operand:<V_elem> 2 "s_register_operand" "r")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPLTQ_M_N_F))
> -  ]
> -  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> -  "vpst\;vcmpt.f%#<V_sz_elem>	lt, %q1, %2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vcmpneq_m_f])
> -;;
> -(define_insn "mve_vcmpneq_m_f<mode>"
> -  [
> -   (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
> -	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1
> "s_register_operand" "w")
> -		       (match_operand:MVE_0 2 "s_register_operand" "w")
> -		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPNEQ_M_F))
> -  ]
> -  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> -  "vpst\;vcmpt.f%#<V_sz_elem>	ne, %q1, %q2"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
>  ;; [vcmpneq_m_n_f])
>  ;;
> -(define_insn "mve_vcmpneq_m_n_f<mode>"
> +(define_insn "@mve_vcmp<mve_cmp_op1>q_m_n_f<mode>"
>    [
>     (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up")
>  	(unspec:<MVE_VPRED> [(match_operand:MVE_0 1
> "s_register_operand" "w")
>  		       (match_operand:<V_elem> 2 "s_register_operand" "r")
>  		       (match_operand:<MVE_VPRED> 3
> "vpr_register_operand" "Up")]
> -	 VCMPNEQ_M_N_F))
> +	 MVE_CMP_M_N_F))
>    ]
>    "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> -  "vpst\;vcmpt.f%#<V_sz_elem>	ne, %q1, %2"
> +  "vpst\;vcmpt.f%#<V_sz_elem>\t<mve_cmp_op1>, %q1, %2"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> 
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape
  2023-05-10 14:52   ` Kyrylo Tkachov
@ 2023-05-11  8:21     ` Christophe Lyon
  2023-05-11  8:23       ` Kyrylo Tkachov
  2023-05-11  8:30       ` Richard Sandiford
  0 siblings, 2 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-11  8:21 UTC (permalink / raw)
  To: Kyrylo Tkachov, gcc-patches, Richard Earnshaw, Richard Sandiford



On 5/10/23 16:52, Kyrylo Tkachov wrote:
> 
> 
>> -----Original Message-----
>> From: Christophe Lyon <christophe.lyon@arm.com>
>> Sent: Wednesday, May 10, 2023 2:31 PM
>> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
>> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
>> <Richard.Sandiford@arm.com>
>> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
>> Subject: [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape
>>
>> This patch adds the unary_acc shape description.
>>
>> 2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>
>>
>> 	gcc/
>> 	* config/arm/arm-mve-builtins-shapes.cc (unary_acc): New.
>> 	* config/arm/arm-mve-builtins-shapes.h (unary_acc): New.
>> ---
>>   gcc/config/arm/arm-mve-builtins-shapes.cc | 28 +++++++++++++++++++++++
>>   gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
>>   2 files changed, 29 insertions(+)
>>
>> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-
>> mve-builtins-shapes.cc
>> index bff1c3e843b..e77a0cc20ac 100644
>> --- a/gcc/config/arm/arm-mve-builtins-shapes.cc
>> +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
>> @@ -1066,6 +1066,34 @@ struct unary_def : public overloaded_base<0>
>>   };
>>   SHAPE (unary)
>>
>> +/* <S0:twice>_t vfoo[_<t0>](<T0>_t)
>> +
>> +   i.e. a version of "unary" in which the source elements are half the
>> +   size of the destination scalar, but have the same type class.
>> +
>> +   Example: vaddlvq.
>> +   int64_t [__arm_]vaddlvq[_s32](int32x4_t a)
>> +   int64_t [__arm_]vaddlvq_p[_s32](int32x4_t a, mve_pred16_t p) */
>> +struct unary_acc_def : public overloaded_base<0>
>> +{
>> +  void
>> +  build (function_builder &b, const function_group_info &group,
>> +	 bool preserve_user_namespace) const override
>> +  {
>> +    b.add_overloaded_functions (group, MODE_none,
>> preserve_user_namespace);
>> +    build_all (b, "sw0,v0", group, MODE_none, preserve_user_namespace);
>> +  }
>> +
>> +  tree
>> +  resolve (function_resolver &r) const override
>> +  {
>> +    /* FIXME: check that the return value is actually
>> +       twice as wide as arg 0.  */
> 
> Any reason why we can't add that check now?
> I'd rather not add new FIXMEs here...

I understand :-)

That's because the resolver only knows about the arguments, not the 
return value:
   /* The arguments to the overloaded function.  */
   vec<tree, va_gc> &m_arglist;

I kept this in line with what already exists for AArch64/SVE, but we'll
need to extend it to handle return values too, so that we can support
all overloaded forms of vuninitialized
(see https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616003.html)

I meant this extension to be follow-up work for when most intrinsics
have been converted and the few remaining ones (e.g. vuninitialized)
need an improved framework.  That would then enable us to fix the FIXME.
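
For illustration, a rough sketch of the deferred check (return_type ()
is a hypothetical accessor that the current function_resolver does not
provide; resolve_unary () and m_arglist are the existing pieces quoted
above):

  tree
  resolve (function_resolver &r) const override
  {
    tree res = r.resolve_unary ();
    /* Hypothetical future check, once the resolver also carries the
       return type: require the scalar result to be twice as wide as
       the element type of argument 0.  */
    tree arg0_type = TREE_TYPE (r.m_arglist[0]);
    if (res != error_mark_node
        && TYPE_PRECISION (r.return_type ()) /* hypothetical */
           != 2 * TYPE_PRECISION (TREE_TYPE (arg0_type)))
      return error_mark_node;
    return res;
  }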

Thanks,

Christophe


> Thanks,
> Kyrill
> 
>> +    return r.resolve_unary ();
>> +  }
>> +};
>> +SHAPE (unary_acc)
>> +
>>   /* <T0>_t foo_t0[_t1](<T1>_t)
>>
>>      where the target type <t0> must be specified explicitly but the source
>> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-
>> mve-builtins-shapes.h
>> index fc1bacbd4da..c062fe624c4 100644
>> --- a/gcc/config/arm/arm-mve-builtins-shapes.h
>> +++ b/gcc/config/arm/arm-mve-builtins-shapes.h
>> @@ -53,6 +53,7 @@ namespace arm_mve
>>       extern const function_shape *const create;
>>       extern const function_shape *const inherent;
>>       extern const function_shape *const unary;
>> +    extern const function_shape *const unary_acc;
>>       extern const function_shape *const unary_convert;
>>       extern const function_shape *const unary_int32;
>>       extern const function_shape *const unary_int32_acc;
>> --
>> 2.34.1
> 


* RE: [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape
  2023-05-11  8:21     ` Christophe Lyon
@ 2023-05-11  8:23       ` Kyrylo Tkachov
  2023-05-11  8:24         ` Christophe Lyon
  2023-05-11  8:30       ` Richard Sandiford
  1 sibling, 1 reply; 28+ messages in thread
From: Kyrylo Tkachov @ 2023-05-11  8:23 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford



> -----Original Message-----
> From: Christophe Lyon <Christophe.Lyon@arm.com>
> Sent: Thursday, May 11, 2023 9:21 AM
> To: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; gcc-patches@gcc.gnu.org;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Subject: Re: [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape
> 
> 
> 
> On 5/10/23 16:52, Kyrylo Tkachov wrote:
> >
> >
> >> -----Original Message-----
> >> From: Christophe Lyon <christophe.lyon@arm.com>
> >> Sent: Wednesday, May 10, 2023 2:31 PM
> >> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> >> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> >> <Richard.Sandiford@arm.com>
> >> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> >> Subject: [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape
> >>
> >> This patch adds the unary_acc shape description.
> >>
> >> 2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>
> >>
> >> 	gcc/
> >> 	* config/arm/arm-mve-builtins-shapes.cc (unary_acc): New.
> >> 	* config/arm/arm-mve-builtins-shapes.h (unary_acc): New.
> >> ---
> >>   gcc/config/arm/arm-mve-builtins-shapes.cc | 28
> +++++++++++++++++++++++
> >>   gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
> >>   2 files changed, 29 insertions(+)
> >>
> >> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc
> b/gcc/config/arm/arm-
> >> mve-builtins-shapes.cc
> >> index bff1c3e843b..e77a0cc20ac 100644
> >> --- a/gcc/config/arm/arm-mve-builtins-shapes.cc
> >> +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
> >> @@ -1066,6 +1066,34 @@ struct unary_def : public overloaded_base<0>
> >>   };
> >>   SHAPE (unary)
> >>
> >> +/* <S0:twice>_t vfoo[_<t0>](<T0>_t)
> >> +
> >> +   i.e. a version of "unary" in which the source elements are half the
> >> +   size of the destination scalar, but have the same type class.
> >> +
> >> +   Example: vaddlvq.
> >> +   int64_t [__arm_]vaddlvq[_s32](int32x4_t a)
> >> +   int64_t [__arm_]vaddlvq_p[_s32](int32x4_t a, mve_pred16_t p) */
> >> +struct unary_acc_def : public overloaded_base<0>
> >> +{
> >> +  void
> >> +  build (function_builder &b, const function_group_info &group,
> >> +	 bool preserve_user_namespace) const override
> >> +  {
> >> +    b.add_overloaded_functions (group, MODE_none,
> >> preserve_user_namespace);
> >> +    build_all (b, "sw0,v0", group, MODE_none,
> preserve_user_namespace);
> >> +  }
> >> +
> >> +  tree
> >> +  resolve (function_resolver &r) const override
> >> +  {
> >> +    /* FIXME: check that the return value is actually
> >> +       twice as wide as arg 0.  */
> >
> > Any reason why we can't add that check now?
> > I'd rather not add new FIXMEs here...
> 
> I understand :-)
> 
> That's because the resolver only knows about the arguments, not the
> return value:
>    /* The arguments to the overloaded function.  */
>    vec<tree, va_gc> &m_arglist;
> 
> I kept this in line with what already exists for AArch64/SVE, but we'll
> need to extend it to handle return values too, so that we can support
> all overloaded forms of vuninitialized
> (see https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616003.html)
> 
> I meant this extension to be follow-up work for when most intrinsics
> have been converted and the few remaining ones (e.g. vuninitialized)
> need an improved framework.  That would then enable us to fix the FIXME.

Thanks for explaining.
The series is ok for trunk then.
Kyrill

> 
> Thanks,
> 
> Christophe
> 
> 
> > Thanks,
> > Kyrill
> >
> >> +    return r.resolve_unary ();
> >> +  }
> >> +};
> >> +SHAPE (unary_acc)
> >> +
> >>   /* <T0>_t foo_t0[_t1](<T1>_t)
> >>
> >>      where the target type <t0> must be specified explicitly but the source
> >> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h
> b/gcc/config/arm/arm-
> >> mve-builtins-shapes.h
> >> index fc1bacbd4da..c062fe624c4 100644
> >> --- a/gcc/config/arm/arm-mve-builtins-shapes.h
> >> +++ b/gcc/config/arm/arm-mve-builtins-shapes.h
> >> @@ -53,6 +53,7 @@ namespace arm_mve
> >>       extern const function_shape *const create;
> >>       extern const function_shape *const inherent;
> >>       extern const function_shape *const unary;
> >> +    extern const function_shape *const unary_acc;
> >>       extern const function_shape *const unary_convert;
> >>       extern const function_shape *const unary_int32;
> >>       extern const function_shape *const unary_int32_acc;
> >> --
> >> 2.34.1
> >


* Re: [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape
  2023-05-11  8:23       ` Kyrylo Tkachov
@ 2023-05-11  8:24         ` Christophe Lyon
  0 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-11  8:24 UTC (permalink / raw)
  To: Kyrylo Tkachov, gcc-patches, Richard Earnshaw, Richard Sandiford



On 5/11/23 10:23, Kyrylo Tkachov wrote:
> 
> 
>> -----Original Message-----
>> From: Christophe Lyon <Christophe.Lyon@arm.com>
>> Sent: Thursday, May 11, 2023 9:21 AM
>> To: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>; gcc-patches@gcc.gnu.org;
>> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
>> <Richard.Sandiford@arm.com>
>> Subject: Re: [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape
>>
>>
>>
>> On 5/10/23 16:52, Kyrylo Tkachov wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Christophe Lyon <christophe.lyon@arm.com>
>>>> Sent: Wednesday, May 10, 2023 2:31 PM
>>>> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
>>>> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
>>>> <Richard.Sandiford@arm.com>
>>>> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
>>>> Subject: [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape
>>>>
>>>> This patch adds the unary_acc shape description.
>>>>
>>>> 2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>
>>>>
>>>> 	gcc/
>>>> 	* config/arm/arm-mve-builtins-shapes.cc (unary_acc): New.
>>>> 	* config/arm/arm-mve-builtins-shapes.h (unary_acc): New.
>>>> ---
>>>>    gcc/config/arm/arm-mve-builtins-shapes.cc | 28
>> +++++++++++++++++++++++
>>>>    gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
>>>>    2 files changed, 29 insertions(+)
>>>>
>>>> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc
>> b/gcc/config/arm/arm-
>>>> mve-builtins-shapes.cc
>>>> index bff1c3e843b..e77a0cc20ac 100644
>>>> --- a/gcc/config/arm/arm-mve-builtins-shapes.cc
>>>> +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
>>>> @@ -1066,6 +1066,34 @@ struct unary_def : public overloaded_base<0>
>>>>    };
>>>>    SHAPE (unary)
>>>>
>>>> +/* <S0:twice>_t vfoo[_<t0>](<T0>_t)
>>>> +
>>>> +   i.e. a version of "unary" in which the source elements are half the
>>>> +   size of the destination scalar, but have the same type class.
>>>> +
>>>> +   Example: vaddlvq.
>>>> +   int64_t [__arm_]vaddlvq[_s32](int32x4_t a)
>>>> +   int64_t [__arm_]vaddlvq_p[_s32](int32x4_t a, mve_pred16_t p) */
>>>> +struct unary_acc_def : public overloaded_base<0>
>>>> +{
>>>> +  void
>>>> +  build (function_builder &b, const function_group_info &group,
>>>> +	 bool preserve_user_namespace) const override
>>>> +  {
>>>> +    b.add_overloaded_functions (group, MODE_none,
>>>> preserve_user_namespace);
>>>> +    build_all (b, "sw0,v0", group, MODE_none,
>> preserve_user_namespace);
>>>> +  }
>>>> +
>>>> +  tree
>>>> +  resolve (function_resolver &r) const override
>>>> +  {
>>>> +    /* FIXME: check that the return value is actually
>>>> +       twice as wide as arg 0.  */
>>>
>>> Any reason why we can't add that check now?
>>> I'd rather not add new FIXMEs here...
>>
>> I understand :-)
>>
>> That's because the resolver only knows about the arguments, not the
>> return value:
>>     /* The arguments to the overloaded function.  */
>>     vec<tree, va_gc> &m_arglist;
>>
>> I kept this in line with what already exists for AArch64/SVE, but we'll
>> need to extend it to handle return values too, so that we can support
>> all overloaded forms of vuninitialized
>> (see https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616003.html)
>>
>> I meant this extension to be follow-up work for when most intrinsics
>> have been converted and the few remaining ones (e.g. vuninitialized)
>> need an improved framework.  That would then enable us to fix the FIXME.
> 
> Thanks for explaining.
> The series is ok for trunk then.

Great, thanks!

> Kyrill
> 
>>
>> Thanks,
>>
>> Christophe
>>
>>
>>> Thanks,
>>> Kyrill
>>>
>>>> +    return r.resolve_unary ();
>>>> +  }
>>>> +};
>>>> +SHAPE (unary_acc)
>>>> +
>>>>    /* <T0>_t foo_t0[_t1](<T1>_t)
>>>>
>>>>       where the target type <t0> must be specified explicitly but the source
>>>> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h
>> b/gcc/config/arm/arm-
>>>> mve-builtins-shapes.h
>>>> index fc1bacbd4da..c062fe624c4 100644
>>>> --- a/gcc/config/arm/arm-mve-builtins-shapes.h
>>>> +++ b/gcc/config/arm/arm-mve-builtins-shapes.h
>>>> @@ -53,6 +53,7 @@ namespace arm_mve
>>>>        extern const function_shape *const create;
>>>>        extern const function_shape *const inherent;
>>>>        extern const function_shape *const unary;
>>>> +    extern const function_shape *const unary_acc;
>>>>        extern const function_shape *const unary_convert;
>>>>        extern const function_shape *const unary_int32;
>>>>        extern const function_shape *const unary_int32_acc;
>>>> --
>>>> 2.34.1
>>>


* Re: [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape
  2023-05-11  8:21     ` Christophe Lyon
  2023-05-11  8:23       ` Kyrylo Tkachov
@ 2023-05-11  8:30       ` Richard Sandiford
  2023-05-11  9:54         ` Christophe Lyon
  1 sibling, 1 reply; 28+ messages in thread
From: Richard Sandiford @ 2023-05-11  8:30 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: Kyrylo Tkachov, gcc-patches, Richard Earnshaw

Christophe Lyon <christophe.lyon@arm.com> writes:
> On 5/10/23 16:52, Kyrylo Tkachov wrote:
>> 
>> 
>>> -----Original Message-----
>>> From: Christophe Lyon <christophe.lyon@arm.com>
>>> Sent: Wednesday, May 10, 2023 2:31 PM
>>> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
>>> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
>>> <Richard.Sandiford@arm.com>
>>> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
>>> Subject: [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape
>>>
>>> This patch adds the unary_acc shape description.
>>>
>>> 2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>
>>>
>>> 	gcc/
>>> 	* config/arm/arm-mve-builtins-shapes.cc (unary_acc): New.
>>> 	* config/arm/arm-mve-builtins-shapes.h (unary_acc): New.
>>> ---
>>>   gcc/config/arm/arm-mve-builtins-shapes.cc | 28 +++++++++++++++++++++++
>>>   gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
>>>   2 files changed, 29 insertions(+)
>>>
>>> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-
>>> mve-builtins-shapes.cc
>>> index bff1c3e843b..e77a0cc20ac 100644
>>> --- a/gcc/config/arm/arm-mve-builtins-shapes.cc
>>> +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
>>> @@ -1066,6 +1066,34 @@ struct unary_def : public overloaded_base<0>
>>>   };
>>>   SHAPE (unary)
>>>
>>> +/* <S0:twice>_t vfoo[_<t0>](<T0>_t)
>>> +
>>> +   i.e. a version of "unary" in which the source elements are half the
>>> +   size of the destination scalar, but have the same type class.
>>> +
>>> +   Example: vaddlvq.
>>> +   int64_t [__arm_]vaddlvq[_s32](int32x4_t a)
>>> +   int64_t [__arm_]vaddlvq_p[_s32](int32x4_t a, mve_pred16_t p) */
>>> +struct unary_acc_def : public overloaded_base<0>
>>> +{
>>> +  void
>>> +  build (function_builder &b, const function_group_info &group,
>>> +	 bool preserve_user_namespace) const override
>>> +  {
>>> +    b.add_overloaded_functions (group, MODE_none,
>>> preserve_user_namespace);
>>> +    build_all (b, "sw0,v0", group, MODE_none, preserve_user_namespace);
>>> +  }
>>> +
>>> +  tree
>>> +  resolve (function_resolver &r) const override
>>> +  {
>>> +    /* FIXME: check that the return value is actually
>>> +       twice as wide as arg 0.  */
>> 
>> Any reason why we can't add that check now?
>> I'd rather not add new FIXMEs here...
>
> I understand :-)
>
> That's because the resolver only knows about the arguments, not the 
> return value:
>    /* The arguments to the overloaded function.  */
>    vec<tree, va_gc> &m_arglist;
>
> I kept this in line with what already exists for AArch64/SVE, but we'll
> need to extend it to handle return values too, so that we can support
> all overloaded forms of vuninitialized
> (see https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616003.html)
>
> I meant this extension to be follow-up work for when most intrinsics
> have been converted and the few remaining ones (e.g. vuninitialized)
> need an improved framework.  That would then enable us to fix the FIXME.

We can't resolve based on the return type though.  It has to be
arguments only.  E.g.:

   decltype(foo(a, b))

has to be well-defined, even though decltype (by design) provides no
context about "what the caller wants".
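
A standalone C++ sketch of this constraint (the int32x4_t stand-in and
the vaddlvq declaration are illustrative, not the real ACLE
definitions):

  #include <cstdint>
  #include <type_traits>

  struct int32x4_t { int32_t v[4]; };  /* stand-in for the MVE type */

  /* The fully-typed overload: the argument type alone selects it, and
     its declared return type then fixes the type of the call.  */
  int64_t vaddlvq (int32x4_t a);

  void
  f (int32x4_t a)
  {
    /* decltype evaluates the call with no "expected return type"
       context, so resolution can only use the argument types.  */
    static_assert (std::is_same_v<decltype (vaddlvq (a)), int64_t>);
  }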

Thanks,
Richard


* Re: [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape
  2023-05-11  8:30       ` Richard Sandiford
@ 2023-05-11  9:54         ` Christophe Lyon
  2023-05-11 10:58           ` Richard Sandiford
  0 siblings, 1 reply; 28+ messages in thread
From: Christophe Lyon @ 2023-05-11  9:54 UTC (permalink / raw)
  To: Kyrylo Tkachov, gcc-patches, Richard Earnshaw, richard.sandiford



On 5/11/23 10:30, Richard Sandiford wrote:
> Christophe Lyon <christophe.lyon@arm.com> writes:
>> On 5/10/23 16:52, Kyrylo Tkachov wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Christophe Lyon <christophe.lyon@arm.com>
>>>> Sent: Wednesday, May 10, 2023 2:31 PM
>>>> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
>>>> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
>>>> <Richard.Sandiford@arm.com>
>>>> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
>>>> Subject: [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape
>>>>
>>>> This patch adds the unary_acc shape description.
>>>>
>>>> 2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>
>>>>
>>>> 	gcc/
>>>> 	* config/arm/arm-mve-builtins-shapes.cc (unary_acc): New.
>>>> 	* config/arm/arm-mve-builtins-shapes.h (unary_acc): New.
>>>> ---
>>>>    gcc/config/arm/arm-mve-builtins-shapes.cc | 28 +++++++++++++++++++++++
>>>>    gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
>>>>    2 files changed, 29 insertions(+)
>>>>
>>>> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-
>>>> mve-builtins-shapes.cc
>>>> index bff1c3e843b..e77a0cc20ac 100644
>>>> --- a/gcc/config/arm/arm-mve-builtins-shapes.cc
>>>> +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
>>>> @@ -1066,6 +1066,34 @@ struct unary_def : public overloaded_base<0>
>>>>    };
>>>>    SHAPE (unary)
>>>>
>>>> +/* <S0:twice>_t vfoo[_<t0>](<T0>_t)
>>>> +
>>>> +   i.e. a version of "unary" in which the source elements are half the
>>>> +   size of the destination scalar, but have the same type class.
>>>> +
>>>> +   Example: vaddlvq.
>>>> +   int64_t [__arm_]vaddlvq[_s32](int32x4_t a)
>>>> +   int64_t [__arm_]vaddlvq_p[_s32](int32x4_t a, mve_pred16_t p) */
>>>> +struct unary_acc_def : public overloaded_base<0>
>>>> +{
>>>> +  void
>>>> +  build (function_builder &b, const function_group_info &group,
>>>> +	 bool preserve_user_namespace) const override
>>>> +  {
>>>> +    b.add_overloaded_functions (group, MODE_none,
>>>> preserve_user_namespace);
>>>> +    build_all (b, "sw0,v0", group, MODE_none, preserve_user_namespace);
>>>> +  }
>>>> +
>>>> +  tree
>>>> +  resolve (function_resolver &r) const override
>>>> +  {
>>>> +    /* FIXME: check that the return value is actually
>>>> +       twice as wide as arg 0.  */
>>>
>>> Any reason why we can't add that check now?
>>> I'd rather not add new FIXMEs here...
>>
>> I understand :-)
>>
>> That's because the resolver only knows about the arguments, not the
>> return value:
>>     /* The arguments to the overloaded function.  */
>>     vec<tree, va_gc> &m_arglist;
>>
>> I kept this in line with what already exists for AArch64/SVE, but we'll
>> need to extend it to handle return values too, so that we can support
>> all overloaded forms of vuninitialized
>> (see https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616003.html)
>>
>> I meant this extension to be follow-up work for when most intrinsics
>> have been converted and the few remaining ones (e.g. vuninitialized)
>> need an improved framework.  That would then enable us to fix the FIXME.
> 
> We can't resolve based on the return type though.  It has to be
> arguments only.  E.g.:
> 
>     decltype(foo(a, b))
> 
> has to be well-defined, even though decltype (by design) provides no
> context about "what the caller wants".
> 

So in fact we can probably get rid of (most of) the remaining
definitions of vuninitializedq in arm_mve.h, but not by looking at the
return type (re-reading this, I'm wondering whether I overlooked this
when I started the series...)

But for things like vaddlvq, we can't check that the result is actually
written to a location twice as large as the argument?

Thanks,

Christophe


> Thanks,
> Richard


* Re: [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape
  2023-05-11  9:54         ` Christophe Lyon
@ 2023-05-11 10:58           ` Richard Sandiford
  0 siblings, 0 replies; 28+ messages in thread
From: Richard Sandiford @ 2023-05-11 10:58 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: Kyrylo Tkachov, gcc-patches, Richard Earnshaw

Christophe Lyon <christophe.lyon@arm.com> writes:
> On 5/11/23 10:30, Richard Sandiford wrote:
>> Christophe Lyon <christophe.lyon@arm.com> writes:
>>> On 5/10/23 16:52, Kyrylo Tkachov wrote:
>>>>
>>>>
>>>>> -----Original Message-----
>>>>> From: Christophe Lyon <christophe.lyon@arm.com>
>>>>> Sent: Wednesday, May 10, 2023 2:31 PM
>>>>> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
>>>>> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
>>>>> <Richard.Sandiford@arm.com>
>>>>> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
>>>>> Subject: [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape
>>>>>
>>>>> This patch adds the unary_acc shape description.
>>>>>
>>>>> 2022-10-25  Christophe Lyon  <christophe.lyon@arm.com>
>>>>>
>>>>> 	gcc/
>>>>> 	* config/arm/arm-mve-builtins-shapes.cc (unary_acc): New.
>>>>> 	* config/arm/arm-mve-builtins-shapes.h (unary_acc): New.
>>>>> ---
>>>>>    gcc/config/arm/arm-mve-builtins-shapes.cc | 28 +++++++++++++++++++++++
>>>>>    gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
>>>>>    2 files changed, 29 insertions(+)
>>>>>
>>>>> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-
>>>>> mve-builtins-shapes.cc
>>>>> index bff1c3e843b..e77a0cc20ac 100644
>>>>> --- a/gcc/config/arm/arm-mve-builtins-shapes.cc
>>>>> +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
>>>>> @@ -1066,6 +1066,34 @@ struct unary_def : public overloaded_base<0>
>>>>>    };
>>>>>    SHAPE (unary)
>>>>>
>>>>> +/* <S0:twice>_t vfoo[_<t0>](<T0>_t)
>>>>> +
>>>>> +   i.e. a version of "unary" in which the source elements are half the
>>>>> +   size of the destination scalar, but have the same type class.
>>>>> +
>>>>> +   Example: vaddlvq.
>>>>> +   int64_t [__arm_]vaddlvq[_s32](int32x4_t a)
>>>>> +   int64_t [__arm_]vaddlvq_p[_s32](int32x4_t a, mve_pred16_t p) */
>>>>> +struct unary_acc_def : public overloaded_base<0>
>>>>> +{
>>>>> +  void
>>>>> +  build (function_builder &b, const function_group_info &group,
>>>>> +	 bool preserve_user_namespace) const override
>>>>> +  {
>>>>> +    b.add_overloaded_functions (group, MODE_none,
>>>>> preserve_user_namespace);
>>>>> +    build_all (b, "sw0,v0", group, MODE_none, preserve_user_namespace);
>>>>> +  }
>>>>> +
>>>>> +  tree
>>>>> +  resolve (function_resolver &r) const override
>>>>> +  {
>>>>> +    /* FIXME: check that the return value is actually
>>>>> +       twice as wide as arg 0.  */
>>>>
>>>> Any reason why we can't add that check now?
>>>> I'd rather not add new FIXMEs here...
>>>
>>> I understand :-)
>>>
>>> That's because the resolver only knows about the arguments, not the
>>> return value:
>>>     /* The arguments to the overloaded function.  */
>>>     vec<tree, va_gc> &m_arglist;
>>>
>>> I kept this in line with what already exists for AArch64/SVE, but we'll
>>> need to extend it to handle return values too, so that we can support
>>> all overloaded forms of vuninitialized
>>> (see https://gcc.gnu.org/pipermail/gcc-patches/2023-April/616003.html)
>>>
>>> I meant this extension to be follow-up work for when most intrinsics
>>> have been converted and the few remaining ones (e.g. vuninitialized)
>>> need an improved framework.  That would then enable us to fix the FIXME.
>> 
>> We can't resolve based on the return type though.  It has to be
>> arguments only.  E.g.:
>> 
>>     decltype(foo(a, b))
>> 
>> has to be well-defined, even though decltype (by design) provides no
>> context about "what the caller wants".
>> 
>
> So in fact we can probably get rid of (most of) the remaining
> definitions of vuninitializedq in arm_mve.h, but not by looking at the
> return type (re-reading this, I'm wondering whether I overlooked this
> when I started the series...)
>
> But for things like vaddlvq, we can't check that the result is actually
> written to a location twice as large as the argument?

No.  All we can/should do is to resolve the typeless builtin to a fully-typed
builtin, based on the argument types.  The return type of that fully-typed
builtin determines the type of the function call expression (the CALL_EXPR).
It's then up to the frontend to do semantic/type checking of the
resolved expression type.

In other words, information only flows in one direction:

  argument types -> function overloading -> function return type
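
A plain C++ model of that one-way flow (the int16x8_t overload below is
hypothetical, purely to give resolution two candidates):

  #include <cstdint>

  struct int32x4_t { int32_t v[4]; };
  struct int16x8_t { int16_t v[8]; };

  int64_t vaddlvq (int32x4_t a);  /* fully-typed "builtins", chosen */
  int32_t vaddlvq (int16x8_t a);  /* from the argument types only   */

  void
  g (int16x8_t a)
  {
    /* 1. The argument type selects the int16x8_t overload.
       2. That overload's return type (int32_t) types the call.
       3. The frontend then type-checks this initialization (an
          implicit int32_t -> int64_t conversion) instead of feeding
          int64_t back into overload resolution.  */
    int64_t wide = vaddlvq (a);
  }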

Thanks,
Richard


end of thread

Thread overview: 28+ messages
2023-05-10 13:30 [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp Christophe Lyon
2023-05-10 13:30 ` [PATCH 02/20] arm: [MVE intrinsics] add cmp shape Christophe Lyon
2023-05-10 13:30 ` [PATCH 03/20] arm: [MVE intrinsics] rework vcmp Christophe Lyon
2023-05-10 13:30 ` [PATCH 04/20] arm: [MVE intrinsics] factorize vrev16q vrev32q vrev64q Christophe Lyon
2023-05-10 13:30 ` [PATCH 05/20] arm: [MVE intrinsics] rework " Christophe Lyon
2023-05-10 13:30 ` [PATCH 06/20] arm: [MVE intrinsics] factorize vdupq Christophe Lyon
2023-05-10 13:30 ` [PATCH 07/20] arm: [MVE intrinsics] add unary_n shape Christophe Lyon
2023-05-10 13:30 ` [PATCH 08/20] arm: [MVE intrinsics] rework vdupq Christophe Lyon
2023-05-10 13:30 ` [PATCH 09/20] arm: [MVE intrinsics] factorize vaddvq Christophe Lyon
2023-05-10 13:30 ` [PATCH 10/20] arm: [MVE intrinsics] add unary_int32 shape Christophe Lyon
2023-05-10 13:30 ` [PATCH 11/20] arm: [MVE intrinsics] rework vaddvq Christophe Lyon
2023-05-10 13:30 ` [PATCH 12/20] arm: [MVE intrinsics] factorize vaddvaq Christophe Lyon
2023-05-10 13:30 ` [PATCH 13/20] arm: [MVE intrinsics] add unary_int32_acc shape Christophe Lyon
2023-05-10 13:30 ` [PATCH 14/20] arm: [MVE intrinsics] rework vaddvaq Christophe Lyon
2023-05-10 13:30 ` [PATCH 15/20] arm: [MVE intrinsics] add unary_acc shape Christophe Lyon
2023-05-10 14:52   ` Kyrylo Tkachov
2023-05-11  8:21     ` Christophe Lyon
2023-05-11  8:23       ` Kyrylo Tkachov
2023-05-11  8:24         ` Christophe Lyon
2023-05-11  8:30       ` Richard Sandiford
2023-05-11  9:54         ` Christophe Lyon
2023-05-11 10:58           ` Richard Sandiford
2023-05-10 13:30 ` [PATCH 16/20] arm: [MVE intrinsics] factorize vaddlvq Christophe Lyon
2023-05-10 13:30 ` [PATCH 17/20] arm: [MVE intrinsics] rework vaddlvq Christophe Lyon
2023-05-10 13:30 ` [PATCH 18/20] arm: [MVE intrinsics] factorize vmovlbq vmovltq Christophe Lyon
2023-05-10 13:30 ` [PATCH 19/20] arm: [MVE intrinsics] add unary_widen shape Christophe Lyon
2023-05-10 13:30 ` [PATCH 20/20] arm: [MVE intrinsics] rework vmovlbq vmovltq Christophe Lyon
2023-05-10 16:53 ` [PATCH 01/20] arm: [MVE intrinsics] factorize vcmp Kyrylo Tkachov
