public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape
@ 2023-05-12  9:38 Christophe Lyon
  2023-05-12  9:38 ` [PATCH 02/26] arm: [MVE intrinsics] factorize vqdmullbq vqdmulltq Christophe Lyon
                   ` (25 more replies)
  0 siblings, 26 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the binary_widen_opt_n shape description.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (binary_widen_opt_n): New.
	* config/arm/arm-mve-builtins-shapes.h (binary_widen_opt_n): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 49 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 50 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 5a299a272f5..ee4bc3f8ea4 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1098,6 +1098,55 @@ struct binary_widen_n_def : public overloaded_base<0>
 };
 SHAPE (binary_widen_n)
 
+/* <T0:twice>_t vfoo[_t0](<T0>_t, <T0>_t)
+   <T0:twice>_t vfoo[_n_t0](<T0>_t, <S0>_t)
+
+   Example: vqdmulltq.
+   int32x4_t [__arm_]vqdmulltq[_n_s16](int16x8_t a, int16_t b)
+   int32x4_t [__arm_]vqdmulltq_m[_n_s16](int32x4_t inactive, int16x8_t a, int16_t b, mve_pred16_t p)
+   int32x4_t [__arm_]vqdmulltq[_s16](int16x8_t a, int16x8_t b)
+   int32x4_t [__arm_]vqdmulltq_m[_s16](int32x4_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p)  */
+struct binary_widen_opt_n_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+    build_all (b, "vw0,v0,v0", group, MODE_none, preserve_user_namespace);
+    build_all (b, "vw0,v0,s0", group, MODE_n, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    type_suffix_index type;
+    if (!r.check_gp_argument (2, i, nargs)
+	|| (type = r.infer_vector_type (i - 1)) == NUM_TYPE_SUFFIXES)
+      return error_mark_node;
+
+    type_suffix_index wide_suffix
+      = find_type_suffix (type_suffixes[type].tclass,
+			  type_suffixes[type].element_bits * 2);
+
+    /* Skip the last argument: it may be a scalar, and is checked below
+       by finish_opt_n_resolution.  */
+    unsigned int last_arg = i--;
+    for (; i > 0; i--)
+      if (!r.require_matching_vector_type (i, type))
+	return error_mark_node;
+
+    /* Check the inactive argument has the wide type.  */
+    if ((r.pred == PRED_m)
+	&& (r.infer_vector_type (0) != wide_suffix))
+    return r.report_no_such_form (type);
+
+    return r.finish_opt_n_resolution (last_arg, 0, type);
+  }
+};
+SHAPE (binary_widen_opt_n)
+
 /* Shape for comparison operations that operate on
    uniform types.
 
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index a28cd6a1547..07b12b4af68 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -53,6 +53,7 @@ namespace arm_mve
     extern const function_shape *const binary_rshift_narrow;
     extern const function_shape *const binary_rshift_narrow_unsigned;
     extern const function_shape *const binary_widen_n;
+    extern const function_shape *const binary_widen_opt_n;
     extern const function_shape *const cmp;
     extern const function_shape *const create;
     extern const function_shape *const inherent;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 02/26] arm: [MVE intrinsics] factorize vqdmullbq vqdmulltq
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 03/26] arm: [MVE intrinsics] rework " Christophe Lyon
                   ` (24 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vqdmullbq, vqdmulltq builtins so that they use the same
parameterized names.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MVE_VQDMULLxQ, MVE_VQDMULLxQ_M)
	(MVE_VQDMULLxQ_M_N, MVE_VQDMULLxQ_N): New.
	(mve_insn): Add vqdmullb, vqdmullt.
	(supf): Add VQDMULLBQ_S, VQDMULLBQ_M_S, VQDMULLBQ_M_N_S,
	VQDMULLBQ_N_S, VQDMULLTQ_S, VQDMULLTQ_M_S, VQDMULLTQ_M_N_S,
	VQDMULLTQ_N_S.
	* config/arm/mve.md (mve_vqdmullbq_n_s<mode>)
	(mve_vqdmulltq_n_s<mode>): Merge into ...
	(@mve_<mve_insn>q_n_<supf><mode>): ... this.
	(mve_vqdmullbq_s<mode>, mve_vqdmulltq_s<mode>): Merge into ...
	(@mve_<mve_insn>q_<supf><mode>): ... this.
	(mve_vqdmullbq_m_n_s<mode>, mve_vqdmulltq_m_n_s<mode>): Merge into
	...
	(@mve_<mve_insn>q_m_n_<supf><mode>): ... this.
	(mve_vqdmullbq_m_s<mode>, mve_vqdmulltq_m_s<mode>): Merge into ...
	(@mve_<mve_insn>q_m_<supf><mode>): ... this.
---
 gcc/config/arm/iterators.md |  36 +++++++++++++
 gcc/config/arm/mve.md       | 100 ++++++++----------------------------
 2 files changed, 56 insertions(+), 80 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index abd904da11e..f88da604c19 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -764,6 +764,26 @@ (define_int_iterator MVE_VMLxLDAVAxQ_P [
 		     VMLSLDAVAXQ_P_S
 		     ])
 
+(define_int_iterator MVE_VQDMULLxQ [
+		     VQDMULLBQ_S
+		     VQDMULLTQ_S
+		     ])
+
+(define_int_iterator MVE_VQDMULLxQ_M [
+		     VQDMULLBQ_M_S
+		     VQDMULLTQ_M_S
+		     ])
+
+(define_int_iterator MVE_VQDMULLxQ_M_N [
+		     VQDMULLBQ_M_N_S
+		     VQDMULLTQ_M_N_S
+		     ])
+
+(define_int_iterator MVE_VQDMULLxQ_N [
+		     VQDMULLBQ_N_S
+		     VQDMULLTQ_N_S
+		     ])
+
 (define_int_iterator MVE_VQxDMLxDHxQ_S [
 		     VQDMLADHQ_S
 		     VQDMLADHXQ_S
@@ -985,6 +1005,14 @@ (define_int_attr mve_insn [
 		 (VQDMULHQ_M_S "vqdmulh")
 		 (VQDMULHQ_N_S "vqdmulh")
 		 (VQDMULHQ_S "vqdmulh")
+		 (VQDMULLBQ_M_N_S "vqdmullb")
+		 (VQDMULLBQ_M_S "vqdmullb")
+		 (VQDMULLBQ_N_S "vqdmullb")
+		 (VQDMULLBQ_S "vqdmullb")
+		 (VQDMULLTQ_M_N_S "vqdmullt")
+		 (VQDMULLTQ_M_S "vqdmullt")
+		 (VQDMULLTQ_N_S "vqdmullt")
+		 (VQDMULLTQ_S "vqdmullt")
 		 (VQMOVNBQ_M_S "vqmovnb") (VQMOVNBQ_M_U "vqmovnb")
 		 (VQMOVNBQ_S "vqmovnb") (VQMOVNBQ_U "vqmovnb")
 		 (VQMOVNTQ_M_S "vqmovnt") (VQMOVNTQ_M_U "vqmovnt")
@@ -2425,6 +2453,14 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VQDMLASHQ_N_S "s")
 		       (VQRDMLAHQ_N_S "s")
 		       (VQRDMLASHQ_N_S "s")
+		       (VQDMULLBQ_S "s")
+		       (VQDMULLBQ_M_S "s")
+		       (VQDMULLBQ_M_N_S "s")
+		       (VQDMULLBQ_N_S "s")
+		       (VQDMULLTQ_S "s")
+		       (VQDMULLTQ_M_S "s")
+		       (VQDMULLTQ_M_N_S "s")
+		       (VQDMULLTQ_N_S "s")
 		       ])
 
 ;; Both kinds of return insn.
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 14634cbf333..e75a30b7ed4 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1503,62 +1503,34 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
 ])
 
 ;;
-;; [vqdmullbq_n_s])
+;; [vqdmullbq_n_s]
+;; [vqdmulltq_n_s]
 ;;
-(define_insn "mve_vqdmullbq_n_s<mode>"
-  [
-   (set (match_operand:<V_double_width> 0 "s_register_operand" "<earlyclobber_32>")
-	(unspec:<V_double_width> [(match_operand:MVE_5 1 "s_register_operand" "w")
-				  (match_operand:<V_elem> 2 "s_register_operand" "r")]
-	 VQDMULLBQ_N_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqdmullb.s%#<V_sz_elem>	%q0, %q1, %2"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vqdmullbq_s])
-;;
-(define_insn "mve_vqdmullbq_s<mode>"
-  [
-   (set (match_operand:<V_double_width> 0 "s_register_operand" "<earlyclobber_32>")
-	(unspec:<V_double_width> [(match_operand:MVE_5 1 "s_register_operand" "w")
-				  (match_operand:MVE_5 2 "s_register_operand" "w")]
-	 VQDMULLBQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vqdmullb.s%#<V_sz_elem>	%q0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vqdmulltq_n_s])
-;;
-(define_insn "mve_vqdmulltq_n_s<mode>"
+(define_insn "@mve_<mve_insn>q_n_<supf><mode>"
   [
    (set (match_operand:<V_double_width> 0 "s_register_operand" "<earlyclobber_32>")
 	(unspec:<V_double_width> [(match_operand:MVE_5 1 "s_register_operand" "w")
 				  (match_operand:<V_elem> 2 "s_register_operand" "r")]
-	 VQDMULLTQ_N_S))
+	 MVE_VQDMULLxQ_N))
   ]
   "TARGET_HAVE_MVE"
-  "vqdmullt.s%#<V_sz_elem>	%q0, %q1, %2"
+  "<mve_insn>.s%#<V_sz_elem>\t%q0, %q1, %2"
   [(set_attr "type" "mve_move")
 ])
 
 ;;
-;; [vqdmulltq_s])
+;; [vqdmullbq_s]
+;; [vqdmulltq_s]
 ;;
-(define_insn "mve_vqdmulltq_s<mode>"
+(define_insn "@mve_<mve_insn>q_<supf><mode>"
   [
    (set (match_operand:<V_double_width> 0 "s_register_operand" "<earlyclobber_32>")
 	(unspec:<V_double_width> [(match_operand:MVE_5 1 "s_register_operand" "w")
 				  (match_operand:MVE_5 2 "s_register_operand" "w")]
-	 VQDMULLTQ_S))
+	 MVE_VQDMULLxQ))
   ]
   "TARGET_HAVE_MVE"
-  "vqdmullt.s%#<V_sz_elem>	%q0, %q1, %q2"
+  "<mve_insn>.s%#<V_sz_elem>\t%q0, %q1, %q2"
   [(set_attr "type" "mve_move")
 ])
 
@@ -3228,70 +3200,38 @@ (define_insn "mve_vmulltq_poly_m_p<mode>"
    (set_attr "length""8")])
 
 ;;
-;; [vqdmullbq_m_n_s])
-;;
-(define_insn "mve_vqdmullbq_m_n_s<mode>"
-  [
-   (set (match_operand:<V_double_width> 0 "s_register_operand" "<earlyclobber_32>")
-	(unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:<V_elem> 3 "s_register_operand" "r")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VQDMULLBQ_M_N_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vqdmullbt.s%#<V_sz_elem>\t%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vqdmullbq_m_s])
-;;
-(define_insn "mve_vqdmullbq_m_s<mode>"
-  [
-   (set (match_operand:<V_double_width> 0 "s_register_operand" "<earlyclobber_32>")
-	(unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
-		       (match_operand:MVE_5 2 "s_register_operand" "w")
-		       (match_operand:MVE_5 3 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VQDMULLBQ_M_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vqdmullbt.s%#<V_sz_elem>\t%q0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vqdmulltq_m_n_s])
+;; [vqdmullbq_m_n_s]
+;; [vqdmulltq_m_n_s]
 ;;
-(define_insn "mve_vqdmulltq_m_n_s<mode>"
+(define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
   [
    (set (match_operand:<V_double_width> 0 "s_register_operand" "<earlyclobber_32>")
 	(unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
 		       (match_operand:MVE_5 2 "s_register_operand" "w")
 		       (match_operand:<V_elem> 3 "s_register_operand" "r")
 		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VQDMULLTQ_M_N_S))
+	 MVE_VQDMULLxQ_M_N))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vqdmulltt.s%#<V_sz_elem>\t%q0, %q2, %3"
+  "vpst\;<mve_insn>t.s%#<V_sz_elem>\t%q0, %q2, %3"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
 ;;
-;; [vqdmulltq_m_s])
+;; [vqdmullbq_m_s]
+;; [vqdmulltq_m_s]
 ;;
-(define_insn "mve_vqdmulltq_m_s<mode>"
+(define_insn "@mve_<mve_insn>q_m_<supf><mode>"
   [
    (set (match_operand:<V_double_width> 0 "s_register_operand" "<earlyclobber_32>")
 	(unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
 		       (match_operand:MVE_5 2 "s_register_operand" "w")
 		       (match_operand:MVE_5 3 "s_register_operand" "w")
 		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VQDMULLTQ_M_S))
+	 MVE_VQDMULLxQ_M))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vqdmulltt.s%#<V_sz_elem>\t%q0, %q2, %q3"
+  "vpst\;<mve_insn>t.s%#<V_sz_elem>\t%q0, %q2, %q3"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 03/26] arm: [MVE intrinsics] rework vqdmullbq vqdmulltq
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
  2023-05-12  9:38 ` [PATCH 02/26] arm: [MVE intrinsics] factorize vqdmullbq vqdmulltq Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 04/26] arm: [MVE intrinsics] factorize vrmlaldavhaq vrmlaldavhaxq vrmlsldavhaq vrmlsldavhaxq Christophe Lyon
                   ` (23 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vqdmullbq, vqdmulltq using the new MVE builtins framework.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vqdmullbq, vqdmulltq): New.
	* config/arm/arm-mve-builtins-base.def (vqdmullbq, vqdmulltq):
	New.
	* config/arm/arm-mve-builtins-base.h (vqdmullbq, vqdmulltq): New.
	* config/arm/arm_mve.h (vqdmulltq): Remove.
	(vqdmullbq): Remove.
	(vqdmullbq_m): Remove.
	(vqdmulltq_m): Remove.
	(vqdmulltq_s16): Remove.
	(vqdmulltq_n_s16): Remove.
	(vqdmullbq_s16): Remove.
	(vqdmullbq_n_s16): Remove.
	(vqdmulltq_s32): Remove.
	(vqdmulltq_n_s32): Remove.
	(vqdmullbq_s32): Remove.
	(vqdmullbq_n_s32): Remove.
	(vqdmullbq_m_n_s32): Remove.
	(vqdmullbq_m_n_s16): Remove.
	(vqdmullbq_m_s32): Remove.
	(vqdmullbq_m_s16): Remove.
	(vqdmulltq_m_n_s32): Remove.
	(vqdmulltq_m_n_s16): Remove.
	(vqdmulltq_m_s32): Remove.
	(vqdmulltq_m_s16): Remove.
	(__arm_vqdmulltq_s16): Remove.
	(__arm_vqdmulltq_n_s16): Remove.
	(__arm_vqdmullbq_s16): Remove.
	(__arm_vqdmullbq_n_s16): Remove.
	(__arm_vqdmulltq_s32): Remove.
	(__arm_vqdmulltq_n_s32): Remove.
	(__arm_vqdmullbq_s32): Remove.
	(__arm_vqdmullbq_n_s32): Remove.
	(__arm_vqdmullbq_m_n_s32): Remove.
	(__arm_vqdmullbq_m_n_s16): Remove.
	(__arm_vqdmullbq_m_s32): Remove.
	(__arm_vqdmullbq_m_s16): Remove.
	(__arm_vqdmulltq_m_n_s32): Remove.
	(__arm_vqdmulltq_m_n_s16): Remove.
	(__arm_vqdmulltq_m_s32): Remove.
	(__arm_vqdmulltq_m_s16): Remove.
	(__arm_vqdmulltq): Remove.
	(__arm_vqdmullbq): Remove.
	(__arm_vqdmullbq_m): Remove.
	(__arm_vqdmulltq_m): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   2 +
 gcc/config/arm/arm-mve-builtins-base.def |   2 +
 gcc/config/arm/arm-mve-builtins-base.h   |   2 +
 gcc/config/arm/arm_mve.h                 | 294 -----------------------
 4 files changed, 6 insertions(+), 294 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index ca2fb67a07c..5ecc61ebf03 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -320,6 +320,8 @@ FUNCTION_ONLY_N_NO_U_F (vqdmlashq, VQDMLASHQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqdmlsdhq, VQDMLSDHQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqdmlsdhxq, VQDMLSDHXQ)
 FUNCTION_WITH_M_N_NO_U_F (vqdmulhq, VQDMULHQ)
+FUNCTION_WITH_M_N_NO_U_F (vqdmullbq, VQDMULLBQ)
+FUNCTION_WITH_M_N_NO_U_F (vqdmulltq, VQDMULLTQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqrdmladhq, VQRDMLADHQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqrdmladhxq, VQRDMLADHXQ)
 FUNCTION_ONLY_N_NO_U_F (vqrdmlahq, VQRDMLAHQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 601384d5a95..19cfd9933c0 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -85,6 +85,8 @@ DEF_MVE_FUNCTION (vqdmlashq, ternary_n, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqdmlsdhq, ternary, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqdmlsdhxq, ternary, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqdmulhq, binary_opt_n, all_signed, m_or_none)
+DEF_MVE_FUNCTION (vqdmullbq, binary_widen_opt_n, signed_16_32, m_or_none)
+DEF_MVE_FUNCTION (vqdmulltq, binary_widen_opt_n, signed_16_32, m_or_none)
 DEF_MVE_FUNCTION (vqmovnbq, binary_move_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vqmovntq, binary_move_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vqmovunbq, binary_move_narrow_unsigned, signed_16_32, m_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 574ed97a4b3..ca66b3b8caf 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -98,6 +98,8 @@ extern const function_base *const vqdmlashq;
 extern const function_base *const vqdmlsdhq;
 extern const function_base *const vqdmlsdhxq;
 extern const function_base *const vqdmulhq;
+extern const function_base *const vqdmullbq;
+extern const function_base *const vqdmulltq;
 extern const function_base *const vqmovnbq;
 extern const function_base *const vqmovntq;
 extern const function_base *const vqmovunbq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 09b9564ed48..e0025f017ca 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -55,8 +55,6 @@
 #define vhcaddq_rot270(__a, __b) __arm_vhcaddq_rot270(__a, __b)
 #define vmulltq_poly(__a, __b) __arm_vmulltq_poly(__a, __b)
 #define vmullbq_poly(__a, __b) __arm_vmullbq_poly(__a, __b)
-#define vqdmulltq(__a, __b) __arm_vqdmulltq(__a, __b)
-#define vqdmullbq(__a, __b) __arm_vqdmullbq(__a, __b)
 #define vbicq_m_n(__a, __imm, __p) __arm_vbicq_m_n(__a, __imm, __p)
 #define vrmlaldavhaq(__a, __b, __c) __arm_vrmlaldavhaq(__a, __b, __c)
 #define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
@@ -81,8 +79,6 @@
 #define vsliq_m(__a, __b, __imm, __p) __arm_vsliq_m(__a, __b, __imm, __p)
 #define vmullbq_poly_m(__inactive, __a, __b, __p) __arm_vmullbq_poly_m(__inactive, __a, __b, __p)
 #define vmulltq_poly_m(__inactive, __a, __b, __p) __arm_vmulltq_poly_m(__inactive, __a, __b, __p)
-#define vqdmullbq_m(__inactive, __a, __b, __p) __arm_vqdmullbq_m(__inactive, __a, __b, __p)
-#define vqdmulltq_m(__inactive, __a, __b, __p) __arm_vqdmulltq_m(__inactive, __a, __b, __p)
 #define vrmlaldavhaq_p(__a, __b, __c, __p) __arm_vrmlaldavhaq_p(__a, __b, __c, __p)
 #define vrmlaldavhaxq_p(__a, __b, __c, __p) __arm_vrmlaldavhaxq_p(__a, __b, __c, __p)
 #define vrmlsldavhaq_p(__a, __b, __c, __p) __arm_vrmlsldavhaq_p(__a, __b, __c, __p)
@@ -343,10 +339,6 @@
 #define vmulltq_poly_p8(__a, __b) __arm_vmulltq_poly_p8(__a, __b)
 #define vmullbq_poly_p8(__a, __b) __arm_vmullbq_poly_p8(__a, __b)
 #define vbicq_n_u16(__a,  __imm) __arm_vbicq_n_u16(__a,  __imm)
-#define vqdmulltq_s16(__a, __b) __arm_vqdmulltq_s16(__a, __b)
-#define vqdmulltq_n_s16(__a, __b) __arm_vqdmulltq_n_s16(__a, __b)
-#define vqdmullbq_s16(__a, __b) __arm_vqdmullbq_s16(__a, __b)
-#define vqdmullbq_n_s16(__a, __b) __arm_vqdmullbq_n_s16(__a, __b)
 #define vornq_f16(__a, __b) __arm_vornq_f16(__a, __b)
 #define vcmulq_rot90_f16(__a, __b) __arm_vcmulq_rot90_f16(__a, __b)
 #define vcmulq_rot270_f16(__a, __b) __arm_vcmulq_rot270_f16(__a, __b)
@@ -359,10 +351,6 @@
 #define vmulltq_poly_p16(__a, __b) __arm_vmulltq_poly_p16(__a, __b)
 #define vmullbq_poly_p16(__a, __b) __arm_vmullbq_poly_p16(__a, __b)
 #define vbicq_n_u32(__a,  __imm) __arm_vbicq_n_u32(__a,  __imm)
-#define vqdmulltq_s32(__a, __b) __arm_vqdmulltq_s32(__a, __b)
-#define vqdmulltq_n_s32(__a, __b) __arm_vqdmulltq_n_s32(__a, __b)
-#define vqdmullbq_s32(__a, __b) __arm_vqdmullbq_s32(__a, __b)
-#define vqdmullbq_n_s32(__a, __b) __arm_vqdmullbq_n_s32(__a, __b)
 #define vornq_f32(__a, __b) __arm_vornq_f32(__a, __b)
 #define vcmulq_rot90_f32(__a, __b) __arm_vcmulq_rot90_f32(__a, __b)
 #define vcmulq_rot270_f32(__a, __b) __arm_vcmulq_rot270_f32(__a, __b)
@@ -540,14 +528,6 @@
 #define vmullbq_poly_m_p16(__inactive, __a, __b, __p) __arm_vmullbq_poly_m_p16(__inactive, __a, __b, __p)
 #define vmulltq_poly_m_p8(__inactive, __a, __b, __p) __arm_vmulltq_poly_m_p8(__inactive, __a, __b, __p)
 #define vmulltq_poly_m_p16(__inactive, __a, __b, __p) __arm_vmulltq_poly_m_p16(__inactive, __a, __b, __p)
-#define vqdmullbq_m_n_s32(__inactive, __a, __b, __p) __arm_vqdmullbq_m_n_s32(__inactive, __a, __b, __p)
-#define vqdmullbq_m_n_s16(__inactive, __a, __b, __p) __arm_vqdmullbq_m_n_s16(__inactive, __a, __b, __p)
-#define vqdmullbq_m_s32(__inactive, __a, __b, __p) __arm_vqdmullbq_m_s32(__inactive, __a, __b, __p)
-#define vqdmullbq_m_s16(__inactive, __a, __b, __p) __arm_vqdmullbq_m_s16(__inactive, __a, __b, __p)
-#define vqdmulltq_m_n_s32(__inactive, __a, __b, __p) __arm_vqdmulltq_m_n_s32(__inactive, __a, __b, __p)
-#define vqdmulltq_m_n_s16(__inactive, __a, __b, __p) __arm_vqdmulltq_m_n_s16(__inactive, __a, __b, __p)
-#define vqdmulltq_m_s32(__inactive, __a, __b, __p) __arm_vqdmulltq_m_s32(__inactive, __a, __b, __p)
-#define vqdmulltq_m_s16(__inactive, __a, __b, __p) __arm_vqdmulltq_m_s16(__inactive, __a, __b, __p)
 #define vrmlaldavhaq_p_s32(__a, __b, __c, __p) __arm_vrmlaldavhaq_p_s32(__a, __b, __c, __p)
 #define vrmlaldavhaq_p_u32(__a, __b, __c, __p) __arm_vrmlaldavhaq_p_u32(__a, __b, __c, __p)
 #define vrmlaldavhaxq_p_s32(__a, __b, __c, __p) __arm_vrmlaldavhaxq_p_s32(__a, __b, __c, __p)
@@ -1658,34 +1638,6 @@ __arm_vbicq_n_u16 (uint16x8_t __a, const int __imm)
   return __builtin_mve_vbicq_n_uv8hi (__a, __imm);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmulltq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqdmulltq_sv8hi (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmulltq_n_s16 (int16x8_t __a, int16_t __b)
-{
-  return __builtin_mve_vqdmulltq_n_sv8hi (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmullbq_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vqdmullbq_sv8hi (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmullbq_n_s16 (int16x8_t __a, int16_t __b)
-{
-  return __builtin_mve_vqdmullbq_n_sv8hi (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_n_s16 (int16x8_t __a, const int __imm)
@@ -1714,34 +1666,6 @@ __arm_vbicq_n_u32 (uint32x4_t __a, const int __imm)
   return __builtin_mve_vbicq_n_uv4si (__a, __imm);
 }
 
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmulltq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vqdmulltq_sv4si (__a, __b);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmulltq_n_s32 (int32x4_t __a, int32_t __b)
-{
-  return __builtin_mve_vqdmulltq_n_sv4si (__a, __b);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmullbq_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vqdmullbq_sv4si (__a, __b);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmullbq_n_s32 (int32x4_t __a, int32_t __b)
-{
-  return __builtin_mve_vqdmullbq_n_sv4si (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_n_s32 (int32x4_t __a, const int __imm)
@@ -2573,62 +2497,6 @@ __arm_vmulltq_poly_m_p16 (uint32x4_t __inactive, uint16x8_t __a, uint16x8_t __b,
   return __builtin_mve_vmulltq_poly_m_pv8hi (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmullbq_m_n_s32 (int64x2_t __inactive, int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmullbq_m_n_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmullbq_m_n_s16 (int32x4_t __inactive, int16x8_t __a, int16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmullbq_m_n_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmullbq_m_s32 (int64x2_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmullbq_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmullbq_m_s16 (int32x4_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmullbq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmulltq_m_n_s32 (int64x2_t __inactive, int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmulltq_m_n_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmulltq_m_n_s16 (int32x4_t __inactive, int16x8_t __a, int16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmulltq_m_n_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmulltq_m_s32 (int64x2_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmulltq_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmulltq_m_s16 (int32x4_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vqdmulltq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrmlaldavhaq_p_s32 (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
@@ -7926,34 +7794,6 @@ __arm_vbicq (uint16x8_t __a, const int __imm)
  return __arm_vbicq_n_u16 (__a, __imm);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmulltq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vqdmulltq_s16 (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmulltq (int16x8_t __a, int16_t __b)
-{
- return __arm_vqdmulltq_n_s16 (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmullbq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vqdmullbq_s16 (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmullbq (int16x8_t __a, int16_t __b)
-{
- return __arm_vqdmullbq_n_s16 (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq (int16x8_t __a, const int __imm)
@@ -7982,34 +7822,6 @@ __arm_vbicq (uint32x4_t __a, const int __imm)
  return __arm_vbicq_n_u32 (__a, __imm);
 }
 
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmulltq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vqdmulltq_s32 (__a, __b);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmulltq (int32x4_t __a, int32_t __b)
-{
- return __arm_vqdmulltq_n_s32 (__a, __b);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmullbq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vqdmullbq_s32 (__a, __b);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmullbq (int32x4_t __a, int32_t __b)
-{
- return __arm_vqdmullbq_n_s32 (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq (int32x4_t __a, const int __imm)
@@ -8801,62 +8613,6 @@ __arm_vmulltq_poly_m (uint32x4_t __inactive, uint16x8_t __a, uint16x8_t __b, mve
  return __arm_vmulltq_poly_m_p16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmullbq_m (int64x2_t __inactive, int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmullbq_m_n_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmullbq_m (int32x4_t __inactive, int16x8_t __a, int16_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmullbq_m_n_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmullbq_m (int64x2_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmullbq_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmullbq_m (int32x4_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmullbq_m_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmulltq_m (int64x2_t __inactive, int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmulltq_m_n_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmulltq_m (int32x4_t __inactive, int16x8_t __a, int16_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmulltq_m_n_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmulltq_m (int64x2_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmulltq_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqdmulltq_m (int32x4_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vqdmulltq_m_s16 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int64_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vrmlaldavhaq_p (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
@@ -12961,22 +12717,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshluq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
   int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshluq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1));})
 
-#define __arm_vqdmulltq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmulltq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmulltq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmulltq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmulltq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
-
-#define __arm_vqdmullbq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmullbq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmullbq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmullbq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmullbq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
-
 #define __arm_vmulltq_poly(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -13906,22 +13646,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_poly_p8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_poly_p16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)));})
 
-#define __arm_vqdmulltq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmulltq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmulltq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmulltq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmulltq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
-
-#define __arm_vqdmullbq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmullbq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmullbq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce3(p1, int)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmullbq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmullbq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)));})
-
 #define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
@@ -14663,24 +14387,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_poly_m_p8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_poly_m_p16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));})
 
-#define __arm_vqdmullbq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmullbq_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmullbq_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmullbq_m_n_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmullbq_m_n_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int), p3));})
-
-#define __arm_vqdmulltq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmulltq_m_n_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmulltq_m_n_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce3(p2, int), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqdmulltq_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqdmulltq_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));})
-
 #define __arm_vmullbq_poly_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 04/26] arm: [MVE intrinsics] factorize vrmlaldavhaq vrmlaldavhaxq vrmlsldavhaq vrmlsldavhaxq
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
  2023-05-12  9:38 ` [PATCH 02/26] arm: [MVE intrinsics] factorize vqdmullbq vqdmulltq Christophe Lyon
  2023-05-12  9:38 ` [PATCH 03/26] arm: [MVE intrinsics] rework " Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 05/26] arm: [MVE intrinsics] rework " Christophe Lyon
                   ` (22 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vrmlaldavhaq, vrmlaldavhaxq, vrmlsldavhaq, vrmlsldavhaxq
builtins so that they use the same parameterized names.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MVE_VRMLxLDAVHAxQ)
	(MVE_VRMLxLDAVHAxQ_P): New.
	(mve_insn): Add vrmlaldavha, vrmlaldavhax, vrmlsldavha,
	vrmlsldavhax.
	(supf): Add VRMLALDAVHAXQ_P_S, VRMLALDAVHAXQ_S, VRMLSLDAVHAQ_P_S,
	VRMLSLDAVHAQ_S, VRMLSLDAVHAXQ_P_S, VRMLSLDAVHAXQ_S,
	VRMLALDAVHAQ_P_S, VRMLALDAVHAQ_P_U.
	* config/arm/mve.md (mve_vrmlaldavhaq_<supf>v4si)
	(mve_vrmlaldavhaxq_sv4si, mve_vrmlsldavhaxq_sv4si)
	(mve_vrmlsldavhaq_sv4si): Merge into ...
	(@mve_<mve_insn>q_<supf>v4si): ... this.
	(mve_vrmlaldavhaq_p_sv4si, mve_vrmlaldavhaq_p_uv4si)
	(mve_vrmlaldavhaxq_p_sv4si, mve_vrmlsldavhaq_p_sv4si)
	(mve_vrmlsldavhaxq_p_sv4si): Merge into ...
	(@mve_<mve_insn>q_p_<supf>v4si): ... this.
---
 gcc/config/arm/iterators.md |  29 ++++++++
 gcc/config/arm/mve.md       | 140 ++++--------------------------------
 2 files changed, 44 insertions(+), 125 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index f88da604c19..116dd95fd88 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -809,6 +809,20 @@ (define_int_iterator MVE_VRMLxLDAVHxQ_P [
 		     VRMLSLDAVHXQ_P_S
 		     ])
 
+(define_int_iterator MVE_VRMLxLDAVHAxQ [
+		     VRMLALDAVHAQ_S VRMLALDAVHAQ_U
+		     VRMLALDAVHAXQ_S
+		     VRMLSLDAVHAQ_S
+		     VRMLSLDAVHAXQ_S
+		     ])
+
+(define_int_iterator MVE_VRMLxLDAVHAxQ_P [
+		     VRMLALDAVHAQ_P_S VRMLALDAVHAQ_P_U
+		     VRMLALDAVHAXQ_P_S
+		     VRMLSLDAVHAQ_P_S
+		     VRMLSLDAVHAXQ_P_S
+		     ])
+
 (define_int_iterator MVE_MOVN [
 		     VMOVNBQ_S VMOVNBQ_U
 		     VMOVNTQ_S VMOVNTQ_U
@@ -1077,10 +1091,18 @@ (define_int_attr mve_insn [
 		 (VREV64Q_S "vrev64") (VREV64Q_U "vrev64") (VREV64Q_F "vrev64")
 		 (VRHADDQ_M_S "vrhadd") (VRHADDQ_M_U "vrhadd")
 		 (VRHADDQ_S "vrhadd") (VRHADDQ_U "vrhadd")
+		 (VRMLALDAVHAQ_P_S "vrmlaldavha") (VRMLALDAVHAQ_P_U "vrmlaldavha")
+		 (VRMLALDAVHAQ_S "vrmlaldavha") (VRMLALDAVHAQ_U "vrmlaldavha")
+		 (VRMLALDAVHAXQ_P_S "vrmlaldavhax")
+		 (VRMLALDAVHAXQ_S "vrmlaldavhax")
 		 (VRMLALDAVHQ_P_S "vrmlaldavh") (VRMLALDAVHQ_P_U "vrmlaldavh")
 		 (VRMLALDAVHQ_S "vrmlaldavh") (VRMLALDAVHQ_U "vrmlaldavh")
 		 (VRMLALDAVHXQ_P_S "vrmlaldavhx")
 		 (VRMLALDAVHXQ_S "vrmlaldavhx")
+		 (VRMLSLDAVHAQ_P_S "vrmlsldavha")
+		 (VRMLSLDAVHAQ_S "vrmlsldavha")
+		 (VRMLSLDAVHAXQ_P_S "vrmlsldavhax")
+		 (VRMLSLDAVHAXQ_S "vrmlsldavhax")
 		 (VRMLSLDAVHQ_P_S "vrmlsldavh")
 		 (VRMLSLDAVHQ_S "vrmlsldavh")
 		 (VRMLSLDAVHXQ_P_S "vrmlsldavhx")
@@ -2461,6 +2483,13 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VQDMULLTQ_M_S "s")
 		       (VQDMULLTQ_M_N_S "s")
 		       (VQDMULLTQ_N_S "s")
+		       (VRMLALDAVHAXQ_P_S "s")
+		       (VRMLALDAVHAXQ_S "s")
+		       (VRMLSLDAVHAQ_P_S "s")
+		       (VRMLSLDAVHAQ_S "s")
+		       (VRMLSLDAVHAXQ_P_S "s")
+		       (VRMLSLDAVHAXQ_S "s")
+		       (VRMLALDAVHAQ_P_S "s") (VRMLALDAVHAQ_P_U "u")
 		       ])
 
 ;; Both kinds of return insn.
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index e75a30b7ed4..b4faf7a4b18 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1677,18 +1677,21 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
 ])
 
 ;;
-;; [vrmlaldavhaq_s vrmlaldavhaq_u])
+;; [vrmlaldavhaq_s vrmlaldavhaq_u]
+;; [vrmlaldavhaxq_s]
+;; [vrmlsldavhaq_s]
+;; [vrmlsldavhaxq_s]
 ;;
-(define_insn "mve_vrmlaldavhaq_<supf>v4si"
+(define_insn "@mve_<mve_insn>q_<supf>v4si"
   [
    (set (match_operand:DI 0 "s_register_operand" "=r")
 	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
 		    (match_operand:V4SI 2 "s_register_operand" "w")
 		    (match_operand:V4SI 3 "s_register_operand" "w")]
-	 VRMLALDAVHAQ))
+	 MVE_VRMLxLDAVHAxQ))
   ]
   "TARGET_HAVE_MVE"
-  "vrmlaldavha.<supf>32\t%Q0, %R0, %q2, %q3"
+  "<mve_insn>.<supf>32\t%Q0, %R0, %q2, %q3"
   [(set_attr "type" "mve_move")
 ])
 
@@ -2514,22 +2517,6 @@ (define_insn "@mve_<mve_insn>q_m_f<mode>"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-;;
-;; [vrmlaldavhaxq_s])
-;;
-(define_insn "mve_vrmlaldavhaxq_sv4si"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
-		       (match_operand:V4SI 2 "s_register_operand" "w")
-		       (match_operand:V4SI 3 "s_register_operand" "w")]
-	 VRMLALDAVHAXQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vrmlaldavhax.s32 %Q0, %R0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-])
-
 ;;
 ;; [vrmlaldavhq_p_u vrmlaldavhq_p_s]
 ;; [vrmlaldavhxq_p_s]
@@ -2549,22 +2536,6 @@ (define_insn "@mve_<mve_insn>q_p_<supf>v4si"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-;;
-;; [vrmlsldavhaxq_s])
-;;
-(define_insn "mve_vrmlsldavhaxq_sv4si"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
-		       (match_operand:V4SI 2 "s_register_operand" "w")
-		       (match_operand:V4SI 3 "s_register_operand" "w")]
-	 VRMLSLDAVHAXQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vrmlsldavhax.s32 %Q0, %R0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-])
-
 ;;
 ;; [vcvtmq_m_s, vcvtmq_m_u])
 ;;
@@ -2662,22 +2633,6 @@ (define_insn "mve_vcvtq_m_from_f_<supf><mode>"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-;;
-;; [vrmlsldavhaq_s])
-;;
-(define_insn "mve_vrmlsldavhaq_sv4si"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
-		    (match_operand:V4SI 2 "s_register_operand" "w")
-		    (match_operand:V4SI 3 "s_register_operand" "w")]
-	 VRMLSLDAVHAQ_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vrmlsldavha.s32 %Q0, %R0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-])
-
 ;;
 ;; [vabavq_p_s, vabavq_p_u])
 ;;
@@ -3131,19 +3086,22 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
    (set_attr "length""8")])
 
 ;;
-;; [vrmlaldavhaq_p_s])
+;; [vrmlaldavhaq_p_s, vrmlaldavhaq_p_u]
+;; [vrmlaldavhaxq_p_s]
+;; [vrmlsldavhaq_p_s]
+;; [vrmlsldavhaxq_p_s]
 ;;
-(define_insn "mve_vrmlaldavhaq_p_sv4si"
+(define_insn "@mve_<mve_insn>q_p_<supf>v4si"
   [
    (set (match_operand:DI 0 "s_register_operand" "=r")
 	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
 		       (match_operand:V4SI 2 "s_register_operand" "w")
 		       (match_operand:V4SI 3 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VRMLALDAVHAQ_P_S))
+		       (match_operand:V4BI 4 "vpr_register_operand" "Up")]
+	 MVE_VRMLxLDAVHAxQ_P))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vrmlaldavhat.s32\t%Q0, %R0, %q2, %q3"
+  "vpst\;<mve_insn>t.<supf>32\t%Q0, %R0, %q2, %q3"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
@@ -3235,74 +3193,6 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-;;
-;; [vrmlaldavhaq_p_u])
-;;
-(define_insn "mve_vrmlaldavhaq_p_uv4si"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
-		       (match_operand:V4SI 2 "s_register_operand" "w")
-		       (match_operand:V4SI 3 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VRMLALDAVHAQ_P_U))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vrmlaldavhat.u32\t%Q0, %R0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vrmlaldavhaxq_p_s])
-;;
-(define_insn "mve_vrmlaldavhaxq_p_sv4si"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
-		       (match_operand:V4SI 2 "s_register_operand" "w")
-		       (match_operand:V4SI 3 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VRMLALDAVHAXQ_P_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vrmlaldavhaxt.s32\t%Q0, %R0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vrmlsldavhaq_p_s])
-;;
-(define_insn "mve_vrmlsldavhaq_p_sv4si"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
-		       (match_operand:V4SI 2 "s_register_operand" "w")
-		       (match_operand:V4SI 3 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VRMLSLDAVHAQ_P_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vrmlsldavhat.s32\t%Q0, %R0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vrmlsldavhaxq_p_s])
-;;
-(define_insn "mve_vrmlsldavhaxq_p_sv4si"
-  [
-   (set (match_operand:DI 0 "s_register_operand" "=r")
-	(unspec:DI [(match_operand:DI 1 "s_register_operand" "0")
-		       (match_operand:V4SI 2 "s_register_operand" "w")
-		       (match_operand:V4SI 3 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VRMLSLDAVHAXQ_P_S))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vrmlsldavhaxt.s32\t%Q0, %R0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
 ;;
 ;; [vabdq_m_f]
 ;; [vaddq_m_f]
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 05/26] arm: [MVE intrinsics] rework vrmlaldavhaq vrmlaldavhaxq vrmlsldavhaq vrmlsldavhaxq
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (2 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 04/26] arm: [MVE intrinsics] factorize vrmlaldavhaq vrmlaldavhaxq vrmlsldavhaq vrmlsldavhaxq Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 06/26] arm: [MVE intrinsics] add binary_lshift_unsigned shape Christophe Lyon
                   ` (21 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vrmlaldavhaq, vrmlaldavhaxq, vrmlsldavhaq, vrmlsldavhaxq
using the new MVE builtins framework.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vrmlaldavhaq)
	(vrmlaldavhaxq, vrmlsldavhaq, vrmlsldavhaxq): New.
	* config/arm/arm-mve-builtins-base.def (vrmlaldavhaq)
	(vrmlaldavhaxq, vrmlsldavhaq, vrmlsldavhaxq): New.
	* config/arm/arm-mve-builtins-base.h (vrmlaldavhaq)
	(vrmlaldavhaxq, vrmlsldavhaq, vrmlsldavhaxq): New.
	* config/arm/arm-mve-builtins-functions.h: Handle vrmlaldavhaq,
	vrmlaldavhaxq, vrmlsldavhaq, vrmlsldavhaxq.
	* config/arm/arm_mve.h (vrmlaldavhaq): Remove.
	(vrmlaldavhaxq): Remove.
	(vrmlsldavhaq): Remove.
	(vrmlsldavhaxq): Remove.
	(vrmlaldavhaq_p): Remove.
	(vrmlaldavhaxq_p): Remove.
	(vrmlsldavhaq_p): Remove.
	(vrmlsldavhaxq_p): Remove.
	(vrmlaldavhaq_s32): Remove.
	(vrmlaldavhaq_u32): Remove.
	(vrmlaldavhaxq_s32): Remove.
	(vrmlsldavhaq_s32): Remove.
	(vrmlsldavhaxq_s32): Remove.
	(vrmlaldavhaq_p_s32): Remove.
	(vrmlaldavhaq_p_u32): Remove.
	(vrmlaldavhaxq_p_s32): Remove.
	(vrmlsldavhaq_p_s32): Remove.
	(vrmlsldavhaxq_p_s32): Remove.
	(__arm_vrmlaldavhaq_s32): Remove.
	(__arm_vrmlaldavhaq_u32): Remove.
	(__arm_vrmlaldavhaxq_s32): Remove.
	(__arm_vrmlsldavhaq_s32): Remove.
	(__arm_vrmlsldavhaxq_s32): Remove.
	(__arm_vrmlaldavhaq_p_s32): Remove.
	(__arm_vrmlaldavhaq_p_u32): Remove.
	(__arm_vrmlaldavhaxq_p_s32): Remove.
	(__arm_vrmlsldavhaq_p_s32): Remove.
	(__arm_vrmlsldavhaxq_p_s32): Remove.
	(__arm_vrmlaldavhaq): Remove.
	(__arm_vrmlaldavhaxq): Remove.
	(__arm_vrmlsldavhaq): Remove.
	(__arm_vrmlsldavhaxq): Remove.
	(__arm_vrmlaldavhaq_p): Remove.
	(__arm_vrmlaldavhaxq_p): Remove.
	(__arm_vrmlsldavhaq_p): Remove.
	(__arm_vrmlsldavhaxq_p): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc     |   4 +
 gcc/config/arm/arm-mve-builtins-base.def    |   4 +
 gcc/config/arm/arm-mve-builtins-base.h      |   4 +
 gcc/config/arm/arm-mve-builtins-functions.h |   4 +
 gcc/config/arm/arm_mve.h                    | 184 --------------------
 5 files changed, 16 insertions(+), 184 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 5ecc61ebf03..a2b227bb2aa 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -346,8 +346,12 @@ FUNCTION_WITHOUT_N_NO_F (vrev16q, VREV16Q)
 FUNCTION_WITHOUT_N (vrev32q, VREV32Q)
 FUNCTION_WITHOUT_N (vrev64q, VREV64Q)
 FUNCTION_WITHOUT_N_NO_F (vrhaddq, VRHADDQ)
+FUNCTION_PRED_P_S_U (vrmlaldavhaq, VRMLALDAVHAQ)
+FUNCTION_PRED_P_S (vrmlaldavhaxq, VRMLALDAVHAXQ)
 FUNCTION_PRED_P_S_U (vrmlaldavhq, VRMLALDAVHQ)
 FUNCTION_PRED_P_S (vrmlaldavhxq, VRMLALDAVHXQ)
+FUNCTION_PRED_P_S (vrmlsldavhaq, VRMLSLDAVHAQ)
+FUNCTION_PRED_P_S (vrmlsldavhaxq, VRMLSLDAVHAXQ)
 FUNCTION_PRED_P_S (vrmlsldavhq, VRMLSLDAVHQ)
 FUNCTION_PRED_P_S (vrmlsldavhxq, VRMLSLDAVHXQ)
 FUNCTION_WITHOUT_N_NO_F (vrmulhq, VRMULHQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 19cfd9933c0..c4ef74169dd 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -116,8 +116,12 @@ DEF_MVE_FUNCTION (vrev16q, unary, integer_8, mx_or_none)
 DEF_MVE_FUNCTION (vrev32q, unary, integer_8_16, mx_or_none)
 DEF_MVE_FUNCTION (vrev64q, unary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vrhaddq, binary, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vrmlaldavhaq, binary_acca_int64, integer_32, p_or_none)
+DEF_MVE_FUNCTION (vrmlaldavhaxq, binary_acca_int64, signed_32, p_or_none)
 DEF_MVE_FUNCTION (vrmlaldavhq, binary_acc_int64, integer_32, p_or_none)
 DEF_MVE_FUNCTION (vrmlaldavhxq, binary_acc_int64, signed_32, p_or_none)
+DEF_MVE_FUNCTION (vrmlsldavhaq, binary_acca_int64, signed_32, p_or_none)
+DEF_MVE_FUNCTION (vrmlsldavhaxq, binary_acca_int64, signed_32, p_or_none)
 DEF_MVE_FUNCTION (vrmlsldavhq, binary_acc_int64, signed_32, p_or_none)
 DEF_MVE_FUNCTION (vrmlsldavhxq, binary_acc_int64, signed_32, p_or_none)
 DEF_MVE_FUNCTION (vrmulhq, binary, all_integer, mx_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index ca66b3b8caf..41b2e19c2d7 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -128,8 +128,12 @@ extern const function_base *const vrev16q;
 extern const function_base *const vrev32q;
 extern const function_base *const vrev64q;
 extern const function_base *const vrhaddq;
+extern const function_base *const vrmlaldavhaq;
+extern const function_base *const vrmlaldavhaxq;
 extern const function_base *const vrmlaldavhq;
 extern const function_base *const vrmlaldavhxq;
+extern const function_base *const vrmlsldavhaq;
+extern const function_base *const vrmlsldavhaxq;
 extern const function_base *const vrmlsldavhq;
 extern const function_base *const vrmlsldavhxq;
 extern const function_base *const vrmulhq;
diff --git a/gcc/config/arm/arm-mve-builtins-functions.h b/gcc/config/arm/arm-mve-builtins-functions.h
index 77a6269f0da..8f3bae4b7da 100644
--- a/gcc/config/arm/arm-mve-builtins-functions.h
+++ b/gcc/config/arm/arm-mve-builtins-functions.h
@@ -411,8 +411,12 @@ public:
 
     if (m_unspec_for_sint == VADDLVQ_S
 	|| m_unspec_for_sint == VADDLVAQ_S
+	|| m_unspec_for_sint == VRMLALDAVHAQ_S
+	|| m_unspec_for_sint == VRMLALDAVHAXQ_S
 	|| m_unspec_for_sint == VRMLALDAVHQ_S
 	|| m_unspec_for_sint == VRMLALDAVHXQ_S
+	|| m_unspec_for_sint == VRMLSLDAVHAQ_S
+	|| m_unspec_for_sint == VRMLSLDAVHAXQ_S
 	|| m_unspec_for_sint == VRMLSLDAVHQ_S
 	|| m_unspec_for_sint == VRMLSLDAVHXQ_S)
       {
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index e0025f017ca..c995093e12f 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -56,15 +56,11 @@
 #define vmulltq_poly(__a, __b) __arm_vmulltq_poly(__a, __b)
 #define vmullbq_poly(__a, __b) __arm_vmullbq_poly(__a, __b)
 #define vbicq_m_n(__a, __imm, __p) __arm_vbicq_m_n(__a, __imm, __p)
-#define vrmlaldavhaq(__a, __b, __c) __arm_vrmlaldavhaq(__a, __b, __c)
 #define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
 #define vpselq(__a, __b, __p) __arm_vpselq(__a, __b, __p)
 #define vmvnq_m(__inactive, __a, __p) __arm_vmvnq_m(__inactive, __a, __p)
 #define vsriq(__a, __b, __imm) __arm_vsriq(__a, __b, __imm)
 #define vsliq(__a, __b, __imm) __arm_vsliq(__a, __b, __imm)
-#define vrmlaldavhaxq(__a, __b, __c) __arm_vrmlaldavhaxq(__a, __b, __c)
-#define vrmlsldavhaq(__a, __b, __c) __arm_vrmlsldavhaq(__a, __b, __c)
-#define vrmlsldavhaxq(__a, __b, __c) __arm_vrmlsldavhaxq(__a, __b, __c)
 #define vsriq_m(__a, __b, __imm, __p) __arm_vsriq_m(__a, __b, __imm, __p)
 #define vqshluq_m(__inactive, __a, __imm, __p) __arm_vqshluq_m(__inactive, __a, __imm, __p)
 #define vbicq_m(__inactive, __a, __b, __p) __arm_vbicq_m(__inactive, __a, __b, __p)
@@ -79,10 +75,6 @@
 #define vsliq_m(__a, __b, __imm, __p) __arm_vsliq_m(__a, __b, __imm, __p)
 #define vmullbq_poly_m(__inactive, __a, __b, __p) __arm_vmullbq_poly_m(__inactive, __a, __b, __p)
 #define vmulltq_poly_m(__inactive, __a, __b, __p) __arm_vmulltq_poly_m(__inactive, __a, __b, __p)
-#define vrmlaldavhaq_p(__a, __b, __c, __p) __arm_vrmlaldavhaq_p(__a, __b, __c, __p)
-#define vrmlaldavhaxq_p(__a, __b, __c, __p) __arm_vrmlaldavhaxq_p(__a, __b, __c, __p)
-#define vrmlsldavhaq_p(__a, __b, __c, __p) __arm_vrmlsldavhaq_p(__a, __b, __c, __p)
-#define vrmlsldavhaxq_p(__a, __b, __c, __p) __arm_vrmlsldavhaxq_p(__a, __b, __c, __p)
 #define vstrbq_scatter_offset(__base, __offset, __value) __arm_vstrbq_scatter_offset(__base, __offset, __value)
 #define vstrbq(__addr, __value) __arm_vstrbq(__addr, __value)
 #define vstrwq_scatter_base(__addr, __offset, __value) __arm_vstrwq_scatter_base(__addr, __offset, __value)
@@ -378,8 +370,6 @@
 #define vcvtq_m_f16_u16(__inactive, __a, __p) __arm_vcvtq_m_f16_u16(__inactive, __a, __p)
 #define vcvtq_m_f32_s32(__inactive, __a, __p) __arm_vcvtq_m_f32_s32(__inactive, __a, __p)
 #define vcvtq_m_f32_u32(__inactive, __a, __p) __arm_vcvtq_m_f32_u32(__inactive, __a, __p)
-#define vrmlaldavhaq_s32(__a, __b, __c) __arm_vrmlaldavhaq_s32(__a, __b, __c)
-#define vrmlaldavhaq_u32(__a, __b, __c) __arm_vrmlaldavhaq_u32(__a, __b, __c)
 #define vshlcq_s8(__a,  __b,  __imm) __arm_vshlcq_s8(__a,  __b,  __imm)
 #define vshlcq_u8(__a,  __b,  __imm) __arm_vshlcq_u8(__a,  __b,  __imm)
 #define vshlcq_s16(__a,  __b,  __imm) __arm_vshlcq_s16(__a,  __b,  __imm)
@@ -412,9 +402,6 @@
 #define vsliq_n_s32(__a, __b,  __imm) __arm_vsliq_n_s32(__a, __b,  __imm)
 #define vpselq_u64(__a, __b, __p) __arm_vpselq_u64(__a, __b, __p)
 #define vpselq_s64(__a, __b, __p) __arm_vpselq_s64(__a, __b, __p)
-#define vrmlaldavhaxq_s32(__a, __b, __c) __arm_vrmlaldavhaxq_s32(__a, __b, __c)
-#define vrmlsldavhaq_s32(__a, __b, __c) __arm_vrmlsldavhaq_s32(__a, __b, __c)
-#define vrmlsldavhaxq_s32(__a, __b, __c) __arm_vrmlsldavhaxq_s32(__a, __b, __c)
 #define vcvtbq_m_f16_f32(__a, __b, __p) __arm_vcvtbq_m_f16_f32(__a, __b, __p)
 #define vcvtbq_m_f32_f16(__inactive, __a, __p) __arm_vcvtbq_m_f32_f16(__inactive, __a, __p)
 #define vcvttq_m_f16_f32(__a, __b, __p) __arm_vcvttq_m_f16_f32(__a, __b, __p)
@@ -528,11 +515,6 @@
 #define vmullbq_poly_m_p16(__inactive, __a, __b, __p) __arm_vmullbq_poly_m_p16(__inactive, __a, __b, __p)
 #define vmulltq_poly_m_p8(__inactive, __a, __b, __p) __arm_vmulltq_poly_m_p8(__inactive, __a, __b, __p)
 #define vmulltq_poly_m_p16(__inactive, __a, __b, __p) __arm_vmulltq_poly_m_p16(__inactive, __a, __b, __p)
-#define vrmlaldavhaq_p_s32(__a, __b, __c, __p) __arm_vrmlaldavhaq_p_s32(__a, __b, __c, __p)
-#define vrmlaldavhaq_p_u32(__a, __b, __c, __p) __arm_vrmlaldavhaq_p_u32(__a, __b, __c, __p)
-#define vrmlaldavhaxq_p_s32(__a, __b, __c, __p) __arm_vrmlaldavhaxq_p_s32(__a, __b, __c, __p)
-#define vrmlsldavhaq_p_s32(__a, __b, __c, __p) __arm_vrmlsldavhaq_p_s32(__a, __b, __c, __p)
-#define vrmlsldavhaxq_p_s32(__a, __b, __c, __p) __arm_vrmlsldavhaxq_p_s32(__a, __b, __c, __p)
 #define vbicq_m_f32(__inactive, __a, __b, __p) __arm_vbicq_m_f32(__inactive, __a, __b, __p)
 #define vbicq_m_f16(__inactive, __a, __b, __p) __arm_vbicq_m_f16(__inactive, __a, __b, __p)
 #define vbrsrq_m_n_f32(__inactive, __a, __b, __p) __arm_vbrsrq_m_n_f32(__inactive, __a, __b, __p)
@@ -1729,20 +1711,6 @@ __arm_vbicq_m_n_u32 (uint32x4_t __a, const int __imm, mve_pred16_t __p)
   return __builtin_mve_vbicq_m_n_uv4si (__a, __imm, __p);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhaq_s32 (int64_t __a, int32x4_t __b, int32x4_t __c)
-{
-  return __builtin_mve_vrmlaldavhaq_sv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhaq_u32 (uint64_t __a, uint32x4_t __b, uint32x4_t __c)
-{
-  return __builtin_mve_vrmlaldavhaq_uv4si (__a, __b, __c);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshlcq_s8 (int8x16_t __a, uint32_t * __b, const int __imm)
@@ -1979,27 +1947,6 @@ __arm_vpselq_s64 (int64x2_t __a, int64x2_t __b, mve_pred16_t __p)
   return __builtin_mve_vpselq_sv2di (__a, __b, __p);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhaxq_s32 (int64_t __a, int32x4_t __b, int32x4_t __c)
-{
-  return __builtin_mve_vrmlaldavhaxq_sv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlsldavhaq_s32 (int64_t __a, int32x4_t __b, int32x4_t __c)
-{
-  return __builtin_mve_vrmlsldavhaq_sv4si (__a, __b, __c);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlsldavhaxq_s32 (int64_t __a, int32x4_t __b, int32x4_t __c)
-{
-  return __builtin_mve_vrmlsldavhaxq_sv4si (__a, __b, __c);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m_n_s16 (int16x8_t __inactive, const int __imm, mve_pred16_t __p)
@@ -2497,41 +2444,6 @@ __arm_vmulltq_poly_m_p16 (uint32x4_t __inactive, uint16x8_t __a, uint16x8_t __b,
   return __builtin_mve_vmulltq_poly_m_pv8hi (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhaq_p_s32 (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vrmlaldavhaq_p_sv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhaq_p_u32 (uint64_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vrmlaldavhaq_p_uv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhaxq_p_s32 (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vrmlaldavhaxq_p_sv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlsldavhaq_p_s32 (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vrmlsldavhaq_p_sv4si (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlsldavhaxq_p_s32 (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vrmlsldavhaxq_p_sv4si (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vstrbq_scatter_offset_s8 (int8_t * __base, uint8x16_t __offset, int8x16_t __value)
@@ -7857,20 +7769,6 @@ __arm_vbicq_m_n (uint32x4_t __a, const int __imm, mve_pred16_t __p)
  return __arm_vbicq_m_n_u32 (__a, __imm, __p);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhaq (int64_t __a, int32x4_t __b, int32x4_t __c)
-{
- return __arm_vrmlaldavhaq_s32 (__a, __b, __c);
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhaq (uint64_t __a, uint32x4_t __b, uint32x4_t __c)
-{
- return __arm_vrmlaldavhaq_u32 (__a, __b, __c);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vshlcq (int8x16_t __a, uint32_t * __b, const int __imm)
@@ -8095,27 +7993,6 @@ __arm_vpselq (int64x2_t __a, int64x2_t __b, mve_pred16_t __p)
  return __arm_vpselq_s64 (__a, __b, __p);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhaxq (int64_t __a, int32x4_t __b, int32x4_t __c)
-{
- return __arm_vrmlaldavhaxq_s32 (__a, __b, __c);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlsldavhaq (int64_t __a, int32x4_t __b, int32x4_t __c)
-{
- return __arm_vrmlsldavhaq_s32 (__a, __b, __c);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlsldavhaxq (int64_t __a, int32x4_t __b, int32x4_t __c)
-{
- return __arm_vrmlsldavhaxq_s32 (__a, __b, __c);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_m (int16x8_t __inactive, const int __imm, mve_pred16_t __p)
@@ -8613,41 +8490,6 @@ __arm_vmulltq_poly_m (uint32x4_t __inactive, uint16x8_t __a, uint16x8_t __b, mve
  return __arm_vmulltq_poly_m_p16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhaq_p (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vrmlaldavhaq_p_s32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline uint64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhaq_p (uint64_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vrmlaldavhaq_p_u32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlaldavhaxq_p (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vrmlaldavhaxq_p_s32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlsldavhaq_p (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vrmlsldavhaq_p_s32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline int64_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vrmlsldavhaxq_p (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vrmlsldavhaxq_p_s32 (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vstrbq_scatter_offset (int8_t * __base, uint8x16_t __offset, int8x16_t __value)
@@ -14305,12 +14147,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsliq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),  p2, p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsliq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),  p2, p3));})
 
-#define __arm_vrmlaldavhaxq_p(p0,p1,p2,p3) __arm_vrmlaldavhaxq_p_s32(p0,p1,p2,p3)
-
-#define __arm_vrmlsldavhaq_p(p0,p1,p2,p3) __arm_vrmlsldavhaq_p_s32(p0,p1,p2,p3)
-
-#define __arm_vrmlsldavhaxq_p(p0,p1,p2,p3) __arm_vrmlsldavhaxq_p_s32(p0,p1,p2,p3)
-
 #define __arm_vmvnq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -14544,12 +14380,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_poly_x_p8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_poly_x_p16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));})
 
-#define __arm_vrmlaldavhaxq(p0,p1,p2) __arm_vrmlaldavhaxq_s32(p0,p1,p2)
-
-#define __arm_vrmlsldavhaq(p0,p1,p2) __arm_vrmlsldavhaq_s32(p0,p1,p2)
-
-#define __arm_vrmlsldavhaxq(p0,p1,p2) __arm_vrmlsldavhaxq_s32(p0,p1,p2)
-
 #define __arm_vstrbq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vstrbq_s8 (__ARM_mve_coerce(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t)), \
@@ -14579,20 +14409,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_p_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
   int (*)[__ARM_mve_type_uint64x2_t]: __arm_vstrdq_scatter_base_p_u64 (p0, p1, __ARM_mve_coerce(__p2, uint64x2_t), p3));})
 
-#define __arm_vrmlaldavhaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrmlaldavhaq_s32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t)), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrmlaldavhaq_u32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
-
-#define __arm_vrmlaldavhaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vrmlaldavhaq_p_s32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vrmlaldavhaq_p_u32 (__ARM_mve_coerce3(p0, int), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
 #define __arm_vstrbq_scatter_offset(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 06/26] arm: [MVE intrinsics] add binary_lshift_unsigned shape
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (3 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 05/26] arm: [MVE intrinsics] rework " Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 07/26] arm: [MVE intrinsics] factorize vqshluq Christophe Lyon
                   ` (20 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the binary_lshift_unsigned shape description.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc
	(binary_lshift_unsigned): New.
	* config/arm/arm-mve-builtins-shapes.h
	(binary_lshift_unsigned): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 58 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 59 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index ee4bc3f8ea4..91540838e03 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -526,6 +526,64 @@ struct binary_rshift_def : public overloaded_base<0>
 SHAPE (binary_rshift)
 
 
+/* <uT0>_t vfoo[_n_t0](<T0>_t, int)
+
+   Shape for vector saturating shift left operations that take a
+   vector of signed elements as first argument and an integer, and
+   produce a vector of unsigned elements.
+
+   Check that 'imm' is in the [0..#bits-1] range.
+
+   Example: vqshluq.
+   uint16x8_t [__arm_]vqshluq[_n_s16](int16x8_t a, const int imm)
+   uint16x8_t [__arm_]vqshluq_m[_n_s16](uint16x8_t inactive, int16x8_t a, const int imm, mve_pred16_t p)  */
+struct binary_lshift_unsigned_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
+    build_all (b, "vu0,vs0,ss32", group, MODE_n, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    type_suffix_index type;
+    if (!r.check_gp_argument (2, i, nargs)
+	|| (type = r.infer_vector_type (i-1)) == NUM_TYPE_SUFFIXES)
+      return error_mark_node;
+
+    if (r.pred == PRED_m)
+      {
+	/* With PRED_m, check that the 'inactive' first argument has
+	   the expected unsigned type.  */
+	type_suffix_index return_type
+	  = find_type_suffix (TYPE_unsigned, type_suffixes[type].element_bits);
+
+	if (!r.require_matching_vector_type (0, return_type))
+	  return error_mark_node;
+      }
+
+    for (; i < nargs; ++i)
+      if (!r.require_integer_immediate (i))
+	return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    unsigned int bits = c.type_suffix (0).element_bits;
+    return c.require_immediate_range (1, 0, bits - 1);
+  }
+
+};
+SHAPE (binary_lshift_unsigned)
+
 /* <uT0>_t vfoo[_t0](<uT0>_t, <T0>_t)
 
    i.e. binary operations that take a vector of unsigned elements as first argument and a
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index 07b12b4af68..6ae1443f26b 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -41,6 +41,7 @@ namespace arm_mve
     extern const function_shape *const binary_acc_int64;
     extern const function_shape *const binary_acca_int32;
     extern const function_shape *const binary_acca_int64;
+    extern const function_shape *const binary_lshift_unsigned;
     extern const function_shape *const binary_maxamina;
     extern const function_shape *const binary_maxavminav;
     extern const function_shape *const binary_maxvminv;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 07/26] arm: [MVE intrinsics] factorize vqshluq
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (4 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 06/26] arm: [MVE intrinsics] add binary_lshift_unsigned shape Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 08/26] arm: [MVE intrinsics] rework vqshluq Christophe Lyon
                   ` (19 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vqshluq builtins so that they use parameterized names.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (mve_insn): Add vqshlu.
	(supf): Add VQSHLUQ_M_N_S, VQSHLUQ_N_S.
	(VQSHLUQ_M_N, VQSHLUQ_N): New.
	* config/arm/mve.md (mve_vqshluq_n_s<mode>): Change name into ...
	(@mve_<mve_insn>q_n_<supf><mode>): ... this.
	(mve_vqshluq_m_n_s<mode>): Change name into ...
	(@mve_<mve_insn>q_m_n_<supf><mode>): ... this.
---
 gcc/config/arm/iterators.md |  6 ++++++
 gcc/config/arm/mve.md       | 12 ++++++------
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 116dd95fd88..d1d14488b56 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -1071,6 +1071,8 @@ (define_int_attr mve_insn [
 		 (VQSHLQ_N_S "vqshl") (VQSHLQ_N_U "vqshl")
 		 (VQSHLQ_R_S "vqshl") (VQSHLQ_R_U "vqshl")
 		 (VQSHLQ_S "vqshl") (VQSHLQ_U "vqshl")
+		 (VQSHLUQ_M_N_S "vqshlu")
+		 (VQSHLUQ_N_S "vqshlu")
 		 (VQSHRNBQ_M_N_S "vqshrnb") (VQSHRNBQ_M_N_U "vqshrnb")
 		 (VQSHRNBQ_N_S "vqshrnb") (VQSHRNBQ_N_U "vqshrnb")
 		 (VQSHRNTQ_M_N_S "vqshrnt") (VQSHRNTQ_M_N_U "vqshrnt")
@@ -2490,6 +2492,8 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VRMLSLDAVHAXQ_P_S "s")
 		       (VRMLSLDAVHAXQ_S "s")
 		       (VRMLALDAVHAQ_P_S "s") (VRMLALDAVHAQ_P_U "u")
+		       (VQSHLUQ_M_N_S "s")
+		       (VQSHLUQ_N_S "s")
 		       ])
 
 ;; Both kinds of return insn.
@@ -2793,6 +2797,8 @@ (define_int_iterator VADCQ_M [VADCQ_M_U VADCQ_M_S])
 (define_int_iterator UQRSHLLQ [UQRSHLL_64 UQRSHLL_48])
 (define_int_iterator SQRSHRLQ [SQRSHRL_64 SQRSHRL_48])
 (define_int_iterator VSHLCQ_M [VSHLCQ_M_S VSHLCQ_M_U])
+(define_int_iterator VQSHLUQ_M_N [VQSHLUQ_M_N_S])
+(define_int_iterator VQSHLUQ_N [VQSHLUQ_N_S])
 
 ;; Define iterators for VCMLA operations
 (define_int_iterator VCMLA_OP [UNSPEC_VCMLA
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index b4faf7a4b18..7898361b859 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1150,15 +1150,15 @@ (define_insn "@mve_<mve_insn>q_r_<supf><mode>"
 ;;
 ;; [vqshluq_n_s])
 ;;
-(define_insn "mve_vqshluq_n_s<mode>"
+(define_insn "@mve_<mve_insn>q_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
 		       (match_operand:SI 2 "<MVE_pred>" "<MVE_constraint>")]
-	 VQSHLUQ_N_S))
+	 VQSHLUQ_N))
   ]
   "TARGET_HAVE_MVE"
-  "vqshlu.s%#<V_sz_elem>\t%q0, %q1, %2"
+  "<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1, %2"
   [(set_attr "type" "mve_move")
 ])
 
@@ -2653,17 +2653,17 @@ (define_insn "@mve_<mve_insn>q_p_<supf><mode>"
 ;;
 ;; [vqshluq_m_n_s])
 ;;
-(define_insn "mve_vqshluq_m_n_s<mode>"
+(define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
 		       (match_operand:MVE_2 2 "s_register_operand" "w")
 		       (match_operand:SI 3 "<MVE_pred>" "<MVE_constraint>")
 		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VQSHLUQ_M_N_S))
+	 VQSHLUQ_M_N))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\n\tvqshlut.s%#<V_sz_elem>\t%q0, %q2, %3"
+  "vpst\n\t<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
   [(set_attr "type" "mve_move")
    (set_attr "length" "8")])
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 08/26] arm: [MVE intrinsics] rework vqshluq
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (5 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 07/26] arm: [MVE intrinsics] factorize vqshluq Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 09/26] arm: [MVE intrinsics] add binary_imm32 shape Christophe Lyon
                   ` (18 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vqshluq using the new MVE builtins framework.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vqshluq): New.
	* config/arm/arm-mve-builtins-base.def (vqshluq): New.
	* config/arm/arm-mve-builtins-base.h (vqshluq): New.
	* config/arm/arm_mve.h (vqshluq): Remove.
	(vqshluq_m): Remove.
	(vqshluq_n_s8): Remove.
	(vqshluq_n_s16): Remove.
	(vqshluq_n_s32): Remove.
	(vqshluq_m_n_s8): Remove.
	(vqshluq_m_n_s16): Remove.
	(vqshluq_m_n_s32): Remove.
	(__arm_vqshluq_n_s8): Remove.
	(__arm_vqshluq_n_s16): Remove.
	(__arm_vqshluq_n_s32): Remove.
	(__arm_vqshluq_m_n_s8): Remove.
	(__arm_vqshluq_m_n_s16): Remove.
	(__arm_vqshluq_m_n_s32): Remove.
	(__arm_vqshluq): Remove.
	(__arm_vqshluq_m): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   1 +
 gcc/config/arm/arm-mve-builtins-base.def |   1 +
 gcc/config/arm/arm-mve-builtins-base.h   |   1 +
 gcc/config/arm/arm_mve.h                 | 111 -----------------------
 4 files changed, 3 insertions(+), 111 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index a2b227bb2aa..739ab604843 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -332,6 +332,7 @@ FUNCTION_WITHOUT_N_NO_U_F (vqnegq, VQNEGQ)
 FUNCTION_WITH_M_N_NO_F (vqrshlq, VQRSHLQ)
 FUNCTION_WITH_M_N_NO_U_F (vqrdmulhq, VQRDMULHQ)
 FUNCTION_WITH_M_N_R (vqshlq, VQSHLQ)
+FUNCTION_ONLY_N_NO_U_F (vqshluq, VQSHLUQ)
 FUNCTION_ONLY_N_NO_F (vqrshrnbq, VQRSHRNBQ)
 FUNCTION_ONLY_N_NO_F (vqrshrntq, VQRSHRNTQ)
 FUNCTION_ONLY_N_NO_U_F (vqrshrunbq, VQRSHRUNBQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index c4ef74169dd..3f7bb414e40 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -106,6 +106,7 @@ DEF_MVE_FUNCTION (vqrshrunbq, binary_rshift_narrow_unsigned, signed_16_32, m_or_
 DEF_MVE_FUNCTION (vqrshruntq, binary_rshift_narrow_unsigned, signed_16_32, m_or_none)
 DEF_MVE_FUNCTION (vqshlq, binary_lshift, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vqshlq, binary_lshift_r, all_integer, m_or_none)
+DEF_MVE_FUNCTION (vqshluq, binary_lshift_unsigned, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vqshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vqshrunbq, binary_rshift_narrow_unsigned, signed_16_32, m_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 41b2e19c2d7..797f8ba2f5e 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -118,6 +118,7 @@ extern const function_base *const vqrshrntq;
 extern const function_base *const vqrshrunbq;
 extern const function_base *const vqrshruntq;
 extern const function_base *const vqshlq;
+extern const function_base *const vqshluq;
 extern const function_base *const vqshrnbq;
 extern const function_base *const vqshrntq;
 extern const function_base *const vqshrunbq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index c995093e12f..673a3df1bfd 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -50,7 +50,6 @@
 #define vcaddq_rot270(__a, __b) __arm_vcaddq_rot270(__a, __b)
 #define vbicq(__a, __b) __arm_vbicq(__a, __b)
 #define vbrsrq(__a, __b) __arm_vbrsrq(__a, __b)
-#define vqshluq(__a, __imm) __arm_vqshluq(__a, __imm)
 #define vhcaddq_rot90(__a, __b) __arm_vhcaddq_rot90(__a, __b)
 #define vhcaddq_rot270(__a, __b) __arm_vhcaddq_rot270(__a, __b)
 #define vmulltq_poly(__a, __b) __arm_vmulltq_poly(__a, __b)
@@ -62,7 +61,6 @@
 #define vsriq(__a, __b, __imm) __arm_vsriq(__a, __b, __imm)
 #define vsliq(__a, __b, __imm) __arm_vsliq(__a, __b, __imm)
 #define vsriq_m(__a, __b, __imm, __p) __arm_vsriq_m(__a, __b, __imm, __p)
-#define vqshluq_m(__inactive, __a, __imm, __p) __arm_vqshluq_m(__inactive, __a, __imm, __p)
 #define vbicq_m(__inactive, __a, __b, __p) __arm_vbicq_m(__inactive, __a, __b, __p)
 #define vbrsrq_m(__inactive, __a, __b, __p) __arm_vbrsrq_m(__inactive, __a, __b, __p)
 #define vcaddq_rot270_m(__inactive, __a, __b, __p) __arm_vcaddq_rot270_m(__inactive, __a, __b, __p)
@@ -284,7 +282,6 @@
 #define vcaddq_rot270_u8(__a, __b) __arm_vcaddq_rot270_u8(__a, __b)
 #define vbicq_u8(__a, __b) __arm_vbicq_u8(__a, __b)
 #define vbrsrq_n_u8(__a, __b) __arm_vbrsrq_n_u8(__a, __b)
-#define vqshluq_n_s8(__a,  __imm) __arm_vqshluq_n_s8(__a,  __imm)
 #define vornq_s8(__a, __b) __arm_vornq_s8(__a, __b)
 #define vmulltq_int_s8(__a, __b) __arm_vmulltq_int_s8(__a, __b)
 #define vmullbq_int_s8(__a, __b) __arm_vmullbq_int_s8(__a, __b)
@@ -301,7 +298,6 @@
 #define vcaddq_rot270_u16(__a, __b) __arm_vcaddq_rot270_u16(__a, __b)
 #define vbicq_u16(__a, __b) __arm_vbicq_u16(__a, __b)
 #define vbrsrq_n_u16(__a, __b) __arm_vbrsrq_n_u16(__a, __b)
-#define vqshluq_n_s16(__a,  __imm) __arm_vqshluq_n_s16(__a,  __imm)
 #define vornq_s16(__a, __b) __arm_vornq_s16(__a, __b)
 #define vmulltq_int_s16(__a, __b) __arm_vmulltq_int_s16(__a, __b)
 #define vmullbq_int_s16(__a, __b) __arm_vmullbq_int_s16(__a, __b)
@@ -318,7 +314,6 @@
 #define vcaddq_rot270_u32(__a, __b) __arm_vcaddq_rot270_u32(__a, __b)
 #define vbicq_u32(__a, __b) __arm_vbicq_u32(__a, __b)
 #define vbrsrq_n_u32(__a, __b) __arm_vbrsrq_n_u32(__a, __b)
-#define vqshluq_n_s32(__a,  __imm) __arm_vqshluq_n_s32(__a,  __imm)
 #define vornq_s32(__a, __b) __arm_vornq_s32(__a, __b)
 #define vmulltq_int_s32(__a, __b) __arm_vmulltq_int_s32(__a, __b)
 #define vmullbq_int_s32(__a, __b) __arm_vmullbq_int_s32(__a, __b)
@@ -446,16 +441,13 @@
 #define vcvtq_m_u32_f32(__inactive, __a, __p) __arm_vcvtq_m_u32_f32(__inactive, __a, __p)
 #define vsriq_m_n_s8(__a, __b,  __imm, __p) __arm_vsriq_m_n_s8(__a, __b,  __imm, __p)
 #define vcvtq_m_n_f16_u16(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_f16_u16(__inactive, __a,  __imm6, __p)
-#define vqshluq_m_n_s8(__inactive, __a,  __imm, __p) __arm_vqshluq_m_n_s8(__inactive, __a,  __imm, __p)
 #define vsriq_m_n_u8(__a, __b,  __imm, __p) __arm_vsriq_m_n_u8(__a, __b,  __imm, __p)
 #define vcvtq_m_n_f16_s16(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_f16_s16(__inactive, __a,  __imm6, __p)
 #define vsriq_m_n_s16(__a, __b,  __imm, __p) __arm_vsriq_m_n_s16(__a, __b,  __imm, __p)
 #define vcvtq_m_n_f32_u32(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_f32_u32(__inactive, __a,  __imm6, __p)
-#define vqshluq_m_n_s16(__inactive, __a,  __imm, __p) __arm_vqshluq_m_n_s16(__inactive, __a,  __imm, __p)
 #define vsriq_m_n_u16(__a, __b,  __imm, __p) __arm_vsriq_m_n_u16(__a, __b,  __imm, __p)
 #define vcvtq_m_n_f32_s32(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_f32_s32(__inactive, __a,  __imm6, __p)
 #define vsriq_m_n_s32(__a, __b,  __imm, __p) __arm_vsriq_m_n_s32(__a, __b,  __imm, __p)
-#define vqshluq_m_n_s32(__inactive, __a,  __imm, __p) __arm_vqshluq_m_n_s32(__inactive, __a,  __imm, __p)
 #define vsriq_m_n_u32(__a, __b,  __imm, __p) __arm_vsriq_m_n_u32(__a, __b,  __imm, __p)
 #define vbicq_m_s8(__inactive, __a, __b, __p) __arm_vbicq_m_s8(__inactive, __a, __b, __p)
 #define vbicq_m_s32(__inactive, __a, __b, __p) __arm_vbicq_m_s32(__inactive, __a, __b, __p)
@@ -1287,13 +1279,6 @@ __arm_vbrsrq_n_u8 (uint8x16_t __a, int32_t __b)
   return __builtin_mve_vbrsrq_n_uv16qi (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshluq_n_s8 (int8x16_t __a, const int __imm)
-{
-  return __builtin_mve_vqshluq_n_sv16qi (__a, __imm);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_s8 (int8x16_t __a, int8x16_t __b)
@@ -1408,13 +1393,6 @@ __arm_vbrsrq_n_u16 (uint16x8_t __a, int32_t __b)
   return __builtin_mve_vbrsrq_n_uv8hi (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshluq_n_s16 (int16x8_t __a, const int __imm)
-{
-  return __builtin_mve_vqshluq_n_sv8hi (__a, __imm);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_s16 (int16x8_t __a, int16x8_t __b)
@@ -1529,13 +1507,6 @@ __arm_vbrsrq_n_u32 (uint32x4_t __a, int32_t __b)
   return __builtin_mve_vbrsrq_n_uv4si (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshluq_n_s32 (int32x4_t __a, const int __imm)
-{
-  return __builtin_mve_vqshluq_n_sv4si (__a, __imm);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_s32 (int32x4_t __a, int32x4_t __b)
@@ -1982,13 +1953,6 @@ __arm_vsriq_m_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t
   return __builtin_mve_vsriq_m_n_sv16qi (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshluq_m_n_s8 (uint8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshluq_m_n_sv16qi (__inactive, __a, __imm, __p);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -2003,13 +1967,6 @@ __arm_vsriq_m_n_s16 (int16x8_t __a, int16x8_t __b, const int __imm, mve_pred16_t
   return __builtin_mve_vsriq_m_n_sv8hi (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshluq_m_n_s16 (uint16x8_t __inactive, int16x8_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshluq_m_n_sv8hi (__inactive, __a, __imm, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
@@ -2024,13 +1981,6 @@ __arm_vsriq_m_n_s32 (int32x4_t __a, int32x4_t __b, const int __imm, mve_pred16_t
   return __builtin_mve_vsriq_m_n_sv4si (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshluq_m_n_s32 (uint32x4_t __inactive, int32x4_t __a, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vqshluq_m_n_sv4si (__inactive, __a, __imm, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
@@ -7377,13 +7327,6 @@ __arm_vbrsrq (uint8x16_t __a, int32_t __b)
  return __arm_vbrsrq_n_u8 (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshluq (int8x16_t __a, const int __imm)
-{
- return __arm_vqshluq_n_s8 (__a, __imm);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (int8x16_t __a, int8x16_t __b)
@@ -7496,13 +7439,6 @@ __arm_vbrsrq (uint16x8_t __a, int32_t __b)
  return __arm_vbrsrq_n_u16 (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshluq (int16x8_t __a, const int __imm)
-{
- return __arm_vqshluq_n_s16 (__a, __imm);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (int16x8_t __a, int16x8_t __b)
@@ -7615,13 +7551,6 @@ __arm_vbrsrq (uint32x4_t __a, int32_t __b)
  return __arm_vbrsrq_n_u32 (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshluq (int32x4_t __a, const int __imm)
-{
- return __arm_vqshluq_n_s32 (__a, __imm);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (int32x4_t __a, int32x4_t __b)
@@ -8028,13 +7957,6 @@ __arm_vsriq_m (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
  return __arm_vsriq_m_n_s8 (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshluq_m (uint8x16_t __inactive, int8x16_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshluq_m_n_s8 (__inactive, __a, __imm, __p);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m (uint8x16_t __a, uint8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -8049,13 +7971,6 @@ __arm_vsriq_m (int16x8_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
  return __arm_vsriq_m_n_s16 (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshluq_m (uint16x8_t __inactive, int16x8_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshluq_m_n_s16 (__inactive, __a, __imm, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m (uint16x8_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
@@ -8070,13 +7985,6 @@ __arm_vsriq_m (int32x4_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
  return __arm_vsriq_m_n_s32 (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vqshluq_m (uint32x4_t __inactive, int32x4_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vqshluq_m_n_s32 (__inactive, __a, __imm, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m (uint32x4_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
@@ -12553,12 +12461,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16x8_t]: __arm_vbrsrq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), p1), \
   int (*)[__ARM_mve_type_float32x4_t]: __arm_vbrsrq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), p1));})
 
-#define __arm_vqshluq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshluq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshluq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshluq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1));})
-
 #define __arm_vmulltq_poly(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -13383,12 +13285,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vmvnq_u16 (__ARM_mve_coerce(__p0, uint16x8_t)), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vmvnq_u32 (__ARM_mve_coerce(__p0, uint32x4_t)));})
 
-#define __arm_vqshluq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vqshluq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vqshluq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vqshluq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1));})
-
 #define __arm_vornq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -14161,13 +14057,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vmvnq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce1(__p1, int) , p2), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmvnq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce1(__p1, int) , p2));})
 
-#define __arm_vqshluq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int8x16_t]: __arm_vqshluq_m_n_s8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int16x8_t]: __arm_vqshluq_m_n_s16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int32x4_t]: __arm_vqshluq_m_n_s32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3));})
-
 #define __arm_vsriq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 09/26] arm: [MVE intrinsics] add binary_imm32 shape
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (6 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 08/26] arm: [MVE intrinsics] rework vqshluq Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 10/26] arm: [MVE intrinsics] factorize vbrsrq Christophe Lyon
                   ` (17 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the binary_imm32 shape description.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (binary_imm32): New.
	* config/arm/arm-mve-builtins-shapes.h (binary_imm32): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 27 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 28 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 91540838e03..c2e138c12e1 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -489,6 +489,33 @@ struct binary_acca_int64_def : public overloaded_base<0>
 };
 SHAPE (binary_acca_int64)
 
+/* <T0>_t vfoo[_n_t0](<T0>_t, int32_t)
+
+   i.e. the shape for binary operations that operate on
+   a vector and an int32_t.
+
+   Example: vbrsrq.
+   int16x8_t [__arm_]vbrsrq[_n_s16](int16x8_t a, int32_t b)
+   int16x8_t [__arm_]vbrsrq_m[_n_s16](int16x8_t inactive, int16x8_t a, int32_t b, mve_pred16_t p)
+   int16x8_t [__arm_]vbrsrq_x[_n_s16](int16x8_t a, int32_t b, mve_pred16_t p)  */
+struct binary_imm32_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
+    build_all (b, "v0,v0,ss32", group, MODE_n, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    return r.resolve_uniform (1, 1);
+  }
+};
+SHAPE (binary_imm32)
+
 /* <T0>_t vfoo[_n_t0](<T0>_t, const int)
 
    Shape for vector shift right operations that take a vector first
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index 6ae1443f26b..bba38194ce2 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -41,6 +41,7 @@ namespace arm_mve
     extern const function_shape *const binary_acc_int64;
     extern const function_shape *const binary_acca_int32;
     extern const function_shape *const binary_acca_int64;
+    extern const function_shape *const binary_imm32;
     extern const function_shape *const binary_lshift_unsigned;
     extern const function_shape *const binary_maxamina;
     extern const function_shape *const binary_maxavminav;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 10/26] arm: [MVE intrinsics] factorize vbrsrq
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (7 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 09/26] arm: [MVE intrinsics] add binary_imm32 shape Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 11/26] arm: [MVE intrinsics] rework vbrsrq Christophe Lyon
                   ` (16 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vbrsrq builtins so that they use parameterized names.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MVE_VBRSR_M_N_FP, MVE_VBRSR_N_FP): New.
	(mve_insn): Add vbrsr.
	* config/arm/mve.md (mve_vbrsrq_n_f<mode>): Rename into ...
	(@mve_<mve_insn>q_n_f<mode>): ... this.
	(mve_vbrsrq_n_<supf><mode>): Rename into ...
	(@mve_<mve_insn>q_n_<supf><mode>): ... this.
	(mve_vbrsrq_m_n_<supf><mode>): Rename into ...
	(@mve_<mve_insn>q_m_n_<supf><mode>): ... this.
	(mve_vbrsrq_m_n_f<mode>): Rename into ...
	(@mve_<mve_insn>q_m_n_f<mode>): ... this.
---
 gcc/config/arm/iterators.md | 10 ++++++++++
 gcc/config/arm/mve.md       | 20 ++++++++++----------
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index d1d14488b56..dfc8d9cae72 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -610,6 +610,14 @@ (define_int_iterator MVE_FP_CREATE_ONLY [
 		     VCREATEQ_F
 		     ])
 
+(define_int_iterator MVE_VBRSR_M_N_FP [
+		     VBRSRQ_M_N_F
+		     ])
+
+(define_int_iterator MVE_VBRSR_N_FP [
+		     VBRSRQ_N_F
+		     ])
+
 ;; MVE comparison iterators
 (define_int_iterator MVE_CMP_M [
 		     VCMPCSQ_M_U
@@ -900,6 +908,8 @@ (define_int_attr mve_insn [
 		 (VBICQ_M_N_S "vbic") (VBICQ_M_N_U "vbic")
 		 (VBICQ_M_S "vbic") (VBICQ_M_U "vbic") (VBICQ_M_F "vbic")
 		 (VBICQ_N_S "vbic") (VBICQ_N_U "vbic")
+		 (VBRSRQ_M_N_S "vbrsr") (VBRSRQ_M_N_U "vbrsr") (VBRSRQ_M_N_F "vbrsr")
+		 (VBRSRQ_N_S "vbrsr") (VBRSRQ_N_U "vbrsr") (VBRSRQ_N_F "vbrsr")
 		 (VCLSQ_M_S "vcls")
 		 (VCLSQ_S "vcls")
 		 (VCLZQ_M_S "vclz") (VCLZQ_M_U "vclz")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 7898361b859..beca74d4964 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -529,15 +529,15 @@ (define_insn "mve_vpnotv16bi"
 ;;
 ;; [vbrsrq_n_f])
 ;;
-(define_insn "mve_vbrsrq_n_f<mode>"
+(define_insn "@mve_<mve_insn>q_n_f<mode>"
   [
    (set (match_operand:MVE_0 0 "s_register_operand" "=w")
 	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
 		       (match_operand:SI 2 "s_register_operand" "r")]
-	 VBRSRQ_N_F))
+	 MVE_VBRSR_N_FP))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vbrsr.<V_sz_elem>  %q0, %q1, %2"
+  "<mve_insn>.<V_sz_elem>\t%q0, %q1, %2"
   [(set_attr "type" "mve_move")
 ])
 
@@ -826,7 +826,7 @@ (define_expand "mve_vbicq_s<mode>"
 ;;
 ;; [vbrsrq_n_u, vbrsrq_n_s])
 ;;
-(define_insn "mve_vbrsrq_n_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
@@ -834,7 +834,7 @@ (define_insn "mve_vbrsrq_n_<supf><mode>"
 	 VBRSRQ_N))
   ]
   "TARGET_HAVE_MVE"
-  "vbrsr.%#<V_sz_elem>	%q0, %q1, %2"
+  "<mve_insn>.%#<V_sz_elem>\t%q0, %q1, %2"
   [(set_attr "type" "mve_move")
 ])
 
@@ -2802,7 +2802,7 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
 ;;
 ;; [vbrsrq_m_n_u, vbrsrq_m_n_s])
 ;;
-(define_insn "mve_vbrsrq_m_n_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
@@ -2812,7 +2812,7 @@ (define_insn "mve_vbrsrq_m_n_<supf><mode>"
 	 VBRSRQ_M_N))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vbrsrt.%#<V_sz_elem>	%q0, %q2, %3"
+  "vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2, %3"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
@@ -3257,17 +3257,17 @@ (define_insn "@mve_<mve_insn>q_m_f<mode>"
 ;;
 ;; [vbrsrq_m_n_f])
 ;;
-(define_insn "mve_vbrsrq_m_n_f<mode>"
+(define_insn "@mve_<mve_insn>q_m_n_f<mode>"
   [
    (set (match_operand:MVE_0 0 "s_register_operand" "=w")
 	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
 		       (match_operand:MVE_0 2 "s_register_operand" "w")
 		       (match_operand:SI 3 "s_register_operand" "r")
 		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VBRSRQ_M_N_F))
+	 MVE_VBRSR_M_N_FP))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vbrsrt.%#<V_sz_elem>	%q0, %q2, %3"
+  "vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2, %3"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 11/26] arm: [MVE intrinsics] rework vbrsrq
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (8 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 10/26] arm: [MVE intrinsics] factorize vbrsrq Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 12/26] arm: [MVE intrinsics] add mvn shape Christophe Lyon
                   ` (15 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vbrsrq using the new MVE builtins framework.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vbrsrq): New.
	* config/arm/arm-mve-builtins-base.def (vbrsrq): New.
	* config/arm/arm-mve-builtins-base.h (vbrsrq): New.
	* config/arm/arm_mve.h (vbrsrq): Remove.
	(vbrsrq_m): Remove.
	(vbrsrq_x): Remove.
	(vbrsrq_n_f16): Remove.
	(vbrsrq_n_f32): Remove.
	(vbrsrq_n_u8): Remove.
	(vbrsrq_n_s8): Remove.
	(vbrsrq_n_u16): Remove.
	(vbrsrq_n_s16): Remove.
	(vbrsrq_n_u32): Remove.
	(vbrsrq_n_s32): Remove.
	(vbrsrq_m_n_s8): Remove.
	(vbrsrq_m_n_s32): Remove.
	(vbrsrq_m_n_s16): Remove.
	(vbrsrq_m_n_u8): Remove.
	(vbrsrq_m_n_u32): Remove.
	(vbrsrq_m_n_u16): Remove.
	(vbrsrq_m_n_f32): Remove.
	(vbrsrq_m_n_f16): Remove.
	(vbrsrq_x_n_s8): Remove.
	(vbrsrq_x_n_s16): Remove.
	(vbrsrq_x_n_s32): Remove.
	(vbrsrq_x_n_u8): Remove.
	(vbrsrq_x_n_u16): Remove.
	(vbrsrq_x_n_u32): Remove.
	(vbrsrq_x_n_f16): Remove.
	(vbrsrq_x_n_f32): Remove.
	(__arm_vbrsrq_n_u8): Remove.
	(__arm_vbrsrq_n_s8): Remove.
	(__arm_vbrsrq_n_u16): Remove.
	(__arm_vbrsrq_n_s16): Remove.
	(__arm_vbrsrq_n_u32): Remove.
	(__arm_vbrsrq_n_s32): Remove.
	(__arm_vbrsrq_m_n_s8): Remove.
	(__arm_vbrsrq_m_n_s32): Remove.
	(__arm_vbrsrq_m_n_s16): Remove.
	(__arm_vbrsrq_m_n_u8): Remove.
	(__arm_vbrsrq_m_n_u32): Remove.
	(__arm_vbrsrq_m_n_u16): Remove.
	(__arm_vbrsrq_x_n_s8): Remove.
	(__arm_vbrsrq_x_n_s16): Remove.
	(__arm_vbrsrq_x_n_s32): Remove.
	(__arm_vbrsrq_x_n_u8): Remove.
	(__arm_vbrsrq_x_n_u16): Remove.
	(__arm_vbrsrq_x_n_u32): Remove.
	(__arm_vbrsrq_n_f16): Remove.
	(__arm_vbrsrq_n_f32): Remove.
	(__arm_vbrsrq_m_n_f32): Remove.
	(__arm_vbrsrq_m_n_f16): Remove.
	(__arm_vbrsrq_x_n_f16): Remove.
	(__arm_vbrsrq_x_n_f32): Remove.
	(__arm_vbrsrq): Remove.
	(__arm_vbrsrq_m): Remove.
	(__arm_vbrsrq_x): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   1 +
 gcc/config/arm/arm-mve-builtins-base.def |   2 +
 gcc/config/arm/arm-mve-builtins-base.h   |   1 +
 gcc/config/arm/arm_mve.h                 | 426 -----------------------
 4 files changed, 4 insertions(+), 426 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 739ab604843..2fb81c197da 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -250,6 +250,7 @@ FUNCTION_PRED_P_S_U (vaddlvq, VADDLVQ)
 FUNCTION_PRED_P_S_U (vaddvq, VADDVQ)
 FUNCTION_PRED_P_S_U (vaddvaq, VADDVAQ)
 FUNCTION_WITH_RTX_M (vandq, AND, VANDQ)
+FUNCTION_ONLY_N (vbrsrq, VBRSRQ)
 FUNCTION_WITHOUT_N_NO_U_F (vclsq, VCLSQ)
 FUNCTION (vclzq, unspec_based_mve_function_exact_insn, (CLZ, CLZ, CLZ, -1, -1, -1, VCLZQ_M_S, VCLZQ_M_U, -1, -1, -1 ,-1))
 FUNCTION (vcmpeqq, unspec_based_mve_function_exact_insn_vcmp, (EQ, EQ, EQ, VCMPEQQ_M_S, VCMPEQQ_M_U, VCMPEQQ_M_F, VCMPEQQ_M_N_S, VCMPEQQ_M_N_U, VCMPEQQ_M_N_F))
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 3f7bb414e40..e53cb2c1992 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -27,6 +27,7 @@ DEF_MVE_FUNCTION (vaddq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vaddvaq, unary_int32_acc, all_integer, p_or_none)
 DEF_MVE_FUNCTION (vaddvq, unary_int32, all_integer, p_or_none)
 DEF_MVE_FUNCTION (vandq, binary, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vbrsrq, binary_imm32, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vclsq, unary, all_signed, mx_or_none)
 DEF_MVE_FUNCTION (vclzq, unary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vcmpcsq, cmp, all_unsigned, m_or_none)
@@ -146,6 +147,7 @@ DEF_MVE_FUNCTION (vabdq, binary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vabsq, unary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vaddq, binary_opt_n, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vandq, binary, all_float, mx_or_none)
+DEF_MVE_FUNCTION (vbrsrq, binary_imm32, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vcmpeqq, cmp, all_float, m_or_none)
 DEF_MVE_FUNCTION (vcmpgeq, cmp, all_float, m_or_none)
 DEF_MVE_FUNCTION (vcmpgtq, cmp, all_float, m_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 797f8ba2f5e..49c60536961 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -32,6 +32,7 @@ extern const function_base *const vaddq;
 extern const function_base *const vaddvaq;
 extern const function_base *const vaddvq;
 extern const function_base *const vandq;
+extern const function_base *const vbrsrq;
 extern const function_base *const vclsq;
 extern const function_base *const vclzq;
 extern const function_base *const vcmpcsq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 673a3df1bfd..76c45a28eb3 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -49,7 +49,6 @@
 #define vcaddq_rot90(__a, __b) __arm_vcaddq_rot90(__a, __b)
 #define vcaddq_rot270(__a, __b) __arm_vcaddq_rot270(__a, __b)
 #define vbicq(__a, __b) __arm_vbicq(__a, __b)
-#define vbrsrq(__a, __b) __arm_vbrsrq(__a, __b)
 #define vhcaddq_rot90(__a, __b) __arm_vhcaddq_rot90(__a, __b)
 #define vhcaddq_rot270(__a, __b) __arm_vhcaddq_rot270(__a, __b)
 #define vmulltq_poly(__a, __b) __arm_vmulltq_poly(__a, __b)
@@ -62,7 +61,6 @@
 #define vsliq(__a, __b, __imm) __arm_vsliq(__a, __b, __imm)
 #define vsriq_m(__a, __b, __imm, __p) __arm_vsriq_m(__a, __b, __imm, __p)
 #define vbicq_m(__inactive, __a, __b, __p) __arm_vbicq_m(__inactive, __a, __b, __p)
-#define vbrsrq_m(__inactive, __a, __b, __p) __arm_vbrsrq_m(__inactive, __a, __b, __p)
 #define vcaddq_rot270_m(__inactive, __a, __b, __p) __arm_vcaddq_rot270_m(__inactive, __a, __b, __p)
 #define vcaddq_rot90_m(__inactive, __a, __b, __p) __arm_vcaddq_rot90_m(__inactive, __a, __b, __p)
 #define vhcaddq_rot270_m(__inactive, __a, __b, __p) __arm_vhcaddq_rot270_m(__inactive, __a, __b, __p)
@@ -155,7 +153,6 @@
 #define vhcaddq_rot90_x(__a, __b, __p) __arm_vhcaddq_rot90_x(__a, __b, __p)
 #define vhcaddq_rot270_x(__a, __b, __p) __arm_vhcaddq_rot270_x(__a, __b, __p)
 #define vbicq_x(__a, __b, __p) __arm_vbicq_x(__a, __b, __p)
-#define vbrsrq_x(__a, __b, __p) __arm_vbrsrq_x(__a, __b, __p)
 #define vmvnq_x(__a, __p) __arm_vmvnq_x(__a, __p)
 #define vornq_x(__a, __b, __p) __arm_vornq_x(__a, __b, __p)
 #define vadciq(__a, __b, __carry_out) __arm_vadciq(__a, __b, __carry_out)
@@ -265,8 +262,6 @@
 #define vctp64q(__a) __arm_vctp64q(__a)
 #define vctp8q(__a) __arm_vctp8q(__a)
 #define vpnot(__a) __arm_vpnot(__a)
-#define vbrsrq_n_f16(__a, __b) __arm_vbrsrq_n_f16(__a, __b)
-#define vbrsrq_n_f32(__a, __b) __arm_vbrsrq_n_f32(__a, __b)
 #define vcvtq_n_f16_s16(__a,  __imm6) __arm_vcvtq_n_f16_s16(__a,  __imm6)
 #define vcvtq_n_f32_s32(__a,  __imm6) __arm_vcvtq_n_f32_s32(__a,  __imm6)
 #define vcvtq_n_f16_u16(__a,  __imm6) __arm_vcvtq_n_f16_u16(__a,  __imm6)
@@ -281,7 +276,6 @@
 #define vcaddq_rot90_u8(__a, __b) __arm_vcaddq_rot90_u8(__a, __b)
 #define vcaddq_rot270_u8(__a, __b) __arm_vcaddq_rot270_u8(__a, __b)
 #define vbicq_u8(__a, __b) __arm_vbicq_u8(__a, __b)
-#define vbrsrq_n_u8(__a, __b) __arm_vbrsrq_n_u8(__a, __b)
 #define vornq_s8(__a, __b) __arm_vornq_s8(__a, __b)
 #define vmulltq_int_s8(__a, __b) __arm_vmulltq_int_s8(__a, __b)
 #define vmullbq_int_s8(__a, __b) __arm_vmullbq_int_s8(__a, __b)
@@ -289,7 +283,6 @@
 #define vhcaddq_rot270_s8(__a, __b) __arm_vhcaddq_rot270_s8(__a, __b)
 #define vcaddq_rot90_s8(__a, __b) __arm_vcaddq_rot90_s8(__a, __b)
 #define vcaddq_rot270_s8(__a, __b) __arm_vcaddq_rot270_s8(__a, __b)
-#define vbrsrq_n_s8(__a, __b) __arm_vbrsrq_n_s8(__a, __b)
 #define vbicq_s8(__a, __b) __arm_vbicq_s8(__a, __b)
 #define vornq_u16(__a, __b) __arm_vornq_u16(__a, __b)
 #define vmulltq_int_u16(__a, __b) __arm_vmulltq_int_u16(__a, __b)
@@ -297,7 +290,6 @@
 #define vcaddq_rot90_u16(__a, __b) __arm_vcaddq_rot90_u16(__a, __b)
 #define vcaddq_rot270_u16(__a, __b) __arm_vcaddq_rot270_u16(__a, __b)
 #define vbicq_u16(__a, __b) __arm_vbicq_u16(__a, __b)
-#define vbrsrq_n_u16(__a, __b) __arm_vbrsrq_n_u16(__a, __b)
 #define vornq_s16(__a, __b) __arm_vornq_s16(__a, __b)
 #define vmulltq_int_s16(__a, __b) __arm_vmulltq_int_s16(__a, __b)
 #define vmullbq_int_s16(__a, __b) __arm_vmullbq_int_s16(__a, __b)
@@ -305,7 +297,6 @@
 #define vhcaddq_rot270_s16(__a, __b) __arm_vhcaddq_rot270_s16(__a, __b)
 #define vcaddq_rot90_s16(__a, __b) __arm_vcaddq_rot90_s16(__a, __b)
 #define vcaddq_rot270_s16(__a, __b) __arm_vcaddq_rot270_s16(__a, __b)
-#define vbrsrq_n_s16(__a, __b) __arm_vbrsrq_n_s16(__a, __b)
 #define vbicq_s16(__a, __b) __arm_vbicq_s16(__a, __b)
 #define vornq_u32(__a, __b) __arm_vornq_u32(__a, __b)
 #define vmulltq_int_u32(__a, __b) __arm_vmulltq_int_u32(__a, __b)
@@ -313,7 +304,6 @@
 #define vcaddq_rot90_u32(__a, __b) __arm_vcaddq_rot90_u32(__a, __b)
 #define vcaddq_rot270_u32(__a, __b) __arm_vcaddq_rot270_u32(__a, __b)
 #define vbicq_u32(__a, __b) __arm_vbicq_u32(__a, __b)
-#define vbrsrq_n_u32(__a, __b) __arm_vbrsrq_n_u32(__a, __b)
 #define vornq_s32(__a, __b) __arm_vornq_s32(__a, __b)
 #define vmulltq_int_s32(__a, __b) __arm_vmulltq_int_s32(__a, __b)
 #define vmullbq_int_s32(__a, __b) __arm_vmullbq_int_s32(__a, __b)
@@ -321,7 +311,6 @@
 #define vhcaddq_rot270_s32(__a, __b) __arm_vhcaddq_rot270_s32(__a, __b)
 #define vcaddq_rot90_s32(__a, __b) __arm_vcaddq_rot90_s32(__a, __b)
 #define vcaddq_rot270_s32(__a, __b) __arm_vcaddq_rot270_s32(__a, __b)
-#define vbrsrq_n_s32(__a, __b) __arm_vbrsrq_n_s32(__a, __b)
 #define vbicq_s32(__a, __b) __arm_vbicq_s32(__a, __b)
 #define vmulltq_poly_p8(__a, __b) __arm_vmulltq_poly_p8(__a, __b)
 #define vmullbq_poly_p8(__a, __b) __arm_vmullbq_poly_p8(__a, __b)
@@ -455,12 +444,6 @@
 #define vbicq_m_u8(__inactive, __a, __b, __p) __arm_vbicq_m_u8(__inactive, __a, __b, __p)
 #define vbicq_m_u32(__inactive, __a, __b, __p) __arm_vbicq_m_u32(__inactive, __a, __b, __p)
 #define vbicq_m_u16(__inactive, __a, __b, __p) __arm_vbicq_m_u16(__inactive, __a, __b, __p)
-#define vbrsrq_m_n_s8(__inactive, __a, __b, __p) __arm_vbrsrq_m_n_s8(__inactive, __a, __b, __p)
-#define vbrsrq_m_n_s32(__inactive, __a, __b, __p) __arm_vbrsrq_m_n_s32(__inactive, __a, __b, __p)
-#define vbrsrq_m_n_s16(__inactive, __a, __b, __p) __arm_vbrsrq_m_n_s16(__inactive, __a, __b, __p)
-#define vbrsrq_m_n_u8(__inactive, __a, __b, __p) __arm_vbrsrq_m_n_u8(__inactive, __a, __b, __p)
-#define vbrsrq_m_n_u32(__inactive, __a, __b, __p) __arm_vbrsrq_m_n_u32(__inactive, __a, __b, __p)
-#define vbrsrq_m_n_u16(__inactive, __a, __b, __p) __arm_vbrsrq_m_n_u16(__inactive, __a, __b, __p)
 #define vcaddq_rot270_m_s8(__inactive, __a, __b, __p) __arm_vcaddq_rot270_m_s8(__inactive, __a, __b, __p)
 #define vcaddq_rot270_m_s32(__inactive, __a, __b, __p) __arm_vcaddq_rot270_m_s32(__inactive, __a, __b, __p)
 #define vcaddq_rot270_m_s16(__inactive, __a, __b, __p) __arm_vcaddq_rot270_m_s16(__inactive, __a, __b, __p)
@@ -509,8 +492,6 @@
 #define vmulltq_poly_m_p16(__inactive, __a, __b, __p) __arm_vmulltq_poly_m_p16(__inactive, __a, __b, __p)
 #define vbicq_m_f32(__inactive, __a, __b, __p) __arm_vbicq_m_f32(__inactive, __a, __b, __p)
 #define vbicq_m_f16(__inactive, __a, __b, __p) __arm_vbicq_m_f16(__inactive, __a, __b, __p)
-#define vbrsrq_m_n_f32(__inactive, __a, __b, __p) __arm_vbrsrq_m_n_f32(__inactive, __a, __b, __p)
-#define vbrsrq_m_n_f16(__inactive, __a, __b, __p) __arm_vbrsrq_m_n_f16(__inactive, __a, __b, __p)
 #define vcaddq_rot270_m_f32(__inactive, __a, __b, __p) __arm_vcaddq_rot270_m_f32(__inactive, __a, __b, __p)
 #define vcaddq_rot270_m_f16(__inactive, __a, __b, __p) __arm_vcaddq_rot270_m_f16(__inactive, __a, __b, __p)
 #define vcaddq_rot90_m_f32(__inactive, __a, __b, __p) __arm_vcaddq_rot90_m_f32(__inactive, __a, __b, __p)
@@ -883,12 +864,6 @@
 #define vbicq_x_u8(__a, __b, __p) __arm_vbicq_x_u8(__a, __b, __p)
 #define vbicq_x_u16(__a, __b, __p) __arm_vbicq_x_u16(__a, __b, __p)
 #define vbicq_x_u32(__a, __b, __p) __arm_vbicq_x_u32(__a, __b, __p)
-#define vbrsrq_x_n_s8(__a, __b, __p) __arm_vbrsrq_x_n_s8(__a, __b, __p)
-#define vbrsrq_x_n_s16(__a, __b, __p) __arm_vbrsrq_x_n_s16(__a, __b, __p)
-#define vbrsrq_x_n_s32(__a, __b, __p) __arm_vbrsrq_x_n_s32(__a, __b, __p)
-#define vbrsrq_x_n_u8(__a, __b, __p) __arm_vbrsrq_x_n_u8(__a, __b, __p)
-#define vbrsrq_x_n_u16(__a, __b, __p) __arm_vbrsrq_x_n_u16(__a, __b, __p)
-#define vbrsrq_x_n_u32(__a, __b, __p) __arm_vbrsrq_x_n_u32(__a, __b, __p)
 #define vmvnq_x_s8(__a, __p) __arm_vmvnq_x_s8(__a, __p)
 #define vmvnq_x_s16(__a, __p) __arm_vmvnq_x_s16(__a, __p)
 #define vmvnq_x_s32(__a, __p) __arm_vmvnq_x_s32(__a, __p)
@@ -953,8 +928,6 @@
 #define vcvtq_x_n_u32_f32(__a,  __imm6, __p) __arm_vcvtq_x_n_u32_f32(__a,  __imm6, __p)
 #define vbicq_x_f16(__a, __b, __p) __arm_vbicq_x_f16(__a, __b, __p)
 #define vbicq_x_f32(__a, __b, __p) __arm_vbicq_x_f32(__a, __b, __p)
-#define vbrsrq_x_n_f16(__a, __b, __p) __arm_vbrsrq_x_n_f16(__a, __b, __p)
-#define vbrsrq_x_n_f32(__a, __b, __p) __arm_vbrsrq_x_n_f32(__a, __b, __p)
 #define vornq_x_f16(__a, __b, __p) __arm_vornq_x_f16(__a, __b, __p)
 #define vornq_x_f32(__a, __b, __p) __arm_vornq_x_f32(__a, __b, __p)
 #define vadciq_s32(__a, __b,  __carry_out) __arm_vadciq_s32(__a, __b,  __carry_out)
@@ -1272,13 +1245,6 @@ __arm_vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
   return __builtin_mve_vbicq_uv16qi (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_n_u8 (uint8x16_t __a, int32_t __b)
-{
-  return __builtin_mve_vbrsrq_n_uv16qi (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_s8 (int8x16_t __a, int8x16_t __b)
@@ -1328,13 +1294,6 @@ __arm_vcaddq_rot270_s8 (int8x16_t __a, int8x16_t __b)
   return __builtin_mve_vcaddq_rot270v16qi (__a, __b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_n_s8 (int8x16_t __a, int32_t __b)
-{
-  return __builtin_mve_vbrsrq_n_sv16qi (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_s8 (int8x16_t __a, int8x16_t __b)
@@ -1386,13 +1345,6 @@ __arm_vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
   return __builtin_mve_vbicq_uv8hi (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_n_u16 (uint16x8_t __a, int32_t __b)
-{
-  return __builtin_mve_vbrsrq_n_uv8hi (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_s16 (int16x8_t __a, int16x8_t __b)
@@ -1442,13 +1394,6 @@ __arm_vcaddq_rot270_s16 (int16x8_t __a, int16x8_t __b)
   return __builtin_mve_vcaddq_rot270v8hi (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_n_s16 (int16x8_t __a, int32_t __b)
-{
-  return __builtin_mve_vbrsrq_n_sv8hi (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_s16 (int16x8_t __a, int16x8_t __b)
@@ -1500,13 +1445,6 @@ __arm_vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
   return __builtin_mve_vbicq_uv4si (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_n_u32 (uint32x4_t __a, int32_t __b)
-{
-  return __builtin_mve_vbrsrq_n_uv4si (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_s32 (int32x4_t __a, int32x4_t __b)
@@ -1556,13 +1494,6 @@ __arm_vcaddq_rot270_s32 (int32x4_t __a, int32x4_t __b)
   return __builtin_mve_vcaddq_rot270v4si (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_n_s32 (int32x4_t __a, int32_t __b)
-{
-  return __builtin_mve_vbrsrq_n_sv4si (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_s32 (int32x4_t __a, int32x4_t __b)
@@ -2030,48 +1961,6 @@ __arm_vbicq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pr
   return __builtin_mve_vbicq_m_uv8hi (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_m_n_s8 (int8x16_t __inactive, int8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vbrsrq_m_n_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_m_n_s32 (int32x4_t __inactive, int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vbrsrq_m_n_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_m_n_s16 (int16x8_t __inactive, int16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vbrsrq_m_n_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_m_n_u8 (uint8x16_t __inactive, uint8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vbrsrq_m_n_uv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_m_n_u32 (uint32x4_t __inactive, uint32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vbrsrq_m_n_uv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_m_n_u16 (uint16x8_t __inactive, uint16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vbrsrq_m_n_uv8hi (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot270_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -4585,48 +4474,6 @@ __arm_vbicq_x_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
   return __builtin_mve_vbicq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_x_n_s8 (int8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vbrsrq_m_n_sv16qi (__arm_vuninitializedq_s8 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_x_n_s16 (int16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vbrsrq_m_n_sv8hi (__arm_vuninitializedq_s16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_x_n_s32 (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vbrsrq_m_n_sv4si (__arm_vuninitializedq_s32 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_x_n_u8 (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vbrsrq_m_n_uv16qi (__arm_vuninitializedq_u8 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_x_n_u16 (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vbrsrq_m_n_uv8hi (__arm_vuninitializedq_u16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_x_n_u32 (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vbrsrq_m_n_uv4si (__arm_vuninitializedq_u32 (), __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_x_s8 (int8x16_t __a, mve_pred16_t __p)
@@ -5641,20 +5488,6 @@ __arm_vcvtmq_s32_f32 (float32x4_t __a)
   return __builtin_mve_vcvtmq_sv4si (__a);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_n_f16 (float16x8_t __a, int32_t __b)
-{
-  return __builtin_mve_vbrsrq_n_fv8hf (__a, __b);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_n_f32 (float32x4_t __a, int32_t __b)
-{
-  return __builtin_mve_vbrsrq_n_fv4sf (__a, __b);
-}
-
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtq_n_f16_s16 (int16x8_t __a, const int __imm6)
@@ -6202,20 +6035,6 @@ __arm_vbicq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve
   return __builtin_mve_vbicq_m_fv8hf (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_m_n_f32 (float32x4_t __inactive, float32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vbrsrq_m_n_fv4sf (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_m_n_f16 (float16x8_t __inactive, float16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vbrsrq_m_n_fv8hf (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot270_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
@@ -7048,20 +6867,6 @@ __arm_vbicq_x_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
   return __builtin_mve_vbicq_m_fv4sf (__arm_vuninitializedq_f32 (), __a, __b, __p);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_x_n_f16 (float16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vbrsrq_m_n_fv8hf (__arm_vuninitializedq_f16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_x_n_f32 (float32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vbrsrq_m_n_fv4sf (__arm_vuninitializedq_f32 (), __a, __b, __p);
-}
-
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_x_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
@@ -7320,13 +7125,6 @@ __arm_vbicq (uint8x16_t __a, uint8x16_t __b)
  return __arm_vbicq_u8 (__a, __b);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq (uint8x16_t __a, int32_t __b)
-{
- return __arm_vbrsrq_n_u8 (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (int8x16_t __a, int8x16_t __b)
@@ -7376,13 +7174,6 @@ __arm_vcaddq_rot270 (int8x16_t __a, int8x16_t __b)
  return __arm_vcaddq_rot270_s8 (__a, __b);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq (int8x16_t __a, int32_t __b)
-{
- return __arm_vbrsrq_n_s8 (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq (int8x16_t __a, int8x16_t __b)
@@ -7432,13 +7223,6 @@ __arm_vbicq (uint16x8_t __a, uint16x8_t __b)
  return __arm_vbicq_u16 (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq (uint16x8_t __a, int32_t __b)
-{
- return __arm_vbrsrq_n_u16 (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (int16x8_t __a, int16x8_t __b)
@@ -7488,13 +7272,6 @@ __arm_vcaddq_rot270 (int16x8_t __a, int16x8_t __b)
  return __arm_vcaddq_rot270_s16 (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq (int16x8_t __a, int32_t __b)
-{
- return __arm_vbrsrq_n_s16 (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq (int16x8_t __a, int16x8_t __b)
@@ -7544,13 +7321,6 @@ __arm_vbicq (uint32x4_t __a, uint32x4_t __b)
  return __arm_vbicq_u32 (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq (uint32x4_t __a, int32_t __b)
-{
- return __arm_vbrsrq_n_u32 (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (int32x4_t __a, int32x4_t __b)
@@ -7600,13 +7370,6 @@ __arm_vcaddq_rot270 (int32x4_t __a, int32x4_t __b)
  return __arm_vcaddq_rot270_s32 (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq (int32x4_t __a, int32_t __b)
-{
- return __arm_vbrsrq_n_s32 (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq (int32x4_t __a, int32x4_t __b)
@@ -8034,48 +7797,6 @@ __arm_vbicq_m (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16
  return __arm_vbicq_m_u16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_m (int8x16_t __inactive, int8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vbrsrq_m_n_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_m (int32x4_t __inactive, int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vbrsrq_m_n_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_m (int16x8_t __inactive, int16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vbrsrq_m_n_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_m (uint8x16_t __inactive, uint8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vbrsrq_m_n_u8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_m (uint32x4_t __inactive, uint32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vbrsrq_m_n_u32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_m (uint16x8_t __inactive, uint16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vbrsrq_m_n_u16 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot270_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -10162,48 +9883,6 @@ __arm_vbicq_x (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
  return __arm_vbicq_x_u32 (__a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_x (int8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vbrsrq_x_n_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_x (int16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vbrsrq_x_n_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_x (int32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vbrsrq_x_n_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_x (uint8x16_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vbrsrq_x_n_u8 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_x (uint16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vbrsrq_x_n_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_x (uint32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vbrsrq_x_n_u32 (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmvnq_x (int8x16_t __a, mve_pred16_t __p)
@@ -10822,20 +10501,6 @@ __arm_vcvtq (uint32x4_t __a)
  return __arm_vcvtq_f32_u32 (__a);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq (float16x8_t __a, int32_t __b)
-{
- return __arm_vbrsrq_n_f16 (__a, __b);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq (float32x4_t __a, int32_t __b)
-{
- return __arm_vbrsrq_n_f32 (__a, __b);
-}
-
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtq_n (int16x8_t __a, const int __imm6)
@@ -11340,20 +11005,6 @@ __arm_vbicq_m (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pre
  return __arm_vbicq_m_f16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_m (float32x4_t __inactive, float32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vbrsrq_m_n_f32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_m (float16x8_t __inactive, float16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vbrsrq_m_n_f16 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot270_m (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
@@ -11942,20 +11593,6 @@ __arm_vbicq_x (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
  return __arm_vbicq_x_f32 (__a, __b, __p);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_x (float16x8_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vbrsrq_x_n_f16 (__a, __b, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbrsrq_x (float32x4_t __a, int32_t __b, mve_pred16_t __p)
-{
- return __arm_vbrsrq_x_n_f32 (__a, __b, __p);
-}
-
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_x (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
@@ -12450,17 +12087,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmulq_rot90_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \
   int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmulq_rot90_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));})
 
-#define __arm_vbrsrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vbrsrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vbrsrq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vbrsrq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vbrsrq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vbrsrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbrsrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1), \
-  int (*)[__ARM_mve_type_float16x8_t]: __arm_vbrsrq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), p1), \
-  int (*)[__ARM_mve_type_float32x4_t]: __arm_vbrsrq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), p1));})
-
 #define __arm_vmulltq_poly(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -12689,18 +12315,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vbicq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
   int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vbicq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
 
-#define __arm_vbrsrq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vbrsrq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vbrsrq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vbrsrq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vbrsrq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vbrsrq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vbrsrq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vbrsrq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vbrsrq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2, p3));})
-
 #define __arm_vcaddq_rot270_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -13143,17 +12757,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vbicq_x_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
   int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vbicq_x_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
 
-#define __arm_vbrsrq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vbrsrq_x_n_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vbrsrq_x_n_s16 (__ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vbrsrq_x_n_s32 (__ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vbrsrq_x_n_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vbrsrq_x_n_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbrsrq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), p2, p3), \
-  int (*)[__ARM_mve_type_float16x8_t]: __arm_vbrsrq_x_n_f16 (__ARM_mve_coerce(__p1, float16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_float32x4_t]: __arm_vbrsrq_x_n_f32 (__ARM_mve_coerce(__p1, float32x4_t), p2, p3));})
-
 #define __arm_vcaddq_rot270_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
@@ -13349,15 +12952,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcaddq_rot270_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcaddq_rot270_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
 
-#define __arm_vbrsrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vbrsrq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vbrsrq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vbrsrq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vbrsrq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vbrsrq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbrsrq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
-
 #define __arm_vbicq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -13443,17 +13037,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vbicq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vbicq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
 
-#define __arm_vbrsrq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vbrsrq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vbrsrq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __p2, p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vbrsrq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vbrsrq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vbrsrq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __p2, p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vbrsrq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __p2, p3));})
-
 #define __arm_vcaddq_rot270_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -13796,15 +13379,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vbicq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vbicq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
 
-#define __arm_vbrsrq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vbrsrq_x_n_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vbrsrq_x_n_s16 (__ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vbrsrq_x_n_s32 (__ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vbrsrq_x_n_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vbrsrq_x_n_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbrsrq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
-
 #define __arm_vld1q_z(p0,p1) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
   int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce1(p0, int8_t *), p1), \
   int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 (__ARM_mve_coerce1(p0, int16_t *), p1), \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 12/26] arm: [MVE intrinsics] add mvn shape
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (9 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 11/26] arm: [MVE intrinsics] rework vbrsrq Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 13/26] arm: [MVE intrinsics] factorize vmvnq Christophe Lyon
                   ` (14 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the mvn shape description.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (mvn): New.
	* config/arm/arm-mve-builtins-shapes.h (mvn): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 49 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 50 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index c2e138c12e1..6401a79c570 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1297,6 +1297,55 @@ struct inherent_def : public nonoverloaded_base
 };
 SHAPE (inherent)
 
+/* <T0>_t vfoo[_t0](<T0>_t)
+   <T0>_t vfoo_n_t0(<S0>_t)
+
+   For MODE_n, define only the 16- and 32-bit versions.
+
+   Example: vmvnq.
+   int16x8_t [__arm_]vmvnq[_s16](int16x8_t a)
+   int16x8_t [__arm_]vmvnq_m[_s16](int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+   int16x8_t [__arm_]vmvnq_x[_s16](int16x8_t a, mve_pred16_t p)
+   int16x8_t [__arm_]vmvnq_n_s16(const int16_t imm)
+   int16x8_t [__arm_]vmvnq_m[_n_s16](int16x8_t inactive, const int16_t imm, mve_pred16_t p)
+   int16x8_t [__arm_]vmvnq_x_n_s16(const int16_t imm, mve_pred16_t p)  */
+struct mvn_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+    /* Do not add separate overloaded functions for MODE_n, since we
+       want to share vmvnq_m[_n_s16] with vmvnq_m[_s16].  */
+    build_all (b, "v0,v0", group, MODE_none, preserve_user_namespace);
+    build_16_32 (b, "v0,s0", group, MODE_n, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    type_suffix_index type;
+    if (!r.check_gp_argument (1, i, nargs)
+	/* Same type for arg 0 and 1 if _m, so using 0 is OK.  */
+	|| (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES)
+      return error_mark_node;
+
+    /* Skip last argument, may be scalar.  */
+    unsigned int last_arg = i;
+    for (i = 0; i < last_arg; i++)
+      if (!r.require_matching_vector_type (i, type))
+	return error_mark_node;
+
+    if (last_arg == 0)
+      return r.resolve_to (r.mode_suffix_id, type);
+
+    return r.finish_opt_n_resolution (last_arg, 0, type);
+  }
+};
+SHAPE (mvn)
+
 /* <T0>_t vfoo[_t0](<T0>_t, <T0>_t, <T0>_t)
 
    i.e. the standard shape for ternary operations that operate on
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index bba38194ce2..ba53e8cc52e 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -59,6 +59,7 @@ namespace arm_mve
     extern const function_shape *const cmp;
     extern const function_shape *const create;
     extern const function_shape *const inherent;
+    extern const function_shape *const mvn;
     extern const function_shape *const ternary;
     extern const function_shape *const ternary_n;
     extern const function_shape *const unary;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 13/26] arm: [MVE intrinsics] factorize vmvnq
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (10 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 12/26] arm: [MVE intrinsics] add mvn shape Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 14/26] arm: [MVE intrinsics] rework vmvnq Christophe Lyon
                   ` (13 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vmvnq builtins so that they use parameterized names.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (mve_insn): Add vmvn.
	* config/arm/mve.md (mve_vmvnq_n_<supf><mode>): Rename into ...
	(@mve_<mve_insn>q_n_<supf><mode>): ... this.
	(mve_vmvnq_m_<supf><mode>): Rename into ...
	(@mve_<mve_insn>q_m_<supf><mode>): ... this.
	(mve_vmvnq_m_n_<supf><mode>): Rename into ...
	(@mve_<mve_insn>q_m_n_<supf><mode>): ... this.
---
 gcc/config/arm/iterators.md |  3 +++
 gcc/config/arm/mve.md       | 12 ++++++------
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index dfc8d9cae72..7fbfea49ff3 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -1002,6 +1002,9 @@ (define_int_attr mve_insn [
 		 (VMULQ_M_N_S "vmul") (VMULQ_M_N_U "vmul") (VMULQ_M_N_F "vmul")
 		 (VMULQ_M_S "vmul") (VMULQ_M_U "vmul") (VMULQ_M_F "vmul")
 		 (VMULQ_N_S "vmul") (VMULQ_N_U "vmul") (VMULQ_N_F "vmul")
+		 (VMVNQ_M_N_S "vmvn") (VMVNQ_M_N_U "vmvn")
+		 (VMVNQ_M_S "vmvn") (VMVNQ_M_U "vmvn")
+		 (VMVNQ_N_S "vmvn") (VMVNQ_N_U "vmvn")
 		 (VNEGQ_M_F "vneg")
 		 (VNEGQ_M_S "vneg")
 		 (VORRQ_M_N_S "vorr") (VORRQ_M_N_U "vorr")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index beca74d4964..57ba65d3c76 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -459,14 +459,14 @@ (define_insn "mve_vcvtaq_<supf><mode>"
 ;;
 ;; [vmvnq_n_u, vmvnq_n_s])
 ;;
-(define_insn "mve_vmvnq_n_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_n_<supf><mode>"
   [
    (set (match_operand:MVE_5 0 "s_register_operand" "=w")
 	(unspec:MVE_5 [(match_operand:<V_elem> 1 "immediate_operand" "i")]
 	 VMVNQ_N))
   ]
   "TARGET_HAVE_MVE"
-  "vmvn.i%#<V_sz_elem>  %q0, %1"
+  "<mve_insn>.i%#<V_sz_elem>\t%q0, %1"
   [(set_attr "type" "mve_move")
 ])
 
@@ -1953,7 +1953,7 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
 ;;
 ;; [vmvnq_m_s, vmvnq_m_u])
 ;;
-(define_insn "mve_vmvnq_m_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_m_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
@@ -1962,7 +1962,7 @@ (define_insn "mve_vmvnq_m_<supf><mode>"
 	 VMVNQ_M))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vmvnt %q0, %q2"
+  "vpst\;<mve_insn>t\t%q0, %q2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
@@ -2423,7 +2423,7 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
 ;;
 ;; [vmvnq_m_n_u, vmvnq_m_n_s])
 ;;
-(define_insn "mve_vmvnq_m_n_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
   [
    (set (match_operand:MVE_5 0 "s_register_operand" "=w")
 	(unspec:MVE_5 [(match_operand:MVE_5 1 "s_register_operand" "0")
@@ -2432,7 +2432,7 @@ (define_insn "mve_vmvnq_m_n_<supf><mode>"
 	 VMVNQ_M_N))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vmvnt.i%#<V_sz_elem>	%q0, %2"
+  "vpst\;<mve_insn>t.i%#<V_sz_elem>\t%q0, %2"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 14/26] arm: [MVE intrinsics] rework vmvnq
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (11 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 13/26] arm: [MVE intrinsics] factorize vmvnq Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 15/26] arm: [MVE intrinsics] add ternary_opt_n shape Christophe Lyon
                   ` (12 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vmvnq using the new MVE builtins framework.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc
	(FUNCTION_WITH_RTX_M_N_NO_F): New.
	(vmvnq): New.
	* config/arm/arm-mve-builtins-base.def (vmvnq): New.
	* config/arm/arm-mve-builtins-base.h (vmvnq): New.
	* config/arm/arm_mve.h (vmvnq): Remove.
	(vmvnq_m): Remove.
	(vmvnq_x): Remove.
	(vmvnq_s8): Remove.
	(vmvnq_s16): Remove.
	(vmvnq_s32): Remove.
	(vmvnq_n_s16): Remove.
	(vmvnq_n_s32): Remove.
	(vmvnq_u8): Remove.
	(vmvnq_u16): Remove.
	(vmvnq_u32): Remove.
	(vmvnq_n_u16): Remove.
	(vmvnq_n_u32): Remove.
	(vmvnq_m_u8): Remove.
	(vmvnq_m_s8): Remove.
	(vmvnq_m_u16): Remove.
	(vmvnq_m_s16): Remove.
	(vmvnq_m_u32): Remove.
	(vmvnq_m_s32): Remove.
	(vmvnq_m_n_s16): Remove.
	(vmvnq_m_n_u16): Remove.
	(vmvnq_m_n_s32): Remove.
	(vmvnq_m_n_u32): Remove.
	(vmvnq_x_s8): Remove.
	(vmvnq_x_s16): Remove.
	(vmvnq_x_s32): Remove.
	(vmvnq_x_u8): Remove.
	(vmvnq_x_u16): Remove.
	(vmvnq_x_u32): Remove.
	(vmvnq_x_n_s16): Remove.
	(vmvnq_x_n_s32): Remove.
	(vmvnq_x_n_u16): Remove.
	(vmvnq_x_n_u32): Remove.
	(__arm_vmvnq_s8): Remove.
	(__arm_vmvnq_s16): Remove.
	(__arm_vmvnq_s32): Remove.
	(__arm_vmvnq_n_s16): Remove.
	(__arm_vmvnq_n_s32): Remove.
	(__arm_vmvnq_u8): Remove.
	(__arm_vmvnq_u16): Remove.
	(__arm_vmvnq_u32): Remove.
	(__arm_vmvnq_n_u16): Remove.
	(__arm_vmvnq_n_u32): Remove.
	(__arm_vmvnq_m_u8): Remove.
	(__arm_vmvnq_m_s8): Remove.
	(__arm_vmvnq_m_u16): Remove.
	(__arm_vmvnq_m_s16): Remove.
	(__arm_vmvnq_m_u32): Remove.
	(__arm_vmvnq_m_s32): Remove.
	(__arm_vmvnq_m_n_s16): Remove.
	(__arm_vmvnq_m_n_u16): Remove.
	(__arm_vmvnq_m_n_s32): Remove.
	(__arm_vmvnq_m_n_u32): Remove.
	(__arm_vmvnq_x_s8): Remove.
	(__arm_vmvnq_x_s16): Remove.
	(__arm_vmvnq_x_s32): Remove.
	(__arm_vmvnq_x_u8): Remove.
	(__arm_vmvnq_x_u16): Remove.
	(__arm_vmvnq_x_u32): Remove.
	(__arm_vmvnq_x_n_s16): Remove.
	(__arm_vmvnq_x_n_s32): Remove.
	(__arm_vmvnq_x_n_u16): Remove.
	(__arm_vmvnq_x_n_u32): Remove.
	(__arm_vmvnq): Remove.
	(__arm_vmvnq_m): Remove.
	(__arm_vmvnq_x): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |  10 +
 gcc/config/arm/arm-mve-builtins-base.def |   1 +
 gcc/config/arm/arm-mve-builtins-base.h   |   1 +
 gcc/config/arm/arm_mve.h                 | 438 -----------------------
 4 files changed, 12 insertions(+), 438 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 2fb81c197da..6286d4a147a 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -103,6 +103,15 @@ namespace arm_mve {
     UNSPEC##_M_S, UNSPEC##_M_U, UNSPEC##_M_F,				\
     -1, -1, -1))
 
+  /* Helper for builtins with RTX codes, _m predicated and _n
+     overrides, but no floating-point version.  */
+#define FUNCTION_WITH_RTX_M_N_NO_F(NAME, RTX, UNSPEC) FUNCTION		\
+  (NAME, unspec_based_mve_function_exact_insn,				\
+   (RTX, RTX, UNKNOWN,							\
+    UNSPEC##_N_S, UNSPEC##_N_U, -1,					\
+    UNSPEC##_M_S, UNSPEC##_M_U, -1,					\
+    UNSPEC##_M_N_S, UNSPEC##_M_N_U, -1))
+
   /* Helper for builtins with RTX codes, _m predicated and _n overrides.  */
 #define FUNCTION_WITH_RTX_M_N_NO_N_F(NAME, RTX, UNSPEC) FUNCTION	\
   (NAME, unspec_based_mve_function_exact_insn,				\
@@ -306,6 +315,7 @@ FUNCTION_WITHOUT_N_NO_F (vmovnbq, VMOVNBQ)
 FUNCTION_WITHOUT_N_NO_F (vmovntq, VMOVNTQ)
 FUNCTION_WITHOUT_N_NO_F (vmulhq, VMULHQ)
 FUNCTION_WITH_RTX_M_N (vmulq, MULT, VMULQ)
+FUNCTION_WITH_RTX_M_N_NO_F (vmvnq, NOT, VMVNQ)
 FUNCTION (vnegq, unspec_based_mve_function_exact_insn, (NEG, NEG, NEG, -1, -1, -1, VNEGQ_M_S, -1, VNEGQ_M_F, -1, -1, -1))
 FUNCTION_WITH_RTX_M_N_NO_N_F (vorrq, IOR, VORRQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqabsq, VQABSQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index e53cb2c1992..141d057924e 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -75,6 +75,7 @@ DEF_MVE_FUNCTION (vmovnbq, binary_move_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vmovntq, binary_move_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vmulhq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vmulq, binary_opt_n, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vmvnq, mvn, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vnegq, unary, all_signed, mx_or_none)
 DEF_MVE_FUNCTION (vorrq, binary_orrq, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vqabsq, unary, all_signed, m_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 49c60536961..b1783a2c917 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -88,6 +88,7 @@ extern const function_base *const vmovnbq;
 extern const function_base *const vmovntq;
 extern const function_base *const vmulhq;
 extern const function_base *const vmulq;
+extern const function_base *const vmvnq;
 extern const function_base *const vnegq;
 extern const function_base *const vorrq;
 extern const function_base *const vqabsq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 76c45a28eb3..69ded8703cd 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -42,7 +42,6 @@
 
 #ifndef __ARM_MVE_PRESERVE_USER_NAMESPACE
 #define vst4q(__addr, __value) __arm_vst4q(__addr, __value)
-#define vmvnq(__a) __arm_vmvnq(__a)
 #define vornq(__a, __b) __arm_vornq(__a, __b)
 #define vmulltq_int(__a, __b) __arm_vmulltq_int(__a, __b)
 #define vmullbq_int(__a, __b) __arm_vmullbq_int(__a, __b)
@@ -56,7 +55,6 @@
 #define vbicq_m_n(__a, __imm, __p) __arm_vbicq_m_n(__a, __imm, __p)
 #define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
 #define vpselq(__a, __b, __p) __arm_vpselq(__a, __b, __p)
-#define vmvnq_m(__inactive, __a, __p) __arm_vmvnq_m(__inactive, __a, __p)
 #define vsriq(__a, __b, __imm) __arm_vsriq(__a, __b, __imm)
 #define vsliq(__a, __b, __imm) __arm_vsliq(__a, __b, __imm)
 #define vsriq_m(__a, __b, __imm, __p) __arm_vsriq_m(__a, __b, __imm, __p)
@@ -153,7 +151,6 @@
 #define vhcaddq_rot90_x(__a, __b, __p) __arm_vhcaddq_rot90_x(__a, __b, __p)
 #define vhcaddq_rot270_x(__a, __b, __p) __arm_vhcaddq_rot270_x(__a, __b, __p)
 #define vbicq_x(__a, __b, __p) __arm_vbicq_x(__a, __b, __p)
-#define vmvnq_x(__a, __p) __arm_vmvnq_x(__a, __p)
 #define vornq_x(__a, __b, __p) __arm_vornq_x(__a, __b, __p)
 #define vadciq(__a, __b, __carry_out) __arm_vadciq(__a, __b, __carry_out)
 #define vadciq_m(__inactive, __a, __b, __carry_out, __p) __arm_vadciq_m(__inactive, __a, __b, __carry_out, __p)
@@ -227,11 +224,6 @@
 #define vcvtq_f32_s32(__a) __arm_vcvtq_f32_s32(__a)
 #define vcvtq_f16_u16(__a) __arm_vcvtq_f16_u16(__a)
 #define vcvtq_f32_u32(__a) __arm_vcvtq_f32_u32(__a)
-#define vmvnq_s8(__a) __arm_vmvnq_s8(__a)
-#define vmvnq_s16(__a) __arm_vmvnq_s16(__a)
-#define vmvnq_s32(__a) __arm_vmvnq_s32(__a)
-#define vmvnq_n_s16( __imm) __arm_vmvnq_n_s16( __imm)
-#define vmvnq_n_s32( __imm) __arm_vmvnq_n_s32( __imm)
 #define vcvtaq_s16_f16(__a) __arm_vcvtaq_s16_f16(__a)
 #define vcvtaq_s32_f32(__a) __arm_vcvtaq_s32_f32(__a)
 #define vcvtnq_s16_f16(__a) __arm_vcvtnq_s16_f16(__a)
@@ -242,11 +234,6 @@
 #define vcvtmq_s32_f32(__a) __arm_vcvtmq_s32_f32(__a)
 #define vcvtq_s16_f16(__a) __arm_vcvtq_s16_f16(__a)
 #define vcvtq_s32_f32(__a) __arm_vcvtq_s32_f32(__a)
-#define vmvnq_u8(__a) __arm_vmvnq_u8(__a)
-#define vmvnq_u16(__a) __arm_vmvnq_u16(__a)
-#define vmvnq_u32(__a) __arm_vmvnq_u32(__a)
-#define vmvnq_n_u16( __imm) __arm_vmvnq_n_u16( __imm)
-#define vmvnq_n_u32( __imm) __arm_vmvnq_n_u32( __imm)
 #define vcvtq_u16_f16(__a) __arm_vcvtq_u16_f16(__a)
 #define vcvtq_u32_f32(__a) __arm_vcvtq_u32_f32(__a)
 #define vcvtpq_u16_f16(__a) __arm_vcvtpq_u16_f16(__a)
@@ -362,26 +349,20 @@
 #define vshlcq_u32(__a,  __b,  __imm) __arm_vshlcq_u32(__a,  __b,  __imm)
 #define vpselq_u8(__a, __b, __p) __arm_vpselq_u8(__a, __b, __p)
 #define vpselq_s8(__a, __b, __p) __arm_vpselq_s8(__a, __b, __p)
-#define vmvnq_m_u8(__inactive, __a, __p) __arm_vmvnq_m_u8(__inactive, __a, __p)
 #define vsriq_n_u8(__a, __b,  __imm) __arm_vsriq_n_u8(__a, __b,  __imm)
 #define vsliq_n_u8(__a, __b,  __imm) __arm_vsliq_n_u8(__a, __b,  __imm)
-#define vmvnq_m_s8(__inactive, __a, __p) __arm_vmvnq_m_s8(__inactive, __a, __p)
 #define vsriq_n_s8(__a, __b,  __imm) __arm_vsriq_n_s8(__a, __b,  __imm)
 #define vsliq_n_s8(__a, __b,  __imm) __arm_vsliq_n_s8(__a, __b,  __imm)
 #define vpselq_u16(__a, __b, __p) __arm_vpselq_u16(__a, __b, __p)
 #define vpselq_s16(__a, __b, __p) __arm_vpselq_s16(__a, __b, __p)
-#define vmvnq_m_u16(__inactive, __a, __p) __arm_vmvnq_m_u16(__inactive, __a, __p)
 #define vsriq_n_u16(__a, __b,  __imm) __arm_vsriq_n_u16(__a, __b,  __imm)
 #define vsliq_n_u16(__a, __b,  __imm) __arm_vsliq_n_u16(__a, __b,  __imm)
-#define vmvnq_m_s16(__inactive, __a, __p) __arm_vmvnq_m_s16(__inactive, __a, __p)
 #define vsriq_n_s16(__a, __b,  __imm) __arm_vsriq_n_s16(__a, __b,  __imm)
 #define vsliq_n_s16(__a, __b,  __imm) __arm_vsliq_n_s16(__a, __b,  __imm)
 #define vpselq_u32(__a, __b, __p) __arm_vpselq_u32(__a, __b, __p)
 #define vpselq_s32(__a, __b, __p) __arm_vpselq_s32(__a, __b, __p)
-#define vmvnq_m_u32(__inactive, __a, __p) __arm_vmvnq_m_u32(__inactive, __a, __p)
 #define vsriq_n_u32(__a, __b,  __imm) __arm_vsriq_n_u32(__a, __b,  __imm)
 #define vsliq_n_u32(__a, __b,  __imm) __arm_vsliq_n_u32(__a, __b,  __imm)
-#define vmvnq_m_s32(__inactive, __a, __p) __arm_vmvnq_m_s32(__inactive, __a, __p)
 #define vsriq_n_s32(__a, __b,  __imm) __arm_vsriq_n_s32(__a, __b,  __imm)
 #define vsliq_n_s32(__a, __b,  __imm) __arm_vsliq_n_s32(__a, __b,  __imm)
 #define vpselq_u64(__a, __b, __p) __arm_vpselq_u64(__a, __b, __p)
@@ -390,7 +371,6 @@
 #define vcvtbq_m_f32_f16(__inactive, __a, __p) __arm_vcvtbq_m_f32_f16(__inactive, __a, __p)
 #define vcvttq_m_f16_f32(__a, __b, __p) __arm_vcvttq_m_f16_f32(__a, __b, __p)
 #define vcvttq_m_f32_f16(__inactive, __a, __p) __arm_vcvttq_m_f32_f16(__inactive, __a, __p)
-#define vmvnq_m_n_s16(__inactive,  __imm, __p) __arm_vmvnq_m_n_s16(__inactive,  __imm, __p)
 #define vcmlaq_f16(__a, __b, __c) __arm_vcmlaq_f16(__a, __b, __c)
 #define vcmlaq_rot180_f16(__a, __b, __c) __arm_vcmlaq_rot180_f16(__a, __b, __c)
 #define vcmlaq_rot270_f16(__a, __b, __c) __arm_vcmlaq_rot270_f16(__a, __b, __c)
@@ -404,12 +384,10 @@
 #define vcvtpq_m_s16_f16(__inactive, __a, __p) __arm_vcvtpq_m_s16_f16(__inactive, __a, __p)
 #define vcvtq_m_s16_f16(__inactive, __a, __p) __arm_vcvtq_m_s16_f16(__inactive, __a, __p)
 #define vpselq_f16(__a, __b, __p) __arm_vpselq_f16(__a, __b, __p)
-#define vmvnq_m_n_u16(__inactive,  __imm, __p) __arm_vmvnq_m_n_u16(__inactive,  __imm, __p)
 #define vcvtmq_m_u16_f16(__inactive, __a, __p) __arm_vcvtmq_m_u16_f16(__inactive, __a, __p)
 #define vcvtnq_m_u16_f16(__inactive, __a, __p) __arm_vcvtnq_m_u16_f16(__inactive, __a, __p)
 #define vcvtpq_m_u16_f16(__inactive, __a, __p) __arm_vcvtpq_m_u16_f16(__inactive, __a, __p)
 #define vcvtq_m_u16_f16(__inactive, __a, __p) __arm_vcvtq_m_u16_f16(__inactive, __a, __p)
-#define vmvnq_m_n_s32(__inactive,  __imm, __p) __arm_vmvnq_m_n_s32(__inactive,  __imm, __p)
 #define vcmlaq_f32(__a, __b, __c) __arm_vcmlaq_f32(__a, __b, __c)
 #define vcmlaq_rot180_f32(__a, __b, __c) __arm_vcmlaq_rot180_f32(__a, __b, __c)
 #define vcmlaq_rot270_f32(__a, __b, __c) __arm_vcmlaq_rot270_f32(__a, __b, __c)
@@ -423,7 +401,6 @@
 #define vcvtpq_m_s32_f32(__inactive, __a, __p) __arm_vcvtpq_m_s32_f32(__inactive, __a, __p)
 #define vcvtq_m_s32_f32(__inactive, __a, __p) __arm_vcvtq_m_s32_f32(__inactive, __a, __p)
 #define vpselq_f32(__a, __b, __p) __arm_vpselq_f32(__a, __b, __p)
-#define vmvnq_m_n_u32(__inactive,  __imm, __p) __arm_vmvnq_m_n_u32(__inactive,  __imm, __p)
 #define vcvtmq_m_u32_f32(__inactive, __a, __p) __arm_vcvtmq_m_u32_f32(__inactive, __a, __p)
 #define vcvtnq_m_u32_f32(__inactive, __a, __p) __arm_vcvtnq_m_u32_f32(__inactive, __a, __p)
 #define vcvtpq_m_u32_f32(__inactive, __a, __p) __arm_vcvtpq_m_u32_f32(__inactive, __a, __p)
@@ -864,16 +841,6 @@
 #define vbicq_x_u8(__a, __b, __p) __arm_vbicq_x_u8(__a, __b, __p)
 #define vbicq_x_u16(__a, __b, __p) __arm_vbicq_x_u16(__a, __b, __p)
 #define vbicq_x_u32(__a, __b, __p) __arm_vbicq_x_u32(__a, __b, __p)
-#define vmvnq_x_s8(__a, __p) __arm_vmvnq_x_s8(__a, __p)
-#define vmvnq_x_s16(__a, __p) __arm_vmvnq_x_s16(__a, __p)
-#define vmvnq_x_s32(__a, __p) __arm_vmvnq_x_s32(__a, __p)
-#define vmvnq_x_u8(__a, __p) __arm_vmvnq_x_u8(__a, __p)
-#define vmvnq_x_u16(__a, __p) __arm_vmvnq_x_u16(__a, __p)
-#define vmvnq_x_u32(__a, __p) __arm_vmvnq_x_u32(__a, __p)
-#define vmvnq_x_n_s16( __imm, __p) __arm_vmvnq_x_n_s16( __imm, __p)
-#define vmvnq_x_n_s32( __imm, __p) __arm_vmvnq_x_n_s32( __imm, __p)
-#define vmvnq_x_n_u16( __imm, __p) __arm_vmvnq_x_n_u16( __imm, __p)
-#define vmvnq_x_n_u32( __imm, __p) __arm_vmvnq_x_n_u32( __imm, __p)
 #define vornq_x_s8(__a, __b, __p) __arm_vornq_x_s8(__a, __b, __p)
 #define vornq_x_s16(__a, __b, __p) __arm_vornq_x_s16(__a, __b, __p)
 #define vornq_x_s32(__a, __b, __p) __arm_vornq_x_s32(__a, __b, __p)
@@ -1096,76 +1063,6 @@ __arm_vst4q_u32 (uint32_t * __addr, uint32x4x4_t __value)
   __builtin_mve_vst4qv4si ((__builtin_neon_si *) __addr, __rv.__o);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_s8 (int8x16_t __a)
-{
-  return __builtin_mve_vmvnq_sv16qi (__a);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_s16 (int16x8_t __a)
-{
-  return __builtin_mve_vmvnq_sv8hi (__a);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_s32 (int32x4_t __a)
-{
-  return __builtin_mve_vmvnq_sv4si (__a);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_n_s16 (const int16_t __imm)
-{
-  return __builtin_mve_vmvnq_n_sv8hi (__imm);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_n_s32 (const int32_t __imm)
-{
-  return __builtin_mve_vmvnq_n_sv4si (__imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_u8 (uint8x16_t __a)
-{
-  return __builtin_mve_vmvnq_uv16qi (__a);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_u16 (uint16x8_t __a)
-{
-  return __builtin_mve_vmvnq_uv8hi (__a);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_u32 (uint32x4_t __a)
-{
-  return __builtin_mve_vmvnq_uv4si (__a);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_n_u16 (const int __imm)
-{
-  return __builtin_mve_vmvnq_n_uv8hi (__imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_n_u32 (const int __imm)
-{
-  return __builtin_mve_vmvnq_n_uv4si (__imm);
-}
-
 __extension__ extern __inline mve_pred16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vctp16q (uint32_t __a)
@@ -1681,13 +1578,6 @@ __arm_vpselq_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
   return __builtin_mve_vpselq_sv16qi (__a, __b, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_uv16qi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm)
@@ -1702,13 +1592,6 @@ __arm_vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm)
   return __builtin_mve_vsliq_n_uv16qi (__a, __b, __imm);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m_s8 (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_sv16qi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm)
@@ -1737,13 +1620,6 @@ __arm_vpselq_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
   return __builtin_mve_vpselq_sv8hi (__a, __b, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_uv8hi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm)
@@ -1758,13 +1634,6 @@ __arm_vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm)
   return __builtin_mve_vsliq_n_uv8hi (__a, __b, __imm);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m_s16 (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_sv8hi (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __imm)
@@ -1793,13 +1662,6 @@ __arm_vpselq_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
   return __builtin_mve_vpselq_sv4si (__a, __b, __p);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_uv4si (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm)
@@ -1814,13 +1676,6 @@ __arm_vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm)
   return __builtin_mve_vsliq_n_uv4si (__a, __b, __imm);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m_s32 (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_sv4si (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __imm)
@@ -1849,34 +1704,6 @@ __arm_vpselq_s64 (int64x2_t __a, int64x2_t __b, mve_pred16_t __p)
   return __builtin_mve_vpselq_sv2di (__a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m_n_s16 (int16x8_t __inactive, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_n_sv8hi (__inactive, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m_n_u16 (uint16x8_t __inactive, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_n_uv8hi (__inactive, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m_n_s32 (int32x4_t __inactive, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_n_sv4si (__inactive, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m_n_u32 (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_n_uv4si (__inactive, __imm, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -4474,76 +4301,6 @@ __arm_vbicq_x_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
   return __builtin_mve_vbicq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_x_s8 (int8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_sv16qi (__arm_vuninitializedq_s8 (), __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_x_s16 (int16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_sv8hi (__arm_vuninitializedq_s16 (), __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_x_s32 (int32x4_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_sv4si (__arm_vuninitializedq_s32 (), __a, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_x_u8 (uint8x16_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_uv16qi (__arm_vuninitializedq_u8 (), __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_x_u16 (uint16x8_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_x_u32 (uint32x4_t __a, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_x_n_s16 (const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_n_sv8hi (__arm_vuninitializedq_s16 (), __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_x_n_s32 (const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_n_sv4si (__arm_vuninitializedq_s32 (), __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_x_n_u16 (const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_n_uv8hi (__arm_vuninitializedq_u16 (), __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_x_n_u32 (const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vmvnq_m_n_uv4si (__arm_vuninitializedq_u32 (), __imm, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_x_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -7041,48 +6798,6 @@ __arm_vst4q (uint32_t * __addr, uint32x4x4_t __value)
  __arm_vst4q_u32 (__addr, __value);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq (int8x16_t __a)
-{
- return __arm_vmvnq_s8 (__a);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq (int16x8_t __a)
-{
- return __arm_vmvnq_s16 (__a);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq (int32x4_t __a)
-{
- return __arm_vmvnq_s32 (__a);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq (uint8x16_t __a)
-{
- return __arm_vmvnq_u8 (__a);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq (uint16x8_t __a)
-{
- return __arm_vmvnq_u16 (__a);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq (uint32x4_t __a)
-{
- return __arm_vmvnq_u32 (__a);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq (uint8x16_t __a, uint8x16_t __b)
@@ -7517,13 +7232,6 @@ __arm_vpselq (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
  return __arm_vpselq_s8 (__a, __b, __p);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vmvnq_m_u8 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (uint8x16_t __a, uint8x16_t __b, const int __imm)
@@ -7538,13 +7246,6 @@ __arm_vsliq (uint8x16_t __a, uint8x16_t __b, const int __imm)
  return __arm_vsliq_n_u8 (__a, __b, __imm);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m (int8x16_t __inactive, int8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vmvnq_m_s8 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (int8x16_t __a, int8x16_t __b, const int __imm)
@@ -7573,13 +7274,6 @@ __arm_vpselq (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
  return __arm_vpselq_s16 (__a, __b, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vmvnq_m_u16 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (uint16x8_t __a, uint16x8_t __b, const int __imm)
@@ -7594,13 +7288,6 @@ __arm_vsliq (uint16x8_t __a, uint16x8_t __b, const int __imm)
  return __arm_vsliq_n_u16 (__a, __b, __imm);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m (int16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vmvnq_m_s16 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (int16x8_t __a, int16x8_t __b, const int __imm)
@@ -7629,13 +7316,6 @@ __arm_vpselq (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
  return __arm_vpselq_s32 (__a, __b, __p);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vmvnq_m_u32 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (uint32x4_t __a, uint32x4_t __b, const int __imm)
@@ -7650,13 +7330,6 @@ __arm_vsliq (uint32x4_t __a, uint32x4_t __b, const int __imm)
  return __arm_vsliq_n_u32 (__a, __b, __imm);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m (int32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vmvnq_m_s32 (__inactive, __a, __p);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (int32x4_t __a, int32x4_t __b, const int __imm)
@@ -7685,34 +7358,6 @@ __arm_vpselq (int64x2_t __a, int64x2_t __b, mve_pred16_t __p)
  return __arm_vpselq_s64 (__a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m (int16x8_t __inactive, const int __imm, mve_pred16_t __p)
-{
- return __arm_vmvnq_m_n_s16 (__inactive, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m (uint16x8_t __inactive, const int __imm, mve_pred16_t __p)
-{
- return __arm_vmvnq_m_n_u16 (__inactive, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m (int32x4_t __inactive, const int __imm, mve_pred16_t __p)
-{
- return __arm_vmvnq_m_n_s32 (__inactive, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_m (uint32x4_t __inactive, const int __imm, mve_pred16_t __p)
-{
- return __arm_vmvnq_m_n_u32 (__inactive, __imm, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -9883,48 +9528,6 @@ __arm_vbicq_x (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
  return __arm_vbicq_x_u32 (__a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_x (int8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vmvnq_x_s8 (__a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_x (int16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vmvnq_x_s16 (__a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_x (int32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vmvnq_x_s32 (__a, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_x (uint8x16_t __a, mve_pred16_t __p)
-{
- return __arm_vmvnq_x_u8 (__a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_x (uint16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vmvnq_x_u16 (__a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmvnq_x (uint32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vmvnq_x_u32 (__a, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_x (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -11988,15 +11591,6 @@ extern void *__ARM_undef;
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_float16x8_t]: __arm_vcvttq_f32_f16 (__ARM_mve_coerce(__p0, float16x8_t)));})
 
-#define __arm_vmvnq(p0) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vmvnq_s8 (__ARM_mve_coerce(__p0, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vmvnq_s16 (__ARM_mve_coerce(__p0, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vmvnq_s32 (__ARM_mve_coerce(__p0, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vmvnq_u8 (__ARM_mve_coerce(__p0, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vmvnq_u16 (__ARM_mve_coerce(__p0, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vmvnq_u32 (__ARM_mve_coerce(__p0, uint32x4_t)));})
-
 #define __arm_vcvtq(p0) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int16x8_t]: __arm_vcvtq_f16_s16 (__ARM_mve_coerce(__p0, int16x8_t)), \
@@ -12879,15 +12473,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8x4_t]: __arm_vst4q_u16 (__ARM_mve_coerce(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8x4_t)), \
   int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x4_t]: __arm_vst4q_u32 (__ARM_mve_coerce(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4x4_t)));})
 
-#define __arm_vmvnq(p0) ({ __typeof(p0) __p0 = (p0); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vmvnq_s8 (__ARM_mve_coerce(__p0, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vmvnq_s16 (__ARM_mve_coerce(__p0, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vmvnq_s32 (__ARM_mve_coerce(__p0, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vmvnq_u8 (__ARM_mve_coerce(__p0, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vmvnq_u16 (__ARM_mve_coerce(__p0, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vmvnq_u32 (__ARM_mve_coerce(__p0, uint32x4_t)));})
-
 #define __arm_vornq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -13431,15 +13016,6 @@ extern void *__ARM_undef;
 
 
 
-#define __arm_vmvnq_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t]: __arm_vmvnq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t]: __arm_vmvnq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t]: __arm_vmvnq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vmvnq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vmvnq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vmvnq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vdwdupq_x_u8(p1,p2,p3,p4) ({ __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_int_n]: __arm_vdwdupq_x_n_u8 ((uint32_t) __p1, p2, p3, p4), \
@@ -13617,20 +13193,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsliq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),  p2, p3), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsliq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),  p2, p3));})
 
-#define __arm_vmvnq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmvnq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmvnq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmvnq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmvnq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmvnq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmvnq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vmvnq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce1(__p1, int) , p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vmvnq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce1(__p1, int) , p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vmvnq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce1(__p1, int) , p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmvnq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce1(__p1, int) , p2));})
-
 #define __arm_vsriq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 15/26] arm: [MVE intrinsics] add ternary_opt_n shape
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (12 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 14/26] arm: [MVE intrinsics] rework vmvnq Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 16/26] arm: [MVE intrinsics] factorize vfmaq vfmsq vfmasq Christophe Lyon
                   ` (11 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the ternary_opt_n shape description.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (ternary_opt_n): New.
	* config/arm/arm-mve-builtins-shapes.h (ternary_opt_n): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 30 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 31 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 6401a79c570..43532601fbe 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1399,6 +1399,36 @@ struct ternary_n_def : public overloaded_base<0>
 };
 SHAPE (ternary_n)
 
+/* <T0>_t vfoo[_t0](<T0>_t, <T0>_t, <T0>_t)
+   <T0>_t vfoo[_n_t0](<T0>_t, <T0>_t, <S0>_t)
+
+   i.e. the standard shape for ternary operations that operate on
+   uniform types.
+
+   Example: vfmaq.
+   float16x8_t [__arm_]vfmaq[_n_f16](float16x8_t add, float16x8_t m1, float16_t m2)
+   float16x8_t [__arm_]vfmaq_m[_n_f16](float16x8_t add, float16x8_t m1, float16_t m2, mve_pred16_t p)
+   float16x8_t [__arm_]vfmaq[_f16](float16x8_t add, float16x8_t m1, float16x8_t m2)
+   float16x8_t [__arm_]vfmaq_m[_f16](float16x8_t add, float16x8_t m1, float16x8_t m2, mve_pred16_t p)  */
+struct ternary_opt_n_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+    build_all (b, "v0,v0,v0,v0", group, MODE_none, preserve_user_namespace);
+    build_all (b, "v0,v0,v0,s0", group, MODE_n, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    return r.resolve_uniform_opt_n (3);
+  }
+};
+SHAPE (ternary_opt_n)
+
 /* <T0>_t vfoo[_t0](<T0>_t)
 
    i.e. the standard shape for unary operations that operate on
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index ba53e8cc52e..f67a484c146 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -62,6 +62,7 @@ namespace arm_mve
     extern const function_shape *const mvn;
     extern const function_shape *const ternary;
     extern const function_shape *const ternary_n;
+    extern const function_shape *const ternary_opt_n;
     extern const function_shape *const unary;
     extern const function_shape *const unary_acc;
     extern const function_shape *const unary_convert;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 16/26] arm: [MVE intrinsics] factorize vfmaq vfmsq vfmasq
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (13 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 15/26] arm: [MVE intrinsics] add ternary_opt_n shape Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 17/26] arm: [MVE intrinsics] rework vfmaq vfmasq vfmsq Christophe Lyon
                   ` (10 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vfmaq, vfmsq and vfmasq builtins so that they use parameterized names.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (MVE_FP_M_BINARY): Add VFMAQ_M_F,
	VFMSQ_M_F.
	(MVE_FP_M_N_BINARY): Add VFMAQ_M_N_F, VFMASQ_M_N_F.
	(MVE_VFMxQ_F, MVE_VFMAxQ_N_F): New.
	(mve_insn): Add vfma, vfmas, vfms.
	* config/arm/mve.md (mve_vfmaq_f<mode>, mve_vfmsq_f<mode>): Merge
	into ...
	(@mve_<mve_insn>q_f<mode>): ... this.
	(mve_vfmaq_n_f<mode>, mve_vfmasq_n_f<mode>): Merge into ...
	(@mve_<mve_insn>q_n_f<mode>): ... this.
	(mve_vfmaq_m_f<mode>, mve_vfmsq_m_f<mode>): Merge into
	@mve_<mve_insn>q_m_f<mode>.
	(mve_vfmaq_m_n_f<mode>, mve_vfmasq_m_n_f<mode>): Merge into
	@mve_<mve_insn>q_m_n_f<mode>.
---
 gcc/config/arm/iterators.md |  20 ++++++
 gcc/config/arm/mve.md       | 123 +++++-------------------------------
 2 files changed, 35 insertions(+), 108 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 7fbfea49ff3..022744f04d9 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -577,6 +577,8 @@ (define_int_iterator MVE_SHRN_M_N [
 (define_int_iterator MVE_FP_M_BINARY   [
 		     VABDQ_M_F
 		     VADDQ_M_F
+		     VFMAQ_M_F
+		     VFMSQ_M_F
 		     VMAXNMQ_M_F
 		     VMINNMQ_M_F
 		     VMULQ_M_F
@@ -592,6 +594,8 @@ (define_int_iterator MVE_FP_M_BINARY_LOGIC   [
 
 (define_int_iterator MVE_FP_M_N_BINARY [
 		     VADDQ_M_N_F
+		     VFMAQ_M_N_F
+		     VFMASQ_M_N_F
 		     VMULQ_M_N_F
 		     VSUBQ_M_N_F
 		     ])
@@ -659,6 +663,14 @@ (define_int_iterator MVE_CMP_M_N_F [
 		     VCMPNEQ_M_N_F
 		     ])
 
+(define_int_iterator MVE_VFMxQ_F [
+		     VFMAQ_F VFMSQ_F
+		     ])
+
+(define_int_iterator MVE_VFMAxQ_N_F [
+		     VFMAQ_N_F VFMASQ_N_F
+		     ])
+
 (define_int_iterator MVE_VMAXVQ_VMINVQ [
 		     VMAXAVQ_S
 		     VMAXVQ_S VMAXVQ_U
@@ -917,6 +929,14 @@ (define_int_attr mve_insn [
 		 (VDUPQ_M_N_S "vdup") (VDUPQ_M_N_U "vdup") (VDUPQ_M_N_F "vdup")
 		 (VDUPQ_N_S "vdup") (VDUPQ_N_U "vdup") (VDUPQ_N_F "vdup")
 		 (VEORQ_M_S "veor") (VEORQ_M_U "veor") (VEORQ_M_F "veor")
+		 (VFMAQ_F "vfma")
+		 (VFMAQ_M_F "vfma")
+		 (VFMAQ_M_N_F "vfma")
+		 (VFMAQ_N_F "vfma")
+		 (VFMASQ_M_N_F "vfmas")
+		 (VFMASQ_N_F "vfmas")
+		 (VFMSQ_F "vfms")
+		 (VFMSQ_M_F "vfms")
 		 (VHADDQ_M_N_S "vhadd") (VHADDQ_M_N_U "vhadd")
 		 (VHADDQ_M_S "vhadd") (VHADDQ_M_U "vhadd")
 		 (VHADDQ_N_S "vhadd") (VHADDQ_N_U "vhadd")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 57ba65d3c76..b87798730a2 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -2246,65 +2246,36 @@ (define_insn "@mve_<mve_insn>q_m_n_f<mode>"
    (set_attr "length""8")])
 
 ;;
-;; [vfmaq_f])
+;; [vfmaq_f]
+;; [vfmsq_f]
 ;;
-(define_insn "mve_vfmaq_f<mode>"
+(define_insn "@mve_<mve_insn>q_f<mode>"
   [
    (set (match_operand:MVE_0 0 "s_register_operand" "=w")
 	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
 		       (match_operand:MVE_0 2 "s_register_operand" "w")
 		       (match_operand:MVE_0 3 "s_register_operand" "w")]
-	 VFMAQ_F))
+	 MVE_VFMxQ_F))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vfma.f%#<V_sz_elem>	%q0, %q2, %q3"
+  "<mve_insn>.f%#<V_sz_elem>\t%q0, %q2, %q3"
   [(set_attr "type" "mve_move")
 ])
 
 ;;
-;; [vfmaq_n_f])
+;; [vfmaq_n_f]
+;; [vfmasq_n_f]
 ;;
-(define_insn "mve_vfmaq_n_f<mode>"
+(define_insn "@mve_<mve_insn>q_n_f<mode>"
   [
    (set (match_operand:MVE_0 0 "s_register_operand" "=w")
 	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
 		       (match_operand:MVE_0 2 "s_register_operand" "w")
 		       (match_operand:<V_elem> 3 "s_register_operand" "r")]
-	 VFMAQ_N_F))
+	 MVE_VFMAxQ_N_F))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vfma.f%#<V_sz_elem>	%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vfmasq_n_f])
-;;
-(define_insn "mve_vfmasq_n_f<mode>"
-  [
-   (set (match_operand:MVE_0 0 "s_register_operand" "=w")
-	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
-		       (match_operand:MVE_0 2 "s_register_operand" "w")
-		       (match_operand:<V_elem> 3 "s_register_operand" "r")]
-	 VFMASQ_N_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vfmas.f%#<V_sz_elem>	%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-])
-;;
-;; [vfmsq_f])
-;;
-(define_insn "mve_vfmsq_f<mode>"
-  [
-   (set (match_operand:MVE_0 0 "s_register_operand" "=w")
-	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
-		       (match_operand:MVE_0 2 "s_register_operand" "w")
-		       (match_operand:MVE_0 3 "s_register_operand" "w")]
-	 VFMSQ_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vfms.f%#<V_sz_elem>	%q0, %q2, %q3"
+  "<mve_insn>.f%#<V_sz_elem>\t%q0, %q2, %3"
   [(set_attr "type" "mve_move")
 ])
 
@@ -3196,6 +3167,8 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
 ;;
 ;; [vabdq_m_f]
 ;; [vaddq_m_f]
+;; [vfmaq_m_f]
+;; [vfmsq_m_f]
 ;; [vmaxnmq_m_f]
 ;; [vminnmq_m_f]
 ;; [vmulq_m_f]
@@ -3219,6 +3192,8 @@ (define_insn "@mve_<mve_insn>q_m_f<mode>"
 ;; [vaddq_m_n_f]
 ;; [vsubq_m_n_f]
 ;; [vmulq_m_n_f]
+;; [vfmaq_m_n_f]
+;; [vfmasq_m_n_f]
 ;;
 (define_insn "@mve_<mve_insn>q_m_n_f<mode>"
   [
@@ -3230,7 +3205,7 @@ (define_insn "@mve_<mve_insn>q_m_n_f<mode>"
 	 MVE_FP_M_N_BINARY))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;<mve_insn>t.f%#<V_sz_elem>	%q0, %q2, %3"
+  "vpst\;<mve_insn>t.f%#<V_sz_elem>\t%q0, %q2, %3"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
@@ -3441,74 +3416,6 @@ (define_insn "mve_vcmulq_rot90_m_f<mode>"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-;;
-;; [vfmaq_m_f])
-;;
-(define_insn "mve_vfmaq_m_f<mode>"
-  [
-   (set (match_operand:MVE_0 0 "s_register_operand" "=w")
-	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
-		       (match_operand:MVE_0 2 "s_register_operand" "w")
-		       (match_operand:MVE_0 3 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VFMAQ_M_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vfmat.f%#<V_sz_elem>	%q0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vfmaq_m_n_f])
-;;
-(define_insn "mve_vfmaq_m_n_f<mode>"
-  [
-   (set (match_operand:MVE_0 0 "s_register_operand" "=w")
-	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
-		       (match_operand:MVE_0 2 "s_register_operand" "w")
-		       (match_operand:<V_elem> 3 "s_register_operand" "r")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VFMAQ_M_N_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vfmat.f%#<V_sz_elem>	%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vfmasq_m_n_f])
-;;
-(define_insn "mve_vfmasq_m_n_f<mode>"
-  [
-   (set (match_operand:MVE_0 0 "s_register_operand" "=w")
-	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
-		       (match_operand:MVE_0 2 "s_register_operand" "w")
-		       (match_operand:<V_elem> 3 "s_register_operand" "r")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VFMASQ_M_N_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vfmast.f%#<V_sz_elem>	%q0, %q2, %3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vfmsq_m_f])
-;;
-(define_insn "mve_vfmsq_m_f<mode>"
-  [
-   (set (match_operand:MVE_0 0 "s_register_operand" "=w")
-	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
-		       (match_operand:MVE_0 2 "s_register_operand" "w")
-		       (match_operand:MVE_0 3 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VFMSQ_M_F))
-  ]
-  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpst\;vfmst.f%#<V_sz_elem>	%q0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
 ;;
 ;; [vornq_m_f])
 ;;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 17/26] arm: [MVE intrinsics] rework vfmaq vfmasq vfmsq
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (14 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 16/26] arm: [MVE intrinsics] factorize vfmaq vfmsq vfmasq Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 18/26] arm: [MVE intrinsics] factorize vpselq Christophe Lyon
                   ` (9 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vfmaq, vfmasq, vfmsq using the new MVE builtins framework.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vfmaq, vfmasq, vfmsq): New.
	* config/arm/arm-mve-builtins-base.def (vfmaq, vfmasq, vfmsq): New.
	* config/arm/arm-mve-builtins-base.h (vfmaq, vfmasq, vfmsq): New.
	* config/arm/arm-mve-builtins.cc
	(function_instance::has_inactive_argument): Handle vfmaq, vfmasq,
	vfmsq.
	* config/arm/arm_mve.h (vfmaq): Remove.
	(vfmasq): Remove.
	(vfmsq): Remove.
	(vfmaq_m): Remove.
	(vfmasq_m): Remove.
	(vfmsq_m): Remove.
	(vfmaq_f16): Remove.
	(vfmaq_n_f16): Remove.
	(vfmasq_n_f16): Remove.
	(vfmsq_f16): Remove.
	(vfmaq_f32): Remove.
	(vfmaq_n_f32): Remove.
	(vfmasq_n_f32): Remove.
	(vfmsq_f32): Remove.
	(vfmaq_m_f32): Remove.
	(vfmaq_m_f16): Remove.
	(vfmaq_m_n_f32): Remove.
	(vfmaq_m_n_f16): Remove.
	(vfmasq_m_n_f32): Remove.
	(vfmasq_m_n_f16): Remove.
	(vfmsq_m_f32): Remove.
	(vfmsq_m_f16): Remove.
	(__arm_vfmaq_f16): Remove.
	(__arm_vfmaq_n_f16): Remove.
	(__arm_vfmasq_n_f16): Remove.
	(__arm_vfmsq_f16): Remove.
	(__arm_vfmaq_f32): Remove.
	(__arm_vfmaq_n_f32): Remove.
	(__arm_vfmasq_n_f32): Remove.
	(__arm_vfmsq_f32): Remove.
	(__arm_vfmaq_m_f32): Remove.
	(__arm_vfmaq_m_f16): Remove.
	(__arm_vfmaq_m_n_f32): Remove.
	(__arm_vfmaq_m_n_f16): Remove.
	(__arm_vfmasq_m_n_f32): Remove.
	(__arm_vfmasq_m_n_f16): Remove.
	(__arm_vfmsq_m_f32): Remove.
	(__arm_vfmsq_m_f16): Remove.
	(__arm_vfmaq): Remove.
	(__arm_vfmasq): Remove.
	(__arm_vfmsq): Remove.
	(__arm_vfmaq_m): Remove.
	(__arm_vfmasq_m): Remove.
	(__arm_vfmsq_m): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   3 +
 gcc/config/arm/arm-mve-builtins-base.def |   3 +
 gcc/config/arm/arm-mve-builtins-base.h   |   3 +
 gcc/config/arm/arm-mve-builtins.cc       |   3 +
 gcc/config/arm/arm_mve.h                 | 292 -----------------------
 5 files changed, 12 insertions(+), 292 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 6286d4a147a..91d397d6208 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -273,6 +273,9 @@ FUNCTION (vcmphiq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GTU, UNK
 FUNCTION_WITHOUT_M_N (vcreateq, VCREATEQ)
 FUNCTION_ONLY_N (vdupq, VDUPQ)
 FUNCTION_WITH_RTX_M (veorq, XOR, VEORQ)
+FUNCTION (vfmaq, unspec_mve_function_exact_insn, (-1, -1, VFMAQ_F, -1, -1, VFMAQ_N_F, -1, -1, VFMAQ_M_F, -1, -1, VFMAQ_M_N_F))
+FUNCTION (vfmasq, unspec_mve_function_exact_insn, (-1, -1, -1, -1, -1, VFMASQ_N_F, -1, -1, -1, -1, -1, VFMASQ_M_N_F))
+FUNCTION (vfmsq, unspec_mve_function_exact_insn, (-1, -1, VFMSQ_F, -1, -1, -1, -1, -1, VFMSQ_M_F, -1, -1, -1))
 FUNCTION_WITH_M_N_NO_F (vhaddq, VHADDQ)
 FUNCTION_WITH_M_N_NO_F (vhsubq, VHSUBQ)
 FUNCTION_PRED_P_S (vmaxavq, VMAXAVQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 141d057924e..8894f9e5372 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -158,6 +158,9 @@ DEF_MVE_FUNCTION (vcmpneq, cmp, all_float, m_or_none)
 DEF_MVE_FUNCTION (vcreateq, create, all_float, none)
 DEF_MVE_FUNCTION (vdupq, unary_n, all_float, mx_or_none)
 DEF_MVE_FUNCTION (veorq, binary, all_float, mx_or_none)
+DEF_MVE_FUNCTION (vfmaq, ternary_opt_n, all_float, m_or_none)
+DEF_MVE_FUNCTION (vfmasq, ternary_n, all_float, m_or_none)
+DEF_MVE_FUNCTION (vfmsq, ternary, all_float, m_or_none)
 DEF_MVE_FUNCTION (vmaxnmaq, binary, all_float, m_or_none)
 DEF_MVE_FUNCTION (vmaxnmavq, binary_maxvminv, all_float, p_or_none)
 DEF_MVE_FUNCTION (vmaxnmq, binary, all_float, mx_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index b1783a2c917..dabb52e800d 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -46,6 +46,9 @@ extern const function_base *const vcmpneq;
 extern const function_base *const vcreateq;
 extern const function_base *const vdupq;
 extern const function_base *const veorq;
+extern const function_base *const vfmaq;
+extern const function_base *const vfmasq;
+extern const function_base *const vfmsq;
 extern const function_base *const vhaddq;
 extern const function_base *const vhsubq;
 extern const function_base *const vmaxaq;
diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index c157a3ec8a3..87fcbc31f2f 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -678,6 +678,9 @@ function_instance::has_inactive_argument () const
       || base == functions::vcmpltq
       || base == functions::vcmpcsq
       || base == functions::vcmphiq
+      || base == functions::vfmaq
+      || base == functions::vfmasq
+      || base == functions::vfmsq
       || base == functions::vmaxaq
       || base == functions::vmaxnmaq
       || base == functions::vminaq
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 69ded8703cd..747cdc3509d 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -184,9 +184,6 @@
 #define vcmlaq_rot180(__a, __b, __c) __arm_vcmlaq_rot180(__a, __b, __c)
 #define vcmlaq_rot270(__a, __b, __c) __arm_vcmlaq_rot270(__a, __b, __c)
 #define vcmlaq_rot90(__a, __b, __c) __arm_vcmlaq_rot90(__a, __b, __c)
-#define vfmaq(__a, __b, __c) __arm_vfmaq(__a, __b, __c)
-#define vfmasq(__a, __b, __c) __arm_vfmasq(__a, __b, __c)
-#define vfmsq(__a, __b, __c) __arm_vfmsq(__a, __b, __c)
 #define vcvtmq_m(__inactive, __a, __p) __arm_vcvtmq_m(__inactive, __a, __p)
 #define vcvtnq_m(__inactive, __a, __p) __arm_vcvtnq_m(__inactive, __a, __p)
 #define vcvtpq_m(__inactive, __a, __p) __arm_vcvtpq_m(__inactive, __a, __p)
@@ -199,9 +196,6 @@
 #define vcmulq_rot180_m(__inactive, __a, __b, __p) __arm_vcmulq_rot180_m(__inactive, __a, __b, __p)
 #define vcmulq_rot270_m(__inactive, __a, __b, __p) __arm_vcmulq_rot270_m(__inactive, __a, __b, __p)
 #define vcmulq_rot90_m(__inactive, __a, __b, __p) __arm_vcmulq_rot90_m(__inactive, __a, __b, __p)
-#define vfmaq_m(__a, __b, __c, __p) __arm_vfmaq_m(__a, __b, __c, __p)
-#define vfmasq_m(__a, __b, __c, __p) __arm_vfmasq_m(__a, __b, __c, __p)
-#define vfmsq_m(__a, __b, __c, __p) __arm_vfmsq_m(__a, __b, __c, __p)
 #define vcmulq_x(__a, __b, __p) __arm_vcmulq_x(__a, __b, __p)
 #define vcmulq_rot90_x(__a, __b, __p) __arm_vcmulq_rot90_x(__a, __b, __p)
 #define vcmulq_rot180_x(__a, __b, __p) __arm_vcmulq_rot180_x(__a, __b, __p)
@@ -375,10 +369,6 @@
 #define vcmlaq_rot180_f16(__a, __b, __c) __arm_vcmlaq_rot180_f16(__a, __b, __c)
 #define vcmlaq_rot270_f16(__a, __b, __c) __arm_vcmlaq_rot270_f16(__a, __b, __c)
 #define vcmlaq_rot90_f16(__a, __b, __c) __arm_vcmlaq_rot90_f16(__a, __b, __c)
-#define vfmaq_f16(__a, __b, __c) __arm_vfmaq_f16(__a, __b, __c)
-#define vfmaq_n_f16(__a, __b, __c) __arm_vfmaq_n_f16(__a, __b, __c)
-#define vfmasq_n_f16(__a, __b, __c) __arm_vfmasq_n_f16(__a, __b, __c)
-#define vfmsq_f16(__a, __b, __c) __arm_vfmsq_f16(__a, __b, __c)
 #define vcvtmq_m_s16_f16(__inactive, __a, __p) __arm_vcvtmq_m_s16_f16(__inactive, __a, __p)
 #define vcvtnq_m_s16_f16(__inactive, __a, __p) __arm_vcvtnq_m_s16_f16(__inactive, __a, __p)
 #define vcvtpq_m_s16_f16(__inactive, __a, __p) __arm_vcvtpq_m_s16_f16(__inactive, __a, __p)
@@ -392,10 +382,6 @@
 #define vcmlaq_rot180_f32(__a, __b, __c) __arm_vcmlaq_rot180_f32(__a, __b, __c)
 #define vcmlaq_rot270_f32(__a, __b, __c) __arm_vcmlaq_rot270_f32(__a, __b, __c)
 #define vcmlaq_rot90_f32(__a, __b, __c) __arm_vcmlaq_rot90_f32(__a, __b, __c)
-#define vfmaq_f32(__a, __b, __c) __arm_vfmaq_f32(__a, __b, __c)
-#define vfmaq_n_f32(__a, __b, __c) __arm_vfmaq_n_f32(__a, __b, __c)
-#define vfmasq_n_f32(__a, __b, __c) __arm_vfmasq_n_f32(__a, __b, __c)
-#define vfmsq_f32(__a, __b, __c) __arm_vfmsq_f32(__a, __b, __c)
 #define vcvtmq_m_s32_f32(__inactive, __a, __p) __arm_vcvtmq_m_s32_f32(__inactive, __a, __p)
 #define vcvtnq_m_s32_f32(__inactive, __a, __p) __arm_vcvtnq_m_s32_f32(__inactive, __a, __p)
 #define vcvtpq_m_s32_f32(__inactive, __a, __p) __arm_vcvtpq_m_s32_f32(__inactive, __a, __p)
@@ -493,14 +479,6 @@
 #define vcvtq_m_n_s16_f16(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_s16_f16(__inactive, __a,  __imm6, __p)
 #define vcvtq_m_n_u32_f32(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_u32_f32(__inactive, __a,  __imm6, __p)
 #define vcvtq_m_n_u16_f16(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_u16_f16(__inactive, __a,  __imm6, __p)
-#define vfmaq_m_f32(__a, __b, __c, __p) __arm_vfmaq_m_f32(__a, __b, __c, __p)
-#define vfmaq_m_f16(__a, __b, __c, __p) __arm_vfmaq_m_f16(__a, __b, __c, __p)
-#define vfmaq_m_n_f32(__a, __b, __c, __p) __arm_vfmaq_m_n_f32(__a, __b, __c, __p)
-#define vfmaq_m_n_f16(__a, __b, __c, __p) __arm_vfmaq_m_n_f16(__a, __b, __c, __p)
-#define vfmasq_m_n_f32(__a, __b, __c, __p) __arm_vfmasq_m_n_f32(__a, __b, __c, __p)
-#define vfmasq_m_n_f16(__a, __b, __c, __p) __arm_vfmasq_m_n_f16(__a, __b, __c, __p)
-#define vfmsq_m_f32(__a, __b, __c, __p) __arm_vfmsq_m_f32(__a, __b, __c, __p)
-#define vfmsq_m_f16(__a, __b, __c, __p) __arm_vfmsq_m_f16(__a, __b, __c, __p)
 #define vornq_m_f32(__inactive, __a, __b, __p) __arm_vornq_m_f32(__inactive, __a, __b, __p)
 #define vornq_m_f16(__inactive, __a, __b, __p) __arm_vornq_m_f16(__inactive, __a, __b, __p)
 #define vstrbq_s8( __addr, __value) __arm_vstrbq_s8( __addr, __value)
@@ -5540,34 +5518,6 @@ __arm_vcmlaq_rot90_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
   return __builtin_mve_vcmlaq_rot90v8hf (__a, __b, __c);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
-{
-  return __builtin_mve_vfmaq_fv8hf (__a, __b, __c);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmaq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c)
-{
-  return __builtin_mve_vfmaq_n_fv8hf (__a, __b, __c);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmasq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c)
-{
-  return __builtin_mve_vfmasq_n_fv8hf (__a, __b, __c);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmsq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
-{
-  return __builtin_mve_vfmsq_fv8hf (__a, __b, __c);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m_s16_f16 (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
@@ -5659,34 +5609,6 @@ __arm_vcmlaq_rot90_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
   return __builtin_mve_vcmlaq_rot90v4sf (__a, __b, __c);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
-{
-  return __builtin_mve_vfmaq_fv4sf (__a, __b, __c);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
-{
-  return __builtin_mve_vfmaq_n_fv4sf (__a, __b, __c);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmasq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c)
-{
-  return __builtin_mve_vfmasq_n_fv4sf (__a, __b, __c);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
-{
-  return __builtin_mve_vfmsq_fv4sf (__a, __b, __c);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m_s32_f32 (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
@@ -5960,62 +5882,6 @@ __arm_vcvtq_m_n_u16_f16 (uint16x8_t __inactive, float16x8_t __a, const int __imm
   return __builtin_mve_vcvtq_m_n_from_f_uv8hi (__inactive, __a, __imm6, __p);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmaq_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vfmaq_m_fv4sf (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmaq_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vfmaq_m_fv8hf (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmaq_m_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vfmaq_m_n_fv4sf (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmaq_m_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vfmaq_m_n_fv8hf (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmasq_m_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vfmasq_m_n_fv4sf (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmasq_m_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vfmasq_m_n_fv8hf (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmsq_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vfmsq_m_fv4sf (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmsq_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p)
-{
-  return __builtin_mve_vfmsq_m_fv8hf (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
@@ -10356,34 +10222,6 @@ __arm_vcmlaq_rot90 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
  return __arm_vcmlaq_rot90_f16 (__a, __b, __c);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmaq (float16x8_t __a, float16x8_t __b, float16x8_t __c)
-{
- return __arm_vfmaq_f16 (__a, __b, __c);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmaq (float16x8_t __a, float16x8_t __b, float16_t __c)
-{
- return __arm_vfmaq_n_f16 (__a, __b, __c);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmasq (float16x8_t __a, float16x8_t __b, float16_t __c)
-{
- return __arm_vfmasq_n_f16 (__a, __b, __c);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmsq (float16x8_t __a, float16x8_t __b, float16x8_t __c)
-{
- return __arm_vfmsq_f16 (__a, __b, __c);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
@@ -10475,34 +10313,6 @@ __arm_vcmlaq_rot90 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
  return __arm_vcmlaq_rot90_f32 (__a, __b, __c);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmaq (float32x4_t __a, float32x4_t __b, float32x4_t __c)
-{
- return __arm_vfmaq_f32 (__a, __b, __c);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmaq (float32x4_t __a, float32x4_t __b, float32_t __c)
-{
- return __arm_vfmaq_n_f32 (__a, __b, __c);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmasq (float32x4_t __a, float32x4_t __b, float32_t __c)
-{
- return __arm_vfmasq_n_f32 (__a, __b, __c);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmsq (float32x4_t __a, float32x4_t __b, float32x4_t __c)
-{
- return __arm_vfmsq_f32 (__a, __b, __c);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
@@ -10776,62 +10586,6 @@ __arm_vcvtq_m_n (uint16x8_t __inactive, float16x8_t __a, const int __imm6, mve_p
  return __arm_vcvtq_m_n_u16_f16 (__inactive, __a, __imm6, __p);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmaq_m (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vfmaq_m_f32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmaq_m (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vfmaq_m_f16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmaq_m (float32x4_t __a, float32x4_t __b, float32_t __c, mve_pred16_t __p)
-{
- return __arm_vfmaq_m_n_f32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmaq_m (float16x8_t __a, float16x8_t __b, float16_t __c, mve_pred16_t __p)
-{
- return __arm_vfmaq_m_n_f16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmasq_m (float32x4_t __a, float32x4_t __b, float32_t __c, mve_pred16_t __p)
-{
- return __arm_vfmasq_m_n_f32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmasq_m (float16x8_t __a, float16x8_t __b, float16_t __c, mve_pred16_t __p)
-{
- return __arm_vfmasq_m_n_f16 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmsq_m (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p)
-{
- return __arm_vfmsq_m_f32 (__a, __b, __c, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vfmsq_m (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p)
-{
- return __arm_vfmsq_m_f16 (__a, __b, __c, __p);
-}
-
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_m (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
@@ -11859,29 +11613,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtpq_m_u16_f16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtpq_m_u32_f32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
 
-#define __arm_vfmaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmaq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(p2, double)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmaq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(p2, double)), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vfmaq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vfmaq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t)));})
-
-#define __arm_vfmsq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vfmsq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vfmsq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t)));})
-
-#define __arm_vfmasq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmasq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(p2, double)), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmasq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(p2, double)));})
-
 #define __arm_vpselq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -11991,29 +11722,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmulq_rot90_m_f16(__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
   int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmulq_rot90_m_f32(__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
 
-#define __arm_vfmaq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vfmaq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vfmaq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmaq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(p2, double), p3), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmaq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(p2, double), p3));})
-
-#define __arm_vfmasq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmasq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(p2, double), p3), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmasq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(p2, double), p3));})
-
-#define __arm_vfmsq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vfmsq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vfmsq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
-
 #define __arm_vornq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 18/26] arm: [MVE intrinsics] factorize vpselq
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (15 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 17/26] arm: [MVE intrinsics] rework vfmaq vfmasq vfmsq Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 19/26] arm: [MVE intrinsics] add vpsel shape Christophe Lyon
                   ` (8 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vpselq builtins so that they use parameterized names.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm.cc (arm_expand_vcond): Use gen_mve_q instead of
	gen_mve_vpselq.
	* config/arm/iterators.md (MVE_VPSELQ_F): New.
	(mve_insn): Add vpsel.
	* config/arm/mve.md (@mve_vpselq_<supf><mode>): Rename into ...
	(@mve_<mve_insn>q_<supf><mode>): ... this.
	(@mve_vpselq_f<mode>): Rename into ...
	(@mve_<mve_insn>q_f<mode>): ... this.
---
 gcc/config/arm/arm.cc       |  8 ++++----
 gcc/config/arm/iterators.md |  5 +++++
 gcc/config/arm/mve.md       | 18 +++++++++---------
 3 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 06e0756e4e8..da7e9c81465 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -31633,13 +31633,13 @@ arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
       switch (GET_MODE_CLASS (cmp_mode))
 	{
 	case MODE_VECTOR_INT:
-	  emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0],
-				     operands[1], operands[2], mask));
+	  emit_insn (gen_mve_q (VPSELQ_S, VPSELQ_S, cmp_mode, operands[0],
+				operands[1], operands[2], mask));
 	  break;
 	case MODE_VECTOR_FLOAT:
 	  if (TARGET_HAVE_MVE_FLOAT)
-	    emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0],
-					 operands[1], operands[2], mask));
+	    emit_insn (gen_mve_q_f (VPSELQ_F, cmp_mode, operands[0],
+				    operands[1], operands[2], mask));
 	  else
 	    gcc_unreachable ();
 	  break;
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 022744f04d9..3d4a9cf9cc2 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -898,6 +898,10 @@ (define_int_attr mve_cmp_op1 [
 		 (VCMPNEQ_M_N_F "ne")
 		 ])
 
+(define_int_iterator MVE_VPSELQ_F [
+		     VPSELQ_F
+		     ])
+
 (define_int_attr mve_insn [
 		 (VABAVQ_P_S "vabav") (VABAVQ_P_U "vabav")
 		 (VABAVQ_S "vabav") (VABAVQ_U "vabav")
@@ -1030,6 +1034,7 @@ (define_int_attr mve_insn [
 		 (VORRQ_M_N_S "vorr") (VORRQ_M_N_U "vorr")
 		 (VORRQ_M_S "vorr") (VORRQ_M_U "vorr") (VORRQ_M_F "vorr")
 		 (VORRQ_N_S "vorr") (VORRQ_N_U "vorr")
+		 (VPSELQ_S "vpsel") (VPSELQ_U "vpsel") (VPSELQ_F "vpsel")
 		 (VQABSQ_M_S "vqabs")
 		 (VQABSQ_S "vqabs")
 		 (VQADDQ_M_N_S "vqadd") (VQADDQ_M_N_U "vqadd")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index b87798730a2..c6f9c0b9afb 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -1969,7 +1969,7 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
 ;;
 ;; [vpselq_u, vpselq_s])
 ;;
-(define_insn "@mve_vpselq_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_<supf><mode>"
   [
    (set (match_operand:MVE_1 0 "s_register_operand" "=w")
 	(unspec:MVE_1 [(match_operand:MVE_1 1 "s_register_operand" "w")
@@ -1978,7 +1978,7 @@ (define_insn "@mve_vpselq_<supf><mode>"
 	 VPSELQ))
   ]
   "TARGET_HAVE_MVE"
-  "vpsel %q0, %q1, %q2"
+  "<mve_insn>\t%q0, %q1, %q2"
   [(set_attr "type" "mve_move")
 ])
 
@@ -2427,16 +2427,16 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
 ;;
 ;; [vpselq_f])
 ;;
-(define_insn "@mve_vpselq_f<mode>"
+(define_insn "@mve_<mve_insn>q_f<mode>"
   [
    (set (match_operand:MVE_0 0 "s_register_operand" "=w")
 	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
 		       (match_operand:MVE_0 2 "s_register_operand" "w")
 		       (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
-	 VPSELQ_F))
+	 MVE_VPSELQ_F))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-  "vpsel %q0, %q1, %q2"
+  "<mve_insn>\t%q0, %q1, %q2"
   [(set_attr "type" "mve_move")
 ])
 
@@ -6867,12 +6867,12 @@ (define_expand "vcond_mask_<mode><MVE_vpred>"
   switch (GET_MODE_CLASS (<MODE>mode))
     {
       case MODE_VECTOR_INT:
-	emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0],
-				   operands[1], operands[2], operands[3]));
+	emit_insn (gen_mve_q (VPSELQ_S,	VPSELQ_S, <MODE>mode, operands[0],
+			      operands[1], operands[2], operands[3]));
 	break;
       case MODE_VECTOR_FLOAT:
-	emit_insn (gen_mve_vpselq_f (<MODE>mode, operands[0],
-				     operands[1], operands[2], operands[3]));
+	emit_insn (gen_mve_q_f (VPSELQ_F, <MODE>mode, operands[0],
+				operands[1], operands[2], operands[3]));
 	break;
       default:
 	gcc_unreachable ();
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 19/26] arm: [MVE intrinsics] add vpsel shape
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (16 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 18/26] arm: [MVE intrinsics] factorize vpselq Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 20/26] arm: [MVE intrinsics] rework vpselq Christophe Lyon
                   ` (7 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the vpsel shape description.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (vpsel): New.
	* config/arm/arm-mve-builtins-shapes.h (vpsel): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 39 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 40 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 43532601fbe..012cf3ef4c0 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1706,6 +1706,45 @@ struct unary_widen_acc_def : public overloaded_base<0>
 };
 SHAPE (unary_widen_acc)
 
+/* <T0>_t vfoo[_t0](<T0>_t, <T0>_t, mve_pred16_t)
+
+   i.e. a version of the standard ternary shape in which the final argument
+   is always a set of predicates.  resolve () infers <T0> from argument 0.
+
+   Example: vpselq.
+   int16x8_t [__arm_]vpselq[_s16](int16x8_t a, int16x8_t b, mve_pred16_t p)  */
+struct vpsel_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+    build_all (b, "v0,v0,v0,p", group, MODE_none, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;  /* Handed to check_gp_argument below.  */
+    type_suffix_index type;  /* Type suffix inferred from argument 0.  */
+    if (!r.check_gp_argument (3, i, nargs)
+	|| (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES)
+      return error_mark_node;
+
+    unsigned int last_arg = i;  /* NOTE(review): presumably the predicate's index -- confirm.  */
+    for (i = 0; i < last_arg; i++)  /* Arguments before LAST_ARG must match TYPE.  */
+      if (!r.require_matching_vector_type (i, type))
+	return error_mark_node;
+
+    if (!r.require_vector_type (2 , VECTOR_TYPE_mve_pred16_t))
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+};
+SHAPE (vpsel)
+
 } /* end namespace arm_mve */
 
 #undef SHAPE
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index f67a484c146..6e818092a87 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -71,6 +71,7 @@ namespace arm_mve
     extern const function_shape *const unary_n;
     extern const function_shape *const unary_widen;
     extern const function_shape *const unary_widen_acc;
+    extern const function_shape *const vpsel;
 
   } /* end namespace arm_mve::shapes */
 } /* end namespace arm_mve */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 20/26] arm: [MVE intrinsics] rework vpselq
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (17 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 19/26] arm: [MVE intrinsics] add vpsel shape Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 21/26] arm: [MVE intrinsics] add ternary_lshift shape Christophe Lyon
                   ` (6 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vpselq using the new MVE builtins framework.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vpselq): New.
	* config/arm/arm-mve-builtins-base.def (vpselq): New.
	* config/arm/arm-mve-builtins-base.h (vpselq): New.
	* config/arm/arm_mve.h (vpselq): Remove.
	(vpselq_u8): Remove.
	(vpselq_s8): Remove.
	(vpselq_u16): Remove.
	(vpselq_s16): Remove.
	(vpselq_u32): Remove.
	(vpselq_s32): Remove.
	(vpselq_u64): Remove.
	(vpselq_s64): Remove.
	(vpselq_f16): Remove.
	(vpselq_f32): Remove.
	(__arm_vpselq_u8): Remove.
	(__arm_vpselq_s8): Remove.
	(__arm_vpselq_u16): Remove.
	(__arm_vpselq_s16): Remove.
	(__arm_vpselq_u32): Remove.
	(__arm_vpselq_s32): Remove.
	(__arm_vpselq_u64): Remove.
	(__arm_vpselq_s64): Remove.
	(__arm_vpselq_f16): Remove.
	(__arm_vpselq_f32): Remove.
	(__arm_vpselq): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   1 +
 gcc/config/arm/arm-mve-builtins-base.def |   2 +
 gcc/config/arm/arm-mve-builtins-base.h   |   1 +
 gcc/config/arm/arm_mve.h                 | 177 -----------------------
 4 files changed, 4 insertions(+), 177 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 91d397d6208..b1440ca489e 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -320,6 +320,7 @@ FUNCTION_WITHOUT_N_NO_F (vmulhq, VMULHQ)
 FUNCTION_WITH_RTX_M_N (vmulq, MULT, VMULQ)
 FUNCTION_WITH_RTX_M_N_NO_F (vmvnq, NOT, VMVNQ)
 FUNCTION (vnegq, unspec_based_mve_function_exact_insn, (NEG, NEG, NEG, -1, -1, -1, VNEGQ_M_S, -1, VNEGQ_M_F, -1, -1, -1))
+FUNCTION_WITHOUT_M_N (vpselq, VPSELQ)
 FUNCTION_WITH_RTX_M_N_NO_N_F (vorrq, IOR, VORRQ)
 FUNCTION_WITHOUT_N_NO_U_F (vqabsq, VQABSQ)
 FUNCTION_WITH_M_N_NO_F (vqaddq, VQADDQ)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 8894f9e5372..de4c473f618 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -78,6 +78,7 @@ DEF_MVE_FUNCTION (vmulq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vmvnq, mvn, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vnegq, unary, all_signed, mx_or_none)
 DEF_MVE_FUNCTION (vorrq, binary_orrq, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vpselq, vpsel, all_integer_with_64, none)
 DEF_MVE_FUNCTION (vqabsq, unary, all_signed, m_or_none)
 DEF_MVE_FUNCTION (vqaddq, binary_opt_n, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vqdmladhq, ternary, all_signed, m_or_none)
@@ -172,6 +173,7 @@ DEF_MVE_FUNCTION (vminnmvq, binary_maxvminv, all_float, p_or_none)
 DEF_MVE_FUNCTION (vmulq, binary_opt_n, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vnegq, unary, all_float, mx_or_none)
 DEF_MVE_FUNCTION (vorrq, binary_orrq, all_float, mx_or_none)
+DEF_MVE_FUNCTION (vpselq, vpsel, all_float, none)
 DEF_MVE_FUNCTION (vreinterpretq, unary_convert, reinterpret_float, none)
 DEF_MVE_FUNCTION (vrev32q, unary, float16, mx_or_none)
 DEF_MVE_FUNCTION (vrev64q, unary, all_float, mx_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index dabb52e800d..ec5b4fbffb9 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -94,6 +94,7 @@ extern const function_base *const vmulq;
 extern const function_base *const vmvnq;
 extern const function_base *const vnegq;
 extern const function_base *const vorrq;
+extern const function_base *const vpselq;
 extern const function_base *const vqabsq;
 extern const function_base *const vqaddq;
 extern const function_base *const vqdmladhq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 747cdc3509d..72b50764963 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -54,7 +54,6 @@
 #define vmullbq_poly(__a, __b) __arm_vmullbq_poly(__a, __b)
 #define vbicq_m_n(__a, __imm, __p) __arm_vbicq_m_n(__a, __imm, __p)
 #define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
-#define vpselq(__a, __b, __p) __arm_vpselq(__a, __b, __p)
 #define vsriq(__a, __b, __imm) __arm_vsriq(__a, __b, __imm)
 #define vsliq(__a, __b, __imm) __arm_vsliq(__a, __b, __imm)
 #define vsriq_m(__a, __b, __imm, __p) __arm_vsriq_m(__a, __b, __imm, __p)
@@ -341,26 +340,18 @@
 #define vshlcq_u16(__a,  __b,  __imm) __arm_vshlcq_u16(__a,  __b,  __imm)
 #define vshlcq_s32(__a,  __b,  __imm) __arm_vshlcq_s32(__a,  __b,  __imm)
 #define vshlcq_u32(__a,  __b,  __imm) __arm_vshlcq_u32(__a,  __b,  __imm)
-#define vpselq_u8(__a, __b, __p) __arm_vpselq_u8(__a, __b, __p)
-#define vpselq_s8(__a, __b, __p) __arm_vpselq_s8(__a, __b, __p)
 #define vsriq_n_u8(__a, __b,  __imm) __arm_vsriq_n_u8(__a, __b,  __imm)
 #define vsliq_n_u8(__a, __b,  __imm) __arm_vsliq_n_u8(__a, __b,  __imm)
 #define vsriq_n_s8(__a, __b,  __imm) __arm_vsriq_n_s8(__a, __b,  __imm)
 #define vsliq_n_s8(__a, __b,  __imm) __arm_vsliq_n_s8(__a, __b,  __imm)
-#define vpselq_u16(__a, __b, __p) __arm_vpselq_u16(__a, __b, __p)
-#define vpselq_s16(__a, __b, __p) __arm_vpselq_s16(__a, __b, __p)
 #define vsriq_n_u16(__a, __b,  __imm) __arm_vsriq_n_u16(__a, __b,  __imm)
 #define vsliq_n_u16(__a, __b,  __imm) __arm_vsliq_n_u16(__a, __b,  __imm)
 #define vsriq_n_s16(__a, __b,  __imm) __arm_vsriq_n_s16(__a, __b,  __imm)
 #define vsliq_n_s16(__a, __b,  __imm) __arm_vsliq_n_s16(__a, __b,  __imm)
-#define vpselq_u32(__a, __b, __p) __arm_vpselq_u32(__a, __b, __p)
-#define vpselq_s32(__a, __b, __p) __arm_vpselq_s32(__a, __b, __p)
 #define vsriq_n_u32(__a, __b,  __imm) __arm_vsriq_n_u32(__a, __b,  __imm)
 #define vsliq_n_u32(__a, __b,  __imm) __arm_vsliq_n_u32(__a, __b,  __imm)
 #define vsriq_n_s32(__a, __b,  __imm) __arm_vsriq_n_s32(__a, __b,  __imm)
 #define vsliq_n_s32(__a, __b,  __imm) __arm_vsliq_n_s32(__a, __b,  __imm)
-#define vpselq_u64(__a, __b, __p) __arm_vpselq_u64(__a, __b, __p)
-#define vpselq_s64(__a, __b, __p) __arm_vpselq_s64(__a, __b, __p)
 #define vcvtbq_m_f16_f32(__a, __b, __p) __arm_vcvtbq_m_f16_f32(__a, __b, __p)
 #define vcvtbq_m_f32_f16(__inactive, __a, __p) __arm_vcvtbq_m_f32_f16(__inactive, __a, __p)
 #define vcvttq_m_f16_f32(__a, __b, __p) __arm_vcvttq_m_f16_f32(__a, __b, __p)
@@ -373,7 +364,6 @@
 #define vcvtnq_m_s16_f16(__inactive, __a, __p) __arm_vcvtnq_m_s16_f16(__inactive, __a, __p)
 #define vcvtpq_m_s16_f16(__inactive, __a, __p) __arm_vcvtpq_m_s16_f16(__inactive, __a, __p)
 #define vcvtq_m_s16_f16(__inactive, __a, __p) __arm_vcvtq_m_s16_f16(__inactive, __a, __p)
-#define vpselq_f16(__a, __b, __p) __arm_vpselq_f16(__a, __b, __p)
 #define vcvtmq_m_u16_f16(__inactive, __a, __p) __arm_vcvtmq_m_u16_f16(__inactive, __a, __p)
 #define vcvtnq_m_u16_f16(__inactive, __a, __p) __arm_vcvtnq_m_u16_f16(__inactive, __a, __p)
 #define vcvtpq_m_u16_f16(__inactive, __a, __p) __arm_vcvtpq_m_u16_f16(__inactive, __a, __p)
@@ -386,7 +376,6 @@
 #define vcvtnq_m_s32_f32(__inactive, __a, __p) __arm_vcvtnq_m_s32_f32(__inactive, __a, __p)
 #define vcvtpq_m_s32_f32(__inactive, __a, __p) __arm_vcvtpq_m_s32_f32(__inactive, __a, __p)
 #define vcvtq_m_s32_f32(__inactive, __a, __p) __arm_vcvtq_m_s32_f32(__inactive, __a, __p)
-#define vpselq_f32(__a, __b, __p) __arm_vpselq_f32(__a, __b, __p)
 #define vcvtmq_m_u32_f32(__inactive, __a, __p) __arm_vcvtmq_m_u32_f32(__inactive, __a, __p)
 #define vcvtnq_m_u32_f32(__inactive, __a, __p) __arm_vcvtnq_m_u32_f32(__inactive, __a, __p)
 #define vcvtpq_m_u32_f32(__inactive, __a, __p) __arm_vcvtpq_m_u32_f32(__inactive, __a, __p)
@@ -1542,20 +1531,6 @@ __arm_vshlcq_u32 (uint32x4_t __a, uint32_t * __b, const int __imm)
   return __res;
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq_u8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vpselq_uv16qi (__a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vpselq_sv16qi (__a, __b, __p);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm)
@@ -1584,20 +1559,6 @@ __arm_vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm)
   return __builtin_mve_vsliq_n_sv16qi (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq_u16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vpselq_uv8hi (__a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vpselq_sv8hi (__a, __b, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm)
@@ -1626,20 +1587,6 @@ __arm_vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __imm)
   return __builtin_mve_vsliq_n_sv8hi (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vpselq_uv4si (__a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vpselq_sv4si (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm)
@@ -1668,20 +1615,6 @@ __arm_vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __imm)
   return __builtin_mve_vsliq_n_sv4si (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq_u64 (uint64x2_t __a, uint64x2_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vpselq_uv2di (__a, __b, __p);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq_s64 (int64x2_t __a, int64x2_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vpselq_sv2di (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -5546,13 +5479,6 @@ __arm_vcvtq_m_s16_f16 (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
   return __builtin_mve_vcvtq_m_from_f_sv8hi (__inactive, __a, __p);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vpselq_fv8hf (__a, __b, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m_u16_f16 (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
@@ -5637,13 +5563,6 @@ __arm_vcvtq_m_s32_f32 (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
   return __builtin_mve_vcvtq_m_from_f_sv4si (__inactive, __a, __p);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vpselq_fv4sf (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m_u32_f32 (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
@@ -7084,20 +7003,6 @@ __arm_vshlcq (uint32x4_t __a, uint32_t * __b, const int __imm)
  return __arm_vshlcq_u32 (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vpselq_u8 (__a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vpselq_s8 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (uint8x16_t __a, uint8x16_t __b, const int __imm)
@@ -7126,20 +7031,6 @@ __arm_vsliq (int8x16_t __a, int8x16_t __b, const int __imm)
  return __arm_vsliq_n_s8 (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vpselq_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vpselq_s16 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (uint16x8_t __a, uint16x8_t __b, const int __imm)
@@ -7168,20 +7059,6 @@ __arm_vsliq (int16x8_t __a, int16x8_t __b, const int __imm)
  return __arm_vsliq_n_s16 (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vpselq_u32 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vpselq_s32 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (uint32x4_t __a, uint32x4_t __b, const int __imm)
@@ -7210,20 +7087,6 @@ __arm_vsliq (int32x4_t __a, int32x4_t __b, const int __imm)
  return __arm_vsliq_n_s32 (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq (uint64x2_t __a, uint64x2_t __b, mve_pred16_t __p)
-{
- return __arm_vpselq_u64 (__a, __b, __p);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq (int64x2_t __a, int64x2_t __b, mve_pred16_t __p)
-{
- return __arm_vpselq_s64 (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -10250,13 +10113,6 @@ __arm_vcvtq_m (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
  return __arm_vcvtq_m_s16_f16 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vpselq_f16 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
@@ -10341,13 +10197,6 @@ __arm_vcvtq_m (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
  return __arm_vcvtq_m_s32_f32 (__inactive, __a, __p);
 }
 
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vpselq (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vpselq_f32 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcvtmq_m (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
@@ -11613,20 +11462,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtpq_m_u16_f16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtpq_m_u32_f32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
 
-#define __arm_vpselq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vpselq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vpselq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vpselq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int64x2_t]: __arm_vpselq_s64 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int64x2_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vpselq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vpselq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vpselq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint64x2_t][__ARM_mve_type_uint64x2_t]: __arm_vpselq_u64 (__ARM_mve_coerce(__p0, uint64x2_t), __ARM_mve_coerce(__p1, uint64x2_t), p2), \
-  int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vpselq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
-  int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vpselq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
-
 #define __arm_vbicq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -12307,18 +12142,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsriq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsriq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
-#define __arm_vpselq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vpselq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vpselq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vpselq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int64x2_t]: __arm_vpselq_s64 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int64x2_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vpselq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vpselq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vpselq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint64x2_t][__ARM_mve_type_uint64x2_t]: __arm_vpselq_u64 (__ARM_mve_coerce(__p0, uint64x2_t), __ARM_mve_coerce(__p1, uint64x2_t), p2));})
-
 #define __arm_vbicq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 21/26] arm: [MVE intrinsics] add ternary_lshift shape
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (18 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 20/26] arm: [MVE intrinsics] rework vpselq Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 22/26] arm: [MVE intrinsics] factorize vsliq Christophe Lyon
                   ` (5 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the ternary_lshift shape description.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (ternary_lshift): New.
	* config/arm/arm-mve-builtins-shapes.h (ternary_lshift): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 38 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 39 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 012cf3ef4c0..a8e94b4f8f8 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1372,6 +1372,44 @@ struct ternary_def : public overloaded_base<0>
 };
 SHAPE (ternary)
 
+/* <T0>_t vfoo[_n_t0](<T0>_t, <T0>_t, const int)
+
+   i.e. ternary operations that operate on a pair of vectors of the
+   same type as the destination, and take a third integer argument.
+
+   Check that 'imm' is in the [0..#bits-1] range.
+
+   Example: vsliq.
+   int16x8_t [__arm_]vsliq[_n_s16](int16x8_t a, int16x8_t b, const int imm)
+   int16x8_t [__arm_]vsliq_m[_n_s16](int16x8_t a, int16x8_t b, const int imm, mve_pred16_t p)  */
+struct ternary_lshift_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
+    build_all (b, "v0,v0,v0,ss32", group, MODE_n, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    return r.resolve_uniform (2, 1);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    if (c.mode_suffix_id != MODE_n)
+      return true;
+
+    unsigned int bits = c.type_suffix (0).element_bits;
+    return c.require_immediate_range (2, 0, bits - 1);
+  }
+};
+SHAPE (ternary_lshift)
+
 /* <T0>_t vfoo[_n_t0](<T0>_t, <T0>_t, <S0>_t)
 
    i.e. the standard shape for ternary operations that operate on a
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index 6e818092a87..73375186d82 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -61,6 +61,7 @@ namespace arm_mve
     extern const function_shape *const inherent;
     extern const function_shape *const mvn;
     extern const function_shape *const ternary;
+    extern const function_shape *const ternary_lshift;
     extern const function_shape *const ternary_n;
     extern const function_shape *const ternary_opt_n;
     extern const function_shape *const unary;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 22/26] arm: [MVE intrinsics] factorize vsliq
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (19 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 21/26] arm: [MVE intrinsics] add ternary_lshift shape Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 23/26] arm: [MVE intrinsics] rework vsliq Christophe Lyon
                   ` (4 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vsliq builtins so that they use parameterized names.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (mve_insn): Add vsli.
	* config/arm/mve.md (mve_vsliq_n_<supf><mode>): Rename into ...
	(@mve_<mve_insn>q_n_<supf><mode>): ... this.
	(mve_vsliq_m_n_<supf><mode>): Rename into ...
	(@mve_<mve_insn>q_m_n_<supf><mode>): ... this.
---
 gcc/config/arm/iterators.md | 2 ++
 gcc/config/arm/mve.md       | 8 ++++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 3d4a9cf9cc2..7e7219033cf 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -1181,6 +1181,8 @@ (define_int_attr mve_insn [
 		 (VSHRNTQ_N_S "vshrnt") (VSHRNTQ_N_U "vshrnt")
 		 (VSHRQ_M_N_S "vshr") (VSHRQ_M_N_U "vshr")
 		 (VSHRQ_N_S "vshr") (VSHRQ_N_U "vshr")
+		 (VSLIQ_M_N_S "vsli") (VSLIQ_M_N_U "vsli")
+		 (VSLIQ_N_S "vsli") (VSLIQ_N_U "vsli")
 		 (VSUBQ_M_N_S "vsub") (VSUBQ_M_N_U "vsub") (VSUBQ_M_N_F "vsub")
 		 (VSUBQ_M_S "vsub") (VSUBQ_M_U "vsub") (VSUBQ_M_F "vsub")
 		 (VSUBQ_N_S "vsub") (VSUBQ_N_U "vsub") (VSUBQ_N_F "vsub")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index c6f9c0b9afb..a1c2cad9d2e 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -2058,7 +2058,7 @@ (define_insn "@mve_<mve_insn>q_m_<supf><mode>"
 ;;
 ;; [vsliq_n_u, vsliq_n_s])
 ;;
-(define_insn "mve_vsliq_n_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
@@ -2067,7 +2067,7 @@ (define_insn "mve_vsliq_n_<supf><mode>"
 	 VSLIQ_N))
   ]
   "TARGET_HAVE_MVE"
-  "vsli.%#<V_sz_elem>\t%q0, %q2, %3"
+  "<mve_insn>.%#<V_sz_elem>\t%q0, %q2, %3"
   [(set_attr "type" "mve_move")
 ])
 
@@ -2960,7 +2960,7 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
 ;;
 ;; [vsliq_m_n_u, vsliq_m_n_s])
 ;;
-(define_insn "mve_vsliq_m_n_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
    [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
        (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
@@ -2970,7 +2970,7 @@ (define_insn "mve_vsliq_m_n_<supf><mode>"
 	 VSLIQ_M_N))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vslit.%#<V_sz_elem>\t%q0, %q2, %3"
+  "vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2, %3"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 23/26] arm: [MVE intrinsics] rework vsliq
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (20 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 22/26] arm: [MVE intrinsics] factorize vsliq Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 24/26] arm: [MVE intrinsics] add ternary_rshift shape Christophe Lyon
                   ` (3 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vsliq using the new MVE builtins framework.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vsliq): New.
	* config/arm/arm-mve-builtins-base.def (vsliq): New.
	* config/arm/arm-mve-builtins-base.h (vsliq): New.
	* config/arm/arm-mve-builtins.cc
	(function_instance::has_inactive_argument): Handle vsliq.
	* config/arm/arm_mve.h (vsliq): Remove.
	(vsliq_m): Remove.
	(vsliq_n_u8): Remove.
	(vsliq_n_s8): Remove.
	(vsliq_n_u16): Remove.
	(vsliq_n_s16): Remove.
	(vsliq_n_u32): Remove.
	(vsliq_n_s32): Remove.
	(vsliq_m_n_s8): Remove.
	(vsliq_m_n_s32): Remove.
	(vsliq_m_n_s16): Remove.
	(vsliq_m_n_u8): Remove.
	(vsliq_m_n_u32): Remove.
	(vsliq_m_n_u16): Remove.
	(__arm_vsliq_n_u8): Remove.
	(__arm_vsliq_n_s8): Remove.
	(__arm_vsliq_n_u16): Remove.
	(__arm_vsliq_n_s16): Remove.
	(__arm_vsliq_n_u32): Remove.
	(__arm_vsliq_n_s32): Remove.
	(__arm_vsliq_m_n_s8): Remove.
	(__arm_vsliq_m_n_s32): Remove.
	(__arm_vsliq_m_n_s16): Remove.
	(__arm_vsliq_m_n_u8): Remove.
	(__arm_vsliq_m_n_u32): Remove.
	(__arm_vsliq_m_n_u16): Remove.
	(__arm_vsliq): Remove.
	(__arm_vsliq_m): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   1 +
 gcc/config/arm/arm-mve-builtins-base.def |   1 +
 gcc/config/arm/arm-mve-builtins-base.h   |   1 +
 gcc/config/arm/arm-mve-builtins.cc       |   3 +-
 gcc/config/arm/arm_mve.h                 | 212 -----------------------
 5 files changed, 5 insertions(+), 213 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index b1440ca489e..873c7d365f3 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -387,6 +387,7 @@ FUNCTION_WITH_M_N_R (vshlq, VSHLQ)
 FUNCTION_ONLY_N_NO_F (vshrnbq, VSHRNBQ)
 FUNCTION_ONLY_N_NO_F (vshrntq, VSHRNTQ)
 FUNCTION_ONLY_N_NO_F (vshrq, VSHRQ)
+FUNCTION_ONLY_N_NO_F (vsliq, VSLIQ)
 FUNCTION_WITH_RTX_M_N (vsubq, MINUS, VSUBQ)
 FUNCTION (vuninitializedq, vuninitializedq_impl,)
 
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index de4c473f618..2d1b87b90c3 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -140,6 +140,7 @@ DEF_MVE_FUNCTION (vshlq, binary_lshift_r, all_integer, m_or_none) // "_r" forms
 DEF_MVE_FUNCTION (vshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vshrq, binary_rshift, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vsliq, ternary_lshift, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vuninitializedq, inherent, all_integer_with_64, none)
 #undef REQUIRES_FLOAT
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index ec5b4fbffb9..84fff0f6d0e 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -160,6 +160,7 @@ extern const function_base *const vshlq;
 extern const function_base *const vshrnbq;
 extern const function_base *const vshrntq;
 extern const function_base *const vshrq;
+extern const function_base *const vsliq;
 extern const function_base *const vsubq;
 extern const function_base *const vuninitializedq;
 
diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index 87fcbc31f2f..f5056bdd1bb 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -719,7 +719,8 @@ function_instance::has_inactive_argument () const
       || base == functions::vrshrnbq
       || base == functions::vrshrntq
       || base == functions::vshrnbq
-      || base == functions::vshrntq)
+      || base == functions::vshrntq
+      || base == functions::vsliq)
     return false;
 
   return true;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 72b50764963..72177f9c53e 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -55,7 +55,6 @@
 #define vbicq_m_n(__a, __imm, __p) __arm_vbicq_m_n(__a, __imm, __p)
 #define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
 #define vsriq(__a, __b, __imm) __arm_vsriq(__a, __b, __imm)
-#define vsliq(__a, __b, __imm) __arm_vsliq(__a, __b, __imm)
 #define vsriq_m(__a, __b, __imm, __p) __arm_vsriq_m(__a, __b, __imm, __p)
 #define vbicq_m(__inactive, __a, __b, __p) __arm_vbicq_m(__inactive, __a, __b, __p)
 #define vcaddq_rot270_m(__inactive, __a, __b, __p) __arm_vcaddq_rot270_m(__inactive, __a, __b, __p)
@@ -65,7 +64,6 @@
 #define vmullbq_int_m(__inactive, __a, __b, __p) __arm_vmullbq_int_m(__inactive, __a, __b, __p)
 #define vmulltq_int_m(__inactive, __a, __b, __p) __arm_vmulltq_int_m(__inactive, __a, __b, __p)
 #define vornq_m(__inactive, __a, __b, __p) __arm_vornq_m(__inactive, __a, __b, __p)
-#define vsliq_m(__a, __b, __imm, __p) __arm_vsliq_m(__a, __b, __imm, __p)
 #define vmullbq_poly_m(__inactive, __a, __b, __p) __arm_vmullbq_poly_m(__inactive, __a, __b, __p)
 #define vmulltq_poly_m(__inactive, __a, __b, __p) __arm_vmulltq_poly_m(__inactive, __a, __b, __p)
 #define vstrbq_scatter_offset(__base, __offset, __value) __arm_vstrbq_scatter_offset(__base, __offset, __value)
@@ -341,17 +339,11 @@
 #define vshlcq_s32(__a,  __b,  __imm) __arm_vshlcq_s32(__a,  __b,  __imm)
 #define vshlcq_u32(__a,  __b,  __imm) __arm_vshlcq_u32(__a,  __b,  __imm)
 #define vsriq_n_u8(__a, __b,  __imm) __arm_vsriq_n_u8(__a, __b,  __imm)
-#define vsliq_n_u8(__a, __b,  __imm) __arm_vsliq_n_u8(__a, __b,  __imm)
 #define vsriq_n_s8(__a, __b,  __imm) __arm_vsriq_n_s8(__a, __b,  __imm)
-#define vsliq_n_s8(__a, __b,  __imm) __arm_vsliq_n_s8(__a, __b,  __imm)
 #define vsriq_n_u16(__a, __b,  __imm) __arm_vsriq_n_u16(__a, __b,  __imm)
-#define vsliq_n_u16(__a, __b,  __imm) __arm_vsliq_n_u16(__a, __b,  __imm)
 #define vsriq_n_s16(__a, __b,  __imm) __arm_vsriq_n_s16(__a, __b,  __imm)
-#define vsliq_n_s16(__a, __b,  __imm) __arm_vsliq_n_s16(__a, __b,  __imm)
 #define vsriq_n_u32(__a, __b,  __imm) __arm_vsriq_n_u32(__a, __b,  __imm)
-#define vsliq_n_u32(__a, __b,  __imm) __arm_vsliq_n_u32(__a, __b,  __imm)
 #define vsriq_n_s32(__a, __b,  __imm) __arm_vsriq_n_s32(__a, __b,  __imm)
-#define vsliq_n_s32(__a, __b,  __imm) __arm_vsliq_n_s32(__a, __b,  __imm)
 #define vcvtbq_m_f16_f32(__a, __b, __p) __arm_vcvtbq_m_f16_f32(__a, __b, __p)
 #define vcvtbq_m_f32_f16(__inactive, __a, __p) __arm_vcvtbq_m_f32_f16(__inactive, __a, __p)
 #define vcvttq_m_f16_f32(__a, __b, __p) __arm_vcvttq_m_f16_f32(__a, __b, __p)
@@ -432,12 +424,6 @@
 #define vornq_m_u8(__inactive, __a, __b, __p) __arm_vornq_m_u8(__inactive, __a, __b, __p)
 #define vornq_m_u32(__inactive, __a, __b, __p) __arm_vornq_m_u32(__inactive, __a, __b, __p)
 #define vornq_m_u16(__inactive, __a, __b, __p) __arm_vornq_m_u16(__inactive, __a, __b, __p)
-#define vsliq_m_n_s8(__a, __b,  __imm, __p) __arm_vsliq_m_n_s8(__a, __b,  __imm, __p)
-#define vsliq_m_n_s32(__a, __b,  __imm, __p) __arm_vsliq_m_n_s32(__a, __b,  __imm, __p)
-#define vsliq_m_n_s16(__a, __b,  __imm, __p) __arm_vsliq_m_n_s16(__a, __b,  __imm, __p)
-#define vsliq_m_n_u8(__a, __b,  __imm, __p) __arm_vsliq_m_n_u8(__a, __b,  __imm, __p)
-#define vsliq_m_n_u32(__a, __b,  __imm, __p) __arm_vsliq_m_n_u32(__a, __b,  __imm, __p)
-#define vsliq_m_n_u16(__a, __b,  __imm, __p) __arm_vsliq_m_n_u16(__a, __b,  __imm, __p)
 #define vmullbq_poly_m_p8(__inactive, __a, __b, __p) __arm_vmullbq_poly_m_p8(__inactive, __a, __b, __p)
 #define vmullbq_poly_m_p16(__inactive, __a, __b, __p) __arm_vmullbq_poly_m_p16(__inactive, __a, __b, __p)
 #define vmulltq_poly_m_p8(__inactive, __a, __b, __p) __arm_vmulltq_poly_m_p8(__inactive, __a, __b, __p)
@@ -1538,13 +1524,6 @@ __arm_vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm)
   return __builtin_mve_vsriq_n_uv16qi (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm)
-{
-  return __builtin_mve_vsliq_n_uv16qi (__a, __b, __imm);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm)
@@ -1552,13 +1531,6 @@ __arm_vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm)
   return __builtin_mve_vsriq_n_sv16qi (__a, __b, __imm);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm)
-{
-  return __builtin_mve_vsliq_n_sv16qi (__a, __b, __imm);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm)
@@ -1566,13 +1538,6 @@ __arm_vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm)
   return __builtin_mve_vsriq_n_uv8hi (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vsliq_n_uv8hi (__a, __b, __imm);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __imm)
@@ -1580,13 +1545,6 @@ __arm_vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __imm)
   return __builtin_mve_vsriq_n_sv8hi (__a, __b, __imm);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vsliq_n_sv8hi (__a, __b, __imm);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm)
@@ -1594,13 +1552,6 @@ __arm_vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm)
   return __builtin_mve_vsriq_n_uv4si (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vsliq_n_uv4si (__a, __b, __imm);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __imm)
@@ -1608,13 +1559,6 @@ __arm_vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __imm)
   return __builtin_mve_vsriq_n_sv4si (__a, __b, __imm);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vsliq_n_sv4si (__a, __b, __imm);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -1951,48 +1895,6 @@ __arm_vornq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pr
   return __builtin_mve_vornq_m_uv8hi (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq_m_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vsliq_m_n_sv16qi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq_m_n_s32 (int32x4_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vsliq_m_n_sv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq_m_n_s16 (int16x8_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vsliq_m_n_sv8hi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq_m_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vsliq_m_n_uv16qi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq_m_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vsliq_m_n_uv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq_m_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vsliq_m_n_uv8hi (__a, __b, __imm, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmullbq_poly_m_p8 (uint16x8_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
@@ -7010,13 +6912,6 @@ __arm_vsriq (uint8x16_t __a, uint8x16_t __b, const int __imm)
  return __arm_vsriq_n_u8 (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq (uint8x16_t __a, uint8x16_t __b, const int __imm)
-{
- return __arm_vsliq_n_u8 (__a, __b, __imm);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (int8x16_t __a, int8x16_t __b, const int __imm)
@@ -7024,13 +6919,6 @@ __arm_vsriq (int8x16_t __a, int8x16_t __b, const int __imm)
  return __arm_vsriq_n_s8 (__a, __b, __imm);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq (int8x16_t __a, int8x16_t __b, const int __imm)
-{
- return __arm_vsliq_n_s8 (__a, __b, __imm);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (uint16x8_t __a, uint16x8_t __b, const int __imm)
@@ -7038,13 +6926,6 @@ __arm_vsriq (uint16x8_t __a, uint16x8_t __b, const int __imm)
  return __arm_vsriq_n_u16 (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq (uint16x8_t __a, uint16x8_t __b, const int __imm)
-{
- return __arm_vsliq_n_u16 (__a, __b, __imm);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (int16x8_t __a, int16x8_t __b, const int __imm)
@@ -7052,13 +6933,6 @@ __arm_vsriq (int16x8_t __a, int16x8_t __b, const int __imm)
  return __arm_vsriq_n_s16 (__a, __b, __imm);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq (int16x8_t __a, int16x8_t __b, const int __imm)
-{
- return __arm_vsliq_n_s16 (__a, __b, __imm);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (uint32x4_t __a, uint32x4_t __b, const int __imm)
@@ -7066,13 +6940,6 @@ __arm_vsriq (uint32x4_t __a, uint32x4_t __b, const int __imm)
  return __arm_vsriq_n_u32 (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq (uint32x4_t __a, uint32x4_t __b, const int __imm)
-{
- return __arm_vsliq_n_u32 (__a, __b, __imm);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq (int32x4_t __a, int32x4_t __b, const int __imm)
@@ -7080,13 +6947,6 @@ __arm_vsriq (int32x4_t __a, int32x4_t __b, const int __imm)
  return __arm_vsriq_n_s32 (__a, __b, __imm);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq (int32x4_t __a, int32x4_t __b, const int __imm)
-{
- return __arm_vsliq_n_s32 (__a, __b, __imm);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vsriq_m (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
@@ -7423,48 +7283,6 @@ __arm_vornq_m (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16
  return __arm_vornq_m_u16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq_m (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vsliq_m_n_s8 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq_m (int32x4_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vsliq_m_n_s32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq_m (int16x8_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vsliq_m_n_s16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq_m (uint8x16_t __a, uint8x16_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vsliq_m_n_u8 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq_m (uint32x4_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vsliq_m_n_u32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsliq_m (uint16x8_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vsliq_m_n_u16 (__a, __b, __imm, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmullbq_poly_m (uint16x8_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
@@ -11356,16 +11174,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsriq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsriq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
-#define __arm_vsliq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vsliq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vsliq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vsliq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vsliq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsliq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsliq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vcvtaq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -12122,16 +11930,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vbicq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbicq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
 
-#define __arm_vsliq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vsliq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vsliq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vsliq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vsliq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsliq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsliq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vsriq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -12714,16 +12512,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_u16 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
   int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_u32 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
-#define __arm_vsliq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vsliq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t),  p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vsliq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t),  p2, p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vsliq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t),  p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vsliq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t),  p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsliq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t),  p2, p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsliq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t),  p2, p3));})
-
 #define __arm_vsriq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 24/26] arm: [MVE intrinsics] add ternary_rshift shape
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (21 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 23/26] arm: [MVE intrinsics] rework vsliq Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 25/26] arm: [MVE intrinsics] factorize vsriq Christophe Lyon
                   ` (2 subsequent siblings)
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the ternary_rshift shape description.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (ternary_rshift): New.
	* config/arm/arm-mve-builtins-shapes.h (ternary_rshift): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 38 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 39 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index a8e94b4f8f8..d4c30ed2e8c 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1467,6 +1467,44 @@ struct ternary_opt_n_def : public overloaded_base<0>
 };
 SHAPE (ternary_opt_n)
 
+/* <T0>_t vfoo[_t0](<T0>_t, <T0>_t, const int)
+
+   i.e. ternary operations that operate on a pair of vectors of the
+   same type as the destination, and take a third integer argument.
+
+   Check that 'imm' is in the [1..#bits] range.
+
+   Example: vsriq.
+   int8x16_t [__arm_]vsriq[_n_s8](int8x16_t a, int8x16_t b, const int imm)
+   int8x16_t [__arm_]vsriq_m[_n_s8](int8x16_t a, int8x16_t b, const int imm, mve_pred16_t p)  */
+struct ternary_rshift_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
+    build_all (b, "v0,v0,v0,ss32", group, MODE_n, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    return r.resolve_uniform (2, 1);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    if (c.mode_suffix_id != MODE_n)
+      return true;
+
+    unsigned int bits = c.type_suffix (0).element_bits;
+    return c.require_immediate_range (2, 1, bits);
+  }
+};
+SHAPE (ternary_rshift)
+
 /* <T0>_t vfoo[_t0](<T0>_t)
 
    i.e. the standard shape for unary operations that operate on
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index 73375186d82..a1842f5845c 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -64,6 +64,7 @@ namespace arm_mve
     extern const function_shape *const ternary_lshift;
     extern const function_shape *const ternary_n;
     extern const function_shape *const ternary_opt_n;
+    extern const function_shape *const ternary_rshift;
     extern const function_shape *const unary;
     extern const function_shape *const unary_acc;
     extern const function_shape *const unary_convert;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 25/26] arm: [MVE intrinsics] factorize vsriq
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (22 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 24/26] arm: [MVE intrinsics] add ternary_rshift shape Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12  9:38 ` [PATCH 26/26] arm: [MVE intrinsics] rework vsriq Christophe Lyon
  2023-05-12 10:17 ` [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Kyrylo Tkachov
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vsriq builtins so that they use parameterized names.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/iterators.md (mve_insn): Add vsri.
	* config/arm/mve.md (mve_vsriq_n_<supf><mode>): Rename into ...
	(@mve_<mve_insn>q_n_<supf><mode>): ... this.
	(mve_vsriq_m_n_<supf><mode>): Rename into ...
	(@mve_<mve_insn>q_m_n_<supf><mode>): ... this.
---
 gcc/config/arm/iterators.md | 2 ++
 gcc/config/arm/mve.md       | 8 ++++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 7e7219033cf..597c1dae640 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -1183,6 +1183,8 @@ (define_int_attr mve_insn [
 		 (VSHRQ_N_S "vshr") (VSHRQ_N_U "vshr")
 		 (VSLIQ_M_N_S "vsli") (VSLIQ_M_N_U "vsli")
 		 (VSLIQ_N_S "vsli") (VSLIQ_N_U "vsli")
+		 (VSRIQ_M_N_S "vsri") (VSRIQ_M_N_U "vsri")
+		 (VSRIQ_N_S "vsri") (VSRIQ_N_U "vsri")
 		 (VSUBQ_M_N_S "vsub") (VSUBQ_M_N_U "vsub") (VSUBQ_M_N_F "vsub")
 		 (VSUBQ_M_S "vsub") (VSUBQ_M_U "vsub") (VSUBQ_M_F "vsub")
 		 (VSUBQ_N_S "vsub") (VSUBQ_N_U "vsub") (VSUBQ_N_F "vsub")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index a1c2cad9d2e..85d701a66b3 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -2074,7 +2074,7 @@ (define_insn "@mve_<mve_insn>q_n_<supf><mode>"
 ;;
 ;; [vsriq_n_u, vsriq_n_s])
 ;;
-(define_insn "mve_vsriq_n_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
@@ -2083,7 +2083,7 @@ (define_insn "mve_vsriq_n_<supf><mode>"
 	 VSRIQ_N))
   ]
   "TARGET_HAVE_MVE"
-  "vsri.%#<V_sz_elem>\t%q0, %q2, %3"
+  "<mve_insn>.%#<V_sz_elem>\t%q0, %q2, %3"
   [(set_attr "type" "mve_move")
 ])
 
@@ -2641,7 +2641,7 @@ (define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
 ;;
 ;; [vsriq_m_n_s, vsriq_m_n_u])
 ;;
-(define_insn "mve_vsriq_m_n_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_m_n_<supf><mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
 	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
@@ -2651,7 +2651,7 @@ (define_insn "mve_vsriq_m_n_<supf><mode>"
 	 VSRIQ_M_N))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vsrit.%#<V_sz_elem>\t%q0, %q2, %3"
+  "vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %q2, %3"
   [(set_attr "type" "mve_move")
    (set_attr "length" "8")])
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH 26/26] arm: [MVE intrinsics] rework vsriq
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (23 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 25/26] arm: [MVE intrinsics] factorize vsriq Christophe Lyon
@ 2023-05-12  9:38 ` Christophe Lyon
  2023-05-12 10:17 ` [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Kyrylo Tkachov
  25 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12  9:38 UTC (permalink / raw)
  To: gcc-patches, kyrylo.tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vsriq using the new MVE builtins framework.

2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vsriq): New.
	* config/arm/arm-mve-builtins-base.def (vsriq): New.
	* config/arm/arm-mve-builtins-base.h (vsriq): New.
	* config/arm/arm-mve-builtins.cc
	(function_instance::has_inactive_argument): Handle vsriq.
	* config/arm/arm_mve.h (vsriq): Remove.
	(vsriq_m): Remove.
	(vsriq_n_u8): Remove.
	(vsriq_n_s8): Remove.
	(vsriq_n_u16): Remove.
	(vsriq_n_s16): Remove.
	(vsriq_n_u32): Remove.
	(vsriq_n_s32): Remove.
	(vsriq_m_n_s8): Remove.
	(vsriq_m_n_u8): Remove.
	(vsriq_m_n_s16): Remove.
	(vsriq_m_n_u16): Remove.
	(vsriq_m_n_s32): Remove.
	(vsriq_m_n_u32): Remove.
	(__arm_vsriq_n_u8): Remove.
	(__arm_vsriq_n_s8): Remove.
	(__arm_vsriq_n_u16): Remove.
	(__arm_vsriq_n_s16): Remove.
	(__arm_vsriq_n_u32): Remove.
	(__arm_vsriq_n_s32): Remove.
	(__arm_vsriq_m_n_s8): Remove.
	(__arm_vsriq_m_n_u8): Remove.
	(__arm_vsriq_m_n_s16): Remove.
	(__arm_vsriq_m_n_u16): Remove.
	(__arm_vsriq_m_n_s32): Remove.
	(__arm_vsriq_m_n_u32): Remove.
	(__arm_vsriq): Remove.
	(__arm_vsriq_m): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   1 +
 gcc/config/arm/arm-mve-builtins-base.def |   1 +
 gcc/config/arm/arm-mve-builtins-base.h   |   1 +
 gcc/config/arm/arm-mve-builtins.cc       |   3 +-
 gcc/config/arm/arm_mve.h                 | 212 -----------------------
 5 files changed, 5 insertions(+), 213 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 873c7d365f3..af02397f1c4 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -388,6 +388,7 @@ FUNCTION_ONLY_N_NO_F (vshrnbq, VSHRNBQ)
 FUNCTION_ONLY_N_NO_F (vshrntq, VSHRNTQ)
 FUNCTION_ONLY_N_NO_F (vshrq, VSHRQ)
 FUNCTION_ONLY_N_NO_F (vsliq, VSLIQ)
+FUNCTION_ONLY_N_NO_F (vsriq, VSRIQ)
 FUNCTION_WITH_RTX_M_N (vsubq, MINUS, VSUBQ)
 FUNCTION (vuninitializedq, vuninitializedq_impl,)
 
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 2d1b87b90c3..ee08d063407 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -141,6 +141,7 @@ DEF_MVE_FUNCTION (vshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vshrq, binary_rshift, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vsliq, ternary_lshift, all_integer, m_or_none)
+DEF_MVE_FUNCTION (vsriq, ternary_rshift, all_integer, m_or_none)
 DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vuninitializedq, inherent, all_integer_with_64, none)
 #undef REQUIRES_FLOAT
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 84fff0f6d0e..942c8587446 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -161,6 +161,7 @@ extern const function_base *const vshrnbq;
 extern const function_base *const vshrntq;
 extern const function_base *const vshrq;
 extern const function_base *const vsliq;
+extern const function_base *const vsriq;
 extern const function_base *const vsubq;
 extern const function_base *const vuninitializedq;
 
diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index f5056bdd1bb..7033e41a571 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -720,7 +720,8 @@ function_instance::has_inactive_argument () const
       || base == functions::vrshrntq
       || base == functions::vshrnbq
       || base == functions::vshrntq
-      || base == functions::vsliq)
+      || base == functions::vsliq
+      || base == functions::vsriq)
     return false;
 
   return true;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 72177f9c53e..1774e6eca2b 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -54,8 +54,6 @@
 #define vmullbq_poly(__a, __b) __arm_vmullbq_poly(__a, __b)
 #define vbicq_m_n(__a, __imm, __p) __arm_vbicq_m_n(__a, __imm, __p)
 #define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
-#define vsriq(__a, __b, __imm) __arm_vsriq(__a, __b, __imm)
-#define vsriq_m(__a, __b, __imm, __p) __arm_vsriq_m(__a, __b, __imm, __p)
 #define vbicq_m(__inactive, __a, __b, __p) __arm_vbicq_m(__inactive, __a, __b, __p)
 #define vcaddq_rot270_m(__inactive, __a, __b, __p) __arm_vcaddq_rot270_m(__inactive, __a, __b, __p)
 #define vcaddq_rot90_m(__inactive, __a, __b, __p) __arm_vcaddq_rot90_m(__inactive, __a, __b, __p)
@@ -338,12 +336,6 @@
 #define vshlcq_u16(__a,  __b,  __imm) __arm_vshlcq_u16(__a,  __b,  __imm)
 #define vshlcq_s32(__a,  __b,  __imm) __arm_vshlcq_s32(__a,  __b,  __imm)
 #define vshlcq_u32(__a,  __b,  __imm) __arm_vshlcq_u32(__a,  __b,  __imm)
-#define vsriq_n_u8(__a, __b,  __imm) __arm_vsriq_n_u8(__a, __b,  __imm)
-#define vsriq_n_s8(__a, __b,  __imm) __arm_vsriq_n_s8(__a, __b,  __imm)
-#define vsriq_n_u16(__a, __b,  __imm) __arm_vsriq_n_u16(__a, __b,  __imm)
-#define vsriq_n_s16(__a, __b,  __imm) __arm_vsriq_n_s16(__a, __b,  __imm)
-#define vsriq_n_u32(__a, __b,  __imm) __arm_vsriq_n_u32(__a, __b,  __imm)
-#define vsriq_n_s32(__a, __b,  __imm) __arm_vsriq_n_s32(__a, __b,  __imm)
 #define vcvtbq_m_f16_f32(__a, __b, __p) __arm_vcvtbq_m_f16_f32(__a, __b, __p)
 #define vcvtbq_m_f32_f16(__inactive, __a, __p) __arm_vcvtbq_m_f32_f16(__inactive, __a, __p)
 #define vcvttq_m_f16_f32(__a, __b, __p) __arm_vcvttq_m_f16_f32(__a, __b, __p)
@@ -372,16 +364,10 @@
 #define vcvtnq_m_u32_f32(__inactive, __a, __p) __arm_vcvtnq_m_u32_f32(__inactive, __a, __p)
 #define vcvtpq_m_u32_f32(__inactive, __a, __p) __arm_vcvtpq_m_u32_f32(__inactive, __a, __p)
 #define vcvtq_m_u32_f32(__inactive, __a, __p) __arm_vcvtq_m_u32_f32(__inactive, __a, __p)
-#define vsriq_m_n_s8(__a, __b,  __imm, __p) __arm_vsriq_m_n_s8(__a, __b,  __imm, __p)
 #define vcvtq_m_n_f16_u16(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_f16_u16(__inactive, __a,  __imm6, __p)
-#define vsriq_m_n_u8(__a, __b,  __imm, __p) __arm_vsriq_m_n_u8(__a, __b,  __imm, __p)
 #define vcvtq_m_n_f16_s16(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_f16_s16(__inactive, __a,  __imm6, __p)
-#define vsriq_m_n_s16(__a, __b,  __imm, __p) __arm_vsriq_m_n_s16(__a, __b,  __imm, __p)
 #define vcvtq_m_n_f32_u32(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_f32_u32(__inactive, __a,  __imm6, __p)
-#define vsriq_m_n_u16(__a, __b,  __imm, __p) __arm_vsriq_m_n_u16(__a, __b,  __imm, __p)
 #define vcvtq_m_n_f32_s32(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_f32_s32(__inactive, __a,  __imm6, __p)
-#define vsriq_m_n_s32(__a, __b,  __imm, __p) __arm_vsriq_m_n_s32(__a, __b,  __imm, __p)
-#define vsriq_m_n_u32(__a, __b,  __imm, __p) __arm_vsriq_m_n_u32(__a, __b,  __imm, __p)
 #define vbicq_m_s8(__inactive, __a, __b, __p) __arm_vbicq_m_s8(__inactive, __a, __b, __p)
 #define vbicq_m_s32(__inactive, __a, __b, __p) __arm_vbicq_m_s32(__inactive, __a, __b, __p)
 #define vbicq_m_s16(__inactive, __a, __b, __p) __arm_vbicq_m_s16(__inactive, __a, __b, __p)
@@ -1517,90 +1503,6 @@ __arm_vshlcq_u32 (uint32x4_t __a, uint32_t * __b, const int __imm)
   return __res;
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm)
-{
-  return __builtin_mve_vsriq_n_uv16qi (__a, __b, __imm);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm)
-{
-  return __builtin_mve_vsriq_n_sv16qi (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vsriq_n_uv8hi (__a, __b, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __imm)
-{
-  return __builtin_mve_vsriq_n_sv8hi (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vsriq_n_uv4si (__a, __b, __imm);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __imm)
-{
-  return __builtin_mve_vsriq_n_sv4si (__a, __b, __imm);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq_m_n_s8 (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vsriq_m_n_sv16qi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq_m_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vsriq_m_n_uv16qi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq_m_n_s16 (int16x8_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vsriq_m_n_sv8hi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq_m_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vsriq_m_n_uv8hi (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq_m_n_s32 (int32x4_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vsriq_m_n_sv4si (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq_m_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
-  return __builtin_mve_vsriq_m_n_uv4si (__a, __b, __imm, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -6905,90 +6807,6 @@ __arm_vshlcq (uint32x4_t __a, uint32_t * __b, const int __imm)
  return __arm_vshlcq_u32 (__a, __b, __imm);
 }
 
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq (uint8x16_t __a, uint8x16_t __b, const int __imm)
-{
- return __arm_vsriq_n_u8 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq (int8x16_t __a, int8x16_t __b, const int __imm)
-{
- return __arm_vsriq_n_s8 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq (uint16x8_t __a, uint16x8_t __b, const int __imm)
-{
- return __arm_vsriq_n_u16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq (int16x8_t __a, int16x8_t __b, const int __imm)
-{
- return __arm_vsriq_n_s16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq (uint32x4_t __a, uint32x4_t __b, const int __imm)
-{
- return __arm_vsriq_n_u32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq (int32x4_t __a, int32x4_t __b, const int __imm)
-{
- return __arm_vsriq_n_s32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq_m (int8x16_t __a, int8x16_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vsriq_m_n_s8 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq_m (uint8x16_t __a, uint8x16_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vsriq_m_n_u8 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq_m (int16x8_t __a, int16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vsriq_m_n_s16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq_m (uint16x8_t __a, uint16x8_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vsriq_m_n_u16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq_m (int32x4_t __a, int32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vsriq_m_n_s32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsriq_m (uint32x4_t __a, uint32x4_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vsriq_m_n_u32 (__a, __b, __imm, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -11164,16 +10982,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
 
-#define __arm_vsriq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vsriq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vsriq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vsriq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vsriq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsriq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsriq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vcvtaq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -11930,16 +11738,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vbicq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbicq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
 
-#define __arm_vsriq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vsriq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vsriq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vsriq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vsriq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsriq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsriq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
 #define __arm_vbicq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -12512,16 +12310,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_u16 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
   int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_u32 (__ARM_mve_coerce1(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
-#define __arm_vsriq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vsriq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), p2, p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vsriq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vsriq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vsriq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vsriq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsriq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
-
 #define __arm_vhcaddq_rot270_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape
  2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
                   ` (24 preceding siblings ...)
  2023-05-12  9:38 ` [PATCH 26/26] arm: [MVE intrinsics] rework vsriq Christophe Lyon
@ 2023-05-12 10:17 ` Kyrylo Tkachov
  2023-05-12 10:38   ` Christophe Lyon
  25 siblings, 1 reply; 28+ messages in thread
From: Kyrylo Tkachov @ 2023-05-12 10:17 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford
  Cc: Christophe Lyon



> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@arm.com>
> Sent: Friday, May 12, 2023 10:39 AM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
> Subject: [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape
> 
> This patch adds the binary_widen_opt_n shape description.

This series is ok with one style nit fixed in this first patch...

> 
> 2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>
> 
> 	gcc/
> 	* config/arm/arm-mve-builtins-shapes.cc (binary_widen_opt_n):
> New.
> 	* config/arm/arm-mve-builtins-shapes.h (binary_widen_opt_n): New.
> ---
>  gcc/config/arm/arm-mve-builtins-shapes.cc | 49 +++++++++++++++++++++++
>  gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
>  2 files changed, 50 insertions(+)
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-
> mve-builtins-shapes.cc
> index 5a299a272f5..ee4bc3f8ea4 100644
> --- a/gcc/config/arm/arm-mve-builtins-shapes.cc
> +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
> @@ -1098,6 +1098,55 @@ struct binary_widen_n_def : public
> overloaded_base<0>
>  };
>  SHAPE (binary_widen_n)
> 
> +/* <T0:twice>_t vfoo[_t0](<T0>_t, <T0>_t)
> +   <T0:twice>_t vfoo[_n_t0](<T0>_t, <S0>_t)
> +
> +   Example: vqdmullbq.
> +   int32x4_t [__arm_]vqdmulltq[_n_s16](int16x8_t a, int16_t b)
> +   int32x4_t [__arm_]vqdmulltq_m[_n_s16](int32x4_t inactive, int16x8_t a,
> int16_t b, mve_pred16_t p)
> +   int32x4_t [__arm_]vqdmulltq[_s16](int16x8_t a, int16x8_t b)
> +   int32x4_t [__arm_]vqdmulltq_m[_s16](int32x4_t inactive, int16x8_t a,
> int16x8_t b, mve_pred16_t p)  */
> +struct binary_widen_opt_n_def : public overloaded_base<0>
> +{
> +  void
> +  build (function_builder &b, const function_group_info &group,
> +	 bool preserve_user_namespace) const override
> +  {
> +    b.add_overloaded_functions (group, MODE_none,
> preserve_user_namespace);
> +    build_all (b, "vw0,v0,v0", group, MODE_none,
> preserve_user_namespace);
> +    build_all (b, "vw0,v0,s0", group, MODE_n, preserve_user_namespace);
> +  }
> +
> +  tree
> +  resolve (function_resolver &r) const override
> +  {
> +    unsigned int i, nargs;
> +    type_suffix_index type;
> +    if (!r.check_gp_argument (2, i, nargs)
> +	|| (type = r.infer_vector_type (i - 1)) == NUM_TYPE_SUFFIXES)
> +      return error_mark_node;
> +
> +    type_suffix_index wide_suffix
> +      = find_type_suffix (type_suffixes[type].tclass,
> +			  type_suffixes[type].element_bits * 2);
> +
> +    /* Skip last argument, may be scalar, will be checked below by
> +       finish_opt_n_resolution.  */
> +    unsigned int last_arg = i--;
> +    for (; i > 0; i--)
> +      if (!r.require_matching_vector_type (i, type))
> +	return error_mark_node;
> +
> +    /* Check the inactive argument has the wide type.  */
> +    if ((r.pred == PRED_m)
> +	&& (r.infer_vector_type (0) != wide_suffix))
> +    return r.report_no_such_form (type);

Indentation is off here I think.
Thanks,
Kyrill

> +
> +    return r.finish_opt_n_resolution (last_arg, 0, type);
> +  }
> +};
> +SHAPE (binary_widen_opt_n)
> +
>  /* Shape for comparison operations that operate on
>     uniform types.
> 
> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-
> mve-builtins-shapes.h
> index a28cd6a1547..07b12b4af68 100644
> --- a/gcc/config/arm/arm-mve-builtins-shapes.h
> +++ b/gcc/config/arm/arm-mve-builtins-shapes.h
> @@ -53,6 +53,7 @@ namespace arm_mve
>      extern const function_shape *const binary_rshift_narrow;
>      extern const function_shape *const binary_rshift_narrow_unsigned;
>      extern const function_shape *const binary_widen_n;
> +    extern const function_shape *const binary_widen_opt_n;
>      extern const function_shape *const cmp;
>      extern const function_shape *const create;
>      extern const function_shape *const inherent;
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape
  2023-05-12 10:17 ` [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Kyrylo Tkachov
@ 2023-05-12 10:38   ` Christophe Lyon
  0 siblings, 0 replies; 28+ messages in thread
From: Christophe Lyon @ 2023-05-12 10:38 UTC (permalink / raw)
  To: Kyrylo Tkachov, gcc-patches, Richard Earnshaw, Richard Sandiford



On 5/12/23 12:17, Kyrylo Tkachov wrote:
> 
> 
>> -----Original Message-----
>> From: Christophe Lyon <christophe.lyon@arm.com>
>> Sent: Friday, May 12, 2023 10:39 AM
>> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
>> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
>> <Richard.Sandiford@arm.com>
>> Cc: Christophe Lyon <Christophe.Lyon@arm.com>
>> Subject: [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape
>>
>> This patch adds the binary_widen_opt_n shape description.
> 
> This series is ok with one style nit fixed in this first patch...
> 
>>
>> 2022-12-12  Christophe Lyon  <christophe.lyon@arm.com>
>>
>> 	gcc/
>> 	* config/arm/arm-mve-builtins-shapes.cc (binary_widen_opt_n):
>> New.
>> 	* config/arm/arm-mve-builtins-shapes.h (binary_widen_opt_n): New.
>> ---
>>   gcc/config/arm/arm-mve-builtins-shapes.cc | 49 +++++++++++++++++++++++
>>   gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
>>   2 files changed, 50 insertions(+)
>>
>> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-
>> mve-builtins-shapes.cc
>> index 5a299a272f5..ee4bc3f8ea4 100644
>> --- a/gcc/config/arm/arm-mve-builtins-shapes.cc
>> +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
>> @@ -1098,6 +1098,55 @@ struct binary_widen_n_def : public
>> overloaded_base<0>
>>   };
>>   SHAPE (binary_widen_n)
>>
>> +/* <T0:twice>_t vfoo[_t0](<T0>_t, <T0>_t)
>> +   <T0:twice>_t vfoo[_n_t0](<T0>_t, <S0>_t)
>> +
>> +   Example: vqdmullbq.
>> +   int32x4_t [__arm_]vqdmulltq[_n_s16](int16x8_t a, int16_t b)
>> +   int32x4_t [__arm_]vqdmulltq_m[_n_s16](int32x4_t inactive, int16x8_t a,
>> int16_t b, mve_pred16_t p)
>> +   int32x4_t [__arm_]vqdmulltq[_s16](int16x8_t a, int16x8_t b)
>> +   int32x4_t [__arm_]vqdmulltq_m[_s16](int32x4_t inactive, int16x8_t a,
>> int16x8_t b, mve_pred16_t p)  */
>> +struct binary_widen_opt_n_def : public overloaded_base<0>
>> +{
>> +  void
>> +  build (function_builder &b, const function_group_info &group,
>> +	 bool preserve_user_namespace) const override
>> +  {
>> +    b.add_overloaded_functions (group, MODE_none,
>> preserve_user_namespace);
>> +    build_all (b, "vw0,v0,v0", group, MODE_none,
>> preserve_user_namespace);
>> +    build_all (b, "vw0,v0,s0", group, MODE_n, preserve_user_namespace);
>> +  }
>> +
>> +  tree
>> +  resolve (function_resolver &r) const override
>> +  {
>> +    unsigned int i, nargs;
>> +    type_suffix_index type;
>> +    if (!r.check_gp_argument (2, i, nargs)
>> +	|| (type = r.infer_vector_type (i - 1)) == NUM_TYPE_SUFFIXES)
>> +      return error_mark_node;
>> +
>> +    type_suffix_index wide_suffix
>> +      = find_type_suffix (type_suffixes[type].tclass,
>> +			  type_suffixes[type].element_bits * 2);
>> +
>> +    /* Skip last argument, may be scalar, will be checked below by
>> +       finish_opt_n_resolution.  */
>> +    unsigned int last_arg = i--;
>> +    for (; i > 0; i--)
>> +      if (!r.require_matching_vector_type (i, type))
>> +	return error_mark_node;
>> +
>> +    /* Check the inactive argument has the wide type.  */
>> +    if ((r.pred == PRED_m)
>> +	&& (r.infer_vector_type (0) != wide_suffix))
>> +    return r.report_no_such_form (type);
> 
> Indentation is off here I think.

Indeed!

Thanks,

Christophe

> Thanks,
> Kyrill
> 
>> +
>> +    return r.finish_opt_n_resolution (last_arg, 0, type);
>> +  }
>> +};
>> +SHAPE (binary_widen_opt_n)
>> +
>>   /* Shape for comparison operations that operate on
>>      uniform types.
>>
>> diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-
>> mve-builtins-shapes.h
>> index a28cd6a1547..07b12b4af68 100644
>> --- a/gcc/config/arm/arm-mve-builtins-shapes.h
>> +++ b/gcc/config/arm/arm-mve-builtins-shapes.h
>> @@ -53,6 +53,7 @@ namespace arm_mve
>>       extern const function_shape *const binary_rshift_narrow;
>>       extern const function_shape *const binary_rshift_narrow_unsigned;
>>       extern const function_shape *const binary_widen_n;
>> +    extern const function_shape *const binary_widen_opt_n;
>>       extern const function_shape *const cmp;
>>       extern const function_shape *const create;
>>       extern const function_shape *const inherent;
>> --
>> 2.34.1
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2023-05-12 10:39 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-12  9:38 [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Christophe Lyon
2023-05-12  9:38 ` [PATCH 02/26] arm: [MVE intrinsics] factorize vqdmullbq vqdmulltq Christophe Lyon
2023-05-12  9:38 ` [PATCH 03/26] arm: [MVE intrinsics] rework " Christophe Lyon
2023-05-12  9:38 ` [PATCH 04/26] arm: [MVE intrinsics] factorize vrmlaldavhaq vrmlaldavhaxq vrmlsldavhaq vrmlsldavhaxq Christophe Lyon
2023-05-12  9:38 ` [PATCH 05/26] arm: [MVE intrinsics] rework " Christophe Lyon
2023-05-12  9:38 ` [PATCH 06/26] arm: [MVE intrinsics] add binary_lshift_unsigned shape Christophe Lyon
2023-05-12  9:38 ` [PATCH 07/26] arm: [MVE intrinsics] factorize vqshluq Christophe Lyon
2023-05-12  9:38 ` [PATCH 08/26] arm: [MVE intrinsics] rework vqshluq Christophe Lyon
2023-05-12  9:38 ` [PATCH 09/26] arm: [MVE intrinsics] add binary_imm32 shape Christophe Lyon
2023-05-12  9:38 ` [PATCH 10/26] arm: [MVE intrinsics] factorize vbrsrq Christophe Lyon
2023-05-12  9:38 ` [PATCH 11/26] arm: [MVE intrinsics] rework vbrsrq Christophe Lyon
2023-05-12  9:38 ` [PATCH 12/26] arm: [MVE intrinsics] add mvn shape Christophe Lyon
2023-05-12  9:38 ` [PATCH 13/26] arm: [MVE intrinsics] factorize vmvnq Christophe Lyon
2023-05-12  9:38 ` [PATCH 14/26] arm: [MVE intrinsics] rework vmvnq Christophe Lyon
2023-05-12  9:38 ` [PATCH 15/26] arm: [MVE intrinsics] add ternary_opt_n shape Christophe Lyon
2023-05-12  9:38 ` [PATCH 16/26] arm: [MVE intrinsics] factorize vfmaq vfmsq vfmasq Christophe Lyon
2023-05-12  9:38 ` [PATCH 17/26] arm: [MVE intrinsics] rework vfmaq vfmasq vfmsq Christophe Lyon
2023-05-12  9:38 ` [PATCH 18/26] arm: [MVE intrinsics] factorize vpselq Christophe Lyon
2023-05-12  9:38 ` [PATCH 19/26] arm: [MVE intrinsics] add vpsel shape Christophe Lyon
2023-05-12  9:38 ` [PATCH 20/26] arm: [MVE intrinsics] rework vpselq Christophe Lyon
2023-05-12  9:38 ` [PATCH 21/26] arm: [MVE intrinsics] add ternary_lshift shape Christophe Lyon
2023-05-12  9:38 ` [PATCH 22/26] arm: [MVE intrinsics] factorize vsliq Christophe Lyon
2023-05-12  9:38 ` [PATCH 23/26] arm: [MVE intrinsics] rework vsliq Christophe Lyon
2023-05-12  9:38 ` [PATCH 24/26] arm: [MVE intrinsics] add ternary_rshift shape Christophe Lyon
2023-05-12  9:38 ` [PATCH 25/26] arm: [MVE intrinsics] factorize vsriq Christophe Lyon
2023-05-12  9:38 ` [PATCH 26/26] arm: [MVE intrinsics] rework vsriq Christophe Lyon
2023-05-12 10:17 ` [PATCH 01/26] arm: [MVE intrinsics] add binary_widen_opt_n shape Kyrylo Tkachov
2023-05-12 10:38   ` Christophe Lyon

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).