[PATCH 1/9] arm: [MVE intrinsics] factorize vmullbq vmulltq

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH 1/9] arm: [MVE intrinsics] factorize vmullbq vmulltq
@ 2023-08-14 18:34 Christophe Lyon
  2023-08-14 18:34 ` [PATCH 2/9] arm: [MVE intrinsics] add unspec_mve_function_exact_insn_vmull Christophe Lyon
                   ` (8 more replies)
  0 siblings, 9 replies; 12+ messages in thread
From: Christophe Lyon @ 2023-08-14 18:34 UTC (permalink / raw)
  To: gcc-patches, Kyrylo.Tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Factorize vmullbq, vmulltq so that they use the same parameterized
names.

2023-08-14  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	* config/arm/iterators.md (mve_insn): Add vmullb, vmullt.
	(isu): Add VMULLBQ_INT_S, VMULLBQ_INT_U, VMULLTQ_INT_S,
	VMULLTQ_INT_U.
	(supf): Add VMULLBQ_POLY_P, VMULLTQ_POLY_P, VMULLBQ_POLY_M_P,
	VMULLTQ_POLY_M_P.
	(VMULLBQ_INT, VMULLTQ_INT, VMULLBQ_INT_M, VMULLTQ_INT_M): Delete.
	(VMULLxQ_INT, VMULLxQ_POLY, VMULLxQ_INT_M, VMULLxQ_POLY_M): New.
	* config/arm/mve.md (mve_vmullbq_int_<supf><mode>)
	(mve_vmulltq_int_<supf><mode>): Merge into ...
	(@mve_<mve_insn>q_int_<supf><mode>) ... this.
	(mve_vmulltq_poly_p<mode>, mve_vmullbq_poly_p<mode>): Merge into ...
	(@mve_<mve_insn>q_poly_<supf><mode>): ... this.
	(mve_vmullbq_int_m_<supf><mode>, mve_vmulltq_int_m_<supf><mode>): Merge into ...
	(@mve_<mve_insn>q_int_m_<supf><mode>): ... this.
	(mve_vmullbq_poly_m_p<mode>, mve_vmulltq_poly_m_p<mode>): Merge into ...
	(@mve_<mve_insn>q_poly_m_<supf><mode>): ... this.
---
 gcc/config/arm/iterators.md |  23 +++++++--
 gcc/config/arm/mve.md       | 100 ++++++++----------------------------
 2 files changed, 38 insertions(+), 85 deletions(-)

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index b13ff53d36f..fb003bcd67b 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -917,6 +917,7 @@
 
 (define_int_attr mve_insn [
 		 (UNSPEC_VCADD90 "vcadd") (UNSPEC_VCADD270 "vcadd")
+		 (UNSPEC_VCMLA "vcmla") (UNSPEC_VCMLA90 "vcmla") (UNSPEC_VCMLA180 "vcmla") (UNSPEC_VCMLA270 "vcmla")
 		 (UNSPEC_VCMUL "vcmul") (UNSPEC_VCMUL90 "vcmul") (UNSPEC_VCMUL180 "vcmul") (UNSPEC_VCMUL270 "vcmul")
 		 (VABAVQ_P_S "vabav") (VABAVQ_P_U "vabav")
 		 (VABAVQ_S "vabav") (VABAVQ_U "vabav")
@@ -1044,6 +1045,13 @@
 		 (VMOVNTQ_S "vmovnt") (VMOVNTQ_U "vmovnt")
 		 (VMULHQ_M_S "vmulh") (VMULHQ_M_U "vmulh")
 		 (VMULHQ_S "vmulh") (VMULHQ_U "vmulh")
+		 (VMULLBQ_INT_M_S "vmullb") (VMULLBQ_INT_M_U "vmullb")
+		 (VMULLBQ_INT_S "vmullb") (VMULLBQ_INT_U "vmullb")
+		 (VMULLBQ_POLY_M_P "vmullb") (VMULLTQ_POLY_M_P "vmullt")
+		 (VMULLBQ_POLY_P "vmullb")
+		 (VMULLTQ_INT_M_S "vmullt") (VMULLTQ_INT_M_U "vmullt")
+		 (VMULLTQ_INT_S "vmullt") (VMULLTQ_INT_U "vmullt")
+		 (VMULLTQ_POLY_P "vmullt")
 		 (VMULQ_M_N_S "vmul") (VMULQ_M_N_U "vmul") (VMULQ_M_N_F "vmul")
 		 (VMULQ_M_S "vmul") (VMULQ_M_U "vmul") (VMULQ_M_F "vmul")
 		 (VMULQ_N_S "vmul") (VMULQ_N_U "vmul") (VMULQ_N_F "vmul")
@@ -1209,7 +1217,6 @@
 		 (VSUBQ_M_N_S "vsub") (VSUBQ_M_N_U "vsub") (VSUBQ_M_N_F "vsub")
 		 (VSUBQ_M_S "vsub") (VSUBQ_M_U "vsub") (VSUBQ_M_F "vsub")
 		 (VSUBQ_N_S "vsub") (VSUBQ_N_U "vsub") (VSUBQ_N_F "vsub")
-		 (UNSPEC_VCMLA "vcmla") (UNSPEC_VCMLA90 "vcmla") (UNSPEC_VCMLA180 "vcmla") (UNSPEC_VCMLA270 "vcmla")
 		 ])
 
 (define_int_attr isu    [
@@ -1246,6 +1253,8 @@
 		 (VMOVNBQ_S "i") (VMOVNBQ_U "i")
 		 (VMOVNTQ_M_S "i") (VMOVNTQ_M_U "i")
 		 (VMOVNTQ_S "i") (VMOVNTQ_U "i")
+		 (VMULLBQ_INT_S "s") (VMULLBQ_INT_U "u")
+		 (VMULLTQ_INT_S "s") (VMULLTQ_INT_U "u")
 		 (VNEGQ_M_S "s")
 		 (VQABSQ_M_S "s")
 		 (VQMOVNBQ_M_S "s") (VQMOVNBQ_M_U "u")
@@ -2330,6 +2339,10 @@
 		       (VMLADAVQ_U "u") (VMULHQ_S "s") (VMULHQ_U "u")
 		       (VMULLBQ_INT_S "s") (VMULLBQ_INT_U "u") (VQADDQ_S "s")
 		       (VMULLTQ_INT_S "s") (VMULLTQ_INT_U "u") (VQADDQ_U "u")
+		       (VMULLBQ_POLY_P "p")
+		       (VMULLTQ_POLY_P "p")
+		       (VMULLBQ_POLY_M_P "p")
+		       (VMULLTQ_POLY_M_P "p")
 		       (VMULQ_N_S "s") (VMULQ_N_U "u") (VMULQ_S "s")
 		       (VMULQ_U "u")
 		       (VQADDQ_N_S "s") (VQADDQ_N_U "u")
@@ -2713,8 +2726,8 @@
 (define_int_iterator VMINVQ [VMINVQ_U VMINVQ_S])
 (define_int_iterator VMLADAVQ [VMLADAVQ_U VMLADAVQ_S])
 (define_int_iterator VMULHQ [VMULHQ_S VMULHQ_U])
-(define_int_iterator VMULLBQ_INT [VMULLBQ_INT_U VMULLBQ_INT_S])
-(define_int_iterator VMULLTQ_INT [VMULLTQ_INT_U VMULLTQ_INT_S])
+(define_int_iterator VMULLxQ_INT [VMULLBQ_INT_U VMULLBQ_INT_S VMULLTQ_INT_U VMULLTQ_INT_S])
+(define_int_iterator VMULLxQ_POLY [VMULLBQ_POLY_P VMULLTQ_POLY_P])
 (define_int_iterator VMULQ [VMULQ_U VMULQ_S])
 (define_int_iterator VMULQ_N [VMULQ_N_U VMULQ_N_S])
 (define_int_iterator VQADDQ [VQADDQ_U VQADDQ_S])
@@ -2815,7 +2828,8 @@
 (define_int_iterator VSLIQ_M_N [VSLIQ_M_N_U VSLIQ_M_N_S])
 (define_int_iterator VRSHLQ_M [VRSHLQ_M_S VRSHLQ_M_U])
 (define_int_iterator VMINQ_M [VMINQ_M_S VMINQ_M_U])
-(define_int_iterator VMULLBQ_INT_M [VMULLBQ_INT_M_U VMULLBQ_INT_M_S])
+(define_int_iterator VMULLxQ_INT_M [VMULLBQ_INT_M_U VMULLBQ_INT_M_S VMULLTQ_INT_M_U VMULLTQ_INT_M_S])
+(define_int_iterator VMULLxQ_POLY_M [VMULLBQ_POLY_M_P VMULLTQ_POLY_M_P])
 (define_int_iterator VMULHQ_M [VMULHQ_M_S VMULHQ_M_U])
 (define_int_iterator VMULQ_M [VMULQ_M_S VMULQ_M_U])
 (define_int_iterator VHSUBQ_M_N [VHSUBQ_M_N_S VHSUBQ_M_N_U])
@@ -2844,7 +2858,6 @@
 (define_int_iterator VMLADAVAQ_P [VMLADAVAQ_P_U VMLADAVAQ_P_S])
 (define_int_iterator VBRSRQ_M_N [VBRSRQ_M_N_U VBRSRQ_M_N_S])
 (define_int_iterator VMULQ_M_N [VMULQ_M_N_U VMULQ_M_N_S])
-(define_int_iterator VMULLTQ_INT_M [VMULLTQ_INT_M_S VMULLTQ_INT_M_U])
 (define_int_iterator VEORQ_M [VEORQ_M_S VEORQ_M_U])
 (define_int_iterator VSHRQ_M_N [VSHRQ_M_N_S VSHRQ_M_N_U])
 (define_int_iterator VSUBQ_M_N [VSUBQ_M_N_S VSUBQ_M_N_U])
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index a2cbcff1a6f..2001e95a5f1 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -976,32 +976,18 @@
 ])
 
 ;;
-;; [vmullbq_int_u, vmullbq_int_s])
+;; [vmullbq_int_u, vmullbq_int_s]
+;; [vmulltq_int_u, vmulltq_int_s]
 ;;
-(define_insn "mve_vmullbq_int_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_int_<supf><mode>"
   [
    (set (match_operand:<V_double_width> 0 "s_register_operand" "<earlyclobber_32>")
 	(unspec:<V_double_width> [(match_operand:MVE_2 1 "s_register_operand" "w")
 				  (match_operand:MVE_2 2 "s_register_operand" "w")]
-	 VMULLBQ_INT))
+	 VMULLxQ_INT))
   ]
   "TARGET_HAVE_MVE"
-  "vmullb.<supf>%#<V_sz_elem>\t%q0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vmulltq_int_u, vmulltq_int_s])
-;;
-(define_insn "mve_vmulltq_int_<supf><mode>"
-  [
-   (set (match_operand:<V_double_width> 0 "s_register_operand" "<earlyclobber_32>")
-	(unspec:<V_double_width> [(match_operand:MVE_2 1 "s_register_operand" "w")
-				  (match_operand:MVE_2 2 "s_register_operand" "w")]
-	 VMULLTQ_INT))
-  ]
-  "TARGET_HAVE_MVE"
-  "vmullt.<supf>%#<V_sz_elem>\t%q0, %q1, %q2"
+  "<mve_insn>.<isu>%#<V_sz_elem>\t%q0, %q1, %q2"
   [(set_attr "type" "mve_move")
 ])
 
@@ -1528,32 +1514,18 @@
 ])
 
 ;;
-;; [vmulltq_poly_p])
-;;
-(define_insn "mve_vmulltq_poly_p<mode>"
-  [
-   (set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
-	(unspec:<V_double_width> [(match_operand:MVE_3 1 "s_register_operand" "w")
-				  (match_operand:MVE_3 2 "s_register_operand" "w")]
-	 VMULLTQ_POLY_P))
-  ]
-  "TARGET_HAVE_MVE"
-  "vmullt.p%#<V_sz_elem>\t%q0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-])
-
-;;
-;; [vmullbq_poly_p])
+;; [vmulltq_poly_p]
+;; [vmullbq_poly_p]
 ;;
-(define_insn "mve_vmullbq_poly_p<mode>"
+(define_insn "@mve_<mve_insn>q_poly_<supf><mode>"
   [
    (set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
 	(unspec:<V_double_width> [(match_operand:MVE_3 1 "s_register_operand" "w")
 				  (match_operand:MVE_3 2 "s_register_operand" "w")]
-	 VMULLBQ_POLY_P))
+	 VMULLxQ_POLY))
   ]
   "TARGET_HAVE_MVE"
-  "vmullb.p%#<V_sz_elem>\t%q0, %q1, %q2"
+  "<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1, %q2"
   [(set_attr "type" "mve_move")
 ])
 
@@ -2816,36 +2788,20 @@
    (set_attr "length""8")])
 
 ;;
-;; [vmullbq_int_m_u, vmullbq_int_m_s])
+;; [vmullbq_int_m_u, vmullbq_int_m_s]
+;; [vmulltq_int_m_s, vmulltq_int_m_u]
 ;;
-(define_insn "mve_vmullbq_int_m_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_int_m_<supf><mode>"
   [
    (set (match_operand:<V_double_width> 0 "s_register_operand" "<earlyclobber_32>")
 	(unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
 				  (match_operand:MVE_2 2 "s_register_operand" "w")
 				  (match_operand:MVE_2 3 "s_register_operand" "w")
 				  (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VMULLBQ_INT_M))
+	 VMULLxQ_INT_M))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vmullbt.<supf>%#<V_sz_elem>	%q0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vmulltq_int_m_s, vmulltq_int_m_u])
-;;
-(define_insn "mve_vmulltq_int_m_<supf><mode>"
-  [
-   (set (match_operand:<V_double_width> 0 "s_register_operand" "<earlyclobber_32>")
-	(unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
-				  (match_operand:MVE_2 2 "s_register_operand" "w")
-				  (match_operand:MVE_2 3 "s_register_operand" "w")
-				  (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VMULLTQ_INT_M))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vmulltt.<supf>%#<V_sz_elem>	%q0, %q2, %q3"
+  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %q3"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
@@ -3006,36 +2962,20 @@
    (set_attr "length""8")])
 
 ;;
-;; [vmullbq_poly_m_p])
+;; [vmullbq_poly_m_p]
+;; [vmulltq_poly_m_p]
 ;;
-(define_insn "mve_vmullbq_poly_m_p<mode>"
+(define_insn "@mve_<mve_insn>q_poly_m_<supf><mode>"
   [
    (set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
 	(unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
 		       (match_operand:MVE_3 2 "s_register_operand" "w")
 		       (match_operand:MVE_3 3 "s_register_operand" "w")
 		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VMULLBQ_POLY_M_P))
+	 VMULLxQ_POLY_M))
   ]
   "TARGET_HAVE_MVE"
-  "vpst\;vmullbt.p%#<V_sz_elem>\t%q0, %q2, %q3"
-  [(set_attr "type" "mve_move")
-   (set_attr "length""8")])
-
-;;
-;; [vmulltq_poly_m_p])
-;;
-(define_insn "mve_vmulltq_poly_m_p<mode>"
-  [
-   (set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
-	(unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
-		       (match_operand:MVE_3 2 "s_register_operand" "w")
-		       (match_operand:MVE_3 3 "s_register_operand" "w")
-		       (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
-	 VMULLTQ_POLY_M_P))
-  ]
-  "TARGET_HAVE_MVE"
-  "vpst\;vmulltt.p%#<V_sz_elem>\t%q0, %q2, %q3"
+  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %q3"
   [(set_attr "type" "mve_move")
    (set_attr "length""8")])
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 2/9] arm: [MVE intrinsics] add unspec_mve_function_exact_insn_vmull
  2023-08-14 18:34 [PATCH 1/9] arm: [MVE intrinsics] factorize vmullbq vmulltq Christophe Lyon
@ 2023-08-14 18:34 ` Christophe Lyon
  2023-08-14 18:34 ` [PATCH 3/9] arm: [MVE intrinsics] add binary_widen shape Christophe Lyon
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 12+ messages in thread
From: Christophe Lyon @ 2023-08-14 18:34 UTC (permalink / raw)
  To: gcc-patches, Kyrylo.Tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Introduce a function that will be used to build vmull intrinsics with
the _int variant.

2023-08-14  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	* config/arm/arm-mve-builtins-functions.h (class
	unspec_mve_function_exact_insn_vmull): New.
---
 gcc/config/arm/arm-mve-builtins-functions.h | 74 +++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-functions.h b/gcc/config/arm/arm-mve-builtins-functions.h
index a6573844319..c0fc450f886 100644
--- a/gcc/config/arm/arm-mve-builtins-functions.h
+++ b/gcc/config/arm/arm-mve-builtins-functions.h
@@ -838,6 +838,80 @@ public:
   }
 };
 
+
+/* Map the vmull-related function directly to CODE (UNSPEC, UNSPEC, M)
+   where M is the vector mode associated with type suffix 0.  We need
+   this special case because the builtins have _int in their
+   names.  */
+class unspec_mve_function_exact_insn_vmull : public function_base
+{
+public:
+  CONSTEXPR unspec_mve_function_exact_insn_vmull (int unspec_for_sint,
+						  int unspec_for_uint,
+						  int unspec_for_m_sint,
+						  int unspec_for_m_uint)
+    : m_unspec_for_sint (unspec_for_sint),
+      m_unspec_for_uint (unspec_for_uint),
+      m_unspec_for_m_sint (unspec_for_m_sint),
+      m_unspec_for_m_uint (unspec_for_m_uint)
+  {}
+
+  /* The unspec codes associated with signed-integer and
+     unsigned-integer operations respectively, for both the
+     unpredicated and the _m-predicated forms.  */
+  int m_unspec_for_sint;
+  int m_unspec_for_uint;
+  int m_unspec_for_m_sint;
+  int m_unspec_for_m_uint;
+
+  rtx
+  expand (function_expander &e) const override
+  {
+    insn_code code;
+
+    if (! e.type_suffix (0).integer_p)
+      gcc_unreachable ();
+
+    if (e.mode_suffix_id != MODE_none)
+      gcc_unreachable ();
+
+    switch (e.pred)
+      {
+      case PRED_none:
+	/* No predicate, no suffix.  */
+	if (e.type_suffix (0).unsigned_p)
+	  code = code_for_mve_q_int (m_unspec_for_uint, m_unspec_for_uint, e.vector_mode (0));
+	else
+	  code = code_for_mve_q_int (m_unspec_for_sint, m_unspec_for_sint, e.vector_mode (0));
+
+	return e.use_exact_insn (code);
+
+      case PRED_m:
+	/* No suffix, "m" predicate.  */
+	if (e.type_suffix (0).unsigned_p)
+	  code = code_for_mve_q_int_m (m_unspec_for_m_uint, m_unspec_for_m_uint, e.vector_mode (0));
+	else
+	  code = code_for_mve_q_int_m (m_unspec_for_m_sint, m_unspec_for_m_sint, e.vector_mode (0));
+
+	return e.use_cond_insn (code, 0);
+
+      case PRED_x:
+	/* No suffix, "x" predicate.  */
+	if (e.type_suffix (0).unsigned_p)
+	  code = code_for_mve_q_int_m (m_unspec_for_m_uint, m_unspec_for_m_uint, e.vector_mode (0));
+	else
+	  code = code_for_mve_q_int_m (m_unspec_for_m_sint, m_unspec_for_m_sint, e.vector_mode (0));
+
+	return e.use_pred_x_insn (code);
+
+      default:
+	gcc_unreachable ();
+      }
+
+    gcc_unreachable ();
+  }
+};
+
 } /* end namespace arm_mve */
 
 /* Declare the global function base NAME, creating it from an instance
-- 
2.34.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 3/9] arm: [MVE intrinsics] add binary_widen shape
  2023-08-14 18:34 [PATCH 1/9] arm: [MVE intrinsics] factorize vmullbq vmulltq Christophe Lyon
  2023-08-14 18:34 ` [PATCH 2/9] arm: [MVE intrinsics] add unspec_mve_function_exact_insn_vmull Christophe Lyon
@ 2023-08-14 18:34 ` Christophe Lyon
  2023-08-14 18:34 ` [PATCH 4/9] arm: [MVE intrinsics] rework vmullbq_int vmulltq_int Christophe Lyon
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 12+ messages in thread
From: Christophe Lyon @ 2023-08-14 18:34 UTC (permalink / raw)
  To: gcc-patches, Kyrylo.Tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the binary_widen shape description.

2023-08-14  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/:

	* config/arm/arm-mve-builtins-shapes.cc (binary_widen): New.
	* config/arm/arm-mve-builtins-shapes.h (binary_widen): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 42 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  5 +--
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 1f22201ac95..c8eb3351ef2 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1129,6 +1129,48 @@ struct binary_rshift_narrow_unsigned_def : public overloaded_base<0>
 };
 SHAPE (binary_rshift_narrow_unsigned)
 
+/* <T0:twice>_t vfoo[_t0](<T0>_t, <T0>_t)
+
+   Example: vmullbq_int.
+   int32x4_t [__arm_]vmullbq_int[_s16](int16x8_t a, int16x8_t b)
+   int32x4_t [__arm_]vmullbq_int_m[_s16](int32x4_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p)
+   int32x4_t [__arm_]vmullbq_int_x[_s16](int16x8_t a, int16x8_t b, mve_pred16_t p)  */
+struct binary_widen_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+    build_all (b, "vw0,v0,v0", group, MODE_none, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    type_suffix_index type;
+    if (!r.check_gp_argument (2, i, nargs)
+	|| (type = r.infer_vector_type (i - 1)) == NUM_TYPE_SUFFIXES)
+      return error_mark_node;
+
+    type_suffix_index wide_suffix
+      = find_type_suffix (type_suffixes[type].tclass,
+			  type_suffixes[type].element_bits * 2);
+
+    if (!r.require_matching_vector_type (i, type))
+      return error_mark_node;
+
+    /* Check the inactive argument has the wide type.  */
+    if ((r.pred == PRED_m)
+	&& (r.infer_vector_type (0) != wide_suffix))
+      return r.report_no_such_form (type);
+
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+};
+SHAPE (binary_widen)
+
 /* <T0:twice>_t vfoo[_n_t0](<T0>_t, const int)
 
    Check that 'imm' is in the [1..#bits] range.
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index a1842f5845c..fa6ec4fc002 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -35,13 +35,13 @@ namespace arm_mve
   {
 
     extern const function_shape *const binary;
-    extern const function_shape *const binary_lshift;
-    extern const function_shape *const binary_lshift_r;
     extern const function_shape *const binary_acc_int32;
     extern const function_shape *const binary_acc_int64;
     extern const function_shape *const binary_acca_int32;
     extern const function_shape *const binary_acca_int64;
     extern const function_shape *const binary_imm32;
+    extern const function_shape *const binary_lshift;
+    extern const function_shape *const binary_lshift_r;
     extern const function_shape *const binary_lshift_unsigned;
     extern const function_shape *const binary_maxamina;
     extern const function_shape *const binary_maxavminav;
@@ -54,6 +54,7 @@ namespace arm_mve
     extern const function_shape *const binary_rshift;
     extern const function_shape *const binary_rshift_narrow;
     extern const function_shape *const binary_rshift_narrow_unsigned;
+    extern const function_shape *const binary_widen;
     extern const function_shape *const binary_widen_n;
     extern const function_shape *const binary_widen_opt_n;
     extern const function_shape *const cmp;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 4/9] arm: [MVE intrinsics] rework vmullbq_int vmulltq_int
  2023-08-14 18:34 [PATCH 1/9] arm: [MVE intrinsics] factorize vmullbq vmulltq Christophe Lyon
  2023-08-14 18:34 ` [PATCH 2/9] arm: [MVE intrinsics] add unspec_mve_function_exact_insn_vmull Christophe Lyon
  2023-08-14 18:34 ` [PATCH 3/9] arm: [MVE intrinsics] add binary_widen shape Christophe Lyon
@ 2023-08-14 18:34 ` Christophe Lyon
  2023-08-14 18:34 ` [PATCH 5/9] arm: [MVE intrinsics] add support for p8 and p16 polynomial types Christophe Lyon
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 12+ messages in thread
From: Christophe Lyon @ 2023-08-14 18:34 UTC (permalink / raw)
  To: gcc-patches, Kyrylo.Tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vmullbq_int, vmulltq_int using the new MVE builtins
framework.

2023-08-14  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vmullbq_int, vmulltq_int):
	New.
	* config/arm/arm-mve-builtins-base.def (vmullbq_int, vmulltq_int):
	New.
	* config/arm/arm-mve-builtins-base.h (vmullbq_int, vmulltq_int):
	New.
	* config/arm/arm_mve.h (vmulltq_int): Remove.
	(vmullbq_int): Remove.
	(vmullbq_int_m): Remove.
	(vmulltq_int_m): Remove.
	(vmullbq_int_x): Remove.
	(vmulltq_int_x): Remove.
	(vmulltq_int_u8): Remove.
	(vmullbq_int_u8): Remove.
	(vmulltq_int_s8): Remove.
	(vmullbq_int_s8): Remove.
	(vmulltq_int_u16): Remove.
	(vmullbq_int_u16): Remove.
	(vmulltq_int_s16): Remove.
	(vmullbq_int_s16): Remove.
	(vmulltq_int_u32): Remove.
	(vmullbq_int_u32): Remove.
	(vmulltq_int_s32): Remove.
	(vmullbq_int_s32): Remove.
	(vmullbq_int_m_s8): Remove.
	(vmullbq_int_m_s32): Remove.
	(vmullbq_int_m_s16): Remove.
	(vmullbq_int_m_u8): Remove.
	(vmullbq_int_m_u32): Remove.
	(vmullbq_int_m_u16): Remove.
	(vmulltq_int_m_s8): Remove.
	(vmulltq_int_m_s32): Remove.
	(vmulltq_int_m_s16): Remove.
	(vmulltq_int_m_u8): Remove.
	(vmulltq_int_m_u32): Remove.
	(vmulltq_int_m_u16): Remove.
	(vmullbq_int_x_s8): Remove.
	(vmullbq_int_x_s16): Remove.
	(vmullbq_int_x_s32): Remove.
	(vmullbq_int_x_u8): Remove.
	(vmullbq_int_x_u16): Remove.
	(vmullbq_int_x_u32): Remove.
	(vmulltq_int_x_s8): Remove.
	(vmulltq_int_x_s16): Remove.
	(vmulltq_int_x_s32): Remove.
	(vmulltq_int_x_u8): Remove.
	(vmulltq_int_x_u16): Remove.
	(vmulltq_int_x_u32): Remove.
	(__arm_vmulltq_int_u8): Remove.
	(__arm_vmullbq_int_u8): Remove.
	(__arm_vmulltq_int_s8): Remove.
	(__arm_vmullbq_int_s8): Remove.
	(__arm_vmulltq_int_u16): Remove.
	(__arm_vmullbq_int_u16): Remove.
	(__arm_vmulltq_int_s16): Remove.
	(__arm_vmullbq_int_s16): Remove.
	(__arm_vmulltq_int_u32): Remove.
	(__arm_vmullbq_int_u32): Remove.
	(__arm_vmulltq_int_s32): Remove.
	(__arm_vmullbq_int_s32): Remove.
	(__arm_vmullbq_int_m_s8): Remove.
	(__arm_vmullbq_int_m_s32): Remove.
	(__arm_vmullbq_int_m_s16): Remove.
	(__arm_vmullbq_int_m_u8): Remove.
	(__arm_vmullbq_int_m_u32): Remove.
	(__arm_vmullbq_int_m_u16): Remove.
	(__arm_vmulltq_int_m_s8): Remove.
	(__arm_vmulltq_int_m_s32): Remove.
	(__arm_vmulltq_int_m_s16): Remove.
	(__arm_vmulltq_int_m_u8): Remove.
	(__arm_vmulltq_int_m_u32): Remove.
	(__arm_vmulltq_int_m_u16): Remove.
	(__arm_vmullbq_int_x_s8): Remove.
	(__arm_vmullbq_int_x_s16): Remove.
	(__arm_vmullbq_int_x_s32): Remove.
	(__arm_vmullbq_int_x_u8): Remove.
	(__arm_vmullbq_int_x_u16): Remove.
	(__arm_vmullbq_int_x_u32): Remove.
	(__arm_vmulltq_int_x_s8): Remove.
	(__arm_vmulltq_int_x_s16): Remove.
	(__arm_vmulltq_int_x_s32): Remove.
	(__arm_vmulltq_int_x_u8): Remove.
	(__arm_vmulltq_int_x_u16): Remove.
	(__arm_vmulltq_int_x_u32): Remove.
	(__arm_vmulltq_int): Remove.
	(__arm_vmullbq_int): Remove.
	(__arm_vmullbq_int_m): Remove.
	(__arm_vmulltq_int_m): Remove.
	(__arm_vmullbq_int_x): Remove.
	(__arm_vmulltq_int_x): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   2 +
 gcc/config/arm/arm-mve-builtins-base.def |   2 +
 gcc/config/arm/arm-mve-builtins-base.h   |   2 +
 gcc/config/arm/arm_mve.h                 | 648 -----------------------
 4 files changed, 6 insertions(+), 648 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index e31095ae112..3620c56865d 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -329,6 +329,8 @@ FUNCTION_WITHOUT_N_NO_F (vmovltq, VMOVLTQ)
 FUNCTION_WITHOUT_N_NO_F (vmovnbq, VMOVNBQ)
 FUNCTION_WITHOUT_N_NO_F (vmovntq, VMOVNTQ)
 FUNCTION_WITHOUT_N_NO_F (vmulhq, VMULHQ)
+FUNCTION (vmullbq_int, unspec_mve_function_exact_insn_vmull, (VMULLBQ_INT_S, VMULLBQ_INT_U, VMULLBQ_INT_M_S, VMULLBQ_INT_M_U))
+FUNCTION (vmulltq_int, unspec_mve_function_exact_insn_vmull, (VMULLTQ_INT_S, VMULLTQ_INT_U, VMULLTQ_INT_M_S, VMULLTQ_INT_M_U))
 FUNCTION_WITH_RTX_M_N (vmulq, MULT, VMULQ)
 FUNCTION_WITH_RTX_M_N_NO_F (vmvnq, NOT, VMVNQ)
 FUNCTION (vnegq, unspec_based_mve_function_exact_insn, (NEG, NEG, NEG, -1, -1, -1, VNEGQ_M_S, -1, VNEGQ_M_F, -1, -1, -1))
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index e7d466f2efd..db811bec479 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -78,6 +78,8 @@ DEF_MVE_FUNCTION (vmovltq, unary_widen, integer_8_16, mx_or_none)
 DEF_MVE_FUNCTION (vmovnbq, binary_move_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vmovntq, binary_move_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vmulhq, binary, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vmullbq_int, binary_widen, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vmulltq_int, binary_widen, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vmulq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vmvnq, mvn, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vnegq, unary, all_signed, mx_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index be3698b4f4c..5652fb7c701 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -102,6 +102,8 @@ extern const function_base *const vmovltq;
 extern const function_base *const vmovnbq;
 extern const function_base *const vmovntq;
 extern const function_base *const vmulhq;
+extern const function_base *const vmullbq_int;
+extern const function_base *const vmulltq_int;
 extern const function_base *const vmulq;
 extern const function_base *const vmvnq;
 extern const function_base *const vnegq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 88b2e77ffd9..837864aaf29 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -43,16 +43,12 @@
 #ifndef __ARM_MVE_PRESERVE_USER_NAMESPACE
 #define vst4q(__addr, __value) __arm_vst4q(__addr, __value)
 #define vornq(__a, __b) __arm_vornq(__a, __b)
-#define vmulltq_int(__a, __b) __arm_vmulltq_int(__a, __b)
-#define vmullbq_int(__a, __b) __arm_vmullbq_int(__a, __b)
 #define vbicq(__a, __b) __arm_vbicq(__a, __b)
 #define vmulltq_poly(__a, __b) __arm_vmulltq_poly(__a, __b)
 #define vmullbq_poly(__a, __b) __arm_vmullbq_poly(__a, __b)
 #define vbicq_m_n(__a, __imm, __p) __arm_vbicq_m_n(__a, __imm, __p)
 #define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
 #define vbicq_m(__inactive, __a, __b, __p) __arm_vbicq_m(__inactive, __a, __b, __p)
-#define vmullbq_int_m(__inactive, __a, __b, __p) __arm_vmullbq_int_m(__inactive, __a, __b, __p)
-#define vmulltq_int_m(__inactive, __a, __b, __p) __arm_vmulltq_int_m(__inactive, __a, __b, __p)
 #define vornq_m(__inactive, __a, __b, __p) __arm_vornq_m(__inactive, __a, __b, __p)
 #define vmullbq_poly_m(__inactive, __a, __b, __p) __arm_vmullbq_poly_m(__inactive, __a, __b, __p)
 #define vmulltq_poly_m(__inactive, __a, __b, __p) __arm_vmulltq_poly_m(__inactive, __a, __b, __p)
@@ -130,9 +126,7 @@
 #define viwdupq_x_u16(__a, __b, __imm, __p) __arm_viwdupq_x_u16(__a, __b, __imm, __p)
 #define viwdupq_x_u32(__a, __b, __imm, __p) __arm_viwdupq_x_u32(__a, __b, __imm, __p)
 #define vmullbq_poly_x(__a, __b, __p) __arm_vmullbq_poly_x(__a, __b, __p)
-#define vmullbq_int_x(__a, __b, __p) __arm_vmullbq_int_x(__a, __b, __p)
 #define vmulltq_poly_x(__a, __b, __p) __arm_vmulltq_poly_x(__a, __b, __p)
-#define vmulltq_int_x(__a, __b, __p) __arm_vmulltq_int_x(__a, __b, __p)
 #define vbicq_x(__a, __b, __p) __arm_vbicq_x(__a, __b, __p)
 #define vornq_x(__a, __b, __p) __arm_vornq_x(__a, __b, __p)
 #define vadciq(__a, __b, __carry_out) __arm_vadciq(__a, __b, __carry_out)
@@ -215,28 +209,16 @@
 #define vcvtq_n_u16_f16(__a,  __imm6) __arm_vcvtq_n_u16_f16(__a,  __imm6)
 #define vcvtq_n_u32_f32(__a,  __imm6) __arm_vcvtq_n_u32_f32(__a,  __imm6)
 #define vornq_u8(__a, __b) __arm_vornq_u8(__a, __b)
-#define vmulltq_int_u8(__a, __b) __arm_vmulltq_int_u8(__a, __b)
-#define vmullbq_int_u8(__a, __b) __arm_vmullbq_int_u8(__a, __b)
 #define vbicq_u8(__a, __b) __arm_vbicq_u8(__a, __b)
 #define vornq_s8(__a, __b) __arm_vornq_s8(__a, __b)
-#define vmulltq_int_s8(__a, __b) __arm_vmulltq_int_s8(__a, __b)
-#define vmullbq_int_s8(__a, __b) __arm_vmullbq_int_s8(__a, __b)
 #define vbicq_s8(__a, __b) __arm_vbicq_s8(__a, __b)
 #define vornq_u16(__a, __b) __arm_vornq_u16(__a, __b)
-#define vmulltq_int_u16(__a, __b) __arm_vmulltq_int_u16(__a, __b)
-#define vmullbq_int_u16(__a, __b) __arm_vmullbq_int_u16(__a, __b)
 #define vbicq_u16(__a, __b) __arm_vbicq_u16(__a, __b)
 #define vornq_s16(__a, __b) __arm_vornq_s16(__a, __b)
-#define vmulltq_int_s16(__a, __b) __arm_vmulltq_int_s16(__a, __b)
-#define vmullbq_int_s16(__a, __b) __arm_vmullbq_int_s16(__a, __b)
 #define vbicq_s16(__a, __b) __arm_vbicq_s16(__a, __b)
 #define vornq_u32(__a, __b) __arm_vornq_u32(__a, __b)
-#define vmulltq_int_u32(__a, __b) __arm_vmulltq_int_u32(__a, __b)
-#define vmullbq_int_u32(__a, __b) __arm_vmullbq_int_u32(__a, __b)
 #define vbicq_u32(__a, __b) __arm_vbicq_u32(__a, __b)
 #define vornq_s32(__a, __b) __arm_vornq_s32(__a, __b)
-#define vmulltq_int_s32(__a, __b) __arm_vmulltq_int_s32(__a, __b)
-#define vmullbq_int_s32(__a, __b) __arm_vmullbq_int_s32(__a, __b)
 #define vbicq_s32(__a, __b) __arm_vbicq_s32(__a, __b)
 #define vmulltq_poly_p8(__a, __b) __arm_vmulltq_poly_p8(__a, __b)
 #define vmullbq_poly_p8(__a, __b) __arm_vmullbq_poly_p8(__a, __b)
@@ -304,18 +286,6 @@
 #define vbicq_m_u8(__inactive, __a, __b, __p) __arm_vbicq_m_u8(__inactive, __a, __b, __p)
 #define vbicq_m_u32(__inactive, __a, __b, __p) __arm_vbicq_m_u32(__inactive, __a, __b, __p)
 #define vbicq_m_u16(__inactive, __a, __b, __p) __arm_vbicq_m_u16(__inactive, __a, __b, __p)
-#define vmullbq_int_m_s8(__inactive, __a, __b, __p) __arm_vmullbq_int_m_s8(__inactive, __a, __b, __p)
-#define vmullbq_int_m_s32(__inactive, __a, __b, __p) __arm_vmullbq_int_m_s32(__inactive, __a, __b, __p)
-#define vmullbq_int_m_s16(__inactive, __a, __b, __p) __arm_vmullbq_int_m_s16(__inactive, __a, __b, __p)
-#define vmullbq_int_m_u8(__inactive, __a, __b, __p) __arm_vmullbq_int_m_u8(__inactive, __a, __b, __p)
-#define vmullbq_int_m_u32(__inactive, __a, __b, __p) __arm_vmullbq_int_m_u32(__inactive, __a, __b, __p)
-#define vmullbq_int_m_u16(__inactive, __a, __b, __p) __arm_vmullbq_int_m_u16(__inactive, __a, __b, __p)
-#define vmulltq_int_m_s8(__inactive, __a, __b, __p) __arm_vmulltq_int_m_s8(__inactive, __a, __b, __p)
-#define vmulltq_int_m_s32(__inactive, __a, __b, __p) __arm_vmulltq_int_m_s32(__inactive, __a, __b, __p)
-#define vmulltq_int_m_s16(__inactive, __a, __b, __p) __arm_vmulltq_int_m_s16(__inactive, __a, __b, __p)
-#define vmulltq_int_m_u8(__inactive, __a, __b, __p) __arm_vmulltq_int_m_u8(__inactive, __a, __b, __p)
-#define vmulltq_int_m_u32(__inactive, __a, __b, __p) __arm_vmulltq_int_m_u32(__inactive, __a, __b, __p)
-#define vmulltq_int_m_u16(__inactive, __a, __b, __p) __arm_vmulltq_int_m_u16(__inactive, __a, __b, __p)
 #define vornq_m_s8(__inactive, __a, __b, __p) __arm_vornq_m_s8(__inactive, __a, __b, __p)
 #define vornq_m_s32(__inactive, __a, __b, __p) __arm_vornq_m_s32(__inactive, __a, __b, __p)
 #define vornq_m_s16(__inactive, __a, __b, __p) __arm_vornq_m_s16(__inactive, __a, __b, __p)
@@ -634,20 +604,8 @@
 #define viwdupq_x_wb_u32(__a, __b,  __imm, __p) __arm_viwdupq_x_wb_u32(__a, __b,  __imm, __p)
 #define vmullbq_poly_x_p8(__a, __b, __p) __arm_vmullbq_poly_x_p8(__a, __b, __p)
 #define vmullbq_poly_x_p16(__a, __b, __p) __arm_vmullbq_poly_x_p16(__a, __b, __p)
-#define vmullbq_int_x_s8(__a, __b, __p) __arm_vmullbq_int_x_s8(__a, __b, __p)
-#define vmullbq_int_x_s16(__a, __b, __p) __arm_vmullbq_int_x_s16(__a, __b, __p)
-#define vmullbq_int_x_s32(__a, __b, __p) __arm_vmullbq_int_x_s32(__a, __b, __p)
-#define vmullbq_int_x_u8(__a, __b, __p) __arm_vmullbq_int_x_u8(__a, __b, __p)
-#define vmullbq_int_x_u16(__a, __b, __p) __arm_vmullbq_int_x_u16(__a, __b, __p)
-#define vmullbq_int_x_u32(__a, __b, __p) __arm_vmullbq_int_x_u32(__a, __b, __p)
 #define vmulltq_poly_x_p8(__a, __b, __p) __arm_vmulltq_poly_x_p8(__a, __b, __p)
 #define vmulltq_poly_x_p16(__a, __b, __p) __arm_vmulltq_poly_x_p16(__a, __b, __p)
-#define vmulltq_int_x_s8(__a, __b, __p) __arm_vmulltq_int_x_s8(__a, __b, __p)
-#define vmulltq_int_x_s16(__a, __b, __p) __arm_vmulltq_int_x_s16(__a, __b, __p)
-#define vmulltq_int_x_s32(__a, __b, __p) __arm_vmulltq_int_x_s32(__a, __b, __p)
-#define vmulltq_int_x_u8(__a, __b, __p) __arm_vmulltq_int_x_u8(__a, __b, __p)
-#define vmulltq_int_x_u16(__a, __b, __p) __arm_vmulltq_int_x_u16(__a, __b, __p)
-#define vmulltq_int_x_u32(__a, __b, __p) __arm_vmulltq_int_x_u32(__a, __b, __p)
 #define vbicq_x_s8(__a, __b, __p) __arm_vbicq_x_s8(__a, __b, __p)
 #define vbicq_x_s16(__a, __b, __p) __arm_vbicq_x_s16(__a, __b, __p)
 #define vbicq_x_s32(__a, __b, __p) __arm_vbicq_x_s32(__a, __b, __p)
@@ -906,20 +864,6 @@ __arm_vornq_u8 (uint8x16_t __a, uint8x16_t __b)
   return __builtin_mve_vornq_uv16qi (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_u8 (uint8x16_t __a, uint8x16_t __b)
-{
-  return __builtin_mve_vmulltq_int_uv16qi (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_u8 (uint8x16_t __a, uint8x16_t __b)
-{
-  return __builtin_mve_vmullbq_int_uv16qi (__a, __b);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
@@ -934,20 +878,6 @@ __arm_vornq_s8 (int8x16_t __a, int8x16_t __b)
   return __builtin_mve_vornq_sv16qi (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vmulltq_int_sv16qi (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_s8 (int8x16_t __a, int8x16_t __b)
-{
-  return __builtin_mve_vmullbq_int_sv16qi (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_s8 (int8x16_t __a, int8x16_t __b)
@@ -962,20 +892,6 @@ __arm_vornq_u16 (uint16x8_t __a, uint16x8_t __b)
   return __builtin_mve_vornq_uv8hi (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_u16 (uint16x8_t __a, uint16x8_t __b)
-{
-  return __builtin_mve_vmulltq_int_uv8hi (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_u16 (uint16x8_t __a, uint16x8_t __b)
-{
-  return __builtin_mve_vmullbq_int_uv8hi (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
@@ -990,20 +906,6 @@ __arm_vornq_s16 (int16x8_t __a, int16x8_t __b)
   return __builtin_mve_vornq_sv8hi (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vmulltq_int_sv8hi (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_s16 (int16x8_t __a, int16x8_t __b)
-{
-  return __builtin_mve_vmullbq_int_sv8hi (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_s16 (int16x8_t __a, int16x8_t __b)
@@ -1018,20 +920,6 @@ __arm_vornq_u32 (uint32x4_t __a, uint32x4_t __b)
   return __builtin_mve_vornq_uv4si (__a, __b);
 }
 
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_u32 (uint32x4_t __a, uint32x4_t __b)
-{
-  return __builtin_mve_vmulltq_int_uv4si (__a, __b);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_u32 (uint32x4_t __a, uint32x4_t __b)
-{
-  return __builtin_mve_vmullbq_int_uv4si (__a, __b);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
@@ -1046,20 +934,6 @@ __arm_vornq_s32 (int32x4_t __a, int32x4_t __b)
   return __builtin_mve_vornq_sv4si (__a, __b);
 }
 
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vmulltq_int_sv4si (__a, __b);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_s32 (int32x4_t __a, int32x4_t __b)
-{
-  return __builtin_mve_vmullbq_int_sv4si (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_s32 (int32x4_t __a, int32x4_t __b)
@@ -1275,90 +1149,6 @@ __arm_vbicq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pr
   return __builtin_mve_vbicq_m_uv8hi (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_m_s8 (int16x8_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmullbq_int_m_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_m_s32 (int64x2_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmullbq_int_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_m_s16 (int32x4_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmullbq_int_m_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_m_u8 (uint16x8_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmullbq_int_m_uv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_m_u32 (uint64x2_t __inactive, uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmullbq_int_m_uv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_m_u16 (uint32x4_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmullbq_int_m_uv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_m_s8 (int16x8_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmulltq_int_m_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_m_s32 (int64x2_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmulltq_int_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_m_s16 (int32x4_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmulltq_int_m_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_m_u8 (uint16x8_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmulltq_int_m_uv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_m_u32 (uint64x2_t __inactive, uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmulltq_int_m_uv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_m_u16 (uint32x4_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmulltq_int_m_uv8hi (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -3354,48 +3144,6 @@ __arm_vmullbq_poly_x_p16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
   return __builtin_mve_vmullbq_poly_m_pv8hi (__arm_vuninitializedq_u32 (), __a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_x_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmullbq_int_m_sv16qi (__arm_vuninitializedq_s16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_x_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmullbq_int_m_sv8hi (__arm_vuninitializedq_s32 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_x_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmullbq_int_m_sv4si (__arm_vuninitializedq_s64 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_x_u8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmullbq_int_m_uv16qi (__arm_vuninitializedq_u16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_x_u16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmullbq_int_m_uv8hi (__arm_vuninitializedq_u32 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_x_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmullbq_int_m_uv4si (__arm_vuninitializedq_u64 (), __a, __b, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmulltq_poly_x_p8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
@@ -3410,48 +3158,6 @@ __arm_vmulltq_poly_x_p16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
   return __builtin_mve_vmulltq_poly_m_pv8hi (__arm_vuninitializedq_u32 (), __a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_x_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmulltq_int_m_sv16qi (__arm_vuninitializedq_s16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_x_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmulltq_int_m_sv8hi (__arm_vuninitializedq_s32 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_x_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmulltq_int_m_sv4si (__arm_vuninitializedq_s64 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_x_u8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmulltq_int_m_uv16qi (__arm_vuninitializedq_u16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_x_u16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmulltq_int_m_uv8hi (__arm_vuninitializedq_u32 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_x_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmulltq_int_m_uv4si (__arm_vuninitializedq_u64 (), __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_x_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -5508,20 +5214,6 @@ __arm_vornq (uint8x16_t __a, uint8x16_t __b)
  return __arm_vornq_u8 (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int (uint8x16_t __a, uint8x16_t __b)
-{
- return __arm_vmulltq_int_u8 (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int (uint8x16_t __a, uint8x16_t __b)
-{
- return __arm_vmullbq_int_u8 (__a, __b);
-}
-
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq (uint8x16_t __a, uint8x16_t __b)
@@ -5536,20 +5228,6 @@ __arm_vornq (int8x16_t __a, int8x16_t __b)
  return __arm_vornq_s8 (__a, __b);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vmulltq_int_s8 (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vmullbq_int_s8 (__a, __b);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq (int8x16_t __a, int8x16_t __b)
@@ -5564,20 +5242,6 @@ __arm_vornq (uint16x8_t __a, uint16x8_t __b)
  return __arm_vornq_u16 (__a, __b);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int (uint16x8_t __a, uint16x8_t __b)
-{
- return __arm_vmulltq_int_u16 (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int (uint16x8_t __a, uint16x8_t __b)
-{
- return __arm_vmullbq_int_u16 (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq (uint16x8_t __a, uint16x8_t __b)
@@ -5592,20 +5256,6 @@ __arm_vornq (int16x8_t __a, int16x8_t __b)
  return __arm_vornq_s16 (__a, __b);
 }
 
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vmulltq_int_s16 (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vmullbq_int_s16 (__a, __b);
-}
-
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq (int16x8_t __a, int16x8_t __b)
@@ -5620,20 +5270,6 @@ __arm_vornq (uint32x4_t __a, uint32x4_t __b)
  return __arm_vornq_u32 (__a, __b);
 }
 
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vmulltq_int_u32 (__a, __b);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vmullbq_int_u32 (__a, __b);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq (uint32x4_t __a, uint32x4_t __b)
@@ -5648,20 +5284,6 @@ __arm_vornq (int32x4_t __a, int32x4_t __b)
  return __arm_vornq_s32 (__a, __b);
 }
 
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vmulltq_int_s32 (__a, __b);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vmullbq_int_s32 (__a, __b);
-}
-
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq (int32x4_t __a, int32x4_t __b)
@@ -5837,90 +5459,6 @@ __arm_vbicq_m (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16
  return __arm_vbicq_m_u16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_m (int16x8_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmullbq_int_m_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_m (int64x2_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmullbq_int_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_m (int32x4_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmullbq_int_m_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_m (uint16x8_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmullbq_int_m_u8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_m (uint64x2_t __inactive, uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmullbq_int_m_u32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_m (uint32x4_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmullbq_int_m_u16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_m (int16x8_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmulltq_int_m_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_m (int64x2_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmulltq_int_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_m (int32x4_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmulltq_int_m_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_m (uint16x8_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmulltq_int_m_u8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_m (uint64x2_t __inactive, uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmulltq_int_m_u32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_m (uint32x4_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmulltq_int_m_u16 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vornq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -7489,48 +7027,6 @@ __arm_vmullbq_poly_x (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
  return __arm_vmullbq_poly_x_p16 (__a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_x (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmullbq_int_x_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_x (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmullbq_int_x_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_x (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmullbq_int_x_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_x (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmullbq_int_x_u8 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_x (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmullbq_int_x_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_int_x (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmullbq_int_x_u32 (__a, __b, __p);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vmulltq_poly_x (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
@@ -7545,48 +7041,6 @@ __arm_vmulltq_poly_x (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
  return __arm_vmulltq_poly_x_p16 (__a, __b, __p);
 }
 
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_x (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmulltq_int_x_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_x (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmulltq_int_x_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline int64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_x (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmulltq_int_x_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_x (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmulltq_int_x_u8 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_x (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmulltq_int_x_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint64x2_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_int_x (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vmulltq_int_x_u32 (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_x (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -9304,26 +8758,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_poly_p8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_poly_p16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)));})
 
-#define __arm_vmulltq_int(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulltq_int_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulltq_int_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulltq_int_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_int_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_int_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulltq_int_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
-#define __arm_vmullbq_int(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmullbq_int_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmullbq_int_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmullbq_int_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_int_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_int_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmullbq_int_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
 #define __arm_vbicq_m_n(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int16x8_t]: __arm_vbicq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
@@ -9842,26 +9276,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vornq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vornq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
 
-#define __arm_vmulltq_int(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulltq_int_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulltq_int_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulltq_int_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_int_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_int_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulltq_int_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
-#define __arm_vmullbq_int(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmullbq_int_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmullbq_int_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmullbq_int_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_int_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_int_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmullbq_int_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
 #define __arm_vbicq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
@@ -10163,32 +9577,12 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vuninitializedq_u32 (), \
   int (*)[__ARM_mve_type_uint64x2_t]: __arm_vuninitializedq_u64 ());})
 
-#define __arm_vmullbq_int_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmullbq_int_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmullbq_int_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmullbq_int_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_int_x_u8( __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_int_x_u16( __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmullbq_int_x_u32( __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
 #define __arm_vmullbq_poly_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
   int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_poly_x_p8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_poly_x_p16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));})
 
-#define __arm_vmulltq_int_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulltq_int_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulltq_int_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulltq_int_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_int_x_u8( __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_int_x_u16( __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulltq_int_x_u32( __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
 #define __arm_vmulltq_poly_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
@@ -10420,28 +9814,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_u16 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
   int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_u32 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
-#define __arm_vmullbq_int_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmullbq_int_m_s8 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmullbq_int_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmullbq_int_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_int_m_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_int_m_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint64x2_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmullbq_int_m_u32 (__ARM_mve_coerce(__p0, uint64x2_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
-#define __arm_vmulltq_int_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulltq_int_m_s8 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulltq_int_m_s16 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int64x2_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulltq_int_m_s32 (__ARM_mve_coerce(__p0, int64x2_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_int_m_u8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_int_m_u16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint64x2_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulltq_int_m_u32 (__ARM_mve_coerce(__p0, uint64x2_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
 #define __arm_vmulltq_poly_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
   __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
@@ -10574,32 +9946,12 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2, p3), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2, p3));})
 
-#define __arm_vmullbq_int_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmullbq_int_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmullbq_int_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmullbq_int_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_int_x_u8( __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_int_x_u16( __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmullbq_int_x_u32( __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
 #define __arm_vmullbq_poly_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
   int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_poly_x_p8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_poly_x_p16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));})
 
-#define __arm_vmulltq_int_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulltq_int_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
-  int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulltq_int_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
-  int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulltq_int_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_int_x_u8( __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_int_x_u16( __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmulltq_int_x_u32( __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
 #define __arm_vmulltq_poly_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 5/9] arm: [MVE intrinsics] add support for p8 and p16 polynomial types
  2023-08-14 18:34 [PATCH 1/9] arm: [MVE intrinsics] factorize vmullbq vmulltq Christophe Lyon
                   ` (2 preceding siblings ...)
  2023-08-14 18:34 ` [PATCH 4/9] arm: [MVE intrinsics] rework vmullbq_int vmulltq_int Christophe Lyon
@ 2023-08-14 18:34 ` Christophe Lyon
  2023-08-29  6:05   ` Prathamesh Kulkarni
  2023-08-14 18:34 ` [PATCH 6/9] arm: [MVE intrinsics] add support for U and p formats in parse_element_type Christophe Lyon
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 12+ messages in thread
From: Christophe Lyon @ 2023-08-14 18:34 UTC (permalink / raw)
  To: gcc-patches, Kyrylo.Tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Although they look like aliases for u8 and u16, we need to define them
so that we can handle p8 and p16 suffixes with the general framework.

They will be used by vmull[bt]q_poly intrinsics.

2023-08-14  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	* config/arm/arm-mve-builtins.cc (type_suffixes): Handle poly_p
	field.
	(TYPES_poly_8_16): New.
	(poly_8_16): New.
	* config/arm/arm-mve-builtins.def (p8): New type suffix.
	(p16): Likewise.
	* config/arm/arm-mve-builtins.h (enum type_class_index): Add
	TYPE_poly.
	(struct type_suffix_info): Add poly_p field.
---
 gcc/config/arm/arm-mve-builtins.cc  | 6 ++++++
 gcc/config/arm/arm-mve-builtins.def | 2 ++
 gcc/config/arm/arm-mve-builtins.h   | 5 ++++-
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index 7eec9d2861c..fa8b0ad36b3 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -128,6 +128,7 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
     TYPE_##CLASS == TYPE_signed || TYPE_##CLASS == TYPE_unsigned, \
     TYPE_##CLASS == TYPE_unsigned, \
     TYPE_##CLASS == TYPE_float, \
+    TYPE_##CLASS == TYPE_poly, \
     0, \
     MODE },
 #include "arm-mve-builtins.def"
@@ -177,6 +178,10 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
 #define TYPES_all_signed(S, D) \
   S (s8), S (s16), S (s32)
 
+/* _p8 _p16.  */
+#define TYPES_poly_8_16(S, D) \
+  S (p8), S (p16)
+
 /* _u8 _u16 _u32.  */
 #define TYPES_all_unsigned(S, D) \
   S (u8), S (u16), S (u32)
@@ -275,6 +280,7 @@ DEF_MVE_TYPES_ARRAY (integer_8);
 DEF_MVE_TYPES_ARRAY (integer_8_16);
 DEF_MVE_TYPES_ARRAY (integer_16_32);
 DEF_MVE_TYPES_ARRAY (integer_32);
+DEF_MVE_TYPES_ARRAY (poly_8_16);
 DEF_MVE_TYPES_ARRAY (signed_16_32);
 DEF_MVE_TYPES_ARRAY (signed_32);
 DEF_MVE_TYPES_ARRAY (reinterpret_integer);
diff --git a/gcc/config/arm/arm-mve-builtins.def b/gcc/config/arm/arm-mve-builtins.def
index e3f37876210..e2cf1baf370 100644
--- a/gcc/config/arm/arm-mve-builtins.def
+++ b/gcc/config/arm/arm-mve-builtins.def
@@ -63,6 +63,8 @@ DEF_MVE_TYPE_SUFFIX (u8, uint8x16_t, unsigned, 8, V16QImode)
 DEF_MVE_TYPE_SUFFIX (u16, uint16x8_t, unsigned, 16, V8HImode)
 DEF_MVE_TYPE_SUFFIX (u32, uint32x4_t, unsigned, 32, V4SImode)
 DEF_MVE_TYPE_SUFFIX (u64, uint64x2_t, unsigned, 64, V2DImode)
+DEF_MVE_TYPE_SUFFIX (p8, uint8x16_t, poly, 8, V16QImode)
+DEF_MVE_TYPE_SUFFIX (p16, uint16x8_t, poly, 16, V8HImode)
 #undef REQUIRES_FLOAT
 
 #define REQUIRES_FLOAT true
diff --git a/gcc/config/arm/arm-mve-builtins.h b/gcc/config/arm/arm-mve-builtins.h
index c9b51a0c77b..37b8223dfb2 100644
--- a/gcc/config/arm/arm-mve-builtins.h
+++ b/gcc/config/arm/arm-mve-builtins.h
@@ -146,6 +146,7 @@ enum type_class_index
   TYPE_float,
   TYPE_signed,
   TYPE_unsigned,
+  TYPE_poly,
   NUM_TYPE_CLASSES
 };
 
@@ -221,7 +222,9 @@ struct type_suffix_info
   unsigned int unsigned_p : 1;
   /* True if the suffix is for a floating-point type.  */
   unsigned int float_p : 1;
-  unsigned int spare : 13;
+  /* True if the suffix is for a polynomial type.  */
+  unsigned int poly_p : 1;
+  unsigned int spare : 12;
 
   /* The associated vector or predicate mode.  */
   machine_mode vector_mode : 16;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 6/9] arm: [MVE intrinsics] add support for U and p formats in parse_element_type
  2023-08-14 18:34 [PATCH 1/9] arm: [MVE intrinsics] factorize vmullbq vmulltq Christophe Lyon
                   ` (3 preceding siblings ...)
  2023-08-14 18:34 ` [PATCH 5/9] arm: [MVE intrinsics] add support for p8 and p16 polynomial types Christophe Lyon
@ 2023-08-14 18:34 ` Christophe Lyon
  2023-08-14 18:34 ` [PATCH 7/9] arm: [MVE intrinsics] add binary_widen_poly shape Christophe Lyon
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 12+ messages in thread
From: Christophe Lyon @ 2023-08-14 18:34 UTC (permalink / raw)
  To: gcc-patches, Kyrylo.Tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Introduce these two format specifiers to define the shape of
vmull[bt]q_poly intrinsics.

'U' is used to define a double-width unsigned type.
'p' is used to define an element of 'poly' type.

2023-08-14  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (parse_element_type): Add
	support for 'U' and 'p' format specifiers.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index c8eb3351ef2..761da4d8ece 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -61,10 +61,12 @@ apply_predication (const function_instance &instance, tree return_type,
 
    [01]    - the element type in type suffix 0 or 1 of INSTANCE.
    h<elt>  - a half-sized version of <elt>
+   p<elt>  - a poly type with the same width as <elt>
    s<bits> - a signed type with the given number of bits
    s[01]   - a signed type with the same width as type suffix 0 or 1
    u<bits> - an unsigned type with the given number of bits
    u[01]   - an unsigned type with the same width as type suffix 0 or 1
+   U<elt>  - an unsigned type with twice the width of <elt>
    w<elt>  - a double-sized version of <elt>
    x<bits> - a type with the given number of bits and same signedness
              as the next argument.
@@ -102,6 +104,20 @@ parse_element_type (const function_instance &instance, const char *&format)
 			       type_suffixes[suffix].element_bits * 2);
     }
 
+   if (ch == 'U')
+    {
+      type_suffix_index suffix = parse_element_type (instance, format);
+      return find_type_suffix (TYPE_unsigned,
+			       type_suffixes[suffix].element_bits * 2);
+    }
+
+   if (ch == 'p')
+    {
+      type_suffix_index suffix = parse_element_type (instance, format);
+      return find_type_suffix (TYPE_poly,
+			       type_suffixes[suffix].element_bits);
+    }
+
   if (ch == 'x')
     {
       const char *next = format;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 7/9] arm: [MVE intrinsics] add binary_widen_poly shape
  2023-08-14 18:34 [PATCH 1/9] arm: [MVE intrinsics] factorize vmullbq vmulltq Christophe Lyon
                   ` (4 preceding siblings ...)
  2023-08-14 18:34 ` [PATCH 6/9] arm: [MVE intrinsics] add support for U and p formats in parse_element_type Christophe Lyon
@ 2023-08-14 18:34 ` Christophe Lyon
  2023-08-14 18:34 ` [PATCH 8/9] arm: [MVE intrinsics] add unspec_mve_function_exact_insn_vmull_poly Christophe Lyon
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 12+ messages in thread
From: Christophe Lyon @ 2023-08-14 18:34 UTC (permalink / raw)
  To: gcc-patches, Kyrylo.Tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

This patch adds the binary_widen_poly shape description.

2023-08-14  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	* config/arm/arm-mve-builtins-shapes.cc (binary_widen_poly): New.
	* config/arm/arm-mve-builtins-shapes.h (binary_widen_poly): New.
---
 gcc/config/arm/arm-mve-builtins-shapes.cc | 49 +++++++++++++++++++++++
 gcc/config/arm/arm-mve-builtins-shapes.h  |  1 +
 2 files changed, 50 insertions(+)

diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index 761da4d8ece..23eb9d0e69b 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -1187,6 +1187,55 @@ struct binary_widen_def : public overloaded_base<0>
 };
 SHAPE (binary_widen)
 
+/* <T0:twice>_t vfoo[_t0](<T0>_t, <T0>_t)
+
+   Example: vmullbq_poly.
+   uint32x4_t [__arm_]vmullbq_poly[_p16](uint16x8_t a, uint16x8_t b)
+   uint32x4_t [__arm_]vmullbq_poly_m[_p16](uint32x4_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p)
+   uint32x4_t [__arm_]vmullbq_poly_x[_p16](uint16x8_t a, uint16x8_t b, mve_pred16_t p)  */
+struct binary_widen_poly_def : public overloaded_base<0>
+{
+  void
+  build (function_builder &b, const function_group_info &group,
+	 bool preserve_user_namespace) const override
+  {
+    b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+    build_all (b, "vU0,vp0,vp0", group, MODE_none, preserve_user_namespace);
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    unsigned int i, nargs;
+    type_suffix_index type;
+    if (!r.check_gp_argument (2, i, nargs)
+	|| (type = r.infer_vector_type (i - 1)) == NUM_TYPE_SUFFIXES)
+      return error_mark_node;
+
+    /* infer_vector_type found the 'unsigned' version of the 'poly'
+       type we are looking for, so find the 'poly' type with the same
+       width.  */
+    type = find_type_suffix (TYPE_poly, type_suffixes[type].element_bits);
+
+    type_suffix_index wide_suffix
+      = find_type_suffix (TYPE_unsigned,
+			  type_suffixes[type].element_bits * 2);
+
+    /* Require the 'poly' type; require_matching_vector_type would try
+       the 'unsigned' one and fail.  */
+    if (!r.require_vector_type (i, type_suffixes[type].vector_type))
+      return error_mark_node;
+
+    /* Check the inactive argument has the wide type.  */
+    if ((r.pred == PRED_m)
+	&& (r.infer_vector_type (0) != wide_suffix))
+      return r.report_no_such_form (type);
+
+    return r.resolve_to (r.mode_suffix_id, type);
+  }
+};
+SHAPE (binary_widen_poly)
+
 /* <T0:twice>_t vfoo[_n_t0](<T0>_t, const int)
 
    Check that 'imm' is in the [1..#bits] range.
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index fa6ec4fc002..a93245321c9 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -57,6 +57,7 @@ namespace arm_mve
     extern const function_shape *const binary_widen;
     extern const function_shape *const binary_widen_n;
     extern const function_shape *const binary_widen_opt_n;
+    extern const function_shape *const binary_widen_poly;
     extern const function_shape *const cmp;
     extern const function_shape *const create;
     extern const function_shape *const inherent;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 8/9] arm: [MVE intrinsics] add unspec_mve_function_exact_insn_vmull_poly
  2023-08-14 18:34 [PATCH 1/9] arm: [MVE intrinsics] factorize vmullbq vmulltq Christophe Lyon
                   ` (5 preceding siblings ...)
  2023-08-14 18:34 ` [PATCH 7/9] arm: [MVE intrinsics] add binary_widen_poly shape Christophe Lyon
@ 2023-08-14 18:34 ` Christophe Lyon
  2023-08-14 18:34 ` [PATCH 9/9] arm: [MVE intrinsics] rework vmullbq_poly vmulltq_poly Christophe Lyon
  2023-08-22 10:04 ` [PATCH 1/9] arm: [MVE intrinsics] factorize vmullbq vmulltq Kyrylo Tkachov
  8 siblings, 0 replies; 12+ messages in thread
From: Christophe Lyon @ 2023-08-14 18:34 UTC (permalink / raw)
  To: gcc-patches, Kyrylo.Tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Introduce a function that will be used to build vmull[bt]q_poly
intrinsics that use poly types.

2023-08-14  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	* config/arm/arm-mve-builtins-functions.h (class
	unspec_mve_function_exact_insn_vmull_poly): New.
---
 gcc/config/arm/arm-mve-builtins-functions.h | 56 ++++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/gcc/config/arm/arm-mve-builtins-functions.h b/gcc/config/arm/arm-mve-builtins-functions.h
index c0fc450f886..eba1f071af0 100644
--- a/gcc/config/arm/arm-mve-builtins-functions.h
+++ b/gcc/config/arm/arm-mve-builtins-functions.h
@@ -838,7 +838,6 @@ public:
   }
 };
 
-
 /* Map the vmull-related function directly to CODE (UNSPEC, UNSPEC, M)
    where M is the vector mode associated with type suffix 0.  We need
    this special case because the builtins have _int in their
@@ -912,6 +911,61 @@ public:
   }
 };
 
+/* Map the vmull_poly-related function directly to CODE (UNSPEC,
+   UNSPEC, M) where M is the vector mode associated with type suffix
+   0.  We need this special case because the builtins have _poly in
+   their names, and use the special poly type.  */
+class unspec_mve_function_exact_insn_vmull_poly : public function_base
+{
+public:
+  CONSTEXPR unspec_mve_function_exact_insn_vmull_poly (int unspec_for_poly,
+						       int unspec_for_m_poly)
+    : m_unspec_for_poly (unspec_for_poly),
+      m_unspec_for_m_poly (unspec_for_m_poly)
+  {}
+
+  /* The unspec codes associated with poly operations, without and
+     with the _m predicate respectively.  The _m code is also used
+     for the _x predicate.  */
+  int m_unspec_for_poly;
+  int m_unspec_for_m_poly;
+
+  rtx
+  expand (function_expander &e) const override
+  {
+    insn_code code;
+
+    if (e.mode_suffix_id != MODE_none)
+      gcc_unreachable ();
+
+    if (! e.type_suffix (0).poly_p)
+      gcc_unreachable ();
+
+    switch (e.pred)
+      {
+      case PRED_none:
+	/* No predicate, no suffix.  */
+	code = code_for_mve_q_poly (m_unspec_for_poly, m_unspec_for_poly, e.vector_mode (0));
+	return e.use_exact_insn (code);
+
+      case PRED_m:
+	/* No suffix, "m" predicate.  */
+	code = code_for_mve_q_poly_m (m_unspec_for_m_poly, m_unspec_for_m_poly, e.vector_mode (0));
+	return e.use_cond_insn (code, 0);
+
+      case PRED_x:
+	/* No suffix, "x" predicate.  */
+	code = code_for_mve_q_poly_m (m_unspec_for_m_poly, m_unspec_for_m_poly, e.vector_mode (0));
+	return e.use_pred_x_insn (code);
+
+      default:
+	gcc_unreachable ();
+      }
+
+    gcc_unreachable ();
+  }
+};
+
 } /* end namespace arm_mve */
 
 /* Declare the global function base NAME, creating it from an instance
-- 
2.34.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 9/9] arm: [MVE intrinsics] rework vmullbq_poly vmulltq_poly
  2023-08-14 18:34 [PATCH 1/9] arm: [MVE intrinsics] factorize vmullbq vmulltq Christophe Lyon
                   ` (6 preceding siblings ...)
  2023-08-14 18:34 ` [PATCH 8/9] arm: [MVE intrinsics] add unspec_mve_function_exact_insn_vmull_poly Christophe Lyon
@ 2023-08-14 18:34 ` Christophe Lyon
  2023-08-22 10:04 ` [PATCH 1/9] arm: [MVE intrinsics] factorize vmullbq vmulltq Kyrylo Tkachov
  8 siblings, 0 replies; 12+ messages in thread
From: Christophe Lyon @ 2023-08-14 18:34 UTC (permalink / raw)
  To: gcc-patches, Kyrylo.Tkachov, richard.earnshaw, richard.sandiford
  Cc: Christophe Lyon

Implement vmull[bt]q_poly using the new MVE builtins framework.

2023-08-14  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	* config/arm/arm-mve-builtins-base.cc (vmullbq_poly)
	(vmulltq_poly): New.
	* config/arm/arm-mve-builtins-base.def (vmullbq_poly)
	(vmulltq_poly): New.
	* config/arm/arm-mve-builtins-base.h (vmullbq_poly)
	(vmulltq_poly): New.
	* config/arm/arm_mve.h (vmulltq_poly): Remove.
	(vmullbq_poly): Remove.
	(vmullbq_poly_m): Remove.
	(vmulltq_poly_m): Remove.
	(vmullbq_poly_x): Remove.
	(vmulltq_poly_x): Remove.
	(vmulltq_poly_p8): Remove.
	(vmullbq_poly_p8): Remove.
	(vmulltq_poly_p16): Remove.
	(vmullbq_poly_p16): Remove.
	(vmullbq_poly_m_p8): Remove.
	(vmullbq_poly_m_p16): Remove.
	(vmulltq_poly_m_p8): Remove.
	(vmulltq_poly_m_p16): Remove.
	(vmullbq_poly_x_p8): Remove.
	(vmullbq_poly_x_p16): Remove.
	(vmulltq_poly_x_p8): Remove.
	(vmulltq_poly_x_p16): Remove.
	(__arm_vmulltq_poly_p8): Remove.
	(__arm_vmullbq_poly_p8): Remove.
	(__arm_vmulltq_poly_p16): Remove.
	(__arm_vmullbq_poly_p16): Remove.
	(__arm_vmullbq_poly_m_p8): Remove.
	(__arm_vmullbq_poly_m_p16): Remove.
	(__arm_vmulltq_poly_m_p8): Remove.
	(__arm_vmulltq_poly_m_p16): Remove.
	(__arm_vmullbq_poly_x_p8): Remove.
	(__arm_vmullbq_poly_x_p16): Remove.
	(__arm_vmulltq_poly_x_p8): Remove.
	(__arm_vmulltq_poly_x_p16): Remove.
	(__arm_vmulltq_poly): Remove.
	(__arm_vmullbq_poly): Remove.
	(__arm_vmullbq_poly_m): Remove.
	(__arm_vmulltq_poly_m): Remove.
	(__arm_vmullbq_poly_x): Remove.
	(__arm_vmulltq_poly_x): Remove.
---
 gcc/config/arm/arm-mve-builtins-base.cc  |   2 +
 gcc/config/arm/arm-mve-builtins-base.def |   2 +
 gcc/config/arm/arm-mve-builtins-base.h   |   2 +
 gcc/config/arm/arm_mve.h                 | 248 -----------------------
 4 files changed, 6 insertions(+), 248 deletions(-)

diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index 3620c56865d..ed5eba656c1 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -331,6 +331,8 @@ FUNCTION_WITHOUT_N_NO_F (vmovntq, VMOVNTQ)
 FUNCTION_WITHOUT_N_NO_F (vmulhq, VMULHQ)
 FUNCTION (vmullbq_int, unspec_mve_function_exact_insn_vmull, (VMULLBQ_INT_S, VMULLBQ_INT_U, VMULLBQ_INT_M_S, VMULLBQ_INT_M_U))
 FUNCTION (vmulltq_int, unspec_mve_function_exact_insn_vmull, (VMULLTQ_INT_S, VMULLTQ_INT_U, VMULLTQ_INT_M_S, VMULLTQ_INT_M_U))
+FUNCTION (vmullbq_poly, unspec_mve_function_exact_insn_vmull_poly, (VMULLBQ_POLY_P, VMULLBQ_POLY_M_P))
+FUNCTION (vmulltq_poly, unspec_mve_function_exact_insn_vmull_poly, (VMULLTQ_POLY_P, VMULLTQ_POLY_M_P))
 FUNCTION_WITH_RTX_M_N (vmulq, MULT, VMULQ)
 FUNCTION_WITH_RTX_M_N_NO_F (vmvnq, NOT, VMVNQ)
 FUNCTION (vnegq, unspec_based_mve_function_exact_insn, (NEG, NEG, NEG, -1, -1, -1, VNEGQ_M_S, -1, VNEGQ_M_F, -1, -1, -1))
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index db811bec479..01dfbdef8a3 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -80,6 +80,8 @@ DEF_MVE_FUNCTION (vmovntq, binary_move_narrow, integer_16_32, m_or_none)
 DEF_MVE_FUNCTION (vmulhq, binary, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vmullbq_int, binary_widen, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vmulltq_int, binary_widen, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vmullbq_poly, binary_widen_poly, poly_8_16, mx_or_none)
+DEF_MVE_FUNCTION (vmulltq_poly, binary_widen_poly, poly_8_16, mx_or_none)
 DEF_MVE_FUNCTION (vmulq, binary_opt_n, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vmvnq, mvn, all_integer, mx_or_none)
 DEF_MVE_FUNCTION (vnegq, unary, all_signed, mx_or_none)
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index 5652fb7c701..c574c32ac53 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -104,6 +104,8 @@ extern const function_base *const vmovntq;
 extern const function_base *const vmulhq;
 extern const function_base *const vmullbq_int;
 extern const function_base *const vmulltq_int;
+extern const function_base *const vmullbq_poly;
+extern const function_base *const vmulltq_poly;
 extern const function_base *const vmulq;
 extern const function_base *const vmvnq;
 extern const function_base *const vnegq;
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 837864aaf29..b82d94e59bd 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -44,14 +44,10 @@
 #define vst4q(__addr, __value) __arm_vst4q(__addr, __value)
 #define vornq(__a, __b) __arm_vornq(__a, __b)
 #define vbicq(__a, __b) __arm_vbicq(__a, __b)
-#define vmulltq_poly(__a, __b) __arm_vmulltq_poly(__a, __b)
-#define vmullbq_poly(__a, __b) __arm_vmullbq_poly(__a, __b)
 #define vbicq_m_n(__a, __imm, __p) __arm_vbicq_m_n(__a, __imm, __p)
 #define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
 #define vbicq_m(__inactive, __a, __b, __p) __arm_vbicq_m(__inactive, __a, __b, __p)
 #define vornq_m(__inactive, __a, __b, __p) __arm_vornq_m(__inactive, __a, __b, __p)
-#define vmullbq_poly_m(__inactive, __a, __b, __p) __arm_vmullbq_poly_m(__inactive, __a, __b, __p)
-#define vmulltq_poly_m(__inactive, __a, __b, __p) __arm_vmulltq_poly_m(__inactive, __a, __b, __p)
 #define vstrbq_scatter_offset(__base, __offset, __value) __arm_vstrbq_scatter_offset(__base, __offset, __value)
 #define vstrbq(__addr, __value) __arm_vstrbq(__addr, __value)
 #define vstrwq_scatter_base(__addr, __offset, __value) __arm_vstrwq_scatter_base(__addr, __offset, __value)
@@ -125,8 +121,6 @@
 #define viwdupq_x_u8(__a, __b, __imm, __p) __arm_viwdupq_x_u8(__a, __b, __imm, __p)
 #define viwdupq_x_u16(__a, __b, __imm, __p) __arm_viwdupq_x_u16(__a, __b, __imm, __p)
 #define viwdupq_x_u32(__a, __b, __imm, __p) __arm_viwdupq_x_u32(__a, __b, __imm, __p)
-#define vmullbq_poly_x(__a, __b, __p) __arm_vmullbq_poly_x(__a, __b, __p)
-#define vmulltq_poly_x(__a, __b, __p) __arm_vmulltq_poly_x(__a, __b, __p)
 #define vbicq_x(__a, __b, __p) __arm_vbicq_x(__a, __b, __p)
 #define vornq_x(__a, __b, __p) __arm_vornq_x(__a, __b, __p)
 #define vadciq(__a, __b, __carry_out) __arm_vadciq(__a, __b, __carry_out)
@@ -220,14 +214,10 @@
 #define vbicq_u32(__a, __b) __arm_vbicq_u32(__a, __b)
 #define vornq_s32(__a, __b) __arm_vornq_s32(__a, __b)
 #define vbicq_s32(__a, __b) __arm_vbicq_s32(__a, __b)
-#define vmulltq_poly_p8(__a, __b) __arm_vmulltq_poly_p8(__a, __b)
-#define vmullbq_poly_p8(__a, __b) __arm_vmullbq_poly_p8(__a, __b)
 #define vbicq_n_u16(__a,  __imm) __arm_vbicq_n_u16(__a,  __imm)
 #define vornq_f16(__a, __b) __arm_vornq_f16(__a, __b)
 #define vbicq_f16(__a, __b) __arm_vbicq_f16(__a, __b)
 #define vbicq_n_s16(__a,  __imm) __arm_vbicq_n_s16(__a,  __imm)
-#define vmulltq_poly_p16(__a, __b) __arm_vmulltq_poly_p16(__a, __b)
-#define vmullbq_poly_p16(__a, __b) __arm_vmullbq_poly_p16(__a, __b)
 #define vbicq_n_u32(__a,  __imm) __arm_vbicq_n_u32(__a,  __imm)
 #define vornq_f32(__a, __b) __arm_vornq_f32(__a, __b)
 #define vbicq_f32(__a, __b) __arm_vbicq_f32(__a, __b)
@@ -292,10 +282,6 @@
 #define vornq_m_u8(__inactive, __a, __b, __p) __arm_vornq_m_u8(__inactive, __a, __b, __p)
 #define vornq_m_u32(__inactive, __a, __b, __p) __arm_vornq_m_u32(__inactive, __a, __b, __p)
 #define vornq_m_u16(__inactive, __a, __b, __p) __arm_vornq_m_u16(__inactive, __a, __b, __p)
-#define vmullbq_poly_m_p8(__inactive, __a, __b, __p) __arm_vmullbq_poly_m_p8(__inactive, __a, __b, __p)
-#define vmullbq_poly_m_p16(__inactive, __a, __b, __p) __arm_vmullbq_poly_m_p16(__inactive, __a, __b, __p)
-#define vmulltq_poly_m_p8(__inactive, __a, __b, __p) __arm_vmulltq_poly_m_p8(__inactive, __a, __b, __p)
-#define vmulltq_poly_m_p16(__inactive, __a, __b, __p) __arm_vmulltq_poly_m_p16(__inactive, __a, __b, __p)
 #define vbicq_m_f32(__inactive, __a, __b, __p) __arm_vbicq_m_f32(__inactive, __a, __b, __p)
 #define vbicq_m_f16(__inactive, __a, __b, __p) __arm_vbicq_m_f16(__inactive, __a, __b, __p)
 #define vcvtq_m_n_s32_f32(__inactive, __a,  __imm6, __p) __arm_vcvtq_m_n_s32_f32(__inactive, __a,  __imm6, __p)
@@ -602,10 +588,6 @@
 #define viwdupq_x_wb_u8(__a, __b,  __imm, __p) __arm_viwdupq_x_wb_u8(__a, __b,  __imm, __p)
 #define viwdupq_x_wb_u16(__a, __b,  __imm, __p) __arm_viwdupq_x_wb_u16(__a, __b,  __imm, __p)
 #define viwdupq_x_wb_u32(__a, __b,  __imm, __p) __arm_viwdupq_x_wb_u32(__a, __b,  __imm, __p)
-#define vmullbq_poly_x_p8(__a, __b, __p) __arm_vmullbq_poly_x_p8(__a, __b, __p)
-#define vmullbq_poly_x_p16(__a, __b, __p) __arm_vmullbq_poly_x_p16(__a, __b, __p)
-#define vmulltq_poly_x_p8(__a, __b, __p) __arm_vmulltq_poly_x_p8(__a, __b, __p)
-#define vmulltq_poly_x_p16(__a, __b, __p) __arm_vmulltq_poly_x_p16(__a, __b, __p)
 #define vbicq_x_s8(__a, __b, __p) __arm_vbicq_x_s8(__a, __b, __p)
 #define vbicq_x_s16(__a, __b, __p) __arm_vbicq_x_s16(__a, __b, __p)
 #define vbicq_x_s32(__a, __b, __p) __arm_vbicq_x_s32(__a, __b, __p)
@@ -941,20 +923,6 @@ __arm_vbicq_s32 (int32x4_t __a, int32x4_t __b)
   return __builtin_mve_vbicq_sv4si (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_poly_p8 (uint8x16_t __a, uint8x16_t __b)
-{
-  return __builtin_mve_vmulltq_poly_pv16qi (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_poly_p8 (uint8x16_t __a, uint8x16_t __b)
-{
-  return __builtin_mve_vmullbq_poly_pv16qi (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_n_u16 (uint16x8_t __a, const int __imm)
@@ -969,20 +937,6 @@ __arm_vbicq_n_s16 (int16x8_t __a, const int __imm)
   return __builtin_mve_vbicq_n_sv8hi (__a, __imm);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_poly_p16 (uint16x8_t __a, uint16x8_t __b)
-{
-  return __builtin_mve_vmulltq_poly_pv8hi (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_poly_p16 (uint16x8_t __a, uint16x8_t __b)
-{
-  return __builtin_mve_vmullbq_poly_pv8hi (__a, __b);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_n_u32 (uint32x4_t __a, const int __imm)
@@ -1191,34 +1145,6 @@ __arm_vornq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pr
   return __builtin_mve_vornq_m_uv8hi (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_poly_m_p8 (uint16x8_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmullbq_poly_m_pv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_poly_m_p16 (uint32x4_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmullbq_poly_m_pv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_poly_m_p8 (uint16x8_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmulltq_poly_m_pv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_poly_m_p16 (uint32x4_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmulltq_poly_m_pv8hi (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vstrbq_scatter_offset_s8 (int8_t * __base, uint8x16_t __offset, int8x16_t __value)
@@ -3130,34 +3056,6 @@ __arm_viwdupq_x_wb_u32 (uint32_t *__a, uint32_t __b, const int __imm, mve_pred16
   return __res;
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_poly_x_p8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmullbq_poly_m_pv16qi (__arm_vuninitializedq_u16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_poly_x_p16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmullbq_poly_m_pv8hi (__arm_vuninitializedq_u32 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_poly_x_p8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmulltq_poly_m_pv16qi (__arm_vuninitializedq_u16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_poly_x_p16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
-  return __builtin_mve_vmulltq_poly_m_pv8hi (__arm_vuninitializedq_u32 (), __a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_x_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -5291,20 +5189,6 @@ __arm_vbicq (int32x4_t __a, int32x4_t __b)
  return __arm_vbicq_s32 (__a, __b);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_poly (uint8x16_t __a, uint8x16_t __b)
-{
- return __arm_vmulltq_poly_p8 (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_poly (uint8x16_t __a, uint8x16_t __b)
-{
- return __arm_vmullbq_poly_p8 (__a, __b);
-}
-
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq (uint16x8_t __a, const int __imm)
@@ -5319,20 +5203,6 @@ __arm_vbicq (int16x8_t __a, const int __imm)
  return __arm_vbicq_n_s16 (__a, __imm);
 }
 
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_poly (uint16x8_t __a, uint16x8_t __b)
-{
- return __arm_vmulltq_poly_p16 (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_poly (uint16x8_t __a, uint16x8_t __b)
-{
- return __arm_vmullbq_poly_p16 (__a, __b);
-}
-
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq (uint32x4_t __a, const int __imm)
@@ -5501,34 +5371,6 @@ __arm_vornq_m (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16
  return __arm_vornq_m_u16 (__inactive, __a, __b, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_poly_m (uint16x8_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmullbq_poly_m_p8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_poly_m (uint32x4_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmullbq_poly_m_p16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_poly_m (uint16x8_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmulltq_poly_m_p8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_poly_m (uint32x4_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmulltq_poly_m_p16 (__inactive, __a, __b, __p);
-}
-
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vstrbq_scatter_offset (int8_t * __base, uint8x16_t __offset, int8x16_t __value)
@@ -7013,34 +6855,6 @@ __arm_viwdupq_x_u32 (uint32_t *__a, uint32_t __b, const int __imm, mve_pred16_t
  return __arm_viwdupq_x_wb_u32 (__a, __b, __imm, __p);
 }
 
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_poly_x (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmullbq_poly_x_p8 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmullbq_poly_x (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmullbq_poly_x_p16 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_poly_x (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vmulltq_poly_x_p8 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vmulltq_poly_x (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vmulltq_poly_x_p16 (__a, __b, __p);
-}
-
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vbicq_x (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
@@ -8746,18 +8560,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vornq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \
   int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vornq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));})
 
-#define __arm_vmulltq_poly(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_poly_p8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_poly_p16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)));})
-
-#define __arm_vmullbq_poly(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_poly_p8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_poly_p16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)));})
-
 #define __arm_vbicq_m_n(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int16x8_t]: __arm_vbicq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
@@ -9290,18 +9092,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vbicq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
   int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vbicq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
 
-#define __arm_vmulltq_poly(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_poly_p8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_poly_p16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)));})
-
-#define __arm_vmullbq_poly(p0,p1) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_poly_p8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_poly_p16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)));})
-
 #define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
   _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
   int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
@@ -9577,18 +9367,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vuninitializedq_u32 (), \
   int (*)[__ARM_mve_type_uint64x2_t]: __arm_vuninitializedq_u64 ());})
 
-#define __arm_vmullbq_poly_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_poly_x_p8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_poly_x_p16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));})
-
-#define __arm_vmulltq_poly_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_poly_x_p8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_poly_x_p16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));})
-
 #define __arm_vornq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
   __typeof(p2) __p2 = (p2); \
   _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
@@ -9814,20 +9592,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_z_u16 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
   int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_z_u32 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
 
-#define __arm_vmulltq_poly_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_poly_m_p8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_poly_m_p16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));})
-
-#define __arm_vmullbq_poly_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
-  __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_poly_m_p8 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_poly_m_p16 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));})
-
 #define __arm_vldrbq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_s8(__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \
@@ -9946,18 +9710,6 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2, p3), \
   int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2, p3));})
 
-#define __arm_vmullbq_poly_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmullbq_poly_x_p8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmullbq_poly_x_p16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));})
-
-#define __arm_vmulltq_poly_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
-  __typeof(p2) __p2 = (p2); \
-  _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
-  int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vmulltq_poly_x_p8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
-  int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmulltq_poly_x_p16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3));})
-
 #define __arm_vstrbq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
   _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
   int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vstrbq_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t)), \
-- 
2.34.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH 1/9] arm: [MVE intrinsics] factorize vmullbq vmulltq
  2023-08-14 18:34 [PATCH 1/9] arm: [MVE intrinsics] factorize vmullbq vmulltq Christophe Lyon
                   ` (7 preceding siblings ...)
  2023-08-14 18:34 ` [PATCH 9/9] arm: [MVE intrinsics] rework vmullbq_poly vmulltq_poly Christophe Lyon
@ 2023-08-22 10:04 ` Kyrylo Tkachov
  8 siblings, 0 replies; 12+ messages in thread
From: Kyrylo Tkachov @ 2023-08-22 10:04 UTC (permalink / raw)
  To: Christophe Lyon, gcc-patches, Richard Earnshaw, Richard Sandiford

Hi Christophe,

> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@linaro.org>
> Sent: Monday, August 14, 2023 7:34 PM
> To: gcc-patches@gcc.gnu.org; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>;
> Richard Earnshaw <Richard.Earnshaw@arm.com>; Richard Sandiford
> <Richard.Sandiford@arm.com>
> Cc: Christophe Lyon <christophe.lyon@linaro.org>
> Subject: [PATCH 1/9] arm: [MVE intrinsics] factorize vmullbq vmulltq
> 
> Factorize vmullbq, vmulltq so that they use the same parameterized
> names.
> 
> 2023-08-14  Christophe Lyon  <christophe.lyon@linaro.org>
> 
> 	gcc/
> 	* config/arm/iterators.md (mve_insn): Add vmullb, vmullt.
> 	(isu): Add VMULLBQ_INT_S, VMULLBQ_INT_U, VMULLTQ_INT_S,
> 	VMULLTQ_INT_U.
> 	(supf): Add VMULLBQ_POLY_P, VMULLTQ_POLY_P,
> VMULLBQ_POLY_M_P,
> 	VMULLTQ_POLY_M_P.
> 	(VMULLBQ_INT, VMULLTQ_INT, VMULLBQ_INT_M, VMULLTQ_INT_M):
> Delete.
> 	(VMULLxQ_INT, VMULLxQ_POLY, VMULLxQ_INT_M,
> VMULLxQ_POLY_M): New.
> 	* config/arm/mve.md (mve_vmullbq_int_<supf><mode>)
> 	(mve_vmulltq_int_<supf><mode>): Merge into ...
> 	(@mve_<mve_insn>q_int_<supf><mode>) ... this.
> 	(mve_vmulltq_poly_p<mode>, mve_vmullbq_poly_p<mode>): Merge
> into ...
> 	(@mve_<mve_insn>q_poly_<supf><mode>): ... this.
> 	(mve_vmullbq_int_m_<supf><mode>,
> mve_vmulltq_int_m_<supf><mode>): Merge into ...
> 	(@mve_<mve_insn>q_int_m_<supf><mode>): ... this.
> 	(mve_vmullbq_poly_m_p<mode>, mve_vmulltq_poly_m_p<mode>):
> Merge into ...
> 	(@mve_<mve_insn>q_poly_m_<supf><mode>): ... this.

The series is okay and similar in design to your previous series in this area.
Thanks again for doing this rework.
Kyrill

> ---
>  gcc/config/arm/iterators.md |  23 +++++++--
>  gcc/config/arm/mve.md       | 100 ++++++++----------------------------
>  2 files changed, 38 insertions(+), 85 deletions(-)
> 
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index b13ff53d36f..fb003bcd67b 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -917,6 +917,7 @@
> 
>  (define_int_attr mve_insn [
>  		 (UNSPEC_VCADD90 "vcadd") (UNSPEC_VCADD270 "vcadd")
> +		 (UNSPEC_VCMLA "vcmla") (UNSPEC_VCMLA90 "vcmla")
> (UNSPEC_VCMLA180 "vcmla") (UNSPEC_VCMLA270 "vcmla")
>  		 (UNSPEC_VCMUL "vcmul") (UNSPEC_VCMUL90 "vcmul")
> (UNSPEC_VCMUL180 "vcmul") (UNSPEC_VCMUL270 "vcmul")
>  		 (VABAVQ_P_S "vabav") (VABAVQ_P_U "vabav")
>  		 (VABAVQ_S "vabav") (VABAVQ_U "vabav")
> @@ -1044,6 +1045,13 @@
>  		 (VMOVNTQ_S "vmovnt") (VMOVNTQ_U "vmovnt")
>  		 (VMULHQ_M_S "vmulh") (VMULHQ_M_U "vmulh")
>  		 (VMULHQ_S "vmulh") (VMULHQ_U "vmulh")
> +		 (VMULLBQ_INT_M_S "vmullb") (VMULLBQ_INT_M_U
> "vmullb")
> +		 (VMULLBQ_INT_S "vmullb") (VMULLBQ_INT_U "vmullb")
> +		 (VMULLBQ_POLY_M_P "vmullb") (VMULLTQ_POLY_M_P
> "vmullt")
> +		 (VMULLBQ_POLY_P "vmullb")
> +		 (VMULLTQ_INT_M_S "vmullt") (VMULLTQ_INT_M_U
> "vmullt")
> +		 (VMULLTQ_INT_S "vmullt") (VMULLTQ_INT_U "vmullt")
> +		 (VMULLTQ_POLY_P "vmullt")
>  		 (VMULQ_M_N_S "vmul") (VMULQ_M_N_U "vmul")
> (VMULQ_M_N_F "vmul")
>  		 (VMULQ_M_S "vmul") (VMULQ_M_U "vmul") (VMULQ_M_F
> "vmul")
>  		 (VMULQ_N_S "vmul") (VMULQ_N_U "vmul") (VMULQ_N_F
> "vmul")
> @@ -1209,7 +1217,6 @@
>  		 (VSUBQ_M_N_S "vsub") (VSUBQ_M_N_U "vsub")
> (VSUBQ_M_N_F "vsub")
>  		 (VSUBQ_M_S "vsub") (VSUBQ_M_U "vsub") (VSUBQ_M_F
> "vsub")
>  		 (VSUBQ_N_S "vsub") (VSUBQ_N_U "vsub") (VSUBQ_N_F
> "vsub")
> -		 (UNSPEC_VCMLA "vcmla") (UNSPEC_VCMLA90 "vcmla")
> (UNSPEC_VCMLA180 "vcmla") (UNSPEC_VCMLA270 "vcmla")
>  		 ])
> 
>  (define_int_attr isu    [
> @@ -1246,6 +1253,8 @@
>  		 (VMOVNBQ_S "i") (VMOVNBQ_U "i")
>  		 (VMOVNTQ_M_S "i") (VMOVNTQ_M_U "i")
>  		 (VMOVNTQ_S "i") (VMOVNTQ_U "i")
> +		 (VMULLBQ_INT_S "s") (VMULLBQ_INT_U "u")
> +		 (VMULLTQ_INT_S "s") (VMULLTQ_INT_U "u")
>  		 (VNEGQ_M_S "s")
>  		 (VQABSQ_M_S "s")
>  		 (VQMOVNBQ_M_S "s") (VQMOVNBQ_M_U "u")
> @@ -2330,6 +2339,10 @@
>  		       (VMLADAVQ_U "u") (VMULHQ_S "s") (VMULHQ_U "u")
>  		       (VMULLBQ_INT_S "s") (VMULLBQ_INT_U "u") (VQADDQ_S
> "s")
>  		       (VMULLTQ_INT_S "s") (VMULLTQ_INT_U "u") (VQADDQ_U
> "u")
> +		       (VMULLBQ_POLY_P "p")
> +		       (VMULLTQ_POLY_P "p")
> +		       (VMULLBQ_POLY_M_P "p")
> +		       (VMULLTQ_POLY_M_P "p")
>  		       (VMULQ_N_S "s") (VMULQ_N_U "u") (VMULQ_S "s")
>  		       (VMULQ_U "u")
>  		       (VQADDQ_N_S "s") (VQADDQ_N_U "u")
> @@ -2713,8 +2726,8 @@
>  (define_int_iterator VMINVQ [VMINVQ_U VMINVQ_S])
>  (define_int_iterator VMLADAVQ [VMLADAVQ_U VMLADAVQ_S])
>  (define_int_iterator VMULHQ [VMULHQ_S VMULHQ_U])
> -(define_int_iterator VMULLBQ_INT [VMULLBQ_INT_U VMULLBQ_INT_S])
> -(define_int_iterator VMULLTQ_INT [VMULLTQ_INT_U VMULLTQ_INT_S])
> +(define_int_iterator VMULLxQ_INT [VMULLBQ_INT_U VMULLBQ_INT_S
> VMULLTQ_INT_U VMULLTQ_INT_S])
> +(define_int_iterator VMULLxQ_POLY [VMULLBQ_POLY_P VMULLTQ_POLY_P])
>  (define_int_iterator VMULQ [VMULQ_U VMULQ_S])
>  (define_int_iterator VMULQ_N [VMULQ_N_U VMULQ_N_S])
>  (define_int_iterator VQADDQ [VQADDQ_U VQADDQ_S])
> @@ -2815,7 +2828,8 @@
>  (define_int_iterator VSLIQ_M_N [VSLIQ_M_N_U VSLIQ_M_N_S])
>  (define_int_iterator VRSHLQ_M [VRSHLQ_M_S VRSHLQ_M_U])
>  (define_int_iterator VMINQ_M [VMINQ_M_S VMINQ_M_U])
> -(define_int_iterator VMULLBQ_INT_M [VMULLBQ_INT_M_U
> VMULLBQ_INT_M_S])
> +(define_int_iterator VMULLxQ_INT_M [VMULLBQ_INT_M_U
> VMULLBQ_INT_M_S VMULLTQ_INT_M_U VMULLTQ_INT_M_S])
> +(define_int_iterator VMULLxQ_POLY_M [VMULLBQ_POLY_M_P
> VMULLTQ_POLY_M_P])
>  (define_int_iterator VMULHQ_M [VMULHQ_M_S VMULHQ_M_U])
>  (define_int_iterator VMULQ_M [VMULQ_M_S VMULQ_M_U])
>  (define_int_iterator VHSUBQ_M_N [VHSUBQ_M_N_S VHSUBQ_M_N_U])
> @@ -2844,7 +2858,6 @@
>  (define_int_iterator VMLADAVAQ_P [VMLADAVAQ_P_U VMLADAVAQ_P_S])
>  (define_int_iterator VBRSRQ_M_N [VBRSRQ_M_N_U VBRSRQ_M_N_S])
>  (define_int_iterator VMULQ_M_N [VMULQ_M_N_U VMULQ_M_N_S])
> -(define_int_iterator VMULLTQ_INT_M [VMULLTQ_INT_M_S
> VMULLTQ_INT_M_U])
>  (define_int_iterator VEORQ_M [VEORQ_M_S VEORQ_M_U])
>  (define_int_iterator VSHRQ_M_N [VSHRQ_M_N_S VSHRQ_M_N_U])
>  (define_int_iterator VSUBQ_M_N [VSUBQ_M_N_S VSUBQ_M_N_U])
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index a2cbcff1a6f..2001e95a5f1 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -976,32 +976,18 @@
>  ])
> 
>  ;;
> -;; [vmullbq_int_u, vmullbq_int_s])
> +;; [vmullbq_int_u, vmullbq_int_s]
> +;; [vmulltq_int_u, vmulltq_int_s]
>  ;;
> -(define_insn "mve_vmullbq_int_<supf><mode>"
> +(define_insn "@mve_<mve_insn>q_int_<supf><mode>"
>    [
>     (set (match_operand:<V_double_width> 0 "s_register_operand"
> "<earlyclobber_32>")
>  	(unspec:<V_double_width> [(match_operand:MVE_2 1
> "s_register_operand" "w")
>  				  (match_operand:MVE_2 2
> "s_register_operand" "w")]
> -	 VMULLBQ_INT))
> +	 VMULLxQ_INT))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vmullb.<supf>%#<V_sz_elem>\t%q0, %q1, %q2"
> -  [(set_attr "type" "mve_move")
> -])
> -
> -;;
> -;; [vmulltq_int_u, vmulltq_int_s])
> -;;
> -(define_insn "mve_vmulltq_int_<supf><mode>"
> -  [
> -   (set (match_operand:<V_double_width> 0 "s_register_operand"
> "<earlyclobber_32>")
> -	(unspec:<V_double_width> [(match_operand:MVE_2 1
> "s_register_operand" "w")
> -				  (match_operand:MVE_2 2
> "s_register_operand" "w")]
> -	 VMULLTQ_INT))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vmullt.<supf>%#<V_sz_elem>\t%q0, %q1, %q2"
> +  "<mve_insn>.<isu>%#<V_sz_elem>\t%q0, %q1, %q2"
>    [(set_attr "type" "mve_move")
>  ])
> 
> @@ -1528,32 +1514,18 @@
>  ])
> 
>  ;;
> -;; [vmulltq_poly_p])
> -;;
> -(define_insn "mve_vmulltq_poly_p<mode>"
> -  [
> -   (set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
> -	(unspec:<V_double_width> [(match_operand:MVE_3 1
> "s_register_operand" "w")
> -				  (match_operand:MVE_3 2
> "s_register_operand" "w")]
> -	 VMULLTQ_POLY_P))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vmullt.p%#<V_sz_elem>\t%q0, %q1, %q2"
> -  [(set_attr "type" "mve_move")
> -])
> -
> -;;
> -;; [vmullbq_poly_p])
> +;; [vmulltq_poly_p]
> +;; [vmullbq_poly_p]
>  ;;
> -(define_insn "mve_vmullbq_poly_p<mode>"
> +(define_insn "@mve_<mve_insn>q_poly_<supf><mode>"
>    [
>     (set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
>  	(unspec:<V_double_width> [(match_operand:MVE_3 1
> "s_register_operand" "w")
>  				  (match_operand:MVE_3 2
> "s_register_operand" "w")]
> -	 VMULLBQ_POLY_P))
> +	 VMULLxQ_POLY))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vmullb.p%#<V_sz_elem>\t%q0, %q1, %q2"
> +  "<mve_insn>.<supf>%#<V_sz_elem>\t%q0, %q1, %q2"
>    [(set_attr "type" "mve_move")
>  ])
> 
> @@ -2816,36 +2788,20 @@
>     (set_attr "length""8")])
> 
>  ;;
> -;; [vmullbq_int_m_u, vmullbq_int_m_s])
> +;; [vmullbq_int_m_u, vmullbq_int_m_s]
> +;; [vmulltq_int_m_s, vmulltq_int_m_u]
>  ;;
> -(define_insn "mve_vmullbq_int_m_<supf><mode>"
> +(define_insn "@mve_<mve_insn>q_int_m_<supf><mode>"
>    [
>     (set (match_operand:<V_double_width> 0 "s_register_operand"
> "<earlyclobber_32>")
>  	(unspec:<V_double_width> [(match_operand:<V_double_width> 1
> "s_register_operand" "0")
>  				  (match_operand:MVE_2 2
> "s_register_operand" "w")
>  				  (match_operand:MVE_2 3
> "s_register_operand" "w")
>  				  (match_operand:<MVE_VPRED> 4
> "vpr_register_operand" "Up")]
> -	 VMULLBQ_INT_M))
> +	 VMULLxQ_INT_M))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vpst\;vmullbt.<supf>%#<V_sz_elem>	%q0, %q2, %q3"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vmulltq_int_m_s, vmulltq_int_m_u])
> -;;
> -(define_insn "mve_vmulltq_int_m_<supf><mode>"
> -  [
> -   (set (match_operand:<V_double_width> 0 "s_register_operand"
> "<earlyclobber_32>")
> -	(unspec:<V_double_width> [(match_operand:<V_double_width> 1
> "s_register_operand" "0")
> -				  (match_operand:MVE_2 2
> "s_register_operand" "w")
> -				  (match_operand:MVE_2 3
> "s_register_operand" "w")
> -				  (match_operand:<MVE_VPRED> 4
> "vpr_register_operand" "Up")]
> -	 VMULLTQ_INT_M))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vmulltt.<supf>%#<V_sz_elem>	%q0, %q2, %q3"
> +  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %q3"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> 
> @@ -3006,36 +2962,20 @@
>     (set_attr "length""8")])
> 
>  ;;
> -;; [vmullbq_poly_m_p])
> +;; [vmullbq_poly_m_p]
> +;; [vmulltq_poly_m_p]
>  ;;
> -(define_insn "mve_vmullbq_poly_m_p<mode>"
> +(define_insn "@mve_<mve_insn>q_poly_m_<supf><mode>"
>    [
>     (set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
>  	(unspec:<V_double_width> [(match_operand:<V_double_width> 1
> "s_register_operand" "0")
>  		       (match_operand:MVE_3 2 "s_register_operand" "w")
>  		       (match_operand:MVE_3 3 "s_register_operand" "w")
>  		       (match_operand:<MVE_VPRED> 4
> "vpr_register_operand" "Up")]
> -	 VMULLBQ_POLY_M_P))
> +	 VMULLxQ_POLY_M))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vpst\;vmullbt.p%#<V_sz_elem>\t%q0, %q2, %q3"
> -  [(set_attr "type" "mve_move")
> -   (set_attr "length""8")])
> -
> -;;
> -;; [vmulltq_poly_m_p])
> -;;
> -(define_insn "mve_vmulltq_poly_m_p<mode>"
> -  [
> -   (set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
> -	(unspec:<V_double_width> [(match_operand:<V_double_width> 1
> "s_register_operand" "0")
> -		       (match_operand:MVE_3 2 "s_register_operand" "w")
> -		       (match_operand:MVE_3 3 "s_register_operand" "w")
> -		       (match_operand:<MVE_VPRED> 4
> "vpr_register_operand" "Up")]
> -	 VMULLTQ_POLY_M_P))
> -  ]
> -  "TARGET_HAVE_MVE"
> -  "vpst\;vmulltt.p%#<V_sz_elem>\t%q0, %q2, %q3"
> +  "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>\t%q0, %q2, %q3"
>    [(set_attr "type" "mve_move")
>     (set_attr "length""8")])
> 
> --
> 2.34.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5/9] arm: [MVE intrinsics] add support for p8 and p16 polynomial types
  2023-08-14 18:34 ` [PATCH 5/9] arm: [MVE intrinsics] add support for p8 and p16 polynomial types Christophe Lyon
@ 2023-08-29  6:05   ` Prathamesh Kulkarni
  2023-08-29  6:44     ` Christophe Lyon
  0 siblings, 1 reply; 12+ messages in thread
From: Prathamesh Kulkarni @ 2023-08-29  6:05 UTC (permalink / raw)
  To: Christophe Lyon
  Cc: gcc-patches, Kyrylo.Tkachov, richard.earnshaw, richard.sandiford

On Tue, 15 Aug 2023 at 00:05, Christophe Lyon via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Although they look like aliases for u8 and u16, we need to define them
> so that we can handle p8 and p16 suffixes with the general framework.
>
> They will be used by vmull[bt]q_poly intrinsics.
Hi Christophe,
It seems your patch committed in 9bae37ec8dc32027dedf9a32bf15754ebad6da38
broke arm bootstrap build due to Werror=missing-field-initializers:
https://ci.linaro.org/job/tcwg_bootstrap_build--master-arm-bootstrap-build/199/artifact/artifacts/notify/mail-body.txt/*view*/

I think this happens because the commit adds a new member to type_suffix_info:
-  unsigned int spare : 13;
+  /* True if the suffix is for a polynomial type.  */
+  unsigned int poly_p : 1;
+  unsigned int spare : 12;

but probably misses an initializer in arm-mve-builtins.cc:type_suffixes:
  { "", NUM_VECTOR_TYPES, TYPE_bool, 0, 0, false, false, false,
    0, VOIDmode }

Thanks,
Prathamesh
>
> 2023-08-14  Christophe Lyon  <christophe.lyon@linaro.org>
>
>         gcc/
>         * config/arm/arm-mve-builtins.cc (type_suffixes): Handle poly_p
>         field.
>         (TYPES_poly_8_16): New.
>         (poly_8_16): New.
>         * config/arm/arm-mve-builtins.def (p8): New type suffix.
>         (p16): Likewise.
>         * config/arm/arm-mve-builtins.h (enum type_class_index): Add
>         TYPE_poly.
>         (struct type_suffix_info): Add poly_p field.
> ---
>  gcc/config/arm/arm-mve-builtins.cc  | 6 ++++++
>  gcc/config/arm/arm-mve-builtins.def | 2 ++
>  gcc/config/arm/arm-mve-builtins.h   | 5 ++++-
>  3 files changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
> index 7eec9d2861c..fa8b0ad36b3 100644
> --- a/gcc/config/arm/arm-mve-builtins.cc
> +++ b/gcc/config/arm/arm-mve-builtins.cc
> @@ -128,6 +128,7 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
>      TYPE_##CLASS == TYPE_signed || TYPE_##CLASS == TYPE_unsigned, \
>      TYPE_##CLASS == TYPE_unsigned, \
>      TYPE_##CLASS == TYPE_float, \
> +    TYPE_##CLASS == TYPE_poly, \
>      0, \
>      MODE },
>  #include "arm-mve-builtins.def"
> @@ -177,6 +178,10 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
>  #define TYPES_all_signed(S, D) \
>    S (s8), S (s16), S (s32)
>
> +/* _p8 _p16.  */
> +#define TYPES_poly_8_16(S, D) \
> +  S (p8), S (p16)
> +
>  /* _u8 _u16 _u32.  */
>  #define TYPES_all_unsigned(S, D) \
>    S (u8), S (u16), S (u32)
> @@ -275,6 +280,7 @@ DEF_MVE_TYPES_ARRAY (integer_8);
>  DEF_MVE_TYPES_ARRAY (integer_8_16);
>  DEF_MVE_TYPES_ARRAY (integer_16_32);
>  DEF_MVE_TYPES_ARRAY (integer_32);
> +DEF_MVE_TYPES_ARRAY (poly_8_16);
>  DEF_MVE_TYPES_ARRAY (signed_16_32);
>  DEF_MVE_TYPES_ARRAY (signed_32);
>  DEF_MVE_TYPES_ARRAY (reinterpret_integer);
> diff --git a/gcc/config/arm/arm-mve-builtins.def b/gcc/config/arm/arm-mve-builtins.def
> index e3f37876210..e2cf1baf370 100644
> --- a/gcc/config/arm/arm-mve-builtins.def
> +++ b/gcc/config/arm/arm-mve-builtins.def
> @@ -63,6 +63,8 @@ DEF_MVE_TYPE_SUFFIX (u8, uint8x16_t, unsigned, 8, V16QImode)
>  DEF_MVE_TYPE_SUFFIX (u16, uint16x8_t, unsigned, 16, V8HImode)
>  DEF_MVE_TYPE_SUFFIX (u32, uint32x4_t, unsigned, 32, V4SImode)
>  DEF_MVE_TYPE_SUFFIX (u64, uint64x2_t, unsigned, 64, V2DImode)
> +DEF_MVE_TYPE_SUFFIX (p8, uint8x16_t, poly, 8, V16QImode)
> +DEF_MVE_TYPE_SUFFIX (p16, uint16x8_t, poly, 16, V8HImode)
>  #undef REQUIRES_FLOAT
>
>  #define REQUIRES_FLOAT true
> diff --git a/gcc/config/arm/arm-mve-builtins.h b/gcc/config/arm/arm-mve-builtins.h
> index c9b51a0c77b..37b8223dfb2 100644
> --- a/gcc/config/arm/arm-mve-builtins.h
> +++ b/gcc/config/arm/arm-mve-builtins.h
> @@ -146,6 +146,7 @@ enum type_class_index
>    TYPE_float,
>    TYPE_signed,
>    TYPE_unsigned,
> +  TYPE_poly,
>    NUM_TYPE_CLASSES
>  };
>
> @@ -221,7 +222,9 @@ struct type_suffix_info
>    unsigned int unsigned_p : 1;
>    /* True if the suffix is for a floating-point type.  */
>    unsigned int float_p : 1;
> -  unsigned int spare : 13;
> +  /* True if the suffix is for a polynomial type.  */
> +  unsigned int poly_p : 1;
> +  unsigned int spare : 12;
>
>    /* The associated vector or predicate mode.  */
>    machine_mode vector_mode : 16;
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5/9] arm: [MVE intrinsics] add support for p8 and p16 polynomial types
  2023-08-29  6:05   ` Prathamesh Kulkarni
@ 2023-08-29  6:44     ` Christophe Lyon
  0 siblings, 0 replies; 12+ messages in thread
From: Christophe Lyon @ 2023-08-29  6:44 UTC (permalink / raw)
  To: Prathamesh Kulkarni
  Cc: gcc-patches, Kyrylo.Tkachov, richard.earnshaw, richard.sandiford


[-- Attachment #1.1: Type: text/plain, Size: 4877 bytes --]

On Tue, 29 Aug 2023 at 08:06, Prathamesh Kulkarni <
prathamesh.kulkarni@linaro.org> wrote:

> On Tue, 15 Aug 2023 at 00:05, Christophe Lyon via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Although they look like aliases for u8 and u16, we need to define them
> > so that we can handle p8 and p16 suffixes with the general framework.
> >
> > They will be used by vmull[bt]q_poly intrinsics.
> Hi Christophe,
>

Hi Prathamesh,


> It seems your patch committed in 9bae37ec8dc32027dedf9a32bf15754ebad6da38
> broke arm bootstrap build due to Werror=missing-field-initializers:
>
> https://ci.linaro.org/job/tcwg_bootstrap_build--master-arm-bootstrap-build/199/artifact/artifacts/notify/mail-body.txt/*view*/
>
> I think this happens because the commit adds a new member to
> type_suffix_info:
> -  unsigned int spare : 13;
> +  /* True if the suffix is for a polynomial type.  */
> +  unsigned int poly_p : 1;
> +  unsigned int spare : 12;
>
> but probably misses an initializer in arm-mve-builtins.cc:type_suffixes:
>   { "", NUM_VECTOR_TYPES, TYPE_bool, 0, 0, false, false, false,
>     0, VOIDmode }
>
Yeah, exactly. I had noticed this after sending the patch, but forgot to
fix it when I pushed the patch.

Fixed as obvious with the attached patch (r14-3538-gacaf9e333dbc2e).

Thanks,

Christophe


> Thanks,
> Prathamesh
> >
> > 2023-08-14  Christophe Lyon  <christophe.lyon@linaro.org>
> >
> >         gcc/
> >         * config/arm/arm-mve-builtins.cc (type_suffixes): Handle poly_p
> >         field.
> >         (TYPES_poly_8_16): New.
> >         (poly_8_16): New.
> >         * config/arm/arm-mve-builtins.def (p8): New type suffix.
> >         (p16): Likewise.
> >         * config/arm/arm-mve-builtins.h (enum type_class_index): Add
> >         TYPE_poly.
> >         (struct type_suffix_info): Add poly_p field.
> > ---
> >  gcc/config/arm/arm-mve-builtins.cc  | 6 ++++++
> >  gcc/config/arm/arm-mve-builtins.def | 2 ++
> >  gcc/config/arm/arm-mve-builtins.h   | 5 ++++-
> >  3 files changed, 12 insertions(+), 1 deletion(-)
> >
> > diff --git a/gcc/config/arm/arm-mve-builtins.cc
> b/gcc/config/arm/arm-mve-builtins.cc
> > index 7eec9d2861c..fa8b0ad36b3 100644
> > --- a/gcc/config/arm/arm-mve-builtins.cc
> > +++ b/gcc/config/arm/arm-mve-builtins.cc
> > @@ -128,6 +128,7 @@ CONSTEXPR const type_suffix_info
> type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
> >      TYPE_##CLASS == TYPE_signed || TYPE_##CLASS == TYPE_unsigned, \
> >      TYPE_##CLASS == TYPE_unsigned, \
> >      TYPE_##CLASS == TYPE_float, \
> > +    TYPE_##CLASS == TYPE_poly, \
> >      0, \
> >      MODE },
> >  #include "arm-mve-builtins.def"
> > @@ -177,6 +178,10 @@ CONSTEXPR const type_suffix_info
> type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
> >  #define TYPES_all_signed(S, D) \
> >    S (s8), S (s16), S (s32)
> >
> > +/* _p8 _p16.  */
> > +#define TYPES_poly_8_16(S, D) \
> > +  S (p8), S (p16)
> > +
> >  /* _u8 _u16 _u32.  */
> >  #define TYPES_all_unsigned(S, D) \
> >    S (u8), S (u16), S (u32)
> > @@ -275,6 +280,7 @@ DEF_MVE_TYPES_ARRAY (integer_8);
> >  DEF_MVE_TYPES_ARRAY (integer_8_16);
> >  DEF_MVE_TYPES_ARRAY (integer_16_32);
> >  DEF_MVE_TYPES_ARRAY (integer_32);
> > +DEF_MVE_TYPES_ARRAY (poly_8_16);
> >  DEF_MVE_TYPES_ARRAY (signed_16_32);
> >  DEF_MVE_TYPES_ARRAY (signed_32);
> >  DEF_MVE_TYPES_ARRAY (reinterpret_integer);
> > diff --git a/gcc/config/arm/arm-mve-builtins.def
> b/gcc/config/arm/arm-mve-builtins.def
> > index e3f37876210..e2cf1baf370 100644
> > --- a/gcc/config/arm/arm-mve-builtins.def
> > +++ b/gcc/config/arm/arm-mve-builtins.def
> > @@ -63,6 +63,8 @@ DEF_MVE_TYPE_SUFFIX (u8, uint8x16_t, unsigned, 8,
> V16QImode)
> >  DEF_MVE_TYPE_SUFFIX (u16, uint16x8_t, unsigned, 16, V8HImode)
> >  DEF_MVE_TYPE_SUFFIX (u32, uint32x4_t, unsigned, 32, V4SImode)
> >  DEF_MVE_TYPE_SUFFIX (u64, uint64x2_t, unsigned, 64, V2DImode)
> > +DEF_MVE_TYPE_SUFFIX (p8, uint8x16_t, poly, 8, V16QImode)
> > +DEF_MVE_TYPE_SUFFIX (p16, uint16x8_t, poly, 16, V8HImode)
> >  #undef REQUIRES_FLOAT
> >
> >  #define REQUIRES_FLOAT true
> > diff --git a/gcc/config/arm/arm-mve-builtins.h
> b/gcc/config/arm/arm-mve-builtins.h
> > index c9b51a0c77b..37b8223dfb2 100644
> > --- a/gcc/config/arm/arm-mve-builtins.h
> > +++ b/gcc/config/arm/arm-mve-builtins.h
> > @@ -146,6 +146,7 @@ enum type_class_index
> >    TYPE_float,
> >    TYPE_signed,
> >    TYPE_unsigned,
> > +  TYPE_poly,
> >    NUM_TYPE_CLASSES
> >  };
> >
> > @@ -221,7 +222,9 @@ struct type_suffix_info
> >    unsigned int unsigned_p : 1;
> >    /* True if the suffix is for a floating-point type.  */
> >    unsigned int float_p : 1;
> > -  unsigned int spare : 13;
> > +  /* True if the suffix is for a polynomial type.  */
> > +  unsigned int poly_p : 1;
> > +  unsigned int spare : 12;
> >
> >    /* The associated vector or predicate mode.  */
> >    machine_mode vector_mode : 16;
> > --
> > 2.34.1
> >
>

[-- Attachment #2: 0001-arm-Fix-bootstrap-add-missing-initializer-in-MVE-typ.patch --]
[-- Type: text/x-patch, Size: 1252 bytes --]

From acaf9e333dbc2eb811848c169f95ec7a8ca0e2e7 Mon Sep 17 00:00:00 2001
From: Christophe Lyon <christophe.lyon@linaro.org>
Date: Tue, 29 Aug 2023 06:35:06 +0000
Subject: [PATCH] arm: Fix bootstrap / add missing initializer in MVE
 type_suffixes

My recent patch r14-3519-g9bae37ec8dc320 (arm: [MVE intrinsics] add
support for p8 and p16 polynomial types) added a new member to
type_suffix_info, but I forgot to add the corresponding initializer to
type_suffixes.

Committed as obvious.

2023-08-29  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	* config/arm/arm-mve-builtins.cc (type_suffixes): Add missing
	initializer.
---
 gcc/config/arm/arm-mve-builtins.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index fa8b0ad36b3..02dc8fa9b73 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -132,7 +132,7 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
     0, \
     MODE },
 #include "arm-mve-builtins.def"
-  { "", NUM_VECTOR_TYPES, TYPE_bool, 0, 0, false, false, false,
+  { "", NUM_VECTOR_TYPES, TYPE_bool, 0, 0, false, false, false, false,
     0, VOIDmode }
 };
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2023-08-29  6:44 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-14 18:34 [PATCH 1/9] arm: [MVE intrinsics] factorize vmullbq vmulltq Christophe Lyon
2023-08-14 18:34 ` [PATCH 2/9] arm: [MVE intrinsics] add unspec_mve_function_exact_insn_vmull Christophe Lyon
2023-08-14 18:34 ` [PATCH 3/9] arm: [MVE intrinsics] add binary_widen shape Christophe Lyon
2023-08-14 18:34 ` [PATCH 4/9] arm: [MVE intrinsics] rework vmullbq_int vmulltq_int Christophe Lyon
2023-08-14 18:34 ` [PATCH 5/9] arm: [MVE intrinsics] add support for p8 and p16 polynomial types Christophe Lyon
2023-08-29  6:05   ` Prathamesh Kulkarni
2023-08-29  6:44     ` Christophe Lyon
2023-08-14 18:34 ` [PATCH 6/9] arm: [MVE intrinsics] add support for U and p formats in parse_element_type Christophe Lyon
2023-08-14 18:34 ` [PATCH 7/9] arm: [MVE intrinsics] add binary_widen_poly shape Christophe Lyon
2023-08-14 18:34 ` [PATCH 8/9] arm: [MVE intrinsics] add unspec_mve_function_exact_insn_vmull_poly Christophe Lyon
2023-08-14 18:34 ` [PATCH 9/9] arm: [MVE intrinsics] rework vmullbq_poly vmulltq_poly Christophe Lyon
2023-08-22 10:04 ` [PATCH 1/9] arm: [MVE intrinsics] factorize vmullbq vmulltq Kyrylo Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).