Re: [GCC][PATCH][AArch64]Add ACLE intrinsics for dot product (usdot - vector, <us/su>dot - by element) for AArch64 AdvSIMD ARMv8.6 Extension

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: Stam Markianos-Wright <Stam.Markianos-Wright@arm.com>
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>,
	Richard Earnshaw	<Richard.Earnshaw@arm.com>,
	Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>,
	Marcus Shawcroft <Marcus.Shawcroft@arm.com>,
	Richard Sandiford	<Richard.Sandiford@arm.com>
Subject: Re: [GCC][PATCH][AArch64]Add ACLE intrinsics for dot product (usdot - vector, <us/su>dot - by element) for AArch64 AdvSIMD ARMv8.6 Extension
Date: Fri, 20 Dec 2019 13:44:00 -0000	[thread overview]
Message-ID: <d9c463b3-d99c-31d7-9479-57e1133cf316@arm.com> (raw)
In-Reply-To: <mptwob0zdwd.fsf@arm.com>

[-- Attachment #1: Type: text/plain, Size: 2666 bytes --]



On 12/13/19 11:02 AM, Richard Sandiford wrote:
> Stam Markianos-Wright <Stam.Markianos-Wright@arm.com> writes:
>> @@ -573,6 +586,44 @@
>>     [(set_attr "type" "neon_dot<q>")]
>>   )
>>   
>> +;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
>> +;; (by element) Dot Product operations.
>> +(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
>> +  [(set (match_operand:VS 0 "register_operand" "=w")
>> +	(plus:VS (match_operand:VS 1 "register_operand" "0")
>> +		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
>> +			    (match_operand:V8QI 3 "register_operand" "<h_con>")
>> +			    (match_operand:SI 4 "immediate_operand" "i")]
>> +		DOTPROD_I8MM)))]
>> +  "TARGET_SIMD && TARGET_I8MM"
>> +  {
>> +    int nunits = GET_MODE_NUNITS (V8QImode).to_constant ();
>> +    int lane = INTVAL (operands[4]);
>> +    operands[4]
>> +    =  gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
>> +    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
>> +  }
>> +  [(set_attr "type" "neon_dot<q>")]
>> +)
>> +
>> +(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
>> +  [(set (match_operand:VS 0 "register_operand" "=w")
>> +	(plus:VS (match_operand:VS 1 "register_operand" "0")
>> +		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
>> +			    (match_operand:V16QI 3 "register_operand" "<h_con>")
> 
> Using <h_con> seems a bit redundant when it's always "w" in this context,
> but either's fine.

Done!

> 
>> +			    (match_operand:SI 4 "immediate_operand" "i")]
>> +		DOTPROD_I8MM)))]
>> +  "TARGET_SIMD && TARGET_I8MM"
>> +  {
>> +    int nunits = GET_MODE_NUNITS (V16QImode).to_constant ();
>> +    int lane = INTVAL (operands[4]);
>> +    operands[4]
>> +    =  gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
> 
> Nit: = should be indented two spaces more, and there should be only
> one space afterwards.  But the statement fits on one line, so probably
> better not to have the line break at all.

I put put all onto one line.
> 
>> +    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
>> +  }
>> +  [(set_attr "type" "neon_dot<q>")]
>> +)
> 
> These two patterns can be merged using :VB for operand 3.

Merged them.

I also changed the tests to use the new check-function-bodies according to 
downstream comments.
This helps check that the assembler scans are done in the right order and 
ensures that the correct assembler was generated from the right function call 
(as opposed to "somewhere in the output file").

Hope this looks better :D

Cheers,
Stam
> 
> LGTM otherwise, thanks.
> 
> Richard
> 


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: i8mm-us-su-dot-aarch64_REV2.patch --]
[-- Type: text/x-patch; name="i8mm-us-su-dot-aarch64_REV2.patch", Size: 20956 bytes --]

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index c35a1b1f0299ce5af8ca1a3df0209614f7bd0f25..6bd26889f2f26a9f82dd6d40f50125eaeee41740 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -107,6 +107,9 @@ enum aarch64_type_qualifiers
   /* Lane indices selected in pairs. - must be in range, and flipped for
      bigendian.  */
   qualifier_lane_pair_index = 0x800,
+  /* Lane indices selected in quadtuplets. - must be in range, and flipped for
+     bigendian.  */
+  qualifier_lane_quadtup_index = 0x1000,
 };
 
 typedef struct
@@ -173,6 +176,10 @@ aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_unsigned, qualifier_unsigned,
       qualifier_unsigned, qualifier_immediate };
 #define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none };
+#define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers)
 
 
 static enum aarch64_type_qualifiers
@@ -191,6 +198,19 @@ aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
       qualifier_unsigned, qualifier_lane_index };
 #define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers)
 
+static enum aarch64_type_qualifiers
+aarch64_types_quadopssus_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_unsigned,
+      qualifier_none, qualifier_lane_quadtup_index };
+#define TYPES_QUADOPSSUS_LANE_QUADTUP \
+	(aarch64_types_quadopssus_lane_quadtup_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_quadopsssu_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_none,
+      qualifier_unsigned, qualifier_lane_quadtup_index };
+#define TYPES_QUADOPSSSU_LANE_QUADTUP \
+	(aarch64_types_quadopsssu_lane_quadtup_qualifiers)
+
 static enum aarch64_type_qualifiers
 aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
@@ -1260,6 +1280,7 @@ typedef enum
   SIMD_ARG_LANE_INDEX,
   SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
   SIMD_ARG_LANE_PAIR_INDEX,
+  SIMD_ARG_LANE_QUADTUP_INDEX,
   SIMD_ARG_STOP
 } builtin_simd_arg;
 
@@ -1349,9 +1370,25 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval,
 		  op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane),
 					  SImode);
 		}
-	      /* Fall through - if the lane index isn't a constant then
-		 the next case will error.  */
-	      /* FALLTHRU */
+	      /* If the lane index isn't a constant then error out.  */
+	      goto constant_arg;
+	    case SIMD_ARG_LANE_QUADTUP_INDEX:
+	      /* Must be a previous operand into which this is an index and
+		 index is restricted to nunits / 4.  */
+	      gcc_assert (opc > 0);
+	      if (CONST_INT_P (op[opc]))
+		{
+		  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
+		  unsigned int nunits
+		    = GET_MODE_NUNITS (vmode).to_constant ();
+		  aarch64_simd_lane_bounds (op[opc], 0, nunits / 4, exp);
+		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
+		  int lane = INTVAL (op[opc]);
+		  op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane),
+					  SImode);
+		}
+	      /* If the lane index isn't a constant then error out.  */
+	      goto constant_arg;
 	    case SIMD_ARG_CONSTANT:
 constant_arg:
 	      if (!(*insn_data[icode].operand[opc].predicate)
@@ -1464,6 +1501,8 @@ aarch64_simd_expand_builtin (int fcode, tree exp, rtx target)
 	args[k] = SIMD_ARG_LANE_INDEX;
       else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index)
 	args[k] = SIMD_ARG_LANE_PAIR_INDEX;
+      else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index)
+	args[k] = SIMD_ARG_LANE_QUADTUP_INDEX;
       else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
 	args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX;
       else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index f4ca35a59704c761fe2ac2b6d401fff7c8aba80d..651aab0f80fba5a40b5e3fa149f503acb6a48702 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -212,10 +212,15 @@
   /* Implemented by aarch64_<sur><dotprod>{_lane}{q}<dot_mode>.  */
   BUILTIN_VB (TERNOP, sdot, 0)
   BUILTIN_VB (TERNOPU, udot, 0)
+  BUILTIN_VB (TERNOP_SSUS, usdot, 0)
   BUILTIN_VB (QUADOP_LANE, sdot_lane, 0)
   BUILTIN_VB (QUADOPU_LANE, udot_lane, 0)
   BUILTIN_VB (QUADOP_LANE, sdot_laneq, 0)
   BUILTIN_VB (QUADOPU_LANE, udot_laneq, 0)
+  BUILTIN_VB (QUADOPSSUS_LANE_QUADTUP, usdot_lane, 0)
+  BUILTIN_VB (QUADOPSSUS_LANE_QUADTUP, usdot_laneq, 0)
+  BUILTIN_VB (QUADOPSSSU_LANE_QUADTUP, sudot_lane, 0)
+  BUILTIN_VB (QUADOPSSSU_LANE_QUADTUP, sudot_laneq, 0)
 
   /* Implemented by aarch64_fcadd<rot><mode>.   */
   BUILTIN_VHSDF (BINOP, fcadd90, 0)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index ad4676bc167f08951e693916c7ef796e3501762a..eba71f004ef67af654f9c512b720aa6cfdd1d7fc 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -506,6 +506,19 @@
   [(set_attr "type" "neon_dot<q>")]
 )
 
+;; These instructions map to the __builtins for the armv8.6a I8MM usdot
+;; (vector) Dot Product operation.
+(define_insn "aarch64_usdot<vsi2qi>"
+  [(set (match_operand:VS 0 "register_operand" "=w")
+	(plus:VS (match_operand:VS 1 "register_operand" "0")
+		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
+			    (match_operand:<VSI2QI> 3 "register_operand" "w")]
+		UNSPEC_USDOT)))]
+  "TARGET_SIMD && TARGET_I8MM"
+  "usdot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
+  [(set_attr "type" "neon_dot<q>")]
+)
+
 ;; These expands map to the Dot Product optab the vectorizer checks for.
 ;; The auto-vectorizer expects a dot product builtin that also does an
 ;; accumulation into the provided register.
@@ -573,6 +586,25 @@
   [(set_attr "type" "neon_dot<q>")]
 )
 
+;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
+;; (by element) Dot Product operations.
+(define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
+  [(set (match_operand:VS 0 "register_operand" "=w")
+	(plus:VS (match_operand:VS 1 "register_operand" "0")
+		(unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
+			    (match_operand:VB 3 "register_operand" "w")
+			    (match_operand:SI 4 "immediate_operand" "i")]
+		DOTPROD_I8MM)))]
+  "TARGET_SIMD && TARGET_I8MM"
+  {
+    int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
+    int lane = INTVAL (operands[4]);
+    operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
+    return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
+  }
+  [(set_attr "type" "neon_dot<VS:q>")]
+)
+
 (define_expand "copysign<mode>3"
   [(match_operand:VHSDF 0 "register_operand")
    (match_operand:VHSDF 1 "register_operand")
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 8b861601a48b2150aa5768d717c61e0d1416747f..95b92dff69343e2b6c74174b39f3cd9d9838ddab 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -34606,6 +34606,89 @@ vrnd64xq_f64 (float64x2_t __a)
 
 #pragma GCC pop_options
 
+/* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics.  */
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+i8mm")
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b)
+{
+  return __builtin_aarch64_usdotv8qi_ssus (__r, __a, __b);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vusdotq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b)
+{
+  return __builtin_aarch64_usdotv16qi_ssus (__r, __a, __b);
+}
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vusdot_lane_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b, const int __index)
+{
+  return __builtin_aarch64_usdot_lanev8qi_ssuss (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vusdot_laneq_s32 \
+      (int32x2_t __r, uint8x8_t __a, int8x16_t __b, const int __index)
+{
+  return __builtin_aarch64_usdot_laneqv8qi_ssuss (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vusdotq_lane_s32 \
+      (int32x4_t __r, uint8x16_t __a, int8x8_t __b, const int __index)
+{
+  return __builtin_aarch64_usdot_lanev16qi_ssuss (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vusdotq_laneq_s32 \
+     (int32x4_t __r, uint8x16_t __a, int8x16_t __b, const int __index)
+{
+  return __builtin_aarch64_usdot_laneqv16qi_ssuss (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsudot_lane_s32 (int32x2_t __r, int8x8_t __a, uint8x8_t __b, const int __index)
+{
+  return __builtin_aarch64_sudot_lanev8qi_sssus (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsudot_laneq_s32 \
+      (int32x2_t __r, int8x8_t __a, uint8x16_t __b, const int __index)
+{
+  return __builtin_aarch64_sudot_laneqv8qi_sssus (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsudotq_lane_s32 \
+      (int32x4_t __r, int8x16_t __a, uint8x8_t __b, const int __index)
+{
+  return __builtin_aarch64_sudot_lanev16qi_sssus (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsudotq_laneq_s32 \
+      (int32x4_t __r, int8x16_t __a, uint8x16_t __b, const int __index)
+{
+  return __builtin_aarch64_sudot_laneqv16qi_sssus (__r, __a, __b, __index);
+}
+
+#pragma GCC pop_options
+
 #undef __aarch64_vget_lane_any
 
 #undef __aarch64_vdup_lane_any
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 1ca5ed1ef1bc66a4ecb52ee240338f18fd560384..c288de6c3a5bb237318bfcc33924dd0e7788036b 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -650,6 +650,8 @@
     UNSPEC_UMULHS	; Used in aarch64-sve2.md.
     UNSPEC_UMULHRS	; Used in aarch64-sve2.md.
     UNSPEC_ASRD		; Used in aarch64-sve.md.
+    UNSPEC_USDOT	; Used in aarch64-simd.md.
+    UNSPEC_SUDOT	; Used in aarch64-simd.md.
 ])
 
 ;; ------------------------------------------------------------------
@@ -1299,6 +1301,8 @@
 
 (define_mode_attr f16quad [(V2SF "") (V4SF "q")])
 
+(define_mode_attr isquadop [(V8QI "") (V16QI "q")])
+
 (define_code_attr f16mac [(plus "a") (minus "s")])
 
 ;; Map smax to smin and umax to umin.
@@ -1859,6 +1863,8 @@
 
 (define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT])
 
+(define_int_iterator DOTPROD_I8MM [UNSPEC_USDOT UNSPEC_SUDOT])
+
 (define_int_iterator ADDSUBHN [UNSPEC_ADDHN UNSPEC_RADDHN
 			       UNSPEC_SUBHN UNSPEC_RSUBHN])
 
@@ -2298,6 +2304,7 @@
 		      (UNSPEC_URSHL  "ur") (UNSPEC_SRSHL  "sr")
 		      (UNSPEC_UQRSHL  "u") (UNSPEC_SQRSHL  "s")
 		      (UNSPEC_SDOT "s") (UNSPEC_UDOT "u")
+		      (UNSPEC_USDOT "us") (UNSPEC_SUDOT "su")
 ])
 
 (define_int_attr r [(UNSPEC_SQDMULH "") (UNSPEC_SQRDMULH "r")
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-1.c
new file mode 100755
index 0000000000000000000000000000000000000000..6a4ff054589b736c224bb2fabdcfa48439a8a420
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-1.c
@@ -0,0 +1,133 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
+/* { dg-add-options arm_v8_2a_i8mm }  */
+/* { dg-additional-options "--save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include <arm_neon.h>
+
+/* Unsigned-Signed Dot Product instructions.  */
+
+/*
+**ufoo:
+**	...
+**	usdot\tv[0-9]+.2s, v[0-9]+.8b, v[0-9]+.8b
+**	...
+**	ret
+*/
+int32x2_t ufoo (int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  return vusdot_s32 (r, x, y);
+}
+
+/*
+**ufooq:
+**	...
+**	usdot\tv[0-9]+.4s, v[0-9]+.16b, v[0-9]+.16b
+**	...
+**	ret
+*/
+int32x4_t ufooq (int32x4_t r, uint8x16_t x, int8x16_t y)
+{
+  return vusdotq_s32 (r, x, y);
+}
+
+/*
+**ufoo_lane:
+**	...
+**	usdot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.4b\[0\]
+**	...
+**	ret
+*/
+int32x2_t ufoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  return vusdot_lane_s32 (r, x, y, 0);
+}
+
+/*
+**ufoo_laneq:
+**	...
+**	usdot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.4b\[2\]
+**	...
+**	ret
+*/
+int32x2_t ufoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y)
+{
+  return vusdot_laneq_s32 (r, x, y, 2);
+}
+
+/*
+**ufooq_lane:
+**	...
+**	usdot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.4b\[1\]
+**	...
+**	ret
+*/
+int32x4_t ufooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
+{
+  return vusdotq_lane_s32 (r, x, y, 1);
+}
+
+/*
+**ufooq_laneq:
+**	...
+**	usdot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.4b\[3\]
+**	...
+**	ret
+*/
+int32x4_t ufooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y)
+{
+  return vusdotq_laneq_s32 (r, x, y, 3);
+}
+
+
+/* Signed-Unsigned Dot Product instructions.  */
+
+/*
+**sfoo_lane:
+**	...
+**	sudot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.4b\[0\]
+**	...
+**	ret
+*/
+int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
+{
+  return vsudot_lane_s32 (r, x, y, 0);
+}
+
+/*
+**sfoo_laneq:
+**	...
+**	sudot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.4b\[2\]
+**	...
+**	ret
+*/
+int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y)
+{
+  return vsudot_laneq_s32 (r, x, y, 2);
+}
+
+/*
+**sfooq_lane:
+**	...
+**	sudot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.4b\[1\]
+**	...
+**	ret
+*/
+int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
+{
+  return vsudotq_lane_s32 (r, x, y, 1);
+}
+
+/*
+**sfooq_laneq:
+**	...
+**	sudot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.4b\[3\]
+**	...
+**	ret
+*/
+int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y)
+{
+  return vsudotq_laneq_s32 (r, x, y, 3);
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-2.c
new file mode 100755
index 0000000000000000000000000000000000000000..f522bb21297089af27ce99764fcffeaf0930e563
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-2.c
@@ -0,0 +1,133 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
+/* { dg-add-options arm_v8_2a_i8mm }  */
+/* { dg-additional-options "-mbig-endian --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include <arm_neon.h>
+
+/* Unsigned-Signed Dot Product instructions.  */
+
+/*
+**ufoo:
+**	...
+**	usdot\tv[0-9]+.2s, v[0-9]+.8b, v[0-9]+.8b
+**	...
+**	ret
+*/
+int32x2_t ufoo (int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  return vusdot_s32 (r, x, y);
+}
+
+/*
+**ufooq:
+**	...
+**	usdot\tv[0-9]+.4s, v[0-9]+.16b, v[0-9]+.16b
+**	...
+**	ret
+*/
+int32x4_t ufooq (int32x4_t r, uint8x16_t x, int8x16_t y)
+{
+  return vusdotq_s32 (r, x, y);
+}
+
+/*
+**ufoo_lane:
+**	...
+**	usdot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.4b\[0\]
+**	...
+**	ret
+*/
+int32x2_t ufoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  return vusdot_lane_s32 (r, x, y, 0);
+}
+
+/*
+**ufoo_laneq:
+**	...
+**	usdot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.4b\[2\]
+**	...
+**	ret
+*/
+int32x2_t ufoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y)
+{
+  return vusdot_laneq_s32 (r, x, y, 2);
+}
+
+/*
+**ufooq_lane:
+**	...
+**	usdot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.4b\[1\]
+**	...
+**	ret
+*/
+int32x4_t ufooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
+{
+  return vusdotq_lane_s32 (r, x, y, 1);
+}
+
+/*
+**ufooq_laneq:
+**	...
+**	usdot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.4b\[3\]
+**	...
+**	ret
+*/
+int32x4_t ufooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y)
+{
+  return vusdotq_laneq_s32 (r, x, y, 3);
+}
+
+
+/* Signed-Unsigned Dot Product instructions.  */
+
+/*
+**sfoo_lane:
+**	...
+**	sudot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.4b\[0\]
+**	...
+**	ret
+*/
+int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
+{
+  return vsudot_lane_s32 (r, x, y, 0);
+}
+
+/*
+**sfoo_laneq:
+**	...
+**	sudot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.4b\[2\]
+**	...
+**	ret
+*/
+int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y)
+{
+  return vsudot_laneq_s32 (r, x, y, 2);
+}
+
+/*
+**sfooq_lane:
+**	...
+**	sudot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.4b\[1\]
+**	...
+**	ret
+*/
+int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
+{
+  return vsudotq_lane_s32 (r, x, y, 1);
+}
+
+/*
+**sfooq_laneq:
+**	...
+**	sudot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.4b\[3\]
+**	...
+**	ret
+*/
+int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y)
+{
+  return vsudotq_laneq_s32 (r, x, y, 3);
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-3.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-3.c
new file mode 100755
index 0000000000000000000000000000000000000000..18ecabef8dc6b99872d71c8e412b6f4b4809e901
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-3.c
@@ -0,0 +1,31 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
+/* { dg-add-options arm_v8_2a_i8mm }  */
+/* { dg-additional-options "--save-temps" } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+
+#include <arm_neon.h>
+
+int32x2_t ufoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
+{
+  /* { dg-error "lane -1 out of range 0 - 1" "" { target *-*-* } 0 } */
+  return vusdot_lane_s32 (r, x, y, -1);
+}
+
+int32x2_t ufoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y)
+{
+  /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+  return vusdot_laneq_s32 (r, x, y, -1);
+}
+
+int32x4_t ufooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
+{
+  /* { dg-error "lane 2 out of range 0 - 1" "" { target *-*-* } 0 } */
+  return vusdotq_lane_s32 (r, x, y, 2);
+}
+
+int32x4_t ufooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y)
+{
+  /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
+  return vusdotq_laneq_s32 (r, x, y, 4);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-4.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-4.c
new file mode 100644
index 0000000000000000000000000000000000000000..66c87d48694bad9624b491aec4cd1a38b75fbb95
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile-3-4.c
@@ -0,0 +1,31 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
+/* { dg-add-options arm_v8_2a_i8mm }  */
+/* { dg-additional-options "--save-temps" } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+
+#include <arm_neon.h>
+
+int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
+{
+  /* { dg-error "lane -1 out of range 0 - 1" "" { target *-*-* } 0 } */
+  return vsudot_lane_s32 (r, x, y, -1);
+}
+
+int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y)
+{
+  /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+  return vsudot_laneq_s32 (r, x, y, -1);
+}
+
+int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
+{
+  /* { dg-error "lane 2 out of range 0 - 1" "" { target *-*-* } 0 } */
+  return vsudotq_lane_s32 (r, x, y, 2);
+}
+
+int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y)
+{
+  /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
+  return vsudotq_laneq_s32 (r, x, y, 4);
+}

next prev parent reply	other threads:[~2019-12-20 13:42 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-13 10:18 Stam Markianos-Wright
2019-12-13 11:03 ` Richard Sandiford
2019-12-20 13:44   ` Stam Markianos-Wright [this message]
2019-12-20 14:24     ` Richard Sandiford
2019-12-30  9:22       ` Stam Markianos-Wright
2019-12-30 10:29         ` Richard Sandiford
2020-01-09 14:48           ` Stam Markianos-Wright
2020-01-09 15:53             ` Richard Sandiford
2020-01-16 16:14               ` Stam Markianos-Wright

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d9c463b3-d99c-31d7-9479-57e1133cf316@arm.com \
    --to=stam.markianos-wright@arm.com \
    --cc=Kyrylo.Tkachov@arm.com \
    --cc=Marcus.Shawcroft@arm.com \
    --cc=Richard.Earnshaw@arm.com \
    --cc=Richard.Sandiford@arm.com \
    --cc=gcc-patches@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).