public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][GCC][AArch64] Dot Product NEON intrinsics [Patch (6/8)]
@ 2017-09-01 13:23 Tamar Christina
  2017-09-04 11:06 ` James Greenhalgh
  0 siblings, 1 reply; 2+ messages in thread
From: Tamar Christina @ 2017-09-01 13:23 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, james.greenhalgh, Richard.Earnshaw, Marcus.Shawcroft

[-- Attachment #1: Type: text/plain, Size: 804 bytes --]

Hi All,

This patch adds the Adv.SIMD intrinsics for Dot product.

Dot product is available from ARMv8.2-a and onwards.

Regtested and bootstrapped on aarch64-none-elf and no issues.

Ok for trunk?

gcc/
2017-09-01  Tamar Christina  <tamar.christina@arm.com>

	* config/aarch64/arm_neon.h (vdot_u32, vdotq_u32, vdot_s32, vdotq_s32): New.
	(vdot_lane_u32, vdot_laneq_u32, vdotq_lane_u32, vdotq_laneq_u32): New.
	(vdot_lane_s32, vdot_laneq_s32, vdotq_lane_s32, vdotq_laneq_s32): New.

gcc/testsuite/
2017-09-01  Tamar Christina  <tamar.christina@arm.com>

	* gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h: New.
	* gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c: New.

-- 

[-- Attachment #2: 7946-diff.patch --]
[-- Type: text/x-diff, Size: 7965 bytes --]

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index d7b30b0e5ee6144d543d354ce9978fe9c5d5ae73..96e740f91a7fb01d201c1badf08199a2a76cb483 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -31541,6 +31541,99 @@ vminnmvq_f16 (float16x8_t __a)
 
 #pragma GCC pop_options
 
+/* AdvSIMD Dot Product intrinsics.  */
+
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+dotprod")
+
+__extension__ extern __inline uint32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vdot_u32 (uint32x2_t __r, uint8x8_t __a, uint8x8_t __b)
+{
+  return __builtin_aarch64_udotv8qi_uuuu (__r, __a, __b);
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vdotq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b)
+{
+  return __builtin_aarch64_udotv16qi_uuuu (__r, __a, __b);
+}
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vdot_s32 (int32x2_t __r, int8x8_t __a, int8x8_t __b)
+{
+  return __builtin_aarch64_sdotv8qi (__r, __a, __b);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vdotq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b)
+{
+  return __builtin_aarch64_sdotv16qi (__r, __a, __b);
+}
+
+__extension__ extern __inline uint32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vdot_lane_u32 (uint32x2_t __r, uint8x8_t __a, uint8x8_t __b, const int __index)
+{
+  return __builtin_aarch64_udot_lanev8qi_uuuus (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline uint32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vdot_laneq_u32 (uint32x2_t __r, uint8x8_t __a, uint8x16_t __b,
+		const int __index)
+{
+  return __builtin_aarch64_udot_laneqv8qi_uuuus (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vdotq_lane_u32 (uint32x4_t __r, uint8x16_t __a, uint8x8_t __b,
+		const int __index)
+{
+  return __builtin_aarch64_udot_lanev16qi_uuuus (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vdotq_laneq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b,
+		 const int __index)
+{
+  return __builtin_aarch64_udot_laneqv16qi_uuuus (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vdot_lane_s32 (int32x2_t __r, int8x8_t __a, int8x8_t __b, const int __index)
+{
+  return __builtin_aarch64_sdot_lanev8qi (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vdot_laneq_s32 (int32x2_t __r, int8x8_t __a, int8x16_t __b, const int __index)
+{
+  return __builtin_aarch64_sdot_laneqv8qi (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vdotq_lane_s32 (int32x4_t __r, int8x16_t __a, int8x8_t __b, const int __index)
+{
+  return __builtin_aarch64_sdot_lanev16qi (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vdotq_laneq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b, const int __index)
+{
+  return __builtin_aarch64_sdot_laneqv16qi (__r, __a, __b, __index);
+}
+#pragma GCC pop_options
+
 #undef __aarch64_vget_lane_any
 
 #undef __aarch64_vdup_lane_any
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c
new file mode 100644
index 0000000000000000000000000000000000000000..f75503e1ef52a215b91538dad243b51d88b99c00
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c
@@ -0,0 +1,74 @@
+/* { dg-skip-if "can't compile on arm." { arm*-*-* } } */
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */
+
+#include <arm_neon.h>
+
+/* Unsigned Dot Product instructions.  */
+
+uint32x2_t ufoo (uint32x2_t r, uint8x8_t x, uint8x8_t y)
+{
+  return vdot_u32 (r, x, y);
+}
+
+uint32x4_t ufooq (uint32x4_t r, uint8x16_t x, uint8x16_t y)
+{
+  return vdotq_u32 (r, x, y);
+}
+
+uint32x2_t ufoo_lane (uint32x2_t r, uint8x8_t x, uint8x8_t y)
+{
+  return vdot_lane_u32 (r, x, y, 0);
+}
+
+uint32x2_t ufoo_laneq (uint32x2_t r, uint8x8_t x, uint8x16_t y)
+{
+  return vdot_laneq_u32 (r, x, y, 0);
+}
+
+uint32x4_t ufooq_lane (uint32x4_t r, uint8x16_t x, uint8x8_t y)
+{
+  return vdotq_lane_u32 (r, x, y, 0);
+}
+
+uint32x4_t ufooq_laneq (uint32x4_t r, uint8x16_t x, uint8x16_t y)
+{
+  return vdotq_laneq_u32 (r, x, y, 0);
+}
+
+/* Signed Dot Product instructions.  */
+
+int32x2_t sfoo (int32x2_t r, int8x8_t x, int8x8_t y)
+{
+  return vdot_s32 (r, x, y);
+}
+
+int32x4_t sfooq (int32x4_t r, int8x16_t x, int8x16_t y)
+{
+  return vdotq_s32 (r, x, y);
+}
+
+int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, int8x8_t y)
+{
+  return vdot_lane_s32 (r, x, y, 0);
+}
+
+int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, int8x16_t y)
+{
+  return vdot_laneq_s32 (r, x, y, 0);
+}
+
+int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, int8x8_t y)
+{
+  return vdotq_lane_s32 (r, x, y, 0);
+}
+
+int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, int8x16_t y)
+{
+  return vdotq_laneq_s32 (r, x, y, 0);
+}
+
+/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.8b} 2 } } */
+/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.4b\[[0-9]+\]}  4 } } */
+/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.16b}  2 } } */
+/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.4b\[[0-9]+\]}  4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h
new file mode 100644
index 0000000000000000000000000000000000000000..90b00aff95cfef96d1963be17673dc191cc71169
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h
@@ -0,0 +1,15 @@
+TYPE char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+TYPE char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+
+__attribute__ ((noinline)) int
+foo1(int len) {
+  int i;
+  TYPE int result = 0;
+  TYPE short prod;
+
+  for (i=0; i<len; i++) {
+    prod = X[i] * Y[i];
+    result += prod;
+  }
+  return result;
+}
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c
new file mode 100644
index 0000000000000000000000000000000000000000..7c840ffc94f4adfb0d36cbd899958435235e20b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */
+
+#define N 64
+#define TYPE signed
+
+#include "vect-dot-qi.h"
+
+/* { dg-final { scan-assembler-times {sdot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.16b} 4 } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c
new file mode 100644
index 0000000000000000000000000000000000000000..eff97a0c8ef312ba2db2b68b48cd0a8348719cd8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */
+
+#define N 64
+#define TYPE unsigned
+
+#include "vect-dot-qi.h"
+
+/* { dg-final { scan-assembler-times {udot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.16b} 4 } } */
\ No newline at end of file


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH][GCC][AArch64] Dot Product NEON intrinsics [Patch (6/8)]
  2017-09-01 13:23 [PATCH][GCC][AArch64] Dot Product NEON intrinsics [Patch (6/8)] Tamar Christina
@ 2017-09-04 11:06 ` James Greenhalgh
  0 siblings, 0 replies; 2+ messages in thread
From: James Greenhalgh @ 2017-09-04 11:06 UTC (permalink / raw)
  To: Tamar Christina; +Cc: gcc-patches, nd, Richard.Earnshaw, Marcus.Shawcroft

On Fri, Sep 01, 2017 at 02:22:55PM +0100, Tamar Christina wrote:
> Hi All,
> 
> This patch adds the Adv.SIMD intrinsics for Dot product.
> 
> Dot product is available from ARMv8.2-a and onwards.
> 
> Regtested and bootstrapped on aarch64-none-elf and no issues.
> 
> Ok for trunk?

OK.

Thanks,
James

> 
> gcc/
> 2017-09-01  Tamar Christina  <tamar.christina@arm.com>
> 
> 	* config/aarch64/arm_neon.h (vdot_u32, vdotq_u32, vdot_s32, vdotq_s32): New.
> 	(vdot_lane_u32, vdot_laneq_u32, vdotq_lane_u32, vdotq_laneq_u32): New.
> 	(vdot_lane_s32, vdot_laneq_s32, vdotq_lane_s32, vdotq_laneq_s32): New.
> 
> gcc/testsuite/
> 2017-09-01  Tamar Christina  <tamar.christina@arm.com>
> 
> 	* gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h: New.
> 	* gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c: New.
> 	* gcc.target/aarch64/advsimd-intrinsics/vect-dot-s8.c: New.
> 	* gcc.target/aarch64/advsimd-intrinsics/vect-dot-u8.c: New.
> 
> -- 

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2017-09-04 11:06 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-01 13:23 [PATCH][GCC][AArch64] Dot Product NEON intrinsics [Patch (6/8)] Tamar Christina
2017-09-04 11:06 ` James Greenhalgh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).