diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index d7b30b0e5ee6144d543d354ce9978fe9c5d5ae73..96e740f91a7fb01d201c1badf08199a2a76cb483 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -31541,6 +31541,99 @@ vminnmvq_f16 (float16x8_t __a) #pragma GCC pop_options +/* AdvSIMD Dot Product intrinsics. */ + +#pragma GCC push_options +#pragma GCC target ("arch=armv8.2-a+dotprod") + +__extension__ extern __inline uint32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vdot_u32 (uint32x2_t __r, uint8x8_t __a, uint8x8_t __b) +{ + return __builtin_aarch64_udotv8qi_uuuu (__r, __a, __b); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vdotq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b) +{ + return __builtin_aarch64_udotv16qi_uuuu (__r, __a, __b); +} + +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vdot_s32 (int32x2_t __r, int8x8_t __a, int8x8_t __b) +{ + return __builtin_aarch64_sdotv8qi (__r, __a, __b); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vdotq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b) +{ + return __builtin_aarch64_sdotv16qi (__r, __a, __b); +} + +__extension__ extern __inline uint32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vdot_lane_u32 (uint32x2_t __r, uint8x8_t __a, uint8x8_t __b, const int __index) +{ + return __builtin_aarch64_udot_lanev8qi_uuuus (__r, __a, __b, __index); +} + +__extension__ extern __inline uint32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vdot_laneq_u32 (uint32x2_t __r, uint8x8_t __a, uint8x16_t __b, + const int __index) +{ + return __builtin_aarch64_udot_laneqv8qi_uuuus (__r, __a, __b, __index); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vdotq_lane_u32 (uint32x4_t __r, uint8x16_t __a, uint8x8_t __b, + const int __index) +{ + return __builtin_aarch64_udot_lanev16qi_uuuus (__r, __a, __b, __index); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vdotq_laneq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b, + const int __index) +{ + return __builtin_aarch64_udot_laneqv16qi_uuuus (__r, __a, __b, __index); +} + +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vdot_lane_s32 (int32x2_t __r, int8x8_t __a, int8x8_t __b, const int __index) +{ + return __builtin_aarch64_sdot_lanev8qi (__r, __a, __b, __index); +} + +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vdot_laneq_s32 (int32x2_t __r, int8x8_t __a, int8x16_t __b, const int __index) +{ + return __builtin_aarch64_sdot_laneqv8qi (__r, __a, __b, __index); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vdotq_lane_s32 (int32x4_t __r, int8x16_t __a, int8x8_t __b, const int __index) +{ + return __builtin_aarch64_sdot_lanev16qi (__r, __a, __b, __index); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vdotq_laneq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b, const int __index) +{ + return __builtin_aarch64_sdot_laneqv16qi (__r, __a, __b, __index); +} +#pragma GCC pop_options + #undef __aarch64_vget_lane_any #undef __aarch64_vdup_lane_any diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c new file mode 100644 index 0000000000000000000000000000000000000000..f75503e1ef52a215b91538dad243b51d88b99c00 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-compile.c @@ -0,0 +1,74 @@ +/* { dg-skip-if "can't compile on arm." { arm*-*-* } } */ +/* { dg-do compile } */ +/* { dg-additional-options "-O3 -march=armv8.2-a+dotprod" } */ + +#include + +/* Unsigned Dot Product instructions. */ + +uint32x2_t ufoo (uint32x2_t r, uint8x8_t x, uint8x8_t y) +{ + return vdot_u32 (r, x, y); +} + +uint32x4_t ufooq (uint32x4_t r, uint8x16_t x, uint8x16_t y) +{ + return vdotq_u32 (r, x, y); +} + +uint32x2_t ufoo_lane (uint32x2_t r, uint8x8_t x, uint8x8_t y) +{ + return vdot_lane_u32 (r, x, y, 0); +} + +uint32x2_t ufoo_laneq (uint32x2_t r, uint8x8_t x, uint8x16_t y) +{ + return vdot_laneq_u32 (r, x, y, 0); +} + +uint32x4_t ufooq_lane (uint32x4_t r, uint8x16_t x, uint8x8_t y) +{ + return vdotq_lane_u32 (r, x, y, 0); +} + +uint32x4_t ufooq_laneq (uint32x4_t r, uint8x16_t x, uint8x16_t y) +{ + return vdotq_laneq_u32 (r, x, y, 0); +} + +/* Signed Dot Product instructions. */ + +int32x2_t sfoo (int32x2_t r, int8x8_t x, int8x8_t y) +{ + return vdot_s32 (r, x, y); +} + +int32x4_t sfooq (int32x4_t r, int8x16_t x, int8x16_t y) +{ + return vdotq_s32 (r, x, y); +} + +int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, int8x8_t y) +{ + return vdot_lane_s32 (r, x, y, 0); +} + +int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, int8x16_t y) +{ + return vdot_laneq_s32 (r, x, y, 0); +} + +int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, int8x8_t y) +{ + return vdotq_lane_s32 (r, x, y, 0); +} + +int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, int8x16_t y) +{ + return vdotq_laneq_s32 (r, x, y, 0); +} + +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.8b} 2 } } */ +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.2s, v[0-9]+\.8b, v[0-9]+\.4b\[[0-9]+\]} 4 } } */ +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.16b} 2 } } */ +/* { dg-final { scan-assembler-times {[us]dot\tv[0-9]+\.4s, v[0-9]+\.16b, v[0-9]+\.4b\[[0-9]+\]} 4 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h new file mode 100644 index 0000000000000000000000000000000000000000..90b00aff95cfef96d1963be17673dc191cc71169 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vect-dot-qi.h @@ -0,0 +1,15 @@ +TYPE char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +TYPE char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + +__attribute__ ((noinline)) int +foo1(int len) { + int i; + TYPE int result = 0; + TYPE short prod; + + for (i=0; i