public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r11-9624] AArch32: correct usdot-product RTL patterns.
@ 2022-02-25 12:13 Tamar Christina
0 siblings, 0 replies; only message in thread
From: Tamar Christina @ 2022-02-25 12:13 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:900ea59b055f60a3c5219c8e6eb2f3359aaa854a
commit r11-9624-g900ea59b055f60a3c5219c8e6eb2f3359aaa854a
Author: Tamar Christina <tamar.christina@arm.com>
Date: Fri Feb 25 12:01:25 2022 +0000
AArch32: correct usdot-product RTL patterns.
There was a bug in the ACLE specification for dot product which has now
been fixed[1]. This means some intrinsics were missing and are added by this
patch.
[1] https://github.com/ARM-software/acle/releases/tag/r2021Q3
gcc/ChangeLog:
* config/arm/arm_neon.h (vusdotq_s32, vusdot_laneq_s32,
vusdotq_laneq_s32, vsudot_laneq_s32, vsudotq_laneq_s32): New.
* config/arm/arm_neon_builtins.def (usdot): Add V16QI.
(usdot_laneq, sudot_laneq): New.
* config/arm/neon.md (neon_<sup>dot_laneq<vsi2qi>): New.
(neon_<sup>dot_lane<vsi2qi>): Remove unneeded code.
gcc/testsuite/ChangeLog:
* gcc.target/arm/simd/vdot-2-1.c: Add new tests.
* gcc.target/arm/simd/vdot-2-2.c: Likewise and fix output.
Diff:
---
gcc/config/arm/arm_neon.h | 39 ++++++++++++++++++
gcc/config/arm/arm_neon_builtins.def | 4 +-
gcc/config/arm/neon.md | 28 ++++++++++++-
gcc/testsuite/gcc.target/arm/simd/vdot-2-1.c | 59 ++++++++++++++++++++++++++-
gcc/testsuite/gcc.target/arm/simd/vdot-2-2.c | 60 +++++++++++++++++++++++++++-
5 files changed, 185 insertions(+), 5 deletions(-)
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index dc28b92b5af..5280b962d90 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -18803,6 +18803,13 @@ vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b)
return __builtin_neon_usdotv8qi_ssus (__r, __a, __b);
}
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vusdotq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b)
+{
+ return __builtin_neon_usdotv16qi_ssus (__r, __a, __b);
+}
+
__extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vusdot_lane_s32 (int32x2_t __r, uint8x8_t __a,
@@ -18835,6 +18842,38 @@ vsudotq_lane_s32 (int32x4_t __r, int8x16_t __a,
return __builtin_neon_sudot_lanev16qi_sssus (__r, __a, __b, __index);
}
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vusdot_laneq_s32 (int32x2_t __r, uint8x8_t __a,
+ int8x16_t __b, const int __index)
+{
+ return __builtin_neon_usdot_laneqv8qi_ssuss (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vusdotq_laneq_s32 (int32x4_t __r, uint8x16_t __a,
+ int8x16_t __b, const int __index)
+{
+ return __builtin_neon_usdot_laneqv16qi_ssuss (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsudot_laneq_s32 (int32x2_t __r, int8x8_t __a,
+ uint8x16_t __b, const int __index)
+{
+ return __builtin_neon_sudot_laneqv8qi_sssus (__r, __a, __b, __index);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsudotq_laneq_s32 (int32x4_t __r, int8x16_t __a,
+ uint8x16_t __b, const int __index)
+{
+ return __builtin_neon_sudot_laneqv16qi_sssus (__r, __a, __b, __index);
+}
+
#pragma GCC pop_options
#pragma GCC pop_options
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def
index 97e4f9c9be3..e2ce6602ac4 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -347,9 +347,11 @@ VAR2 (UTERNOP, udot, v8qi, v16qi)
VAR2 (MAC_LANE, sdot_lane, v8qi, v16qi)
VAR2 (UMAC_LANE, udot_lane, v8qi, v16qi)
-VAR1 (USTERNOP, usdot, v8qi)
+VAR2 (USTERNOP, usdot, v8qi, v16qi)
VAR2 (USMAC_LANE_QUADTUP, usdot_lane, v8qi, v16qi)
VAR2 (SUMAC_LANE_QUADTUP, sudot_lane, v8qi, v16qi)
+VAR2 (USMAC_LANE_QUADTUP, usdot_laneq, v8qi, v16qi)
+VAR2 (SUMAC_LANE_QUADTUP, sudot_laneq, v8qi, v16qi)
VAR4 (BINOP, vcadd90, v4hf, v2sf, v8hf, v4sf)
VAR4 (BINOP, vcadd270, v4hf, v2sf, v8hf, v4sf)
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index fec2cc91d24..43dd8626368 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -3031,9 +3031,33 @@
DOTPROD_I8MM)
(match_operand:VCVTI 1 "register_operand" "0")))]
"TARGET_I8MM"
+ "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
+ [(set_attr "type" "neon_dot<q>")]
+)
+
+;; These instructions map to the __builtins for the Dot Product
+;; indexed operations in the v8.6 I8MM extension.
+(define_insn "neon_<sup>dot_laneq<vsi2qi>"
+ [(set (match_operand:VCVTI 0 "register_operand" "=w")
+ (plus:VCVTI
+ (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand" "w")
+ (match_operand:V16QI 3 "register_operand" "t")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ DOTPROD_I8MM)
+ (match_operand:VCVTI 1 "register_operand" "0")))]
+ "TARGET_I8MM"
{
- operands[4] = GEN_INT (INTVAL (operands[4]));
- return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
+ int lane = INTVAL (operands[4]);
+ if (lane > GET_MODE_NUNITS (V2SImode) - 1)
+ {
+ operands[4] = GEN_INT (lane - GET_MODE_NUNITS (V2SImode));
+ return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %f3[%c4]";
+ }
+ else
+ {
+ operands[4] = GEN_INT (lane);
+ return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %e3[%c4]";
+ }
}
[(set_attr "type" "neon_dot<q>")]
)
diff --git a/gcc/testsuite/gcc.target/arm/simd/vdot-2-1.c b/gcc/testsuite/gcc.target/arm/simd/vdot-2-1.c
index 88b80cff232..35d713f6a60 100644
--- a/gcc/testsuite/gcc.target/arm/simd/vdot-2-1.c
+++ b/gcc/testsuite/gcc.target/arm/simd/vdot-2-1.c
@@ -2,7 +2,7 @@
/* { dg-require-effective-target arm_hard_ok } */
/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
/* { dg-add-options arm_v8_2a_i8mm } */
-/* { dg-additional-options "-O -save-temps -mfloat-abi=hard" } */
+/* { dg-additional-options "-O -save-temps -mfloat-abi=hard -mfpu=auto" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include <arm_neon.h>
@@ -20,6 +20,17 @@ int32x2_t usfoo (int32x2_t r, uint8x8_t x, int8x8_t y)
return vusdot_s32 (r, x, y);
}
+/*
+**usfooq:
+** ...
+** vusdot\.s8 q0, q1, q2
+** bx lr
+*/
+int32x4_t usfooq (int32x4_t r, uint8x16_t x, int8x16_t y)
+{
+ return vusdotq_s32 (r, x, y);
+}
+
/*
**usfoo_lane:
** ...
@@ -66,6 +77,52 @@ int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
return vsudotq_lane_s32 (r, x, y, 1);
}
+/*
+**usfoo_laneq:
+** ...
+** vusdot\.s8 d0, d1, d3\[0\]
+** bx lr
+*/
+int32x2_t usfoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y)
+{
+ return vusdot_laneq_s32 (r, x, y, 2);
+}
+
+/*
+**usfooq_laneq:
+** ...
+** vusdot\.s8 q0, q1, d5\[1\]
+** bx lr
+*/
+int32x4_t usfooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y)
+{
+ return vusdotq_laneq_s32 (r, x, y, 3);
+}
+
+/* Signed-Unsigned Dot Product instructions. */
+
+/*
+**sfoo_laneq:
+** ...
+** vsudot\.u8 d0, d1, d3\[0\]
+** bx lr
+*/
+int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y)
+{
+ return vsudot_laneq_s32 (r, x, y, 2);
+}
+
+/*
+**sfooq_laneq:
+** ...
+** vsudot\.u8 q0, q1, d5\[1\]
+** bx lr
+*/
+int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y)
+{
+ return vsudotq_laneq_s32 (r, x, y, 3);
+}
+
/*
**usfoo_untied:
** ...
diff --git a/gcc/testsuite/gcc.target/arm/simd/vdot-2-2.c b/gcc/testsuite/gcc.target/arm/simd/vdot-2-2.c
index 1c74718ca56..c57dd423dbc 100644
--- a/gcc/testsuite/gcc.target/arm/simd/vdot-2-2.c
+++ b/gcc/testsuite/gcc.target/arm/simd/vdot-2-2.c
@@ -2,7 +2,7 @@
/* { dg-require-effective-target arm_hard_ok } */
/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
/* { dg-add-options arm_v8_2a_i8mm } */
-/* { dg-additional-options "-O -save-temps -mbig-endian -mfloat-abi=hard" } */
+/* { dg-additional-options "-O -save-temps -mfloat-abi=hard -mbig-endian -mfpu=auto" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include <arm_neon.h>
@@ -20,6 +20,17 @@ int32x2_t usfoo (int32x2_t r, uint8x8_t x, int8x8_t y)
return vusdot_s32 (r, x, y);
}
+/*
+**usfooq:
+** ...
+** vusdot\.s8 q0, q1, q2
+** bx lr
+*/
+int32x4_t usfooq (int32x4_t r, uint8x16_t x, int8x16_t y)
+{
+ return vusdotq_s32 (r, x, y);
+}
+
/*
**usfoo_lane:
** ...
@@ -66,6 +77,52 @@ int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
return vsudotq_lane_s32 (r, x, y, 1);
}
+/*
+**usfoo_laneq:
+** ...
+** vusdot\.s8 d0, d1, d3\[0\]
+** bx lr
+*/
+int32x2_t usfoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y)
+{
+ return vusdot_laneq_s32 (r, x, y, 2);
+}
+
+/*
+**usfooq_laneq:
+** ...
+** vusdot\.s8 q0, q1, d5\[1\]
+** bx lr
+*/
+int32x4_t usfooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y)
+{
+ return vusdotq_laneq_s32 (r, x, y, 3);
+}
+
+/* Signed-Unsigned Dot Product instructions. */
+
+/*
+**sfoo_laneq:
+** ...
+** vsudot\.u8 d0, d1, d3\[0\]
+** bx lr
+*/
+int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y)
+{
+ return vsudot_laneq_s32 (r, x, y, 2);
+}
+
+/*
+**sfooq_laneq:
+** ...
+** vsudot\.u8 q0, q1, d5\[1\]
+** bx lr
+*/
+int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y)
+{
+ return vsudotq_laneq_s32 (r, x, y, 3);
+}
+
/*
**usfoo_untied:
** ...
@@ -89,3 +146,4 @@ int32x2_t usfoo_lane_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_
{
return vusdot_lane_s32 (r, x, y, 0);
}
+
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2022-02-25 12:13 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-25 12:13 [gcc r11-9624] AArch32: correct usdot-product RTL patterns Tamar Christina
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).