public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-235] aarch64: Use RTL builtins for polynomial vsri[q]_n intrinsics
@ 2021-04-28 20:15 Jonathan Wright
0 siblings, 0 replies; only message in thread
From: Jonathan Wright @ 2021-04-28 20:15 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:1d66367a71ef969235e10c77685f5ca4551bf519
commit r12-235-g1d66367a71ef969235e10c77685f5ca4551bf519
Author: Jonathan Wright <jonathan.wright@arm.com>
Date: Wed Feb 10 13:02:24 2021 +0000
aarch64: Use RTL builtins for polynomial vsri[q]_n intrinsics
Rewrite vsri[q]_n_p* Neon intrinsics to use RTL builtins rather than
inline assembly code, allowing for better scheduling and
optimization.
gcc/ChangeLog:
2021-02-10 Jonathan Wright <jonathan.wright@arm.com>
* config/aarch64/aarch64-simd-builtins.def: Add polynomial
ssri_n builtin generator macro.
* config/aarch64/arm_neon.h (vsri_n_p8): Use RTL builtin
instead of inline asm.
(vsri_n_p16): Likewise.
(vsri_n_p64): Likewise.
(vsriq_n_p8): Likewise.
(vsriq_n_p16): Likewise.
(vsriq_n_p64): Likewise.
Diff:
---
gcc/config/aarch64/aarch64-simd-builtins.def | 1 +
gcc/config/aarch64/arm_neon.h | 118 ++++++++++-----------------
2 files changed, 42 insertions(+), 77 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 534979133f4..86614e73b1b 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -434,6 +434,7 @@
BUILTIN_VQN (USHIFT2IMM, uqrshrn2_n, 0, NONE)
/* Implemented by aarch64_<sur>s<lr>i_n<mode>. */
BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0, NONE)
+ BUILTIN_VALLP (SHIFTINSERTP, ssri_n, 0, NONE)
BUILTIN_VSDQ_I_DI (USHIFTACC, usri_n, 0, NONE)
BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0, NONE)
BUILTIN_VALLP (SHIFTINSERTP, ssli_n, 0, NONE)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 38a3a3ff01e..3536052e5d8 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -9078,83 +9078,47 @@ vsliq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c)
return __builtin_aarch64_ssli_nv8hi_ppps (__a, __b, __c);
}
-#define vsri_n_p8(a, b, c) \
- __extension__ \
- ({ \
- poly8x8_t b_ = (b); \
- poly8x8_t a_ = (a); \
- poly8x8_t result; \
- __asm__ ("sri %0.8b,%2.8b,%3" \
- : "=w"(result) \
- : "0"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vsri_n_p16(a, b, c) \
- __extension__ \
- ({ \
- poly16x4_t b_ = (b); \
- poly16x4_t a_ = (a); \
- poly16x4_t result; \
- __asm__ ("sri %0.4h,%2.4h,%3" \
- : "=w"(result) \
- : "0"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vsri_n_p64(a, b, c) \
- __extension__ \
- ({ \
- poly64x1_t b_ = (b); \
- poly64x1_t a_ = (a); \
- poly64x1_t result; \
- __asm__ ("sri %d0,%d2,%3" \
- : "=w"(result) \
- : "0"(a_), "w"(b_), "i"(c) \
- : /* No clobbers. */); \
- result; \
- })
-
-#define vsriq_n_p8(a, b, c) \
- __extension__ \
- ({ \
- poly8x16_t b_ = (b); \
- poly8x16_t a_ = (a); \
- poly8x16_t result; \
- __asm__ ("sri %0.16b,%2.16b,%3" \
- : "=w"(result) \
- : "0"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vsriq_n_p16(a, b, c) \
- __extension__ \
- ({ \
- poly16x8_t b_ = (b); \
- poly16x8_t a_ = (a); \
- poly16x8_t result; \
- __asm__ ("sri %0.8h,%2.8h,%3" \
- : "=w"(result) \
- : "0"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vsriq_n_p64(a, b, c) \
- __extension__ \
- ({ \
- poly64x2_t b_ = (b); \
- poly64x2_t a_ = (a); \
- poly64x2_t result; \
- __asm__ ("sri %0.2d,%2.2d,%3" \
- : "=w"(result) \
- : "0"(a_), "w"(b_), "i"(c) \
- : /* No clobbers. */); \
- result; \
- })
+__extension__ extern __inline poly8x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsri_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c)
+{
+ return __builtin_aarch64_ssri_nv8qi_ppps (__a, __b, __c);
+}
+
+__extension__ extern __inline poly16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsri_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c)
+{
+ return __builtin_aarch64_ssri_nv4hi_ppps (__a, __b, __c);
+}
+
+__extension__ extern __inline poly64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsri_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c)
+{
+ return (poly64x1_t) __builtin_aarch64_ssri_ndi_ppps (__a[0], __b[0], __c);
+}
+
+__extension__ extern __inline poly8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsriq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c)
+{
+ return __builtin_aarch64_ssri_nv16qi_ppps (__a, __b, __c);
+}
+
+__extension__ extern __inline poly16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsriq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c)
+{
+ return __builtin_aarch64_ssri_nv8hi_ppps (__a, __b, __c);
+}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vsriq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c)
+{
+ return __builtin_aarch64_ssri_nv2di_ppps (__a, __b, __c);
+}
__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2021-04-28 20:15 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-28 20:15 [gcc r12-235] aarch64: Use RTL builtins for polynomial vsri[q]_n intrinsics Jonathan Wright
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).