* [PATCH AArch64] Add a builtin for rbit(q?)_p8; add intrinsics and tests.
@ 2014-08-19 17:02 Alan Lawrence
2014-09-02 16:36 ` Marcus Shawcroft
0 siblings, 1 reply; 2+ messages in thread
From: Alan Lawrence @ 2014-08-19 17:02 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 771 bytes --]
This patch adds the missing vrbit_p8 and vrbitq_p8 intrinsics to arm_neon.h, and
implements all the vrbit(q?)_[psu]8 intrinsics using a new builtin, rather than
the previous temporary asm. Also adds a testcase checking (a) execution results
and (b) that we output rbit vXX.8b,vYY.8b or corresponding with .16b.
Tested on aarch64-none-elf and aarch64_be-none-elf.
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md (aarch64_rbit<mode>): New pattern.
* config/aarch64/aarch64-simd-builtins.def (rbit): New builtin.
* config/aarch64/arm_neon.h (vrbit_s8, vrbit_u8, vrbitq_s8, vrbitq_u8):
Replace temporary asm with call to builtin.
(vrbit_p8, vrbitq_p8): New functions.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/simd/vrbit_1.c: New test.
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: rbit.patch --]
[-- Type: text/x-patch; name=rbit.patch, Size: 6097 bytes --]
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 268432cc117b7027ee9472fc5a4f9b1ea13bea0f..3b985b3176ff8bc50bd60105e8a2b1a983d54982 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -347,6 +347,8 @@
VAR5 (UNOPU, bswap, 10, v4hi, v8hi, v2si, v4si, v2di)
+ BUILTIN_VB (UNOP, rbit, 0)
+
/* Implemented by
aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>. */
BUILTIN_VALL (BINOP, zip1, 0)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 1c32f0c4efa0e9b8e8bc06af726798f6aaecf39f..9997cdf8fd0269a0447edd8ce30515730d73d301 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -294,6 +294,15 @@
[(set_attr "type" "neon_rev<q>")]
)
+(define_insn "aarch64_rbit<mode>"
+ [(set (match_operand:VB 0 "register_operand" "=w")
+ (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
+ UNSPEC_RBIT))]
+ "TARGET_SIMD"
+ "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
+ [(set_attr "type" "neon_rbit")]
+)
+
(define_insn "*aarch64_mul3_elt<mode>"
[(set (match_operand:VMUL 0 "register_operand" "=w")
(mult:VMUL
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index d5d8c23acd75b6f2a4e8cd6cc4daca418372f883..626f418f1e6e49d4969119f43fd620d78b2c055a 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -10477,50 +10477,6 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
result; \
})
-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-vrbit_s8 (int8x8_t a)
-{
- int8x8_t result;
- __asm__ ("rbit %0.8b,%1.8b"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-vrbit_u8 (uint8x8_t a)
-{
- uint8x8_t result;
- __asm__ ("rbit %0.8b,%1.8b"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-vrbitq_s8 (int8x16_t a)
-{
- int8x16_t result;
- __asm__ ("rbit %0.16b,%1.16b"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-vrbitq_u8 (uint8x16_t a)
-{
- uint8x16_t result;
- __asm__ ("rbit %0.16b,%1.16b"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrecpe_u32 (uint32x2_t a)
{
@@ -20632,6 +20588,44 @@ vqsubd_u64 (uint64_t __a, uint64_t __b)
return __builtin_aarch64_uqsubdi_uuu (__a, __b);
}
+/* vrbit */
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vrbit_p8 (poly8x8_t __a)
+{
+ return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vrbit_s8 (int8x8_t __a)
+{
+ return __builtin_aarch64_rbitv8qi (__a);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vrbit_u8 (uint8x8_t __a)
+{
+ return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vrbitq_p8 (poly8x16_t __a)
+{
+ return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a);
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vrbitq_s8 (int8x16_t __a)
+{
+ return __builtin_aarch64_rbitv16qi (__a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vrbitq_u8 (uint8x16_t __a)
+{
+ return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a);
+}
+
/* vrecpe */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vrbit_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vrbit_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..77d13d48660e165cf113e3cd9c61d63ff2b4843e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vrbit_1.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-O2 --save-temps -fno-inline" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+uint64_t in1 = 0x0123456789abcdefULL;
+uint64_t expected1 = 0x80c4a2e691d5b3f7ULL;
+
+#define TEST8(BASETYPE, SUFFIX) \
+void test8_##SUFFIX () \
+{ \
+ BASETYPE##8x8_t out = vrbit_##SUFFIX (vcreate_##SUFFIX (in1)); \
+ uint64_t res = vget_lane_u64 (vreinterpret_u64_##SUFFIX (out), 0); \
+ if (res != expected1) abort (); \
+}
+
+uint64_t in2 = 0xdeadbeefcafebabeULL;
+uint64_t expected2 = 0x7bb57df7537f5d7dULL;
+
+#define TEST16(BASETYPE, SUFFIX) \
+void test16_##SUFFIX () \
+{ \
+ BASETYPE##8x16_t in = vcombine_##SUFFIX (vcreate_##SUFFIX (in1), \
+ vcreate_##SUFFIX (in2)); \
+ uint64x2_t res = vreinterpretq_u64_##SUFFIX (vrbitq_##SUFFIX (in)); \
+ uint64_t res1 = vgetq_lane_u64 (res, 0); \
+ uint64_t res2 = vgetq_lane_u64 (res, 1); \
+ if (res1 != expected1 || res2 != expected2) abort (); \
+}
+
+TEST8 (poly, p8);
+TEST8 (int, s8);
+TEST8 (uint, u8);
+
+TEST16 (poly, p8);
+TEST16 (int, s8);
+TEST16 (uint, u8);
+
+int
+main (int argc, char **argv)
+{
+ test8_p8 ();
+ test8_s8 ();
+ test8_u8 ();
+ test16_p8 ();
+ test16_s8 ();
+ test16_u8 ();
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "rbit\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\]" 3 } } */
+/* { dg-final { scan-assembler-times "rbit\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\]" 3 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH AArch64] Add a builtin for rbit(q?)_p8; add intrinsics and tests.
2014-08-19 17:02 [PATCH AArch64] Add a builtin for rbit(q?)_p8; add intrinsics and tests Alan Lawrence
@ 2014-09-02 16:36 ` Marcus Shawcroft
0 siblings, 0 replies; 2+ messages in thread
From: Marcus Shawcroft @ 2014-09-02 16:36 UTC (permalink / raw)
To: Alan Lawrence; +Cc: gcc-patches
On 19 August 2014 18:02, Alan Lawrence <alan.lawrence@arm.com> wrote:
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-simd.md (aarch64_rbit<mode>): New pattern.
> * config/aarch64/aarch64-simd-builtins.def (rbit): New builtin.
>
> * config/aarch64/arm_neon.h (vrbit_s8, vrbit_u8, vrbitq_s8,
> vrbitq_u8):
> Replace temporary asm with call to builtin.
> (vrbit_p8, vrbitq_p8): New functions.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/simd/vrbit_1.c: New test.
>
OK
/Marcus
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2014-09-02 16:36 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-08-19 17:02 [PATCH AArch64] Add a builtin for rbit(q?)_p8; add intrinsics and tests Alan Lawrence
2014-09-02 16:36 ` Marcus Shawcroft
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).