public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][committed] arm: Implement ACLE Data Intrinsics
@ 2023-05-25 13:58 Kyrylo Tkachov
  0 siblings, 0 replies; only message in thread
From: Kyrylo Tkachov @ 2023-05-25 13:58 UTC (permalink / raw)
  To: gcc-patches; +Cc: Chris Sidebottom

[-- Attachment #1: Type: text/plain, Size: 1556 bytes --]

Hi all,

This patch implements a number of scalar data processing intrinsics from ACLE
that were requested by some users. Some of these have fast single-instruction
sequences for Armv6 and later, but even for earlier versions they can still be
expanded to an inline sequence or a call to libgcc (and ACLE recommends that they
be unconditionally available).

Chris Sidebottom wrote most of the patch; I just cleaned it up, wired up some builtins,
and adjusted the tests.

Bootstrapped and tested on arm-none-linux-gnueabihf.
Pushing to trunk.
Thanks,
Kyrill

Co-authored-by: Chris Sidebottom <chris.sidebottom@arm.com>

gcc/ChangeLog:

2023-05-24  Chris Sidebottom  <chris.sidebottom@arm.com>
            Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

	* config/arm/arm.md (rbitsi2): Rename to...
	(arm_rbit): ... This.
	(ctzsi2): Adjust for the above.
	(arm_rev16si2): Convert to define_expand.
	(arm_rev16si2_alt1): New pattern.
	(arm_rev16si2_alt): Rename to...
	(*arm_rev16si2_alt2): ... This.
	* config/arm/arm_acle.h (__ror, __rorl, __rorll, __clz, __clzl, __clzll,
	__cls, __clsl, __clsll, __revsh, __rev, __revl, __revll, __rev16,
	__rev16l, __rev16ll, __rbit, __rbitl, __rbitll): Define intrinsics.
	* config/arm/arm_acle_builtins.def (rbit, rev16si2): Define builtins.

gcc/testsuite/ChangeLog:

	* gcc.target/arm/acle/data-intrinsics-armv6.c: New test.
	* gcc.target/arm/acle/data-intrinsics-assembly.c: New test.
	* gcc.target/arm/acle/data-intrinsics-rbit.c: New test.
	* gcc.target/arm/acle/data-intrinsics.c: New test.

[-- Attachment #2: arm-acle.patch --]
[-- Type: application/octet-stream, Size: 14138 bytes --]

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index cbfc4543531452b0708a38bdf4abf5105b54f8b7..3a39f7941e8a6e0814c9d0c54be7b63ecc5965a9 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -12180,7 +12180,7 @@ (define_insn "clzsi2"
   [(set_attr "predicable" "yes")
    (set_attr "type" "clz")])
 
-(define_insn "rbitsi2"
+(define_insn "arm_rbit"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
 	(unspec:SI [(match_operand:SI 1 "s_register_operand" "r")] UNSPEC_RBIT))]
   "TARGET_32BIT && arm_arch_thumb2"
@@ -12200,7 +12200,7 @@ (define_insn_and_split "ctzsi2"
   "&& reload_completed"
   [(const_int 0)]
   "
-  emit_insn (gen_rbitsi2 (operands[0], operands[1]));
+  emit_insn (gen_arm_rbit (operands[0], operands[1]));
   emit_insn (gen_clzsi2 (operands[0], operands[0]));
   DONE;
 ")
@@ -12564,7 +12564,7 @@ (define_insn "*arm_rev16"
 ;; operations within an IOR/AND RTX, therefore we have two patterns matching
 ;; each valid permutation.
 
-(define_insn "arm_rev16si2"
+(define_insn "arm_rev16si2_alt1"
   [(set (match_operand:SI 0 "register_operand" "=l,l,r")
         (ior:SI (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "l,l,r")
                                    (const_int 8))
@@ -12581,7 +12581,7 @@ (define_insn "arm_rev16si2"
    (set_attr "type" "rev")]
 )
 
-(define_insn "arm_rev16si2_alt"
+(define_insn "*arm_rev16si2_alt2"
   [(set (match_operand:SI 0 "register_operand" "=l,l,r")
         (ior:SI (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "l,l,r")
                                      (const_int 8))
@@ -12598,6 +12598,18 @@ (define_insn "arm_rev16si2_alt"
    (set_attr "type" "rev")]
 )
 
+(define_expand "arm_rev16si2"
+  [(set (match_operand:SI 0 "s_register_operand")
+	(bswap:SI (match_operand:SI 1 "s_register_operand")))]
+  "arm_arch6"
+  {
+    rtx left = gen_int_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00), SImode);
+    rtx right = gen_int_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff), SImode);
+    emit_insn (gen_arm_rev16si2_alt1 (operands[0], operands[1], right, left));
+    DONE;
+  }
+)
+
 (define_expand "bswaphi2"
   [(set (match_operand:HI 0 "s_register_operand")
 	(bswap:HI (match_operand:HI 1 "s_register_operand")))]
diff --git a/gcc/config/arm/arm_acle.h b/gcc/config/arm/arm_acle.h
index 4a5a6a8161646f031e7adf132b242747e43b9a98..0ebd250ab6231e745e4e61b3c92eadc46253d4cf 100644
--- a/gcc/config/arm/arm_acle.h
+++ b/gcc/config/arm/arm_acle.h
@@ -28,10 +28,74 @@
 #define _GCC_ARM_ACLE_H
 
 #include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+#define _GCC_ARM_ACLE_ROR_FN(NAME, TYPE)				  \
+__extension__ extern __inline TYPE					  \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))	  \
+NAME (TYPE __value, uint32_t __rotate)					  \
+{									  \
+  int __size = (int) sizeof (TYPE) * __CHAR_BIT__;                        \
+  __rotate = __rotate % __size;                                           \
+  return __value >> __rotate | __value << ((__size - __rotate) % __size); \
+}
+
+_GCC_ARM_ACLE_ROR_FN (__ror, uint32_t)
+_GCC_ARM_ACLE_ROR_FN (__rorl, unsigned long)
+_GCC_ARM_ACLE_ROR_FN (__rorll, uint64_t)
+
+#undef _GCC_ARM_ACLE_ROR_FN
+
+#define _GCC_ARM_ACLE_DATA_FN(NAME, ITYPE, RTYPE) \
+__extension__ extern __inline RTYPE				    \
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \
+__##NAME (ITYPE __value)
+
+#define _GCC_ARM_ACLE_DATA_ALIAS(NAME, BUILTIN, ITYPE, RTYPE)	    \
+_GCC_ARM_ACLE_DATA_FN(NAME, ITYPE, RTYPE) \
+{                                         \
+  return __builtin_##BUILTIN (__value);   \
+}
+
+_GCC_ARM_ACLE_DATA_ALIAS (clz, clz, uint32_t, unsigned int)
+_GCC_ARM_ACLE_DATA_ALIAS (clzl, clzl, unsigned long, unsigned int)
+_GCC_ARM_ACLE_DATA_ALIAS (clzll, clzll, uint64_t, unsigned int)
+_GCC_ARM_ACLE_DATA_ALIAS (cls, clrsb, uint32_t, unsigned int)
+_GCC_ARM_ACLE_DATA_ALIAS (clsl, clrsbl, unsigned long, unsigned int)
+_GCC_ARM_ACLE_DATA_ALIAS (clsll, clrsbll, uint64_t, unsigned int)
+_GCC_ARM_ACLE_DATA_ALIAS (revsh, bswap16, int16_t, int16_t)
+_GCC_ARM_ACLE_DATA_ALIAS (rev, bswap32, uint32_t, uint32_t)
+_GCC_ARM_ACLE_DATA_ALIAS (revl, bswap32, unsigned long, unsigned long)
+_GCC_ARM_ACLE_DATA_ALIAS (revll, bswap64, uint64_t, uint64_t)
+#if __ARM_ARCH >= 6
+_GCC_ARM_ACLE_DATA_ALIAS (rev16, arm_rev16si2, uint32_t, uint32_t)
+_GCC_ARM_ACLE_DATA_ALIAS (rev16l, arm_rev16si2, unsigned long, unsigned long)
+#else
+_GCC_ARM_ACLE_DATA_FN(rev16, uint32_t, uint32_t) {
+  return ((__value & 0xff00ff) << 8 | (__value & 0xff00ff00) >> 8);
+}
+_GCC_ARM_ACLE_DATA_FN(rev16l, unsigned long, unsigned long) {
+  return ((__value & 0xff00ff) << 8 | (__value & 0xff00ff00) >> 8);
+}
+#endif
+_GCC_ARM_ACLE_DATA_FN(rev16ll, uint64_t, uint64_t) {
+  return __rev16l(__value) | (uint64_t)__rev16l(__value >> 32) << 32;
+}
+
+#if __ARM_ARCH_6T2__ ||  __ARM_ARCH >= 7
+_GCC_ARM_ACLE_DATA_ALIAS (rbit, arm_rbit, uint32_t, uint32_t)
+_GCC_ARM_ACLE_DATA_ALIAS (rbitl, arm_rbit, unsigned long, unsigned int)
+_GCC_ARM_ACLE_DATA_FN(rbitll, uint64_t, uint64_t) {
+  return ((uint64_t)__rbit(__value) << 32) | __rbit(__value >> 32);
+}
+#endif
+
+#undef _GCC_ARM_ACLE_DATA_ALIAS
+#undef _GCC_ARM_ACLE_DATA_FN
+
 #if (!__thumb__ || __thumb2__) &&  __ARM_ARCH >= 4
 __extension__ static __inline void __attribute__ ((__always_inline__))
 __arm_cdp (const unsigned int __coproc, const unsigned int __opc1,
diff --git a/gcc/config/arm/arm_acle_builtins.def b/gcc/config/arm/arm_acle_builtins.def
index 48332eb64fddd94094bbb07926d452750baa3bcf..ff1afff576d993d7033b667fda1884261155f685 100644
--- a/gcc/config/arm/arm_acle_builtins.def
+++ b/gcc/config/arm/arm_acle_builtins.def
@@ -117,3 +117,6 @@ VAR1 (BINOP, smuadx, si)
 
 VAR1 (SAT_BINOP_UNSIGNED_IMM, ssat16, si)
 VAR1 (SAT_BINOP_UNSIGNED_IMM, usat16, si)
+
+VAR1 (BSWAP, rbit, si)
+VAR1 (BSWAP, rev16si2, si)
diff --git a/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-armv6.c b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-armv6.c
new file mode 100644
index 0000000000000000000000000000000000000000..aafdff35ceeab109d53ad92fa1f954e6d3b09dd3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-armv6.c
@@ -0,0 +1,28 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_arch_v6_ok } */
+/* { dg-add-options arm_arch_v6t2 } */
+
+#include "arm_acle.h"
+
+volatile uint32_t clz_in = 0x1234;
+volatile uint32_t rev_in = 0x12345678;
+volatile uint64_t rev64_in = 0x1234567890abcdef;
+
+int
+main (int argc, char **argv)
+{
+  if (__clz(clz_in) != 19) { __builtin_abort(); }
+  if (__clzl(clz_in) != 19) { __builtin_abort(); }
+  if (__clzll(clz_in) != 51) { __builtin_abort(); }
+  if (__cls(clz_in) != 18) { __builtin_abort(); }
+  if (__clsl(clz_in) != 18) { __builtin_abort(); }
+  if (__clsll(clz_in) != 50) { __builtin_abort(); }
+  if (__rev(rev_in) != 0x78563412) { __builtin_abort(); }
+  if (__revl(rev_in) != 0x78563412) { __builtin_abort(); }
+  if (__revll(rev64_in) != 0xefcdab9078563412) { __builtin_abort(); }
+  if (__rev16(rev_in) != 0x34127856) { __builtin_abort(); }
+  if (__rev16l(rev_in) != 0x34127856) { __builtin_abort(); }
+  if (__rev16ll(rev64_in) != 0x34127856ab90efcd) { __builtin_abort(); }
+  if (__revsh(clz_in) != 0x3412) { __builtin_abort(); }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-assembly.c b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-assembly.c
new file mode 100644
index 0000000000000000000000000000000000000000..3e066877a70b52f72e6745c0524b9d9f0222a5f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-assembly.c
@@ -0,0 +1,263 @@
+/* Test that the ACLE data intrinsics get expanded to the correct instructions on a specific architecture.  */
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_arch_v6_ok } */
+/* { dg-additional-options "--save-temps -O1" } */
+/* { dg-add-options arm_arch_v6 } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include "arm_acle.h"
+
+uint32_t *g32;
+unsigned long *gul;
+uint64_t *g64;
+unsigned int *gui;
+int16_t *g16;
+
+/*
+** test_clz:
+**	clz	r0, r0
+**	bx	lr
+*/
+
+unsigned int test_clz (uint32_t a)
+{
+  return __clz (a);
+}
+
+/*
+** test_clzl:
+**	clz	r0, r0
+**	bx	lr
+*/
+
+unsigned int test_clzl (unsigned long a)
+{
+  return __clzl (a);
+}
+
+/*
+** test_cls:
+** eor	(r[0-9]+), r0, r0, asr #31
+** clz	(r[0-9]+), \1
+** sub	r0, \2, #1
+** bx	lr
+*/
+
+unsigned int test_cls (uint32_t a)
+{
+  return __cls(a);
+}
+
+/*
+** test_clsl:
+** eor	(r[0-9]+), r0, r0, asr #31
+** clz	(r[0-9]+), \1
+** sub	r0, \2, #1
+** bx	lr
+*/
+
+unsigned int test_clsl (unsigned long a)
+{
+  return __clsl (a);
+}
+
+/*
+** test_rev:
+**	rev	r0, r0
+**	bx	lr
+*/
+
+uint32_t test_rev (uint32_t a)
+{
+  return __rev (a);
+}
+
+/*
+** test_revl:
+**	rev	r0, r0
+**	bx	lr
+*/
+
+unsigned long test_revl (unsigned long a)
+{
+  return __revl (a);
+}
+
+/*
+** test_revll:
+**  mov	(r[0-9]+), r0
+**  rev	r0, r1
+**	rev	r1, \1
+**	bx	lr
+*/
+
+uint64_t test_revll (uint64_t a)
+{
+  return __revll (a);
+}
+
+/*
+** test_ror:
+**	and	(r[0-9]+), r1, #31
+**	ror	r0, r0, \1
+**	bx	lr
+*/
+
+uint32_t test_ror (uint32_t a, uint32_t r)
+{
+  return __ror (a, r);
+}
+
+/*
+** test_rorl:
+**	and	(r[0-9]+), r1, #31
+**	ror	r0, r0, \1
+**	bx	lr
+*/
+
+unsigned long test_rorl (unsigned long a, uint32_t r)
+{
+  return __rorl (a, r);
+}
+
+/*
+** test_revsh:
+**	revsh	r0, r0
+**	bx	lr
+*/
+
+int16_t test_revsh (int16_t a)
+{
+  return __revsh (a);
+}
+
+/*
+** test_clz_mem:
+**	...
+**	clz	r[0-9]+, r[0-9]+
+**	...
+**	bx	lr
+*/
+
+void test_clz_mem (uint32_t *a)
+{
+  *gui = __clz (*a);
+}
+
+/*
+** test_clzl_mem:
+**	...
+**	clz	r[0-9]+, r[0-9]+
+**	...
+**	bx	lr
+*/
+
+void test_clzl_mem (unsigned long *a)
+{
+  *gui = __clzl (*a);
+}
+
+/*
+** test_cls_mem:
+**	...
+**	clz	r[0-9]+, r[0-9]+
+**	...
+**	bx	lr
+*/
+
+void test_cls_mem (uint32_t *a)
+{
+  *gui = __cls (*a);
+}
+
+/*
+** test_clsl_mem:
+**	...
+**	clz	r[0-9]+, r[0-9]+
+**	...
+**	bx	lr
+*/
+
+void test_clsl_mem (unsigned long *a)
+{
+  *gui = __clsl (*a);
+}
+
+/*
+** test_rev_mem:
+**	...
+**	rev	r[0-9]+, r[0-9]+
+**	...
+**	bx	lr
+*/
+
+void test_rev_mem (uint32_t *a)
+{
+  *g32 = __rev (*a);
+}
+
+/*
+** test_revl_mem:
+**	...
+**	rev	r[0-9]+, r[0-9]+
+**	...
+**	bx	lr
+*/
+
+void test_revl_mem (unsigned long *a)
+{
+  *gul = __revl (*a);
+}
+
+/*
+** test_revll_mem:
+**	...
+**	rev	r[0-9]+, r[0-9]+
+**	...
+**	bx	lr
+*/
+
+void test_revll_mem (uint64_t *a)
+{
+  *g64 = __revll (*a);
+}
+
+/*
+** test_ror_mem:
+**	...
+**	ror	r[0-9]+, r[0-9]+, r[0-9]+
+**	...
+**	bx	lr
+*/
+
+void test_ror_mem (uint32_t *a, uint32_t *r)
+{
+  *g32 = __ror (*a, *r);
+}
+
+/*
+** test_rorl_mem:
+**	...
+**	ror	r[0-9]+, r[0-9]+, r[0-9]+
+**	...
+**	bx	lr
+*/
+
+void test_rorl_mem (unsigned long *a, uint32_t *r)
+{
+  *gul = __rorl (*a, *r);
+}
+
+/*
+** test_revsh_mem:
+**	...
+**	rev16	r[0-9]+, r[0-9]+
+**	...
+**	bx	lr
+*/
+
+void test_revsh_mem (int16_t *a)
+{
+  *g16 = __revsh (*a);
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-rbit.c b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-rbit.c
new file mode 100644
index 0000000000000000000000000000000000000000..d1fe274b5ce951d975e1f5e7c2b188afefa2a245
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics-rbit.c
@@ -0,0 +1,98 @@
+/* Test the existence of the ACLE data intrinsics for a specific instruction.  */
+/* { dg-do run } */
+/* { dg-require-effective-target arm_arch_v6t2_ok } */
+/* { dg-additional-options "--save-temps -O1" } */
+/* { dg-add-options arm_arch_v6t2 } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include "arm_acle.h"
+
+extern void abort (void);
+
+uint32_t *g32;
+unsigned long *gul;
+uint64_t *g64;
+
+/*
+** test_rbit:
+**	rbit	r0, r0
+**	bx	lr
+*/
+
+uint32_t test_rbit (uint32_t a)
+{
+  return __rbit (a);
+}
+
+/*
+** test_rbitl:
+**	rbit	r0, r0
+**	bx	lr
+*/
+
+unsigned long test_rbitl (unsigned long a)
+{
+  return __rbitl (a);
+}
+
+/*
+** test_rbitll:
+** mov	(r[0-9]+), r0
+** rbit	r0, r1
+** rbit	r1, \1
+** bx	lr
+*/
+
+uint64_t test_rbitll (uint64_t a)
+{
+  return __rbitll (a);
+}
+
+/*
+** test_rbit_mem:
+**	...
+**	rbit	r[0-9]+, r[0-9]+
+**	...
+**	bx	lr
+*/
+
+void test_rbit_mem (uint32_t *a)
+{
+  *g32 = __rbit (*a);
+}
+
+/*
+** test_rbitl_mem:
+**	...
+**	rbit	r[0-9]+, r[0-9]+
+**	...
+**	bx	lr
+*/
+
+void test_rbitl_mem (unsigned long *a)
+{
+  *gul = __rbitl (*a);
+}
+
+/*
+** test_rbitll_mem:
+**	...
+**	rbit	r[0-9]+, r[0-9]+
+**	...
+**	bx	lr
+*/
+
+void test_rbitll_mem (uint64_t *a)
+{
+  *g64 = __rbitll (*a);
+}
+
+int
+main (int argc, char **argv)
+{
+  if (__rbit(0x12345678) != 0x1e6a2c48) { abort(); }
+  if (__rbitl(0x12345678) != 0x1e6a2c48) { abort(); }
+  if (__rbitll(0x1234567890abcdef) != 0xf7b3d5091e6a2c48) { abort(); }
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/acle/data-intrinsics.c b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics.c
new file mode 100644
index 0000000000000000000000000000000000000000..5c05b34f1699c4f74da6d152f6a0a69423be48aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/data-intrinsics.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+
+#include "arm_acle.h"
+
+volatile uint32_t clz_in = 0x1234;
+volatile uint32_t rev_in = 0x12345678;
+volatile uint64_t rev64_in = 0x1234567890abcdef;
+
+int
+main (int argc, char **argv)
+{
+  if (__clz(clz_in) != 19) { __builtin_abort(); }
+  if (__clzl(clz_in) != 19) { __builtin_abort(); }
+  if (__clzll(clz_in) != 51) { __builtin_abort(); }
+  if (__cls(clz_in) != 18) { __builtin_abort(); }
+  if (__clsl(clz_in) != 18) { __builtin_abort(); }
+  if (__clsll(clz_in) != 50) { __builtin_abort(); }
+  if (__rev(rev_in) != 0x78563412) { __builtin_abort(); }
+  if (__revl(rev_in) != 0x78563412) { __builtin_abort(); }
+  if (__revll(rev64_in) != 0xefcdab9078563412) { __builtin_abort(); }
+  if (__rev16(rev_in) != 0x34127856) { __builtin_abort(); }
+  if (__rev16l(rev_in) != 0x34127856) { __builtin_abort(); }
+  if (__rev16ll(rev64_in) != 0x34127856ab90efcd) { __builtin_abort(); }
+  if (__revsh(clz_in) != 0x3412) { __builtin_abort(); }
+  return 0;
+}

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-05-25 13:58 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-25 13:58 [PATCH][committed] arm: Implement ACLE Data Intrinsics Kyrylo Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).