[gcc(refs/vendors/redhat/heads/gcc-8-branch)] AArch64: Implement missing _p64 intrinsics for vector permutes

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc(refs/vendors/redhat/heads/gcc-8-branch)] AArch64: Implement missing _p64 intrinsics for vector permutes
@ 2021-04-23 10:14 Jakub Jelinek
  0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2021-04-23 10:14 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:380552f2bf017cc30479c8b6ba326281bbd09130

commit 380552f2bf017cc30479c8b6ba326281bbd09130
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Wed Sep 23 11:07:50 2020 +0100

    AArch64: Implement missing _p64 intrinsics for vector permutes
    
    This patch implements some missing vector permute intrinsics operating on poly64x2_t types.
    They are implemented identically to their uint64x2_t brethren.
    
    Bootstrapped and tested on aarch64-none-linux-gnu.
    
    gcc/
            PR target/71233
            * config/aarch64/arm_neon.h (vtrn1q_p64, vtrn2q_p64, vuzp1q_p64,
            vuzp2q_p64, vzip1q_p64, vzip2q_p64): Define.
    
    gcc/testsuite/
            PR target/71233
            * gcc.target/aarch64/simd/trn_zip_p64_1.c: New test.
    
    (cherry picked from commit e8e818399d70c5a5a3d30a54d305c6e2b92e2c66)

Diff:
---
 gcc/config/aarch64/arm_neon.h                      | 67 ++++++++++++++++++++++
 .../gcc.target/aarch64/simd/trn_zip_p64_1.c        | 44 ++++++++++++++
 2 files changed, 111 insertions(+)

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index d7dc4568d41..a0080b73cf1 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -30680,6 +30680,17 @@ vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b)
 #endif
 }
 
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vtrn1q_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+#ifdef __AARCH64EB__ /* Big-endian: indices {3, 1} pick __b[1], __a[1].  */
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {3, 1});
+#else /* Otherwise: indices {0, 2} pick __a[0], __b[0].  */
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {0, 2});
+#endif /* __AARCH64EB__ */
+}
+
 __extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b)
@@ -30950,6 +30961,18 @@ vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b)
 #endif
 }
 
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vtrn2q_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+#ifdef __AARCH64EB__ /* Big-endian: indices {2, 0} pick __b[0], __a[0].  */
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {2, 0});
+#else /* Otherwise: indices {1, 3} pick __a[1], __b[1].  */
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {1, 3});
+#endif /* __AARCH64EB__ */
+}
+
 __extension__ extern __inline float16x4x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtrn_f16 (float16x4_t __a, float16x4_t __b)
@@ -31596,6 +31619,17 @@ vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b)
 #endif
 }
 
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vuzp1q_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+#ifdef __AARCH64EB__ /* Big-endian: indices {3, 1} pick __b[1], __a[1].  */
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {3, 1});
+#else /* Otherwise: indices {0, 2} pick __a[0], __b[0].  */
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {0, 2});
+#endif /* __AARCH64EB__ */
+}
+
 __extension__ extern __inline float16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vuzp2_f16 (float16x4_t __a, float16x4_t __b)
@@ -31855,6 +31889,17 @@ vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b)
 #endif
 }
 
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vuzp2q_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+#ifdef __AARCH64EB__ /* Big-endian: indices {2, 0} pick __b[0], __a[0].  */
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {2, 0});
+#else /* Otherwise: indices {1, 3} pick __a[1], __b[1].  */
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {1, 3});
+#endif /* __AARCH64EB__ */
+}
+
 __INTERLEAVE_LIST (uzp)
 
 /* vzip */
@@ -32123,6 +32168,17 @@ vzip1q_u64 (uint64x2_t __a, uint64x2_t __b)
 #endif
 }
 
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vzip1q_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+#ifdef __AARCH64EB__ /* Big-endian: indices {3, 1} pick __b[1], __a[1].  */
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {3, 1});
+#else /* Otherwise: indices {0, 2} pick __a[0], __b[0].  */
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {0, 2});
+#endif /* __AARCH64EB__ */
+}
+
 __extension__ extern __inline float16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vzip2_f16 (float16x4_t __a, float16x4_t __b)
@@ -32387,6 +32443,17 @@ vzip2q_u64 (uint64x2_t __a, uint64x2_t __b)
 #endif
 }
 
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vzip2q_p64 (poly64x2_t __a, poly64x2_t __b)
+{
+#ifdef __AARCH64EB__ /* Big-endian: indices {2, 0} pick __b[0], __a[0].  */
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {2, 0});
+#else /* Otherwise: indices {1, 3} pick __a[1], __b[1].  */
+  return __builtin_shuffle (__a, __b, (poly64x2_t) {1, 3});
+#endif /* __AARCH64EB__ */
+}
+
 __INTERLEAVE_LIST (zip)
 
 #undef __INTERLEAVE_LIST
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/trn_zip_p64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/trn_zip_p64_1.c
new file mode 100644
index 00000000000..a47321db80b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/trn_zip_p64_1.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+poly64x2_t
+foo (poly64x2_t a, poly64x2_t b)
+{
+  return vtrn1q_p64 (a, b); /* Same masks as vuzp1q_p64, vzip1q_p64.  */
+}
+
+poly64x2_t
+foo1 (poly64x2_t a, poly64x2_t b)
+{
+  return vtrn2q_p64 (a, b); /* Same masks as vuzp2q_p64, vzip2q_p64.  */
+}
+
+poly64x2_t
+foo2 (poly64x2_t a, poly64x2_t b)
+{
+  return vuzp1q_p64 (a, b); /* Same masks as vtrn1q_p64, vzip1q_p64.  */
+}
+
+poly64x2_t
+foo3 (poly64x2_t a, poly64x2_t b)
+{
+  return vuzp2q_p64 (a, b); /* Same masks as vtrn2q_p64, vzip2q_p64.  */
+}
+
+poly64x2_t
+foo4 (poly64x2_t a, poly64x2_t b)
+{
+  return vzip1q_p64 (a, b); /* Same masks as vtrn1q_p64, vuzp1q_p64.  */
+}
+
+poly64x2_t
+foo5 (poly64x2_t a, poly64x2_t b)
+{
+  return vzip2q_p64 (a, b); /* Same masks as vtrn2q_p64, vuzp2q_p64.  */
+}
+
+/* { dg-final { scan-assembler-times {zip1\tv0.2d, v0.2d, v1.2d} 3 } } */
+/* { dg-final { scan-assembler-times {zip2\tv0.2d, v0.2d, v1.2d} 3 } } */
+


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-04-23 10:14 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-23 10:14 [gcc(refs/vendors/redhat/heads/gcc-8-branch)] AArch64: Implement missing _p64 intrinsics for vector permutes Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).