public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/redhat/heads/gcc-8-branch)] Add missing AArch64 NEON intrinsics for Armv8.2-a to Armv8.4-a
@ 2020-09-28 9:15 Jakub Jelinek
0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2020-09-28 9:15 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:e4e74854c72a7b5df543c23f0c1d03f9bd507633
commit e4e74854c72a7b5df543c23f0c1d03f9bd507633
Author: Tamar Christina <tamar.christina@arm.com>
Date: Mon May 21 10:33:30 2018 +0000
Add missing AArch64 NEON intrinsics for Armv8.2-a to Armv8.4-a
This patch adds the missing NEON intrinsics for all 128-bit integer vector modes for the
three-way XOR (EOR3) and negate-and-XOR (BCAX) instructions for Armv8.2-a to Armv8.4-a.
gcc/
PR target/71233
* config/aarch64/aarch64-simd.md (aarch64_eor3qv8hi): Change to
eor3q<mode>4.
(aarch64_bcaxqv8hi): Change to bcaxq<mode>4.
* config/aarch64/aarch64-simd-builtins.def (veor3q_u8, veor3q_u32,
veor3q_u64, veor3q_s8, veor3q_s16, veor3q_s32, veor3q_s64, vbcaxq_u8,
vbcaxq_u32, vbcaxq_u64, vbcaxq_s8, vbcaxq_s16, vbcaxq_s32,
vbcaxq_s64): New.
* config/aarch64/arm_neon.h: Likewise.
* config/aarch64/iterators.md (VQ_I): New.
gcc/testsuite/
PR target/71233
* gcc.target/aarch64/sha3.h (veor3q_u8, veor3q_u32,
veor3q_u64, veor3q_s8, veor3q_s16, veor3q_s32, veor3q_s64, vbcaxq_u8,
vbcaxq_u32, vbcaxq_u64, vbcaxq_s8, vbcaxq_s16, vbcaxq_s32,
vbcaxq_s64): New.
* gcc.target/aarch64/sha3_1.c: Likewise.
* gcc.target/aarch64/sha3_2.c: Likewise.
* gcc.target/aarch64/sha3_3.c: Likewise.
(cherry picked from commit d21052ebd7ac9d545a26dde3229c57f872c1d5f3)
Diff:
---
gcc/config/aarch64/aarch64-simd-builtins.def | 10 +--
gcc/config/aarch64/aarch64-simd.md | 28 ++++----
gcc/config/aarch64/arm_neon.h | 101 +++++++++++++++++++++++++++
gcc/config/aarch64/iterators.md | 3 +
gcc/testsuite/gcc.target/aarch64/sha3.h | 40 ++++++++---
gcc/testsuite/gcc.target/aarch64/sha3_1.c | 4 +-
gcc/testsuite/gcc.target/aarch64/sha3_2.c | 4 +-
gcc/testsuite/gcc.target/aarch64/sha3_3.c | 4 +-
8 files changed, 160 insertions(+), 34 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index f71a40ef118..fe3e392cfb9 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -614,14 +614,16 @@
VAR1 (BINOPU, crypto_sha512su0q, 0, v2di)
/* Implemented by aarch64_crypto_sha512su1qv2di. */
VAR1 (TERNOPU, crypto_sha512su1q, 0, v2di)
- /* Implemented by aarch64_eor3qv8hi. */
- VAR1 (TERNOPU, eor3q, 0, v8hi)
+ /* Implemented by eor3q<mode>4. */
+ BUILTIN_VQ_I (TERNOPU, eor3q, 4)
+ BUILTIN_VQ_I (TERNOP, eor3q, 4)
/* Implemented by aarch64_rax1qv2di. */
VAR1 (BINOPU, rax1q, 0, v2di)
/* Implemented by aarch64_xarqv2di. */
VAR1 (TERNOPUI, xarq, 0, v2di)
- /* Implemented by aarch64_bcaxqv8hi. */
- VAR1 (TERNOPU, bcaxq, 0, v8hi)
+ /* Implemented by bcaxq<mode>4. */
+ BUILTIN_VQ_I (TERNOPU, bcaxq, 4)
+ BUILTIN_VQ_I (TERNOP, bcaxq, 4)
/* Implemented by aarch64_fml<f16mac1>l<f16quad>_low<mode>. */
VAR1 (TERNOP, fmlal_low, 0, v2sf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 816d627ce9e..8cec46d30ca 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6045,13 +6045,13 @@
;; sha3
-(define_insn "aarch64_eor3qv8hi"
- [(set (match_operand:V8HI 0 "register_operand" "=w")
- (xor:V8HI
- (xor:V8HI
- (match_operand:V8HI 2 "register_operand" "%w")
- (match_operand:V8HI 3 "register_operand" "w"))
- (match_operand:V8HI 1 "register_operand" "w")))]
+(define_insn "eor3q<mode>4"
+ [(set (match_operand:VQ_I 0 "register_operand" "=w")
+ (xor:VQ_I
+ (xor:VQ_I
+ (match_operand:VQ_I 2 "register_operand" "w")
+ (match_operand:VQ_I 3 "register_operand" "w"))
+ (match_operand:VQ_I 1 "register_operand" "w")))]
"TARGET_SIMD && TARGET_SHA3"
"eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
[(set_attr "type" "crypto_sha3")]
@@ -6081,13 +6081,13 @@
[(set_attr "type" "crypto_sha3")]
)
-(define_insn "aarch64_bcaxqv8hi"
- [(set (match_operand:V8HI 0 "register_operand" "=w")
- (xor:V8HI
- (and:V8HI
- (not:V8HI (match_operand:V8HI 3 "register_operand" "w"))
- (match_operand:V8HI 2 "register_operand" "w"))
- (match_operand:V8HI 1 "register_operand" "w")))]
+(define_insn "bcaxq<mode>4"
+ [(set (match_operand:VQ_I 0 "register_operand" "=w")
+ (xor:VQ_I
+ (and:VQ_I
+ (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
+ (match_operand:VQ_I 2 "register_operand" "w"))
+ (match_operand:VQ_I 1 "register_operand" "w")))]
"TARGET_SIMD && TARGET_SHA3"
"bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
[(set_attr "type" "crypto_sha3")]
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 7bcd1e1c844..8932a7b0e67 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -33675,6 +33675,13 @@ vsha512su1q_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
return __builtin_aarch64_crypto_sha512su1qv2di_uuuu (__a, __b, __c);
}
+__extension__ extern __inline uint8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
+{
+ return __builtin_aarch64_eor3qv16qi_uuuu (__a, __b, __c);
+}
+
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
veor3q_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
@@ -33682,6 +33689,49 @@ veor3q_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
return __builtin_aarch64_eor3qv8hi_uuuu (__a, __b, __c);
}
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
+{
+ return __builtin_aarch64_eor3qv4si_uuuu (__a, __b, __c);
+}
+
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
+{
+ return __builtin_aarch64_eor3qv2di_uuuu (__a, __b, __c);
+}
+
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c)
+{
+ return __builtin_aarch64_eor3qv16qi (__a, __b, __c);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
+{
+ return __builtin_aarch64_eor3qv8hi (__a, __b, __c);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
+{
+ return __builtin_aarch64_eor3qv4si (__a, __b, __c);
+}
+
+__extension__ extern __inline int64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_s64 (int64x2_t __a, int64x2_t __b, int64x2_t __c)
+{
+ return __builtin_aarch64_eor3qv2di (__a, __b, __c);
+}
+
__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vrax1q_u64 (uint64x2_t __a, uint64x2_t __b)
@@ -33696,12 +33746,63 @@ vxarq_u64 (uint64x2_t __a, uint64x2_t __b, const int imm6)
return __builtin_aarch64_xarqv2di_uuus (__a, __b,imm6);
}
+__extension__ extern __inline uint8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
+{
+ return __builtin_aarch64_bcaxqv16qi_uuuu (__a, __b, __c);
+}
+
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vbcaxq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
{
return __builtin_aarch64_bcaxqv8hi_uuuu (__a, __b, __c);
}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
+{
+ return __builtin_aarch64_bcaxqv4si_uuuu (__a, __b, __c);
+}
+
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
+{
+ return __builtin_aarch64_bcaxqv2di_uuuu (__a, __b, __c);
+}
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c)
+{
+ return __builtin_aarch64_bcaxqv16qi (__a, __b, __c);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
+{
+ return __builtin_aarch64_bcaxqv8hi (__a, __b, __c);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
+{
+ return __builtin_aarch64_bcaxqv4si (__a, __b, __c);
+}
+
+__extension__ extern __inline int64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_s64 (int64x2_t __a, int64x2_t __b, int64x2_t __c)
+{
+ return __builtin_aarch64_bcaxqv2di (__a, __b, __c);
+}
+
+
#pragma GCC pop_options
#pragma GCC push_options
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 914a30aa77c..88176a1ed97 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -81,6 +81,9 @@
;; Quad vector modes.
(define_mode_iterator VQ [V16QI V8HI V4SI V2DI V8HF V4SF V2DF])
+;; Quad integer vector modes.
+(define_mode_iterator VQ_I [V16QI V8HI V4SI V2DI])
+
;; VQ without 2 element modes.
(define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V8HF V4SF])
diff --git a/gcc/testsuite/gcc.target/aarch64/sha3.h b/gcc/testsuite/gcc.target/aarch64/sha3.h
index 76dd1931dff..c8537c25196 100644
--- a/gcc/testsuite/gcc.target/aarch64/sha3.h
+++ b/gcc/testsuite/gcc.target/aarch64/sha3.h
@@ -1,10 +1,26 @@
#include "arm_neon.h"
-uint16x8_t
-test_veor3q_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
-{
- return veor3q_u16 (a, b, c);
-}
+#define TEST_VEOR3(T, S) T \
+test_veor3q_ ## S (T a, T b, T c) \
+{ \
+ return veor3q_ ## S (a, b, c); \
+} \
+
+#define TEST_VBCAX(T, S) T \
+test_vbcaxq_ ## S (T a, T b, T c) \
+{ \
+ return vbcaxq_ ## S (a, b, c); \
+} \
+
+
+TEST_VEOR3 (uint8x16_t, u8)
+TEST_VEOR3 (uint16x8_t, u16)
+TEST_VEOR3 (uint32x4_t, u32)
+TEST_VEOR3 (uint64x2_t, u64)
+TEST_VEOR3 (int8x16_t, s8)
+TEST_VEOR3 (int16x8_t, s16)
+TEST_VEOR3 (int32x4_t, s32)
+TEST_VEOR3 (int64x2_t, s64)
uint64x2_t
test_vrax1q_u64 (uint64x2_t a, uint64x2_t b)
@@ -18,8 +34,12 @@ test_vxarq_u64 (uint64x2_t a, uint64x2_t b)
return vxarq_u64 (a, b, 15);
}
-uint16x8_t
-test_vbcaxq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
-{
- return vbcaxq_u16 (a, b, c);
-}
+TEST_VBCAX (uint8x16_t, u8)
+TEST_VBCAX (uint16x8_t, u16)
+TEST_VBCAX (uint32x4_t, u32)
+TEST_VBCAX (uint64x2_t, u64)
+TEST_VBCAX (int8x16_t, s8)
+TEST_VBCAX (int16x8_t, s16)
+TEST_VBCAX (int32x4_t, s32)
+TEST_VBCAX (int64x2_t, s64)
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sha3_1.c b/gcc/testsuite/gcc.target/aarch64/sha3_1.c
index 879eadd875e..0727ce77028 100644
--- a/gcc/testsuite/gcc.target/aarch64/sha3_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sha3_1.c
@@ -4,7 +4,7 @@
#include "sha3.h"
-/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */
/* { dg-final { scan-assembler-times "rax1\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
/* { dg-final { scan-assembler-times "xar\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d, 15" 1 } } */
-/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sha3_2.c b/gcc/testsuite/gcc.target/aarch64/sha3_2.c
index 2afe28c4744..2d051161133 100644
--- a/gcc/testsuite/gcc.target/aarch64/sha3_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sha3_2.c
@@ -3,7 +3,7 @@
#include "sha3.h"
-/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */
/* { dg-final { scan-assembler-times "rax1\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
/* { dg-final { scan-assembler-times "xar\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d, 15" 1 } } */
-/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sha3_3.c b/gcc/testsuite/gcc.target/aarch64/sha3_3.c
index 8915c805c3e..8d8ee77c293 100644
--- a/gcc/testsuite/gcc.target/aarch64/sha3_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sha3_3.c
@@ -3,7 +3,7 @@
#include "sha3.h"
-/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */
/* { dg-final { scan-assembler-times "rax1\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
/* { dg-final { scan-assembler-times "xar\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d, 15" 1 } } */
-/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2020-09-28 9:15 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-28 9:15 [gcc(refs/vendors/redhat/heads/gcc-8-branch)] Add missing AArch64 NEON intrinsics for Armv8.2-a to Armv8.4-a Jakub Jelinek
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).