* [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
@ 2016-11-07 13:55 Tamar Christina
2016-11-08 11:21 ` Christophe Lyon
0 siblings, 1 reply; 17+ messages in thread
From: Tamar Christina @ 2016-11-07 13:55 UTC (permalink / raw)
To: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
James Greenhalgh, Kyrylo Tkachov
Cc: nd
[-- Attachment #1: Type: text/plain, Size: 1500 bytes --]
Hi all,
This patch (3 of 3) adds and updates tests for the NEON intrinsics
added by the previous patches.
Ran regression tests on aarch64-none-linux-gnu
and on arm-none-linux-gnueabihf.
Ok for trunk?
Thanks,
Tamar
gcc/testsuite/
2016-11-04 Tamar Christina <tamar.christina@arm.com>
* gcc.target/aarch64/advsimd-intrinsics/p64.c: New.
* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
(Poly64x1_t, Poly64x2_t): Added types.
(AARCH64_ONLY): Added macro.
* gcc.target/aarch64/advsimd-intrinsics/vcombine.c:
Added test for Poly64.
* gcc.target/aarch64/advsimd-intrinsics/vcreate.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vget_high.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vget_lane.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vget_low.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vldX.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vld1.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c:
Added AArch64 flags.
* gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c:
Added AArch64 flags.
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: gcc-poly64-3.patch --]
[-- Type: text/x-patch; name="gcc-poly64-3.patch", Size: 106997 bytes --]
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
index 462141586b3db7c5256c74b08fa0449210634226..174c1948221025b860aaac503354b406fa804007 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
@@ -32,6 +32,13 @@ extern size_t strlen(const char *);
VECT_VAR(expected, int, 16, 4) -> expected_int16x4
VECT_VAR_DECL(expected, int, 16, 4) -> int16x4_t expected_int16x4
*/
+/* Some instructions don't exist on ARM.
+ Use this macro to guard against them. */
+#ifdef __aarch64__
+#define AARCH64_ONLY(X) X
+#else
+#define AARCH64_ONLY(X)
+#endif
#define xSTR(X) #X
#define STR(X) xSTR(X)
@@ -92,6 +99,13 @@ extern size_t strlen(const char *);
fprintf(stderr, "CHECKED %s %s\n", STR(VECT_TYPE(T, W, N)), MSG); \
}
+#if defined (__ARM_FEATURE_CRYPTO)
+#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT) \
+ CHECK(MSG,T,W,N,FMT,EXPECTED,COMMENT)
+#else
+#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT)
+#endif
+
/* Floating-point variant. */
#define CHECK_FP(MSG,T,W,N,FMT,EXPECTED,COMMENT) \
{ \
@@ -184,6 +198,9 @@ extern ARRAY(expected, uint, 32, 2);
extern ARRAY(expected, uint, 64, 1);
extern ARRAY(expected, poly, 8, 8);
extern ARRAY(expected, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+extern ARRAY(expected, poly, 64, 1);
+#endif
extern ARRAY(expected, hfloat, 16, 4);
extern ARRAY(expected, hfloat, 32, 2);
extern ARRAY(expected, hfloat, 64, 1);
@@ -197,11 +214,14 @@ extern ARRAY(expected, uint, 32, 4);
extern ARRAY(expected, uint, 64, 2);
extern ARRAY(expected, poly, 8, 16);
extern ARRAY(expected, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+extern ARRAY(expected, poly, 64, 2);
+#endif
extern ARRAY(expected, hfloat, 16, 8);
extern ARRAY(expected, hfloat, 32, 4);
extern ARRAY(expected, hfloat, 64, 2);
-#define CHECK_RESULTS_NAMED_NO_FP16(test_name,EXPECTED,comment) \
+#define CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64(test_name,EXPECTED,comment) \
{ \
CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \
CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \
@@ -228,6 +248,13 @@ extern ARRAY(expected, hfloat, 64, 2);
CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \
} \
+#define CHECK_RESULTS_NAMED_NO_FP16(test_name,EXPECTED,comment) \
+ { \
+ CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64(test_name, EXPECTED, comment); \
+ CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \
+ CHECK_CRYPTO(test_name, poly, 64, 2, PRIx64, EXPECTED, comment); \
+ } \
+
/* Check results against EXPECTED. Operates on all possible vector types. */
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
#define CHECK_RESULTS_NAMED(test_name,EXPECTED,comment) \
@@ -398,6 +425,9 @@ static void clean_results (void)
CLEAN(result, uint, 64, 1);
CLEAN(result, poly, 8, 8);
CLEAN(result, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ CLEAN(result, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CLEAN(result, float, 16, 4);
#endif
@@ -413,6 +443,9 @@ static void clean_results (void)
CLEAN(result, uint, 64, 2);
CLEAN(result, poly, 8, 16);
CLEAN(result, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ CLEAN(result, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CLEAN(result, float, 16, 8);
#endif
@@ -438,6 +471,13 @@ static void clean_results (void)
#define DECL_VARIABLE(VAR, T1, W, N) \
VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)
+#if defined (__ARM_FEATURE_CRYPTO)
+#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N) \
+ DECL_VARIABLE(VAR, T1, W, N)
+#else
+#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N)
+#endif
+
/* Declare only 64 bits signed variants. */
#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR) \
DECL_VARIABLE(VAR, int, 8, 8); \
@@ -473,6 +513,7 @@ static void clean_results (void)
DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 8); \
DECL_VARIABLE(VAR, poly, 16, 4); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1); \
DECL_VARIABLE(VAR, float, 16, 4); \
DECL_VARIABLE(VAR, float, 32, 2)
#else
@@ -481,6 +522,7 @@ static void clean_results (void)
DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 8); \
DECL_VARIABLE(VAR, poly, 16, 4); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1); \
DECL_VARIABLE(VAR, float, 32, 2)
#endif
@@ -491,6 +533,7 @@ static void clean_results (void)
DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 16); \
DECL_VARIABLE(VAR, poly, 16, 8); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2); \
DECL_VARIABLE(VAR, float, 16, 8); \
DECL_VARIABLE(VAR, float, 32, 4)
#else
@@ -499,6 +542,7 @@ static void clean_results (void)
DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 16); \
DECL_VARIABLE(VAR, poly, 16, 8); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2); \
DECL_VARIABLE(VAR, float, 32, 4)
#endif
/* Declare all variants. */
@@ -531,6 +575,13 @@ static void clean_results (void)
/* Helpers to call macros with 1 constant and 5 variable
arguments. */
+#if defined (__ARM_FEATURE_CRYPTO)
+#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N) \
+ MACRO(VAR1, VAR2, T1, T2, T3, W, N)
+#else
+#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N)
+#endif
+
#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \
MACRO(VAR, , int, s, 8, 8); \
MACRO(VAR, , int, s, 16, 4); \
@@ -601,13 +652,15 @@ static void clean_results (void)
TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
MACRO(VAR1, VAR2, , poly, p, 8, 8); \
- MACRO(VAR1, VAR2, , poly, p, 16, 4)
+ MACRO(VAR1, VAR2, , poly, p, 16, 4); \
+ MACRO_CRYPTO(MACRO, VAR1, VAR2, , poly, p, 64, 1)
#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \
TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
MACRO(VAR1, VAR2, q, poly, p, 8, 16); \
- MACRO(VAR1, VAR2, q, poly, p, 16, 8)
+ MACRO(VAR1, VAR2, q, poly, p, 16, 8); \
+ MACRO_CRYPTO(MACRO, VAR1, VAR2, q, poly, p, 64, 2)
#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \
TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2); \
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64.c
new file mode 100644
index 0000000000000000000000000000000000000000..df66eaa749f7a1a34011d6d169d1262ba976c6ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64.c
@@ -0,0 +1,302 @@
+/* This file contains tests for the VLD{X}, VLD{X}_DUP and VSLI. */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* { dg-options "-march=armv8-a+crypto" } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
+/* Expected results: vld1. */
+VECT_VAR_DECL (vld1_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld1_expected,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+
+/* Expected results: vld1_dup. */
+VECT_VAR_DECL (vld1_dup_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld1_dup_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld1_dup_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld1_dup_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+ 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld1_dup_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld1_dup_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff2 };
+
+/* Expected results: vldX. */
+VECT_VAR_DECL (vld2_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld2_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld3_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld3_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld3_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld4_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld4_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld4_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld4_expected_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+
+/* Expected results: vldX_dup. */
+VECT_VAR_DECL (vld2_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld2_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld3_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld3_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld3_dup_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld4_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld4_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld4_dup_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld4_dup_expected_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+
+/* Expected results: vsli. */
+VECT_VAR_DECL (vsli_expected,poly,64,1) [] = { 0x10 };
+VECT_VAR_DECL (vsli_expected,poly,64,2) [] = { 0x7ffffffffffff0,
+ 0x7ffffffffffff1 };
+VECT_VAR_DECL (vsli_expected_max_shift,poly,64,1) [] = { 0x7ffffffffffffff0 };
+VECT_VAR_DECL (vsli_expected_max_shift,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+
+int main (void)
+{
+ int i;
+
+ /* vld1_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VLD1/VLD1Q"
+
+#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N) \
+ VECT_VAR (VAR, T1, W, N) = vld1##Q##_##T2##W (VECT_VAR (BUF, T1, W, N)); \
+ vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), VECT_VAR (VAR, T1, W, N))
+
+ DECL_VARIABLE (vld1_vector, poly, 64, 1);
+ DECL_VARIABLE (vld1_vector, poly, 64, 2);
+
+ CLEAN (result, poly, 64, 1);
+ CLEAN (result, poly, 64, 2);
+
+ VLOAD (vld1_vector, buffer, , poly, p, 64, 1);
+ VLOAD (vld1_vector, buffer, q, poly, p, 64, 2);
+
+ TEST_VLD1 (vld1_vector, buffer, , poly, p, 64, 1);
+ TEST_VLD1 (vld1_vector, buffer, q, poly, p, 64, 2);
+
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld1_expected, "");
+ CHECK (TEST_MSG, poly, 64, 2, PRIx64, vld1_expected, "");
+
+ /* vld1_dup_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VLD1_DUP/VLD1_DUPQ"
+
+#define TEST_VLD1_DUP(VAR, BUF, Q, T1, T2, W, N) \
+ VECT_VAR (VAR, T1, W, N) = \
+ vld1##Q##_dup_##T2##W (&VECT_VAR (BUF, T1, W, N)[i]); \
+ vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), VECT_VAR (VAR, T1, W, N))
+
+ DECL_VARIABLE (vld1_dup_vector, poly, 64, 1);
+ DECL_VARIABLE (vld1_dup_vector, poly, 64, 2);
+
+ /* Try to read different places from the input buffer. */
+ for (i=0; i<3; i++)
+ {
+ CLEAN (result, poly, 64, 1);
+ CLEAN (result, poly, 64, 2);
+
+ TEST_VLD1_DUP (vld1_dup_vector, buffer_dup, , poly, p, 64, 1);
+ TEST_VLD1_DUP (vld1_dup_vector, buffer_dup, q, poly, p, 64, 2);
+
+ switch (i)
+ {
+ case 0:
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected0, "");
+ CHECK (TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected0, "");
+ break;
+ case 1:
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected1, "");
+ CHECK (TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected1, "");
+ break;
+ case 2:
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected2, "");
+ CHECK (TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected2, "");
+ break;
+ default:
+ abort ();
+ }
+ }
+
+ /* vldX_p64 tests. */
+#define DECL_VLDX(T1, W, N, X) \
+ VECT_ARRAY_TYPE (T1, W, N, X) VECT_ARRAY_VAR (vldX_vector, T1, W, N, X); \
+ VECT_VAR_DECL (vldX_result_bis_##X, T1, W, N)[X * N]
+
+#define TEST_VLDX(Q, T1, T2, W, N, X) \
+ VECT_ARRAY_VAR (vldX_vector, T1, W, N, X) = \
+ /* Use dedicated init buffer, of size X. */ \
+ vld##X##Q##_##T2##W (VECT_ARRAY_VAR (buffer_vld##X, T1, W, N, X)); \
+ vst##X##Q##_##T2##W (VECT_VAR (vldX_result_bis_##X, T1, W, N), \
+ VECT_ARRAY_VAR (vldX_vector, T1, W, N, X)); \
+ memcpy (VECT_VAR (result, T1, W, N), \
+ VECT_VAR (vldX_result_bis_##X, T1, W, N), \
+ sizeof (VECT_VAR (result, T1, W, N)));
+
+ /* Overwrite "result" with the contents of "result_bis"[Y]. */
+#define TEST_EXTRA_CHUNK(T1, W, N, X,Y) \
+ memcpy (VECT_VAR (result, T1, W, N), \
+ &(VECT_VAR (vldX_result_bis_##X, T1, W, N)[Y*N]), \
+ sizeof (VECT_VAR (result, T1, W, N)));
+
+ DECL_VLDX (poly, 64, 1, 2);
+ DECL_VLDX (poly, 64, 1, 3);
+ DECL_VLDX (poly, 64, 1, 4);
+
+ VECT_ARRAY_INIT2 (buffer_vld2, poly, 64, 1);
+ PAD (buffer_vld2_pad, poly, 64, 1);
+ VECT_ARRAY_INIT3 (buffer_vld3, poly, 64, 1);
+ PAD (buffer_vld3_pad, poly, 64, 1);
+ VECT_ARRAY_INIT4 (buffer_vld4, poly, 64, 1);
+ PAD (buffer_vld4_pad, poly, 64, 1);
+
+#undef TEST_MSG
+#define TEST_MSG "VLD2/VLD2Q"
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX (, poly, p, 64, 1, 2);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld2_expected_0, "chunk 0");
+ CLEAN (result, poly, 64, 1);
+ TEST_EXTRA_CHUNK (poly, 64, 1, 2, 1);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld2_expected_1, "chunk 1");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD3/VLD3Q"
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX (, poly, p, 64, 1, 3);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_0, "chunk 0");
+ CLEAN (result, poly, 64, 1);
+ TEST_EXTRA_CHUNK (poly, 64, 1, 3, 1);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_1, "chunk 1");
+ CLEAN (result, poly, 64, 1);
+ TEST_EXTRA_CHUNK (poly, 64, 1, 3, 2);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_2, "chunk 2");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD4/VLD4Q"
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX (, poly, p, 64, 1, 4);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_0, "chunk 0");
+ CLEAN (result, poly, 64, 1);
+ TEST_EXTRA_CHUNK (poly, 64, 1, 4, 1);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_1, "chunk 1");
+ CLEAN (result, poly, 64, 1);
+ TEST_EXTRA_CHUNK (poly, 64, 1, 4, 2);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_2, "chunk 2");
+ CLEAN (result, poly, 64, 1);
+ TEST_EXTRA_CHUNK (poly, 64, 1, 4, 3);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_3, "chunk 3");
+
+ /* vldX_dup_p64 tests. */
+#define DECL_VLDX_DUP(T1, W, N, X) \
+ VECT_ARRAY_TYPE (T1, W, N, X) VECT_ARRAY_VAR (vldX_dup_vector, T1, W, N, X); \
+ VECT_VAR_DECL (vldX_dup_result_bis_##X, T1, W, N)[X * N]
+
+#define TEST_VLDX_DUP(Q, T1, T2, W, N, X) \
+ VECT_ARRAY_VAR (vldX_dup_vector, T1, W, N, X) = \
+ vld##X##Q##_dup_##T2##W (&VECT_VAR (buffer_dup, T1, W, N)[0]); \
+ \
+ vst##X##Q##_##T2##W (VECT_VAR (vldX_dup_result_bis_##X, T1, W, N), \
+ VECT_ARRAY_VAR (vldX_dup_vector, T1, W, N, X)); \
+ memcpy (VECT_VAR (result, T1, W, N), \
+ VECT_VAR (vldX_dup_result_bis_##X, T1, W, N), \
+ sizeof (VECT_VAR (result, T1, W, N)));
+
+ /* Overwrite "result" with the contents of "result_bis"[Y]. */
+#define TEST_VLDX_DUP_EXTRA_CHUNK(T1, W, N, X,Y) \
+ memcpy (VECT_VAR (result, T1, W, N), \
+ &(VECT_VAR (vldX_dup_result_bis_##X, T1, W, N)[Y*N]), \
+ sizeof (VECT_VAR (result, T1, W, N)));
+
+ DECL_VLDX_DUP (poly, 64, 1, 2);
+ DECL_VLDX_DUP (poly, 64, 1, 3);
+ DECL_VLDX_DUP (poly, 64, 1, 4);
+
+
+#undef TEST_MSG
+#define TEST_MSG "VLD2_DUP/VLD2Q_DUP"
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP (, poly, p, 64, 1, 2);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld2_dup_expected_0, "chunk 0");
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 2, 1);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld2_dup_expected_1, "chunk 1");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD3_DUP/VLD3Q_DUP"
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP (, poly, p, 64, 1, 3);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_0, "chunk 0");
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 3, 1);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_1, "chunk 1");
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 3, 2);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_2, "chunk 2");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD4_DUP/VLD4Q_DUP"
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP (, poly, p, 64, 1, 4);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_0, "chunk 0");
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 4, 1);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_1, "chunk 1");
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 4, 2);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_2, "chunk 2");
+ CLEAN (result, poly, 64, 1);
+ TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 4, 3);
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_3, "chunk 3");
+
+ /* vsli_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VSLI"
+
+#define TEST_VSXI1(INSN, Q, T1, T2, W, N, V) \
+ VECT_VAR (vsXi_vector_res, T1, W, N) = \
+ INSN##Q##_n_##T2##W (VECT_VAR (vsXi_vector, T1, W, N), \
+ VECT_VAR (vsXi_vector2, T1, W, N), \
+ V); \
+ vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), \
+ VECT_VAR (vsXi_vector_res, T1, W, N))
+
+#define TEST_VSXI(INSN, Q, T1, T2, W, N, V) \
+ TEST_VSXI1 (INSN, Q, T1, T2, W, N, V)
+
+ DECL_VARIABLE (vsXi_vector, poly, 64, 1);
+ DECL_VARIABLE (vsXi_vector, poly, 64, 2);
+ DECL_VARIABLE (vsXi_vector2, poly, 64, 1);
+ DECL_VARIABLE (vsXi_vector2, poly, 64, 2);
+ DECL_VARIABLE (vsXi_vector_res, poly, 64, 1);
+ DECL_VARIABLE (vsXi_vector_res, poly, 64, 2);
+
+ CLEAN (result, poly, 64, 1);
+ CLEAN (result, poly, 64, 2);
+
+ VLOAD (vsXi_vector, buffer, , poly, p, 64, 1);
+ VLOAD (vsXi_vector, buffer, q, poly, p, 64, 2);
+
+ VDUP (vsXi_vector2, , poly, p, 64, 1, 2);
+ VDUP (vsXi_vector2, q, poly, p, 64, 2, 3);
+
+ TEST_VSXI (vsli, , poly, p, 64, 1, 3);
+ TEST_VSXI (vsli, q, poly, p, 64, 2, 53);
+
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vsli_expected, "");
+ CHECK (TEST_MSG, poly, 64, 2, PRIx64, vsli_expected, "");
+
+ /* Test cases with maximum shift amount. */
+ CLEAN (result, poly, 64, 1);
+ CLEAN (result, poly, 64, 2);
+
+ TEST_VSXI (vsli, , poly, p, 64, 1, 63);
+ TEST_VSXI (vsli, q, poly, p, 64, 2, 63);
+
+#define COMMENT "(max shift amount)"
+ CHECK (TEST_MSG, poly, 64, 1, PRIx64, vsli_expected_max_shift, COMMENT);
+ CHECK (TEST_MSG, poly, 64, 2, PRIx64, vsli_expected_max_shift, COMMENT);
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c
index 5100375e5fe0c1f1f6b1e0cbff549990d73948e3..0c6b25d578102f042c669d9bdeaa15e5a1292267 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -25,6 +26,9 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0x55, 0x55, 0x55, 0x55 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0x66, 0x66, 0x66, 0x66 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x77 };
+#endif
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0x40533333, 0x40533333 };
VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
@@ -62,6 +66,9 @@ void exec_vcombine (void)
VDUP(vector64_b, , uint, u, 64, 1, 0x88);
VDUP(vector64_b, , poly, p, 8, 8, 0x55);
VDUP(vector64_b, , poly, p, 16, 4, 0x66);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VDUP(vector64_b, , poly, p, 64, 1, 0x77);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VDUP(vector64_b, , float, f, 16, 4, 2.25);
#endif
@@ -80,6 +87,9 @@ void exec_vcombine (void)
TEST_VCOMBINE(uint, u, 64, 1, 2);
TEST_VCOMBINE(poly, p, 8, 8, 16);
TEST_VCOMBINE(poly, p, 16, 4, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VCOMBINE(poly, p, 64, 1, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VCOMBINE(float, f, 16, 4, 8);
#endif
@@ -95,6 +105,9 @@ void exec_vcombine (void)
CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, "");
CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, "");
CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected, "");
+#if defined (__ARM_FEATURE_CRYPTO)
+ CHECK(TEST_MSG, poly, 64, 2, PRIx64, expected, "");
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, "");
#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c
index b8b338ef3c06ff2489b525e22760cbaed1fda335..d6d3bba39523e9e9f91b4fe80065682ea01bd6b8 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0x123456789abcdef0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a,
0x78, 0x56, 0x34, 0x12 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0x123456789abcdef0 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x9abcdef0, 0x12345678 };
@@ -49,6 +53,9 @@ FNNAME (INSN_NAME)
DECL_VAL(val, uint, 64, 1);
DECL_VAL(val, poly, 8, 8);
DECL_VAL(val, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ DECL_VAL(val, poly, 64, 1);
+#endif
DECL_VARIABLE(vector_res, int, 8, 8);
DECL_VARIABLE(vector_res, int, 16, 4);
@@ -64,6 +71,9 @@ FNNAME (INSN_NAME)
DECL_VARIABLE(vector_res, uint, 64, 1);
DECL_VARIABLE(vector_res, poly, 8, 8);
DECL_VARIABLE(vector_res, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ DECL_VARIABLE(vector_res, poly, 64, 1);
+#endif
clean_results ();
@@ -82,6 +92,9 @@ FNNAME (INSN_NAME)
VECT_VAR(val, uint, 64, 1) = 0x123456789abcdef0ULL;
VECT_VAR(val, poly, 8, 8) = 0x123456789abcdef0ULL;
VECT_VAR(val, poly, 16, 4) = 0x123456789abcdef0ULL;
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_VAR(val, poly, 64, 1) = 0x123456789abcdef0ULL;
+#endif
TEST_VCREATE(int, s, 8, 8);
TEST_VCREATE(int, s, 16, 4);
@@ -97,6 +110,9 @@ FNNAME (INSN_NAME)
TEST_VCREATE(uint, u, 64, 1);
TEST_VCREATE(poly, p, 8, 8);
TEST_VCREATE(poly, p, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VCREATE(poly, p, 64, 1);
+#endif
CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, "");
CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, "");
@@ -108,6 +124,9 @@ FNNAME (INSN_NAME)
CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, "");
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, "");
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, "");
+#if defined (__ARM_FEATURE_CRYPTO)
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected, "");
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, "");
#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c
index aef4173326c907a5f487f2520d0e354afbc16fd4..0a9db6367e9eba57d69487ff3011be9e2555ea35 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* We test vdup and vmov in the same place since they are aliases. */
@@ -23,6 +24,11 @@ VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcc00,
0xcc00, 0xcc00 };
#endif
+#if defined (__ARM_FEATURE_CRYPTO) && defined(__aarch64__)
+VECT_VAR_DECL(expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0,
@@ -77,6 +83,11 @@ VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xcb80,
0xcb80, 0xcb80 };
#endif
+#if defined (__ARM_FEATURE_CRYPTO) && defined(__aarch64__)
+VECT_VAR_DECL(expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1,
@@ -131,6 +142,11 @@ VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb00, 0xcb00,
0xcb00, 0xcb00 };
#endif
+#if defined (__ARM_FEATURE_CRYPTO) && defined(__aarch64__)
+VECT_VAR_DECL(expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2,
@@ -167,6 +183,12 @@ VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb00, 0xcb00,
VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000,
0xc1600000, 0xc1600000 };
+#if defined(__aarch64__)
+#define CHECK_EXEC_RESULTS(M, E, C) CHECK_RESULTS_NAMED_NO_FP16(M, E, C)
+#else
+#define CHECK_EXEC_RESULTS(M, E, C) CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64(M, E , C)
+#endif
+
#define TEST_MSG "VDUP/VDUPQ"
void exec_vdup_vmov (void)
{
@@ -204,6 +226,9 @@ void exec_vdup_vmov (void)
#if defined (FP16_SUPPORTED)
TEST_VDUP(, float, f, 16, 4);
#endif
+#if defined (__ARM_FEATURE_CRYPTO)
+ AARCH64_ONLY(TEST_VDUP(, poly, p, 64, 1));
+#endif
TEST_VDUP(, float, f, 32, 2);
TEST_VDUP(q, int, s, 8, 16);
@@ -219,18 +244,21 @@ void exec_vdup_vmov (void)
#if defined (FP16_SUPPORTED)
TEST_VDUP(q, float, f, 16, 8);
#endif
+#if defined (__ARM_FEATURE_CRYPTO)
+ AARCH64_ONLY(TEST_VDUP(q, poly, p, 64, 2));
+#endif
TEST_VDUP(q, float, f, 32, 4);
#if defined (FP16_SUPPORTED)
switch (i) {
case 0:
- CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
+ CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
break;
case 1:
- CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
+ CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected1, "");
break;
case 2:
- CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
+ CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected2, "");
break;
default:
abort();
@@ -271,6 +299,9 @@ void exec_vdup_vmov (void)
#if defined (FP16_SUPPORTED)
TEST_VMOV(, float, f, 16, 4);
#endif
+#if defined (__ARM_FEATURE_CRYPTO)
+ AARCH64_ONLY(TEST_VMOV(, poly, p, 64, 1));
+#endif
TEST_VMOV(, float, f, 32, 2);
TEST_VMOV(q, int, s, 8, 16);
@@ -286,6 +317,9 @@ void exec_vdup_vmov (void)
#if defined (FP16_SUPPORTED)
TEST_VMOV(q, float, f, 16, 8);
#endif
+#if defined (__ARM_FEATURE_CRYPTO)
+ AARCH64_ONLY(TEST_VMOV(q, poly, p, 64, 2));
+#endif
TEST_VMOV(q, float, f, 32, 4);
#if defined (FP16_SUPPORTED)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c
index 5d0dba36e297ffa6bccc956d0bc9e0c8ca793626..92899686c8213658a3aadfcdcb895ce11a2d7696 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
@@ -31,6 +32,10 @@ VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1,
0xfffffff1, 0xfffffff1 };
VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0,
0xfffffffffffffff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5,
0xf5, 0xf5, 0xf5, 0xf5,
0xf5, 0xf5, 0xf5, 0xf5,
@@ -53,6 +58,9 @@ VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xca80, 0xca80,
0xca80, 0xca80,
0xca80, 0xca80 };
#endif
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
0xc1700000, 0xc1700000 };
@@ -92,6 +100,9 @@ void exec_vdup_lane (void)
#if defined (FP16_SUPPORTED)
TEST_VDUP_LANE(, float, f, 16, 4, 4, 3);
#endif
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VDUP_LANE(, poly, p, 64, 1, 1, 0);
+#endif
TEST_VDUP_LANE(, float, f, 32, 2, 2, 1);
TEST_VDUP_LANE(q, int, s, 8, 16, 8, 2);
@@ -107,6 +118,9 @@ void exec_vdup_lane (void)
#if defined (FP16_SUPPORTED)
TEST_VDUP_LANE(q, float, f, 16, 8, 4, 3);
#endif
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VDUP_LANE(q, poly, p, 64, 2, 1, 0);
+#endif
TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1);
#if defined (FP16_SUPPORTED)
@@ -131,6 +145,9 @@ VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 };
VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 };
VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected2,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7,
0xf7, 0xf7, 0xf7, 0xf7 };
VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 };
@@ -159,6 +176,10 @@ VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff0, 0xfffffff0,
0xfffffff0, 0xfffffff0 };
VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff0,
0xfffffffffffffff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected2,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5,
0xf5, 0xf5, 0xf5, 0xf5,
0xf5, 0xf5, 0xf5, 0xf5,
@@ -204,6 +225,9 @@ VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
TEST_VDUP_LANEQ(, uint, u, 64, 1, 2, 0);
TEST_VDUP_LANEQ(, poly, p, 8, 8, 16, 7);
TEST_VDUP_LANEQ(, poly, p, 16, 4, 8, 3);
+#if defined (__ARM_FEATURE_CRYPTO)
+ AARCH64_ONLY(TEST_VDUP_LANEQ(, poly, p, 64, 1, 2, 0));
+#endif
#if defined (FP16_SUPPORTED)
TEST_VDUP_LANEQ(, float, f, 16, 4, 8, 3);
#endif
@@ -219,6 +243,9 @@ VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
TEST_VDUP_LANEQ(q, uint, u, 64, 2, 2, 0);
TEST_VDUP_LANEQ(q, poly, p, 8, 16, 16, 5);
TEST_VDUP_LANEQ(q, poly, p, 16, 8, 8, 1);
+#if defined (__ARM_FEATURE_CRYPTO)
+ AARCH64_ONLY(TEST_VDUP_LANEQ(q, poly, p, 64, 2, 2, 0));
+#endif
#if defined (FP16_SUPPORTED)
TEST_VDUP_LANEQ(q, float, f, 16, 8, 8, 7);
#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c
index 9f0a1687f189bc2dfbe111c7f5c3b96c9acecd52..8c9f52e39aadcfb37fed3c3cefc5fef941ca5314 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
@@ -50,6 +54,9 @@ void exec_vget_high (void)
TEST_VGET_HIGH(uint, u, 64, 1, 2);
TEST_VGET_HIGH(poly, p, 8, 8, 16);
TEST_VGET_HIGH(poly, p, 16, 4, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+  TEST_VGET_HIGH(poly, p, 64, 1, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VGET_HIGH(float, f, 16, 4, 8);
#endif
@@ -65,6 +72,7 @@ void exec_vget_high (void)
CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, "");
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, "");
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, "");
+ CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected, "");
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, "");
}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_lane.c
index ee6d6503ad622c936be2f6c7468db845398a6c2e..ab010e309bedf1dcb97749cd8b69afc829805437 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_lane.c
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
int8_t expected_s8 = 0xf7;
@@ -13,6 +14,9 @@ uint32_t expected_u32 = 0xfffffff1;
uint64_t expected_u64 = 0xfffffffffffffff0;
poly8_t expected_p8 = 0xf6;
poly16_t expected_p16 = 0xfff2;
+#if defined (__ARM_FEATURE_CRYPTO)
+poly64_t expected_p64 = 0xfffffffffffffff0;
+#endif
hfloat16_t expected_f16 = 0xcb80;
hfloat32_t expected_f32 = 0xc1700000;
@@ -26,6 +30,9 @@ uint32_t expectedq_u32 = 0xfffffff2;
uint64_t expectedq_u64 = 0xfffffffffffffff1;
poly8_t expectedq_p8 = 0xfe;
poly16_t expectedq_p16 = 0xfff6;
+#if defined (__ARM_FEATURE_CRYPTO)
+poly64_t expectedq_p64 = 0xfffffffffffffff1;
+#endif
hfloat16_t expectedq_f16 = 0xca80;
hfloat32_t expectedq_f32 = 0xc1500000;
@@ -89,6 +96,9 @@ void exec_vget_lane (void)
VAR_DECL(var, uint, 64);
VAR_DECL(var, poly, 8);
VAR_DECL(var, poly, 16);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VAR_DECL(var, poly, 64);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VAR_DECL(var, float, 16);
#endif
@@ -114,6 +124,9 @@ void exec_vget_lane (void)
TEST_VGET_LANE(, uint, u, 64, 1, 0);
TEST_VGET_LANE(, poly, p, 8, 8, 6);
TEST_VGET_LANE(, poly, p, 16, 4, 2);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VGET_LANE(, poly, p, 64, 1, 0);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VGET_LANE_FP(, float, f, 16, 4, 1);
#endif
@@ -129,6 +142,9 @@ void exec_vget_lane (void)
TEST_VGET_LANE(q, uint, u, 64, 2, 1);
TEST_VGET_LANE(q, poly, p, 8, 16, 14);
TEST_VGET_LANE(q, poly, p, 16, 8, 6);
+#if defined (__ARM_FEATURE_CRYPTO)
+ AARCH64_ONLY(TEST_VGET_LANE(q, poly, p, 64, 2, 1));
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VGET_LANE_FP(q, float, f, 16, 8, 3);
#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c
index 2b875b9b7b86d850647cf977086f336b932cfb0b..6a67baa6c64ae59b6d454d6c97b9a219e2610490 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
@@ -50,6 +54,9 @@ void exec_vget_low (void)
TEST_VGET_LOW(uint, u, 64, 1, 2);
TEST_VGET_LOW(poly, p, 8, 8, 16);
TEST_VGET_LOW(poly, p, 16, 4, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VGET_LOW(poly, p, 64, 1, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VGET_LOW(float, f, 16, 4, 8);
#endif
@@ -65,6 +72,9 @@ void exec_vget_low (void)
CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, "");
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, "");
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, "");
+#if defined (__ARM_FEATURE_CRYPTO)
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected, "");
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, "");
#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c
index 4ed0e464f9ce6b0f599a6a72d3f49db5ac9a0374..96cf06be923efa47e7977d02a8ad63ce2e6cba1f 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -33,7 +37,7 @@ VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2,
- 0xfff3, 0xfff4, 0xfff5,
+ 0xfff3, 0xfff4, 0xfff5,
0xfff6, 0xfff7 };
VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
0xfffffff2, 0xfffffff3 };
@@ -45,6 +49,10 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c
index 34be214e9122c5701a341b09479443fdb5f2716b..62585e8371fd1c738d0285a7997cc3bb1ab30948 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
/* Chunk 0. */
@@ -17,6 +18,9 @@ VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0 };
VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected0,hfloat,16,4) [] = { 0xcc00, 0xcc00, 0xcc00, 0xcc00 };
VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
@@ -45,6 +49,10 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0 };
VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected0,hfloat,16,8) [] = { 0xcc00, 0xcc00, 0xcc00, 0xcc00,
0xcc00, 0xcc00, 0xcc00, 0xcc00 };
VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000,
@@ -64,6 +72,9 @@ VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1 };
VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected1,hfloat,16,4) [] = { 0xcb80, 0xcb80, 0xcb80, 0xcb80 };
VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
@@ -92,6 +103,10 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
0xf1, 0xf1, 0xf1, 0xf1 };
VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected1,hfloat,16,8) [] = { 0xcb80, 0xcb80, 0xcb80, 0xcb80,
0xcb80, 0xcb80, 0xcb80, 0xcb80 };
VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
@@ -111,6 +126,9 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2 };
VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected2,hfloat,16,4) [] = { 0xcb00, 0xcb00, 0xcb00, 0xcb00 };
VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
@@ -139,6 +157,10 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
0xf2, 0xf2, 0xf2, 0xf2 };
VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected2,hfloat,16,8) [] = { 0xcb00, 0xcb00, 0xcb00, 0xcb00,
0xcb00, 0xcb00, 0xcb00, 0xcb00 };
VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000,
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c
index e1e8562ac6be424e638d11a90aeb406116abca24..94d349fac42992eee239dfca2cecab6bbbf4afe8 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
@@ -18,6 +19,11 @@ VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld2_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld2_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -61,6 +67,11 @@ VECT_VAR_DECL(expected_vld2_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld2_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff3 };
+#endif
VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld2_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
@@ -104,6 +115,11 @@ VECT_VAR_DECL(expected_vld3_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld3_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld3_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -147,6 +163,11 @@ VECT_VAR_DECL(expected_vld3_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld3_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff3 };
+#endif
VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld3_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
@@ -193,6 +214,11 @@ VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7 };
VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff8, 0xfff9,
0xfffa, 0xfffb };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld3_2,poly,64,2) [] = { 0xfffffffffffffff4,
+ 0xfffffffffffffff5 };
+#endif
VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 };
VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
VECT_VAR_DECL(expected_vld3_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
@@ -238,6 +264,11 @@ VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld4_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld4_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -281,6 +312,11 @@ VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld4_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff3 };
+#endif
VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld4_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
@@ -324,6 +360,11 @@ VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7 };
VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld4_2,poly,64,2) [] = { 0xfffffffffffffff4,
+ 0xfffffffffffffff5 };
+#endif
VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 };
VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
VECT_VAR_DECL(expected_vld4_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
@@ -367,6 +408,11 @@ VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0xfffffffffffffff3 };
VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0x8, 0x9, 0xa, 0xb,
0xc, 0xd, 0xe, 0xf };
VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld4_3,poly,64,2) [] = { 0xfffffffffffffff6,
+ 0xfffffffffffffff7 };
+#endif
VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xc400, 0xc200, 0xc000, 0xbc00 };
VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1200000, 0xc1100000 };
VECT_VAR_DECL(expected_vld4_3,int,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
@@ -424,27 +470,42 @@ void exec_vldX (void)
&(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \
sizeof(VECT_VAR(result, T1, W, N)));
+#if defined (__ARM_FEATURE_CRYPTO)
+#define DECL_VLDX_CRYPTO(T1, W, N, X) \
+ DECL_VLDX(T1, W, N, X)
+#define TEST_VLDX_CRYPTO(Q, T1, T2, W, N, X) \
+ TEST_VLDX(Q, T1, T2, W, N, X)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y) \
+  TEST_EXTRA_CHUNK(T1, W, N, X, Y)
+#else
+#define DECL_VLDX_CRYPTO(T1, W, N, X)
+#define TEST_VLDX_CRYPTO(Q, T1, T2, W, N, X)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y)
+#endif
+
/* We need all variants in 64 bits, but there is no 64x2 variant. */
-#define DECL_ALL_VLDX_NO_FP16(X) \
- DECL_VLDX(int, 8, 8, X); \
- DECL_VLDX(int, 16, 4, X); \
- DECL_VLDX(int, 32, 2, X); \
- DECL_VLDX(int, 64, 1, X); \
- DECL_VLDX(uint, 8, 8, X); \
- DECL_VLDX(uint, 16, 4, X); \
- DECL_VLDX(uint, 32, 2, X); \
- DECL_VLDX(uint, 64, 1, X); \
- DECL_VLDX(poly, 8, 8, X); \
- DECL_VLDX(poly, 16, 4, X); \
- DECL_VLDX(float, 32, 2, X); \
- DECL_VLDX(int, 8, 16, X); \
- DECL_VLDX(int, 16, 8, X); \
- DECL_VLDX(int, 32, 4, X); \
- DECL_VLDX(uint, 8, 16, X); \
- DECL_VLDX(uint, 16, 8, X); \
- DECL_VLDX(uint, 32, 4, X); \
- DECL_VLDX(poly, 8, 16, X); \
- DECL_VLDX(poly, 16, 8, X); \
+#define DECL_ALL_VLDX_NO_FP16(X) \
+ DECL_VLDX(int, 8, 8, X); \
+ DECL_VLDX(int, 16, 4, X); \
+ DECL_VLDX(int, 32, 2, X); \
+ DECL_VLDX(int, 64, 1, X); \
+ DECL_VLDX(uint, 8, 8, X); \
+ DECL_VLDX(uint, 16, 4, X); \
+ DECL_VLDX(uint, 32, 2, X); \
+ DECL_VLDX(uint, 64, 1, X); \
+ DECL_VLDX(poly, 8, 8, X); \
+ DECL_VLDX(poly, 16, 4, X); \
+ DECL_VLDX_CRYPTO(poly, 64, 1, X); \
+ DECL_VLDX(float, 32, 2, X); \
+ DECL_VLDX(int, 8, 16, X); \
+ DECL_VLDX(int, 16, 8, X); \
+ DECL_VLDX(int, 32, 4, X); \
+ DECL_VLDX(uint, 8, 16, X); \
+ DECL_VLDX(uint, 16, 8, X); \
+ DECL_VLDX(uint, 32, 4, X); \
+ DECL_VLDX(poly, 8, 16, X); \
+ DECL_VLDX(poly, 16, 8, X); \
+ AARCH64_ONLY(DECL_VLDX_CRYPTO(poly, 64, 2, X)); \
DECL_VLDX(float, 32, 4, X)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -456,26 +517,28 @@ void exec_vldX (void)
#define DECL_ALL_VLDX(X) DECL_ALL_VLDX_NO_FP16(X)
#endif
-#define TEST_ALL_VLDX_NO_FP16(X) \
- TEST_VLDX(, int, s, 8, 8, X); \
- TEST_VLDX(, int, s, 16, 4, X); \
- TEST_VLDX(, int, s, 32, 2, X); \
- TEST_VLDX(, int, s, 64, 1, X); \
- TEST_VLDX(, uint, u, 8, 8, X); \
- TEST_VLDX(, uint, u, 16, 4, X); \
- TEST_VLDX(, uint, u, 32, 2, X); \
- TEST_VLDX(, uint, u, 64, 1, X); \
- TEST_VLDX(, poly, p, 8, 8, X); \
- TEST_VLDX(, poly, p, 16, 4, X); \
- TEST_VLDX(, float, f, 32, 2, X); \
- TEST_VLDX(q, int, s, 8, 16, X); \
- TEST_VLDX(q, int, s, 16, 8, X); \
- TEST_VLDX(q, int, s, 32, 4, X); \
- TEST_VLDX(q, uint, u, 8, 16, X); \
- TEST_VLDX(q, uint, u, 16, 8, X); \
- TEST_VLDX(q, uint, u, 32, 4, X); \
- TEST_VLDX(q, poly, p, 8, 16, X); \
- TEST_VLDX(q, poly, p, 16, 8, X); \
+#define TEST_ALL_VLDX_NO_FP16(X) \
+ TEST_VLDX(, int, s, 8, 8, X); \
+ TEST_VLDX(, int, s, 16, 4, X); \
+ TEST_VLDX(, int, s, 32, 2, X); \
+ TEST_VLDX(, int, s, 64, 1, X); \
+ TEST_VLDX(, uint, u, 8, 8, X); \
+ TEST_VLDX(, uint, u, 16, 4, X); \
+ TEST_VLDX(, uint, u, 32, 2, X); \
+ TEST_VLDX(, uint, u, 64, 1, X); \
+ TEST_VLDX(, poly, p, 8, 8, X); \
+ TEST_VLDX(, poly, p, 16, 4, X); \
+ TEST_VLDX_CRYPTO(, poly, p, 64, 1, X); \
+ TEST_VLDX(, float, f, 32, 2, X); \
+ TEST_VLDX(q, int, s, 8, 16, X); \
+ TEST_VLDX(q, int, s, 16, 8, X); \
+ TEST_VLDX(q, int, s, 32, 4, X); \
+ TEST_VLDX(q, uint, u, 8, 16, X); \
+ TEST_VLDX(q, uint, u, 16, 8, X); \
+ TEST_VLDX(q, uint, u, 32, 4, X); \
+ TEST_VLDX(q, poly, p, 8, 16, X); \
+ TEST_VLDX(q, poly, p, 16, 8, X); \
+ AARCH64_ONLY(TEST_VLDX_CRYPTO(q, poly, p, 64, 2, X)); \
TEST_VLDX(q, float, f, 32, 4, X)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -487,26 +550,28 @@ void exec_vldX (void)
#define TEST_ALL_VLDX(X) TEST_ALL_VLDX_NO_FP16(X)
#endif
-#define TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y) \
- TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \
- TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \
- TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \
- TEST_EXTRA_CHUNK(int, 64, 1, X, Y); \
- TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \
- TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \
- TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \
- TEST_EXTRA_CHUNK(uint, 64, 1, X, Y); \
- TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \
- TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \
- TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \
- TEST_EXTRA_CHUNK(int, 8, 16, X, Y); \
- TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \
- TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \
- TEST_EXTRA_CHUNK(uint, 8, 16, X, Y); \
- TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \
- TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \
- TEST_EXTRA_CHUNK(poly, 8, 16, X, Y); \
- TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \
+#define TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y) \
+ TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \
+ TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \
+ TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \
+ TEST_EXTRA_CHUNK(int, 64, 1, X, Y); \
+ TEST_EXTRA_CHUNK(uint, 8, 8, X, Y); \
+ TEST_EXTRA_CHUNK(uint, 16, 4, X, Y); \
+ TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \
+ TEST_EXTRA_CHUNK(uint, 64, 1, X, Y); \
+ TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \
+ TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \
+ TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 1, X, Y); \
+ TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \
+ TEST_EXTRA_CHUNK(int, 8, 16, X, Y); \
+ TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \
+ TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \
+ TEST_EXTRA_CHUNK(uint, 8, 16, X, Y); \
+ TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \
+ TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \
+ TEST_EXTRA_CHUNK(poly, 8, 16, X, Y); \
+ TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \
+ AARCH64_ONLY(TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 2, X, Y)); \
TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -519,27 +584,29 @@ void exec_vldX (void)
#endif
/* vldX supports all vector types except [u]int64x2. */
-#define CHECK_RESULTS_VLDX_NO_FP16(test_name,EXPECTED,comment) \
- CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \
- CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \
- CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \
- CHECK(test_name, int, 64, 1, PRIx64, EXPECTED, comment); \
- CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \
- CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \
- CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \
- CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \
- CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
- CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
- CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \
- \
- CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \
- CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \
- CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \
- CHECK(test_name, uint, 8, 16, PRIx8, EXPECTED, comment); \
- CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \
- CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \
- CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \
- CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
+#define CHECK_RESULTS_VLDX_NO_FP16(test_name,EXPECTED,comment) \
+ CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \
+ CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \
+ CHECK(test_name, int, 64, 1, PRIx64, EXPECTED, comment); \
+ CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \
+ CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \
+ CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \
+ CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
+ CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \
+ CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \
+ \
+ CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \
+ CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \
+ CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \
+ CHECK(test_name, uint, 8, 16, PRIx8, EXPECTED, comment); \
+ CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \
+ CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \
+ CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \
+ CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
+ AARCH64_ONLY(CHECK_CRYPTO(test_name, poly, 64, 2, PRIx64, EXPECTED, comment)); \
CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -580,6 +647,10 @@ void exec_vldX (void)
PAD(buffer_vld2_pad, poly, 8, 8);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4);
PAD(buffer_vld2_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 1);
+ PAD(buffer_vld2_pad, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT2(buffer_vld2, float, 16, 4);
PAD(buffer_vld2_pad, float, 16, 4);
@@ -607,6 +678,10 @@ void exec_vldX (void)
PAD(buffer_vld2_pad, poly, 8, 16);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8);
PAD(buffer_vld2_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO) && defined (__aarch64__)
+ VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 2);
+ PAD(buffer_vld2_pad, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT2(buffer_vld2, float, 16, 8);
PAD(buffer_vld2_pad, float, 16, 8);
@@ -635,6 +710,10 @@ void exec_vldX (void)
PAD(buffer_vld3_pad, poly, 8, 8);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4);
PAD(buffer_vld3_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 1);
+ PAD(buffer_vld3_pad, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT3(buffer_vld3, float, 16, 4);
PAD(buffer_vld3_pad, float, 16, 4);
@@ -662,6 +741,10 @@ void exec_vldX (void)
PAD(buffer_vld3_pad, poly, 8, 16);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8);
PAD(buffer_vld3_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO) && defined (__aarch64__)
+ VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 2);
+ PAD(buffer_vld3_pad, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT3(buffer_vld3, float, 16, 8);
PAD(buffer_vld3_pad, float, 16, 8);
@@ -690,6 +773,10 @@ void exec_vldX (void)
PAD(buffer_vld4_pad, poly, 8, 8);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4);
PAD(buffer_vld4_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 1);
+ PAD(buffer_vld4_pad, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT4(buffer_vld4, float, 16, 4);
PAD(buffer_vld4_pad, float, 16, 4);
@@ -717,6 +804,10 @@ void exec_vldX (void)
PAD(buffer_vld4_pad, poly, 8, 16);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8);
PAD(buffer_vld4_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO) && defined (__aarch64__)
+ VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 2);
+ PAD(buffer_vld4_pad, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT4(buffer_vld4, float, 16, 8);
PAD(buffer_vld4_pad, float, 16, 8);
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c
index b44a987cb5d2f169b633d9c1e862fb782bd65d39..60fdd20f42a19862684c28c6f44db3f6f5642c98 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
@@ -18,6 +19,9 @@ VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
0xf0, 0xf1, 0xf0, 0xf1 };
VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = {0xcc00, 0xcb80, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
@@ -36,6 +40,9 @@ VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
0xf0, 0xf1, 0xf0, 0xf1 };
VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff0, 0xfff1,
0xfff0, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
@@ -56,6 +63,9 @@ VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0,
0xf1, 0xf2, 0xf0, 0xf1 };
VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1,
0xfff2, 0xfff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xcc00 };
VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
@@ -76,6 +86,9 @@ VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2,
0xf0, 0xf1, 0xf2, 0xf0 };
VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff1, 0xfff2,
0xfff0, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xcb80, 0xcb00, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1800000 };
@@ -96,6 +109,9 @@ VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1,
0xf2, 0xf0, 0xf1, 0xf2 };
VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff2, 0xfff0,
0xfff1, 0xfff2 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xcb00, 0xcc00, 0xcb80, 0xcb00 };
VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1700000, 0xc1600000 };
@@ -114,6 +130,9 @@ VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
@@ -131,6 +150,9 @@ VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
@@ -148,6 +170,9 @@ VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
@@ -165,6 +190,9 @@ VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0xfffffffffffffff3 };
VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf0, 0xf1, 0xf2, 0xf3 };
VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+#endif
VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
@@ -197,6 +225,16 @@ void exec_vldX_dup (void)
&(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \
sizeof(VECT_VAR(result, T1, W, N)));
+#if defined (__ARM_FEATURE_CRYPTO)
+#define TEST_VLDX_DUP_CRYPTO(Q, T1, T2, W, N, X) TEST_VLDX_DUP(Q, T1, T2, W, N, X)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X,Y) TEST_EXTRA_CHUNK(T1, W, N, X,Y)
+#define DECL_VLDX_DUP_CRYPTO(T1, W, N, X) DECL_VLDX_DUP(T1, W, N, X)
+#else
+#define TEST_VLDX_DUP_CRYPTO(Q, T1, T2, W, N, X)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X,Y)
+#define DECL_VLDX_DUP_CRYPTO(T1, W, N, X)
+#endif
+
#define DECL_ALL_VLDX_DUP_NO_FP16(X) \
DECL_VLDX_DUP(int, 8, 8, X); \
DECL_VLDX_DUP(int, 16, 4, X); \
@@ -208,6 +246,7 @@ void exec_vldX_dup (void)
DECL_VLDX_DUP(uint, 64, 1, X); \
DECL_VLDX_DUP(poly, 8, 8, X); \
DECL_VLDX_DUP(poly, 16, 4, X); \
+ DECL_VLDX_DUP_CRYPTO(poly, 64, 1, X); \
DECL_VLDX_DUP(float, 32, 2, X)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -229,6 +268,7 @@ void exec_vldX_dup (void)
TEST_VLDX_DUP(, uint, u, 64, 1, X); \
TEST_VLDX_DUP(, poly, p, 8, 8, X); \
TEST_VLDX_DUP(, poly, p, 16, 4, X); \
+ TEST_VLDX_DUP_CRYPTO(, poly, p, 64, 1, X); \
TEST_VLDX_DUP(, float, f, 32, 2, X)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -250,6 +290,7 @@ void exec_vldX_dup (void)
TEST_EXTRA_CHUNK(uint, 64, 1, X, Y); \
TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \
TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \
+ TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 1, X, Y); \
TEST_EXTRA_CHUNK(float, 32, 2, X, Y)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -272,6 +313,7 @@ void exec_vldX_dup (void)
CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \
CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \
CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -313,6 +355,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld2_pad, poly, 8, 8);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4);
PAD(buffer_vld2_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 1);
+ PAD(buffer_vld2_pad, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT2(buffer_vld2, float, 16, 4);
PAD(buffer_vld2_pad, float, 16, 4);
@@ -340,6 +386,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld2_pad, poly, 8, 16);
VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8);
PAD(buffer_vld2_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO) && defined (__aarch64__)
+ VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 2);
+ PAD(buffer_vld2_pad, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT2(buffer_vld2, float, 16, 8);
PAD(buffer_vld2_pad, float, 16, 8);
@@ -368,6 +418,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld3_pad, poly, 8, 8);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4);
PAD(buffer_vld3_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 1);
+ PAD(buffer_vld3_pad, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT3(buffer_vld3, float, 16, 4);
PAD(buffer_vld3_pad, float, 16, 4);
@@ -395,6 +449,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld3_pad, poly, 8, 16);
VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8);
PAD(buffer_vld3_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO) && defined (__aarch64__)
+ VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 2);
+ PAD(buffer_vld3_pad, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT3(buffer_vld3, float, 16, 8);
PAD(buffer_vld3_pad, float, 16, 8);
@@ -423,6 +481,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld4_pad, poly, 8, 8);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4);
PAD(buffer_vld4_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 1);
+ PAD(buffer_vld4_pad, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT4(buffer_vld4, float, 16, 4);
PAD(buffer_vld4_pad, float, 16, 4);
@@ -450,6 +512,10 @@ void exec_vldX_dup (void)
PAD(buffer_vld4_pad, poly, 8, 16);
VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8);
PAD(buffer_vld4_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO) && defined (__aarch64__)
+ VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 2);
+ PAD(buffer_vld4_pad, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_ARRAY_INIT4(buffer_vld4, float, 16, 8);
PAD(buffer_vld4_pad, float, 16, 8);
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c
index cda76abfe0a18f648331ec9cffc030368b2a7c70..c57be3023a0625753599d2b2702ef84937cc4255 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
@@ -18,6 +19,11 @@ VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xaaaa, 0xaaaa,
0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld2_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -47,6 +53,11 @@ VECT_VAR_DECL(expected_vld2_1,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld2_1,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+#endif
VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -76,6 +87,11 @@ VECT_VAR_DECL(expected_vld3_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld3_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -105,6 +121,11 @@ VECT_VAR_DECL(expected_vld3_1,uint,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 };
VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xf0, 0xf1, 0xf2, 0xaa };
VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld3_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xaaaaaaaaaaaaaaaa };
+#endif
VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xcc00, 0xcb80 };
VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -134,6 +155,11 @@ VECT_VAR_DECL(expected_vld3_2,uint,32,2) [] = { 0xfffffff1, 0xfffffff2 };
VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld3_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+#endif
VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xcb00, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1,
@@ -163,6 +189,11 @@ VECT_VAR_DECL(expected_vld4_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld4_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -192,6 +223,11 @@ VECT_VAR_DECL(expected_vld4_1,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld4_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff3 };
+#endif
VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -221,6 +257,11 @@ VECT_VAR_DECL(expected_vld4_2,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld4_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+#endif
VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -250,6 +291,11 @@ VECT_VAR_DECL(expected_vld4_3,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
0xaa, 0xaa, 0xaa, 0xaa };
VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld4_3,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+#endif
VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
@@ -279,6 +325,9 @@ VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 16, 2);
#endif
@@ -295,6 +344,9 @@ VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 64, 3);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 16, 3);
#endif
@@ -311,6 +363,9 @@ VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 64, 4);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 16, 4);
#endif
@@ -356,6 +411,16 @@ void exec_vldX_lane (void)
&(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \
sizeof(VECT_VAR(result, T1, W, N)));
+#if defined (__ARM_FEATURE_CRYPTO)
+#define DECL_VLDX_LANE_CRYPTO(T1, W, N, X) DECL_VLDX_LANE(T1, W, N, X)
+#define TEST_VLDX_LANE_CRYPTO(Q, T1, T2, W, N, X, L) TEST_VLDX_LANE(Q, T1, T2, W, N, X, L)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y) TEST_EXTRA_CHUNK(T1, W, N, X, Y)
+#else
+#define DECL_VLDX_LANE_CRYPTO(T1, W, N, X)
+#define TEST_VLDX_LANE_CRYPTO(Q, T1, T2, W, N, X, L)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y)
+#endif
+
/* We need all variants in 64 bits, but there is no 64x2 variant. */
#define DECL_ALL_VLDX_LANE_NO_FP16(X) \
DECL_VLDX_LANE(int, 8, 8, X); \
@@ -366,11 +431,13 @@ void exec_vldX_lane (void)
DECL_VLDX_LANE(uint, 32, 2, X); \
DECL_VLDX_LANE(poly, 8, 8, X); \
DECL_VLDX_LANE(poly, 16, 4, X); \
+ AARCH64_ONLY(DECL_VLDX_LANE_CRYPTO(poly, 64, 1, X)); \
DECL_VLDX_LANE(int, 16, 8, X); \
DECL_VLDX_LANE(int, 32, 4, X); \
DECL_VLDX_LANE(uint, 16, 8, X); \
DECL_VLDX_LANE(uint, 32, 4, X); \
DECL_VLDX_LANE(poly, 16, 8, X); \
+ AARCH64_ONLY(DECL_VLDX_LANE_CRYPTO(poly, 64, 2, X)); \
DECL_VLDX_LANE(float, 32, 2, X); \
DECL_VLDX_LANE(float, 32, 4, X)
@@ -400,11 +467,13 @@ void exec_vldX_lane (void)
TEST_VLDX_LANE(, uint, u, 32, 2, X, 1); \
TEST_VLDX_LANE(, poly, p, 8, 8, X, 4); \
TEST_VLDX_LANE(, poly, p, 16, 4, X, 3); \
+ AARCH64_ONLY(TEST_VLDX_LANE_CRYPTO(, poly, p, 64, 1, X, 0));\
TEST_VLDX_LANE(q, int, s, 16, 8, X, 6); \
TEST_VLDX_LANE(q, int, s, 32, 4, X, 2); \
TEST_VLDX_LANE(q, uint, u, 16, 8, X, 5); \
TEST_VLDX_LANE(q, uint, u, 32, 4, X, 0); \
TEST_VLDX_LANE(q, poly, p, 16, 8, X, 5); \
+ AARCH64_ONLY(TEST_VLDX_LANE_CRYPTO(q, poly, p, 64, 2, X, 0));\
TEST_VLDX_LANE(, float, f, 32, 2, X, 0); \
TEST_VLDX_LANE(q, float, f, 32, 4, X, 2)
@@ -426,11 +495,13 @@ void exec_vldX_lane (void)
TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \
TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \
TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \
+ AARCH64_ONLY(TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 1, X, Y)); \
TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \
TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \
TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \
TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \
TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \
+ AARCH64_ONLY(TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 2, X, Y)); \
TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \
TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
@@ -453,12 +524,14 @@ void exec_vldX_lane (void)
CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \
CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
+ AARCH64_ONLY(CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment)); \
CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \
CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \
CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \
CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \
CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \
CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
+ AARCH64_ONLY(CHECK_CRYPTO(test_name, poly, 64, 2, PRIx64, EXPECTED, comment)); \
CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -489,11 +562,17 @@ void exec_vldX_lane (void)
DUMMY_ARRAY(buffer_src, uint, 32, 2, 4);
DUMMY_ARRAY(buffer_src, poly, 8, 8, 4);
DUMMY_ARRAY(buffer_src, poly, 16, 4, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ AARCH64_ONLY(DUMMY_ARRAY(buffer_src, poly, 64, 1, 4));
+#endif
DUMMY_ARRAY(buffer_src, int, 16, 8, 4);
DUMMY_ARRAY(buffer_src, int, 32, 4, 4);
DUMMY_ARRAY(buffer_src, uint, 16, 8, 4);
DUMMY_ARRAY(buffer_src, uint, 32, 4, 4);
DUMMY_ARRAY(buffer_src, poly, 16, 8, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ AARCH64_ONLY(DUMMY_ARRAY(buffer_src, poly, 64, 2, 4));
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
DUMMY_ARRAY(buffer_src, float, 16, 4, 4);
DUMMY_ARRAY(buffer_src, float, 16, 8, 4);
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
index 808641524c47b2c245ee2f10e74a784a7bccefc9..c53f4240a20550c1e1b17072fa0d73566fa0c2a8 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
@@ -1,6 +1,7 @@
/* This file contains tests for the vreinterpret *p128 intrinsics. */
/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* { dg-add-options arm_crypto } */
#include <arm_neon.h>
@@ -78,9 +79,7 @@ VECT_VAR_DECL(vreint_expected_q_f16_p128,hfloat,16,8) [] = { 0xfff0, 0xffff,
int main (void)
{
DECL_VARIABLE_128BITS_VARIANTS(vreint_vector);
- DECL_VARIABLE(vreint_vector, poly, 64, 2);
DECL_VARIABLE_128BITS_VARIANTS(vreint_vector_res);
- DECL_VARIABLE(vreint_vector_res, poly, 64, 2);
clean_results ();
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
index 1d8cf9aa69f0b5b0717e98de613e3c350d6395d4..976b605e265e15c6e17c11ce3d92588fde874d16 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
@@ -1,6 +1,7 @@
/* This file contains tests for the vreinterpret *p64 intrinsics. */
/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* { dg-add-options arm_crypto } */
#include <arm_neon.h>
@@ -121,11 +122,7 @@ int main (void)
CHECK_FP(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, "");
DECL_VARIABLE_ALL_VARIANTS(vreint_vector);
- DECL_VARIABLE(vreint_vector, poly, 64, 1);
- DECL_VARIABLE(vreint_vector, poly, 64, 2);
DECL_VARIABLE_ALL_VARIANTS(vreint_vector_res);
- DECL_VARIABLE(vreint_vector_res, poly, 64, 1);
- DECL_VARIABLE(vreint_vector_res, poly, 64, 2);
clean_results ();
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c
index 825d07dbf77fc54b5ef796b57a42c81d6dd6d611..047ee1fa80be89083315505c6c228a03df290047 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf7, 0x33, 0x33, 0x33,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0x3333, 0x3333, 0x3333 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcb80, 0x3333, 0x3333, 0x3333 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x33333333 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xff, 0x33, 0x33, 0x33,
@@ -25,7 +29,7 @@ VECT_VAR_DECL(expected,int,8,16) [] = { 0xff, 0x33, 0x33, 0x33,
VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff5, 0x3333, 0x3333, 0x3333,
0x3333, 0x3333, 0x3333, 0x3333 };
VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0x33333333,
- 0x33333333, 0x33333333 };
+ 0x33333333, 0x33333333 };
VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff1, 0x3333333333333333 };
VECT_VAR_DECL(expected,uint,8,16) [] = { 0xfa, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33,
@@ -43,6 +47,10 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfa, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff4, 0x3333, 0x3333, 0x3333,
0x3333, 0x3333, 0x3333, 0x3333 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0x3333333333333333 };
+#endif
VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xc900, 0x3333, 0x3333, 0x3333,
0x3333, 0x3333, 0x3333, 0x3333 };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0x33333333,
@@ -72,6 +80,9 @@ void exec_vst1_lane (void)
TEST_VST1_LANE(, uint, u, 64, 1, 0);
TEST_VST1_LANE(, poly, p, 8, 8, 6);
TEST_VST1_LANE(, poly, p, 16, 4, 2);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VST1_LANE(, poly, p, 64, 1, 0);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VST1_LANE(, float, f, 16, 4, 1);
#endif
@@ -87,6 +98,9 @@ void exec_vst1_lane (void)
TEST_VST1_LANE(q, uint, u, 64, 2, 0);
TEST_VST1_LANE(q, poly, p, 8, 16, 10);
TEST_VST1_LANE(q, poly, p, 16, 8, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ TEST_VST1_LANE(q, poly, p, 64, 2, 0);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
TEST_VST1_LANE(q, float, f, 16, 8, 6);
#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c
index f5bf3bd325fa05e330d766dc0a93582d6c12e8c8..e00277d25c306559ca4149a5afcc857f7da63b52 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c
@@ -1,6 +1,7 @@
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
/* Expected results for vst2, chunk 0. */
VECT_VAR_DECL(expected_st2_0,int,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0,
@@ -14,6 +15,9 @@ VECT_VAR_DECL(expected_st2_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected_st2_0,poly,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected_st2_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0x0, 0x0 };
VECT_VAR_DECL(expected_st2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_st2_0,int,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0,
@@ -42,6 +46,9 @@ VECT_VAR_DECL(expected_st2_1,uint,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st2_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st2_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_st2_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st2_1,hfloat,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st2_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -68,6 +75,9 @@ VECT_VAR_DECL(expected_st3_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected_st3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected_st3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0x0 };
VECT_VAR_DECL(expected_st3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_st3_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0,
@@ -97,6 +107,9 @@ VECT_VAR_DECL(expected_st3_1,uint,32,2) [] = { 0xfffffff2, 0x0 };
VECT_VAR_DECL(expected_st3_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_st3_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_1,hfloat,32,2) [] = { 0xc1600000, 0x0 };
VECT_VAR_DECL(expected_st3_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -123,6 +136,9 @@ VECT_VAR_DECL(expected_st3_2,uint,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_2,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_2,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected_st3_2,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_2,hfloat,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st3_2,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -149,6 +165,9 @@ VECT_VAR_DECL(expected_st4_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected_st4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
VECT_VAR_DECL(expected_st4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
VECT_VAR_DECL(expected_st4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected_st4_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
@@ -178,6 +197,9 @@ VECT_VAR_DECL(expected_st4_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
VECT_VAR_DECL(expected_st4_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
VECT_VAR_DECL(expected_st4_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
VECT_VAR_DECL(expected_st4_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -204,6 +226,9 @@ VECT_VAR_DECL(expected_st4_2,uint,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_2,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_2,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
VECT_VAR_DECL(expected_st4_2,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_2,hfloat,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_2,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -230,6 +255,9 @@ VECT_VAR_DECL(expected_st4_3,uint,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_3,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_3,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+#endif
VECT_VAR_DECL(expected_st4_3,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_3,hfloat,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_st4_3,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -256,6 +284,9 @@ VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2);
VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 16, 2);
#endif
@@ -272,6 +303,9 @@ VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3);
VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 64, 3);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 16, 3);
#endif
@@ -288,6 +322,9 @@ VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4);
VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 64, 4);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 16, 4);
#endif
@@ -336,6 +373,19 @@ void exec_vstX_lane (void)
&(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \
sizeof(VECT_VAR(result, T1, W, N)));
+#if defined (__ARM_FEATURE_CRYPTO)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y) \
+ TEST_EXTRA_CHUNK(T1, W, N, X, Y)
+#define TEST_VSTX_LANE_CRYPTO(Q, T1, T2, W, N, X, L) \
+ TEST_VSTX_LANE(Q, T1, T2, W, N, X, L)
+#define DECL_VSTX_LANE_CRYPTO(T1, W, N, X) \
+ DECL_VSTX_LANE(T1, W, N, X)
+#else
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y)
+#define TEST_VSTX_LANE_CRYPTO(Q, T1, T2, W, N, X, L)
+#define DECL_VSTX_LANE_CRYPTO(T1, W, N, X)
+#endif
+
/* We need all variants in 64 bits, but there is no 64x2 variant,
nor 128 bits vectors of int8/uint8/poly8. */
#define DECL_ALL_VSTX_LANE_NO_FP16(X) \
@@ -347,12 +397,14 @@ void exec_vstX_lane (void)
DECL_VSTX_LANE(uint, 32, 2, X); \
DECL_VSTX_LANE(poly, 8, 8, X); \
DECL_VSTX_LANE(poly, 16, 4, X); \
+ DECL_VSTX_LANE_CRYPTO(poly, 64, 1, X); \
DECL_VSTX_LANE(float, 32, 2, X); \
DECL_VSTX_LANE(int, 16, 8, X); \
DECL_VSTX_LANE(int, 32, 4, X); \
DECL_VSTX_LANE(uint, 16, 8, X); \
DECL_VSTX_LANE(uint, 32, 4, X); \
DECL_VSTX_LANE(poly, 16, 8, X); \
+ DECL_VSTX_LANE_CRYPTO(poly, 64, 2, X); \
DECL_VSTX_LANE(float, 32, 4, X)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -378,6 +430,7 @@ void exec_vstX_lane (void)
TEST_VSTX_LANE(, uint, u, 32, 2, X, 1); \
TEST_VSTX_LANE(, poly, p, 8, 8, X, 4); \
TEST_VSTX_LANE(, poly, p, 16, 4, X, 3); \
+ AARCH64_ONLY(TEST_VSTX_LANE_CRYPTO(, poly, p, 64, 1, X, 0));\
TEST_VSTX_LANE(q, int, s, 16, 8, X, 6); \
TEST_VSTX_LANE(q, int, s, 32, 4, X, 2); \
TEST_VSTX_LANE(q, uint, u, 16, 8, X, 5); \
@@ -403,6 +456,7 @@ void exec_vstX_lane (void)
TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \
TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \
TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \
+ AARCH64_ONLY(TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 1, X, Y)); \
TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \
TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \
TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \
@@ -434,6 +488,9 @@ void exec_vstX_lane (void)
DUMMY_ARRAY(buffer_src, uint, 32, 2, 4);
DUMMY_ARRAY(buffer_src, poly, 8, 8, 4);
DUMMY_ARRAY(buffer_src, poly, 16, 4, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ DUMMY_ARRAY(buffer_src, poly, 64, 1, 4);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
DUMMY_ARRAY(buffer_src, float, 16, 4, 4);
#endif
@@ -462,6 +519,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_0, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_0, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_0, CMT);
+ AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st2_0, CMT));
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_0, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_0, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_0, CMT);
@@ -485,6 +543,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_1, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_1, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_1, CMT);
+ AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st2_1, CMT));
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_1, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_1, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_1, CMT);
@@ -514,6 +573,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_0, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_0, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_0, CMT);
+ AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st3_0, CMT));
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_0, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_0, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_0, CMT);
@@ -538,6 +598,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_1, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_1, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_1, CMT);
+ AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st3_1, CMT));
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_1, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_1, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_1, CMT);
@@ -562,6 +623,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_2, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_2, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_2, CMT);
+ AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st3_2, CMT));
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_2, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_2, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_2, CMT);
@@ -591,6 +653,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_0, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_0, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_0, CMT);
+ AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st4_0, CMT));
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_0, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_0, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_0, CMT);
@@ -615,6 +678,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_1, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_1, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_1, CMT);
+ AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st4_1, CMT));
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_1, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_1, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_1, CMT);
@@ -639,6 +703,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_2, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_2, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_2, CMT);
+ AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st4_2, CMT));
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_2, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_2, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_2, CMT);
@@ -663,6 +728,7 @@ void exec_vstX_lane (void)
CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_3, CMT);
CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_3, CMT);
CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_3, CMT);
+ AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st4_3, CMT));
CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_3, CMT);
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_3, CMT);
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_3, CMT);
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
2016-11-07 13:55 [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC Tamar Christina
@ 2016-11-08 11:21 ` Christophe Lyon
2016-11-08 11:59 ` Tamar Christina
0 siblings, 1 reply; 17+ messages in thread
From: Christophe Lyon @ 2016-11-08 11:21 UTC (permalink / raw)
To: Tamar Christina
Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
James Greenhalgh, Kyrylo Tkachov, nd
On 7 November 2016 at 14:55, Tamar Christina <Tamar.Christina@arm.com> wrote:
> Hi all,
>
> This patch (3 of 3) adds updates tests for the NEON intrinsics
> added by the previous patches:
>
> Ran regression tests on aarch64-none-linux-gnu
> and on arm-none-linux-gnueabihf.
>
> Ok for trunk?
>
> Thanks,
> Tamar
>
>
> gcc/testsuite/
> 2016-11-04 Tamar Christina <tamar.christina@arm.com>
>
> * gcc.target/aarch64/advsimd-intrinsics/p64.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
> (Poly64x1_t, Poly64x2_t): Added type.
> (AARCH64_ONLY): Added macro.
> * gcc.target/aarch64/advsimd-intrinsics/vcombine.c:
> Added test for Poly64.
> * gcc.target/aarch64/advsimd-intrinsics/vcreate.c: Likewise.
> * gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c: Likewise.
> * gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: Likewise.
> * gcc.target/aarch64/advsimd-intrinsics/vget_high.c: Likewise.
> * gcc.target/aarch64/advsimd-intrinsics/vget_lane.c: Likewise.
> * gcc.target/aarch64/advsimd-intrinsics/vget_low.c: Likewise.
> * gcc.target/aarch64/advsimd-intrinsics/vldX.c: Likewise.
> * gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c: Likewise.
> * gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c: Likewise.
> * gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c: Likewise.
> * gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c: Likewise.
> * gcc.target/aarch64/advsimd-intrinsics/vld1.c: Likewise.
> * gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c:
> Added AArch64 flags.
> * gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c:
> Added Aarch64 flags.
Hi Tamar,
A while ago I added p64_p128.c, to contain all the poly64/128
tests except for vreinterpret.
Why do you need to create p64.c ?
Similarly, adding tests for vcreate_p64 etc... in p64.c or p64_p128.c
might be easier to maintain than adding them to vcreate.c etc
with several #ifdef conditions.
For vdup-vmod.c, why do you add the "&& defined(__aarch64__)"
condition? These intrinsics are defined in arm/arm_neon.h, right?
They are tested in p64_p128.c
Looking at your patch, it seems some tests are currently missing
for arm: vget_high_p64. I'm not sure why I missed it when I removed
neont-testgen...
Regarding vreinterpret_p128.c, doesn't the existing
effective-target arm_crypto_ok prevent the tests from
running on aarch64?
Thanks,
Christophe
^ permalink raw reply [flat|nested] 17+ messages in thread
* RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
2016-11-08 11:21 ` Christophe Lyon
@ 2016-11-08 11:59 ` Tamar Christina
2016-11-24 11:45 ` Tamar Christina
0 siblings, 1 reply; 17+ messages in thread
From: Tamar Christina @ 2016-11-08 11:59 UTC (permalink / raw)
To: Christophe Lyon
Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
James Greenhalgh, Kyrylo Tkachov, nd
Hi Christophe,
Thanks for the review!
>
> A while ago I added p64_p128.c, to contain all the poly64/128 tests except for
> vreinterpret.
> Why do you need to create p64.c ?
I originally created it because I had a much smaller set of intrinsics that I wanted to
add initially; this grew, and it hadn't occurred to me that I could use the existing file now.
Another reason was the effective-target arm_crypto_ok, as you mentioned below.
>
> Similarly, adding tests for vcreate_p64 etc... in p64.c or p64_p128.c might be
> easier to maintain than adding them to vcreate.c etc with several #ifdef
> conditions.
Fair enough, I'll move them to p64_p128.c.
> For vdup-vmod.c, why do you add the "&& defined(__aarch64__)"
> condition? These intrinsics are defined in arm/arm_neon.h, right?
> They are tested in p64_p128.c
I should have looked for them; they weren't being tested before, so I had
mistakenly assumed that they weren't available. Now I realize I just need
to add the proper test option to the file to enable crypto. I'll update this as well.
> Looking at your patch, it seems some tests are currently missing for arm:
> vget_high_p64. I'm not sure why I missed it when I removed neont-
> testgen...
I'll adjust the test conditions so they run for ARM as well.
>
> Regarding vreinterpret_p128.c, doesn't the existing effective-target
> arm_crypto_ok prevent the tests from running on aarch64?
Yes, they do. I was comparing the output against a clean version and hadn't noticed
that they weren't running. Thanks!
>
> Thanks,
>
> Christophe
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
2016-11-08 11:59 ` Tamar Christina
@ 2016-11-24 11:45 ` Tamar Christina
2016-11-25 14:54 ` Christophe Lyon
0 siblings, 1 reply; 17+ messages in thread
From: Tamar Christina @ 2016-11-24 11:45 UTC (permalink / raw)
To: Christophe Lyon
Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
James Greenhalgh, Kyrylo Tkachov, nd
[-- Attachment #1: Type: text/plain, Size: 2269 bytes --]
Hi Christophe,
I have combined most of the tests in p64_p128 except for the
vreinterpret_p128 and vreinterpret_p64 ones, because I felt the amount
of code that would have to be added to p64_p128, versus having them in those
files, isn't worth it, since a lot of the test setup would have to be copied.
Kind regards,
Tamar
________________________________________
From: Tamar Christina
Sent: Tuesday, November 8, 2016 11:58:46 AM
To: Christophe Lyon
Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
Hi Christophe,
Thanks for the review!
>
> A while ago I added p64_p128.c, to contain all the poly64/128 tests except for
> vreinterpret.
> Why do you need to create p64.c ?
I originally created it because I had a much smaller set of intrinsics that I wanted to
add initially; this grew, and it hadn't occurred to me that I could use the existing file now.
Another reason was the effective-target arm_crypto_ok, as you mentioned below.
>
> Similarly, adding tests for vcreate_p64 etc... in p64.c or p64_p128.c might be
> easier to maintain than adding them to vcreate.c etc with several #ifdef
> conditions.
Fair enough, I'll move them to p64_p128.c.
> For vdup-vmod.c, why do you add the "&& defined(__aarch64__)"
> condition? These intrinsics are defined in arm/arm_neon.h, right?
> They are tested in p64_p128.c
I should have looked for them; they weren't being tested before, so I had
mistakenly assumed that they weren't available. Now I realize I just need
to add the proper test option to the file to enable crypto. I'll update this as well.
> Looking at your patch, it seems some tests are currently missing for arm:
> vget_high_p64. I'm not sure why I missed it when I removed neont-
> testgen...
I'll adjust the test conditions so they run for ARM as well.
>
> Regarding vreinterpret_p128.c, doesn't the existing effective-target
> arm_crypto_ok prevent the tests from running on aarch64?
Yes, they do. I was comparing the output against a clean version and hadn't noticed
that they weren't running. Thanks!
>
> Thanks,
>
> Christophe
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: poly_test.patch --]
[-- Type: text/x-patch; name="poly_test.patch", Size: 27130 bytes --]
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
index 462141586b3db7c5256c74b08fa0449210634226..174c1948221025b860aaac503354b406fa804007 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
@@ -32,6 +32,13 @@ extern size_t strlen(const char *);
VECT_VAR(expected, int, 16, 4) -> expected_int16x4
VECT_VAR_DECL(expected, int, 16, 4) -> int16x4_t expected_int16x4
*/
+/* Some instructions don't exist on ARM.
+ Use this macro to guard against them. */
+#ifdef __aarch64__
+#define AARCH64_ONLY(X) X
+#else
+#define AARCH64_ONLY(X)
+#endif
#define xSTR(X) #X
#define STR(X) xSTR(X)
@@ -92,6 +99,13 @@ extern size_t strlen(const char *);
fprintf(stderr, "CHECKED %s %s\n", STR(VECT_TYPE(T, W, N)), MSG); \
}
+#if defined (__ARM_FEATURE_CRYPTO)
+#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT) \
+ CHECK(MSG,T,W,N,FMT,EXPECTED,COMMENT)
+#else
+#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT)
+#endif
+
/* Floating-point variant. */
#define CHECK_FP(MSG,T,W,N,FMT,EXPECTED,COMMENT) \
{ \
@@ -184,6 +198,9 @@ extern ARRAY(expected, uint, 32, 2);
extern ARRAY(expected, uint, 64, 1);
extern ARRAY(expected, poly, 8, 8);
extern ARRAY(expected, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+extern ARRAY(expected, poly, 64, 1);
+#endif
extern ARRAY(expected, hfloat, 16, 4);
extern ARRAY(expected, hfloat, 32, 2);
extern ARRAY(expected, hfloat, 64, 1);
@@ -197,11 +214,14 @@ extern ARRAY(expected, uint, 32, 4);
extern ARRAY(expected, uint, 64, 2);
extern ARRAY(expected, poly, 8, 16);
extern ARRAY(expected, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+extern ARRAY(expected, poly, 64, 2);
+#endif
extern ARRAY(expected, hfloat, 16, 8);
extern ARRAY(expected, hfloat, 32, 4);
extern ARRAY(expected, hfloat, 64, 2);
-#define CHECK_RESULTS_NAMED_NO_FP16(test_name,EXPECTED,comment) \
+#define CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64(test_name,EXPECTED,comment) \
{ \
CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \
CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \
@@ -228,6 +248,13 @@ extern ARRAY(expected, hfloat, 64, 2);
CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \
} \
+#define CHECK_RESULTS_NAMED_NO_FP16(test_name,EXPECTED,comment) \
+ { \
+ CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64(test_name, EXPECTED, comment); \
+ CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \
+ CHECK_CRYPTO(test_name, poly, 64, 2, PRIx64, EXPECTED, comment); \
+ } \
+
/* Check results against EXPECTED. Operates on all possible vector types. */
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
#define CHECK_RESULTS_NAMED(test_name,EXPECTED,comment) \
@@ -398,6 +425,9 @@ static void clean_results (void)
CLEAN(result, uint, 64, 1);
CLEAN(result, poly, 8, 8);
CLEAN(result, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ CLEAN(result, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CLEAN(result, float, 16, 4);
#endif
@@ -413,6 +443,9 @@ static void clean_results (void)
CLEAN(result, uint, 64, 2);
CLEAN(result, poly, 8, 16);
CLEAN(result, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ CLEAN(result, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CLEAN(result, float, 16, 8);
#endif
@@ -438,6 +471,13 @@ static void clean_results (void)
#define DECL_VARIABLE(VAR, T1, W, N) \
VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)
+#if defined (__ARM_FEATURE_CRYPTO)
+#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N) \
+ DECL_VARIABLE(VAR, T1, W, N)
+#else
+#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N)
+#endif
+
/* Declare only 64 bits signed variants. */
#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR) \
DECL_VARIABLE(VAR, int, 8, 8); \
@@ -473,6 +513,7 @@ static void clean_results (void)
DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 8); \
DECL_VARIABLE(VAR, poly, 16, 4); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1); \
DECL_VARIABLE(VAR, float, 16, 4); \
DECL_VARIABLE(VAR, float, 32, 2)
#else
@@ -481,6 +522,7 @@ static void clean_results (void)
DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 8); \
DECL_VARIABLE(VAR, poly, 16, 4); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1); \
DECL_VARIABLE(VAR, float, 32, 2)
#endif
@@ -491,6 +533,7 @@ static void clean_results (void)
DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 16); \
DECL_VARIABLE(VAR, poly, 16, 8); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2); \
DECL_VARIABLE(VAR, float, 16, 8); \
DECL_VARIABLE(VAR, float, 32, 4)
#else
@@ -499,6 +542,7 @@ static void clean_results (void)
DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 16); \
DECL_VARIABLE(VAR, poly, 16, 8); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2); \
DECL_VARIABLE(VAR, float, 32, 4)
#endif
/* Declare all variants. */
@@ -531,6 +575,13 @@ static void clean_results (void)
/* Helpers to call macros with 1 constant and 5 variable
arguments. */
+#if defined (__ARM_FEATURE_CRYPTO)
+#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N) \
+ MACRO(VAR1, VAR2, T1, T2, T3, W, N)
+#else
+#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N)
+#endif
+
#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \
MACRO(VAR, , int, s, 8, 8); \
MACRO(VAR, , int, s, 16, 4); \
@@ -601,13 +652,15 @@ static void clean_results (void)
TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
MACRO(VAR1, VAR2, , poly, p, 8, 8); \
- MACRO(VAR1, VAR2, , poly, p, 16, 4)
+ MACRO(VAR1, VAR2, , poly, p, 16, 4); \
+ MACRO_CRYPTO(MACRO, VAR1, VAR2, , poly, p, 64, 1)
#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \
TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
MACRO(VAR1, VAR2, q, poly, p, 8, 16); \
- MACRO(VAR1, VAR2, q, poly, p, 16, 8)
+ MACRO(VAR1, VAR2, q, poly, p, 16, 8); \
+ MACRO_CRYPTO(MACRO, VAR1, VAR2, q, poly, p, 64, 2)
#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \
TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2); \
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
index 519cffb0125079022e7ba876c1ca657d9e37cac2..f92c820f4c7de0dff2e593559412cf1702e860ff 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
@@ -1,8 +1,9 @@
/* This file contains tests for all the *p64 intrinsics, except for
vreinterpret which have their own testcase. */
-/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */
/* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/
#include <arm_neon.h>
#include "arm-neon-ref.h"
@@ -14,7 +15,7 @@ VECT_VAR_DECL(vbsl_expected,poly,64,2) [] = { 0xfffffff1,
0xfffffff1 };
/* Expected results: vceq. */
-VECT_VAR_DECL(vceq_expected,uint,64,1) [] = { 0x0 };
+VECT_VAR_DECL(vceq_expected,poly,64,1) [] = { 0x0 };
/* Expected results: vcombine. */
VECT_VAR_DECL(vcombine_expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x88 };
@@ -38,6 +39,17 @@ VECT_VAR_DECL(vdup_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(vdup_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
0xfffffffffffffff2 };
+/* Expected results: vmov_n. */
+VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+ 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff2 };
+
/* Expected results: vext. */
VECT_VAR_DECL(vext_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 };
@@ -45,6 +57,9 @@ VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 };
/* Expected results: vget_low. */
VECT_VAR_DECL(vget_low_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+/* Expected results: vget_high. */
+VECT_VAR_DECL(vget_high_expected,poly,64,1) [] = { 0xfffffffffffffff1 };
+
/* Expected results: vld1. */
VECT_VAR_DECL(vld1_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vld1_expected,poly,64,2) [] = { 0xfffffffffffffff0,
@@ -109,6 +124,39 @@ VECT_VAR_DECL(vst1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vst1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0,
0x3333333333333333 };
+/* Expected results: vldX_lane. */
+VECT_VAR_DECL(expected_vld_st2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld_st2_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st2_1,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld_st3_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st3_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld_st3_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld_st4_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st4_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld_st4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld_st4_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld_st4_3,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+
+/* Expected results: vget_lane. */
+VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0;
+VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0;
+
int main (void)
{
int i;
@@ -159,24 +207,24 @@ int main (void)
VECT_VAR(vceq_vector_res, T3, W, N) = \
INSN##Q##_##T2##W(VECT_VAR(vceq_vector, T1, W, N), \
VECT_VAR(vceq_vector2, T1, W, N)); \
- vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vceq_vector_res, T3, W, N))
+ vst1##Q##_p##W(VECT_VAR(result, T3, W, N), VECT_VAR(vceq_vector_res, T3, W, N))
#define TEST_VCOMP(INSN, Q, T1, T2, T3, W, N) \
- TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N)
+ TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N);
DECL_VARIABLE(vceq_vector, poly, 64, 1);
DECL_VARIABLE(vceq_vector2, poly, 64, 1);
- DECL_VARIABLE(vceq_vector_res, uint, 64, 1);
+ DECL_VARIABLE(vceq_vector_res, poly, 64, 1);
- CLEAN(result, uint, 64, 1);
+ CLEAN(result, poly, 64, 1);
VLOAD(vceq_vector, buffer, , poly, p, 64, 1);
VDUP(vceq_vector2, , poly, p, 64, 1, 0x88);
- TEST_VCOMP(vceq, , poly, p, uint, 64, 1);
+ TEST_VCOMP(vceq, , poly, p, poly, 64, 1);
- CHECK(TEST_MSG, uint, 64, 1, PRIx64, vceq_expected, "");
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, vceq_expected, "");
/* vcombine_p64 tests. */
#undef TEST_MSG
@@ -288,6 +336,44 @@ int main (void)
}
}
+ /* vmov_n_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VMOV/VMOVQ"
+
+#define TEST_VMOV(Q, T1, T2, W, N) \
+ VECT_VAR(vmov_n_vector, T1, W, N) = \
+ vmov##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]); \
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vmov_n_vector, T1, W, N))
+
+ DECL_VARIABLE(vmov_n_vector, poly, 64, 1);
+ DECL_VARIABLE(vmov_n_vector, poly, 64, 2);
+
+ /* Try to read different places from the input buffer. */
+ for (i=0; i< 3; i++) {
+ CLEAN(result, poly, 64, 1);
+ CLEAN(result, poly, 64, 2);
+
+ TEST_VDUP(, poly, p, 64, 1);
+ TEST_VDUP(q, poly, p, 64, 2);
+
+ switch (i) {
+ case 0:
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected0, "");
+ CHECK(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected0, "");
+ break;
+ case 1:
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected1, "");
+ CHECK(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected1, "");
+ break;
+ case 2:
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected2, "");
+ CHECK(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected2, "");
+ break;
+ default:
+ abort();
+ }
+ }
+
/* vexit_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VEXT/VEXTQ"
@@ -341,6 +427,26 @@ int main (void)
CHECK(TEST_MSG, poly, 64, 1, PRIx64, vget_low_expected, "");
+ /* vget_high_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VGET_HIGH"
+
+#define TEST_VGET_HIGH(T1, T2, W, N, N2) \
+ VECT_VAR(vget_high_vector64, T1, W, N) = \
+ vget_high_##T2##W(VECT_VAR(vget_high_vector128, T1, W, N2)); \
+ vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vget_high_vector64, T1, W, N))
+
+ DECL_VARIABLE(vget_high_vector64, poly, 64, 1);
+ DECL_VARIABLE(vget_high_vector128, poly, 64, 2);
+
+ CLEAN(result, poly, 64, 1);
+
+ VLOAD(vget_high_vector128, buffer, q, poly, p, 64, 2);
+
+ TEST_VGET_HIGH(poly, p, 64, 1, 2);
+
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, vget_high_expected, "");
+
/* vld1_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VLD1/VLD1Q"
@@ -432,6 +538,8 @@ int main (void)
CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld1_lane_expected, "");
CHECK(TEST_MSG, poly, 64, 2, PRIx64, vld1_lane_expected, "");
+#ifdef __aarch64__
+
/* vldX_p64 tests. */
#define DECL_VLDX(T1, W, N, X) \
VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vldX_vector, T1, W, N, X); \
@@ -560,6 +668,8 @@ int main (void)
TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 4, 3);
CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_3, "chunk 3");
+#endif /* __aarch64__. */
+
/* vsli_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VSLI"
@@ -645,7 +755,7 @@ int main (void)
VECT_VAR(vst1_lane_vector, T1, W, N) = \
vld1##Q##_##T2##W(VECT_VAR(buffer, T1, W, N)); \
vst1##Q##_lane_##T2##W(VECT_VAR(result, T1, W, N), \
- VECT_VAR(vst1_lane_vector, T1, W, N), L)
+ VECT_VAR(vst1_lane_vector, T1, W, N), L);
DECL_VARIABLE(vst1_lane_vector, poly, 64, 1);
DECL_VARIABLE(vst1_lane_vector, poly, 64, 2);
@@ -659,5 +769,260 @@ int main (void)
CHECK(TEST_MSG, poly, 64, 1, PRIx64, vst1_lane_expected, "");
CHECK(TEST_MSG, poly, 64, 2, PRIx64, vst1_lane_expected, "");
+#ifdef __aarch64__
+
+ /* vget_lane_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VGET_LANE/VGETQ_LANE"
+
+#define TEST_VGET_LANE(Q, T1, T2, W, N, L) \
+ VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \
+ if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) { \
+ fprintf(stderr, \
+ "ERROR in %s (%s line %d in result '%s') at type %s " \
+ "got 0x%" PRIx##W " != 0x%" PRIx##W "\n", \
+ TEST_MSG, __FILE__, __LINE__, \
+ STR(VECT_VAR(vget_lane_expected, T1, W, N)), \
+ STR(VECT_NAME(T1, W, N)), \
+ VECT_VAR(vget_lane_vector, T1, W, N), \
+ VECT_VAR(vget_lane_expected, T1, W, N)); \
+ abort (); \
+ }
+
+ /* Initialize input values. */
+ DECL_VARIABLE(vector, poly, 64, 1);
+ DECL_VARIABLE(vector, poly, 64, 2);
+
+ VLOAD(vector, buffer, , poly, p, 64, 1);
+ VLOAD(vector, buffer, q, poly, p, 64, 2);
+
+ VECT_VAR_DECL(vget_lane_vector, poly, 64, 1);
+ VECT_VAR_DECL(vget_lane_vector, poly, 64, 2);
+
+ TEST_VGET_LANE( , poly, p, 64, 1, 0);
+ TEST_VGET_LANE(q, poly, p, 64, 2, 0);
+
+ /* vldX_lane_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VLDX_LANE/VLDXQ_LANE"
+
+VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 64, 2);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 64, 3);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 64, 4);
+
+ /* In this case, input variables are arrays of vectors. */
+#define DECL_VLD_STX_LANE(T1, W, N, X) \
+ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \
+ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \
+ VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
+
+ /* We need to use a temporary result buffer (result_bis), because
+ the one used for other tests is not large enough. A subset of the
+ result data is moved from result_bis to result, and it is this
+ subset which is used to check the actual behavior. The next
+ macro enables to move another chunk of data from result_bis to
+ result. */
+ /* We also use another extra input buffer (buffer_src), which we
+ fill with 0xAA, and which is used to load a vector from which we
+ read a given lane. */
+
+#define TEST_VLDX_LANE(Q, T1, T2, W, N, X, L) \
+ memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \
+ sizeof(VECT_VAR(buffer_src, T1, W, N))); \
+ \
+ VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \
+ vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \
+ \
+ VECT_ARRAY_VAR(vector, T1, W, N, X) = \
+ /* Use dedicated init buffer, of size X. */ \
+ vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \
+ VECT_ARRAY_VAR(vector_src, T1, W, N, X), \
+ L); \
+ vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \
+ VECT_ARRAY_VAR(vector, T1, W, N, X)); \
+ memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
+ sizeof(VECT_VAR(result, T1, W, N)))
+
+ /* Overwrite "result" with the contents of "result_bis"[Y]. */
+#undef TEST_EXTRA_CHUNK
+#define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \
+ memcpy(VECT_VAR(result, T1, W, N), \
+ &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \
+ sizeof(VECT_VAR(result, T1, W, N)));
+
+ /* Add some padding to try to catch out of bound accesses. */
+#define ARRAY1(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[1]={42}
+#define DUMMY_ARRAY(V, T, W, N, L) \
+ VECT_VAR_DECL(V,T,W,N)[N*L]={0}; \
+ ARRAY1(V##_pad,T,W,N)
+
+#define DECL_ALL_VLD_STX_LANE(X) \
+ DECL_VLD_STX_LANE(poly, 64, 1, X); \
+ DECL_VLD_STX_LANE(poly, 64, 2, X);
+
+#define TEST_ALL_VLDX_LANE(X) \
+ TEST_VLDX_LANE(, poly, p, 64, 1, X, 0); \
+ TEST_VLDX_LANE(q, poly, p, 64, 2, X, 0);
+
+#define TEST_ALL_EXTRA_CHUNKS(X,Y) \
+ TEST_EXTRA_CHUNK(poly, 64, 1, X, Y) \
+ TEST_EXTRA_CHUNK(poly, 64, 2, X, Y)
+
+#define CHECK_RESULTS_VLD_STX_LANE(test_name,EXPECTED,comment) \
+ CHECK(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \
+ CHECK(test_name, poly, 64, 2, PRIx64, EXPECTED, comment);
+
+ /* Declare the temporary buffers / variables. */
+ DECL_ALL_VLD_STX_LANE(2);
+ DECL_ALL_VLD_STX_LANE(3);
+ DECL_ALL_VLD_STX_LANE(4);
+
+ DUMMY_ARRAY(buffer_src, poly, 64, 1, 4);
+ DUMMY_ARRAY(buffer_src, poly, 64, 2, 4);
+
+ /* Check vld2_lane/vld2q_lane. */
+ clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD2_LANE/VLD2Q_LANE"
+ TEST_ALL_VLDX_LANE(2);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st2_0, " chunk 0");
+
+ TEST_ALL_EXTRA_CHUNKS(2, 1);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st2_1, " chunk 1");
+
+ /* Check vld3_lane/vld3q_lane. */
+ clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD3_LANE/VLD3Q_LANE"
+ TEST_ALL_VLDX_LANE(3);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_0, " chunk 0");
+
+ TEST_ALL_EXTRA_CHUNKS(3, 1);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_1, " chunk 1");
+
+ TEST_ALL_EXTRA_CHUNKS(3, 2);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_2, " chunk 2");
+
+ /* Check vld4_lane/vld4q_lane. */
+ clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD4_LANE/VLD4Q_LANE"
+ TEST_ALL_VLDX_LANE(4);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_0, " chunk 0");
+
+ TEST_ALL_EXTRA_CHUNKS(4, 1);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_1, " chunk 1");
+ TEST_ALL_EXTRA_CHUNKS(4, 2);
+
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_2, " chunk 2");
+
+ TEST_ALL_EXTRA_CHUNKS(4, 3);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_3, " chunk 3");
+
+ /* In this case, input variables are arrays of vectors. */
+#define DECL_VSTX_LANE(T1, W, N, X) \
+ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \
+ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \
+ VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
+
+ /* We need to use a temporary result buffer (result_bis), because
+ the one used for other tests is not large enough. A subset of the
+ result data is moved from result_bis to result, and it is this
+ subset which is used to check the actual behavior. The next
+ macro enables to move another chunk of data from result_bis to
+ result. */
+ /* We also use another extra input buffer (buffer_src), which we
+ fill with 0xAA, and which is used to load a vector from which we
+ read a given lane. */
+#define TEST_VSTX_LANE(Q, T1, T2, W, N, X, L) \
+ memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \
+ sizeof(VECT_VAR(buffer_src, T1, W, N))); \
+ memset (VECT_VAR(result_bis_##X, T1, W, N), 0, \
+ sizeof(VECT_VAR(result_bis_##X, T1, W, N))); \
+ \
+ VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \
+ vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \
+ \
+ VECT_ARRAY_VAR(vector, T1, W, N, X) = \
+ /* Use dedicated init buffer, of size X. */ \
+ vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \
+ VECT_ARRAY_VAR(vector_src, T1, W, N, X), \
+ L); \
+ vst##X##Q##_lane_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \
+ VECT_ARRAY_VAR(vector, T1, W, N, X), \
+ L); \
+ memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
+ sizeof(VECT_VAR(result, T1, W, N)));
+
+#define TEST_ALL_VSTX_LANE(X) \
+ TEST_VSTX_LANE(, poly, p, 64, 1, X, 0); \
+ TEST_VSTX_LANE(q, poly, p, 64, 2, X, 0);
+
+ /* Check vst2_lane/vst2q_lane. */
+ clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VST2_LANE/VST2Q_LANE"
+ TEST_ALL_VSTX_LANE(2);
+
+#define CMT " (chunk 0)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st2_0, CMT);
+
+ TEST_ALL_EXTRA_CHUNKS(2, 1);
+#undef CMT
+#define CMT " chunk 1"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st2_1, CMT);
+
+ /* Check vst3_lane/vst3q_lane. */
+ clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VST3_LANE/VST3Q_LANE"
+ TEST_ALL_VSTX_LANE(3);
+
+#undef CMT
+#define CMT " (chunk 0)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_0, CMT);
+
+ TEST_ALL_EXTRA_CHUNKS(3, 1);
+
+#undef CMT
+#define CMT " (chunk 1)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_1, CMT);
+
+ TEST_ALL_EXTRA_CHUNKS(3, 2);
+
+#undef CMT
+#define CMT " (chunk 2)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_2, CMT);
+
+ /* Check vst4_lane/vst4q_lane. */
+ clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VST4_LANE/VST4Q_LANE"
+ TEST_ALL_VSTX_LANE(4);
+
+#undef CMT
+#define CMT " (chunk 0)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_0, CMT);
+
+ TEST_ALL_EXTRA_CHUNKS(4, 1);
+
+#undef CMT
+#define CMT " (chunk 1)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_1, CMT);
+
+ TEST_ALL_EXTRA_CHUNKS(4, 2);
+
+#undef CMT
+#define CMT " (chunk 2)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_2, CMT);
+
+ TEST_ALL_EXTRA_CHUNKS(4, 3);
+
+#undef CMT
+#define CMT " (chunk 3)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_3, CMT);
+
+#endif /* __aarch64__. */
+
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
index 808641524c47b2c245ee2f10e74a784a7bccefc9..f192d4dda514287c8417e7fc922bc580b209b163 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
@@ -1,7 +1,8 @@
/* This file contains tests for the vreinterpret *p128 intrinsics. */
-/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */
/* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } } */
#include <arm_neon.h>
#include "arm-neon-ref.h"
@@ -78,9 +79,7 @@ VECT_VAR_DECL(vreint_expected_q_f16_p128,hfloat,16,8) [] = { 0xfff0, 0xffff,
int main (void)
{
DECL_VARIABLE_128BITS_VARIANTS(vreint_vector);
- DECL_VARIABLE(vreint_vector, poly, 64, 2);
DECL_VARIABLE_128BITS_VARIANTS(vreint_vector_res);
- DECL_VARIABLE(vreint_vector_res, poly, 64, 2);
clean_results ();
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
index 1d8cf9aa69f0b5b0717e98de613e3c350d6395d4..c915fd2fea6b4d8770c9a4aab88caad391105d89 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
@@ -1,7 +1,8 @@
/* This file contains tests for the vreinterpret *p64 intrinsics. */
-/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */
/* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } } */
#include <arm_neon.h>
#include "arm-neon-ref.h"
@@ -121,11 +122,7 @@ int main (void)
CHECK_FP(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, "");
DECL_VARIABLE_ALL_VARIANTS(vreint_vector);
- DECL_VARIABLE(vreint_vector, poly, 64, 1);
- DECL_VARIABLE(vreint_vector, poly, 64, 2);
DECL_VARIABLE_ALL_VARIANTS(vreint_vector_res);
- DECL_VARIABLE(vreint_vector_res, poly, 64, 1);
- DECL_VARIABLE(vreint_vector_res, poly, 64, 2);
clean_results ();
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
2016-11-24 11:45 ` Tamar Christina
@ 2016-11-25 14:54 ` Christophe Lyon
2016-11-25 15:03 ` Christophe Lyon
0 siblings, 1 reply; 17+ messages in thread
From: Christophe Lyon @ 2016-11-25 14:54 UTC (permalink / raw)
To: Tamar Christina
Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
James Greenhalgh, Kyrylo Tkachov, nd
Hi Tamar,
On 24 November 2016 at 12:45, Tamar Christina <Tamar.Christina@arm.com> wrote:
> Hi Christophe,
>
> I have combined most of the tests in p64_p128 except for the
> vreinterpret_p128 and vreinterpret_p64 ones because I felt the number
> of code that would be have to be added to p64_p128 vs having them in those
> files isn't worth it. Since a lot of the test setup would have to be copied.
>
A few comments about this new version:
* arm-neon-ref.h: why do you create CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64?
Can't you just add calls to CHECK_CRYPTO in the existing
CHECK_RESULTS_NAMED_NO_FP16?
* p64_p128:
From what I can see ARM and AArch64 differ on the vceq variants
available with poly64.
For ARM, arm_neon.h contains: uint64x1_t vceq_p64 (poly64x1_t __a,
poly64x1_t __b)
For AArch64, I can't see vceq_p64 in arm_neon.h? ... Actually I've just noticed
the other you submitted while I was writing this, where you add vceq_p64 for
aarch64, but it still returns uint64_t.
Why do you change the vceq_64 test to return poly64_t instead of uint64_t?
Why do you add #ifdef __aarch64__ before vldX_p64 tests and until vsli_p64?
The comment /* vget_lane_p64 tests. */ is wrong before VLDX_LANE tests
You need to protect the new vmov, vget_high and vget_lane tests with
#ifdef __aarch64__.
Christophe
> Kind regards,
> Tamar
> ________________________________________
> From: Tamar Christina
> Sent: Tuesday, November 8, 2016 11:58:46 AM
> To: Christophe Lyon
> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
>
> Hi Christophe,
>
> Thanks for the review!
>
>>
>> A while ago I added p64_p128.c, to contain all the poly64/128 tests except for
>> vreinterpret.
>> Why do you need to create p64.c ?
>
> I originally created it because I had a much smaller set of intrinsics that I wanted to
> add initially, this grew and It hadn't occurred to me that I can use the existing file now.
>
> Another reason was the effective-target arm_crypto_ok as you mentioned below.
>
>>
>> Similarly, adding tests for vcreate_p64 etc... in p64.c or p64_p128.c might be
>> easier to maintain than adding them to vcreate.c etc with several #ifdef
>> conditions.
>
> Fair enough, I'll move them to p64_p128.c.
>
>> For vdup-vmod.c, why do you add the "&& defined(__aarch64__)"
>> condition? These intrinsics are defined in arm/arm_neon.h, right?
>> They are tested in p64_p128.c
>
> I should have looked for them, they weren't being tested before so I had
> mistakenly assumed that they weren't available. Now I realize I just need
> to add the proper test option to the file to enable crypto. I'll update this as well.
>
>> Looking at your patch, it seems some tests are currently missing for arm:
>> vget_high_p64. I'm not sure why I missed it when I removed neont-
>> testgen...
>
> I'll adjust the test conditions so they run for ARM as well.
>
>>
>> Regarding vreinterpret_p128.c, doesn't the existing effective-target
>> arm_crypto_ok prevent the tests from running on aarch64?
>
> Yes they do, I was comparing the output against a clean version and hadn't noticed
> that they weren't running. Thanks!
>
>>
>> Thanks,
>>
>> Christophe
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
2016-11-25 14:54 ` Christophe Lyon
@ 2016-11-25 15:03 ` Christophe Lyon
2016-11-25 16:01 ` Tamar Christina
0 siblings, 1 reply; 17+ messages in thread
From: Christophe Lyon @ 2016-11-25 15:03 UTC (permalink / raw)
To: Tamar Christina
Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
James Greenhalgh, Kyrylo Tkachov, nd
On 25 November 2016 at 15:53, Christophe Lyon
<christophe.lyon@linaro.org> wrote:
> Hi Tamar,
>
> On 24 November 2016 at 12:45, Tamar Christina <Tamar.Christina@arm.com> wrote:
>> Hi Christoph,
>>
>> I have combined most of the tests in p64_p128 except for the
>> vreinterpret_p128 and vreinterpret_p64 ones because I felt the number
>> of code that would be have to be added to p64_p128 vs having them in those
>> files isn't worth it. Since a lot of the test setup would have to be copied.
>>
>
> A few comments about this new version:
> * arm-neon-ref.h: why do you create CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64?
> Can't you just add calls to CHECK_CRYPTO in the existing
> CHECK_RESULTS_NAMED_NO_FP16?
>
> * p64_p128:
> From what I can see ARM and AArch64 differ on the vceq variants
> available with poly64.
> For ARM, arm_neon.h contains: uint64x1_t vceq_p64 (poly64x1_t __a,
> poly64x1_t __b)
> For AArch64, I can't see vceq_p64 in arm_neon.h? ... Actually I've just noticed
> the other you submitted while I was writing this, where you add vceq_p64 for
> aarch64, but it still returns uint64_t.
> Why do you change the vceq_64 test to return poly64_t instead of uint64_t?
>
> Why do you add #ifdef __aarch64 before vldX_p64 tests and until vsli_p64?
>
> The comment /* vget_lane_p64 tests. */ is wrong before VLDX_LANE tests
>
> You need to protect the new vmov, vget_high and vget_lane tests with
> #ifdef __aarch64__.
>
Actually, vget_high_p64 exists on arm, so no need for the #ifdef for it.
> Christophe
>
>> Kind regards,
>> Tamar
>> ________________________________________
>> From: Tamar Christina
>> Sent: Tuesday, November 8, 2016 11:58:46 AM
>> To: Christophe Lyon
>> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
>> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
>>
>> Hi Christophe,
>>
>> Thanks for the review!
>>
>>>
>>> A while ago I added p64_p128.c, to contain all the poly64/128 tests except for
>>> vreinterpret.
>>> Why do you need to create p64.c ?
>>
>> I originally created it because I had a much smaller set of intrinsics that I wanted to
>> add initially, this grew and It hadn't occurred to me that I can use the existing file now.
>>
>> Another reason was the effective-target arm_crypto_ok as you mentioned below.
>>
>>>
>>> Similarly, adding tests for vcreate_p64 etc... in p64.c or p64_p128.c might be
>>> easier to maintain than adding them to vcreate.c etc with several #ifdef
>>> conditions.
>>
>> Fair enough, I'll move them to p64_p128.c.
>>
>>> For vdup-vmod.c, why do you add the "&& defined(__aarch64__)"
>>> condition? These intrinsics are defined in arm/arm_neon.h, right?
>>> They are tested in p64_p128.c
>>
>> I should have looked for them, they weren't being tested before so I had
>> Mistakenly assumed that they weren't available. Now I realize I just need
>> To add the proper test option to the file to enable crypto. I'll update this as well.
>>
>>> Looking at your patch, it seems some tests are currently missing for arm:
>>> vget_high_p64. I'm not sure why I missed it when I removed neont-
>>> testgen...
>>
>> I'll adjust the test conditions so they run for ARM as well.
>>
>>>
>>> Regarding vreinterpret_p128.c, doesn't the existing effective-target
>>> arm_crypto_ok prevent the tests from running on aarch64?
>>
>> Yes they do, I was comparing the output against a clean version and hasn't noticed
>> That they weren't running. Thanks!
>>
>>>
>>> Thanks,
>>>
>>> Christophe
^ permalink raw reply [flat|nested] 17+ messages in thread
* RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
2016-11-25 15:03 ` Christophe Lyon
@ 2016-11-25 16:01 ` Tamar Christina
2016-11-29 9:50 ` Tamar Christina
0 siblings, 1 reply; 17+ messages in thread
From: Tamar Christina @ 2016-11-25 16:01 UTC (permalink / raw)
To: Christophe Lyon
Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
James Greenhalgh, Kyrylo Tkachov, nd
>
> > A few comments about this new version:
> > * arm-neon-ref.h: why do you create
> CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64?
> > Can't you just add calls to CHECK_CRYPTO in the existing
> > CHECK_RESULTS_NAMED_NO_FP16?
Yes, that should be fine, I didn't used to have CHECK_CRYPTO before and when I added it
I didn't remove the split. I'll do it now.
> >
> > * p64_p128:
> > From what I can see ARM and AArch64 differ on the vceq variants
> > available with poly64.
> > For ARM, arm_neon.h contains: uint64x1_t vceq_p64 (poly64x1_t __a,
> > poly64x1_t __b) For AArch64, I can't see vceq_p64 in arm_neon.h? ...
> > Actually I've just noticed the other you submitted while I was writing
> > this, where you add vceq_p64 for aarch64, but it still returns
> > uint64_t.
> > Why do you change the vceq_64 test to return poly64_t instead of
> uint64_t?
This patch is slightly outdated. The correct type is `uint64_t`, but when it was noticed,
this patch had already been sent. New one coming soon.
> >
> > Why do you add #ifdef __aarch64 before vldX_p64 tests and until vsli_p64?
> >
This is wrong, remove them. It was supposed to be around the vldX_lane_p64 tests.
> > The comment /* vget_lane_p64 tests. */ is wrong before VLDX_LANE
> > tests
> >
> > You need to protect the new vmov, vget_high and vget_lane tests with
> > #ifdef __aarch64__.
> >
vget_lane is already in an #ifdef, vmov you're right, but I also notice that the
test calls VDUP instead of VMOV, which explains why I didn't get a test failure.
Thanks for the feedback,
I'll get these updated.
>
> Actually, vget_high_p64 exists on arm, so no need for the #ifdef for it.
>
>
> > Christophe
> >
> >> Kind regards,
> >> Tamar
> >> ________________________________________
> >> From: Tamar Christina
> >> Sent: Tuesday, November 8, 2016 11:58:46 AM
> >> To: Christophe Lyon
> >> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard
> >> Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
> >> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing
> >> Poly64_t intrinsics to GCC
> >>
> >> Hi Christophe,
> >>
> >> Thanks for the review!
> >>
> >>>
> >>> A while ago I added p64_p128.c, to contain all the poly64/128 tests
> >>> except for vreinterpret.
> >>> Why do you need to create p64.c ?
> >>
> >> I originally created it because I had a much smaller set of
> >> intrinsics that I wanted to add initially, this grew and It hadn't occurred to
> me that I can use the existing file now.
> >>
> >> Another reason was the effective-target arm_crypto_ok as you
> mentioned below.
> >>
> >>>
> >>> Similarly, adding tests for vcreate_p64 etc... in p64.c or
> >>> p64_p128.c might be easier to maintain than adding them to vcreate.c
> >>> etc with several #ifdef conditions.
> >>
> >> Fair enough, I'll move them to p64_p128.c.
> >>
> >>> For vdup-vmod.c, why do you add the "&& defined(__aarch64__)"
> >>> condition? These intrinsics are defined in arm/arm_neon.h, right?
> >>> They are tested in p64_p128.c
> >>
> >> I should have looked for them, they weren't being tested before so I
> >> had Mistakenly assumed that they weren't available. Now I realize I
> >> just need To add the proper test option to the file to enable crypto. I'll
> update this as well.
> >>
> >>> Looking at your patch, it seems some tests are currently missing for arm:
> >>> vget_high_p64. I'm not sure why I missed it when I removed neont-
> >>> testgen...
> >>
> >> I'll adjust the test conditions so they run for ARM as well.
> >>
> >>>
> >>> Regarding vreinterpret_p128.c, doesn't the existing effective-target
> >>> arm_crypto_ok prevent the tests from running on aarch64?
> >>
> >> Yes they do, I was comparing the output against a clean version and
> >> hasn't noticed That they weren't running. Thanks!
> >>
> >>>
> >>> Thanks,
> >>>
> >>> Christophe
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
2016-11-25 16:01 ` Tamar Christina
@ 2016-11-29 9:50 ` Tamar Christina
2016-11-29 10:12 ` Christophe Lyon
2016-11-29 13:48 ` Kyrill Tkachov
0 siblings, 2 replies; 17+ messages in thread
From: Tamar Christina @ 2016-11-29 9:50 UTC (permalink / raw)
To: Christophe Lyon
Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
James Greenhalgh, Kyrylo Tkachov, nd
[-- Attachment #1: Type: text/plain, Size: 4464 bytes --]
Hi All,
The new patch contains the proper types for the intrinsics that should be returning uint64x1
and has the rest of the comments by Christophe in them.
Kind Regards,
Tamar
________________________________________
From: Tamar Christina
Sent: Friday, November 25, 2016 4:01:30 PM
To: Christophe Lyon
Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
>
> > A few comments about this new version:
> > * arm-neon-ref.h: why do you create
> CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64?
> > Can't you just add calls to CHECK_CRYPTO in the existing
> > CHECK_RESULTS_NAMED_NO_FP16?
Yes, that should be fine, I didn't used to have CHECK_CRYPTO before and when I added it
I didn't remove the split. I'll do it now.
> >
> > * p64_p128:
> > From what I can see ARM and AArch64 differ on the vceq variants
> > available with poly64.
> > For ARM, arm_neon.h contains: uint64x1_t vceq_p64 (poly64x1_t __a,
> > poly64x1_t __b) For AArch64, I can't see vceq_p64 in arm_neon.h? ...
> > Actually I've just noticed the other you submitted while I was writing
> > this, where you add vceq_p64 for aarch64, but it still returns
> > uint64_t.
> > Why do you change the vceq_64 test to return poly64_t instead of
> uint64_t?
This patch is slightly outdated. The correct type is `uint64_t` but when it was noticed
This patch was already sent. New one coming soon.
> >
> > Why do you add #ifdef __aarch64 before vldX_p64 tests and until vsli_p64?
> >
This is wrong, remove them. It was supposed to be around the vldX_lane_p64 tests.
> > The comment /* vget_lane_p64 tests. */ is wrong before VLDX_LANE
> > tests
> >
> > You need to protect the new vmov, vget_high and vget_lane tests with
> > #ifdef __aarch64__.
> >
vget_lane is already in an #ifdef, vmov you're right, but I also notice that the
test calls VDUP instead of VMOV, which explains why I didn't get a test failure.
Thanks for the feedback,
I'll get these updated.
>
> Actually, vget_high_p64 exists on arm, so no need for the #fidef for it.
>
>
> > Christophe
> >
> >> Kind regards,
> >> Tamar
> >> ________________________________________
> >> From: Tamar Christina
> >> Sent: Tuesday, November 8, 2016 11:58:46 AM
> >> To: Christophe Lyon
> >> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard
> >> Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
> >> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing
> >> Poly64_t intrinsics to GCC
> >>
> >> Hi Christophe,
> >>
> >> Thanks for the review!
> >>
> >>>
> >>> A while ago I added p64_p128.c, to contain all the poly64/128 tests
> >>> except for vreinterpret.
> >>> Why do you need to create p64.c ?
> >>
> >> I originally created it because I had a much smaller set of
> >> intrinsics that I wanted to add initially, this grew and It hadn't occurred to
> me that I can use the existing file now.
> >>
> >> Another reason was the effective-target arm_crypto_ok as you
> mentioned below.
> >>
> >>>
> >>> Similarly, adding tests for vcreate_p64 etc... in p64.c or
> >>> p64_p128.c might be easier to maintain than adding them to vcreate.c
> >>> etc with several #ifdef conditions.
> >>
> >> Fair enough, I'll move them to p64_p128.c.
> >>
> >>> For vdup-vmod.c, why do you add the "&& defined(__aarch64__)"
> >>> condition? These intrinsics are defined in arm/arm_neon.h, right?
> >>> They are tested in p64_p128.c
> >>
> >> I should have looked for them, they weren't being tested before so I
> >> had Mistakenly assumed that they weren't available. Now I realize I
> >> just need To add the proper test option to the file to enable crypto. I'll
> update this as well.
> >>
> >>> Looking at your patch, it seems some tests are currently missing for arm:
> >>> vget_high_p64. I'm not sure why I missed it when I removed neont-
> >>> testgen...
> >>
> >> I'll adjust the test conditions so they run for ARM as well.
> >>
> >>>
> >>> Regarding vreinterpret_p128.c, doesn't the existing effective-target
> >>> arm_crypto_ok prevent the tests from running on aarch64?
> >>
> >> Yes they do, I was comparing the output against a clean version and
> >> hasn't noticed That they weren't running. Thanks!
> >>
> >>>
> >>> Thanks,
> >>>
> >>> Christophe
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: aarch64-test-ver3.patch --]
[-- Type: text/x-patch; name="aarch64-test-ver3.patch", Size: 24846 bytes --]
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
index 462141586b3db7c5256c74b08fa0449210634226..beaf6ac31d5c5affe3702a505ad0df8679229e32 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
@@ -32,6 +32,13 @@ extern size_t strlen(const char *);
VECT_VAR(expected, int, 16, 4) -> expected_int16x4
VECT_VAR_DECL(expected, int, 16, 4) -> int16x4_t expected_int16x4
*/
+/* Some instructions don't exist on ARM.
+ Use this macro to guard against them. */
+#ifdef __aarch64__
+#define AARCH64_ONLY(X) X
+#else
+#define AARCH64_ONLY(X)
+#endif
#define xSTR(X) #X
#define STR(X) xSTR(X)
@@ -92,6 +99,13 @@ extern size_t strlen(const char *);
fprintf(stderr, "CHECKED %s %s\n", STR(VECT_TYPE(T, W, N)), MSG); \
}
+#if defined (__ARM_FEATURE_CRYPTO)
+#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT) \
+ CHECK(MSG,T,W,N,FMT,EXPECTED,COMMENT)
+#else
+#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT)
+#endif
+
/* Floating-point variant. */
#define CHECK_FP(MSG,T,W,N,FMT,EXPECTED,COMMENT) \
{ \
@@ -184,6 +198,9 @@ extern ARRAY(expected, uint, 32, 2);
extern ARRAY(expected, uint, 64, 1);
extern ARRAY(expected, poly, 8, 8);
extern ARRAY(expected, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+extern ARRAY(expected, poly, 64, 1);
+#endif
extern ARRAY(expected, hfloat, 16, 4);
extern ARRAY(expected, hfloat, 32, 2);
extern ARRAY(expected, hfloat, 64, 1);
@@ -197,6 +214,9 @@ extern ARRAY(expected, uint, 32, 4);
extern ARRAY(expected, uint, 64, 2);
extern ARRAY(expected, poly, 8, 16);
extern ARRAY(expected, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+extern ARRAY(expected, poly, 64, 2);
+#endif
extern ARRAY(expected, hfloat, 16, 8);
extern ARRAY(expected, hfloat, 32, 4);
extern ARRAY(expected, hfloat, 64, 2);
@@ -213,6 +233,7 @@ extern ARRAY(expected, hfloat, 64, 2);
CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \
CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \
CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \
\
CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \
@@ -225,6 +246,7 @@ extern ARRAY(expected, hfloat, 64, 2);
CHECK(test_name, uint, 64, 2, PRIx64, EXPECTED, comment); \
CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \
CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
+ CHECK_CRYPTO(test_name, poly, 64, 2, PRIx64, EXPECTED, comment); \
CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \
} \
@@ -398,6 +420,9 @@ static void clean_results (void)
CLEAN(result, uint, 64, 1);
CLEAN(result, poly, 8, 8);
CLEAN(result, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+ CLEAN(result, poly, 64, 1);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CLEAN(result, float, 16, 4);
#endif
@@ -413,6 +438,9 @@ static void clean_results (void)
CLEAN(result, uint, 64, 2);
CLEAN(result, poly, 8, 16);
CLEAN(result, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+ CLEAN(result, poly, 64, 2);
+#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CLEAN(result, float, 16, 8);
#endif
@@ -438,6 +466,13 @@ static void clean_results (void)
#define DECL_VARIABLE(VAR, T1, W, N) \
VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)
+#if defined (__ARM_FEATURE_CRYPTO)
+#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N) \
+ DECL_VARIABLE(VAR, T1, W, N)
+#else
+#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N)
+#endif
+
/* Declare only 64 bits signed variants. */
#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR) \
DECL_VARIABLE(VAR, int, 8, 8); \
@@ -473,6 +508,7 @@ static void clean_results (void)
DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 8); \
DECL_VARIABLE(VAR, poly, 16, 4); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1); \
DECL_VARIABLE(VAR, float, 16, 4); \
DECL_VARIABLE(VAR, float, 32, 2)
#else
@@ -481,6 +517,7 @@ static void clean_results (void)
DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 8); \
DECL_VARIABLE(VAR, poly, 16, 4); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1); \
DECL_VARIABLE(VAR, float, 32, 2)
#endif
@@ -491,6 +528,7 @@ static void clean_results (void)
DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 16); \
DECL_VARIABLE(VAR, poly, 16, 8); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2); \
DECL_VARIABLE(VAR, float, 16, 8); \
DECL_VARIABLE(VAR, float, 32, 4)
#else
@@ -499,6 +537,7 @@ static void clean_results (void)
DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 16); \
DECL_VARIABLE(VAR, poly, 16, 8); \
+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2); \
DECL_VARIABLE(VAR, float, 32, 4)
#endif
/* Declare all variants. */
@@ -531,6 +570,13 @@ static void clean_results (void)
/* Helpers to call macros with 1 constant and 5 variable
arguments. */
+#if defined (__ARM_FEATURE_CRYPTO)
+#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N) \
+ MACRO(VAR1, VAR2, T1, T2, T3, W, N)
+#else
+#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N)
+#endif
+
#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \
MACRO(VAR, , int, s, 8, 8); \
MACRO(VAR, , int, s, 16, 4); \
@@ -601,13 +647,15 @@ static void clean_results (void)
TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
MACRO(VAR1, VAR2, , poly, p, 8, 8); \
- MACRO(VAR1, VAR2, , poly, p, 16, 4)
+ MACRO(VAR1, VAR2, , poly, p, 16, 4); \
+ MACRO_CRYPTO(MACRO, VAR1, VAR2, , poly, p, 64, 1)
#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \
TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
MACRO(VAR1, VAR2, q, poly, p, 8, 16); \
- MACRO(VAR1, VAR2, q, poly, p, 16, 8)
+ MACRO(VAR1, VAR2, q, poly, p, 16, 8); \
+ MACRO_CRYPTO(MACRO, VAR1, VAR2, q, poly, p, 64, 2)
#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \
TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2); \
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
index 519cffb0125079022e7ba876c1ca657d9e37cac2..8907b38cde90b44a8f1501f72b2c4e812cba5707 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
@@ -1,8 +1,9 @@
/* This file contains tests for all the *p64 intrinsics, except for
vreinterpret which have their own testcase. */
-/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */
/* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/
#include <arm_neon.h>
#include "arm-neon-ref.h"
@@ -38,6 +39,17 @@ VECT_VAR_DECL(vdup_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(vdup_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
0xfffffffffffffff2 };
+/* Expected results: vmov_n. */
+VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+ 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff2 };
+
/* Expected results: vext. */
VECT_VAR_DECL(vext_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 };
@@ -45,6 +57,9 @@ VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 };
/* Expected results: vget_low. */
VECT_VAR_DECL(vget_low_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+/* Expected results: vget_high. */
+VECT_VAR_DECL(vget_high_expected,poly,64,1) [] = { 0xfffffffffffffff1 };
+
/* Expected results: vld1. */
VECT_VAR_DECL(vld1_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vld1_expected,poly,64,2) [] = { 0xfffffffffffffff0,
@@ -109,6 +124,39 @@ VECT_VAR_DECL(vst1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vst1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0,
0x3333333333333333 };
+/* Expected results: vldX_lane. */
+VECT_VAR_DECL(expected_vld_st2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld_st2_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st2_1,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld_st3_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st3_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld_st3_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld_st4_0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st4_1,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld_st4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld_st4_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld_st4_3,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+ 0xaaaaaaaaaaaaaaaa };
+
+/* Expected results: vget_lane. */
+VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0;
+VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0;
+
int main (void)
{
int i;
@@ -341,6 +389,26 @@ int main (void)
CHECK(TEST_MSG, poly, 64, 1, PRIx64, vget_low_expected, "");
+ /* vget_high_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VGET_HIGH"
+
+#define TEST_VGET_HIGH(T1, T2, W, N, N2) \
+ VECT_VAR(vget_high_vector64, T1, W, N) = \
+ vget_high_##T2##W(VECT_VAR(vget_high_vector128, T1, W, N2)); \
+ vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vget_high_vector64, T1, W, N))
+
+ DECL_VARIABLE(vget_high_vector64, poly, 64, 1);
+ DECL_VARIABLE(vget_high_vector128, poly, 64, 2);
+
+ CLEAN(result, poly, 64, 1);
+
+ VLOAD(vget_high_vector128, buffer, q, poly, p, 64, 2);
+
+ TEST_VGET_HIGH(poly, p, 64, 1, 2);
+
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, vget_high_expected, "");
+
/* vld1_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VLD1/VLD1Q"
@@ -645,7 +713,7 @@ int main (void)
VECT_VAR(vst1_lane_vector, T1, W, N) = \
vld1##Q##_##T2##W(VECT_VAR(buffer, T1, W, N)); \
vst1##Q##_lane_##T2##W(VECT_VAR(result, T1, W, N), \
- VECT_VAR(vst1_lane_vector, T1, W, N), L)
+ VECT_VAR(vst1_lane_vector, T1, W, N), L);
DECL_VARIABLE(vst1_lane_vector, poly, 64, 1);
DECL_VARIABLE(vst1_lane_vector, poly, 64, 2);
@@ -659,5 +727,298 @@ int main (void)
CHECK(TEST_MSG, poly, 64, 1, PRIx64, vst1_lane_expected, "");
CHECK(TEST_MSG, poly, 64, 2, PRIx64, vst1_lane_expected, "");
+#ifdef __aarch64__
+
+ /* vmov_n_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VMOV/VMOVQ"
+
+#define TEST_VMOV(Q, T1, T2, W, N) \
+ VECT_VAR(vmov_n_vector, T1, W, N) = \
+ vmov##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]); \
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vmov_n_vector, T1, W, N))
+
+ DECL_VARIABLE(vmov_n_vector, poly, 64, 1);
+ DECL_VARIABLE(vmov_n_vector, poly, 64, 2);
+
+ /* Try to read different places from the input buffer. */
+ for (i=0; i< 3; i++) {
+ CLEAN(result, poly, 64, 1);
+ CLEAN(result, poly, 64, 2);
+
+ TEST_VMOV(, poly, p, 64, 1);
+ TEST_VMOV(q, poly, p, 64, 2);
+
+ switch (i) {
+ case 0:
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected0, "");
+ CHECK(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected0, "");
+ break;
+ case 1:
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected1, "");
+ CHECK(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected1, "");
+ break;
+ case 2:
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected2, "");
+ CHECK(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected2, "");
+ break;
+ default:
+ abort();
+ }
+ }
+
+ /* vget_lane_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VGET_LANE/VGETQ_LANE"
+
+#define TEST_VGET_LANE(Q, T1, T2, W, N, L) \
+ VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \
+ if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) { \
+ fprintf(stderr, \
+ "ERROR in %s (%s line %d in result '%s') at type %s " \
+ "got 0x%" PRIx##W " != 0x%" PRIx##W "\n", \
+ TEST_MSG, __FILE__, __LINE__, \
+ STR(VECT_VAR(vget_lane_expected, T1, W, N)), \
+ STR(VECT_NAME(T1, W, N)), \
+ VECT_VAR(vget_lane_vector, T1, W, N), \
+ VECT_VAR(vget_lane_expected, T1, W, N)); \
+ abort (); \
+ }
+
+ /* Initialize input values. */
+ DECL_VARIABLE(vector, poly, 64, 1);
+ DECL_VARIABLE(vector, poly, 64, 2);
+
+ VLOAD(vector, buffer, , poly, p, 64, 1);
+ VLOAD(vector, buffer, q, poly, p, 64, 2);
+
+ VECT_VAR_DECL(vget_lane_vector, poly, 64, 1);
+ VECT_VAR_DECL(vget_lane_vector, poly, 64, 2);
+
+ TEST_VGET_LANE( , poly, p, 64, 1, 0);
+ TEST_VGET_LANE(q, poly, p, 64, 2, 0);
+
+ /* vldx_lane_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VLDX_LANE/VLDXQ_LANE"
+
+VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 64, 2);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 64, 3);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 64, 4);
+
+ /* In this case, input variables are arrays of vectors. */
+#define DECL_VLD_STX_LANE(T1, W, N, X) \
+ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \
+ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \
+ VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
+
+ /* We need to use a temporary result buffer (result_bis), because
+ the one used for other tests is not large enough. A subset of the
+ result data is moved from result_bis to result, and it is this
+ subset which is used to check the actual behavior. The next
+ macro enables to move another chunk of data from result_bis to
+ result. */
+ /* We also use another extra input buffer (buffer_src), which we
+ fill with 0xAA, and which it used to load a vector from which we
+ read a given lane. */
+
+#define TEST_VLDX_LANE(Q, T1, T2, W, N, X, L) \
+ memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \
+ sizeof(VECT_VAR(buffer_src, T1, W, N))); \
+ \
+ VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \
+ vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \
+ \
+ VECT_ARRAY_VAR(vector, T1, W, N, X) = \
+ /* Use dedicated init buffer, of size. X */ \
+ vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \
+ VECT_ARRAY_VAR(vector_src, T1, W, N, X), \
+ L); \
+ vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \
+ VECT_ARRAY_VAR(vector, T1, W, N, X)); \
+ memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
+ sizeof(VECT_VAR(result, T1, W, N)))
+
+ /* Overwrite "result" with the contents of "result_bis"[Y]. */
+#undef TEST_EXTRA_CHUNK
+#define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \
+ memcpy(VECT_VAR(result, T1, W, N), \
+ &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \
+ sizeof(VECT_VAR(result, T1, W, N)));
+
+ /* Add some padding to try to catch out of bound accesses. */
+#define ARRAY1(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[1]={42}
+#define DUMMY_ARRAY(V, T, W, N, L) \
+ VECT_VAR_DECL(V,T,W,N)[N*L]={0}; \
+ ARRAY1(V##_pad,T,W,N)
+
+#define DECL_ALL_VLD_STX_LANE(X) \
+ DECL_VLD_STX_LANE(poly, 64, 1, X); \
+ DECL_VLD_STX_LANE(poly, 64, 2, X);
+
+#define TEST_ALL_VLDX_LANE(X) \
+ TEST_VLDX_LANE(, poly, p, 64, 1, X, 0); \
+ TEST_VLDX_LANE(q, poly, p, 64, 2, X, 0);
+
+#define TEST_ALL_EXTRA_CHUNKS(X,Y) \
+ TEST_EXTRA_CHUNK(poly, 64, 1, X, Y) \
+ TEST_EXTRA_CHUNK(poly, 64, 2, X, Y)
+
+#define CHECK_RESULTS_VLD_STX_LANE(test_name,EXPECTED,comment) \
+ CHECK(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \
+ CHECK(test_name, poly, 64, 2, PRIx64, EXPECTED, comment);
+
+ /* Declare the temporary buffers / variables. */
+ DECL_ALL_VLD_STX_LANE(2);
+ DECL_ALL_VLD_STX_LANE(3);
+ DECL_ALL_VLD_STX_LANE(4);
+
+ DUMMY_ARRAY(buffer_src, poly, 64, 1, 4);
+ DUMMY_ARRAY(buffer_src, poly, 64, 2, 4);
+
+ /* Check vld2_lane/vld2q_lane. */
+ clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD2_LANE/VLD2Q_LANE"
+ TEST_ALL_VLDX_LANE(2);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st2_0, " chunk 0");
+
+ TEST_ALL_EXTRA_CHUNKS(2, 1);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st2_1, " chunk 1");
+
+ /* Check vld3_lane/vld3q_lane. */
+ clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD3_LANE/VLD3Q_LANE"
+ TEST_ALL_VLDX_LANE(3);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_0, " chunk 0");
+
+ TEST_ALL_EXTRA_CHUNKS(3, 1);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_1, " chunk 1");
+
+ TEST_ALL_EXTRA_CHUNKS(3, 2);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_2, " chunk 2");
+
+ /* Check vld4_lane/vld4q_lane. */
+ clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD4_LANE/VLD4Q_LANE"
+ TEST_ALL_VLDX_LANE(4);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_0, " chunk 0");
+
+ TEST_ALL_EXTRA_CHUNKS(4, 1);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_1, " chunk 1");
+ TEST_ALL_EXTRA_CHUNKS(4, 2);
+
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_2, " chunk 2");
+
+ TEST_ALL_EXTRA_CHUNKS(4, 3);
+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_3, " chunk 3");
+
+ /* In this case, input variables are arrays of vectors. */
+#define DECL_VSTX_LANE(T1, W, N, X) \
+ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \
+ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \
+ VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
+
+ /* We need to use a temporary result buffer (result_bis), because
+ the one used for other tests is not large enough. A subset of the
+ result data is moved from result_bis to result, and it is this
+ subset which is used to check the actual behavior. The next
+ macro enables to move another chunk of data from result_bis to
+ result. */
+ /* We also use another extra input buffer (buffer_src), which we
+ fill with 0xAA, and which it used to load a vector from which we
+ read a given lane. */
+#define TEST_VSTX_LANE(Q, T1, T2, W, N, X, L) \
+ memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \
+ sizeof(VECT_VAR(buffer_src, T1, W, N))); \
+ memset (VECT_VAR(result_bis_##X, T1, W, N), 0, \
+ sizeof(VECT_VAR(result_bis_##X, T1, W, N))); \
+ \
+ VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \
+ vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \
+ \
+ VECT_ARRAY_VAR(vector, T1, W, N, X) = \
+ /* Use dedicated init buffer, of size X. */ \
+ vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \
+ VECT_ARRAY_VAR(vector_src, T1, W, N, X), \
+ L); \
+ vst##X##Q##_lane_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \
+ VECT_ARRAY_VAR(vector, T1, W, N, X), \
+ L); \
+ memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
+ sizeof(VECT_VAR(result, T1, W, N)));
+
+#define TEST_ALL_VSTX_LANE(X) \
+ TEST_VSTX_LANE(, poly, p, 64, 1, X, 0); \
+ TEST_VSTX_LANE(q, poly, p, 64, 2, X, 0);
+
+ /* Check vst2_lane/vst2q_lane. */
+ clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VST2_LANE/VST2Q_LANE"
+ TEST_ALL_VSTX_LANE(2);
+
+#define CMT " (chunk 0)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st2_0, CMT);
+
+ TEST_ALL_EXTRA_CHUNKS(2, 1);
+#undef CMT
+#define CMT " chunk 1"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st2_1, CMT);
+
+ /* Check vst3_lane/vst3q_lane. */
+ clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VST3_LANE/VST3Q_LANE"
+ TEST_ALL_VSTX_LANE(3);
+
+#undef CMT
+#define CMT " (chunk 0)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_0, CMT);
+
+ TEST_ALL_EXTRA_CHUNKS(3, 1);
+
+#undef CMT
+#define CMT " (chunk 1)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_1, CMT);
+
+ TEST_ALL_EXTRA_CHUNKS(3, 2);
+
+#undef CMT
+#define CMT " (chunk 2)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_2, CMT);
+
+ /* Check vst4_lane/vst4q_lane. */
+ clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VST4_LANE/VST4Q_LANE"
+ TEST_ALL_VSTX_LANE(4);
+
+#undef CMT
+#define CMT " (chunk 0)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_0, CMT);
+
+ TEST_ALL_EXTRA_CHUNKS(4, 1);
+
+#undef CMT
+#define CMT " (chunk 1)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_1, CMT);
+
+ TEST_ALL_EXTRA_CHUNKS(4, 2);
+
+#undef CMT
+#define CMT " (chunk 2)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_2, CMT);
+
+ TEST_ALL_EXTRA_CHUNKS(4, 3);
+
+#undef CMT
+#define CMT " (chunk 3)"
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_3, CMT);
+
+#endif /* __aarch64__. */
+
return 0;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
index 808641524c47b2c245ee2f10e74a784a7bccefc9..f192d4dda514287c8417e7fc922bc580b209b163 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
@@ -1,7 +1,8 @@
/* This file contains tests for the vreinterpret *p128 intrinsics. */
-/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */
/* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/
#include <arm_neon.h>
#include "arm-neon-ref.h"
@@ -78,9 +79,7 @@ VECT_VAR_DECL(vreint_expected_q_f16_p128,hfloat,16,8) [] = { 0xfff0, 0xffff,
int main (void)
{
DECL_VARIABLE_128BITS_VARIANTS(vreint_vector);
- DECL_VARIABLE(vreint_vector, poly, 64, 2);
DECL_VARIABLE_128BITS_VARIANTS(vreint_vector_res);
- DECL_VARIABLE(vreint_vector_res, poly, 64, 2);
clean_results ();
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
index 1d8cf9aa69f0b5b0717e98de613e3c350d6395d4..c915fd2fea6b4d8770c9a4aab88caad391105d89 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
@@ -1,7 +1,8 @@
/* This file contains tests for the vreinterpret *p64 intrinsics. */
-/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */
/* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/
#include <arm_neon.h>
#include "arm-neon-ref.h"
@@ -121,11 +122,7 @@ int main (void)
CHECK_FP(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, "");
DECL_VARIABLE_ALL_VARIANTS(vreint_vector);
- DECL_VARIABLE(vreint_vector, poly, 64, 1);
- DECL_VARIABLE(vreint_vector, poly, 64, 2);
DECL_VARIABLE_ALL_VARIANTS(vreint_vector_res);
- DECL_VARIABLE(vreint_vector_res, poly, 64, 1);
- DECL_VARIABLE(vreint_vector_res, poly, 64, 2);
clean_results ();
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
2016-11-29 9:50 ` Tamar Christina
@ 2016-11-29 10:12 ` Christophe Lyon
2016-11-29 12:58 ` Christophe Lyon
2016-11-29 13:48 ` Kyrill Tkachov
1 sibling, 1 reply; 17+ messages in thread
From: Christophe Lyon @ 2016-11-29 10:12 UTC (permalink / raw)
To: Tamar Christina
Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
James Greenhalgh, Kyrylo Tkachov, nd
Hi Tamar,
On 29 November 2016 at 10:50, Tamar Christina <Tamar.Christina@arm.com> wrote:
> Hi All,
>
> The new patch contains the proper types for the intrinsics that should be returning uint64x1
> and has the rest of the comments by Christophe in them.
>
LGTM.
One more question: maybe we want to add explicit tests for vdup*_v_p64
even though they are aliases for vmov?
Christophe
> Kind Regards,
> Tamar
>
> ________________________________________
> From: Tamar Christina
> Sent: Friday, November 25, 2016 4:01:30 PM
> To: Christophe Lyon
> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
>
> >
>> > A few comments about this new version:
>> > * arm-neon-ref.h: why do you create
>> CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64?
>> > Can't you just add calls to CHECK_CRYPTO in the existing
>> > CHECK_RESULTS_NAMED_NO_FP16?
>
> Yes, that should be fine, I didn't used to have CHECK_CRYPTO before and when I added it
> I didn't remove the split. I'll do it now.
>
>> >
>> > * p64_p128:
>> > From what I can see ARM and AArch64 differ on the vceq variants
>> > available with poly64.
>> > For ARM, arm_neon.h contains: uint64x1_t vceq_p64 (poly64x1_t __a,
>> > poly64x1_t __b) For AArch64, I can't see vceq_p64 in arm_neon.h? ...
>> > Actually I've just noticed the other you submitted while I was writing
>> > this, where you add vceq_p64 for aarch64, but it still returns
>> > uint64_t.
>> > Why do you change the vceq_64 test to return poly64_t instead of
>> uint64_t?
>
> This patch is slightly outdated. The correct type is `uint64_t` but when it was noticed
> This patch was already sent. New one coming soon.
>
>> >
>> > Why do you add #ifdef __aarch64 before vldX_p64 tests and until vsli_p64?
>> >
>
> This is wrong, remove them. It was supposed to be around the vldX_lane_p64 tests.
>
>> > The comment /* vget_lane_p64 tests. */ is wrong before VLDX_LANE
>> > tests
>> >
>> > You need to protect the new vmov, vget_high and vget_lane tests with
>> > #ifdef __aarch64__.
>> >
>
> vget_lane is already in an #ifdef, vmov you're right, but I also notice that the
> test calls VDUP instead of VMOV, which explains why I didn't get a test failure.
>
> Thanks for the feedback,
> I'll get these updated.
>
>>
>> Actually, vget_high_p64 exists on arm, so no need for the #ifdef for it.
>>
>>
>> > Christophe
>> >
>> >> Kind regards,
>> >> Tamar
>> >> ________________________________________
>> >> From: Tamar Christina
>> >> Sent: Tuesday, November 8, 2016 11:58:46 AM
>> >> To: Christophe Lyon
>> >> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard
>> >> Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
>> >> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing
>> >> Poly64_t intrinsics to GCC
>> >>
>> >> Hi Christophe,
>> >>
>> >> Thanks for the review!
>> >>
>> >>>
>> >>> A while ago I added p64_p128.c, to contain all the poly64/128 tests
>> >>> except for vreinterpret.
>> >>> Why do you need to create p64.c ?
>> >>
>> >> I originally created it because I had a much smaller set of
>> >> intrinsics that I wanted to add initially, this grew and It hadn't occurred to
>> me that I can use the existing file now.
>> >>
>> >> Another reason was the effective-target arm_crypto_ok as you
>> mentioned below.
>> >>
>> >>>
>> >>> Similarly, adding tests for vcreate_p64 etc... in p64.c or
>> >>> p64_p128.c might be easier to maintain than adding them to vcreate.c
>> >>> etc with several #ifdef conditions.
>> >>
>> >> Fair enough, I'll move them to p64_p128.c.
>> >>
>> >>> For vdup-vmod.c, why do you add the "&& defined(__aarch64__)"
>> >>> condition? These intrinsics are defined in arm/arm_neon.h, right?
>> >>> They are tested in p64_p128.c
>> >>
>> >> I should have looked for them, they weren't being tested before so I
>> >> had Mistakenly assumed that they weren't available. Now I realize I
>> >> just need To add the proper test option to the file to enable crypto. I'll
>> update this as well.
>> >>
>> >>> Looking at your patch, it seems some tests are currently missing for arm:
>> >>> vget_high_p64. I'm not sure why I missed it when I removed neont-
>> >>> testgen...
>> >>
>> >> I'll adjust the test conditions so they run for ARM as well.
>> >>
>> >>>
>> >>> Regarding vreinterpret_p128.c, doesn't the existing effective-target
>> >>> arm_crypto_ok prevent the tests from running on aarch64?
>> >>
>> >> Yes they do, I was comparing the output against a clean version and
>> >> hadn't noticed that they weren't running. Thanks!
>> >>
>> >>>
>> >>> Thanks,
>> >>>
>> >>> Christophe
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
2016-11-29 10:12 ` Christophe Lyon
@ 2016-11-29 12:58 ` Christophe Lyon
0 siblings, 0 replies; 17+ messages in thread
From: Christophe Lyon @ 2016-11-29 12:58 UTC (permalink / raw)
To: Tamar Christina
Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
James Greenhalgh, Kyrylo Tkachov, nd
On 29 November 2016 at 11:12, Christophe Lyon
<christophe.lyon@linaro.org> wrote:
> Hi Tamar,
>
>
> On 29 November 2016 at 10:50, Tamar Christina <Tamar.Christina@arm.com> wrote:
>> Hi All,
>>
>> The new patch contains the proper types for the intrinsics that should be returning uint64x1
>> and has the rest of the comments by Christophe in them.
>>
>
> LGTM.
>
> One more question: maybe we want to add explicit tests for vdup*_v_p64
> even though they are aliases for vmov?
>
Sorry, I meant vdup_n_p64, but the tests are already in place.
So, OK for me, but I can't approve.
Thanks,
Christophe
> Christophe
>
>> Kind Regards,
>> Tamar
>>
>> ________________________________________
>> From: Tamar Christina
>> Sent: Friday, November 25, 2016 4:01:30 PM
>> To: Christophe Lyon
>> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
>> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
>>
>> >
>>> > A few comments about this new version:
>>> > * arm-neon-ref.h: why do you create
>>> CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64?
>>> > Can't you just add calls to CHECK_CRYPTO in the existing
>>> > CHECK_RESULTS_NAMED_NO_FP16?
>>
>> Yes, that should be fine, I didn't use to have CHECK_CRYPTO before and when I added it
>> I didn't remove the split. I'll do it now.
>>
>>> >
>>> > * p64_p128:
>>> > From what I can see ARM and AArch64 differ on the vceq variants
>>> > available with poly64.
>>> > For ARM, arm_neon.h contains: uint64x1_t vceq_p64 (poly64x1_t __a,
>>> > poly64x1_t __b) For AArch64, I can't see vceq_p64 in arm_neon.h? ...
>>> > Actually I've just noticed the other you submitted while I was writing
>>> > this, where you add vceq_p64 for aarch64, but it still returns
>>> > uint64_t.
>>> > Why do you change the vceq_64 test to return poly64_t instead of
>>> uint64_t?
>>
>> This patch is slightly outdated. The correct type is `uint64_t` but when it was noticed
>> This patch was already sent. New one coming soon.
>>
>>> >
>>> > Why do you add #ifdef __aarch64 before vldX_p64 tests and until vsli_p64?
>>> >
>>
>> This is wrong, remove them. It was supposed to be around the vldX_lane_p64 tests.
>>
>>> > The comment /* vget_lane_p64 tests. */ is wrong before VLDX_LANE
>>> > tests
>>> >
>>> > You need to protect the new vmov, vget_high and vget_lane tests with
>>> > #ifdef __aarch64__.
>>> >
>>
>> vget_lane is already in an #ifdef, vmov you're right, but I also notice that the
>> test calls VDUP instead of VMOV, which explains why I didn't get a test failure.
>>
>> Thanks for the feedback,
>> I'll get these updated.
>>
>>>
>>> Actually, vget_high_p64 exists on arm, so no need for the #ifdef for it.
>>>
>>>
>>> > Christophe
>>> >
>>> >> Kind regards,
>>> >> Tamar
>>> >> ________________________________________
>>> >> From: Tamar Christina
>>> >> Sent: Tuesday, November 8, 2016 11:58:46 AM
>>> >> To: Christophe Lyon
>>> >> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard
>>> >> Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
>>> >> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing
>>> >> Poly64_t intrinsics to GCC
>>> >>
>>> >> Hi Christophe,
>>> >>
>>> >> Thanks for the review!
>>> >>
>>> >>>
>>> >>> A while ago I added p64_p128.c, to contain all the poly64/128 tests
>>> >>> except for vreinterpret.
>>> >>> Why do you need to create p64.c ?
>>> >>
>>> >> I originally created it because I had a much smaller set of
>>> >> intrinsics that I wanted to add initially, this grew and It hadn't occurred to
>>> me that I can use the existing file now.
>>> >>
>>> >> Another reason was the effective-target arm_crypto_ok as you
>>> mentioned below.
>>> >>
>>> >>>
>>> >>> Similarly, adding tests for vcreate_p64 etc... in p64.c or
>>> >>> p64_p128.c might be easier to maintain than adding them to vcreate.c
>>> >>> etc with several #ifdef conditions.
>>> >>
>>> >> Fair enough, I'll move them to p64_p128.c.
>>> >>
>>> >>> For vdup-vmod.c, why do you add the "&& defined(__aarch64__)"
>>> >>> condition? These intrinsics are defined in arm/arm_neon.h, right?
>>> >>> They are tested in p64_p128.c
>>> >>
>>> >> I should have looked for them, they weren't being tested before so I
>>> >> had Mistakenly assumed that they weren't available. Now I realize I
>>> >> just need To add the proper test option to the file to enable crypto. I'll
>>> update this as well.
>>> >>
>>> >>> Looking at your patch, it seems some tests are currently missing for arm:
>>> >>> vget_high_p64. I'm not sure why I missed it when I removed neont-
>>> >>> testgen...
>>> >>
>>> >> I'll adjust the test conditions so they run for ARM as well.
>>> >>
>>> >>>
>>> >>> Regarding vreinterpret_p128.c, doesn't the existing effective-target
>>> >>> arm_crypto_ok prevent the tests from running on aarch64?
>>> >>
>>> >> Yes they do, I was comparing the output against a clean version and
>>> >> hadn't noticed that they weren't running. Thanks!
>>> >>
>>> >>>
>>> >>> Thanks,
>>> >>>
>>> >>> Christophe
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
2016-11-29 9:50 ` Tamar Christina
2016-11-29 10:12 ` Christophe Lyon
@ 2016-11-29 13:48 ` Kyrill Tkachov
2016-11-29 13:55 ` James Greenhalgh
1 sibling, 1 reply; 17+ messages in thread
From: Kyrill Tkachov @ 2016-11-29 13:48 UTC (permalink / raw)
To: Tamar Christina, Christophe Lyon
Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
James Greenhalgh
On 29/11/16 09:50, Tamar Christina wrote:
> Hi All,
>
> The new patch contains the proper types for the intrinsics that should be returning uint64x1
> and has the rest of the comments by Christophe in them.
Ok with an appropriate ChangeLog entry.
Thanks,
Kyrill
> Kind Regards,
> Tamar
>
> ________________________________________
> From: Tamar Christina
> Sent: Friday, November 25, 2016 4:01:30 PM
> To: Christophe Lyon
> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
>
> >
>>> A few comments about this new version:
>>> * arm-neon-ref.h: why do you create
>> CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64?
>>> Can't you just add calls to CHECK_CRYPTO in the existing
>>> CHECK_RESULTS_NAMED_NO_FP16?
Yes, that should be fine, I didn't use to have CHECK_CRYPTO before and when I added it
I didn't remove the split. I'll do it now.
>
>>> * p64_p128:
>>> From what I can see ARM and AArch64 differ on the vceq variants
>>> available with poly64.
>>> For ARM, arm_neon.h contains: uint64x1_t vceq_p64 (poly64x1_t __a,
>>> poly64x1_t __b) For AArch64, I can't see vceq_p64 in arm_neon.h? ...
>>> Actually I've just noticed the other you submitted while I was writing
>>> this, where you add vceq_p64 for aarch64, but it still returns
>>> uint64_t.
>>> Why do you change the vceq_64 test to return poly64_t instead of
>> uint64_t?
This patch is slightly outdated. The correct type is `uint64_t`, but by the time it was noticed
this patch had already been sent. New one coming soon.
>
>>> Why do you add #ifdef __aarch64__ before vldX_p64 tests and until vsli_p64?
>>>
> This is wrong, remove them. It was supposed to be around the vldX_lane_p64 tests.
>
>>> The comment /* vget_lane_p64 tests. */ is wrong before VLDX_LANE
>>> tests
>>>
>>> You need to protect the new vmov, vget_high and vget_lane tests with
>>> #ifdef __aarch64__.
>>>
> vget_lane is already in an #ifdef, vmov you're right, but I also notice that the
> test calls VDUP instead of VMOV, which explains why I didn't get a test failure.
>
> Thanks for the feedback,
> I'll get these updated.
>
>> Actually, vget_high_p64 exists on arm, so no need for the #ifdef for it.
>>
>>
>>> Christophe
>>>
>>>> Kind regards,
>>>> Tamar
>>>> ________________________________________
>>>> From: Tamar Christina
>>>> Sent: Tuesday, November 8, 2016 11:58:46 AM
>>>> To: Christophe Lyon
>>>> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard
>>>> Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
>>>> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing
>>>> Poly64_t intrinsics to GCC
>>>>
>>>> Hi Christophe,
>>>>
>>>> Thanks for the review!
>>>>
>>>>> A while ago I added p64_p128.c, to contain all the poly64/128 tests
>>>>> except for vreinterpret.
>>>>> Why do you need to create p64.c ?
>>>> I originally created it because I had a much smaller set of
>>>> intrinsics that I wanted to add initially, this grew and It hadn't occurred to
>> me that I can use the existing file now.
>>>> Another reason was the effective-target arm_crypto_ok as you
>> mentioned below.
>>>>> Similarly, adding tests for vcreate_p64 etc... in p64.c or
>>>>> p64_p128.c might be easier to maintain than adding them to vcreate.c
>>>>> etc with several #ifdef conditions.
>>>> Fair enough, I'll move them to p64_p128.c.
>>>>
>>>>> For vdup-vmov.c, why do you add the "&& defined(__aarch64__)"
>>>>> condition? These intrinsics are defined in arm/arm_neon.h, right?
>>>>> They are tested in p64_p128.c
>>>> I should have looked for them, they weren't being tested before so I
>>>> had mistakenly assumed that they weren't available. Now I realize I
>>>> just need to add the proper test option to the file to enable crypto. I'll
>> update this as well.
>>>>> Looking at your patch, it seems some tests are currently missing for arm:
>>>>> vget_high_p64. I'm not sure why I missed it when I removed neont-
>>>>> testgen...
>>>> I'll adjust the test conditions so they run for ARM as well.
>>>>
>>>>> Regarding vreinterpret_p128.c, doesn't the existing effective-target
>>>>> arm_crypto_ok prevent the tests from running on aarch64?
>>>> Yes they do, I was comparing the output against a clean version and
>>>> hadn't noticed that they weren't running. Thanks!
>>>>
>>>>> Thanks,
>>>>>
>>>>> Christophe
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
2016-11-29 13:48 ` Kyrill Tkachov
@ 2016-11-29 13:55 ` James Greenhalgh
2016-11-30 9:05 ` Christophe Lyon
0 siblings, 1 reply; 17+ messages in thread
From: James Greenhalgh @ 2016-11-29 13:55 UTC (permalink / raw)
To: Kyrill Tkachov
Cc: Tamar Christina, Christophe Lyon, GCC Patches, christophe.lyon,
Marcus Shawcroft, Richard Earnshaw, nd
On Tue, Nov 29, 2016 at 01:48:22PM +0000, Kyrill Tkachov wrote:
>
> On 29/11/16 09:50, Tamar Christina wrote:
> >Hi All,
> >
> >The new patch contains the proper types for the intrinsics that should be returning uint64x1
> >and has the rest of the comments by Christophe in them.
>
> Ok with an appropriate ChangeLog entry.
Also OK from an AArch64 perspective based on the detailed review from
Christophe.
Thanks,
James
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
2016-11-29 13:55 ` James Greenhalgh
@ 2016-11-30 9:05 ` Christophe Lyon
2016-11-30 9:40 ` Tamar Christina
2016-12-07 4:34 ` Andrew Pinski
0 siblings, 2 replies; 17+ messages in thread
From: Christophe Lyon @ 2016-11-30 9:05 UTC (permalink / raw)
To: Tamar Christina
Cc: Kyrill Tkachov, James Greenhalgh, GCC Patches, christophe.lyon,
Marcus Shawcroft, Richard Earnshaw, nd
Hi Tamar,
On 29 November 2016 at 14:54, James Greenhalgh <james.greenhalgh@arm.com> wrote:
> On Tue, Nov 29, 2016 at 01:48:22PM +0000, Kyrill Tkachov wrote:
>>
>> On 29/11/16 09:50, Tamar Christina wrote:
>> >Hi All,
>> >
>> >The new patch contains the proper types for the intrinsics that should be returning uint64x1
>> >and has the rest of the comments by Christophe in them.
>>
>> Ok with an appropriate ChangeLog entry.
>
> Also OK from an AArch64 perspective based on the detailed review from
> Christophe.
>
> Thanks,
> James
>
After you committed this patch (r242962), I've noticed some
regressions as follows:
* on aarch64, vreinterpret_p128 and vreinterpret_p64 fail to compile
with errors like
warning: implicit declaration of function 'vreinterpretq_p64_p128
warning: implicit declaration of function 'vreinterpretq_p128_s8
error: incompatible types when assigning to type 'poly64x2_t' from type 'int'
etc...
* on arm configured for armv8-a, several tests fail to link or compile:
vbsl.c:(.text+0x24f0): undefined reference to `expected_poly64x1'
vdup-vmov.c:227:38: error: 'expected0_poly64x1' undeclared
vdup_lane.c:(.text+0x1584): undefined reference to `expected_poly64x1'
You can have more details at
http://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/242962/report-build-info.html
Christophe
^ permalink raw reply [flat|nested] 17+ messages in thread
* RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
2016-11-30 9:05 ` Christophe Lyon
@ 2016-11-30 9:40 ` Tamar Christina
2016-12-07 4:34 ` Andrew Pinski
1 sibling, 0 replies; 17+ messages in thread
From: Tamar Christina @ 2016-11-30 9:40 UTC (permalink / raw)
To: Christophe Lyon
Cc: Kyrill Tkachov, James Greenhalgh, GCC Patches, christophe.lyon,
Marcus Shawcroft, Richard Earnshaw, nd
Hi Christophe,
> After you committed this patch (r242962), I've noticed some regressions as
> follows:
> * on aarch64, vreinterpret_p128 and vreinterpret_p64 fail to compile with
> errors like
> warning: implicit declaration of function 'vreinterpretq_p64_p128
> warning: implicit declaration of function 'vreinterpretq_p128_s8
> error: incompatible types when assigning to type 'poly64x2_t' from type 'int'
> etc...
Sorry for the screw up. On the last patch I only tested the file p64_p128.c.
I'll fix these asap.
>
> * on arm configured for armv8-a, several tests fail to link or compile:
> vbsl.c:(.text+0x24f0): undefined reference to `expected_poly64x1'
> vdup-vmov.c:227:38: error: 'expected0_poly64x1' undeclared
> vdup_lane.c:(.text+0x1584): undefined reference to `expected_poly64x1'
>
> You can have more details at
> http://people.linaro.org/~christophe.lyon/cross-
> validation/gcc/trunk/242962/report-build-info.html
>
>
> Christophe
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
2016-11-30 9:05 ` Christophe Lyon
2016-11-30 9:40 ` Tamar Christina
@ 2016-12-07 4:34 ` Andrew Pinski
2016-12-12 11:29 ` Tamar Christina
1 sibling, 1 reply; 17+ messages in thread
From: Andrew Pinski @ 2016-12-07 4:34 UTC (permalink / raw)
To: Christophe Lyon
Cc: Tamar Christina, Kyrill Tkachov, James Greenhalgh, GCC Patches,
christophe.lyon, Marcus Shawcroft, Richard Earnshaw, nd
On Wed, Nov 30, 2016 at 1:04 AM, Christophe Lyon
<christophe.lyon@linaro.org> wrote:
> Hi Tamar,
>
>
> On 29 November 2016 at 14:54, James Greenhalgh <james.greenhalgh@arm.com> wrote:
>> On Tue, Nov 29, 2016 at 01:48:22PM +0000, Kyrill Tkachov wrote:
>>>
>>> On 29/11/16 09:50, Tamar Christina wrote:
>>> >Hi All,
>>> >
>>> >The new patch contains the proper types for the intrinsics that should be returning uint64x1
>>> >and has the rest of the comments by Christophe in them.
>>>
>>> Ok with an appropriate ChangeLog entry.
>>
>> Also OK from an AArch64 perspective based on the detailed review from
>> Christophe.
>>
>> Thanks,
>> James
>>
>
> After you committed this patch (r242962), I've noticed some
> regressions as follows:
> * on aarch64, vreinterpret_p128 and vreinterpret_p64 fail to compile
> with errors like
> warning: implicit declaration of function 'vreinterpretq_p64_p128
> warning: implicit declaration of function 'vreinterpretq_p128_s8
> error: incompatible types when assigning to type 'poly64x2_t' from type 'int'
> etc...
>
> * on arm configured for armv8-a, several tests fail to link or compile:
> vbsl.c:(.text+0x24f0): undefined reference to `expected_poly64x1'
> vdup-vmov.c:227:38: error: 'expected0_poly64x1' undeclared
> vdup_lane.c:(.text+0x1584): undefined reference to `expected_poly64x1'
>
> You can have more details at
> http://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/242962/report-build-info.html
I see the expected_poly64x1 failures also for aarch64:
https://gcc.gnu.org/ml/gcc-testresults/2016-12/msg00738.html
FAIL: gcc.target/aarch64/advsimd-intrinsics/vbsl.c -O0 (test for
excess errors)
Excess errors:
vbsl.c:(.text+0x1dec): undefined reference to `expected_poly64x1'
vbsl.c:(.text+0x1df0): undefined reference to `expected_poly64x1'
vbsl.c:(.text+0x1e20): undefined reference to `expected_poly64x1'
vbsl.c:(.text+0x1e24): undefined reference to `expected_poly64x1'
vbsl.c:(.text+0x2a74): undefined reference to `expected_poly64x2'
vbsl.c:(.text+0x2a78): undefined reference to `expected_poly64x2'
vbsl.c:(.text+0x2aa8): undefined reference to `expected_poly64x2'
vbsl.c:(.text+0x2aac): undefined reference to `expected_poly64x2'
....
FAIL: gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c -O0 (test
for excess errors)
Excess errors:
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:175:38:
error: 'expected0_poly64x1' undeclared (first use in this function);
did you mean 'expected_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:175:38:
error: 'expected0_poly64x2' undeclared (first use in this function);
did you mean 'expected0_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:178:38:
error: 'expected1_poly64x1' undeclared (first use in this function);
did you mean 'expected0_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:178:38:
error: 'expected1_poly64x2' undeclared (first use in this function);
did you mean 'expected1_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:181:38:
error: 'expected2_poly64x1' undeclared (first use in this function);
did you mean 'expected1_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:181:38:
error: 'expected2_poly64x2' undeclared (first use in this function);
did you mean 'expected2_poly64x1'?
etc.
>
>
> Christophe
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
2016-12-07 4:34 ` Andrew Pinski
@ 2016-12-12 11:29 ` Tamar Christina
2016-12-12 23:53 ` Andrew Pinski
0 siblings, 1 reply; 17+ messages in thread
From: Tamar Christina @ 2016-12-12 11:29 UTC (permalink / raw)
To: Andrew Pinski, Christophe Lyon
Cc: Kyrill Tkachov, James Greenhalgh, GCC Patches, christophe.lyon,
Marcus Shawcroft, Richard Earnshaw, nd
Hi Andrew,
These should be fixed now.
Thanks,
Tamar
________________________________________
From: Andrew Pinski <pinskia@gmail.com>
Sent: Wednesday, December 7, 2016 4:33:51 AM
To: Christophe Lyon
Cc: Tamar Christina; Kyrill Tkachov; James Greenhalgh; GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard Earnshaw; nd
Subject: Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
On Wed, Nov 30, 2016 at 1:04 AM, Christophe Lyon
<christophe.lyon@linaro.org> wrote:
> Hi Tamar,
>
>
> On 29 November 2016 at 14:54, James Greenhalgh <james.greenhalgh@arm.com> wrote:
>> On Tue, Nov 29, 2016 at 01:48:22PM +0000, Kyrill Tkachov wrote:
>>>
>>> On 29/11/16 09:50, Tamar Christina wrote:
>>> >Hi All,
>>> >
>>> >The new patch contains the proper types for the intrinsics that should be returning uint64x1
>>> >and has the rest of the comments by Christophe in them.
>>>
>>> Ok with an appropriate ChangeLog entry.
>>
>> Also OK from an AArch64 perspective based on the detailed review from
>> Christophe.
>>
>> Thanks,
>> James
>>
>
> After you committed this patch (r242962), I've noticed some
> regressions as follows:
> * on aarch64, vreinterpret_p128 and vreinterpret_p64 fail to compile
> with errors like
> warning: implicit declaration of function 'vreinterpretq_p64_p128
> warning: implicit declaration of function 'vreinterpretq_p128_s8
> error: incompatible types when assigning to type 'poly64x2_t' from type 'int'
> etc...
>
> * on arm configured for armv8-a, several tests fail to link or compile:
> vbsl.c:(.text+0x24f0): undefined reference to `expected_poly64x1'
> vdup-vmov.c:227:38: error: 'expected0_poly64x1' undeclared
> vdup_lane.c:(.text+0x1584): undefined reference to `expected_poly64x1'
>
> You can have more details at
> http://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/242962/report-build-info.html
I see the expected_poly64x1 failures also for aarch64:
https://gcc.gnu.org/ml/gcc-testresults/2016-12/msg00738.html
FAIL: gcc.target/aarch64/advsimd-intrinsics/vbsl.c -O0 (test for
excess errors)
Excess errors:
vbsl.c:(.text+0x1dec): undefined reference to `expected_poly64x1'
vbsl.c:(.text+0x1df0): undefined reference to `expected_poly64x1'
vbsl.c:(.text+0x1e20): undefined reference to `expected_poly64x1'
vbsl.c:(.text+0x1e24): undefined reference to `expected_poly64x1'
vbsl.c:(.text+0x2a74): undefined reference to `expected_poly64x2'
vbsl.c:(.text+0x2a78): undefined reference to `expected_poly64x2'
vbsl.c:(.text+0x2aa8): undefined reference to `expected_poly64x2'
vbsl.c:(.text+0x2aac): undefined reference to `expected_poly64x2'
....
FAIL: gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c -O0 (test
for excess errors)
Excess errors:
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:175:38:
error: 'expected0_poly64x1' undeclared (first use in this function);
did you mean 'expected_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:175:38:
error: 'expected0_poly64x2' undeclared (first use in this function);
did you mean 'expected0_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:178:38:
error: 'expected1_poly64x1' undeclared (first use in this function);
did you mean 'expected0_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:178:38:
error: 'expected1_poly64x2' undeclared (first use in this function);
did you mean 'expected1_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:181:38:
error: 'expected2_poly64x1' undeclared (first use in this function);
did you mean 'expected1_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:181:38:
error: 'expected2_poly64x2' undeclared (first use in this function);
did you mean 'expected2_poly64x1'?
etc.
>
>
> Christophe
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
2016-12-12 11:29 ` Tamar Christina
@ 2016-12-12 23:53 ` Andrew Pinski
0 siblings, 0 replies; 17+ messages in thread
From: Andrew Pinski @ 2016-12-12 23:53 UTC (permalink / raw)
To: Tamar Christina
Cc: Christophe Lyon, Kyrill Tkachov, James Greenhalgh, GCC Patches,
christophe.lyon, Marcus Shawcroft, Richard Earnshaw, nd
On Mon, Dec 12, 2016 at 3:29 AM, Tamar Christina
<Tamar.Christina@arm.com> wrote:
> Hi Andrew,
>
> These should be fixed now.
Yes they are fixed.
Thanks,
Andrew
>
> Thanks,
> Tamar
>
> ________________________________________
> From: Andrew Pinski <pinskia@gmail.com>
> Sent: Wednesday, December 7, 2016 4:33:51 AM
> To: Christophe Lyon
> Cc: Tamar Christina; Kyrill Tkachov; James Greenhalgh; GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard Earnshaw; nd
> Subject: Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
>
> On Wed, Nov 30, 2016 at 1:04 AM, Christophe Lyon
> <christophe.lyon@linaro.org> wrote:
>> Hi Tamar,
>>
>>
>> On 29 November 2016 at 14:54, James Greenhalgh <james.greenhalgh@arm.com> wrote:
>>> On Tue, Nov 29, 2016 at 01:48:22PM +0000, Kyrill Tkachov wrote:
>>>>
>>>> On 29/11/16 09:50, Tamar Christina wrote:
>>>> >Hi All,
>>>> >
>>>> >The new patch contains the proper types for the intrinsics that should be returning uint64x1
>>>> >and has the rest of the comments by Christophe in them.
>>>>
>>>> Ok with an appropriate ChangeLog entry.
>>>
>>> Also OK from an AArch64 perspective based on the detailed review from
>>> Christophe.
>>>
>>> Thanks,
>>> James
>>>
>>
>> After you committed this patch (r242962), I've noticed some
>> regressions as follows:
>> * on aarch64, vreinterpret_p128 and vreinterpret_p64 fail to compile
>> with errors like
>> warning: implicit declaration of function 'vreinterpretq_p64_p128
>> warning: implicit declaration of function 'vreinterpretq_p128_s8
>> error: incompatible types when assigning to type 'poly64x2_t' from type 'int'
>> etc...
>>
>> * on arm configured for armv8-a, several tests fail to link or compile:
>> vbsl.c:(.text+0x24f0): undefined reference to `expected_poly64x1'
>> vdup-vmov.c:227:38: error: 'expected0_poly64x1' undeclared
>> vdup_lane.c:(.text+0x1584): undefined reference to `expected_poly64x1'
>>
>> You can have more details at
>> http://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/242962/report-build-info.html
>
> I see the expected_poly64x1 failures also for aarch64:
> https://gcc.gnu.org/ml/gcc-testresults/2016-12/msg00738.html
>
> FAIL: gcc.target/aarch64/advsimd-intrinsics/vbsl.c -O0 (test for
> excess errors)
> Excess errors:
> vbsl.c:(.text+0x1dec): undefined reference to `expected_poly64x1'
> vbsl.c:(.text+0x1df0): undefined reference to `expected_poly64x1'
> vbsl.c:(.text+0x1e20): undefined reference to `expected_poly64x1'
> vbsl.c:(.text+0x1e24): undefined reference to `expected_poly64x1'
> vbsl.c:(.text+0x2a74): undefined reference to `expected_poly64x2'
> vbsl.c:(.text+0x2a78): undefined reference to `expected_poly64x2'
> vbsl.c:(.text+0x2aa8): undefined reference to `expected_poly64x2'
> vbsl.c:(.text+0x2aac): undefined reference to `expected_poly64x2'
>
> ....
> FAIL: gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c -O0 (test
> for excess errors)
> Excess errors:
> /home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:175:38:
> error: 'expected0_poly64x1' undeclared (first use in this function);
> did you mean 'expected_poly64x1'?
> /home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:175:38:
> error: 'expected0_poly64x2' undeclared (first use in this function);
> did you mean 'expected0_poly64x1'?
> /home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:178:38:
> error: 'expected1_poly64x1' undeclared (first use in this function);
> did you mean 'expected0_poly64x1'?
> /home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:178:38:
> error: 'expected1_poly64x2' undeclared (first use in this function);
> did you mean 'expected1_poly64x1'?
> /home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:181:38:
> error: 'expected2_poly64x1' undeclared (first use in this function);
> did you mean 'expected1_poly64x1'?
> /home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:181:38:
> error: 'expected2_poly64x2' undeclared (first use in this function);
> did you mean 'expected2_poly64x1'?
>
>
> etc.
>
>>
>>
>> Christophe
^ permalink raw reply [flat|nested] 17+ messages in thread
end of thread, other threads:[~2016-12-12 23:53 UTC | newest]
Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-11-07 13:55 [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC Tamar Christina
2016-11-08 11:21 ` Christophe Lyon
2016-11-08 11:59 ` Tamar Christina
2016-11-24 11:45 ` Tamar Christina
2016-11-25 14:54 ` Christophe Lyon
2016-11-25 15:03 ` Christophe Lyon
2016-11-25 16:01 ` Tamar Christina
2016-11-29 9:50 ` Tamar Christina
2016-11-29 10:12 ` Christophe Lyon
2016-11-29 12:58 ` Christophe Lyon
2016-11-29 13:48 ` Kyrill Tkachov
2016-11-29 13:55 ` James Greenhalgh
2016-11-30 9:05 ` Christophe Lyon
2016-11-30 9:40 ` Tamar Christina
2016-12-07 4:34 ` Andrew Pinski
2016-12-12 11:29 ` Tamar Christina
2016-12-12 23:53 ` Andrew Pinski
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).