public inbox for gcc-patches@gcc.gnu.org
* [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
From: Tamar Christina @ 2016-11-07 13:55 UTC (permalink / raw)
  To: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
	James Greenhalgh, Kyrylo Tkachov
  Cc: nd


Hi all,

This patch (3 of 3) adds and updates tests for the NEON intrinsics
added by the previous patches.

Regression tested on aarch64-none-linux-gnu and arm-none-linux-gnueabihf.

Ok for trunk?

Thanks,
Tamar


gcc/testsuite/
2016-11-04  Tamar Christina  <tamar.christina@arm.com>

	* gcc.target/aarch64/advsimd-intrinsics/p64.c: New test.
	* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
	(Poly64x1_t, Poly64x2_t): New types.
	(AARCH64_ONLY): New macro.
	(CHECK_CRYPTO, DECL_VARIABLE_CRYPTO, MACRO_CRYPTO): New macros.
	(CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64): New macro, split out
	of CHECK_RESULTS_NAMED_NO_FP16.
	* gcc.target/aarch64/advsimd-intrinsics/vcombine.c: Add tests
	for poly64_t.
	* gcc.target/aarch64/advsimd-intrinsics/vcreate.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vget_high.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vget_lane.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vget_low.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vld1.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vldX.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c:
	Add AArch64 flags.
	* gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c:
	Likewise.
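
For reviewers: the tests guard the new poly64 cases with two small
helper macros so that everything still builds when the crypto
extension is unavailable, or when building for 32-bit arm.  A
condensed sketch of the pattern, lifted from arm-neon-ref.h
(illustrative only, the full definitions are in the patch):

  /* Run the check only when the crypto extension is available,
     otherwise expand to nothing.  */
  #if defined (__ARM_FEATURE_CRYPTO)
  #define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT) \
	       CHECK(MSG,T,W,N,FMT,EXPECTED,COMMENT)
  #else
  #define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT)
  #endif

  /* Guard intrinsics that exist only on AArch64.  */
  #ifdef __aarch64__
  #define AARCH64_ONLY(X) X
  #else
  #define AARCH64_ONLY(X)
  #endif

Typical uses in the tests then look like:

  CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected, "");
  AARCH64_ONLY(TEST_VDUP(q, poly, p, 64, 2));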

[-- Attachment #2: gcc-poly64-3.patch --]
[-- Type: text/x-patch; name="gcc-poly64-3.patch", Size: 106997 bytes --]

diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
index 462141586b3db7c5256c74b08fa0449210634226..174c1948221025b860aaac503354b406fa804007 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
@@ -32,6 +32,13 @@ extern size_t strlen(const char *);
    VECT_VAR(expected, int, 16, 4) -> expected_int16x4
    VECT_VAR_DECL(expected, int, 16, 4) -> int16x4_t expected_int16x4
 */
+/* Some intrinsics are only available on AArch64.  Use this macro
+   to guard code that uses them.  */
+#ifdef __aarch64__
+#define AARCH64_ONLY(X) X
+#else
+#define AARCH64_ONLY(X)
+#endif
 
 #define xSTR(X) #X
 #define STR(X) xSTR(X)
@@ -92,6 +99,13 @@ extern size_t strlen(const char *);
     fprintf(stderr, "CHECKED %s %s\n", STR(VECT_TYPE(T, W, N)), MSG);	\
   }
 
+#if defined (__ARM_FEATURE_CRYPTO)
+#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT) \
+	       CHECK(MSG,T,W,N,FMT,EXPECTED,COMMENT)
+#else
+#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT)
+#endif
+
 /* Floating-point variant.  */
 #define CHECK_FP(MSG,T,W,N,FMT,EXPECTED,COMMENT)			\
   {									\
@@ -184,6 +198,9 @@ extern ARRAY(expected, uint, 32, 2);
 extern ARRAY(expected, uint, 64, 1);
 extern ARRAY(expected, poly, 8, 8);
 extern ARRAY(expected, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+extern ARRAY(expected, poly, 64, 1);
+#endif
 extern ARRAY(expected, hfloat, 16, 4);
 extern ARRAY(expected, hfloat, 32, 2);
 extern ARRAY(expected, hfloat, 64, 1);
@@ -197,11 +214,14 @@ extern ARRAY(expected, uint, 32, 4);
 extern ARRAY(expected, uint, 64, 2);
 extern ARRAY(expected, poly, 8, 16);
 extern ARRAY(expected, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+extern ARRAY(expected, poly, 64, 2);
+#endif
 extern ARRAY(expected, hfloat, 16, 8);
 extern ARRAY(expected, hfloat, 32, 4);
 extern ARRAY(expected, hfloat, 64, 2);
 
-#define CHECK_RESULTS_NAMED_NO_FP16(test_name,EXPECTED,comment)		\
+#define CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64(test_name,EXPECTED,comment)		\
   {									\
     CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment);		\
     CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment);		\
@@ -228,6 +248,13 @@ extern ARRAY(expected, hfloat, 64, 2);
     CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment);	\
   }									\
 
+#define CHECK_RESULTS_NAMED_NO_FP16(test_name,EXPECTED,comment)		\
+  {									\
+    CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64(test_name, EXPECTED, comment);		\
+    CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment);	\
+    CHECK_CRYPTO(test_name, poly, 64, 2, PRIx64, EXPECTED, comment);	\
+  }									\
+
 /* Check results against EXPECTED.  Operates on all possible vector types.  */
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
 #define CHECK_RESULTS_NAMED(test_name,EXPECTED,comment)			\
@@ -398,6 +425,9 @@ static void clean_results (void)
   CLEAN(result, uint, 64, 1);
   CLEAN(result, poly, 8, 8);
   CLEAN(result, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+  CLEAN(result, poly, 64, 1);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   CLEAN(result, float, 16, 4);
 #endif
@@ -413,6 +443,9 @@ static void clean_results (void)
   CLEAN(result, uint, 64, 2);
   CLEAN(result, poly, 8, 16);
   CLEAN(result, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+  CLEAN(result, poly, 64, 2);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   CLEAN(result, float, 16, 8);
 #endif
@@ -438,6 +471,13 @@ static void clean_results (void)
 #define DECL_VARIABLE(VAR, T1, W, N)		\
   VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)
 
+#if defined (__ARM_FEATURE_CRYPTO)
+#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N) \
+  DECL_VARIABLE(VAR, T1, W, N)
+#else
+#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N)
+#endif
+
 /* Declare only 64 bits signed variants.  */
 #define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR)	\
   DECL_VARIABLE(VAR, int, 8, 8);			\
@@ -473,6 +513,7 @@ static void clean_results (void)
   DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR);	\
   DECL_VARIABLE(VAR, poly, 8, 8);		\
   DECL_VARIABLE(VAR, poly, 16, 4);		\
+  DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1);	\
   DECL_VARIABLE(VAR, float, 16, 4);		\
   DECL_VARIABLE(VAR, float, 32, 2)
 #else
@@ -481,6 +522,7 @@ static void clean_results (void)
   DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR);	\
   DECL_VARIABLE(VAR, poly, 8, 8);		\
   DECL_VARIABLE(VAR, poly, 16, 4);		\
+  DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1);	\
   DECL_VARIABLE(VAR, float, 32, 2)
 #endif
 
@@ -491,6 +533,7 @@ static void clean_results (void)
   DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR);	\
   DECL_VARIABLE(VAR, poly, 8, 16);		\
   DECL_VARIABLE(VAR, poly, 16, 8);		\
+  DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2);	\
   DECL_VARIABLE(VAR, float, 16, 8);		\
   DECL_VARIABLE(VAR, float, 32, 4)
 #else
@@ -499,6 +542,7 @@ static void clean_results (void)
   DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR);	\
   DECL_VARIABLE(VAR, poly, 8, 16);		\
   DECL_VARIABLE(VAR, poly, 16, 8);		\
+  DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2);	\
   DECL_VARIABLE(VAR, float, 32, 4)
 #endif
 /* Declare all variants.  */
@@ -531,6 +575,13 @@ static void clean_results (void)
 
 /* Helpers to call macros with 1 constant and 5 variable
    arguments.  */
+#if defined (__ARM_FEATURE_CRYPTO)
+#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N) \
+  MACRO(VAR1, VAR2, T1, T2, T3, W, N)
+#else
+#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N)
+#endif
+
 #define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)	\
   MACRO(VAR, , int, s, 8, 8);					\
   MACRO(VAR, , int, s, 16, 4);					\
@@ -601,13 +652,15 @@ static void clean_results (void)
   TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
   TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
   MACRO(VAR1, VAR2, , poly, p, 8, 8);				\
-  MACRO(VAR1, VAR2, , poly, p, 16, 4)
+  MACRO(VAR1, VAR2, , poly, p, 16, 4);				\
+  MACRO_CRYPTO(MACRO, VAR1, VAR2, , poly, p, 64, 1)
 
 #define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
   TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
   TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
   MACRO(VAR1, VAR2, q, poly, p, 8, 16);				\
-  MACRO(VAR1, VAR2, q, poly, p, 16, 8)
+  MACRO(VAR1, VAR2, q, poly, p, 16, 8);				\
+  MACRO_CRYPTO(MACRO, VAR1, VAR2, q, poly, p, 64, 2)
 
 #define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
   TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64.c
new file mode 100644
index 0000000000000000000000000000000000000000..df66eaa749f7a1a34011d6d169d1262ba976c6ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64.c
@@ -0,0 +1,302 @@
+/* Tests for the poly64 variants of VLD1, VLD{X}, VLD{X}_DUP and VSLI.  */
+
+/* { dg-skip-if "" { arm*-*-* } } */
+/* { dg-options "-march=armv8-a+crypto" } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results: vld1.  */
+VECT_VAR_DECL (vld1_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld1_expected,poly,64,2) [] = { 0xfffffffffffffff0,
+					       0xfffffffffffffff1 };
+
+/* Expected results: vld1_dup.  */
+VECT_VAR_DECL (vld1_dup_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld1_dup_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+						    0xfffffffffffffff0 };
+VECT_VAR_DECL (vld1_dup_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld1_dup_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+						    0xfffffffffffffff1 };
+VECT_VAR_DECL (vld1_dup_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld1_dup_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+						    0xfffffffffffffff2 };
+
+/* Expected results: vldX.  */
+VECT_VAR_DECL (vld2_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld2_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld3_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld3_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld3_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld4_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld4_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld4_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld4_expected_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+
+/* Expected results: vldX_dup.  */
+VECT_VAR_DECL (vld2_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld2_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld3_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld3_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld3_dup_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld4_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL (vld4_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL (vld4_dup_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL (vld4_dup_expected_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+
+/* Expected results: vsli.  */
+VECT_VAR_DECL (vsli_expected,poly,64,1) [] = { 0x10 };
+VECT_VAR_DECL (vsli_expected,poly,64,2) [] = { 0x7ffffffffffff0,
+					       0x7ffffffffffff1 };
+VECT_VAR_DECL (vsli_expected_max_shift,poly,64,1) [] = { 0x7ffffffffffffff0 };
+VECT_VAR_DECL (vsli_expected_max_shift,poly,64,2) [] = { 0xfffffffffffffff0,
+							 0xfffffffffffffff1 };
+
+int main (void)
+{
+  int i;
+
+  /* vld1_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VLD1/VLD1Q"
+
+#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N)				\
+  VECT_VAR (VAR, T1, W, N) = vld1##Q##_##T2##W (VECT_VAR (BUF, T1, W, N)); \
+  vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), VECT_VAR (VAR, T1, W, N))
+
+  DECL_VARIABLE (vld1_vector, poly, 64, 1);
+  DECL_VARIABLE (vld1_vector, poly, 64, 2);
+
+  CLEAN (result, poly, 64, 1);
+  CLEAN (result, poly, 64, 2);
+
+  VLOAD (vld1_vector, buffer, , poly, p, 64, 1);
+  VLOAD (vld1_vector, buffer, q, poly, p, 64, 2);
+
+  TEST_VLD1 (vld1_vector, buffer, , poly, p, 64, 1);
+  TEST_VLD1 (vld1_vector, buffer, q, poly, p, 64, 2);
+
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld1_expected, "");
+  CHECK (TEST_MSG, poly, 64, 2, PRIx64, vld1_expected, "");
+
+  /* vld1_dup_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VLD1_DUP/VLD1_DUPQ"
+
+#define TEST_VLD1_DUP(VAR, BUF, Q, T1, T2, W, N)			\
+  VECT_VAR (VAR, T1, W, N) =						\
+    vld1##Q##_dup_##T2##W (&VECT_VAR (BUF, T1, W, N)[i]); \
+  vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), VECT_VAR (VAR, T1, W, N))
+
+  DECL_VARIABLE (vld1_dup_vector, poly, 64, 1);
+  DECL_VARIABLE (vld1_dup_vector, poly, 64, 2);
+
+  /* Try to read different places from the input buffer.  */
+  for (i=0; i<3; i++)
+  {
+    CLEAN (result, poly, 64, 1);
+    CLEAN (result, poly, 64, 2);
+
+    TEST_VLD1_DUP (vld1_dup_vector, buffer_dup, , poly, p, 64, 1);
+    TEST_VLD1_DUP (vld1_dup_vector, buffer_dup, q, poly, p, 64, 2);
+
+    switch (i)
+    {
+    case 0:
+      CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected0, "");
+      CHECK (TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected0, "");
+      break;
+    case 1:
+      CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected1, "");
+      CHECK (TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected1, "");
+      break;
+    case 2:
+      CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected2, "");
+      CHECK (TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected2, "");
+      break;
+    default:
+      abort ();
+    }
+  }
+
+  /* vldX_p64 tests.  */
+#define DECL_VLDX(T1, W, N, X)						\
+  VECT_ARRAY_TYPE (T1, W, N, X) VECT_ARRAY_VAR (vldX_vector, T1, W, N, X); \
+  VECT_VAR_DECL (vldX_result_bis_##X, T1, W, N)[X * N]
+
+#define TEST_VLDX(Q, T1, T2, W, N, X)					\
+  VECT_ARRAY_VAR (vldX_vector, T1, W, N, X) =				\
+    /* Use a dedicated init buffer of size X.  */	\
+    vld##X##Q##_##T2##W (VECT_ARRAY_VAR (buffer_vld##X, T1, W, N, X));	\
+  vst##X##Q##_##T2##W (VECT_VAR (vldX_result_bis_##X, T1, W, N), \
+		      VECT_ARRAY_VAR (vldX_vector, T1, W, N, X));	\
+	 memcpy (VECT_VAR (result, T1, W, N), \
+		 VECT_VAR (vldX_result_bis_##X, T1, W, N), \
+		 sizeof (VECT_VAR (result, T1, W, N)));
+
+  /* Overwrite "result" with the contents of "result_bis"[Y].  */
+#define TEST_EXTRA_CHUNK(T1, W, N, X, Y)			\
+  memcpy (VECT_VAR (result, T1, W, N),				\
+	 &(VECT_VAR (vldX_result_bis_##X, T1, W, N)[Y*N]),	\
+	 sizeof (VECT_VAR (result, T1, W, N)));
+
+  DECL_VLDX (poly, 64, 1, 2);
+  DECL_VLDX (poly, 64, 1, 3);
+  DECL_VLDX (poly, 64, 1, 4);
+
+  VECT_ARRAY_INIT2 (buffer_vld2, poly, 64, 1);
+  PAD (buffer_vld2_pad, poly, 64, 1);
+  VECT_ARRAY_INIT3 (buffer_vld3, poly, 64, 1);
+  PAD (buffer_vld3_pad, poly, 64, 1);
+  VECT_ARRAY_INIT4 (buffer_vld4, poly, 64, 1);
+  PAD (buffer_vld4_pad, poly, 64, 1);
+
+#undef TEST_MSG
+#define TEST_MSG "VLD2/VLD2Q"
+  CLEAN (result, poly, 64, 1);
+  TEST_VLDX (, poly, p, 64, 1, 2);
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld2_expected_0, "chunk 0");
+  CLEAN (result, poly, 64, 1);
+  TEST_EXTRA_CHUNK (poly, 64, 1, 2, 1);
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld2_expected_1, "chunk 1");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD3/VLD3Q"
+  CLEAN (result, poly, 64, 1);
+  TEST_VLDX (, poly, p, 64, 1, 3);
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_0, "chunk 0");
+  CLEAN (result, poly, 64, 1);
+  TEST_EXTRA_CHUNK (poly, 64, 1, 3, 1);
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_1, "chunk 1");
+  CLEAN (result, poly, 64, 1);
+  TEST_EXTRA_CHUNK (poly, 64, 1, 3, 2);
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_2, "chunk 2");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD4/VLD4Q"
+  CLEAN (result, poly, 64, 1);
+  TEST_VLDX (, poly, p, 64, 1, 4);
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_0, "chunk 0");
+  CLEAN (result, poly, 64, 1);
+  TEST_EXTRA_CHUNK (poly, 64, 1, 4, 1);
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_1, "chunk 1");
+  CLEAN (result, poly, 64, 1);
+  TEST_EXTRA_CHUNK (poly, 64, 1, 4, 2);
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_2, "chunk 2");
+  CLEAN (result, poly, 64, 1);
+  TEST_EXTRA_CHUNK (poly, 64, 1, 4, 3);
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_3, "chunk 3");
+
+  /* vldX_dup_p64 tests.  */
+#define DECL_VLDX_DUP(T1, W, N, X)					\
+  VECT_ARRAY_TYPE (T1, W, N, X) VECT_ARRAY_VAR (vldX_dup_vector, T1, W, N, X); \
+  VECT_VAR_DECL (vldX_dup_result_bis_##X, T1, W, N)[X * N]
+
+#define TEST_VLDX_DUP(Q, T1, T2, W, N, X)				\
+  VECT_ARRAY_VAR (vldX_dup_vector, T1, W, N, X) =			\
+    vld##X##Q##_dup_##T2##W (&VECT_VAR (buffer_dup, T1, W, N)[0]);	\
+									\
+  vst##X##Q##_##T2##W (VECT_VAR (vldX_dup_result_bis_##X, T1, W, N),	\
+		      VECT_ARRAY_VAR (vldX_dup_vector, T1, W, N, X));	\
+  memcpy (VECT_VAR (result, T1, W, N), \
+	 VECT_VAR (vldX_dup_result_bis_##X, T1, W, N), \
+	 sizeof (VECT_VAR (result, T1, W, N)));
+
+  /* Overwrite "result" with the contents of "result_bis"[Y].  */
+#define TEST_VLDX_DUP_EXTRA_CHUNK(T1, W, N, X, Y)		\
+  memcpy (VECT_VAR (result, T1, W, N),				\
+	 &(VECT_VAR (vldX_dup_result_bis_##X, T1, W, N)[Y*N]),	\
+	 sizeof (VECT_VAR (result, T1, W, N)));
+
+  DECL_VLDX_DUP (poly, 64, 1, 2);
+  DECL_VLDX_DUP (poly, 64, 1, 3);
+  DECL_VLDX_DUP (poly, 64, 1, 4);
+
+
+#undef TEST_MSG
+#define TEST_MSG "VLD2_DUP/VLD2Q_DUP"
+  CLEAN (result, poly, 64, 1);
+  TEST_VLDX_DUP (, poly, p, 64, 1, 2);
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld2_dup_expected_0, "chunk 0");
+  CLEAN (result, poly, 64, 1);
+  TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 2, 1);
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld2_dup_expected_1, "chunk 1");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD3_DUP/VLD3Q_DUP"
+  CLEAN (result, poly, 64, 1);
+  TEST_VLDX_DUP (, poly, p, 64, 1, 3);
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_0, "chunk 0");
+  CLEAN (result, poly, 64, 1);
+  TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 3, 1);
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_1, "chunk 1");
+  CLEAN (result, poly, 64, 1);
+  TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 3, 2);
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_2, "chunk 2");
+
+#undef TEST_MSG
+#define TEST_MSG "VLD4_DUP/VLD4Q_DUP"
+  CLEAN (result, poly, 64, 1);
+  TEST_VLDX_DUP (, poly, p, 64, 1, 4);
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_0, "chunk 0");
+  CLEAN (result, poly, 64, 1);
+  TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 4, 1);
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_1, "chunk 1");
+  CLEAN (result, poly, 64, 1);
+  TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 4, 2);
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_2, "chunk 2");
+  CLEAN (result, poly, 64, 1);
+  TEST_VLDX_DUP_EXTRA_CHUNK (poly, 64, 1, 4, 3);
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_3, "chunk 3");
+
+  /* vsli_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VSLI"
+
+#define TEST_VSXI1(INSN, Q, T1, T2, W, N, V)				\
+  VECT_VAR (vsXi_vector_res, T1, W, N) = \
+    INSN##Q##_n_##T2##W (VECT_VAR (vsXi_vector, T1, W, N),		\
+		      VECT_VAR (vsXi_vector2, T1, W, N), \
+		      V);						\
+  vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), \
+		     VECT_VAR (vsXi_vector_res, T1, W, N))
+
+#define TEST_VSXI(INSN, Q, T1, T2, W, N, V)	\
+  TEST_VSXI1 (INSN, Q, T1, T2, W, N, V)
+
+  DECL_VARIABLE (vsXi_vector, poly, 64, 1);
+  DECL_VARIABLE (vsXi_vector, poly, 64, 2);
+  DECL_VARIABLE (vsXi_vector2, poly, 64, 1);
+  DECL_VARIABLE (vsXi_vector2, poly, 64, 2);
+  DECL_VARIABLE (vsXi_vector_res, poly, 64, 1);
+  DECL_VARIABLE (vsXi_vector_res, poly, 64, 2);
+
+  CLEAN (result, poly, 64, 1);
+  CLEAN (result, poly, 64, 2);
+
+  VLOAD (vsXi_vector, buffer, , poly, p, 64, 1);
+  VLOAD (vsXi_vector, buffer, q, poly, p, 64, 2);
+
+  VDUP (vsXi_vector2, , poly, p, 64, 1, 2);
+  VDUP (vsXi_vector2, q, poly, p, 64, 2, 3);
+
+  TEST_VSXI (vsli, , poly, p, 64, 1, 3);
+  TEST_VSXI (vsli, q, poly, p, 64, 2, 53);
+
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vsli_expected, "");
+  CHECK (TEST_MSG, poly, 64, 2, PRIx64, vsli_expected, "");
+
+  /* Test cases with maximum shift amount.  */
+  CLEAN (result, poly, 64, 1);
+  CLEAN (result, poly, 64, 2);
+
+  TEST_VSXI (vsli, , poly, p, 64, 1, 63);
+  TEST_VSXI (vsli, q, poly, p, 64, 2, 63);
+
+#define COMMENT "(max shift amount)"
+  CHECK (TEST_MSG, poly, 64, 1, PRIx64, vsli_expected_max_shift, COMMENT);
+  CHECK (TEST_MSG, poly, 64, 2, PRIx64, vsli_expected_max_shift, COMMENT);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c
index 5100375e5fe0c1f1f6b1e0cbff549990d73948e3..0c6b25d578102f042c669d9bdeaa15e5a1292267 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c
@@ -1,6 +1,7 @@
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
 
 /* Expected results.  */
 VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -25,6 +26,9 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
 					 0x55, 0x55, 0x55, 0x55 };
 VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
 					 0x66, 0x66, 0x66, 0x66 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x77 };
+#endif
 VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
 					   0x40533333, 0x40533333 };
 VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
@@ -62,6 +66,9 @@ void exec_vcombine (void)
   VDUP(vector64_b, , uint, u, 64, 1, 0x88);
   VDUP(vector64_b, , poly, p, 8, 8, 0x55);
   VDUP(vector64_b, , poly, p, 16, 4, 0x66);
+#if defined (__ARM_FEATURE_CRYPTO)
+  VDUP(vector64_b, , poly, p, 64, 1, 0x77);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   VDUP(vector64_b, , float, f, 16, 4, 2.25);
 #endif
@@ -80,6 +87,9 @@ void exec_vcombine (void)
   TEST_VCOMBINE(uint, u, 64, 1, 2);
   TEST_VCOMBINE(poly, p, 8, 8, 16);
   TEST_VCOMBINE(poly, p, 16, 4, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+  TEST_VCOMBINE(poly, p, 64, 1, 2);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   TEST_VCOMBINE(float, f, 16, 4, 8);
 #endif
@@ -95,6 +105,9 @@ void exec_vcombine (void)
   CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, "");
   CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, "");
   CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected, "");
+#if defined (__ARM_FEATURE_CRYPTO)
+  CHECK(TEST_MSG, poly, 64, 2, PRIx64, expected, "");
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, "");
 #endif
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c
index b8b338ef3c06ff2489b525e22760cbaed1fda335..d6d3bba39523e9e9f91b4fe80065682ea01bd6b8 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c
@@ -1,6 +1,7 @@
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
 
 /* Expected results.  */
 VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0x123456789abcdef0 };
 VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a,
 					0x78, 0x56, 0x34, 0x12 };
 VECT_VAR_DECL(expected,poly,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0x123456789abcdef0 };
+#endif
 VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 };
 VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x9abcdef0, 0x12345678 };
 
@@ -49,6 +53,9 @@ FNNAME (INSN_NAME)
   DECL_VAL(val, uint, 64, 1);
   DECL_VAL(val, poly, 8, 8);
   DECL_VAL(val, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+  DECL_VAL(val, poly, 64, 1);
+#endif
 
   DECL_VARIABLE(vector_res, int, 8, 8);
   DECL_VARIABLE(vector_res, int, 16, 4);
@@ -64,6 +71,9 @@ FNNAME (INSN_NAME)
   DECL_VARIABLE(vector_res, uint, 64, 1);
   DECL_VARIABLE(vector_res, poly, 8, 8);
   DECL_VARIABLE(vector_res, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+  DECL_VARIABLE(vector_res, poly, 64, 1);
+#endif
 
   clean_results ();
 
@@ -82,6 +92,9 @@ FNNAME (INSN_NAME)
   VECT_VAR(val, uint, 64, 1) = 0x123456789abcdef0ULL;
   VECT_VAR(val, poly, 8, 8) = 0x123456789abcdef0ULL;
   VECT_VAR(val, poly, 16, 4) = 0x123456789abcdef0ULL;
+#if defined (__ARM_FEATURE_CRYPTO)
+  VECT_VAR(val, poly, 64, 1) = 0x123456789abcdef0ULL;
+#endif
 
   TEST_VCREATE(int, s, 8, 8);
   TEST_VCREATE(int, s, 16, 4);
@@ -97,6 +110,9 @@ FNNAME (INSN_NAME)
   TEST_VCREATE(uint, u, 64, 1);
   TEST_VCREATE(poly, p, 8, 8);
   TEST_VCREATE(poly, p, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+  TEST_VCREATE(poly, p, 64, 1);
+#endif
 
   CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, "");
   CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, "");
@@ -108,6 +124,9 @@ FNNAME (INSN_NAME)
   CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, "");
   CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, "");
   CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, "");
+#if defined (__ARM_FEATURE_CRYPTO)
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected, "");
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, "");
 #endif
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c
index aef4173326c907a5f487f2520d0e354afbc16fd4..0a9db6367e9eba57d69487ff3011be9e2555ea35 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c
@@ -1,6 +1,7 @@
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
 
 /* We test vdup and vmov in the same place since they are aliases.  */
 
@@ -23,6 +24,11 @@ VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
 VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcc00,
 						0xcc00, 0xcc00 };
 #endif
+#if defined (__ARM_FEATURE_CRYPTO) && defined(__aarch64__)
+VECT_VAR_DECL(expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+					  0xfffffffffffffff0 };
+#endif
 VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
 VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
 					 0xf0, 0xf0, 0xf0, 0xf0,
@@ -77,6 +83,11 @@ VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
 VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xcb80,
 						0xcb80, 0xcb80 };
 #endif
+#if defined (__ARM_FEATURE_CRYPTO) && defined(__aarch64__)
+VECT_VAR_DECL(expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+					  0xfffffffffffffff1 };
+#endif
 VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
 VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
 					 0xf1, 0xf1, 0xf1, 0xf1,
@@ -131,6 +142,11 @@ VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
 VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb00, 0xcb00,
 						0xcb00, 0xcb00 };
 #endif
+#if defined (__ARM_FEATURE_CRYPTO) && defined(__aarch64__)
+VECT_VAR_DECL(expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+					  0xfffffffffffffff2 };
+#endif
 VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
 VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
 					 0xf2, 0xf2, 0xf2, 0xf2,
@@ -167,6 +183,12 @@ VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb00, 0xcb00,
 VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000,
 					    0xc1600000, 0xc1600000 };
 
+#if defined(__aarch64__)
+#define CHECK_EXEC_RESULTS(M, E, C) CHECK_RESULTS_NAMED_NO_FP16(M, E, C)
+#else
+#define CHECK_EXEC_RESULTS(M, E, C) CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64(M, E, C)
+#endif
+
 #define TEST_MSG "VDUP/VDUPQ"
 void exec_vdup_vmov (void)
 {
@@ -204,6 +226,9 @@ void exec_vdup_vmov (void)
 #if defined (FP16_SUPPORTED)
     TEST_VDUP(, float, f, 16, 4);
 #endif
+#if defined (__ARM_FEATURE_CRYPTO)
+    AARCH64_ONLY(TEST_VDUP(, poly, p, 64, 1));
+#endif
     TEST_VDUP(, float, f, 32, 2);
 
     TEST_VDUP(q, int, s, 8, 16);
@@ -219,18 +244,21 @@ void exec_vdup_vmov (void)
 #if defined (FP16_SUPPORTED)
     TEST_VDUP(q, float, f, 16, 8);
 #endif
+#if defined (__ARM_FEATURE_CRYPTO)
+    AARCH64_ONLY(TEST_VDUP(q, poly, p, 64, 2));
+#endif
     TEST_VDUP(q, float, f, 32, 4);
 
 #if defined (FP16_SUPPORTED)
     switch (i) {
     case 0:
-      CHECK_RESULTS_NAMED (TEST_MSG, expected0, "");
+      CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, "");
       break;
     case 1:
-      CHECK_RESULTS_NAMED (TEST_MSG, expected1, "");
+      CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected1, "");
       break;
     case 2:
-      CHECK_RESULTS_NAMED (TEST_MSG, expected2, "");
+      CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected2, "");
       break;
     default:
       abort();
@@ -271,6 +299,9 @@ void exec_vdup_vmov (void)
 #if defined (FP16_SUPPORTED)
     TEST_VMOV(, float, f, 16, 4);
 #endif
+#if defined (__ARM_FEATURE_CRYPTO)
+    AARCH64_ONLY(TEST_VMOV(, poly, p, 64, 1));
+#endif
     TEST_VMOV(, float, f, 32, 2);
 
     TEST_VMOV(q, int, s, 8, 16);
@@ -286,6 +317,9 @@ void exec_vdup_vmov (void)
 #if defined (FP16_SUPPORTED)
     TEST_VMOV(q, float, f, 16, 8);
 #endif
+#if defined (__ARM_FEATURE_CRYPTO)
+    AARCH64_ONLY(TEST_VMOV(q, poly, p, 64, 2));
+#endif
     TEST_VMOV(q, float, f, 32, 4);
 
 #if defined (FP16_SUPPORTED)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c
index 5d0dba36e297ffa6bccc956d0bc9e0c8ca793626..92899686c8213658a3aadfcdcb895ce11a2d7696 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c
@@ -1,6 +1,7 @@
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
 
 /* Expected results.  */
 VECT_VAR_DECL(expected,int,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
@@ -31,6 +32,10 @@ VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff1, 0xfffffff1,
 					0xfffffff1, 0xfffffff1 };
 VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0,
 					0xfffffffffffffff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,2) [] = { 0xfffffffffffffff0,
+					0xfffffffffffffff0 };
+#endif
 VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5,
 					 0xf5, 0xf5, 0xf5, 0xf5,
 					 0xf5, 0xf5, 0xf5, 0xf5,
@@ -53,6 +58,9 @@ VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xca80, 0xca80,
 					       0xca80, 0xca80,
 					       0xca80, 0xca80 };
 #endif
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
 VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
 					   0xc1700000, 0xc1700000 };
 
@@ -92,6 +100,9 @@ void exec_vdup_lane (void)
 #if defined (FP16_SUPPORTED)
   TEST_VDUP_LANE(, float, f, 16, 4, 4, 3);
 #endif
+#if defined (__ARM_FEATURE_CRYPTO)
+  TEST_VDUP_LANE(, poly, p, 64, 1, 1, 0);
+#endif
   TEST_VDUP_LANE(, float, f, 32, 2, 2, 1);
 
   TEST_VDUP_LANE(q, int, s, 8, 16, 8, 2);
@@ -107,6 +118,9 @@ void exec_vdup_lane (void)
 #if defined (FP16_SUPPORTED)
   TEST_VDUP_LANE(q, float, f, 16, 8, 4, 3);
 #endif
+#if defined (__ARM_FEATURE_CRYPTO)
+  TEST_VDUP_LANE(q, poly, p, 64, 2, 1, 0);
+#endif
   TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1);
 
 #if defined (FP16_SUPPORTED)
@@ -131,6 +145,9 @@ VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff,
 VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 };
 VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 };
 VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected2,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
 VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7,
 					 0xf7, 0xf7, 0xf7, 0xf7 };
 VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 };
@@ -159,6 +176,10 @@ VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff0, 0xfffffff0,
 					  0xfffffff0, 0xfffffff0 };
 VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff0,
 					  0xfffffffffffffff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected2,poly,64,2) [] = { 0xfffffffffffffff0,
+					  0xfffffffffffffff0 };
+#endif
 VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5,
 					  0xf5, 0xf5, 0xf5, 0xf5,
 					  0xf5, 0xf5, 0xf5, 0xf5,
@@ -204,6 +225,9 @@ VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
   TEST_VDUP_LANEQ(, uint, u, 64, 1, 2, 0);
   TEST_VDUP_LANEQ(, poly, p, 8, 8, 16, 7);
   TEST_VDUP_LANEQ(, poly, p, 16, 4, 8, 3);
+#if defined (__ARM_FEATURE_CRYPTO)
+  AARCH64_ONLY(TEST_VDUP_LANEQ(, poly, p, 64, 1, 2, 0));
+#endif
 #if defined (FP16_SUPPORTED)
   TEST_VDUP_LANEQ(, float, f, 16, 4, 8, 3);
 #endif
@@ -219,6 +243,9 @@ VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
   TEST_VDUP_LANEQ(q, uint, u, 64, 2, 2, 0);
   TEST_VDUP_LANEQ(q, poly, p, 8, 16, 16, 5);
   TEST_VDUP_LANEQ(q, poly, p, 16, 8, 8, 1);
+#if defined (__ARM_FEATURE_CRYPTO)
+  AARCH64_ONLY(TEST_VDUP_LANEQ(q, poly, p, 64, 2, 2, 0));
+#endif
 #if defined (FP16_SUPPORTED)
   TEST_VDUP_LANEQ(q, float, f, 16, 8, 8, 7);
 #endif
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c
index 9f0a1687f189bc2dfbe111c7f5c3b96c9acecd52..8c9f52e39aadcfb37fed3c3cefc5fef941ca5314 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c
@@ -1,6 +1,7 @@
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
 
 /* Expected results.  */
 VECT_VAR_DECL(expected,int,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 };
 VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
 					0xfc, 0xfd, 0xfe, 0xff };
 VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
 VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
 VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
 
@@ -50,6 +54,9 @@ void exec_vget_high (void)
   TEST_VGET_HIGH(uint, u, 64, 1, 2);
   TEST_VGET_HIGH(poly, p, 8, 8, 16);
   TEST_VGET_HIGH(poly, p, 16, 4, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+  TEST_VGET_HIGH(poly, p, 64, 1, 2);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   TEST_VGET_HIGH(float, f, 16, 4, 8);
 #endif
@@ -65,6 +72,7 @@ void exec_vget_high (void)
   CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, "");
   CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, "");
   CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, "");
+  CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected, "");
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, "");
 }
 
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_lane.c
index ee6d6503ad622c936be2f6c7468db845398a6c2e..ab010e309bedf1dcb97749cd8b69afc829805437 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_lane.c
@@ -1,6 +1,7 @@
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
 
 /* Expected results.  */
 int8_t     expected_s8   = 0xf7;
@@ -13,6 +14,9 @@ uint32_t   expected_u32  = 0xfffffff1;
 uint64_t   expected_u64  = 0xfffffffffffffff0;
 poly8_t    expected_p8   = 0xf6;
 poly16_t   expected_p16  = 0xfff2;
+#if defined (__ARM_FEATURE_CRYPTO)
+poly64_t   expected_p64  = 0xfffffffffffffff0;
+#endif
 hfloat16_t expected_f16  = 0xcb80;
 hfloat32_t expected_f32  = 0xc1700000;
 
@@ -26,6 +30,9 @@ uint32_t   expectedq_u32 = 0xfffffff2;
 uint64_t   expectedq_u64 = 0xfffffffffffffff1;
 poly8_t    expectedq_p8  = 0xfe;
 poly16_t   expectedq_p16 = 0xfff6;
+#if defined (__ARM_FEATURE_CRYPTO)
+poly64_t   expectedq_p64 = 0xfffffffffffffff1;
+#endif
 hfloat16_t expectedq_f16 = 0xca80;
 hfloat32_t expectedq_f32 = 0xc1500000;
 
@@ -89,6 +96,9 @@ void exec_vget_lane (void)
   VAR_DECL(var, uint, 64);
   VAR_DECL(var, poly, 8);
   VAR_DECL(var, poly, 16);
+#if defined (__ARM_FEATURE_CRYPTO)
+  VAR_DECL(var, poly, 64);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   VAR_DECL(var, float, 16);
 #endif
@@ -114,6 +124,9 @@ void exec_vget_lane (void)
   TEST_VGET_LANE(, uint, u, 64, 1, 0);
   TEST_VGET_LANE(, poly, p, 8, 8, 6);
   TEST_VGET_LANE(, poly, p, 16, 4, 2);
+#if defined (__ARM_FEATURE_CRYPTO)
+  TEST_VGET_LANE(, poly, p, 64, 1, 0);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   TEST_VGET_LANE_FP(, float, f, 16, 4, 1);
 #endif
@@ -129,6 +142,9 @@ void exec_vget_lane (void)
   TEST_VGET_LANE(q, uint, u, 64, 2, 1);
   TEST_VGET_LANE(q, poly, p, 8, 16, 14);
   TEST_VGET_LANE(q, poly, p, 16, 8, 6);
+#if defined (__ARM_FEATURE_CRYPTO)
+  AARCH64_ONLY(TEST_VGET_LANE(q, poly, p, 64, 2, 1));
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   TEST_VGET_LANE_FP(q, float, f, 16, 8, 3);
 #endif
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c
index 2b875b9b7b86d850647cf977086f336b932cfb0b..6a67baa6c64ae59b6d454d6c97b9a219e2610490 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c
@@ -1,6 +1,7 @@
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
 
 /* Expected results.  */
 VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
 					0xf4, 0xf5, 0xf6, 0xf7 };
 VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
 VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
 VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
 
@@ -50,6 +54,9 @@ void exec_vget_low (void)
   TEST_VGET_LOW(uint, u, 64, 1, 2);
   TEST_VGET_LOW(poly, p, 8, 8, 16);
   TEST_VGET_LOW(poly, p, 16, 4, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+  TEST_VGET_LOW(poly, p, 64, 1, 2);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   TEST_VGET_LOW(float, f, 16, 4, 8);
 #endif
@@ -65,6 +72,9 @@ void exec_vget_low (void)
   CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, "");
   CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, "");
   CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, "");
+#if defined (__ARM_FEATURE_CRYPTO)
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected, "");
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, "");
 #endif
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c
index 4ed0e464f9ce6b0f599a6a72d3f49db5ac9a0374..96cf06be923efa47e7977d02a8ad63ce2e6cba1f 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c
@@ -1,6 +1,7 @@
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
 
 /* Expected results.  */
 VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
 					0xf4, 0xf5, 0xf6, 0xf7 };
 VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
 VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
 VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
 VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -45,6 +49,10 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
 					 0xfc, 0xfd, 0xfe, 0xff };
 VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
 					 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,2) [] = { 0xfffffffffffffff0,
+					 0xfffffffffffffff1 };
+#endif
 VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80,
 					   0xca00, 0xc980, 0xc900, 0xc880 };
 VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c
index 34be214e9122c5701a341b09479443fdb5f2716b..62585e8371fd1c738d0285a7997cc3bb1ab30948 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c
@@ -1,6 +1,7 @@
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
 
 /* Expected results.  */
 /* Chunk 0.  */
@@ -17,6 +18,9 @@ VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
 					 0xf0, 0xf0, 0xf0, 0xf0 };
 VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
 VECT_VAR_DECL(expected0,hfloat,16,4) [] = { 0xcc00, 0xcc00, 0xcc00, 0xcc00 };
 VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 };
 VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
@@ -45,6 +49,10 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0,
 					  0xf0, 0xf0, 0xf0, 0xf0 };
 VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0,
 					  0xfff0, 0xfff0, 0xfff0, 0xfff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+					  0xfffffffffffffff0 };
+#endif
 VECT_VAR_DECL(expected0,hfloat,16,8) [] = { 0xcc00, 0xcc00, 0xcc00, 0xcc00,
 					    0xcc00, 0xcc00, 0xcc00, 0xcc00 };
 VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000,
@@ -64,6 +72,9 @@ VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 };
 VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
 					 0xf1, 0xf1, 0xf1, 0xf1 };
 VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
 VECT_VAR_DECL(expected1,hfloat,16,4) [] = { 0xcb80, 0xcb80, 0xcb80, 0xcb80 };
 VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 };
 VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
@@ -92,6 +103,10 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1,
 					  0xf1, 0xf1, 0xf1, 0xf1 };
 VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1,
 					  0xfff1, 0xfff1, 0xfff1, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+					  0xfffffffffffffff1 };
+#endif
 VECT_VAR_DECL(expected1,hfloat,16,8) [] = { 0xcb80, 0xcb80, 0xcb80, 0xcb80,
 					    0xcb80, 0xcb80, 0xcb80, 0xcb80 };
 VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000,
@@ -111,6 +126,9 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 };
 VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
 					 0xf2, 0xf2, 0xf2, 0xf2 };
 VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
 VECT_VAR_DECL(expected2,hfloat,16,4) [] = { 0xcb00, 0xcb00, 0xcb00, 0xcb00 };
 VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 };
 VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
@@ -139,6 +157,10 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2,
 					  0xf2, 0xf2, 0xf2, 0xf2 };
 VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2,
 					  0xfff2, 0xfff2, 0xfff2, 0xfff2 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+					  0xfffffffffffffff2 };
+#endif
 VECT_VAR_DECL(expected2,hfloat,16,8) [] = { 0xcb00, 0xcb00, 0xcb00, 0xcb00,
 					    0xcb00, 0xcb00, 0xcb00, 0xcb00 };
 VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000,
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c
index e1e8562ac6be424e638d11a90aeb406116abca24..94d349fac42992eee239dfca2cecab6bbbf4afe8 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c
@@ -1,6 +1,7 @@
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
 
 /* Expected results.  */
 
@@ -18,6 +19,11 @@ VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
 					       0xf4, 0xf5, 0xf6, 0xf7 };
 VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld2_0,poly,64,2) [] = { 0xfffffffffffffff0,
+						0xfffffffffffffff1 };
+#endif
 VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
 VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
 VECT_VAR_DECL(expected_vld2_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -61,6 +67,11 @@ VECT_VAR_DECL(expected_vld2_1,uint,64,1) [] = { 0xfffffffffffffff1 };
 VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
 					       0xfc, 0xfd, 0xfe, 0xff };
 VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld2_1,poly,64,2) [] = { 0xfffffffffffffff2,
+						0xfffffffffffffff3 };
+#endif
 VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
 VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
 VECT_VAR_DECL(expected_vld2_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
@@ -104,6 +115,11 @@ VECT_VAR_DECL(expected_vld3_0,uint,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
 					       0xf4, 0xf5, 0xf6, 0xf7 };
 VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld3_0,poly,64,2) [] = { 0xfffffffffffffff0,
+						0xfffffffffffffff1 };
+#endif
 VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
 VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
 VECT_VAR_DECL(expected_vld3_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -147,6 +163,11 @@ VECT_VAR_DECL(expected_vld3_1,uint,64,1) [] = { 0xfffffffffffffff1 };
 VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
 					       0xfc, 0xfd, 0xfe, 0xff };
 VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld3_1,poly,64,2) [] = { 0xfffffffffffffff2,
+						0xfffffffffffffff3 };
+#endif
 VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
 VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
 VECT_VAR_DECL(expected_vld3_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
@@ -193,6 +214,11 @@ VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
 					       0x4, 0x5, 0x6, 0x7 };
 VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff8, 0xfff9,
 						0xfffa, 0xfffb };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld3_2,poly,64,2) [] = { 0xfffffffffffffff4,
+						0xfffffffffffffff5 };
+#endif
 VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 };
 VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
 VECT_VAR_DECL(expected_vld3_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
@@ -238,6 +264,11 @@ VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
 					       0xf4, 0xf5, 0xf6, 0xf7 };
 VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld4_0,poly,64,2) [] = { 0xfffffffffffffff0,
+						0xfffffffffffffff1 };
+#endif
 VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
 VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
 VECT_VAR_DECL(expected_vld4_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
@@ -281,6 +312,11 @@ VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0xfffffffffffffff1 };
 VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
 					       0xfc, 0xfd, 0xfe, 0xff };
 VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld4_1,poly,64,2) [] = { 0xfffffffffffffff2,
+						0xfffffffffffffff3 };
+#endif
 VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 };
 VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
 VECT_VAR_DECL(expected_vld4_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3,
@@ -324,6 +360,11 @@ VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0xfffffffffffffff2 };
 VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3,
 					       0x4, 0x5, 0x6, 0x7 };
 VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld4_2,poly,64,2) [] = { 0xfffffffffffffff4,
+						0xfffffffffffffff5 };
+#endif
 VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 };
 VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 };
 VECT_VAR_DECL(expected_vld4_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13,
@@ -367,6 +408,11 @@ VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0xfffffffffffffff3 };
 VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0x8, 0x9, 0xa, 0xb,
 					       0xc, 0xd, 0xe, 0xf };
 VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld4_3,poly,64,2) [] = { 0xfffffffffffffff6,
+						0xfffffffffffffff7 };
+#endif
 VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xc400, 0xc200, 0xc000, 0xbc00 };
 VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1200000, 0xc1100000 };
 VECT_VAR_DECL(expected_vld4_3,int,8,16) [] = { 0x20, 0x21, 0x22, 0x23,
@@ -424,27 +470,42 @@ void exec_vldX (void)
 	 &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]),	\
 	 sizeof(VECT_VAR(result, T1, W, N)));
 
+#if defined (__ARM_FEATURE_CRYPTO)
+#define DECL_VLDX_CRYPTO(T1, W, N, X) \
+  DECL_VLDX(T1, W, N, X)
+#define TEST_VLDX_CRYPTO(Q, T1, T2, W, N, X) \
+  TEST_VLDX(Q, T1, T2, W, N, X)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y) \
+  TEST_EXTRA_CHUNK(T1, W, N, X, Y)
+#else
+#define DECL_VLDX_CRYPTO(T1, W, N, X)
+#define TEST_VLDX_CRYPTO(Q, T1, T2, W, N, X)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y)
+#endif
+
   /* We need all variants in 64 bits, but there is no 64x2 variant.  */
-#define DECL_ALL_VLDX_NO_FP16(X)		\
-  DECL_VLDX(int, 8, 8, X);			\
-  DECL_VLDX(int, 16, 4, X);			\
-  DECL_VLDX(int, 32, 2, X);			\
-  DECL_VLDX(int, 64, 1, X);			\
-  DECL_VLDX(uint, 8, 8, X);			\
-  DECL_VLDX(uint, 16, 4, X);			\
-  DECL_VLDX(uint, 32, 2, X);			\
-  DECL_VLDX(uint, 64, 1, X);			\
-  DECL_VLDX(poly, 8, 8, X);			\
-  DECL_VLDX(poly, 16, 4, X);			\
-  DECL_VLDX(float, 32, 2, X);			\
-  DECL_VLDX(int, 8, 16, X);			\
-  DECL_VLDX(int, 16, 8, X);			\
-  DECL_VLDX(int, 32, 4, X);			\
-  DECL_VLDX(uint, 8, 16, X);			\
-  DECL_VLDX(uint, 16, 8, X);			\
-  DECL_VLDX(uint, 32, 4, X);			\
-  DECL_VLDX(poly, 8, 16, X);			\
-  DECL_VLDX(poly, 16, 8, X);			\
+#define DECL_ALL_VLDX_NO_FP16(X)		  \
+  DECL_VLDX(int, 8, 8, X);			  \
+  DECL_VLDX(int, 16, 4, X);			  \
+  DECL_VLDX(int, 32, 2, X);			  \
+  DECL_VLDX(int, 64, 1, X);			  \
+  DECL_VLDX(uint, 8, 8, X);			  \
+  DECL_VLDX(uint, 16, 4, X);			  \
+  DECL_VLDX(uint, 32, 2, X);			  \
+  DECL_VLDX(uint, 64, 1, X);			  \
+  DECL_VLDX(poly, 8, 8, X);			  \
+  DECL_VLDX(poly, 16, 4, X);			  \
+  DECL_VLDX_CRYPTO(poly, 64, 1, X);		  \
+  DECL_VLDX(float, 32, 2, X);			  \
+  DECL_VLDX(int, 8, 16, X);			  \
+  DECL_VLDX(int, 16, 8, X);			  \
+  DECL_VLDX(int, 32, 4, X);			  \
+  DECL_VLDX(uint, 8, 16, X);			  \
+  DECL_VLDX(uint, 16, 8, X);			  \
+  DECL_VLDX(uint, 32, 4, X);			  \
+  DECL_VLDX(poly, 8, 16, X);			  \
+  DECL_VLDX(poly, 16, 8, X);			  \
+  AARCH64_ONLY(DECL_VLDX_CRYPTO(poly, 64, 2, X)); \
   DECL_VLDX(float, 32, 4, X)
 
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -456,26 +517,28 @@ void exec_vldX (void)
 #define DECL_ALL_VLDX(X) DECL_ALL_VLDX_NO_FP16(X)
 #endif
 
-#define TEST_ALL_VLDX_NO_FP16(X)		\
-  TEST_VLDX(, int, s, 8, 8, X);			\
-  TEST_VLDX(, int, s, 16, 4, X);		\
-  TEST_VLDX(, int, s, 32, 2, X);		\
-  TEST_VLDX(, int, s, 64, 1, X);		\
-  TEST_VLDX(, uint, u, 8, 8, X);		\
-  TEST_VLDX(, uint, u, 16, 4, X);		\
-  TEST_VLDX(, uint, u, 32, 2, X);		\
-  TEST_VLDX(, uint, u, 64, 1, X);		\
-  TEST_VLDX(, poly, p, 8, 8, X);		\
-  TEST_VLDX(, poly, p, 16, 4, X);		\
-  TEST_VLDX(, float, f, 32, 2, X);		\
-  TEST_VLDX(q, int, s, 8, 16, X);		\
-  TEST_VLDX(q, int, s, 16, 8, X);		\
-  TEST_VLDX(q, int, s, 32, 4, X);		\
-  TEST_VLDX(q, uint, u, 8, 16, X);		\
-  TEST_VLDX(q, uint, u, 16, 8, X);		\
-  TEST_VLDX(q, uint, u, 32, 4, X);		\
-  TEST_VLDX(q, poly, p, 8, 16, X);		\
-  TEST_VLDX(q, poly, p, 16, 8, X);		\
+#define TEST_ALL_VLDX_NO_FP16(X)			\
+  TEST_VLDX(, int, s, 8, 8, X);				\
+  TEST_VLDX(, int, s, 16, 4, X);			\
+  TEST_VLDX(, int, s, 32, 2, X);			\
+  TEST_VLDX(, int, s, 64, 1, X);			\
+  TEST_VLDX(, uint, u, 8, 8, X);			\
+  TEST_VLDX(, uint, u, 16, 4, X);			\
+  TEST_VLDX(, uint, u, 32, 2, X);			\
+  TEST_VLDX(, uint, u, 64, 1, X);			\
+  TEST_VLDX(, poly, p, 8, 8, X);			\
+  TEST_VLDX(, poly, p, 16, 4, X);			\
+  TEST_VLDX_CRYPTO(, poly, p, 64, 1, X);		\
+  TEST_VLDX(, float, f, 32, 2, X);			\
+  TEST_VLDX(q, int, s, 8, 16, X);			\
+  TEST_VLDX(q, int, s, 16, 8, X);			\
+  TEST_VLDX(q, int, s, 32, 4, X);			\
+  TEST_VLDX(q, uint, u, 8, 16, X);			\
+  TEST_VLDX(q, uint, u, 16, 8, X);			\
+  TEST_VLDX(q, uint, u, 32, 4, X);			\
+  TEST_VLDX(q, poly, p, 8, 16, X);			\
+  TEST_VLDX(q, poly, p, 16, 8, X);			\
+  AARCH64_ONLY(TEST_VLDX_CRYPTO(q, poly, p, 64, 2, X));	\
   TEST_VLDX(q, float, f, 32, 4, X)
 
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -487,26 +550,28 @@ void exec_vldX (void)
 #define TEST_ALL_VLDX(X) TEST_ALL_VLDX_NO_FP16(X)
 #endif
 
-#define TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y)	\
-  TEST_EXTRA_CHUNK(int, 8, 8, X, Y);		\
-  TEST_EXTRA_CHUNK(int, 16, 4, X, Y);		\
-  TEST_EXTRA_CHUNK(int, 32, 2, X, Y);		\
-  TEST_EXTRA_CHUNK(int, 64, 1, X, Y);		\
-  TEST_EXTRA_CHUNK(uint, 8, 8, X, Y);		\
-  TEST_EXTRA_CHUNK(uint, 16, 4, X, Y);		\
-  TEST_EXTRA_CHUNK(uint, 32, 2, X, Y);		\
-  TEST_EXTRA_CHUNK(uint, 64, 1, X, Y);		\
-  TEST_EXTRA_CHUNK(poly, 8, 8, X, Y);		\
-  TEST_EXTRA_CHUNK(poly, 16, 4, X, Y);		\
-  TEST_EXTRA_CHUNK(float, 32, 2, X, Y);		\
-  TEST_EXTRA_CHUNK(int, 8, 16, X, Y);		\
-  TEST_EXTRA_CHUNK(int, 16, 8, X, Y);		\
-  TEST_EXTRA_CHUNK(int, 32, 4, X, Y);		\
-  TEST_EXTRA_CHUNK(uint, 8, 16, X, Y);		\
-  TEST_EXTRA_CHUNK(uint, 16, 8, X, Y);		\
-  TEST_EXTRA_CHUNK(uint, 32, 4, X, Y);		\
-  TEST_EXTRA_CHUNK(poly, 8, 16, X, Y);		\
-  TEST_EXTRA_CHUNK(poly, 16, 8, X, Y);		\
+#define TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y)		    \
+  TEST_EXTRA_CHUNK(int, 8, 8, X, Y);			    \
+  TEST_EXTRA_CHUNK(int, 16, 4, X, Y);			    \
+  TEST_EXTRA_CHUNK(int, 32, 2, X, Y);			    \
+  TEST_EXTRA_CHUNK(int, 64, 1, X, Y);			    \
+  TEST_EXTRA_CHUNK(uint, 8, 8, X, Y);			    \
+  TEST_EXTRA_CHUNK(uint, 16, 4, X, Y);			    \
+  TEST_EXTRA_CHUNK(uint, 32, 2, X, Y);			    \
+  TEST_EXTRA_CHUNK(uint, 64, 1, X, Y);			    \
+  TEST_EXTRA_CHUNK(poly, 8, 8, X, Y);			    \
+  TEST_EXTRA_CHUNK(poly, 16, 4, X, Y);			    \
+  TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 1, X, Y);		    \
+  TEST_EXTRA_CHUNK(float, 32, 2, X, Y);			    \
+  TEST_EXTRA_CHUNK(int, 8, 16, X, Y);			    \
+  TEST_EXTRA_CHUNK(int, 16, 8, X, Y);			    \
+  TEST_EXTRA_CHUNK(int, 32, 4, X, Y);			    \
+  TEST_EXTRA_CHUNK(uint, 8, 16, X, Y);			    \
+  TEST_EXTRA_CHUNK(uint, 16, 8, X, Y);			    \
+  TEST_EXTRA_CHUNK(uint, 32, 4, X, Y);			    \
+  TEST_EXTRA_CHUNK(poly, 8, 16, X, Y);			    \
+  TEST_EXTRA_CHUNK(poly, 16, 8, X, Y);			    \
+  AARCH64_ONLY(TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 2, X, Y)); \
   TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
 
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -519,27 +584,29 @@ void exec_vldX (void)
 #endif
 
   /* vldX supports all vector types except [u]int64x2.  */
-#define CHECK_RESULTS_VLDX_NO_FP16(test_name,EXPECTED,comment)		\
-    CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment);		\
-    CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment);		\
-    CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment);		\
-    CHECK(test_name, int, 64, 1, PRIx64, EXPECTED, comment);		\
-    CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment);		\
-    CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment);		\
-    CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment);		\
-    CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment);		\
-    CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment);		\
-    CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment);		\
-    CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment);	\
-									\
-    CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment);		\
-    CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment);		\
-    CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment);		\
-    CHECK(test_name, uint, 8, 16, PRIx8, EXPECTED, comment);		\
-    CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment);		\
-    CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment);		\
-    CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment);		\
-    CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment);		\
+#define CHECK_RESULTS_VLDX_NO_FP16(test_name,EXPECTED,comment)			   \
+    CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment);			   \
+    CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment);			   \
+    CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment);			   \
+    CHECK(test_name, int, 64, 1, PRIx64, EXPECTED, comment);			   \
+    CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment);			   \
+    CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment);			   \
+    CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment);			   \
+    CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment);			   \
+    CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment);			   \
+    CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment);			   \
+    CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment);		   \
+    CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment);		   \
+										   \
+    CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment);			   \
+    CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment);			   \
+    CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment);			   \
+    CHECK(test_name, uint, 8, 16, PRIx8, EXPECTED, comment);			   \
+    CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment);			   \
+    CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment);			   \
+    CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment);			   \
+    CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment);			   \
+    AARCH64_ONLY(CHECK_CRYPTO(test_name, poly, 64, 2, PRIx64, EXPECTED, comment)); \
     CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment)
 
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -580,6 +647,10 @@ void exec_vldX (void)
   PAD(buffer_vld2_pad, poly, 8, 8);
   VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4);
   PAD(buffer_vld2_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+  VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 1);
+  PAD(buffer_vld2_pad, poly, 64, 1);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   VECT_ARRAY_INIT2(buffer_vld2, float, 16, 4);
   PAD(buffer_vld2_pad, float, 16, 4);
@@ -607,6 +678,10 @@ void exec_vldX (void)
   PAD(buffer_vld2_pad, poly, 8, 16);
   VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8);
   PAD(buffer_vld2_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO) && defined (__aarch64__)
+  VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 2);
+  PAD(buffer_vld2_pad, poly, 64, 2);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   VECT_ARRAY_INIT2(buffer_vld2, float, 16, 8);
   PAD(buffer_vld2_pad, float, 16, 8);
@@ -635,6 +710,10 @@ void exec_vldX (void)
   PAD(buffer_vld3_pad, poly, 8, 8);
   VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4);
   PAD(buffer_vld3_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+  VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 1);
+  PAD(buffer_vld3_pad, poly, 64, 1);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   VECT_ARRAY_INIT3(buffer_vld3, float, 16, 4);
   PAD(buffer_vld3_pad, float, 16, 4);
@@ -662,6 +741,10 @@ void exec_vldX (void)
   PAD(buffer_vld3_pad, poly, 8, 16);
   VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8);
   PAD(buffer_vld3_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO) && defined (__aarch64__)
+  VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 2);
+  PAD(buffer_vld3_pad, poly, 64, 2);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   VECT_ARRAY_INIT3(buffer_vld3, float, 16, 8);
   PAD(buffer_vld3_pad, float, 16, 8);
@@ -690,6 +773,10 @@ void exec_vldX (void)
   PAD(buffer_vld4_pad, poly, 8, 8);
   VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4);
   PAD(buffer_vld4_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+  VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 1);
+  PAD(buffer_vld4_pad, poly, 64, 1);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   VECT_ARRAY_INIT4(buffer_vld4, float, 16, 4);
   PAD(buffer_vld4_pad, float, 16, 4);
@@ -717,6 +804,10 @@ void exec_vldX (void)
   PAD(buffer_vld4_pad, poly, 8, 16);
   VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8);
   PAD(buffer_vld4_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO) && defined (__aarch64__)
+  VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 2);
+  PAD(buffer_vld4_pad, poly, 64, 2);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   VECT_ARRAY_INIT4(buffer_vld4, float, 16, 8);
   PAD(buffer_vld4_pad, float, 16, 8);
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c
index b44a987cb5d2f169b633d9c1e862fb782bd65d39..60fdd20f42a19862684c28c6f44db3f6f5642c98 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c
@@ -1,6 +1,7 @@
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
 
 /* Expected results.  */
 
@@ -18,6 +19,9 @@ VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
 					0xf0, 0xf1, 0xf0, 0xf1 };
 VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
 VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = {0xcc00, 0xcb80, 0xcc00, 0xcb80 };
 VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
 
@@ -36,6 +40,9 @@ VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1,
 					       0xf0, 0xf1, 0xf0, 0xf1 };
 VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff0, 0xfff1,
 						0xfff0, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
 VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcc00, 0xcb80 };
 VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
 
@@ -56,6 +63,9 @@ VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0,
 					       0xf1, 0xf2, 0xf0, 0xf1 };
 VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1,
 						0xfff2, 0xfff0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
 VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xcc00 };
 VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
 
@@ -76,6 +86,9 @@ VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2,
 					       0xf0, 0xf1, 0xf2, 0xf0 };
 VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff1, 0xfff2,
 						0xfff0, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
 VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xcb80, 0xcb00, 0xcc00, 0xcb80 };
 VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1800000 };
 
@@ -96,6 +109,9 @@ VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1,
 					       0xf2, 0xf0, 0xf1, 0xf2 };
 VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff2, 0xfff0,
 						0xfff1, 0xfff2 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
 VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xcb00, 0xcc00, 0xcb80, 0xcb00 };
 VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1700000, 0xc1600000 };
 
@@ -114,6 +130,9 @@ VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
 					       0xf0, 0xf1, 0xf2, 0xf3 };
 VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
 VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
 VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
 
@@ -131,6 +150,9 @@ VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0xfffffffffffffff1 };
 VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
 					       0xf0, 0xf1, 0xf2, 0xf3 };
 VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
 VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
 VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
 
@@ -148,6 +170,9 @@ VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0xfffffffffffffff2 };
 VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
 					       0xf0, 0xf1, 0xf2, 0xf3 };
 VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
 VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
 VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
 
@@ -165,6 +190,9 @@ VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0xfffffffffffffff3 };
 VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
 					       0xf0, 0xf1, 0xf2, 0xf3 };
 VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+#endif
 VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
 VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
 
@@ -197,6 +225,16 @@ void exec_vldX_dup (void)
 	 &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]),	\
 	 sizeof(VECT_VAR(result, T1, W, N)));
 
+#if defined (__ARM_FEATURE_CRYPTO)
+#define TEST_VLDX_DUP_CRYPTO(Q, T1, T2, W, N, X) TEST_VLDX_DUP(Q, T1, T2, W, N, X)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X,Y) TEST_EXTRA_CHUNK(T1, W, N, X,Y)
+#define DECL_VLDX_DUP_CRYPTO(T1, W, N, X) DECL_VLDX_DUP(T1, W, N, X)
+#else
+#define TEST_VLDX_DUP_CRYPTO(Q, T1, T2, W, N, X)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X,Y)
+#define DECL_VLDX_DUP_CRYPTO(T1, W, N, X)
+#endif
+
 #define DECL_ALL_VLDX_DUP_NO_FP16(X)		\
   DECL_VLDX_DUP(int, 8, 8, X);			\
   DECL_VLDX_DUP(int, 16, 4, X);			\
@@ -208,6 +246,7 @@ void exec_vldX_dup (void)
   DECL_VLDX_DUP(uint, 64, 1, X);		\
   DECL_VLDX_DUP(poly, 8, 8, X);			\
   DECL_VLDX_DUP(poly, 16, 4, X);		\
+  DECL_VLDX_DUP_CRYPTO(poly, 64, 1, X);		\
   DECL_VLDX_DUP(float, 32, 2, X)
 
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -229,6 +268,7 @@ void exec_vldX_dup (void)
   TEST_VLDX_DUP(, uint, u, 64, 1, X);		\
   TEST_VLDX_DUP(, poly, p, 8, 8, X);		\
   TEST_VLDX_DUP(, poly, p, 16, 4, X);		\
+  TEST_VLDX_DUP_CRYPTO(, poly, p, 64, 1, X);	\
   TEST_VLDX_DUP(, float, f, 32, 2, X)
 
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -250,6 +290,7 @@ void exec_vldX_dup (void)
   TEST_EXTRA_CHUNK(uint, 64, 1, X, Y);		\
   TEST_EXTRA_CHUNK(poly, 8, 8, X, Y);		\
   TEST_EXTRA_CHUNK(poly, 16, 4, X, Y);		\
+  TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 1, X, Y);	\
   TEST_EXTRA_CHUNK(float, 32, 2, X, Y)
 
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -272,6 +313,7 @@ void exec_vldX_dup (void)
     CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment);		\
     CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment);		\
     CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment);		\
+    CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment);	\
     CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment)
 
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -313,6 +355,10 @@ void exec_vldX_dup (void)
   PAD(buffer_vld2_pad, poly, 8, 8);
   VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4);
   PAD(buffer_vld2_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+  VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 1);
+  PAD(buffer_vld2_pad, poly, 64, 1);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   VECT_ARRAY_INIT2(buffer_vld2, float, 16, 4);
   PAD(buffer_vld2_pad, float, 16, 4);
@@ -340,6 +386,10 @@ void exec_vldX_dup (void)
   PAD(buffer_vld2_pad, poly, 8, 16);
   VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8);
   PAD(buffer_vld2_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+  VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 2);
+  PAD(buffer_vld2_pad, poly, 64, 2);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   VECT_ARRAY_INIT2(buffer_vld2, float, 16, 8);
   PAD(buffer_vld2_pad, float, 16, 8);
@@ -368,6 +418,10 @@ void exec_vldX_dup (void)
   PAD(buffer_vld3_pad, poly, 8, 8);
   VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4);
   PAD(buffer_vld3_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+  VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 1);
+  PAD(buffer_vld3_pad, poly, 64, 1);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   VECT_ARRAY_INIT3(buffer_vld3, float, 16, 4);
   PAD(buffer_vld3_pad, float, 16, 4);
@@ -395,6 +449,10 @@ void exec_vldX_dup (void)
   PAD(buffer_vld3_pad, poly, 8, 16);
   VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8);
   PAD(buffer_vld3_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+  VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 2);
+  PAD(buffer_vld3_pad, poly, 64, 2);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   VECT_ARRAY_INIT3(buffer_vld3, float, 16, 8);
   PAD(buffer_vld3_pad, float, 16, 8);
@@ -423,6 +481,10 @@ void exec_vldX_dup (void)
   PAD(buffer_vld4_pad, poly, 8, 8);
   VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4);
   PAD(buffer_vld4_pad, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+  VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 1);
+  PAD(buffer_vld4_pad, poly, 64, 1);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   VECT_ARRAY_INIT4(buffer_vld4, float, 16, 4);
   PAD(buffer_vld4_pad, float, 16, 4);
@@ -450,6 +512,10 @@ void exec_vldX_dup (void)
   PAD(buffer_vld4_pad, poly, 8, 16);
   VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8);
   PAD(buffer_vld4_pad, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+  VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 2);
+  PAD(buffer_vld4_pad, poly, 64, 2);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   VECT_ARRAY_INIT4(buffer_vld4, float, 16, 8);
   PAD(buffer_vld4_pad, float, 16, 8);
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c
index cda76abfe0a18f648331ec9cffc030368b2a7c70..c57be3023a0625753599d2b2702ef84937cc4255 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c
@@ -1,6 +1,7 @@
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
 
 /* Expected results.  */
 
@@ -18,6 +19,11 @@ VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
 					       0xaa, 0xaa, 0xaa, 0xaa };
 VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xaaaa, 0xaaaa,
 						0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld2_0,poly,64,2) [] = { 0xfffffffffffffff0,
+						0xfffffffffffffff1 };
+#endif
 VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
 VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
 VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -47,6 +53,11 @@ VECT_VAR_DECL(expected_vld2_1,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
 VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa,
 					       0xaa, 0xaa, 0xaa, 0xaa };
 VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld2_1,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+						0xaaaaaaaaaaaaaaaa };
+#endif
 VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xaaaa, 0xaaaa };
 VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
 VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -76,6 +87,11 @@ VECT_VAR_DECL(expected_vld3_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
 VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
 					       0xaa, 0xaa, 0xaa, 0xaa };
 VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld3_0,poly,64,2) [] = { 0xfffffffffffffff0,
+						0xfffffffffffffff1 };
+#endif
 VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
 VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
 VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -105,6 +121,11 @@ VECT_VAR_DECL(expected_vld3_1,uint,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 };
 VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
 					       0xf0, 0xf1, 0xf2, 0xaa };
 VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld3_1,poly,64,2) [] = { 0xfffffffffffffff2,
+						0xaaaaaaaaaaaaaaaa };
+#endif
 VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xcc00, 0xcb80 };
 VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xaaaaaaaa };
 VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -134,6 +155,11 @@ VECT_VAR_DECL(expected_vld3_2,uint,32,2) [] = { 0xfffffff1, 0xfffffff2 };
 VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
 					       0xaa, 0xaa, 0xaa, 0xaa };
 VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld3_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+						0xaaaaaaaaaaaaaaaa };
+#endif
 VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xcb00, 0xaaaa, 0xaaaa, 0xaaaa };
 VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
 VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1,
@@ -163,6 +189,11 @@ VECT_VAR_DECL(expected_vld4_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
 VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
 					       0xaa, 0xaa, 0xaa, 0xaa };
 VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld4_0,poly,64,2) [] = { 0xfffffffffffffff0,
+						0xfffffffffffffff1 };
+#endif
 VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
 VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
 VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -192,6 +223,11 @@ VECT_VAR_DECL(expected_vld4_1,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
 VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
 					       0xaa, 0xaa, 0xaa, 0xaa };
 VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld4_1,poly,64,2) [] = { 0xfffffffffffffff2,
+						0xfffffffffffffff3 };
+#endif
 VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
 VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
 VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -221,6 +257,11 @@ VECT_VAR_DECL(expected_vld4_2,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
 VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
 					       0xaa, 0xaa, 0xaa, 0xaa };
 VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld4_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+						0xaaaaaaaaaaaaaaaa };
+#endif
 VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
 VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
 VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa,
@@ -250,6 +291,11 @@ VECT_VAR_DECL(expected_vld4_3,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
 VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa,
 					       0xaa, 0xaa, 0xaa, 0xaa };
 VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_vld4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld4_3,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+						0xaaaaaaaaaaaaaaaa };
+#endif
 VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa };
 VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa };
 VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
@@ -279,6 +325,9 @@ VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2);
 VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2);
 VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2);
 VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 64, 2);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
 VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 16, 2);
 #endif
@@ -295,6 +344,9 @@ VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3);
 VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3);
 VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3);
 VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 64, 3);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
 VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 16, 3);
 #endif
@@ -311,6 +363,9 @@ VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4);
 VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4);
 VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4);
 VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 64, 4);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
 VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 16, 4);
 #endif
@@ -356,6 +411,16 @@ void exec_vldX_lane (void)
 	 &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]),	\
 	 sizeof(VECT_VAR(result, T1, W, N)));
 
+#if defined (__ARM_FEATURE_CRYPTO)
+#define DECL_VLDX_LANE_CRYPTO(T1, W, N, X) DECL_VLDX_LANE(T1, W, N, X)
+#define TEST_VLDX_LANE_CRYPTO(Q, T1, T2, W, N, X, L) TEST_VLDX_LANE(Q, T1, T2, W, N, X, L)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y) TEST_EXTRA_CHUNK(T1, W, N, X, Y)
+#else
+#define DECL_VLDX_LANE_CRYPTO(T1, W, N, X)
+#define TEST_VLDX_LANE_CRYPTO(Q, T1, T2, W, N, X, L)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y)
+#endif
+
   /* We need all variants in 64 bits, but there is no 64x2 variant.  */
 #define DECL_ALL_VLDX_LANE_NO_FP16(X)		\
   DECL_VLDX_LANE(int, 8, 8, X);			\
@@ -366,11 +431,13 @@ void exec_vldX_lane (void)
   DECL_VLDX_LANE(uint, 32, 2, X);		\
   DECL_VLDX_LANE(poly, 8, 8, X);		\
   DECL_VLDX_LANE(poly, 16, 4, X);		\
+  AARCH64_ONLY(DECL_VLDX_LANE_CRYPTO(poly, 64, 1, X));	\
   DECL_VLDX_LANE(int, 16, 8, X);		\
   DECL_VLDX_LANE(int, 32, 4, X);		\
   DECL_VLDX_LANE(uint, 16, 8, X);		\
   DECL_VLDX_LANE(uint, 32, 4, X);		\
   DECL_VLDX_LANE(poly, 16, 8, X);		\
+  AARCH64_ONLY(DECL_VLDX_LANE_CRYPTO(poly, 64, 2, X));	\
   DECL_VLDX_LANE(float, 32, 2, X);		\
   DECL_VLDX_LANE(float, 32, 4, X)
 
@@ -400,11 +467,13 @@ void exec_vldX_lane (void)
   TEST_VLDX_LANE(, uint, u, 32, 2, X, 1);	\
   TEST_VLDX_LANE(, poly, p, 8, 8, X, 4);	\
   TEST_VLDX_LANE(, poly, p, 16, 4, X, 3);	\
+  AARCH64_ONLY(TEST_VLDX_LANE_CRYPTO(, poly, p, 64, 1, X, 0));\
   TEST_VLDX_LANE(q, int, s, 16, 8, X, 6);	\
   TEST_VLDX_LANE(q, int, s, 32, 4, X, 2);	\
   TEST_VLDX_LANE(q, uint, u, 16, 8, X, 5);	\
   TEST_VLDX_LANE(q, uint, u, 32, 4, X, 0);	\
   TEST_VLDX_LANE(q, poly, p, 16, 8, X, 5);	\
+  AARCH64_ONLY(TEST_VLDX_LANE_CRYPTO(q, poly, p, 64, 2, X, 0));\
   TEST_VLDX_LANE(, float, f, 32, 2, X, 0);	\
   TEST_VLDX_LANE(q, float, f, 32, 4, X, 2)
 
@@ -426,11 +495,13 @@ void exec_vldX_lane (void)
   TEST_EXTRA_CHUNK(uint, 32, 2, X, Y);		\
   TEST_EXTRA_CHUNK(poly, 8, 8, X, Y);		\
   TEST_EXTRA_CHUNK(poly, 16, 4, X, Y);		\
+  AARCH64_ONLY(TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 1, X, Y));	\
   TEST_EXTRA_CHUNK(int, 16, 8, X, Y);		\
   TEST_EXTRA_CHUNK(int, 32, 4, X, Y);		\
   TEST_EXTRA_CHUNK(uint, 16, 8, X, Y);		\
   TEST_EXTRA_CHUNK(uint, 32, 4, X, Y);		\
   TEST_EXTRA_CHUNK(poly, 16, 8, X, Y);		\
+  AARCH64_ONLY(TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 2, X, Y));	\
   TEST_EXTRA_CHUNK(float, 32, 2, X, Y);		\
   TEST_EXTRA_CHUNK(float, 32, 4, X, Y)
 
@@ -453,12 +524,14 @@ void exec_vldX_lane (void)
     CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment);		\
     CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment);		\
     CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment);		\
+    AARCH64_ONLY(CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment));	\
     CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment);	\
     CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment);		\
     CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment);		\
     CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment);		\
     CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment);		\
     CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment);		\
+    AARCH64_ONLY(CHECK_CRYPTO(test_name, poly, 64, 2, PRIx64, EXPECTED, comment));	\
     CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment)
 
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -489,11 +562,17 @@ void exec_vldX_lane (void)
   DUMMY_ARRAY(buffer_src, uint, 32, 2, 4);
   DUMMY_ARRAY(buffer_src, poly, 8, 8, 4);
   DUMMY_ARRAY(buffer_src, poly, 16, 4, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+  AARCH64_ONLY(DUMMY_ARRAY(buffer_src, poly, 64, 1, 4));
+#endif
   DUMMY_ARRAY(buffer_src, int, 16, 8, 4);
   DUMMY_ARRAY(buffer_src, int, 32, 4, 4);
   DUMMY_ARRAY(buffer_src, uint, 16, 8, 4);
   DUMMY_ARRAY(buffer_src, uint, 32, 4, 4);
   DUMMY_ARRAY(buffer_src, poly, 16, 8, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+  AARCH64_ONLY(DUMMY_ARRAY(buffer_src, poly, 64, 2, 4));
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   DUMMY_ARRAY(buffer_src, float, 16, 4, 4);
   DUMMY_ARRAY(buffer_src, float, 16, 8, 4);
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
index 808641524c47b2c245ee2f10e74a784a7bccefc9..c53f4240a20550c1e1b17072fa0d73566fa0c2a8 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
@@ -1,6 +1,7 @@
 /* This file contains tests for the vreinterpret *p128 intrinsics.  */
 
 /* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
 /* { dg-add-options arm_crypto } */
 
 #include <arm_neon.h>
@@ -78,9 +79,7 @@ VECT_VAR_DECL(vreint_expected_q_f16_p128,hfloat,16,8) [] = { 0xfff0, 0xffff,
 int main (void)
 {
   DECL_VARIABLE_128BITS_VARIANTS(vreint_vector);
-  DECL_VARIABLE(vreint_vector, poly, 64, 2);
   DECL_VARIABLE_128BITS_VARIANTS(vreint_vector_res);
-  DECL_VARIABLE(vreint_vector_res, poly, 64, 2);
 
   clean_results ();
 
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
index 1d8cf9aa69f0b5b0717e98de613e3c350d6395d4..976b605e265e15c6e17c11ce3d92588fde874d16 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
@@ -1,6 +1,7 @@
 /* This file contains tests for the vreinterpret *p64 intrinsics.  */
 
 /* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
 /* { dg-add-options arm_crypto } */
 
 #include <arm_neon.h>
@@ -121,11 +122,7 @@ int main (void)
   CHECK_FP(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, "");
 
   DECL_VARIABLE_ALL_VARIANTS(vreint_vector);
-  DECL_VARIABLE(vreint_vector, poly, 64, 1);
-  DECL_VARIABLE(vreint_vector, poly, 64, 2);
   DECL_VARIABLE_ALL_VARIANTS(vreint_vector_res);
-  DECL_VARIABLE(vreint_vector_res, poly, 64, 1);
-  DECL_VARIABLE(vreint_vector_res, poly, 64, 2);
 
   clean_results ();
 
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c
index 825d07dbf77fc54b5ef796b57a42c81d6dd6d611..047ee1fa80be89083315505c6c228a03df290047 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c
@@ -1,6 +1,7 @@
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
 
 /* Expected results.  */
 VECT_VAR_DECL(expected,int,8,8) [] = { 0xf7, 0x33, 0x33, 0x33,
@@ -16,6 +17,9 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0x33, 0x33, 0x33,
 					0x33, 0x33, 0x33, 0x33 };
 VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0x3333, 0x3333, 0x3333 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
 VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcb80, 0x3333, 0x3333, 0x3333 };
 VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x33333333 };
 VECT_VAR_DECL(expected,int,8,16) [] = { 0xff, 0x33, 0x33, 0x33,
@@ -43,6 +47,10 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfa, 0x33, 0x33, 0x33,
 					 0x33, 0x33, 0x33, 0x33 };
 VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff4, 0x3333, 0x3333, 0x3333,
 					 0x3333, 0x3333, 0x3333, 0x3333 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected,poly,64,2) [] = { 0xfffffffffffffff0,
+					 0x3333333333333333 };
+#endif
 VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xc900, 0x3333, 0x3333, 0x3333,
 					   0x3333, 0x3333, 0x3333, 0x3333 };
 VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0x33333333,
@@ -72,6 +80,9 @@ void exec_vst1_lane (void)
   TEST_VST1_LANE(, uint, u, 64, 1, 0);
   TEST_VST1_LANE(, poly, p, 8, 8, 6);
   TEST_VST1_LANE(, poly, p, 16, 4, 2);
+#if defined (__ARM_FEATURE_CRYPTO)
+  TEST_VST1_LANE(, poly, p, 64, 1, 0);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   TEST_VST1_LANE(, float, f, 16, 4, 1);
 #endif
@@ -87,6 +98,9 @@ void exec_vst1_lane (void)
   TEST_VST1_LANE(q, uint, u, 64, 2, 0);
   TEST_VST1_LANE(q, poly, p, 8, 16, 10);
   TEST_VST1_LANE(q, poly, p, 16, 8, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+  TEST_VST1_LANE(q, poly, p, 64, 2, 0);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   TEST_VST1_LANE(q, float, f, 16, 8, 6);
 #endif
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c
index f5bf3bd325fa05e330d766dc0a93582d6c12e8c8..e00277d25c306559ca4149a5afcc857f7da63b52 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c
@@ -1,6 +1,7 @@
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
+/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */
 
 /* Expected results for vst2, chunk 0.  */
 VECT_VAR_DECL(expected_st2_0,int,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0,
@@ -14,6 +15,9 @@ VECT_VAR_DECL(expected_st2_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
 VECT_VAR_DECL(expected_st2_0,poly,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0,
 					      0x0, 0x0, 0x0, 0x0 };
 VECT_VAR_DECL(expected_st2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
 VECT_VAR_DECL(expected_st2_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0x0, 0x0 };
 VECT_VAR_DECL(expected_st2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
 VECT_VAR_DECL(expected_st2_0,int,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0,
@@ -42,6 +46,9 @@ VECT_VAR_DECL(expected_st2_1,uint,32,2) [] = { 0x0, 0x0 };
 VECT_VAR_DECL(expected_st2_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
 					      0x0, 0x0, 0x0, 0x0 };
 VECT_VAR_DECL(expected_st2_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
 VECT_VAR_DECL(expected_st2_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
 VECT_VAR_DECL(expected_st2_1,hfloat,32,2) [] = { 0x0, 0x0 };
 VECT_VAR_DECL(expected_st2_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -68,6 +75,9 @@ VECT_VAR_DECL(expected_st3_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
 VECT_VAR_DECL(expected_st3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0,
 					      0x0, 0x0, 0x0, 0x0 };
 VECT_VAR_DECL(expected_st3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
 VECT_VAR_DECL(expected_st3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0x0 };
 VECT_VAR_DECL(expected_st3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
 VECT_VAR_DECL(expected_st3_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0,
@@ -97,6 +107,9 @@ VECT_VAR_DECL(expected_st3_1,uint,32,2) [] = { 0xfffffff2, 0x0 };
 VECT_VAR_DECL(expected_st3_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
 					      0x0, 0x0, 0x0, 0x0 };
 VECT_VAR_DECL(expected_st3_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
 VECT_VAR_DECL(expected_st3_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
 VECT_VAR_DECL(expected_st3_1,hfloat,32,2) [] = { 0xc1600000, 0x0 };
 VECT_VAR_DECL(expected_st3_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -123,6 +136,9 @@ VECT_VAR_DECL(expected_st3_2,uint,32,2) [] = { 0x0, 0x0 };
 VECT_VAR_DECL(expected_st3_2,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
 					      0x0, 0x0, 0x0, 0x0 };
 VECT_VAR_DECL(expected_st3_2,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
 VECT_VAR_DECL(expected_st3_2,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
 VECT_VAR_DECL(expected_st3_2,hfloat,32,2) [] = { 0x0, 0x0 };
 VECT_VAR_DECL(expected_st3_2,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -149,6 +165,9 @@ VECT_VAR_DECL(expected_st4_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
 VECT_VAR_DECL(expected_st4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
 					      0x0, 0x0, 0x0, 0x0 };
 VECT_VAR_DECL(expected_st4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+#endif
 VECT_VAR_DECL(expected_st4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 };
 VECT_VAR_DECL(expected_st4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
 VECT_VAR_DECL(expected_st4_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
@@ -178,6 +197,9 @@ VECT_VAR_DECL(expected_st4_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 };
 VECT_VAR_DECL(expected_st4_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
 					      0x0, 0x0, 0x0, 0x0 };
 VECT_VAR_DECL(expected_st4_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+#endif
 VECT_VAR_DECL(expected_st4_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
 VECT_VAR_DECL(expected_st4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 };
 VECT_VAR_DECL(expected_st4_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -204,6 +226,9 @@ VECT_VAR_DECL(expected_st4_2,uint,32,2) [] = { 0x0, 0x0 };
 VECT_VAR_DECL(expected_st4_2,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
 					      0x0, 0x0, 0x0, 0x0 };
 VECT_VAR_DECL(expected_st4_2,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+#endif
 VECT_VAR_DECL(expected_st4_2,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
 VECT_VAR_DECL(expected_st4_2,hfloat,32,2) [] = { 0x0, 0x0 };
 VECT_VAR_DECL(expected_st4_2,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -230,6 +255,9 @@ VECT_VAR_DECL(expected_st4_3,uint,32,2) [] = { 0x0, 0x0 };
 VECT_VAR_DECL(expected_st4_3,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
 					      0x0, 0x0, 0x0, 0x0 };
 VECT_VAR_DECL(expected_st4_3,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL(expected_st4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+#endif
 VECT_VAR_DECL(expected_st4_3,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
 VECT_VAR_DECL(expected_st4_3,hfloat,32,2) [] = { 0x0, 0x0 };
 VECT_VAR_DECL(expected_st4_3,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
@@ -256,6 +284,9 @@ VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2);
 VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2);
 VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2);
 VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 64, 2);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
 VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 16, 2);
 #endif
@@ -272,6 +303,9 @@ VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3);
 VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3);
 VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3);
 VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 64, 3);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
 VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 16, 3);
 #endif
@@ -288,6 +322,9 @@ VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4);
 VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4);
 VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4);
 VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 64, 4);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
 VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 16, 4);
 #endif
@@ -336,6 +373,19 @@ void exec_vstX_lane (void)
 	 &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]),	\
 	 sizeof(VECT_VAR(result, T1, W, N)));
 
+#if defined (__ARM_FEATURE_CRYPTO)
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y) \
+		TEST_EXTRA_CHUNK(T1, W, N, X, Y)
+#define TEST_VSTX_LANE_CRYPTO(Q, T1, T2, W, N, X, L) \
+		TEST_VSTX_LANE(Q, T1, T2, W, N, X, L)
+#define DECL_VSTX_LANE_CRYPTO(T1, W, N, X) \
+		DECL_VSTX_LANE(T1, W, N, X)
+#else
+#define TEST_EXTRA_CHUNK_CRYPTO(T1, W, N, X, Y)
+#define TEST_VSTX_LANE_CRYPTO(Q, T1, T2, W, N, X, L)
+#define DECL_VSTX_LANE_CRYPTO(T1, W, N, X)
+#endif
+
   /* We need all variants in 64 bits, but there is no 64x2 variant,
      nor 128 bits vectors of int8/uint8/poly8.  */
 #define DECL_ALL_VSTX_LANE_NO_FP16(X)		\
@@ -347,12 +397,14 @@ void exec_vstX_lane (void)
   DECL_VSTX_LANE(uint, 32, 2, X);		\
   DECL_VSTX_LANE(poly, 8, 8, X);		\
   DECL_VSTX_LANE(poly, 16, 4, X);		\
+  DECL_VSTX_LANE_CRYPTO(poly, 64, 1, X);		\
   DECL_VSTX_LANE(float, 32, 2, X);		\
   DECL_VSTX_LANE(int, 16, 8, X);		\
   DECL_VSTX_LANE(int, 32, 4, X);		\
   DECL_VSTX_LANE(uint, 16, 8, X);		\
   DECL_VSTX_LANE(uint, 32, 4, X);		\
   DECL_VSTX_LANE(poly, 16, 8, X);		\
+  DECL_VSTX_LANE_CRYPTO(poly, 64, 2, X);		\
   DECL_VSTX_LANE(float, 32, 4, X)
 
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
@@ -378,6 +430,7 @@ void exec_vstX_lane (void)
   TEST_VSTX_LANE(, uint, u, 32, 2, X, 1);	\
   TEST_VSTX_LANE(, poly, p, 8, 8, X, 4);	\
   TEST_VSTX_LANE(, poly, p, 16, 4, X, 3);	\
+  AARCH64_ONLY(TEST_VSTX_LANE_CRYPTO(, poly, p, 64, 1, X, 0));\
   TEST_VSTX_LANE(q, int, s, 16, 8, X, 6);	\
   TEST_VSTX_LANE(q, int, s, 32, 4, X, 2);	\
   TEST_VSTX_LANE(q, uint, u, 16, 8, X, 5);	\
@@ -403,6 +456,7 @@ void exec_vstX_lane (void)
   TEST_EXTRA_CHUNK(uint, 32, 2, X, Y);		\
   TEST_EXTRA_CHUNK(poly, 8, 8, X, Y);		\
   TEST_EXTRA_CHUNK(poly, 16, 4, X, Y);		\
+  AARCH64_ONLY(TEST_EXTRA_CHUNK_CRYPTO(poly, 64, 1, X, Y));		\
   TEST_EXTRA_CHUNK(float, 32, 2, X, Y);		\
   TEST_EXTRA_CHUNK(int, 16, 8, X, Y);		\
   TEST_EXTRA_CHUNK(int, 32, 4, X, Y);		\
@@ -434,6 +488,9 @@ void exec_vstX_lane (void)
   DUMMY_ARRAY(buffer_src, uint, 32, 2, 4);
   DUMMY_ARRAY(buffer_src, poly, 8, 8, 4);
   DUMMY_ARRAY(buffer_src, poly, 16, 4, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+  DUMMY_ARRAY(buffer_src, poly, 64, 1, 4);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   DUMMY_ARRAY(buffer_src, float, 16, 4, 4);
 #endif
@@ -462,6 +519,7 @@ void exec_vstX_lane (void)
   CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_0, CMT);
   CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_0, CMT);
   CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_0, CMT);
+  AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st2_0, CMT));
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_0, CMT);
   CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_0, CMT);
   CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_0, CMT);
@@ -485,6 +543,7 @@ void exec_vstX_lane (void)
   CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_1, CMT);
   CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_1, CMT);
   CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_1, CMT);
+  AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st2_1, CMT));
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_1, CMT);
   CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_1, CMT);
   CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_1, CMT);
@@ -514,6 +573,7 @@ void exec_vstX_lane (void)
   CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_0, CMT);
   CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_0, CMT);
   CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_0, CMT);
+  AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st3_0, CMT));
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_0, CMT);
   CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_0, CMT);
   CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_0, CMT);
@@ -538,6 +598,7 @@ void exec_vstX_lane (void)
   CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_1, CMT);
   CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_1, CMT);
   CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_1, CMT);
+  AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st3_1, CMT));
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_1, CMT);
   CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_1, CMT);
   CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_1, CMT);
@@ -562,6 +623,7 @@ void exec_vstX_lane (void)
   CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_2, CMT);
   CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_2, CMT);
   CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_2, CMT);
+  AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st3_2, CMT));
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_2, CMT);
   CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_2, CMT);
   CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_2, CMT);
@@ -591,6 +653,7 @@ void exec_vstX_lane (void)
   CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_0, CMT);
   CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_0, CMT);
   CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_0, CMT);
+  AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st4_0, CMT));
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_0, CMT);
   CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_0, CMT);
   CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_0, CMT);
@@ -615,6 +678,7 @@ void exec_vstX_lane (void)
   CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_1, CMT);
   CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_1, CMT);
   CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_1, CMT);
+  AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st4_1, CMT));
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_1, CMT);
   CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_1, CMT);
   CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_1, CMT);
@@ -639,6 +703,7 @@ void exec_vstX_lane (void)
   CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_2, CMT);
   CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_2, CMT);
   CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_2, CMT);
+  AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st4_2, CMT));
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_2, CMT);
   CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_2, CMT);
   CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_2, CMT);
@@ -663,6 +728,7 @@ void exec_vstX_lane (void)
   CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_3, CMT);
   CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_3, CMT);
   CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_3, CMT);
+  AARCH64_ONLY(CHECK_CRYPTO(TEST_MSG, poly, 64, 1, PRIx64, expected_st4_3, CMT));
   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_3, CMT);
   CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_3, CMT);
   CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_3, CMT);

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
  2016-11-07 13:55 [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC Tamar Christina
@ 2016-11-08 11:21 ` Christophe Lyon
  2016-11-08 11:59   ` Tamar Christina
  0 siblings, 1 reply; 17+ messages in thread
From: Christophe Lyon @ 2016-11-08 11:21 UTC (permalink / raw)
  To: Tamar Christina
  Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
	James Greenhalgh, Kyrylo Tkachov, nd

On 7 November 2016 at 14:55, Tamar Christina <Tamar.Christina@arm.com> wrote:
> Hi all,
>
> This patch (3 of 3) adds updates tests for the NEON intrinsics
> added by the previous patches:
>
> Ran regression tests on aarch64-none-linux-gnu
> and on arm-none-linux-gnueabihf.
>
> Ok for trunk?
>
> Thanks,
> Tamar
>
>
> gcc/testsuite/
> 2016-11-04  Tamar Christina  <tamar.christina@arm.com>
>
>         * gcc.target/aarch64/advsimd-intrinsics/p64.c: New.
>         * gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
>         (Poly64x1_t, Poly64x2_t): Added type.
>         (AARCH64_ONLY): Added macro.
>         * gcc.target/aarch64/advsimd-intrinsics/vcombine.c:
>         Added test for Poly64.
>         * gcc.target/aarch64/advsimd-intrinsics/vcreate.c: Likewise.
>         * gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c: Likewise.
>         * gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c: Likewise.
>         * gcc.target/aarch64/advsimd-intrinsics/vget_high.c: Likewise.
>         * gcc.target/aarch64/advsimd-intrinsics/vget_lane.c: Likewise.
>         * gcc.target/aarch64/advsimd-intrinsics/vget_low.c: Likewise.
>         * gcc.target/aarch64/advsimd-intrinsics/vldX.c: Likewise.
>         * gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c: Likewise.
>         * gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c: Likewise.
>         * gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c: Likewise.
>         * gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c: Likewise.
>         * gcc.target/aarch64/advsimd-intrinsics/vld1.c: Likewise.
>         * gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c:
>         Added AArch64 flags.
>         * gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c:
>         Added Aarch64 flags.

Hi Tamar,

A while ago I added p64_p128.c to contain all the poly64/128
tests except for vreinterpret.
Why do you need to create p64.c?

Similarly, adding tests for vcreate_p64 etc. in p64.c or p64_p128.c
might be easier to maintain than adding them to vcreate.c etc.
with several #ifdef conditions.
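
For instance, a consolidated check could look like this (just a sketch
on my part: standalone, using only arm_neon.h rather than the
arm-neon-ref.h harness, and not code taken from your patch):

#include <arm_neon.h>
#include <stdlib.h>

/* No #ifdef is needed here: the whole file is already gated on crypto
   support, so vcreate_p64 can be exercised directly.  */
int main (void)
{
  poly64x1_t v = vcreate_p64 (0xfffffffffffffff0ULL);
  if (vget_lane_p64 (v, 0) != 0xfffffffffffffff0ULL)
    abort ();
  return 0;
}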

For vdup-vmov.c, why do you add the "&& defined(__aarch64__)"
condition? These intrinsics are defined in arm/arm_neon.h, right?
They are tested in p64_p128.c.
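
Something along these lines (a hypothetical standalone example, not the
actual p64_p128.c code) should already build for both targets:

#include <arm_neon.h>
#include <stdlib.h>

/* vdup_n_p64 comes from arm_neon.h on arm (with crypto enabled) as
   well as on aarch64, so no defined(__aarch64__) guard is needed.  */
int main (void)
{
  poly64x1_t v = vdup_n_p64 (0xabULL);
  if (vget_lane_p64 (v, 0) != 0xabULL)
    abort ();
  return 0;
}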

Looking at your patch, it seems some tests are currently missing
for arm: vget_high_p64. I'm not sure why I missed it when I removed
neon-testgen...

Regarding vreinterpret_p128.c, doesn't the existing
effective-target arm_crypto_ok prevent the tests from
running on aarch64?
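
In other words (this is my reading of the directives, so treat the
details as an assumption on my part), a file set up like this never
runs on aarch64, with or without the extra options line:

/* arm_crypto_ok only succeeds on arm targets, so this directive makes
   the whole test UNSUPPORTED on aarch64.  */
/* { dg-require-effective-target arm_crypto_ok } */
/* { dg-add-options arm_crypto } */
/* On aarch64 the line below is never reached, because the test has
   already been skipped by the require-effective-target above.  */
/* { dg-additional-options "-march=armv8-a+crypto" { target aarch64*-*-* } } */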

Thanks,

Christophe

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
  2016-11-08 11:21 ` Christophe Lyon
@ 2016-11-08 11:59   ` Tamar Christina
  2016-11-24 11:45     ` Tamar Christina
  0 siblings, 1 reply; 17+ messages in thread
From: Tamar Christina @ 2016-11-08 11:59 UTC (permalink / raw)
  To: Christophe Lyon
  Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
	James Greenhalgh, Kyrylo Tkachov, nd

Hi Christophe,

Thanks for the review!

> 
> A while ago I added p64_p128.c to contain all the poly64/128 tests except for
> vreinterpret.
> Why do you need to create p64.c?

I originally created it because I had a much smaller set of intrinsics that I wanted to
add initially; this grew, and it hadn't occurred to me that I could use the existing file now.

Another reason was the effective-target arm_crypto_ok as you mentioned below.

> 
> Similarly, adding tests for vcreate_p64 etc. in p64.c or p64_p128.c might be
> easier to maintain than adding them to vcreate.c etc. with several #ifdef
> conditions.

Fair enough, I'll move them to p64_p128.c.

> For vdup-vmov.c, why do you add the "&& defined(__aarch64__)"
> condition? These intrinsics are defined in arm/arm_neon.h, right?
> They are tested in p64_p128.c.

I should have looked for them; they weren't being tested before, so I had
mistakenly assumed that they weren't available. Now I realize I just need
to add the proper test option to the file to enable crypto. I'll update this as well.

> Looking at your patch, it seems some tests are currently missing for arm:
> vget_high_p64. I'm not sure why I missed it when I removed neon-
> testgen...

I'll adjust the test conditions so they run for ARM as well.

> 
> Regarding vreinterpret_p128.c, doesn't the existing effective-target
> arm_crypto_ok prevent the tests from running on aarch64?

Yes they do. I was comparing the output against a clean version and hadn't noticed
that they weren't running. Thanks!

> 
> Thanks,
> 
> Christophe

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
  2016-11-08 11:59   ` Tamar Christina
@ 2016-11-24 11:45     ` Tamar Christina
  2016-11-25 14:54       ` Christophe Lyon
  0 siblings, 1 reply; 17+ messages in thread
From: Tamar Christina @ 2016-11-24 11:45 UTC (permalink / raw)
  To: Christophe Lyon
  Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
	James Greenhalgh, Kyrylo Tkachov, nd

[-- Attachment #1: Type: text/plain, Size: 2269 bytes --]

Hi Christophe,

I have combined most of the tests in p64_p128, except for the
vreinterpret_p128 and vreinterpret_p64 ones, because I felt the amount
of code that would have to be added to p64_p128, versus keeping them in those
files, isn't worth it, since a lot of the test setup would have to be copied.

Kind regards,
Tamar
________________________________________
From: Tamar Christina
Sent: Tuesday, November 8, 2016 11:58:46 AM
To: Christophe Lyon
Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC

Hi Christophe,

Thanks for the review!

>
> A while ago I added p64_p128.c, to contain all the poly64/128 tests except for
> vreinterpret.
> Why do you need to create p64.c?

I originally created it because I had a much smaller set of intrinsics that I wanted to
add initially; this grew, and it hadn't occurred to me that I could use the existing file now.

Another reason was the effective-target arm_crypto_ok as you mentioned below.

>
> Similarly, adding tests for vcreate_p64 etc... in p64.c or p64_p128.c might be
> easier to maintain than adding them to vcreate.c etc with several #ifdef
> conditions.

Fair enough, I'll move them to p64_p128.c.

> For vdup-vmov.c, why do you add the "&& defined(__aarch64__)"
> condition? These intrinsics are defined in arm/arm_neon.h, right?
> They are tested in p64_p128.c

I should have looked for them; they weren't being tested before, so I had
mistakenly assumed that they weren't available. Now I realize I just need
to add the proper test option to the file to enable crypto. I'll update this as well.

> Looking at your patch, it seems some tests are currently missing for arm:
> vget_high_p64. I'm not sure why I missed it when I removed neon-
> testgen...

I'll adjust the test conditions so they run for ARM as well.

>
> Regarding vreinterpret_p128.c, doesn't the existing effective-target
> arm_crypto_ok prevent the tests from running on aarch64?

Yes they do. I was comparing the output against a clean version and hadn't noticed
that they weren't running. Thanks!

>
> Thanks,
>
> Christophe

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: poly_test.patch --]
[-- Type: text/x-patch; name="poly_test.patch", Size: 27130 bytes --]

diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
index 462141586b3db7c5256c74b08fa0449210634226..174c1948221025b860aaac503354b406fa804007 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
@@ -32,6 +32,13 @@ extern size_t strlen(const char *);
    VECT_VAR(expected, int, 16, 4) -> expected_int16x4
    VECT_VAR_DECL(expected, int, 16, 4) -> int16x4_t expected_int16x4
 */
+/* Some instructions don't exist on ARM.
+   Use this macro to guard against them.  */
+#ifdef __aarch64__
+#define AARCH64_ONLY(X) X
+#else
+#define AARCH64_ONLY(X)
+#endif
 
 #define xSTR(X) #X
 #define STR(X) xSTR(X)
@@ -92,6 +99,13 @@ extern size_t strlen(const char *);
     fprintf(stderr, "CHECKED %s %s\n", STR(VECT_TYPE(T, W, N)), MSG);	\
   }
 
+#if defined (__ARM_FEATURE_CRYPTO)
+#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT) \
+	       CHECK(MSG,T,W,N,FMT,EXPECTED,COMMENT)
+#else
+#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT)
+#endif
+
 /* Floating-point variant.  */
 #define CHECK_FP(MSG,T,W,N,FMT,EXPECTED,COMMENT)			\
   {									\
@@ -184,6 +198,9 @@ extern ARRAY(expected, uint, 32, 2);
 extern ARRAY(expected, uint, 64, 1);
 extern ARRAY(expected, poly, 8, 8);
 extern ARRAY(expected, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+extern ARRAY(expected, poly, 64, 1);
+#endif
 extern ARRAY(expected, hfloat, 16, 4);
 extern ARRAY(expected, hfloat, 32, 2);
 extern ARRAY(expected, hfloat, 64, 1);
@@ -197,11 +214,14 @@ extern ARRAY(expected, uint, 32, 4);
 extern ARRAY(expected, uint, 64, 2);
 extern ARRAY(expected, poly, 8, 16);
 extern ARRAY(expected, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+extern ARRAY(expected, poly, 64, 2);
+#endif
 extern ARRAY(expected, hfloat, 16, 8);
 extern ARRAY(expected, hfloat, 32, 4);
 extern ARRAY(expected, hfloat, 64, 2);
 
-#define CHECK_RESULTS_NAMED_NO_FP16(test_name,EXPECTED,comment)		\
+#define CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64(test_name,EXPECTED,comment)		\
   {									\
     CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment);		\
     CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment);		\
@@ -228,6 +248,13 @@ extern ARRAY(expected, hfloat, 64, 2);
     CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment);	\
   }									\
 
+#define CHECK_RESULTS_NAMED_NO_FP16(test_name,EXPECTED,comment)		\
+  {									\
+    CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64(test_name, EXPECTED, comment);		\
+    CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment);	\
+    CHECK_CRYPTO(test_name, poly, 64, 2, PRIx64, EXPECTED, comment);	\
+  }									\
+
 /* Check results against EXPECTED.  Operates on all possible vector types.  */
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
 #define CHECK_RESULTS_NAMED(test_name,EXPECTED,comment)			\
@@ -398,6 +425,9 @@ static void clean_results (void)
   CLEAN(result, uint, 64, 1);
   CLEAN(result, poly, 8, 8);
   CLEAN(result, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+  CLEAN(result, poly, 64, 1);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   CLEAN(result, float, 16, 4);
 #endif
@@ -413,6 +443,9 @@ static void clean_results (void)
   CLEAN(result, uint, 64, 2);
   CLEAN(result, poly, 8, 16);
   CLEAN(result, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+  CLEAN(result, poly, 64, 2);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   CLEAN(result, float, 16, 8);
 #endif
@@ -438,6 +471,13 @@ static void clean_results (void)
 #define DECL_VARIABLE(VAR, T1, W, N)		\
   VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)
 
+#if defined (__ARM_FEATURE_CRYPTO)
+#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N) \
+  DECL_VARIABLE(VAR, T1, W, N)
+#else
+#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N)
+#endif
+
 /* Declare only 64 bits signed variants.  */
 #define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR)	\
   DECL_VARIABLE(VAR, int, 8, 8);			\
@@ -473,6 +513,7 @@ static void clean_results (void)
   DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR);	\
   DECL_VARIABLE(VAR, poly, 8, 8);		\
   DECL_VARIABLE(VAR, poly, 16, 4);		\
+  DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1);	\
   DECL_VARIABLE(VAR, float, 16, 4);		\
   DECL_VARIABLE(VAR, float, 32, 2)
 #else
@@ -481,6 +522,7 @@ static void clean_results (void)
   DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR);	\
   DECL_VARIABLE(VAR, poly, 8, 8);		\
   DECL_VARIABLE(VAR, poly, 16, 4);		\
+  DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1);	\
   DECL_VARIABLE(VAR, float, 32, 2)
 #endif
 
@@ -491,6 +533,7 @@ static void clean_results (void)
   DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR);	\
   DECL_VARIABLE(VAR, poly, 8, 16);		\
   DECL_VARIABLE(VAR, poly, 16, 8);		\
+  DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2);	\
   DECL_VARIABLE(VAR, float, 16, 8);		\
   DECL_VARIABLE(VAR, float, 32, 4)
 #else
@@ -499,6 +542,7 @@ static void clean_results (void)
   DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR);	\
   DECL_VARIABLE(VAR, poly, 8, 16);		\
   DECL_VARIABLE(VAR, poly, 16, 8);		\
+  DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2);	\
   DECL_VARIABLE(VAR, float, 32, 4)
 #endif
 /* Declare all variants.  */
@@ -531,6 +575,13 @@ static void clean_results (void)
 
 /* Helpers to call macros with 1 constant and 5 variable
    arguments.  */
+#if defined (__ARM_FEATURE_CRYPTO)
+#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N) \
+  MACRO(VAR1, VAR2, T1, T2, T3, W, N)
+#else
+#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N)
+#endif
+
 #define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)	\
   MACRO(VAR, , int, s, 8, 8);					\
   MACRO(VAR, , int, s, 16, 4);					\
@@ -601,13 +652,15 @@ static void clean_results (void)
   TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
   TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
   MACRO(VAR1, VAR2, , poly, p, 8, 8);				\
-  MACRO(VAR1, VAR2, , poly, p, 16, 4)
+  MACRO(VAR1, VAR2, , poly, p, 16, 4);				\
+  MACRO_CRYPTO(MACRO, VAR1, VAR2, , poly, p, 64, 1)
 
 #define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
   TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
   TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
   MACRO(VAR1, VAR2, q, poly, p, 8, 16);				\
-  MACRO(VAR1, VAR2, q, poly, p, 16, 8)
+  MACRO(VAR1, VAR2, q, poly, p, 16, 8);				\
+  MACRO_CRYPTO(MACRO, VAR1, VAR2, q, poly, p, 64, 2)
 
 #define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
   TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
index 519cffb0125079022e7ba876c1ca657d9e37cac2..f92c820f4c7de0dff2e593559412cf1702e860ff 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
@@ -1,8 +1,9 @@
 /* This file contains tests for all the *p64 intrinsics, except for
    vreinterpret which have their own testcase.  */
 
-/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */
 /* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/
 
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
@@ -14,7 +15,7 @@ VECT_VAR_DECL(vbsl_expected,poly,64,2) [] = { 0xfffffff1,
 					      0xfffffff1 };
 
 /* Expected results: vceq.  */
-VECT_VAR_DECL(vceq_expected,uint,64,1) [] = { 0x0 };
+VECT_VAR_DECL(vceq_expected,poly,64,1) [] = { 0x0 };
 
 /* Expected results: vcombine.  */
 VECT_VAR_DECL(vcombine_expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x88 };
@@ -38,6 +39,17 @@ VECT_VAR_DECL(vdup_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
 VECT_VAR_DECL(vdup_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
 						 0xfffffffffffffff2 };
 
+/* Expected results: vmov_n.  */
+VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+						 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+						 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+						 0xfffffffffffffff2 };
+
 /* Expected results: vext.  */
 VECT_VAR_DECL(vext_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 };
@@ -45,6 +57,9 @@ VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 };
 /* Expected results: vget_low.  */
 VECT_VAR_DECL(vget_low_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
 
+/* Expected results: vget_high.  */
+VECT_VAR_DECL(vget_high_expected,poly,64,1) [] = { 0xfffffffffffffff1 };
+
 /* Expected results: vld1.  */
 VECT_VAR_DECL(vld1_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(vld1_expected,poly,64,2) [] = { 0xfffffffffffffff0,
@@ -109,6 +124,39 @@ VECT_VAR_DECL(vst1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(vst1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0,
 						   0x3333333333333333 };
 
+/* Expected results: vldX_lane.  */
+VECT_VAR_DECL(expected_vld_st2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld_st2_0,poly,64,2) [] = { 0xfffffffffffffff0,
+						   0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st2_1,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+						   0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld_st3_0,poly,64,2) [] = { 0xfffffffffffffff0,
+						   0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st3_1,poly,64,2) [] = { 0xfffffffffffffff2,
+						   0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld_st3_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+						   0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld_st4_0,poly,64,2) [] = { 0xfffffffffffffff0,
+						   0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st4_1,poly,64,2) [] = { 0xfffffffffffffff2,
+						   0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld_st4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld_st4_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+						   0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld_st4_3,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+						   0xaaaaaaaaaaaaaaaa };
+
+/* Expected results: vget_lane.  */
+VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0;
+VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0;
+
 int main (void)
 {
   int i;
@@ -159,24 +207,24 @@ int main (void)
   VECT_VAR(vceq_vector_res, T3, W, N) =					\
     INSN##Q##_##T2##W(VECT_VAR(vceq_vector, T1, W, N),			\
 		      VECT_VAR(vceq_vector2, T1, W, N));		\
-  vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vceq_vector_res, T3, W, N))
+  vst1##Q##_p##W(VECT_VAR(result, T3, W, N), VECT_VAR(vceq_vector_res, T3, W, N))
 
 #define TEST_VCOMP(INSN, Q, T1, T2, T3, W, N)				\
-  TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N)
+  TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N);
 
   DECL_VARIABLE(vceq_vector, poly, 64, 1);
   DECL_VARIABLE(vceq_vector2, poly, 64, 1);
-  DECL_VARIABLE(vceq_vector_res, uint, 64, 1);
+  DECL_VARIABLE(vceq_vector_res, poly, 64, 1);
 
-  CLEAN(result, uint, 64, 1);
+  CLEAN(result, poly, 64, 1);
 
   VLOAD(vceq_vector, buffer, , poly, p, 64, 1);
 
   VDUP(vceq_vector2, , poly, p, 64, 1, 0x88);
 
-  TEST_VCOMP(vceq, , poly, p, uint, 64, 1);
+  TEST_VCOMP(vceq, , poly, p, poly, 64, 1);
 
-  CHECK(TEST_MSG, uint, 64, 1, PRIx64, vceq_expected, "");
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vceq_expected, "");
 
   /* vcombine_p64 tests.  */
 #undef TEST_MSG
@@ -288,6 +336,44 @@ int main (void)
     }
   }
 
+  /* vmov_n_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VMOV/VMOVQ"
+
+#define TEST_VMOV(Q, T1, T2, W, N)					\
+  VECT_VAR(vmov_n_vector, T1, W, N) =					\
+    vmov##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]);		\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vmov_n_vector, T1, W, N))
+
+  DECL_VARIABLE(vmov_n_vector, poly, 64, 1);
+  DECL_VARIABLE(vmov_n_vector, poly, 64, 2);
+
+  /* Try to read different places from the input buffer.  */
+  for (i=0; i< 3; i++) {
+    CLEAN(result, poly, 64, 1);
+    CLEAN(result, poly, 64, 2);
+
+    TEST_VDUP(, poly, p, 64, 1);
+    TEST_VDUP(q, poly, p, 64, 2);
+
+    switch (i) {
+    case 0:
+      CHECK(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected0, "");
+      CHECK(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected0, "");
+      break;
+    case 1:
+      CHECK(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected1, "");
+      CHECK(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected1, "");
+      break;
+    case 2:
+      CHECK(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected2, "");
+      CHECK(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected2, "");
+      break;
+    default:
+      abort();
+    }
+  }
+
   /* vexit_p64 tests.  */
 #undef TEST_MSG
 #define TEST_MSG "VEXT/VEXTQ"
@@ -341,6 +427,26 @@ int main (void)
 
   CHECK(TEST_MSG, poly, 64, 1, PRIx64, vget_low_expected, "");
 
+  /* vget_high_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VGET_HIGH"
+
+#define TEST_VGET_HIGH(T1, T2, W, N, N2)					\
+  VECT_VAR(vget_high_vector64, T1, W, N) =				\
+    vget_high_##T2##W(VECT_VAR(vget_high_vector128, T1, W, N2));		\
+  vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vget_high_vector64, T1, W, N))
+
+  DECL_VARIABLE(vget_high_vector64, poly, 64, 1);
+  DECL_VARIABLE(vget_high_vector128, poly, 64, 2);
+
+  CLEAN(result, poly, 64, 1);
+
+  VLOAD(vget_high_vector128, buffer, q, poly, p, 64, 2);
+
+  TEST_VGET_HIGH(poly, p, 64, 1, 2);
+
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vget_high_expected, "");
+
   /* vld1_p64 tests.  */
 #undef TEST_MSG
 #define TEST_MSG "VLD1/VLD1Q"
@@ -432,6 +538,8 @@ int main (void)
   CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld1_lane_expected, "");
   CHECK(TEST_MSG, poly, 64, 2, PRIx64, vld1_lane_expected, "");
 
+#ifdef __aarch64__
+
   /* vldX_p64 tests.  */
 #define DECL_VLDX(T1, W, N, X)						\
   VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vldX_vector, T1, W, N, X); \
@@ -560,6 +668,8 @@ int main (void)
   TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 4, 3);
   CHECK(TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_3, "chunk 3");
 
+#endif /* __aarch64__.  */
+
   /* vsli_p64 tests.  */
 #undef TEST_MSG
 #define TEST_MSG "VSLI"
@@ -645,7 +755,7 @@ int main (void)
   VECT_VAR(vst1_lane_vector, T1, W, N) =				\
     vld1##Q##_##T2##W(VECT_VAR(buffer, T1, W, N));			\
   vst1##Q##_lane_##T2##W(VECT_VAR(result, T1, W, N),			\
-			 VECT_VAR(vst1_lane_vector, T1, W, N), L)
+			 VECT_VAR(vst1_lane_vector, T1, W, N), L);
 
   DECL_VARIABLE(vst1_lane_vector, poly, 64, 1);
   DECL_VARIABLE(vst1_lane_vector, poly, 64, 2);
@@ -659,5 +769,260 @@ int main (void)
   CHECK(TEST_MSG, poly, 64, 1, PRIx64, vst1_lane_expected, "");
   CHECK(TEST_MSG, poly, 64, 2, PRIx64, vst1_lane_expected, "");
 
+#ifdef __aarch64__
+
+  /* vget_lane_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VGET_LANE/VGETQ_LANE"
+
+#define TEST_VGET_LANE(Q, T1, T2, W, N, L)				   \
+  VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \
+  if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) {		\
+    fprintf(stderr,							   \
+	    "ERROR in %s (%s line %d in result '%s') at type %s "	   \
+	    "got 0x%" PRIx##W " != 0x%" PRIx##W "\n",			   \
+	    TEST_MSG, __FILE__, __LINE__,				   \
+	    STR(VECT_VAR(vget_lane_expected, T1, W, N)),		   \
+	    STR(VECT_NAME(T1, W, N)),					   \
+	    VECT_VAR(vget_lane_vector, T1, W, N),			   \
+	    VECT_VAR(vget_lane_expected, T1, W, N));			   \
+    abort ();								   \
+  }
+
+  /* Initialize input values.  */
+  DECL_VARIABLE(vector, poly, 64, 1);
+  DECL_VARIABLE(vector, poly, 64, 2);
+
+  VLOAD(vector, buffer,  , poly, p, 64, 1);
+  VLOAD(vector, buffer, q, poly, p, 64, 2);
+
+  VECT_VAR_DECL(vget_lane_vector, poly, 64, 1);
+  VECT_VAR_DECL(vget_lane_vector, poly, 64, 2);
+
+  TEST_VGET_LANE( , poly, p, 64, 1, 0);
+  TEST_VGET_LANE(q, poly, p, 64, 2, 0);
+
+  /* vget_lane_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VLDX_LANE/VLDXQ_LANE"
+
+VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 64, 2);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 64, 3);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 64, 4);
+
+  /* In this case, input variables are arrays of vectors.  */
+#define DECL_VLD_STX_LANE(T1, W, N, X)					\
+  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X);	\
+  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X);	\
+  VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
+
+  /* We need to use a temporary result buffer (result_bis), because
+     the one used for other tests is not large enough. A subset of the
+     result data is moved from result_bis to result, and it is this
+     subset which is used to check the actual behavior. The next
+     macro enables moving another chunk of data from result_bis to
+     result.  */
+  /* We also use another extra input buffer (buffer_src), which we
+     fill with 0xAA, and which is used to load a vector from which we
+     read a given lane.  */
+
+#define TEST_VLDX_LANE(Q, T1, T2, W, N, X, L)				\
+  memset (VECT_VAR(buffer_src, T1, W, N), 0xAA,				\
+	  sizeof(VECT_VAR(buffer_src, T1, W, N)));			\
+									\
+  VECT_ARRAY_VAR(vector_src, T1, W, N, X) =				\
+    vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N));		\
+									\
+  VECT_ARRAY_VAR(vector, T1, W, N, X) =					\
+    /* Use dedicated init buffer, of size X.  */			\
+    vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X),	\
+			     VECT_ARRAY_VAR(vector_src, T1, W, N, X),	\
+			     L);					\
+  vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N),		\
+		      VECT_ARRAY_VAR(vector, T1, W, N, X));		\
+  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
+	 sizeof(VECT_VAR(result, T1, W, N)))
+
+  /* Overwrite "result" with the contents of "result_bis"[Y].  */
+#undef TEST_EXTRA_CHUNK
+#define TEST_EXTRA_CHUNK(T1, W, N, X, Y)		\
+  memcpy(VECT_VAR(result, T1, W, N),			\
+	 &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]),	\
+	 sizeof(VECT_VAR(result, T1, W, N)));
+
+  /* Add some padding to try to catch out of bound accesses.  */
+#define ARRAY1(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[1]={42}
+#define DUMMY_ARRAY(V, T, W, N, L) \
+  VECT_VAR_DECL(V,T,W,N)[N*L]={0}; \
+  ARRAY1(V##_pad,T,W,N)
+
+#define DECL_ALL_VLD_STX_LANE(X)     \
+  DECL_VLD_STX_LANE(poly, 64, 1, X); \
+  DECL_VLD_STX_LANE(poly, 64, 2, X);
+
+#define TEST_ALL_VLDX_LANE(X)		  \
+  TEST_VLDX_LANE(, poly, p, 64, 1, X, 0); \
+  TEST_VLDX_LANE(q, poly, p, 64, 2, X, 0);
+
+#define TEST_ALL_EXTRA_CHUNKS(X,Y)	     \
+  TEST_EXTRA_CHUNK(poly, 64, 1, X, Y) \
+  TEST_EXTRA_CHUNK(poly, 64, 2, X, Y)
+
+#define CHECK_RESULTS_VLD_STX_LANE(test_name,EXPECTED,comment)	\
+  CHECK(test_name, poly, 64, 1, PRIx64, EXPECTED, comment);	\
+  CHECK(test_name, poly, 64, 2, PRIx64, EXPECTED, comment);
+
+  /* Declare the temporary buffers / variables.  */
+  DECL_ALL_VLD_STX_LANE(2);
+  DECL_ALL_VLD_STX_LANE(3);
+  DECL_ALL_VLD_STX_LANE(4);
+
+  DUMMY_ARRAY(buffer_src, poly, 64, 1, 4);
+  DUMMY_ARRAY(buffer_src, poly, 64, 2, 4);
+
+  /* Check vld2_lane/vld2q_lane.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD2_LANE/VLD2Q_LANE"
+  TEST_ALL_VLDX_LANE(2);
+  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st2_0, " chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(2, 1);
+  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st2_1, " chunk 1");
+
+  /* Check vld3_lane/vld3q_lane.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD3_LANE/VLD3Q_LANE"
+  TEST_ALL_VLDX_LANE(3);
+  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_0, " chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(3, 1);
+  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_1, " chunk 1");
+
+  TEST_ALL_EXTRA_CHUNKS(3, 2);
+  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_2, " chunk 2");
+
+  /* Check vld4_lane/vld4q_lane.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD4_LANE/VLD4Q_LANE"
+  TEST_ALL_VLDX_LANE(4);
+  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_0, " chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(4, 1);
+  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_1, " chunk 1");
+  TEST_ALL_EXTRA_CHUNKS(4, 2);
+
+  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_2, " chunk 2");
+
+  TEST_ALL_EXTRA_CHUNKS(4, 3);
+  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_3, " chunk 3");
+
+  /* In this case, input variables are arrays of vectors.  */
+#define DECL_VSTX_LANE(T1, W, N, X)					\
+  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X);	\
+  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X);	\
+  VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
+
+  /* We need to use a temporary result buffer (result_bis), because
+     the one used for other tests is not large enough. A subset of the
+     result data is moved from result_bis to result, and it is this
+     subset which is used to check the actual behavior. The next
+     macro enables moving another chunk of data from result_bis to
+     result.  */
+  /* We also use another extra input buffer (buffer_src), which we
+     fill with 0xAA, and which is used to load a vector from which we
+     read a given lane.  */
+#define TEST_VSTX_LANE(Q, T1, T2, W, N, X, L)				 \
+  memset (VECT_VAR(buffer_src, T1, W, N), 0xAA,				 \
+	  sizeof(VECT_VAR(buffer_src, T1, W, N)));			 \
+  memset (VECT_VAR(result_bis_##X, T1, W, N), 0,			 \
+	  sizeof(VECT_VAR(result_bis_##X, T1, W, N)));			 \
+									 \
+  VECT_ARRAY_VAR(vector_src, T1, W, N, X) =				 \
+    vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N));		 \
+									 \
+  VECT_ARRAY_VAR(vector, T1, W, N, X) =					 \
+    /* Use dedicated init buffer, of size X.  */			 \
+    vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X),	 \
+			     VECT_ARRAY_VAR(vector_src, T1, W, N, X),	 \
+			     L);					 \
+  vst##X##Q##_lane_##T2##W(VECT_VAR(result_bis_##X, T1, W, N),		 \
+			   VECT_ARRAY_VAR(vector, T1, W, N, X),		 \
+			   L);						 \
+  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
+	 sizeof(VECT_VAR(result, T1, W, N)));
+
+#define TEST_ALL_VSTX_LANE(X)		  \
+  TEST_VSTX_LANE(, poly, p, 64, 1, X, 0); \
+  TEST_VSTX_LANE(q, poly, p, 64, 2, X, 0);
+
+  /* Check vst2_lane/vst2q_lane.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VST2_LANE/VST2Q_LANE"
+  TEST_ALL_VSTX_LANE(2);
+
+#define CMT " (chunk 0)"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st2_0, CMT);
+
+  TEST_ALL_EXTRA_CHUNKS(2, 1);
+#undef CMT
+#define CMT " chunk 1"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st2_1, CMT);
+
+  /* Check vst3_lane/vst3q_lane.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VST3_LANE/VST3Q_LANE"
+  TEST_ALL_VSTX_LANE(3);
+
+#undef CMT
+#define CMT " (chunk 0)"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_0, CMT);
+
+  TEST_ALL_EXTRA_CHUNKS(3, 1);
+
+#undef CMT
+#define CMT " (chunk 1)"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_1, CMT);
+
+  TEST_ALL_EXTRA_CHUNKS(3, 2);
+
+#undef CMT
+#define CMT " (chunk 2)"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_2, CMT);
+
+  /* Check vst4_lane/vst4q_lane.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VST4_LANE/VST4Q_LANE"
+  TEST_ALL_VSTX_LANE(4);
+
+#undef CMT
+#define CMT " (chunk 0)"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_0, CMT);
+
+  TEST_ALL_EXTRA_CHUNKS(4, 1);
+
+#undef CMT
+#define CMT " (chunk 1)"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_1, CMT);
+
+  TEST_ALL_EXTRA_CHUNKS(4, 2);
+
+#undef CMT
+#define CMT " (chunk 2)"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_2, CMT);
+
+  TEST_ALL_EXTRA_CHUNKS(4, 3);
+
+#undef CMT
+#define CMT " (chunk 3)"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_3, CMT);
+
+#endif /* __aarch64__.  */
+
   return 0;
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
index 808641524c47b2c245ee2f10e74a784a7bccefc9..f192d4dda514287c8417e7fc922bc580b209b163 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
@@ -1,7 +1,8 @@
 /* This file contains tests for the vreinterpret *p128 intrinsics.  */
 
-/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */
 /* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/
 
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
@@ -78,9 +79,7 @@ VECT_VAR_DECL(vreint_expected_q_f16_p128,hfloat,16,8) [] = { 0xfff0, 0xffff,
 int main (void)
 {
   DECL_VARIABLE_128BITS_VARIANTS(vreint_vector);
-  DECL_VARIABLE(vreint_vector, poly, 64, 2);
   DECL_VARIABLE_128BITS_VARIANTS(vreint_vector_res);
-  DECL_VARIABLE(vreint_vector_res, poly, 64, 2);
 
   clean_results ();
 
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
index 1d8cf9aa69f0b5b0717e98de613e3c350d6395d4..c915fd2fea6b4d8770c9a4aab88caad391105d89 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
@@ -1,7 +1,8 @@
 /* This file contains tests for the vreinterpret *p64 intrinsics.  */
 
-/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */
 /* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/
 
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
@@ -121,11 +122,7 @@ int main (void)
   CHECK_FP(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, "");
 
   DECL_VARIABLE_ALL_VARIANTS(vreint_vector);
-  DECL_VARIABLE(vreint_vector, poly, 64, 1);
-  DECL_VARIABLE(vreint_vector, poly, 64, 2);
   DECL_VARIABLE_ALL_VARIANTS(vreint_vector_res);
-  DECL_VARIABLE(vreint_vector_res, poly, 64, 1);
-  DECL_VARIABLE(vreint_vector_res, poly, 64, 2);
 
   clean_results ();
 


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
  2016-11-24 11:45     ` Tamar Christina
@ 2016-11-25 14:54       ` Christophe Lyon
  2016-11-25 15:03         ` Christophe Lyon
  0 siblings, 1 reply; 17+ messages in thread
From: Christophe Lyon @ 2016-11-25 14:54 UTC (permalink / raw)
  To: Tamar Christina
  Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
	James Greenhalgh, Kyrylo Tkachov, nd

Hi Tamar,

On 24 November 2016 at 12:45, Tamar Christina <Tamar.Christina@arm.com> wrote:
> Hi Christophe,
>
> I have combined most of the tests in p64_p128, except for the
> vreinterpret_p128 and vreinterpret_p64 ones, because I felt the amount
> of code that would have to be added to p64_p128, versus keeping them in those
> files, isn't worth it, since a lot of the test setup would have to be copied.
>

A few comments about this new version:
* arm-neon-ref.h: why do you create CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64?
Can't you just add calls to CHECK_CRYPTO in the existing
CHECK_RESULTS_NAMED_NO_FP16?
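
I mean just adding the guarded calls next to the existing poly checks,
roughly (plus the usual trailing backslashes):

    CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment);
    CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment);

and likewise poly, 64, 2 for the 128-bit half.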

* p64_p128:
From what I can see ARM and AArch64 differ on the vceq variants
available with poly64.
For ARM, arm_neon.h contains: uint64x1_t vceq_p64 (poly64x1_t __a,
poly64x1_t __b)
For AArch64, I can't see vceq_p64 in arm_neon.h... Actually, I've just noticed
the other patch you submitted while I was writing this, where you add vceq_p64 for
aarch64, but it still returns uint64_t.
Why do you change the vceq_p64 test to return poly64_t instead of uint64_t?

Why do you add #ifdef __aarch64__ before the vldX_p64 tests, keeping it until vsli_p64?

The comment /* vget_lane_p64 tests.  */ is wrong before the VLDX_LANE tests.

You need to protect the new vmov, vget_high and vget_lane tests with
#ifdef __aarch64__.

Christophe

> Kind regards,
> Tamar
> ________________________________________
> From: Tamar Christina
> Sent: Tuesday, November 8, 2016 11:58:46 AM
> To: Christophe Lyon
> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
>
> Hi Christophe,
>
> Thanks for the review!
>
>>
>> A while ago I added p64_p128.c, to contain all the poly64/128 tests except for
>> vreinterpret.
>> Why do you need to create p64.c?
>
> I originally created it because I had a much smaller set of intrinsics that I wanted to
> add initially; this grew, and it hadn't occurred to me that I could use the existing file now.
>
> Another reason was the effective-target arm_crypto_ok as you mentioned below.
>
>>
>> Similarly, adding tests for vcreate_p64 etc... in p64.c or p64_p128.c might be
>> easier to maintain than adding them to vcreate.c etc with several #ifdef
>> conditions.
>
> Fair enough, I'll move them to p64_p128.c.
>
>> For vdup-vmov.c, why do you add the "&& defined(__aarch64__)"
>> condition? These intrinsics are defined in arm/arm_neon.h, right?
>> They are tested in p64_p128.c
>
> I should have looked for them; they weren't being tested before, so I had
> mistakenly assumed that they weren't available. Now I realize I just need
> to add the proper test option to the file to enable crypto. I'll update this as well.
>
>> Looking at your patch, it seems some tests are currently missing for arm:
>> vget_high_p64. I'm not sure why I missed it when I removed neon-
>> testgen...
>
> I'll adjust the test conditions so they run for ARM as well.
>
>>
>> Regarding vreinterpret_p128.c, doesn't the existing effective-target
>> arm_crypto_ok prevent the tests from running on aarch64?
>
> Yes they do. I was comparing the output against a clean version and hadn't noticed
> that they weren't running. Thanks!
>
>>
>> Thanks,
>>
>> Christophe

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
  2016-11-25 14:54       ` Christophe Lyon
@ 2016-11-25 15:03         ` Christophe Lyon
  2016-11-25 16:01           ` Tamar Christina
  0 siblings, 1 reply; 17+ messages in thread
From: Christophe Lyon @ 2016-11-25 15:03 UTC (permalink / raw)
  To: Tamar Christina
  Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
	James Greenhalgh, Kyrylo Tkachov, nd

On 25 November 2016 at 15:53, Christophe Lyon
<christophe.lyon@linaro.org> wrote:
> Hi Tamar,
>
> On 24 November 2016 at 12:45, Tamar Christina <Tamar.Christina@arm.com> wrote:
>> Hi Christophe,
>>
>> I have combined most of the tests in p64_p128, except for the
>> vreinterpret_p128 and vreinterpret_p64 ones, because I felt the amount
>> of code that would have to be added to p64_p128, versus keeping them in those
>> files, isn't worth it, since a lot of the test setup would have to be copied.
>>
>
> A few comments about this new version:
> * arm-neon-ref.h: why do you create CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64?
> Can't you just add calls to CHECK_CRYPTO in the existing
> CHECK_RESULTS_NAMED_NO_FP16?
>
> * p64_p128:
> From what I can see ARM and AArch64 differ on the vceq variants
> available with poly64.
> For ARM, arm_neon.h contains: uint64x1_t vceq_p64 (poly64x1_t __a,
> poly64x1_t __b)
> For AArch64, I can't see vceq_p64 in arm_neon.h... Actually, I've just noticed
> the other patch you submitted while I was writing this, where you add vceq_p64 for
> aarch64, but it still returns uint64_t.
> Why do you change the vceq_p64 test to return poly64_t instead of uint64_t?
>
> Why do you add #ifdef __aarch64__ before the vldX_p64 tests, keeping it until vsli_p64?
>
> The comment /* vget_lane_p64 tests.  */ is wrong before the VLDX_LANE tests.
>
> You need to protect the new vmov, vget_high and vget_lane tests with
> #ifdef __aarch64__.
>

Actually, vget_high_p64 exists on arm, so no need for the #ifdef for it.


> Christophe
>
>> Kind regards,
>> Tamar
>> ________________________________________
>> From: Tamar Christina
>> Sent: Tuesday, November 8, 2016 11:58:46 AM
>> To: Christophe Lyon
>> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
>> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
>>
>> Hi Christophe,
>>
>> Thanks for the review!
>>
>>>
>>> A while ago I added p64_p128.c, to contain all the poly64/128 tests except for
>>> vreinterpret.
>>> Why do you need to create p64.c?
>>
>> I originally created it because I had a much smaller set of intrinsics that I wanted to
>> add initially; this grew, and it hadn't occurred to me that I could use the existing file now.
>>
>> Another reason was the effective-target arm_crypto_ok as you mentioned below.
>>
>>>
>>> Similarly, adding tests for vcreate_p64 etc... in p64.c or p64_p128.c might be
>>> easier to maintain than adding them to vcreate.c etc with several #ifdef
>>> conditions.
>>
>> Fair enough, I'll move them to p64_p128.c.
>>
>>> For vdup-vmov.c, why do you add the "&& defined(__aarch64__)"
>>> condition? These intrinsics are defined in arm/arm_neon.h, right?
>>> They are tested in p64_p128.c
>>
>> I should have looked for them; they weren't being tested before, so I had
>> mistakenly assumed that they weren't available. Now I realize I just need
>> to add the proper test option to the file to enable crypto. I'll update this as well.
>>
>>> Looking at your patch, it seems some tests are currently missing for arm:
>>> vget_high_p64. I'm not sure why I missed it when I removed neon-
>>> testgen...
>>
>> I'll adjust the test conditions so they run for ARM as well.
>>
>>>
>>> Regarding vreinterpret_p128.c, doesn't the existing effective-target
>>> arm_crypto_ok prevent the tests from running on aarch64?
>>
>> Yes they do. I was comparing the output against a clean version and hadn't noticed
>> that they weren't running. Thanks!
>>
>>>
>>> Thanks,
>>>
>>> Christophe

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
  2016-11-25 15:03         ` Christophe Lyon
@ 2016-11-25 16:01           ` Tamar Christina
  2016-11-29  9:50             ` Tamar Christina
  0 siblings, 1 reply; 17+ messages in thread
From: Tamar Christina @ 2016-11-25 16:01 UTC (permalink / raw)
  To: Christophe Lyon
  Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
	James Greenhalgh, Kyrylo Tkachov, nd

 >
> > A few comments about this new version:
> > * arm-neon-ref.h: why do you create
> CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64?
> > Can't you just add calls to CHECK_CRYPTO in the existing
> > CHECK_RESULTS_NAMED_NO_FP16?

Yes, that should be fine. I didn't originally have CHECK_CRYPTO, and when I added it
I didn't remove the split. I'll do it now.

> >
> > * p64_p128:
> > From what I can see ARM and AArch64 differ on the vceq variants
> > available with poly64.
> > For ARM, arm_neon.h contains: uint64x1_t vceq_p64 (poly64x1_t __a,
> > poly64x1_t __b) For AArch64, I can't see vceq_p64 in arm_neon.h? ...
> > Actually I've just noticed the other you submitted while I was writing
> > this, where you add vceq_p64 for aarch64, but it still returns
> > uint64_t.
> > Why do you change the vceq_64 test to return poly64_t instead of
> uint64_t?

This patch is slightly outdated. The correct type is `uint64_t`, but by the time that was noticed
this patch had already been sent. A new one is coming soon.
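
(In other words the expected data stays unsigned, as in the original
file:

VECT_VAR_DECL(vceq_expected,uint,64,1) [] = { 0x0 };

rather than the poly variant this version of the patch switched it to.)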

> >
> > Why do you add #ifdef __aarch64__ before the vldX_p64 tests, keeping it until vsli_p64?
> >

This is wrong; I'll remove them. The guard was supposed to be around the vldX_lane_p64 tests.
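
I.e. the guard should only span that block, roughly:

#ifdef __aarch64__
  /* vldX_lane_p64 tests.  */
  ...
#endif /* __aarch64__.  */

with the vldX_p64 and vldX_dup_p64 tests left unguarded.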

> > The comment /* vget_lane_p64 tests.  */ is wrong before VLDX_LANE
> > tests
> >
> > You need to protect the new vmov, vget_high and vget_lane tests with
> > #ifdef __aarch64__.
> >

vget_lane is already in an #ifdef; for vmov you're right, but I also notice that the
test calls VDUP instead of VMOV, which explains why I didn't get a test failure.
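
The fixed version will use the TEST_VMOV helper there, along the lines
of (sketch):

    TEST_VMOV(, poly, p, 64, 1);
    TEST_VMOV(q, poly, p, 64, 2);

instead of the TEST_VDUP calls that slipped in.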

Thanks for the feedback,
I'll get these updated.

> 
> Actually, vget_high_p64 exists on arm, so no need for the #ifdef for it.
> 
> 
> > Christophe
> >
> >> Kind regards,
> >> Tamar
> >> ________________________________________
> >> From: Tamar Christina
> >> Sent: Tuesday, November 8, 2016 11:58:46 AM
> >> To: Christophe Lyon
> >> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard
> >> Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
> >> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing
> >> Poly64_t intrinsics to GCC
> >>
> >> Hi Christophe,
> >>
> >> Thanks for the review!
> >>
> >>>
> >>> A while ago I added p64_p128.c, to contain all the poly64/128 tests
> >>> except for vreinterpret.
> >>> Why do you need to create p64.c?
> >>
> >> I originally created it because I had a much smaller set of
> >> intrinsics that I wanted to add initially; this grew, and it hadn't occurred to
> me that I could use the existing file now.
> >>
> >> Another reason was the effective-target arm_crypto_ok as you
> mentioned below.
> >>
> >>>
> >>> Similarly, adding tests for vcreate_p64 etc... in p64.c or
> >>> p64_p128.c might be easier to maintain than adding them to vcreate.c
> >>> etc with several #ifdef conditions.
> >>
> >> Fair enough, I'll move them to p64_p128.c.
> >>
> >>> For vdup-vmov.c, why do you add the "&& defined(__aarch64__)"
> >>> condition? These intrinsics are defined in arm/arm_neon.h, right?
> >>> They are tested in p64_p128.c
> >>
> >> I should have looked for them; they weren't being tested before, so I
> >> had mistakenly assumed that they weren't available. Now I realize I
> >> just need to add the proper test option to the file to enable crypto. I'll
> update this as well.
> >>
> >>> Looking at your patch, it seems some tests are currently missing for arm:
> >>> vget_high_p64. I'm not sure why I missed it when I removed neon-
> >>> testgen...
> >>
> >> I'll adjust the test conditions so they run for ARM as well.
> >>
> >>>
> >>> Regarding vreinterpret_p128.c, doesn't the existing effective-target
> >>> arm_crypto_ok prevent the tests from running on aarch64?
> >>
> >> Yes they do. I was comparing the output against a clean version and
> >> hadn't noticed that they weren't running. Thanks!
> >>
> >>>
> >>> Thanks,
> >>>
> >>> Christophe

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
  2016-11-25 16:01           ` Tamar Christina
@ 2016-11-29  9:50             ` Tamar Christina
  2016-11-29 10:12               ` Christophe Lyon
  2016-11-29 13:48               ` Kyrill Tkachov
  0 siblings, 2 replies; 17+ messages in thread
From: Tamar Christina @ 2016-11-29  9:50 UTC (permalink / raw)
  To: Christophe Lyon
  Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
	James Greenhalgh, Kyrylo Tkachov, nd

[-- Attachment #1: Type: text/plain, Size: 4464 bytes --]

Hi All,

The new patch contains the proper types for the intrinsics that should be returning uint64x1_t,
and it addresses the rest of Christophe's comments.

Kind Regards,
Tamar

________________________________________
From: Tamar Christina
Sent: Friday, November 25, 2016 4:01:30 PM
To: Christophe Lyon
Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC

 >
> > A few comments about this new version:
> > * arm-neon-ref.h: why do you create
> CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64?
> > Can't you just add calls to CHECK_CRYPTO in the existing
> > CHECK_RESULTS_NAMED_NO_FP16?

Yes, that should be fine. I didn't originally have CHECK_CRYPTO, and when I added it
I didn't remove the split. I'll do it now.

> >
> > * p64_p128:
> > From what I can see ARM and AArch64 differ on the vceq variants
> > available with poly64.
> > For ARM, arm_neon.h contains: uint64x1_t vceq_p64 (poly64x1_t __a,
> > poly64x1_t __b). For AArch64, I can't see vceq_p64 in arm_neon.h...
> > Actually, I've just noticed the other patch you submitted while I was writing
> > this, where you add vceq_p64 for aarch64, but it still returns
> > uint64_t.
> > Why do you change the vceq_p64 test to return poly64_t instead of
> uint64_t?

This patch is slightly outdated. The correct type is `uint64_t`, but by the time that was noticed
this patch had already been sent. A new one is coming soon.

> >
> > Why do you add #ifdef __aarch64__ before the vldX_p64 tests, keeping it until vsli_p64?
> >

This is wrong; I'll remove them. The guard was supposed to be around the vldX_lane_p64 tests.

> > The comment /* vget_lane_p64 tests.  */ is wrong before VLDX_LANE
> > tests
> >
> > You need to protect the new vmov, vget_high and vget_lane tests with
> > #ifdef __aarch64__.
> >

vget_lane is already in an #ifdef; for vmov you're right, but I also notice that the
test calls VDUP instead of VMOV, which explains why I didn't get a test failure.

Thanks for the feedback,
I'll get these updated.

>
> Actually, vget_high_p64 exists on arm, so no need for the #ifdef for it.
>
>
> > Christophe
> >
> >> Kind regards,
> >> Tamar
> >> ________________________________________
> >> From: Tamar Christina
> >> Sent: Tuesday, November 8, 2016 11:58:46 AM
> >> To: Christophe Lyon
> >> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard
> >> Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
> >> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing
> >> Poly64_t intrinsics to GCC
> >>
> >> Hi Christophe,
> >>
> >> Thanks for the review!
> >>
> >>>
> >>> A while ago I added p64_p128.c, to contain all the poly64/128 tests
> >>> except for vreinterpret.
> >>> Why do you need to create p64.c?
> >>
> >> I originally created it because I had a much smaller set of
> >> intrinsics that I wanted to add initially; this grew, and it hadn't occurred to
> me that I could use the existing file now.
> >>
> >> Another reason was the effective-target arm_crypto_ok as you
> mentioned below.
> >>
> >>>
> >>> Similarly, adding tests for vcreate_p64 etc... in p64.c or
> >>> p64_p128.c might be easier to maintain than adding them to vcreate.c
> >>> etc with several #ifdef conditions.
> >>
> >> Fair enough, I'll move them to p64_p128.c.
> >>
> >>> For vdup-vmov.c, why do you add the "&& defined(__aarch64__)"
> >>> condition? These intrinsics are defined in arm/arm_neon.h, right?
> >>> They are tested in p64_p128.c
> >>
> >> I should have looked for them; they weren't being tested before, so I
> >> had mistakenly assumed that they weren't available. Now I realize I
> >> just need to add the proper test option to the file to enable crypto. I'll
> update this as well.
> >>
> >>> Looking at your patch, it seems some tests are currently missing for arm:
> >>> vget_high_p64. I'm not sure why I missed it when I removed neon-
> >>> testgen...
> >>
> >> I'll adjust the test conditions so they run for ARM as well.
> >>
> >>>
> >>> Regarding vreinterpret_p128.c, doesn't the existing effective-target
> >>> arm_crypto_ok prevent the tests from running on aarch64?
> >>
> >> Yes they do. I was comparing the output against a clean version and
> >> hadn't noticed that they weren't running. Thanks!
> >>
> >>>
> >>> Thanks,
> >>>
> >>> Christophe

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: aarch64-test-ver3.patch --]
[-- Type: text/x-patch; name="aarch64-test-ver3.patch", Size: 24846 bytes --]

diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
index 462141586b3db7c5256c74b08fa0449210634226..beaf6ac31d5c5affe3702a505ad0df8679229e32 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
@@ -32,6 +32,13 @@ extern size_t strlen(const char *);
    VECT_VAR(expected, int, 16, 4) -> expected_int16x4
    VECT_VAR_DECL(expected, int, 16, 4) -> int16x4_t expected_int16x4
 */
+/* Some instructions don't exist on ARM.
+   Use this macro to guard against them.  */
+#ifdef __aarch64__
+#define AARCH64_ONLY(X) X
+#else
+#define AARCH64_ONLY(X)
+#endif
 
 #define xSTR(X) #X
 #define STR(X) xSTR(X)
@@ -92,6 +99,13 @@ extern size_t strlen(const char *);
     fprintf(stderr, "CHECKED %s %s\n", STR(VECT_TYPE(T, W, N)), MSG);	\
   }
 
+#if defined (__ARM_FEATURE_CRYPTO)
+#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT) \
+	       CHECK(MSG,T,W,N,FMT,EXPECTED,COMMENT)
+#else
+#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT)
+#endif
+
 /* Floating-point variant.  */
 #define CHECK_FP(MSG,T,W,N,FMT,EXPECTED,COMMENT)			\
   {									\
@@ -184,6 +198,9 @@ extern ARRAY(expected, uint, 32, 2);
 extern ARRAY(expected, uint, 64, 1);
 extern ARRAY(expected, poly, 8, 8);
 extern ARRAY(expected, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+extern ARRAY(expected, poly, 64, 1);
+#endif
 extern ARRAY(expected, hfloat, 16, 4);
 extern ARRAY(expected, hfloat, 32, 2);
 extern ARRAY(expected, hfloat, 64, 1);
@@ -197,6 +214,9 @@ extern ARRAY(expected, uint, 32, 4);
 extern ARRAY(expected, uint, 64, 2);
 extern ARRAY(expected, poly, 8, 16);
 extern ARRAY(expected, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+extern ARRAY(expected, poly, 64, 2);
+#endif
 extern ARRAY(expected, hfloat, 16, 8);
 extern ARRAY(expected, hfloat, 32, 4);
 extern ARRAY(expected, hfloat, 64, 2);
@@ -213,6 +233,7 @@ extern ARRAY(expected, hfloat, 64, 2);
     CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment);		\
     CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment);		\
     CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment);		\
+    CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment);	\
     CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment);	\
 									\
     CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment);		\
@@ -225,6 +246,7 @@ extern ARRAY(expected, hfloat, 64, 2);
     CHECK(test_name, uint, 64, 2, PRIx64, EXPECTED, comment);		\
     CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment);		\
     CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment);		\
+    CHECK_CRYPTO(test_name, poly, 64, 2, PRIx64, EXPECTED, comment);	\
     CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment);	\
   }									\
 
@@ -398,6 +420,9 @@ static void clean_results (void)
   CLEAN(result, uint, 64, 1);
   CLEAN(result, poly, 8, 8);
   CLEAN(result, poly, 16, 4);
+#if defined (__ARM_FEATURE_CRYPTO)
+  CLEAN(result, poly, 64, 1);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   CLEAN(result, float, 16, 4);
 #endif
@@ -413,6 +438,9 @@ static void clean_results (void)
   CLEAN(result, uint, 64, 2);
   CLEAN(result, poly, 8, 16);
   CLEAN(result, poly, 16, 8);
+#if defined (__ARM_FEATURE_CRYPTO)
+  CLEAN(result, poly, 64, 2);
+#endif
 #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
   CLEAN(result, float, 16, 8);
 #endif
@@ -438,6 +466,13 @@ static void clean_results (void)
 #define DECL_VARIABLE(VAR, T1, W, N)		\
   VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)
 
+#if defined (__ARM_FEATURE_CRYPTO)
+#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N) \
+  DECL_VARIABLE(VAR, T1, W, N)
+#else
+#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N)
+#endif
+
 /* Declare only 64 bits signed variants.  */
 #define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR)	\
   DECL_VARIABLE(VAR, int, 8, 8);			\
@@ -473,6 +508,7 @@ static void clean_results (void)
   DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR);	\
   DECL_VARIABLE(VAR, poly, 8, 8);		\
   DECL_VARIABLE(VAR, poly, 16, 4);		\
+  DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1);	\
   DECL_VARIABLE(VAR, float, 16, 4);		\
   DECL_VARIABLE(VAR, float, 32, 2)
 #else
@@ -481,6 +517,7 @@ static void clean_results (void)
   DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR);	\
   DECL_VARIABLE(VAR, poly, 8, 8);		\
   DECL_VARIABLE(VAR, poly, 16, 4);		\
+  DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1);	\
   DECL_VARIABLE(VAR, float, 32, 2)
 #endif
 
@@ -491,6 +528,7 @@ static void clean_results (void)
   DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR);	\
   DECL_VARIABLE(VAR, poly, 8, 16);		\
   DECL_VARIABLE(VAR, poly, 16, 8);		\
+  DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2);	\
   DECL_VARIABLE(VAR, float, 16, 8);		\
   DECL_VARIABLE(VAR, float, 32, 4)
 #else
@@ -499,6 +537,7 @@ static void clean_results (void)
   DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR);	\
   DECL_VARIABLE(VAR, poly, 8, 16);		\
   DECL_VARIABLE(VAR, poly, 16, 8);		\
+  DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2);	\
   DECL_VARIABLE(VAR, float, 32, 4)
 #endif
 /* Declare all variants.  */
@@ -531,6 +570,13 @@ static void clean_results (void)
 
 /* Helpers to call macros with 1 constant and 5 variable
    arguments.  */
+#if defined (__ARM_FEATURE_CRYPTO)
+#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N) \
+  MACRO(VAR1, VAR2, T1, T2, T3, W, N)
+#else
+#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N)
+#endif
+
 #define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)	\
   MACRO(VAR, , int, s, 8, 8);					\
   MACRO(VAR, , int, s, 16, 4);					\
@@ -601,13 +647,15 @@ static void clean_results (void)
   TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
   TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
   MACRO(VAR1, VAR2, , poly, p, 8, 8);				\
-  MACRO(VAR1, VAR2, , poly, p, 16, 4)
+  MACRO(VAR1, VAR2, , poly, p, 16, 4);				\
+  MACRO_CRYPTO(MACRO, VAR1, VAR2, , poly, p, 64, 1)
 
 #define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
   TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
   TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
   MACRO(VAR1, VAR2, q, poly, p, 8, 16);				\
-  MACRO(VAR1, VAR2, q, poly, p, 16, 8)
+  MACRO(VAR1, VAR2, q, poly, p, 16, 8);				\
+  MACRO_CRYPTO(MACRO, VAR1, VAR2, q, poly, p, 64, 2)
 
 #define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2)	\
   TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2);	\
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
index 519cffb0125079022e7ba876c1ca657d9e37cac2..8907b38cde90b44a8f1501f72b2c4e812cba5707 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
@@ -1,8 +1,9 @@
 /* This file contains tests for all the *p64 intrinsics, except for
    vreinterpret which have their own testcase.  */
 
-/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */
 /* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/
 
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
@@ -38,6 +39,17 @@ VECT_VAR_DECL(vdup_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
 VECT_VAR_DECL(vdup_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
 						 0xfffffffffffffff2 };
 
+/* Expected results: vmov_n.  */
+VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+						 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+						 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+						 0xfffffffffffffff2 };
+
 /* Expected results: vext.  */
 VECT_VAR_DECL(vext_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 };
@@ -45,6 +57,9 @@ VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 };
 /* Expected results: vget_low.  */
 VECT_VAR_DECL(vget_low_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
 
+/* Expected results: vget_high.  */
+VECT_VAR_DECL(vget_high_expected,poly,64,1) [] = { 0xfffffffffffffff1 };
+
 /* Expected results: vld1.  */
 VECT_VAR_DECL(vld1_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(vld1_expected,poly,64,2) [] = { 0xfffffffffffffff0,
@@ -109,6 +124,39 @@ VECT_VAR_DECL(vst1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(vst1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0,
 						   0x3333333333333333 };
 
+/* Expected results: vldX_lane.  */
+VECT_VAR_DECL(expected_vld_st2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld_st2_0,poly,64,2) [] = { 0xfffffffffffffff0,
+						   0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st2_1,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+						   0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld_st3_0,poly,64,2) [] = { 0xfffffffffffffff0,
+						   0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st3_1,poly,64,2) [] = { 0xfffffffffffffff2,
+						   0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld_st3_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+						   0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected_vld_st4_0,poly,64,2) [] = { 0xfffffffffffffff0,
+						   0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected_vld_st4_1,poly,64,2) [] = { 0xfffffffffffffff2,
+						   0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld_st4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(expected_vld_st4_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+						   0xaaaaaaaaaaaaaaaa };
+VECT_VAR_DECL(expected_vld_st4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
+VECT_VAR_DECL(expected_vld_st4_3,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
+						   0xaaaaaaaaaaaaaaaa };
+
+/* Expected results: vget_lane.  */
+VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0;
+VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0;
+
 int main (void)
 {
   int i;
@@ -341,6 +389,26 @@ int main (void)
 
   CHECK(TEST_MSG, poly, 64, 1, PRIx64, vget_low_expected, "");
 
+  /* vget_high_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VGET_HIGH"
+
+#define TEST_VGET_HIGH(T1, T2, W, N, N2)					\
+  VECT_VAR(vget_high_vector64, T1, W, N) =				\
+    vget_high_##T2##W(VECT_VAR(vget_high_vector128, T1, W, N2));		\
+  vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vget_high_vector64, T1, W, N))
+
+  DECL_VARIABLE(vget_high_vector64, poly, 64, 1);
+  DECL_VARIABLE(vget_high_vector128, poly, 64, 2);
+
+  CLEAN(result, poly, 64, 1);
+
+  VLOAD(vget_high_vector128, buffer, q, poly, p, 64, 2);
+
+  TEST_VGET_HIGH(poly, p, 64, 1, 2);
+
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vget_high_expected, "");
+
   /* vld1_p64 tests.  */
 #undef TEST_MSG
 #define TEST_MSG "VLD1/VLD1Q"
@@ -645,7 +713,7 @@ int main (void)
   VECT_VAR(vst1_lane_vector, T1, W, N) =				\
     vld1##Q##_##T2##W(VECT_VAR(buffer, T1, W, N));			\
   vst1##Q##_lane_##T2##W(VECT_VAR(result, T1, W, N),			\
-			 VECT_VAR(vst1_lane_vector, T1, W, N), L)
+			 VECT_VAR(vst1_lane_vector, T1, W, N), L);
 
   DECL_VARIABLE(vst1_lane_vector, poly, 64, 1);
   DECL_VARIABLE(vst1_lane_vector, poly, 64, 2);
@@ -659,5 +727,298 @@ int main (void)
   CHECK(TEST_MSG, poly, 64, 1, PRIx64, vst1_lane_expected, "");
   CHECK(TEST_MSG, poly, 64, 2, PRIx64, vst1_lane_expected, "");
 
+#ifdef __aarch64__
+
+  /* vmov_n_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VMOV/VMOVQ"
+
+#define TEST_VMOV(Q, T1, T2, W, N)					\
+  VECT_VAR(vmov_n_vector, T1, W, N) =					\
+    vmov##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]);		\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vmov_n_vector, T1, W, N))
+
+  DECL_VARIABLE(vmov_n_vector, poly, 64, 1);
+  DECL_VARIABLE(vmov_n_vector, poly, 64, 2);
+
+  /* Try to read different places from the input buffer.  */
+  for (i=0; i< 3; i++) {
+    CLEAN(result, poly, 64, 1);
+    CLEAN(result, poly, 64, 2);
+
+    TEST_VMOV(, poly, p, 64, 1);
+    TEST_VMOV(q, poly, p, 64, 2);
+
+    switch (i) {
+    case 0:
+      CHECK(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected0, "");
+      CHECK(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected0, "");
+      break;
+    case 1:
+      CHECK(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected1, "");
+      CHECK(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected1, "");
+      break;
+    case 2:
+      CHECK(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected2, "");
+      CHECK(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected2, "");
+      break;
+    default:
+      abort();
+    }
+  }
+
+  /* vget_lane_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VGET_LANE/VGETQ_LANE"
+
+#define TEST_VGET_LANE(Q, T1, T2, W, N, L)				   \
+  VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \
+  if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) {		\
+    fprintf(stderr,							   \
+	    "ERROR in %s (%s line %d in result '%s') at type %s "	   \
+	    "got 0x%" PRIx##W " != 0x%" PRIx##W "\n",			   \
+	    TEST_MSG, __FILE__, __LINE__,				   \
+	    STR(VECT_VAR(vget_lane_expected, T1, W, N)),		   \
+	    STR(VECT_NAME(T1, W, N)),					   \
+	    VECT_VAR(vget_lane_vector, T1, W, N),			   \
+	    VECT_VAR(vget_lane_expected, T1, W, N));			   \
+    abort ();								   \
+  }
+
+  /* Initialize input values.  */
+  DECL_VARIABLE(vector, poly, 64, 1);
+  DECL_VARIABLE(vector, poly, 64, 2);
+
+  VLOAD(vector, buffer,  , poly, p, 64, 1);
+  VLOAD(vector, buffer, q, poly, p, 64, 2);
+
+  VECT_VAR_DECL(vget_lane_vector, poly, 64, 1);
+  VECT_VAR_DECL(vget_lane_vector, poly, 64, 2);
+
+  TEST_VGET_LANE( , poly, p, 64, 1, 0);
+  TEST_VGET_LANE(q, poly, p, 64, 2, 0);
+
+  /* vldx_lane_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VLDX_LANE/VLDXQ_LANE"
+
+VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 64, 2);
+VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 64, 3);
+VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 64, 4);
+
+  /* In this case, input variables are arrays of vectors.  */
+#define DECL_VLD_STX_LANE(T1, W, N, X)					\
+  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X);	\
+  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X);	\
+  VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
+
+  /* We need to use a temporary result buffer (result_bis), because
+     the one used for other tests is not large enough. A subset of the
+     result data is moved from result_bis to result, and it is this
+     subset which is used to check the actual behavior. The next
+     macro enables moving another chunk of data from result_bis to
+     result.  */
+  /* We also use another extra input buffer (buffer_src), which we
+     fill with 0xAA, and which is used to load a vector from which we
+     read a given lane.  */
+
+#define TEST_VLDX_LANE(Q, T1, T2, W, N, X, L)				\
+  memset (VECT_VAR(buffer_src, T1, W, N), 0xAA,				\
+	  sizeof(VECT_VAR(buffer_src, T1, W, N)));			\
+									\
+  VECT_ARRAY_VAR(vector_src, T1, W, N, X) =				\
+    vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N));		\
+									\
+  VECT_ARRAY_VAR(vector, T1, W, N, X) =					\
+    /* Use dedicated init buffer, of size X.  */			\
+    vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X),	\
+			     VECT_ARRAY_VAR(vector_src, T1, W, N, X),	\
+			     L);					\
+  vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N),		\
+		      VECT_ARRAY_VAR(vector, T1, W, N, X));		\
+  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
+	 sizeof(VECT_VAR(result, T1, W, N)))
+
+  /* Overwrite "result" with the contents of "result_bis"[Y].  */
+#undef TEST_EXTRA_CHUNK
+#define TEST_EXTRA_CHUNK(T1, W, N, X, Y)		\
+  memcpy(VECT_VAR(result, T1, W, N),			\
+	 &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]),	\
+	 sizeof(VECT_VAR(result, T1, W, N)));
+
+  /* Add some padding to try to catch out-of-bounds accesses.  */
+#define ARRAY1(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[1]={42}
+#define DUMMY_ARRAY(V, T, W, N, L) \
+  VECT_VAR_DECL(V,T,W,N)[N*L]={0}; \
+  ARRAY1(V##_pad,T,W,N)
+
+#define DECL_ALL_VLD_STX_LANE(X)     \
+  DECL_VLD_STX_LANE(poly, 64, 1, X); \
+  DECL_VLD_STX_LANE(poly, 64, 2, X);
+
+#define TEST_ALL_VLDX_LANE(X)		  \
+  TEST_VLDX_LANE(, poly, p, 64, 1, X, 0); \
+  TEST_VLDX_LANE(q, poly, p, 64, 2, X, 0);
+
+#define TEST_ALL_EXTRA_CHUNKS(X,Y)	     \
+  TEST_EXTRA_CHUNK(poly, 64, 1, X, Y) \
+  TEST_EXTRA_CHUNK(poly, 64, 2, X, Y)
+
+#define CHECK_RESULTS_VLD_STX_LANE(test_name,EXPECTED,comment)	\
+  CHECK(test_name, poly, 64, 1, PRIx64, EXPECTED, comment);	\
+  CHECK(test_name, poly, 64, 2, PRIx64, EXPECTED, comment);
+
+  /* Declare the temporary buffers / variables.  */
+  DECL_ALL_VLD_STX_LANE(2);
+  DECL_ALL_VLD_STX_LANE(3);
+  DECL_ALL_VLD_STX_LANE(4);
+
+  DUMMY_ARRAY(buffer_src, poly, 64, 1, 4);
+  DUMMY_ARRAY(buffer_src, poly, 64, 2, 4);
+
+  /* Check vld2_lane/vld2q_lane.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD2_LANE/VLD2Q_LANE"
+  TEST_ALL_VLDX_LANE(2);
+  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st2_0, " chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(2, 1);
+  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st2_1, " chunk 1");
+
+  /* Check vld3_lane/vld3q_lane.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD3_LANE/VLD3Q_LANE"
+  TEST_ALL_VLDX_LANE(3);
+  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_0, " chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(3, 1);
+  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_1, " chunk 1");
+
+  TEST_ALL_EXTRA_CHUNKS(3, 2);
+  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_2, " chunk 2");
+
+  /* Check vld4_lane/vld4q_lane.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VLD4_LANE/VLD4Q_LANE"
+  TEST_ALL_VLDX_LANE(4);
+  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_0, " chunk 0");
+
+  TEST_ALL_EXTRA_CHUNKS(4, 1);
+  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_1, " chunk 1");
+  TEST_ALL_EXTRA_CHUNKS(4, 2);
+
+  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_2, " chunk 2");
+
+  TEST_ALL_EXTRA_CHUNKS(4, 3);
+  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_3, " chunk 3");
+
+  /* In this case, input variables are arrays of vectors.  */
+#define DECL_VSTX_LANE(T1, W, N, X)					\
+  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X);	\
+  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X);	\
+  VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
+
+  /* We need to use a temporary result buffer (result_bis), because
+     the one used for other tests is not large enough. A subset of the
+     result data is moved from result_bis to result, and it is this
+     subset which is used to check the actual behavior. The next
+     macro enables moving another chunk of data from result_bis to
+     result.  */
+  /* We also use another extra input buffer (buffer_src), which we
+     fill with 0xAA, and which is used to load a vector from which we
+     read a given lane.  */
+#define TEST_VSTX_LANE(Q, T1, T2, W, N, X, L)				 \
+  memset (VECT_VAR(buffer_src, T1, W, N), 0xAA,				 \
+	  sizeof(VECT_VAR(buffer_src, T1, W, N)));			 \
+  memset (VECT_VAR(result_bis_##X, T1, W, N), 0,			 \
+	  sizeof(VECT_VAR(result_bis_##X, T1, W, N)));			 \
+									 \
+  VECT_ARRAY_VAR(vector_src, T1, W, N, X) =				 \
+    vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N));		 \
+									 \
+  VECT_ARRAY_VAR(vector, T1, W, N, X) =					 \
+    /* Use dedicated init buffer, of size X.  */			 \
+    vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X),	 \
+			     VECT_ARRAY_VAR(vector_src, T1, W, N, X),	 \
+			     L);					 \
+  vst##X##Q##_lane_##T2##W(VECT_VAR(result_bis_##X, T1, W, N),		 \
+			   VECT_ARRAY_VAR(vector, T1, W, N, X),		 \
+			   L);						 \
+  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
+	 sizeof(VECT_VAR(result, T1, W, N)));
+
+#define TEST_ALL_VSTX_LANE(X)		  \
+  TEST_VSTX_LANE(, poly, p, 64, 1, X, 0); \
+  TEST_VSTX_LANE(q, poly, p, 64, 2, X, 0);
+
+  /* Check vst2_lane/vst2q_lane.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VST2_LANE/VST2Q_LANE"
+  TEST_ALL_VSTX_LANE(2);
+
+#define CMT " (chunk 0)"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st2_0, CMT);
+
+  TEST_ALL_EXTRA_CHUNKS(2, 1);
+#undef CMT
+#define CMT " chunk 1"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st2_1, CMT);
+
+  /* Check vst3_lane/vst3q_lane.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VST3_LANE/VST3Q_LANE"
+  TEST_ALL_VSTX_LANE(3);
+
+#undef CMT
+#define CMT " (chunk 0)"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_0, CMT);
+
+  TEST_ALL_EXTRA_CHUNKS(3, 1);
+
+#undef CMT
+#define CMT " (chunk 1)"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_1, CMT);
+
+  TEST_ALL_EXTRA_CHUNKS(3, 2);
+
+#undef CMT
+#define CMT " (chunk 2)"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_2, CMT);
+
+  /* Check vst4_lane/vst4q_lane.  */
+  clean_results ();
+#undef TEST_MSG
+#define TEST_MSG "VST4_LANE/VST4Q_LANE"
+  TEST_ALL_VSTX_LANE(4);
+
+#undef CMT
+#define CMT " (chunk 0)"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_0, CMT);
+
+  TEST_ALL_EXTRA_CHUNKS(4, 1);
+
+#undef CMT
+#define CMT " (chunk 1)"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_1, CMT);
+
+  TEST_ALL_EXTRA_CHUNKS(4, 2);
+
+#undef CMT
+#define CMT " (chunk 2)"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_2, CMT);
+
+  TEST_ALL_EXTRA_CHUNKS(4, 3);
+
+#undef CMT
+#define CMT " (chunk 3)"
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_3, CMT);
+
+#endif /* __aarch64__.  */
+
   return 0;
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
index 808641524c47b2c245ee2f10e74a784a7bccefc9..f192d4dda514287c8417e7fc922bc580b209b163 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c
@@ -1,7 +1,8 @@
 /* This file contains tests for the vreinterpret *p128 intrinsics.  */
 
-/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */
 /* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } } */
 
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
@@ -78,9 +79,7 @@ VECT_VAR_DECL(vreint_expected_q_f16_p128,hfloat,16,8) [] = { 0xfff0, 0xffff,
 int main (void)
 {
   DECL_VARIABLE_128BITS_VARIANTS(vreint_vector);
-  DECL_VARIABLE(vreint_vector, poly, 64, 2);
   DECL_VARIABLE_128BITS_VARIANTS(vreint_vector_res);
-  DECL_VARIABLE(vreint_vector_res, poly, 64, 2);
 
   clean_results ();
 
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
index 1d8cf9aa69f0b5b0717e98de613e3c350d6395d4..c915fd2fea6b4d8770c9a4aab88caad391105d89 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c
@@ -1,7 +1,8 @@
 /* This file contains tests for the vreinterpret *p64 intrinsics.  */
 
-/* { dg-require-effective-target arm_crypto_ok } */
+/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */
 /* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } } */
 
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
@@ -121,11 +122,7 @@ int main (void)
   CHECK_FP(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, "");
 
   DECL_VARIABLE_ALL_VARIANTS(vreint_vector);
-  DECL_VARIABLE(vreint_vector, poly, 64, 1);
-  DECL_VARIABLE(vreint_vector, poly, 64, 2);
   DECL_VARIABLE_ALL_VARIANTS(vreint_vector_res);
-  DECL_VARIABLE(vreint_vector_res, poly, 64, 1);
-  DECL_VARIABLE(vreint_vector_res, poly, 64, 2);
 
   clean_results ();
 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
  2016-11-29  9:50             ` Tamar Christina
@ 2016-11-29 10:12               ` Christophe Lyon
  2016-11-29 12:58                 ` Christophe Lyon
  2016-11-29 13:48               ` Kyrill Tkachov
  1 sibling, 1 reply; 17+ messages in thread
From: Christophe Lyon @ 2016-11-29 10:12 UTC (permalink / raw)
  To: Tamar Christina
  Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
	James Greenhalgh, Kyrylo Tkachov, nd

Hi Tamar,


On 29 November 2016 at 10:50, Tamar Christina <Tamar.Christina@arm.com> wrote:
> Hi All,
>
> The new patch contains the proper types for the intrinsics that should be returning uint64x1
> and addresses the rest of Christophe's comments.
>

LGTM.

One more question: maybe we want to add explicit tests for vdup*_v_p64
even though they are aliases for vmov?
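
A minimal standalone check of that alias behaviour could look like the
sketch below (just an illustration, not a DejaGnu testcase; it assumes
a toolchain invoked with -march=armv8-a+crypto and uses only the
arm_neon.h intrinsics discussed in this thread):

#include <arm_neon.h>
#include <stdlib.h>

int main (void)
{
  poly64_t in = 0xfffffffffffffff0ull;
  /* Both intrinsics broadcast the scalar to every lane, so the
     single lane of each result should compare equal.  */
  poly64x1_t a = vdup_n_p64 (in);
  poly64x1_t b = vmov_n_p64 (in);
  if (vget_lane_p64 (a, 0) != vget_lane_p64 (b, 0))
    abort ();
  return 0;
}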

Christophe

> Kind Regards,
> Tamar
>
> ________________________________________
> From: Tamar Christina
> Sent: Friday, November 25, 2016 4:01:30 PM
> To: Christophe Lyon
> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
>
>  >
>> > A few comments about this new version:
>> > * arm-neon-ref.h: why do you create
>> CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64?
>> > Can't you just add calls to CHECK_CRYPTO in the existing
>> > CHECK_RESULTS_NAMED_NO_FP16?
>
> Yes, that should be fine. I didn't have CHECK_CRYPTO before, and when I added it
> I didn't remove the split. I'll do it now.
>
>> >
>> > * p64_p128:
>> > From what I can see ARM and AArch64 differ on the vceq variants
>> > available with poly64.
>> > For ARM, arm_neon.h contains: uint64x1_t vceq_p64 (poly64x1_t __a,
>> > poly64x1_t __b) For AArch64, I can't see vceq_p64 in arm_neon.h? ...
>> > Actually I've just noticed the other you submitted while I was writing
>> > this, where you add vceq_p64 for aarch64, but it still returns
>> > uint64_t.
>> > Why do you change the vceq_64 test to return poly64_t instead of
>> uint64_t?
>
> This patch is slightly outdated. The correct type is `uint64_t`, but by the time that was noticed
> this patch had already been sent. A new one is coming soon.
>
>> >
>> > Why do you add #ifdef __aarch64 before vldX_p64 tests and until vsli_p64?
>> >
>
> This is wrong; I'll remove them. The #ifdef was supposed to be around the vldX_lane_p64 tests.
>
>> > The comment /* vget_lane_p64 tests.  */ is wrong before VLDX_LANE
>> > tests
>> >
>> > You need to protect the new vmov, vget_high and vget_lane tests with
>> > #ifdef __aarch64__.
>> >
>
> vget_lane is already in an #ifdef. For vmov you're right, but I also noticed that the
> test calls VDUP instead of VMOV, which explains why I didn't get a test failure.
>
> Thanks for the feedback,
> I'll get these updated.
>
>>
>> Actually, vget_high_p64 exists on arm, so no need for the #ifdef for it.
>>
>>
>> > Christophe
>> >
>> >> Kind regards,
>> >> Tamar
>> >> ________________________________________
>> >> From: Tamar Christina
>> >> Sent: Tuesday, November 8, 2016 11:58:46 AM
>> >> To: Christophe Lyon
>> >> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard
>> >> Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
>> >> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing
>> >> Poly64_t intrinsics to GCC
>> >>
>> >> Hi Christophe,
>> >>
>> >> Thanks for the review!
>> >>
>> >>>
>> >>> A while ago I added p64_p128.c, to contain all the poly64/128 tests
>> >>> except for vreinterpret.
>> >>> Why do you need to create p64.c ?
>> >>
>> >> I originally created it because I had a much smaller set of
>> >> intrinsics that I wanted to add initially; this grew, and it hadn't occurred to
>> me that I could use the existing file now.
>> >>
>> >> Another reason was the effective-target arm_crypto_ok as you
>> mentioned below.
>> >>
>> >>>
>> >>> Similarly, adding tests for vcreate_p64 etc... in p64.c or
>> >>> p64_p128.c might be easier to maintain than adding them to vcreate.c
>> >>> etc with several #ifdef conditions.
>> >>
>> >> Fair enough, I'll move them to p64_p128.c.
>> >>
>> >>> For vdup-vmod.c, why do you add the "&& defined(__aarch64__)"
>> >>> condition? These intrinsics are defined in arm/arm_neon.h, right?
>> >>> They are tested in p64_p128.c
>> >>
>> >> I should have looked for them; they weren't being tested before, so I
>> >> had mistakenly assumed that they weren't available. Now I realize I
>> >> just need to add the proper test option to the file to enable crypto. I'll
>> update this as well.
>> >>
>> >>> Looking at your patch, it seems some tests are currently missing for arm:
>> >>> vget_high_p64. I'm not sure why I missed it when I removed neon-
>> >>> testgen...
>> >>
>> >> I'll adjust the test conditions so they run for ARM as well.
>> >>
>> >>>
>> >>> Regarding vreinterpret_p128.c, doesn't the existing effective-target
>> >>> arm_crypto_ok prevent the tests from running on aarch64?
>> >>
>> >> Yes they do, I was comparing the output against a clean version and
>> >> hadn't noticed that they weren't running. Thanks!
>> >>
>> >>>
>> >>> Thanks,
>> >>>
>> >>> Christophe

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
  2016-11-29 10:12               ` Christophe Lyon
@ 2016-11-29 12:58                 ` Christophe Lyon
  0 siblings, 0 replies; 17+ messages in thread
From: Christophe Lyon @ 2016-11-29 12:58 UTC (permalink / raw)
  To: Tamar Christina
  Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
	James Greenhalgh, Kyrylo Tkachov, nd

On 29 November 2016 at 11:12, Christophe Lyon
<christophe.lyon@linaro.org> wrote:
> Hi Tamar,
>
>
> On 29 November 2016 at 10:50, Tamar Christina <Tamar.Christina@arm.com> wrote:
>> Hi All,
>>
>> The new patch contains the proper types for the intrinsics that should be returning uint64x1
>> and addresses the rest of Christophe's comments.
>>
>
> LGTM.
>
> One more question: maybe we want to add explicit tests for vdup*_v_p64
> even though they are aliases for vmov?
>
Sorry, I meant vdup_n_p64, but the tests are already in place.

So, OK for me, but I can't approve.

Thanks,

Christophe

> Christophe
>
>> Kind Regards,
>> Tamar
>>
>> ________________________________________
>> From: Tamar Christina
>> Sent: Friday, November 25, 2016 4:01:30 PM
>> To: Christophe Lyon
>> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
>> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
>>
>>  >
>>> > A few comments about this new version:
>>> > * arm-neon-ref.h: why do you create
>>> CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64?
>>> > Can't you just add calls to CHECK_CRYPTO in the existing
>>> > CHECK_RESULTS_NAMED_NO_FP16?
>>
>> Yes, that should be fine. I didn't have CHECK_CRYPTO before, and when I added it
>> I didn't remove the split. I'll do it now.
>>
>>> >
>>> > * p64_p128:
>>> > From what I can see ARM and AArch64 differ on the vceq variants
>>> > available with poly64.
>>> > For ARM, arm_neon.h contains: uint64x1_t vceq_p64 (poly64x1_t __a,
>>> > poly64x1_t __b) For AArch64, I can't see vceq_p64 in arm_neon.h? ...
>>> > Actually I've just noticed the other you submitted while I was writing
>>> > this, where you add vceq_p64 for aarch64, but it still returns
>>> > uint64_t.
>>> > Why do you change the vceq_64 test to return poly64_t instead of
>>> uint64_t?
>>
>> This patch is slightly outdated. The correct type is `uint64_t`, but by the time that was noticed
>> this patch had already been sent. A new one is coming soon.
>>
>>> >
>>> > Why do you add #ifdef __aarch64 before vldX_p64 tests and until vsli_p64?
>>> >
>>
>> This is wrong; I'll remove them. The #ifdef was supposed to be around the vldX_lane_p64 tests.
>>
>>> > The comment /* vget_lane_p64 tests.  */ is wrong before VLDX_LANE
>>> > tests
>>> >
>>> > You need to protect the new vmov, vget_high and vget_lane tests with
>>> > #ifdef __aarch64__.
>>> >
>>
>> vget_lane is already in an #ifdef. For vmov you're right, but I also noticed that the
>> test calls VDUP instead of VMOV, which explains why I didn't get a test failure.
>>
>> Thanks for the feedback,
>> I'll get these updated.
>>
>>>
>>> Actually, vget_high_p64 exists on arm, so no need for the #ifdef for it.
>>>
>>>
>>> > Christophe
>>> >
>>> >> Kind regards,
>>> >> Tamar
>>> >> ________________________________________
>>> >> From: Tamar Christina
>>> >> Sent: Tuesday, November 8, 2016 11:58:46 AM
>>> >> To: Christophe Lyon
>>> >> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard
>>> >> Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
>>> >> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing
>>> >> Poly64_t intrinsics to GCC
>>> >>
>>> >> Hi Christophe,
>>> >>
>>> >> Thanks for the review!
>>> >>
>>> >>>
>>> >>> A while ago I added p64_p128.c, to contain all the poly64/128 tests
>>> >>> except for vreinterpret.
>>> >>> Why do you need to create p64.c ?
>>> >>
>>> >> I originally created it because I had a much smaller set of
>>> >> intrinsics that I wanted to add initially; this grew, and it hadn't occurred to
>>> me that I could use the existing file now.
>>> >>
>>> >> Another reason was the effective-target arm_crypto_ok as you
>>> mentioned below.
>>> >>
>>> >>>
>>> >>> Similarly, adding tests for vcreate_p64 etc... in p64.c or
>>> >>> p64_p128.c might be easier to maintain than adding them to vcreate.c
>>> >>> etc with several #ifdef conditions.
>>> >>
>>> >> Fair enough, I'll move them to p64_p128.c.
>>> >>
>>> >>> For vdup-vmod.c, why do you add the "&& defined(__aarch64__)"
>>> >>> condition? These intrinsics are defined in arm/arm_neon.h, right?
>>> >>> They are tested in p64_p128.c
>>> >>
>>> >> I should have looked for them; they weren't being tested before, so I
>>> >> had mistakenly assumed that they weren't available. Now I realize I
>>> >> just need to add the proper test option to the file to enable crypto. I'll
>>> update this as well.
>>> >>
>>> >>> Looking at your patch, it seems some tests are currently missing for arm:
>>> >>> vget_high_p64. I'm not sure why I missed it when I removed neon-
>>> >>> testgen...
>>> >>
>>> >> I'll adjust the test conditions so they run for ARM as well.
>>> >>
>>> >>>
>>> >>> Regarding vreinterpret_p128.c, doesn't the existing effective-target
>>> >>> arm_crypto_ok prevent the tests from running on aarch64?
>>> >>
>>> >> Yes they do, I was comparing the output against a clean version and
>>> >> hadn't noticed that they weren't running. Thanks!
>>> >>
>>> >>>
>>> >>> Thanks,
>>> >>>
>>> >>> Christophe

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
  2016-11-29  9:50             ` Tamar Christina
  2016-11-29 10:12               ` Christophe Lyon
@ 2016-11-29 13:48               ` Kyrill Tkachov
  2016-11-29 13:55                 ` James Greenhalgh
  1 sibling, 1 reply; 17+ messages in thread
From: Kyrill Tkachov @ 2016-11-29 13:48 UTC (permalink / raw)
  To: Tamar Christina, Christophe Lyon
  Cc: GCC Patches, christophe.lyon, Marcus Shawcroft, Richard Earnshaw,
	James Greenhalgh


On 29/11/16 09:50, Tamar Christina wrote:
> Hi All,
>
> The new patch contains the proper types for the intrinsics that should be returning uint64x1
> and addresses the rest of Christophe's comments.

Ok with an appropriate ChangeLog entry.
Thanks,
Kyrill

> Kind Regards,
> Tamar
>
> ________________________________________
> From: Tamar Christina
> Sent: Friday, November 25, 2016 4:01:30 PM
> To: Christophe Lyon
> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
>
>   >
>>> A few comments about this new version:
>>> * arm-neon-ref.h: why do you create
>> CHECK_RESULTS_NAMED_NO_FP16_NO_POLY64?
>>> Can't you just add calls to CHECK_CRYPTO in the existing
>>> CHECK_RESULTS_NAMED_NO_FP16?
> Yes, that should be fine. I didn't have CHECK_CRYPTO before, and when I added it
> I didn't remove the split. I'll do it now.
>
>>> * p64_p128:
>>>  From what I can see ARM and AArch64 differ on the vceq variants
>>> available with poly64.
>>> For ARM, arm_neon.h contains: uint64x1_t vceq_p64 (poly64x1_t __a,
>>> poly64x1_t __b) For AArch64, I can't see vceq_p64 in arm_neon.h? ...
>>> Actually I've just noticed the other you submitted while I was writing
>>> this, where you add vceq_p64 for aarch64, but it still returns
>>> uint64_t.
>>> Why do you change the vceq_64 test to return poly64_t instead of
>> uint64_t?
> This patch is slightly outdated. The correct type is `uint64_t`, but by the time that was noticed
> this patch had already been sent. A new one is coming soon.
>
>>> Why do you add #ifdef __aarch64 before vldX_p64 tests and until vsli_p64?
>>>
> This is wrong; I'll remove them. The #ifdef was supposed to be around the vldX_lane_p64 tests.
>
>>> The comment /* vget_lane_p64 tests.  */ is wrong before VLDX_LANE
>>> tests
>>>
>>> You need to protect the new vmov, vget_high and vget_lane tests with
>>> #ifdef __aarch64__.
>>>
> vget_lane is already in an #ifdef. For vmov you're right, but I also noticed that the
> test calls VDUP instead of VMOV, which explains why I didn't get a test failure.
>
> Thanks for the feedback,
> I'll get these updated.
>
>> Actually, vget_high_p64 exists on arm, so no need for the #ifdef for it.
>>
>>
>>> Christophe
>>>
>>>> Kind regards,
>>>> Tamar
>>>> ________________________________________
>>>> From: Tamar Christina
>>>> Sent: Tuesday, November 8, 2016 11:58:46 AM
>>>> To: Christophe Lyon
>>>> Cc: GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard
>>>> Earnshaw; James Greenhalgh; Kyrylo Tkachov; nd
>>>> Subject: RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing
>>>> Poly64_t intrinsics to GCC
>>>>
>>>> Hi Christophe,
>>>>
>>>> Thanks for the review!
>>>>
>>>>> A while ago I added p64_p128.c, to contain all the poly64/128 tests
>>>>> except for vreinterpret.
>>>>> Why do you need to create p64.c ?
>>>> I originally created it because I had a much smaller set of
>>>> intrinsics that I wanted to add initially; this grew, and it hadn't occurred to
>> me that I could use the existing file now.
>>>> Another reason was the effective-target arm_crypto_ok as you
>> mentioned below.
>>>>> Similarly, adding tests for vcreate_p64 etc... in p64.c or
>>>>> p64_p128.c might be easier to maintain than adding them to vcreate.c
>>>>> etc with several #ifdef conditions.
>>>> Fair enough, I'll move them to p64_p128.c.
>>>>
>>>>> For vdup-vmod.c, why do you add the "&& defined(__aarch64__)"
>>>>> condition? These intrinsics are defined in arm/arm_neon.h, right?
>>>>> They are tested in p64_p128.c
>>>> I should have looked for them; they weren't being tested before, so I
>>>> had mistakenly assumed that they weren't available. Now I realize I
>>>> just need to add the proper test option to the file to enable crypto. I'll
>> update this as well.
>>>>> Looking at your patch, it seems some tests are currently missing for arm:
>>>>> vget_high_p64. I'm not sure why I missed it when I removed neon-
>>>>> testgen...
>>>> I'll adjust the test conditions so they run for ARM as well.
>>>>
>>>>> Regarding vreinterpret_p128.c, doesn't the existing effective-target
>>>>> arm_crypto_ok prevent the tests from running on aarch64?
>>>> Yes they do, I was comparing the output against a clean version and
>>>> hadn't noticed that they weren't running. Thanks!
>>>>
>>>>> Thanks,
>>>>>
>>>>> Christophe

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
  2016-11-29 13:48               ` Kyrill Tkachov
@ 2016-11-29 13:55                 ` James Greenhalgh
  2016-11-30  9:05                   ` Christophe Lyon
  0 siblings, 1 reply; 17+ messages in thread
From: James Greenhalgh @ 2016-11-29 13:55 UTC (permalink / raw)
  To: Kyrill Tkachov
  Cc: Tamar Christina, Christophe Lyon, GCC Patches, christophe.lyon,
	Marcus Shawcroft, Richard Earnshaw, nd

On Tue, Nov 29, 2016 at 01:48:22PM +0000, Kyrill Tkachov wrote:
> 
> On 29/11/16 09:50, Tamar Christina wrote:
> >Hi All,
> >
> >The new patch contains the proper types for the intrinsics that should be returning uint64x1
> >and addresses the rest of Christophe's comments.
> 
> Ok with an appropriate ChangeLog entry.

Also OK from an AArch64 perspective based on the detailed review from
Christophe.

Thanks,
James

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
  2016-11-29 13:55                 ` James Greenhalgh
@ 2016-11-30  9:05                   ` Christophe Lyon
  2016-11-30  9:40                     ` Tamar Christina
  2016-12-07  4:34                     ` Andrew Pinski
  0 siblings, 2 replies; 17+ messages in thread
From: Christophe Lyon @ 2016-11-30  9:05 UTC (permalink / raw)
  To: Tamar Christina
  Cc: Kyrill Tkachov, James Greenhalgh, GCC Patches, christophe.lyon,
	Marcus Shawcroft, Richard Earnshaw, nd

Hi Tamar,


On 29 November 2016 at 14:54, James Greenhalgh <james.greenhalgh@arm.com> wrote:
> On Tue, Nov 29, 2016 at 01:48:22PM +0000, Kyrill Tkachov wrote:
>>
>> On 29/11/16 09:50, Tamar Christina wrote:
>> >Hi All,
>> >
>> >The new patch contains the proper types for the intrinsics that should be returning uint64x1
>> >and addresses the rest of Christophe's comments.
>>
>> Ok with an appropriate ChangeLog entry.
>
> Also OK from an AArch64 perspective based on the detailed review from
> Christophe.
>
> Thanks,
> James
>

After you committed this patch (r242962), I've noticed some
regressions as follows:
* on aarch64, vreinterpret_p128 and vreinterpret_p64 fail to compile
with errors like
warning: implicit declaration of function 'vreinterpretq_p64_p128
warning: implicit declaration of function 'vreinterpretq_p128_s8
error: incompatible types when assigning to type 'poly64x2_t' from type 'int'
etc...

* on arm configured for armv8-a, several tests fail to link or compile:
vbsl.c:(.text+0x24f0): undefined reference to `expected_poly64x1'
vdup-vmov.c:227:38: error: 'expected0_poly64x1' undeclared
vdup_lane.c:(.text+0x1584): undefined reference to `expected_poly64x1'
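
For reference, the undefined symbols above are the expected_* buffers
that the harness derives via VECT_VAR_DECL; the missing definitions
would look something like this sketch (placeholder values, assuming
the usual arm-neon-ref.h naming scheme):

VECT_VAR_DECL(expected, poly, 64, 1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected, poly, 64, 2) [] = { 0xfffffffffffffff0,
                                            0xfffffffffffffff1 };

i.e. definitions of expected_poly64x1 and expected_poly64x2.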

You can have more details at
http://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/242962/report-build-info.html


Christophe

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
  2016-11-30  9:05                   ` Christophe Lyon
@ 2016-11-30  9:40                     ` Tamar Christina
  2016-12-07  4:34                     ` Andrew Pinski
  1 sibling, 0 replies; 17+ messages in thread
From: Tamar Christina @ 2016-11-30  9:40 UTC (permalink / raw)
  To: Christophe Lyon
  Cc: Kyrill Tkachov, James Greenhalgh, GCC Patches, christophe.lyon,
	Marcus Shawcroft, Richard Earnshaw, nd

Hi Christophe,

> After you committed this patch (r242962), I've noticed some regressions as
> follows:
> * on aarch64, vreinterpret_p128 and vreinterpret_p64 fail to compile with
> errors like
> warning: implicit declaration of function 'vreinterpretq_p64_p128
> warning: implicit declaration of function 'vreinterpretq_p128_s8
> error: incompatible types when assigning to type 'poly64x2_t' from type 'int'
> etc...

Sorry for the screw-up. On the last patch I only tested the file p64_p128.c.
I'll fix these asap.

> 
> * on arm configured for armv8-a, several tests fail to link or compile:
> vbsl.c:(.text+0x24f0): undefined reference to `expected_poly64x1'
> vdup-vmov.c:227:38: error: 'expected0_poly64x1' undeclared
> vdup_lane.c:(.text+0x1584): undefined reference to `expected_poly64x1'
> 
> You can have more details at
> http://people.linaro.org/~christophe.lyon/cross-
> validation/gcc/trunk/242962/report-build-info.html
> 
> 
> Christophe

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
  2016-11-30  9:05                   ` Christophe Lyon
  2016-11-30  9:40                     ` Tamar Christina
@ 2016-12-07  4:34                     ` Andrew Pinski
  2016-12-12 11:29                       ` Tamar Christina
  1 sibling, 1 reply; 17+ messages in thread
From: Andrew Pinski @ 2016-12-07  4:34 UTC (permalink / raw)
  To: Christophe Lyon
  Cc: Tamar Christina, Kyrill Tkachov, James Greenhalgh, GCC Patches,
	christophe.lyon, Marcus Shawcroft, Richard Earnshaw, nd

On Wed, Nov 30, 2016 at 1:04 AM, Christophe Lyon
<christophe.lyon@linaro.org> wrote:
> Hi Tamar,
>
>
> On 29 November 2016 at 14:54, James Greenhalgh <james.greenhalgh@arm.com> wrote:
>> On Tue, Nov 29, 2016 at 01:48:22PM +0000, Kyrill Tkachov wrote:
>>>
>>> On 29/11/16 09:50, Tamar Christina wrote:
>>> >Hi All,
>>> >
>>> >The new patch contains the proper types for the intrinsics that should be returning uint64x1
>>> >and addresses the rest of Christophe's comments.
>>>
>>> Ok with an appropriate ChangeLog entry.
>>
>> Also OK from an AArch64 perspective based on the detailed review from
>> Christophe.
>>
>> Thanks,
>> James
>>
>
> After you committed this patch (r242962), I've noticed some
> regressions as follows:
> * on aarch64, vreinterpret_p128 and vreinterpret_p64 fail to compile
> with errors like
> warning: implicit declaration of function 'vreinterpretq_p64_p128
> warning: implicit declaration of function 'vreinterpretq_p128_s8
> error: incompatible types when assigning to type 'poly64x2_t' from type 'int'
> etc...
>
> * on arm configured for armv8-a, several tests fail to link or compile:
> vbsl.c:(.text+0x24f0): undefined reference to `expected_poly64x1'
> vdup-vmov.c:227:38: error: 'expected0_poly64x1' undeclared
> vdup_lane.c:(.text+0x1584): undefined reference to `expected_poly64x1'
>
> You can have more details at
> http://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/242962/report-build-info.html

I see the expected_poly64x1 failures also for aarch64:
https://gcc.gnu.org/ml/gcc-testresults/2016-12/msg00738.html

FAIL: gcc.target/aarch64/advsimd-intrinsics/vbsl.c   -O0  (test for
excess errors)
Excess errors:
vbsl.c:(.text+0x1dec): undefined reference to `expected_poly64x1'
vbsl.c:(.text+0x1df0): undefined reference to `expected_poly64x1'
vbsl.c:(.text+0x1e20): undefined reference to `expected_poly64x1'
vbsl.c:(.text+0x1e24): undefined reference to `expected_poly64x1'
vbsl.c:(.text+0x2a74): undefined reference to `expected_poly64x2'
vbsl.c:(.text+0x2a78): undefined reference to `expected_poly64x2'
vbsl.c:(.text+0x2aa8): undefined reference to `expected_poly64x2'
vbsl.c:(.text+0x2aac): undefined reference to `expected_poly64x2'

....
FAIL: gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c   -O0  (test
for excess errors)
Excess errors:
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:175:38:
error: 'expected0_poly64x1' undeclared (first use in this function);
did you mean 'expected_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:175:38:
error: 'expected0_poly64x2' undeclared (first use in this function);
did you mean 'expected0_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:178:38:
error: 'expected1_poly64x1' undeclared (first use in this function);
did you mean 'expected0_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:178:38:
error: 'expected1_poly64x2' undeclared (first use in this function);
did you mean 'expected1_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:181:38:
error: 'expected2_poly64x1' undeclared (first use in this function);
did you mean 'expected1_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:181:38:
error: 'expected2_poly64x2' undeclared (first use in this function);
did you mean 'expected2_poly64x1'?


etc.

>
>
> Christophe

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
  2016-12-07  4:34                     ` Andrew Pinski
@ 2016-12-12 11:29                       ` Tamar Christina
  2016-12-12 23:53                         ` Andrew Pinski
  0 siblings, 1 reply; 17+ messages in thread
From: Tamar Christina @ 2016-12-12 11:29 UTC (permalink / raw)
  To: Andrew Pinski, Christophe Lyon
  Cc: Kyrill Tkachov, James Greenhalgh, GCC Patches, christophe.lyon,
	Marcus Shawcroft, Richard Earnshaw, nd

Hi Andrew,

These should be fixed now.

Thanks,
Tamar

________________________________________
From: Andrew Pinski <pinskia@gmail.com>
Sent: Wednesday, December 7, 2016 4:33:51 AM
To: Christophe Lyon
Cc: Tamar Christina; Kyrill Tkachov; James Greenhalgh; GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard Earnshaw; nd
Subject: Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC

On Wed, Nov 30, 2016 at 1:04 AM, Christophe Lyon
<christophe.lyon@linaro.org> wrote:
> Hi Tamar,
>
>
> On 29 November 2016 at 14:54, James Greenhalgh <james.greenhalgh@arm.com> wrote:
>> On Tue, Nov 29, 2016 at 01:48:22PM +0000, Kyrill Tkachov wrote:
>>>
>>> On 29/11/16 09:50, Tamar Christina wrote:
>>> >Hi All,
>>> >
>>> >The new patch contains the proper types for the intrinsics that should be returning uint64x1
>>> >and addresses the rest of Christophe's comments.
>>>
>>> Ok with an appropriate ChangeLog entry.
>>
>> Also OK from an AArch64 perspective based on the detailed review from
>> Christophe.
>>
>> Thanks,
>> James
>>
>
> After you committed this patch (r242962), I've noticed some
> regressions as follows:
> * on aarch64, vreinterpret_p128 and vreinterpret_p64 fail to compile
> with errors like
> warning: implicit declaration of function 'vreinterpretq_p64_p128
> warning: implicit declaration of function 'vreinterpretq_p128_s8
> error: incompatible types when assigning to type 'poly64x2_t' from type 'int'
> etc...
>
> * on arm configured for armv8-a, several tests fail to link or compile:
> vbsl.c:(.text+0x24f0): undefined reference to `expected_poly64x1'
> vdup-vmov.c:227:38: error: 'expected0_poly64x1' undeclared
> vdup_lane.c:(.text+0x1584): undefined reference to `expected_poly64x1'
>
> You can have more details at
> http://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/242962/report-build-info.html

I see the expected_poly64x1 failures also for aarch64:
https://gcc.gnu.org/ml/gcc-testresults/2016-12/msg00738.html

FAIL: gcc.target/aarch64/advsimd-intrinsics/vbsl.c   -O0  (test for
excess errors)
Excess errors:
vbsl.c:(.text+0x1dec): undefined reference to `expected_poly64x1'
vbsl.c:(.text+0x1df0): undefined reference to `expected_poly64x1'
vbsl.c:(.text+0x1e20): undefined reference to `expected_poly64x1'
vbsl.c:(.text+0x1e24): undefined reference to `expected_poly64x1'
vbsl.c:(.text+0x2a74): undefined reference to `expected_poly64x2'
vbsl.c:(.text+0x2a78): undefined reference to `expected_poly64x2'
vbsl.c:(.text+0x2aa8): undefined reference to `expected_poly64x2'
vbsl.c:(.text+0x2aac): undefined reference to `expected_poly64x2'

....
FAIL: gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c   -O0  (test
for excess errors)
Excess errors:
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:175:38:
error: 'expected0_poly64x1' undeclared (first use in this function);
did you mean 'expected_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:175:38:
error: 'expected0_poly64x2' undeclared (first use in this function);
did you mean 'expected0_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:178:38:
error: 'expected1_poly64x1' undeclared (first use in this function);
did you mean 'expected0_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:178:38:
error: 'expected1_poly64x2' undeclared (first use in this function);
did you mean 'expected1_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:181:38:
error: 'expected2_poly64x1' undeclared (first use in this function);
did you mean 'expected1_poly64x1'?
/home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:181:38:
error: 'expected2_poly64x2' undeclared (first use in this function);
did you mean 'expected2_poly64x1'?


etc.

>
>
> Christophe

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
  2016-12-12 11:29                       ` Tamar Christina
@ 2016-12-12 23:53                         ` Andrew Pinski
  0 siblings, 0 replies; 17+ messages in thread
From: Andrew Pinski @ 2016-12-12 23:53 UTC (permalink / raw)
  To: Tamar Christina
  Cc: Christophe Lyon, Kyrill Tkachov, James Greenhalgh, GCC Patches,
	christophe.lyon, Marcus Shawcroft, Richard Earnshaw, nd

On Mon, Dec 12, 2016 at 3:29 AM, Tamar Christina
<Tamar.Christina@arm.com> wrote:
> Hi Andrew,
>
> These should be fixed now.

Yes they are fixed.

Thanks,
Andrew

>
> Thanks,
> Tamar
>
> ________________________________________
> From: Andrew Pinski <pinskia@gmail.com>
> Sent: Wednesday, December 7, 2016 4:33:51 AM
> To: Christophe Lyon
> Cc: Tamar Christina; Kyrill Tkachov; James Greenhalgh; GCC Patches; christophe.lyon@st.com; Marcus Shawcroft; Richard Earnshaw; nd
> Subject: Re: [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC
>
> On Wed, Nov 30, 2016 at 1:04 AM, Christophe Lyon
> <christophe.lyon@linaro.org> wrote:
>> Hi Tamar,
>>
>>
>> On 29 November 2016 at 14:54, James Greenhalgh <james.greenhalgh@arm.com> wrote:
>>> On Tue, Nov 29, 2016 at 01:48:22PM +0000, Kyrill Tkachov wrote:
>>>>
>>>> On 29/11/16 09:50, Tamar Christina wrote:
>>>> >Hi All,
>>>> >
>>>> >The new patch contains the proper types for the intrinsics that should be returning uint64x1
>>>> >and addresses the rest of Christophe's comments.
>>>>
>>>> Ok with an appropriate ChangeLog entry.
>>>
>>> Also OK from an AArch64 perspective based on the detailed review from
>>> Christophe.
>>>
>>> Thanks,
>>> James
>>>
>>
>> After you committed this patch (r242962), I've noticed some
>> regressions as follows:
>> * on aarch64, vreinterpret_p128 and vreinterpret_p64 fail to compile
>> with errors like
>> warning: implicit declaration of function 'vreinterpretq_p64_p128
>> warning: implicit declaration of function 'vreinterpretq_p128_s8
>> error: incompatible types when assigning to type 'poly64x2_t' from type 'int'
>> etc...
>>
>> * on arm configured for armv8-a, several tests fail to link or compile:
>> vbsl.c:(.text+0x24f0): undefined reference to `expected_poly64x1'
>> vdup-vmov.c:227:38: error: 'expected0_poly64x1' undeclared
>> vdup_lane.c:(.text+0x1584): undefined reference to `expected_poly64x1'
>>
>> You can have more details at
>> http://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/242962/report-build-info.html
>
> I see the expected_poly64x1 failures also for aarch64:
> https://gcc.gnu.org/ml/gcc-testresults/2016-12/msg00738.html
>
> FAIL: gcc.target/aarch64/advsimd-intrinsics/vbsl.c   -O0  (test for
> excess errors)
> Excess errors:
> vbsl.c:(.text+0x1dec): undefined reference to `expected_poly64x1'
> vbsl.c:(.text+0x1df0): undefined reference to `expected_poly64x1'
> vbsl.c:(.text+0x1e20): undefined reference to `expected_poly64x1'
> vbsl.c:(.text+0x1e24): undefined reference to `expected_poly64x1'
> vbsl.c:(.text+0x2a74): undefined reference to `expected_poly64x2'
> vbsl.c:(.text+0x2a78): undefined reference to `expected_poly64x2'
> vbsl.c:(.text+0x2aa8): undefined reference to `expected_poly64x2'
> vbsl.c:(.text+0x2aac): undefined reference to `expected_poly64x2'
>
> ....
> FAIL: gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c   -O0  (test
> for excess errors)
> Excess errors:
> /home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:175:38:
> error: 'expected0_poly64x1' undeclared (first use in this function);
> did you mean 'expected_poly64x1'?
> /home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:175:38:
> error: 'expected0_poly64x2' undeclared (first use in this function);
> did you mean 'expected0_poly64x1'?
> /home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:178:38:
> error: 'expected1_poly64x1' undeclared (first use in this function);
> did you mean 'expected0_poly64x1'?
> /home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:178:38:
> error: 'expected1_poly64x2' undeclared (first use in this function);
> did you mean 'expected1_poly64x1'?
> /home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:181:38:
> error: 'expected2_poly64x1' undeclared (first use in this function);
> did you mean 'expected1_poly64x1'?
> /home/jenkins/workspace/BuildThunderX_native_gcc_upstream/gcc/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c:181:38:
> error: 'expected2_poly64x2' undeclared (first use in this function);
> did you mean 'expected2_poly64x1'?
>
>
> etc.
>
>>
>>
>> Christophe

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread

Thread overview: 17+ messages
2016-11-07 13:55 [AArch64][ARM][GCC][PATCHv2 3/3] Add tests for missing Poly64_t intrinsics to GCC Tamar Christina
2016-11-08 11:21 ` Christophe Lyon
2016-11-08 11:59   ` Tamar Christina
2016-11-24 11:45     ` Tamar Christina
2016-11-25 14:54       ` Christophe Lyon
2016-11-25 15:03         ` Christophe Lyon
2016-11-25 16:01           ` Tamar Christina
2016-11-29  9:50             ` Tamar Christina
2016-11-29 10:12               ` Christophe Lyon
2016-11-29 12:58                 ` Christophe Lyon
2016-11-29 13:48               ` Kyrill Tkachov
2016-11-29 13:55                 ` James Greenhalgh
2016-11-30  9:05                   ` Christophe Lyon
2016-11-30  9:40                     ` Tamar Christina
2016-12-07  4:34                     ` Andrew Pinski
2016-12-12 11:29                       ` Tamar Christina
2016-12-12 23:53                         ` Andrew Pinski
