public inbox for gcc-patches@gcc.gnu.org
* [Patch][ARM,AArch64] more poly64 intrinsics and tests
@ 2016-12-12 16:03 Christophe Lyon
  2016-12-14 16:57 ` James Greenhalgh
  0 siblings, 1 reply; 9+ messages in thread
From: Christophe Lyon @ 2016-12-12 16:03 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 470 bytes --]

Hi,

After the recent update from Tamar, I noticed a few discrepancies
between ARM and AArch64 regarding a few poly64 intrinsics.

This patch:
- adds vtst_p64 and vtstq_p64 to AArch64's arm_neon.h
- adds vgetq_lane_p64, vset_lane_p64 and vsetq_lane_p64 to ARM's arm_neon.h
  (vget_lane_p64 was already there; see the usage sketch below)
- adds the corresponding tests, and moves the vget_lane_p64 ones out
of the #ifdef __aarch64__ zone.
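
For illustration only (a sketch, not part of the patch; the function
name and input values are arbitrary), once this is in, the intrinsics
involved can be used identically on both targets:

  /* Needs <arm_neon.h>; on ARM, compile with
     -mfpu=crypto-neon-fp-armv8.  */
  uint64x1_t
  example (poly64x1_t a, poly64x1_t b)
  {
    poly64_t lane = vget_lane_p64 (a, 0);        /* already on both */
    poly64x1_t c  = vset_lane_p64 (lane, b, 0);  /* new on ARM */
    return vtst_p64 (a, c);                      /* new on AArch64 */
  }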

Cross-tested on arm* and aarch64* targets.

OK?

Christophe

[-- Attachment #2: poly64-vget-lane2.chlog.txt --]
[-- Type: text/plain, Size: 845 bytes --]

gcc/ChangeLog:

2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>

	* config/aarch64/arm_neon.h (vtst_p64): New.
	(vtstq_p64): New.
	* config/arm/arm_neon.h (vgetq_lane_p64): New.
	(vset_lane_p64): New.
	(vsetq_lane_p64): New.

gcc/testsuite/ChangeLog:

2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>

	* gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
	(vget_lane_expected, vset_lane_expected, vtst_expected_poly64x1):
	New.
	(vmov_n_expected0, vmov_n_expected1, vmov_n_expected2)
	(expected_vld_st2_0, expected_vld_st2_1, expected_vld_st3_0)
	(expected_vld_st3_1, expected_vld_st3_2, expected_vld_st4_0)
	(expected_vld_st4_1, expected_vld_st4_2, expected_vld_st4_3)
	(vtst_expected_poly64x2): Move to aarch64-only section.
	(vget_lane_p64, vgetq_lane_p64, vset_lane_p64, vsetq_lane_p64)
	(vtst_p64, vtstq_p64): New tests.


[-- Attachment #3: poly64-vget-lane2.patch.txt --]
[-- Type: text/plain, Size: 11218 bytes --]

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index b846644..74d163e 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -10882,6 +10882,13 @@ vtst_p16 (poly16x4_t a, poly16x4_t b)
   return result;
 }
 
+__extension__ extern __inline uint64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vtst_p64 (poly64x1_t a, poly64x1_t b)
+{
+  return (uint64x1_t) ((a & b) != __AARCH64_INT64_C (0));
+}
+
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtstq_p8 (poly8x16_t a, poly8x16_t b)
@@ -10906,6 +10913,18 @@ vtstq_p16 (poly16x8_t a, poly16x8_t b)
   return result;
 }
 
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vtstq_p64 (poly64x2_t a, poly64x2_t b)
+{
+  uint64x2_t result;
+  __asm__ ("cmtst %0.2d, %1.2d, %2.2d"
+           : "=w"(result)
+           : "w"(a), "w"(b)
+           : /* No clobbers */);
+  return result;
+}
+
 /* End of temporary inline asm implementations.  */
 
 /* Start of temporary inline asm for vldn, vstn and friends.  */
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index ab29da7..d199b41 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -5497,6 +5497,15 @@ vgetq_lane_s64 (int64x2_t __a, const int __b)
   return (int64_t)__builtin_neon_vget_lanev2di (__a, __b);
 }
 
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
+__extension__ static __inline poly64_t __attribute__ ((__always_inline__))
+vgetq_lane_p64 (poly64x2_t __a, const int __b)
+{
+  return (poly64_t)__builtin_neon_vget_lanev2di ((int64x2_t) __a, __b);
+}
+
+#pragma GCC pop_options
 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
 vgetq_lane_u64 (uint64x2_t __a, const int __b)
 {
@@ -5581,6 +5590,15 @@ vset_lane_u64 (uint64_t __a, uint64x1_t __b, const int __c)
   return (uint64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, (int64x1_t) __b, __c);
 }
 
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vset_lane_p64 (poly64_t __a, poly64x1_t __b, const int __c)
+{
+  return (poly64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, (int64x1_t) __b, __c);
+}
+
+#pragma GCC pop_options
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
 vsetq_lane_s8 (int8_t __a, int8x16_t __b, const int __c)
 {
@@ -5661,6 +5679,12 @@ vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __c)
 
 #pragma GCC push_options
 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vsetq_lane_p64 (poly64_t __a, poly64x2_t __b, const int __c)
+{
+  return (poly64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, (int64x2_t) __b, __c);
+}
+
 __extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
 vcreate_p64 (uint64_t __a)
 {
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
index 8907b38..ba8fbeb 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
@@ -39,17 +39,6 @@ VECT_VAR_DECL(vdup_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
 VECT_VAR_DECL(vdup_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
 						 0xfffffffffffffff2 };
 
-/* Expected results: vmov_n.  */
-VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
-VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
-						 0xfffffffffffffff0 };
-VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
-VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
-						 0xfffffffffffffff1 };
-VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
-VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
-						 0xfffffffffffffff2 };
-
 /* Expected results: vext.  */
 VECT_VAR_DECL(vext_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 };
@@ -124,6 +113,29 @@ VECT_VAR_DECL(vst1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(vst1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0,
 						   0x3333333333333333 };
 
+/* Expected results: vget_lane.  */
+VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0;
+VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0;
+
+/* Expected results: vset_lane.  */
+VECT_VAR_DECL(vset_lane_expected,poly,64,1) [] = { 0x88 };
+VECT_VAR_DECL(vset_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x11 };
+
+/* Expected results: vtst.  */
+VECT_VAR_DECL(vtst_expected,uint,64,1) [] = { 0x0 };
+
+#ifdef __aarch64__
+/* Expected results: vmov_n.  */
+VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+						 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+						 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+						 0xfffffffffffffff2 };
+
 /* Expected results: vldX_lane.  */
 VECT_VAR_DECL(expected_vld_st2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(expected_vld_st2_0,poly,64,2) [] = { 0xfffffffffffffff0,
@@ -153,9 +165,9 @@ VECT_VAR_DECL(expected_vld_st4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
 VECT_VAR_DECL(expected_vld_st4_3,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
 						   0xaaaaaaaaaaaaaaaa };
 
-/* Expected results: vget_lane.  */
-VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0;
-VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0;
+/* Expected results: vtst.  */
+VECT_VAR_DECL(vtst_expected,uint,64,2) [] = { 0x0, 0xffffffffffffffff };
+#endif
 
 int main (void)
 {
@@ -727,6 +739,107 @@ int main (void)
   CHECK(TEST_MSG, poly, 64, 1, PRIx64, vst1_lane_expected, "");
   CHECK(TEST_MSG, poly, 64, 2, PRIx64, vst1_lane_expected, "");
 
+  /* vget_lane_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VGET_LANE/VGETQ_LANE"
+
+#define TEST_VGET_LANE(Q, T1, T2, W, N, L)				   \
+  VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vget_lane_vector1, T1, W, N), L); \
+  if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) {		\
+    fprintf(stderr,							   \
+	    "ERROR in %s (%s line %d in result '%s') at type %s "	   \
+	    "got 0x%" PRIx##W " != 0x%" PRIx##W "\n",			   \
+	    TEST_MSG, __FILE__, __LINE__,				   \
+	    STR(VECT_VAR(vget_lane_expected, T1, W, N)),		   \
+	    STR(VECT_NAME(T1, W, N)),					   \
+	    VECT_VAR(vget_lane_vector, T1, W, N),			   \
+	    VECT_VAR(vget_lane_expected, T1, W, N));			   \
+    abort ();								   \
+  }
+
+  /* Initialize input values.  */
+  DECL_VARIABLE(vget_lane_vector1, poly, 64, 1);
+  DECL_VARIABLE(vget_lane_vector1, poly, 64, 2);
+
+  VLOAD(vget_lane_vector1, buffer,  , poly, p, 64, 1);
+  VLOAD(vget_lane_vector1, buffer, q, poly, p, 64, 2);
+
+  VECT_VAR_DECL(vget_lane_vector, poly, 64, 1);
+  VECT_VAR_DECL(vget_lane_vector, poly, 64, 2);
+
+  TEST_VGET_LANE( , poly, p, 64, 1, 0);
+  TEST_VGET_LANE(q, poly, p, 64, 2, 0);
+
+
+  /* vset_lane_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VSET_LANE/VSETQ_LANE"
+
+#define TEST_VSET_LANE(Q, T1, T2, W, N, V, L)				\
+  VECT_VAR(vset_lane_vector, T1, W, N) =						\
+    vset##Q##_lane_##T2##W(V,						\
+			   VECT_VAR(vset_lane_vector, T1, W, N),			\
+			   L);						\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vset_lane_vector, T1, W, N))
+
+  /* Initialize input values.  */
+  DECL_VARIABLE(vset_lane_vector, poly, 64, 1);
+  DECL_VARIABLE(vset_lane_vector, poly, 64, 2);
+
+  CLEAN(result, uint, 64, 1);
+  CLEAN(result, uint, 64, 2);
+
+  VLOAD(vset_lane_vector, buffer, , poly, p, 64, 1);
+  VLOAD(vset_lane_vector, buffer, q, poly, p, 64, 2);
+
+  /* Choose value and lane arbitrarily.  */
+  TEST_VSET_LANE(, poly, p, 64, 1, 0x88, 0);
+  TEST_VSET_LANE(q, poly, p, 64, 2, 0x11, 1);
+
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vset_lane_expected, "");
+  CHECK(TEST_MSG, poly, 64, 2, PRIx64, vset_lane_expected, "");
+
+
+  /* vtst_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VTST"
+  
+#define TEST_VTST1(INSN, Q, T1, T2, W, N)			\
+  VECT_VAR(vtst_vector_res, uint, W, N) =			\
+    INSN##Q##_##T2##W(VECT_VAR(vtst_vector, T1, W, N),		\
+		      VECT_VAR(vtst_vector2, T1, W, N));	\
+    vst1##Q##_u##W(VECT_VAR(result, uint, W, N),		\
+		   VECT_VAR(vtst_vector_res, uint, W, N))
+
+#define TEST_VTST(INSN, Q, T1, T2, W, N)	\
+  TEST_VTST1(INSN, Q, T1, T2, W, N)		\
+
+  /* Initialize input values.  */
+  DECL_VARIABLE(vtst_vector, poly, 64, 1);
+  DECL_VARIABLE(vtst_vector2, poly, 64, 1);
+  DECL_VARIABLE(vtst_vector_res, uint, 64, 1);
+
+  CLEAN(result, uint, 64, 1);
+
+  VLOAD(vtst_vector, buffer,  , poly, p, 64, 1);
+  VDUP(vtst_vector2, , poly, p, 64, 1, 5);
+
+  TEST_VTST(vtst, , poly, p, 64, 1);
+
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, vtst_expected, "");
+
+  /* vtstq_p64 is supported by aarch64 only.  */
+#ifdef __aarch64__
+  DECL_VARIABLE(vtst_vector, poly, 64, 2);
+  DECL_VARIABLE(vtst_vector2, poly, 64, 2);
+  DECL_VARIABLE(vtst_vector_res, uint, 64, 2);
+  CLEAN(result, uint, 64, 2);
+  VLOAD(vtst_vector, buffer, q, poly, p, 64, 2);
+  VDUP(vtst_vector2, q, poly, p, 64, 2, 5);
+  TEST_VTST(vtst, q, poly, p, 64, 2);
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, vtst_expected, "");
+#endif
+
 #ifdef __aarch64__
 
   /* vmov_n_p64 tests.  */
@@ -767,37 +880,6 @@ int main (void)
     }
   }
 
-  /* vget_lane_p64 tests.  */
-#undef TEST_MSG
-#define TEST_MSG "VGET_LANE/VGETQ_LANE"
-
-#define TEST_VGET_LANE(Q, T1, T2, W, N, L)				   \
-  VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \
-  if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) {		\
-    fprintf(stderr,							   \
-	    "ERROR in %s (%s line %d in result '%s') at type %s "	   \
-	    "got 0x%" PRIx##W " != 0x%" PRIx##W "\n",			   \
-	    TEST_MSG, __FILE__, __LINE__,				   \
-	    STR(VECT_VAR(vget_lane_expected, T1, W, N)),		   \
-	    STR(VECT_NAME(T1, W, N)),					   \
-	    VECT_VAR(vget_lane_vector, T1, W, N),			   \
-	    VECT_VAR(vget_lane_expected, T1, W, N));			   \
-    abort ();								   \
-  }
-
-  /* Initialize input values.  */
-  DECL_VARIABLE(vector, poly, 64, 1);
-  DECL_VARIABLE(vector, poly, 64, 2);
-
-  VLOAD(vector, buffer,  , poly, p, 64, 1);
-  VLOAD(vector, buffer, q, poly, p, 64, 2);
-
-  VECT_VAR_DECL(vget_lane_vector, poly, 64, 1);
-  VECT_VAR_DECL(vget_lane_vector, poly, 64, 2);
-
-  TEST_VGET_LANE( , poly, p, 64, 1, 0);
-  TEST_VGET_LANE(q, poly, p, 64, 2, 0);
-
   /* vldx_lane_p64 tests.  */
 #undef TEST_MSG
 #define TEST_MSG "VLDX_LANE/VLDXQ_LANE"

* Re: [Patch][ARM,AArch64] more poly64 intrinsics and tests
  2016-12-12 16:03 [Patch][ARM,AArch64] more poly64 intrinsics and tests Christophe Lyon
@ 2016-12-14 16:57 ` James Greenhalgh
  2016-12-14 22:15   ` Christophe Lyon
  0 siblings, 1 reply; 9+ messages in thread
From: James Greenhalgh @ 2016-12-14 16:57 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: gcc-patches, nd

On Mon, Dec 12, 2016 at 05:03:31PM +0100, Christophe Lyon wrote:
> Hi,
> 
> After the recent update from Tamar, I noticed a few discrepancies
> between ARM and AArch64 regarding a few poly64 intrinsics.
> 
> This patch:
> - adds vtst_p64 and vtstq_p64 to AArch64's arm_neon.h
> - adds vgetq_lane_p64, vset_lane_p64 and vsetq_lane_p64 to ARM's arm_neon.h
> ( vget_lane_p64 was already there)
> - adds the corresponding tests, and moves the vget_lane_p64 ones out
> of the #ifdef __aarch64__ zone.
> 
> Cross-tested on arm* and aarch64* targets.
> 
> OK?

The AArch64 parts of this look fine to me, but I do have one question on
your inline assembly implementation for vtstq_p64:

> +__extension__ extern __inline uint64x2_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vtstq_p64 (poly64x2_t a, poly64x2_t b)
> +{
> +  uint64x2_t result;
> +  __asm__ ("cmtst %0.2d, %1.2d, %2.2d"
> +           : "=w"(result)
> +           : "w"(a), "w"(b)
> +           : /* No clobbers */);
> +  return result;
> +}
> +

Why can this not be written as many of the other vtstq intrinsics are; e.g.:

   __extension__ extern __inline uint64x2_t
  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
  vtstq_p64 (poly64x2_t __a, poly64x2_t __b)
  {
    return (uint64x2_t) ((((uint64x2_t) __a) & ((uint64x2_t) __b))
                          != __AARCH64_INT64_C (0));
  }
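
(As a sketch for clarity, not part of the patch, and with a made-up
helper name: GCC's vector extensions give an all-ones/all-zero mask per
lane for the lane-wise !=, so each 64-bit lane of either implementation
behaves like this scalar model:)

   /* uint64_t is from <stdint.h>.  */
   static inline uint64_t
   vtst_lane_model (uint64_t a, uint64_t b)
   {
     /* All-ones when a and b share at least one set bit, else zero;
        the same result cmtst produces per lane.  */
     return (a & b) != 0 ? ~(uint64_t) 0 : 0;
   }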

Thanks,
James

> gcc/ChangeLog:
> 
> 2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>
> 
> 	* config/aarch64/arm_neon.h (vtst_p64): New.
> 	(vtstq_p64): New.
> 	* config/arm/arm_neon.h (vgetq_lane_p64): New.
> 	(vset_lane_p64): New.
> 	(vsetq_lane_p64): New.
> 
> gcc/testsuite/ChangeLog:
> 
> 2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>
> 
> 	* gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
> 	(vget_lane_expected, vset_lane_expected, vtst_expected_poly64x1):
> 	New.
> 	(vmov_n_expected0, vmov_n_expected1, vmov_n_expected2)
> 	(expected_vld_st2_0, expected_vld_st2_1, expected_vld_st3_0)
> 	(expected_vld_st3_1, expected_vld_st3_2, expected_vld_st4_0)
> 	(expected_vld_st4_1, expected_vld_st4_2, expected_vld_st4_3)
> 	(vtst_expected_poly64x2): Move to aarch64-only section.
> 	(vget_lane_p64, vgetq_lane_p64, vset_lane_p64, vsetq_lane_p64)
> 	(vtst_p64, vtstq_p64): New tests.
> 


* Re: [Patch][ARM,AArch64] more poly64 intrinsics and tests
  2016-12-14 16:57 ` James Greenhalgh
@ 2016-12-14 22:15   ` Christophe Lyon
  2017-01-03 15:47     ` Christophe Lyon
  0 siblings, 1 reply; 9+ messages in thread
From: Christophe Lyon @ 2016-12-14 22:15 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: gcc-patches, nd

On 14 December 2016 at 17:55, James Greenhalgh <james.greenhalgh@arm.com> wrote:
> On Mon, Dec 12, 2016 at 05:03:31PM +0100, Christophe Lyon wrote:
>> Hi,
>>
>> After the recent update from Tamar, I noticed a few discrepancies
>> between ARM and AArch64 regarding a few poly64 intrinsics.
>>
>> This patch:
>> - adds vtst_p64 and vtstq_p64 to AArch64's arm_neon.h
>> - adds vgetq_lane_p64, vset_lane_p64 and vsetq_lane_p64 to ARM's arm_neon.h
>> ( vget_lane_p64 was already there)
>> - adds the corresponding tests, and moves the vget_lane_p64 ones out
>> of the #ifdef __aarch64__ zone.
>>
>> Cross-tested on arm* and aarch64* targets.
>>
>> OK?
>
> The AArch64 parts of this look fine to me, but I do have one question on
> your inline assembly implementation for vtstq_p64:
>
>> +__extension__ extern __inline uint64x2_t
>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>> +vtstq_p64 (poly64x2_t a, poly64x2_t b)
>> +{
>> +  uint64x2_t result;
>> +  __asm__ ("cmtst %0.2d, %1.2d, %2.2d"
>> +           : "=w"(result)
>> +           : "w"(a), "w"(b)
>> +           : /* No clobbers */);
>> +  return result;
>> +}
>> +
>
> Why can this not be written as many of the other vtstq intrinsics are; e.g.:
>
>    __extension__ extern __inline uint64x2_t
>   __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>   vtstq_p64 (poly64x2_t __a, poly64x2_t __b)
>   {
>     return (uint64x2_t) ((((uint64x2_t) __a) & ((uint64x2_t) __b))
>                           != __AARCH64_INT64_C (0));
>   }
>

I don't know, I just followed the pattern used for vtstq_p8 and vtstq_p16
just above...


> Thanks,
> James
>
>> gcc/ChangeLog:
>>
>> 2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>
>>
>>       * config/aarch64/arm_neon.h (vtst_p64): New.
>>       (vtstq_p64): New.
>>       * config/arm/arm_neon.h (vgetq_lane_p64): New.
>>       (vset_lane_p64): New.
>>       (vsetq_lane_p64): New.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>
>>
>>       * gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
>>       (vget_lane_expected, vset_lane_expected, vtst_expected_poly64x1):
>>       New.
>>       (vmov_n_expected0, vmov_n_expected1, vmov_n_expected2)
>>       (expected_vld_st2_0, expected_vld_st2_1, expected_vld_st3_0)
>>       (expected_vld_st3_1, expected_vld_st3_2, expected_vld_st4_0)
>>       (expected_vld_st4_1, expected_vld_st4_2, expected_vld_st4_3)
>>       (vtst_expected_poly64x2): Move to aarch64-only section.
>>       (vget_lane_p64, vgetq_lane_p64, vset_lane_p64, vsetq_lane_p64)
>>       (vtst_p64, vtstq_p64): New tests.
>>
>
>

* Re: [Patch][ARM,AArch64] more poly64 intrinsics and tests
  2016-12-14 22:15   ` Christophe Lyon
@ 2017-01-03 15:47     ` Christophe Lyon
  2017-01-11 10:13       ` Christophe Lyon
  0 siblings, 1 reply; 9+ messages in thread
From: Christophe Lyon @ 2017-01-03 15:47 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: gcc-patches, nd

Ping?


On 14 December 2016 at 23:09, Christophe Lyon
<christophe.lyon@linaro.org> wrote:
> On 14 December 2016 at 17:55, James Greenhalgh <james.greenhalgh@arm.com> wrote:
>> On Mon, Dec 12, 2016 at 05:03:31PM +0100, Christophe Lyon wrote:
>>> Hi,
>>>
>>> After the recent update from Tamar, I noticed a few discrepancies
>>> between ARM and AArch64 regarding a few poly64 intrinsics.
>>>
>>> This patch:
>>> - adds vtst_p64 and vtstq_p64 to AArch64's arm_neon.h
>>> - adds vgetq_lane_p64, vset_lane_p64 and vsetq_lane_p64 to ARM's arm_neon.h
>>> ( vget_lane_p64 was already there)
>>> - adds the corresponding tests, and moves the vget_lane_p64 ones out
>>> of the #ifdef __aarch64__ zone.
>>>
>>> Cross-tested on arm* and aarch64* targets.
>>>
>>> OK?
>>
>> The AArch64 parts of this look fine to me, but I do have one question on
>> your inline assembly implementation for vtstq_p64:
>>
>>> +__extension__ extern __inline uint64x2_t
>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>>> +vtstq_p64 (poly64x2_t a, poly64x2_t b)
>>> +{
>>> +  uint64x2_t result;
>>> +  __asm__ ("cmtst %0.2d, %1.2d, %2.2d"
>>> +           : "=w"(result)
>>> +           : "w"(a), "w"(b)
>>> +           : /* No clobbers */);
>>> +  return result;
>>> +}
>>> +
>>
>> Why can this not be written as many of the other vtstq intrinsics are; e.g.:
>>
>>    __extension__ extern __inline uint64x2_t
>>   __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>>   vtstq_p64 (poly64x2_t __a, poly64x2_t __b)
>>   {
>>     return (uint64x2_t) ((((uint64x2_t) __a) & ((uint64x2_t) __b))
>>                           != __AARCH64_INT64_C (0));
>>   }
>>
>
> I don't know, I just followed the pattern used for vtstq_p8 and vtstq_p16
> just above...
>
>
>> Thanks,
>> James
>>
>>> gcc/ChangeLog:
>>>
>>> 2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>
>>>
>>>       * config/aarch64/arm_neon.h (vtst_p64): New.
>>>       (vtstq_p64): New.
>>>       * config/arm/arm_neon.h (vgetq_lane_p64): New.
>>>       (vset_lane_p64): New.
>>>       (vsetq_lane_p64): New.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> 2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>
>>>
>>>       * gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
>>>       (vget_lane_expected, vset_lane_expected, vtst_expected_poly64x1):
>>>       New.
>>>       (vmov_n_expected0, vmov_n_expected1, vmov_n_expected2)
>>>       (expected_vld_st2_0, expected_vld_st2_1, expected_vld_st3_0)
>>>       (expected_vld_st3_1, expected_vld_st3_2, expected_vld_st4_0)
>>>       (expected_vld_st4_1, expected_vld_st4_2, expected_vld_st4_3)
>>>       (vtst_expected_poly64x2): Move to aarch64-only section.
>>>       (vget_lane_p64, vgetq_lane_p64, vset_lane_p64, vsetq_lane_p64)
>>>       (vtst_p64, vtstq_p64): New tests.
>>>
>>
>>

* Re: [Patch][ARM,AArch64] more poly64 intrinsics and tests
  2017-01-03 15:47     ` Christophe Lyon
@ 2017-01-11 10:13       ` Christophe Lyon
  2017-02-02 20:31         ` Christophe Lyon
  2017-02-04 21:54         ` James Greenhalgh
  0 siblings, 2 replies; 9+ messages in thread
From: Christophe Lyon @ 2017-01-11 10:13 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: gcc-patches, nd

Ping?

James, I'm not sure whether your comment was a request for a new
version of my patch or just FYI?


On 3 January 2017 at 16:47, Christophe Lyon <christophe.lyon@linaro.org> wrote:
> Ping?
>
>
> On 14 December 2016 at 23:09, Christophe Lyon
> <christophe.lyon@linaro.org> wrote:
>> On 14 December 2016 at 17:55, James Greenhalgh <james.greenhalgh@arm.com> wrote:
>>> On Mon, Dec 12, 2016 at 05:03:31PM +0100, Christophe Lyon wrote:
>>>> Hi,
>>>>
>>>> After the recent update from Tamar, I noticed a few discrepancies
>>>> between ARM and AArch64 regarding a few poly64 intrinsics.
>>>>
>>>> This patch:
>>>> - adds vtst_p64 and vtstq_p64 to AArch64's arm_neon.h
>>>> - adds vgetq_lane_p64, vset_lane_p64 and vsetq_lane_p64 to ARM's arm_neon.h
>>>> ( vget_lane_p64 was already there)
>>>> - adds the corresponding tests, and moves the vget_lane_p64 ones out
>>>> of the #ifdef __aarch64__ zone.
>>>>
>>>> Cross-tested on arm* and aarch64* targets.
>>>>
>>>> OK?
>>>
>>> The AArch64 parts of this look fine to me, but I do have one question on
>>> your inline assembly implementation for vtstq_p64:
>>>
>>>> +__extension__ extern __inline uint64x2_t
>>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>>>> +vtstq_p64 (poly64x2_t a, poly64x2_t b)
>>>> +{
>>>> +  uint64x2_t result;
>>>> +  __asm__ ("cmtst %0.2d, %1.2d, %2.2d"
>>>> +           : "=w"(result)
>>>> +           : "w"(a), "w"(b)
>>>> +           : /* No clobbers */);
>>>> +  return result;
>>>> +}
>>>> +
>>>
>>> Why can this not be written as many of the other vtstq intrinsics are; e.g.:
>>>
>>>    __extension__ extern __inline uint64x2_t
>>>   __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>>>   vtstq_p64 (poly64x2_t __a, poly64x2_t __b)
>>>   {
>>>     return (uint64x2_t) ((((uint64x2_t) __a) & ((uint64x2_t) __b))
>>>                           != __AARCH64_INT64_C (0));
>>>   }
>>>
>>
>> I don't know, I just followed the pattern used for vtstq_p8 and vtstq_p16
>> just above...
>>
>>
>>> Thanks,
>>> James
>>>
>>>> gcc/ChangeLog:
>>>>
>>>> 2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>
>>>>
>>>>       * config/aarch64/arm_neon.h (vtst_p64): New.
>>>>       (vtstq_p64): New.
>>>>       * config/arm/arm_neon.h (vgetq_lane_p64): New.
>>>>       (vset_lane_p64): New.
>>>>       (vsetq_lane_p64): New.
>>>>
>>>> gcc/testsuite/ChangeLog:
>>>>
>>>> 2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>
>>>>
>>>>       * gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
>>>>       (vget_lane_expected, vset_lane_expected, vtst_expected_poly64x1):
>>>>       New.
>>>>       (vmov_n_expected0, vmov_n_expected1, vmov_n_expected2)
>>>>       (expected_vld_st2_0, expected_vld_st2_1, expected_vld_st3_0)
>>>>       (expected_vld_st3_1, expected_vld_st3_2, expected_vld_st4_0)
>>>>       (expected_vld_st4_1, expected_vld_st4_2, expected_vld_st4_3)
>>>>       (vtst_expected_poly64x2): Move to aarch64-only section.
>>>>       (vget_lane_p64, vgetq_lane_p64, vset_lane_p64, vsetq_lane_p64)
>>>>       (vtst_p64, vtstq_p64): New tests.
>>>>
>>>
>>>

* Re: [Patch][ARM,AArch64] more poly64 intrinsics and tests
  2017-01-11 10:13       ` Christophe Lyon
@ 2017-02-02 20:31         ` Christophe Lyon
  2017-02-04 21:54         ` James Greenhalgh
  1 sibling, 0 replies; 9+ messages in thread
From: Christophe Lyon @ 2017-02-02 20:31 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: gcc-patches, nd

Hello,

Is it too late for this patch?

On 11 January 2017 at 11:13, Christophe Lyon <christophe.lyon@linaro.org> wrote:
> Ping?
>
> James, I'm not sure whether your comment was a request for a new
> version of my patch or just FYI?
>
>
> On 3 January 2017 at 16:47, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>> Ping?
>>
>>
>> On 14 December 2016 at 23:09, Christophe Lyon
>> <christophe.lyon@linaro.org> wrote:
>>> On 14 December 2016 at 17:55, James Greenhalgh <james.greenhalgh@arm.com> wrote:
>>>> On Mon, Dec 12, 2016 at 05:03:31PM +0100, Christophe Lyon wrote:
>>>>> Hi,
>>>>>
>>>>> After the recent update from Tamar, I noticed a few discrepancies
>>>>> between ARM and AArch64 regarding a few poly64 intrinsics.
>>>>>
>>>>> This patch:
>>>>> - adds vtst_p64 and vtstq_p64 to AArch64's arm_neon.h
>>>>> - adds vgetq_lane_p64, vset_lane_p64 and vsetq_lane_p64 to ARM's arm_neon.h
>>>>> ( vget_lane_p64 was already there)
>>>>> - adds the corresponding tests, and moves the vget_lane_p64 ones out
>>>>> of the #ifdef __aarch64__ zone.
>>>>>
>>>>> Cross-tested on arm* and aarch64* targets.
>>>>>
>>>>> OK?
>>>>
>>>> The AArch64 parts of this look fine to me, but I do have one question on
>>>> your inline assembly implementation for vtstq_p64:
>>>>
>>>>> +__extension__ extern __inline uint64x2_t
>>>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>>>>> +vtstq_p64 (poly64x2_t a, poly64x2_t b)
>>>>> +{
>>>>> +  uint64x2_t result;
>>>>> +  __asm__ ("cmtst %0.2d, %1.2d, %2.2d"
>>>>> +           : "=w"(result)
>>>>> +           : "w"(a), "w"(b)
>>>>> +           : /* No clobbers */);
>>>>> +  return result;
>>>>> +}
>>>>> +
>>>>
>>>> Why can this not be written as many of the other vtstq intrinsics are; e.g.:
>>>>
>>>>    __extension__ extern __inline uint64x2_t
>>>>   __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>>>>   vtstq_p64 (poly64x2_t __a, poly64x2_t __b)
>>>>   {
>>>>     return (uint64x2_t) ((((uint64x2_t) __a) & ((uint64x2_t) __b))
>>>>                           != __AARCH64_INT64_C (0));
>>>>   }
>>>>
>>>
>>> I don't know, I just followed the pattern used for vtstq_p8 and vtstq_p16
>>> just above...
>>>
>>>
>>>> Thanks,
>>>> James
>>>>
>>>>> gcc/ChangeLog:
>>>>>
>>>>> 2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>
>>>>>
>>>>>       * config/aarch64/arm_neon.h (vtst_p64): New.
>>>>>       (vtstq_p64): New.
>>>>>       * config/arm/arm_neon.h (vgetq_lane_p64): New.
>>>>>       (vset_lane_p64): New.
>>>>>       (vsetq_lane_p64): New.
>>>>>
>>>>> gcc/testsuite/ChangeLog:
>>>>>
>>>>> 2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>
>>>>>
>>>>>       * gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
>>>>>       (vget_lane_expected, vset_lane_expected, vtst_expected_poly64x1):
>>>>>       New.
>>>>>       (vmov_n_expected0, vmov_n_expected1, vmov_n_expected2)
>>>>>       (expected_vld_st2_0, expected_vld_st2_1, expected_vld_st3_0)
>>>>>       (expected_vld_st3_1, expected_vld_st3_2, expected_vld_st4_0)
>>>>>       (expected_vld_st4_1, expected_vld_st4_2, expected_vld_st4_3)
>>>>>       (vtst_expected_poly64x2): Move to aarch64-only section.
>>>>>       (vget_lane_p64, vgetq_lane_p64, vset_lane_p64, vsetq_lane_p64)
>>>>>       (vtst_p64, vtstq_p64): New tests.
>>>>>
>>>>
>>>>

* Re: [Patch][ARM,AArch64] more poly64 intrinsics and tests
  2017-01-11 10:13       ` Christophe Lyon
  2017-02-02 20:31         ` Christophe Lyon
@ 2017-02-04 21:54         ` James Greenhalgh
  2017-02-06  8:31           ` Christophe Lyon
  1 sibling, 1 reply; 9+ messages in thread
From: James Greenhalgh @ 2017-02-04 21:54 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: gcc-patches, nd

On Wed, Jan 11, 2017 at 11:13:07AM +0100, Christophe Lyon wrote:
> Ping?
> 
> James, I'm not sure whether your comment was a request for a new
> version of my patch or just FYI?

Sorry that this was unclear. I was looking for a new version of the patch
covering this comment. Otherwise we just have debt to fix in the future.

With the suggested change, the AArch64 parts of this patch are OK - adding
missing intrinsics is very safe (even in Stage 4).

Please post an updated patch, and give Richard and Marcus a reasonable
amount of time to object to taking the patch this late. (And you need an
AArch32 OK too.)

Thanks,
James

> 
> 
> On 3 January 2017 at 16:47, Christophe Lyon <christophe.lyon@linaro.org> wrote:
> > Ping?
> >
> >
> > On 14 December 2016 at 23:09, Christophe Lyon
> > <christophe.lyon@linaro.org> wrote:
> >> On 14 December 2016 at 17:55, James Greenhalgh <james.greenhalgh@arm.com> wrote:
> >>> On Mon, Dec 12, 2016 at 05:03:31PM +0100, Christophe Lyon wrote:
> >>>> Hi,
> >>>>
> >>>> After the recent update from Tamar, I noticed a few discrepancies
> >>>> between ARM and AArch64 regarding a few poly64 intrinsics.
> >>>>
> >>>> This patch:
> >>>> - adds vtst_p64 and vtstq_p64 to AArch64's arm_neon.h
> >>>> - adds vgetq_lane_p64, vset_lane_p64 and vsetq_lane_p64 to ARM's arm_neon.h
> >>>> ( vget_lane_p64 was already there)
> >>>> - adds the corresponding tests, and moves the vget_lane_p64 ones out
> >>>> of the #ifdef __aarch64__ zone.
> >>>>
> >>>> Cross-tested on arm* and aarch64* targets.
> >>>>
> >>>> OK?
> >>>
> >>> The AArch64 parts of this look fine to me, but I do have one question on
> >>> your inline assembly implementation for vtstq_p64:
> >>>
> >>>> +__extension__ extern __inline uint64x2_t
> >>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> >>>> +vtstq_p64 (poly64x2_t a, poly64x2_t b)
> >>>> +{
> >>>> +  uint64x2_t result;
> >>>> +  __asm__ ("cmtst %0.2d, %1.2d, %2.2d"
> >>>> +           : "=w"(result)
> >>>> +           : "w"(a), "w"(b)
> >>>> +           : /* No clobbers */);
> >>>> +  return result;
> >>>> +}
> >>>> +
> >>>
> >>> Why can this not be written as many of the other vtstq intrinsics are; e.g.:
> >>>
> >>>    __extension__ extern __inline uint64x2_t
> >>>   __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> >>>   vtstq_p64 (poly64x2_t __a, poly64x2_t __b)
> >>>   {
> >>>     return (uint64x2_t) ((((uint64x2_t) __a) & ((uint64x2_t) __b))
> >>>                           != __AARCH64_INT64_C (0));
> >>>   }
> >>>
> >>
> >> I don't know, I just followed the pattern used for vtstq_p8 and vtstq_p16
> >> just above...
> >>
> >>
> >>> Thanks,
> >>> James
> >>>
> >>>> gcc/ChangeLog:
> >>>>
> >>>> 2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>
> >>>>
> >>>>       * config/aarch64/arm_neon.h (vtst_p64): New.
> >>>>       (vtstq_p64): New.
> >>>>       * config/arm/arm_neon.h (vgetq_lane_p64): New.
> >>>>       (vset_lane_p64): New.
> >>>>       (vsetq_lane_p64): New.
> >>>>
> >>>> gcc/testsuite/ChangeLog:
> >>>>
> >>>> 2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>
> >>>>
> >>>>       * gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
> >>>>       (vget_lane_expected, vset_lane_expected, vtst_expected_poly64x1):
> >>>>       New.
> >>>>       (vmov_n_expected0, vmov_n_expected1, vmov_n_expected2)
> >>>>       (expected_vld_st2_0, expected_vld_st2_1, expected_vld_st3_0)
> >>>>       (expected_vld_st3_1, expected_vld_st3_2, expected_vld_st4_0)
> >>>>       (expected_vld_st4_1, expected_vld_st4_2, expected_vld_st4_3)
> >>>>       (vtst_expected_poly64x2): Move to aarch64-only section.
> >>>>       (vget_lane_p64, vgetq_lane_p64, vset_lane_p64, vsetq_lane_p64)
> >>>>       (vtst_p64, vtstq_p64): New tests.
> >>>>
> >>>
> >>>

* Re: [Patch][ARM,AArch64] more poly64 intrinsics and tests
  2017-02-04 21:54         ` James Greenhalgh
@ 2017-02-06  8:31           ` Christophe Lyon
  2017-02-06  9:23             ` Kyrill Tkachov
  0 siblings, 1 reply; 9+ messages in thread
From: Christophe Lyon @ 2017-02-06  8:31 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: gcc-patches, nd

[-- Attachment #1: Type: text/plain, Size: 4489 bytes --]

On 4 February 2017 at 22:54, James Greenhalgh <james.greenhalgh@arm.com> wrote:
> On Wed, Jan 11, 2017 at 11:13:07AM +0100, Christophe Lyon wrote:
>> Ping?
>>
>> James, I'm not sure whether your comment was a request for a new
>> version of my patch or just FYI?
>
> Sorry that this was unclear. I was looking for a new version of the patch
> covering this comment. Otherwise we just have debt to go fix it in future.
>
> With the suggested change, the AArch64 parts of this patch are OK - adding
> missing intrinsics is very safe (even in Stage 4).
>
> Please post an updated patch, and give Richard and Marcus a reasonable
> amount of tiume to object to taking the patch this late. (and you need an
> AArch32 OK too).
>
> Thanks,
> James
>

Hi James,

Thanks for the clarification, here is an updated patch.

I had to make a few changes after rebasing, and I also took the opportunity to
rewrite the existing vtst_p8, vtst_p16, vtstq_p8 and vtstq_p16 without an
asm() statement.
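
As an aside (a sketch only, not part of the patch, with an arbitrary
function name): one benefit of the C form is that the compiler can see
through it, whereas an asm() statement is opaque to the optimizers.
For example, on AArch64 (vtstq_p64 being AArch64-only), GCC can in
principle fold this to a constant zero vector, while the asm version
would always emit a cmtst:

  uint64x2_t
  always_zero (poly64x2_t a)
  {
    /* a & 0 is 0 in every lane, so vtst yields all-zero lanes.  */
    return vtstq_p64 (a, vdupq_n_p64 (0));
  }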

As before, the aarch64 and aarch32 updates to arm_neon.h are independent,
but I found it simpler to group them, as they imply updates to the same test.

Tested as usual, cross-testing on several arm* and aarch64* configurations,
no regression.

OK?

Thanks,

Christophe

>>
>>
>> On 3 January 2017 at 16:47, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>> > Ping?
>> >
>> >
>> > On 14 December 2016 at 23:09, Christophe Lyon
>> > <christophe.lyon@linaro.org> wrote:
>> >> On 14 December 2016 at 17:55, James Greenhalgh <james.greenhalgh@arm.com> wrote:
>> >>> On Mon, Dec 12, 2016 at 05:03:31PM +0100, Christophe Lyon wrote:
>> >>>> Hi,
>> >>>>
>> >>>> After the recent update from Tamar, I noticed a few discrepancies
>> >>>> between ARM and AArch64 regarding a few poly64 intrinsics.
>> >>>>
>> >>>> This patch:
>> >>>> - adds vtst_p64 and vtstq_p64 to AArch64's arm_neon.h
>> >>>> - adds vgetq_lane_p64, vset_lane_p64 and vsetq_lane_p64 to ARM's arm_neon.h
>> >>>> ( vget_lane_p64 was already there)
>> >>>> - adds the corresponding tests, and moves the vget_lane_p64 ones out
>> >>>> of the #ifdef __aarch64__ zone.
>> >>>>
>> >>>> Cross-tested on arm* and aarch64* targets.
>> >>>>
>> >>>> OK?
>> >>>
>> >>> The AArch64 parts of this look fine to me, but I do have one question on
>> >>> your inline assembly implementation for vtstq_p64:
>> >>>
>> >>>> +__extension__ extern __inline uint64x2_t
>> >>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>> >>>> +vtstq_p64 (poly64x2_t a, poly64x2_t b)
>> >>>> +{
>> >>>> +  uint64x2_t result;
>> >>>> +  __asm__ ("cmtst %0.2d, %1.2d, %2.2d"
>> >>>> +           : "=w"(result)
>> >>>> +           : "w"(a), "w"(b)
>> >>>> +           : /* No clobbers */);
>> >>>> +  return result;
>> >>>> +}
>> >>>> +
>> >>>
>> >>> Why can this not be written as many of the other vtstq intrinsics are; e.g.:
>> >>>
>> >>>    __extension__ extern __inline uint64x2_t
>> >>>   __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>> >>>   vtstq_p64 (poly64x2_t __a, poly64x2_t __b)
>> >>>   {
>> >>>     return (uint64x2_t) ((((uint64x2_t) __a) & ((uint64x2_t) __b))
>> >>>                           != __AARCH64_INT64_C (0));
>> >>>   }
>> >>>
>> >>
>> >> I don't know, I just followed the pattern used for vtstq_p8 and vtstq_p16
>> >> just above...
>> >>
>> >>
>> >>> Thanks,
>> >>> James
>> >>>
>> >>>> gcc/ChangeLog:
>> >>>>
>> >>>> 2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>
>> >>>>
>> >>>>       * config/aarch64/arm_neon.h (vtst_p64): New.
>> >>>>       (vtstq_p64): New.
>> >>>>       * config/arm/arm_neon.h (vgetq_lane_p64): New.
>> >>>>       (vset_lane_p64): New.
>> >>>>       (vsetq_lane_p64): New.
>> >>>>
>> >>>> gcc/testsuite/ChangeLog:
>> >>>>
>> >>>> 2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>
>> >>>>
>> >>>>       * gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
>> >>>>       (vget_lane_expected, vset_lane_expected, vtst_expected_poly64x1):
>> >>>>       New.
>> >>>>       (vmov_n_expected0, vmov_n_expected1, vmov_n_expected2)
>> >>>>       (expected_vld_st2_0, expected_vld_st2_1, expected_vld_st3_0)
>> >>>>       (expected_vld_st3_1, expected_vld_st3_2, expected_vld_st4_0)
>> >>>>       (expected_vld_st4_1, expected_vld_st4_2, expected_vld_st4_3)
>> >>>>       (vtst_expected_poly64x2): Move to aarch64-only section.
>> >>>>       (vget_lane_p64, vgetq_lane_p64, vset_lane_p64, vsetq_lane_p64)
>> >>>>       (vtst_p64, vtstq_p64): New tests.
>> >>>>
>> >>>
>> >>>
>

[-- Attachment #2: poly64-vget-lane3.chlog.txt --]
[-- Type: text/plain, Size: 953 bytes --]

gcc/ChangeLog:

2017-02-06  Christophe Lyon  <christophe.lyon@linaro.org>

	* config/aarch64/arm_neon.h (vtst_p8): Rewrite without asm.
	(vtst_p16): Likewise.
	(vtstq_p8): Likewise.
	(vtstq_p16): Likewise.
	(vtst_p64): New.
	(vtstq_p64): Likewise.
	* config/arm/arm_neon.h (vgetq_lane_p64): New.
	(vset_lane_p64): New.
	(vsetq_lane_p64): New.

gcc/testsuite/ChangeLog:

2017-02-06  Christophe Lyon  <christophe.lyon@linaro.org>

	* gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
	(vget_lane_expected, vset_lane_expected, vtst_expected_poly64x1):
	New.
	(vmov_n_expected0, vmov_n_expected1, vmov_n_expected2)
	(expected_vld_st2_0, expected_vld_st2_1, expected_vld_st3_0)
	(expected_vld_st3_1, expected_vld_st3_2, expected_vld_st4_0)
	(expected_vld_st4_1, expected_vld_st4_2, expected_vld_st4_3)
	(vtst_expected_poly64x2): Move to aarch64-only section.
	(vget_lane_p64, vgetq_lane_p64, vset_lane_p64, vsetq_lane_p64)
	(vtst_p64, vtstq_p64): New tests.


[-- Attachment #3: poly64-vget-lane3.patch.txt --]
[-- Type: text/plain, Size: 12906 bytes --]

commit d8eebfd0523115ad743a3a273f6dbf79e3d41d5c
Author: Christophe Lyon <christophe.lyon@linaro.org>
Date:   Sun Feb 5 20:43:13 2017 +0000

    ARM/AArch64: add missing poly64 intrinsics (vtst on aarch64, vget_lane on arm)
    
    Change-Id: I334e0fa6ab07d473609ed96d9ab8cb56ebd521ac

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index a54c0be..0753da3 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -10862,48 +10862,47 @@ __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtst_p8 (poly8x8_t a, poly8x8_t b)
 {
-  uint8x8_t result;
-  __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
+  return (uint8x8_t) ((((uint8x8_t) a) & ((uint8x8_t) b))
+		       != 0);
 }
 
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtst_p16 (poly16x4_t a, poly16x4_t b)
 {
-  uint16x4_t result;
-  __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
+  return (uint16x4_t) ((((uint16x4_t) a) & ((uint16x4_t) b))
+		       != 0);
+}
+
+__extension__ extern __inline uint64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vtst_p64 (poly64x1_t a, poly64x1_t b)
+{
+  return (uint64x1_t) ((a & b) != __AARCH64_INT64_C (0));
 }
 
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtstq_p8 (poly8x16_t a, poly8x16_t b)
 {
-  uint8x16_t result;
-  __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
+  return (uint8x16_t) ((((uint8x16_t) a) & ((uint8x16_t) b))
+		       != 0);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtstq_p16 (poly16x8_t a, poly16x8_t b)
 {
-  uint16x8_t result;
-  __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
-           : "=w"(result)
-           : "w"(a), "w"(b)
-           : /* No clobbers */);
-  return result;
+  return (uint16x8_t) ((((uint16x8_t) a) & ((uint16x8_t) b))
+		       != 0);
+}
+
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vtstq_p64 (poly64x2_t a, poly64x2_t b)
+{
+  return (uint64x2_t) ((((uint64x2_t) a) & ((uint64x2_t) b))
+		       != __AARCH64_INT64_C (0));
 }
 
 /* End of temporary inline asm implementations.  */
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 16bf8c5..f81d77e 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -6309,6 +6309,16 @@ vgetq_lane_s64 (int64x2_t __a, const int __b)
   return (int64_t)__builtin_neon_vget_lanev2di (__a, __b);
 }
 
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
+__extension__ extern __inline poly64_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vgetq_lane_p64 (poly64x2_t __a, const int __b)
+{
+  return (poly64_t)__builtin_neon_vget_lanev2di ((int64x2_t) __a, __b);
+}
+
+#pragma GCC pop_options
 __extension__ extern __inline uint64_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vgetq_lane_u64 (uint64x2_t __a, const int __b)
@@ -6405,6 +6415,16 @@ vset_lane_u64 (uint64_t __a, uint64x1_t __b, const int __c)
   return (uint64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, (int64x1_t) __b, __c);
 }
 
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
+__extension__ extern __inline poly64x1_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vset_lane_p64 (poly64_t __a, poly64x1_t __b, const int __c)
+{
+  return (poly64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, (int64x1_t) __b, __c);
+}
+
+#pragma GCC pop_options
 __extension__ extern __inline int8x16_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vsetq_lane_s8 (int8_t __a, int8x16_t __b, const int __c)
@@ -6496,6 +6516,13 @@ vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __c)
 
 #pragma GCC push_options
 #pragma GCC target ("fpu=crypto-neon-fp-armv8")
+__extension__ extern __inline poly64x2_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vsetq_lane_p64 (poly64_t __a, poly64x2_t __b, const int __c)
+{
+  return (poly64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, (int64x2_t) __b, __c);
+}
+
 __extension__ extern __inline poly64x1_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vcreate_p64 (uint64_t __a)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
index 7c5bca2..a3210a9 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
@@ -39,17 +39,6 @@ VECT_VAR_DECL(vdup_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
 VECT_VAR_DECL(vdup_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
 						 0xfffffffffffffff2 };
 
-/* Expected results: vmov_n.  */
-VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
-VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
-						 0xfffffffffffffff0 };
-VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
-VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
-						 0xfffffffffffffff1 };
-VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
-VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
-						 0xfffffffffffffff2 };
-
 /* Expected results: vext.  */
 VECT_VAR_DECL(vext_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 };
@@ -124,6 +113,29 @@ VECT_VAR_DECL(vst1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(vst1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0,
 						   0x3333333333333333 };
 
+/* Expected results: vget_lane.  */
+VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0;
+VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0;
+
+/* Expected results: vset_lane.  */
+VECT_VAR_DECL(vset_lane_expected,poly,64,1) [] = { 0x88 };
+VECT_VAR_DECL(vset_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x11 };
+
+/* Expected results: vtst.  */
+VECT_VAR_DECL(vtst_expected,uint,64,1) [] = { 0x0 };
+
+#ifdef __aarch64__
+/* Expected results: vmov_n.  */
+VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+						 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+						 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+						 0xfffffffffffffff2 };
+
 /* Expected results: vldX_lane.  */
 VECT_VAR_DECL(expected_vld_st2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
 VECT_VAR_DECL(expected_vld_st2_0,poly,64,2) [] = { 0xfffffffffffffff0,
@@ -153,9 +165,9 @@ VECT_VAR_DECL(expected_vld_st4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
 VECT_VAR_DECL(expected_vld_st4_3,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
 						   0xaaaaaaaaaaaaaaaa };
 
-/* Expected results: vget_lane.  */
-VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0;
-VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0;
+/* Expected results: vtst.  */
+VECT_VAR_DECL(vtst_expected,uint,64,2) [] = { 0x0, 0xffffffffffffffff };
+#endif
 
 int main (void)
 {
@@ -727,7 +739,105 @@ int main (void)
   CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vst1_lane_expected, "");
   CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vst1_lane_expected, "");
 
+  /* vget_lane_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VGET_LANE/VGETQ_LANE"
+
+#define TEST_VGET_LANE(Q, T1, T2, W, N, L)				   \
+  VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vget_lane_vector1, T1, W, N), L); \
+  if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) {		\
+    fprintf(stderr,							   \
+	    "ERROR in %s (%s line %d in result '%s') at type %s "	   \
+	    "got 0x%" PRIx##W " != 0x%" PRIx##W "\n",			   \
+	    TEST_MSG, __FILE__, __LINE__,				   \
+	    STR(VECT_VAR(vget_lane_expected, T1, W, N)),		   \
+	    STR(VECT_NAME(T1, W, N)),					   \
+	    VECT_VAR(vget_lane_vector, T1, W, N),			   \
+	    VECT_VAR(vget_lane_expected, T1, W, N));			   \
+    abort ();								   \
+  }
+
+  /* Initialize input values.  */
+  DECL_VARIABLE(vget_lane_vector1, poly, 64, 1);
+  DECL_VARIABLE(vget_lane_vector1, poly, 64, 2);
+
+  VLOAD(vget_lane_vector1, buffer,  , poly, p, 64, 1);
+  VLOAD(vget_lane_vector1, buffer, q, poly, p, 64, 2);
+
+  VECT_VAR_DECL(vget_lane_vector, poly, 64, 1);
+  VECT_VAR_DECL(vget_lane_vector, poly, 64, 2);
+
+  TEST_VGET_LANE( , poly, p, 64, 1, 0);
+  TEST_VGET_LANE(q, poly, p, 64, 2, 0);
+
+
+  /* vset_lane_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VSET_LANE/VSETQ_LANE"
+
+#define TEST_VSET_LANE(Q, T1, T2, W, N, V, L)				\
+  VECT_VAR(vset_lane_vector, T1, W, N) =						\
+    vset##Q##_lane_##T2##W(V,						\
+			   VECT_VAR(vset_lane_vector, T1, W, N),			\
+			   L);						\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vset_lane_vector, T1, W, N))
+
+  /* Initialize input values.  */
+  DECL_VARIABLE(vset_lane_vector, poly, 64, 1);
+  DECL_VARIABLE(vset_lane_vector, poly, 64, 2);
+
+  CLEAN(result, uint, 64, 1);
+  CLEAN(result, uint, 64, 2);
+
+  VLOAD(vset_lane_vector, buffer, , poly, p, 64, 1);
+  VLOAD(vset_lane_vector, buffer, q, poly, p, 64, 2);
+
+  /* Choose value and lane arbitrarily.  */
+  TEST_VSET_LANE(, poly, p, 64, 1, 0x88, 0);
+  TEST_VSET_LANE(q, poly, p, 64, 2, 0x11, 1);
+
+  CHECK(TEST_MSG, poly, 64, 1, PRIx64, vset_lane_expected, "");
+  CHECK(TEST_MSG, poly, 64, 2, PRIx64, vset_lane_expected, "");
+
+
+  /* vtst_p64 tests.  */
+#undef TEST_MSG
+#define TEST_MSG "VTST"
+  
+#define TEST_VTST1(INSN, Q, T1, T2, W, N)			\
+  VECT_VAR(vtst_vector_res, uint, W, N) =			\
+    INSN##Q##_##T2##W(VECT_VAR(vtst_vector, T1, W, N),		\
+		      VECT_VAR(vtst_vector2, T1, W, N));	\
+    vst1##Q##_u##W(VECT_VAR(result, uint, W, N),		\
+		   VECT_VAR(vtst_vector_res, uint, W, N))
+
+#define TEST_VTST(INSN, Q, T1, T2, W, N)	\
+  TEST_VTST1(INSN, Q, T1, T2, W, N)		\
+
+  /* Initialize input values.  */
+  DECL_VARIABLE(vtst_vector, poly, 64, 1);
+  DECL_VARIABLE(vtst_vector2, poly, 64, 1);
+  DECL_VARIABLE(vtst_vector_res, uint, 64, 1);
+
+  CLEAN(result, uint, 64, 1);
+
+  VLOAD(vtst_vector, buffer,  , poly, p, 64, 1);
+  VDUP(vtst_vector2, , poly, p, 64, 1, 5);
+
+  TEST_VTST(vtst, , poly, p, 64, 1);
+
+  CHECK(TEST_MSG, uint, 64, 1, PRIx64, vtst_expected, "");
+
+  /* vtstq_p64 is supported by aarch64 only.  */
 #ifdef __aarch64__
+  DECL_VARIABLE(vtst_vector, poly, 64, 2);
+  DECL_VARIABLE(vtst_vector2, poly, 64, 2);
+  DECL_VARIABLE(vtst_vector_res, uint, 64, 2);
+  CLEAN(result, uint, 64, 2);
+  VLOAD(vtst_vector, buffer, q, poly, p, 64, 2);
+  VDUP(vtst_vector2, q, poly, p, 64, 2, 5);
+  TEST_VTST(vtst, q, poly, p, 64, 2);
+  CHECK(TEST_MSG, uint, 64, 2, PRIx64, vtst_expected, "");
 
   /* vmov_n_p64 tests.  */
 #undef TEST_MSG
@@ -767,37 +877,6 @@ int main (void)
     }
   }
 
-  /* vget_lane_p64 tests.  */
-#undef TEST_MSG
-#define TEST_MSG "VGET_LANE/VGETQ_LANE"
-
-#define TEST_VGET_LANE(Q, T1, T2, W, N, L)				   \
-  VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \
-  if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) {		\
-    fprintf(stderr,							   \
-	    "ERROR in %s (%s line %d in result '%s') at type %s "	   \
-	    "got 0x%" PRIx##W " != 0x%" PRIx##W "\n",			   \
-	    TEST_MSG, __FILE__, __LINE__,				   \
-	    STR(VECT_VAR(vget_lane_expected, T1, W, N)),		   \
-	    STR(VECT_NAME(T1, W, N)),					   \
-	    (uint##W##_t)VECT_VAR(vget_lane_vector, T1, W, N),		   \
-	    (uint##W##_t)VECT_VAR(vget_lane_expected, T1, W, N));	   \
-    abort ();								   \
-  }
-
-  /* Initialize input values.  */
-  DECL_VARIABLE(vector, poly, 64, 1);
-  DECL_VARIABLE(vector, poly, 64, 2);
-
-  VLOAD(vector, buffer,  , poly, p, 64, 1);
-  VLOAD(vector, buffer, q, poly, p, 64, 2);
-
-  VECT_VAR_DECL(vget_lane_vector, poly, 64, 1);
-  VECT_VAR_DECL(vget_lane_vector, poly, 64, 2);
-
-  TEST_VGET_LANE( , poly, p, 64, 1, 0);
-  TEST_VGET_LANE(q, poly, p, 64, 2, 0);
-
   /* vldx_lane_p64 tests.  */
 #undef TEST_MSG
 #define TEST_MSG "VLDX_LANE/VLDXQ_LANE"

* Re: [Patch][ARM,AArch64] more poly64 intrinsics and tests
  2017-02-06  8:31           ` Christophe Lyon
@ 2017-02-06  9:23             ` Kyrill Tkachov
  0 siblings, 0 replies; 9+ messages in thread
From: Kyrill Tkachov @ 2017-02-06  9:23 UTC (permalink / raw)
  To: Christophe Lyon, James Greenhalgh; +Cc: gcc-patches

Hi Christophe,

On 06/02/17 08:31, Christophe Lyon wrote:
> On 4 February 2017 at 22:54, James Greenhalgh <james.greenhalgh@arm.com> wrote:
>> On Wed, Jan 11, 2017 at 11:13:07AM +0100, Christophe Lyon wrote:
>>> Ping?
>>>
>>> James, I'm not sure whether your comment was a request for a new
>>> version of my patch or just FYI?
>> Sorry that this was unclear. I was looking for a new version of the patch
>> covering this comment. Otherwise we just have debt to fix in the future.
>>
>> With the suggested change, the AArch64 parts of this patch are OK - adding
>> missing intrinsics is very safe (even in Stage 4).
>>
>> Please post an updated patch, and give Richard and Marcus a reasonable
>> amount of time to object to taking the patch this late. (And you need an
>> AArch32 OK too.)
>>
>> Thanks,
>> James
>>
> Hi James,
>
> Thanks for the clarification, here is an updated patch.
>
> I had to make a few changes after rebasing, and I also took the opportunity to
> rewrite the existing vtst_p8, vtst_p16, vtstq_p8 and vtstq_p16 without an
> asm() statement.
>
> As before, the aarch64 and aarch32 updates to arm_neon.h are independent,
> but I found it simpler to group them, as they imply updates to the same test.
>
> Tested as usual, cross-testing on several arm* and aarch64* configurations,
> no regression.
>
> OK?

Ok for arm.

Thanks,
Kyrill

> Thanks,
>
> Christophe
>
>>>
>>> On 3 January 2017 at 16:47, Christophe Lyon <christophe.lyon@linaro.org> wrote:
>>>> Ping?
>>>>
>>>>
>>>> On 14 December 2016 at 23:09, Christophe Lyon
>>>> <christophe.lyon@linaro.org> wrote:
>>>>> On 14 December 2016 at 17:55, James Greenhalgh <james.greenhalgh@arm.com> wrote:
>>>>>> On Mon, Dec 12, 2016 at 05:03:31PM +0100, Christophe Lyon wrote:
>>>>>>> Hi,
>>>>>>>
>>>>>>> After the recent update from Tamar, I noticed a few discrepancies
>>>>>>> between ARM and AArch64 regarding a few poly64 intrinsics.
>>>>>>>
>>>>>>> This patch:
>>>>>>> - adds vtst_p64 and vtstq_p64 to AArch64's arm_neon.h
>>>>>>> - adds vgetq_lane_p64, vset_lane_p64 and vsetq_lane_p64 to ARM's arm_neon.h
>>>>>>> ( vget_lane_p64 was already there)
>>>>>>> - adds the corresponding tests, and moves the vget_lane_p64 ones out
>>>>>>> of the #ifdef __aarch64__ zone.
>>>>>>>
>>>>>>> Cross-tested on arm* and aarch64* targets.
>>>>>>>
>>>>>>> OK?
>>>>>> The AArch64 parts of this look fine to me, but I do have one question on
>>>>>> your inline assembly implementation for vtstq_p64:
>>>>>>
>>>>>>> +__extension__ extern __inline uint64x2_t
>>>>>>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>>>>>>> +vtstq_p64 (poly64x2_t a, poly64x2_t b)
>>>>>>> +{
>>>>>>> +  uint64x2_t result;
>>>>>>> +  __asm__ ("cmtst %0.2d, %1.2d, %2.2d"
>>>>>>> +           : "=w"(result)
>>>>>>> +           : "w"(a), "w"(b)
>>>>>>> +           : /* No clobbers */);
>>>>>>> +  return result;
>>>>>>> +}
>>>>>>> +
>>>>>> Why can this not be written as many of the other vtstq intrinsics are; e.g.:
>>>>>>
>>>>>>     __extension__ extern __inline uint64x2_t
>>>>>>    __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>>>>>>    vtstq_p64 (poly64x2_t __a, poly64x2_t __b)
>>>>>>    {
>>>>>>      return (uint64x2_t) ((((uint64x2_t) __a) & ((uint64x2_t) __b))
>>>>>>                            != __AARCH64_INT64_C (0));
>>>>>>    }
>>>>>>
>>>>> I don't know, I just followed the pattern used for vtstq_p8 and vtstq_p16
>>>>> just above...
>>>>>
>>>>>
>>>>>> Thanks,
>>>>>> James
>>>>>>
>>>>>>> gcc/ChangeLog:
>>>>>>>
>>>>>>> 2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>
>>>>>>>
>>>>>>>        * config/aarch64/arm_neon.h (vtst_p64): New.
>>>>>>>        (vtstq_p64): New.
>>>>>>>        * config/arm/arm_neon.h (vgetq_lane_p64): New.
>>>>>>>        (vset_lane_p64): New.
>>>>>>>        (vsetq_lane_p64): New.
>>>>>>>
>>>>>>> gcc/testsuite/ChangeLog:
>>>>>>>
>>>>>>> 2016-12-12  Christophe Lyon  <christophe.lyon@linaro.org>
>>>>>>>
>>>>>>>        * gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
>>>>>>>        (vget_lane_expected, vset_lane_expected, vtst_expected_poly64x1):
>>>>>>>        New.
>>>>>>>        (vmov_n_expected0, vmov_n_expected1, vmov_n_expected2)
>>>>>>>        (expected_vld_st2_0, expected_vld_st2_1, expected_vld_st3_0)
>>>>>>>        (expected_vld_st3_1, expected_vld_st3_2, expected_vld_st4_0)
>>>>>>>        (expected_vld_st4_1, expected_vld_st4_2, expected_vld_st4_3)
>>>>>>>        (vtst_expected_poly64x2): Move to aarch64-only section.
>>>>>>>        (vget_lane_p64, vgetq_lane_p64, vset_lane_p64, vsetq_lane_p64)
>>>>>>>        (vtst_p64, vtstq_p64): New tests.
>>>>>>>
>>>>>>
