public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Alex Velenko <Alex.Velenko@arm.com>
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Cc: Marcus Shawcroft <Marcus.Shawcroft@arm.com>
Subject: [Patch][AArch64] NEON vdup testcases
Date: Thu, 16 Jan 2014 12:12:00 -0000	[thread overview]
Message-ID: <52D7CCB4.5040105@arm.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 704 bytes --]

[AArch64] VDUP testcases

Hi,

This patch implements test cases for the following NEON intrinsics:
vdup_lane_f32
vdup_lane_s[8,16]
vdup_lane_s[32,64]
vdup_n_[p,s,u][8,16]
vdup_n_[s,u][32,64]

vdupb_lane_[s,u]8
vduph_lane_[s,u]16
vdupd_lane_[f,s,u]64
vdups_lane_[f,s,u]32

vdupq_lane_[f,s][32,64]
vdupq_lane_s[8,16]
vdup[q]_n_f32
vdupq_n_f64
vdupq_n_[s,p,u][8,16]
vdupq_n_[s,u][32,64]

Tests succeed on both Little-Endian and Big-Endian.

Ok for trunk?

Thanks,
Alex

gcc/testsuite/

2014-01-16  Alex Velenko  <Alex.Velenko@arm.com>

	        * gcc.target/aarch64/vdup_lane_1.c: New testcase.
	        * gcc.target/aarch64/vdup_lane_2.c: New testcase.
	        * gcc.target/aarch64/vdup_n_1.c: New testcase.

[-- Attachment #2: vdup.patch --]
[-- Type: text/x-patch, Size: 26576 bytes --]

diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..a80e10146a6e45b44c3a09701da949a8e9aa7653
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
@@ -0,0 +1,409 @@
+/* Test vdup_lane intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")  /* Empty asm with a "memory" clobber: compiler-level barrier.  */
+
+#define force_simd(V1)   asm volatile ("orr %0.16b, %1.16b, %1.16b"	\
+	   : "=w"(V1)						\
+	   : "w"(V1)						\
+	   : /* No clobbers */);  /* V1 is both "w" input and "=w" output; the expansion ends in ';', so call sites omit it.  */
+
+/* Check vdup_lane_f32 for lanes 0 and 1; return 0 on pass, 1 on mismatch.  */
+int __attribute__ ((noinline)) test_vdup_lane_f32 ()
+{
+  float32x2_t a;
+  float32x2_t b;
+  int i = 0;
+  float32_t c[2] = { 0.0E0 , 3.14 };
+  float32_t d[2];
+  a = vld1_f32 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)  /* Macro supplies its own ';'.  */
+  b = vdup_lane_f32 (a, 0);
+  vst1_f32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+  /* Lane 1.  Restart the index, otherwise this check loop never runs.  */
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_f32 (a, 1);
+  vst1_f32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (i = 0; i < 2; i++)
+    {
+      if (c[1] != d[i])
+	return 1;
+    }
+
+  return 0;
+}
+
+/* Covers test_vdup_lane_f32 and test_vdup_lane_s32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
+
+/* Check vdupq_lane_f32 for lanes 0 and 1; return 0 on pass, 1 on mismatch.  */
+int __attribute__ ((noinline)) test_vdupq_lane_f32 ()
+{
+  float32x2_t a;
+  float32x4_t b;
+  int i = 0;
+  float32_t c[2] = { 0.0E0 , 3.14 };
+  float32_t d[4];
+  a = vld1_f32 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)  /* Macro supplies its own ';'.  */
+  b = vdupq_lane_f32 (a, 0);
+  vst1q_f32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+  /* Lane 1.  Restart the index, otherwise this check loop never runs.  */
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_f32 (a, 1);
+  vst1q_f32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (i = 0; i < 4; i++)
+    {
+      if (c[1] != d[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdupq_lane_f32 and test_vdupq_lane_s32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
+
+/* Check vdup_lane_s8 for lanes 0 and 4; return 0 on pass, 1 on mismatch.  */
+int __attribute__ ((noinline)) test_vdup_lane_s8 ()
+{
+  int8x8_t a;
+  int8x8_t b;
+  int i = 0;
+  /* Only lanes 0 and 4 are exercised below.  */
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int8_t d[8];
+  a = vld1_s8 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)  /* Macro supplies its own ';'.  */
+  b = vdup_lane_s8 (a, 0);
+  vst1_s8 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+  /* Lane 4.  Restart the index, otherwise this check loop never runs.  */
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_s8 (a, 4);
+  vst1_s8 (d, b);
+  INHIB_OPTIMIZATION;
+  for (i = 0; i < 8; i++)
+    {
+      if (c[4] != d[i])
+	return 1;
+    }
+
+  return 0;
+}
+
+/* Covers test_vdup_lane_s8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[4\\\]" 1 } } */
+
+/* Check vdupq_lane_s8 for lanes 0 and 4; return 0 on pass, 1 on mismatch.  */
+int __attribute__ ((noinline)) test_vdupq_lane_s8 ()
+{
+  int8x8_t a;
+  int8x16_t b;
+  int i = 0;
+  /* Only lanes 0 and 4 are exercised below.  */
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int8_t d[16];
+  a = vld1_s8 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)  /* Macro supplies its own ';'.  */
+  b = vdupq_lane_s8 (a, 0);
+  vst1q_s8 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 16; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+  /* Lane 4.  Restart the index, otherwise this check loop never runs.  */
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s8 (a, 4);
+  vst1q_s8 (d, b);
+  INHIB_OPTIMIZATION;
+  for (i = 0; i < 16; i++)
+    {
+      if (c[4] != d[i])
+	return 1;
+    }
+
+  return 0;
+}
+
+/* Covers test_vdupq_lane_s8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[4\\\]" 1 } } */
+
+/* Check vdup_lane_s16 for lanes 0 and 2; return 0 on pass, 1 on mismatch.  */
+int __attribute__ ((noinline)) test_vdup_lane_s16 ()
+{
+  int16x4_t a;
+  int16x4_t b;
+  int i = 0;
+  /* Only lanes 0 and 2 are exercised below.  */
+  int16_t c[4] = { 0, 1, 2, 3 };
+  int16_t d[4];
+  a = vld1_s16 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)  /* Macro supplies its own ';'.  */
+  b = vdup_lane_s16 (a, 0);
+  vst1_s16 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+  /* Lane 2.  Restart the index, otherwise this check loop never runs.  */
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_s16 (a, 2);
+  vst1_s16 (d, b);
+  INHIB_OPTIMIZATION;
+  for (i = 0; i < 4; i++)
+    {
+      if (c[2] != d[i])
+	return 1;
+    }
+
+  return 0;
+}
+
+/* Covers test_vdup_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* Covers test_vdup_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[2\\\]" 1 } } */
+
+/* Check vdupq_lane_s16 for lanes 0 and 2; return 0 on pass, 1 on mismatch.  */
+int __attribute__ ((noinline)) test_vdupq_lane_s16 ()
+{
+  int16x4_t a;
+  int16x8_t b;
+  int i = 0;
+  /* Only lanes 0 and 2 are exercised below.  */
+  int16_t c[4] = { 0, 1, 2, 3 };
+  int16_t d[8];
+  a = vld1_s16 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)  /* Macro supplies its own ';'.  */
+  b = vdupq_lane_s16 (a, 0);
+  vst1q_s16 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+  /* Lane 2.  Restart the index, otherwise this check loop never runs.  */
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s16 (a, 2);
+  vst1q_s16 (d, b);
+  INHIB_OPTIMIZATION;
+  for (i = 0; i < 8; i++)
+    {
+      if (c[2] != d[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdupq_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* Covers test_vdupq_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[2\\\]" 1 } } */
+
+/* Check vdup_lane_s32 for lanes 0 and 1; return 0 on pass, 1 on mismatch.  */
+int __attribute__ ((noinline)) test_vdup_lane_s32 ()
+{
+  int32x2_t a;
+  int32x2_t b;
+  int i = 0;
+  int32_t c[2] = { 0, 1 };
+  int32_t d[2];
+  a = vld1_s32 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)  /* Macro supplies its own ';'.  */
+  b = vdup_lane_s32 (a, 0);
+  vst1_s32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+  /* Lane 1.  Restart the index, otherwise this check loop never runs.  */
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_s32 (a, 1);
+  vst1_s32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (i = 0; i < 2; i++)
+    {
+      if (c[1] != d[i])
+	return 1;
+    }
+
+  return 0;
+}
+
+/* Check vdupq_lane_s32 for lanes 0 and 1; return 0 on pass, 1 on mismatch.  */
+int __attribute__ ((noinline)) test_vdupq_lane_s32 ()
+{
+  int32x2_t a;
+  int32x4_t b;
+  int i = 0;
+  int32_t c[2] = { 0, 1 };
+  int32_t d[4];
+  a = vld1_s32 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)  /* Macro supplies its own ';'.  */
+  b = vdupq_lane_s32 (a, 0);
+  vst1q_s32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+  /* Lane 1.  Restart the index, otherwise this check loop never runs.  */
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s32 (a, 1);
+  vst1q_s32 (d, b);
+  INHIB_OPTIMIZATION;
+  for (i = 0; i < 4; i++)
+    {
+      if (c[1] != d[i])
+	return 1;
+    }
+
+  return 0;
+}
+
+/* Check vdup_lane_s64 with inputs 0 and 1; return 0 on pass, 1 on mismatch.  */
+int __attribute__ ((noinline)) test_vdup_lane_s64 ()
+{
+  int64x1_t a;  /* Signed types, matching the _s64 intrinsics used below.  */
+  int64x1_t b;
+  int64_t c[1];
+  int64_t d[1];
+
+  c[0] = 0;
+  a = vld1_s64 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)  /* Macro supplies its own ';'.  */
+  b = vdup_lane_s64 (a, 0);
+  vst1_s64 (d, b);
+  INHIB_OPTIMIZATION;
+  if (c[0] != d[0])
+    return 1;
+
+  c[0] = 1;
+  a = vld1_s64 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdup_lane_s64 (a, 0);
+  vst1_s64 (d, b);
+  INHIB_OPTIMIZATION;
+  if (c[0] != d[0])
+    return 1;
+  return 0;
+}
+
+int
+__attribute__ ((noinline)) test_vdupq_lane_s64 ()  /* Returns 0 if both lanes of vdupq_lane_s64 (a, 0) equal c[0] for inputs 0 and 1, else 1.  */
+{
+  int64x1_t a;
+  int64x2_t b;
+  int i = 0;
+  int64_t c[1];
+  int64_t d[2];
+  c[0] = 0;
+  a = vld1_s64 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)  /* Macro supplies its own ';'.  */
+  b = vdupq_lane_s64 (a, 0);
+  vst1q_s64 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+  i = 0;  /* Restart the index so the second check loop runs.  */
+  c[0] = 1;
+  a = vld1_s64 (c);
+  INHIB_OPTIMIZATION;
+  force_simd (a)
+  b = vdupq_lane_s64 (a, 0);
+  vst1q_s64 (d, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (c[0] != d[i])
+	return 1;
+    }
+
+  return 0;
+}
+
+int
+main ()
+{
+  int failed;
+
+  /* Run the tests in their original order.  || short-circuits, so a
+     failing test still stops the sequence before the later ones run.
+     Each test returns non-zero when a duplicated lane read back from
+     memory differs from the source element.  */
+  failed = test_vdup_lane_f32 ()
+	   || test_vdup_lane_s8 ()
+	   || test_vdup_lane_s16 ()
+	   || test_vdup_lane_s32 ()
+	   || test_vdup_lane_s64 ()
+	   || test_vdupq_lane_f32 ()
+	   || test_vdupq_lane_s8 ()
+	   || test_vdupq_lane_s16 ()
+	   || test_vdupq_lane_s32 ()
+	   || test_vdupq_lane_s64 ();
+
+  if (failed)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..319ce4575c8f78c75ad5ece3acc12ab6e7b2ac24
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c
@@ -0,0 +1,259 @@
+/* Test vdup_lane intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <arm_neon.h>
+
+#define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")  /* Empty asm with a "memory" clobber: compiler-level barrier.  */
+
+#define force_simd(V1)   asm volatile ("orr %0.16b, %1.16b, %1.16b"	\
+	   : "=w"(V1)						\
+	   : "w"(V1)						\
+	   : /* No clobbers */);  /* V1 is both "w" input and "=w" output; the expansion ends in ';', so call sites omit it.  */
+
+extern void abort (void);
+
+int __attribute__ ((noinline))
+test_vdups_lane_f32 ()  /* Returns 0 if vdups_lane_f32 yields c[0] and c[1] for lanes 0 and 1, else 1.  */
+{
+  float32x2_t a;
+  float32_t b;
+  float32_t c[2] = { 0.0, 1.0 };
+  a = vld1_f32 (c);
+  INHIB_OPTIMIZATION;
+  b = vdups_lane_f32 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)  /* Macro supplies its own ';'; passes b through a "w"-constrained asm.  */
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vdups_lane_f32 (a, 1);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+/* Covers vdups_lane_f32, vdups_lane_s32, vdups_lane_u32.  */
+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[0\\\]" 3 } } */
+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[1\\\]" 3 } } */
+
+/* Check vdupd_lane_f64 on lane 0; return 0 on pass, 1 on mismatch.  */
+int __attribute__ ((noinline))
+test_vdupd_lane_f64 ()
+{
+  float64x1_t a;
+  float64_t b;
+  float64_t c[1] = { 0.0 };
+  a = vld1_f64 (c);
+  INHIB_OPTIMIZATION;
+  b = vdupd_lane_f64 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)  /* Macro supplies its own ';'; passes b through a "w"-constrained asm.  */
+  if (c[0] != b)
+    return 1;
+  return 0;
+}
+
+/* Covers vdupd_lane_f64, vdupd_lane_s64, vdupd_lane_u64.  */
+/* Attempts to make the compiler generate
+   "dup\\td\[0-9\]+, v\[0-9\]+\.d\\\[0\\\]"
+   are not practical.  */
+
+int __attribute__ ((noinline))
+test_vdupb_lane_s8 ()  /* Returns 0 if vdupb_lane_s8 yields c[0] and c[4] for lanes 0 and 4, else 1.  */
+{
+  int8x8_t a;
+  int8_t b;
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  a = vld1_s8 (c);
+  INHIB_OPTIMIZATION;
+  b = vdupb_lane_s8 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)  /* Macro supplies its own ';'; passes b through a "w"-constrained asm.  */
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vdupb_lane_s8 (a, 4);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[4] != b)
+    return 1;
+
+  return 0;
+}
+
+/* Covers vdupb_lane_s8, vdupb_lane_u8.  */
+/* { dg-final { scan-assembler-times "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[4\\\]" 2 } } */
+
+int __attribute__ ((noinline))
+test_vdupb_lane_u8 ()  /* Returns 0 if vdupb_lane_u8 yields c[0] and c[4] for lanes 0 and 4, else 1.  */
+{
+  uint8x8_t a;
+  uint8_t b;
+  uint8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  a = vld1_u8 (c);
+  INHIB_OPTIMIZATION;
+  b = vdupb_lane_u8 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)  /* Macro supplies its own ';'; passes b through a "w"-constrained asm.  */
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vdupb_lane_u8 (a, 4);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[4] != b)
+    return 1;
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vduph_lane_u16 ()  /* Returns 0 if vduph_lane_u16 yields c[0] and c[2] for lanes 0 and 2, else 1.  */
+{
+  uint16x4_t a;
+  uint16_t b;
+  uint16_t c[4] = { 0, 1, 2, 3 };
+  a = vld1_u16 (c);
+  INHIB_OPTIMIZATION;
+  b = vduph_lane_u16 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)  /* Macro supplies its own ';'; passes b through a "w"-constrained asm.  */
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vduph_lane_u16 (a, 2);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[2] != b)
+    return 1;
+  return 0;
+}
+
+/* Covers vduph_lane_u16, vduph_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[2\\\]" 2 } } */
+int __attribute__ ((noinline))
+test_vduph_lane_s16 ()  /* Returns 0 if vduph_lane_s16 yields c[0] and c[2] for lanes 0 and 2, else 1.  */
+{
+  int16x4_t a;
+  int16_t b;
+  int16_t c[4] = { 0, 1, 2, 3 };
+  a = vld1_s16 (c);
+  INHIB_OPTIMIZATION;
+  b = vduph_lane_s16 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)  /* Macro supplies its own ';'; passes b through a "w"-constrained asm.  */
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vduph_lane_s16 (a, 2);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[2] != b)
+    return 1;
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdups_lane_s32 ()  /* Returns 0 if vdups_lane_s32 yields c[0] and c[1] for lanes 0 and 1, else 1.  */
+{
+  int32x2_t a;
+  int32_t b;
+  int32_t c[2] = { 0, 1 };
+  a = vld1_s32 (c);
+  INHIB_OPTIMIZATION;
+  b = vdups_lane_s32 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)  /* Macro supplies its own ';'; passes b through a "w"-constrained asm.  */
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vdups_lane_s32 (a, 1);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdups_lane_u32 ()  /* Returns 0 if vdups_lane_u32 yields c[0] and c[1] for lanes 0 and 1, else 1.  */
+{
+  uint32x2_t a;
+  uint32_t b;
+  uint32_t c[2] = { 0, 1 };
+  a = vld1_u32 (c);
+  INHIB_OPTIMIZATION;
+  b = vdups_lane_u32 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)  /* Macro supplies its own ';'; passes b through a "w"-constrained asm.  */
+  if (c[0] != b)
+    return 1;
+  INHIB_OPTIMIZATION;
+  b = vdups_lane_u32 (a, 1);
+  INHIB_OPTIMIZATION;
+  force_simd (b)
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+int __attribute__ ((noinline))
+test_vdupd_lane_u64 ()  /* Returns 0 if vdupd_lane_u64 (a, 0) equals c[0], else 1.  */
+{
+  uint64x1_t a;
+  uint64_t b;
+  uint64_t c[1] = { 0 };
+  a = vld1_u64 (c);
+  INHIB_OPTIMIZATION;
+  b = vdupd_lane_u64 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)  /* Macro supplies its own ';'; passes b through a "w"-constrained asm.  */
+  if (c[0] != b)
+    return 1;
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdupd_lane_s64 ()  /* Returns 0 if vdupd_lane_s64 (a, 0) equals c[0], else 1.  */
+{
+  int64x1_t a;
+  int64_t b;
+  int64_t c[1] = { 0 };
+  a = vld1_s64 (c);
+  INHIB_OPTIMIZATION;
+  b = vdupd_lane_s64 (a, 0);
+  INHIB_OPTIMIZATION;
+  force_simd (b)  /* Macro supplies its own ';'; passes b through a "w"-constrained asm.  */
+  if (c[0] != b)
+    return 1;
+  return 0;
+}
+
+int
+main ()  /* Runs every test in this file; aborts on the first reported mismatch.  */
+{
+  if (test_vdups_lane_f32 () || test_vdupd_lane_f64 ())  /* Both floating-point tests.  */
+    abort ();
+  if (test_vdupb_lane_s8 ())
+    abort ();
+  if (test_vdupb_lane_u8 ())
+    abort ();
+  if (test_vduph_lane_s16 ())
+    abort ();
+  if (test_vduph_lane_u16 ())
+    abort ();
+  if (test_vdups_lane_s32 ())
+    abort ();
+  if (test_vdups_lane_u32 ())
+    abort ();
+  if (test_vdupd_lane_s64 ())
+    abort ();
+  if (test_vdupd_lane_u64 ())
+    abort ();
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c b/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..06bee4cdd2532dd4302e1c4437a74970e4bc2966
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c
@@ -0,0 +1,659 @@
+/* Test vdup_n intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+#define INHIB_OPTIMIZATION asm volatile ("" : : : "memory")  /* Empty asm with a "memory" clobber: compiler-level barrier.  */
+
+#define force_gp(V1)   asm volatile ("orr %0, %1, %1"		\
+	   : "=r"(V1)						\
+	   : "r"(V1)						\
+	   : /* No clobbers */);  /* V1 is both "r" input and "=r" output; the expansion ends in ';', so call sites omit it.  */
+
+int __attribute__ ((noinline))
+test_vdup_n_f32 ()  /* Returns 0 if every lane of vdup_n_f32 (a) equals a, else 1.  */
+{
+  float32_t a;
+  float32x2_t b;
+  float32_t c[2];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = (1.0);
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdup_n_f32 (a);
+  INHIB_OPTIMIZATION;
+  vst1_f32 (c, b);  /* NOTE(review): the other tests in this file place a barrier after the store; confirm the omission is intended.  */
+  for (; i < 2; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* No asm check.  */
+/* Cannot force floating point value in general purpose register.  */
+
+int __attribute__ ((noinline))
+test_vdupq_n_f32 ()  /* Returns 0 if every lane of vdupq_n_f32 (a) equals a, else 1.  */
+{
+  float32_t a;
+  float32x4_t b;
+  float32_t c[4];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1.0;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_f32 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_f32 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* No asm check.  */
+/* Cannot force floating point value in general purpose register.  */
+
+int __attribute__ ((noinline))
+test_vdup_n_f64 ()  /* Returns 0 if the single lane of vdup_n_f64 (a) equals a, else 1.  */
+{
+  float64_t a;
+  float64x1_t b;
+  float64_t c[1];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1.0;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdup_n_f64 (a);
+  INHIB_OPTIMIZATION;
+  vst1_f64 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 1; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* No asm check.  */
+/* Cannot force floating point value in general purpose register.  */
+
+int __attribute__ ((noinline))
+test_vdupq_n_f64 ()  /* Returns 0 if every lane of vdupq_n_f64 (a) equals a, else 1.  */
+{
+  float64_t a;
+  float64x2_t b;
+  float64_t c[2];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1.0;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_f64 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_f64 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (a != c[i])
+        return 1;
+    }
+  return 0;
+}
+
+/* No asm check.  */
+/* Cannot force floating point value in general purpose register.  */
+
+int __attribute__ ((noinline))
+test_vdup_n_p8 ()  /* Returns 0 if every lane of vdup_n_p8 (a) equals a, else 1.  */
+{
+  poly8_t a;
+  poly8x8_t b;
+  poly8_t c[8];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdup_n_p8 (a);
+  INHIB_OPTIMIZATION;
+  vst1_p8 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdup_n_p8, test_vdup_n_s8, test_vdup_n_u8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, w\[0-9\]+" 3 } } */
+
+int __attribute__ ((noinline))
+test_vdupq_n_p8 ()  /* Returns 0 if every lane of vdupq_n_p8 (a) equals a, else 1.  */
+{
+  poly8_t a;
+  poly8x16_t b;
+  poly8_t c[16];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_p8 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_p8 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 16; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdupq_n_p8, test_vdupq_n_s8, test_vdupq_n_u8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, w\[0-9\]+" 3 } } */
+
+int __attribute__ ((noinline))
+test_vdup_n_s8 ()  /* Returns 0 if every lane of vdup_n_s8 (a) equals a, else 1.  */
+{
+  int8_t a;
+  int8x8_t b;
+  int8_t c[8];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdup_n_s8 (a);
+  INHIB_OPTIMIZATION;
+  vst1_s8 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_s8 ()  /* Returns 0 if every lane of vdupq_n_s8 (a) equals a, else 1.  */
+{
+  int8_t a;
+  int8x16_t b;
+  int8_t c[16];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_s8 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_s8 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 16; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_u8 ()  /* Returns 0 if every lane of vdup_n_u8 (a) equals a, else 1.  */
+{
+  uint8_t a;
+  uint8x8_t b;
+  uint8_t c[8];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdup_n_u8 (a);
+  INHIB_OPTIMIZATION;
+  vst1_u8 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u8 ()  /* Returns 0 if every lane of vdupq_n_u8 (a) equals a, else 1.  */
+{
+  uint8_t a;
+  uint8x16_t b;
+  uint8_t c[16];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_u8 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_u8 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 16; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_p16 ()  /* Returns 0 if every lane of vdup_n_p16 (a) equals a, else 1.  */
+{
+  poly16_t a;
+  poly16x4_t b;
+  poly16_t c[4];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdup_n_p16 (a);
+  INHIB_OPTIMIZATION;
+  vst1_p16 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdup_n_p16, test_vdup_n_s16, test_vdup_n_u16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, w\[0-9\]+" 3 } } */
+
+int __attribute__ ((noinline))
+test_vdupq_n_p16 ()  /* Returns 0 if every lane of vdupq_n_p16 (a) equals a, else 1.  */
+{
+  poly16_t a;
+  poly16x8_t b;
+  poly16_t c[8];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_p16 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_p16 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdupq_n_p16, test_vdupq_n_s16, test_vdupq_n_u16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, w\[0-9\]+" 3 } } */
+
+int __attribute__ ((noinline))
+test_vdup_n_s16 ()  /* Returns 0 if every lane of vdup_n_s16 (a) equals a, else 1.  */
+{
+  int16_t a;
+  int16x4_t b;
+  int16_t c[4];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdup_n_s16 (a);
+  INHIB_OPTIMIZATION;
+  vst1_s16 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_s16 ()  /* Returns 0 if every lane of vdupq_n_s16 (a) equals a, else 1.  */
+{
+  int16_t a;
+  int16x8_t b;
+  int16_t c[8];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_s16 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_s16 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_u16 ()  /* Returns 0 if every lane of vdup_n_u16 (a) equals a, else 1.  */
+{
+  uint16_t a;
+  uint16x4_t b;
+  uint16_t c[4];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdup_n_u16 (a);
+  INHIB_OPTIMIZATION;
+  vst1_u16 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u16 ()  /* Returns 0 if every lane of vdupq_n_u16 (a) equals a, else 1.  */
+{
+  uint16_t a;
+  uint16x8_t b;
+  uint16_t c[8];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_u16 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_u16 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 8; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_s32 ()  /* Returns 0 if every lane of vdup_n_s32 (a) equals a, else 1.  */
+{
+  int32_t a;
+  int32x2_t b;
+  int32_t c[2];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdup_n_s32 (a);
+  INHIB_OPTIMIZATION;
+  vst1_s32 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdup_n_s32, test_vdup_n_u32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, w\[0-9\]+" 2 } } */
+
+int __attribute__ ((noinline))
+test_vdupq_n_s32 ()  /* Returns 0 if every lane of vdupq_n_s32 (a) equals a, else 1.  */
+{
+  int32_t a;
+  int32x4_t b;
+  int32_t c[4];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_s32 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_s32 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdupq_n_s32, test_vdupq_n_u32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, w\[0-9\]+" 2 } } */
+
+int __attribute__ ((noinline))
+test_vdup_n_u32 ()  /* Returns 0 if every lane of vdup_n_u32 (a) equals a, else 1.  */
+{
+  uint32_t a;
+  uint32x2_t b;
+  uint32_t c[2];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdup_n_u32 (a);
+  INHIB_OPTIMIZATION;
+  vst1_u32 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+
+int __attribute__ ((noinline))
+test_vdupq_n_u32 ()  /* Returns 0 if every lane of vdupq_n_u32 (a) equals a, else 1.  */
+{
+  uint32_t a;
+  uint32x4_t b;
+  uint32_t c[4];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_u32 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_u32 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 4; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_s64 ()  /* Returns 0 if the single lane of vdup_n_s64 (a) equals a, else 1.  */
+{
+  int64_t a;
+  int64x1_t b;
+  int64_t c[1];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdup_n_s64 (a);
+  INHIB_OPTIMIZATION;
+  vst1_s64 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 1; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdup_n_s64, test_vdup_n_u64.  */
+/* Attempts to make the compiler generate "dup\\td\[0-9\]+, x\[0-9\]+"
+   are not practical.  */
+
+int __attribute__ ((noinline))
+test_vdupq_n_s64 ()  /* Returns 0 if every lane of vdupq_n_s64 (a) equals a, else 1.  */
+{
+  int64_t a;
+  int64x2_t b;
+  int64_t c[2];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_s64 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_s64 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+/* Covers test_vdupq_n_s64, test_vdupq_n_u64.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, x\[0-9\]+" 2 } } */
+
+int __attribute__ ((noinline))
+test_vdup_n_u64 ()  /* Returns 0 if the single lane of vdup_n_u64 (a) equals a, else 1.  */
+{
+  uint64_t a;
+  uint64x1_t b;
+  uint64_t c[1];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdup_n_u64 (a);
+  INHIB_OPTIMIZATION;
+  vst1_u64 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 1; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u64 ()  /* Returns 0 if every lane of vdupq_n_u64 (a) equals a, else 1.  */
+{
+  uint64_t a;
+  uint64x2_t b;
+  uint64_t c[2];
+  int i = 0;
+  INHIB_OPTIMIZATION;
+  a = 1;
+  force_gp (a)  /* Macro supplies its own ';'; passes a through an "r"-constrained asm.  */
+  INHIB_OPTIMIZATION;
+  b = vdupq_n_u64 (a);
+  INHIB_OPTIMIZATION;
+  vst1q_u64 (c, b);
+  INHIB_OPTIMIZATION;
+  for (; i < 2; i++)
+    {
+      if (a != c[i])
+	return 1;
+    }
+  return 0;
+}
+
+int
+main ()  /* Runs every vdup_n / vdupq_n test; aborts on the first one that returns non-zero.  */
+{
+  if (test_vdup_n_f32 ())  /* 64-bit-vector (vdup_n_*) variants first.  */
+    abort ();
+  if (test_vdup_n_f64 ())
+    abort ();
+  if (test_vdup_n_p8 ())
+    abort ();
+  if (test_vdup_n_u8 ())
+    abort ();
+  if (test_vdup_n_s8 ())
+    abort ();
+  if (test_vdup_n_p16 ())
+    abort ();
+  if (test_vdup_n_s16 ())
+    abort ();
+  if (test_vdup_n_u16 ())
+    abort ();
+  if (test_vdup_n_s32 ())
+    abort ();
+  if (test_vdup_n_u32 ())
+    abort ();
+  if (test_vdup_n_s64 ())
+    abort ();
+  if (test_vdup_n_u64 ())
+    abort ();
+  if (test_vdupq_n_f32 ())  /* Then the 128-bit-vector (vdupq_n_*) variants.  */
+    abort ();
+  if (test_vdupq_n_f64 ())
+    abort ();
+  if (test_vdupq_n_p8 ())
+    abort ();
+  if (test_vdupq_n_u8 ())
+    abort ();
+  if (test_vdupq_n_s8 ())
+    abort ();
+  if (test_vdupq_n_p16 ())
+    abort ();
+  if (test_vdupq_n_s16 ())
+    abort ();
+  if (test_vdupq_n_u16 ())
+    abort ();
+  if (test_vdupq_n_s32 ())
+    abort ();
+  if (test_vdupq_n_u32 ())
+    abort ();
+  if (test_vdupq_n_s64 ())
+    abort ();
+  if (test_vdupq_n_u64 ())
+    abort ();
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */

             reply	other threads:[~2014-01-16 12:12 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-01-16 12:12 Alex Velenko [this message]
2014-01-22 10:27 ` Alex Velenko
2014-01-27 17:47 ` Marcus Shawcroft

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=52D7CCB4.5040105@arm.com \
    --to=alex.velenko@arm.com \
    --cc=Marcus.Shawcroft@arm.com \
    --cc=gcc-patches@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).