From: Matthew Wahab <matthew.wahab@foss.arm.com>
To: James Greenhalgh <james.greenhalgh@arm.com>
Cc: gcc-patches@gcc.gnu.org
Subject: Re: [AArch64][PATCH 7/7] Add NEON intrinsics vqrdmlah_lane and vqrdmlsh_lane.
Date: Wed, 25 Nov 2015 10:25:00 -0000 [thread overview]
Message-ID: <56558A51.5040606@foss.arm.com> (raw)
In-Reply-To: <20151123133746.GC11516@arm.com>
[-- Attachment #1: Type: text/plain, Size: 2321 bytes --]
On 23/11/15 13:37, James Greenhalgh wrote:
> On Fri, Oct 23, 2015 at 01:30:46PM +0100, Matthew Wahab wrote:
>> The ARMv8.1 architecture extension adds two Adv.SIMD instructions,
>> sqrdmlah and sqrdmlsh. This patch adds the NEON intrinsics vqrdmlah_lane
>> and vqrdmlsh_lane for these instructions. The new intrinsics are of the
>> form vqrdml{as}h[q]_lane_<type>.
>>
>> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
>> index 9e73809..9b68e4a 100644
>> --- a/gcc/config/aarch64/arm_neon.h
>> +++ b/gcc/config/aarch64/arm_neon.h
>> @@ -10675,6 +10675,59 @@ vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
>> return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
>> }
>>
>> +#pragma GCC push_options
>> +#pragma GCC target ("arch=armv8.1-a")
>
> Rather than strict alphabetical order, can we group everything which is
> under one set of extensions together, to save on the push_options/pop_options
> pairs.
>
Attached the reworked patch that keeps the ARMv8.1 intrinsics together,
bracketed by a single target pragma.
Retested aarch64-none-elf with cross-compiled check-gcc on an ARMv8.1
emulator. Also re-ran the cross-compiled
gcc.target/aarch64/advsimd-intrinsics tests for aarch64-none-elf on an
ARMv8 emulator.
Matthew
gcc/
2015-11-24 Matthew Wahab <matthew.wahab@arm.com>
* gcc/config/aarch64/arm_neon.h
(vqrdmlah_laneq_s16, vqrdmlah_laneq_s32): New.
(vqrdmlahq_laneq_s16, vqrdmlahq_laneq_s32): New.
(vqrdmlsh_lane_s16, vqrdmlsh_lane_s32): New.
(vqrdmlshq_laneq_s16, vqrdmlshq_laneq_s32): New.
(vqrdmlah_lane_s16, vqrdmlah_lane_s32): New.
(vqrdmlahq_lane_s16, vqrdmlahq_lane_s32): New.
(vqrdmlahh_s16, vqrdmlahh_lane_s16, vqrdmlahh_laneq_s16): New.
(vqrdmlahs_s32, vqrdmlahs_lane_s32, vqrdmlahs_laneq_s32): New.
(vqrdmlsh_lane_s16, vqrdmlsh_lane_s32): New.
(vqrdmlshq_lane_s16, vqrdmlshq_lane_s32): New.
(vqrdmlshh_s16, vqrdmlshh_lane_s16, vqrdmlshh_laneq_s16): New.
(vqrdmlshs_s32, vqrdmlshs_lane_s32, vqrdmlshs_laneq_s32): New.
gcc/testsuite
2015-11-24 Matthew Wahab <matthew.wahab@arm.com>
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc: New file,
support code for vqrdml{as}h_lane tests.
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c: New.
[-- Attachment #2: 0007-Add-neon-intrinsics-vqrdmlah_lane-vqrdmlsh_lane.patch --]
[-- Type: text/x-patch, Size: 19650 bytes --]
From 03cb214eaf07cceb65f0dc07dca1be739bfe5375 Mon Sep 17 00:00:00 2001
From: Matthew Wahab <matthew.wahab@arm.com>
Date: Thu, 27 Aug 2015 14:17:26 +0100
Subject: [PATCH 7/7] Add neon intrinsics: vqrdmlah_lane, vqrdmlsh_lane.
Change-Id: I6d7a372e0a5b83ef0846ab62abbe9b24ada69fc4
---
gcc/config/aarch64/arm_neon.h | 168 +++++++++++++++++++++
.../aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc | 154 +++++++++++++++++++
.../aarch64/advsimd-intrinsics/vqrdmlah_lane.c | 57 +++++++
.../aarch64/advsimd-intrinsics/vqrdmlsh_lane.c | 61 ++++++++
4 files changed, 440 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc
create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 63f1627..56db339 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -11264,6 +11264,174 @@ vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
{
return __builtin_aarch64_sqrdmlshv4si (__a, __b, __c);
}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlah_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_laneqv4hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlah_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_laneqv2si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlahq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_laneqv8hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlahq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_laneqv4si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlsh_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_laneqv4hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlsh_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_laneqv2si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlshq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_laneqv8hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlshq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_laneqv4si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlah_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_lanev4hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlah_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_lanev2si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlahq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_lanev8hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlahq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_lanev4si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqrdmlahh_s16 (int16_t __a, int16_t __b, int16_t __c)
+{
+ return (int16_t) __builtin_aarch64_sqrdmlahhi (__a, __b, __c);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqrdmlahh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_lanehi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqrdmlahh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_laneqhi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqrdmlahs_s32 (int32_t __a, int32_t __b, int32_t __c)
+{
+ return (int32_t) __builtin_aarch64_sqrdmlahsi (__a, __b, __c);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqrdmlahs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_lanesi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqrdmlahs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlah_laneqsi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlsh_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_lanev4hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlsh_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_lanev2si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlshq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_lanev8hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlshq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_lanev4si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqrdmlshh_s16 (int16_t __a, int16_t __b, int16_t __c)
+{
+ return (int16_t) __builtin_aarch64_sqrdmlshhi (__a, __b, __c);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqrdmlshh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_lanehi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16_t __attribute__ ((__always_inline__))
+vqrdmlshh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_laneqhi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqrdmlshs_s32 (int32_t __a, int32_t __b, int32_t __c)
+{
+ return (int32_t) __builtin_aarch64_sqrdmlshsi (__a, __b, __c);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqrdmlshs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_lanesi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32_t __attribute__ ((__always_inline__))
+vqrdmlshs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d)
+{
+ return __builtin_aarch64_sqrdmlsh_laneqsi (__a, __b, __c, __d);
+}
#pragma GCC pop_options
#pragma GCC push_options
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc
new file mode 100644
index 0000000..a855502
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc
@@ -0,0 +1,154 @@
+#define FNNAME1(NAME) exec_ ## NAME ## _lane
+#define FNNAME(NAME) FNNAME1 (NAME)
+
+void FNNAME (INSN) (void)
+{
+ /* vector_res = vqrdmlXh_lane (vector, vector2, vector3, lane),
+ then store the result. */
+#define TEST_VQRDMLXH_LANE2(INSN, Q, T1, T2, W, N, N2, L, \
+ EXPECTED_CUMULATIVE_SAT, CMT) \
+ Set_Neon_Cumulative_Sat (0, VECT_VAR (vector_res, T1, W, N)); \
+ VECT_VAR (vector_res, T1, W, N) = \
+ INSN##Q##_lane_##T2##W (VECT_VAR (vector, T1, W, N), \
+ VECT_VAR (vector2, T1, W, N), \
+ VECT_VAR (vector3, T1, W, N2), \
+ L); \
+ vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), \
+ VECT_VAR (vector_res, T1, W, N)); \
+ CHECK_CUMULATIVE_SAT (TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+ /* Two auxliary macros are necessary to expand INSN. */
+#define TEST_VQRDMLXH_LANE1(INSN, Q, T1, T2, W, N, N2, L, \
+ EXPECTED_CUMULATIVE_SAT, CMT) \
+ TEST_VQRDMLXH_LANE2 (INSN, Q, T1, T2, W, N, N2, L, \
+ EXPECTED_CUMULATIVE_SAT, CMT)
+
+#define TEST_VQRDMLXH_LANE(Q, T1, T2, W, N, N2, L, \
+ EXPECTED_CUMULATIVE_SAT, CMT) \
+ TEST_VQRDMLXH_LANE1 (INSN, Q, T1, T2, W, N, N2, L, \
+ EXPECTED_CUMULATIVE_SAT, CMT)
+
+
+ DECL_VARIABLE (vector, int, 16, 4);
+ DECL_VARIABLE (vector, int, 32, 2);
+ DECL_VARIABLE (vector, int, 16, 8);
+ DECL_VARIABLE (vector, int, 32, 4);
+
+ DECL_VARIABLE (vector_res, int, 16, 4);
+ DECL_VARIABLE (vector_res, int, 32, 2);
+ DECL_VARIABLE (vector_res, int, 16, 8);
+ DECL_VARIABLE (vector_res, int, 32, 4);
+
+ DECL_VARIABLE (vector2, int, 16, 4);
+ DECL_VARIABLE (vector2, int, 32, 2);
+ DECL_VARIABLE (vector2, int, 16, 8);
+ DECL_VARIABLE (vector2, int, 32, 4);
+
+ DECL_VARIABLE (vector3, int, 16, 4);
+ DECL_VARIABLE (vector3, int, 32, 2);
+ DECL_VARIABLE (vector3, int, 16, 8);
+ DECL_VARIABLE (vector3, int, 32, 4);
+
+ clean_results ();
+
+ VLOAD (vector, buffer, , int, s, 16, 4);
+ VLOAD (vector, buffer, , int, s, 32, 2);
+
+ VLOAD (vector, buffer, q, int, s, 16, 8);
+ VLOAD (vector, buffer, q, int, s, 32, 4);
+
+ /* Initialize vector2. */
+ VDUP (vector2, , int, s, 16, 4, 0x5555);
+ VDUP (vector2, , int, s, 32, 2, 0xBB);
+ VDUP (vector2, q, int, s, 16, 8, 0xBB);
+ VDUP (vector2, q, int, s, 32, 4, 0x22);
+
+ /* Initialize vector3. */
+ VDUP (vector3, , int, s, 16, 4, 0x5555);
+ VDUP (vector3, , int, s, 32, 2, 0xBB);
+ VDUP (vector3, q, int, s, 16, 8, 0x33);
+ VDUP (vector3, q, int, s, 32, 4, 0x22);
+
+ /* Choose lane arbitrarily. */
+#define CMT ""
+ TEST_VQRDMLXH_LANE (, int, s, 16, 4, 4, 2, expected_cumulative_sat, CMT);
+ TEST_VQRDMLXH_LANE (, int, s, 32, 2, 2, 1, expected_cumulative_sat, CMT);
+ TEST_VQRDMLXH_LANE (q, int, s, 16, 8, 4, 3, expected_cumulative_sat, CMT);
+ TEST_VQRDMLXH_LANE (q, int, s, 32, 4, 2, 0, expected_cumulative_sat, CMT);
+
+ CHECK (TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
+ CHECK (TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
+ CHECK (TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
+ CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
+
+ /* Now use input values such that the multiplication causes
+ saturation. */
+#define TEST_MSG_MUL " (check mul cumulative saturation)"
+ VDUP (vector, , int, s, 16, 4, 0x8000);
+ VDUP (vector, , int, s, 32, 2, 0x80000000);
+ VDUP (vector, q, int, s, 16, 8, 0x8000);
+ VDUP (vector, q, int, s, 32, 4, 0x80000000);
+
+ VDUP (vector2, , int, s, 16, 4, 0x8000);
+ VDUP (vector2, , int, s, 32, 2, 0x80000000);
+ VDUP (vector2, q, int, s, 16, 8, 0x8000);
+ VDUP (vector2, q, int, s, 32, 4, 0x80000000);
+
+ VDUP (vector3, , int, s, 16, 4, 0x8000);
+ VDUP (vector3, , int, s, 32, 2, 0x80000000);
+ VDUP (vector3, q, int, s, 16, 8, 0x8000);
+ VDUP (vector3, q, int, s, 32, 4, 0x80000000);
+
+ TEST_VQRDMLXH_LANE (, int, s, 16, 4, 4, 2, expected_cumulative_sat_mul,
+ TEST_MSG_MUL);
+ TEST_VQRDMLXH_LANE (, int, s, 32, 2, 2, 1, expected_cumulative_sat_mul,
+ TEST_MSG_MUL);
+ TEST_VQRDMLXH_LANE (q, int, s, 16, 8, 4, 3, expected_cumulative_sat_mul,
+ TEST_MSG_MUL);
+ TEST_VQRDMLXH_LANE (q, int, s, 32, 4, 2, 0, expected_cumulative_sat_mul,
+ TEST_MSG_MUL);
+
+ CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL);
+ CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL);
+ CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL);
+ CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL);
+
+ VDUP (vector, , int, s, 16, 4, 0x8000);
+ VDUP (vector, , int, s, 32, 2, 0x80000000);
+ VDUP (vector, q, int, s, 16, 8, 0x8000);
+ VDUP (vector, q, int, s, 32, 4, 0x80000000);
+
+ VDUP (vector2, , int, s, 16, 4, 0x8001);
+ VDUP (vector2, , int, s, 32, 2, 0x80000001);
+ VDUP (vector2, q, int, s, 16, 8, 0x8001);
+ VDUP (vector2, q, int, s, 32, 4, 0x80000001);
+
+ VDUP (vector3, , int, s, 16, 4, 0x8001);
+ VDUP (vector3, , int, s, 32, 2, 0x80000001);
+ VDUP (vector3, q, int, s, 16, 8, 0x8001);
+ VDUP (vector3, q, int, s, 32, 4, 0x80000001);
+
+ /* Use input values where rounding produces a result equal to the
+ saturation value, but does not set the saturation flag. */
+#define TEST_MSG_ROUND " (check rounding)"
+ TEST_VQRDMLXH_LANE (, int, s, 16, 4, 4, 2, expected_cumulative_sat_round,
+ TEST_MSG_ROUND);
+ TEST_VQRDMLXH_LANE (, int, s, 32, 2, 2, 1, expected_cumulative_sat_round,
+ TEST_MSG_ROUND);
+ TEST_VQRDMLXH_LANE (q, int, s, 16, 8, 4, 3, expected_cumulative_sat_round,
+ TEST_MSG_ROUND);
+ TEST_VQRDMLXH_LANE (q, int, s, 32, 4, 2, 0, expected_cumulative_sat_round,
+ TEST_MSG_ROUND);
+
+ CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND);
+ CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND);
+ CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND);
+ CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND);
+}
+
+int
+main (void)
+{
+ FNNAME (INSN) ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c
new file mode 100644
index 0000000..ed43e01
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c
@@ -0,0 +1,57 @@
+/* { dg-require-effective-target arm_v8_1a_neon_hw } */
+/* { dg-add-options arm_v8_1a_neon } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected values of cumulative_saturation flag. */
+int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0;
+
+/* Expected results. */
+VECT_VAR_DECL (expected, int, 16, 4) [] = { 0x38d3, 0x38d4, 0x38d5, 0x38d6 };
+VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL (expected, int, 16, 8) [] = { 0x006d, 0x006e, 0x006f, 0x0070,
+ 0x0071, 0x0072, 0x0073, 0x0074 };
+VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1,
+ 0xfffffff2, 0xfffffff3 };
+
+/* Expected values of cumulative_saturation flag when multiplication
+ saturates. */
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 0;
+
+/* Expected results when multiplication saturates. */
+VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
+
+/* Expected values of cumulative_saturation flag when rounding
+ should not cause saturation. */
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 0;
+
+/* Expected results when rounding should not cause saturation. */
+VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0xfffe, 0xfffe,
+ 0xfffe, 0xfffe };
+VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0xfffffffe, 0xfffffffe };
+VECT_VAR_DECL (expected_round,int, 16, 8) [] = { 0xfffe, 0xfffe,
+ 0xfffe, 0xfffe,
+ 0xfffe, 0xfffe,
+ 0xfffe, 0xfffe };
+VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0xfffffffe, 0xfffffffe,
+ 0xfffffffe, 0xfffffffe };
+
+#define INSN vqrdmlah
+#define TEST_MSG "VQRDMLAH_LANE"
+
+#include "vqrdmlXh_lane.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c
new file mode 100644
index 0000000..6010b42
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c
@@ -0,0 +1,61 @@
+/* { dg-require-effective-target arm_v8_1a_neon_hw } */
+/* { dg-add-options arm_v8_1a_neon } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected values of cumulative_saturation flag. */
+int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0;
+
+/* Expected results. */
+VECT_VAR_DECL (expected, int, 16, 4) [] = { 0xc70d, 0xc70e, 0xc70f, 0xc710 };
+VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL (expected, int, 16, 8) [] = { 0xff73, 0xff74, 0xff75, 0xff76,
+ 0xff77, 0xff78, 0xff79, 0xff7a };
+VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1,
+ 0xfffffff2, 0xfffffff3 };
+
+/* Expected values of cumulative_saturation flag when multiplication
+ saturates. */
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 1;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 1;
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 1;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 1;
+
+/* Expected results when multiplication saturates. */
+VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x8000, 0x8000,
+ 0x8000, 0x8000 };
+VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x80000000, 0x80000000 };
+VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x8000, 0x8000,
+ 0x8000, 0x8000,
+ 0x8000, 0x8000,
+ 0x8000, 0x8000 };
+VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x80000000, 0x80000000,
+ 0x80000000, 0x80000000 };
+
+/* Expected values of cumulative_saturation flag when rounding
+ should not cause saturation. */
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 1;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 1;
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 1;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 1;
+
+/* Expected results when rounding should not cause saturation. */
+VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0x8000, 0x8000,
+ 0x8000, 0x8000 };
+VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0x80000000, 0x80000000 };
+VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0x8000, 0x8000,
+ 0x8000, 0x8000,
+ 0x8000, 0x8000,
+ 0x8000, 0x8000 };
+VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0x80000000, 0x80000000,
+ 0x80000000, 0x80000000 };
+
+#define INSN vqrdmlsh
+#define TEST_MSG "VQRDMLSH_LANE"
+
+#include "vqrdmlXh_lane.inc"
--
2.1.4
next prev parent reply other threads:[~2015-11-25 10:15 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-10-23 12:19 [AArch64][PATCH 1/7] Add support for ARMv8.1 Adv.SIMD,instructions Matthew Wahab
2015-10-23 12:19 ` [AArch64][PATCH 2/7] Add sqrdmah, sqrdmsh instructions Matthew Wahab
2015-10-27 11:19 ` James Greenhalgh
2015-10-27 16:12 ` Matthew Wahab
2015-10-27 16:30 ` James Greenhalgh
2015-10-23 12:21 ` [AArch64][PATCH 3/7] Add builtins for ARMv8.1 Adv.SIMD,instructions Matthew Wahab
2015-10-27 11:20 ` James Greenhalgh
2015-10-23 12:24 ` [AArch64][PATCH 4/7] Add ACLE feature macro for ARMv8.1,Adv.SIMD instructions Matthew Wahab
2015-10-27 11:36 ` James Greenhalgh
2015-11-17 13:21 ` James Greenhalgh
2015-10-23 12:24 ` [AArch64][dejagnu][PATCH 5/7] Dejagnu support for ARMv8.1 Adv.SIMD Matthew Wahab
2015-10-24 8:04 ` Bernhard Reutner-Fischer
2015-10-27 15:32 ` Matthew Wahab
2015-11-23 12:34 ` James Greenhalgh
2015-11-23 16:40 ` Matthew Wahab
2015-11-25 10:14 ` Matthew Wahab
2015-11-25 10:57 ` James Greenhalgh
2015-10-23 12:30 ` [AArch64][PATCH 6/7] Add NEON intrinsics vqrdmlah and vqrdmlsh Matthew Wahab
2015-10-30 12:53 ` Christophe Lyon
2015-10-30 15:56 ` Matthew Wahab
2015-11-09 13:31 ` Christophe Lyon
2015-11-09 13:53 ` Matthew Wahab
2015-11-23 13:37 ` James Greenhalgh
2015-11-25 10:15 ` Matthew Wahab
2015-11-25 10:58 ` James Greenhalgh
2015-10-23 12:34 ` [AArch64][PATCH 7/7] Add NEON intrinsics vqrdmlah_lane and vqrdmlsh_lane Matthew Wahab
2015-11-23 13:45 ` James Greenhalgh
2015-11-25 10:25 ` Matthew Wahab [this message]
2015-11-25 11:11 ` James Greenhalgh
2015-10-27 10:54 ` [AArch64][PATCH 1/7] Add support for ARMv8.1 Adv.SIMD,instructions James Greenhalgh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=56558A51.5040606@foss.arm.com \
--to=matthew.wahab@foss.arm.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=james.greenhalgh@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).