From e623828ac2d033a9a51766d9843a650aab9f42e9 Mon Sep 17 00:00:00 2001
From: Matthew Wahab
Date: Thu, 27 Aug 2015 13:22:41 +0100
Subject: [PATCH 6/7] Add neon intrinsics: vqrdmlah, vqrdmlsh.

Change-Id: I5c7f8d36ee980d280c1d50f6f212b286084c5acf
---
 gcc/config/aarch64/arm_neon.h                      |  53 ++++++++
 .../aarch64/advsimd-intrinsics/vqrdmlXh.inc        | 138 +++++++++++++++++++++
 .../aarch64/advsimd-intrinsics/vqrdmlah.c          |  57 +++++++++
 .../aarch64/advsimd-intrinsics/vqrdmlsh.c          |  61 +++++++++
 4 files changed, 309 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc
 create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 138b108..63f1627 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -11213,6 +11213,59 @@ vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
   return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
 }
 
+/* ARMv8.1 intrinsics.  */
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.1-a")
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
+{
+  return __builtin_aarch64_sqrdmlahv4hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
+{
+  return __builtin_aarch64_sqrdmlahv2si (__a, __b, __c);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
+{
+  return __builtin_aarch64_sqrdmlahv8hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
+{
+  return __builtin_aarch64_sqrdmlahv4si (__a, __b, __c);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
+{
+  return __builtin_aarch64_sqrdmlshv4hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
+{
+  return __builtin_aarch64_sqrdmlshv2si (__a, __b, __c);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
+{
+  return __builtin_aarch64_sqrdmlshv8hi (__a, __b, __c);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
+{
+  return __builtin_aarch64_sqrdmlshv4si (__a, __b, __c);
+}
+#pragma GCC pop_options
+
 #pragma GCC push_options
 #pragma GCC target ("+nothing+crypto")
 /* vaes  */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc
new file mode 100644
index 0000000..a504ca6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc
@@ -0,0 +1,138 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1 (NAME)
+
+void FNNAME (INSN) (void)
+{
+  /* vector_res = vqrdmlah (vector, vector2, vector3),
+     then store the result.  */
+#define TEST_VQRDMLAH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
+  Set_Neon_Cumulative_Sat (0, VECT_VAR (vector_res, T1, W, N));	\
+  VECT_VAR (vector_res, T1, W, N) =					\
+    INSN##Q##_##T2##W (VECT_VAR (vector, T1, W, N),			\
+		       VECT_VAR (vector2, T1, W, N),			\
+		       VECT_VAR (vector3, T1, W, N));			\
+  vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N),			\
+		     VECT_VAR (vector_res, T1, W, N));			\
+  CHECK_CUMULATIVE_SAT (TEST_MSG, T1, W, N,				\
+			EXPECTED_CUMULATIVE_SAT, CMT)
+
+  /* Two auxiliary macros are necessary to expand INSN.  */
+#define TEST_VQRDMLAH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
+  TEST_VQRDMLAH2 (INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+#define TEST_VQRDMLAH(Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)	\
+  TEST_VQRDMLAH1 (INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
+
+  DECL_VARIABLE (vector, int, 16, 4);
+  DECL_VARIABLE (vector, int, 32, 2);
+  DECL_VARIABLE (vector, int, 16, 8);
+  DECL_VARIABLE (vector, int, 32, 4);
+
+  DECL_VARIABLE (vector_res, int, 16, 4);
+  DECL_VARIABLE (vector_res, int, 32, 2);
+  DECL_VARIABLE (vector_res, int, 16, 8);
+  DECL_VARIABLE (vector_res, int, 32, 4);
+
+  DECL_VARIABLE (vector2, int, 16, 4);
+  DECL_VARIABLE (vector2, int, 32, 2);
+  DECL_VARIABLE (vector2, int, 16, 8);
+  DECL_VARIABLE (vector2, int, 32, 4);
+
+  DECL_VARIABLE (vector3, int, 16, 4);
+  DECL_VARIABLE (vector3, int, 32, 2);
+  DECL_VARIABLE (vector3, int, 16, 8);
+  DECL_VARIABLE (vector3, int, 32, 4);
+
+  clean_results ();
+
+  VLOAD (vector, buffer, , int, s, 16, 4);
+  VLOAD (vector, buffer, , int, s, 32, 2);
+  VLOAD (vector, buffer, q, int, s, 16, 8);
+  VLOAD (vector, buffer, q, int, s, 32, 4);
+
+  /* Initialize vector2.  */
+  VDUP (vector2, , int, s, 16, 4, 0x5555);
+  VDUP (vector2, , int, s, 32, 2, 0xBB);
+  VDUP (vector2, q, int, s, 16, 8, 0xBB);
+  VDUP (vector2, q, int, s, 32, 4, 0x22);
+
+  /* Initialize vector3.  */
+  VDUP (vector3, , int, s, 16, 4, 0x5555);
+  VDUP (vector3, , int, s, 32, 2, 0xBB);
+  VDUP (vector3, q, int, s, 16, 8, 0x33);
+  VDUP (vector3, q, int, s, 32, 4, 0x22);
+
+#define CMT ""
+  TEST_VQRDMLAH ( , int, s, 16, 4, expected_cumulative_sat, CMT);
+  TEST_VQRDMLAH ( , int, s, 32, 2, expected_cumulative_sat, CMT);
+  TEST_VQRDMLAH (q, int, s, 16, 8, expected_cumulative_sat, CMT);
+  TEST_VQRDMLAH (q, int, s, 32, 4, expected_cumulative_sat, CMT);
+
+  CHECK (TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
+  CHECK (TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
+  CHECK (TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
+  CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
+
+  /* Now use input values such that the multiplication causes
+     saturation.  */
+#define TEST_MSG_MUL " (check mul cumulative saturation)"
+  VDUP (vector, , int, s, 16, 4, 0x8000);
+  VDUP (vector, , int, s, 32, 2, 0x80000000);
+  VDUP (vector, q, int, s, 16, 8, 0x8000);
+  VDUP (vector, q, int, s, 32, 4, 0x80000000);
+  VDUP (vector2, , int, s, 16, 4, 0x8000);
+  VDUP (vector2, , int, s, 32, 2, 0x80000000);
+  VDUP (vector2, q, int, s, 16, 8, 0x8000);
+  VDUP (vector2, q, int, s, 32, 4, 0x80000000);
+  VDUP (vector3, , int, s, 16, 4, 0x8000);
+  VDUP (vector3, , int, s, 32, 2, 0x80000000);
+  VDUP (vector3, q, int, s, 16, 8, 0x8000);
+  VDUP (vector3, q, int, s, 32, 4, 0x80000000);
+
+  TEST_VQRDMLAH ( , int, s, 16, 4, expected_cumulative_sat_mul, TEST_MSG_MUL);
+  TEST_VQRDMLAH ( , int, s, 32, 2, expected_cumulative_sat_mul, TEST_MSG_MUL);
+  TEST_VQRDMLAH (q, int, s, 16, 8, expected_cumulative_sat_mul, TEST_MSG_MUL);
+  TEST_VQRDMLAH (q, int, s, 32, 4, expected_cumulative_sat_mul, TEST_MSG_MUL);
+
+  CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL);
+  CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL);
+  CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL);
+  CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL);
+
+  /* Use input values where rounding produces a result equal to the
+     saturation value, but does not set the saturation flag.  */
+#define TEST_MSG_ROUND " (check rounding)"
+  VDUP (vector, , int, s, 16, 4, 0x8000);
+  VDUP (vector, , int, s, 32, 2, 0x80000000);
+  VDUP (vector, q, int, s, 16, 8, 0x8000);
+  VDUP (vector, q, int, s, 32, 4, 0x80000000);
+  VDUP (vector2, , int, s, 16, 4, 0x8001);
+  VDUP (vector2, , int, s, 32, 2, 0x80000001);
+  VDUP (vector2, q, int, s, 16, 8, 0x8001);
+  VDUP (vector2, q, int, s, 32, 4, 0x80000001);
+  VDUP (vector3, , int, s, 16, 4, 0x8001);
+  VDUP (vector3, , int, s, 32, 2, 0x80000001);
+  VDUP (vector3, q, int, s, 16, 8, 0x8001);
+  VDUP (vector3, q, int, s, 32, 4, 0x80000001);
+
+  TEST_VQRDMLAH ( , int, s, 16, 4, expected_cumulative_sat_round,	\
+		  TEST_MSG_ROUND);
+  TEST_VQRDMLAH ( , int, s, 32, 2, expected_cumulative_sat_round,	\
+		  TEST_MSG_ROUND);
+  TEST_VQRDMLAH (q, int, s, 16, 8, expected_cumulative_sat_round,	\
+		  TEST_MSG_ROUND);
+  TEST_VQRDMLAH (q, int, s, 32, 4, expected_cumulative_sat_round,	\
+		  TEST_MSG_ROUND);
+
+  CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND);
+  CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND);
+  CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND);
+  CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND);
+}
+
+int
+main (void)
+{
+  FNNAME (INSN) ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c
new file mode 100644
index 0000000..148d94c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c
@@ -0,0 +1,57 @@
+/* { dg-require-effective-target arm_v8_1a_neon_hw } */
+/* { dg-add-options arm_v8_1a_neon } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0;
+
+/* Expected results.  */
+VECT_VAR_DECL (expected, int, 16, 4) [] = { 0x38d3, 0x38d4, 0x38d5, 0x38d6 };
+VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL (expected, int, 16, 8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+					    0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1,
+					    0xfffffff2, 0xfffffff3 };
+
+/* Expected values of cumulative_saturation flag when multiplication
+   saturates.  */
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 0;
+
+/* Expected results when multiplication saturates.  */
+VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0,
+						0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
+
+/* Expected values of cumulative_saturation flag when rounding
+   should not cause saturation.  */
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 0;
+
+/* Expected results when rounding should not cause saturation.  */
+VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0xfffe, 0xfffe,
+						  0xfffe, 0xfffe };
+VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0xfffffffe, 0xfffffffe };
+VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0xfffe, 0xfffe,
+						  0xfffe, 0xfffe,
+						  0xfffe, 0xfffe,
+						  0xfffe, 0xfffe };
+VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0xfffffffe, 0xfffffffe,
+						  0xfffffffe, 0xfffffffe };
+
+#define INSN vqrdmlah
+#define TEST_MSG "VQRDMLAH"
+
+#include "vqrdmlXh.inc"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c
new file mode 100644
index 0000000..91c3b34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c
@@ -0,0 +1,61 @@
+/* { dg-require-effective-target arm_v8_1a_neon_hw } */
+/* { dg-add-options arm_v8_1a_neon } */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected values of cumulative_saturation flag.  */
+int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0;
+int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0;
+
+/* Expected results.  */
+VECT_VAR_DECL (expected, int, 16, 4) [] = { 0xc70d, 0xc70e, 0xc70f, 0xc710 };
+VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL (expected, int, 16, 8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+					    0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1,
+					    0xfffffff2, 0xfffffff3 };
+
+/* Expected values of cumulative_saturation flag when multiplication
+   saturates.  */
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 1;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 1;
+int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 1;
+int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 1;
+
+/* Expected results when multiplication saturates.  */
+VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x8000, 0x8000,
+						0x8000, 0x8000 };
+VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x80000000, 0x80000000 };
+VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x8000, 0x8000,
+						0x8000, 0x8000,
+						0x8000, 0x8000,
+						0x8000, 0x8000 };
+VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x80000000, 0x80000000,
+						0x80000000, 0x80000000 };
+
+/* Expected values of cumulative_saturation flag when rounding
+   should not cause saturation.  */
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 1;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 1;
+int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 1;
+int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 1;
+
+/* Expected results when rounding should not cause saturation.  */
+VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0x8000, 0x8000,
+						  0x8000, 0x8000 };
+VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0x80000000, 0x80000000 };
+VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0x8000, 0x8000,
+						  0x8000, 0x8000,
+						  0x8000, 0x8000,
+						  0x8000, 0x8000 };
+VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0x80000000, 0x80000000,
+						  0x80000000, 0x80000000 };
+
+#define INSN vqrdmlsh
+#define TEST_MSG "VQRDMLSH"
+
+#include "vqrdmlXh.inc"
-- 
2.1.4
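
For readers unfamiliar with the underlying instructions: SQRDMLAH/SQRDMLSH compute the
saturating rounding doubling multiply of two operands, take the high half, and accumulate
it into (or subtract it from) the first operand. The sketch below is illustrative only and
not part of the patch; it assumes a toolchain and target with ARMv8.1-A support (e.g.
compiled with -march=armv8.1-a).

/* Minimal usage sketch of vqrdmlah_s16/vqrdmlsh_s16 (not part of the patch).
   On a Q15 fixed-point reading, vqrdmlah_s16 (a, b, c) is a + b*c with
   rounding and saturation; vqrdmlsh_s16 is a - b*c.  */
#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  int16x4_t acc = vdup_n_s16 (100);
  int16x4_t b = vdup_n_s16 (0x4000);	/* 0.5 in Q15.  */
  int16x4_t c = vdup_n_s16 (0x2000);	/* 0.25 in Q15.  */

  /* (2*b*c + 0x8000) >> 16 = 0x1000, so each lane is 100 + 4096 = 4196.  */
  int16x4_t mla = vqrdmlah_s16 (acc, b, c);
  /* Each lane is 100 - 4096 = -3996.  */
  int16x4_t mls = vqrdmlsh_s16 (acc, b, c);

  printf ("vqrdmlah: %d\n", vget_lane_s16 (mla, 0));
  printf ("vqrdmlsh: %d\n", vget_lane_s16 (mls, 0));
  return 0;
}

This is also why the tests above check the saturating cases: with all inputs at the most
negative value, the doubling multiply alone would overflow the intermediate, and the
cumulative saturation (QC) flag records whether that happened.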