diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e7e8888bbd158d21691791a8d7db8a2616062e50..8d6873a45ad0cdef42f7c632bca38096b9de1787 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2822,6 +2822,79 @@
   [(set_attr "type" "neon_fp_mul_")]
 )
+;; fmulxq_lane_f32, and fmulx_laneq_f32
+
+(define_insn "*aarch64_combine_dupfmulx1"
+  [(set (match_operand:VDQSF 0 "register_operand" "=w")
+	(unspec:VDQSF
+	 [(match_operand:VDQSF 1 "register_operand" "w")
+	  (vec_duplicate:VDQSF
+	   (vec_select:<VEL>
+	    (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
+	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
+	 UNSPEC_FMULX))]
+  "TARGET_SIMD"
+  {
+    operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
+					  INTVAL (operands[3])));
+    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
+  }
+  [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
+)
+
+;; fmulxq_laneq_f32, fmulxq_laneq_f64, fmulx_lane_f32
+
+(define_insn "*aarch64_combine_dupfmulx2"
+  [(set (match_operand:VDQF 0 "register_operand" "=w")
+	(unspec:VDQF
+	 [(match_operand:VDQF 1 "register_operand" "w")
+	  (vec_duplicate:VDQF
+	   (vec_select:<VEL>
+	    (match_operand:VDQF 2 "register_operand" "w")
+	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
+	 UNSPEC_FMULX))]
+  "TARGET_SIMD"
+  {
+    operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
+    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
+  }
+  [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
+)
+
+;; fmulxq_lane_f64
+
+(define_insn "*aarch64_combine_dupfmulx3"
+  [(set (match_operand:V2DF 0 "register_operand" "=w")
+	(unspec:V2DF
+	 [(match_operand:V2DF 1 "register_operand" "w")
+	  (vec_duplicate:V2DF
+	   (match_operand:DF 2 "register_operand" "w"))]
+	 UNSPEC_FMULX))]
+  "TARGET_SIMD"
+  {
+    return "fmulx\t%0.2d, %1.2d, %2.d[0]";
+  }
+  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
+)
+
+;; fmulxs_lane_f32, fmulxs_laneq_f32, fmulxd_lane_f64 == fmulx_lane_f64,
+;; fmulxd_laneq_f64 == fmulx_laneq_f64
+
+(define_insn "*aarch64_combine_vgetfmulx1"
+  [(set (match_operand:<VEL> 0 "register_operand" "=w")
+	(unspec:<VEL>
+	 [(match_operand:<VEL> 1 "register_operand" "w")
+	  (vec_select:<VEL>
+	   (match_operand:VDQF_DF 2 "register_operand" "w")
+	    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
+	 UNSPEC_FMULX))]
+  "TARGET_SIMD"
+  {
+    operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
+    return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
+  }
+  [(set_attr "type" "fmul<Vetype>")]
+)
 
 ;; q
 
 (define_insn "aarch64_"
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 4a3ef455b0945ed7e77fb3e78621d5010cd4c094..0425630faeca0a9196d6232b53a8fea7377b1ac6 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -8509,32 +8509,6 @@ vmulq_n_u32 (uint32x4_t a, uint32_t b)
   return result;
 }
 
-#define vmulxq_lane_f32(a, b, c)                                        \
-  __extension__                                                         \
-    ({                                                                  \
-       float32x4_t b_ = (b);                                            \
-       float32x4_t a_ = (a);                                            \
-       float32x4_t result;                                              \
-       __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]"                            \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vmulxq_lane_f64(a, b, c)                                        \
-  __extension__                                                         \
-    ({                                                                  \
-       float64x2_t b_ = (b);                                            \
-       float64x2_t a_ = (a);                                            \
-       float64x2_t result;                                              \
-       __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]"                            \
-                : "=w"(result)                                          \
-                : "w"(a_), "w"(b_), "i"(c)                              \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
 vmvn_p8 (poly8x8_t a)
 {
@@ -17748,6 +17722,78 @@ vmulxd_f64 (float64_t __a, float64_t __b)
   return __builtin_aarch64_fmulxdf (__a, __b);
 }
 
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmulx_lane_f32 (float32x2_t __a, float32x2_t __v, const int __lane)
+{
+  return vmulx_f32 (__a, __aarch64_vdup_lane_f32 (__v, __lane));
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vmulx_lane_f64 (float64x1_t __a, float64x1_t __v, const int __lane)
+{
+  return vmulx_f64 (__a, __aarch64_vdup_lane_f64 (__v, __lane));
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmulxq_lane_f32 (float32x4_t __a, float32x2_t __v, const int __lane)
+{
+  return vmulxq_f32 (__a, __aarch64_vdupq_lane_f32 (__v, __lane));
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vmulxq_lane_f64 (float64x2_t __a, float64x1_t __v, const int __lane)
+{
+  return vmulxq_f64 (__a, __aarch64_vdupq_lane_f64 (__v, __lane));
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmulx_laneq_f32 (float32x2_t __a, float32x4_t __v, const int __lane)
+{
+  return vmulx_f32 (__a, __aarch64_vdup_laneq_f32 (__v, __lane));
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vmulx_laneq_f64 (float64x1_t __a, float64x2_t __v, const int __lane)
+{
+  return vmulx_f64 (__a, __aarch64_vdup_laneq_f64 (__v, __lane));
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmulxq_laneq_f32 (float32x4_t __a, float32x4_t __v, const int __lane)
+{
+  return vmulxq_f32 (__a, __aarch64_vdupq_laneq_f32 (__v, __lane));
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vmulxq_laneq_f64 (float64x2_t __a, float64x2_t __v, const int __lane)
+{
+  return vmulxq_f64 (__a, __aarch64_vdupq_laneq_f64 (__v, __lane));
+}
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vmulxs_lane_f32 (float32_t __a, float32x2_t __v, const int __lane)
+{
+  return vmulxs_f32 (__a, __aarch64_vget_lane_any (__v, __lane));
+}
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vmulxs_laneq_f32 (float32_t __a, float32x4_t __v, const int __lane)
+{
+  return vmulxs_f32 (__a, __aarch64_vget_lane_any (__v, __lane));
+}
+
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vmulxd_lane_f64 (float64_t __a, float64x1_t __v, const int __lane)
+{
+  return vmulxd_f64 (__a, __aarch64_vget_lane_any (__v, __lane));
+}
+
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vmulxd_laneq_f64 (float64_t __a, float64x2_t __v, const int __lane)
+{
+  return vmulxd_f64 (__a, __aarch64_vget_lane_any (__v, __lane));
+}
+
 /* vpmax */
 
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulx_lane_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_lane_f32_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..4f80678b2df32cff3237fb98354bee5754bf88f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_lane_f32_1.c
@@ -0,0 +1,70 @@
+/* Test the vmulx_lane_f32 AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+float32x2_t __attribute__ ((noinline))
+test_vmulx_lane0_f32 (float32x2_t vec1_1, float32x2_t vec1_2)
+{
+  return vmulx_lane_f32 (vec1_1, vec1_2, 0);
+}
+
+float32x2_t __attribute__ ((noinline))
+test_vmulx_lane1_f32 (float32x2_t vec1_1, float32x2_t vec1_2)
+{
+  return vmulx_lane_f32 (vec1_1, vec1_2, 1);
+}
+
+void
+test_case (float32_t v1[2], float32_t v2[2], float32_t e1[2], float32_t e2[2])
+{
+  int i;
+  float32x2_t vec1_1 = vld1_f32 (v1);
+  float32x2_t vec1_2 = vld1_f32 (v2);
+
+
+  float32x2_t actual1 = test_vmulx_lane0_f32 (vec1_1, vec1_2);
+  float32_t actual1_1[2];
+  vst1_f32 (actual1_1, actual1);
+
+  for (i = 0; i < 2; ++i)
+    if (actual1_1[i] != e1[i])
+      abort ();
+
+  float32x2_t actual2 = test_vmulx_lane1_f32 (vec1_1, vec1_2);
+  float32_t actual2_1[2];
+  vst1_f32 (actual2_1, actual2);
+
+  for (i = 0; i < 2; ++i)
+    if (actual2_1[i] != e2[i])
+      abort ();
+}
+
+int
+main (void)
+{
+  float32_t v1 = 3.14159265359;
+  float32_t v2 = 1.383894;
+  float32_t v3 = -2.71828;
+  float32_t v4 = -3.4891931;
+
+  float32_t v1_1[] = {v1, v2};
+  float32_t v1_2[] = {v3, v4};
+  float32_t e1_1[] = {v1 * v3, v2 * v3};
+  float32_t e1_2[] = {v1 * v4, v2 * v4};
+  test_case (v1_1, v1_2, e1_1, e1_2);
+
+  float32_t v2_1[] = {0, -0.0};
+  float32_t v2_2[] = {__builtin_huge_valf (), -__builtin_huge_valf ()};
+  float32_t e2_1[] = {2.0, -2.0};
+  float32_t e2_2[] = {-2.0, 2.0};
+  test_case (v2_1, v2_2, e2_1, e2_2);
+
+  return 0;
+}
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */
+/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[1\\\]\n" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulx_lane_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_lane_f64_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..0ebdb963f60659843e505f57a2916a5a88f23ec3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_lane_f64_1.c
@@ -0,0 +1,62 @@
+/* Test the vmulx_lane_f64 AArch64 SIMD intrinsic.
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +float64x1_t __attribute__ ((noinline)) +test_vmulx_lane_f64 (float64x1_t vec1_1, float64x1_t vec1_2) +{ + return vmulx_lane_f64 (vec1_1, vec1_2, 0); +} + +void +test_case (float64_t v1[], float64_t v2[], float64_t e[]) +{ + float64x1_t vec1_1 = vld1_f64 (v1); + float64x1_t vec1_2 = vld1_f64 (v2); + float64x1_t expected1 = vld1_f64 (e); + + float64x1_t actual1 = test_vmulx_lane_f64 (vec1_1, vec1_2); + float64_t actual[1]; + vst1_f64 (actual, actual1); + if (actual[0] != e[0]) + abort (); +} +int +main (void) +{ + float64_t v1 = 3.14159265359; + float64_t v2 = -2.71828; + + float64_t v1_1[] = {v1}; + float64_t v1_2[] = {v2}; + float64_t e1[] = {v1 * v2}; + test_case (v1_1, v1_2, e1); + + float64_t v2_1[] = {0}; + float64_t v2_2[] = {__builtin_huge_val ()}; + float64_t e2[] = {2.0}; + test_case (v2_1, v2_2, e2); + + float64_t v4_1[] = {0}; + float64_t v4_2[] = {-__builtin_huge_val ()}; + float64_t e4[] = {-2.0}; + test_case (v4_1, v4_2, e4); + + float64_t v5_1[] = {-0.0}; + float64_t v5_2[] = {__builtin_huge_val ()}; + float64_t e5[] = {-2.0}; + test_case (v5_1, v5_2, e5); + + float64_t v6_1[] = {-0.0}; + float64_t v6_2[] = {-__builtin_huge_val ()}; + float64_t e6[] = {2.0}; + test_case (v6_1, v6_2, e6); + + return 0; +} +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulx_laneq_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_laneq_f32_1.c new file mode 100644 index 0000000000000000000000000000000000000000..3e968b7c277155c20721c45f074b4bfe02431d23 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_laneq_f32_1.c @@ -0,0 +1,111 @@ +/* Test the vmulx_laneq_f32 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +float32x2_t __attribute__ ((noinline)) +test_vmulx_laneq_f32_lane0 (float32x2_t vec1_1, float32x4_t vec1_2) +{ + return vmulx_laneq_f32 (vec1_1, vec1_2, 0); +} + +float32x2_t __attribute__ ((noinline)) +test_vmulx_laneq_f32_lane1 (float32x2_t vec1_1, float32x4_t vec1_2) +{ + return vmulx_laneq_f32 (vec1_1, vec1_2, 1); +} + +float32x2_t __attribute__ ((noinline)) +test_vmulx_laneq_f32_lane2 (float32x2_t vec1_1, float32x4_t vec1_2) +{ + return vmulx_laneq_f32 (vec1_1, vec1_2, 2); +} + +float32x2_t __attribute__ ((noinline)) +test_vmulx_laneq_f32_lane3 (float32x2_t vec1_1, float32x4_t vec1_2) +{ + return vmulx_laneq_f32 (vec1_1, vec1_2, 3); +} + +#define PASS_ARRAY(...) 
{__VA_ARGS__} + +#define SETUP_VEC(V1_D, V2_D, EXP0, EXP1, EXP2, EXP3, I) \ + void set_and_test_case##I () \ + { \ + float32_t vec1_data[] = V1_D; \ + float32x2_t vec1 = vld1_f32 (vec1_data); \ + float32_t vec2_data[] = V2_D; \ + float32x4_t vec2 = vld1q_f32 (vec2_data); \ + \ + float32_t expected_lane0[] = EXP0; \ + float32_t expected_lane1[] = EXP1; \ + float32_t expected_lane2[] = EXP2; \ + float32_t expected_lane3[] = EXP3; \ + \ + float32x2_t actual_lane0_v = \ + test_vmulx_laneq_f32_lane0 (vec1, vec2); \ + float32_t actual_lane0[2]; \ + vst1_f32 (actual_lane0, actual_lane0_v); \ + if (actual_lane0[0] != expected_lane0[0] \ + || actual_lane0[1] != expected_lane0[1]) \ + abort (); \ + \ + float32x2_t actual_lane1_v = \ + test_vmulx_laneq_f32_lane1 (vec1, vec2); \ + float32_t actual_lane1[2]; \ + vst1_f32 (actual_lane1, actual_lane1_v); \ + if (actual_lane1[0] != expected_lane1[0] \ + || actual_lane1[1] != expected_lane1[1]) \ + abort (); \ + \ + float32x2_t actual_lane2_v = \ + test_vmulx_laneq_f32_lane2 (vec1, vec2); \ + float32_t actual_lane2[2]; \ + vst1_f32 (actual_lane2, actual_lane2_v); \ + if (actual_lane2[0] != expected_lane2[0] \ + || actual_lane2[1] != expected_lane2[1]) \ + abort (); \ + \ + float32x2_t actual_lane3_v = \ + test_vmulx_laneq_f32_lane3 (vec1, vec2); \ + float32_t actual_lane3[2]; \ + vst1_f32 (actual_lane3, actual_lane3_v); \ + if (actual_lane3[0] != expected_lane3[0] \ + || actual_lane3[1] != expected_lane3[1]) \ + abort (); \ + \ + } \ + +float32_t v1 = 3.14159265359; +float32_t v2 = 1.383894; +float32_t v3 = -2.71828; +float32_t v4 = -3.4891931; + +float32_t v5 = 0.0; +float32_t v6 = -0.0; +float32_t v7 = __builtin_huge_valf (); +float32_t v8 = -__builtin_huge_valf (); + +SETUP_VEC (PASS_ARRAY (v1, v2), PASS_ARRAY (v1, v2, v3, v4), + PASS_ARRAY (v1*v1, v1*v2), PASS_ARRAY (v1*v2, v2*v2), + PASS_ARRAY (v1*v3, v2*v3), PASS_ARRAY (v1*v4, v2*v4), 1) + +SETUP_VEC (PASS_ARRAY (v5, v6), PASS_ARRAY (v5, v6, v7, v8), + PASS_ARRAY (0.0, -0.0), PASS_ARRAY (-0.0, 0.0), + PASS_ARRAY (2.0, -2.0), PASS_ARRAY (-2.0, 2.0), 2) + +int +main (void) +{ + set_and_test_case1 (); + set_and_test_case2 (); + return 0; +} +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[1\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[2\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.2\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulx_laneq_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_laneq_f64_1.c new file mode 100644 index 0000000000000000000000000000000000000000..db79d5355bc925098555788c0dd09c99029576c7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulx_laneq_f64_1.c @@ -0,0 +1,76 @@ +/* Test the vmulx_laneq_f64 AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +float64x1_t __attribute__ ((noinline)) +test_vmulx_laneq_f64_lane0 (float64x1_t vec1_1, float64x2_t vec1_2) +{ + return vmulx_laneq_f64 (vec1_1, vec1_2, 0); +} + +float64x1_t __attribute__ ((noinline)) +test_vmulx_laneq_f64_lane1 (float64x1_t vec1_1, float64x2_t vec1_2) +{ + return vmulx_laneq_f64 (vec1_1, vec1_2, 1); +} +#define PASS_ARRAY(...) {__VA_ARGS__} + +#define SETUP_VEC(V1_D, V2_D, EXP1, EXP2, I) \ + void set_and_test_case##I () \ + { \ + float64_t vec1_data[] = V1_D; \ + float64x1_t vec1 = vld1_f64 (vec1_data); \ + float64_t vec2_data[] = V2_D; \ + float64x2_t vec2 = vld1q_f64 (vec2_data); \ + float64_t expected_lane0[] = EXP1; \ + float64_t expected_lane1[] = EXP2; \ + \ + float64x1_t actual_lane0_v = \ + test_vmulx_laneq_f64_lane0 (vec1, vec2); \ + float64_t actual_lane0[1]; \ + vst1_f64 (actual_lane0, actual_lane0_v); \ + if (actual_lane0[0] != expected_lane0[0]) \ + abort (); \ + \ + float64x1_t actual_lane1_v = \ + test_vmulx_laneq_f64_lane1 (vec1, vec2); \ + float64_t actual_lane1[1]; \ + vst1_f64 (actual_lane1, actual_lane1_v); \ + if (actual_lane1[0] != expected_lane1[0]) \ + abort (); \ + } \ + +float64_t v1 = 3.14159265359; +float64_t v2 = 1.383894; +float64_t v3 = -2.71828; + +float64_t v4 = 0.0; +float64_t v5 = __builtin_huge_val (); +float64_t v6 = -__builtin_huge_val (); + +float64_t v7 = -0.0; +float64_t v8 = __builtin_huge_val (); +float64_t v9 = -__builtin_huge_val (); + +SETUP_VEC (PASS_ARRAY (v1), PASS_ARRAY (v2, v3), PASS_ARRAY (v1*v2), + PASS_ARRAY (v1*v3), 1) +SETUP_VEC (PASS_ARRAY (v4), PASS_ARRAY (v5, v6), PASS_ARRAY (2.0), + PASS_ARRAY (-2.0), 2) +SETUP_VEC (PASS_ARRAY (v7), PASS_ARRAY (v8, v9), PASS_ARRAY (-2.0), + PASS_ARRAY (2.0), 3) + +int +main (void) +{ + set_and_test_case1 (); + set_and_test_case2 (); + set_and_test_case3 (); + return 0; +} +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[dD\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[dD\]\\\[1\\\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_lane_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_lane_f64_1.c new file mode 100644 index 0000000000000000000000000000000000000000..b0bf180ef1ac1416f50baa355a095b59505cd5b5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_lane_f64_1.c @@ -0,0 +1,54 @@ +/* Test the vmulxd_lane_f64 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +float64_t __attribute__ ((noinline)) +test_vmulxd_lane_f64_lane0 (float64_t vec1_1, float64x1_t vec1_2) +{ + return vmulxd_lane_f64 (vec1_1, vec1_2, 0); +} + +#define PASS_ARRAY(...) 
{__VA_ARGS__} + +#define SETUP_VEC(V1_D, V2_D, EXP1, I) \ + void set_and_test_case##I () \ + { \ + float64_t vec1 = V1_D; \ + float64_t vec2_data[] = V2_D; \ + float64x1_t vec2 = vld1_f64 (vec2_data); \ + float64_t expected_lane0 = EXP1; \ + float64_t actual_lane0 = test_vmulxd_lane_f64_lane0 (vec1, vec2); \ + if (actual_lane0 != expected_lane0) \ + abort (); \ + } \ + +float64_t v1 = 3.14159265359; +float64_t v2 = 1.383894; + +float64_t v4 = 0.0; +float64_t v5 = -0.0; +float64_t v6 = __builtin_huge_val (); +float64_t v7 = -__builtin_huge_val (); + +SETUP_VEC (v1, PASS_ARRAY (v2), v1*v2, 1) +SETUP_VEC (v4, PASS_ARRAY (v6), 2.0, 2) +SETUP_VEC (v4, PASS_ARRAY (v7), -2.0, 3) +SETUP_VEC (v5, PASS_ARRAY (v6), -2.0, 4) +SETUP_VEC (v5, PASS_ARRAY (v7), 2.0, 5) + +int +main (void) +{ + set_and_test_case1 (); + set_and_test_case2 (); + set_and_test_case3 (); + set_and_test_case4 (); + set_and_test_case5 (); + return 0; +} +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?(?:\[vV\]\[0-9\]+\.\[dD\]\\\[0\\\]|\[dD\]\[0-9\])\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_laneq_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_laneq_f64_1.c new file mode 100644 index 0000000000000000000000000000000000000000..3f8303c574ff40967c5b9ce5a152d70c4a11a9dc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxd_laneq_f64_1.c @@ -0,0 +1,62 @@ +/* Test the vmulxd_laneq_f64 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +float64_t __attribute__ ((noinline)) +test_vmulxd_laneq_f64_lane0 (float64_t vec1_1, float64x2_t vec1_2) +{ + return vmulxd_laneq_f64 (vec1_1, vec1_2, 0); +} + +float64_t __attribute__ ((noinline)) +test_vmulxd_laneq_f64_lane1 (float64_t vec1_1, float64x2_t vec1_2) +{ + return vmulxd_laneq_f64 (vec1_1, vec1_2, 1); +} + +#define PASS_ARRAY(...) 
{__VA_ARGS__} + +#define SETUP_VEC(V1_D, V2_D, EXP1, EXP2, I) \ + void set_and_test_case##I () \ + { \ + float64_t vec1 = V1_D; \ + float64_t vec2_data[] = V2_D; \ + float64x2_t vec2 = vld1q_f64 (vec2_data); \ + float64_t expected_lane0 = EXP1; \ + float64_t expected_lane1 = EXP2; \ + float64_t actual_lane0 = test_vmulxd_laneq_f64_lane0 (vec1, vec2); \ + if (actual_lane0 != expected_lane0) \ + abort (); \ + float64_t actual_lane1 = test_vmulxd_laneq_f64_lane1 (vec1, vec2); \ + if (actual_lane1 != expected_lane1) \ + abort (); \ + } \ + +float64_t v1 = 3.14159265359; +float64_t v2 = 1.383894; +float64_t v3 = -2.71828; + +float64_t v4 = 0.0; +float64_t v5 = -0.0; +float64_t v6 = __builtin_huge_val (); +float64_t v7 = -__builtin_huge_val (); + +SETUP_VEC (v1, PASS_ARRAY (v2, v3), v1*v2, v1*v3, 1) +SETUP_VEC (v4, PASS_ARRAY (v6, v7), 2.0, -2.0, 2) +SETUP_VEC (v5, PASS_ARRAY (v6, v7), -2.0, 2.0, 3) + +int +main (void) +{ + set_and_test_case1 (); + set_and_test_case2 (); + set_and_test_case3 (); + return 0; +} +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[dD\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[dD\]\\\[1\\\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_lane_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_lane_f32_1.c new file mode 100644 index 0000000000000000000000000000000000000000..b5f586019293f6be0b2e6501370883b919bc8ba4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_lane_f32_1.c @@ -0,0 +1,79 @@ +/* Test the vmulxq_lane_f32 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +float32x4_t __attribute__ ((noinline)) +test_vmulxq_lane_f32_lane0 (float32x4_t vec1_1, float32x2_t vec1_2) +{ + return vmulxq_lane_f32 (vec1_1, vec1_2, 0); +} + +float32x4_t __attribute__ ((noinline)) +test_vmulxq_lane_f32_lane1 (float32x4_t vec1_1, float32x2_t vec1_2) +{ + return vmulxq_lane_f32 (vec1_1, vec1_2, 1); +} + +#define PASS_ARRAY(...) 
{__VA_ARGS__} + +#define SETUP_VEC(V1_D, V2_D, EXP0, EXP1, I) \ + void set_and_test_case##I () \ + { \ + int i; \ + float32_t vec1_data[] = V1_D; \ + float32x4_t vec1 = vld1q_f32 (vec1_data); \ + float32_t vec2_data[] = V2_D; \ + float32x2_t vec2 = vld1_f32 (vec2_data); \ + \ + float32_t expected_lane0[] = EXP0; \ + float32_t expected_lane1[] = EXP1; \ + \ + float32x4_t actual_lane0_v = \ + test_vmulxq_lane_f32_lane0 (vec1, vec2); \ + float32_t actual_lane0[4]; \ + vst1q_f32 (actual_lane0, actual_lane0_v); \ + for (i = 0; i < 4; ++i) \ + if (actual_lane0[i] != expected_lane0[i]) \ + abort (); \ + \ + float32x4_t actual_lane1_v = \ + test_vmulxq_lane_f32_lane1 (vec1, vec2); \ + float32_t actual_lane1[4]; \ + vst1q_f32 (actual_lane1, actual_lane1_v); \ + for (i = 0; i < 4; ++i) \ + if (actual_lane1[i] != expected_lane1[i]) \ + abort (); \ + } \ + +float32_t v1 = 3.14159265359; +float32_t v2 = 1.383894; +float32_t v3 = -2.71828; +float32_t v4 = -3.4891931; + +float32_t v5 = 0.0; +float32_t v6 = -0.0; +float32_t v7 = __builtin_huge_valf (); +float32_t v8 = -__builtin_huge_valf (); + +SETUP_VEC (PASS_ARRAY (v1, v2, v3, v4), PASS_ARRAY (v1, v2), + PASS_ARRAY (v1*v1, v2*v1, v3*v1, v4*v1), + PASS_ARRAY (v1*v2, v2*v2, v3*v2, v4*v2), 1) + +SETUP_VEC (PASS_ARRAY (v5, v6, v7, v8), PASS_ARRAY (v5, v6), + PASS_ARRAY (0.0, -0.0, 2.0, -2.0), + PASS_ARRAY (-0.0, 0.0, -2.0, 2.0), 2) + +int +main (void) +{ + set_and_test_case1 (); + set_and_test_case2 (); + return 0; +} +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[1\\\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_lane_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_lane_f64_1.c new file mode 100644 index 0000000000000000000000000000000000000000..e535dce7b75aa7998c937d8568b7674412855afc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_lane_f64_1.c @@ -0,0 +1,61 @@ +/* Test the vmulxq_lane_f64 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +float64x2_t __attribute__ ((noinline)) +test_vmulxq_lane_f64_lane0 (float64x2_t vec1_1, float64x1_t vec1_2) +{ + return vmulxq_lane_f64 (vec1_1, vec1_2, 0); +} + +#define PASS_ARRAY(...) 
{__VA_ARGS__} + +#define SETUP_VEC(V1_D, V2_D, EXP0, I) \ + void set_and_test_case##I () \ + { \ + int i; \ + float64_t vec1_data[] = V1_D; \ + float64x2_t vec1 = vld1q_f64 (vec1_data); \ + float64_t vec2_data[] = V2_D; \ + float64x1_t vec2 = vld1_f64 (vec2_data); \ + \ + float64_t expected_lane0[] = EXP0; \ + float64x2_t actual_lane0_v \ + = test_vmulxq_lane_f64_lane0 (vec1, vec2); \ + float64_t actual_lane0[2]; \ + vst1q_f64 (actual_lane0, actual_lane0_v); \ + for (i = 0; i < 1; ++i) \ + if (actual_lane0[i] != expected_lane0[i]) \ + abort (); \ + } \ + +float64_t v1 = 3.14159265359; +float64_t v2 = 1.383894; + +float64_t v3 = __builtin_huge_val (); +float64_t v4 = -__builtin_huge_val (); + +float64_t v5 = 0.0; +float64_t v6 = -0.0; + + +SETUP_VEC (PASS_ARRAY (v1, v2), PASS_ARRAY (v1), PASS_ARRAY (v1*v1, v2*v1), 1) + +SETUP_VEC (PASS_ARRAY (v3, v4), PASS_ARRAY (v5), PASS_ARRAY (2.0, -2.0), 2) + +SETUP_VEC (PASS_ARRAY (v3, v4), PASS_ARRAY (v6), PASS_ARRAY (-2.0, 2.0), 3) + +int +main (void) +{ + set_and_test_case1 (); + set_and_test_case2 (); + set_and_test_case3 (); + return 0; +} +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.\[dD\]\\\[0\\\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_laneq_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_laneq_f32_1.c new file mode 100644 index 0000000000000000000000000000000000000000..264c0c2e6167a1e5d26d8516de20cab411b78d8d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_laneq_f32_1.c @@ -0,0 +1,118 @@ +/* Test the vmulxq_laneq_f32 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +float32x4_t __attribute__ ((noinline)) +test_vmulxq_laneq_f32_lane0 (float32x4_t vec1_1, float32x4_t vec1_2) +{ + return vmulxq_laneq_f32 (vec1_1, vec1_2, 0); +} + +float32x4_t __attribute__ ((noinline)) +test_vmulxq_laneq_f32_lane1 (float32x4_t vec1_1, float32x4_t vec1_2) +{ + return vmulxq_laneq_f32 (vec1_1, vec1_2, 1); +} + +float32x4_t __attribute__ ((noinline)) +test_vmulxq_laneq_f32_lane2 (float32x4_t vec1_1, float32x4_t vec1_2) +{ + return vmulxq_laneq_f32 (vec1_1, vec1_2, 2); +} + +float32x4_t __attribute__ ((noinline)) +test_vmulxq_laneq_f32_lane3 (float32x4_t vec1_1, float32x4_t vec1_2) +{ + return vmulxq_laneq_f32 (vec1_1, vec1_2, 3); +} + +#define PASS_ARRAY(...) 
{__VA_ARGS__} + +#define SETUP_VEC(V1_D, V2_D, EXP0, EXP1, EXP2, EXP3, I) \ + void set_and_test_case##I () \ + { \ + int i; \ + float32_t vec1_data[] = V1_D; \ + float32x4_t vec1 = vld1q_f32 (vec1_data); \ + float32_t vec2_data[] = V2_D; \ + float32x4_t vec2 = vld1q_f32 (vec2_data); \ + \ + float32_t expected_lane0[] = EXP0; \ + float32_t expected_lane1[] = EXP1; \ + float32_t expected_lane2[] = EXP2; \ + float32_t expected_lane3[] = EXP3; \ + \ + float32x4_t actual_lane0_v = \ + test_vmulxq_laneq_f32_lane0 (vec1, vec2); \ + float32_t actual_lane0[4]; \ + vst1q_f32 (actual_lane0, actual_lane0_v); \ + for (i = 0; i < 4; ++i) \ + if (actual_lane0[i] != expected_lane0[i]) \ + abort (); \ + \ + float32x4_t actual_lane1_v = \ + test_vmulxq_laneq_f32_lane1 (vec1, vec2); \ + float32_t actual_lane1[4]; \ + vst1q_f32 (actual_lane1, actual_lane1_v); \ + for (i = 0; i < 4; ++i) \ + if (actual_lane1[i] != expected_lane1[i]) \ + abort (); \ + \ + float32x4_t actual_lane2_v = \ + test_vmulxq_laneq_f32_lane2 (vec1, vec2); \ + float32_t actual_lane2[4]; \ + vst1q_f32 (actual_lane2, actual_lane2_v); \ + for (i = 0; i < 4; ++i) \ + if (actual_lane2[i] != expected_lane2[i]) \ + abort (); \ + \ + float32x4_t actual_lane3_v = \ + test_vmulxq_laneq_f32_lane3 (vec1, vec2); \ + float32_t actual_lane3[4]; \ + vst1q_f32 (actual_lane3, actual_lane3_v); \ + for (i = 0; i < 4; ++i) \ + if (actual_lane3[i] != expected_lane3[i]) \ + abort (); \ + } \ + +float32_t v1 = 3.14159265359; +float32_t v2 = 1.383894; +float32_t v3 = -2.71828; +float32_t v4 = -3.4891931; + +float32_t v5 = 0.0; +float32_t v6 = -0.0; +float32_t v7 = __builtin_huge_valf (); +float32_t v8 = -__builtin_huge_valf (); + +float32_t spec = __builtin_huge_valf () * __builtin_huge_valf (); +float32_t spec_n = -__builtin_huge_valf () * __builtin_huge_valf (); + +SETUP_VEC (PASS_ARRAY (v1, v2, v3, v4), PASS_ARRAY (v1, v2, v3, v4), + PASS_ARRAY (v1*v1, v1*v2, v1*v3, v1*v4), + PASS_ARRAY (v1*v2, v2*v2, v2*v3, v2*v4), + PASS_ARRAY (v1*v3, v2*v3, v3*v3, v4*v3), + PASS_ARRAY (v1*v4, v2*v4, v3*v4, v4*v4), 1) + +SETUP_VEC (PASS_ARRAY (v5, v6, v7, v8), PASS_ARRAY (v5, v6, v7, v8), + PASS_ARRAY (0.0, -0.0, 2.0, -2.0), + PASS_ARRAY (-0.0, 0.0, -2.0, 2.0), + PASS_ARRAY (2.0, -2.0, spec, spec_n), + PASS_ARRAY (-2.0, 2.0, spec_n, spec), 2) + +int +main (void) +{ + set_and_test_case1 (); + set_and_test_case2 (); + return 0; +} +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[1\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[2\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.4\[sS\], ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_laneq_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_laneq_f64_1.c new file mode 100644 index 0000000000000000000000000000000000000000..14e9852b32da6a4609117c35bbc85f564f82c350 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxq_laneq_f64_1.c @@ -0,0 +1,78 @@ +/* Test the vmulxq_laneq_f64 AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +float64x2_t __attribute__ ((noinline)) +test_vmulxq_laneq_f64_lane0 (float64x2_t vec1_1, float64x2_t vec1_2) +{ + return vmulxq_laneq_f64 (vec1_1, vec1_2, 0); +} + +float64x2_t __attribute__ ((noinline)) +test_vmulxq_laneq_f64_lane1 (float64x2_t vec1_1, float64x2_t vec1_2) +{ + return vmulxq_laneq_f64 (vec1_1, vec1_2, 1); +} + +#define PASS_ARRAY(...) {__VA_ARGS__} + +#define SETUP_VEC(V1_D, V2_D, EXP0, EXP1, I) \ + void set_and_test_case##I () \ + { \ + int i; \ + float64_t vec1_data[] = V1_D; \ + float64x2_t vec1 = vld1q_f64 (vec1_data); \ + float64_t vec2_data[] = V2_D; \ + float64x2_t vec2 = vld1q_f64 (vec2_data); \ + \ + float64_t expected_lane0[] = EXP0; \ + float64_t expected_lane1[] = EXP1; \ + \ + float64x2_t actual_lane0_v = \ + test_vmulxq_laneq_f64_lane0 (vec1, vec2); \ + float64_t actual_lane0[2]; \ + vst1q_f64 (actual_lane0, actual_lane0_v); \ + for (i = 0; i < 2; ++i) \ + if (actual_lane0[i] != expected_lane0[i]) \ + abort (); \ + \ + float64x2_t actual_lane1_v = \ + test_vmulxq_laneq_f64_lane1 (vec1, vec2); \ + float64_t actual_lane1[2]; \ + vst1q_f64 (actual_lane1, actual_lane1_v); \ + for (i = 0; i < 2; ++i) \ + if (actual_lane1[i] != expected_lane1[i]) \ + abort (); \ + \ + } \ + +float64_t v1 = 3.14159265359; +float64_t v2 = 1.383894; + +float64_t v3 = 0.0; +float64_t v4 = -0.0; +float64_t v5 = __builtin_huge_val (); +float64_t v6 = -__builtin_huge_val (); + +float64_t spec = __builtin_huge_val () * __builtin_huge_val (); + +SETUP_VEC (PASS_ARRAY (v1, v2), PASS_ARRAY (v1, v2), PASS_ARRAY (v1*v1, v2*v1), + PASS_ARRAY (v1*v2, v2*v2), 1) + +SETUP_VEC (PASS_ARRAY (v3, v4), PASS_ARRAY (v5, v6), PASS_ARRAY (2.0, -2.0), + PASS_ARRAY (-2.0, 2.0), 2) + +int +main (void) +{ + set_and_test_case1 (); + set_and_test_case2 (); + return 0; +} +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.\[dD\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.2\[dD\], ?\[vV\]\[0-9\]+\.\[dD\]\\\[1\\\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_lane_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_lane_f32_1.c new file mode 100644 index 0000000000000000000000000000000000000000..124dcd8c4ec187b38ffb03606fad4121d9280451 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_lane_f32_1.c @@ -0,0 +1,61 @@ +/* Test the vmulxs_lane_f32 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +float32_t __attribute__ ((noinline)) +test_vmulxs_lane_f32_lane0 (float32_t vec1_1, float32x2_t vec1_2) +{ + return vmulxs_lane_f32 (vec1_1, vec1_2, 0); +} + +float32_t __attribute__ ((noinline)) +test_vmulxs_lane_f32_lane1 (float32_t vec1_1, float32x2_t vec1_2) +{ + return vmulxs_lane_f32 (vec1_1, vec1_2, 1); +} + +#define PASS_ARRAY(...) 
{__VA_ARGS__} + +#define SETUP_VEC(V1_D, V2_D, EXP1, EXP2, I) \ + void set_and_test_case##I () \ + { \ + float32_t vec1 = V1_D; \ + float32_t vec2_data[] = V2_D; \ + float32x2_t vec2 = vld1_f32 (vec2_data); \ + float32_t expected_lane0 = EXP1; \ + float32_t expected_lane1 = EXP2; \ + float32_t actual_lane0 = test_vmulxs_lane_f32_lane0 (vec1, vec2); \ + if (actual_lane0 != expected_lane0) \ + abort (); \ + float32_t actual_lane1 = test_vmulxs_lane_f32_lane1 (vec1, vec2); \ + if (actual_lane1 != expected_lane1) \ + abort (); \ + } \ + +float32_t v1 = 3.14159265359; +float32_t v2 = 1.383894; + +float32_t v4 = 0.0; +float32_t v5 = -0.0; +float32_t v6 = __builtin_huge_valf (); +float32_t v7 = -__builtin_huge_valf (); + +SETUP_VEC (v1, PASS_ARRAY (v1, v2), v1*v1, v1*v2, 1) +SETUP_VEC (v4, PASS_ARRAY (v6, v7), 2.0, -2.0, 2) +SETUP_VEC (v5, PASS_ARRAY (v6, v7), -2.0, 2.0, 3) + +int +main (void) +{ + set_and_test_case1 (); + set_and_test_case2 (); + set_and_test_case3 (); + return 0; +} +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[1\\\]\n" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_laneq_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_laneq_f32_1.c new file mode 100644 index 0000000000000000000000000000000000000000..255f0968822ffee7f3429c5997b02e3fcfca68f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmulxs_laneq_f32_1.c @@ -0,0 +1,85 @@ +/* Test the vmulxs_laneq_f32 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +float32_t __attribute__ ((noinline)) +test_vmulxs_laneq_f32_lane0 (float32_t vec1_1, float32x4_t vec1_2) +{ + return vmulxs_laneq_f32 (vec1_1, vec1_2, 0); +} + +float32_t __attribute__ ((noinline)) +test_vmulxs_laneq_f32_lane1 (float32_t vec1_1, float32x4_t vec1_2) +{ + return vmulxs_laneq_f32 (vec1_1, vec1_2, 1); +} + +float32_t __attribute__ ((noinline)) +test_vmulxs_laneq_f32_lane2 (float32_t vec1_1, float32x4_t vec1_2) +{ + return vmulxs_laneq_f32 (vec1_1, vec1_2, 2); +} + +float32_t __attribute__ ((noinline)) +test_vmulxs_laneq_f32_lane3 (float32_t vec1_1, float32x4_t vec1_2) +{ + return vmulxs_laneq_f32 (vec1_1, vec1_2, 3); +} + +#define PASS_ARRAY(...) 
{__VA_ARGS__} + +#define SETUP_VEC(V1_D, V2_D, EXP1, EXP2, EXP3, EXP4, I) \ + void set_and_test_case##I () \ + { \ + float32_t vec1 = V1_D; \ + float32_t vec2_data[] = V2_D; \ + float32x4_t vec2 = vld1q_f32 (vec2_data); \ + float32_t expected_lane0 = EXP1; \ + float32_t expected_lane1 = EXP2; \ + float32_t expected_lane2 = EXP3; \ + float32_t expected_lane3 = EXP4; \ + float32_t actual_lane0 = test_vmulxs_laneq_f32_lane0 (vec1, vec2); \ + if (actual_lane0 != expected_lane0) \ + abort (); \ + float32_t actual_lane1 = test_vmulxs_laneq_f32_lane1 (vec1, vec2); \ + if (actual_lane1 != expected_lane1) \ + abort (); \ + float32_t actual_lane2 = test_vmulxs_laneq_f32_lane2 (vec1, vec2); \ + if (actual_lane2 != expected_lane2) \ + abort (); \ + float32_t actual_lane3 = test_vmulxs_laneq_f32_lane3 (vec1, vec2); \ + if (actual_lane3 != expected_lane3) \ + abort (); \ + } \ + +float32_t v1 = 3.14159265359; +float32_t v2 = 1.383894; +float32_t v3 = -2.71828; +float32_t v4 = -3.4891931; + +float32_t v5 = 0.0; +float32_t v6 = -0.0; +float32_t v7 = __builtin_huge_valf (); +float32_t v8 = -__builtin_huge_valf (); + +SETUP_VEC (v1, PASS_ARRAY (v1, v2, v3, v4), v1*v1, v1*v2, v3*v1, v1*v4, 1) +SETUP_VEC (v5, PASS_ARRAY (v5, v6, v7, v8), 0.0, -0.0, 2.0, -2.0, 2) +SETUP_VEC (v6, PASS_ARRAY (v5, v6, v7, v8), -0.0, 0.0, -2.0, 2.0, 3) + +int +main (void) +{ + set_and_test_case1 (); + set_and_test_case2 (); + set_and_test_case3 (); + return 0; +} +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[0\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[1\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[2\\\]\n" 1 } } */ +/* { dg-final { scan-assembler-times "fmulx\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */
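Not part of the patch: a short illustrative sketch (function names invented here) of how the intrinsics added above are meant to be used. With the new arm_neon.h definitions, a by-lane extended multiply is written directly with a lane argument, and the new combine patterns in aarch64-simd.md are intended to fold the vdup implied by the intrinsic into the by-element form of fmulx, which is what the dg-final scans in the tests check for.

#include <arm_neon.h>

/* Illustrative only: multiply every element of ROWS by lane 2 of SCALES
   using the extended multiply (fmulx).  */
float32x4_t
scale_rows_by_lane2 (float32x4_t rows, float32x4_t scales)
{
  return vmulxq_laneq_f32 (rows, scales, 2);
}

/* Illustrative only: scalar variant, multiplying A by lane 1 of V.  */
float64_t
scale_by_lane1 (float64_t a, float64x2_t v)
{
  return vmulxd_laneq_f64 (a, v, 1);
}

At -O3 the first function is expected to assemble to a single by-element instruction such as "fmulx v0.4s, v0.4s, v1.s[2]" rather than a dup followed by a vector fmulx.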