diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 6814dae079c9ff40aaa2bb625432bf9eb8906b73..b49f8b79b11cbb1888c503d9a9384424f44bde05 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3414,6 +3414,78 @@ (define_insn "aarch64_faddp<mode>"
   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
 )
 
+;; Combine patterns for a scalar add whose two inputs come from the same
+;; vector register, one of them being lane 1 (after ENDIAN_LANE_N); the
+;; sum is emitted as a single pairwise FADDP on the first two elements.
+;;
+;; For the case where both operands are a subreg we need to use a
+;; match_dup since reload cannot enforce that the registers are
+;; the same with a constraint in this case.
+(define_insn "*aarch64_faddp_scalar2<mode>"
+  [(set (match_operand:<VEL> 0 "register_operand" "=w")
+        (plus:<VEL>
+          (vec_select:<VEL>
+            (match_operator:<VEL> 1 "subreg_lowpart_operator"
+              [(match_operand:VHSDF 2 "register_operand" "w")])
+            (parallel [(match_operand 3 "const_int_operand" "n")]))
+          (match_dup:<VEL> 2)))]
+  "TARGET_SIMD
+   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[3])) == 1"
+  "faddp\t%<Vetype>0, %2.2<Vetype>"
+  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
+)
+
+;; Only operand 3 is a (lowpart) subreg here, so the matching constraint
+;; "1" is used to tie it to the vector register in operand 1.
+(define_insn "*aarch64_faddp_scalar<mode>"
+  [(set (match_operand:<VEL> 0 "register_operand" "=w")
+        (plus:<VEL>
+          (vec_select:<VEL>
+            (match_operand:VHSDF 1 "register_operand" "w")
+            (parallel [(match_operand 2 "const_int_operand" "n")]))
+          (match_operand:<VEL> 3 "register_operand" "1")))]
+  "TARGET_SIMD
+   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 1
+   && SUBREG_P (operands[3]) && !SUBREG_P (operands[1])
+   && subreg_lowpart_p (operands[3])"
+  "faddp\t%<Vetype>0, %1.2<Vetype>"
+  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
+)
+
+;; For the case where both operands are a subreg we need to use a
+;; match_dup since reload cannot enforce that the registers are
+;; the same with a constraint in this case.
+(define_insn "*aarch64_addp_scalar2v2di"
+  [(set (match_operand:DI 0 "register_operand" "=w")
+        (plus:DI
+          (vec_select:DI
+            (match_operator:DI 1 "subreg_lowpart_operator"
+              [(match_operand:V2DI 2 "register_operand" "w")])
+            (parallel [(match_operand 3 "const_int_operand" "n")]))
+          (match_dup:DI 2)))]
+  "TARGET_SIMD
+   && ENDIAN_LANE_N (2, INTVAL (operands[3])) == 1"
+  "addp\t%d0, %2.2d"
+  [(set_attr "type" "neon_reduc_add_long")]
+)
+
+;; Only operand 3 is a (lowpart) subreg here, so the matching constraint
+;; "1" is used to tie it to the V2DI register in operand 1.
+(define_insn "*aarch64_addp_scalarv2di"
+  [(set (match_operand:DI 0 "register_operand" "=w")
+        (plus:DI
+          (vec_select:DI
+            (match_operand:V2DI 1 "register_operand" "w")
+            (parallel [(match_operand 2 "const_int_operand" "n")]))
+          (match_operand:DI 3 "register_operand" "1")))]
+  "TARGET_SIMD
+   && ENDIAN_LANE_N (2, INTVAL (operands[2])) == 1
+   && SUBREG_P (operands[3]) && !SUBREG_P (operands[1])
+   && subreg_lowpart_p (operands[3])"
+  "addp\t%d0, %1.2d"
+  [(set_attr "type" "neon_reduc_add_long")]
+)
+
 (define_insn "aarch64_reduc_plus_internal<mode>"
  [(set (match_operand:VDQV 0 "register_operand" "=w")
        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/scalar_addp.c b/gcc/testsuite/gcc.target/aarch64/simd/scalar_addp.c
new file mode 100644
index 0000000000000000000000000000000000000000..ab904ca6a6392a3a068615f68e6b76c0716344ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/scalar_addp.c
@@ -0,0 +1,12 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps -O1 -std=c99" } */
+
+typedef long long v2di __attribute__((vector_size (16)));
+
+/* The sum of both lanes of a V2DI is expected to be one scalar ADDP.  */
+long long
+foo (v2di x)
+{
+  return x[1] + x[0];
+}
+/* { dg-final { scan-assembler-times {addp\td[0-9]+, v[0-9]+.2d} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/scalar_faddp.c b/gcc/testsuite/gcc.target/aarch64/simd/scalar_faddp.c
new file mode 100644
index 0000000000000000000000000000000000000000..2c8a05b46d8b4f7a1634bc04cc61426ba7b9ef91
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/scalar_faddp.c
@@ -0,0 +1,44 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-additional-options "-save-temps -O1" } */
+/* { dg-final { scan-assembler-times "dup" 4 } } */
+/* NOTE(review): the four DUPs are presumably two lane extracts each in foo3 and foo4 -- confirm.  */
+
+typedef double v2df __attribute__((vector_size (16)));
+typedef float v4sf __attribute__((vector_size (16)));
+typedef __fp16 v8hf __attribute__((vector_size (16)));
+
+double
+foo (v2df x)
+{
+  return x[1] + x[0];  /* Lanes 0 and 1 of a two-lane vector, lane 1 written first.  */
+}
+/* { dg-final { scan-assembler-times {faddp\td[0-9]+, v[0-9]+.2d} 1 } } */
+
+float
+foo1 (v4sf x)
+{
+  return x[0] + x[1];  /* Lanes 0 and 1 of a four-lane vector, lane 0 written first.  */
+}
+/* { dg-final { scan-assembler-times {faddp\ts[0-9]+, v[0-9]+.2s} 1 } } */
+
+__fp16
+foo2 (v8hf x)
+{
+  return x[0] + x[1];  /* Lanes 0 and 1 of an eight-lane __fp16 vector.  */
+}
+/* { dg-final { scan-assembler-times {faddp\th[0-9]+, v[0-9]+.2h} 1 } } */
+
+float
+foo3 (v4sf x)
+{
+  return x[2] + x[3];  /* Lanes 2 and 3; no faddp expectation is attached to this function.  */
+}
+
+__fp16
+foo4 (v8hf x)
+{
+  return x[6] + x[7];  /* Lanes 6 and 7; no faddp expectation is attached here either.  */
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/scalar_faddp2.c b/gcc/testsuite/gcc.target/aarch64/simd/scalar_faddp2.c
new file mode 100644
index 0000000000000000000000000000000000000000..b24484da50cd972fe79fca6ecefdc0dbccb16bd5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/scalar_faddp2.c
@@ -0,0 +1,14 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps -O1 -w" } */
+/* NOTE(review): looks like a machine-reduced testcase; it relies on implicit int, with -w given above.  */
+typedef __m128i __attribute__((__vector_size__(2 * sizeof(long))));
+double a[];
+*b;
+fn1() {
+  __m128i c;
+  *(__m128i *)a = c;  /* Stores the (uninitialized) vector c through a cast of a.  */
+  *b = a[0] + a[1];   /* Sum of the first two doubles of a, written through b.  */
+}
+
+/* { dg-final { scan-assembler-times "faddp" 1 } } */
+