public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: Michael Meissner <meissner@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work161-vpair)] Add vector pair optimizations. Date: Thu, 29 Feb 2024 20:31:55 +0000 (GMT) [thread overview] Message-ID: <20240229203155.5A1633858408@sourceware.org> (raw) https://gcc.gnu.org/g:c8384bfe4fe45667cb07d5c2c785ca1df6f83782 commit c8384bfe4fe45667cb07d5c2c785ca1df6f83782 Author: Michael Meissner <meissner@linux.ibm.com> Date: Thu Feb 29 15:30:38 2024 -0500 Add vector pair optimizations. 2024-02-29 Michael Meissner <meissner@linux.ibm.com> gcc/ * config/rs6000/vector-pair.md (vpair_add_neg_<vpair_modename>3): New combiner insn to convert vector plus/neg into a minus operation. (vpair_fma_<vpair_modename>_merge): Optimize multiply, add/subtract, and negation into fma operations if the user specifies to create fmas. (vpair_fma_<vpair_modename>_merge): Likewise. (vpair_fma_<vpair_modename>_merge2): Likewise. (vpair_nfma_<vpair_modename>_merge): Likewise. (vpair_nfms_<vpair_modename>_merge): Likewise. (vpair_nfms_<vpair_modename>_merge2): Likewise. gcc/testsuite/ * gcc.target/powerpc/vector-pair-7.c: New test. * gcc.target/powerpc/vector-pair-8.c: Likewise. * gcc.target/powerpc/vector-pair-9.c: Likewise. * gcc.target/powerpc/vector-pair-10.c: Likewise. * gcc.target/powerpc/vector-pair-11.c: Likewise. * gcc.target/powerpc/vector-pair-12.c: Likewise. 
Diff: --- gcc/config/rs6000/vector-pair.md | 224 ++++++++++++++++++++++ gcc/testsuite/gcc.target/powerpc/vector-pair-10.c | 61 ++++++ gcc/testsuite/gcc.target/powerpc/vector-pair-11.c | 65 +++++++ gcc/testsuite/gcc.target/powerpc/vector-pair-12.c | 65 +++++++ gcc/testsuite/gcc.target/powerpc/vector-pair-7.c | 18 ++ gcc/testsuite/gcc.target/powerpc/vector-pair-8.c | 18 ++ gcc/testsuite/gcc.target/powerpc/vector-pair-9.c | 61 ++++++ 7 files changed, 512 insertions(+) diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md index 39b419c6814..7a81acbdc05 100644 --- a/gcc/config/rs6000/vector-pair.md +++ b/gcc/config/rs6000/vector-pair.md @@ -261,6 +261,31 @@ (set (attr "type") (if_then_else (match_test "<VPAIR_OP> == DIV") (const_string "<vpair_divtype>") (const_string "<vpair_type>")))]) + +;; Optimize vector pair add of a negative value into a subtract. +(define_insn_and_split "*vpair_add_neg_<vpair_modename>3" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "wa") + (unspec:OO + [(match_operand:OO 2 "vsx_register_operand" "wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG) + (const_int VPAIR_FP_ELEMENT)] + VPAIR_FP_BINARY))] + "TARGET_MMA" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:OO + [(match_dup 1) + (match_dup 2) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MINUS))] +{ +} + [(set_attr "length" "8") + (set_attr "type" "<vpair_type>")]) \f ;; Vector pair fused-multiply (FMA) operations. The last argument in the ;; UNSPEC is a CONST_INT which identifies what the scalar element is. @@ -354,3 +379,202 @@ } [(set_attr "length" "8") (set_attr "type" "<vpair_type>")]) +\f +;; Optimize vector pair multiply and vector pair add into vector pair fma, +;; providing the compiler would do this optimization for scalar and vectors. +;; Unlike most of the define_insn_and_splits, this can be done before register +;; allocation. 
+(define_insn_and_split "*vpair_fma_<vpair_modename>_merge" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MULT) + (match_operand:OO 3 "vsx_register_operand" "0,wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_PLUS))] + "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:OO + [(match_dup 1) + (match_dup 2) + (match_dup 3) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_FMA))] +{ +} + [(set_attr "length" "8") + (set_attr "type" "<vpair_type>")]) + +;; Merge multiply and subtract. +(define_insn_and_split "*vpair_fma_<vpair_modename>_merge" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MULT) + (match_operand:OO 3 "vsx_register_operand" "0,wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MINUS))] + "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:OO + [(match_dup 1) + (match_dup 2) + (unspec:OO + [(match_dup 3) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_FMA))] +{ +} + [(set_attr "length" "8") + (set_attr "type" "<vpair_type>")]) + +(define_insn_and_split "*vpair_fma_<vpair_modename>_merge2" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MULT) + (unspec:OO + [(match_operand:OO 3 "vsx_register_operand" "0,wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_PLUS))] + 
"TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:OO + [(match_dup 1) + (match_dup 2) + (unspec:OO + [(match_dup 3) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_FMA))] +{ +} + [(set_attr "length" "8") + (set_attr "type" "<vpair_type>")]) + +;; Merge negate, multiply, and add. +(define_insn_and_split "*vpair_nfma_<vpair_modename>_merge" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(unspec:OO + [(unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MULT) + (match_operand:OO 3 "vsx_register_operand" "0,wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_PLUS) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG))] + "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:OO + [(unspec:OO + [(match_dup 1) + (match_dup 2) + (match_dup 3) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_FMA) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG))] +{ +} + [(set_attr "length" "8") + (set_attr "type" "<vpair_type>")]) + +;; Merge negate, multiply, and subtract. 
+(define_insn_and_split "*vpair_nfms_<vpair_modename>_merge" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(unspec:OO + [(unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MULT) + (match_operand:OO 3 "vsx_register_operand" "0,wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MINUS) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG))] + "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:OO + [(unspec:OO + [(match_dup 1) + (match_dup 2) + (unspec:OO + [(match_dup 3) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_FMA) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG))] +{ +} + [(set_attr "length" "8") + (set_attr "type" "<vpair_type>")]) + +(define_insn_and_split "*vpair_nfms_<vpair_modename>_merge2" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(unspec:OO + [(unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MULT) + (unspec:OO + [(match_operand:OO 3 "vsx_register_operand" "0,wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_PLUS) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG))] + "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:OO + [(unspec:OO + [(match_dup 1) + (match_dup 2) + (unspec:OO + [(match_dup 3) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_FMA) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG))] +{ +} + [(set_attr "length" "8") + (set_attr "type" "<vpair_type>")]) diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-10.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-10.c new file mode 
100644 index 00000000000..d2ee4dd0dd9 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-10.c @@ -0,0 +1,61 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -Ofast -ffp-contract=fast" } */ + +/* Test whether the vector builtin code merges multiply, add/subtract, and + negate into fma operations. */ + +void +test_fma (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmadd{a,m}sp, stxvp. */ + __vector_pair mul = __builtin_vpair_f32_mul (*q, *r); + *p = __builtin_vpair_f32_add (mul, *s); +} + +void +test_fms (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmsub{a,m}sp, stxvp. */ + __vector_pair mul = __builtin_vpair_f32_mul (*q, *r); + __vector_pair neg = __builtin_vpair_f32_neg (*s); + *p = __builtin_vpair_f32_add (mul, neg); +} + +void +test_nfma (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvnmadd{a,m}sp, stxvp. */ + __vector_pair mul = __builtin_vpair_f32_mul (*q, *r); + __vector_pair muladd = __builtin_vpair_f32_add (mul, *s); + *p = __builtin_vpair_f32_neg (muladd); +} + +void +test_nfms (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvnmsub{a,m}sp, stxvp. 
*/ + __vector_pair mul = __builtin_vpair_f32_mul (*q, *r); + __vector_pair neg = __builtin_vpair_f32_neg (*s); + __vector_pair muladd = __builtin_vpair_f32_add (mul, neg); + *p = __builtin_vpair_f32_neg (muladd); +} + +/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxvmadd.sp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvmsub.sp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvnmadd.sp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvnmsub.sp\M} 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-11.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-11.c new file mode 100644 index 00000000000..e635b599aed --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-11.c @@ -0,0 +1,65 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -ffp-contract=off" } */ + +/* Test whether the vector builtin code does not merge multiply, add/subtract, + and negate into fma operations if -ffp-contract is off. */ + +void +test_fma (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmuldp, 2 xvadddp, stxvp. */ + __vector_pair mul = __builtin_vpair_f64_mul (*q, *r); + *p = __builtin_vpair_f64_add (mul, *s); +} + +void +test_fms (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmuldp, 2 xvsubdp, stxvp. */ + __vector_pair mul = __builtin_vpair_f64_mul (*q, *r); + __vector_pair neg = __builtin_vpair_f64_neg (*s); + *p = __builtin_vpair_f64_add (mul, neg); +} + +void +test_nfma (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmuldp, 2 xvadddp, 2 xvnegdp, stxvp. 
*/ + __vector_pair mul = __builtin_vpair_f64_mul (*q, *r); + __vector_pair muladd = __builtin_vpair_f64_add (mul, *s); + *p = __builtin_vpair_f64_neg (muladd); +} + +void +test_nfms (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmuldp, 2 xvsubdp, 2 xvnegdp, stxvp. */ + __vector_pair mul = __builtin_vpair_f64_mul (*q, *r); + __vector_pair neg = __builtin_vpair_f64_neg (*s); + __vector_pair muladd = __builtin_vpair_f64_add (mul, neg); + *p = __builtin_vpair_f64_neg (muladd); +} + +/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxvadddp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxvmuldp\M} 8 } } */ +/* { dg-final { scan-assembler-times {\mxvnegdp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxvsubdp\M} 4 } } */ +/* { dg-final { scan-assembler-not {\mxvmadd.dp\M} } } */ +/* { dg-final { scan-assembler-not {\mxvmsub.dp\M} } } */ +/* { dg-final { scan-assembler-not {\mxvnmadd.dp\M} } } */ +/* { dg-final { scan-assembler-not {\mxvnmsub.dp\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-12.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-12.c new file mode 100644 index 00000000000..4997279473e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-12.c @@ -0,0 +1,65 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -ffp-contract=off" } */ + +/* Test whether the vector builtin code does not merge multiply, add/subtract, + and negate into fma operations if -ffp-contract is off. */ + +void +test_fma (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmulsp, 2 xvaddsp, stxvp. 
*/ + __vector_pair mul = __builtin_vpair_f32_mul (*q, *r); + *p = __builtin_vpair_f32_add (mul, *s); +} + +void +test_fms (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmulsp, 2 xvsubsp, stxvp. */ + __vector_pair mul = __builtin_vpair_f32_mul (*q, *r); + __vector_pair neg = __builtin_vpair_f32_neg (*s); + *p = __builtin_vpair_f32_add (mul, neg); +} + +void +test_nfma (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmulsp, 2 xvaddsp, 2 xvnegsp, stxvp. */ + __vector_pair mul = __builtin_vpair_f32_mul (*q, *r); + __vector_pair muladd = __builtin_vpair_f32_add (mul, *s); + *p = __builtin_vpair_f32_neg (muladd); +} + +void +test_nfms (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmulsp, 2 xvsubsp, 2 xvnegsp, stxvp. */ + __vector_pair mul = __builtin_vpair_f32_mul (*q, *r); + __vector_pair neg = __builtin_vpair_f32_neg (*s); + __vector_pair muladd = __builtin_vpair_f32_add (mul, neg); + *p = __builtin_vpair_f32_neg (muladd); +} + +/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxvaddsp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxvmulsp\M} 8 } } */ +/* { dg-final { scan-assembler-times {\mxvnegsp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxvsubsp\M} 4 } } */ +/* { dg-final { scan-assembler-not {\mxvmadd.sp\M} } } */ +/* { dg-final { scan-assembler-not {\mxvmsub.sp\M} } } */ +/* { dg-final { scan-assembler-not {\mxvnmadd.sp\M} } } */ +/* { dg-final { scan-assembler-not {\mxvnmsub.sp\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-7.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-7.c new file mode 100644 index 00000000000..51a400cb4b3 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-7.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target 
power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test whether the vector builtin code merges plus and neg into a minus + operation. */ + +void +test_minus (__vector_pair *p, __vector_pair *q, __vector_pair *r) +{ + *p = __builtin_vpair_f64_add (*q, __builtin_vpair_f64_neg (*r)); +} + +/* { dg-final { scan-assembler-times {\mlxvp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxvsubdp\M} 2 } } */ +/* { dg-final { scan-assembler-not {\mxvadddp\M} } } */ +/* { dg-final { scan-assembler-not {\mxvnegdp\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-8.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-8.c new file mode 100644 index 00000000000..67957e3bdea --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-8.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test whether the vector builtin code merges plus and neg into a minus + operation. 
*/ + +void +test_minus (__vector_pair *p, __vector_pair *q, __vector_pair *r) +{ + *p = __builtin_vpair_f32_add (*q, __builtin_vpair_f32_neg (*r)); +} + +/* { dg-final { scan-assembler-times {\mlxvp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxvsubsp\M} 2 } } */ +/* { dg-final { scan-assembler-not {\mxvaddsp\M} } } */ +/* { dg-final { scan-assembler-not {\mxvnegsp\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-9.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-9.c new file mode 100644 index 00000000000..eacf8dae9d8 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-9.c @@ -0,0 +1,61 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -Ofast -ffp-contract=fast" } */ + +/* Test whether the vector builtin code merges multiply, add/subtract, and + negate into fma operations. */ + +void +test_fma (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmadd{a,m}dp, stxvp. */ + __vector_pair mul = __builtin_vpair_f64_mul (*q, *r); + *p = __builtin_vpair_f64_add (mul, *s); +} + +void +test_fms (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmsub{a,m}dp, stxvp. */ + __vector_pair mul = __builtin_vpair_f64_mul (*q, *r); + __vector_pair neg = __builtin_vpair_f64_neg (*s); + *p = __builtin_vpair_f64_add (mul, neg); +} + +void +test_nfma (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvnmadd{a,m}dp, stxvp. */ + __vector_pair mul = __builtin_vpair_f64_mul (*q, *r); + __vector_pair muladd = __builtin_vpair_f64_add (mul, *s); + *p = __builtin_vpair_f64_neg (muladd); +} + +void +test_nfms (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvnmsub{a,m}dp, stxvp. 
*/ + __vector_pair mul = __builtin_vpair_f64_mul (*q, *r); + __vector_pair neg = __builtin_vpair_f64_neg (*s); + __vector_pair muladd = __builtin_vpair_f64_add (mul, neg); + *p = __builtin_vpair_f64_neg (muladd); +} + +/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxvmadd.dp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvmsub.dp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvnmadd.dp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvnmsub.dp\M} 2 } } */
reply other threads:[~2024-02-29 20:31 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20240229203155.5A1633858408@sourceware.org \ --to=meissner@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).