From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id 549EB3858402; Tue, 23 Jan 2024 07:10:59 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 549EB3858402 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1705993859; bh=2wQ+udF41PcOI6MqDox0dpSarb7o8e5IQabidp3BcW0=; h=From:To:Subject:Date:From; b=hoToPK3KJjj9YyNT5vMzUHcnYr7mWNV483TXgbGuZYS2VdI24t56AS/aIYsBnkXUN ISkQn1gFsKQW3XvIKuK0x0eM+TmuPFf/WsKVx9Gqe9qQSY7xr0ZkGGl0Fov/zg94e6 Sm8dIXXuk5yLbB/NtOQVE/MxrPGviK5C93kwxAwI= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work154-vpair)] Add vector pair optimizations. X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/work154-vpair X-Git-Oldrev: 0ef492d559dd4505072f9511510f6bba3faeb995 X-Git-Newrev: 845d95ccfd4223aa66bf4abae4044ddc9a5efe61 Message-Id: <20240123071059.549EB3858402@sourceware.org> Date: Tue, 23 Jan 2024 07:10:59 +0000 (GMT) List-Id: https://gcc.gnu.org/g:845d95ccfd4223aa66bf4abae4044ddc9a5efe61 commit 845d95ccfd4223aa66bf4abae4044ddc9a5efe61 Author: Michael Meissner Date: Tue Jan 23 02:10:12 2024 -0500 Add vector pair optimizations. 2024-01-23 Michael Meissner gcc/ * config/rs6000/vector-pair.md (vpair_add_neg_3): New combiner insn to convert vector plus/neg into a minus operation. (vpair_fma__merge): Optimize multiply, add/subtract, and negation into fma operations if the user specifies to create fmas. (vpair_fma__merge): Likewise. (vpair_fma__merge2): Likewise. (vpair_nfma__merge): Likewise. (vpair_nfms__merge): Likewise. (vpair_nfms__merge2): Likewise. gcc/testsuite/ * gcc.target/powerpc/vector-pair-7.c: New test. * gcc.target/powerpc/vector-pair-8.c: Likewise. * gcc.target/powerpc/vector-pair-9.c: Likewise. * gcc.target/powerpc/vector-pair-10.c: Likewise. 
* gcc.target/powerpc/vector-pair-11.c: Likewise. * gcc.target/powerpc/vector-pair-12.c: Likewise. Diff: --- gcc/config/rs6000/vector-pair.md | 224 ++++++++++++++++++++++ gcc/testsuite/gcc.target/powerpc/vector-pair-10.c | 61 ++++++ gcc/testsuite/gcc.target/powerpc/vector-pair-11.c | 65 +++++++ gcc/testsuite/gcc.target/powerpc/vector-pair-12.c | 65 +++++++ gcc/testsuite/gcc.target/powerpc/vector-pair-7.c | 18 ++ gcc/testsuite/gcc.target/powerpc/vector-pair-8.c | 18 ++ gcc/testsuite/gcc.target/powerpc/vector-pair-9.c | 61 ++++++ 7 files changed, 512 insertions(+) diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md index 39b419c6814..7a81acbdc05 100644 --- a/gcc/config/rs6000/vector-pair.md +++ b/gcc/config/rs6000/vector-pair.md @@ -261,6 +261,31 @@ (set (attr "type") (if_then_else (match_test " == DIV") (const_string "") (const_string "")))]) + +;; Optimize vector pair add of a negative value into a subtract. +(define_insn_and_split "*vpair_add_neg_3" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "wa") + (unspec:OO + [(match_operand:OO 2 "vsx_register_operand" "wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG) + (const_int VPAIR_FP_ELEMENT)] + VPAIR_FP_BINARY))] + "TARGET_MMA" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:OO + [(match_dup 1) + (match_dup 2) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MINUS))] +{ +} + [(set_attr "length" "8") + (set_attr "type" "")]) ;; Vector pair fused-multiply (FMA) operations. The last argument in the ;; UNSPEC is a CONST_INT which identifies what the scalar element is. @@ -354,3 +379,202 @@ } [(set_attr "length" "8") (set_attr "type" "")]) + +;; Optimize vector pair multiply and vector pair add into vector pair fma, +;; providing the compiler would do this optimization for scalar and vectors. +;; Unlike most of the define_insn_and_splits, this can be done before register +;; allocation.
+(define_insn_and_split "*vpair_fma__merge" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MULT) + (match_operand:OO 3 "vsx_register_operand" "0,wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_PLUS))] + "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:OO + [(match_dup 1) + (match_dup 2) + (match_dup 3) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_FMA))] +{ +} + [(set_attr "length" "8") + (set_attr "type" "")]) + +;; Merge multiply and subtract. +(define_insn_and_split "*vpair_fma__merge" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MULT) + (match_operand:OO 3 "vsx_register_operand" "0,wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MINUS))] + "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:OO + [(match_dup 1) + (match_dup 2) + (unspec:OO + [(match_dup 3) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_FMA))] +{ +} + [(set_attr "length" "8") + (set_attr "type" "")]) + +(define_insn_and_split "*vpair_fma__merge2" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MULT) + (unspec:OO + [(match_operand:OO 3 "vsx_register_operand" "0,wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_PLUS))] + "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST" + "#" + "&& 1" + 
[(set (match_dup 0) + (unspec:OO + [(match_dup 1) + (match_dup 2) + (unspec:OO + [(match_dup 3) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_FMA))] +{ +} + [(set_attr "length" "8") + (set_attr "type" "")]) + +;; Merge negate, multiply, and add. +(define_insn_and_split "*vpair_nfma__merge" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(unspec:OO + [(unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MULT) + (match_operand:OO 3 "vsx_register_operand" "0,wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_PLUS) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG))] + "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:OO + [(unspec:OO + [(match_dup 1) + (match_dup 2) + (match_dup 3) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_FMA) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG))] +{ +} + [(set_attr "length" "8") + (set_attr "type" "")]) + +;; Merge negate, multiply, and subtract. 
+(define_insn_and_split "*vpair_nfms__merge" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(unspec:OO + [(unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MULT) + (match_operand:OO 3 "vsx_register_operand" "0,wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MINUS) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG))] + "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:OO + [(unspec:OO + [(match_dup 1) + (match_dup 2) + (unspec:OO + [(match_dup 3) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_FMA) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG))] +{ +} + [(set_attr "length" "8") + (set_attr "type" "")]) + +(define_insn_and_split "*vpair_nfms__merge2" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(unspec:OO + [(unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_MULT) + (unspec:OO + [(match_operand:OO 3 "vsx_register_operand" "0,wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_PLUS) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG))] + "TARGET_MMA && flag_fp_contract_mode == FP_CONTRACT_FAST" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:OO + [(unspec:OO + [(match_dup 1) + (match_dup 2) + (unspec:OO + [(match_dup 3) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_FMA) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG))] +{ +} + [(set_attr "length" "8") + (set_attr "type" "")]) diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-10.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-10.c new file mode 100644 index 00000000000..d2ee4dd0dd9 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/powerpc/vector-pair-10.c @@ -0,0 +1,61 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -Ofast -ffp-contract=fast" } */ + +/* Test whether the vector builtin code merges multiply, add/subtract, and + negate into fma operations. */ + +void +test_fma (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmadd{a,m}sp, stxvp. */ + __vector_pair mul = __builtin_vpair_f32_mul (*q, *r); + *p = __builtin_vpair_f32_add (mul, *s); +} + +void +test_fms (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmsub{a,m}sp, stxvp. */ + __vector_pair mul = __builtin_vpair_f32_mul (*q, *r); + __vector_pair neg = __builtin_vpair_f32_neg (*s); + *p = __builtin_vpair_f32_add (mul, neg); +} + +void +test_nfma (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvnmadd{a,m}sp, stxvp. */ + __vector_pair mul = __builtin_vpair_f32_mul (*q, *r); + __vector_pair muladd = __builtin_vpair_f32_add (mul, *s); + *p = __builtin_vpair_f32_neg (muladd); +} + +void +test_nfms (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvnmsub{a,m}sp, stxvp. 
*/ + __vector_pair mul = __builtin_vpair_f32_mul (*q, *r); + __vector_pair neg = __builtin_vpair_f32_neg (*s); + __vector_pair muladd = __builtin_vpair_f32_add (mul, neg); + *p = __builtin_vpair_f32_neg (muladd); +} + +/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxvmadd.sp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvmsub.sp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvnmadd.sp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvnmsub.sp\M} 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-11.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-11.c new file mode 100644 index 00000000000..e635b599aed --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-11.c @@ -0,0 +1,65 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -ffp-contract=off" } */ + +/* Test whether the vector builtin code does not merge multiply, add/subtract, + and negate into fma operations if -ffp-contract is off. */ + +void +test_fma (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmuldp, 2 xvadddp, stxvp. */ + __vector_pair mul = __builtin_vpair_f64_mul (*q, *r); + *p = __builtin_vpair_f64_add (mul, *s); +} + +void +test_fms (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmuldp, 2 xvsubdp, stxvp. */ + __vector_pair mul = __builtin_vpair_f64_mul (*q, *r); + __vector_pair neg = __builtin_vpair_f64_neg (*s); + *p = __builtin_vpair_f64_add (mul, neg); +} + +void +test_nfma (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmuldp, 2 xvadddp, 2 xvnegdp, stxvp.
*/ + __vector_pair mul = __builtin_vpair_f64_mul (*q, *r); + __vector_pair muladd = __builtin_vpair_f64_add (mul, *s); + *p = __builtin_vpair_f64_neg (muladd); +} + +void +test_nfms (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmuldp, 2 xvsubdp, 2 xvnegdp, stxvp. */ + __vector_pair mul = __builtin_vpair_f64_mul (*q, *r); + __vector_pair neg = __builtin_vpair_f64_neg (*s); + __vector_pair muladd = __builtin_vpair_f64_add (mul, neg); + *p = __builtin_vpair_f64_neg (muladd); +} + +/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxvadddp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxvmuldp\M} 8 } } */ +/* { dg-final { scan-assembler-times {\mxvnegdp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxvsubdp\M} 4 } } */ +/* { dg-final { scan-assembler-not {\mxvmadd.dp\M} } } */ +/* { dg-final { scan-assembler-not {\mxvmsub.dp\M} } } */ +/* { dg-final { scan-assembler-not {\mxvnmadd.dp\M} } } */ +/* { dg-final { scan-assembler-not {\mxvnmsub.dp\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-12.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-12.c new file mode 100644 index 00000000000..4997279473e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-12.c @@ -0,0 +1,65 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -ffp-contract=off" } */ + +/* Test whether the vector builtin code does not merge multiply, add/subtract, + and negate into fma operations if -ffp-contract is off. */ + +void +test_fma (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmulsp, 2 xvaddsp, stxvp.
*/ + __vector_pair mul = __builtin_vpair_f32_mul (*q, *r); + *p = __builtin_vpair_f32_add (mul, *s); +} + +void +test_fms (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmulsp, 2 xvsubsp, stxvp. */ + __vector_pair mul = __builtin_vpair_f32_mul (*q, *r); + __vector_pair neg = __builtin_vpair_f32_neg (*s); + *p = __builtin_vpair_f32_add (mul, neg); +} + +void +test_nfma (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmulsp, 2 xvaddsp, 2 xvnegsp, stxvp. */ + __vector_pair mul = __builtin_vpair_f32_mul (*q, *r); + __vector_pair muladd = __builtin_vpair_f32_add (mul, *s); + *p = __builtin_vpair_f32_neg (muladd); +} + +void +test_nfms (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmulsp, 2 xvsubsp, 2 xvnegsp, stxvp. */ + __vector_pair mul = __builtin_vpair_f32_mul (*q, *r); + __vector_pair neg = __builtin_vpair_f32_neg (*s); + __vector_pair muladd = __builtin_vpair_f32_add (mul, neg); + *p = __builtin_vpair_f32_neg (muladd); +} + +/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxvaddsp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxvmulsp\M} 8 } } */ +/* { dg-final { scan-assembler-times {\mxvnegsp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxvsubsp\M} 4 } } */ +/* { dg-final { scan-assembler-not {\mxvmadd.sp\M} } } */ +/* { dg-final { scan-assembler-not {\mxvmsub.sp\M} } } */ +/* { dg-final { scan-assembler-not {\mxvnmadd.sp\M} } } */ +/* { dg-final { scan-assembler-not {\mxvnmsub.sp\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-7.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-7.c new file mode 100644 index 00000000000..51a400cb4b3 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-7.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target 
power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test whether the vector builtin code merges plus and neg into a minus + operation. */ + +void +test_minus (__vector_pair *p, __vector_pair *q, __vector_pair *r) +{ + *p = __builtin_vpair_f64_add (*q, __builtin_vpair_f64_neg (*r)); +} + +/* { dg-final { scan-assembler-times {\mlxvp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxvsubdp\M} 2 } } */ +/* { dg-final { scan-assembler-not {\mxvadddp\M} } } */ +/* { dg-final { scan-assembler-not {\mxvnegdp\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-8.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-8.c new file mode 100644 index 00000000000..67957e3bdea --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-8.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test whether the vector builtin code merges plus and neg into a minus + operation. 
*/ + +void +test_minus (__vector_pair *p, __vector_pair *q, __vector_pair *r) +{ + *p = __builtin_vpair_f32_add (*q, __builtin_vpair_f32_neg (*r)); +} + +/* { dg-final { scan-assembler-times {\mlxvp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxvsubsp\M} 2 } } */ +/* { dg-final { scan-assembler-not {\mxvaddsp\M} } } */ +/* { dg-final { scan-assembler-not {\mxvnegsp\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-9.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-9.c new file mode 100644 index 00000000000..eacf8dae9d8 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-9.c @@ -0,0 +1,61 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -Ofast -ffp-contract=fast" } */ + +/* Test whether the vector builtin code merges multiply, add/subtract, and + negate into fma operations. */ + +void +test_fma (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmadd{a,m}dp, stxvp. */ + __vector_pair mul = __builtin_vpair_f64_mul (*q, *r); + *p = __builtin_vpair_f64_add (mul, *s); +} + +void +test_fms (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvmsub{a,m}dp, stxvp. */ + __vector_pair mul = __builtin_vpair_f64_mul (*q, *r); + __vector_pair neg = __builtin_vpair_f64_neg (*s); + *p = __builtin_vpair_f64_add (mul, neg); +} + +void +test_nfma (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvnmadd{a,m}dp, stxvp. */ + __vector_pair mul = __builtin_vpair_f64_mul (*q, *r); + __vector_pair muladd = __builtin_vpair_f64_add (mul, *s); + *p = __builtin_vpair_f64_neg (muladd); +} + +void +test_nfms (__vector_pair *p, + __vector_pair *q, + __vector_pair *r, + __vector_pair *s) +{ + /* lxvp, 2 xvnmsub{a,m}dp, stxvp. 
*/ + __vector_pair mul = __builtin_vpair_f64_mul (*q, *r); + __vector_pair neg = __builtin_vpair_f64_neg (*s); + __vector_pair muladd = __builtin_vpair_f64_add (mul, neg); + *p = __builtin_vpair_f64_neg (muladd); +} + +/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxvmadd.dp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvmsub.dp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvnmadd.dp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvnmsub.dp\M} 2 } } */