public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: Michael Meissner <meissner@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work117)] Do not generate fmaddfp and fnmsubfp Date: Thu, 6 Apr 2023 19:52:20 +0000 (GMT) [thread overview] Message-ID: <20230406195220.B2C0C3858D28@sourceware.org> (raw) https://gcc.gnu.org/g:887189cd66871102fef739a57a3404310f88c499 commit 887189cd66871102fef739a57a3404310f88c499 Author: Michael Meissner <meissner@linux.ibm.com> Date: Thu Apr 6 15:52:05 2023 -0400 Do not generate fmaddfp and fnmsubfp The Altivec instructions fmaddfp and fnmsubfp have different rounding behaviors than the VSX xvmaddsp and xvnmsubsp instructions. In particular, generating these instructions seems to break Eigen. GCC has generated the Altivec fmaddfp and fnmsubfp instructions on VSX systems as an alternative to the xvmadd{a,m}sp and xvnmsub{a,m}sp instructions. The advantage of the Altivec instructions is that they are 4 operand instructions (i.e. the target register does not have to overlap with one of the input registers). The advantage is it can eliminate an extra move instruction. The disadvantage is it does not round the same way as the VSX instructions. This patch eliminates the generation of the Altivec fmaddfp and fnmsubfp instructions as alternatives in the VSX instruction insn support, and in the Altivec insns it adds a test to prevent the insn from being used if VSX is available. I also added a test to the regression test suite. I have done bootstrap builds on power9 little endian (with both IEEE long double and IBM long double). I have also done the builds and tests on a power8 big endian system (testing both 32-bit and 64-bit code generation). Chip has verified that it fixes the problem that Eigen encountered. Can I check this into the master GCC branch? After a burn-in period, can I check this patch into the active GCC branches? Thanks in advance. 
2023-04-06 Michael Meissner <meissner@linux.ibm.com> gcc/ PR target/70243 * config/rs6000/rs6000.md (isa attribute): Add fastmath. (enabled attribute): Add support for fastmath. * config/rs6000/vsx.md (vsx_fmav4sf4): Set the isa attribute to fastmath to disable Altivec instruction generation normally. (vsx_nfmsv4sf4): Likewise. gcc/testsuite/ PR target/70243 * gcc.target/powerpc/pr70243.c: New test. * gcc.target/powerpc/pr70243-2.c: New test. Diff: --- gcc/config/rs6000/rs6000.md | 6 +++- gcc/config/rs6000/vsx.md | 6 ++-- gcc/testsuite/gcc.target/powerpc/pr70243-2.c | 41 ++++++++++++++++++++++++++++ gcc/testsuite/gcc.target/powerpc/pr70243.c | 41 ++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 3 deletions(-) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 44f7dd509cb..7fea6a40e0c 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -354,7 +354,7 @@ (const (symbol_ref "(enum attr_cpu) rs6000_tune"))) ;; The ISA we implement. -(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10" +(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10,fastmath" (const_string "any")) ;; Is this alternative enabled for the current CPU/ISA/etc.? 
@@ -402,6 +402,10 @@ (and (eq_attr "isa" "p10") (match_test "TARGET_POWER10")) (const_int 1) + + (and (eq_attr "isa" "fastmath") + (match_test "flag_unsafe_math_optimizations")) + (const_int 1) ] (const_int 0))) ;; If this instruction is microcoded on the CELL processor diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 0865608f94a..85d4ac5082f 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -2025,7 +2025,8 @@ xvmaddasp %x0,%x1,%x2 xvmaddmsp %x0,%x1,%x3 vmaddfp %0,%1,%2,%3" - [(set_attr "type" "vecfloat")]) + [(set_attr "type" "vecfloat") + (set_attr "isa" "*,*,fastmath")]) (define_insn "*vsx_fmav2df4" [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa") @@ -2078,7 +2079,8 @@ xvnmsubasp %x0,%x1,%x2 xvnmsubmsp %x0,%x1,%x3 vnmsubfp %0,%1,%2,%3" - [(set_attr "type" "vecfloat")]) + [(set_attr "type" "vecfloat") + (set_attr "isa" "*,*,fastmath")]) (define_insn "*vsx_nfmsv2df4" [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa") diff --git a/gcc/testsuite/gcc.target/powerpc/pr70243-2.c b/gcc/testsuite/gcc.target/powerpc/pr70243-2.c new file mode 100644 index 00000000000..27460150631 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr70243-2.c @@ -0,0 +1,41 @@ +/* { dg-do compile */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-Ofast -mvsx" } */ + +/* PR 70423, Make sure we don't generate fmaddfp or fnmsubfp unless -ffast-math + is used. These instructions do not round the same way the normal VSX + instructions do. These tests are written where the 3 inputs and target are + all separate registers where the register allocator would prefer to issue + the 4 argument FMA instruction over the 3 argument instruction plus an extra + move. 
*/ + +#include <altivec.h> + +vector float +do_add1 (vector float dummy, vector float a, vector float b, vector float c) +{ + return (a * b) + c; +} + +vector float +do_nsub1 (vector float dummy, vector float a, vector float b, vector float c) +{ + return -((a * b) - c); +} + +vector float +do_add2 (vector float dummy, vector float a, vector float b, vector float c) +{ + return vec_madd (a, b, c); +} + +vector float +do_nsub2 (vector float dummy, vector float a, vector float b, vector float c) +{ + return vec_nmsub (a, b, c); +} + +/* { dg-final { scan-assembler-not {\mxvmadd[am]sp\M} } } */ +/* { dg-final { scan-assembler-not {\mxvnmsub[am]sp\M} } } */ +/* { dg-final { scan-assembler {\mfmaddfp\M} } } */ +/* { dg-final { scan-assembler {\mfnmsubfp\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr70243.c b/gcc/testsuite/gcc.target/powerpc/pr70243.c new file mode 100644 index 00000000000..91b75b68986 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr70243.c @@ -0,0 +1,41 @@ +/* { dg-do compile */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mvsx" } */ + +/* PR 70423, Make sure we don't generate fmaddfp or fnmsubfp unless -ffast-math + is used. These instructions do not round the same way the normal VSX + instructions do. These tests are written where the 3 inputs and target are + all separate registers where the register allocator would prefer to issue + the 4 argument FMA instruction over the 3 argument instruction plus an extra + move. 
*/ + +#include <altivec.h> + +vector float +do_add1 (vector float dummy, vector float a, vector float b, vector float c) +{ + return (a * b) + c; +} + +vector float +do_nsub1 (vector float dummy, vector float a, vector float b, vector float c) +{ + return -((a * b) - c); +} + +vector float +do_add2 (vector float dummy, vector float a, vector float b, vector float c) +{ + return vec_madd (a, b, c); +} + +vector float +do_nsub2 (vector float dummy, vector float a, vector float b, vector float c) +{ + return vec_nmsub (a, b, c); +} + +/* { dg-final { scan-assembler {\mxvmadd[am]sp\M} } } */ +/* { dg-final { scan-assembler {\mxvnmsub[am]sp\M} } } */ +/* { dg-final { scan-assembler-not {\mfmaddfp\M} } } */ +/* { dg-final { scan-assembler-not {\mfnmsubfp\M} } } */
next reply other threads:[~2023-04-06 19:52 UTC|newest] Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top 2023-04-06 19:52 Michael Meissner [this message] -- strict thread matches above, loose matches on Subject: below -- 2023-04-06 20:32 Michael Meissner 2023-04-06 18:48 Michael Meissner 2023-04-06 17:20 Michael Meissner 2023-04-06 3:14 Michael Meissner 2023-04-06 2:48 Michael Meissner 2023-04-05 23:20 Michael Meissner
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20230406195220.B2C0C3858D28@sourceware.org \ --to=meissner@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).