* [3.4-bib, Patch] rs6000 floating point multiply-add instructions
@ 2002-11-18 17:18 Andrew Pinski
2002-11-18 21:51 ` David Edelsohn
2002-11-19 10:44 ` Geoff Keating
0 siblings, 2 replies; 13+ messages in thread
From: Andrew Pinski @ 2002-11-18 17:18 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 666 bytes --]
Since I was not seeing some of the rs6000 floating point multiply-add
instructions being used by the FSF version of the compiler, even though
they were generated by the Apple compiler, I thought I would look to
see what is going on. I saw that the md file in Apple's version
contains more define_insn patterns for these instructions, so that the
different ways of expressing them can all be matched.
ChangeLog:
2002-11-18 Andrew Pinski <pinskia@physics.uc.edu>
* config/rs6000/rs6000.md (fmadd): Add a comment
(fmsub): New patterns. (fnmadd): New patterns.
(fnmsub): New patterns. (fmadds): Add a comment
(fmsubs): New patterns. (fnmadds): New patterns.
(fnmsubs): New patterns.
[-- Attachment #2: ppc-fused-multiply-add.patch --]
[-- Type: application/octet-stream, Size: 12598 bytes --]
Index: config/rs6000/rs6000.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.md,v
retrieving revision 1.222
diff -u -d -b -w -u -b -B -d -p -r1.222 rs6000.md
--- config/rs6000/rs6000.md 16 Nov 2002 18:01:51 -0000 1.222
+++ config/rs6000/rs6000.md 19 Nov 2002 00:46:25 -0000
@@ -5239,6 +5239,7 @@
"{fd|fdiv} %0,%1,%2"
[(set_attr "type" "ddiv")])
+;; fmadd: D = (A * B) + C
(define_insn ""
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
@@ -5257,6 +5258,7 @@
"{fma|fmadd} %0,%1,%2,%3"
[(set_attr "type" "dmul")])
+;; fmsub 1: D = (A * B) - C
(define_insn ""
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(minus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
@@ -5275,6 +5277,69 @@
"{fms|fmsub} %0,%1,%2,%3"
[(set_attr "type" "dmul")])
+;; fmsub 2: D = -C + (A * B)
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
+ (match_operand:SF 2 "gpc_reg_operand" "f"))
+ (neg:SF (match_operand:SF 3 "gpc_reg_operand" "f"))))]
+ "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "fmsubs %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
+ (match_operand:SF 2 "gpc_reg_operand" "f"))
+ (neg:SF (match_operand:SF 3 "gpc_reg_operand" "f"))))]
+ "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "{fms|fmsub} %0,%1,%2,%3"
+ [(set_attr "type" "dmul")])
+
+;; fmsub 3: D = - ((-A * B) + C)
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (neg:SF (plus:SF (mult:SF
+ (neg:SF (match_operand:SF 1 "gpc_reg_operand" "%f"))
+ (match_operand:SF 2 "gpc_reg_operand" "f"))
+ (match_operand:SF 3 "gpc_reg_operand" "f"))))]
+ "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "fmsubs %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (neg:SF (plus:SF (mult:SF
+ (neg:SF (match_operand:SF 1 "gpc_reg_operand" "%f"))
+ (match_operand:SF 2 "gpc_reg_operand" "f"))
+ (match_operand:SF 3 "gpc_reg_operand" "f"))))]
+ "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "{fms|fmsub} %0,%1,%2,%3"
+ [(set_attr "type" "dmul")])
+
+;; fmsub 4: D = - (C - (A * B))
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (neg:SF (minus:SF (match_operand:SF 3 "gpc_reg_operand" "f")
+ (mult:SF
+ (match_operand:SF 1 "gpc_reg_operand" "%f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")))))]
+ "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "fmsubs %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (neg:SF (minus:SF (match_operand:SF 3 "gpc_reg_operand" "f")
+ (mult:SF
+ (match_operand:SF 1 "gpc_reg_operand" "%f")
+ (match_operand:SF 2 "gpc_reg_operand" "f")))))]
+ "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "{fms|fmsub} %0,%1,%2,%3"
+ [(set_attr "type" "dmul")]) ;; "dmul", as in the other ! TARGET_POWERPC {fms|fmsub} patterns
+
+
+;; fnmadd 1: D = - (A * B + C)
(define_insn ""
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(neg:SF (plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
@@ -5293,6 +5358,45 @@
"{fnma|fnmadd} %0,%1,%2,%3"
[(set_attr "type" "dmul")])
+;; fnmadd 2: D = (-A * B) - C
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (minus:SF (mult:SF (neg:SF (match_operand:SF 1 "gpc_reg_operand" "%f"))
+ (match_operand:SF 2 "gpc_reg_operand" "f"))
+ (match_operand:SF 3 "gpc_reg_operand" "f")))]
+ "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "fnmadds %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (minus:SF (mult:SF (neg:SF (match_operand:SF 1 "gpc_reg_operand" "%f"))
+ (match_operand:SF 2 "gpc_reg_operand" "f"))
+ (match_operand:SF 3 "gpc_reg_operand" "f")))]
+ "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "{fnma|fnmadd} %0,%1,%2,%3"
+ [(set_attr "type" "dmul")])
+
+;; fnmadd 3: D = - C - (A * B)
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (minus:SF (neg:SF (match_operand:SF 3 "gpc_reg_operand" "f"))
+ (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
+ (match_operand:SF 2 "gpc_reg_operand" "f"))))]
+ "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "fnmadds %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (minus:SF (neg:SF (match_operand:SF 3 "gpc_reg_operand" "f"))
+ (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
+ (match_operand:SF 2 "gpc_reg_operand" "f"))))]
+ "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "{fnma|fnmadd} %0,%1,%2,%3"
+ [(set_attr "type" "dmul")])
+
+;; fnmsub 1: D = - (A * B - C)
(define_insn ""
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(neg:SF (minus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
@@ -5311,6 +5415,63 @@
"{fnms|fnmsub} %0,%1,%2,%3"
[(set_attr "type" "dmul")])
+;; fnmsub 2: D = C - (A * B)
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (minus:SF (match_operand:SF 3 "gpc_reg_operand" "f")
+ (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
+ (match_operand:SF 2 "gpc_reg_operand" "f"))))]
+ "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "fnmsubs %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (minus:SF (match_operand:SF 3 "gpc_reg_operand" "f")
+ (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
+ (match_operand:SF 2 "gpc_reg_operand" "f"))))]
+ "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "{fnms|fnmsub} %0,%1,%2,%3"
+ [(set_attr "type" "dmul")])
+
+;; fnmsub 3: D = - (-C + (A * B))
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (neg:SF (plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
+ (match_operand:SF 2 "gpc_reg_operand" "f"))
+ (neg:SF (match_operand:SF 3 "gpc_reg_operand" "f")))))]
+ "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "fnmsubs %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (neg:SF (plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
+ (match_operand:SF 2 "gpc_reg_operand" "f"))
+ (neg:SF (match_operand:SF 3 "gpc_reg_operand" "f")))))]
+ "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "{fnms|fnmsub} %0,%1,%2,%3"
+ [(set_attr "type" "dmul")])
+
+;; fnmsub 4: D = (- A * B) + C
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (plus:SF (mult:SF (neg:SF (match_operand:SF 1 "gpc_reg_operand" "%f"))
+ (match_operand:SF 2 "gpc_reg_operand" "f"))
+ (match_operand:SF 3 "gpc_reg_operand" "f")))]
+ "TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "fnmsubs %0,%1,%2,%3"
+ [(set_attr "type" "fp")])
+
+(define_insn ""
+ [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+ (plus:SF (mult:SF (neg:SF (match_operand:SF 1 "gpc_reg_operand" "%f"))
+ (match_operand:SF 2 "gpc_reg_operand" "f"))
+ (match_operand:SF 3 "gpc_reg_operand" "f")))]
+ "! TARGET_POWERPC && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "{fnms|fnmsub} %0,%1,%2,%3"
+ [(set_attr "type" "dmul")])
+
(define_expand "sqrtsf2"
[(set (match_operand:SF 0 "gpc_reg_operand" "")
(sqrt:SF (match_operand:SF 1 "gpc_reg_operand" "")))]
@@ -5501,6 +5662,7 @@
"{fd|fdiv} %0,%1,%2"
[(set_attr "type" "ddiv")])
+;; fmadd: D = (A * B) + C
(define_insn ""
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
(plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%f")
@@ -5510,6 +5672,7 @@
"{fma|fmadd} %0,%1,%2,%3"
[(set_attr "type" "dmul")])
+;; fmsub 1: D = (A * B) - C
(define_insn ""
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
(minus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%f")
@@ -5519,6 +5682,39 @@
"{fms|fmsub} %0,%1,%2,%3"
[(set_attr "type" "dmul")])
+;; fmsub 2: D = -C + (A * B)
+(define_insn ""
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%f")
+ (match_operand:DF 2 "gpc_reg_operand" "f"))
+ (neg:DF (match_operand:DF 3 "gpc_reg_operand" "f"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "{fms|fmsub} %0,%1,%2,%3"
+ [(set_attr "type" "dmul")])
+
+;; fmsub 3: D = - ((-A * B) + C)
+(define_insn ""
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (neg:DF (plus:DF (mult:DF
+ (neg:DF (match_operand:DF 1 "gpc_reg_operand" "%f"))
+ (match_operand:DF 2 "gpc_reg_operand" "f"))
+ (match_operand:DF 3 "gpc_reg_operand" "f"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "{fms|fmsub} %0,%1,%2,%3"
+ [(set_attr "type" "dmul")])
+
+;; fmsub 4: D = - (C - (A * B))
+(define_insn ""
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (neg:DF (minus:DF (match_operand:DF 3 "gpc_reg_operand" "f")
+ (mult:DF
+ (match_operand:DF 1 "gpc_reg_operand" "%f")
+ (match_operand:DF 2 "gpc_reg_operand" "f")))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "{fms|fmsub} %0,%1,%2,%3"
+ [(set_attr "type" "dmul")])
+
+;; fnmadd 1: D = - (A * B + C)
(define_insn ""
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
(neg:DF (plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%f")
@@ -5528,11 +5724,62 @@
"{fnma|fnmadd} %0,%1,%2,%3"
[(set_attr "type" "dmul")])
+;; fnmadd 2: D = (-A * B) - C
+(define_insn ""
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (minus:DF (mult:DF (neg:DF (match_operand:DF 1 "gpc_reg_operand" "%f"))
+ (match_operand:DF 2 "gpc_reg_operand" "f"))
+ (match_operand:DF 3 "gpc_reg_operand" "f")))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "{fnma|fnmadd} %0,%1,%2,%3"
+ [(set_attr "type" "dmul")])
+
+;; fnmadd 3: D = - C - (A * B)
+(define_insn ""
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (minus:DF (neg:DF (match_operand:DF 3 "gpc_reg_operand" "f"))
+ (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%f")
+ (match_operand:DF 2 "gpc_reg_operand" "f"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "{fnma|fnmadd} %0,%1,%2,%3"
+ [(set_attr "type" "dmul")])
+
+;; fnmsub 1: D = - (A * B - C)
(define_insn ""
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
(neg:DF (minus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%f")
(match_operand:DF 2 "gpc_reg_operand" "f"))
(match_operand:DF 3 "gpc_reg_operand" "f"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "{fnms|fnmsub} %0,%1,%2,%3"
+ [(set_attr "type" "dmul")])
+
+;; fnmsub 2: D = C - (A * B)
+(define_insn ""
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (minus:DF (match_operand:DF 3 "gpc_reg_operand" "f")
+ (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%f")
+ (match_operand:DF 2 "gpc_reg_operand" "f"))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "{fnms|fnmsub} %0,%1,%2,%3"
+ [(set_attr "type" "dmul")])
+
+;; fnmsub 3: D = - (-C + (A * B))
+(define_insn ""
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (neg:DF (plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%f")
+ (match_operand:DF 2 "gpc_reg_operand" "f"))
+ (neg:DF (match_operand:DF 3 "gpc_reg_operand" "f")))))]
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
+ "{fnms|fnmsub} %0,%1,%2,%3"
+ [(set_attr "type" "dmul")])
+
+;; fnmsub 4: D = (- A * B) + C
+(define_insn ""
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+ (plus:DF (mult:DF (neg:DF (match_operand:DF 1 "gpc_reg_operand" "%f"))
+ (match_operand:DF 2 "gpc_reg_operand" "f"))
+ (match_operand:DF 3 "gpc_reg_operand" "f")))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_FUSED_MADD"
"{fnms|fnmsub} %0,%1,%2,%3"
[(set_attr "type" "dmul")])
[-- Attachment #3: Type: text/plain, Size: 173 bytes --]
testsuite/ChangeLog:
2002-11-18 Andrew Pinski <pinskia@physics.uc.edu>
* gcc.dg/ppc-fused-multiply-add-1.c: New test.
* gcc.dg/ppc-fused-multiply-add-2.c: New test.
[-- Attachment #4: ppc-fused-multiply-add-1.c --]
[-- Type: text/plain, Size: 259 bytes --]
/* Check that the single-precision expression temp - temp2 * temp3 is
   emitted as one fused multiply-subtract instruction instead of a
   separate multiply and subtract.  The fused form yields -0 where the
   separate operations yield +0 when all three operands are +0, so the
   transformation is only valid under -ffast-math.  This is a compile-only
   test because the dg-final checks inspect the assembler output.  */
/* { dg-do compile { target powerpc*-*-* } } */
/* { dg-options "-O3 -ffast-math" } */
float temp = 10;
float temp2 = 10;
float temp3 = 20;
int main (void)
{
  /* Globals with external linkage keep the arithmetic from being folded.  */
  temp = temp - temp2 * temp3;
  return 0;
}
/* { dg-final { scan-assembler-not "fmul" } } */
/* { dg-final { scan-assembler-not "fsub" } } */
[-- Attachment #5: ppc-fused-multiply-add-2.c --]
[-- Type: text/plain, Size: 266 bytes --]
/* Check that the double-precision expression temp - temp2 * temp3 is
   emitted as one fused multiply-subtract instruction instead of a
   separate multiply and subtract.  The fused form yields -0 where the
   separate operations yield +0 when all three operands are +0, so the
   transformation is only valid under -ffast-math.  This is a compile-only
   test because the dg-final checks inspect the assembler output.  */
/* { dg-do compile { target powerpc*-*-* } } */
/* { dg-options "-O3 -ffast-math" } */
double temp = 10;
double temp2 = 10;
double temp3 = 20;
int main (void)
{
  /* Globals with external linkage keep the arithmetic from being folded.  */
  temp = temp - temp2 * temp3;
  return 0;
}
/* { dg-final { scan-assembler-not "fmul" } } */
/* { dg-final { scan-assembler-not "fsub" } } */
[-- Attachment #6: Type: text/plain, Size: 22 bytes --]
Thanks,
Andrew Pinski
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [3.4-bib, Patch] rs6000 floating point multiply-add instructions
2002-11-18 17:18 [3.4-bib, Patch] rs6000 floating point multiply-add instructions Andrew Pinski
@ 2002-11-18 21:51 ` David Edelsohn
2002-11-19 10:44 ` Geoff Keating
1 sibling, 0 replies; 13+ messages in thread
From: David Edelsohn @ 2002-11-18 21:51 UTC (permalink / raw)
To: Andrew Pinski; +Cc: gcc-patches
>>>>> Andrew Pinski writes:
> Since I was see some of the rs6000 floating point multiply-add
> instructions being used in the fsf version of the compiler but they
> were generated by the Apple compiler, I thought I would look to
> see what is going on. I saw that in the md file of Apple's version it
> contains more define_insn for the instructions so they could be
> matched with different ways of saying them.
Please go back and read the repeated discussions in the GCC
mailing list archives, which concluded that Apple's additional patterns
are not the right way to solve this problem. Not to mention, a number of the
transformations are mathematically unsafe and should not be applied by
default.
David
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [3.4-bib, Patch] rs6000 floating point multiply-add instructions
2002-11-18 17:18 [3.4-bib, Patch] rs6000 floating point multiply-add instructions Andrew Pinski
2002-11-18 21:51 ` David Edelsohn
@ 2002-11-19 10:44 ` Geoff Keating
2002-11-19 14:15 ` David Edelsohn
1 sibling, 1 reply; 13+ messages in thread
From: Geoff Keating @ 2002-11-19 10:44 UTC (permalink / raw)
To: Andrew Pinski; +Cc: gcc-patches
Andrew Pinski <pinskia@physics.uc.edu> writes:
> Since I was see some of the rs6000 floating point multiply-add
> instructions being used in the fsf version of the compiler but they
> were generated by the Apple compiler, I thought I would look to
> see what is going on. I saw that in the md file of Apple's version it
> contains more define_insn for the instructions so they could be
> matched with different ways of saying them.
>
>
> ChangeLog:
> 2002-11-18 Andrew Pinski <pinskia@physics.uc.edu>
>
> * config/rs6000/rs6000.md (fmadd): Add a comment
> (fmsub): New patterns. (fnmadd): New patterns.
> (fnmsub): New patterns. (fmadds): Add a comment
> (fmsubs): New patterns. (fnmadds): New patterns.
> (fnmsubs): New patterns.
>
>
>
>
> testsuite/ChangeLog:
> 2002-11-18 Andrew Pinski <pinskia@physics.uc.edu>
>
> * gcc.dg/ppc-fused-multiply-add-1.c: New test.
> * gcc.dg/ppc-fused-multiply-add-2.c: New test.
Your testcases seem to be identical, did you miss something?
Anyway, the first one,
> temp=temp-temp2*temp3;
can't be optimised to a fnmsub in the absence of -ffast-math. If
'temp', 'temp2', and 'temp3' are all +0, the original expression gives
+0, but fnmsub gives -0.
The original discussion of this patch happened at
<http://gcc.gnu.org/ml/gcc-patches/2001-12/msg00368.html>.
--
- Geoffrey Keating <geoffk@geoffk.org>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [3.4-bib, Patch] rs6000 floating point multiply-add instructions
2002-11-19 10:44 ` Geoff Keating
@ 2002-11-19 14:15 ` David Edelsohn
2002-11-19 14:45 ` Geoff Keating
2002-11-28 13:29 ` Segher Boessenkool
0 siblings, 2 replies; 13+ messages in thread
From: David Edelsohn @ 2002-11-19 14:15 UTC (permalink / raw)
To: Geoff Keating
Cc: Andrew Pinski, Segher Boessenkool, Dale Johannesen, gcc-patches
It unfortunately looks more complicated than I expected for GCC
combine to try to recognize the transformed, non-canonical versions of the
instructions. I still am uncomfortable with a design that duplicates
instructions themselves.
Is it practical to represent the alternate RTL forms of the
instructions as GCC define_split so that the non-canonical forms are
converted to the canonical RTL instead of recognizing the non-canonical
RTL as instructions?
Thanks, David
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [3.4-bib, Patch] rs6000 floating point multiply-add instructions
2002-11-19 14:15 ` David Edelsohn
@ 2002-11-19 14:45 ` Geoff Keating
2002-11-19 14:59 ` David Edelsohn
2002-11-28 13:29 ` Segher Boessenkool
1 sibling, 1 reply; 13+ messages in thread
From: Geoff Keating @ 2002-11-19 14:45 UTC (permalink / raw)
To: dje; +Cc: pinskia, segher, dalej, gcc-patches
> Cc: Andrew Pinski <pinskia@physics.uc.edu>,
> Segher Boessenkool <segher@chello.nl>, Dale Johannesen <dalej@apple.com>,
> gcc-patches@gcc.gnu.org
> Date: Tue, 19 Nov 2002 17:14:26 -0500
> From: David Edelsohn <dje@watson.ibm.com>
> It unfortunately looks more complicated than I expected for GCC
> combine to try to recognize the transformed, non-canonical versions of the
> instructions.
What goes wrong? I'd think it'd just be a change in combine_simplify_rtx.
--
- Geoffrey Keating <geoffk@geoffk.org>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [3.4-bib, Patch] rs6000 floating point multiply-add instructions
2002-11-19 14:45 ` Geoff Keating
@ 2002-11-19 14:59 ` David Edelsohn
0 siblings, 0 replies; 13+ messages in thread
From: David Edelsohn @ 2002-11-19 14:59 UTC (permalink / raw)
To: Geoff Keating; +Cc: pinskia, segher, dalej, gcc-patches
>>>>> Geoff Keating writes:
Geoff> What goes wrong? I'd think it'd just be a change in
Geoff> combine_simplify_rtx.
Segher originally proposed a patch at the top-level of combine:
http://gcc.gnu.org/ml/gcc-patches/2002-04/msg00479.html
Richard recommended that it be placed under combine_simplify_rtx(), but it
is not clear how to stuff the patch into combine_simplify_rtx(). I
definitely would like to resurrect that approach, if it is practical.
David
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [3.4-bib, Patch] rs6000 floating point multiply-add instructions
2002-11-19 14:15 ` David Edelsohn
2002-11-19 14:45 ` Geoff Keating
@ 2002-11-28 13:29 ` Segher Boessenkool
2002-11-28 13:59 ` David Edelsohn
` (2 more replies)
1 sibling, 3 replies; 13+ messages in thread
From: Segher Boessenkool @ 2002-11-28 13:29 UTC (permalink / raw)
To: David Edelsohn
Cc: Geoff Keating, Andrew Pinski, Segher Boessenkool,
Dale Johannesen, gcc-patches
[Before this issue gets abandoned, unresolved, again, I'd better try to
keep it alive this time... So:]
David Edelsohn wrote:
>
> It unfortunately looks more complicated than I expected for GCC
> combine to try to recognize the transformed, non-canonical versions of the
> instructions.
I still think my original patch (the one that patches rs6000.md) is the best
solution. If -ffast-math is in effect, the "original" patterns are not
canonical, and if it's not in effect, the "new" patterns can't ever match.
So in either case only one of the patterns can ever match.
> I still am uncomfortable with a design that duplicates
> instructions themselves.
Why is that?
> Is it practical to represent the alternate RTL forms of the
> instructions as GCC define_split so that the non-canonical forms are
> converted to the canonical RTL instead of recognizing the non-canonical
> RTL as instructions?
Maybe; I'll try and find out, if you really can't be moved to just do the
machine description thing.
Segher
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [3.4-bib, Patch] rs6000 floating point multiply-add instructions
2002-11-28 13:29 ` Segher Boessenkool
@ 2002-11-28 13:59 ` David Edelsohn
2002-11-28 14:47 ` David Edelsohn
2002-11-28 14:13 ` Segher Boessenkool
2002-11-30 3:43 ` Geoff Keating
2 siblings, 1 reply; 13+ messages in thread
From: David Edelsohn @ 2002-11-28 13:59 UTC (permalink / raw)
To: Segher Boessenkool
Cc: Geoff Keating, Andrew Pinski, Dale Johannesen, gcc-patches
Why not try moving your combine patch to combine_simplify_rtx()?
David
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [3.4-bib, Patch] rs6000 floating point multiply-add instructions
2002-11-28 13:29 ` Segher Boessenkool
2002-11-28 13:59 ` David Edelsohn
@ 2002-11-28 14:13 ` Segher Boessenkool
2002-11-30 3:43 ` Geoff Keating
2 siblings, 0 replies; 13+ messages in thread
From: Segher Boessenkool @ 2002-11-28 14:13 UTC (permalink / raw)
To: gcc-patches
[Before this issue gets abandoned, unresolved, again, I'd better try to
keep it alive this time... So:]
David Edelsohn wrote:
>
> It unfortunately looks more complicated than I expected for GCC
> combine to try to recognize the transformed, non-canonical versions of the
> instructions.
I still think my original patch (the one that patches rs6000.md) is the best
solution. If -ffast-math is in effect, the "original" patterns are not
canonical, and if it's not in effect, the "new" patterns can't ever match.
So in either case only one of the patterns can ever match.
> I still am uncomfortable with a design that duplicates
> instructions themselves.
Why is that?
> Is it practical to represent the alternate RTL forms of the
> instructions as GCC define_split so that the non-canonical forms are
> converted to the canonical RTL instead of recognizing the non-canonical
> RTL as instructions?
Maybe; I'll try and find out, if you really can't be moved to just do the
machine description thing.
Segher
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [3.4-bib, Patch] rs6000 floating point multiply-add instructions
2002-11-28 13:59 ` David Edelsohn
@ 2002-11-28 14:47 ` David Edelsohn
0 siblings, 0 replies; 13+ messages in thread
From: David Edelsohn @ 2002-11-28 14:47 UTC (permalink / raw)
To: gcc-patches
Why not try moving your combine patch to combine_simplify_rtx()?
David
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [3.4-bib, Patch] rs6000 floating point multiply-add instructions
2002-11-28 13:29 ` Segher Boessenkool
2002-11-28 13:59 ` David Edelsohn
2002-11-28 14:13 ` Segher Boessenkool
@ 2002-11-30 3:43 ` Geoff Keating
2002-11-30 6:15 ` Geoff Keating
2002-12-01 18:49 ` Segher Boessenkool
2 siblings, 2 replies; 13+ messages in thread
From: Geoff Keating @ 2002-11-30 3:43 UTC (permalink / raw)
To: segher; +Cc: dje, pinskia, segher, dalej, gcc-patches
> Date: Thu, 28 Nov 2002 03:40:04 +0100
> From: Segher Boessenkool <segher@koffie.nl>
> [Before this issue gets abandoned, unresolved, again, I'd better try to
> keep it alive this time... So:]
>
> David Edelsohn wrote:
> >
> > It unfortunately looks more complicated than I expected for GCC
> > combine to try to recognize the transformed, non-canonical versions of the
> > instructions.
>
> I still think my original patch (the one that patches rs6000.md) is the best
> solution. If -ffast-math is in effect, the "original" patterns are not
> canonical, and if it's not in effect, the "new" patterns can't ever match.
> So in either case only one of the patterns can ever match.
It's not a question of technical correctness; I'm sure your patch
works. It's a question of what is the best way to design a
multi-target compiler.
> > I still am uncomfortable with a design that duplicates
> > instructions themselves.
>
> Why is that?
>
> > Is it practical to represent the alternate RTL forms of the
> > instructions as GCC define_split so that the non-canonical forms are
> > converted to the canonical RTL instead of recognizing the non-canonical
> > RTL as instructions?
>
> Maybe; I'll try and find out, if you really can't be moved to just do the
> machine description thing.
Please just try to make combine_simplify_rtx do the right thing. It
is really not hard.
--
- Geoffrey Keating <geoffk@geoffk.org>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [3.4-bib, Patch] rs6000 floating point multiply-add instructions
2002-11-30 3:43 ` Geoff Keating
@ 2002-11-30 6:15 ` Geoff Keating
2002-12-01 18:49 ` Segher Boessenkool
1 sibling, 0 replies; 13+ messages in thread
From: Geoff Keating @ 2002-11-30 6:15 UTC (permalink / raw)
To: gcc-patches
> Date: Thu, 28 Nov 2002 03:40:04 +0100
> From: Segher Boessenkool <segher@koffie.nl>
> [Before this issue gets abandoned, unresolved, again, I'd better try to
> keep it alive this time... So:]
>
> David Edelsohn wrote:
> >
> > It unfortunately looks more complicated than I expected for GCC
> > combine to try to recognize the transformed, non-canonical versions of the
> > instructions.
>
> I still think my original patch (the one that patches rs6000.md) is the best
> solution. If -ffast-math is in effect, the "original" patterns are not
> canonical, and if it's not in effect, the "new" patterns can't ever match.
> So in either case only one of the patterns can ever match.
It's not a question of technical correctness; I'm sure your patch
works. It's a question of what is the best way to design a
multi-target compiler.
> > I still am uncomfortable with a design that duplicates
> > instructions themselves.
>
> Why is that?
>
> > Is it practical to represent the alternate RTL forms of the
> > instructions as GCC define_split so that the non-canonical forms are
> > converted to the canonical RTL instead of recognizing the non-canonical
> > RTL as instructions?
>
> Maybe; I'll try and find out, if you really can't be moved to just do the
> machine description thing.
Please just try to make combine_simplify_rtx do the right thing. It
is really not hard.
--
- Geoffrey Keating <geoffk@geoffk.org>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [3.4-bib, Patch] rs6000 floating point multiply-add instructions
2002-11-30 3:43 ` Geoff Keating
2002-11-30 6:15 ` Geoff Keating
@ 2002-12-01 18:49 ` Segher Boessenkool
1 sibling, 0 replies; 13+ messages in thread
From: Segher Boessenkool @ 2002-12-01 18:49 UTC (permalink / raw)
To: Geoff Keating; +Cc: dje, pinskia, segher, dalej, gcc-patches
Geoff Keating wrote:
> It's not a question of technical correctness; I'm sure your patch
> works. It's a question of what is the best way to design a
> multi-target compiler.
Yes; and I think the situation we're in is quite target specific.
So a target-specific solution would be fine in my opinion. But...
> Please just try to make combine_simplify_rtx do the right thing. It
> is really not hard.
...heh. Well I had to change one case, and add three. It now does
the "right thing"(tm) with everything I threw at it (the .md patch
did not, as combine actually worked _against_ the fma's). You might
want some of this patch to be conditionalized on a
TARGET_FMA_HAS_THE_MULTIPLY_AT_THE_LEFT_OF_THE_ADDITION flag.
Tested *but not regression checked* on powerpc-unknown-linux-gnu.
Cheers,
Segher
2002-12-02 Segher Boessenkool <segher@koffie.nl>
* combine.c (combine_simplify_rtx): Simplify fused multiply-adds.
*** ../../gcc-20021007/gcc/combine.c Mon Sep 30 05:26:47 2002
--- ./combine.c Mon Dec 2 02:58:42 2002
*************** combine_simplify_rtx (x, op0_mode, last,
*** 4025,4033 ****
both +0, (minus Y X) is the same as (minus X Y). If the rounding
mode is towards +infinity (or -infinity) then the two expressions
will be rounded differently. */
if (GET_CODE (XEXP (x, 0)) == MINUS
&& !HONOR_SIGNED_ZEROS (mode)
! && !HONOR_SIGN_DEPENDENT_ROUNDING (mode))
return gen_binary (MINUS, mode, XEXP (XEXP (x, 0), 1),
XEXP (XEXP (x, 0), 0));
--- 4025,4037 ----
both +0, (minus Y X) is the same as (minus X Y). If the rounding
mode is towards +infinity (or -infinity) then the two expressions
will be rounded differently. */
+ /* Don't do this when we have (neg (minus (mult A B) C)), as this is
+ a valid fused multiply-add, and the result wouldn't be. */
if (GET_CODE (XEXP (x, 0)) == MINUS
&& !HONOR_SIGNED_ZEROS (mode)
! && !HONOR_SIGN_DEPENDENT_ROUNDING (mode)
! && (GET_CODE (XEXP (XEXP (x, 0), 0)) != MULT
! || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT))
return gen_binary (MINUS, mode, XEXP (XEXP (x, 0), 1),
XEXP (XEXP (x, 0), 0));
*************** combine_simplify_rtx (x, op0_mode, last,
*** 4332,4337 ****
--- 4336,4361 ----
gen_binary (MINUS, mode, XEXP (x, 0),
XEXP (XEXP (x, 1), 0)),
XEXP (XEXP (x, 1), 1));
+
+ /* (minus A (mult B C)) becomes (neg (minus (mult B C) A)),
+ because that is a valid fused multiply-add and the
+ original is not. */
+ if (FLOAT_MODE_P (mode) && flag_unsafe_math_optimizations
+ && GET_CODE (XEXP (x, 1)) == MULT && GET_CODE (XEXP (x, 0)) != MULT)
+ return simplify_gen_unary (NEG, mode,
+ gen_binary (MINUS, mode,
+ XEXP (x, 1),
+ XEXP (x, 0)),
+ mode);
+
+ /* (minus (neg A) B) becomes (neg (plus A B)). */
+ if ((! FLOAT_MODE_P (mode) || flag_unsafe_math_optimizations)
+ && GET_CODE (XEXP (x, 0)) == NEG)
+ return simplify_gen_unary (NEG, mode,
+ gen_binary (PLUS, mode,
+ XEXP (XEXP (x, 0), 0),
+ XEXP (x, 1)),
+ mode);
break;
case MULT:
*************** combine_simplify_rtx (x, op0_mode, last,
*** 4362,4367 ****
--- 4386,4400 ----
if (tem)
return gen_binary (DIV, mode, tem, XEXP (XEXP (x, 0), 1));
}
+
+ /* (mult (neg A) B) becomes (neg (mult A B)). */
+ if ((! FLOAT_MODE_P (mode) || flag_unsafe_math_optimizations)
+ && GET_CODE (XEXP (x, 0)) == NEG)
+ return simplify_gen_unary (NEG, mode,
+ gen_binary (MULT, mode,
+ XEXP (XEXP (x, 0), 0),
+ XEXP (x, 1)),
+ mode);
break;
case UDIV:
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2002-12-02 2:49 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-11-18 17:18 [3.4-bib, Patch] rs6000 floating point multiply-add instructions Andrew Pinski
2002-11-18 21:51 ` David Edelsohn
2002-11-19 10:44 ` Geoff Keating
2002-11-19 14:15 ` David Edelsohn
2002-11-19 14:45 ` Geoff Keating
2002-11-19 14:59 ` David Edelsohn
2002-11-28 13:29 ` Segher Boessenkool
2002-11-28 13:59 ` David Edelsohn
2002-11-28 14:47 ` David Edelsohn
2002-11-28 14:13 ` Segher Boessenkool
2002-11-30 3:43 ` Geoff Keating
2002-11-30 6:15 ` Geoff Keating
2002-12-01 18:49 ` Segher Boessenkool
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).