public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [committed] amdgcn: Fix addsub bug
@ 2023-04-27 16:38 Andrew Stubbs
  0 siblings, 0 replies; only message in thread
From: Andrew Stubbs @ 2023-04-27 16:38 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 420 bytes --]

I've committed this patch to fix a couple of bugs introduced in the 
recent CMul patch.

First, the fmsubadd insn was accidentally all adds and no subtracts.

Second, there were input dependencies on the undefined output register 
which caused the compiler to reserve unnecessary slots in the stack-frame.

Both issues are now fixed.

This patch is already committed to OG12. I'll backport it to GCC 13 shortly.

Andrew

[-- Attachment #2: 230427-fix-addsub.patch --]
[-- Type: text/plain, Size: 4276 bytes --]

amdgcn: Fix addsub bug

The vec_fmsubadd instruction actually had add twice, by mistake.

Also improve code-gen for all the complex patterns by using properly
undefined values.  Mostly this just prevents the compiler reserving space
in the stack frame.

gcc/ChangeLog:

	* config/gcn/gcn-valu.md (cmul<conj_op><mode>3): Use gcn_gen_undef.
	(cml<addsub_as><mode>4): Likewise.
	(vec_addsub<mode>3): Likewise.
	(cadd<rot><mode>3): Likewise.
	(vec_fmaddsub<mode>4): Likewise.
	(vec_fmsubadd<mode>4): Likewise, and use sub for the odd lanes.

diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 44c48468dd6..7290cdc2fd0 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -2323,8 +2323,9 @@ (define_expand "cmul<conj_op><mode>3"
     rtx even = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (even, get_exec (0x5555555555555555UL));
     rtx dest = operands[0];
-    emit_insn (gen_<cmul_subadd><mode>3_exec (dest, t1, t1_perm, dest, even));
-                                                             // a*c-b*d 0
+    emit_insn (gen_<cmul_subadd><mode>3_exec (dest, t1, t1_perm,
+                                              gcn_gen_undef (<MODE>mode),
+                                              even));            // a*c-b*d 0
 
     rtx t2_perm = gen_reg_rtx (<MODE>mode);
     emit_insn (gen_dpp_swap_pairs<mode> (t2_perm, t2));          // b*c a*d
@@ -2368,7 +2369,8 @@ (define_expand "cml<addsub_as><mode>4"
     rtx even = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (even, get_exec (0x5555555555555555UL));
     rtx dest = operands[0];
-    emit_insn (gen_sub<mode>3_exec (dest, t1, t2_perm, dest, even));
+    emit_insn (gen_sub<mode>3_exec (dest, t1, t2_perm,
+                                    gcn_gen_undef (<MODE>mode), even));
 
     rtx odd = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
@@ -2392,7 +2394,8 @@ (define_expand "vec_addsub<mode>3"
     rtx dest = operands[0];
     rtx x = operands[1];
     rtx y = operands[2];
-    emit_insn (gen_sub<mode>3_exec (dest, x, y, dest, even));
+    emit_insn (gen_sub<mode>3_exec (dest, x, y, gcn_gen_undef (<MODE>mode),
+                                    even));
     rtx odd = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
     emit_insn (gen_add<mode>3_exec (dest, x, y, dest, odd));
@@ -2419,7 +2422,9 @@ (define_expand "cadd<rot><mode>3"
 
     rtx even = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (even, get_exec (0x5555555555555555UL));
-    emit_insn (gen_<cadd_subadd><mode>3_exec (dest, x, y, dest, even));
+    emit_insn (gen_<cadd_subadd><mode>3_exec (dest, x, y,
+                                              gcn_gen_undef (<MODE>mode),
+                                              even));
     rtx odd = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
     emit_insn (gen_<cadd_addsub><mode>3_exec (dest, x, y, dest, odd));
@@ -2439,7 +2444,8 @@ (define_expand "vec_fmaddsub<mode>4"
     rtx even = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (even, get_exec (0x5555555555555555UL));
     rtx dest = operands[0];
-    emit_insn (gen_sub<mode>3_exec (dest, t1, operands[3], dest, even));
+    emit_insn (gen_sub<mode>3_exec (dest, t1, operands[3],
+                                    gcn_gen_undef (<MODE>mode), even));
     rtx odd = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
     emit_insn (gen_add<mode>3_exec (dest, t1, operands[3], dest, odd));
@@ -2459,10 +2465,11 @@ (define_expand "vec_fmsubadd<mode>4"
     rtx even = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (even, get_exec (0x5555555555555555UL));
     rtx dest = operands[0];
-    emit_insn (gen_add<mode>3_exec (dest, t1, operands[3], dest, even));
+    emit_insn (gen_add<mode>3_exec (dest, t1, operands[3],
+                                    gcn_gen_undef (<MODE>mode), even));
     rtx odd = gen_rtx_REG (DImode, EXEC_REG);
     emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
-    emit_insn (gen_add<mode>3_exec (dest, t1, operands[3], dest, odd));
+    emit_insn (gen_sub<mode>3_exec (dest, t1, operands[3], dest, odd));
 
     DONE;
   })

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-04-27 16:38 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-27 16:38 [committed] amdgcn: Fix addsub bug Andrew Stubbs

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).