public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-7323] i386: Fix up copysign/xorsign expansion [PR104612]
@ 2022-02-22 9:42 Jakub Jelinek
0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2022-02-22 9:42 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:7e691189ca9c04fdba71ceada1faba62afbc1463
commit r12-7323-g7e691189ca9c04fdba71ceada1faba62afbc1463
Author: Jakub Jelinek <jakub@redhat.com>
Date: Tue Feb 22 10:38:37 2022 +0100
i386: Fix up copysign/xorsign expansion [PR104612]
We ICE on the following testcase for -m32 since r12-3435. because
operands[2] is (subreg:SF (reg:DI ...) 0) and
lowpart_subreg (V4SFmode, operands[2], SFmode)
returns NULL, and that is what we use in AND etc. insns we emit.
My earlier version of the patch fixes that by calling force_reg for the
input operands, to make sure they are really REGs and so lowpart_subreg
will succeed on them - even for theoretical MEMs using REGs there seems
desirable, we don't want to read following memory slots for the paradoxical
subreg. For the outputs, I thought we'd get better code by always computing
result into a new pseudo and them move lowpart of that pseudo into dest.
Unfortunately it regressed
FAIL: gcc.target/i386/pr89984-2.c scan-assembler-not vmovaps
on which the patch changes:
vandps .LC0(%rip), %xmm1, %xmm1
- vxorps %xmm0, %xmm1, %xmm0
+ vxorps %xmm0, %xmm1, %xmm1
+ vmovaps %xmm1, %xmm0
ret
The RA sees:
(insn 8 4 9 2 (set (reg:V4SF 85)
(and:V4SF (subreg:V4SF (reg:SF 90) 0)
(mem/u/c:V4SF (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S16 A128]))) "pr89984-2.c":7:12 2838 {*andv4sf3}
(expr_list:REG_DEAD (reg:SF 90)
(nil)))
(insn 9 8 10 2 (set (reg:V4SF 87)
(xor:V4SF (reg:V4SF 85)
(subreg:V4SF (reg:SF 89) 0))) "pr89984-2.c":7:12 2842 {*xorv4sf3}
(expr_list:REG_DEAD (reg:SF 89)
(expr_list:REG_DEAD (reg:V4SF 85)
(nil))))
(insn 10 9 14 2 (set (reg:SF 82 [ <retval> ])
(subreg:SF (reg:V4SF 87) 0)) "pr89984-2.c":7:12 142 {*movsf_internal}
(expr_list:REG_DEAD (reg:V4SF 87)
(nil)))
(insn 14 10 15 2 (set (reg/i:SF 20 xmm0)
(reg:SF 82 [ <retval> ])) "pr89984-2.c":8:1 142 {*movsf_internal}
(expr_list:REG_DEAD (reg:SF 82 [ <retval> ])
(nil)))
(insn 15 14 0 2 (use (reg/i:SF 20 xmm0)) "pr89984-2.c":8:1 -1
(nil))
and doesn't know that if it would use xmm0 not just for pseudo 82
but also for pseudo 87, it could create a noop move in insn 10 and
so could avoid an extra register copy and nothing later on is able
to figure that out either. I don't know how the RA should know
that though.
So that we don't regress, this version of the patch
will do this stuff (i.e. use fresh vector pseudo as destination and
then move lowpart of that to dest) over what it used before (i.e.
use paradoxical subreg of the dest) only if lowpart_subreg returns NULL.
2022-02-22 Jakub Jelinek <jakub@redhat.com>
PR target/104612
* config/i386/i386-expand.cc (ix86_expand_copysign): Call force_reg
on input operands before calling lowpart_subreg on it. For output
operand, use a vmode pseudo as destination and then move its lowpart
subreg into operands[0] if lowpart_subreg fails on dest.
(ix86_expand_xorsign): Likewise.
* gcc.dg/pr104612.c: New test.
Diff:
---
gcc/config/i386/i386-expand.cc | 38 +++++++++++++++++++++++++++-----------
gcc/testsuite/gcc.dg/pr104612.c | 27 +++++++++++++++++++++++++++
2 files changed, 54 insertions(+), 11 deletions(-)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 6cf1a0b9cb6..7f7055bcb43 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -2153,7 +2153,7 @@ void
ix86_expand_copysign (rtx operands[])
{
machine_mode mode, vmode;
- rtx dest, op0, op1, mask, op2, op3;
+ rtx dest, vdest, op0, op1, mask, op2, op3;
mode = GET_MODE (operands[0]);
@@ -2174,8 +2174,13 @@ ix86_expand_copysign (rtx operands[])
return;
}
- dest = lowpart_subreg (vmode, operands[0], mode);
- op1 = lowpart_subreg (vmode, operands[2], mode);
+ dest = operands[0];
+ vdest = lowpart_subreg (vmode, dest, mode);
+ if (vdest == NULL_RTX)
+ vdest = gen_reg_rtx (vmode);
+ else
+ dest = NULL_RTX;
+ op1 = lowpart_subreg (vmode, force_reg (mode, operands[2]), mode);
mask = ix86_build_signbit_mask (vmode, 0, 0);
if (CONST_DOUBLE_P (operands[1]))
@@ -2184,7 +2189,9 @@ ix86_expand_copysign (rtx operands[])
/* Optimize for 0, simplify b = copy_signf (0.0f, a) to b = mask & a. */
if (op0 == CONST0_RTX (mode))
{
- emit_move_insn (dest, gen_rtx_AND (vmode, mask, op1));
+ emit_move_insn (vdest, gen_rtx_AND (vmode, mask, op1));
+ if (dest)
+ emit_move_insn (dest, lowpart_subreg (mode, vdest, vmode));
return;
}
@@ -2193,7 +2200,7 @@ ix86_expand_copysign (rtx operands[])
op0 = force_reg (vmode, op0);
}
else
- op0 = lowpart_subreg (vmode, operands[1], mode);
+ op0 = lowpart_subreg (vmode, force_reg (mode, operands[1]), mode);
op2 = gen_reg_rtx (vmode);
op3 = gen_reg_rtx (vmode);
@@ -2201,7 +2208,9 @@ ix86_expand_copysign (rtx operands[])
gen_rtx_NOT (vmode, mask),
op0));
emit_move_insn (op3, gen_rtx_AND (vmode, mask, op1));
- emit_move_insn (dest, gen_rtx_IOR (vmode, op2, op3));
+ emit_move_insn (vdest, gen_rtx_IOR (vmode, op2, op3));
+ if (dest)
+ emit_move_insn (dest, lowpart_subreg (mode, vdest, vmode));
}
/* Expand an xorsign operation. */
@@ -2210,7 +2219,7 @@ void
ix86_expand_xorsign (rtx operands[])
{
machine_mode mode, vmode;
- rtx dest, op0, op1, mask, x, temp;
+ rtx dest, vdest, op0, op1, mask, x, temp;
dest = operands[0];
op0 = operands[1];
@@ -2230,15 +2239,22 @@ ix86_expand_xorsign (rtx operands[])
temp = gen_reg_rtx (vmode);
mask = ix86_build_signbit_mask (vmode, 0, 0);
- op1 = lowpart_subreg (vmode, op1, mode);
+ op1 = lowpart_subreg (vmode, force_reg (mode, op1), mode);
x = gen_rtx_AND (vmode, op1, mask);
emit_insn (gen_rtx_SET (temp, x));
- op0 = lowpart_subreg (vmode, op0, mode);
+ op0 = lowpart_subreg (vmode, force_reg (mode, op0), mode);
x = gen_rtx_XOR (vmode, temp, op0);
- dest = lowpart_subreg (vmode, dest, mode);
- emit_insn (gen_rtx_SET (dest, x));
+ vdest = lowpart_subreg (vmode, dest, mode);
+ if (vdest == NULL_RTX)
+ vdest = gen_reg_rtx (vmode);
+ else
+ dest = NULL_RTX;
+ emit_insn (gen_rtx_SET (vdest, x));
+
+ if (dest)
+ emit_move_insn (dest, lowpart_subreg (mode, vdest, vmode));
}
static rtx ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1);
diff --git a/gcc/testsuite/gcc.dg/pr104612.c b/gcc/testsuite/gcc.dg/pr104612.c
new file mode 100644
index 00000000000..7d055ed871b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr104612.c
@@ -0,0 +1,27 @@
+/* PR target/104612 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-msse2 -mfpmath=sse" { target i?86-*-* x86_64-*-* } } */
+
+struct V { float x, y; };
+
+struct V
+foo (struct V v)
+{
+ struct V ret;
+ ret.x = __builtin_copysignf (1.0e+0, v.x);
+ ret.y = __builtin_copysignf (1.0e+0, v.y);
+ return ret;
+}
+
+float
+bar (struct V v)
+{
+ return __builtin_copysignf (v.x, v.y);
+}
+
+float
+baz (struct V v)
+{
+ return v.x * __builtin_copysignf (1.0f, v.y);
+}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2022-02-22 9:42 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-22 9:42 [gcc r12-7323] i386: Fix up copysign/xorsign expansion [PR104612] Jakub Jelinek
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).