* [PATCH] [i386] Remove copysign post_reload splitter for scalar modes.
@ 2021-09-09 7:54 liuhongt
2021-09-09 8:00 ` Hongtao Liu
0 siblings, 1 reply; 3+ messages in thread
From: liuhongt @ 2021-09-09 7:54 UTC (permalink / raw)
To: gcc-patches; +Cc: crazylht, hjl.tools, jakub
Hi:
As a follow-up to [1], this patch removes all scalar-mode copysign-related
post_reload splitters/define_insns and expands copysign directly into the
sequence below, using paradoxical subregs.
op3 = op1 & ~mask;
op4 = op2 & mask;
dest = op3 | op4;
It can sometimes generate better code, as avx512dq-abs-copysign-1.c
shows.
Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
gcc/ChangeLog:
* config/i386/i386-expand.c (ix86_expand_copysign): Expand
right into ANDNOT + AND + IOR, using paradoxical subregs.
(ix86_split_copysign_const): Remove.
(ix86_split_copysign_var): Ditto.
* config/i386/i386-protos.h (ix86_split_copysign_const): Ditto.
(ix86_split_copysign_var): Ditto.
* config/i386/i386.md (@copysign<mode>3_const): Ditto.
(@copysign<mode>3_var): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/avx512dq-abs-copysign-1.c: Adjust testcase.
* gcc.target/i386/avx512vl-abs-copysign-1.c: Adjust testcase.
---
gcc/config/i386/i386-expand.c | 152 +++---------------
gcc/config/i386/i386-protos.h | 2 -
gcc/config/i386/i386.md | 44 -----
.../gcc.target/i386/avx512dq-abs-copysign-1.c | 4 +-
.../gcc.target/i386/avx512vl-abs-copysign-1.c | 4 +-
5 files changed, 30 insertions(+), 176 deletions(-)
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index badbacc19d8..a0262a8f47d 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -2115,13 +2115,9 @@ void
ix86_expand_copysign (rtx operands[])
{
machine_mode mode, vmode;
- rtx dest, op0, op1, mask;
+ rtx dest, op0, op1, mask, op2, op3;
- dest = operands[0];
- op0 = operands[1];
- op1 = operands[2];
-
- mode = GET_MODE (dest);
+ mode = GET_MODE (operands[0]);
if (mode == SFmode)
vmode = V4SFmode;
@@ -2132,136 +2128,40 @@ ix86_expand_copysign (rtx operands[])
else
gcc_unreachable ();
- mask = ix86_build_signbit_mask (vmode, 0, 0);
-
- if (CONST_DOUBLE_P (op0))
+ if (rtx_equal_p (operands[1], operands[2]))
{
- if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
- op0 = simplify_unary_operation (ABS, mode, op0, mode);
-
- if (mode == SFmode || mode == DFmode)
- {
- if (op0 == CONST0_RTX (mode))
- op0 = CONST0_RTX (vmode);
- else
- {
- rtx v = ix86_build_const_vector (vmode, false, op0);
-
- op0 = force_reg (vmode, v);
- }
- }
- else if (op0 != CONST0_RTX (mode))
- op0 = force_reg (mode, op0);
-
- emit_insn (gen_copysign3_const (mode, dest, op0, op1, mask));
- }
- else
- {
- rtx nmask = ix86_build_signbit_mask (vmode, 0, 1);
-
- emit_insn (gen_copysign3_var
- (mode, dest, NULL_RTX, op0, op1, nmask, mask));
- }
-}
-
-/* Deconstruct a copysign operation into bit masks. Operand 0 is known to
- be a constant, and so has already been expanded into a vector constant. */
-
-void
-ix86_split_copysign_const (rtx operands[])
-{
- machine_mode mode, vmode;
- rtx dest, op0, mask, x;
-
- dest = operands[0];
- op0 = operands[1];
- mask = operands[3];
-
- mode = GET_MODE (dest);
- vmode = GET_MODE (mask);
-
- dest = lowpart_subreg (vmode, dest, mode);
- x = gen_rtx_AND (vmode, dest, mask);
- emit_insn (gen_rtx_SET (dest, x));
-
- if (op0 != CONST0_RTX (vmode))
- {
- x = gen_rtx_IOR (vmode, dest, op0);
- emit_insn (gen_rtx_SET (dest, x));
- }
-}
-
-/* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
- so we have to do two masks. */
-
-void
-ix86_split_copysign_var (rtx operands[])
-{
- machine_mode mode, vmode;
- rtx dest, scratch, op0, op1, mask, nmask, x;
-
- dest = operands[0];
- scratch = operands[1];
- op0 = operands[2];
- op1 = operands[3];
- nmask = operands[4];
- mask = operands[5];
-
- mode = GET_MODE (dest);
- vmode = GET_MODE (mask);
-
- if (rtx_equal_p (op0, op1))
- {
- /* Shouldn't happen often (it's useless, obviously), but when it does
- we'd generate incorrect code if we continue below. */
- emit_move_insn (dest, op0);
+ emit_move_insn (operands[0], operands[1]);
return;
}
- if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
- {
- gcc_assert (REGNO (op1) == REGNO (scratch));
-
- x = gen_rtx_AND (vmode, scratch, mask);
- emit_insn (gen_rtx_SET (scratch, x));
+ dest = lowpart_subreg (vmode, operands[0], mode);
+ op1 = lowpart_subreg (vmode, operands[2], mode);
+ mask = ix86_build_signbit_mask (vmode, 0, 0);
- dest = mask;
- op0 = lowpart_subreg (vmode, op0, mode);
- x = gen_rtx_NOT (vmode, dest);
- x = gen_rtx_AND (vmode, x, op0);
- emit_insn (gen_rtx_SET (dest, x));
- }
- else
+ if (CONST_DOUBLE_P (operands[1]))
{
- if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
- {
- x = gen_rtx_AND (vmode, scratch, mask);
- }
- else /* alternative 2,4 */
+ op0 = simplify_unary_operation (ABS, mode, operands[1], mode);
+ /* Optimize for 0, simplify b = copy_signf (0.0f, a) to b = mask & a. */
+ if (op0 == CONST0_RTX (mode))
{
- gcc_assert (REGNO (mask) == REGNO (scratch));
- op1 = lowpart_subreg (vmode, op1, mode);
- x = gen_rtx_AND (vmode, scratch, op1);
+ emit_move_insn (dest, gen_rtx_AND (vmode, mask, op1));
+ return;
}
- emit_insn (gen_rtx_SET (scratch, x));
- if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
- {
- dest = lowpart_subreg (vmode, op0, mode);
- x = gen_rtx_AND (vmode, dest, nmask);
- }
- else /* alternative 3,4 */
- {
- gcc_assert (REGNO (nmask) == REGNO (dest));
- dest = nmask;
- op0 = lowpart_subreg (vmode, op0, mode);
- x = gen_rtx_AND (vmode, dest, op0);
- }
- emit_insn (gen_rtx_SET (dest, x));
+ if (GET_MODE_SIZE (mode) < 16)
+ op0 = ix86_build_const_vector (vmode, false, op0);
+ op0 = force_reg (vmode, op0);
}
-
- x = gen_rtx_IOR (vmode, dest, scratch);
- emit_insn (gen_rtx_SET (dest, x));
+ else
+ op0 = lowpart_subreg (vmode, operands[1], mode);
+
+ op2 = gen_reg_rtx (vmode);
+ op3 = gen_reg_rtx (vmode);
+ emit_move_insn (op2, gen_rtx_AND (vmode,
+ gen_rtx_NOT (vmode, mask),
+ op0));
+ emit_move_insn (op3, gen_rtx_AND (vmode, mask, op1));
+ emit_move_insn (dest, gen_rtx_IOR (vmode, op2, op3));
}
/* Expand an xorsign operation. */
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 72644e33a92..dcae34b915e 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -135,8 +135,6 @@ extern void ix86_expand_fp_absneg_operator (enum rtx_code, machine_mode,
extern void ix86_split_fp_absneg_operator (enum rtx_code, machine_mode,
rtx[]);
extern void ix86_expand_copysign (rtx []);
-extern void ix86_split_copysign_const (rtx []);
-extern void ix86_split_copysign_var (rtx []);
extern void ix86_expand_xorsign (rtx []);
extern bool ix86_unary_operator_ok (enum rtx_code, machine_mode, rtx[]);
extern bool ix86_match_ccmode (rtx, machine_mode);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6b4ceb2bce3..ba0058dad81 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -10861,50 +10861,6 @@ (define_expand "copysign<mode>3"
|| (TARGET_SSE && (<MODE>mode == TFmode))"
"ix86_expand_copysign (operands); DONE;")
-(define_insn_and_split "@copysign<mode>3_const"
- [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv")
- (unspec:SSEMODEF
- [(match_operand:<ssevecmodef> 1 "nonimm_or_0_operand" "YvmC")
- (match_operand:SSEMODEF 2 "register_operand" "0")
- (match_operand:<ssevecmodef> 3 "nonimmediate_operand" "Yvm")]
- UNSPEC_COPYSIGN))]
- "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
- || (TARGET_SSE && (<MODE>mode == TFmode))"
- "#"
- "&& reload_completed"
- [(const_int 0)]
- "ix86_split_copysign_const (operands); DONE;")
-
-(define_insn "@copysign<mode>3_var"
- [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv,Yv,Yv,Yv,Yv")
- (unspec:SSEMODEF
- [(match_operand:SSEMODEF 2 "register_operand" "Yv,0,0,Yv,Yv")
- (match_operand:SSEMODEF 3 "register_operand" "1,1,Yv,1,Yv")
- (match_operand:<ssevecmodef> 4
- "nonimmediate_operand" "X,Yvm,Yvm,0,0")
- (match_operand:<ssevecmodef> 5
- "nonimmediate_operand" "0,Yvm,1,Yvm,1")]
- UNSPEC_COPYSIGN))
- (clobber (match_scratch:<ssevecmodef> 1 "=Yv,Yv,Yv,Yv,Yv"))]
- "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
- || (TARGET_SSE && (<MODE>mode == TFmode))"
- "#")
-
-(define_split
- [(set (match_operand:SSEMODEF 0 "register_operand")
- (unspec:SSEMODEF
- [(match_operand:SSEMODEF 2 "register_operand")
- (match_operand:SSEMODEF 3 "register_operand")
- (match_operand:<ssevecmodef> 4)
- (match_operand:<ssevecmodef> 5)]
- UNSPEC_COPYSIGN))
- (clobber (match_scratch:<ssevecmodef> 1))]
- "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
- || (TARGET_SSE && (<MODE>mode == TFmode)))
- && reload_completed"
- [(const_int 0)]
- "ix86_split_copysign_var (operands); DONE;")
-
(define_expand "xorsign<mode>3"
[(match_operand:MODEF 0 "register_operand")
(match_operand:MODEF 1 "register_operand")
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c
index cb542d09058..0107df7741a 100644
--- a/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c
@@ -64,8 +64,8 @@ f6 (double x)
}
/* { dg-final { scan-assembler "vandps\[^\n\r\]*xmm16" } } */
-/* { dg-final { scan-assembler "vorps\[^\n\r\]*xmm16" } } */
+/* { dg-final { scan-assembler "vpternlogd\[^\n\r\]*xmm16" } } */
/* { dg-final { scan-assembler "vxorps\[^\n\r\]*xmm16" } } */
/* { dg-final { scan-assembler "vandpd\[^\n\r\]*xmm18" } } */
-/* { dg-final { scan-assembler "vorpd\[^\n\r\]*xmm18" } } */
+/* { dg-final { scan-assembler "vpternlogq\[^\n\r\]*xmm18" } } */
/* { dg-final { scan-assembler "vxorpd\[^\n\r\]*xmm18" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c
index b375c5fad80..b27335b9d99 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c
@@ -64,8 +64,8 @@ f6 (double x)
}
/* { dg-final { scan-assembler "vpandd\[^\n\r\]*xmm16" } } */
-/* { dg-final { scan-assembler "vpord\[^\n\r\]*xmm16" } } */
+/* { dg-final { scan-assembler "vpternlogd\[^\n\r\]*xmm16" } } */
/* { dg-final { scan-assembler "vpxord\[^\n\r\]*xmm16" } } */
/* { dg-final { scan-assembler "vpandq\[^\n\r\]*xmm18" } } */
-/* { dg-final { scan-assembler "vporq\[^\n\r\]*xmm18" } } */
+/* { dg-final { scan-assembler "vpternlogq\[^\n\r\]*xmm18" } } */
/* { dg-final { scan-assembler "vpxorq\[^\n\r\]*xmm18" } } */
--
2.27.0
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] [i386] Remove copysign post_reload splitter for scalar modes.
2021-09-09 7:54 [PATCH] [i386] Remove copysign post_reload splitter for scalar modes liuhongt
@ 2021-09-09 8:00 ` Hongtao Liu
2021-09-10 5:04 ` Hongtao Liu
0 siblings, 1 reply; 3+ messages in thread
From: Hongtao Liu @ 2021-09-09 8:00 UTC (permalink / raw)
To: liuhongt; +Cc: GCC Patches, H. J. Lu, Jakub Jelinek
On Thu, Sep 9, 2021 at 3:54 PM liuhongt <hongtao.liu@intel.com> wrote:
>
> Hi:
> As a follow up of [1], the patch removes all scalar mode copysign related
> post_reload splitter/define_insn and expand copysign directly into below using
> paradoxical subregs.
[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579057.html
>
> op3 = op1 & ~mask;
> op4 = op2 & mask;
> dest = op3 | op4;
>
> It can sometimes generate better code just like avx512dq-abs-copysign-1.c
> shows.
>
> Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
>
> gcc/ChangeLog:
>
> * config/i386/i386-expand.c (ix86_expand_copysign): Expand
> right into ANDNOT + AND + IOR, using paradoxical subregs.
> (ix86_split_copysign_const): Remove.
> (ix86_split_copysign_var): Ditto.
> * config/i386/i386-protos.h (ix86_split_copysign_const): Ditto.
> (ix86_split_copysign_var): Ditto.
> * config/i386/i386.md (@copysign<mode>3_const): Ditto.
> (@copysign<mode>3_var): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/avx512dq-abs-copysign-1.c: Adjust testcase.
> * gcc.target/i386/avx512vl-abs-copysign-1.c: Adjust testcase.
> ---
> gcc/config/i386/i386-expand.c | 152 +++---------------
> gcc/config/i386/i386-protos.h | 2 -
> gcc/config/i386/i386.md | 44 -----
> .../gcc.target/i386/avx512dq-abs-copysign-1.c | 4 +-
> .../gcc.target/i386/avx512vl-abs-copysign-1.c | 4 +-
> 5 files changed, 30 insertions(+), 176 deletions(-)
>
> diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
> index badbacc19d8..a0262a8f47d 100644
> --- a/gcc/config/i386/i386-expand.c
> +++ b/gcc/config/i386/i386-expand.c
> @@ -2115,13 +2115,9 @@ void
> ix86_expand_copysign (rtx operands[])
> {
> machine_mode mode, vmode;
> - rtx dest, op0, op1, mask;
> + rtx dest, op0, op1, mask, op2, op3;
>
> - dest = operands[0];
> - op0 = operands[1];
> - op1 = operands[2];
> -
> - mode = GET_MODE (dest);
> + mode = GET_MODE (operands[0]);
>
> if (mode == SFmode)
> vmode = V4SFmode;
> @@ -2132,136 +2128,40 @@ ix86_expand_copysign (rtx operands[])
> else
> gcc_unreachable ();
>
> - mask = ix86_build_signbit_mask (vmode, 0, 0);
> -
> - if (CONST_DOUBLE_P (op0))
> + if (rtx_equal_p (operands[1], operands[2]))
> {
> - if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
> - op0 = simplify_unary_operation (ABS, mode, op0, mode);
> -
> - if (mode == SFmode || mode == DFmode)
> - {
> - if (op0 == CONST0_RTX (mode))
> - op0 = CONST0_RTX (vmode);
> - else
> - {
> - rtx v = ix86_build_const_vector (vmode, false, op0);
> -
> - op0 = force_reg (vmode, v);
> - }
> - }
> - else if (op0 != CONST0_RTX (mode))
> - op0 = force_reg (mode, op0);
> -
> - emit_insn (gen_copysign3_const (mode, dest, op0, op1, mask));
> - }
> - else
> - {
> - rtx nmask = ix86_build_signbit_mask (vmode, 0, 1);
> -
> - emit_insn (gen_copysign3_var
> - (mode, dest, NULL_RTX, op0, op1, nmask, mask));
> - }
> -}
> -
> -/* Deconstruct a copysign operation into bit masks. Operand 0 is known to
> - be a constant, and so has already been expanded into a vector constant. */
> -
> -void
> -ix86_split_copysign_const (rtx operands[])
> -{
> - machine_mode mode, vmode;
> - rtx dest, op0, mask, x;
> -
> - dest = operands[0];
> - op0 = operands[1];
> - mask = operands[3];
> -
> - mode = GET_MODE (dest);
> - vmode = GET_MODE (mask);
> -
> - dest = lowpart_subreg (vmode, dest, mode);
> - x = gen_rtx_AND (vmode, dest, mask);
> - emit_insn (gen_rtx_SET (dest, x));
> -
> - if (op0 != CONST0_RTX (vmode))
> - {
> - x = gen_rtx_IOR (vmode, dest, op0);
> - emit_insn (gen_rtx_SET (dest, x));
> - }
> -}
> -
> -/* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
> - so we have to do two masks. */
> -
> -void
> -ix86_split_copysign_var (rtx operands[])
> -{
> - machine_mode mode, vmode;
> - rtx dest, scratch, op0, op1, mask, nmask, x;
> -
> - dest = operands[0];
> - scratch = operands[1];
> - op0 = operands[2];
> - op1 = operands[3];
> - nmask = operands[4];
> - mask = operands[5];
> -
> - mode = GET_MODE (dest);
> - vmode = GET_MODE (mask);
> -
> - if (rtx_equal_p (op0, op1))
> - {
> - /* Shouldn't happen often (it's useless, obviously), but when it does
> - we'd generate incorrect code if we continue below. */
> - emit_move_insn (dest, op0);
> + emit_move_insn (operands[0], operands[1]);
> return;
> }
>
> - if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
> - {
> - gcc_assert (REGNO (op1) == REGNO (scratch));
> -
> - x = gen_rtx_AND (vmode, scratch, mask);
> - emit_insn (gen_rtx_SET (scratch, x));
> + dest = lowpart_subreg (vmode, operands[0], mode);
> + op1 = lowpart_subreg (vmode, operands[2], mode);
> + mask = ix86_build_signbit_mask (vmode, 0, 0);
>
> - dest = mask;
> - op0 = lowpart_subreg (vmode, op0, mode);
> - x = gen_rtx_NOT (vmode, dest);
> - x = gen_rtx_AND (vmode, x, op0);
> - emit_insn (gen_rtx_SET (dest, x));
> - }
> - else
> + if (CONST_DOUBLE_P (operands[1]))
> {
> - if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
> - {
> - x = gen_rtx_AND (vmode, scratch, mask);
> - }
> - else /* alternative 2,4 */
> + op0 = simplify_unary_operation (ABS, mode, operands[1], mode);
> + /* Optimize for 0, simplify b = copy_signf (0.0f, a) to b = mask & a. */
> + if (op0 == CONST0_RTX (mode))
> {
> - gcc_assert (REGNO (mask) == REGNO (scratch));
> - op1 = lowpart_subreg (vmode, op1, mode);
> - x = gen_rtx_AND (vmode, scratch, op1);
> + emit_move_insn (dest, gen_rtx_AND (vmode, mask, op1));
> + return;
> }
> - emit_insn (gen_rtx_SET (scratch, x));
>
> - if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
> - {
> - dest = lowpart_subreg (vmode, op0, mode);
> - x = gen_rtx_AND (vmode, dest, nmask);
> - }
> - else /* alternative 3,4 */
> - {
> - gcc_assert (REGNO (nmask) == REGNO (dest));
> - dest = nmask;
> - op0 = lowpart_subreg (vmode, op0, mode);
> - x = gen_rtx_AND (vmode, dest, op0);
> - }
> - emit_insn (gen_rtx_SET (dest, x));
> + if (GET_MODE_SIZE (mode) < 16)
> + op0 = ix86_build_const_vector (vmode, false, op0);
> + op0 = force_reg (vmode, op0);
> }
> -
> - x = gen_rtx_IOR (vmode, dest, scratch);
> - emit_insn (gen_rtx_SET (dest, x));
> + else
> + op0 = lowpart_subreg (vmode, operands[1], mode);
> +
> + op2 = gen_reg_rtx (vmode);
> + op3 = gen_reg_rtx (vmode);
> + emit_move_insn (op2, gen_rtx_AND (vmode,
> + gen_rtx_NOT (vmode, mask),
> + op0));
> + emit_move_insn (op3, gen_rtx_AND (vmode, mask, op1));
> + emit_move_insn (dest, gen_rtx_IOR (vmode, op2, op3));
> }
>
> /* Expand an xorsign operation. */
> diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
> index 72644e33a92..dcae34b915e 100644
> --- a/gcc/config/i386/i386-protos.h
> +++ b/gcc/config/i386/i386-protos.h
> @@ -135,8 +135,6 @@ extern void ix86_expand_fp_absneg_operator (enum rtx_code, machine_mode,
> extern void ix86_split_fp_absneg_operator (enum rtx_code, machine_mode,
> rtx[]);
> extern void ix86_expand_copysign (rtx []);
> -extern void ix86_split_copysign_const (rtx []);
> -extern void ix86_split_copysign_var (rtx []);
> extern void ix86_expand_xorsign (rtx []);
> extern bool ix86_unary_operator_ok (enum rtx_code, machine_mode, rtx[]);
> extern bool ix86_match_ccmode (rtx, machine_mode);
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 6b4ceb2bce3..ba0058dad81 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -10861,50 +10861,6 @@ (define_expand "copysign<mode>3"
> || (TARGET_SSE && (<MODE>mode == TFmode))"
> "ix86_expand_copysign (operands); DONE;")
>
> -(define_insn_and_split "@copysign<mode>3_const"
> - [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv")
> - (unspec:SSEMODEF
> - [(match_operand:<ssevecmodef> 1 "nonimm_or_0_operand" "YvmC")
> - (match_operand:SSEMODEF 2 "register_operand" "0")
> - (match_operand:<ssevecmodef> 3 "nonimmediate_operand" "Yvm")]
> - UNSPEC_COPYSIGN))]
> - "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
> - || (TARGET_SSE && (<MODE>mode == TFmode))"
> - "#"
> - "&& reload_completed"
> - [(const_int 0)]
> - "ix86_split_copysign_const (operands); DONE;")
> -
> -(define_insn "@copysign<mode>3_var"
> - [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv,Yv,Yv,Yv,Yv")
> - (unspec:SSEMODEF
> - [(match_operand:SSEMODEF 2 "register_operand" "Yv,0,0,Yv,Yv")
> - (match_operand:SSEMODEF 3 "register_operand" "1,1,Yv,1,Yv")
> - (match_operand:<ssevecmodef> 4
> - "nonimmediate_operand" "X,Yvm,Yvm,0,0")
> - (match_operand:<ssevecmodef> 5
> - "nonimmediate_operand" "0,Yvm,1,Yvm,1")]
> - UNSPEC_COPYSIGN))
> - (clobber (match_scratch:<ssevecmodef> 1 "=Yv,Yv,Yv,Yv,Yv"))]
> - "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
> - || (TARGET_SSE && (<MODE>mode == TFmode))"
> - "#")
> -
> -(define_split
> - [(set (match_operand:SSEMODEF 0 "register_operand")
> - (unspec:SSEMODEF
> - [(match_operand:SSEMODEF 2 "register_operand")
> - (match_operand:SSEMODEF 3 "register_operand")
> - (match_operand:<ssevecmodef> 4)
> - (match_operand:<ssevecmodef> 5)]
> - UNSPEC_COPYSIGN))
> - (clobber (match_scratch:<ssevecmodef> 1))]
> - "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
> - || (TARGET_SSE && (<MODE>mode == TFmode)))
> - && reload_completed"
> - [(const_int 0)]
> - "ix86_split_copysign_var (operands); DONE;")
> -
> (define_expand "xorsign<mode>3"
> [(match_operand:MODEF 0 "register_operand")
> (match_operand:MODEF 1 "register_operand")
> diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c
> index cb542d09058..0107df7741a 100644
> --- a/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c
> +++ b/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c
> @@ -64,8 +64,8 @@ f6 (double x)
> }
>
> /* { dg-final { scan-assembler "vandps\[^\n\r\]*xmm16" } } */
> -/* { dg-final { scan-assembler "vorps\[^\n\r\]*xmm16" } } */
> +/* { dg-final { scan-assembler "vpternlogd\[^\n\r\]*xmm16" } } */
> /* { dg-final { scan-assembler "vxorps\[^\n\r\]*xmm16" } } */
> /* { dg-final { scan-assembler "vandpd\[^\n\r\]*xmm18" } } */
> -/* { dg-final { scan-assembler "vorpd\[^\n\r\]*xmm18" } } */
> +/* { dg-final { scan-assembler "vpternlogq\[^\n\r\]*xmm18" } } */
> /* { dg-final { scan-assembler "vxorpd\[^\n\r\]*xmm18" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c
> index b375c5fad80..b27335b9d99 100644
> --- a/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c
> +++ b/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c
> @@ -64,8 +64,8 @@ f6 (double x)
> }
>
> /* { dg-final { scan-assembler "vpandd\[^\n\r\]*xmm16" } } */
> -/* { dg-final { scan-assembler "vpord\[^\n\r\]*xmm16" } } */
> +/* { dg-final { scan-assembler "vpternlogd\[^\n\r\]*xmm16" } } */
> /* { dg-final { scan-assembler "vpxord\[^\n\r\]*xmm16" } } */
> /* { dg-final { scan-assembler "vpandq\[^\n\r\]*xmm18" } } */
> -/* { dg-final { scan-assembler "vporq\[^\n\r\]*xmm18" } } */
> +/* { dg-final { scan-assembler "vpternlogq\[^\n\r\]*xmm18" } } */
> /* { dg-final { scan-assembler "vpxorq\[^\n\r\]*xmm18" } } */
> --
> 2.27.0
>
--
BR,
Hongtao
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] [i386] Remove copysign post_reload splitter for scalar modes.
2021-09-09 8:00 ` Hongtao Liu
@ 2021-09-10 5:04 ` Hongtao Liu
0 siblings, 0 replies; 3+ messages in thread
From: Hongtao Liu @ 2021-09-10 5:04 UTC (permalink / raw)
To: liuhongt; +Cc: GCC Patches, H. J. Lu, Jakub Jelinek
On Thu, Sep 9, 2021 at 4:00 PM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Thu, Sep 9, 2021 at 3:54 PM liuhongt <hongtao.liu@intel.com> wrote:
> >
> > Hi:
> > As a follow up of [1], the patch removes all scalar mode copysign related
> > post_reload splitter/define_insn and expand copysign directly into below using
> > paradoxical subregs.
>
> [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579057.html
> >
> > op3 = op1 & ~mask;
> > op4 = op2 & mask;
> > dest = op3 | op4;
> >
> > It can sometimes generate better code just like avx512dq-abs-copysign-1.c
> > shows.
> >
> > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> >
Committed.
> > gcc/ChangeLog:
> >
> > * config/i386/i386-expand.c (ix86_expand_copysign): Expand
> > right into ANDNOT + AND + IOR, using paradoxical subregs.
> > (ix86_split_copysign_const): Remove.
> > (ix86_split_copysign_var): Ditto.
> > * config/i386/i386-protos.h (ix86_split_copysign_const): Ditto.
> > (ix86_split_copysign_var): Ditto.
> > * config/i386/i386.md (@copysign<mode>3_const): Ditto.
> > (@copysign<mode>3_var): Ditto.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/avx512dq-abs-copysign-1.c: Adjust testcase.
> > * gcc.target/i386/avx512vl-abs-copysign-1.c: Adjust testcase.
> > ---
> > gcc/config/i386/i386-expand.c | 152 +++---------------
> > gcc/config/i386/i386-protos.h | 2 -
> > gcc/config/i386/i386.md | 44 -----
> > .../gcc.target/i386/avx512dq-abs-copysign-1.c | 4 +-
> > .../gcc.target/i386/avx512vl-abs-copysign-1.c | 4 +-
> > 5 files changed, 30 insertions(+), 176 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
> > index badbacc19d8..a0262a8f47d 100644
> > --- a/gcc/config/i386/i386-expand.c
> > +++ b/gcc/config/i386/i386-expand.c
> > @@ -2115,13 +2115,9 @@ void
> > ix86_expand_copysign (rtx operands[])
> > {
> > machine_mode mode, vmode;
> > - rtx dest, op0, op1, mask;
> > + rtx dest, op0, op1, mask, op2, op3;
> >
> > - dest = operands[0];
> > - op0 = operands[1];
> > - op1 = operands[2];
> > -
> > - mode = GET_MODE (dest);
> > + mode = GET_MODE (operands[0]);
> >
> > if (mode == SFmode)
> > vmode = V4SFmode;
> > @@ -2132,136 +2128,40 @@ ix86_expand_copysign (rtx operands[])
> > else
> > gcc_unreachable ();
> >
> > - mask = ix86_build_signbit_mask (vmode, 0, 0);
> > -
> > - if (CONST_DOUBLE_P (op0))
> > + if (rtx_equal_p (operands[1], operands[2]))
> > {
> > - if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
> > - op0 = simplify_unary_operation (ABS, mode, op0, mode);
> > -
> > - if (mode == SFmode || mode == DFmode)
> > - {
> > - if (op0 == CONST0_RTX (mode))
> > - op0 = CONST0_RTX (vmode);
> > - else
> > - {
> > - rtx v = ix86_build_const_vector (vmode, false, op0);
> > -
> > - op0 = force_reg (vmode, v);
> > - }
> > - }
> > - else if (op0 != CONST0_RTX (mode))
> > - op0 = force_reg (mode, op0);
> > -
> > - emit_insn (gen_copysign3_const (mode, dest, op0, op1, mask));
> > - }
> > - else
> > - {
> > - rtx nmask = ix86_build_signbit_mask (vmode, 0, 1);
> > -
> > - emit_insn (gen_copysign3_var
> > - (mode, dest, NULL_RTX, op0, op1, nmask, mask));
> > - }
> > -}
> > -
> > -/* Deconstruct a copysign operation into bit masks. Operand 0 is known to
> > - be a constant, and so has already been expanded into a vector constant. */
> > -
> > -void
> > -ix86_split_copysign_const (rtx operands[])
> > -{
> > - machine_mode mode, vmode;
> > - rtx dest, op0, mask, x;
> > -
> > - dest = operands[0];
> > - op0 = operands[1];
> > - mask = operands[3];
> > -
> > - mode = GET_MODE (dest);
> > - vmode = GET_MODE (mask);
> > -
> > - dest = lowpart_subreg (vmode, dest, mode);
> > - x = gen_rtx_AND (vmode, dest, mask);
> > - emit_insn (gen_rtx_SET (dest, x));
> > -
> > - if (op0 != CONST0_RTX (vmode))
> > - {
> > - x = gen_rtx_IOR (vmode, dest, op0);
> > - emit_insn (gen_rtx_SET (dest, x));
> > - }
> > -}
> > -
> > -/* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
> > - so we have to do two masks. */
> > -
> > -void
> > -ix86_split_copysign_var (rtx operands[])
> > -{
> > - machine_mode mode, vmode;
> > - rtx dest, scratch, op0, op1, mask, nmask, x;
> > -
> > - dest = operands[0];
> > - scratch = operands[1];
> > - op0 = operands[2];
> > - op1 = operands[3];
> > - nmask = operands[4];
> > - mask = operands[5];
> > -
> > - mode = GET_MODE (dest);
> > - vmode = GET_MODE (mask);
> > -
> > - if (rtx_equal_p (op0, op1))
> > - {
> > - /* Shouldn't happen often (it's useless, obviously), but when it does
> > - we'd generate incorrect code if we continue below. */
> > - emit_move_insn (dest, op0);
> > + emit_move_insn (operands[0], operands[1]);
> > return;
> > }
> >
> > - if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
> > - {
> > - gcc_assert (REGNO (op1) == REGNO (scratch));
> > -
> > - x = gen_rtx_AND (vmode, scratch, mask);
> > - emit_insn (gen_rtx_SET (scratch, x));
> > + dest = lowpart_subreg (vmode, operands[0], mode);
> > + op1 = lowpart_subreg (vmode, operands[2], mode);
> > + mask = ix86_build_signbit_mask (vmode, 0, 0);
> >
> > - dest = mask;
> > - op0 = lowpart_subreg (vmode, op0, mode);
> > - x = gen_rtx_NOT (vmode, dest);
> > - x = gen_rtx_AND (vmode, x, op0);
> > - emit_insn (gen_rtx_SET (dest, x));
> > - }
> > - else
> > + if (CONST_DOUBLE_P (operands[1]))
> > {
> > - if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
> > - {
> > - x = gen_rtx_AND (vmode, scratch, mask);
> > - }
> > - else /* alternative 2,4 */
> > + op0 = simplify_unary_operation (ABS, mode, operands[1], mode);
> > + /* Optimize for 0, simplify b = copy_signf (0.0f, a) to b = mask & a. */
> > + if (op0 == CONST0_RTX (mode))
> > {
> > - gcc_assert (REGNO (mask) == REGNO (scratch));
> > - op1 = lowpart_subreg (vmode, op1, mode);
> > - x = gen_rtx_AND (vmode, scratch, op1);
> > + emit_move_insn (dest, gen_rtx_AND (vmode, mask, op1));
> > + return;
> > }
> > - emit_insn (gen_rtx_SET (scratch, x));
> >
> > - if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
> > - {
> > - dest = lowpart_subreg (vmode, op0, mode);
> > - x = gen_rtx_AND (vmode, dest, nmask);
> > - }
> > - else /* alternative 3,4 */
> > - {
> > - gcc_assert (REGNO (nmask) == REGNO (dest));
> > - dest = nmask;
> > - op0 = lowpart_subreg (vmode, op0, mode);
> > - x = gen_rtx_AND (vmode, dest, op0);
> > - }
> > - emit_insn (gen_rtx_SET (dest, x));
> > + if (GET_MODE_SIZE (mode) < 16)
> > + op0 = ix86_build_const_vector (vmode, false, op0);
> > + op0 = force_reg (vmode, op0);
> > }
> > -
> > - x = gen_rtx_IOR (vmode, dest, scratch);
> > - emit_insn (gen_rtx_SET (dest, x));
> > + else
> > + op0 = lowpart_subreg (vmode, operands[1], mode);
> > +
> > + op2 = gen_reg_rtx (vmode);
> > + op3 = gen_reg_rtx (vmode);
> > + emit_move_insn (op2, gen_rtx_AND (vmode,
> > + gen_rtx_NOT (vmode, mask),
> > + op0));
> > + emit_move_insn (op3, gen_rtx_AND (vmode, mask, op1));
> > + emit_move_insn (dest, gen_rtx_IOR (vmode, op2, op3));
> > }
> >
> > /* Expand an xorsign operation. */
> > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
> > index 72644e33a92..dcae34b915e 100644
> > --- a/gcc/config/i386/i386-protos.h
> > +++ b/gcc/config/i386/i386-protos.h
> > @@ -135,8 +135,6 @@ extern void ix86_expand_fp_absneg_operator (enum rtx_code, machine_mode,
> > extern void ix86_split_fp_absneg_operator (enum rtx_code, machine_mode,
> > rtx[]);
> > extern void ix86_expand_copysign (rtx []);
> > -extern void ix86_split_copysign_const (rtx []);
> > -extern void ix86_split_copysign_var (rtx []);
> > extern void ix86_expand_xorsign (rtx []);
> > extern bool ix86_unary_operator_ok (enum rtx_code, machine_mode, rtx[]);
> > extern bool ix86_match_ccmode (rtx, machine_mode);
> > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> > index 6b4ceb2bce3..ba0058dad81 100644
> > --- a/gcc/config/i386/i386.md
> > +++ b/gcc/config/i386/i386.md
> > @@ -10861,50 +10861,6 @@ (define_expand "copysign<mode>3"
> > || (TARGET_SSE && (<MODE>mode == TFmode))"
> > "ix86_expand_copysign (operands); DONE;")
> >
> > -(define_insn_and_split "@copysign<mode>3_const"
> > - [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv")
> > - (unspec:SSEMODEF
> > - [(match_operand:<ssevecmodef> 1 "nonimm_or_0_operand" "YvmC")
> > - (match_operand:SSEMODEF 2 "register_operand" "0")
> > - (match_operand:<ssevecmodef> 3 "nonimmediate_operand" "Yvm")]
> > - UNSPEC_COPYSIGN))]
> > - "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
> > - || (TARGET_SSE && (<MODE>mode == TFmode))"
> > - "#"
> > - "&& reload_completed"
> > - [(const_int 0)]
> > - "ix86_split_copysign_const (operands); DONE;")
> > -
> > -(define_insn "@copysign<mode>3_var"
> > - [(set (match_operand:SSEMODEF 0 "register_operand" "=Yv,Yv,Yv,Yv,Yv")
> > - (unspec:SSEMODEF
> > - [(match_operand:SSEMODEF 2 "register_operand" "Yv,0,0,Yv,Yv")
> > - (match_operand:SSEMODEF 3 "register_operand" "1,1,Yv,1,Yv")
> > - (match_operand:<ssevecmodef> 4
> > - "nonimmediate_operand" "X,Yvm,Yvm,0,0")
> > - (match_operand:<ssevecmodef> 5
> > - "nonimmediate_operand" "0,Yvm,1,Yvm,1")]
> > - UNSPEC_COPYSIGN))
> > - (clobber (match_scratch:<ssevecmodef> 1 "=Yv,Yv,Yv,Yv,Yv"))]
> > - "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
> > - || (TARGET_SSE && (<MODE>mode == TFmode))"
> > - "#")
> > -
> > -(define_split
> > - [(set (match_operand:SSEMODEF 0 "register_operand")
> > - (unspec:SSEMODEF
> > - [(match_operand:SSEMODEF 2 "register_operand")
> > - (match_operand:SSEMODEF 3 "register_operand")
> > - (match_operand:<ssevecmodef> 4)
> > - (match_operand:<ssevecmodef> 5)]
> > - UNSPEC_COPYSIGN))
> > - (clobber (match_scratch:<ssevecmodef> 1))]
> > - "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
> > - || (TARGET_SSE && (<MODE>mode == TFmode)))
> > - && reload_completed"
> > - [(const_int 0)]
> > - "ix86_split_copysign_var (operands); DONE;")
> > -
> > (define_expand "xorsign<mode>3"
> > [(match_operand:MODEF 0 "register_operand")
> > (match_operand:MODEF 1 "register_operand")
> > diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c
> > index cb542d09058..0107df7741a 100644
> > --- a/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c
> > +++ b/gcc/testsuite/gcc.target/i386/avx512dq-abs-copysign-1.c
> > @@ -64,8 +64,8 @@ f6 (double x)
> > }
> >
> > /* { dg-final { scan-assembler "vandps\[^\n\r\]*xmm16" } } */
> > -/* { dg-final { scan-assembler "vorps\[^\n\r\]*xmm16" } } */
> > +/* { dg-final { scan-assembler "vpternlogd\[^\n\r\]*xmm16" } } */
> > /* { dg-final { scan-assembler "vxorps\[^\n\r\]*xmm16" } } */
> > /* { dg-final { scan-assembler "vandpd\[^\n\r\]*xmm18" } } */
> > -/* { dg-final { scan-assembler "vorpd\[^\n\r\]*xmm18" } } */
> > +/* { dg-final { scan-assembler "vpternlogq\[^\n\r\]*xmm18" } } */
> > /* { dg-final { scan-assembler "vxorpd\[^\n\r\]*xmm18" } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c
> > index b375c5fad80..b27335b9d99 100644
> > --- a/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c
> > +++ b/gcc/testsuite/gcc.target/i386/avx512vl-abs-copysign-1.c
> > @@ -64,8 +64,8 @@ f6 (double x)
> > }
> >
> > /* { dg-final { scan-assembler "vpandd\[^\n\r\]*xmm16" } } */
> > -/* { dg-final { scan-assembler "vpord\[^\n\r\]*xmm16" } } */
> > +/* { dg-final { scan-assembler "vpternlogd\[^\n\r\]*xmm16" } } */
> > /* { dg-final { scan-assembler "vpxord\[^\n\r\]*xmm16" } } */
> > /* { dg-final { scan-assembler "vpandq\[^\n\r\]*xmm18" } } */
> > -/* { dg-final { scan-assembler "vporq\[^\n\r\]*xmm18" } } */
> > +/* { dg-final { scan-assembler "vpternlogq\[^\n\r\]*xmm18" } } */
> > /* { dg-final { scan-assembler "vpxorq\[^\n\r\]*xmm18" } } */
> > --
> > 2.27.0
> >
>
>
> --
> BR,
> Hongtao
--
BR,
Hongtao
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2021-09-10 4:59 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-09 7:54 [PATCH] [i386] Remove copysign post_reload splitter for scalar modes liuhongt
2021-09-09 8:00 ` Hongtao Liu
2021-09-10 5:04 ` Hongtao Liu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).