* [PATCH v2] LoongArch: Remove redundant sign extension instructions caused by SLT instructions.
@ 2023-08-25 9:31 Lulu Cheng
2023-08-28 2:37 ` [pushed][PATCH " chenglulu
0 siblings, 1 reply; 2+ messages in thread
From: Lulu Cheng @ 2023-08-25 9:31 UTC (permalink / raw)
To: gcc-patches; +Cc: xry111, i, xuchenghua, Lulu Cheng
v1 -> v2:
1. Revise the patch description.
Under the 64-bit LoongArch architecture the SLT instruction does not distinguish
between 64-bit and 32-bit operations, so when an operand of slt is in SImode the
operand must be explicitly sign-extended first.
However, in cases like the test case below, that sign extension is redundant:
extern int src1, src2, src3;
int
test (void)
{
int data1 = src1 + src2;
int data2 = src1 + src3;
return data1 > data2 ? data1 : data2;
}
Assembly code before optimization:
...
add.w $r4,$r4,$r14
add.w $r13,$r13,$r14
slli.w $r12,$r4,0
slli.w $r14,$r13,0
slt $r12,$r12,$r14
masknez $r4,$r4,$r12
maskeqz $r12,$r13,$r12
or $r4,$r4,$r12
slli.w $r4,$r4,0
...
After optimization:
...
add.w $r12,$r12,$r14
add.w $r13,$r13,$r14
slt $r4,$r12,$r13
masknez $r12,$r12,$r4
maskeqz $r4,$r13,$r4
or $r4,$r12,$r4
...
In this test case, both operands of SLT are produced by add.w, which
implicitly sign-extends its result, so the operands of SLT do not need an
additional sign extension.
gcc/ChangeLog:
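* config/loongarch/loongarch.cc (loongarch_expand_conditional_move):
Promote the conditional-move operands to word_mode and reuse the
already sign-extended comparison operands, so that redundant sign
extensions are not emitted.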
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/slt-sign-extend.c: New test.
---
gcc/config/loongarch/loongarch.cc | 53 +++++++++++++++++--
.../gcc.target/loongarch/slt-sign-extend.c | 14 +++++
2 files changed, 63 insertions(+), 4 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 86d58784113..1905599b9e8 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4384,14 +4384,30 @@ loongarch_expand_conditional_move (rtx *operands)
enum rtx_code code = GET_CODE (operands[1]);
rtx op0 = XEXP (operands[1], 0);
rtx op1 = XEXP (operands[1], 1);
+ rtx op0_extend = op0;
+ rtx op1_extend = op1;
+
+ /* Record whether operands[2] and operands[3] modes are promoted to word_mode. */
+ bool promote_p = false;
+ machine_mode mode = GET_MODE (operands[0]);
if (FLOAT_MODE_P (GET_MODE (op1)))
loongarch_emit_float_compare (&code, &op0, &op1);
else
{
+ if ((REGNO (op0) == REGNO (operands[2])
+ || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
+ && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
+ {
+ mode = word_mode;
+ promote_p = true;
+ }
+
loongarch_extend_comparands (code, &op0, &op1);
op0 = force_reg (word_mode, op0);
+ op0_extend = op0;
+ op1_extend = force_reg (word_mode, op1);
if (code == EQ || code == NE)
{
@@ -4418,23 +4434,52 @@ loongarch_expand_conditional_move (rtx *operands)
&& register_operand (operands[2], VOIDmode)
&& register_operand (operands[3], VOIDmode))
{
- machine_mode mode = GET_MODE (operands[0]);
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+
+ if (promote_p)
+ {
+ if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
+ op2 = op0_extend;
+ else
+ {
+ loongarch_extend_comparands (code, &op2, &const0_rtx);
+ op2 = force_reg (mode, op2);
+ }
+
+ if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
+ op3 = op1_extend;
+ else
+ {
+ loongarch_extend_comparands (code, &op3, &const0_rtx);
+ op3 = force_reg (mode, op3);
+ }
+ }
+
rtx temp = gen_reg_rtx (mode);
rtx temp2 = gen_reg_rtx (mode);
emit_insn (gen_rtx_SET (temp,
gen_rtx_IF_THEN_ELSE (mode, cond,
- operands[2], const0_rtx)));
+ op2, const0_rtx)));
/* Flip the test for the second operand. */
cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE (op0), op0, op1);
emit_insn (gen_rtx_SET (temp2,
gen_rtx_IF_THEN_ELSE (mode, cond,
- operands[3], const0_rtx)));
+ op3, const0_rtx)));
/* Merge the two results, at least one is guaranteed to be zero. */
- emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
+ if (promote_p)
+ {
+ rtx temp3 = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
+ temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
+ loongarch_emit_move (operands[0], temp3);
+ }
+ else
+ emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
}
else
emit_insn (gen_rtx_SET (operands[0],
diff --git a/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
new file mode 100644
index 00000000000..ea6b28b7c45
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2" } */
+/* { dg-final { scan-assembler-not "slli.w" } } */
+
+extern int src1, src2, src3;
+
+int
+test (void)
+{
+ int data1 = src1 + src2;
+ int data2 = src1 + src3;
+
+ return data1 > data2 ? data1 : data2;
+}
--
2.31.1
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [pushed][PATCH v2] LoongArch: Remove redundant sign extension instructions caused by SLT instructions.
2023-08-25 9:31 [PATCH v2] LoongArch: Remove redundant sign extension instructions caused by SLT instructions Lulu Cheng
@ 2023-08-28 2:37 ` chenglulu
0 siblings, 0 replies; 2+ messages in thread
From: chenglulu @ 2023-08-28 2:37 UTC (permalink / raw)
To: gcc-patches; +Cc: xry111, i, xuchenghua
Pushed to r14-3511.
在 2023/8/25 下午5:31, Lulu Cheng 写道:
> v1 -> v2:
> 1. Modify description information
>
>
> Since the SLT instruction does not distinguish between 64-bit operations and 32-bit
> operations under the 64-bit LoongArch architecture, if the operand of slt is SImode,
> the sign extension of the operand needs to be displayed.
>
> But similar to the test case below, the sign extension is redundant:
>
> extern int src1, src2, src3;
>
> int
> test (void)
> {
> int data1 = src1 + src2;
> int data2 = src1 + src3;
> return data1 > data2 ? data1 : data2;
> }
> Assembly code before optimization:
> ...
> add.w $r4,$r4,$r14
> add.w $r13,$r13,$r14
> slli.w $r12,$r4,0
> slli.w $r14,$r13,0
> slt $r12,$r12,$r14
> masknez $r4,$r4,$r12
> maskeqz $r12,$r13,$r12
> or $r4,$r4,$r12
> slli.w $r4,$r4,0
> ...
>
> After optimization:
> ...
> add.w $r12,$r12,$r14
> add.w $r13,$r13,$r14
> slt $r4,$r12,$r13
> masknez $r12,$r12,$r4
> maskeqz $r4,$r13,$r4
> or $r4,$r12,$r4
> ...
>
> Similar to this test example, the two operands of SLT are obtained by the
> addition operation, and add.w implicitly sign-extends, so the two operands
> of SLT do not require sign-extend.
>
> gcc/ChangeLog:
>
> * config/loongarch/loongarch.cc (loongarch_expand_conditional_move):
> Optimize the function implementation.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/loongarch/slt-sign-extend.c: New test.
> ---
> gcc/config/loongarch/loongarch.cc | 53 +++++++++++++++++--
> .../gcc.target/loongarch/slt-sign-extend.c | 14 +++++
> 2 files changed, 63 insertions(+), 4 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
>
> diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
> index 86d58784113..1905599b9e8 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -4384,14 +4384,30 @@ loongarch_expand_conditional_move (rtx *operands)
> enum rtx_code code = GET_CODE (operands[1]);
> rtx op0 = XEXP (operands[1], 0);
> rtx op1 = XEXP (operands[1], 1);
> + rtx op0_extend = op0;
> + rtx op1_extend = op1;
> +
> + /* Record whether operands[2] and operands[3] modes are promoted to word_mode. */
> + bool promote_p = false;
> + machine_mode mode = GET_MODE (operands[0]);
>
> if (FLOAT_MODE_P (GET_MODE (op1)))
> loongarch_emit_float_compare (&code, &op0, &op1);
> else
> {
> + if ((REGNO (op0) == REGNO (operands[2])
> + || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
> + && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
> + {
> + mode = word_mode;
> + promote_p = true;
> + }
> +
> loongarch_extend_comparands (code, &op0, &op1);
>
> op0 = force_reg (word_mode, op0);
> + op0_extend = op0;
> + op1_extend = force_reg (word_mode, op1);
>
> if (code == EQ || code == NE)
> {
> @@ -4418,23 +4434,52 @@ loongarch_expand_conditional_move (rtx *operands)
> && register_operand (operands[2], VOIDmode)
> && register_operand (operands[3], VOIDmode))
> {
> - machine_mode mode = GET_MODE (operands[0]);
> + rtx op2 = operands[2];
> + rtx op3 = operands[3];
> +
> + if (promote_p)
> + {
> + if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
> + op2 = op0_extend;
> + else
> + {
> + loongarch_extend_comparands (code, &op2, &const0_rtx);
> + op2 = force_reg (mode, op2);
> + }
> +
> + if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
> + op3 = op1_extend;
> + else
> + {
> + loongarch_extend_comparands (code, &op3, &const0_rtx);
> + op3 = force_reg (mode, op3);
> + }
> + }
> +
> rtx temp = gen_reg_rtx (mode);
> rtx temp2 = gen_reg_rtx (mode);
>
> emit_insn (gen_rtx_SET (temp,
> gen_rtx_IF_THEN_ELSE (mode, cond,
> - operands[2], const0_rtx)));
> + op2, const0_rtx)));
>
> /* Flip the test for the second operand. */
> cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE (op0), op0, op1);
>
> emit_insn (gen_rtx_SET (temp2,
> gen_rtx_IF_THEN_ELSE (mode, cond,
> - operands[3], const0_rtx)));
> + op3, const0_rtx)));
>
> /* Merge the two results, at least one is guaranteed to be zero. */
> - emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
> + if (promote_p)
> + {
> + rtx temp3 = gen_reg_rtx (mode);
> + emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
> + temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
> + loongarch_emit_move (operands[0], temp3);
> + }
> + else
> + emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
> }
> else
> emit_insn (gen_rtx_SET (operands[0],
> diff --git a/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
> new file mode 100644
> index 00000000000..ea6b28b7c45
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mabi=lp64d -O2" } */
> +/* { dg-final { scan-assembler-not "slli.w" } } */
> +
> +extern int src1, src2, src3;
> +
> +int
> +test (void)
> +{
> + int data1 = src1 + src2;
> + int data2 = src1 + src3;
> +
> + return data1 > data2 ? data1 : data2;
> +}
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2023-08-28 2:37 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-25 9:31 [PATCH v2] LoongArch: Remove redundant sign extension instructions caused by SLT instructions Lulu Cheng
2023-08-28 2:37 ` [pushed][PATCH " chenglulu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).