public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v1] LoongArch: Remove the symbolic extension instruction due to the SLT directive.
@ 2023-08-25  4:01 Lulu Cheng
  2023-08-25  4:16 ` WANG Xuerui
  0 siblings, 1 reply; 3+ messages in thread
From: Lulu Cheng @ 2023-08-25  4:01 UTC (permalink / raw)
  To: gcc-patches; +Cc: xry111, i, xuchenghua, Lulu Cheng

Since the slt instruction does not distinguish between 32-bit and 64-bit operations
under the LoongArch 64-bit architecture, if the operands of slt are of SImode, they
must be sign-extended before the operation.

However, in cases like the following test case, the sign extension can be omitted:

	extern int src1, src2, src3;

	int
	test (void)
	{
	  int data1 = src1 + src2;
	  int data2 = src1 + src3;
	  return data1 > data2 ? data1 : data2;
	}
Assembly code before optimization:
 	...
	add.w	$r4,$r4,$r14
	add.w	$r13,$r13,$r14
	slli.w	$r12,$r4,0
	slli.w	$r14,$r13,0
	slt	$r12,$r12,$r14
	masknez	$r4,$r4,$r12
	maskeqz	$r12,$r13,$r12
	or	$r4,$r4,$r12
	slli.w	$r4,$r4,0
	...

After optimization:
	...
	add.w	$r12,$r12,$r14
	add.w	$r13,$r13,$r14
	slt	$r4,$r12,$r13
	masknez	$r12,$r12,$r4
	maskeqz	$r4,$r13,$r4
	or	$r4,$r12,$r4
	...

In this example, both operands of slt are produced by the addition
instruction "add.w", which implicitly sign-extends its result, so the two
operands of slt do not need to be sign-extended again.

gcc/ChangeLog:

	* config/loongarch/loongarch.cc (loongarch_expand_conditional_move):
	Optimize the function implementation.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/slt-sign-extend.c: New test.
---
 gcc/config/loongarch/loongarch.cc             | 53 +++++++++++++++++--
 .../gcc.target/loongarch/slt-sign-extend.c    | 14 +++++
 2 files changed, 63 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 86d58784113..1905599b9e8 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4384,14 +4384,30 @@ loongarch_expand_conditional_move (rtx *operands)
   enum rtx_code code = GET_CODE (operands[1]);
   rtx op0 = XEXP (operands[1], 0);
   rtx op1 = XEXP (operands[1], 1);
+  rtx op0_extend = op0;
+  rtx op1_extend = op1;
+
+  /* Record whether operands[2] and operands[3] modes are promoted to word_mode.  */
+  bool promote_p = false;
+  machine_mode mode = GET_MODE (operands[0]);
 
   if (FLOAT_MODE_P (GET_MODE (op1)))
     loongarch_emit_float_compare (&code, &op0, &op1);
   else
     {
+      if ((REGNO (op0) == REGNO (operands[2])
+	   || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
+	  && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
+	{
+	  mode = word_mode;
+	  promote_p = true;
+	}
+
       loongarch_extend_comparands (code, &op0, &op1);
 
       op0 = force_reg (word_mode, op0);
+      op0_extend = op0;
+      op1_extend = force_reg (word_mode, op1);
 
       if (code == EQ || code == NE)
 	{
@@ -4418,23 +4434,52 @@ loongarch_expand_conditional_move (rtx *operands)
       && register_operand (operands[2], VOIDmode)
       && register_operand (operands[3], VOIDmode))
     {
-      machine_mode mode = GET_MODE (operands[0]);
+      rtx op2 = operands[2];
+      rtx op3 = operands[3];
+
+      if (promote_p)
+	{
+	  if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
+	    op2 = op0_extend;
+	  else
+	    {
+	      loongarch_extend_comparands (code, &op2, &const0_rtx);
+	      op2 = force_reg (mode, op2);
+	    }
+
+	  if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
+	    op3 = op1_extend;
+	  else
+	    {
+	      loongarch_extend_comparands (code, &op3, &const0_rtx);
+	      op3 = force_reg (mode, op3);
+	    }
+	}
+
       rtx temp = gen_reg_rtx (mode);
       rtx temp2 = gen_reg_rtx (mode);
 
       emit_insn (gen_rtx_SET (temp,
 			      gen_rtx_IF_THEN_ELSE (mode, cond,
-						    operands[2], const0_rtx)));
+						    op2, const0_rtx)));
 
       /* Flip the test for the second operand.  */
       cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE (op0), op0, op1);
 
       emit_insn (gen_rtx_SET (temp2,
 			      gen_rtx_IF_THEN_ELSE (mode, cond,
-						    operands[3], const0_rtx)));
+						    op3, const0_rtx)));
 
       /* Merge the two results, at least one is guaranteed to be zero.  */
-      emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
+      if (promote_p)
+	{
+	  rtx temp3 = gen_reg_rtx (mode);
+	  emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
+	  temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
+	  loongarch_emit_move (operands[0], temp3);
+	}
+      else
+	emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
     }
   else
     emit_insn (gen_rtx_SET (operands[0],
diff --git a/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
new file mode 100644
index 00000000000..3863db79aaf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2" } */
+/* { dg-final { scan-assembler-not "slli.w" } } */
+
+extern int src1, src2, src3;
+
+int
+test (void)
+{
+  int data1 = src1 + src2;
+  int data2 = src1 + src3;
+
+  return test1 > test2 ? test1 : test2;
+}
-- 
2.31.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v1] LoongArch: Remove the symbolic extension instruction due to the SLT directive.
  2023-08-25  4:01 [PATCH v1] LoongArch: Remove the symbolic extension instruction due to the SLT directive Lulu Cheng
@ 2023-08-25  4:16 ` WANG Xuerui
  2023-08-25  6:05   ` chenglulu
  0 siblings, 1 reply; 3+ messages in thread
From: WANG Xuerui @ 2023-08-25  4:16 UTC (permalink / raw)
  To: Lulu Cheng, gcc-patches; +Cc: xry111, xuchenghua

On 8/25/23 12:01, Lulu Cheng wrote:
> Since the slt instruction does not distinguish between 32-bit and 64-bit operations
> under the LoongArch 64-bit architecture, if the operands of slt are of SImode, symbol
> expansion is required before operation.
Hint:“符号扩展” is "sign extension" (as noun) or "sign-extend" (as verb), 
not "symbol expansion".
>
> But similar to the following test case, symbol expansion can be omitted:
>
> 	extern int src1, src2, src3;
>
> 	int
> 	test (void)
> 	{
> 	  int data1 = src1 + src2;
> 	  int data2 = src1 + src3;
> 	  return test1 > test2 ? test1 : test2;
> 	}
> Assembly code before optimization:
>   	...
> 	add.w	$r4,$r4,$r14
> 	add.w	$r13,$r13,$r14
> 	slli.w	$r12,$r4,0
> 	slli.w	$r14,$r13,0
> 	slt	$r12,$r12,$r14
> 	masknez	$r4,$r4,$r12
> 	maskeqz	$r12,$r13,$r12
> 	or	$r4,$r4,$r12
> 	slli.w	$r4,$r4,0
> 	...
>
> After optimization:
> 	...
> 	add.w	$r12,$r12,$r14
> 	add.w	$r13,$r13,$r14
> 	slt	$r4,$r12,$r13
> 	masknez	$r12,$r12,$r4
> 	maskeqz	$r4,$r13,$r4
> 	or	$r4,$r12,$r4
> 	...
>
> Similar to this test example, the two operands of SLT are obtained by the
> addition operation, and the addition operation "add.w" is an implicit
> symbolic extension function, so the two operands of SLT do not require

more naturally: "and add.w implicitly sign-extends" -- brevity is often 
desirable and clearer ;-)

> symbolic expansion.
>
> gcc/ChangeLog:
>
> 	* config/loongarch/loongarch.cc (loongarch_expand_conditional_move):
> 	Optimize the function implementation.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/loongarch/slt-sign-extend.c: New test.
> ---
>   gcc/config/loongarch/loongarch.cc             | 53 +++++++++++++++++--
>   .../gcc.target/loongarch/slt-sign-extend.c    | 14 +++++
>   2 files changed, 63 insertions(+), 4 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
>
> diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
> index 86d58784113..1905599b9e8 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -4384,14 +4384,30 @@ loongarch_expand_conditional_move (rtx *operands)
>     enum rtx_code code = GET_CODE (operands[1]);
>     rtx op0 = XEXP (operands[1], 0);
>     rtx op1 = XEXP (operands[1], 1);
> +  rtx op0_extend = op0;
> +  rtx op1_extend = op1;
> +
> +  /* Record whether operands[2] and operands[3] modes are promoted to word_mode.  */
> +  bool promote_p = false;
> +  machine_mode mode = GET_MODE (operands[0]);
>   
>     if (FLOAT_MODE_P (GET_MODE (op1)))
>       loongarch_emit_float_compare (&code, &op0, &op1);
>     else
>       {
> +      if ((REGNO (op0) == REGNO (operands[2])
> +	   || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
> +	  && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
> +	{
> +	  mode = word_mode;
> +	  promote_p = true;
> +	}
> +
>         loongarch_extend_comparands (code, &op0, &op1);
>   
>         op0 = force_reg (word_mode, op0);
> +      op0_extend = op0;
> +      op1_extend = force_reg (word_mode, op1);
>   
>         if (code == EQ || code == NE)
>   	{
> @@ -4418,23 +4434,52 @@ loongarch_expand_conditional_move (rtx *operands)
>         && register_operand (operands[2], VOIDmode)
>         && register_operand (operands[3], VOIDmode))
>       {
> -      machine_mode mode = GET_MODE (operands[0]);
> +      rtx op2 = operands[2];
> +      rtx op3 = operands[3];
> +
> +      if (promote_p)
> +	{
> +	  if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
> +	    op2 = op0_extend;
> +	  else
> +	    {
> +	      loongarch_extend_comparands (code, &op2, &const0_rtx);
> +	      op2 = force_reg (mode, op2);
> +	    }
> +
> +	  if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
> +	    op3 = op1_extend;
> +	  else
> +	    {
> +	      loongarch_extend_comparands (code, &op3, &const0_rtx);
> +	      op3 = force_reg (mode, op3);
> +	    }
> +	}
> +
>         rtx temp = gen_reg_rtx (mode);
>         rtx temp2 = gen_reg_rtx (mode);
>   
>         emit_insn (gen_rtx_SET (temp,
>   			      gen_rtx_IF_THEN_ELSE (mode, cond,
> -						    operands[2], const0_rtx)));
> +						    op2, const0_rtx)));
>   
>         /* Flip the test for the second operand.  */
>         cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE (op0), op0, op1);
>   
>         emit_insn (gen_rtx_SET (temp2,
>   			      gen_rtx_IF_THEN_ELSE (mode, cond,
> -						    operands[3], const0_rtx)));
> +						    op3, const0_rtx)));
>   
>         /* Merge the two results, at least one is guaranteed to be zero.  */
> -      emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
> +      if (promote_p)
> +	{
> +	  rtx temp3 = gen_reg_rtx (mode);
> +	  emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
> +	  temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
> +	  loongarch_emit_move (operands[0], temp3);
> +	}
> +      else
> +	emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
>       }
>     else
>       emit_insn (gen_rtx_SET (operands[0],
> diff --git a/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
> new file mode 100644
> index 00000000000..3863db79aaf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mabi=lp64d -O2" } */
> +/* { dg-final { scan-assembler-not "slli.w" } } */
> +
> +extern int src1, src2, src3;
> +
> +int
> +test (void)
> +{
> +  int data1 = src1 + src2;
> +  int data2 = src1 + src3;
> +
> +  return test1 > test2 ? test1 : test2;
> +}
Otherwise I think this is okay, and nice catch! ;-)

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v1] LoongArch: Remove the symbolic extension instruction due to the SLT directive.
  2023-08-25  4:16 ` WANG Xuerui
@ 2023-08-25  6:05   ` chenglulu
  0 siblings, 0 replies; 3+ messages in thread
From: chenglulu @ 2023-08-25  6:05 UTC (permalink / raw)
  To: WANG Xuerui, gcc-patches; +Cc: xry111, xuchenghua


在 2023/8/25 下午12:16, WANG Xuerui 写道:
> On 8/25/23 12:01, Lulu Cheng wrote:
>> Since the slt instruction does not distinguish between 32-bit and 
>> 64-bit operations
>> under the LoongArch 64-bit architecture, if the operands of slt are 
>> of SImode, symbol
>> expansion is required before operation.
> Hint:“符号扩展” is "sign extension" (as noun) or "sign-extend" (as verb), 
> not "symbol expansion".
>>
>> But similar to the following test case, symbol expansion can be omitted:
>>
>>     extern int src1, src2, src3;
>>
>>     int
>>     test (void)
>>     {
>>       int data1 = src1 + src2;
>>       int data2 = src1 + src3;
>>       return test1 > test2 ? test1 : test2;
>>     }
>> Assembly code before optimization:
>>       ...
>>     add.w    $r4,$r4,$r14
>>     add.w    $r13,$r13,$r14
>>     slli.w    $r12,$r4,0
>>     slli.w    $r14,$r13,0
>>     slt    $r12,$r12,$r14
>>     masknez    $r4,$r4,$r12
>>     maskeqz    $r12,$r13,$r12
>>     or    $r4,$r4,$r12
>>     slli.w    $r4,$r4,0
>>     ...
>>
>> After optimization:
>>     ...
>>     add.w    $r12,$r12,$r14
>>     add.w    $r13,$r13,$r14
>>     slt    $r4,$r12,$r13
>>     masknez    $r12,$r12,$r4
>>     maskeqz    $r4,$r13,$r4
>>     or    $r4,$r12,$r4
>>     ...
>>
>> Similar to this test example, the two operands of SLT are obtained by 
>> the
>> addition operation, and the addition operation "add.w" is an implicit
>> symbolic extension function, so the two operands of SLT do not require
>
> more naturally: "and add.w implicitly sign-extends" -- brevity are 
> often desired and clearer ;-)

Sorry I'll revise it soon!

Thanks!:-)

>
>> symbolic expansion.
>>
>> gcc/ChangeLog:
>>
>>     * config/loongarch/loongarch.cc (loongarch_expand_conditional_move):
>>     Optimize the function implementation.
>>
>> gcc/testsuite/ChangeLog:
>>
>>     * gcc.target/loongarch/slt-sign-extend.c: New test.
>> ---
>>   gcc/config/loongarch/loongarch.cc             | 53 +++++++++++++++++--
>>   .../gcc.target/loongarch/slt-sign-extend.c    | 14 +++++
>>   2 files changed, 63 insertions(+), 4 deletions(-)
>>   create mode 100644 
>> gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
>>
>> diff --git a/gcc/config/loongarch/loongarch.cc 
>> b/gcc/config/loongarch/loongarch.cc
>> index 86d58784113..1905599b9e8 100644
>> --- a/gcc/config/loongarch/loongarch.cc
>> +++ b/gcc/config/loongarch/loongarch.cc
>> @@ -4384,14 +4384,30 @@ loongarch_expand_conditional_move (rtx 
>> *operands)
>>     enum rtx_code code = GET_CODE (operands[1]);
>>     rtx op0 = XEXP (operands[1], 0);
>>     rtx op1 = XEXP (operands[1], 1);
>> +  rtx op0_extend = op0;
>> +  rtx op1_extend = op1;
>> +
>> +  /* Record whether operands[2] and operands[3] modes are promoted 
>> to word_mode.  */
>> +  bool promote_p = false;
>> +  machine_mode mode = GET_MODE (operands[0]);
>>       if (FLOAT_MODE_P (GET_MODE (op1)))
>>       loongarch_emit_float_compare (&code, &op0, &op1);
>>     else
>>       {
>> +      if ((REGNO (op0) == REGNO (operands[2])
>> +       || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
>> +      && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
>> +    {
>> +      mode = word_mode;
>> +      promote_p = true;
>> +    }
>> +
>>         loongarch_extend_comparands (code, &op0, &op1);
>>           op0 = force_reg (word_mode, op0);
>> +      op0_extend = op0;
>> +      op1_extend = force_reg (word_mode, op1);
>>           if (code == EQ || code == NE)
>>       {
>> @@ -4418,23 +4434,52 @@ loongarch_expand_conditional_move (rtx 
>> *operands)
>>         && register_operand (operands[2], VOIDmode)
>>         && register_operand (operands[3], VOIDmode))
>>       {
>> -      machine_mode mode = GET_MODE (operands[0]);
>> +      rtx op2 = operands[2];
>> +      rtx op3 = operands[3];
>> +
>> +      if (promote_p)
>> +    {
>> +      if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
>> +        op2 = op0_extend;
>> +      else
>> +        {
>> +          loongarch_extend_comparands (code, &op2, &const0_rtx);
>> +          op2 = force_reg (mode, op2);
>> +        }
>> +
>> +      if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
>> +        op3 = op1_extend;
>> +      else
>> +        {
>> +          loongarch_extend_comparands (code, &op3, &const0_rtx);
>> +          op3 = force_reg (mode, op3);
>> +        }
>> +    }
>> +
>>         rtx temp = gen_reg_rtx (mode);
>>         rtx temp2 = gen_reg_rtx (mode);
>>           emit_insn (gen_rtx_SET (temp,
>>                     gen_rtx_IF_THEN_ELSE (mode, cond,
>> -                            operands[2], const0_rtx)));
>> +                            op2, const0_rtx)));
>>           /* Flip the test for the second operand.  */
>>         cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE 
>> (op0), op0, op1);
>>           emit_insn (gen_rtx_SET (temp2,
>>                     gen_rtx_IF_THEN_ELSE (mode, cond,
>> -                            operands[3], const0_rtx)));
>> +                            op3, const0_rtx)));
>>           /* Merge the two results, at least one is guaranteed to be 
>> zero.  */
>> -      emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, 
>> temp2)));
>> +      if (promote_p)
>> +    {
>> +      rtx temp3 = gen_reg_rtx (mode);
>> +      emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
>> +      temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
>> +      loongarch_emit_move (operands[0], temp3);
>> +    }
>> +      else
>> +    emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, 
>> temp2)));
>>       }
>>     else
>>       emit_insn (gen_rtx_SET (operands[0],
>> diff --git a/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c 
>> b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
>> new file mode 100644
>> index 00000000000..3863db79aaf
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
>> @@ -0,0 +1,14 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-mabi=lp64d -O2" } */
>> +/* { dg-final { scan-assembler-not "slli.w" } } */
>> +
>> +extern int src1, src2, src3;
>> +
>> +int
>> +test (void)
>> +{
>> +  int data1 = src1 + src2;
>> +  int data2 = src1 + src3;
>> +
>> +  return test1 > test2 ? test1 : test2;
>> +}
> Otherwise I think this is okay, and nice catch! ;-)


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-08-25  6:05 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-25  4:01 [PATCH v1] LoongArch: Remove the symbolic extension instruction due to the SLT directive Lulu Cheng
2023-08-25  4:16 ` WANG Xuerui
2023-08-25  6:05   ` chenglulu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).