[PATCH] LoongArch: Optimize the loading of immediate numbers with the same high and low 32-bit values

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH] LoongArch: Optimize the loading of immediate numbers with the same high and low 32-bit values
@ 2023-11-18  6:59 Guo Jie
  2023-11-18  9:09 ` Xi Ruoyao
  2023-11-27  2:49 ` chenglulu
  0 siblings, 2 replies; 4+ messages in thread
From: Guo Jie @ 2023-11-18  6:59 UTC (permalink / raw)
  To: gcc-patches; +Cc: xuchenghua, chenglulu, i, xry111, Guo Jie

For the following immediate load operation in gcc/testsuite/gcc.target/loongarch/imm-load1.c:

	long long r = 0x0101010101010101;

Before this patch:

	lu12i.w	    $r15,16842752>>12
	ori	    $r15,$r15,257
	lu32i.d	    $r15,0x1010100000000>>32
	lu52i.d	    $r15,$r15,0x100000000000000>>52

After this patch:

	lu12i.w     $r15,16842752>>12
	ori         $r15,$r15,257
	bstrins.d   $r15,$r15,63,32

gcc/ChangeLog:

	* config/loongarch/loongarch.cc
	(enum loongarch_load_imm_method): Add new method.
	(loongarch_build_integer): Add relevant implementations for
	new method.
	(loongarch_move_integer): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/imm-load1.c: Change old check.
---
 gcc/config/loongarch/loongarch.cc             | 22 ++++++++++++++++++-
 .../gcc.target/loongarch/imm-load1.c          |  3 ++-
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index d05743bec87..58c00344d09 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -142,12 +142,16 @@ struct loongarch_address_info
 
    METHOD_LU52I:
      Load 52-63 bit of the immediate number.
+
+   METHOD_MIRROR:
+     Copy 0-31 bit of the immediate number to 32-63 bit.
 */
 enum loongarch_load_imm_method
 {
   METHOD_NORMAL,
   METHOD_LU32I,
-  METHOD_LU52I
+  METHOD_LU52I,
+  METHOD_MIRROR
 };
 
 struct loongarch_integer_op
@@ -1556,11 +1560,23 @@ loongarch_build_integer (struct loongarch_integer_op *codes,
 
       int sign31 = (value & (HOST_WIDE_INT_1U << 31)) >> 31;
       int sign51 = (value & (HOST_WIDE_INT_1U << 51)) >> 51;
+
+      unsigned HOST_WIDE_INT hival = value >> 32;
+      unsigned HOST_WIDE_INT loval = value << 32 >> 32;
+
       /* Determine whether the upper 32 bits are sign-extended from the lower
 	 32 bits. If it is, the instructions to load the high order can be
 	 ommitted.  */
       if (lu32i[sign31] && lu52i[sign31])
 	return cost;
+      /* If the lower 32 bits are the same as the upper 32 bits, just copy
+	 the lower 32 bits to the upper 32 bits.  */
+      else if (loval == hival)
+	{
+	  codes[cost].method = METHOD_MIRROR;
+	  codes[cost].curr_value = value;
+	  return cost + 1;
+	}
       /* Determine whether bits 32-51 are sign-extended from the lower 32
 	 bits. If so, directly load 52-63 bits.  */
       else if (lu32i[sign31])
@@ -3230,6 +3246,10 @@ loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value)
 			   gen_rtx_AND (DImode, x, GEN_INT (0xfffffffffffff)),
 			   GEN_INT (codes[i].value));
 	  break;
+	case METHOD_MIRROR:
+	  gcc_assert (mode == DImode);
+	  emit_insn (gen_insvdi (x, GEN_INT (32), GEN_INT (32), x));
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load1.c b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
index 2ff02971239..f64cc2956a3 100644
--- a/gcc/testsuite/gcc.target/loongarch/imm-load1.c
+++ b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-mabi=lp64d -O2" } */
-/* { dg-final { scan-assembler "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */
+/* { dg-final { scan-assembler-not "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */
+/* { dg-final { scan-assembler "test:.*lu12i\.w.*\n\tbstrins\.d.*\n\.L2:" } } */
 
 
 extern long long b[10];
-- 
2.20.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] LoongArch: Optimize the loading of immediate numbers with the same high and low 32-bit values
  2023-11-18  6:59 [PATCH] LoongArch: Optimize the loading of immediate numbers with the same high and low 32-bit values Guo Jie
@ 2023-11-18  9:09 ` Xi Ruoyao
  2023-11-20  8:29   ` Guo Jie
  2023-11-27  2:49 ` chenglulu
  1 sibling, 1 reply; 4+ messages in thread
From: Xi Ruoyao @ 2023-11-18  9:09 UTC (permalink / raw)
  To: Guo Jie, gcc-patches; +Cc: xuchenghua, chenglulu, i

On Sat, 2023-11-18 at 14:59 +0800, Guo Jie wrote:
> For the following immediate load operation in gcc/testsuite/gcc.target/loongarch/imm-load1.c:
> 
> 	long long r = 0x0101010101010101;
> 
> Before this patch:
> 
> 	lu12i.w	    $r15,16842752>>12
> 	ori	    $r15,$r15,257
> 	lu32i.d	    $r15,0x1010100000000>>32
> 	lu52i.d	    $r15,$r15,0x100000000000000>>52
> 
> After this patch:
> 
> 	lu12i.w     $r15,16842752>>12
> 	ori         $r15,$r15,257
> 	bstrins.d   $r15,$r15,63,32
> 
> gcc/ChangeLog:
> 
> 	* config/loongarch/loongarch.cc (enum loongarch_load_imm_method): Add new method.
> 	(loongarch_build_integer): Add relevant implementations for new method.
> 	(loongarch_move_integer): Ditto.

IIRC ChangeLog lines should be wrapped at 72 characters.

/* snip */

>  struct loongarch_integer_op
> @@ -1556,11 +1560,23 @@ loongarch_build_integer (struct loongarch_integer_op *codes,
>  
>        int sign31 = (value & (HOST_WIDE_INT_1U << 31)) >> 31;
>        int sign51 = (value & (HOST_WIDE_INT_1U << 51)) >> 51;
> +
> +      unsigned HOST_WIDE_INT hival = value >> 32;
> +      unsigned HOST_WIDE_INT loval = value << 32 >> 32;

Use

uint32_t hival = (uint32_t) (value >> 32);
uint32_t loval = (uint32_t) value;

instead, because "value << 32" may trigger a left shift of a negative
value.

C++11 doesn't allow left-shifting any negative value.  Yes, it's allowed
as a GCC extension and it's also allowed by C++23, but the GCC codebase is
still C++11.  So it may break GCC if bootstrapping from a different
compiler, and --with-build-config=bootstrap-ubsan will complain.

Otherwise LGTM.

-- 
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] LoongArch: Optimize the loading of immediate numbers with the same high and low 32-bit values
  2023-11-18  9:09 ` Xi Ruoyao
@ 2023-11-20  8:29   ` Guo Jie
  0 siblings, 0 replies; 4+ messages in thread
From: Guo Jie @ 2023-11-20  8:29 UTC (permalink / raw)
  To: Xi Ruoyao, gcc-patches; +Cc: xuchenghua, chenglulu, i

Thanks for your advice! I will fix it in patch v2.


在 2023/11/18 下午5:09, Xi Ruoyao 写道:
> On Sat, 2023-11-18 at 14:59 +0800, Guo Jie wrote:
>> For the following immediate load operation in gcc/testsuite/gcc.target/loongarch/imm-load1.c:
>>
>> 	long long r = 0x0101010101010101;
>>
>> Before this patch:
>>
>> 	lu12i.w	    $r15,16842752>>12
>> 	ori	    $r15,$r15,257
>> 	lu32i.d	    $r15,0x1010100000000>>32
>> 	lu52i.d	    $r15,$r15,0x100000000000000>>52
>>
>> After this patch:
>>
>> 	lu12i.w     $r15,16842752>>12
>> 	ori         $r15,$r15,257
>> 	bstrins.d   $r15,$r15,63,32
>>
>> gcc/ChangeLog:
>>
>> 	* config/loongarch/loongarch.cc (enum loongarch_load_imm_method): Add new method.
>> 	(loongarch_build_integer): Add relevant implementations for new method.
>> 	(loongarch_move_integer): Ditto.
> IIRC the ChangeLog line should be wrapped at 72 characters.
>
> /* snip */
>
>>   struct loongarch_integer_op
>> @@ -1556,11 +1560,23 @@ loongarch_build_integer (struct loongarch_integer_op *codes,
>>   
>>         int sign31 = (value & (HOST_WIDE_INT_1U << 31)) >> 31;
>>         int sign51 = (value & (HOST_WIDE_INT_1U << 51)) >> 51;
>> +
>> +      unsigned HOST_WIDE_INT hival = value >> 32;
>> +      unsigned HOST_WIDE_INT loval = value << 32 >> 32;
> Use
>
> uint32_t hival = (uint32_t) (value >> 32);
> uint32_t loval = (uint32_t) value;
>
> instead, because "value << 32" may trigger a left-shift of negative
> value.
>
> C++11 doesn't allow shifting left any negative value.  Yes it's allowed
> as a GCC extension and it's also allowed by C++23, but GCC codebase is
> still C++11.  So it may break GCC if bootstrapping from a different
> compiler, and --with-build-config=bootstrap-ubsan will complain.
>
> Otherwise LGTM.
>


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re:[pushed] [PATCH] LoongArch: Optimize the loading of immediate numbers with the same high and low 32-bit values
  2023-11-18  6:59 [PATCH] LoongArch: Optimize the loading of immediate numbers with the same high and low 32-bit values Guo Jie
  2023-11-18  9:09 ` Xi Ruoyao
@ 2023-11-27  2:49 ` chenglulu
  1 sibling, 0 replies; 4+ messages in thread
From: chenglulu @ 2023-11-27  2:49 UTC (permalink / raw)
  To: Guo Jie, gcc-patches; +Cc: xuchenghua, i, xry111

Pushed to r14-5863.

在 2023/11/18 下午2:59, Guo Jie 写道:
> For the following immediate load operation in gcc/testsuite/gcc.target/loongarch/imm-load1.c:
>
> 	long long r = 0x0101010101010101;
>
> Before this patch:
>
> 	lu12i.w	    $r15,16842752>>12
> 	ori	    $r15,$r15,257
> 	lu32i.d	    $r15,0x1010100000000>>32
> 	lu52i.d	    $r15,$r15,0x100000000000000>>52
>
> After this patch:
>
> 	lu12i.w     $r15,16842752>>12
> 	ori         $r15,$r15,257
> 	bstrins.d   $r15,$r15,63,32
>
> gcc/ChangeLog:
>
> 	* config/loongarch/loongarch.cc (enum loongarch_load_imm_method): Add new method.
> 	(loongarch_build_integer): Add relevant implementations for new method.
> 	(loongarch_move_integer): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/loongarch/imm-load1.c: Change old check.
> ---
>   gcc/config/loongarch/loongarch.cc             | 22 ++++++++++++++++++-
>   .../gcc.target/loongarch/imm-load1.c          |  3 ++-
>   2 files changed, 23 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
> index d05743bec87..58c00344d09 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -142,12 +142,16 @@ struct loongarch_address_info
>   
>      METHOD_LU52I:
>        Load 52-63 bit of the immediate number.
> +
> +   METHOD_MIRROR:
> +     Copy 0-31 bit of the immediate number to 32-63bit.
>   */
>   enum loongarch_load_imm_method
>   {
>     METHOD_NORMAL,
>     METHOD_LU32I,
> -  METHOD_LU52I
> +  METHOD_LU52I,
> +  METHOD_MIRROR
>   };
>   
>   struct loongarch_integer_op
> @@ -1556,11 +1560,23 @@ loongarch_build_integer (struct loongarch_integer_op *codes,
>   
>         int sign31 = (value & (HOST_WIDE_INT_1U << 31)) >> 31;
>         int sign51 = (value & (HOST_WIDE_INT_1U << 51)) >> 51;
> +
> +      unsigned HOST_WIDE_INT hival = value >> 32;
> +      unsigned HOST_WIDE_INT loval = value << 32 >> 32;
> +
>         /* Determine whether the upper 32 bits are sign-extended from the lower
>   	 32 bits. If it is, the instructions to load the high order can be
>   	 ommitted.  */
>         if (lu32i[sign31] && lu52i[sign31])
>   	return cost;
> +      /* If the lower 32 bits are the same as the upper 32 bits, just copy
> +	 the lower 32 bits to the upper 32 bits.  */
> +      else if (loval == hival)
> +	{
> +	  codes[cost].method = METHOD_MIRROR;
> +	  codes[cost].curr_value = value;
> +	  return cost + 1;
> +	}
>         /* Determine whether bits 32-51 are sign-extended from the lower 32
>   	 bits. If so, directly load 52-63 bits.  */
>         else if (lu32i[sign31])
> @@ -3230,6 +3246,10 @@ loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value)
>   			   gen_rtx_AND (DImode, x, GEN_INT (0xfffffffffffff)),
>   			   GEN_INT (codes[i].value));
>   	  break;
> +	case METHOD_MIRROR:
> +	  gcc_assert (mode == DImode);
> +	  emit_insn (gen_insvdi (x, GEN_INT (32), GEN_INT (32), x));
> +	  break;
>   	default:
>   	  gcc_unreachable ();
>   	}
> diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load1.c b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
> index 2ff02971239..f64cc2956a3 100644
> --- a/gcc/testsuite/gcc.target/loongarch/imm-load1.c
> +++ b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
> @@ -1,6 +1,7 @@
>   /* { dg-do compile } */
>   /* { dg-options "-mabi=lp64d -O2" } */
> -/* { dg-final { scan-assembler "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */
> +/* { dg-final { scan-assembler-not "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */
> +/* { dg-final { scan-assembler "test:.*lu12i\.w.*\n\tbstrins\.d.*\n\.L2:" } } */
>   
>   
>   extern long long b[10];


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-11-27  2:50 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-18  6:59 [PATCH] LoongArch: Optimize the loading of immediate numbers with the same high and low 32-bit values Guo Jie
2023-11-18  9:09 ` Xi Ruoyao
2023-11-20  8:29   ` Guo Jie
2023-11-27  2:49 ` chenglulu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).