public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v2] LoongArch: Tweak IOR rtx_cost for bstrins
@ 2024-06-15 17:50 Xi Ruoyao
  2024-06-26  7:53 ` Ping: " Xi Ruoyao
  0 siblings, 1 reply; 3+ messages in thread
From: Xi Ruoyao @ 2024-06-15 17:50 UTC (permalink / raw)
  To: gcc-patches; +Cc: chenglulu, i, xuchenghua, Xi Ruoyao

Consider

    c &= 0xfff;
    a &= ~0xfff;
    b &= ~0xfff;
    a |= c;
    b |= c;

This can be done with 2 bstrins instructions.  But we need to recognize
it in loongarch_rtx_costs or the compiler will not propagate "c & 0xfff"
forward.

gcc/ChangeLog:

	* config/loongarch/loongarch.cc:
	(loongarch_use_bstrins_for_ior_with_mask): Split the main logic
	into ...
	(loongarch_use_bstrins_for_ior_with_mask_1): ... here.
	(loongarch_rtx_costs): Special-case IORs that can be
	implemented with bstrins.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/bstrins-3.c: New test.
---

Bootstrapped and regtested on loongarch64-linux-gnu.  Ok for trunk?

 gcc/config/loongarch/loongarch.cc             | 73 ++++++++++++++-----
 .../gcc.target/loongarch/bstrins-3.c          | 16 ++++
 2 files changed, 72 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-3.c

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 6ec3ee62502..256b76d044b 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -3681,6 +3681,27 @@ loongarch_set_reg_reg_piece_cost (machine_mode mode, unsigned int units)
   return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
 }
 
+static int
+loongarch_use_bstrins_for_ior_with_mask_1 (machine_mode mode,
+					   unsigned HOST_WIDE_INT mask1,
+					   unsigned HOST_WIDE_INT mask2)
+{
+  if (mask1 != ~mask2 || !mask1 || !mask2)
+    return 0;
+
+  /* Try to avoid a right-shift.  */
+  if (low_bitmask_len (mode, mask1) != -1)
+    return -1;
+
+  if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
+    return 1;
+
+  if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
+    return -1;
+
+  return 0;
+}
+
 /* Return the cost of moving between two registers of mode MODE.  */
 
 static int
@@ -3812,6 +3833,38 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
       /* Fall through.  */
 
     case IOR:
+      {
+	rtx op[2] = {XEXP (x, 0), XEXP (x, 1)};
+	if (GET_CODE (op[0]) == AND && GET_CODE (op[1]) == AND
+	    && (mode == SImode || (TARGET_64BIT && mode == DImode)))
+	  {
+	    rtx rtx_mask0 = XEXP (op[0], 1), rtx_mask1 = XEXP (op[1], 1);
+	    if (CONST_INT_P (rtx_mask0) && CONST_INT_P (rtx_mask1))
+	      {
+		unsigned HOST_WIDE_INT mask0 = UINTVAL (rtx_mask0);
+		unsigned HOST_WIDE_INT mask1 = UINTVAL (rtx_mask1);
+		if (loongarch_use_bstrins_for_ior_with_mask_1 (mode,
+							       mask0,
+							       mask1))
+		  {
+		    /* A bstrins instruction */
+		    *total = COSTS_N_INSNS (1);
+
+		    /* A srai instruction */
+		    if (low_bitmask_len (mode, mask0) == -1
+			&& low_bitmask_len (mode, mask1) == -1)
+		      *total += COSTS_N_INSNS (1);
+
+		    for (int i = 0; i < 2; i++)
+		      *total += set_src_cost (XEXP (op[i], 0), mode, speed);
+
+		    return true;
+		  }
+	      }
+	  }
+      }
+
+      /* Fall through.  */
     case XOR:
       /* Double-word operations use two single-word operations.  */
       *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2),
@@ -5796,23 +5849,9 @@ bool loongarch_pre_reload_split (void)
 int
 loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op)
 {
-  unsigned HOST_WIDE_INT mask1 = UINTVAL (op[2]);
-  unsigned HOST_WIDE_INT mask2 = UINTVAL (op[4]);
-
-  if (mask1 != ~mask2 || !mask1 || !mask2)
-    return 0;
-
-  /* Try to avoid a right-shift.  */
-  if (low_bitmask_len (mode, mask1) != -1)
-    return -1;
-
-  if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
-    return 1;
-
-  if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
-    return -1;
-
-  return 0;
+  return loongarch_use_bstrins_for_ior_with_mask_1 (mode,
+						    UINTVAL (op[2]),
+						    UINTVAL (op[4]));
 }
 
 /* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto
diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-3.c b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
new file mode 100644
index 00000000000..13762bdef42
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-final" } */
+/* { dg-final { scan-rtl-dump-times "insv\[sd\]i" 2 "final" } } */
+
+struct X {
+  long a, b;
+};
+
+struct X
+test (long a, long b, long c)
+{
+  c &= 0xfff;
+  a &= ~0xfff;
+  b &= ~0xfff;
+  return (struct X){.a = a | c, .b = b | c}; 
+}
-- 
2.45.2


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Ping: [PATCH v2] LoongArch: Tweak IOR rtx_cost for bstrins
  2024-06-15 17:50 [PATCH v2] LoongArch: Tweak IOR rtx_cost for bstrins Xi Ruoyao
@ 2024-06-26  7:53 ` Xi Ruoyao
  2024-06-27  2:39   ` Lulu Cheng
  0 siblings, 1 reply; 3+ messages in thread
From: Xi Ruoyao @ 2024-06-26  7:53 UTC (permalink / raw)
  To: gcc-patches; +Cc: chenglulu, i, xuchenghua

Ping.

On Sun, 2024-06-16 at 01:50 +0800, Xi Ruoyao wrote:
> Consider
> 
>     c &= 0xfff;
>     a &= ~0xfff;
>     b &= ~0xfff;
>     a |= c;
>     b |= c;
> 
> This can be done with 2 bstrins instructions.  But we need to
> recognize
> it in loongarch_rtx_costs or the compiler will not propagate "c &
> 0xfff"
> forward.
> 
> gcc/ChangeLog:
> 
> 	* config/loongarch/loongarch.cc:
> 	(loongarch_use_bstrins_for_ior_with_mask): Split the main
> logic
> 	into ...
> 	(loongarch_use_bstrins_for_ior_with_mask_1): ... here.
> 	(loongarch_rtx_costs): Special case for IOR those can be
> 	implemented with bstrins.
> 
> gcc/testsuite/ChangeLog;
> 
> 	* gcc.target/loongarch/bstrins-3.c: New test.
> ---
> 
> Bootstrapped and regtested on loongarch64-linux-gnu.  Ok for trunk?
> 
>  gcc/config/loongarch/loongarch.cc             | 73 ++++++++++++++----
> -
>  .../gcc.target/loongarch/bstrins-3.c          | 16 ++++
>  2 files changed, 72 insertions(+), 17 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-3.c
> 
> diff --git a/gcc/config/loongarch/loongarch.cc
> b/gcc/config/loongarch/loongarch.cc
> index 6ec3ee62502..256b76d044b 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -3681,6 +3681,27 @@ loongarch_set_reg_reg_piece_cost (machine_mode
> mode, unsigned int units)
>    return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
>  }
>  
> +static int
> +loongarch_use_bstrins_for_ior_with_mask_1 (machine_mode mode,
> +					   unsigned HOST_WIDE_INT
> mask1,
> +					   unsigned HOST_WIDE_INT
> mask2)
> +{
> +  if (mask1 != ~mask2 || !mask1 || !mask2)
> +    return 0;
> +
> +  /* Try to avoid a right-shift.  */
> +  if (low_bitmask_len (mode, mask1) != -1)
> +    return -1;
> +
> +  if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
> +    return 1;
> +
> +  if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
> +    return -1;
> +
> +  return 0;
> +}
> +
>  /* Return the cost of moving between two registers of mode MODE.  */
>  
>  static int
> @@ -3812,6 +3833,38 @@ loongarch_rtx_costs (rtx x, machine_mode mode,
> int outer_code,
>        /* Fall through.  */
>  
>      case IOR:
> +      {
> +	rtx op[2] = {XEXP (x, 0), XEXP (x, 1)};
> +	if (GET_CODE (op[0]) == AND && GET_CODE (op[1]) == AND
> +	    && (mode == SImode || (TARGET_64BIT && mode == DImode)))
> +	  {
> +	    rtx rtx_mask0 = XEXP (op[0], 1), rtx_mask1 = XEXP (op[1],
> 1);
> +	    if (CONST_INT_P (rtx_mask0) && CONST_INT_P (rtx_mask1))
> +	      {
> +		unsigned HOST_WIDE_INT mask0 = UINTVAL (rtx_mask0);
> +		unsigned HOST_WIDE_INT mask1 = UINTVAL (rtx_mask1);
> +		if (loongarch_use_bstrins_for_ior_with_mask_1 (mode,
> +							       mask0,
> +							      
> mask1))
> +		  {
> +		    /* A bstrins instruction */
> +		    *total = COSTS_N_INSNS (1);
> +
> +		    /* A srai instruction */
> +		    if (low_bitmask_len (mode, mask0) == -1
> +			&& low_bitmask_len (mode, mask1) == -1)
> +		      *total += COSTS_N_INSNS (1);
> +
> +		    for (int i = 0; i < 2; i++)
> +		      *total += set_src_cost (XEXP (op[i], 0), mode,
> speed);
> +
> +		    return true;
> +		  }
> +	      }
> +	  }
> +      }
> +
> +      /* Fall through.  */
>      case XOR:
>        /* Double-word operations use two single-word operations.  */
>        *total = loongarch_binary_cost (x, COSTS_N_INSNS (1),
> COSTS_N_INSNS (2),
> @@ -5796,23 +5849,9 @@ bool loongarch_pre_reload_split (void)
>  int
>  loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op)
>  {
> -  unsigned HOST_WIDE_INT mask1 = UINTVAL (op[2]);
> -  unsigned HOST_WIDE_INT mask2 = UINTVAL (op[4]);
> -
> -  if (mask1 != ~mask2 || !mask1 || !mask2)
> -    return 0;
> -
> -  /* Try to avoid a right-shift.  */
> -  if (low_bitmask_len (mode, mask1) != -1)
> -    return -1;
> -
> -  if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
> -    return 1;
> -
> -  if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
> -    return -1;
> -
> -  return 0;
> +  return loongarch_use_bstrins_for_ior_with_mask_1 (mode,
> +						    UINTVAL (op[2]),
> +						    UINTVAL (op[4]));
>  }
>  
>  /* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto
> diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
> b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
> new file mode 100644
> index 00000000000..13762bdef42
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-rtl-final" } */
> +/* { dg-final { scan-rtl-dump-times "insv\[sd\]i" 2 "final" } } */
> +
> +struct X {
> +  long a, b;
> +};
> +
> +struct X
> +test (long a, long b, long c)
> +{
> +  c &= 0xfff;
> +  a &= ~0xfff;
> +  b &= ~0xfff;
> +  return (struct X){.a = a | c, .b = b | c}; 
> +}

-- 
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: Ping: [PATCH v2] LoongArch: Tweak IOR rtx_cost for bstrins
  2024-06-26  7:53 ` Ping: " Xi Ruoyao
@ 2024-06-27  2:39   ` Lulu Cheng
  0 siblings, 0 replies; 3+ messages in thread
From: Lulu Cheng @ 2024-06-27  2:39 UTC (permalink / raw)
  To: Xi Ruoyao, gcc-patches; +Cc: i, xuchenghua

LGTM!

Thanks very much!


在 2024/6/26 下午3:53, Xi Ruoyao 写道:
> Ping.
>
> On Sun, 2024-06-16 at 01:50 +0800, Xi Ruoyao wrote:
>> Consider
>>
>>      c &= 0xfff;
>>      a &= ~0xfff;
>>      b &= ~0xfff;
>>      a |= c;
>>      b |= c;
>>
>> This can be done with 2 bstrins instructions.  But we need to
>> recognize
>> it in loongarch_rtx_costs or the compiler will not propagate "c &
>> 0xfff"
>> forward.
>>
>> gcc/ChangeLog:
>>
>> 	* config/loongarch/loongarch.cc:
>> 	(loongarch_use_bstrins_for_ior_with_mask): Split the main
>> logic
>> 	into ...
>> 	(loongarch_use_bstrins_for_ior_with_mask_1): ... here.
>> 	(loongarch_rtx_costs): Special case for IOR those can be
>> 	implemented with bstrins.
>>
>> gcc/testsuite/ChangeLog;
>>
>> 	* gcc.target/loongarch/bstrins-3.c: New test.
>> ---
>>
>> Bootstrapped and regtested on loongarch64-linux-gnu.  Ok for trunk?
>>
>>   gcc/config/loongarch/loongarch.cc             | 73 ++++++++++++++----
>> -
>>   .../gcc.target/loongarch/bstrins-3.c          | 16 ++++
>>   2 files changed, 72 insertions(+), 17 deletions(-)
>>   create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-3.c
>>
>> diff --git a/gcc/config/loongarch/loongarch.cc
>> b/gcc/config/loongarch/loongarch.cc
>> index 6ec3ee62502..256b76d044b 100644
>> --- a/gcc/config/loongarch/loongarch.cc
>> +++ b/gcc/config/loongarch/loongarch.cc
>> @@ -3681,6 +3681,27 @@ loongarch_set_reg_reg_piece_cost (machine_mode
>> mode, unsigned int units)
>>     return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
>>   }
>>   
>> +static int
>> +loongarch_use_bstrins_for_ior_with_mask_1 (machine_mode mode,
>> +					   unsigned HOST_WIDE_INT
>> mask1,
>> +					   unsigned HOST_WIDE_INT
>> mask2)
>> +{
>> +  if (mask1 != ~mask2 || !mask1 || !mask2)
>> +    return 0;
>> +
>> +  /* Try to avoid a right-shift.  */
>> +  if (low_bitmask_len (mode, mask1) != -1)
>> +    return -1;
>> +
>> +  if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
>> +    return 1;
>> +
>> +  if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
>> +    return -1;
>> +
>> +  return 0;
>> +}
>> +
>>   /* Return the cost of moving between two registers of mode MODE.  */
>>   
>>   static int
>> @@ -3812,6 +3833,38 @@ loongarch_rtx_costs (rtx x, machine_mode mode,
>> int outer_code,
>>         /* Fall through.  */
>>   
>>       case IOR:
>> +      {
>> +	rtx op[2] = {XEXP (x, 0), XEXP (x, 1)};
>> +	if (GET_CODE (op[0]) == AND && GET_CODE (op[1]) == AND
>> +	    && (mode == SImode || (TARGET_64BIT && mode == DImode)))
>> +	  {
>> +	    rtx rtx_mask0 = XEXP (op[0], 1), rtx_mask1 = XEXP (op[1],
>> 1);
>> +	    if (CONST_INT_P (rtx_mask0) && CONST_INT_P (rtx_mask1))
>> +	      {
>> +		unsigned HOST_WIDE_INT mask0 = UINTVAL (rtx_mask0);
>> +		unsigned HOST_WIDE_INT mask1 = UINTVAL (rtx_mask1);
>> +		if (loongarch_use_bstrins_for_ior_with_mask_1 (mode,
>> +							       mask0,
>> +							
>> mask1))
>> +		  {
>> +		    /* A bstrins instruction */
>> +		    *total = COSTS_N_INSNS (1);
>> +
>> +		    /* A srai instruction */
>> +		    if (low_bitmask_len (mode, mask0) == -1
>> +			&& low_bitmask_len (mode, mask1) == -1)
>> +		      *total += COSTS_N_INSNS (1);
>> +
>> +		    for (int i = 0; i < 2; i++)
>> +		      *total += set_src_cost (XEXP (op[i], 0), mode,
>> speed);
>> +
>> +		    return true;
>> +		  }
>> +	      }
>> +	  }
>> +      }
>> +
>> +      /* Fall through.  */
>>       case XOR:
>>         /* Double-word operations use two single-word operations.  */
>>         *total = loongarch_binary_cost (x, COSTS_N_INSNS (1),
>> COSTS_N_INSNS (2),
>> @@ -5796,23 +5849,9 @@ bool loongarch_pre_reload_split (void)
>>   int
>>   loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op)
>>   {
>> -  unsigned HOST_WIDE_INT mask1 = UINTVAL (op[2]);
>> -  unsigned HOST_WIDE_INT mask2 = UINTVAL (op[4]);
>> -
>> -  if (mask1 != ~mask2 || !mask1 || !mask2)
>> -    return 0;
>> -
>> -  /* Try to avoid a right-shift.  */
>> -  if (low_bitmask_len (mode, mask1) != -1)
>> -    return -1;
>> -
>> -  if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
>> -    return 1;
>> -
>> -  if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
>> -    return -1;
>> -
>> -  return 0;
>> +  return loongarch_use_bstrins_for_ior_with_mask_1 (mode,
>> +						    UINTVAL (op[2]),
>> +						    UINTVAL (op[4]));
>>   }
>>   
>>   /* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto
>> diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
>> b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
>> new file mode 100644
>> index 00000000000..13762bdef42
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
>> @@ -0,0 +1,16 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-rtl-final" } */
>> +/* { dg-final { scan-rtl-dump-times "insv\[sd\]i" 2 "final" } } */
>> +
>> +struct X {
>> +  long a, b;
>> +};
>> +
>> +struct X
>> +test (long a, long b, long c)
>> +{
>> +  c &= 0xfff;
>> +  a &= ~0xfff;
>> +  b &= ~0xfff;
>> +  return (struct X){.a = a | c, .b = b | c};
>> +}


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-06-27  2:39 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-06-15 17:50 [PATCH v2] LoongArch: Tweak IOR rtx_cost for bstrins Xi Ruoyao
2024-06-26  7:53 ` Ping: " Xi Ruoyao
2024-06-27  2:39   ` Lulu Cheng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).