* [PATCH v2] LoongArch: Tweak IOR rtx_cost for bstrins
@ 2024-06-15 17:50 Xi Ruoyao
2024-06-26 7:53 ` Ping: " Xi Ruoyao
0 siblings, 1 reply; 3+ messages in thread
From: Xi Ruoyao @ 2024-06-15 17:50 UTC (permalink / raw)
To: gcc-patches; +Cc: chenglulu, i, xuchenghua, Xi Ruoyao
Consider
c &= 0xfff;
a &= ~0xfff;
b &= ~0xfff;
a |= c;
b |= c;
This can be done with 2 bstrins instructions. But we need to recognize
it in loongarch_rtx_costs or the compiler will not propagate "c & 0xfff"
forward.
gcc/ChangeLog:
* config/loongarch/loongarch.cc:
(loongarch_use_bstrins_for_ior_with_mask): Split the main logic
into ...
(loongarch_use_bstrins_for_ior_with_mask_1): ... here.
(loongarch_rtx_costs): Special-case IORs that can be
implemented with bstrins.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/bstrins-3.c: New test.
---
Bootstrapped and regtested on loongarch64-linux-gnu. Ok for trunk?
gcc/config/loongarch/loongarch.cc | 73 ++++++++++++++-----
.../gcc.target/loongarch/bstrins-3.c | 16 ++++
2 files changed, 72 insertions(+), 17 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-3.c
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 6ec3ee62502..256b76d044b 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -3681,6 +3681,27 @@ loongarch_set_reg_reg_piece_cost (machine_mode mode, unsigned int units)
return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
}
+static int
+loongarch_use_bstrins_for_ior_with_mask_1 (machine_mode mode,
+ unsigned HOST_WIDE_INT mask1,
+ unsigned HOST_WIDE_INT mask2)
+{
+ if (mask1 != ~mask2 || !mask1 || !mask2)
+ return 0;
+
+ /* Try to avoid a right-shift. */
+ if (low_bitmask_len (mode, mask1) != -1)
+ return -1;
+
+ if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
+ return 1;
+
+ if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
+ return -1;
+
+ return 0;
+}
+
/* Return the cost of moving between two registers of mode MODE. */
static int
@@ -3812,6 +3833,38 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
/* Fall through. */
case IOR:
+ {
+ rtx op[2] = {XEXP (x, 0), XEXP (x, 1)};
+ if (GET_CODE (op[0]) == AND && GET_CODE (op[1]) == AND
+ && (mode == SImode || (TARGET_64BIT && mode == DImode)))
+ {
+ rtx rtx_mask0 = XEXP (op[0], 1), rtx_mask1 = XEXP (op[1], 1);
+ if (CONST_INT_P (rtx_mask0) && CONST_INT_P (rtx_mask1))
+ {
+ unsigned HOST_WIDE_INT mask0 = UINTVAL (rtx_mask0);
+ unsigned HOST_WIDE_INT mask1 = UINTVAL (rtx_mask1);
+ if (loongarch_use_bstrins_for_ior_with_mask_1 (mode,
+ mask0,
+ mask1))
+ {
+ /* A bstrins instruction */
+ *total = COSTS_N_INSNS (1);
+
+ /* A srai instruction */
+ if (low_bitmask_len (mode, mask0) == -1
+ && low_bitmask_len (mode, mask1) == -1)
+ *total += COSTS_N_INSNS (1);
+
+ for (int i = 0; i < 2; i++)
+ *total += set_src_cost (XEXP (op[i], 0), mode, speed);
+
+ return true;
+ }
+ }
+ }
+ }
+
+ /* Fall through. */
case XOR:
/* Double-word operations use two single-word operations. */
*total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2),
@@ -5796,23 +5849,9 @@ bool loongarch_pre_reload_split (void)
int
loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op)
{
- unsigned HOST_WIDE_INT mask1 = UINTVAL (op[2]);
- unsigned HOST_WIDE_INT mask2 = UINTVAL (op[4]);
-
- if (mask1 != ~mask2 || !mask1 || !mask2)
- return 0;
-
- /* Try to avoid a right-shift. */
- if (low_bitmask_len (mode, mask1) != -1)
- return -1;
-
- if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
- return 1;
-
- if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
- return -1;
-
- return 0;
+ return loongarch_use_bstrins_for_ior_with_mask_1 (mode,
+ UINTVAL (op[2]),
+ UINTVAL (op[4]));
}
/* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto
diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-3.c b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
new file mode 100644
index 00000000000..13762bdef42
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-final" } */
+/* { dg-final { scan-rtl-dump-times "insv\[sd\]i" 2 "final" } } */
+
+struct X {
+ long a, b;
+};
+
+struct X
+test (long a, long b, long c)
+{
+ c &= 0xfff;
+ a &= ~0xfff;
+ b &= ~0xfff;
+ return (struct X){.a = a | c, .b = b | c};
+}
--
2.45.2
^ permalink raw reply [flat|nested] 3+ messages in thread
* Ping: [PATCH v2] LoongArch: Tweak IOR rtx_cost for bstrins
2024-06-15 17:50 [PATCH v2] LoongArch: Tweak IOR rtx_cost for bstrins Xi Ruoyao
@ 2024-06-26 7:53 ` Xi Ruoyao
2024-06-27 2:39 ` Lulu Cheng
0 siblings, 1 reply; 3+ messages in thread
From: Xi Ruoyao @ 2024-06-26 7:53 UTC (permalink / raw)
To: gcc-patches; +Cc: chenglulu, i, xuchenghua
Ping.
On Sun, 2024-06-16 at 01:50 +0800, Xi Ruoyao wrote:
> Consider
>
> c &= 0xfff;
> a &= ~0xfff;
> b &= ~0xfff;
> a |= c;
> b |= c;
>
> This can be done with 2 bstrins instructions. But we need to
> recognize
> it in loongarch_rtx_costs or the compiler will not propagate "c &
> 0xfff"
> forward.
>
> gcc/ChangeLog:
>
> * config/loongarch/loongarch.cc:
> (loongarch_use_bstrins_for_ior_with_mask): Split the main
> logic
> into ...
> (loongarch_use_bstrins_for_ior_with_mask_1): ... here.
> (loongarch_rtx_costs): Special case for IOR those can be
> implemented with bstrins.
>
> gcc/testsuite/ChangeLog;
>
> * gcc.target/loongarch/bstrins-3.c: New test.
> ---
>
> Bootstrapped and regtested on loongarch64-linux-gnu. Ok for trunk?
>
> gcc/config/loongarch/loongarch.cc | 73 ++++++++++++++----
> -
> .../gcc.target/loongarch/bstrins-3.c | 16 ++++
> 2 files changed, 72 insertions(+), 17 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-3.c
>
> diff --git a/gcc/config/loongarch/loongarch.cc
> b/gcc/config/loongarch/loongarch.cc
> index 6ec3ee62502..256b76d044b 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -3681,6 +3681,27 @@ loongarch_set_reg_reg_piece_cost (machine_mode
> mode, unsigned int units)
> return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
> }
>
> +static int
> +loongarch_use_bstrins_for_ior_with_mask_1 (machine_mode mode,
> + unsigned HOST_WIDE_INT
> mask1,
> + unsigned HOST_WIDE_INT
> mask2)
> +{
> + if (mask1 != ~mask2 || !mask1 || !mask2)
> + return 0;
> +
> + /* Try to avoid a right-shift. */
> + if (low_bitmask_len (mode, mask1) != -1)
> + return -1;
> +
> + if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
> + return 1;
> +
> + if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
> + return -1;
> +
> + return 0;
> +}
> +
> /* Return the cost of moving between two registers of mode MODE. */
>
> static int
> @@ -3812,6 +3833,38 @@ loongarch_rtx_costs (rtx x, machine_mode mode,
> int outer_code,
> /* Fall through. */
>
> case IOR:
> + {
> + rtx op[2] = {XEXP (x, 0), XEXP (x, 1)};
> + if (GET_CODE (op[0]) == AND && GET_CODE (op[1]) == AND
> + && (mode == SImode || (TARGET_64BIT && mode == DImode)))
> + {
> + rtx rtx_mask0 = XEXP (op[0], 1), rtx_mask1 = XEXP (op[1],
> 1);
> + if (CONST_INT_P (rtx_mask0) && CONST_INT_P (rtx_mask1))
> + {
> + unsigned HOST_WIDE_INT mask0 = UINTVAL (rtx_mask0);
> + unsigned HOST_WIDE_INT mask1 = UINTVAL (rtx_mask1);
> + if (loongarch_use_bstrins_for_ior_with_mask_1 (mode,
> + mask0,
> +
> mask1))
> + {
> + /* A bstrins instruction */
> + *total = COSTS_N_INSNS (1);
> +
> + /* A srai instruction */
> + if (low_bitmask_len (mode, mask0) == -1
> + && low_bitmask_len (mode, mask1) == -1)
> + *total += COSTS_N_INSNS (1);
> +
> + for (int i = 0; i < 2; i++)
> + *total += set_src_cost (XEXP (op[i], 0), mode,
> speed);
> +
> + return true;
> + }
> + }
> + }
> + }
> +
> + /* Fall through. */
> case XOR:
> /* Double-word operations use two single-word operations. */
> *total = loongarch_binary_cost (x, COSTS_N_INSNS (1),
> COSTS_N_INSNS (2),
> @@ -5796,23 +5849,9 @@ bool loongarch_pre_reload_split (void)
> int
> loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op)
> {
> - unsigned HOST_WIDE_INT mask1 = UINTVAL (op[2]);
> - unsigned HOST_WIDE_INT mask2 = UINTVAL (op[4]);
> -
> - if (mask1 != ~mask2 || !mask1 || !mask2)
> - return 0;
> -
> - /* Try to avoid a right-shift. */
> - if (low_bitmask_len (mode, mask1) != -1)
> - return -1;
> -
> - if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
> - return 1;
> -
> - if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
> - return -1;
> -
> - return 0;
> + return loongarch_use_bstrins_for_ior_with_mask_1 (mode,
> + UINTVAL (op[2]),
> + UINTVAL (op[4]));
> }
>
> /* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto
> diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
> b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
> new file mode 100644
> index 00000000000..13762bdef42
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-rtl-final" } */
> +/* { dg-final { scan-rtl-dump-times "insv\[sd\]i" 2 "final" } } */
> +
> +struct X {
> + long a, b;
> +};
> +
> +struct X
> +test (long a, long b, long c)
> +{
> + c &= 0xfff;
> + a &= ~0xfff;
> + b &= ~0xfff;
> + return (struct X){.a = a | c, .b = b | c};
> +}
--
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: Ping: [PATCH v2] LoongArch: Tweak IOR rtx_cost for bstrins
2024-06-26 7:53 ` Ping: " Xi Ruoyao
@ 2024-06-27 2:39 ` Lulu Cheng
0 siblings, 0 replies; 3+ messages in thread
From: Lulu Cheng @ 2024-06-27 2:39 UTC (permalink / raw)
To: Xi Ruoyao, gcc-patches; +Cc: i, xuchenghua
LGTM!
Thanks very much!
在 2024/6/26 下午3:53, Xi Ruoyao 写道:
> Ping.
>
> On Sun, 2024-06-16 at 01:50 +0800, Xi Ruoyao wrote:
>> Consider
>>
>> c &= 0xfff;
>> a &= ~0xfff;
>> b &= ~0xfff;
>> a |= c;
>> b |= c;
>>
>> This can be done with 2 bstrins instructions. But we need to
>> recognize
>> it in loongarch_rtx_costs or the compiler will not propagate "c &
>> 0xfff"
>> forward.
>>
>> gcc/ChangeLog:
>>
>> * config/loongarch/loongarch.cc:
>> (loongarch_use_bstrins_for_ior_with_mask): Split the main
>> logic
>> into ...
>> (loongarch_use_bstrins_for_ior_with_mask_1): ... here.
>> (loongarch_rtx_costs): Special case for IOR those can be
>> implemented with bstrins.
>>
>> gcc/testsuite/ChangeLog;
>>
>> * gcc.target/loongarch/bstrins-3.c: New test.
>> ---
>>
>> Bootstrapped and regtested on loongarch64-linux-gnu. Ok for trunk?
>>
>> gcc/config/loongarch/loongarch.cc | 73 ++++++++++++++----
>> -
>> .../gcc.target/loongarch/bstrins-3.c | 16 ++++
>> 2 files changed, 72 insertions(+), 17 deletions(-)
>> create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-3.c
>>
>> diff --git a/gcc/config/loongarch/loongarch.cc
>> b/gcc/config/loongarch/loongarch.cc
>> index 6ec3ee62502..256b76d044b 100644
>> --- a/gcc/config/loongarch/loongarch.cc
>> +++ b/gcc/config/loongarch/loongarch.cc
>> @@ -3681,6 +3681,27 @@ loongarch_set_reg_reg_piece_cost (machine_mode
>> mode, unsigned int units)
>> return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
>> }
>>
>> +static int
>> +loongarch_use_bstrins_for_ior_with_mask_1 (machine_mode mode,
>> + unsigned HOST_WIDE_INT
>> mask1,
>> + unsigned HOST_WIDE_INT
>> mask2)
>> +{
>> + if (mask1 != ~mask2 || !mask1 || !mask2)
>> + return 0;
>> +
>> + /* Try to avoid a right-shift. */
>> + if (low_bitmask_len (mode, mask1) != -1)
>> + return -1;
>> +
>> + if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
>> + return 1;
>> +
>> + if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
>> + return -1;
>> +
>> + return 0;
>> +}
>> +
>> /* Return the cost of moving between two registers of mode MODE. */
>>
>> static int
>> @@ -3812,6 +3833,38 @@ loongarch_rtx_costs (rtx x, machine_mode mode,
>> int outer_code,
>> /* Fall through. */
>>
>> case IOR:
>> + {
>> + rtx op[2] = {XEXP (x, 0), XEXP (x, 1)};
>> + if (GET_CODE (op[0]) == AND && GET_CODE (op[1]) == AND
>> + && (mode == SImode || (TARGET_64BIT && mode == DImode)))
>> + {
>> + rtx rtx_mask0 = XEXP (op[0], 1), rtx_mask1 = XEXP (op[1],
>> 1);
>> + if (CONST_INT_P (rtx_mask0) && CONST_INT_P (rtx_mask1))
>> + {
>> + unsigned HOST_WIDE_INT mask0 = UINTVAL (rtx_mask0);
>> + unsigned HOST_WIDE_INT mask1 = UINTVAL (rtx_mask1);
>> + if (loongarch_use_bstrins_for_ior_with_mask_1 (mode,
>> + mask0,
>> +
>> mask1))
>> + {
>> + /* A bstrins instruction */
>> + *total = COSTS_N_INSNS (1);
>> +
>> + /* A srai instruction */
>> + if (low_bitmask_len (mode, mask0) == -1
>> + && low_bitmask_len (mode, mask1) == -1)
>> + *total += COSTS_N_INSNS (1);
>> +
>> + for (int i = 0; i < 2; i++)
>> + *total += set_src_cost (XEXP (op[i], 0), mode,
>> speed);
>> +
>> + return true;
>> + }
>> + }
>> + }
>> + }
>> +
>> + /* Fall through. */
>> case XOR:
>> /* Double-word operations use two single-word operations. */
>> *total = loongarch_binary_cost (x, COSTS_N_INSNS (1),
>> COSTS_N_INSNS (2),
>> @@ -5796,23 +5849,9 @@ bool loongarch_pre_reload_split (void)
>> int
>> loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op)
>> {
>> - unsigned HOST_WIDE_INT mask1 = UINTVAL (op[2]);
>> - unsigned HOST_WIDE_INT mask2 = UINTVAL (op[4]);
>> -
>> - if (mask1 != ~mask2 || !mask1 || !mask2)
>> - return 0;
>> -
>> - /* Try to avoid a right-shift. */
>> - if (low_bitmask_len (mode, mask1) != -1)
>> - return -1;
>> -
>> - if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
>> - return 1;
>> -
>> - if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
>> - return -1;
>> -
>> - return 0;
>> + return loongarch_use_bstrins_for_ior_with_mask_1 (mode,
>> + UINTVAL (op[2]),
>> + UINTVAL (op[4]));
>> }
>>
>> /* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto
>> diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
>> b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
>> new file mode 100644
>> index 00000000000..13762bdef42
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
>> @@ -0,0 +1,16 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-rtl-final" } */
>> +/* { dg-final { scan-rtl-dump-times "insv\[sd\]i" 2 "final" } } */
>> +
>> +struct X {
>> + long a, b;
>> +};
>> +
>> +struct X
>> +test (long a, long b, long c)
>> +{
>> + c &= 0xfff;
>> + a &= ~0xfff;
>> + b &= ~0xfff;
>> + return (struct X){.a = a | c, .b = b | c};
>> +}
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-06-27 2:39 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-06-15 17:50 [PATCH v2] LoongArch: Tweak IOR rtx_cost for bstrins Xi Ruoyao
2024-06-26 7:53 ` Ping: " Xi Ruoyao
2024-06-27 2:39 ` Lulu Cheng
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).