From: Lulu Cheng <chenglulu@loongson.cn>
To: Xi Ruoyao <xry111@xry111.site>, gcc-patches@gcc.gnu.org
Cc: i@xen0n.name, xuchenghua@loongson.cn
Subject: Re: [pushed][PATCH v4] LoongArch: Optimize immediate load.
Date: Mon, 28 Nov 2022 10:46:19 +0800 [thread overview]
Message-ID: <85e70380-399e-41dc-127f-bd442d371e85@loongson.cn> (raw)
In-Reply-To: <32b7624b24d1f48805d4c777ebde1380fd3d1596.camel@xry111.site>
Pushed r13-4315.
在 2022/11/23 上午12:44, Xi Ruoyao 写道:
> On Tue, 2022-11-22 at 22:03 +0800, Xi Ruoyao via Gcc-patches wrote:
>> While I still can't fully understand the immediate load issue and how
>> this patch fix it, I've tested this patch (alongside the prefetch
>> instruction patch) with bootstrap-ubsan. And the compiled result of
>> imm-load1.c seems OK.
> And it's doing correct thing for Glibc "improved generic string
> functions" patch, producing some really tight loop now.
>
>> On Thu, 2022-11-17 at 17:59 +0800, Lulu Cheng wrote:
>>> v1 -> v2:
>>> 1. Change the code format.
>>> 2. Fix bugs in the code.
>>>
>>> v2 -> v3:
>>> Modifying a code implementation of an undefined behavior.
>>>
>>> v3 -> v4:
>>> Move the part of the immediate number decomposition from expand pass
>>> to split
>>> pass.
>>>
>>> Both regression tests and spec2006 passed.
>>>
>>> The problem mentioned in the link does not move the four immediate
>>> load
>>> instructions out of the loop. It has been optimized. Now, as in the
>>> test case,
>>> four immediate load instructions are generated outside the loop.
>>> (
>>> https://sourceware.org/pipermail/libc-alpha/2022-September/142202.html
>>> )
>>>
>>> --------------------------------------------------------------------
>>> Because loop2_invariant pass will extract the instructions that do
>>> not
>>> change
>>> in the loop out of the loop, some instructions will not meet the
>>> extraction
>>> conditions if the machine performs immediate decomposition while
>>> expand pass,
>>> so the immediate decomposition will be transferred to the split
>>> process.
>>>
>>> gcc/ChangeLog:
>>>
>>> * config/loongarch/loongarch.cc (enum
>>> loongarch_load_imm_method):
>>> Remove the member METHOD_INSV that is not currently used.
>>> (struct loongarch_integer_op): Define a new member
>>> curr_value,
>>> that records the value of the number stored in the
>>> destination
>>> register immediately after the current instruction has run.
>>> (loongarch_build_integer): Assign a value to the curr_value
>>> member variable.
>>> (loongarch_move_integer): Adds information for the immediate
>>> load instruction.
>>> * config/loongarch/loongarch.md (*movdi_32bit): Redefine as
>>> define_insn_and_split.
>>> (*movdi_64bit): Likewise.
>>> (*movsi_internal): Likewise.
>>> (*movhi_internal): Likewise.
>>> * config/loongarch/predicates.md: Return true as long as it
>>> is
>>> CONST_INT, ensure
>>> that the immediate number is not optimized by decomposition
>>> during expand
>>> optimization loop.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> * gcc.target/loongarch/imm-load.c: New test.
>>> * gcc.target/loongarch/imm-load1.c: New test.
>>> ---
>>> gcc/config/loongarch/loongarch.cc | 62 ++++++++++------
>>> --
>>> -
>>> gcc/config/loongarch/loongarch.md | 44 +++++++++++--
>>> gcc/config/loongarch/predicates.md | 2 +-
>>> gcc/testsuite/gcc.target/loongarch/imm-load.c | 10 +++
>>> .../gcc.target/loongarch/imm-load1.c | 26 ++++++++
>>> 5 files changed, 110 insertions(+), 34 deletions(-)
>>> create mode 100644 gcc/testsuite/gcc.target/loongarch/imm-load.c
>>> create mode 100644 gcc/testsuite/gcc.target/loongarch/imm-load1.c
>>>
>>> diff --git a/gcc/config/loongarch/loongarch.cc
>>> b/gcc/config/loongarch/loongarch.cc
>>> index 8ee32c90573..9e0d6c7c3ea 100644
>>> --- a/gcc/config/loongarch/loongarch.cc
>>> +++ b/gcc/config/loongarch/loongarch.cc
>>> @@ -139,22 +139,21 @@ struct loongarch_address_info
>>>
>>> METHOD_LU52I:
>>> Load 52-63 bit of the immediate number.
>>> -
>>> - METHOD_INSV:
>>> - immediate like 0xfff00000fffffxxx
>>> - */
>>> +*/
>>> enum loongarch_load_imm_method
>>> {
>>> METHOD_NORMAL,
>>> METHOD_LU32I,
>>> - METHOD_LU52I,
>>> - METHOD_INSV
>>> + METHOD_LU52I
>>> };
>>>
>>> struct loongarch_integer_op
>>> {
>>> enum rtx_code code;
>>> HOST_WIDE_INT value;
>>> + /* Represent the result of the immediate count of the load
>>> instruction at
>>> + each step. */
>>> + HOST_WIDE_INT curr_value;
>>> enum loongarch_load_imm_method method;
>>> };
>>>
>>> @@ -1475,24 +1474,27 @@ loongarch_build_integer (struct
>>> loongarch_integer_op *codes,
>>> {
>>> /* The value of the lower 32 bit be loaded with one
>>> instruction.
>>> lu12i.w. */
>>> - codes[0].code = UNKNOWN;
>>> - codes[0].method = METHOD_NORMAL;
>>> - codes[0].value = low_part;
>>> + codes[cost].code = UNKNOWN;
>>> + codes[cost].method = METHOD_NORMAL;
>>> + codes[cost].value = low_part;
>>> + codes[cost].curr_value = low_part;
>>> cost++;
>>> }
>>> else
>>> {
>>> /* lu12i.w + ior. */
>>> - codes[0].code = UNKNOWN;
>>> - codes[0].method = METHOD_NORMAL;
>>> - codes[0].value = low_part & ~(IMM_REACH - 1);
>>> + codes[cost].code = UNKNOWN;
>>> + codes[cost].method = METHOD_NORMAL;
>>> + codes[cost].value = low_part & ~(IMM_REACH - 1);
>>> + codes[cost].curr_value = codes[cost].value;
>>> cost++;
>>> HOST_WIDE_INT iorv = low_part & (IMM_REACH - 1);
>>> if (iorv != 0)
>>> {
>>> - codes[1].code = IOR;
>>> - codes[1].method = METHOD_NORMAL;
>>> - codes[1].value = iorv;
>>> + codes[cost].code = IOR;
>>> + codes[cost].method = METHOD_NORMAL;
>>> + codes[cost].value = iorv;
>>> + codes[cost].curr_value = low_part;
>>> cost++;
>>> }
>>> }
>>> @@ -1515,11 +1517,14 @@ loongarch_build_integer (struct
>>> loongarch_integer_op *codes,
>>> {
>>> codes[cost].method = METHOD_LU52I;
>>> codes[cost].value = value & LU52I_B;
>>> + codes[cost].curr_value = value;
>>> return cost + 1;
>>> }
>>>
>>> codes[cost].method = METHOD_LU32I;
>>> codes[cost].value = (value & LU32I_B) | (sign51 ? LU52I_B :
>>> 0);
>>> + codes[cost].curr_value = (value & 0xfffffffffffff)
>>> + | (sign51 ? LU52I_B : 0);
>>> cost++;
>>>
>>> /* Determine whether the 52-61 bits are sign-extended from
>>> the
>>> low order,
>>> @@ -1528,6 +1533,7 @@ loongarch_build_integer (struct
>>> loongarch_integer_op *codes,
>>> {
>>> codes[cost].method = METHOD_LU52I;
>>> codes[cost].value = value & LU52I_B;
>>> + codes[cost].curr_value = value;
>>> cost++;
>>> }
>>> }
>>> @@ -2911,6 +2917,9 @@ loongarch_move_integer (rtx temp, rtx dest,
>>> unsigned HOST_WIDE_INT value)
>>> else
>>> x = force_reg (mode, x);
>>>
>>> + set_unique_reg_note (get_last_insn (), REG_EQUAL,
>>> + GEN_INT (codes[i-1].curr_value));
>>> +
>>> switch (codes[i].method)
>>> {
>>> case METHOD_NORMAL:
>>> @@ -2918,22 +2927,17 @@ loongarch_move_integer (rtx temp, rtx dest,
>>> unsigned HOST_WIDE_INT value)
>>> GEN_INT (codes[i].value));
>>> break;
>>> case METHOD_LU32I:
>>> - emit_insn (
>>> - gen_rtx_SET (x,
>>> - gen_rtx_IOR (DImode,
>>> - gen_rtx_ZERO_EXTEND (
>>> - DImode, gen_rtx_SUBREG
>>> (SImode, x, 0)),
>>> - GEN_INT (codes[i].value))));
>>> + gcc_assert (mode == DImode);
>>> + x = gen_rtx_IOR (DImode,
>>> + gen_rtx_ZERO_EXTEND (DImode,
>>> + gen_rtx_SUBREG
>>> (SImode, x, 0)),
>>> + GEN_INT (codes[i].value));
>>> break;
>>> case METHOD_LU52I:
>>> - emit_insn (gen_lu52i_d (x, x, GEN_INT (0xfffffffffffff),
>>> - GEN_INT (codes[i].value)));
>>> - break;
>>> - case METHOD_INSV:
>>> - emit_insn (
>>> - gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, x, GEN_INT
>>> (20),
>>> - GEN_INT (32)),
>>> - gen_rtx_REG (DImode, 0)));
>>> + gcc_assert (mode == DImode);
>>> + x = gen_rtx_IOR (DImode,
>>> + gen_rtx_AND (DImode, x, GEN_INT
>>> (0xfffffffffffff)),
>>> + GEN_INT (codes[i].value));
>>> break;
>>> default:
>>> gcc_unreachable ();
>>> diff --git a/gcc/config/loongarch/loongarch.md
>>> b/gcc/config/loongarch/loongarch.md
>>> index 2fda5381904..f61db66d535 100644
>>> --- a/gcc/config/loongarch/loongarch.md
>>> +++ b/gcc/config/loongarch/loongarch.md
>>> @@ -1718,23 +1718,41 @@ (define_expand "movdi"
>>> DONE;
>>> })
>>>
>>> -(define_insn "*movdi_32bit"
>>> +(define_insn_and_split "*movdi_32bit"
>>> [(set (match_operand:DI 0 "nonimmediate_operand"
>>> "=r,r,r,w,*f,*f,*r,*m")
>>> (match_operand:DI 1 "move_operand"
>>> "r,i,w,r,*J*r,*m,*f,*f"))]
>>> "!TARGET_64BIT
>>> && (register_operand (operands[0], DImode)
>>> || reg_or_0_operand (operands[1], DImode))"
>>> { return loongarch_output_move (operands[0], operands[1]); }
>>> + "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P
>>> (REGNO
>>> + (operands[0]))"
>>> + [(const_int 0)]
>>> + "
>>> +{
>>> + loongarch_move_integer (operands[0], operands[0], INTVAL
>>> (operands[1]));
>>> + DONE;
>>> +}
>>> + "
>>> [(set_attr "move_type"
>>> "move,const,load,store,mgtf,fpload,mftg,fpstore")
>>> (set_attr "mode" "DI")])
>>>
>>> -(define_insn "*movdi_64bit"
>>> +(define_insn_and_split "*movdi_64bit"
>>> [(set (match_operand:DI 0 "nonimmediate_operand"
>>> "=r,r,r,w,*f,*f,*r,*m")
>>> (match_operand:DI 1 "move_operand"
>>> "r,Yd,w,rJ,*r*J,*m,*f,*f"))]
>>> "TARGET_64BIT
>>> && (register_operand (operands[0], DImode)
>>> || reg_or_0_operand (operands[1], DImode))"
>>> { return loongarch_output_move (operands[0], operands[1]); }
>>> + "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P
>>> (REGNO
>>> + (operands[0]))"
>>> + [(const_int 0)]
>>> + "
>>> +{
>>> + loongarch_move_integer (operands[0], operands[0], INTVAL
>>> (operands[1]));
>>> + DONE;
>>> +}
>>> + "
>>> [(set_attr "move_type"
>>> "move,const,load,store,mgtf,fpload,mftg,fpstore")
>>> (set_attr "mode" "DI")])
>>>
>>> @@ -1749,12 +1767,21 @@ (define_expand "movsi"
>>> DONE;
>>> })
>>>
>>> -(define_insn "*movsi_internal"
>>> +(define_insn_and_split "*movsi_internal"
>>> [(set (match_operand:SI 0 "nonimmediate_operand"
>>> "=r,r,r,w,*f,*f,*r,*m,*r,*z")
>>> (match_operand:SI 1 "move_operand"
>>> "r,Yd,w,rJ,*r*J,*m,*f,*f,*z,*r"))]
>>> "(register_operand (operands[0], SImode)
>>> || reg_or_0_operand (operands[1], SImode))"
>>> { return loongarch_output_move (operands[0], operands[1]); }
>>> + "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P
>>> (REGNO
>>> + (operands[0]))"
>>> + [(const_int 0)]
>>> + "
>>> +{
>>> + loongarch_move_integer (operands[0], operands[0], INTVAL
>>> (operands[1]));
>>> + DONE;
>>> +}
>>> + "
>>> [(set_attr "move_type"
>>> "move,const,load,store,mgtf,fpload,mftg,fpstore,mftg,mgtf")
>>> (set_attr "mode" "SI")])
>>>
>>> @@ -1774,12 +1801,21 @@ (define_expand "movhi"
>>> DONE;
>>> })
>>>
>>> -(define_insn "*movhi_internal"
>>> +(define_insn_and_split "*movhi_internal"
>>> [(set (match_operand:HI 0 "nonimmediate_operand"
>>> "=r,r,r,r,m,r,k")
>>> (match_operand:HI 1 "move_operand" "r,Yd,I,m,rJ,k,rJ"))]
>>> "(register_operand (operands[0], HImode)
>>> || reg_or_0_operand (operands[1], HImode))"
>>> { return loongarch_output_move (operands[0], operands[1]); }
>>> + "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P
>>> (REGNO
>>> + (operands[0]))"
>>> + [(const_int 0)]
>>> + "
>>> +{
>>> + loongarch_move_integer (operands[0], operands[0], INTVAL
>>> (operands[1]));
>>> + DONE;
>>> +}
>>> + "
>>> [(set_attr "move_type" "move,const,const,load,store,load,store")
>>> (set_attr "mode" "HI")])
>>>
>>> diff --git a/gcc/config/loongarch/predicates.md
>>> b/gcc/config/loongarch/predicates.md
>>> index 8bd0c1376c9..58c3dc2261c 100644
>>> --- a/gcc/config/loongarch/predicates.md
>>> +++ b/gcc/config/loongarch/predicates.md
>>> @@ -226,7 +226,7 @@ (define_predicate "move_operand"
>>> switch (GET_CODE (op))
>>> {
>>> case CONST_INT:
>>> - return !splittable_const_int_operand (op, mode);
>>> + return true;
>>>
>>> case CONST:
>>> case SYMBOL_REF:
>>> diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load.c
>>> b/gcc/testsuite/gcc.target/loongarch/imm-load.c
>>> new file mode 100644
>>> index 00000000000..c04ca33996f
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/loongarch/imm-load.c
>>> @@ -0,0 +1,10 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-mabi=lp64d -O2 -fdump-rtl-split1" } */
>>> +
>>> +long int
>>> +test (void)
>>> +{
>>> + return 0x1234567890abcdef;
>>> +}
>>> +/* { dg-final { scan-rtl-dump-times "scanning new insn with uid" 6
>>> "split1" } } */
>>> +
>>> diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load1.c
>>> b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
>>> new file mode 100644
>>> index 00000000000..2ff02971239
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
>>> @@ -0,0 +1,26 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-mabi=lp64d -O2" } */
>>> +/* { dg-final { scan-assembler
>>> "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */
>>> +
>>> +
>>> +extern long long b[10];
>>> +static inline long long
>>> +repeat_bytes (void)
>>> +{
>>> + long long r = 0x0101010101010101;
>>> +
>>> + return r;
>>> +}
>>> +
>>> +static inline long long
>>> +highbit_mask (long long m)
>>> +{
>>> + return m & repeat_bytes ();
>>> +}
>>> +
>>> +void test(long long *a)
>>> +{
>>> + for (int i = 0; i < 10; i++)
>>> + b[i] = highbit_mask (a[i]);
>>> +
>>> +}
prev parent reply other threads:[~2022-11-28 2:46 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-17 9:59 [PATCH " Lulu Cheng
2022-11-22 14:03 ` Xi Ruoyao
2022-11-22 16:44 ` Xi Ruoyao
2022-11-23 2:12 ` chenglulu
2022-11-28 2:46 ` Lulu Cheng [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=85e70380-399e-41dc-127f-bd442d371e85@loongson.cn \
--to=chenglulu@loongson.cn \
--cc=gcc-patches@gcc.gnu.org \
--cc=i@xen0n.name \
--cc=xry111@xry111.site \
--cc=xuchenghua@loongson.cn \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).