From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 27636 invoked by alias); 7 Aug 2014 12:57:13 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 27563 invoked by uid 89); 7 Aug 2014 12:57:07 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.8 required=5.0 tests=AWL,BAYES_00,SPF_PASS autolearn=ham version=3.3.2 X-HELO: service87.mimecast.com Received: from service87.mimecast.com (HELO service87.mimecast.com) (91.220.42.44) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Thu, 07 Aug 2014 12:57:05 +0000 Received: from cam-owa1.Emea.Arm.com (fw-tnat.cambridge.arm.com [217.140.96.21]) by service87.mimecast.com; Thu, 07 Aug 2014 13:57:03 +0100 Received: from [10.1.208.24] ([10.1.255.212]) by cam-owa1.Emea.Arm.com with Microsoft SMTPSVC(6.0.3790.3959); Thu, 7 Aug 2014 13:57:00 +0100 Message-ID: <53E3779C.1020709@arm.com> Date: Thu, 07 Aug 2014 12:57:00 -0000 From: Kyrill Tkachov User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Thunderbird/24.5.0 MIME-Version: 1.0 To: Richard Earnshaw CC: gcc-patches Subject: Re: [PATCH, AArch64] Use MOVN to generate 64-bit negative immediates where sensible References: <000001cf6ae4$059c2280$10d46780$@bolton@arm.com> <537605FF.8070700@arm.com> <53E363C2.4000405@arm.com> <53E37532.60101@arm.com> In-Reply-To: <53E37532.60101@arm.com> X-MC-Unique: 114080713570300201 Content-Type: multipart/mixed; boundary="------------020008020103060303020108" X-IsSubscribed: yes X-SW-Source: 2014-08/txt/msg00806.txt.bz2 This is a multi-part message in MIME format. 
--------------020008020103060303020108 Content-Type: text/plain; charset=WINDOWS-1252; format=flowed Content-Transfer-Encoding: quoted-printable Content-length: 5404 On 07/08/14 13:46, Richard Earnshaw wrote: > On 07/08/14 12:32, Kyrill Tkachov wrote: >> On 16/05/14 13:35, Richard Earnshaw wrote: >>> On 08/05/14 18:36, Ian Bolton wrote: >>>> Hi, >>>> >>>> It currently takes 4 instructions to generate certain immediates on >>>> AArch64 (unless we put them in the constant pool). >>>> >>>> For example ... >>>> >>>> long long >>>> ffffbeefcafebabe () >>>> { >>>> return 0xFFFFBEEFCAFEBABEll; >>>> } >>>> >>>> leads to ... >>>> >>>> mov x0, 47806 >>>> movk x0, 0xcafe, lsl 16 >>>> movk x0, 0xbeef, lsl 32 >>>> orr x0, x0, -281474976710656 >>>> >>>> The above case is tackled in this patch by employing MOVN >>>> to generate the top 32-bits in a single instruction ... >>>> >>>> mov x0, -71536975282177 >>>> movk x0, 0xcafe, lsl 16 >>>> movk x0, 0xbabe, lsl 0 >>>> >>>> (Note that where at least two half-words are 0xffff, existing >>>> code that does the immediate in two instructions is still used.) >>>> >>>> Tested on standard gcc regressions and the attached test case. >>>> >>>> OK for commit? >>> What about: >>> >>> long long a() >>> { >>> return 0x1234ffff56789abcll; >>> } >>> >>> long long b() >>> { >>> return 0x12345678ffff9abcll; >>> } >>> >>> long long c() >>> { >>> return 0x123456789abcffffll; >>> } >>> >>> ? >>> >>> Surely these can also benefit from this sort of optimization, but it >>> looks as though you only handle the top 16 bits being set. >> Hi Richard, >> >> How about this rework of the patch? 
>> >> For code: >> >> long long foo () >> { >> return 0xFFFFBEEFCAFEBABEll; >> } >> >> long long a() >> { >> return 0x1234ffff56789abcll; >> } >> >> long long b() >> { >> return 0x12345678ffff9abcll; >> } >> >> long long c() >> { >> return 0x123456789abcffffll; >> } >> >> we now generate: >> foo: >> mov x0, -17730 >> movk x0, 0xcafe, lsl 16 >> movk x0, 0xbeef, lsl 32 >> ret >> .size foo, .-foo >> .align 2 >> .global a >> .type a, %function >> a: >> mov x0, -25924 >> movk x0, 0x5678, lsl 16 >> movk x0, 0x1234, lsl 48 >> ret >> .size a, .-a >> .align 2 >> .global b >> .type b, %function >> b: >> mov x0, -25924 >> movk x0, 0x5678, lsl 32 >> movk x0, 0x1234, lsl 48 >> ret >> .size b, .-b >> .align 2 >> .global c >> .type c, %function >> c: >> mov x0, -1698889729 >> movk x0, 0x5678, lsl 32 >> movk x0, 0x1234, lsl 48 >> ret >> >> >> 3 instructions are used in each case. >> >> Thanks, >> Kyrill >> >> 2014-08-07 Ian Bolton >> Kyrylo Tkachov >> >> * config/aarch64/aarch64.c (aarch64_expand_mov_immediate): >> Use MOVN when one of the half-words is 0xffff. 
>> >> >> aarch64-movn-pattern-patch-v3.patch >> >> >> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c >> index 0a7f441..2db91c7 100644 >> --- a/gcc/config/aarch64/aarch64.c >> +++ b/gcc/config/aarch64/aarch64.c >> @@ -1005,7 +1005,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) >> unsigned HOST_WIDE_INT val; >> bool subtargets; >> rtx subtarget; >> - int one_match, zero_match; >> + int one_match, zero_match, first_not_ffff_match; >>=20=20=20 >> gcc_assert (mode =3D=3D SImode || mode =3D=3D DImode); >>=20=20=20 >> @@ -1106,29 +1106,48 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) >> one_match =3D 0; >> zero_match =3D 0; >> mask =3D 0xffff; >> + first_not_ffff_match =3D -1; >>=20=20=20 >> for (i =3D 0; i < 64; i +=3D 16, mask <<=3D 16) >> { >> - if ((val & mask) =3D=3D 0) >> - zero_match++; >> - else if ((val & mask) =3D=3D mask) >> + if ((val & mask) =3D=3D mask) >> one_match++; >> + else >> + { >> + if (first_not_ffff_match < 0) >> + first_not_ffff_match =3D i; >> + if ((val & mask) =3D=3D 0) >> + zero_match++; >> + } >> } >>=20=20=20 >> if (one_match =3D=3D 2) >> { >> - mask =3D 0xffff; >> - for (i =3D 0; i < 64; i +=3D 16, mask <<=3D 16) >> + /* Set one of the quarters and then insert back into result. */ >> + mask =3D 0xffffll << first_not_ffff_match; >> + emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask))); >> + emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match), >> + GEN_INT ((val >> first_not_ffff_match) >> + & 0xffff))); >> + return; >> + } >> + >> + if (one_match =3D=3D 1) > I think this should be (one_match > zero_match). > > Otherwise constants such as > > > 0x00001234ffff0000ll > > might end up taking three rather than two insns. You're right, we generate: mov x0, -65536 movk x0, 0x1234, lsl 32 and x0, x0, 281474976710655 with your suggestion we can improve this to: mov x0, 4294901760 movk x0, 0x1234, lsl 32 Ok with that change then? 
Kyrill 2014-08-07 Ian Bolton Kyrylo Tkachov * config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Use MOVN when one of the half-words is 0xffff. --------------020008020103060303020108 Content-Type: text/x-patch; name=aarch64-movn-pattern-patch-v3.patch Content-Transfer-Encoding: quoted-printable Content-Disposition: attachment; filename="aarch64-movn-pattern-patch-v3.patch" Content-length: 2309 diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 0a7f441..2db91c7 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -1005,7 +1005,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) unsigned HOST_WIDE_INT val; bool subtargets; rtx subtarget; - int one_match, zero_match; + int one_match, zero_match, first_not_ffff_match; =20 gcc_assert (mode =3D=3D SImode || mode =3D=3D DImode); =20 @@ -1106,29 +1106,48 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) one_match =3D 0; zero_match =3D 0; mask =3D 0xffff; + first_not_ffff_match =3D -1; =20 for (i =3D 0; i < 64; i +=3D 16, mask <<=3D 16) { - if ((val & mask) =3D=3D 0) - zero_match++; - else if ((val & mask) =3D=3D mask) + if ((val & mask) =3D=3D mask) one_match++; + else + { + if (first_not_ffff_match < 0) + first_not_ffff_match =3D i; + if ((val & mask) =3D=3D 0) + zero_match++; + } } =20 if (one_match =3D=3D 2) { - mask =3D 0xffff; - for (i =3D 0; i < 64; i +=3D 16, mask <<=3D 16) + /* Set one of the quarters and then insert back into result. */ + mask =3D 0xffffll << first_not_ffff_match; + emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask))); + emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match), + GEN_INT ((val >> first_not_ffff_match) + & 0xffff))); + return; + } + + if (one_match > zero_match) + { + /* Set either first three quarters or all but the third. 
*/ + mask =3D 0xffffll << (16 - first_not_ffff_match); + emit_insn (gen_rtx_SET (VOIDmode, dest, + GEN_INT (val | mask | 0xffffffff00000000ull))); + + /* Now insert other two quarters. */ + for (i =3D first_not_ffff_match + 16, mask <<=3D (first_not_ffff_mat= ch << 1); + i < 64; i +=3D 16, mask <<=3D 16) { if ((val & mask) !=3D mask) - { - emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask))); - emit_insn (gen_insv_immdi (dest, GEN_INT (i), - GEN_INT ((val >> i) & 0xffff))); - return; - } + emit_insn (gen_insv_immdi (dest, GEN_INT (i), + GEN_INT ((val >> i) & 0xffff))); } - gcc_unreachable (); + return; } =20 if (zero_match =3D=3D 2)= --------------020008020103060303020108--