From: "Wilco Dijkstra" 
To: "'GCC Patches'" 
Subject: [PATCH][AArch64][2/5] Improve immediate generation
Date: Wed, 02 Sep 2015 12:35:00 -0000
Message-ID: <000a01d0e57b$d435d420$7ca17c60$@com>

aarch64_internal_mov_immediate uses loops that iterate over all legal
bitmask immediates to find 2-instruction immediate combinations.  One loop
is quadratic and, despite being extremely expensive, very rarely finds a
matching immediate (43 matches in all of SPEC2006, none of which survive
into the final code), so it can be removed without any effect on code
quality.  The other loop can be replaced by a constant-time search: rather
than iterating over all legal bitmask values, reconstruct a potential
bitmask and query the fast aarch64_bitmask_imm.  (A small standalone
sketch of this search is appended after the patch for illustration.)

No change in generated code; passes the GCC regression tests and
bootstrap.

ChangeLog:
2015-09-02  Wilco Dijkstra

	* gcc/config/aarch64/aarch64.c (aarch64_internal_mov_immediate):
	Replace slow immediate matching loops with a faster algorithm.

---
 gcc/config/aarch64/aarch64.c | 96 +++++++++++---------------------------------
 1 file changed, 23 insertions(+), 73 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c0280e6..d6f7cb0 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1376,7 +1376,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
   unsigned HOST_WIDE_INT mask;
   int i;
   bool first;
-  unsigned HOST_WIDE_INT val;
+  unsigned HOST_WIDE_INT val, val2;
   bool subtargets;
   rtx subtarget;
   int one_match, zero_match, first_not_ffff_match;
@@ -1503,85 +1503,35 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
 	}
     }
 
-  /* See if we can do it by arithmetically combining two
-     immediates.  */
-  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
+  if (zero_match != 2 && one_match != 2)
     {
-      int j;
-      mask = 0xffff;
+      /* Try emitting a bitmask immediate with a movk replacing 16 bits.
+	 For a 64-bit bitmask try whether changing 16 bits to all ones or
+	 zeroes creates a valid bitmask.  To check any repeated bitmask,
+	 try using 16 bits from the other 32-bit half of val.  */
 
-      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
-	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
+      for (i = 0; i < 64; i += 16, mask <<= 16)
 	{
-	  if (generate)
-	    {
-	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-	      emit_insn (gen_rtx_SET (subtarget,
-				      GEN_INT (aarch64_bitmasks[i])));
-	      emit_insn (gen_adddi3 (dest, subtarget,
-				     GEN_INT (val - aarch64_bitmasks[i])));
-	    }
-	  num_insns += 2;
-	  return num_insns;
+	  val2 = val & ~mask;
+	  if (val2 != val && aarch64_bitmask_imm (val2, mode))
+	    break;
+	  val2 = val | mask;
+	  if (val2 != val && aarch64_bitmask_imm (val2, mode))
+	    break;
+	  val2 = val2 & ~mask;
+	  val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
+	  if (val2 != val && aarch64_bitmask_imm (val2, mode))
+	    break;
 	}
-
-      for (j = 0; j < 64; j += 16, mask <<= 16)
+      if (i != 64)
 	{
-	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
+	  if (generate)
 	    {
-	      if (generate)
-		{
-		  emit_insn (gen_rtx_SET (dest,
-					  GEN_INT (aarch64_bitmasks[i])));
-		  emit_insn (gen_insv_immdi (dest, GEN_INT (j),
-					     GEN_INT ((val >> j) & 0xffff)));
-		}
-	      num_insns += 2;
-	      return num_insns;
+	      emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
+	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+					 GEN_INT ((val >> i) & 0xffff)));
 	    }
-	}
-    }
-
-  /* See if we can do it by logically combining two immediates.  */
-  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
-    {
-      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
-	{
-	  int j;
-
-	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
-	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
-	      {
-		if (generate)
-		  {
-		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-		    emit_insn (gen_rtx_SET (subtarget,
-					    GEN_INT (aarch64_bitmasks[i])));
-		    emit_insn (gen_iordi3 (dest, subtarget,
-					   GEN_INT (aarch64_bitmasks[j])));
-		  }
-		num_insns += 2;
-		return num_insns;
-	      }
-	}
-      else if ((val & aarch64_bitmasks[i]) == val)
-	{
-	  int j;
-
-	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
-	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
-	      {
-		if (generate)
-		  {
-		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-		    emit_insn (gen_rtx_SET (subtarget,
-					    GEN_INT (aarch64_bitmasks[j])));
-		    emit_insn (gen_anddi3 (dest, subtarget,
-					   GEN_INT (aarch64_bitmasks[i])));
-		  }
-		num_insns += 2;
-		return num_insns;
-	      }
+	  return 2;
 	}
     }
 
-- 
1.8.3
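
For illustration, here is the standalone sketch of the constant-time
search mentioned above, runnable outside of GCC.  is_bitmask_imm is a
deliberately naive stand-in for GCC's fast aarch64_bitmask_imm (it
brute-forces every replicated rotated run of ones rather than using the
algorithmic test), and find_movk_split and the demo constant are names
invented for this sketch only:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

/* Naive stand-in for aarch64_bitmask_imm: VAL is a valid 64-bit logical
   (bitmask) immediate if it is a run of LEN ones (0 < LEN < SIZE)
   rotated within a SIZE-bit element and replicated to fill 64 bits.  */
static bool
is_bitmask_imm (uint64_t val)
{
  for (unsigned size = 2; size <= 64; size *= 2)
    for (unsigned len = 1; len < size; len++)
      for (unsigned rot = 0; rot < size; rot++)
	{
	  /* LEN ones in the low bits, rotated left by ROT within SIZE.  */
	  uint64_t elt = ((uint64_t) 1 << len) - 1;
	  if (rot)
	    elt = (elt << rot) | (elt >> (size - rot));
	  if (size < 64)
	    elt &= ((uint64_t) 1 << size) - 1;
	  /* Replicate the SIZE-bit element across all 64 bits.  */
	  for (unsigned w = size; w < 64; w *= 2)
	    elt |= elt << w;
	  if (elt == val)
	    return true;
	}
  return false;
}

/* The constant-time search from the patch: find a 16-bit chunk of VAL
   such that replacing it by all zeroes, all ones, or the 16 bits from
   the other 32-bit half yields a valid bitmask immediate.  Return the
   chunk position (0/16/32/48) and the bitmask via *BITMASK, or -1.  */
static int
find_movk_split (uint64_t val, uint64_t *bitmask)
{
  uint64_t mask = 0xffff, val2 = 0;
  int i;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      /* Chunk replaced by all zeroes.  */
      val2 = val & ~mask;
      if (val2 != val && is_bitmask_imm (val2))
	break;
      /* Chunk replaced by all ones.  */
      val2 = val | mask;
      if (val2 != val && is_bitmask_imm (val2))
	break;
      /* Chunk replaced by the 16 bits 32 bits away, to catch
	 repeating bitmask patterns.  */
      val2 = val & ~mask;
      val2 |= ((val2 >> 32) | (val2 << 32)) & mask;
      if (val2 != val && is_bitmask_imm (val2))
	break;
    }
  if (i == 64)
    return -1;
  *bitmask = val2;
  return i;
}

int
main (void)
{
  /* 0x00ffff0000001234 is the bitmask immediate 0x00ffff0000000000 with
     its low 16 bits overwritten, so it should split into MOV + MOVK.  */
  uint64_t imm, val = 0x00ffff0000001234ULL;
  int i = find_movk_split (val, &imm);

  if (i >= 0)
    printf ("mov  x0, #0x%016" PRIx64 "\nmovk x0, #0x%" PRIx64 ", lsl #%d\n",
	    imm, (val >> i) & 0xffff, i);
  return 0;
}

The real patch of course queries GCC's aarch64_bitmask_imm and emits the
instructions via gen_rtx_SET/gen_insv_immdi; the sketch only demonstrates
that the search is a handful of masked compares per 16-bit chunk instead
of a scan over all legal bitmask immediates.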