From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 106067 invoked by alias); 18 Sep 2015 14:33:02 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 106039 invoked by uid 89); 18 Sep 2015 14:33:01 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.8 required=5.0 tests=AWL,BAYES_00,SPF_PASS,T_RP_MATCHES_RCVD autolearn=ham version=3.3.2 X-HELO: cam-smtp0.cambridge.arm.com Received: from fw-tnat.cambridge.arm.com (HELO cam-smtp0.cambridge.arm.com) (217.140.96.140) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES256-SHA encrypted) ESMTPS; Fri, 18 Sep 2015 14:33:00 +0000 Received: from arm.com (e107456-lin.cambridge.arm.com [10.2.207.14]) by cam-smtp0.cambridge.arm.com (8.13.8/8.13.8) with ESMTP id t8IEWumY017840; Fri, 18 Sep 2015 15:32:56 +0100 Date: Fri, 18 Sep 2015 14:38:00 -0000 From: James Greenhalgh To: Wilco Dijkstra Cc: "'GCC Patches'" Subject: Re: [PATCH][AArch64][5/5] Improve immediate generation Message-ID: <20150918143256.GE16108@arm.com> References: <000d01d0e57b$fd70a360$f851ea20$@com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <000d01d0e57b$fd70a360$f851ea20$@com> User-Agent: Mutt/1.5.21 (2010-09-15) X-IsSubscribed: yes X-SW-Source: 2015-09/txt/msg01422.txt.bz2 On Wed, Sep 02, 2015 at 01:36:28PM +0100, Wilco Dijkstra wrote: > Clean up the remainder of aarch64_internal_mov_immediate. Compute the number > of 16-bit aligned 16-bit masks that are all-zeroes or all-ones, and emit the > smallest sequence using a single loop skipping either all-ones or all-zeroes. > > Passes GCC regression tests/bootstrap. Minor changes in generated code for > some special cases, but code size is identical. OK. 
Thanks, James > > ChangeLog: > 2015-09-02 Wilco Dijkstra > > * gcc/config/aarch64/aarch64.c (aarch64_internal_mov_immediate): > Cleanup immediate generation code. > > --- > gcc/config/aarch64/aarch64.c | 137 ++++++++++++------------------------------- > 1 file changed, 39 insertions(+), 98 deletions(-) > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index af9a3d3..ca4428a 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -1367,75 +1367,42 @@ static int > aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, > machine_mode mode) > { > - unsigned HOST_WIDE_INT mask; > int i; > - bool first; > - unsigned HOST_WIDE_INT val, val2; > - int one_match, zero_match, first_not_ffff_match; > - int num_insns = 0; > + unsigned HOST_WIDE_INT val, val2, mask; > + int one_match, zero_match; > + int num_insns; > > - if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode)) > + val = INTVAL (imm); > + > + if (aarch64_move_imm (val, mode)) > { > if (generate) > emit_insn (gen_rtx_SET (dest, imm)); > - num_insns++; > - return num_insns; > + return 1; > } > > - if (mode == SImode) > + if ((val >> 32) == 0 || mode == SImode) > { > - /* We know we can't do this in 1 insn, and we must be able to do it > - in two; so don't mess around looking for sequences that don't buy > - us anything. */ > if (generate) > { > - emit_insn (gen_rtx_SET (dest, GEN_INT (INTVAL (imm) & 0xffff))); > - emit_insn (gen_insv_immsi (dest, GEN_INT (16), > - GEN_INT ((INTVAL (imm) >> 16) & 0xffff))); > + emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff))); > + if (mode == SImode) > + emit_insn (gen_insv_immsi (dest, GEN_INT (16), > + GEN_INT ((val >> 16) & 0xffff))); > + else > + emit_insn (gen_insv_immdi (dest, GEN_INT (16), > + GEN_INT ((val >> 16) & 0xffff))); > } > - num_insns += 2; > - return num_insns; > + return 2; > } > > /* Remaining cases are all for DImode. 
*/ > > - val = INTVAL (imm); > - > - one_match = 0; > - zero_match = 0; > mask = 0xffff; > - first_not_ffff_match = -1; > - > - for (i = 0; i < 64; i += 16, mask <<= 16) > - { > - if ((val & mask) == mask) > - one_match++; > - else > - { > - if (first_not_ffff_match < 0) > - first_not_ffff_match = i; > - if ((val & mask) == 0) > - zero_match++; > - } > - } > - > - if (one_match == 2) > - { > - /* Set one of the quarters and then insert back into result. */ > - mask = 0xffffll << first_not_ffff_match; > - if (generate) > - { > - emit_insn (gen_rtx_SET (dest, GEN_INT (val | mask))); > - emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match), > - GEN_INT ((val >> first_not_ffff_match) > - & 0xffff))); > - } > - num_insns += 2; > - return num_insns; > - } > - > - if (zero_match == 2) > - goto simple_sequence; > + zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) + > + ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0); > + one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) + > + ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0); > > if (zero_match != 2 && one_match != 2) > { > @@ -1463,58 +1430,32 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, > { > emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); > emit_insn (gen_insv_immdi (dest, GEN_INT (i), > - GEN_INT ((val >> i) & 0xffff))); > + GEN_INT ((val >> i) & 0xffff))); > } > - return 2; > } > } > > - if (one_match > zero_match) > - { > - /* Set either first three quarters or all but the third. */ > - mask = 0xffffll << (16 - first_not_ffff_match); > - if (generate) > - emit_insn (gen_rtx_SET (dest, > - GEN_INT (val | mask | 0xffffffff00000000ull))); > - num_insns ++; > + /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which > + are emitted by the initial mov. If one_match > zero_match, skip set bits, > + otherwise skip zero bits. */ > > - /* Now insert other two quarters. 
*/ > - for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1); > - i < 64; i += 16, mask <<= 16) > - { > - if ((val & mask) != mask) > - { > - if (generate) > - emit_insn (gen_insv_immdi (dest, GEN_INT (i), > - GEN_INT ((val >> i) & 0xffff))); > - num_insns ++; > - } > - } > - return num_insns; > - } > - > - simple_sequence: > - first = true; > + num_insns = 1; > mask = 0xffff; > - for (i = 0; i < 64; i += 16, mask <<= 16) > + val2 = one_match > zero_match ? ~val : val; > + i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32; > + > + if (generate) > + emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match > + ? (val | ~(mask << i)) > + : (val & (mask << i))))); > + for (i += 16; i < 64; i += 16) > { > - if ((val & mask) != 0) > - { > - if (first) > - { > - if (generate) > - emit_insn (gen_rtx_SET (dest, GEN_INT (val & mask))); > - num_insns ++; > - first = false; > - } > - else > - { > - if (generate) > - emit_insn (gen_insv_immdi (dest, GEN_INT (i), > - GEN_INT ((val >> i) & 0xffff))); > - num_insns ++; > - } > - } > + if ((val2 & (mask << i)) == 0) > + continue; > + if (generate) > + emit_insn (gen_insv_immdi (dest, GEN_INT (i), > + GEN_INT ((val >> i) & 0xffff))); > + num_insns ++; > } > > return num_insns; > -- > 1.8.3 > > >