From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by sourceware.org (Postfix) with ESMTP id 4984B3858D33 for ; Wed, 25 Oct 2023 10:00:43 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 4984B3858D33 Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=foss.arm.com Authentication-Results: sourceware.org; spf=none smtp.mailfrom=foss.arm.com ARC-Filter: OpenARC Filter v1.0.0 sourceware.org 4984B3858D33 Authentication-Results: server2.sourceware.org; arc=none smtp.remote-ip=217.140.110.172 ARC-Seal: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1698228048; cv=none; b=NygNvVBfyGFt+L+aHHRiIdSFOAaIkslwjaWUpchwCt/KxTrNpaRo8A+6AVbdLvpUoQnUjrv9uBtu9yvFpS/oETFbxVp/fzaIOn45gPC+mikAVU4EZQvWvDKijERFnF4SOMVWsOBSj2u6YBPunlLaAiHx6EfDU6VDNCZ+mtM1yqE= ARC-Message-Signature: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1698228048; c=relaxed/simple; bh=gKpZzauOahFq95cQcb4mclZJ2FNOLXpPZMdteaLd7U8=; h=Message-ID:Date:MIME-Version:Subject:To:From; b=XblA9iCSyHtr5bE8b8DHiT3lxaJKCew/VSRL1kFan1kDxR4BbcV8wkmIDgqgNMPTj9hervkLxB0Nr+7FbRSRQbTej2Lc8txjAQLxEOGShbQHK8aKWVkYWYXULqvONOWHsEUnLu+Hz2OUgKNXLJQGDnCuIqtpoJVrWG19ZRSxdgo= ARC-Authentication-Results: i=1; server2.sourceware.org Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 61BB62F4; Wed, 25 Oct 2023 03:01:24 -0700 (PDT) Received: from [10.57.3.253] (unknown [10.57.3.253]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id 43D623F738; Wed, 25 Oct 2023 03:00:42 -0700 (PDT) Message-ID: <06249542-f74e-4696-b1f1-54b9ca4c4868@foss.arm.com> Date: Wed, 25 Oct 2023 11:00:40 +0100 MIME-Version: 1.0 User-Agent: Mozilla Thunderbird Subject: Re: [PATCH v2] AArch64: Improve immediate generation Content-Language: en-GB To: Wilco Dijkstra , Richard Earnshaw Cc: GCC Patches References: From: Richard Earnshaw In-Reply-To: Content-Type: text/plain; charset=UTF-8; 
format=flowed Content-Transfer-Encoding: 7bit X-Spam-Status: No, score=-3495.1 required=5.0 tests=BAYES_00,GIT_PATCH_0,KAM_DMARC_STATUS,KAM_LAZY_DOMAIN_SECURITY,KAM_LOTSOFHASH,KAM_SHORT,SPF_HELO_NONE,SPF_NONE,TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: On 24/10/2023 18:27, Wilco Dijkstra wrote: > v2: Use check-function-bodies in tests > > Further improve immediate generation by adding support for 2-instruction > MOV/EOR bitmask immediates. This reduces the number of 3/4-instruction > immediates in SPECCPU2017 by ~2%. > > Passes regress, OK for commit? > > gcc/ChangeLog: > * config/aarch64/aarch64.cc (aarch64_internal_mov_immediate) > Add support for immediates using MOV/EOR bitmask. > > gcc/testsuite: > * gcc.target/aarch64/imm_choice_comparison.c: Change tests. > * gcc.target/aarch64/moveor_imm.c: Add new test. > * gcc.target/aarch64/pr106583.c: Change tests. > > --- > > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > index 578a253d6e0e133e19592553fc873b3e73f9f218..ed5be2b64c9a767d74e9d78415da964c669001aa 100644 > --- a/gcc/config/aarch64/aarch64.cc > +++ b/gcc/config/aarch64/aarch64.cc > @@ -5748,6 +5748,26 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, > } > return 2; > } > + > + /* Try 2 bitmask immediates which are xor'd together. */ > + for (i = 0; i < 64; i += 16) > + { > + val2 = (val >> i) & mask; > + val2 |= val2 << 16; > + val2 |= val2 << 32; > + if (aarch64_bitmask_imm (val2) && aarch64_bitmask_imm (val ^ val2)) > + break; > + } > + > + if (i != 64) > + { > + if (generate) > + { > + emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); > + emit_insn (gen_xordi3 (dest, dest, GEN_INT (val ^ val2))); > + } > + return 2; > + } > } > > /* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. 
*/ > diff --git a/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c b/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c > index ebc44d6dbc7287d907603d77d7b54496de177c4b..a1fc90ad73411ae8ed848fa321586afcb8d710aa 100644 > --- a/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c > +++ b/gcc/testsuite/gcc.target/aarch64/imm_choice_comparison.c > @@ -1,32 +1,64 @@ > /* { dg-do compile } */ > /* { dg-options "-O2" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > > /* Go from four moves to two. */ > > +/* > +** foo: > +** mov w[0-9]+, 2576980377 > +** movk x[0-9]+, 0x9999, lsl 32 > +** ... > +*/ > + > int > foo (long long x) > { > - return x <= 0x1999999999999998; > + return x <= 0x0000999999999998; > } > > +/* > +** GT: > +** mov w[0-9]+, -16777217 > +** ... > +*/ > + > int > GT (unsigned int x) > { > return x > 0xfefffffe; > } > > +/* > +** LE: > +** mov w[0-9]+, -16777217 > +** ... > +*/ > + > int > LE (unsigned int x) > { > return x <= 0xfefffffe; > } > > +/* > +** GE: > +** mov w[0-9]+, 4278190079 > +** ... > +*/ > + > int > GE (long long x) > { > return x >= 0xff000000; > } > > +/* > +** LT: > +** mov w[0-9]+, -16777217 > +** ... > +*/ > + > int > LT (int x) > { > @@ -35,6 +67,13 @@ LT (int x) > > /* Optimize the immediate in conditionals. */ > > +/* > +** check: > +** ... > +** mov w[0-9]+, -16777217 > +** ... > +*/ > + > int > check (int x, int y) > { > @@ -44,11 +83,15 @@ check (int x, int y) > return x; > } > > +/* > +** tern: > +** ... > +** mov w[0-9]+, -16777217 > +** ... > +*/ > + > int > tern (int x) > { > return x >= 0xff000000 ? 5 : -3; > } > - > -/* baz produces one movk instruction. 
*/ > -/* { dg-final { scan-assembler-times "movk" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/moveor_imm.c b/gcc/testsuite/gcc.target/aarch64/moveor_imm.c > new file mode 100644 > index 0000000000000000000000000000000000000000..1c0c3f3bf8c588f9661112a8b3f9a72c5ddff95c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/moveor_imm.c > @@ -0,0 +1,63 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +/* > +** f1: > +** mov x0, -6148914691236517206 > +** eor x0, x0, -9223372036854775807 > +** ret > +*/ Some odd whitespace above. Also, I think it would be better to write the tests as ** f1: ** ... ** (the expected instructions) ** ... Then different prologue and epilogue options (such as BTI or pac-ret) won't affect the tests. > + > +long f1 (void) > +{ > + return 0x2aaaaaaaaaaaaaab; > +} > + > +/* > +** f2: > +** mov x0, -1085102592571150096 > +** eor x0, x0, -2305843009213693951 > +** ret > +*/ > + > +long f2 (void) > +{ > + return 0x10f0f0f0f0f0f0f1; > +} > + > +/* > +** f3: > +** mov x0, -3689348814741910324 > +** eor x0, x0, -4611686018427387903 > +** ret > +*/ > + > +long f3 (void) > +{ > + return 0xccccccccccccccd; > +} > + > +/* > +** f4: > +** mov x0, -7378697629483820647 > +** eor x0, x0, -9223372036854775807 > +** ret > +*/ > + > +long f4 (void) > +{ > + return 0x1999999999999998; > +} > + > +/* > +** f5: > +** mov x0, 3689348814741910323 > +** eor x0, x0, 864691128656461824 > +** ret > +*/ > + > +long f5 (void) > +{ > + return 0x3f3333333f333333; > +} > diff --git a/gcc/testsuite/gcc.target/aarch64/pr106583.c b/gcc/testsuite/gcc.target/aarch64/pr106583.c > index 0f931580817d78dc1cc58f03b251bd21bec71f59..63df7395edf9491720e3601848e15aa773c51e6d 100644 > --- a/gcc/testsuite/gcc.target/aarch64/pr106583.c > +++ b/gcc/testsuite/gcc.target/aarch64/pr106583.c > @@ -1,41 +1,94 @@ > -/* { dg-do assemble } */ > -/* { dg-options "-O2 --save-temps" } */ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } 
*/ > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +/* > +** f1: > +** mov x0, -72340172838076674 > +** movk x0, 0xfeff, lsl 0 > +** movk x0, 0x75fe, lsl 48 > +** ret > +*/ > > long f1 (void) > { > - return 0x7efefefefefefeff; > + return 0x75fefefefefefeff; > } > > +/* > +** f2: > +** mov x0, -6148914691236517206 > +** movk x0, 0x5678, lsl 32 > +** movk x0, 0x1234, lsl 48 > +** ret > +*/ > + > long f2 (void) > { > return 0x12345678aaaaaaaa; > } > > +/* > +** f3: > +** mov x0, -3689348814741910324 > +** movk x0, 0x5678, lsl 0 > +** movk x0, 0x1234, lsl 48 > +** ret > +*/ > + > long f3 (void) > { > return 0x1234cccccccc5678; > } > > +/* > +** f4: > +** mov x0, 8608480567731124087 > +** movk x0, 0x5678, lsl 16 > +** movk x0, 0x1234, lsl 32 > +** ret > +*/ > + > long f4 (void) > { > return 0x7777123456787777; > } > > +/* > +** f5: > +** mov x0, 6148914691236517205 > +** movk x0, 0x5678, lsl 0 > +** movk x0, 0x1234, lsl 16 > +** ret > +*/ > + > long f5 (void) > { > return 0x5555555512345678; > } > > +/* > +** f6: > +** mov x0, -4919131752989213765 > +** movk x0, 0x5678, lsl 16 > +** movk x0, 0x1234, lsl 48 > +** ret > +*/ > + > long f6 (void) > { > return 0x1234bbbb5678bbbb; > } > > +/* > +** f7: > +** mov x0, 4919131752989213764 > +** movk x0, 0x5678, lsl 0 > +** movk x0, 0x1234, lsl 32 > +** ret > +*/ > + > long f7 (void) > { > return 0x4444123444445678; > } > - > - > -/* { dg-final { scan-assembler-times {\tmovk\t} 14 } } */ > -/* { dg-final { scan-assembler-times {\tmov\t} 7 } } */ > OK with that change. R.