From: "Wilco Dijkstra" 
To: "'GCC Patches'" 
Subject: [PATCH][AArch64][2/5] Improve immediate generation
Date: Wed, 02 Sep 2015 12:35:00 -0000
Message-ID: <000a01d0e57b$d435d420$7ca17c60$@com>

aarch64_internal_mov_immediate uses loops that iterate over all legal
bitmask immediates to find 2-instruction immediate combinations.  One loop
is quadratic and, despite being extremely expensive, very rarely finds a
matching immediate (43 matches in all of SPEC2006, none of which survive
into the final code), so it can be removed without any effect on code
quality.  The other loop can be replaced by a constant-time search: rather
than iterating over all legal bitmask values, reconstruct a potential
bitmask and query the fast aarch64_bitmask_imm.  (A small standalone
sketch of this search is appended after the patch for illustration.)

No change in generated code; passes the GCC regression tests and
bootstrap.

ChangeLog:
2015-09-02  Wilco Dijkstra

	* gcc/config/aarch64/aarch64.c (aarch64_internal_mov_immediate):
	Replace slow immediate matching loops with a faster algorithm.

---
 gcc/config/aarch64/aarch64.c | 96 +++++++++++---------------------------------
 1 file changed, 23 insertions(+), 73 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c0280e6..d6f7cb0 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1376,7 +1376,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
   unsigned HOST_WIDE_INT mask;
   int i;
   bool first;
-  unsigned HOST_WIDE_INT val;
+  unsigned HOST_WIDE_INT val, val2;
   bool subtargets;
   rtx subtarget;
   int one_match, zero_match, first_not_ffff_match;
@@ -1503,85 +1503,35 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
 	}
     }
 
-  /* See if we can do it by arithmetically combining two
-     immediates.  */
-  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
+  if (zero_match != 2 && one_match != 2)
     {
-      int j;
-      mask = 0xffff;
+      /* Try emitting a bitmask immediate with a movk replacing 16 bits.
+	 For a 64-bit bitmask try whether changing 16 bits to all ones or
+	 zeroes creates a valid bitmask.  To check any repeated bitmask,
+	 try using 16 bits from the other 32-bit half of val.  */
 
-      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
-	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
+      for (i = 0; i < 64; i += 16, mask <<= 16)
 	{
-	  if (generate)
-	    {
-	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-	      emit_insn (gen_rtx_SET (subtarget,
-				      GEN_INT (aarch64_bitmasks[i])));
-	      emit_insn (gen_adddi3 (dest, subtarget,
-				     GEN_INT (val - aarch64_bitmasks[i])));
-	    }
-	  num_insns += 2;
-	  return num_insns;
+	  val2 = val & ~mask;
+	  if (val2 != val && aarch64_bitmask_imm (val2, mode))
+	    break;
+	  val2 = val | mask;
+	  if (val2 != val && aarch64_bitmask_imm (val2, mode))
+	    break;
+	  val2 = val2 & ~mask;
+	  val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
+	  if (val2 != val && aarch64_bitmask_imm (val2, mode))
+	    break;
 	}
-
-      for (j = 0; j < 64; j += 16, mask <<= 16)
+      if (i != 64)
 	{
-	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
+	  if (generate)
 	    {
-	      if (generate)
-		{
-		  emit_insn (gen_rtx_SET (dest,
-					  GEN_INT (aarch64_bitmasks[i])));
-		  emit_insn (gen_insv_immdi (dest, GEN_INT (j),
-					     GEN_INT ((val >> j) & 0xffff)));
-		}
-	      num_insns += 2;
-	      return num_insns;
+	      emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
+	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+					 GEN_INT ((val >> i) & 0xffff)));
 	    }
-	}
-    }
-
-  /* See if we can do it by logically combining two immediates.  */
-  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
-    {
-      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
-	{
-	  int j;
-
-	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
-	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
-	      {
-		if (generate)
-		  {
-		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-		    emit_insn (gen_rtx_SET (subtarget,
-					    GEN_INT (aarch64_bitmasks[i])));
-		    emit_insn (gen_iordi3 (dest, subtarget,
-					   GEN_INT (aarch64_bitmasks[j])));
-		  }
-		num_insns += 2;
-		return num_insns;
-	      }
-	}
-      else if ((val & aarch64_bitmasks[i]) == val)
-	{
-	  int j;
-
-	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
-	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
-	      {
-		if (generate)
-		  {
-		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-		    emit_insn (gen_rtx_SET (subtarget,
-					    GEN_INT (aarch64_bitmasks[j])));
-		    emit_insn (gen_anddi3 (dest, subtarget,
-					   GEN_INT (aarch64_bitmasks[i])));
-		  }
-		num_insns += 2;
-		return num_insns;
-	      }
+	  return 2;
 	}
     }
 
-- 
1.8.3
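
For illustration, here is the standalone sketch of the constant-time
search mentioned above, runnable outside of GCC.  is_bitmask_imm is a
deliberately naive stand-in for GCC's fast aarch64_bitmask_imm (it
brute-forces every replicated rotated run of ones rather than using the
algorithmic test), and find_movk_split and the demo constant are names
invented for this sketch only:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

/* Naive stand-in for aarch64_bitmask_imm: VAL is a valid 64-bit logical
   (bitmask) immediate if it is a run of LEN ones (0 < LEN < SIZE)
   rotated within a SIZE-bit element and replicated to fill 64 bits.  */
static bool
is_bitmask_imm (uint64_t val)
{
  for (unsigned size = 2; size <= 64; size *= 2)
    for (unsigned len = 1; len < size; len++)
      for (unsigned rot = 0; rot < size; rot++)
	{
	  /* LEN ones in the low bits, rotated left by ROT within SIZE.  */
	  uint64_t elt = ((uint64_t) 1 << len) - 1;
	  if (rot)
	    elt = (elt << rot) | (elt >> (size - rot));
	  if (size < 64)
	    elt &= ((uint64_t) 1 << size) - 1;
	  /* Replicate the SIZE-bit element across all 64 bits.  */
	  for (unsigned w = size; w < 64; w *= 2)
	    elt |= elt << w;
	  if (elt == val)
	    return true;
	}
  return false;
}

/* The constant-time search from the patch: find a 16-bit chunk of VAL
   such that replacing it by all zeroes, all ones, or the 16 bits from
   the other 32-bit half yields a valid bitmask immediate.  Return the
   chunk position (0/16/32/48) and the bitmask via *BITMASK, or -1.  */
static int
find_movk_split (uint64_t val, uint64_t *bitmask)
{
  uint64_t mask = 0xffff, val2 = 0;
  int i;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      /* Chunk replaced by all zeroes.  */
      val2 = val & ~mask;
      if (val2 != val && is_bitmask_imm (val2))
	break;
      /* Chunk replaced by all ones.  */
      val2 = val | mask;
      if (val2 != val && is_bitmask_imm (val2))
	break;
      /* Chunk replaced by the 16 bits 32 bits away, to catch
	 repeating bitmask patterns.  */
      val2 = val & ~mask;
      val2 |= ((val2 >> 32) | (val2 << 32)) & mask;
      if (val2 != val && is_bitmask_imm (val2))
	break;
    }
  if (i == 64)
    return -1;
  *bitmask = val2;
  return i;
}

int
main (void)
{
  /* 0x00ffff0000001234 is the bitmask immediate 0x00ffff0000000000 with
     its low 16 bits overwritten, so it should split into MOV + MOVK.  */
  uint64_t imm, val = 0x00ffff0000001234ULL;
  int i = find_movk_split (val, &imm);

  if (i >= 0)
    printf ("mov  x0, #0x%016" PRIx64 "\nmovk x0, #0x%" PRIx64 ", lsl #%d\n",
	    imm, (val >> i) & 0xffff, i);
  return 0;
}

The real patch of course queries GCC's aarch64_bitmask_imm and emits the
instructions via gen_rtx_SET/gen_insv_immdi; the sketch only demonstrates
that the search is a handful of masked compares per 16-bit chunk instead
of a scan over all legal bitmask immediates.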