public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* PATCH: PR 834: IA64: Change br to brl for "far" branches when possible
@ 2005-05-12 16:17 H. J. Lu
  2005-05-12 17:14 ` H. J. Lu
  0 siblings, 1 reply; 12+ messages in thread
From: H. J. Lu @ 2005-05-12 16:17 UTC (permalink / raw)
  To: binutils

This patch will try to turn br into brl when it is possible.


H.J.
-----
2005-05-12  H.J. Lu  <hongjiu.lu@intel.com>

	PR 834
	* elfxx-ia64.c (elfNN_ia64_relax_br): New.
	(elfNN_ia64_relax_section): Use it.

--- bfd/elfxx-ia64.c.brl	2005-05-07 06:58:12.000000000 -0700
+++ bfd/elfxx-ia64.c	2005-05-12 09:03:18.000000000 -0700
@@ -681,6 +681,113 @@ bfd_elfNN_ia64_after_parse (int itanium)
   oor_branch_size = itanium ? sizeof (oor_ip) : sizeof (oor_brl);
 }
 
+static bfd_boolean
+elfNN_ia64_relax_br (bfd_byte *contents, bfd_vma off)
+{
+  unsigned int template, t0, t1, t2, t3, br_code1, br_code2;
+  long br_slot;
+  bfd_byte *hit_addr;
+
+  hit_addr = (bfd_byte *) (contents + off);
+  br_slot = (long) hit_addr & 0x3;
+  hit_addr -= br_slot;
+  t0 = bfd_getl32 (hit_addr + 0);
+  t1 = bfd_getl32 (hit_addr + 4);
+  t2 = bfd_getl32 (hit_addr + 8);
+  t3 = bfd_getl32 (hit_addr + 12);
+
+  /* Check if we can turn br into brl.  A label is always at the start
+     of the bundle.  Even if there are predicates on NOPs, we still
+     perform this optimization.  */
+  template = t0 & 0x1e;
+  switch (br_slot)
+    {
+    case 0:
+      /* Check if slot 1 and slot 2 are NOPs. Possible template is
+         BBB.  We only need to check nop.b.  */
+      if (!((t2 & 0x787e00) == 0x100000
+	    && (t3 & 0xf0fc0000) == 0x20000000))
+	return FALSE;
+      br_code1 = (t0 & 0xffffffe0) >> 5 | (t1 & 0x1f) << 27;
+      br_code2 = (t1 & 0x3fe0) >> 5;
+      break;
+    case 1:
+      /* Check if slot 0 and slot 2 are NOPs. Possible templates are
+         MBB and BBB.  */
+      if (!((template == 0x12				/* MBB */
+	     && (t0 & 0x80000000) == 0x0
+	     && (t1 & 0x37ff) == 0x1
+	     && (t3 & 0xf0fc0000) == 0x20000000)
+	    || (template == 0x16			/*  BBB */
+		&& (t1 & 0x3c3f) == 0x800
+		&& (t3 & 0xf0fc0000) == 0x20000000)))
+	return FALSE;
+      br_code1 = (t1 & 0xffffc000) >> 14 | (t2 & 0x3fff) << 18;
+      br_code2 = (t2 & 0x7fc000) >> 14;
+      break;
+    case 2:
+      /* Check if slot 0 and slot 1 are NOPs. Possible templates are
+	 MIB, MBB, BBB, MMB and MFB.  */
+      if (!((template == 0x10				/* MIB */
+	     && (t0 & 0x80000000) == 0x0
+	     && (t1 & 0x37ff) == 0x1
+	     && (t2 & 0x7bff00) == 0x200)
+	    || (template == 0x12			/* MBB */
+		&& (t0 & 0x80000000) == 0x0
+		&& (t1 & 0x37ff) == 0x1
+		&& (t2 & 0x787e00) == 0x100000)
+	    || (template == 0x16			/* BBB */
+		&& (t1 & 0x3c3f) == 0x800
+		&& (t2 & 0x787e00) == 0x100000)
+	    || (template == 0x18			/* MMB */
+		&& (t0 & 0x80000000) == 0x0
+		&& (t1 & 0x37ff) == 0x1
+		&& (t2 & 0x7bff00) == 0x200)
+	    || (template == 0x1c			/* MFB */
+		&& (t0 & 0x80000000) == 0x0
+		&& (t1 & 0x37ff) == 0x1
+		&& (t2 & 0x78ff00) == 0x200)))
+	return FALSE;
+      br_code1 = (t2 & 0xff800000) >> 23 | (t3 & 0x7fffff) << 9;
+      br_code2 = (t3 & 0xff800000) >> 23;
+      break;
+    default:
+      /* It should never happen.  */
+      abort ();
+    }
+  
+  /* Check if we can turn br into brl.  */
+  if (!(((br_code2 & 0x1e0) == 0xa0)
+	|| ((br_code1 & 0x1c0) == 0 && (br_code2 & 0x1e0) == 0x80)))
+    return FALSE;
+
+  /* Turn br into brl by setting bit 40.  */
+  br_code2 |= 0x100;
+
+  /* Turn the old bundle into a MLX bundle with the same stop-bit
+     variety.  */
+  if (t0 & 0x1)
+    template = 0x5;
+  else
+    template = 0x4;
+
+  /* Put nop.m in slot 0 and keep the original predicate. */
+  t0 &= 0x7e0;
+  t0 |= template;
+  t1 &= ~0x3fff;
+  t1 |= 0x1;
+
+  /* Put brl in slot 1.  */
+  t2 = br_code1 << 23;
+  t3 = (br_code1 >> 9) | (br_code2 << 23);
+
+  bfd_putl32 (t0, hit_addr);
+  bfd_putl32 (t1, hit_addr + 4);
+  bfd_putl32 (t2, hit_addr + 8);
+  bfd_putl32 (t3, hit_addr + 12);
+  return TRUE;
+}
+
 static void
 elfNN_ia64_relax_brl (bfd_byte *contents, bfd_vma off)
 {
@@ -985,6 +1092,16 @@ elfNN_ia64_relax_section (abfd, sec, lin
 	    }
 	  else if (r_type == R_IA64_PCREL60B)
 	    continue;
+	  else if (elfNN_ia64_relax_br (contents, roff))
+	    {
+	      irel->r_info
+		= ELFNN_R_INFO (ELFNN_R_SYM (irel->r_info),
+				R_IA64_PCREL60B);
+
+	      /* Make the relocation offset point to slot 1.  */
+	      irel->r_offset = (irel->r_offset & ~((bfd_vma) 0x3)) + 1;
+	      continue;
+	    }
 
 	  /* We can't put a trampoline in a .init/.fini section. Issue
 	     an error.  */

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: PATCH: PR 834: IA64: Change br to brl for "far" branches when possible
  2005-05-12 16:17 PATCH: PR 834: IA64: Change br to brl for "far" branches when possible H. J. Lu
@ 2005-05-12 17:14 ` H. J. Lu
  2005-05-12 20:05   ` Richard Henderson
  0 siblings, 1 reply; 12+ messages in thread
From: H. J. Lu @ 2005-05-12 17:14 UTC (permalink / raw)
  To: binutils

On Thu, May 12, 2005 at 09:11:41AM -0700, H. J. Lu wrote:
> This patch will try to turn br into brl when it is possible.
> 

Slot 0 has to be NOP only for BBB. Here is an update.


H.J.
----
2005-05-12  H.J. Lu  <hongjiu.lu@intel.com>

	PR 834
	* elfxx-ia64.c (elfNN_ia64_relax_br): New.
	(elfNN_ia64_relax_section): Use it.

--- bfd/elfxx-ia64.c.brl	2005-05-07 06:58:12.000000000 -0700
+++ bfd/elfxx-ia64.c	2005-05-12 09:34:52.000000000 -0700
@@ -681,6 +681,114 @@ bfd_elfNN_ia64_after_parse (int itanium)
   oor_branch_size = itanium ? sizeof (oor_ip) : sizeof (oor_brl);
 }
 
+static bfd_boolean
+elfNN_ia64_relax_br (bfd_byte *contents, bfd_vma off)
+{
+  unsigned int template, t0, t1, t2, t3, br_code1, br_code2, mlx;
+  long br_slot;
+  bfd_byte *hit_addr;
+
+  hit_addr = (bfd_byte *) (contents + off);
+  br_slot = (long) hit_addr & 0x3;
+  hit_addr -= br_slot;
+  t0 = bfd_getl32 (hit_addr + 0);
+  t1 = bfd_getl32 (hit_addr + 4);
+  t2 = bfd_getl32 (hit_addr + 8);
+  t3 = bfd_getl32 (hit_addr + 12);
+
+  /* Check if we can turn br into brl.  A label is always at the start
+     of the bundle.  Even if there are predicates on NOPs, we still
+     perform this optimization.  */
+  template = t0 & 0x1e;
+  switch (br_slot)
+    {
+    case 0:
+      /* Check if slot 1 and slot 2 are NOPs. Possible template is
+         BBB.  We only need to check nop.b.  */
+      if (!((t2 & 0x787e00) == 0x100000
+	    && (t3 & 0xf0fc0000) == 0x20000000))
+	return FALSE;
+      br_code1 = (t0 & 0xffffffe0) >> 5 | (t1 & 0x1f) << 27;
+      br_code2 = (t1 & 0x3fe0) >> 5;
+      break;
+    case 1:
+      /* Check if slot 2 is NOP. Possible templates are MBB and BBB.
+	 For BBB, slot 0 also has to be nop.b.  */
+      if (!((template == 0x12				/* MBB */
+	     && (t3 & 0xf0fc0000) == 0x20000000)
+	    || (template == 0x16			/* BBB */
+		&& (t1 & 0x3c3f) == 0x800
+		&& (t3 & 0xf0fc0000) == 0x20000000)))
+	return FALSE;
+      br_code1 = (t1 & 0xffffc000) >> 14 | (t2 & 0x3fff) << 18;
+      br_code2 = (t2 & 0x7fc000) >> 14;
+      break;
+    case 2:
+      /* Check if slot 1 is NOP. Possible templates are MIB, MBB, BBB,
+	 MMB and MFB. For BBB, slot 0 also has to be nop.b.  */
+      if (!((template == 0x10				/* MIB */
+	     && (t2 & 0x7bff00) == 0x200)
+	    || (template == 0x12			/* MBB */
+		&& (t2 & 0x787e00) == 0x100000)
+	    || (template == 0x16			/* BBB */
+		&& (t1 & 0x3c3f) == 0x800
+		&& (t2 & 0x787e00) == 0x100000)
+	    || (template == 0x18			/* MMB */
+		&& (t2 & 0x7bff00) == 0x200)
+	    || (template == 0x1c			/* MFB */
+		&& (t2 & 0x78ff00) == 0x200)))
+	return FALSE;
+      br_code1 = (t2 & 0xff800000) >> 23 | (t3 & 0x7fffff) << 9;
+      br_code2 = (t3 & 0xff800000) >> 23;
+      break;
+    default:
+      /* It should never happen.  */
+      abort ();
+    }
+  
+  /* Check if we can turn br into brl.  */
+  if (!(((br_code2 & 0x1e0) == 0xa0)
+	|| ((br_code1 & 0x1c0) == 0 && (br_code2 & 0x1e0) == 0x80)))
+    return FALSE;
+
+  /* Turn br into brl by setting bit 40.  */
+  br_code2 |= 0x100;
+
+  /* Turn the old bundle into a MLX bundle with the same stop-bit
+     variety.  */
+  if (t0 & 0x1)
+    mlx = 0x5;
+  else
+    mlx = 0x4;
+
+  if (template == 0x16)
+    {
+      /* For BBB, we need to put nop.m in slot 0 and keep the original
+	 predicate.  */
+      t0 &= 0x7e0;
+      t1 &= ~0x3fff;
+      t1 |= 0x1;
+    }
+  else
+    {
+      /* Keep the original instruction in slot 0.  */
+      t0 &= 0xffffffe0;
+      t1 &= 0x3fff;
+    }
+
+  t0 |= mlx;
+
+  /* Put brl in slot 1.  */
+  t2 = br_code1 << 23;
+  t3 = (br_code1 >> 9) | (br_code2 << 23);
+
+  bfd_putl32 (t0, hit_addr);
+  bfd_putl32 (t1, hit_addr + 4);
+  bfd_putl32 (t2, hit_addr + 8);
+  bfd_putl32 (t3, hit_addr + 12);
+  return TRUE;
+}
+
 static void
 elfNN_ia64_relax_brl (bfd_byte *contents, bfd_vma off)
 {
@@ -985,6 +1093,16 @@ elfNN_ia64_relax_section (abfd, sec, lin
 	    }
 	  else if (r_type == R_IA64_PCREL60B)
 	    continue;
+	  else if (elfNN_ia64_relax_br (contents, roff))
+	    {
+	      irel->r_info
+		= ELFNN_R_INFO (ELFNN_R_SYM (irel->r_info),
+				R_IA64_PCREL60B);
+
+	      /* Make the relocation offset point to slot 1.  */
+	      irel->r_offset = (irel->r_offset & ~((bfd_vma) 0x3)) + 1;
+	      continue;
+	    }
 
 	  /* We can't put a trampoline in a .init/.fini section. Issue
 	     an error.  */

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: PATCH: PR 834: IA64: Change br to brl for "far" branches when possible
  2005-05-12 17:14 ` H. J. Lu
@ 2005-05-12 20:05   ` Richard Henderson
  2005-05-12 20:55     ` H. J. Lu
  0 siblings, 1 reply; 12+ messages in thread
From: Richard Henderson @ 2005-05-12 20:05 UTC (permalink / raw)
  To: H. J. Lu; +Cc: binutils

On Thu, May 12, 2005 at 09:37:32AM -0700, H. J. Lu wrote:
> 	* elfxx-ia64.c (elfNN_ia64_relax_br): New.
> 	(elfNN_ia64_relax_brl): Use it.

I really wish you'd extract the three insns independently, rather
than extracting 3 words and using quite so many arbitrary masks.
It's not like we don't have a 64-bit type available.


r~

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: PATCH: PR 834: IA64: Change br to brl for "far" branches when possible
  2005-05-12 20:05   ` Richard Henderson
@ 2005-05-12 20:55     ` H. J. Lu
  2005-05-12 22:36       ` Richard Henderson
  0 siblings, 1 reply; 12+ messages in thread
From: H. J. Lu @ 2005-05-12 20:55 UTC (permalink / raw)
  To: Richard Henderson; +Cc: binutils

On Thu, May 12, 2005 at 12:56:02PM -0700, Richard Henderson wrote:
> On Thu, May 12, 2005 at 09:37:32AM -0700, H. J. Lu wrote:
> > 	* elfxx-ia64.c (elfNN_ia64_relax_br): New.
> > 	(elfNN_ia64_relax_brl): Use it.
> 
> I really wish you'd extract the three insns independently, rather
> than extracting 3 words and using quite so many arbitrary masks.
> It's not like we don't have a 64-bit type available.
> 

I was thinking of using long long, since long may be only 32 bits wide.
But I am not sure whether all compilers support it. I guess I can use

#if BFD_HOST_LONG_LONG
long long ..
#else
  return FALSE;
#endif


H.J.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: PATCH: PR 834: IA64: Change br to brl for "far" branches when possible
  2005-05-12 20:55     ` H. J. Lu
@ 2005-05-12 22:36       ` Richard Henderson
  2005-05-12 23:08         ` James E Wilson
                           ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Richard Henderson @ 2005-05-12 22:36 UTC (permalink / raw)
  To: H. J. Lu; +Cc: binutils

On Thu, May 12, 2005 at 01:05:22PM -0700, H. J. Lu wrote:
> > I really wish you'd extract the three insns independently, rather
> > than extracting 3 words and using quite so many arbitrary masks.
> > It's not like we don't have a 64-bit type available.
> > 
> 
> I was thinking to use long long since long may be 32bit. But I am not
> sure if all compilers support it. I guess I can use
> 
> #if BFD_HOST_LONG_LONG
> long long ..
> #else
>   return FALSE;
> #endif

We already assume a 64-bit type, for instance in elfNN_ia64_install_value.
I see that elf32-ia64.lo is in BFD32_BACKENDS; that's probably a mistake.

I see no reason to complicate things by doing anything other than assuming
a 64-bit type is available when compiling for ia64.  Every reasonable
compiler does support such a type.



r~

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: PATCH: PR 834: IA64: Change br to brl for "far" branches when possible
  2005-05-12 22:36       ` Richard Henderson
@ 2005-05-12 23:08         ` James E Wilson
  2005-05-12 23:31           ` Richard Henderson
  2005-05-13  1:11         ` H. J. Lu
  2005-05-13  1:19         ` PATCH: Undo the elfNN_ia64_relax_brl change H. J. Lu
  2 siblings, 1 reply; 12+ messages in thread
From: James E Wilson @ 2005-05-12 23:08 UTC (permalink / raw)
  To: Richard Henderson; +Cc: H. J. Lu, binutils

On Thu, 2005-05-12 at 14:36, Richard Henderson wrote:
> We already assume a 64-bit type, for instance in elfNN_ia64_install_value.
> I see that elf32-ia64.lo is in BFD32_BACKENDS; that's probably a mistake.

I fixed that mistake about 6 weeks ago.

> I see no reason to complicate things by doing anything other than assuming
> a 64-bit type is available when compiling for ia64.  Every reasonable
> compiler does support such a type.

HJ has mentioned that he would like to split elfxx-ia64.c into separate
elf32-ia64.c and elf64-ia64.c files.  If this is done, then the
elf32-ia64.c file probably should not require long long support. 
Otherwise, I agree with you.  All IA-64 machines are 64-bit machines,
and there should never be any need to avoid use of 64-bit types in the
current elfxx-ia64.c file, or the proposed elf64-ia64.c.

There is some 32-bit code in elfxx-ia64.c, but that was a mistake, and
was done before elf32-ia64.c was removed from BFD32_BACKENDS.
-- 
Jim Wilson, GNU Tools Support, http://www.SpecifixInc.com


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: PATCH: PR 834: IA64: Change br to brl for "far" branches when possible
  2005-05-12 23:08         ` James E Wilson
@ 2005-05-12 23:31           ` Richard Henderson
  2005-05-12 23:40             ` James E Wilson
  0 siblings, 1 reply; 12+ messages in thread
From: Richard Henderson @ 2005-05-12 23:31 UTC (permalink / raw)
  To: James E Wilson; +Cc: H. J. Lu, binutils

On Thu, May 12, 2005 at 03:35:58PM -0700, James E Wilson wrote:
> I fixed that mistake about 6 weeks ago.

Oops.  Clearly the copy of the tree I looked at is out of date.

> HJ has mentioned that he would like to split elfxx-ia64.c into separate
> elf32-ia64.c and elf64-ia64.c files.

To what purpose?


r~

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: PATCH: PR 834: IA64: Change br to brl for "far" branches when possible
  2005-05-12 23:31           ` Richard Henderson
@ 2005-05-12 23:40             ` James E Wilson
  2005-05-13  0:58               ` H. J. Lu
  0 siblings, 1 reply; 12+ messages in thread
From: James E Wilson @ 2005-05-12 23:40 UTC (permalink / raw)
  To: Richard Henderson; +Cc: H. J. Lu, binutils

On Thu, 2005-05-12 at 16:08, Richard Henderson wrote:
> On Thu, May 12, 2005 at 03:35:58PM -0700, James E Wilson wrote:
> > HJ has mentioned that he would like to split elfxx-ia64.c into separate
> > elf32-ia64.c and elf64-ia64.c files.
> To what purpose?

I don't know the details.  I wasn't going to worry about it until I saw
a patch.  I assume it is part of his work to try to get the ILP32 linker
working.  Perhaps HJ can comment on this.
-- 
Jim Wilson, GNU Tools Support, http://www.SpecifixInc.com


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: PATCH: PR 834: IA64: Change br to brl for "far" branches when possible
  2005-05-12 23:40             ` James E Wilson
@ 2005-05-13  0:58               ` H. J. Lu
  0 siblings, 0 replies; 12+ messages in thread
From: H. J. Lu @ 2005-05-13  0:58 UTC (permalink / raw)
  To: James E Wilson; +Cc: Richard Henderson, binutils

On Thu, May 12, 2005 at 04:31:24PM -0700, James E Wilson wrote:
> On Thu, 2005-05-12 at 16:08, Richard Henderson wrote:
> > On Thu, May 12, 2005 at 03:35:58PM -0700, James E Wilson wrote:
> > > HJ has mentioned that he would like to split elfxx-ia64.c into separate
> > > elf32-ia64.c and elf64-ia64.c files.
> > To what purpose?
> 
> I don't know the details.  I wasn't going to worry about it until I saw
> a patch.  I assume it is part of his work to try to get the ILP32 linker
> working.  Perhaps HJ can comment on this.

The code is duplicated when both ELF32 and ELF64 are configured.
Also, it isn't easy to debug: you need to handle both elfxx-ia64.c
and elf64-ia64.c. But it seems to be too much effort to add elf32-ia64.c
and elf64-ia64.c, so I gave up on it.


H.J.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: PATCH: PR 834: IA64: Change br to brl for "far" branches when possible
  2005-05-12 22:36       ` Richard Henderson
  2005-05-12 23:08         ` James E Wilson
@ 2005-05-13  1:11         ` H. J. Lu
  2005-05-13  7:40           ` H. J. Lu
  2005-05-13  1:19         ` PATCH: Undo the elfNN_ia64_relax_brl change H. J. Lu
  2 siblings, 1 reply; 12+ messages in thread
From: H. J. Lu @ 2005-05-13  1:11 UTC (permalink / raw)
  To: Richard Henderson; +Cc: binutils

On Thu, May 12, 2005 at 02:36:33PM -0700, Richard Henderson wrote:
> On Thu, May 12, 2005 at 01:05:22PM -0700, H. J. Lu wrote:
> > > I really wish you'd extract the three insns independently, rather
> > > than extracting 3 words and using quite so many arbitrary masks.
> > > It's not like we don't have a 64-bit type available.
> > > 
> > 
> > I was thinking to use long long since long may be 32bit. But I am not
> > sure if all compilers support it. I guess I can use
> > 
> > #if BFD_HOST_LONG_LONG
> > long long ..
> > #else
> >   return FALSE;
> > #endif
> 
> We already assume a 64-bit type, for instance in elfNN_ia64_install_value.
> I see that elf32-ia64.lo is in BFD32_BACKENDS; that's probably a mistake.
> 
> I see no reason to complicate things by doing anything other than assuming
> a 64-bit type is available when compiling for ia64.  Every reasonable
> compiler does support such a type.
> 
> 

Here is the updated patch.


H.J.
----
2005-05-12  H.J. Lu  <hongjiu.lu@intel.com>

	PR 834
	* elfxx-ia64.c (elfNN_ia64_relax_br): New.
	(elfNN_ia64_relax_section): Use it.

--- bfd/elfxx-ia64.c.brl	2005-05-07 06:58:12.000000000 -0700
+++ bfd/elfxx-ia64.c	2005-05-12 17:50:36.000000000 -0700
@@ -681,6 +681,108 @@ bfd_elfNN_ia64_after_parse (int itanium)
   oor_branch_size = itanium ? sizeof (oor_ip) : sizeof (oor_brl);
 }
 
+static bfd_boolean
+elfNN_ia64_relax_br (bfd_byte *contents, bfd_vma off)
+{
+  unsigned int template, mlx;
+  bfd_vma t0, t1, s0, s1, s2, br_code;
+  long br_slot;
+  bfd_byte *hit_addr;
+
+  hit_addr = (bfd_byte *) (contents + off);
+  br_slot = (long) hit_addr & 0x3;
+  hit_addr -= br_slot;
+  t0 = bfd_getl64 (hit_addr + 0);
+  t1 = bfd_getl64 (hit_addr + 8);
+
+  /* Check if we can turn br into brl.  A label is always at the start
+     of the bundle.  Even if there are predicates on NOPs, we still
+     perform this optimization.  */
+  template = t0 & 0x1e;
+  s0 = t0 >> 5;
+  s1 = (t0 >> 46 | t1 << 18) & 0x1ffffffffffLL;
+  s2 = t1 >> 23;
+  switch (br_slot)
+    {
+    case 0:
+      /* Check if slot 1 and slot 2 are NOPs. Possible template is
+         BBB.  We only need to check nop.b.  */
+      if (!((s1 & 0x1e1f8000000LL) == 0x4000000000LL
+	    && (s2 & 0x1e1f8000000LL) == 0x4000000000LL))
+	return FALSE;
+      br_code = s0;
+      break;
+    case 1:
+      /* Check if slot 2 is NOP. Possible templates are MBB and BBB.
+	 For BBB, slot 0 also has to be nop.b.  */
+      if (!((template == 0x12				/* MBB */
+	     && (s2 & 0x1e1f8000000LL) == 0x4000000000LL)
+	    || (template == 0x16			/* BBB */
+		&& (s0 & 0x1e1f8000000LL) == 0x4000000000LL
+		&& (s2 & 0x1e1f8000000LL) == 0x4000000000LL)))
+	return FALSE;
+      br_code = s1;
+      break;
+    case 2:
+      /* Check if slot 1 is NOP. Possible templates are MIB, MBB, BBB,
+	 MMB and MFB. For BBB, slot 0 also has to be nop.b.  */
+      if (!((template == 0x10				/* MIB */
+	     && (s1 & 0x1effc000000LL) == 0x8000000LL)
+	    || (template == 0x12			/* MBB */
+		&& (s1 & 0x1e1f8000000LL) == 0x4000000000LL)
+	    || (template == 0x16			/* BBB */
+		&& (s0 & 0x1e1f8000000LL) == 0x4000000000LL
+		&& (s1 & 0x1e1f8000000LL) == 0x4000000000LL)
+	    || (template == 0x18			/* MMB */
+		&& (s1 & 0x1effc000000LL) == 0x8000000LL)
+	    || (template == 0x1c			/* MFB */
+		&& (s1 & 0x1e3fc000000LL) == 0x8000000LL)))
+	return FALSE;
+      br_code = s2;
+      break;
+    default:
+      /* It should never happen.  */
+      abort ();
+    }
+  
+  /* We can turn br.cond/br.call into brl.cond/brl.call.  */
+  if (!(((br_code & 0x1e0000001c0LL) == 0x8000000000LL)
+	|| (br_code & 0x1e000000000LL) == 0xa000000000LL))
+    return FALSE;
+
+  /* Turn br into brl by setting bit 40.  */
+  br_code |= 0x10000000000LL;
+
+  /* Turn the old bundle into a MLX bundle with the same stop-bit
+     variety.  */
+  if (t0 & 0x1)
+    mlx = 0x5;
+  else
+    mlx = 0x4;
+
+  if (template == 0x16)
+    {
+      /* For BBB, we need to put nop.m in slot 0 and keep the original
+	 predicate.  */
+      t0 &= 0x7e0;
+      t0 |= 0x100000000LL;
+    }
+  else
+    {
+      /* Keep the original instruction in slot 0.  */
+      t0 &= 0x3fffffffffe0LL;
+    }
+
+  t0 |= mlx;
+
+  /* Put brl in slot 1.  */
+  t1 = br_code << 23;
+
+  bfd_putl64 (t0, hit_addr);
+  bfd_putl64 (t1, hit_addr + 8);
+  return TRUE;
+}
+
 static void
 elfNN_ia64_relax_brl (bfd_byte *contents, bfd_vma off)
 {
@@ -985,6 +1087,16 @@ elfNN_ia64_relax_section (abfd, sec, lin
 	    }
 	  else if (r_type == R_IA64_PCREL60B)
 	    continue;
+	  else if (elfNN_ia64_relax_br (contents, roff))
+	    {
+	      irel->r_info
+		= ELFNN_R_INFO (ELFNN_R_SYM (irel->r_info),
+				R_IA64_PCREL60B);
+
+	      /* Make the relocation offset point to slot 1.  */
+	      irel->r_offset = (irel->r_offset & ~((bfd_vma) 0x3)) + 1;
+	      continue;
+	    }
 
 	  /* We can't put a trampoline in a .init/.fini section. Issue
 	     an error.  */

^ permalink raw reply	[flat|nested] 12+ messages in thread

* PATCH: Undo the elfNN_ia64_relax_brl change
  2005-05-12 22:36       ` Richard Henderson
  2005-05-12 23:08         ` James E Wilson
  2005-05-13  1:11         ` H. J. Lu
@ 2005-05-13  1:19         ` H. J. Lu
  2 siblings, 0 replies; 12+ messages in thread
From: H. J. Lu @ 2005-05-13  1:19 UTC (permalink / raw)
  To: Richard Henderson; +Cc: binutils

On Thu, May 12, 2005 at 02:36:33PM -0700, Richard Henderson wrote:
> On Thu, May 12, 2005 at 01:05:22PM -0700, H. J. Lu wrote:
> > > I really wish you'd extract the three insns independently, rather
> > > than extracting 3 words and using quite so many arbitrary masks.
> > > It's not like we don't have a 64-bit type available.
> > > 
> > 
> > I was thinking to use long long since long may be 32bit. But I am not
> > sure if all compilers support it. I guess I can use
> > 
> > #if BFD_HOST_LONG_LONG
> > long long ..
> > #else
> >   return FALSE;
> > #endif
> 
> We already assume a 64-bit type, for instance in elfNN_ia64_install_value.
> I see that elf32-ia64.lo is in BFD32_BACKENDS; that's probably a mistake.
> 
> I see no reason to complicate things by doing anything other than assuming
> a 64-bit type is available when compiling for ia64.  Every reasonable
> compiler does support such a type.
> 
> 
> 

Given this, should we revert this change?

http://sourceware.org/ml/binutils/2005-02/msg00348.html


H.J.
----
2005-05-12  H.J. Lu  <hongjiu.lu@intel.com>

	* elfxx-ia64.c (elfNN_ia64_relax_brl): Undo the change made on
	2005-02-16.

--- bfd/elfxx-ia64.c.ll	2005-05-12 17:50:36.000000000 -0700
+++ bfd/elfxx-ia64.c	2005-05-12 18:01:54.000000000 -0700
@@ -786,39 +786,33 @@ elfNN_ia64_relax_br (bfd_byte *contents,
 static void
 elfNN_ia64_relax_brl (bfd_byte *contents, bfd_vma off)
 {
-  unsigned int template, t0, t1, t2, t3;
+  int template;
   bfd_byte *hit_addr;
+  bfd_vma t0, t1, i0, i1, i2;
 
   hit_addr = (bfd_byte *) (contents + off);
   hit_addr -= (long) hit_addr & 0x3;
-  t0 = bfd_getl32 (hit_addr + 0);
-  t1 = bfd_getl32 (hit_addr + 4);
-  t2 = bfd_getl32 (hit_addr + 8);
-  t3 = bfd_getl32 (hit_addr + 12);
-
-  /* Turn a MLX bundle into a MBB bundle with the same stop-bit
-     variety.  */
-  template = 0x12;
-  if ((t0 & 0x1f) == 5)
-    template += 1;
+  t0 = bfd_getl64 (hit_addr);
+  t1 = bfd_getl64 (hit_addr + 8);
 
   /* Keep the instruction in slot 0. */
-  t0 &= 0xffffffe0;
-  t1 &= 0x3fff;
-
-  t0 |= template;
-
+  i0 = (t0 >> 5) & 0x1ffffffffffLL;
+  /* Use nop.b for slot 1. */
+  i1 = 0x4000000000LL;
   /* For slot 2, turn brl into br by masking out bit 40.  */
-  t2 &= 0xff800000;
-  t3 &= 0x7fffffff;
+  i2 = (t1 >> 23) & 0x0ffffffffffLL;
 
-  /* Use nop.b for slot 1. */
-  t2 |= 0x100000;
+  /* Turn a MLX bundle into a MBB bundle with the same stop-bit
+     variety.  */
+  if (t0 & 0x1)
+    template = 0x13;
+  else
+    template = 0x12;
+  t0 = (i1 << 46) | (i0 << 5) | template;
+  t1 = (i2 << 23) | (i1 >> 18);
 
-  bfd_putl32 (t0, hit_addr);
-  bfd_putl32 (t1, hit_addr + 4);
-  bfd_putl32 (t2, hit_addr + 8);
-  bfd_putl32 (t3, hit_addr + 12);
+  bfd_putl64 (t0, hit_addr);
+  bfd_putl64 (t1, hit_addr + 8);
 }
 \f
 /* These functions do relaxation for IA-64 ELF.  */

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: PATCH: PR 834: IA64: Change br to brl for "far" branches when possible
  2005-05-13  1:11         ` H. J. Lu
@ 2005-05-13  7:40           ` H. J. Lu
  0 siblings, 0 replies; 12+ messages in thread
From: H. J. Lu @ 2005-05-13  7:40 UTC (permalink / raw)
  To: Richard Henderson; +Cc: binutils

On Thu, May 12, 2005 at 05:58:18PM -0700, H. J. Lu wrote:
> On Thu, May 12, 2005 at 02:36:33PM -0700, Richard Henderson wrote:
> > On Thu, May 12, 2005 at 01:05:22PM -0700, H. J. Lu wrote:
> > > > I really wish you'd extract the three insns independently, rather
> > > > than extracting 3 words and using quite so many arbitrary masks.
> > > > It's not like we don't have a 64-bit type available.
> > > > 
> > > 
> > > I was thinking to use long long since long may be 32bit. But I am not
> > > sure if all compilers support it. I guess I can use
> > > 
> > > #if BFD_HOST_LONG_LONG
> > > long long ..
> > > #else
> > >   return FALSE;
> > > #endif
> > 
> > We already assume a 64-bit type, for instance in elfNN_ia64_install_value.
> > I see that elf32-ia64.lo is in BFD32_BACKENDS; that's probably a mistake.
> > 
> > I see no reason to complicate things by doing anything other than assuming
> > a 64-bit type is available when compiling for ia64.  Every reasonable
> > compiler does support such a type.
> > 
> > 
> 
> Here is the updated patch.
> 
> 

Added 41-bit masks for the instruction slots.


H.J.
---
2005-05-12  H.J. Lu  <hongjiu.lu@intel.com>

	PR 834
	* elfxx-ia64.c (elfNN_ia64_relax_br): New.
	(elfNN_ia64_relax_section): Use it.

--- bfd/elfxx-ia64.c.brl	2005-05-07 06:58:12.000000000 -0700
+++ bfd/elfxx-ia64.c	2005-05-12 17:50:36.000000000 -0700
@@ -681,6 +681,108 @@ bfd_elfNN_ia64_after_parse (int itanium)
   oor_branch_size = itanium ? sizeof (oor_ip) : sizeof (oor_brl);
 }
 
+static bfd_boolean
+elfNN_ia64_relax_br (bfd_byte *contents, bfd_vma off)
+{
+  unsigned int template, mlx;
+  bfd_vma t0, t1, s0, s1, s2, br_code;
+  long br_slot;
+  bfd_byte *hit_addr;
+
+  hit_addr = (bfd_byte *) (contents + off);
+  br_slot = (long) hit_addr & 0x3;
+  hit_addr -= br_slot;
+  t0 = bfd_getl64 (hit_addr + 0);
+  t1 = bfd_getl64 (hit_addr + 8);
+
+  /* Check if we can turn br into brl.  A label is always at the start
+     of the bundle.  Even if there are predicates on NOPs, we still
+     perform this optimization.  */
+  template = t0 & 0x1e;
+  s0 = (t0 >> 5) & 0x1ffffffffffLL;
+  s1 = ((t0 >> 46) | (t1 << 18)) & 0x1ffffffffffLL;
+  s2 = (t1 >> 23) & 0x1ffffffffffLL;
+  switch (br_slot)
+    {
+    case 0:
+      /* Check if slot 1 and slot 2 are NOPs. Possible template is
+         BBB.  We only need to check nop.b.  */
+      if (!((s1 & 0x1e1f8000000LL) == 0x4000000000LL
+	    && (s2 & 0x1e1f8000000LL) == 0x4000000000LL))
+	return FALSE;
+      br_code = s0;
+      break;
+    case 1:
+      /* Check if slot 2 is NOP. Possible templates are MBB and BBB.
+	 For BBB, slot 0 also has to be nop.b.  */
+      if (!((template == 0x12				/* MBB */
+	     && (s2 & 0x1e1f8000000LL) == 0x4000000000LL)
+	    || (template == 0x16			/* BBB */
+		&& (s0 & 0x1e1f8000000LL) == 0x4000000000LL
+		&& (s2 & 0x1e1f8000000LL) == 0x4000000000LL)))
+	return FALSE;
+      br_code = s1;
+      break;
+    case 2:
+      /* Check if slot 1 is NOP. Possible templates are MIB, MBB, BBB,
+	 MMB and MFB. For BBB, slot 0 also has to be nop.b.  */
+      if (!((template == 0x10				/* MIB */
+	     && (s1 & 0x1effc000000LL) == 0x8000000LL)
+	    || (template == 0x12			/* MBB */
+		&& (s1 & 0x1e1f8000000LL) == 0x4000000000LL)
+	    || (template == 0x16			/* BBB */
+		&& (s0 & 0x1e1f8000000LL) == 0x4000000000LL
+		&& (s1 & 0x1e1f8000000LL) == 0x4000000000LL)
+	    || (template == 0x18			/* MMB */
+		&& (s1 & 0x1effc000000LL) == 0x8000000LL)
+	    || (template == 0x1c			/* MFB */
+		&& (s1 & 0x1e3fc000000LL) == 0x8000000LL)))
+	return FALSE;
+      br_code = s2;
+      break;
+    default:
+      /* It should never happen.  */
+      abort ();
+    }
+  
+  /* We can turn br.cond/br.call into brl.cond/brl.call.  */
+  if (!(((br_code & 0x1e0000001c0LL) == 0x8000000000LL)
+	|| (br_code & 0x1e000000000LL) == 0xa000000000LL))
+    return FALSE;
+
+  /* Turn br into brl by setting bit 40.  */
+  br_code |= 0x10000000000LL;
+
+  /* Turn the old bundle into a MLX bundle with the same stop-bit
+     variety.  */
+  if (t0 & 0x1)
+    mlx = 0x5;
+  else
+    mlx = 0x4;
+
+  if (template == 0x16)
+    {
+      /* For BBB, we need to put nop.m in slot 0 and keep the original
+	 predicate.  */
+      t0 &= 0x7e0LL;
+      t0 |= 0x100000000LL;
+    }
+  else
+    {
+      /* Keep the original instruction in slot 0.  */
+      t0 &= 0x3fffffffffe0LL;
+    }
+
+  t0 |= mlx;
+
+  /* Put brl in slot 1.  */
+  t1 = br_code << 23;
+
+  bfd_putl64 (t0, hit_addr);
+  bfd_putl64 (t1, hit_addr + 8);
+  return TRUE;
+}
+
 static void
 elfNN_ia64_relax_brl (bfd_byte *contents, bfd_vma off)
 {
@@ -985,6 +1087,16 @@ elfNN_ia64_relax_section (abfd, sec, lin
 	    }
 	  else if (r_type == R_IA64_PCREL60B)
 	    continue;
+	  else if (elfNN_ia64_relax_br (contents, roff))
+	    {
+	      irel->r_info
+		= ELFNN_R_INFO (ELFNN_R_SYM (irel->r_info),
+				R_IA64_PCREL60B);
+
+	      /* Make the relocation offset point to slot 1.  */
+	      irel->r_offset = (irel->r_offset & ~((bfd_vma) 0x3)) + 1;
+	      continue;
+	    }
 
 	  /* We can't put a trampoline in a .init/.fini section. Issue
 	     an error.  */

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2005-05-13  1:19 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-05-12 16:17 PATCH: PR 834: IA64: Change br to brl for "far" branches when possible H. J. Lu
2005-05-12 17:14 ` H. J. Lu
2005-05-12 20:05   ` Richard Henderson
2005-05-12 20:55     ` H. J. Lu
2005-05-12 22:36       ` Richard Henderson
2005-05-12 23:08         ` James E Wilson
2005-05-12 23:31           ` Richard Henderson
2005-05-12 23:40             ` James E Wilson
2005-05-13  0:58               ` H. J. Lu
2005-05-13  1:11         ` H. J. Lu
2005-05-13  7:40           ` H. J. Lu
2005-05-13  1:19         ` PATCH: Undo the elfNN_ia64_relax_brl change H. J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).