public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* Xtensa GAS port: loosen loop alignment requirements
@ 2006-03-21 21:25 Bob Wilson
  0 siblings, 0 replies; only message in thread
From: Bob Wilson @ 2006-03-21 21:25 UTC (permalink / raw)
  To: binutils

[-- Attachment #1: Type: text/plain, Size: 912 bytes --]

I've committed this patch from Sterling Augustine on the mainline.  Newer 
versions of the Xtensa processor have a more relaxed requirement for aligning 
the first instruction of a zero-overhead loop.  The patch changes GAS to take 
advantage of this when generating code for an Xtensa LX or later processor. 
Tested with an xtensa-elf target (and with several weeks of internal use at 
Tensilica).

2006-03-21  Sterling Augustine  <sterling@tensilica.com>

	* config/tc-xtensa.c (enforce_three_byte_loop_align): New flag.
	(xtensa_setup_hw_workarounds): Set this new flag for older hardware.
	(get_loop_align_size): New.
	(xtensa_end): Skip xtensa_mark_narrow_branches when not aligning.
	(xtensa_mark_zcl_first_insns): Prevent widening of first loop frag.
	(get_text_align_power): Rewrite to handle inputs in the range 2-8.
	(get_noop_aligned_address): Use get_loop_align_size.
	(get_aligned_diff): Likewise.


[-- Attachment #2: gas-loop-align.diff --]
[-- Type: text/x-patch, Size: 5181 bytes --]

Index: config/tc-xtensa.c
===================================================================
RCS file: /cvs/src/src/gas/config/tc-xtensa.c,v
retrieving revision 1.59
diff -u -p -r1.59 tc-xtensa.c
--- config/tc-xtensa.c	13 Mar 2006 21:46:53 -0000	1.59
+++ config/tc-xtensa.c	21 Mar 2006 20:19:34 -0000
@@ -566,6 +566,7 @@ static bfd_boolean workaround_short_loop
 static bfd_boolean maybe_has_short_loop = FALSE;
 static bfd_boolean workaround_close_loop_end = FALSE;
 static bfd_boolean maybe_has_close_loop_end = FALSE;
+static bfd_boolean enforce_three_byte_loop_align = FALSE;
 
 /* When workaround_short_loops is TRUE, all loops with early exits must
    have at least 3 instructions.  workaround_all_short_loops is a modifier
@@ -590,6 +591,7 @@ xtensa_setup_hw_workarounds (int earlies
       workaround_short_loop |= TRUE;
       workaround_close_loop_end |= TRUE;
       workaround_all_short_loops |= TRUE;
+      enforce_three_byte_loop_align = TRUE;
     }
 }
 
@@ -4430,6 +4432,26 @@ next_frag_format_size (const fragS *frag
 }
 
 
+/* In early Xtensa Processors, for reasons that are unclear, the ISA
+   required two-byte instructions to be treated as three-byte instructions
+   for loop instruction alignment.  This restriction was removed beginning
+   with Xtensa LX.  Now the only requirement on loop instruction alignment
+   is that the first instruction of the loop must appear at an address that
+   does not cross a fetch boundary.  */
+
+static int
+get_loop_align_size (int insn_size)
+{
+  if (insn_size == XTENSA_UNDEFINED)
+    return xtensa_fetch_width;
+
+  if (enforce_three_byte_loop_align && insn_size == 2)
+    return 3;
+
+  return insn_size;
+}
+
+
 /* If the next legit fragment is an end-of-loop marker,
    switch its state so it will instantiate a NOP.  */
 
@@ -6918,7 +6940,8 @@ xtensa_end (void)
 
   if (workaround_short_loop && maybe_has_short_loop)
     xtensa_fix_short_loop_frags ();
-  xtensa_mark_narrow_branches ();
+  if (align_targets)
+    xtensa_mark_narrow_branches ();
   xtensa_mark_zcl_first_insns ();
 
   xtensa_sanity_check ();
@@ -7113,7 +7136,24 @@ xtensa_mark_zcl_first_insns (void)
 	      /* Of course, sometimes (mostly for toy test cases) a
 		 zero-cost loop instruction is the last in a section.  */
 	      if (targ_frag)
-		targ_frag->tc_frag_data.is_first_loop_insn = TRUE;
+		{
+		  targ_frag->tc_frag_data.is_first_loop_insn = TRUE;
+		  /* Do not widen a frag that is the first instruction of a
+		     zero-cost loop.  It makes that loop harder to align.  */
+		  if (targ_frag->fr_type == rs_machine_dependent
+		      && targ_frag->fr_subtype == RELAX_SLOTS
+		      && (targ_frag->tc_frag_data.slot_subtypes[0]
+			  == RELAX_NARROW))
+		    {
+		      if (targ_frag->tc_frag_data.is_aligning_branch)
+			targ_frag->tc_frag_data.slot_subtypes[0] = RELAX_IMMED;
+		      else
+			{
+			  frag_wane (targ_frag);
+			  targ_frag->tc_frag_data.slot_subtypes[0] = 0;
+			}
+		    }
+		}
 	      if (fragP->fr_subtype == RELAX_CHECK_ALIGN_NEXT_OPCODE)
 		frag_wane (fragP);
 	    }
@@ -7835,16 +7875,10 @@ is_local_forward_loop (const TInsn *insn
 static int
 get_text_align_power (unsigned target_size)
 {
-  int i = 0;
-  unsigned power = 1;
-
-  assert (target_size <= INT_MAX);
-  while (target_size > power)
-    {
-      power <<= 1;
-      i += 1;
-    }
-  return i;
+  if (target_size <= 4)
+    return 2;
+  assert (target_size == 8);
+  return 3;
 }
 
 
@@ -8038,14 +8072,9 @@ get_noop_aligned_address (fragS *fragP, 
      instruction following the loop, not the LOOP instruction.  */
 
   if (first_insn == NULL)
-    return address;
-
-  assert (first_insn->tc_frag_data.is_first_loop_insn);
-
-  first_insn_size = frag_format_size (first_insn);
-
-  if (first_insn_size == 2 || first_insn_size == XTENSA_UNDEFINED)
-    first_insn_size = 3;	/* ISA specifies this */
+    first_insn_size = xtensa_fetch_width;
+  else
+    first_insn_size = get_loop_align_size (frag_format_size (first_insn));
 
   /* If it was 8, then we'll need a larger alignment for the section.  */
   align_power = get_text_align_power (first_insn_size);
@@ -8108,7 +8137,7 @@ get_aligned_diff (fragS *fragP, addressT
       return opt_diff;
 
     case RELAX_ALIGN_NEXT_OPCODE:
-      target_size = next_frag_format_size (fragP);
+      target_size = get_loop_align_size (next_frag_format_size (fragP));
       loop_insn_offset = 0;
       is_loop = next_frag_opcode_is_loop (fragP, &loop_opcode);
       assert (is_loop);
@@ -8119,9 +8148,6 @@ get_aligned_diff (fragS *fragP, addressT
 	  != RELAX_IMMED)
 	loop_insn_offset = get_expanded_loop_offset (loop_opcode);
 
-      if (target_size == 2)
-	target_size = 3; /* ISA specifies this */
-
       /* In an ideal world, which is what we are shooting for here,
 	 we wouldn't need to use any NOPs immediately prior to the
 	 LOOP instruction.  If this approach fails, relax_frag_loop_align
@@ -8725,7 +8751,7 @@ bytes_to_stretch (fragS *this_frag,
       /* We will need a NOP no matter what, but should we widen
 	 this instruction to help?
 
-	 This is a RELAX_FRAG_NARROW frag.  */
+	 This is a RELAX_NARROW frag.  */
       switch (desired_diff)
 	{
 	case 1:

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2006-03-21 20:40 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-03-21 21:25 Xtensa GAS port: loosen loop alignment requirements Bob Wilson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).