public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers
@ 2006-06-13 20:45 H. J. Lu
  2006-06-14  8:30 ` Nick Clifton
  0 siblings, 1 reply; 18+ messages in thread
From: H. J. Lu @ 2006-06-13 20:45 UTC (permalink / raw)
  To: binutils

Since Pentium Pro, there are new nop instructions. This patch adds
--alt-nops=short|long to x86/x86-64 assemblers. --alt-nops=short
will generate a single nop instruction of up to 10 bytes for code
alignment. --alt-nops=long will generate a single nop instruction of up
to 15 bytes. Any comments?


H.J.
---
gas/

2006-06-13  H.J. Lu  <hongjiu.lu@intel.com>

	* config/tc-i386.c (alt_nops): New.
	(i386_align_code): Use short or long alternative nops if
	alt_nops is set to alt_nops_short or alt_nops_long,
	respectively.
	(OPTION_ALT_NOPS): New.
	(md_longopts): Add --alt-nops=.
	(md_parse_option): Support OPTION_ALT_NOPS.
	(md_show_usage): Add --alt-nops=short|long.

	* doc/c-i386.texi: Document --alt-nops=short|long.

gas/testsuite/

2006-06-13  H.J. Lu  <hongjiu.lu@intel.com>

	* gas/i386/alt-nops-long.d: New file.
	* gas/i386/alt-nops-short.d: Likewise.
	* gas/i386/alt-nops.s: Likewise.
	* gas/i386/x86-64-alt-nops-long.d: Likewise.
	* gas/i386/x86-64-alt-nops-short.d: Likewise.
	* gas/i386/x86-64-alt-nops.s: Likewise.
	
	* gas/i386/i386.exp: Run alt-nops-long, alt-nops-short,
	x86-64-alt-nops-long and x86-64-alt-nops-short.

--- binutils/gas/config/tc-i386.c.alt	2006-06-12 12:50:18.000000000 -0700
+++ binutils/gas/config/tc-i386.c	2006-06-13 09:23:24.000000000 -0700
@@ -316,6 +316,14 @@ int optimize_align_code = 1;
 /* Non-zero to quieten some warnings.  */
 static int quiet_warnings = 0;
 
+/* Which alternative nops, if any, to generate for code alignment.  */
+static enum
+{
+  alt_nops_none = 0,
+  alt_nops_long,
+  alt_nops_short
+} alt_nops = alt_nops_none;
+
 /* CPU name.  */
 static const char *cpu_arch_name = NULL;
 static const char *cpu_sub_arch_name = NULL;
@@ -563,13 +571,131 @@ i386_align_code (fragP, count)
     f32_1, f32_2, f16_3, f16_4, f16_5, f16_6, f16_7, f16_8,
     f32_15, f32_15, f32_15, f32_15, f32_15, f32_15, f32_15
   };
+  /* xchg %ax,%ax */
+  static const char alt_2[] =
+    {0x66,0x90};
+  /* nopl (%[re]ax) */
+  static const char alt_3[] =
+    {0x0f,0x1f,0x00};
+  /* nopl 0(%[re]ax) */
+  static const char alt_4[] =
+    {0x0f,0x1f,0x40,0x00};
+  /* nopl 0(%[re]ax,%[re]ax,1) */
+  static const char alt_5[] =
+    {0x0f,0x1f,0x44,0x00,0x00};
+  /* nopw 0(%[re]ax,%[re]ax,1) */
+  static const char alt_6[] =
+    {0x66,0x0f,0x1f,0x44,0x00,0x00};
+  /* nopl 0L(%[re]ax) */
+  static const char alt_7[] =
+    {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
+  /* nopl 0L(%[re]ax,%[re]ax,1) */
+  static const char alt_8[] =
+    {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
+  /* nopw 0L(%[re]ax,%[re]ax,1) */
+  static const char alt_9[] =
+    {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
+  /* nopw %cs:0L(%[re]ax,%[re]ax,1) */
+  static const char alt_10[] =
+    {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
+  /* data16
+     nopw %cs:0L(%[re]ax,%[re]ax,1) */
+  static const char alt_long_11[] =
+    {0x66,
+     0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
+  /* data16
+     data16
+     nopw %cs:0L(%[re]ax,%[re]ax,1) */
+  static const char alt_long_12[] =
+    {0x66,
+     0x66,
+     0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
+  /* data16
+     data16
+     data16
+     nopw %cs:0L(%[re]ax,%[re]ax,1) */
+  static const char alt_long_13[] =
+    {0x66,
+     0x66,
+     0x66,
+     0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
+  /* data16
+     data16
+     data16
+     data16
+     nopw %cs:0L(%[re]ax,%[re]ax,1) */
+  static const char alt_long_14[] =
+    {0x66,
+     0x66,
+     0x66,
+     0x66,
+     0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
+  /* data16
+     data16
+     data16
+     data16
+     data16
+     nopw %cs:0L(%[re]ax,%[re]ax,1) */
+  static const char alt_long_15[] =
+    {0x66,
+     0x66,
+     0x66,
+     0x66,
+     0x66,
+     0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
+  /* nopl 0(%[re]ax,%[re]ax,1)
+     nopw 0(%[re]ax,%[re]ax,1) */
+  static const char alt_short_11[] =
+    {0x0f,0x1f,0x44,0x00,0x00,
+     0x66,0x0f,0x1f,0x44,0x00,0x00};
+  /* nopw 0(%[re]ax,%[re]ax,1)
+     nopw 0(%[re]ax,%[re]ax,1) */
+  static const char alt_short_12[] =
+    {0x66,0x0f,0x1f,0x44,0x00,0x00,
+     0x66,0x0f,0x1f,0x44,0x00,0x00};
+  /* nopw 0(%[re]ax,%[re]ax,1)
+     nopl 0L(%[re]ax) */
+  static const char alt_short_13[] =
+    {0x66,0x0f,0x1f,0x44,0x00,0x00,
+     0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
+  /* nopl 0L(%[re]ax)
+     nopl 0L(%[re]ax) */
+  static const char alt_short_14[] =
+    {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00,
+     0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
+  /* nopl 0L(%[re]ax)
+     nopl 0L(%[re]ax,%[re]ax,1) */
+  static const char alt_short_15[] =
+    {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00,
+     0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
+  static const char *const alt_short_patt[] = {
+    f32_1, alt_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
+    alt_9, alt_10, alt_short_11, alt_short_12, alt_short_13,
+    alt_short_14, alt_short_15
+  };
+  static const char *const alt_long_patt[] = {
+    f32_1, alt_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
+    alt_9, alt_10, alt_long_11, alt_long_12, alt_long_13,
+    alt_long_14, alt_long_15
+  };
 
   if (count <= 0 || count > 15)
     return;
 
-  /* The recommended way to pad 64bit code is to use NOPs preceded by
-     maximally four 0x66 prefixes.  Balance the size of nops.  */
-  if (flag_code == CODE_64BIT)
+  if (alt_nops != alt_nops_none
+      && flag_code != CODE_16BIT
+      && (cpu_arch_flags & Cpu686) != 0)
+    {
+      const char *const *patt;
+
+      if (alt_nops == alt_nops_long)
+	patt = alt_long_patt;
+      else
+	patt = alt_short_patt;
+      memcpy (fragP->fr_literal + fragP->fr_fix,
+	      patt[count - 1], count);
+    }
+  else if (flag_code == CODE_64BIT)
     {
       int i;
       int nnops = (count + 3) / 4;
@@ -577,6 +703,8 @@ i386_align_code (fragP, count)
       int remains = count - nnops * len;
       int pos = 0;
 
+      /* The recommended way to pad 64bit code is to use NOPs preceded
+         by maximally four 0x66 prefixes.  Balance the size of nops.  */
       for (i = 0; i < remains; i++)
 	{
 	  memset (fragP->fr_literal + fragP->fr_fix + pos, 0x66, len);
@@ -590,18 +718,17 @@ i386_align_code (fragP, count)
 	  pos += len;
 	}
     }
-  else
-    if (flag_code == CODE_16BIT)
-      {
-	memcpy (fragP->fr_literal + fragP->fr_fix,
-		f16_patt[count - 1], count);
-	if (count > 8)
-	  /* Adjust jump offset.  */
-	  fragP->fr_literal[fragP->fr_fix + 1] = count - 2;
-      }
-    else
+  else if (flag_code == CODE_16BIT)
+    {
       memcpy (fragP->fr_literal + fragP->fr_fix,
-	      f32_patt[count - 1], count);
+	      f16_patt[count - 1], count);
+      if (count > 8)
+	/* Adjust jump offset.  */
+	fragP->fr_literal[fragP->fr_fix + 1] = count - 2;
+    }
+  else
+    memcpy (fragP->fr_literal + fragP->fr_fix,
+	    f32_patt[count - 1], count);
   fragP->fr_var = count;
 }
 
@@ -5428,6 +5555,7 @@ const char *md_shortopts = "qn";
 #define OPTION_32 (OPTION_MD_BASE + 0)
 #define OPTION_64 (OPTION_MD_BASE + 1)
 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
+#define OPTION_ALT_NOPS (OPTION_MD_BASE + 3)
 
 struct option md_longopts[] = {
   {"32", no_argument, NULL, OPTION_32},
@@ -5435,6 +5563,7 @@ struct option md_longopts[] = {
   {"64", no_argument, NULL, OPTION_64},
 #endif
   {"divide", no_argument, NULL, OPTION_DIVIDE},
+  {"alt-nops", required_argument, NULL, OPTION_ALT_NOPS},
   {NULL, no_argument, NULL, 0}
 };
 size_t md_longopts_size = sizeof (md_longopts);
@@ -5496,6 +5625,15 @@ md_parse_option (c, arg)
       default_arch = "i386";
       break;
 
+    case OPTION_ALT_NOPS:
+      if (strcmp (optarg, "short") == 0)
+	alt_nops = alt_nops_short;
+      else if (strcmp (optarg, "long") == 0)
+	alt_nops = alt_nops_long;
+      else
+	as_fatal (_("Unknown --alt-nops= option"));
+      break;
+
     case OPTION_DIVIDE:
 #ifdef SVR4_COMMENT_CHARS
       {
@@ -5543,6 +5681,8 @@ md_show_usage (stream)
   fprintf (stream, _("\
   --divide                ignored\n"));
 #endif
+  fprintf (stream, _("\
+  --alt-nops=short|long   generate short/long alternative nops for alignment\n"));
 }
 
 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
--- binutils/gas/doc/c-i386.texi.alt	2005-11-11 10:48:53.000000000 -0800
+++ binutils/gas/doc/c-i386.texi	2006-06-13 09:15:44.000000000 -0700
@@ -76,6 +76,16 @@ character, which means that it cannot be
 not disable @samp{/} at the beginning of a line starting a comment, or
 affect using @samp{#} for starting a comment.
 
+@cindex @samp{--alt-nops=} option, i386
+@cindex @samp{--alt-nops=} option, x86-64
+@item --alt-nops=@var{alternative-nop}
+This option selects the alternative nop instructions used for alignment
+within code sections, except in 16-bit code, when the CPU architecture
+supports @samp{i686} instructions.  The recognized values are
+@code{long} and @code{short}.  @code{long} pads with a single nop
+instruction of up to 15 bytes.  @code{short} pads with a single nop
+instruction of up to 10 bytes, or two nops for 11 to 15 bytes.
+
 @end table
 
 @node i386-Syntax
--- binutils/gas/testsuite/gas/i386/alt-nops-long.d.alt	2006-06-13 09:19:37.000000000 -0700
+++ binutils/gas/testsuite/gas/i386/alt-nops-long.d	2006-06-13 09:20:02.000000000 -0700
@@ -0,0 +1,156 @@
+#as: --alt-nops=long
+#source: alt-nops.s
+#objdump: -drw
+#name: i386 long alternative nops
+
+.*: +file format .*
+
+Disassembly of section .text:
+
+0+ <nop15>:
+[	 ]*0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*1:[	 ]+66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00[	 ]+nopw[ 	]+%cs:0x0\(%eax,%eax,1\)
+
+0+10 <nop14>:
+[	 ]*10:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*11:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*12:[	 ]+66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00[	 ]+nopw[ 	]+%cs:0x0\(%eax,%eax,1\)
+
+0+20 <nop13>:
+[	 ]*20:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*21:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*22:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*23:[	 ]+66 66 66 66 2e 0f 1f 84 00 00 00 00 00[	 ]+nopw[ 	]+%cs:0x0\(%eax,%eax,1\)
+
+0+30 <nop12>:
+[	 ]*30:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*31:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*32:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*33:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*34:[	 ]+66 66 66 2e 0f 1f 84 00 00 00 00 00[	 ]+nopw[ 	]+%cs:0x0\(%eax,%eax,1\)
+
+0+40 <nop11>:
+[	 ]*40:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*41:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*42:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*43:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*44:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*45:[	 ]+66 66 2e 0f 1f 84 00 00 00 00 00[	 ]+nopw[ 	]+%cs:0x0\(%eax,%eax,1\)
+
+0+50 <nop10>:
+[	 ]*50:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*51:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*52:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*53:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*54:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*55:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*56:[	 ]+66 2e 0f 1f 84 00 00 00 00 00[	 ]+nopw[ 	]+%cs:0x0\(%eax,%eax,1\)
+
+0+60 <nop9>:
+[	 ]*60:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*61:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*62:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*63:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*64:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*65:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*66:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*67:[	 ]+66 0f 1f 84 00 00 00 00 00[	 ]+nopw[ 	]+0x0\(%eax,%eax,1\)
+
+0+70 <nop8>:
+[	 ]*70:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*71:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*72:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*73:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*74:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*75:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*76:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*77:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*78:[	 ]+0f 1f 84 00 00 00 00 00[	 ]+nopl[ 	]+0x0\(%eax,%eax,1\)
+
+0+80 <nop7>:
+[	 ]*80:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*81:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*82:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*83:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*84:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*85:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*86:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*87:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*88:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*89:[	 ]+0f 1f 80 00 00 00 00[	 ]+nopl[ 	]+0x0\(%eax\)
+
+0+90 <nop6>:
+[	 ]*90:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*91:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*92:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*93:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*94:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*95:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*96:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*97:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*98:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*99:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*9a:[	 ]+66 0f 1f 44 00 00[	 ]+nopw[ 	]+0x0\(%eax,%eax,1\)
+
+0+a0 <nop5>:
+[	 ]*a0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a1:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a2:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a3:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a4:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a5:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a6:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a7:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a8:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a9:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*aa:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*ab:[	 ]+0f 1f 44 00 00[	 ]+nopl[ 	]+0x0\(%eax,%eax,1\)
+
+0+b0 <nop4>:
+[	 ]*b0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b1:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b2:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b3:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b4:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b5:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b6:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b7:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b8:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b9:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*ba:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*bb:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*bc:[	 ]+0f 1f 40 00[	 ]+nopl[ 	]+0x0\(%eax\)
+
+0+c0 <nop3>:
+[	 ]*c0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c1:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c2:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c3:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c4:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c5:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c6:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c7:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c8:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c9:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*ca:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*cb:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*cc:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*cd:[	 ]+0f 1f 00[	 ]+nopl[ 	]+\(%eax\)
+
+0+d0 <nop2>:
+[	 ]*d0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d1:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d2:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d3:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d4:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d5:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d6:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d7:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d8:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d9:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*da:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*db:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*dc:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*dd:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*de:[	 ]+66 90[	 ]+xchg[ 	]+%ax,%ax
+#pass
--- binutils/gas/testsuite/gas/i386/alt-nops-short.d.alt	2006-06-12 12:52:53.000000000 -0700
+++ binutils/gas/testsuite/gas/i386/alt-nops-short.d	2006-06-13 09:18:38.000000000 -0700
@@ -0,0 +1,161 @@
+#as: --alt-nops=short
+#source: alt-nops.s
+#objdump: -drw
+#name: i386 short alternative nops
+
+.*: +file format .*
+
+Disassembly of section .text:
+
+0+ <nop15>:
+[	 ]*0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*1:[	 ]+0f 1f 80 00 00 00 00[	 ]+nopl[ 	]+0x0\(%eax\)
+[	 ]*8:[	 ]+0f 1f 84 00 00 00 00 00[	 ]+nopl[ 	]+0x0\(%eax,%eax,1\)
+
+0+10 <nop14>:
+[	 ]*10:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*11:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*12:[	 ]+0f 1f 80 00 00 00 00[	 ]+nopl[ 	]+0x0\(%eax\)
+[	 ]*19:[	 ]+0f 1f 80 00 00 00 00[	 ]+nopl[ 	]+0x0\(%eax\)
+
+0+20 <nop13>:
+[	 ]*20:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*21:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*22:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*23:[	 ]+66 0f 1f 44 00 00[	 ]+nopw[ 	]+0x0\(%eax,%eax,1\)
+[	 ]*29:[	 ]+0f 1f 80 00 00 00 00[	 ]+nopl[ 	]+0x0\(%eax\)
+
+0+30 <nop12>:
+[	 ]*30:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*31:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*32:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*33:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*34:[	 ]+66 0f 1f 44 00 00[	 ]+nopw[ 	]+0x0\(%eax,%eax,1\)
+[	 ]*3a:[	 ]+66 0f 1f 44 00 00[	 ]+nopw[ 	]+0x0\(%eax,%eax,1\)
+
+0+40 <nop11>:
+[	 ]*40:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*41:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*42:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*43:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*44:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*45:[	 ]+0f 1f 44 00 00[	 ]+nopl[ 	]+0x0\(%eax,%eax,1\)
+[	 ]*4a:[	 ]+66 0f 1f 44 00 00[	 ]+nopw[ 	]+0x0\(%eax,%eax,1\)
+
+0+50 <nop10>:
+[	 ]*50:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*51:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*52:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*53:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*54:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*55:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*56:[	 ]+66 2e 0f 1f 84 00 00 00 00 00[	 ]+nopw[ 	]+%cs:0x0\(%eax,%eax,1\)
+
+0+60 <nop9>:
+[	 ]*60:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*61:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*62:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*63:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*64:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*65:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*66:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*67:[	 ]+66 0f 1f 84 00 00 00 00 00[	 ]+nopw[ 	]+0x0\(%eax,%eax,1\)
+
+0+70 <nop8>:
+[	 ]*70:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*71:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*72:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*73:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*74:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*75:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*76:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*77:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*78:[	 ]+0f 1f 84 00 00 00 00 00[	 ]+nopl[ 	]+0x0\(%eax,%eax,1\)
+
+0+80 <nop7>:
+[	 ]*80:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*81:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*82:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*83:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*84:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*85:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*86:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*87:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*88:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*89:[	 ]+0f 1f 80 00 00 00 00[	 ]+nopl[ 	]+0x0\(%eax\)
+
+0+90 <nop6>:
+[	 ]*90:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*91:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*92:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*93:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*94:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*95:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*96:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*97:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*98:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*99:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*9a:[	 ]+66 0f 1f 44 00 00[	 ]+nopw[ 	]+0x0\(%eax,%eax,1\)
+
+0+a0 <nop5>:
+[	 ]*a0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a1:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a2:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a3:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a4:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a5:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a6:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a7:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a8:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a9:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*aa:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*ab:[	 ]+0f 1f 44 00 00[	 ]+nopl[ 	]+0x0\(%eax,%eax,1\)
+
+0+b0 <nop4>:
+[	 ]*b0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b1:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b2:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b3:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b4:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b5:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b6:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b7:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b8:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b9:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*ba:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*bb:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*bc:[	 ]+0f 1f 40 00[	 ]+nopl[ 	]+0x0\(%eax\)
+
+0+c0 <nop3>:
+[	 ]*c0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c1:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c2:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c3:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c4:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c5:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c6:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c7:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c8:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c9:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*ca:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*cb:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*cc:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*cd:[	 ]+0f 1f 00[	 ]+nopl[ 	]+\(%eax\)
+
+0+d0 <nop2>:
+[	 ]*d0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d1:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d2:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d3:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d4:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d5:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d6:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d7:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d8:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d9:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*da:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*db:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*dc:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*dd:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*de:[	 ]+66 90[	 ]+xchg[ 	]+%ax,%ax
+#pass
--- binutils/gas/testsuite/gas/i386/alt-nops.s.alt	2006-06-12 12:52:53.000000000 -0700
+++ binutils/gas/testsuite/gas/i386/alt-nops.s	2006-06-12 12:52:53.000000000 -0700
@@ -0,0 +1,147 @@
+	.text
+nop15:
+	nop
+	.p2align 4
+
+nop14:
+	nop
+	nop
+	.p2align 4
+
+nop13:
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop12:
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop11:
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop10:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop9:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop8:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop7:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop6:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop5:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop4:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop3:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop2:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
--- binutils/gas/testsuite/gas/i386/i386.exp.alt	2006-06-12 12:50:18.000000000 -0700
+++ binutils/gas/testsuite/gas/i386/i386.exp	2006-06-13 09:22:01.000000000 -0700
@@ -73,6 +73,8 @@ if [expr ([istarget "i*86-*-*"] ||  [ist
     run_dump_test "rep-suffix"
     run_dump_test "fp"
     run_dump_test "nops"
+    run_dump_test "alt-nops-long"
+    run_dump_test "alt-nops-short"
 
     # These tests require support for 8 and 16 bit relocs,
     # so we only run them for ELF and COFF targets.
@@ -146,6 +148,8 @@ if [expr ([istarget "i*86-*-*"] || [ista
     run_dump_test "x86-64-rep-suffix"
     run_dump_test "x86-64-gidt"
     run_dump_test "x86-64-nops"
+    run_dump_test "x86-64-alt-nops-long"
+    run_dump_test "x86-64-alt-nops-short"
 
     if { ![istarget "*-*-aix*"]
       && ![istarget "*-*-beos*"]
--- binutils/gas/testsuite/gas/i386/x86-64-alt-nops-long.d.alt	2006-06-13 09:19:25.000000000 -0700
+++ binutils/gas/testsuite/gas/i386/x86-64-alt-nops-long.d	2006-06-13 09:18:54.000000000 -0700
@@ -0,0 +1,156 @@
+#as: --alt-nops=long
+#source: x86-64-alt-nops.s
+#objdump: -drw
+#name: x86-64 long alternative nops
+
+.*: +file format .*
+
+Disassembly of section .text:
+
+0+ <nop15>:
+[	 ]*0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*1:[	 ]+66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00[	 ]+nopw[ 	]+%cs:0x0\(%rax,%rax,1\)
+
+0+10 <nop14>:
+[	 ]*10:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*11:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*12:[	 ]+66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00[	 ]+nopw[ 	]+%cs:0x0\(%rax,%rax,1\)
+
+0+20 <nop13>:
+[	 ]*20:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*21:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*22:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*23:[	 ]+66 66 66 66 2e 0f 1f 84 00 00 00 00 00[	 ]+nopw[ 	]+%cs:0x0\(%rax,%rax,1\)
+
+0+30 <nop12>:
+[	 ]*30:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*31:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*32:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*33:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*34:[	 ]+66 66 66 2e 0f 1f 84 00 00 00 00 00[	 ]+nopw[ 	]+%cs:0x0\(%rax,%rax,1\)
+
+0+40 <nop11>:
+[	 ]*40:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*41:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*42:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*43:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*44:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*45:[	 ]+66 66 2e 0f 1f 84 00 00 00 00 00[	 ]+nopw[ 	]+%cs:0x0\(%rax,%rax,1\)
+
+0+50 <nop10>:
+[	 ]*50:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*51:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*52:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*53:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*54:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*55:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*56:[	 ]+66 2e 0f 1f 84 00 00 00 00 00[	 ]+nopw[ 	]+%cs:0x0\(%rax,%rax,1\)
+
+0+60 <nop9>:
+[	 ]*60:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*61:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*62:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*63:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*64:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*65:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*66:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*67:[	 ]+66 0f 1f 84 00 00 00 00 00[	 ]+nopw[ 	]+0x0\(%rax,%rax,1\)
+
+0+70 <nop8>:
+[	 ]*70:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*71:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*72:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*73:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*74:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*75:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*76:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*77:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*78:[	 ]+0f 1f 84 00 00 00 00 00[	 ]+nopl[ 	]+0x0\(%rax,%rax,1\)
+
+0+80 <nop7>:
+[	 ]*80:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*81:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*82:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*83:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*84:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*85:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*86:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*87:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*88:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*89:[	 ]+0f 1f 80 00 00 00 00[	 ]+nopl[ 	]+0x0\(%rax\)
+
+0+90 <nop6>:
+[	 ]*90:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*91:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*92:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*93:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*94:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*95:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*96:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*97:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*98:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*99:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*9a:[	 ]+66 0f 1f 44 00 00[	 ]+nopw[ 	]+0x0\(%rax,%rax,1\)
+
+0+a0 <nop5>:
+[	 ]*a0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a1:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a2:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a3:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a4:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a5:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a6:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a7:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a8:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a9:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*aa:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*ab:[	 ]+0f 1f 44 00 00[	 ]+nopl[ 	]+0x0\(%rax,%rax,1\)
+
+0+b0 <nop4>:
+[	 ]*b0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b1:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b2:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b3:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b4:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b5:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b6:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b7:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b8:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b9:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*ba:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*bb:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*bc:[	 ]+0f 1f 40 00[	 ]+nopl[ 	]+0x0\(%rax\)
+
+0+c0 <nop3>:
+[	 ]*c0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c1:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c2:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c3:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c4:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c5:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c6:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c7:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c8:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c9:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*ca:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*cb:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*cc:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*cd:[	 ]+0f 1f 00[	 ]+nopl[ 	]+\(%rax\)
+
+0+d0 <nop2>:
+[	 ]*d0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d1:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d2:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d3:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d4:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d5:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d6:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d7:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d8:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d9:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*da:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*db:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*dc:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*dd:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*de:[	 ]+66 90[	 ]+xchg[ 	]+%ax,%ax
+#pass
--- binutils/gas/testsuite/gas/i386/x86-64-alt-nops-short.d.alt	2006-06-12 12:52:53.000000000 -0700
+++ binutils/gas/testsuite/gas/i386/x86-64-alt-nops-short.d	2006-06-13 09:20:32.000000000 -0700
@@ -0,0 +1,161 @@
+#as: --alt-nops=short
+#source: x86-64-alt-nops.s
+#objdump: -drw
+#name: x86-64 short alternative nops
+
+.*: +file format .*
+
+Disassembly of section .text:
+
+0+ <nop15>:
+[	 ]*0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*1:[	 ]+0f 1f 80 00 00 00 00[	 ]+nopl[ 	]+0x0\(%rax\)
+[	 ]*8:[	 ]+0f 1f 84 00 00 00 00 00[	 ]+nopl[ 	]+0x0\(%rax,%rax,1\)
+
+0+10 <nop14>:
+[	 ]*10:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*11:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*12:[	 ]+0f 1f 80 00 00 00 00[	 ]+nopl[ 	]+0x0\(%rax\)
+[	 ]*19:[	 ]+0f 1f 80 00 00 00 00[	 ]+nopl[ 	]+0x0\(%rax\)
+
+0+20 <nop13>:
+[	 ]*20:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*21:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*22:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*23:[	 ]+66 0f 1f 44 00 00[	 ]+nopw[ 	]+0x0\(%rax,%rax,1\)
+[	 ]*29:[	 ]+0f 1f 80 00 00 00 00[	 ]+nopl[ 	]+0x0\(%rax\)
+
+0+30 <nop12>:
+[	 ]*30:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*31:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*32:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*33:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*34:[	 ]+66 0f 1f 44 00 00[	 ]+nopw[ 	]+0x0\(%rax,%rax,1\)
+[	 ]*3a:[	 ]+66 0f 1f 44 00 00[	 ]+nopw[ 	]+0x0\(%rax,%rax,1\)
+
+0+40 <nop11>:
+[	 ]*40:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*41:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*42:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*43:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*44:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*45:[	 ]+0f 1f 44 00 00[	 ]+nopl[ 	]+0x0\(%rax,%rax,1\)
+[	 ]*4a:[	 ]+66 0f 1f 44 00 00[	 ]+nopw[ 	]+0x0\(%rax,%rax,1\)
+
+0+50 <nop10>:
+[	 ]*50:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*51:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*52:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*53:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*54:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*55:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*56:[	 ]+66 2e 0f 1f 84 00 00 00 00 00[	 ]+nopw[ 	]+%cs:0x0\(%rax,%rax,1\)
+
+0+60 <nop9>:
+[	 ]*60:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*61:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*62:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*63:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*64:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*65:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*66:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*67:[	 ]+66 0f 1f 84 00 00 00 00 00[	 ]+nopw[ 	]+0x0\(%rax,%rax,1\)
+
+0+70 <nop8>:
+[	 ]*70:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*71:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*72:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*73:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*74:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*75:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*76:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*77:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*78:[	 ]+0f 1f 84 00 00 00 00 00[	 ]+nopl[ 	]+0x0\(%rax,%rax,1\)
+
+0+80 <nop7>:
+[	 ]*80:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*81:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*82:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*83:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*84:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*85:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*86:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*87:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*88:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*89:[	 ]+0f 1f 80 00 00 00 00[	 ]+nopl[ 	]+0x0\(%rax\)
+
+0+90 <nop6>:
+[	 ]*90:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*91:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*92:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*93:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*94:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*95:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*96:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*97:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*98:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*99:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*9a:[	 ]+66 0f 1f 44 00 00[	 ]+nopw[ 	]+0x0\(%rax,%rax,1\)
+
+0+a0 <nop5>:
+[	 ]*a0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a1:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a2:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a3:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a4:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a5:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a6:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a7:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a8:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*a9:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*aa:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*ab:[	 ]+0f 1f 44 00 00[	 ]+nopl[ 	]+0x0\(%rax,%rax,1\)
+
+0+b0 <nop4>:
+[	 ]*b0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b1:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b2:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b3:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b4:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b5:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b6:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b7:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b8:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*b9:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*ba:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*bb:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*bc:[	 ]+0f 1f 40 00[	 ]+nopl[ 	]+0x0\(%rax\)
+
+0+c0 <nop3>:
+[	 ]*c0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c1:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c2:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c3:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c4:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c5:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c6:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c7:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c8:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*c9:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*ca:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*cb:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*cc:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*cd:[	 ]+0f 1f 00[	 ]+nopl[ 	]+\(%rax\)
+
+0+d0 <nop2>:
+[	 ]*d0:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d1:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d2:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d3:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d4:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d5:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d6:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d7:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d8:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*d9:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*da:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*db:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*dc:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*dd:[	 ]+90[	 ]+nop[ 	]*
+[	 ]*de:[	 ]+66 90[	 ]+xchg[ 	]+%ax,%ax
+#pass
--- binutils/gas/testsuite/gas/i386/x86-64-alt-nops.s.alt	2006-06-12 12:52:53.000000000 -0700
+++ binutils/gas/testsuite/gas/i386/x86-64-alt-nops.s	2006-06-12 12:52:53.000000000 -0700
@@ -0,0 +1,147 @@
+	.text
+nop15:
+	nop
+	.p2align 4
+
+nop14:
+	nop
+	nop
+	.p2align 4
+
+nop13:
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop12:
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop11:
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop10:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop9:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop8:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop7:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop6:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop5:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop4:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop3:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4
+
+nop2:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	.p2align 4

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers
  2006-06-13 20:45 PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers H. J. Lu
@ 2006-06-14  8:30 ` Nick Clifton
  2006-06-14 15:14   ` H. J. Lu
  0 siblings, 1 reply; 18+ messages in thread
From: Nick Clifton @ 2006-06-14  8:30 UTC (permalink / raw)
  To: H. J. Lu; +Cc: binutils

Hi H.J.

> Since Pentium Pro, there are new nops instructions. This patch adds
> --alt-nops=short|long to x86/x86-64 assemblers. --alt-nops=short
> will generate a single nop instruction up to 10 bytes for code
> alignment. --alt-nops=long will generate a single nop instruction up
> to 15 bytes. Any comments?

I am not familiar with the x86 instruction set, so please can you 
explain why it is necessary to have these two different versions of the 
nop and why the user has to select one ?  ie if the space to be padded 
is up to 10 bytes why can't the assembler just use the "short" version 
automatically and if it is longer than 10 bytes use the "long" version ?

Cheers
   Nick

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers
  2006-06-14  8:30 ` Nick Clifton
@ 2006-06-14 15:14   ` H. J. Lu
  2006-06-14 17:20     ` Nick Clifton
  0 siblings, 1 reply; 18+ messages in thread
From: H. J. Lu @ 2006-06-14 15:14 UTC (permalink / raw)
  To: Nick Clifton; +Cc: binutils

On Wed, Jun 14, 2006 at 08:31:50AM +0100, Nick Clifton wrote:
> Hi H.J.
> 
> >Since Pentium Pro, there are new nops instructions. This patch adds
> >--alt-nops=short|long to x86/x86-64 assemblers. --alt-nops=short
> >will generate a single nop instruction up to 10 bytes for code
> >alignment. --alt-nops=long will generate a single nop instruction up
> >to 15 bytes. Any comments?
> 
> I am not familiar with the x86 instruction set, so please can you 
> explain why it is necessary to have these two different versions of the 
> nop and why the user has to select one ?  ie if the space to be padded 
> is up to 10 bytes why can't the assembler just use the "short" version 
> automatically and if it is longer than 10 bytes use the "long" version ?

x86/x86-64 assemblers need to fill the text section with 1 to
15 bytes of padding for alignment. We have simple nop instructions
for 1 to 10 bytes. For 11 to 15 bytes, we can add the 0x66 prefix
repeatedly to the 10-byte nop to get 11- to 15-byte nops. However,
some processors prefer simple nops. That is, three 0x66 prefixes on
the 10-byte nop are slower on those processors than a 6-byte nop
followed by a 7-byte nop. Users can use --alt-nops=short to avoid
repeated 0x66 prefixes. Maybe we can use something other than
short|long, but I don't have a better name.


H.J.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers
  2006-06-14 15:14   ` H. J. Lu
@ 2006-06-14 17:20     ` Nick Clifton
  2006-06-14 18:07       ` H. J. Lu
  0 siblings, 1 reply; 18+ messages in thread
From: Nick Clifton @ 2006-06-14 17:20 UTC (permalink / raw)
  To: H. J. Lu; +Cc: binutils

Hi H. J.

>> I am not familiar with the x86 instruction set, so please can you 
>> explain why it is necessary to have these two different versions of the 
>> nop and why the user has to select one ?  ie if the space to be padded 
>> is up to 10 bytes why can't the assembler just use the "short" version 
>> automatically and if it is longer than 10 bytes use the "long" version ?
> 
> x86/x86-64 assemblers need to fill the text section from 1 to
> 15 bytes for alignment. We have simple nop instructions for 1 to
> 10 bytes. For 11 to 15 bytes, we can add 0x66 prefix repeatedly
> to 10 byte nop to get 11-15byte nops. However some processors
> prefer simple nops. That is, 3 0x66 prefixes on the 10 byte nop
> are slower on those processors than a 6 byte nop + a 7 byte nop.

Does the assembler not know the target processor variant ?  ie is the 
assembler unable to make an informed choice of which type of nop to use ?

> User can use --alt-nops=short to avoid repeated 0x66 prefixes.
> Maybe we can use something other than short|long. But I don't
> have a better name.

How about "--allow-long-nop-sequences" and its inverse 
"--no-allow-long-nop-sequences" ?  The default would presumably be to 
allow the long sequences since this is faster on more modern processors, 
yes ?

I would also suggest that you extend the description in your patch to 
c-i386.texi to cover the explanation you just gave me, unless you are 
sure that all x86 assembler programmers would know about these two types 
of nop.

Cheers
   Nick


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers
  2006-06-14 17:20     ` Nick Clifton
@ 2006-06-14 18:07       ` H. J. Lu
  2006-06-15  8:10         ` Nick Clifton
  0 siblings, 1 reply; 18+ messages in thread
From: H. J. Lu @ 2006-06-14 18:07 UTC (permalink / raw)
  To: Nick Clifton; +Cc: binutils

On Wed, Jun 14, 2006 at 04:14:23PM +0100, Nick Clifton wrote:
> Hi H. J.
> 
> >>I am not familiar with the x86 instruction set, so please can you 
> >>explain why it is necessary to have these two different versions of the 
> >>nop and why the user has to select one ?  ie if the space to be padded 
> >>is up to 10 bytes why can't the assembler just use the "short" version 
> >>automatically and if it is longer than 10 bytes use the "long" version ?
> >
> >x86/x86-64 assemblers need to fill the text section from 1 to
> >15 bytes for alignment. We have simple nop instructions for 1 to
> >10 bytes. For 11 to 15 bytes, we can add 0x66 prefix repeatedly
> >to 10 byte nop to get 11-15byte nops. However some processors
> >prefer simple nops. That is, 3 0x66 prefixes on the 10 byte nop
> >are slower on those processors than a 6 byte nop + a 7 byte nop.
> 
> Does the assembler not know the target processor variant ?  ie is the 
> assembler unable to make an informed choice of which type of nop to use ?

x86/x86-64 assemblers don't support -mtune/-march/-mcpu.

> 
> >User can use --alt-nops=short to avoid repeated 0x66 prefixes.
> >Maybe we can use something other than short|long. But I don't
> >have a better name.
> 
> How about "--allow-long-nop-sequences" and its inverse 
> "--no-allow-long-nop-sequences" ?  The default would presumably be to 
> allow the long sequences since this is faster on more modern processors, 
> yes ?

There are two problems. First, the new nop instructions are available
only on Pentium Pro or above. Without -mtune/-march/-mcpu, we don't
know if we can use them, short or long. We need a switch to tell the
assembler whether the new nop instructions can be used at all. Secondly,
not all modern processors prefer the "long" versions. We need another
switch to tell it which kind of the new nop instructions should be used,
short or long.

Maybe I should add -march= and -mtune= to assembler.



H.J.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers
  2006-06-14 18:07       ` H. J. Lu
@ 2006-06-15  8:10         ` Nick Clifton
  2006-06-15 14:51           ` Paul Brook
  2006-06-15 15:00           ` PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers H. J. Lu
  0 siblings, 2 replies; 18+ messages in thread
From: Nick Clifton @ 2006-06-15  8:10 UTC (permalink / raw)
  To: H. J. Lu; +Cc: binutils

Hi H. J.

> x86/x86-64 assemblers don't support -mtune/-march/-mcpu.

Ahh.  How does the assembler know which instruction set variants are 
valid then ?  (I am thinking of all the different SSE, PowerNow, etc 
variants).

> We need a switch to tell
> assembler if the new nop instructions can be used at all. Secondly,
> not all modern processors prefer "long" versions. We need another
> switch to tell which kind of the new nop instructions should be used,
> short or long.

In which case the default presumably ought to be the short version and 
the long version should only be enabled if explicitly requested via a 
command line switch.

> Maybe I should add -march= and -mtune= to assembler.

It sounds like it would be a good idea, although if it is only to 
support this new feature then you may not want to go that far.

Cheers
   Nick

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers
  2006-06-15  8:10         ` Nick Clifton
@ 2006-06-15 14:51           ` Paul Brook
  2006-06-15 15:15             ` H. J. Lu
  2006-06-15 15:00           ` PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers H. J. Lu
  1 sibling, 1 reply; 18+ messages in thread
From: Paul Brook @ 2006-06-15 14:51 UTC (permalink / raw)
  To: binutils; +Cc: Nick Clifton, H. J. Lu

> > Maybe I should add -march= and -mtune= to assembler.
>
> It sounds like it would be a good idea, although if it is only to
> support this new feature then you may not want to go that far.

FWIW I've found it more useful to add .cpu and/or .arch assembly directives. 
Handling the gcc spec strings for commandline controls and making sure the 
gcc and gas defaults are consistent can get painful, especially if gcc has 
several different ways of specifying the cpu variant.

Paul

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers
  2006-06-15  8:10         ` Nick Clifton
  2006-06-15 14:51           ` Paul Brook
@ 2006-06-15 15:00           ` H. J. Lu
  1 sibling, 0 replies; 18+ messages in thread
From: H. J. Lu @ 2006-06-15 15:00 UTC (permalink / raw)
  To: Nick Clifton; +Cc: binutils

On Thu, Jun 15, 2006 at 08:53:32AM +0100, Nick Clifton wrote:
> Hi H. J.
> 
> >x86/x86-64 assemblers don't support -mtune/-march/-mcpu.
> 
> Ahh.  How does the assembler know which instruction set variants are 
> valid then ?  (I am thinking of the all the different SSE, PowerNow, etc 
> variants).

By default, the x86/x86-64 assemblers accept everything. You can limit
the instruction set with the .arch directive. Until now, the assembler
has never generated instructions beyond i386 on its own, so it hasn't
been a problem.

> 
> >We need a switch to tell
> >assembler if the new nop instructions can be used at all. Secondly,
> >not all modern processors prefer "long" versions. We need another
> >switch to tell which kind of the new nop instructions should be used,
> >short or long.
> 
> In which case the default presumably ought to be the short version and 
> the long version should only be enabled if explicitly requested via a 
> command line switch.
> 
> >Maybe I should add -march= and -mtune= to assembler.
> 
> It sounds like it would be a good idea, although if it is only to 
> support this new feature then you may not want to go that far.

I am considering adding -mtune= and updating the documentation to
indicate that all instructions are allowed by default. But I will do
some experiments first.


H.J.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers
  2006-06-15 14:51           ` Paul Brook
@ 2006-06-15 15:15             ` H. J. Lu
  2006-06-15 15:28               ` Paul Brook
  0 siblings, 1 reply; 18+ messages in thread
From: H. J. Lu @ 2006-06-15 15:15 UTC (permalink / raw)
  To: Paul Brook; +Cc: binutils, Nick Clifton

On Thu, Jun 15, 2006 at 03:44:11PM +0100, Paul Brook wrote:
> > > Maybe I should add -march= and -mtune= to assembler.
> >
> > It sounds like it would be a good idea, although if it is only to
> > support this new feature then you may not want to go that far.
> 
> FWIW I've found it more useful to add .cpu and/or .arch assembly directives. 
> Handling the gcc spec strings for commandline controls and making sure the 
> gcc and gas defaults are consistent can get painful, especially if gcc has 
> several different ways of specifying the cpu variant.

The x86/x86-64 assembler supports .arch. But I don't think gcc uses it.
I am not sure how well it will work with existing code bases if gcc
starts generating .arch.


H.J.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers
  2006-06-15 15:15             ` H. J. Lu
@ 2006-06-15 15:28               ` Paul Brook
  2006-06-15 16:20                 ` H. J. Lu
  0 siblings, 1 reply; 18+ messages in thread
From: Paul Brook @ 2006-06-15 15:28 UTC (permalink / raw)
  To: H. J. Lu; +Cc: binutils, Nick Clifton

> > FWIW I've found it more useful to add .cpu and/or .arch assembly
> > directives. Handling the gcc spec strings for commandline controls and
> > making sure the gcc and gas defaults are consistent can get painful,
> > especially if gcc has several different ways of specifying the cpu
> > variant.
>
> x86/x86-64 assembler support .arch. But I don't think gcc uses it. I
> am not sure how well it will work with existing code bases if gcc
> starts generating .arch.

That's what configure checks are for.

Paul

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers
  2006-06-15 15:28               ` Paul Brook
@ 2006-06-15 16:20                 ` H. J. Lu
  2006-06-15 17:08                   ` Paul Brook
  0 siblings, 1 reply; 18+ messages in thread
From: H. J. Lu @ 2006-06-15 16:20 UTC (permalink / raw)
  To: Paul Brook; +Cc: binutils, Nick Clifton

On Thu, Jun 15, 2006 at 04:15:02PM +0100, Paul Brook wrote:
> > > FWIW I've found it more useful to add .cpu and/or .arch assembly
> > > directives. Handling the gcc spec strings for commandline controls and
> > > making sure the gcc and gas defaults are consistent can get painful,
> > > especially if gcc has several different ways of specifying the cpu
> > > variant.
> >
> > x86/x86-64 assembler support .arch. But I don't think gcc uses it. I
> > am not sure how well it will work with existing code bases if gcc
> > starts generating .arch.
> 

I was talking about the code like

	asm ("some SSE instruction");

If gcc starts to generate ".arch i686", the code above won't compile
any more.


H.J.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers
  2006-06-15 16:20                 ` H. J. Lu
@ 2006-06-15 17:08                   ` Paul Brook
  2006-06-15 17:41                     ` H. J. Lu
  0 siblings, 1 reply; 18+ messages in thread
From: Paul Brook @ 2006-06-15 17:08 UTC (permalink / raw)
  To: binutils; +Cc: H. J. Lu, Nick Clifton

> > > x86/x86-64 assembler support .arch. But I don't think gcc uses it. I
> > > am not sure how well it will work with existing code bases if gcc
> > > starts generating .arch.
>
> I was talking about the code like
>
> 	asm ("some SSE instruction");
>
> If gcc starts to generate ".arch i686", the code above won't compile
> any more.

I'd say that code is broken. You'd have the same problem if you implemented 
the commandline options. I don't really see why x86 is any different to other 
architectures in this respect.

Paul

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers
  2006-06-15 17:08                   ` Paul Brook
@ 2006-06-15 17:41                     ` H. J. Lu
  2006-06-15 18:45                       ` Paul Brook
  0 siblings, 1 reply; 18+ messages in thread
From: H. J. Lu @ 2006-06-15 17:41 UTC (permalink / raw)
  To: Paul Brook; +Cc: binutils, Nick Clifton

On Thu, Jun 15, 2006 at 05:20:26PM +0100, Paul Brook wrote:
> > > > x86/x86-64 assembler support .arch. But I don't think gcc uses it. I
> > > > am not sure how well it will work with existing code bases if gcc
> > > > starts generating .arch.
> >
> > I was talking about the code like
> >
> > 	asm ("some SSE instruction");
> >
> > If gcc starts to generate ".arch i686", the code above won't compile
> > any more.
> 
> I'd say that code is broken. You'd have the same problem if you implemented 
> the commandline options. I don't really see why x86 is any different to other 
> architectures in this respect.

x86 is different, for better or worse. The reality is we can't change
it in such a way that all of a sudden existing code won't compile any
more.


H.J.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers
  2006-06-15 17:41                     ` H. J. Lu
@ 2006-06-15 18:45                       ` Paul Brook
  2006-06-15 18:58                         ` H. J. Lu
  0 siblings, 1 reply; 18+ messages in thread
From: Paul Brook @ 2006-06-15 18:45 UTC (permalink / raw)
  To: H. J. Lu; +Cc: binutils, Nick Clifton

> > > I was talking about the code like
> > >
> > > 	asm ("some SSE instruction");
> > >
> > > If gcc starts to generate ".arch i686", the code above won't compile
> > > any more.
> >
> > I'd say that code is broken. You'd have the same problem if you
> > implemented the commandline options. I don't really see why x86 is any
> > different to other architectures in this respect.
>
> x86 is different, for better or worse. The reality is we can't change
> it in such a way that all sudden the existing codes won't compile any
> more.

Well, doesn't the same argument apply to commandline arguments?
Having -march=i686 behave differently to ".arch i686" sounds like a really 
bad idea to me.

Paul

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers
  2006-06-15 18:45                       ` Paul Brook
@ 2006-06-15 18:58                         ` H. J. Lu
  2006-06-15 19:02                           ` Paul Brook
  0 siblings, 1 reply; 18+ messages in thread
From: H. J. Lu @ 2006-06-15 18:58 UTC (permalink / raw)
  To: Paul Brook; +Cc: binutils, Nick Clifton

On Thu, Jun 15, 2006 at 06:41:23PM +0100, Paul Brook wrote:
> > > > I was talking about the code like
> > > >
> > > > 	asm ("some SSE instruction");
> > > >
> > > > If gcc starts to generate ".arch i686", the code above won't compile
> > > > any more.
> > >
> > > I'd say that code is broken. You'd have the same problem if you
> > > implemented the commandline options. I don't really see why x86 is any
> > > different to other architectures in this respect.
> >
> > x86 is different, for better or worse. The reality is we can't change
> > it in such a way that all sudden the existing codes won't compile any
> > more.
> 
> Well, doesn't the same argument apply to commandline arguments. 
> Having -march=i686 behave differently to ".arch i686" sounds like a really 
> bad idea to me.

That is why I am planning to add -mtune=, not -march=. There are checks like

  if (value == CODE_64BIT && !(cpu_arch_flags & CpuSledgehammer))
    {
      as_bad (_("64bit mode not supported on this CPU."));
    }

in assembler. The reasons we haven't run into any serious problems are

1. By default, cpu_arch_flags is set to accept everything.
2. .arch directive isn't used much.

So my -mtune=CPU switch will optimize for CPU by generating instructions
for that CPU if its instruction set is available.


H.J.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers
  2006-06-15 18:58                         ` H. J. Lu
@ 2006-06-15 19:02                           ` Paul Brook
  2006-06-16  7:27                             ` PATCH: Add -march=/-mtune= to x86 assembler H. J. Lu
  0 siblings, 1 reply; 18+ messages in thread
From: Paul Brook @ 2006-06-15 19:02 UTC (permalink / raw)
  To: H. J. Lu; +Cc: binutils, Nick Clifton


> in assembler. The reasons we haven't run into any serious problems are
>
> 1. By default, cpu_arch_flags is set to accept everything.
> 2. .arch directive isn't used much.
>
> So my -mtune=CPU switch will optimize for CPU by generating instruction
> for CPU if instruction set of CPU is available.

But you said these instructions only existed on ppro or later. You can't have 
mtune= affect the choice of instruction unless you also implement -march=. 

Consider gcc -march=386 -mtune=686. IIUC you're proposing that gcc doesn't 
emit .arch, and doesn't pass through -march=, so gas defaults to allowing all 
instructions. gas then sees -mtune= and generates instructions that don't 
work on 386.

Paul

^ permalink raw reply	[flat|nested] 18+ messages in thread

* PATCH: Add -march=/-mtune= to x86 assembler
  2006-06-15 19:02                           ` Paul Brook
@ 2006-06-16  7:27                             ` H. J. Lu
  2006-06-16 15:26                               ` H. J. Lu
  0 siblings, 1 reply; 18+ messages in thread
From: H. J. Lu @ 2006-06-16  7:27 UTC (permalink / raw)
  To: Paul Brook; +Cc: binutils, Nick Clifton

On Thu, Jun 15, 2006 at 07:58:26PM +0100, Paul Brook wrote:
> 
> > in assembler. The reasons we haven't run into any serious problems are
> >
> > 1. By default, cpu_arch_flags is set to accept everything.
> > 2. .arch directive isn't used much.
> >
> > So my -mtune=CPU switch will optimize for CPU by generating instruction
> > for CPU if instruction set of CPU is available.
> 
> But you said these instructions only existed on ppro or later. You can't have 
> mtune= effect the choice of instruction unless you also implement -march=. 
> 
> Consider gcc -march=386 -mtune=686. IIUC you're proposing that gcc doesn't 
> emit .arch, and doesn't pass through -march=, so gas defaults to allowing all 
> instructions. gas then sees -mtune= and generates instructions that don't 
> work on 386.
> 

This is a patch to add -march=/-mtune= to the x86 assembler. Currently,
it doesn't generate different code. I will add processor-specific
optimizations later.


H.J.
----
2006-06-14  H.J. Lu  <hongjiu.lu@intel.com>

	* config/tc-i386.h (processor_type): New.
	(arch_entry): Add type.

	* config/tc-i386.c (cpu_arch_tune): New.
	(cpu_arch_tune_flags): Likewise.
	(cpu_arch_isa): Likewise.
	(cpu_arch_isa_flags): Likewise.
	(cpu_arch): Updated.
	(set_cpu_arch): Also update cpu_arch_isa/cpu_arch_isa_flags.
	(i386_target_format): Likewise.
	(OPTION_MARCH): New.
	(OPTION_MTUNE): Likewise.
	(md_longopts): Add -march= and -mtune=.
	(md_parse_option): Support -march= and -mtune=.
	(md_show_usage): Add -march=CPU/-mtune=CPU.

	* doc/as.texinfo: Add -march=CPU/-mtune=CPU.

	* doc/c-i386.texi: Document -march=CPU/-mtune=CPU.

--- gas/config/tc-i386.c.tune	2006-06-12 12:50:18.000000000 -0700
+++ gas/config/tc-i386.c	2006-06-15 15:40:18.000000000 -0700
@@ -323,6 +323,18 @@ static const char *cpu_sub_arch_name = N
 /* CPU feature flags.  */
 static unsigned int cpu_arch_flags = CpuUnknownFlags | CpuNo64;
 
+/* Cpu we are generating instructions for.  */
+enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
+
+/* CPU feature flags of cpu we are generating instructions for.  */
+static unsigned int cpu_arch_tune_flags = CpuUnknownFlags;
+
+/* CPU instruction set architecture to use.  */
+enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
+
+/* CPU feature flags of instruction set architecture used.  */
+static unsigned int cpu_arch_isa_flags = CpuUnknownFlags;
+
 /* If set, conditional jumps are not automatically promoted to handle
    larger than a byte offset.  */
 static unsigned int no_cond_jump_promotion = 0;
@@ -415,35 +427,85 @@ const relax_typeS md_relax_table[] =
   {0, 0, 4, 0}
 };
 
-static const arch_entry cpu_arch[] = {
-  {"i8086",	Cpu086 },
-  {"i186",	Cpu086|Cpu186 },
-  {"i286",	Cpu086|Cpu186|Cpu286 },
-  {"i386",	Cpu086|Cpu186|Cpu286|Cpu386 },
-  {"i486",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486 },
-  {"i586",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586 },
-  {"i686",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686 },
-  {"pentium",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586 },
-  {"pentiumpro",Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686 },
-  {"pentiumii",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX },
-  {"pentiumiii",Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX|CpuMMX2|CpuSSE },
-  {"pentium4",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2 },
-  {"prescott",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuPNI },
-  {"k6",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX },
-  {"k6_2",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX|Cpu3dnow },
-  {"athlon",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA },
-  {"sledgehammer",Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon|CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2 },
-  {"opteron",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon|CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2 },
-  {".mmx",	CpuMMX },
-  {".sse",	CpuMMX|CpuMMX2|CpuSSE },
-  {".sse2",	CpuMMX|CpuMMX2|CpuSSE|CpuSSE2 },
-  {".sse3",	CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3 },
-  {".3dnow",	CpuMMX|Cpu3dnow },
-  {".3dnowa",	CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA },
-  {".padlock",	CpuPadLock },
-  {".pacifica",	CpuSVME },
-  {".svme",	CpuSVME },
-  {NULL, 0 }
+static const arch_entry cpu_arch[] =
+{
+  {"generic32", PROCESSOR_GENERIC32,
+   Cpu086|Cpu186|Cpu286|Cpu386},
+  {"generic64", PROCESSOR_GENERIC64,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+   |CpuMMX2|CpuSSE|CpuSSE2},
+  {"i8086", PROCESSOR_UNKNOWN,
+   Cpu086},
+  {"i186", PROCESSOR_UNKNOWN,
+   Cpu086|Cpu186},
+  {"i286", PROCESSOR_UNKNOWN,
+   Cpu086|Cpu186|Cpu286},
+  {"i386", PROCESSOR_GENERIC32,
+   Cpu086|Cpu186|Cpu286|Cpu386},
+  {"i486", PROCESSOR_I486,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486},
+  {"i586", PROCESSOR_PENTIUM,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586},
+  {"i686", PROCESSOR_PENTIUMPRO,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686},
+  {"pentium", PROCESSOR_PENTIUM,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586},
+  {"pentiumpro",PROCESSOR_PENTIUMPRO,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686},
+  {"pentiumii",	PROCESSOR_PENTIUMPRO,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX},
+  {"pentiumiii",PROCESSOR_PENTIUMPRO,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX|CpuMMX2
+   |CpuSSE},
+  {"pentium4", PROCESSOR_PENTIUM4,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+   |CpuMMX2|CpuSSE|CpuSSE2},
+  {"prescott", PROCESSOR_NOCONA,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+   |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
+  {"nocona", PROCESSOR_NOCONA,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+   |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
+  {"yonah", PROCESSOR_YONAH,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+   |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
+  {"merom", PROCESSOR_MEROM,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+   |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuMNI},
+  {"k6", PROCESSOR_K6,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX},
+  {"k6_2", PROCESSOR_K6,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX|Cpu3dnow},
+  {"athlon", PROCESSOR_ATHLON,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon
+   |CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA},
+  {"sledgehammer", PROCESSOR_K8,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon
+   |CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2},
+  {"opteron", PROCESSOR_K8,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon
+   |CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2},
+  {"k8", PROCESSOR_K8,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon
+   |CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2},
+  {".mmx", PROCESSOR_UNKNOWN,
+   CpuMMX},
+  {".sse", PROCESSOR_UNKNOWN,
+   CpuMMX|CpuMMX2|CpuSSE},
+  {".sse2", PROCESSOR_UNKNOWN,
+   CpuMMX|CpuMMX2|CpuSSE|CpuSSE2},
+  {".sse3", PROCESSOR_UNKNOWN,
+   CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
+  {".3dnow", PROCESSOR_UNKNOWN,
+   CpuMMX|Cpu3dnow},
+  {".3dnowa", PROCESSOR_UNKNOWN,
+   CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA},
+  {".padlock", PROCESSOR_UNKNOWN,
+   CpuPadLock},
+  {".pacifica", PROCESSOR_UNKNOWN,
+   CpuSVME},
+  {".svme", PROCESSOR_UNKNOWN,
+   CpuSVME}
 };
 
 const pseudo_typeS md_pseudo_table[] =
@@ -866,9 +928,9 @@ set_cpu_arch (dummy)
     {
       char *string = input_line_pointer;
       int e = get_symbol_end ();
-      int i;
+      unsigned int i;
 
-      for (i = 0; cpu_arch[i].name; i++)
+      for (i = 0; i < ARRAY_SIZE (cpu_arch); i++)
 	{
 	  if (strcmp (string, cpu_arch[i].name) == 0)
 	    {
@@ -878,6 +940,8 @@ set_cpu_arch (dummy)
 		  cpu_sub_arch_name = NULL;
 		  cpu_arch_flags = (cpu_arch[i].flags
 				    | (flag_code == CODE_64BIT ? Cpu64 : CpuNo64));
+		  cpu_arch_isa = cpu_arch[i].type;
+		  cpu_arch_isa_flags = cpu_arch[i].flags;
 		  break;
 		}
 	      if ((cpu_arch_flags | cpu_arch[i].flags) != cpu_arch_flags)
@@ -890,7 +954,7 @@ set_cpu_arch (dummy)
 	      return;
 	    }
 	}
-      if (!cpu_arch[i].name)
+      if (i >= ARRAY_SIZE (cpu_arch))
 	as_bad (_("no such architecture: `%s'"), string);
 
       *input_line_pointer = e;
@@ -5428,6 +5492,8 @@ const char *md_shortopts = "qn";
 #define OPTION_32 (OPTION_MD_BASE + 0)
 #define OPTION_64 (OPTION_MD_BASE + 1)
 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
+#define OPTION_MARCH (OPTION_MD_BASE + 3)
+#define OPTION_MTUNE (OPTION_MD_BASE + 4)
 
 struct option md_longopts[] = {
   {"32", no_argument, NULL, OPTION_32},
@@ -5435,15 +5501,17 @@ struct option md_longopts[] = {
   {"64", no_argument, NULL, OPTION_64},
 #endif
   {"divide", no_argument, NULL, OPTION_DIVIDE},
+  {"march", required_argument, NULL, OPTION_MARCH},
+  {"mtune", required_argument, NULL, OPTION_MTUNE},
   {NULL, no_argument, NULL, 0}
 };
 size_t md_longopts_size = sizeof (md_longopts);
 
 int
-md_parse_option (c, arg)
-     int c;
-     char *arg ATTRIBUTE_UNUSED;
+md_parse_option (int c, char *arg)
 {
+  unsigned int i;
+
   switch (c)
     {
     case 'n':
@@ -5513,6 +5581,38 @@ md_parse_option (c, arg)
 #endif
       break;
 
+    case OPTION_MARCH:
+      if (*arg == '.')
+	as_fatal (_("Invalid -march= option: `%s'"), arg);
+      for (i = 0; i < ARRAY_SIZE (cpu_arch); i++)
+	{
+	  if (strcmp (arg, cpu_arch [i].name) == 0)
+	    {
+	      cpu_arch_isa = cpu_arch [i].type;
+	      cpu_arch_isa_flags = cpu_arch[i].flags;
+	      break;
+	    }
+	}
+      if (i >= ARRAY_SIZE (cpu_arch))
+	as_fatal (_("Invalid -march= option: `%s'"), arg);
+      break;
+
+    case OPTION_MTUNE:
+      if (*arg == '.')
+	as_fatal (_("Invalid -mtune= option: `%s'"), arg);
+      for (i = 0; i < ARRAY_SIZE (cpu_arch); i++)
+	{
+	  if (strcmp (arg, cpu_arch [i].name) == 0)
+	    {
+	      cpu_arch_tune = cpu_arch [i].type;
+	      cpu_arch_tune_flags = cpu_arch[i].flags;
+	      break;
+	    }
+	}
+      if (i >= ARRAY_SIZE (cpu_arch))
+	as_fatal (_("Invalid -mtune= option: `%s'"), arg);
+      break;
+
     default:
       return 0;
     }
@@ -5543,6 +5643,11 @@ md_show_usage (stream)
   fprintf (stream, _("\
   --divide                ignored\n"));
 #endif
+  fprintf (stream, _("\
+  -march=CPU/-mtune=CPU   generate code/optimize for CPU, where CPU is one of:\n\
+                           i386, i486, pentium, pentiumpro, pentium4, nocona,\n\
+			   yonah, merom, k6, athlon, k8, generic32, generic64\n"));
+
 }
 
 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
@@ -5554,9 +5659,37 @@ const char *
 i386_target_format ()
 {
   if (!strcmp (default_arch, "x86_64"))
-    set_code_flag (CODE_64BIT);
+    {
+      set_code_flag (CODE_64BIT);
+      if (cpu_arch_isa == PROCESSOR_UNKNOWN)
+	{
+	  cpu_arch_isa = PROCESSOR_GENERIC64;
+	  cpu_arch_isa_flags = Cpu086|Cpu186|Cpu286|Cpu386|Cpu486
+			       |Cpu586|Cpu686|CpuP4|CpuMMX|CpuMMX2
+			       |CpuSSE|CpuSSE2;
+	}
+      if (cpu_arch_tune == PROCESSOR_UNKNOWN)
+	{
+	  cpu_arch_tune = PROCESSOR_GENERIC64;
+	  cpu_arch_tune_flags = Cpu086|Cpu186|Cpu286|Cpu386|Cpu486
+				|Cpu586|Cpu686|CpuP4|CpuMMX|CpuMMX2
+				|CpuSSE|CpuSSE2;
+	}
+    }
   else if (!strcmp (default_arch, "i386"))
-    set_code_flag (CODE_32BIT);
+    {
+      set_code_flag (CODE_32BIT);
+      if (cpu_arch_isa == PROCESSOR_UNKNOWN)
+	{
+	  cpu_arch_isa = PROCESSOR_GENERIC32;
+	  cpu_arch_isa_flags = Cpu086|Cpu186|Cpu286|Cpu386;
+	}
+      if (cpu_arch_tune == PROCESSOR_UNKNOWN)
+	{
+	  cpu_arch_tune = PROCESSOR_GENERIC32;
+	  cpu_arch_tune_flags = Cpu086|Cpu186|Cpu286|Cpu386;
+	}
+    }
   else
     as_fatal (_("Unknown architecture"));
   switch (OUTPUT_FLAVOR)
--- gas/config/tc-i386.h.tune	2006-05-19 06:31:21.000000000 -0700
+++ gas/config/tc-i386.h	2006-06-15 14:14:01.000000000 -0700
@@ -377,11 +377,29 @@ typedef struct
 }
 sib_byte;
 
-/* x86 arch names and features */
+enum processor_type
+{
+  PROCESSOR_UNKNOWN,
+  PROCESSOR_I486,
+  PROCESSOR_PENTIUM,
+  PROCESSOR_PENTIUMPRO,
+  PROCESSOR_PENTIUM4,
+  PROCESSOR_NOCONA,
+  PROCESSOR_YONAH,
+  PROCESSOR_MEROM,
+  PROCESSOR_K6,
+  PROCESSOR_ATHLON,
+  PROCESSOR_K8,
+  PROCESSOR_GENERIC32,
+  PROCESSOR_GENERIC64
+};
+
+/* x86 arch names, types and features */
 typedef struct
 {
-  const char *name;	/* arch name */
-  unsigned int flags;	/* cpu feature flags */
+  const char *name;		/* arch name */
+  enum processor_type type;	/* arch type */
+  unsigned int flags;		/* cpu feature flags */
 }
 arch_entry;
 
--- gas/doc/as.texinfo.tune	2006-06-01 09:15:58.000000000 -0700
+++ gas/doc/as.texinfo	2006-06-15 15:26:26.000000000 -0700
@@ -296,6 +296,7 @@ gcc(1), ld(1), and the Info entries for 
 
 @emph{Target i386 options:}
    [@b{--32}|@b{--64}] [@b{-n}]
+   [@b{-march}=@var{CPU}] [@b{-mtune}=@var{CPU}] 
 @end ifset
 @ifset I960
 
--- gas/doc/c-i386.texi.tune	2005-11-11 10:48:53.000000000 -0800
+++ gas/doc/c-i386.texi	2006-06-15 15:28:43.000000000 -0700
@@ -76,6 +76,49 @@ character, which means that it cannot be
 not disable @samp{/} at the beginning of a line starting a comment, or
 affect using @samp{#} for starting a comment.
 
+@cindex @samp{-march=} option, i386
+@cindex @samp{-march=} option, x86-64
+@item -march=@var{CPU}
+This option specifies an instruction set architecture for generating
+instructions.  The following architectures are recognized:
+@code{i8086},
+@code{i186},
+@code{i286},
+@code{i386},
+@code{i486},
+@code{i586},
+@code{i686},
+@code{pentium},
+@code{pentiumpro},
+@code{pentiumii},
+@code{pentiumiii},
+@code{pentium4},
+@code{prescott},
+@code{nocona},
+@code{yonah},
+@code{merom},
+@code{k6},
+@code{k6_2},
+@code{athlon},
+@code{sledgehammer},
+@code{opteron},
+@code{k8},
+@code{generic32} and
+@code{generic64}.
+
+This option only affects instructions generated by the assembler. The
+@code{.arch} directive will take precedence.
+
+@cindex @samp{-mtune=} option, i386
+@cindex @samp{-mtune=} option, x86-64
+@item -mtune=@var{CPU}
+This option specifies a processor to optimize for. When used in
+conjunction with the @option{-march} option, only instructions
+of the processor specified by the @option{-march} option will be
+generated.
+
+Valid @var{CPU} values are identical to @option{-march=@var{CPU}}.
+
 @end table
 
 @node i386-Syntax
@@ -709,8 +752,11 @@ supported on the CPU specified.  The cho
 @item @samp{i8086} @tab @samp{i186} @tab @samp{i286} @tab @samp{i386}
 @item @samp{i486} @tab @samp{i586} @tab @samp{i686} @tab @samp{pentium}
 @item @samp{pentiumpro} @tab @samp{pentiumii} @tab @samp{pentiumiii} @tab @samp{pentium4}
-@item @samp{k6} @tab @samp{athlon} @samp{sledgehammer}
-@item @samp{.mmx} @samp{.sse} @samp{.sse2} @samp{.sse3} @samp{.3dnow}
+@item @samp{prescott} @tab @samp{nocona} @tab @samp{yonah} @tab @samp{merom}
+@item @samp{k6} @tab @samp{athlon} @tab @samp{sledgehammer} @tab @samp{k8} 
+@item @samp{.mmx} @tab @samp{.sse} @tab @samp{.sse2} @tab @samp{.sse3}
+@item @samp{.3dnow} @tab @samp{.3dnowa} @tab @samp{.padlock} @tab @samp{.pacifica}
+@item @samp{.svme}
 @end multitable
 
 Apart from the warning, there are only two other effects on

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: PATCH: Add -march=/-mtune= to x86 assembler
  2006-06-16  7:27                             ` PATCH: Add -march=/-mtune= to x86 assembler H. J. Lu
@ 2006-06-16 15:26                               ` H. J. Lu
  0 siblings, 0 replies; 18+ messages in thread
From: H. J. Lu @ 2006-06-16 15:26 UTC (permalink / raw)
  To: Paul Brook; +Cc: binutils, Nick Clifton

On Thu, Jun 15, 2006 at 03:46:13PM -0700, H. J. Lu wrote:
> On Thu, Jun 15, 2006 at 07:58:26PM +0100, Paul Brook wrote:
> > 
> > > in assembler. The reasons we haven't run into any serious problems are
> > >
> > > 1. By default, cpu_arch_flags is set to accept everything.
> > > 2. .arch directive isn't used much.
> > >
> > > So my -mtune=CPU switch will optimize for CPU by generating instructions
> > > for CPU if the instruction set of CPU is available.
> > 
> > But you said these instructions only existed on ppro or later. You can't have 
> > mtune= affect the choice of instruction unless you also implement -march=. 
> > 
> > Consider gcc -march=386 -mtune=686. IIUC you're proposing that gcc doesn't 
> > emit .arch, and doesn't pass through -march=, so gas defaults to allowing all 
> > instructions. gas then sees -mtune= and generates instructions that don't 
> > work on 386.
> > 
> 
> This is a patch to add -march=/-mtune= to x86 assembler. Currently,
> it doesn't generate different code. I will add processor specific
> optimization later.
> 

This is the patch I am checking in now.


H.J.
---
2006-06-14  H.J. Lu  <hongjiu.lu@intel.com>

	* config/tc-i386.h (processor_type): New.
	(arch_entry): Add type.

	* config/tc-i386.c (cpu_arch_tune): New.
	(cpu_arch_tune_flags): Likewise.
	(cpu_arch_isa_flags): Likewise.
	(cpu_arch): Updated.
	(set_cpu_arch): Also update cpu_arch_isa_flags.
	(md_assemble): Update cpu_arch_isa_flags.
	(OPTION_MARCH): New.
	(OPTION_MTUNE): Likewise.
	(md_longopts): Add -march= and -mtune=.
	(md_parse_option): Support -march= and -mtune=.
	(md_show_usage): Add -march=CPU/-mtune=CPU.
	(i386_target_format): Also update cpu_arch_isa_flags,
	cpu_arch_tune and cpu_arch_tune_flags.

	* doc/as.texinfo: Add -march=CPU/-mtune=CPU.

	* doc/c-i386.texi: Document -march=CPU/-mtune=CPU.

--- gas/config/tc-i386.c.tune	2006-06-12 12:50:18.000000000 -0700
+++ gas/config/tc-i386.c	2006-06-16 07:58:46.000000000 -0700
@@ -323,6 +323,15 @@ static const char *cpu_sub_arch_name = N
 /* CPU feature flags.  */
 static unsigned int cpu_arch_flags = CpuUnknownFlags | CpuNo64;
 
+/* Cpu we are generating instructions for.  */
+static enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
+
+/* CPU feature flags of cpu we are generating instructions for.  */
+static unsigned int cpu_arch_tune_flags = 0;
+
+/* CPU feature flags of instruction set architecture used.  */
+static unsigned int cpu_arch_isa_flags = 0;
+
 /* If set, conditional jumps are not automatically promoted to handle
    larger than a byte offset.  */
 static unsigned int no_cond_jump_promotion = 0;
@@ -415,35 +424,85 @@ const relax_typeS md_relax_table[] =
   {0, 0, 4, 0}
 };
 
-static const arch_entry cpu_arch[] = {
-  {"i8086",	Cpu086 },
-  {"i186",	Cpu086|Cpu186 },
-  {"i286",	Cpu086|Cpu186|Cpu286 },
-  {"i386",	Cpu086|Cpu186|Cpu286|Cpu386 },
-  {"i486",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486 },
-  {"i586",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586 },
-  {"i686",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686 },
-  {"pentium",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586 },
-  {"pentiumpro",Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686 },
-  {"pentiumii",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX },
-  {"pentiumiii",Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX|CpuMMX2|CpuSSE },
-  {"pentium4",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2 },
-  {"prescott",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuPNI },
-  {"k6",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX },
-  {"k6_2",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX|Cpu3dnow },
-  {"athlon",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA },
-  {"sledgehammer",Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon|CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2 },
-  {"opteron",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon|CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2 },
-  {".mmx",	CpuMMX },
-  {".sse",	CpuMMX|CpuMMX2|CpuSSE },
-  {".sse2",	CpuMMX|CpuMMX2|CpuSSE|CpuSSE2 },
-  {".sse3",	CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3 },
-  {".3dnow",	CpuMMX|Cpu3dnow },
-  {".3dnowa",	CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA },
-  {".padlock",	CpuPadLock },
-  {".pacifica",	CpuSVME },
-  {".svme",	CpuSVME },
-  {NULL, 0 }
+static const arch_entry cpu_arch[] =
+{
+  {"generic32", PROCESSOR_GENERIC32,
+   Cpu086|Cpu186|Cpu286|Cpu386},
+  {"generic64", PROCESSOR_GENERIC64,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+   |CpuMMX2|CpuSSE|CpuSSE2},
+  {"i8086", PROCESSOR_UNKNOWN,
+   Cpu086},
+  {"i186", PROCESSOR_UNKNOWN,
+   Cpu086|Cpu186},
+  {"i286", PROCESSOR_UNKNOWN,
+   Cpu086|Cpu186|Cpu286},
+  {"i386", PROCESSOR_GENERIC32,
+   Cpu086|Cpu186|Cpu286|Cpu386},
+  {"i486", PROCESSOR_I486,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486},
+  {"i586", PROCESSOR_PENTIUM,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586},
+  {"i686", PROCESSOR_PENTIUMPRO,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686},
+  {"pentium", PROCESSOR_PENTIUM,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586},
+  {"pentiumpro",PROCESSOR_PENTIUMPRO,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686},
+  {"pentiumii",	PROCESSOR_PENTIUMPRO,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX},
+  {"pentiumiii",PROCESSOR_PENTIUMPRO,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX|CpuMMX2
+   |CpuSSE},
+  {"pentium4", PROCESSOR_PENTIUM4,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+   |CpuMMX2|CpuSSE|CpuSSE2},
+  {"prescott", PROCESSOR_NOCONA,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+   |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
+  {"nocona", PROCESSOR_NOCONA,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+   |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
+  {"yonah", PROCESSOR_YONAH,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+   |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
+  {"merom", PROCESSOR_MEROM,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX
+   |CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuMNI},
+  {"k6", PROCESSOR_K6,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX},
+  {"k6_2", PROCESSOR_K6,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX|Cpu3dnow},
+  {"athlon", PROCESSOR_ATHLON,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon
+   |CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA},
+  {"sledgehammer", PROCESSOR_K8,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon
+   |CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2},
+  {"opteron", PROCESSOR_K8,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon
+   |CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2},
+  {"k8", PROCESSOR_K8,
+   Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon
+   |CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2},
+  {".mmx", PROCESSOR_UNKNOWN,
+   CpuMMX},
+  {".sse", PROCESSOR_UNKNOWN,
+   CpuMMX|CpuMMX2|CpuSSE},
+  {".sse2", PROCESSOR_UNKNOWN,
+   CpuMMX|CpuMMX2|CpuSSE|CpuSSE2},
+  {".sse3", PROCESSOR_UNKNOWN,
+   CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3},
+  {".3dnow", PROCESSOR_UNKNOWN,
+   CpuMMX|Cpu3dnow},
+  {".3dnowa", PROCESSOR_UNKNOWN,
+   CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA},
+  {".padlock", PROCESSOR_UNKNOWN,
+   CpuPadLock},
+  {".pacifica", PROCESSOR_UNKNOWN,
+   CpuSVME},
+  {".svme", PROCESSOR_UNKNOWN,
+   CpuSVME}
 };
 
 const pseudo_typeS md_pseudo_table[] =
@@ -866,9 +925,9 @@ set_cpu_arch (dummy)
     {
       char *string = input_line_pointer;
       int e = get_symbol_end ();
-      int i;
+      unsigned int i;
 
-      for (i = 0; cpu_arch[i].name; i++)
+      for (i = 0; i < ARRAY_SIZE (cpu_arch); i++)
 	{
 	  if (strcmp (string, cpu_arch[i].name) == 0)
 	    {
@@ -878,6 +937,7 @@ set_cpu_arch (dummy)
 		  cpu_sub_arch_name = NULL;
 		  cpu_arch_flags = (cpu_arch[i].flags
 				    | (flag_code == CODE_64BIT ? Cpu64 : CpuNo64));
+		  cpu_arch_isa_flags = cpu_arch[i].flags;
 		  break;
 		}
 	      if ((cpu_arch_flags | cpu_arch[i].flags) != cpu_arch_flags)
@@ -890,7 +950,7 @@ set_cpu_arch (dummy)
 	      return;
 	    }
 	}
-      if (!cpu_arch[i].name)
+      if (i >= ARRAY_SIZE (cpu_arch))
 	as_bad (_("no such architecture: `%s'"), string);
 
       *input_line_pointer = e;
@@ -1655,6 +1715,9 @@ md_assemble (line)
   if (i.rex != 0)
     add_prefix (REX_OPCODE | i.rex);
 
+  /* Record what ISA we have generated so far.  */
+  cpu_arch_isa_flags |= i.tm.cpu_flags;
+
   /* We are ready to output the insn.  */
   output_insn ();
 }
@@ -5428,6 +5491,8 @@ const char *md_shortopts = "qn";
 #define OPTION_32 (OPTION_MD_BASE + 0)
 #define OPTION_64 (OPTION_MD_BASE + 1)
 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
+#define OPTION_MARCH (OPTION_MD_BASE + 3)
+#define OPTION_MTUNE (OPTION_MD_BASE + 4)
 
 struct option md_longopts[] = {
   {"32", no_argument, NULL, OPTION_32},
@@ -5435,15 +5500,17 @@ struct option md_longopts[] = {
   {"64", no_argument, NULL, OPTION_64},
 #endif
   {"divide", no_argument, NULL, OPTION_DIVIDE},
+  {"march", required_argument, NULL, OPTION_MARCH},
+  {"mtune", required_argument, NULL, OPTION_MTUNE},
   {NULL, no_argument, NULL, 0}
 };
 size_t md_longopts_size = sizeof (md_longopts);
 
 int
-md_parse_option (c, arg)
-     int c;
-     char *arg ATTRIBUTE_UNUSED;
+md_parse_option (int c, char *arg)
 {
+  unsigned int i;
+
   switch (c)
     {
     case 'n':
@@ -5513,6 +5580,37 @@ md_parse_option (c, arg)
 #endif
       break;
 
+    case OPTION_MARCH:
+      if (*arg == '.')
+	as_fatal (_("Invalid -march= option: `%s'"), arg);
+      for (i = 0; i < ARRAY_SIZE (cpu_arch); i++)
+	{
+	  if (strcmp (arg, cpu_arch [i].name) == 0)
+	    {
+	      cpu_arch_isa_flags = cpu_arch[i].flags;
+	      break;
+	    }
+	}
+      if (i >= ARRAY_SIZE (cpu_arch))
+	as_fatal (_("Invalid -march= option: `%s'"), arg);
+      break;
+
+    case OPTION_MTUNE:
+      if (*arg == '.')
+	as_fatal (_("Invalid -mtune= option: `%s'"), arg);
+      for (i = 0; i < ARRAY_SIZE (cpu_arch); i++)
+	{
+	  if (strcmp (arg, cpu_arch [i].name) == 0)
+	    {
+	      cpu_arch_tune = cpu_arch [i].type;
+	      cpu_arch_tune_flags = cpu_arch[i].flags;
+	      break;
+	    }
+	}
+      if (i >= ARRAY_SIZE (cpu_arch))
+	as_fatal (_("Invalid -mtune= option: `%s'"), arg);
+      break;
+
     default:
       return 0;
     }
@@ -5543,6 +5641,11 @@ md_show_usage (stream)
   fprintf (stream, _("\
   --divide                ignored\n"));
 #endif
+  fprintf (stream, _("\
+  -march=CPU/-mtune=CPU   generate code/optimize for CPU, where CPU is one of:\n\
+                           i386, i486, pentium, pentiumpro, pentium4, nocona,\n\
+			   yonah, merom, k6, athlon, k8, generic32, generic64\n"));
+
 }
 
 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
@@ -5554,9 +5657,31 @@ const char *
 i386_target_format ()
 {
   if (!strcmp (default_arch, "x86_64"))
-    set_code_flag (CODE_64BIT);
+    {
+      set_code_flag (CODE_64BIT);
+      if (cpu_arch_isa_flags == 0)
+	cpu_arch_isa_flags = Cpu086|Cpu186|Cpu286|Cpu386|Cpu486
+			     |Cpu586|Cpu686|CpuP4|CpuMMX|CpuMMX2
+			     |CpuSSE|CpuSSE2;
+      if (cpu_arch_tune == PROCESSOR_UNKNOWN)
+	{
+	  cpu_arch_tune = PROCESSOR_GENERIC64;
+	  cpu_arch_tune_flags = Cpu086|Cpu186|Cpu286|Cpu386|Cpu486
+				|Cpu586|Cpu686|CpuP4|CpuMMX|CpuMMX2
+				|CpuSSE|CpuSSE2;
+	}
+    }
   else if (!strcmp (default_arch, "i386"))
-    set_code_flag (CODE_32BIT);
+    {
+      set_code_flag (CODE_32BIT);
+      if (cpu_arch_isa_flags == 0)
+	cpu_arch_isa_flags = Cpu086|Cpu186|Cpu286|Cpu386;
+      if (cpu_arch_tune == PROCESSOR_UNKNOWN)
+	{
+	  cpu_arch_tune = PROCESSOR_GENERIC32;
+	  cpu_arch_tune_flags = Cpu086|Cpu186|Cpu286|Cpu386;
+	}
+    }
   else
     as_fatal (_("Unknown architecture"));
   switch (OUTPUT_FLAVOR)
--- gas/config/tc-i386.h.tune	2006-05-19 06:31:21.000000000 -0700
+++ gas/config/tc-i386.h	2006-06-15 14:14:01.000000000 -0700
@@ -377,11 +377,29 @@ typedef struct
 }
 sib_byte;
 
-/* x86 arch names and features */
+enum processor_type
+{
+  PROCESSOR_UNKNOWN,
+  PROCESSOR_I486,
+  PROCESSOR_PENTIUM,
+  PROCESSOR_PENTIUMPRO,
+  PROCESSOR_PENTIUM4,
+  PROCESSOR_NOCONA,
+  PROCESSOR_YONAH,
+  PROCESSOR_MEROM,
+  PROCESSOR_K6,
+  PROCESSOR_ATHLON,
+  PROCESSOR_K8,
+  PROCESSOR_GENERIC32,
+  PROCESSOR_GENERIC64
+};
+
+/* x86 arch names, types and features */
 typedef struct
 {
-  const char *name;	/* arch name */
-  unsigned int flags;	/* cpu feature flags */
+  const char *name;		/* arch name */
+  enum processor_type type;	/* arch type */
+  unsigned int flags;		/* cpu feature flags */
 }
 arch_entry;
 
--- gas/doc/as.texinfo.tune	2006-06-01 09:15:58.000000000 -0700
+++ gas/doc/as.texinfo	2006-06-15 15:26:26.000000000 -0700
@@ -296,6 +296,7 @@ gcc(1), ld(1), and the Info entries for 
 
 @emph{Target i386 options:}
    [@b{--32}|@b{--64}] [@b{-n}]
+   [@b{-march}=@var{CPU}] [@b{-mtune}=@var{CPU}] 
 @end ifset
 @ifset I960
 
--- gas/doc/c-i386.texi.tune	2005-11-11 10:48:53.000000000 -0800
+++ gas/doc/c-i386.texi	2006-06-15 15:28:43.000000000 -0700
@@ -76,6 +76,49 @@ character, which means that it cannot be
 not disable @samp{/} at the beginning of a line starting a comment, or
 affect using @samp{#} for starting a comment.
 
+@cindex @samp{-march=} option, i386
+@cindex @samp{-march=} option, x86-64
+@item -march=@var{CPU}
+This option specifies an instruction set architecture for generating
+instructions.  The following architectures are recognized:
+@code{i8086},
+@code{i186},
+@code{i286},
+@code{i386},
+@code{i486},
+@code{i586},
+@code{i686},
+@code{pentium},
+@code{pentiumpro},
+@code{pentiumii},
+@code{pentiumiii},
+@code{pentium4},
+@code{prescott},
+@code{nocona},
+@code{yonah},
+@code{merom},
+@code{k6},
+@code{k6_2},
+@code{athlon},
+@code{sledgehammer},
+@code{opteron},
+@code{k8},
+@code{generic32} and
+@code{generic64}.
+
+This option only affects instructions generated by the assembler. The
+@code{.arch} directive will take precedence.
+
+@cindex @samp{-mtune=} option, i386
+@cindex @samp{-mtune=} option, x86-64
+@item -mtune=@var{CPU}
+This option specifies a processor to optimize for. When used in
+conjunction with the @option{-march} option, only instructions
+of the processor specified by the @option{-march} option will be
+generated.
+
+Valid @var{CPU} values are identical to @option{-march=@var{CPU}}.
+
 @end table
 
 @node i386-Syntax
@@ -709,8 +752,11 @@ supported on the CPU specified.  The cho
 @item @samp{i8086} @tab @samp{i186} @tab @samp{i286} @tab @samp{i386}
 @item @samp{i486} @tab @samp{i586} @tab @samp{i686} @tab @samp{pentium}
 @item @samp{pentiumpro} @tab @samp{pentiumii} @tab @samp{pentiumiii} @tab @samp{pentium4}
-@item @samp{k6} @tab @samp{athlon} @samp{sledgehammer}
-@item @samp{.mmx} @samp{.sse} @samp{.sse2} @samp{.sse3} @samp{.3dnow}
+@item @samp{prescott} @tab @samp{nocona} @tab @samp{yonah} @tab @samp{merom}
+@item @samp{k6} @tab @samp{athlon} @tab @samp{sledgehammer} @tab @samp{k8} 
+@item @samp{.mmx} @tab @samp{.sse} @tab @samp{.sse2} @tab @samp{.sse3}
+@item @samp{.3dnow} @tab @samp{.3dnowa} @tab @samp{.padlock} @tab @samp{.pacifica}
+@item @samp{.svme}
 @end multitable
 
 Apart from the warning, there are only two other effects on

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2006-06-16 15:22 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-06-13 20:45 PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers H. J. Lu
2006-06-14  8:30 ` Nick Clifton
2006-06-14 15:14   ` H. J. Lu
2006-06-14 17:20     ` Nick Clifton
2006-06-14 18:07       ` H. J. Lu
2006-06-15  8:10         ` Nick Clifton
2006-06-15 14:51           ` Paul Brook
2006-06-15 15:15             ` H. J. Lu
2006-06-15 15:28               ` Paul Brook
2006-06-15 16:20                 ` H. J. Lu
2006-06-15 17:08                   ` Paul Brook
2006-06-15 17:41                     ` H. J. Lu
2006-06-15 18:45                       ` Paul Brook
2006-06-15 18:58                         ` H. J. Lu
2006-06-15 19:02                           ` Paul Brook
2006-06-16  7:27                             ` PATCH: Add -march=/-mtune= to x86 assembler H. J. Lu
2006-06-16 15:26                               ` H. J. Lu
2006-06-15 15:00           ` PATCH: Add --alt-nops=short|long to x86/x86-64 assemblers H. J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).