public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v2 0/2] x86: MONITOR and alike adjustments
@ 2020-02-14 12:45 Jan Beulich
  2020-02-14 12:47 ` [PATCH v2 2/2] x86: fold AddrPrefixOpReg templates Jan Beulich
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Jan Beulich @ 2020-02-14 12:45 UTC (permalink / raw)
  To: binutils; +Cc: H.J. Lu

1: Intel: don't swap operands of MONITOR{,X} and MWAIT{,X}
2: x86: fold AddrPrefixOpReg templates

The first patch was sent many years ago, and rejected. Now that
I've looked into this again, I noticed that the reason to reject
it was actually wrong. Hence the (extended) re-submission.

Jan

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v2 2/2] x86: fold AddrPrefixOpReg templates
  2020-02-14 12:45 [PATCH v2 0/2] x86: MONITOR and alike adjustments Jan Beulich
@ 2020-02-14 12:47 ` Jan Beulich
  2020-02-14 12:47 ` [PATCH v2 1/2] x86/Intel: don't swap operands of MONITOR{,X} and MWAIT{,X} Jan Beulich
  2020-02-14 12:52 ` [PATCH v2 0/2] x86: MONITOR and alike adjustments H.J. Lu
  2 siblings, 0 replies; 4+ messages in thread
From: Jan Beulich @ 2020-02-14 12:47 UTC (permalink / raw)
  To: binutils; +Cc: H.J. Lu

There's no need to have separate Cpu64 and CpuNo64 templates: There
already is special logic handling the attribute, and all that's needed
is rejecting 16-bit address registers in 64-bit mode. Suppress suffix
guessing and group all involved logic together, outside of suffix
processing (arguably it doesn't even belong in process_suffix()).

Also, since no AddrPrefixOpReg template permits any suffixes, move the
No_*Suf specifiers for them to a central place. Along with this drop
the no longer relevant NoRex64 from there.

gas/
2020-02-XX  Jan Beulich <jbeulich@suse.com>

	* config/tc-i386.c (process_suffix): Don't try to guess a suffix
	for AddrPrefixOpReg templates. Combine the two pieces of
	addrprefixopreg handling. Reject 16-bit address reg in 64-bit
	mode.

opcodes/
2020-02-XX  Jan Beulich <jbeulich@suse.com>

	* i386-opc.tbl (AddrPrefixOpReg): Define.
	(monitor, invlpga, vmload, vmrun, vmsave, clzero, monitorx,
	umonitor, movdir64b, enqcmd, enqcmds): Fold Cpu64 and CpuNo64
	templates. Drop NoRex64.
	* i386-tbl.h: Re-generate.
---
v2: New.

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -6297,7 +6297,8 @@ process_suffix (void)
   else if (i.tm.opcode_modifier.size == SIZE64)
     i.suffix = QWORD_MNEM_SUFFIX;
   else if (i.reg_operands
-	   && (i.operands > 1 || i.types[0].bitfield.class == Reg))
+	   && (i.operands > 1 || i.types[0].bitfield.class == Reg)
+	   && !i.tm.opcode_modifier.addrprefixopreg)
     {
       unsigned int numop = i.operands;
 
@@ -6613,28 +6614,13 @@ process_suffix (void)
       /* Now select between word & dword operations via the operand
 	 size prefix, except for instructions that will ignore this
 	 prefix anyway.  */
-      if (i.reg_operands > 0
-	  && i.types[0].bitfield.class == Reg
-	  && i.tm.opcode_modifier.addrprefixopreg
-	  && (i.tm.operand_types[0].bitfield.instance == Accum
-	      || i.operands == 1))
-	{
-	  /* The address size override prefix changes the size of the
-	     first operand.  */
-	  if ((flag_code == CODE_32BIT
-	       && i.op[0].regs->reg_type.bitfield.word)
-	      || (flag_code != CODE_32BIT
-		  && i.op[0].regs->reg_type.bitfield.dword))
-	    if (!add_prefix (ADDR_PREFIX_OPCODE))
-	      return 0;
-	}
-      else if (i.suffix != QWORD_MNEM_SUFFIX
-	       && !i.tm.opcode_modifier.ignoresize
-	       && !i.tm.opcode_modifier.floatmf
-	       && !is_any_vex_encoding (&i.tm)
-	       && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
-		   || (flag_code == CODE_64BIT
-		       && i.tm.opcode_modifier.jump == JUMP_BYTE)))
+      if (i.suffix != QWORD_MNEM_SUFFIX
+	  && !i.tm.opcode_modifier.ignoresize
+	  && !i.tm.opcode_modifier.floatmf
+	  && !is_any_vex_encoding (&i.tm)
+	  && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
+	      || (flag_code == CODE_64BIT
+		  && i.tm.opcode_modifier.jump == JUMP_BYTE)))
 	{
 	  unsigned int prefix = DATA_PREFIX_OPCODE;
 
@@ -6663,39 +6649,70 @@ process_suffix (void)
       break;
     }
 
-  if (i.reg_operands != 0
-      && i.operands > 1
-      && i.tm.opcode_modifier.addrprefixopreg
-      && i.tm.operand_types[0].bitfield.instance != Accum)
+  if (i.tm.opcode_modifier.addrprefixopreg)
     {
-      /* Check invalid register operand when the address size override
-	 prefix changes the size of register operands.  */
-      unsigned int op;
-      enum { need_word, need_dword, need_qword } need;
+      gas_assert (!i.suffix);
+      gas_assert (i.reg_operands);
+
+      if (i.tm.operand_types[0].bitfield.instance == Accum
+	  || i.operands == 1)
+	{
+	  /* The address size override prefix changes the size of the
+	     first operand.  */
+	  if (flag_code == CODE_64BIT
+	      && i.op[0].regs->reg_type.bitfield.word)
+	    {
+	      as_bad (_("16-bit addressing unavailable for `%s'"),
+		      i.tm.name);
+	      return 0;
+	    }
 
-      if (flag_code == CODE_32BIT)
-	need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
+	  if ((flag_code == CODE_32BIT
+	       ? i.op[0].regs->reg_type.bitfield.word
+	       : i.op[0].regs->reg_type.bitfield.dword)
+	      && !add_prefix (ADDR_PREFIX_OPCODE))
+	    return 0;
+	}
       else
 	{
-	  if (i.prefix[ADDR_PREFIX])
+	  /* Check invalid register operand when the address size override
+	     prefix changes the size of register operands.  */
+	  unsigned int op;
+	  enum { need_word, need_dword, need_qword } need;
+
+	  if (flag_code == CODE_32BIT)
+	    need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
+	  else if (i.prefix[ADDR_PREFIX])
 	    need = need_dword;
 	  else
 	    need = flag_code == CODE_64BIT ? need_qword : need_word;
-	}
 
-      for (op = 0; op < i.operands; op++)
-	if (i.types[op].bitfield.class == Reg
-	    && ((need == need_word
-		 && !i.op[op].regs->reg_type.bitfield.word)
-		|| (need == need_dword
-		    && !i.op[op].regs->reg_type.bitfield.dword)
-		|| (need == need_qword
-		    && !i.op[op].regs->reg_type.bitfield.qword)))
-	  {
-	    as_bad (_("invalid register operand size for `%s'"),
-		    i.tm.name);
-	    return 0;
-	  }
+	  for (op = 0; op < i.operands; op++)
+	    {
+	      if (i.types[op].bitfield.class != Reg)
+		continue;
+
+	      switch (need)
+		{
+		case need_word:
+		  if (i.op[op].regs->reg_type.bitfield.word)
+		    continue;
+		  break;
+		case need_dword:
+		  if (i.op[op].regs->reg_type.bitfield.dword)
+		    continue;
+		  break;
+		case need_qword:
+		  if (i.op[op].regs->reg_type.bitfield.qword)
+		    continue;
+		  break;
+		}
+
+	      as_bad (_("invalid register operand size for `%s'"),
+		      i.tm.name);
+	      return 0;
+	    }
+	}
     }
 
   return 1;
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -72,6 +72,8 @@
 #define IsStringEsOp0 IsString=IS_STRING_ES_OP0
 #define IsStringEsOp1 IsString=IS_STRING_ES_OP1
 
+#define AddrPrefixOpReg AddrPrefixOpReg|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf
+
 #define VexW0 VexW=VEXW0
 #define VexW1 VexW=VEXW1
 #define VexWIG VexW=VEXWIG
@@ -1568,10 +1570,9 @@ monitor, 0, 0xf01c8, None, 3, CpuSSE3, N
 // monitor is very special. CX and DX are always 32 bits. The
 // address size override prefix can be used to overrride the AX size in
 // all modes.
-monitor, 3, 0xf01c8, None, 3, CpuSSE3|CpuNo64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg|NoAVX, { Acc|Word|Dword, RegC|Dword, RegD|Dword }
-monitor, 3, 0xf01c8, None, 3, CpuSSE3|Cpu64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg|NoRex64|NoAVX, { Acc|Dword|Qword, RegC|Dword, RegD|Dword }
+monitor, 3, 0xf01c8, None, 3, CpuSSE3, AddrPrefixOpReg|NoAVX, { Acc|Word|Dword|Qword, RegC|Dword, RegD|Dword }
 // The 64-bit form exists only for compatibility with older gas.
-monitor, 3, 0xf01c8, None, 3, CpuSSE3|Cpu64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg|NoRex64|NoAVX, { Acc|Dword|Qword, RegC|Qword, RegD|Qword }
+monitor, 3, 0xf01c8, None, 3, CpuSSE3|Cpu64, AddrPrefixOpReg|NoAVX, { Acc|Dword|Qword, RegC|Qword, RegD|Qword }
 movddup, 2, 0xf212, None, 1, CpuAVX, Modrm|Vex|VexOpcode=0|VexW=1|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
 movddup, 2, 0xf20f12, None, 2, CpuSSE3, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
 movshdup, 2, 0xf316, None, 1, CpuAVX, Modrm|Vex|VexOpcode=0|VexW=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -2824,21 +2825,17 @@ rdtscp, 0, 0xf01f9, None, 3, CpuRdtscp,
 // AMD Pacifica additions.
 clgi, 0, 0xf01dd, None, 3, CpuSVME, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { 0 }
 invlpga, 0, 0xf01df, None, 3, CpuSVME, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { 0 }
-invlpga, 2, 0xf01df, None, 3, CpuSVME|CpuNo64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg, { Acc|Word|Dword, RegC|Dword }
-invlpga, 2, 0xf01df, None, 3, CpuSVME|Cpu64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg|NoRex64, { Acc|Dword|Qword, RegC|Dword }
+invlpga, 2, 0xf01df, None, 3, CpuSVME, AddrPrefixOpReg, { Acc|Word|Dword|Qword, RegC|Dword }
 skinit, 0, 0xf01de, None, 3, CpuSVME, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { 0 }
 skinit, 1, 0xf01de, None, 3, CpuSVME, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Acc|Dword }
 stgi, 0, 0xf01dc, None, 3, CpuSVME, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { 0 }
 vmload, 0, 0xf01da, None, 3, CpuSVME, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { 0 }
-vmload, 1, 0xf01da, None, 3, CpuSVME|CpuNo64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg, { Acc|Word|Dword }
-vmload, 1, 0xf01da, None, 3, CpuSVME|Cpu64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg|NoRex64, { Acc|Dword|Qword }
+vmload, 1, 0xf01da, None, 3, CpuSVME, AddrPrefixOpReg, { Acc|Word|Dword|Qword }
 vmmcall, 0, 0xf01d9, None, 3, CpuSVME, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { 0 }
 vmrun, 0, 0xf01d8, None, 3, CpuSVME, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { 0 }
-vmrun, 1, 0xf01d8, None, 3, CpuSVME|CpuNo64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg, { Acc|Word|Dword }
-vmrun, 1, 0xf01d8, None, 3, CpuSVME|Cpu64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg|NoRex64, { Acc|Dword|Qword }
+vmrun, 1, 0xf01d8, None, 3, CpuSVME, AddrPrefixOpReg, { Acc|Word|Dword|Qword }
 vmsave, 0, 0xf01db, None, 3, CpuSVME, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { 0 }
-vmsave, 1, 0xf01db, None, 3, CpuSVME|CpuNo64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg, { Acc|Word|Dword }
-vmsave, 1, 0xf01db, None, 3, CpuSVME|Cpu64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg|NoRex64, { Acc|Dword|Qword }
+vmsave, 1, 0xf01db, None, 3, CpuSVME, AddrPrefixOpReg, { Acc|Word|Dword|Qword }
 
 
 // SSE4a instructions
@@ -4683,18 +4680,16 @@ vpclmulhqhqdq, 3, 0x6644, 0x11, 1, CpuVP
 // CLZERO instructions
 
 clzero, 0, 0xf01fc, None, 3, CpuCLZERO, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { 0 }
-clzero, 1, 0xf01fc, None, 3, CpuCLZERO|CpuNo64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg, { Acc|Word|Dword }
-clzero, 1, 0xf01fc, None, 3, CpuCLZERO|Cpu64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg|NoRex64, { Acc|Dword|Qword }
+clzero, 1, 0xf01fc, None, 3, CpuCLZERO, AddrPrefixOpReg, { Acc|Word|Dword|Qword }
 
 // CLZERO instructions end
 
 // MONITORX/MWAITX instructions
 
 monitorx, 0, 0xf01fa, None, 3, CpuMWAITX, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { 0 }
-monitorx, 3, 0xf01fa, None, 3, CpuMWAITX|CpuNo64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg, { Acc|Word|Dword, RegC|Dword, RegD|Dword }
-monitorx, 3, 0xf01fa, None, 3, CpuMWAITX|Cpu64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg|NoRex64, { Acc|Dword|Qword, RegC|Dword, RegD|Dword }
+monitorx, 3, 0xf01fa, None, 3, CpuMWAITX, AddrPrefixOpReg, { Acc|Word|Dword|Qword, RegC|Dword, RegD|Dword }
 // The 64-bit form exists only for compatibility with older gas.
-monitorx, 3, 0xf01fa, None, 3, CpuMWAITX|Cpu64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg|NoRex64, { Acc|Dword|Qword, RegC|Qword, RegD|Qword }
+monitorx, 3, 0xf01fa, None, 3, CpuMWAITX|Cpu64, AddrPrefixOpReg, { Acc|Dword|Qword, RegC|Qword, RegD|Qword }
 
 mwaitx, 0, 0xf01fb, None, 3, CpuMWAITX, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { 0 }
 // The 64-bit form exists only for compatibility with older gas.
@@ -4758,8 +4753,7 @@ pconfig, 0, 0x0f01c5, None, 3, CpuPCONFI
 
 // WAITPKG instructions.
 
-umonitor, 1, 0xf30fae, 0x6, 2, CpuWAITPKG|CpuNo64, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg, { Reg16|Reg32 }
-umonitor, 1, 0xf30fae, 0x6, 2, CpuWAITPKG|Cpu64, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg|NoRex64, { Reg32|Reg64 }
+umonitor, 1, 0xf30fae, 0x6, 2, CpuWAITPKG, Modrm|AddrPrefixOpReg, { Reg16|Reg32|Reg64 }
 
 tpause, 1, 0x660fae, 0x6, 2, CpuWAITPKG, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64, { Reg32|Reg64 }
 
@@ -4777,8 +4771,7 @@ cldemote, 1, 0x0f1c, 0x0, 2, CpuCLDEMOTE
 
 movdiri, 2, 0xf38f9, None, 3, CpuMOVDIRI, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
 
-movdir64b, 2, 0x660f38f8, None, 3, CpuMOVDIR64B|CpuNo64, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg, { Unspecified|BaseIndex, Reg16|Reg32 }
-movdir64b, 2, 0x660f38f8, None, 3, CpuMOVDIR64B|Cpu64, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64|AddrPrefixOpReg, { Unspecified|BaseIndex, Reg32|Reg64 }
+movdir64b, 2, 0x660f38f8, None, 3, CpuMOVDIR64B, Modrm|AddrPrefixOpReg, { Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 
 // MOVEDIR instructions end.
 
@@ -4798,10 +4791,8 @@ vdpbf16ps, 3, 0xf352, None, 1, CpuAVX512
 
 // ENQCMD instructions.
 
-enqcmd, 2, 0xf20f38f8, None, 3, CpuENQCMD|CpuNo64, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg, { Unspecified|BaseIndex, Reg16|Reg32 }
-enqcmd, 2, 0xf20f38f8, None, 3, CpuENQCMD|Cpu64, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64|AddrPrefixOpReg, { Unspecified|BaseIndex, Reg32|Reg64 }
-enqcmds, 2, 0xf30f38f8, None, 3, CpuENQCMD|CpuNo64, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|AddrPrefixOpReg, { Unspecified|BaseIndex, Reg16|Reg32 }
-enqcmds, 2, 0xf30f38f8, None, 3, CpuENQCMD|Cpu64, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64|AddrPrefixOpReg, { Unspecified|BaseIndex, Reg32|Reg64 }
+enqcmd, 2, 0xf20f38f8, None, 3, CpuENQCMD, Modrm|AddrPrefixOpReg, { Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
+enqcmds, 2, 0xf30f38f8, None, 3, CpuENQCMD, Modrm|AddrPrefixOpReg, { Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 
 // ENQCMD instructions end.
 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v2 1/2] x86/Intel: don't swap operands of MONITOR{,X} and MWAIT{,X}
  2020-02-14 12:45 [PATCH v2 0/2] x86: MONITOR and alike adjustments Jan Beulich
  2020-02-14 12:47 ` [PATCH v2 2/2] x86: fold AddrPrefixOpReg templates Jan Beulich
@ 2020-02-14 12:47 ` Jan Beulich
  2020-02-14 12:52 ` [PATCH v2 0/2] x86: MONITOR and alike adjustments H.J. Lu
  2 siblings, 0 replies; 4+ messages in thread
From: Jan Beulich @ 2020-02-14 12:47 UTC (permalink / raw)
  To: binutils; +Cc: H.J. Lu

Generally, the documentation doesn't allow for any explicit operands
to be specified with MONITOR/MWAIT. To permit the more legible
overriding of the address size via specifying operands, the option is
being retained even in Intel mode, but operand swapping is being
suppressed by this patch. This is both because it makes no sense here
(all of the operands are inputs) and because, as a result, old gcc
(prior to 4.8) actually expects it this way with -mintel-syntax (and
hence gets fixed by this change rather than, as claimed by a reply in
the bug report, broken).

gas/
2020-02-XX  Jan Beulich <jbeulich@suse.com>

	PR gas/14439
	* config/tc-i386.c (md_assemble): Also suppress operand
	swapping for MONITOR{,X} and MWAIT{,X}.
	* testsuite/gas/i386/sse3.s, testsuite/gas/i386/x86-64-sse3.s:
	Add Intel syntax monitor/mwait tests.
	* testsuite/gas/i386/sse3.d, testsuite/gas/i386/x86-64-sse3.d:
	Adjust expectations.
	* testsuite/gas/i386/sse3-intel.d,
	testsuite/gas/i386/x86-64-sse3-intel.d: New.
	* testsuite/gas/i386/i386.exp: Run new tests.
---
v2: Also cover MONITORX/MWAITX. Add testcases.

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -4323,14 +4323,16 @@ md_assemble (char *line)
   /* Now we've parsed the mnemonic into a set of templates, and have the
      operands at hand.  */
 
-  /* All intel opcodes have reversed operands except for "bound" and
-     "enter".  We also don't reverse intersegment "jmp" and "call"
-     instructions with 2 immediate operands so that the immediate segment
-     precedes the offset, as it does when in AT&T mode. */
+  /* All Intel opcodes have reversed operands except for "bound", "enter",
+     "monitor*", and "mwait*".  We also don't reverse intersegment "jmp"
+     and "call" instructions with 2 immediate operands so that the immediate
+     segment precedes the offset, as it does when in AT&T mode. */
   if (intel_syntax
       && i.operands > 1
       && (strcmp (mnemonic, "bound") != 0)
       && (strcmp (mnemonic, "invlpga") != 0)
+      && (strncmp (mnemonic, "monitor", 7) != 0)
+      && (strncmp (mnemonic, "mwait", 5) != 0)
       && !(operand_type_check (i.types[0], imm)
 	   && operand_type_check (i.types[1], imm)))
     swap_operands ();
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -77,6 +77,7 @@ if [expr ([istarget "i*86-*-*"] ||  [ist
     run_dump_test "sse2-16bit"
     run_dump_test "sub"
     run_dump_test "sse3"
+    run_dump_test "sse3-intel"
     run_dump_test "sib"
     run_dump_test "sib-intel"
     run_dump_test "disp"
@@ -699,6 +700,7 @@ if [expr ([istarget "i*86-*-*"] || [ista
     run_dump_test "x86-64-vmfunc"
     run_dump_test "immed64"
     run_dump_test "x86-64-sse3"
+    run_dump_test "x86-64-sse3-intel"
     run_dump_test "x86-64-crx"
     run_dump_test "x86-64-crx-suffix"
     run_dump_test "x86-64-drx"
--- /dev/null
+++ b/gas/testsuite/gas/i386/sse3-intel.d
@@ -0,0 +1,43 @@
+#objdump: -dwMintel
+#name: i386 SSE3 (Intel disassembly)
+#source: sse3.s
+
+.*: +file format .*
+
+Disassembly of section .text:
+
+0+000 <foo>:
+[ 	]*[0-9a-f]+:	66 0f d0 01[ 	]+addsubpd xmm0,(XMMWORD PTR )?\[ecx\]
+[ 	]*[0-9a-f]+:	66 0f d0 ca[ 	]+addsubpd xmm1,xmm2
+[ 	]*[0-9a-f]+:	f2 0f d0 13[ 	]+addsubps xmm2,(XMMWORD PTR )?\[ebx\]
+[ 	]*[0-9a-f]+:	f2 0f d0 dc[ 	]+addsubps xmm3,xmm4
+[ 	]*[0-9a-f]+:	df 88 90 90 90 90[ 	]+fisttp WORD PTR \[eax-0x6f6f6f70\]
+[ 	]*[0-9a-f]+:	db 88 90 90 90 90[ 	]+fisttp DWORD PTR \[eax-0x6f6f6f70\]
+[ 	]*[0-9a-f]+:	dd 88 90 90 90 90[ 	]+fisttp QWORD PTR \[eax-0x6f6f6f70\]
+[ 	]*[0-9a-f]+:	66 0f 7c 65 00[ 	]+haddpd xmm4,(XMMWORD PTR )?\[ebp(\+0x0)\]
+[ 	]*[0-9a-f]+:	66 0f 7c ee[ 	]+haddpd xmm5,xmm6
+[ 	]*[0-9a-f]+:	f2 0f 7c 37[ 	]+haddps xmm6,(XMMWORD PTR )?\[edi\]
+[ 	]*[0-9a-f]+:	f2 0f 7c f8[ 	]+haddps xmm7,xmm0
+[ 	]*[0-9a-f]+:	66 0f 7d c1[ 	]+hsubpd xmm0,xmm1
+[ 	]*[0-9a-f]+:	66 0f 7d 0a[ 	]+hsubpd xmm1,(XMMWORD PTR )?\[edx\]
+[ 	]*[0-9a-f]+:	f2 0f 7d d2[ 	]+hsubps xmm2,xmm2
+[ 	]*[0-9a-f]+:	f2 0f 7d 1c 24[ 	]+hsubps xmm3,(XMMWORD PTR )?\[esp\]
+[ 	]*[0-9a-f]+:	f2 0f f0 2e[ 	]+lddqu  xmm5,(XMMWORD PTR )?\[esi\]
+[ 	]*[0-9a-f]+:	0f 01 c8[ 	]+monitor *
+[ 	]*[0-9a-f]+:	0f 01 c8[ 	]+monitor *
+[ 	]*[0-9a-f]+:	f2 0f 12 f7[ 	]+movddup xmm6,xmm7
+[ 	]*[0-9a-f]+:	f2 0f 12 38[ 	]+movddup xmm7,(QWORD PTR )?\[eax\]
+[ 	]*[0-9a-f]+:	f3 0f 16 01[ 	]+movshdup xmm0,(XMMWORD PTR )?\[ecx\]
+[ 	]*[0-9a-f]+:	f3 0f 16 ca[ 	]+movshdup xmm1,xmm2
+[ 	]*[0-9a-f]+:	f3 0f 12 13[ 	]+movsldup xmm2,(XMMWORD PTR )?\[ebx\]
+[ 	]*[0-9a-f]+:	f3 0f 12 dc[ 	]+movsldup xmm3,xmm4
+[ 	]*[0-9a-f]+:	0f 01 c9[ 	]+mwait *
+[ 	]*[0-9a-f]+:	0f 01 c9[ 	]+mwait *
+[ 	]*[0-9a-f]+:	67 0f 01 c8[ 	]+addr16 monitor *
+[ 	]*[0-9a-f]+:	67 0f 01 c8[ 	]+addr16 monitor *
+[ 	]*[0-9a-f]+:	f2 0f 12 38[ 	]+movddup xmm7,(QWORD PTR )?\[eax\]
+[ 	]*[0-9a-f]+:	f2 0f 12 38[ 	]+movddup xmm7,(QWORD PTR )?\[eax\]
+[ 	]*[0-9a-f]+:	0f 01 c8[ 	]+monitor *
+[ 	]*[0-9a-f]+:	67 0f 01 c8[ 	]+addr16 monitor *
+[ 	]*[0-9a-f]+:	0f 01 c9[ 	]+mwait *
+#pass
--- a/gas/testsuite/gas/i386/sse3.d
+++ b/gas/testsuite/gas/i386/sse3.d
@@ -36,4 +36,7 @@ Disassembly of section .text:
   70:	67 0f 01 c8 [ 	]*monitor %ax,%ecx,%edx
   74:	f2 0f 12 38 [ 	]*movddup \(%eax\),%xmm7
   78:	f2 0f 12 38 [ 	]*movddup \(%eax\),%xmm7
+[ 	]*[0-9a-f]+:	0f 01 c8[ 	]+monitor %eax,%ecx,%edx
+[ 	]*[0-9a-f]+:	67 0f 01 c8[ 	]+monitor %ax,%ecx,%edx
+[ 	]*[0-9a-f]+:	0f 01 c9[ 	]+mwait  %eax,%ecx
 #pass
--- a/gas/testsuite/gas/i386/sse3.s
+++ b/gas/testsuite/gas/i386/sse3.s
@@ -35,3 +35,7 @@ foo:
 	.intel_syntax noprefix
 	movddup xmm7,[eax]
 	movddup xmm7,QWORD PTR [eax]
+
+	monitor		eax, ecx, edx
+	monitor		ax, ecx, edx
+	mwait		eax, ecx
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-sse3-intel.d
@@ -0,0 +1,46 @@
+#objdump: -dwMintel
+#name: x86-64 SSE3 (Intel disassembly)
+#source: x86-64-sse3.s
+
+.*: +file format .*
+
+Disassembly of section .text:
+
+0+000 <foo>:
+[ 	]*[a-f0-9]+:	66 0f d0 01[ 	]+addsubpd xmm0,(XMMWORD PTR )?\[rcx\]
+[ 	]*[a-f0-9]+:	66 0f d0 ca[ 	]+addsubpd xmm1,xmm2
+[ 	]*[a-f0-9]+:	f2 0f d0 13[ 	]+addsubps xmm2,(XMMWORD PTR )?\[rbx\]
+[ 	]*[a-f0-9]+:	f2 0f d0 dc[ 	]+addsubps xmm3,xmm4
+[ 	]*[a-f0-9]+:	df 88 90 90 90 00[ 	]+fisttp WORD PTR \[rax\+0x909090\]
+[ 	]*[a-f0-9]+:	db 88 90 90 90 00[ 	]+fisttp DWORD PTR \[rax\+0x909090\]
+[ 	]*[a-f0-9]+:	dd 88 90 90 90 00[ 	]+fisttp QWORD PTR \[rax\+0x909090\]
+[ 	]*[a-f0-9]+:	66 0f 7c 65 00[ 	]+haddpd xmm4,(XMMWORD PTR )?\[rbp(\+0x0)\]
+[ 	]*[a-f0-9]+:	66 0f 7c ee[ 	]+haddpd xmm5,xmm6
+[ 	]*[a-f0-9]+:	f2 0f 7c 37[ 	]+haddps xmm6,(XMMWORD PTR )?\[rdi\]
+[ 	]*[a-f0-9]+:	f2 0f 7c f8[ 	]+haddps xmm7,xmm0
+[ 	]*[a-f0-9]+:	66 0f 7d c1[ 	]+hsubpd xmm0,xmm1
+[ 	]*[a-f0-9]+:	66 0f 7d 0a[ 	]+hsubpd xmm1,(XMMWORD PTR )?\[rdx\]
+[ 	]*[a-f0-9]+:	f2 0f 7d d2[ 	]+hsubps xmm2,xmm2
+[ 	]*[a-f0-9]+:	f2 0f 7d 1c 24[ 	]+hsubps xmm3,(XMMWORD PTR )?\[rsp\]
+[ 	]*[a-f0-9]+:	f2 0f f0 2e[ 	]+lddqu  xmm5,(XMMWORD PTR )?\[rsi\]
+[ 	]*[a-f0-9]+:	0f 01 c8[ 	]+monitor *
+[ 	]*[a-f0-9]+:	0f 01 c8[ 	]+monitor *
+[ 	]*[a-f0-9]+:	0f 01 c8[ 	]+monitor *
+[ 	]*[a-f0-9]+:	f2 0f 12 f7[ 	]+movddup xmm6,xmm7
+[ 	]*[a-f0-9]+:	f2 0f 12 38[ 	]+movddup xmm7,(QWORD PTR )?\[rax\]
+[ 	]*[a-f0-9]+:	f3 0f 16 01[ 	]+movshdup xmm0,(XMMWORD PTR )?\[rcx\]
+[ 	]*[a-f0-9]+:	f3 0f 16 ca[ 	]+movshdup xmm1,xmm2
+[ 	]*[a-f0-9]+:	f3 0f 12 13[ 	]+movsldup xmm2,(XMMWORD PTR )?\[rbx\]
+[ 	]*[a-f0-9]+:	f3 0f 12 dc[ 	]+movsldup xmm3,xmm4
+[ 	]*[a-f0-9]+:	0f 01 c9[ 	]+mwait *
+[ 	]*[a-f0-9]+:	0f 01 c9[ 	]+mwait *
+[ 	]*[a-f0-9]+:	0f 01 c9[ 	]+mwait *
+[ 	]*[a-f0-9]+:	67 0f 01 c8[ 	]+addr32 monitor *
+[ 	]*[a-f0-9]+:	67 0f 01 c8[ 	]+addr32 monitor *
+[ 	]*[a-f0-9]+:	67 0f 01 c8[ 	]+addr32 monitor *
+[ 	]*[a-f0-9]+:	f2 0f 12 38[ 	]+movddup xmm7,(QWORD PTR )?\[rax\]
+[ 	]*[a-f0-9]+:	f2 0f 12 38[ 	]+movddup xmm7,(QWORD PTR )?\[rax\]
+[ 	]*[a-f0-9]+:	0f 01 c8[ 	]+monitor *
+[ 	]*[a-f0-9]+:	67 0f 01 c8[ 	]+addr32 monitor *
+[ 	]*[a-f0-9]+:	0f 01 c9[ 	]+mwait *
+#pass
--- a/gas/testsuite/gas/i386/x86-64-sse3.d
+++ b/gas/testsuite/gas/i386/x86-64-sse3.d
@@ -39,4 +39,7 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	67 0f 01 c8 [ 	]*monitor %eax,%ecx,%edx
 [ 	]*[a-f0-9]+:	f2 0f 12 38 [ 	]*movddup \(%rax\),%xmm7
 [ 	]*[a-f0-9]+:	f2 0f 12 38 [ 	]*movddup \(%rax\),%xmm7
+[ 	]*[0-9a-f]+:	0f 01 c8[ 	]+monitor %rax,%ecx,%edx
+[ 	]*[0-9a-f]+:	67 0f 01 c8[ 	]+monitor %eax,%ecx,%edx
+[ 	]*[0-9a-f]+:	0f 01 c9[ 	]+mwait  %eax,%ecx
 #pass
--- a/gas/testsuite/gas/i386/x86-64-sse3.s
+++ b/gas/testsuite/gas/i386/x86-64-sse3.s
@@ -38,3 +38,7 @@ foo:
 	.intel_syntax noprefix
 	movddup xmm7,[rax]
 	movddup xmm7,QWORD PTR [rax]
+
+	monitor		rax, ecx, edx
+	monitor		eax, ecx, edx
+	mwait		eax, ecx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2 0/2] x86: MONITOR and alike adjustments
  2020-02-14 12:45 [PATCH v2 0/2] x86: MONITOR and alike adjustments Jan Beulich
  2020-02-14 12:47 ` [PATCH v2 2/2] x86: fold AddrPrefixOpReg templates Jan Beulich
  2020-02-14 12:47 ` [PATCH v2 1/2] x86/Intel: don't swap operands of MONITOR{,X} and MWAIT{,X} Jan Beulich
@ 2020-02-14 12:52 ` H.J. Lu
  2 siblings, 0 replies; 4+ messages in thread
From: H.J. Lu @ 2020-02-14 12:52 UTC (permalink / raw)
  To: Jan Beulich; +Cc: binutils

On Fri, Feb 14, 2020 at 4:45 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> 1: Intel: don't swap operands of MONITOR{,X} and MWAIT{,X}
> 2: x86: fold AddrPrefixOpReg templates
>
> The first patch was sent many years ago, and rejected. Now that
> I've looked into this again, I noticed that the reason to reject
> it was actually wrong. Hence the (extended) re-submission.

Both are OK.

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-02-14 12:52 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-14 12:45 [PATCH v2 0/2] x86: MONITOR and alike adjustments Jan Beulich
2020-02-14 12:47 ` [PATCH v2 2/2] x86: fold AddrPrefixOpReg templates Jan Beulich
2020-02-14 12:47 ` [PATCH v2 1/2] x86/Intel: don't swap operands of MONITOR{,X} and MWAIT{,X} Jan Beulich
2020-02-14 12:52 ` [PATCH v2 0/2] x86: MONITOR and alike adjustments H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).