public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] x86/APX: optimize MOVBE
@ 2024-01-12 12:00 Jan Beulich
  2024-01-12 12:28 ` Jan Beulich
  0 siblings, 1 reply; 3+ messages in thread
From: Jan Beulich @ 2024-01-12 12:00 UTC (permalink / raw)
  To: Binutils; +Cc: H.J. Lu, Hu, Lin1

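When source and destination are the same register, MOVBE can be covered by
the NDD-to-legacy conversion logic as well, even though in this case the
original insn doesn't use an NDD encoding. The size savings are even better
here, because the replacement (BSWAP) has no ModR/M byte. Only 32- and
64-bit operands are converted; the 16-bit form is left alone, as BSWAP's
result is undefined for 16-bit operands.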

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -7872,7 +7872,8 @@ match_template (char mnem_suffix)
 	  && t + 1 < current_templates.end
 	  && !t[1].opcode_modifier.evex
 	  && t[1].opcode_space <= SPACE_0F38
-	  && t->opcode_modifier.vexvvvv == VexVVVV_DST
+	  && (t->opcode_modifier.vexvvvv == VexVVVV_DST
+	      || t->mnem_off == MN_movbe)
 	  && (i.types[i.operands - 1].bitfield.dword
 	      || i.types[i.operands - 1].bitfield.qword))
 	{
--- a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d
+++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d
@@ -118,6 +118,9 @@ Disassembly of section .text:
 \s*[a-f0-9]+:\s*67 0f 4d 90 90 90 90 90 	cmovge -0x6f6f6f70\(%eax\),%edx
 \s*[a-f0-9]+:\s*67 0f 4e 90 90 90 90 90 	cmovle -0x6f6f6f70\(%eax\),%edx
 \s*[a-f0-9]+:\s*67 0f 4f 90 90 90 90 90 	cmovg  -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*62 f4 7d 08 60 c0    	movbe  %ax,%ax
+\s*[a-f0-9]+:\s*49 0f c8             	bswap  %r8
+\s*[a-f0-9]+:\s*d5 98 c8             	bswap  %r16
 \s*[a-f0-9]+:\s*66 0f 38 f6 c3       	adcx   %ebx,%eax
 \s*[a-f0-9]+:\s*66 0f 38 f6 c3       	adcx   %ebx,%eax
 \s*[a-f0-9]+:\s*62 f4 fd 18 66 c3    	adcx   %rbx,%rax,%rax
--- a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s
+++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s
@@ -111,6 +111,9 @@ cmovl  0x90909090(%eax),%edx,%edx
 cmovge 0x90909090(%eax),%edx,%edx
 cmovle 0x90909090(%eax),%edx,%edx
 cmovg  0x90909090(%eax),%edx,%edx
+movbe  %ax,%ax
+movbe  %r8,%r8
+movbe  %r16,%r16
 adcx   %ebx,%eax,%eax
 adcx   %eax,%ebx,%eax
 adcx   %rbx,%rax,%rax
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -210,6 +210,9 @@ mov, 0xf24, i386&No64, D|RegMem|IgnoreSi
 // Move after swapping the bytes
 movbe, 0x0f38f0, Movbe, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 movbe, 0x60, Movbe&APX_F, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4, { Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
+// This really is BSWAP, but wants "duplicating" here for easy EVEX -> REX2
+// conversion, restarting with the next sequential template.
+movbe, 0xfc8, Movbe&APX_F, No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64 }
 
 // Move with sign extend.
 movsb, 0xfbe, i386, Modrm|No_bSuf|No_sSuf, { Reg8|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-01-16  2:03 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-12 12:00 [PATCH] x86/APX: optimize MOVBE Jan Beulich
2024-01-12 12:28 ` Jan Beulich
2024-01-16  2:01   ` Hu, Lin1

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).