public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] x86/APX: VROUND{P,S}{S,D} can generally be encoded
@ 2024-01-12 12:40 Jan Beulich
  0 siblings, 0 replies; only message in thread
From: Jan Beulich @ 2024-01-12 12:40 UTC (permalink / raw)
  To: Binutils; +Cc: Lili Cui, H.J. Lu

VRNDSCALE{P,S}{S,D} are the AVX512 generalization of the AVX
VROUND{P,S}{S,D} insns. As long as the immediate has its top 4 bits
clear, they are equivalent to the earlier VEX-encoded insns, and hence
their EVEX encodings can be used to permit use of eGPR-s in the memory
operand. Since an immediate with the top 4 bits clear is the normal way
of using these insns, also alter the resulting diagnostic to complain
about the immediate, not the eGPR use.

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -7068,7 +7068,8 @@ check_VecOperands (const insn_template *
     }
 
   /* Check the special Imm4 cases; must be the first operand.  */
-  if (is_cpu (t, CpuXOP) && t->operands == 5)
+  if ((is_cpu (t, CpuXOP) && t->operands == 5)
+      || (is_cpu (t, CpuAPX_F) && t->opcode_space == SPACE_0F3A))
     {
       if (i.op[0].imms->X_op != O_constant
 	  || !fits_in_imm4 (i.op[0].imms->X_add_number))
@@ -7078,7 +7079,8 @@ check_VecOperands (const insn_template *
 	}
 
       /* Turn off Imm<N> so that update_imm won't complain.  */
-      operand_type_set (&i.types[0], 0);
+      if (t->operands == 5)
+	operand_type_set (&i.types[0], 0);
     }
 
   /* Check vector Disp8 operand.  */
--- a/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.l
+++ b/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.l
@@ -187,10 +187,10 @@
 .*:195: Error: extended GPR cannot be used as base/index for `vrcpps'
 .*:196: Error: extended GPR cannot be used as base/index for `vrcpps'
 .*:197: Error: extended GPR cannot be used as base/index for `vrcpss'
-.*:198: Error: extended GPR cannot be used as base/index for `vroundpd'
-.*:199: Error: extended GPR cannot be used as base/index for `vroundps'
-.*:200: Error: extended GPR cannot be used as base/index for `vroundsd'
-.*:201: Error: extended GPR cannot be used as base/index for `vroundss'
+.*:198: Error: .* 4 bits for `vroundpd'
+.*:199: Error: .* 4 bits for `vroundps'
+.*:200: Error: .* 4 bits for `vroundsd'
+.*:201: Error: .* 4 bits for `vroundss'
 .*:202: Error: extended GPR cannot be used as base/index for `vrsqrtps'
 .*:203: Error: extended GPR cannot be used as base/index for `vrsqrtps'
 .*:204: Error: extended GPR cannot be used as base/index for `vrsqrtss'
--- a/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
+++ b/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
@@ -195,10 +195,10 @@
 	vrcpps (%r27),%xmm6
 	vrcpps (%r27),%ymm6
 	vrcpss (%r27),%xmm6,%xmm6
-	vroundpd $1,(%r24),%xmm6
-	vroundps $2,(%r24),%xmm6
-	vroundsd $3,(%r24),%xmm6,%xmm3
-	vroundss $4,(%r24),%xmm6,%xmm3
+	vroundpd $0x11,(%r24),%xmm6
+	vroundps $0x22,(%r24),%xmm6
+	vroundsd $0x33,(%r24),%xmm6,%xmm3
+	vroundss $0x44,(%r24),%xmm6,%xmm3
 	vrsqrtps (%r27),%xmm6
 	vrsqrtps (%r27),%ymm6
 	vrsqrtss (%r27),%xmm6,%xmm6
--- a/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-intel.d
+++ b/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-intel.d
@@ -158,6 +158,10 @@ Disassembly of section \.text:
 [	 ]*[a-f0-9]+:[	 ]*62 da 7f 08 4b b4 87 23 01 00 00[	 ]+tileloadd tmm6,\[r31\+rax\*4\+0x123\]
 [	 ]*[a-f0-9]+:[	 ]*62 da 7d 08 4b b4 87 23 01 00 00[	 ]+tileloaddt1 tmm6,\[r31\+rax\*4\+0x123\]
 [	 ]*[a-f0-9]+:[	 ]*62 da 7e 08 4b b4 87 23 01 00 00[	 ]+tilestored[	 ]+\[r31\+rax\*4\+0x123\],tmm6
+[	 ]*[a-f0-9]+:[	 ]*62 db fd 08 09 30 01[	 ]+vrndscalepd xmm6,XMMWORD PTR \[r24\],(0x)?1
+[	 ]*[a-f0-9]+:[	 ]*62 db 7d 08 08 30 02[	 ]+vrndscaleps xmm6,XMMWORD PTR \[r24\],(0x)?2
+[	 ]*[a-f0-9]+:[	 ]*62 db cd 08 0b 18 03[	 ]+vrndscalesd xmm3,xmm6,QWORD PTR \[r24\],(0x)?3
+[	 ]*[a-f0-9]+:[	 ]*62 db 4d 08 0a 18 04[	 ]+vrndscaless xmm3,xmm6,DWORD PTR \[r24\],(0x)?4
 [	 ]*[a-f0-9]+:[	 ]*62 4c 7c 08 66 8c 87 23 01 00 00[	 ]+wrssd[	 ]+\[r31\+rax\*4\+0x123\],r25d
 [	 ]*[a-f0-9]+:[	 ]*62 4c fc 08 66 bc 87 23 01 00 00[	 ]+wrssq[	 ]+\[r31\+rax\*4\+0x123\],r31
 [	 ]*[a-f0-9]+:[	 ]*62 4c 7d 08 65 8c 87 23 01 00 00[	 ]+wrussd[	 ]+\[r31\+rax\*4\+0x123\],r25d
--- a/gas/testsuite/gas/i386/x86-64-apx-evex-promoted.d
+++ b/gas/testsuite/gas/i386/x86-64-apx-evex-promoted.d
@@ -158,6 +158,10 @@ Disassembly of section \.text:
 [	 ]*[a-f0-9]+:[	 ]*62 da 7f 08 4b b4 87 23 01 00 00[	 ]+tileloadd[	 ]+0x123\(%r31,%rax,4\),%tmm6
 [	 ]*[a-f0-9]+:[	 ]*62 da 7d 08 4b b4 87 23 01 00 00[	 ]+tileloaddt1[	 ]+0x123\(%r31,%rax,4\),%tmm6
 [	 ]*[a-f0-9]+:[	 ]*62 da 7e 08 4b b4 87 23 01 00 00[	 ]+tilestored[	 ]+%tmm6,0x123\(%r31,%rax,4\)
+[	 ]*[a-f0-9]+:[	 ]*62 db fd 08 09 30 01[	 ]+vrndscalepd \$0x1,\(%r24\),%xmm6
+[	 ]*[a-f0-9]+:[	 ]*62 db 7d 08 08 30 02[	 ]+vrndscaleps \$0x2,\(%r24\),%xmm6
+[	 ]*[a-f0-9]+:[	 ]*62 db cd 08 0b 18 03[	 ]+vrndscalesd \$0x3,\(%r24\),%xmm6,%xmm3
+[	 ]*[a-f0-9]+:[	 ]*62 db 4d 08 0a 18 04[	 ]+vrndscaless \$0x4,\(%r24\),%xmm6,%xmm3
 [	 ]*[a-f0-9]+:[	 ]*62 4c 7c 08 66 8c 87 23 01 00 00[	 ]+wrssd[	 ]+%r25d,0x123\(%r31,%rax,4\)
 [	 ]*[a-f0-9]+:[	 ]*62 4c fc 08 66 bc 87 23 01 00 00[	 ]+wrssq[	 ]+%r31,0x123\(%r31,%rax,4\)
 [	 ]*[a-f0-9]+:[	 ]*62 4c 7d 08 65 8c 87 23 01 00 00[	 ]+wrussd[	 ]+%r25d,0x123\(%r31,%rax,4\)
--- a/gas/testsuite/gas/i386/x86-64-apx-evex-promoted.s
+++ b/gas/testsuite/gas/i386/x86-64-apx-evex-promoted.s
@@ -152,6 +152,10 @@ _start:
 	tileloadd	0x123(%r31,%rax,4),%tmm6
 	tileloaddt1	0x123(%r31,%rax,4),%tmm6
 	tilestored	%tmm6,0x123(%r31,%rax,4)
+	vroundpd $1,(%r24),%xmm6
+	vroundps $2,(%r24),%xmm6
+	vroundsd $3,(%r24),%xmm6,%xmm3
+	vroundss $4,(%r24),%xmm6,%xmm3
 	wrssd	%r25d,0x123(%r31,%rax,4)
 	wrssq	%r31,0x123(%r31,%rax,4)
 	wrussd	%r25d,0x123(%r31,%rax,4)
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -1793,6 +1793,10 @@ vrcpps, 0x53, AVX, Modrm|Vex|Space0F|Vex
 vrcpss, 0xf353, AVX, Modrm|Vex=3|Space0F|VexVVVV|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
 vroundp<sd>, 0x6608 | <sd:opc>, AVX, Modrm|Vex|Space0F3A|VexWIG|CheckOperandSize|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
 vrounds<sd>, 0x660a | <sd:opc>, AVX, Modrm|VexLIG|Space0F3A|VexVVVV|VexWIG|NoSuf, { Imm8, <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+// These are really clones of VRNDSCALE{P,S}{S,D}, with broadcast, masking, SAE,
+// 512-bit operand size, and register sources dropped.
+vroundp<sd>, 0x6608 | <sd:opc>, APX_F, Modrm|Space0F3A|<sd:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, Xmmword|Ymmword|Unspecified|BaseIndex, RegXMM|RegYMM }
+vrounds<sd>, 0x660a | <sd:opc>, APX_F, Modrm|EVexLIG|Space0F3A|VexVVVV|<sd:vexw>|Disp8MemShift|NoSuf, { Imm8, <sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
 vrsqrtps, 0x52, AVX, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
 vrsqrtss, 0xf352, AVX, Modrm|Vex=3|Space0F|VexVVVV|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
 vshufp<sd>, 0x<sd:ppfx>c6, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2024-01-12 12:40 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-12 12:40 [PATCH] x86/APX: VROUND{P,S}{S,D} can generally be encoded Jan Beulich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).