* [PATCH] x86/APX: VROUND{P,S}{S,D} can generally be encoded
@ 2024-01-12 12:40 Jan Beulich
0 siblings, 0 replies; only message in thread
From: Jan Beulich @ 2024-01-12 12:40 UTC (permalink / raw)
To: Binutils; +Cc: Lili Cui, H.J. Lu
VRNDSCALE{P,S}{S,D} are the AVX512 generalizations of the AVX
VROUND{P,S}{S,D} insns. As long as the immediate has its top 4 bits
clear, they are equivalent to the earlier VEX-encoded insns, and hence
their EVEX encodings can be used to permit use of eGPR-s in the memory
operand. Since an immediate with the top 4 bits clear is the normal way
of using these insns, also alter the diagnostic for the remaining
invalid case to complain about the immediate, not the eGPR use.
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -7068,7 +7068,8 @@ check_VecOperands (const insn_template *
}
/* Check the special Imm4 cases; must be the first operand. */
- if (is_cpu (t, CpuXOP) && t->operands == 5)
+ if ((is_cpu (t, CpuXOP) && t->operands == 5)
+ || (is_cpu (t, CpuAPX_F) && t->opcode_space == SPACE_0F3A))
{
if (i.op[0].imms->X_op != O_constant
|| !fits_in_imm4 (i.op[0].imms->X_add_number))
@@ -7078,7 +7079,8 @@ check_VecOperands (const insn_template *
}
/* Turn off Imm<N> so that update_imm won't complain. */
- operand_type_set (&i.types[0], 0);
+ if (t->operands == 5)
+ operand_type_set (&i.types[0], 0);
}
/* Check vector Disp8 operand. */
--- a/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.l
+++ b/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.l
@@ -187,10 +187,10 @@
.*:195: Error: extended GPR cannot be used as base/index for `vrcpps'
.*:196: Error: extended GPR cannot be used as base/index for `vrcpps'
.*:197: Error: extended GPR cannot be used as base/index for `vrcpss'
-.*:198: Error: extended GPR cannot be used as base/index for `vroundpd'
-.*:199: Error: extended GPR cannot be used as base/index for `vroundps'
-.*:200: Error: extended GPR cannot be used as base/index for `vroundsd'
-.*:201: Error: extended GPR cannot be used as base/index for `vroundss'
+.*:198: Error: .* 4 bits for `vroundpd'
+.*:199: Error: .* 4 bits for `vroundps'
+.*:200: Error: .* 4 bits for `vroundsd'
+.*:201: Error: .* 4 bits for `vroundss'
.*:202: Error: extended GPR cannot be used as base/index for `vrsqrtps'
.*:203: Error: extended GPR cannot be used as base/index for `vrsqrtps'
.*:204: Error: extended GPR cannot be used as base/index for `vrsqrtss'
--- a/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
+++ b/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
@@ -195,10 +195,10 @@
vrcpps (%r27),%xmm6
vrcpps (%r27),%ymm6
vrcpss (%r27),%xmm6,%xmm6
- vroundpd $1,(%r24),%xmm6
- vroundps $2,(%r24),%xmm6
- vroundsd $3,(%r24),%xmm6,%xmm3
- vroundss $4,(%r24),%xmm6,%xmm3
+ vroundpd $0x11,(%r24),%xmm6
+ vroundps $0x22,(%r24),%xmm6
+ vroundsd $0x33,(%r24),%xmm6,%xmm3
+ vroundss $0x44,(%r24),%xmm6,%xmm3
vrsqrtps (%r27),%xmm6
vrsqrtps (%r27),%ymm6
vrsqrtss (%r27),%xmm6,%xmm6
--- a/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-intel.d
+++ b/gas/testsuite/gas/i386/x86-64-apx-evex-promoted-intel.d
@@ -158,6 +158,10 @@ Disassembly of section \.text:
[ ]*[a-f0-9]+:[ ]*62 da 7f 08 4b b4 87 23 01 00 00[ ]+tileloadd tmm6,\[r31\+rax\*4\+0x123\]
[ ]*[a-f0-9]+:[ ]*62 da 7d 08 4b b4 87 23 01 00 00[ ]+tileloaddt1 tmm6,\[r31\+rax\*4\+0x123\]
[ ]*[a-f0-9]+:[ ]*62 da 7e 08 4b b4 87 23 01 00 00[ ]+tilestored[ ]+\[r31\+rax\*4\+0x123\],tmm6
+[ ]*[a-f0-9]+:[ ]*62 db fd 08 09 30 01[ ]+vrndscalepd xmm6,XMMWORD PTR \[r24\],(0x)?1
+[ ]*[a-f0-9]+:[ ]*62 db 7d 08 08 30 02[ ]+vrndscaleps xmm6,XMMWORD PTR \[r24\],(0x)?2
+[ ]*[a-f0-9]+:[ ]*62 db cd 08 0b 18 03[ ]+vrndscalesd xmm3,xmm6,QWORD PTR \[r24\],(0x)?3
+[ ]*[a-f0-9]+:[ ]*62 db 4d 08 0a 18 04[ ]+vrndscaless xmm3,xmm6,DWORD PTR \[r24\],(0x)?4
[ ]*[a-f0-9]+:[ ]*62 4c 7c 08 66 8c 87 23 01 00 00[ ]+wrssd[ ]+\[r31\+rax\*4\+0x123\],r25d
[ ]*[a-f0-9]+:[ ]*62 4c fc 08 66 bc 87 23 01 00 00[ ]+wrssq[ ]+\[r31\+rax\*4\+0x123\],r31
[ ]*[a-f0-9]+:[ ]*62 4c 7d 08 65 8c 87 23 01 00 00[ ]+wrussd[ ]+\[r31\+rax\*4\+0x123\],r25d
--- a/gas/testsuite/gas/i386/x86-64-apx-evex-promoted.d
+++ b/gas/testsuite/gas/i386/x86-64-apx-evex-promoted.d
@@ -158,6 +158,10 @@ Disassembly of section \.text:
[ ]*[a-f0-9]+:[ ]*62 da 7f 08 4b b4 87 23 01 00 00[ ]+tileloadd[ ]+0x123\(%r31,%rax,4\),%tmm6
[ ]*[a-f0-9]+:[ ]*62 da 7d 08 4b b4 87 23 01 00 00[ ]+tileloaddt1[ ]+0x123\(%r31,%rax,4\),%tmm6
[ ]*[a-f0-9]+:[ ]*62 da 7e 08 4b b4 87 23 01 00 00[ ]+tilestored[ ]+%tmm6,0x123\(%r31,%rax,4\)
+[ ]*[a-f0-9]+:[ ]*62 db fd 08 09 30 01[ ]+vrndscalepd \$0x1,\(%r24\),%xmm6
+[ ]*[a-f0-9]+:[ ]*62 db 7d 08 08 30 02[ ]+vrndscaleps \$0x2,\(%r24\),%xmm6
+[ ]*[a-f0-9]+:[ ]*62 db cd 08 0b 18 03[ ]+vrndscalesd \$0x3,\(%r24\),%xmm6,%xmm3
+[ ]*[a-f0-9]+:[ ]*62 db 4d 08 0a 18 04[ ]+vrndscaless \$0x4,\(%r24\),%xmm6,%xmm3
[ ]*[a-f0-9]+:[ ]*62 4c 7c 08 66 8c 87 23 01 00 00[ ]+wrssd[ ]+%r25d,0x123\(%r31,%rax,4\)
[ ]*[a-f0-9]+:[ ]*62 4c fc 08 66 bc 87 23 01 00 00[ ]+wrssq[ ]+%r31,0x123\(%r31,%rax,4\)
[ ]*[a-f0-9]+:[ ]*62 4c 7d 08 65 8c 87 23 01 00 00[ ]+wrussd[ ]+%r25d,0x123\(%r31,%rax,4\)
--- a/gas/testsuite/gas/i386/x86-64-apx-evex-promoted.s
+++ b/gas/testsuite/gas/i386/x86-64-apx-evex-promoted.s
@@ -152,6 +152,10 @@ _start:
tileloadd 0x123(%r31,%rax,4),%tmm6
tileloaddt1 0x123(%r31,%rax,4),%tmm6
tilestored %tmm6,0x123(%r31,%rax,4)
+ vroundpd $1,(%r24),%xmm6
+ vroundps $2,(%r24),%xmm6
+ vroundsd $3,(%r24),%xmm6,%xmm3
+ vroundss $4,(%r24),%xmm6,%xmm3
wrssd %r25d,0x123(%r31,%rax,4)
wrssq %r31,0x123(%r31,%rax,4)
wrussd %r25d,0x123(%r31,%rax,4)
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -1793,6 +1793,10 @@ vrcpps, 0x53, AVX, Modrm|Vex|Space0F|Vex
vrcpss, 0xf353, AVX, Modrm|Vex=3|Space0F|VexVVVV|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
vroundp<sd>, 0x6608 | <sd:opc>, AVX, Modrm|Vex|Space0F3A|VexWIG|CheckOperandSize|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
vrounds<sd>, 0x660a | <sd:opc>, AVX, Modrm|VexLIG|Space0F3A|VexVVVV|VexWIG|NoSuf, { Imm8, <sd:elem>|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
+// These are really clones of VRNDSCALE{P,S}{S,D}, with broadcast, masking, SAE,
+// 512-bit operand size, and register sources dropped.
+vroundp<sd>, 0x6608 | <sd:opc>, APX_F, Modrm|Space0F3A|<sd:vexw>|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, Xmmword|Ymmword|Unspecified|BaseIndex, RegXMM|RegYMM }
+vrounds<sd>, 0x660a | <sd:opc>, APX_F, Modrm|EVexLIG|Space0F3A|VexVVVV|<sd:vexw>|Disp8MemShift|NoSuf, { Imm8, <sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
vrsqrtps, 0x52, AVX, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
vrsqrtss, 0xf352, AVX, Modrm|Vex=3|Space0F|VexVVVV|VexWIG|NoSuf, { Dword|Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
vshufp<sd>, 0x<sd:ppfx>c6, AVX, Modrm|Vex|Space0F|VexVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2024-01-12 12:40 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-12 12:40 [PATCH] x86/APX: VROUND{P,S}{S,D} can generally be encoded Jan Beulich
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).