* [PATCH] x86: VCMPSH is Evex.LLIG
@ 2022-04-14 14:12 Jan Beulich
2022-04-14 15:22 ` H.J. Lu
0 siblings, 1 reply; 6+ messages in thread
From: Jan Beulich @ 2022-04-14 14:12 UTC (permalink / raw)
To: Binutils
The VCMPSH opcode table entries were mistakenly flagged as EVEX.128 instead
of EVEX.LLIG. Getting the LLIG status right for insns allowing for SAE is a
prerequisite for planned further work.
--- a/gas/testsuite/gas/i386/evex-lig.s
+++ b/gas/testsuite/gas/i386/evex-lig.s
@@ -1703,6 +1703,13 @@ _start:
vrndscaless $123, -512(%edx), %xmm5, %xmm6{%k7} # AVX512 Disp8
vrndscaless $123, -516(%edx), %xmm5, %xmm6{%k7} # AVX512
+ vcmpsh $123, %xmm4, %xmm5, %k5 # AVX512-FP16
+ vcmpsh $123, {sae}, %xmm4, %xmm5, %k5{%k7} # AVX512-FP16
+ vcmpsh $123, (%ecx), %xmm5, %k5 # AVX512-FP16
+ vcmpsh $123, -123456(%esp, %esi, 8), %xmm5, %k5{%k7} # AVX512-FP16
+ vcmpsh $123, 254(%ecx), %xmm5, %k5 # AVX512-FP16 Disp8
+ vcmpsh $123, -256(%edx), %xmm5, %k5{%k7} # AVX512-FP16 Disp8
+
.intel_syntax noprefix
vaddsd xmm6{k7}, xmm5, xmm4 # AVX512
vaddsd xmm6{k7}{z}, xmm5, xmm4 # AVX512
@@ -3403,3 +3410,9 @@ _start:
vrndscaless xmm6{k7}, xmm5, DWORD PTR [edx-512], 123 # AVX512 Disp8
vrndscaless xmm6{k7}, xmm5, DWORD PTR [edx-516], 123 # AVX512
+ vcmpsh k5, xmm5, xmm4, 123 # AVX512-FP16
+ vcmpsh k5{k7}, xmm5, xmm4, {sae}, 123 # AVX512-FP16
+ vcmpsh k5, xmm5, WORD PTR [ecx], 123 # AVX512-FP16
+ vcmpsh k5{k7}, xmm5, WORD PTR [esp+esi*8-123456], 123 # AVX512-FP16
+ vcmpsh k5, xmm5, WORD PTR [ecx+254], 123 # AVX512-FP16 Disp8
+ vcmpsh k5{k7}, xmm5, WORD PTR [edx-256], 123 # AVX512-FP16 Disp8
--- a/gas/testsuite/gas/i386/evex-lig256-intel.d
+++ b/gas/testsuite/gas/i386/evex-lig256-intel.d
@@ -1536,6 +1536,12 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
[ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
[ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
+[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
+[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh k5\{k7\},xmm5,xmm4,\{sae\},0x7b
+[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh k5,xmm5,WORD PTR \[ecx\],0x7b
+[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
+[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR \[ecx\+0xfe\],0x7b
+[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
[ ]*[a-f0-9]+: 62 f1 d7 2f 58 f4 vaddsd xmm6\{k7\},xmm5,xmm4
[ ]*[a-f0-9]+: 62 f1 d7 af 58 f4 vaddsd xmm6\{k7\}\{z\},xmm5,xmm4
[ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
@@ -3063,4 +3069,10 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
[ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
[ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
+[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
+[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh k5\{k7\},xmm5,xmm4,\{sae\},0x7b
+[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh k5,xmm5,WORD PTR \[ecx\],0x7b
+[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
+[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR \[ecx\+0xfe\],0x7b
+[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
#pass
--- a/gas/testsuite/gas/i386/evex-lig256.d
+++ b/gas/testsuite/gas/i386/evex-lig256.d
@@ -1536,6 +1536,12 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
[ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless \$0x7b,-0x200\(%edx\),%xmm5,%xmm6\{%k7\}
[ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-0x204\(%edx\),%xmm5,%xmm6\{%k7\}
+[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh \$0x7b,%xmm4,%xmm5,%k5
+[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
+[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh \$0x7b,\(%ecx\),%xmm5,%k5
+[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
+[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh \$0x7b,0xfe\(%ecx\),%xmm5,%k5
+[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh \$0x7b,-0x100\(%edx\),%xmm5,%k5\{%k7\}
[ ]*[a-f0-9]+: 62 f1 d7 2f 58 f4 vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
[ ]*[a-f0-9]+: 62 f1 d7 af 58 f4 vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
[ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd \{rn-sae\},%xmm4,%xmm5,%xmm6\{%k7\}
@@ -3063,4 +3069,10 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
[ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless \$0x7b,-0x200\(%edx\),%xmm5,%xmm6\{%k7\}
[ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-0x204\(%edx\),%xmm5,%xmm6\{%k7\}
+[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh \$0x7b,%xmm4,%xmm5,%k5
+[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
+[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh \$0x7b,\(%ecx\),%xmm5,%k5
+[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
+[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh \$0x7b,0xfe\(%ecx\),%xmm5,%k5
+[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh \$0x7b,-0x100\(%edx\),%xmm5,%k5\{%k7\}
#pass
--- a/gas/testsuite/gas/i386/evex-lig512-intel.d
+++ b/gas/testsuite/gas/i386/evex-lig512-intel.d
@@ -1536,6 +1536,12 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
[ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
[ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
+[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
+[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh k5\{k7\},xmm5,xmm4,\{sae\},0x7b
+[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh k5,xmm5,WORD PTR \[ecx\],0x7b
+[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
+[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR \[ecx\+0xfe\],0x7b
+[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
[ ]*[a-f0-9]+: 62 f1 d7 4f 58 f4 vaddsd xmm6\{k7\},xmm5,xmm4
[ ]*[a-f0-9]+: 62 f1 d7 cf 58 f4 vaddsd xmm6\{k7\}\{z\},xmm5,xmm4
[ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
@@ -3063,4 +3069,10 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
[ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
[ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
+[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
+[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh k5\{k7\},xmm5,xmm4,\{sae\},0x7b
+[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh k5,xmm5,WORD PTR \[ecx\],0x7b
+[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
+[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR \[ecx\+0xfe\],0x7b
+[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
#pass
--- a/gas/testsuite/gas/i386/evex-lig512.d
+++ b/gas/testsuite/gas/i386/evex-lig512.d
@@ -1536,6 +1536,12 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
[ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless \$0x7b,-0x200\(%edx\),%xmm5,%xmm6\{%k7\}
[ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-0x204\(%edx\),%xmm5,%xmm6\{%k7\}
+[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh \$0x7b,%xmm4,%xmm5,%k5
+[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
+[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh \$0x7b,\(%ecx\),%xmm5,%k5
+[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
+[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh \$0x7b,0xfe\(%ecx\),%xmm5,%k5
+[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh \$0x7b,-0x100\(%edx\),%xmm5,%k5\{%k7\}
[ ]*[a-f0-9]+: 62 f1 d7 4f 58 f4 vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
[ ]*[a-f0-9]+: 62 f1 d7 cf 58 f4 vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
[ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd \{rn-sae\},%xmm4,%xmm5,%xmm6\{%k7\}
@@ -3063,4 +3069,10 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
[ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless \$0x7b,-0x200\(%edx\),%xmm5,%xmm6\{%k7\}
[ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-0x204\(%edx\),%xmm5,%xmm6\{%k7\}
+[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh \$0x7b,%xmm4,%xmm5,%k5
+[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
+[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh \$0x7b,\(%ecx\),%xmm5,%k5
+[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
+[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh \$0x7b,0xfe\(%ecx\),%xmm5,%k5
+[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh \$0x7b,-0x100\(%edx\),%xmm5,%k5\{%k7\}
#pass
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -3739,10 +3739,10 @@ vcmp<avx_frel>ph, 0xc2, 0x<avx_frel:imm>
vcmpph, 0xc2, None, CpuAVX512_FP16, Modrm|Masking=2|Space0F3A|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
vcmpph, 0xc2, None, CpuAVX512_FP16, Modrm|EVex512|Masking=2|Space0F3A|VexVVVV=1|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegZMM, RegZMM, RegMask }
-vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16, Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, { RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
-vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16, Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM, RegMask }
-vcmpsh, 0xf3c2, None, CpuAVX512_FP16, Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
-vcmpsh, 0xf3c2, None, CpuAVX512_FP16, Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM, RegMask }
+vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16, Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, { RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
+vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16, Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM, RegMask }
+vcmpsh, 0xf3c2, None, CpuAVX512_FP16, Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
+vcmpsh, 0xf3c2, None, CpuAVX512_FP16, Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM, RegMask }
vcomish, 0x2f, None, CpuAVX512_FP16, Modrm|EVexLIG|EVexMap5|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Word|Unspecified|BaseIndex, RegXMM }
vcomish, 0x2f, None, CpuAVX512_FP16, Modrm|EVexLIG|EVexMap5|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, RegXMM, RegXMM }
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] x86: VCMPSH is Evex.LLIG
2022-04-14 14:12 [PATCH] x86: VCMPSH is Evex.LLIG Jan Beulich
@ 2022-04-14 15:22 ` H.J. Lu
2022-04-14 16:24 ` Cui, Lili
0 siblings, 1 reply; 6+ messages in thread
From: H.J. Lu @ 2022-04-14 15:22 UTC (permalink / raw)
To: Jan Beulich, Lili Cui; +Cc: Binutils
On Thu, Apr 14, 2022 at 7:12 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> These were mistakenly flagged as Evex.128. Getting the LLIG status right
> for insns allowing for SAE is a prereq for planned further work.
>
> --- a/gas/testsuite/gas/i386/evex-lig.s
> +++ b/gas/testsuite/gas/i386/evex-lig.s
> @@ -1703,6 +1703,13 @@ _start:
> vrndscaless $123, -512(%edx), %xmm5, %xmm6{%k7} # AVX512 Disp8
> vrndscaless $123, -516(%edx), %xmm5, %xmm6{%k7} # AVX512
>
> + vcmpsh $123, %xmm4, %xmm5, %k5 # AVX512-FP16
> + vcmpsh $123, {sae}, %xmm4, %xmm5, %k5{%k7} # AVX512-FP16
> + vcmpsh $123, (%ecx), %xmm5, %k5 # AVX512-FP16
> + vcmpsh $123, -123456(%esp, %esi, 8), %xmm5, %k5{%k7} # AVX512-FP16
> + vcmpsh $123, 254(%ecx), %xmm5, %k5 # AVX512-FP16 Disp8
> + vcmpsh $123, -256(%edx), %xmm5, %k5{%k7} # AVX512-FP16 Disp8
> +
> .intel_syntax noprefix
> vaddsd xmm6{k7}, xmm5, xmm4 # AVX512
> vaddsd xmm6{k7}{z}, xmm5, xmm4 # AVX512
> @@ -3403,3 +3410,9 @@ _start:
> vrndscaless xmm6{k7}, xmm5, DWORD PTR [edx-512], 123 # AVX512 Disp8
> vrndscaless xmm6{k7}, xmm5, DWORD PTR [edx-516], 123 # AVX512
>
> + vcmpsh k5, xmm5, xmm4, 123 # AVX512-FP16
> + vcmpsh k5{k7}, xmm5, xmm4, {sae}, 123 # AVX512-FP16
> + vcmpsh k5, xmm5, WORD PTR [ecx], 123 # AVX512-FP16
> + vcmpsh k5{k7}, xmm5, WORD PTR [esp+esi*8-123456], 123 # AVX512-FP16
> + vcmpsh k5, xmm5, WORD PTR [ecx+254], 123 # AVX512-FP16 Disp8
> + vcmpsh k5{k7}, xmm5, WORD PTR [edx-256], 123 # AVX512-FP16 Disp8
> --- a/gas/testsuite/gas/i386/evex-lig256-intel.d
> +++ b/gas/testsuite/gas/i386/evex-lig256-intel.d
> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh k5,xmm5,WORD PTR \[ecx\],0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR \[ecx\+0xfe\],0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> [ ]*[a-f0-9]+: 62 f1 d7 2f 58 f4 vaddsd xmm6\{k7\},xmm5,xmm4
> [ ]*[a-f0-9]+: 62 f1 d7 af 58 f4 vaddsd xmm6\{k7\}\{z\},xmm5,xmm4
> [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh k5,xmm5,WORD PTR \[ecx\],0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR \[ecx\+0xfe\],0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> #pass
> --- a/gas/testsuite/gas/i386/evex-lig256.d
> +++ b/gas/testsuite/gas/i386/evex-lig256.d
> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless \$0x7b,-0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh \$0x7b,%xmm4,%xmm5,%k5
> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh \$0x7b,\(%ecx\),%xmm5,%k5
> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh \$0x7b,-0x100\(%edx\),%xmm5,%k5\{%k7\}
> [ ]*[a-f0-9]+: 62 f1 d7 2f 58 f4 vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
> [ ]*[a-f0-9]+: 62 f1 d7 af 58 f4 vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
> [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd \{rn-sae\},%xmm4,%xmm5,%xmm6\{%k7\}
> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless \$0x7b,-0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh \$0x7b,%xmm4,%xmm5,%k5
> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh \$0x7b,\(%ecx\),%xmm5,%k5
> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh \$0x7b,-0x100\(%edx\),%xmm5,%k5\{%k7\}
> #pass
> --- a/gas/testsuite/gas/i386/evex-lig512-intel.d
> +++ b/gas/testsuite/gas/i386/evex-lig512-intel.d
> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh k5,xmm5,WORD PTR \[ecx\],0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR \[ecx\+0xfe\],0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> [ ]*[a-f0-9]+: 62 f1 d7 4f 58 f4 vaddsd xmm6\{k7\},xmm5,xmm4
> [ ]*[a-f0-9]+: 62 f1 d7 cf 58 f4 vaddsd xmm6\{k7\}\{z\},xmm5,xmm4
> [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh k5,xmm5,WORD PTR \[ecx\],0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR \[ecx\+0xfe\],0x7b
> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> #pass
> --- a/gas/testsuite/gas/i386/evex-lig512.d
> +++ b/gas/testsuite/gas/i386/evex-lig512.d
> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless \$0x7b,-0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh \$0x7b,%xmm4,%xmm5,%k5
> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh \$0x7b,\(%ecx\),%xmm5,%k5
> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh \$0x7b,-0x100\(%edx\),%xmm5,%k5\{%k7\}
> [ ]*[a-f0-9]+: 62 f1 d7 4f 58 f4 vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
> [ ]*[a-f0-9]+: 62 f1 d7 cf 58 f4 vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
> [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd \{rn-sae\},%xmm4,%xmm5,%xmm6\{%k7\}
> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless \$0x7b,-0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh \$0x7b,%xmm4,%xmm5,%k5
> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh \$0x7b,\(%ecx\),%xmm5,%k5
> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh \$0x7b,-0x100\(%edx\),%xmm5,%k5\{%k7\}
> #pass
> --- a/opcodes/i386-opc.tbl
> +++ b/opcodes/i386-opc.tbl
> @@ -3739,10 +3739,10 @@ vcmp<avx_frel>ph, 0xc2, 0x<avx_frel:imm>
> vcmpph, 0xc2, None, CpuAVX512_FP16, Modrm|Masking=2|Space0F3A|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
> vcmpph, 0xc2, None, CpuAVX512_FP16, Modrm|EVex512|Masking=2|Space0F3A|VexVVVV=1|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegZMM, RegZMM, RegMask }
>
> -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16, Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, { RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16, Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM, RegMask }
> -vcmpsh, 0xf3c2, None, CpuAVX512_FP16, Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> -vcmpsh, 0xf3c2, None, CpuAVX512_FP16, Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM, RegMask }
> +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16, Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, { RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16, Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM, RegMask }
> +vcmpsh, 0xf3c2, None, CpuAVX512_FP16, Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> +vcmpsh, 0xf3c2, None, CpuAVX512_FP16, Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM, RegMask }
>
> vcomish, 0x2f, None, CpuAVX512_FP16, Modrm|EVexLIG|EVexMap5|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Word|Unspecified|BaseIndex, RegXMM }
> vcomish, 0x2f, None, CpuAVX512_FP16, Modrm|EVexLIG|EVexMap5|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, RegXMM, RegXMM }
>
Lili, does it look OK?
Thanks.
--
H.J.
^ permalink raw reply [flat|nested] 6+ messages in thread
* RE: [PATCH] x86: VCMPSH is Evex.LLIG
2022-04-14 15:22 ` H.J. Lu
@ 2022-04-14 16:24 ` Cui, Lili
2022-04-14 16:34 ` H.J. Lu
0 siblings, 1 reply; 6+ messages in thread
From: Cui, Lili @ 2022-04-14 16:24 UTC (permalink / raw)
To: H.J. Lu, Beulich, Jan; +Cc: Binutils
> -----Original Message-----
> From: H.J. Lu <hjl.tools@gmail.com>
> Sent: Thursday, April 14, 2022 11:22 PM
> To: Beulich, Jan <JBeulich@suse.com>; Cui, Lili <lili.cui@intel.com>
> Cc: Binutils <binutils@sourceware.org>
> Subject: Re: [PATCH] x86: VCMPSH is Evex.LLIG
>
> On Thu, Apr 14, 2022 at 7:12 AM Jan Beulich <jbeulich@suse.com> wrote:
> >
> > These were mistakenly flagged as Evex.128. Getting the LLIG status
> > right for insns allowing for SAE is a prereq for planned further work.
> >
> > --- a/gas/testsuite/gas/i386/evex-lig.s
> > +++ b/gas/testsuite/gas/i386/evex-lig.s
> > @@ -1703,6 +1703,13 @@ _start:
> > vrndscaless $123, -512(%edx), %xmm5, %xmm6{%k7} # AVX512
> Disp8
> > vrndscaless $123, -516(%edx), %xmm5, %xmm6{%k7} # AVX512
> >
> > + vcmpsh $123, %xmm4, %xmm5, %k5 # AVX512-FP16
> > + vcmpsh $123, {sae}, %xmm4, %xmm5, %k5{%k7} # AVX512-FP16
> > + vcmpsh $123, (%ecx), %xmm5, %k5 # AVX512-FP16
> > + vcmpsh $123, -123456(%esp, %esi, 8), %xmm5, %k5{%k7} # AVX512-
> FP16
> > + vcmpsh $123, 254(%ecx), %xmm5, %k5 # AVX512-FP16 Disp8
> > + vcmpsh $123, -256(%edx), %xmm5, %k5{%k7} # AVX512-FP16
> Disp8
> > +
> > .intel_syntax noprefix
> > vaddsd xmm6{k7}, xmm5, xmm4 # AVX512
> > vaddsd xmm6{k7}{z}, xmm5, xmm4 # AVX512 @@ -3403,3 +3410,9
> > @@ _start:
> > vrndscaless xmm6{k7}, xmm5, DWORD PTR [edx-512], 123 #
> AVX512 Disp8
> > vrndscaless xmm6{k7}, xmm5, DWORD PTR [edx-516], 123 #
> AVX512
> >
> > + vcmpsh k5, xmm5, xmm4, 123 # AVX512-FP16
> > + vcmpsh k5{k7}, xmm5, xmm4, {sae}, 123 # AVX512-FP16
> > + vcmpsh k5, xmm5, WORD PTR [ecx], 123 # AVX512-FP16
> > + vcmpsh k5{k7}, xmm5, WORD PTR [esp+esi*8-123456], 123 #
> AVX512-FP16
> > + vcmpsh k5, xmm5, WORD PTR [ecx+254], 123 # AVX512-FP16
> Disp8
> > + vcmpsh k5{k7}, xmm5, WORD PTR [edx-256], 123 # AVX512-FP16
> Disp8
> > --- a/gas/testsuite/gas/i386/evex-lig256-intel.d
> > +++ b/gas/testsuite/gas/i386/evex-lig256-intel.d
> > @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> > [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> > [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> > [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh k5,xmm5,WORD PTR
> \[ecx\],0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh
> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR
> \[ecx\+0xfe\],0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh
> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> > [ ]*[a-f0-9]+: 62 f1 d7 2f 58 f4 vaddsd xmm6\{k7\},xmm5,xmm4
> > [ ]*[a-f0-9]+: 62 f1 d7 af 58 f4 vaddsd
> xmm6\{k7\}\{z\},xmm5,xmm4
> > [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd
> xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
> > @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> > [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> > [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> > [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh k5,xmm5,WORD PTR
> \[ecx\],0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh
> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR
> \[ecx\+0xfe\],0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh
> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> > #pass
> > --- a/gas/testsuite/gas/i386/evex-lig256.d
> > +++ b/gas/testsuite/gas/i386/evex-lig256.d
> > @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> > [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless
> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless \$0x7b,-
> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-
> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh
> \$0x7b,%xmm4,%xmm5,%k5
> > +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh
> \$0x7b,\(%ecx\),%xmm5,%k5
> > +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-
> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh
> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> > +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh \$0x7b,-
> 0x100\(%edx\),%xmm5,%k5\{%k7\}
> > [ ]*[a-f0-9]+: 62 f1 d7 2f 58 f4
> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
> > [ ]*[a-f0-9]+: 62 f1 d7 af 58 f4
> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
> > [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd \{rn-
> sae\},%xmm4,%xmm5,%xmm6\{%k7\}
> > @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> > [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless
> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless \$0x7b,-
> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-
> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh
> \$0x7b,%xmm4,%xmm5,%k5
> > +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh
> \$0x7b,\(%ecx\),%xmm5,%k5
> > +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-
> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh
> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> > +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh \$0x7b,-
> 0x100\(%edx\),%xmm5,%k5\{%k7\}
> > #pass
> > --- a/gas/testsuite/gas/i386/evex-lig512-intel.d
> > +++ b/gas/testsuite/gas/i386/evex-lig512-intel.d
> > @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> > [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> > [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> > [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh k5,xmm5,WORD PTR
> \[ecx\],0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh
> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR
> \[ecx\+0xfe\],0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh
> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> > [ ]*[a-f0-9]+: 62 f1 d7 4f 58 f4 vaddsd xmm6\{k7\},xmm5,xmm4
> > [ ]*[a-f0-9]+: 62 f1 d7 cf 58 f4 vaddsd
> xmm6\{k7\}\{z\},xmm5,xmm4
> > [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd
> xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
> > @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> > [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> > [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> > [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh k5,xmm5,WORD PTR
> \[ecx\],0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh
> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR
> \[ecx\+0xfe\],0x7b
> > +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh
> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> > #pass
> > --- a/gas/testsuite/gas/i386/evex-lig512.d
> > +++ b/gas/testsuite/gas/i386/evex-lig512.d
> > @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> > [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless
> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless \$0x7b,-
> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-
> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh
> \$0x7b,%xmm4,%xmm5,%k5
> > +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh
> \$0x7b,\(%ecx\),%xmm5,%k5
> > +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-
> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh
> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> > +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh \$0x7b,-
> 0x100\(%edx\),%xmm5,%k5\{%k7\}
> > [ ]*[a-f0-9]+: 62 f1 d7 4f 58 f4
> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
> > [ ]*[a-f0-9]+: 62 f1 d7 cf 58 f4
> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
> > [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd \{rn-
> sae\},%xmm4,%xmm5,%xmm6\{%k7\}
> > @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> > [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless
> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless \$0x7b,-
> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-
> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh
> \$0x7b,%xmm4,%xmm5,%k5
> > +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh
> \$0x7b,\(%ecx\),%xmm5,%k5
> > +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-
> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh
> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> > +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh \$0x7b,-
> 0x100\(%edx\),%xmm5,%k5\{%k7\}
> > #pass
> > --- a/opcodes/i386-opc.tbl
> > +++ b/opcodes/i386-opc.tbl
> > @@ -3739,10 +3739,10 @@ vcmp<avx_frel>ph, 0xc2, 0x<avx_frel:imm>
> > vcmpph, 0xc2, None, CpuAVX512_FP16,
> >
> Modrm|Masking=2|Space0F3A|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|C
> heckRe
> > gSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
> > RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex,
> RegXMM|RegYMM|RegZMM,
> > RegMask } vcmpph, 0xc2, None, CpuAVX512_FP16,
> >
> Modrm|EVex512|Masking=2|Space0F3A|VexVVVV=1|VexW0|No_bSuf|No_
> wSuf|No_l
> > Suf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegZMM, RegZMM,
> > RegMask }
> >
> > -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> >
> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=
> 1|No_bSu
> > f|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, {
> > RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> > -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> >
> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wS
> uf|No_lSu
> > f|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM,
> RegMask
> > } -vcmpsh, 0xf3c2, None, CpuAVX512_FP16,
> >
> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=
> 1|No_bSu
> > f|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
> > RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask } -vcmpsh,
> 0xf3c2,
> > None, CpuAVX512_FP16,
> >
> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wS
> uf|No_lSu
> > f|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM,
> RegMask
> > }
> > +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> >
> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift
> =1|No_bS
> > +uf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, {
> > +RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> > +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> >
> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_w
> Suf|No_lS
> > +uf|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM,
> > +RegMask } vcmpsh, 0xf3c2, None, CpuAVX512_FP16,
> >
> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift
> =1|No_bS
> > +uf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
> > +RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask } vcmpsh,
> 0xf3c2,
> > +None, CpuAVX512_FP16,
> >
> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_w
> Suf|No_lS
> > +uf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM,
> > +RegMask }
> >
> > vcomish, 0x2f, None, CpuAVX512_FP16,
> >
> Modrm|EVexLIG|EVexMap5|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|
> No_lSuf|N
> > o_sSuf|No_qSuf|No_ldSuf, { RegXMM|Word|Unspecified|BaseIndex,
> RegXMM }
> > vcomish, 0x2f, None, CpuAVX512_FP16,
> >
> Modrm|EVexLIG|EVexMap5|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|
> No_qSuf|N
> > o_ldSuf|SAE, { Imm8, RegXMM, RegXMM }
> >
>
> Lili, does it look OK?
Hi Jan,
I checked this against the Software Developer's Manual, and it really should be LLIG. By the way, I also checked all the AVX512_FP16 instructions: vfpclasssh has the same issue. Could you please fix both together? Thanks!
Lili.
>
> Thanks.
>
> --
> H.J.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] x86: VCMPSH is Evex.LLIG
2022-04-14 16:24 ` Cui, Lili
@ 2022-04-14 16:34 ` H.J. Lu
2022-04-14 16:37 ` Jan Beulich
0 siblings, 1 reply; 6+ messages in thread
From: H.J. Lu @ 2022-04-14 16:34 UTC (permalink / raw)
To: Cui, Lili; +Cc: Beulich, Jan, Binutils
On Thu, Apr 14, 2022 at 9:24 AM Cui, Lili <lili.cui@intel.com> wrote:
>
>
>
> > -----Original Message-----
> > From: H.J. Lu <hjl.tools@gmail.com>
> > Sent: Thursday, April 14, 2022 11:22 PM
> > To: Beulich, Jan <JBeulich@suse.com>; Cui, Lili <lili.cui@intel.com>
> > Cc: Binutils <binutils@sourceware.org>
> > Subject: Re: [PATCH] x86: VCMPSH is Evex.LLIG
> >
> > On Thu, Apr 14, 2022 at 7:12 AM Jan Beulich <jbeulich@suse.com> wrote:
> > >
> > > These were mistakenly flagged as Evex.128. Getting the LLIG status
> > > right for insns allowing for SAE is a prereq for planned further work.
> > >
> > > --- a/gas/testsuite/gas/i386/evex-lig.s
> > > +++ b/gas/testsuite/gas/i386/evex-lig.s
> > > @@ -1703,6 +1703,13 @@ _start:
> > > vrndscaless $123, -512(%edx), %xmm5, %xmm6{%k7} # AVX512
> > Disp8
> > > vrndscaless $123, -516(%edx), %xmm5, %xmm6{%k7} # AVX512
> > >
> > > + vcmpsh $123, %xmm4, %xmm5, %k5 # AVX512-FP16
> > > + vcmpsh $123, {sae}, %xmm4, %xmm5, %k5{%k7} # AVX512-FP16
> > > + vcmpsh $123, (%ecx), %xmm5, %k5 # AVX512-FP16
> > > + vcmpsh $123, -123456(%esp, %esi, 8), %xmm5, %k5{%k7} # AVX512-
> > FP16
> > > + vcmpsh $123, 254(%ecx), %xmm5, %k5 # AVX512-FP16 Disp8
> > > + vcmpsh $123, -256(%edx), %xmm5, %k5{%k7} # AVX512-FP16
> > Disp8
> > > +
> > > .intel_syntax noprefix
> > > vaddsd xmm6{k7}, xmm5, xmm4 # AVX512
> > > vaddsd xmm6{k7}{z}, xmm5, xmm4 # AVX512 @@ -3403,3 +3410,9
> > > @@ _start:
> > > vrndscaless xmm6{k7}, xmm5, DWORD PTR [edx-512], 123 #
> > AVX512 Disp8
> > > vrndscaless xmm6{k7}, xmm5, DWORD PTR [edx-516], 123 #
> > AVX512
> > >
> > > + vcmpsh k5, xmm5, xmm4, 123 # AVX512-FP16
> > > + vcmpsh k5{k7}, xmm5, xmm4, {sae}, 123 # AVX512-FP16
> > > + vcmpsh k5, xmm5, WORD PTR [ecx], 123 # AVX512-FP16
> > > + vcmpsh k5{k7}, xmm5, WORD PTR [esp+esi*8-123456], 123 #
> > AVX512-FP16
> > > + vcmpsh k5, xmm5, WORD PTR [ecx+254], 123 # AVX512-FP16
> > Disp8
> > > + vcmpsh k5{k7}, xmm5, WORD PTR [edx-256], 123 # AVX512-FP16
> > Disp8
> > > --- a/gas/testsuite/gas/i386/evex-lig256-intel.d
> > > +++ b/gas/testsuite/gas/i386/evex-lig256-intel.d
> > > @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> > > [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> > > [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> > > [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> > k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh k5,xmm5,WORD PTR
> > \[ecx\],0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh
> > k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR
> > \[ecx\+0xfe\],0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh
> > k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> > > [ ]*[a-f0-9]+: 62 f1 d7 2f 58 f4 vaddsd xmm6\{k7\},xmm5,xmm4
> > > [ ]*[a-f0-9]+: 62 f1 d7 af 58 f4 vaddsd
> > xmm6\{k7\}\{z\},xmm5,xmm4
> > > [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd
> > xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
> > > @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> > > [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> > > [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> > > [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> > k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh k5,xmm5,WORD PTR
> > \[ecx\],0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh
> > k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR
> > \[ecx\+0xfe\],0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh
> > k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> > > #pass
> > > --- a/gas/testsuite/gas/i386/evex-lig256.d
> > > +++ b/gas/testsuite/gas/i386/evex-lig256.d
> > > @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> > > [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless
> > \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > > [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless \$0x7b,-
> > 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > > [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-
> > 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> > > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh
> > \$0x7b,%xmm4,%xmm5,%k5
> > > +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> > \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> > > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh
> > \$0x7b,\(%ecx\),%xmm5,%k5
> > > +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-
> > 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> > > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh
> > \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> > > +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh \$0x7b,-
> > 0x100\(%edx\),%xmm5,%k5\{%k7\}
> > > [ ]*[a-f0-9]+: 62 f1 d7 2f 58 f4
> > vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
> > > [ ]*[a-f0-9]+: 62 f1 d7 af 58 f4
> > vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
> > > [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd \{rn-
> > sae\},%xmm4,%xmm5,%xmm6\{%k7\}
> > > @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> > > [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless
> > \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > > [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless \$0x7b,-
> > 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > > [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-
> > 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> > > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh
> > \$0x7b,%xmm4,%xmm5,%k5
> > > +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> > \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> > > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh
> > \$0x7b,\(%ecx\),%xmm5,%k5
> > > +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-
> > 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> > > +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh
> > \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> > > +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh \$0x7b,-
> > 0x100\(%edx\),%xmm5,%k5\{%k7\}
> > > #pass
> > > --- a/gas/testsuite/gas/i386/evex-lig512-intel.d
> > > +++ b/gas/testsuite/gas/i386/evex-lig512-intel.d
> > > @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> > > [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> > > [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> > > [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> > k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh k5,xmm5,WORD PTR
> > \[ecx\],0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh
> > k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR
> > \[ecx\+0xfe\],0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh
> > k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> > > [ ]*[a-f0-9]+: 62 f1 d7 4f 58 f4 vaddsd xmm6\{k7\},xmm5,xmm4
> > > [ ]*[a-f0-9]+: 62 f1 d7 cf 58 f4 vaddsd
> > xmm6\{k7\}\{z\},xmm5,xmm4
> > > [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd
> > xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
> > > @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> > > [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> > > [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> > > [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> > k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh k5,xmm5,WORD PTR
> > \[ecx\],0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh
> > k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR
> > \[ecx\+0xfe\],0x7b
> > > +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh
> > k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> > > #pass
> > > --- a/gas/testsuite/gas/i386/evex-lig512.d
> > > +++ b/gas/testsuite/gas/i386/evex-lig512.d
> > > @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> > > [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless
> > \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > > [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless \$0x7b,-
> > 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > > [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-
> > 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> > > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh
> > \$0x7b,%xmm4,%xmm5,%k5
> > > +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> > \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> > > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh
> > \$0x7b,\(%ecx\),%xmm5,%k5
> > > +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-
> > 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> > > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh
> > \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> > > +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh \$0x7b,-
> > 0x100\(%edx\),%xmm5,%k5\{%k7\}
> > > [ ]*[a-f0-9]+: 62 f1 d7 4f 58 f4
> > vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
> > > [ ]*[a-f0-9]+: 62 f1 d7 cf 58 f4
> > vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
> > > [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd \{rn-
> > sae\},%xmm4,%xmm5,%xmm6\{%k7\}
> > > @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> > > [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless
> > \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > > [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless \$0x7b,-
> > 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > > [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-
> > 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> > > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh
> > \$0x7b,%xmm4,%xmm5,%k5
> > > +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> > \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> > > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh
> > \$0x7b,\(%ecx\),%xmm5,%k5
> > > +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-
> > 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> > > +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh
> > \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> > > +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh \$0x7b,-
> > 0x100\(%edx\),%xmm5,%k5\{%k7\}
> > > #pass
> > > --- a/opcodes/i386-opc.tbl
> > > +++ b/opcodes/i386-opc.tbl
> > > @@ -3739,10 +3739,10 @@ vcmp<avx_frel>ph, 0xc2, 0x<avx_frel:imm>
> > > vcmpph, 0xc2, None, CpuAVX512_FP16,
> > >
> > Modrm|Masking=2|Space0F3A|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|C
> > heckRe
> > > gSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
> > > RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex,
> > RegXMM|RegYMM|RegZMM,
> > > RegMask } vcmpph, 0xc2, None, CpuAVX512_FP16,
> > >
> > Modrm|EVex512|Masking=2|Space0F3A|VexVVVV=1|VexW0|No_bSuf|No_
> > wSuf|No_l
> > > Suf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegZMM, RegZMM,
> > > RegMask }
> > >
> > > -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> > >
> > Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=
> > 1|No_bSu
> > > f|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, {
> > > RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> > > -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> > >
> > Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wS
> > uf|No_lSu
> > > f|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM,
> > RegMask
> > > } -vcmpsh, 0xf3c2, None, CpuAVX512_FP16,
> > >
> > Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=
> > 1|No_bSu
> > > f|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
> > > RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask } -vcmpsh,
> > 0xf3c2,
> > > None, CpuAVX512_FP16,
> > >
> > Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wS
> > uf|No_lSu
> > > f|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM,
> > RegMask
> > > }
> > > +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> > >
> > +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift
> > =1|No_bS
> > > +uf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, {
> > > +RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> > > +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> > >
> > +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_w
> > Suf|No_lS
> > > +uf|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM,
> > > +RegMask } vcmpsh, 0xf3c2, None, CpuAVX512_FP16,
> > >
> > +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift
> > =1|No_bS
> > > +uf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
> > > +RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask } vcmpsh,
> > 0xf3c2,
> > > +None, CpuAVX512_FP16,
> > >
> > +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_w
> > Suf|No_lS
> > > +uf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM,
> > > +RegMask }
> > >
> > > vcomish, 0x2f, None, CpuAVX512_FP16,
> > >
> > Modrm|EVexLIG|EVexMap5|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|
> > No_lSuf|N
> > > o_sSuf|No_qSuf|No_ldSuf, { RegXMM|Word|Unspecified|BaseIndex,
> > RegXMM }
> > > vcomish, 0x2f, None, CpuAVX512_FP16,
> > >
> > Modrm|EVexLIG|EVexMap5|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|
> > No_qSuf|N
> > > o_ldSuf|SAE, { Imm8, RegXMM, RegXMM }
> > >
> >
> > Lili, does it look OK?
>
> Hi Jan,
> I checked this against the Software Developer's Manual, and it really should be LLIG. By the way, I also checked all the AVX512_FP16 instructions: vfpclasssh has the same issue. Could you please fix both together? Thanks!
>
Thank you, Lili.
This is OK.
--
H.J.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] x86: VCMPSH is Evex.LLIG
2022-04-14 16:34 ` H.J. Lu
@ 2022-04-14 16:37 ` Jan Beulich
2022-04-14 16:38 ` H.J. Lu
0 siblings, 1 reply; 6+ messages in thread
From: Jan Beulich @ 2022-04-14 16:37 UTC (permalink / raw)
To: H.J. Lu; +Cc: Binutils, Cui, Lili
On 14.04.2022 18:34, H.J. Lu wrote:
> On Thu, Apr 14, 2022 at 9:24 AM Cui, Lili <lili.cui@intel.com> wrote:
>>
>>
>>
>>> -----Original Message-----
>>> From: H.J. Lu <hjl.tools@gmail.com>
>>> Sent: Thursday, April 14, 2022 11:22 PM
>>> To: Beulich, Jan <JBeulich@suse.com>; Cui, Lili <lili.cui@intel.com>
>>> Cc: Binutils <binutils@sourceware.org>
>>> Subject: Re: [PATCH] x86: VCMPSH is Evex.LLIG
>>>
>>> On Thu, Apr 14, 2022 at 7:12 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>
>>>> These were mistakenly flagged as Evex.128. Getting the LLIG status
>>>> right for insns allowing for SAE is a prereq for planned further work.
>>>>
>>>> --- a/gas/testsuite/gas/i386/evex-lig.s
>>>> +++ b/gas/testsuite/gas/i386/evex-lig.s
>>>> @@ -1703,6 +1703,13 @@ _start:
>>>> vrndscaless $123, -512(%edx), %xmm5, %xmm6{%k7} # AVX512
>>> Disp8
>>>> vrndscaless $123, -516(%edx), %xmm5, %xmm6{%k7} # AVX512
>>>>
>>>> + vcmpsh $123, %xmm4, %xmm5, %k5 # AVX512-FP16
>>>> + vcmpsh $123, {sae}, %xmm4, %xmm5, %k5{%k7} # AVX512-FP16
>>>> + vcmpsh $123, (%ecx), %xmm5, %k5 # AVX512-FP16
>>>> + vcmpsh $123, -123456(%esp, %esi, 8), %xmm5, %k5{%k7} # AVX512-
>>> FP16
>>>> + vcmpsh $123, 254(%ecx), %xmm5, %k5 # AVX512-FP16 Disp8
>>>> + vcmpsh $123, -256(%edx), %xmm5, %k5{%k7} # AVX512-FP16
>>> Disp8
>>>> +
>>>> .intel_syntax noprefix
>>>> vaddsd xmm6{k7}, xmm5, xmm4 # AVX512
>>>> vaddsd xmm6{k7}{z}, xmm5, xmm4 # AVX512 @@ -3403,3 +3410,9
>>>> @@ _start:
>>>> vrndscaless xmm6{k7}, xmm5, DWORD PTR [edx-512], 123 #
>>> AVX512 Disp8
>>>> vrndscaless xmm6{k7}, xmm5, DWORD PTR [edx-516], 123 #
>>> AVX512
>>>>
>>>> + vcmpsh k5, xmm5, xmm4, 123 # AVX512-FP16
>>>> + vcmpsh k5{k7}, xmm5, xmm4, {sae}, 123 # AVX512-FP16
>>>> + vcmpsh k5, xmm5, WORD PTR [ecx], 123 # AVX512-FP16
>>>> + vcmpsh k5{k7}, xmm5, WORD PTR [esp+esi*8-123456], 123 #
>>> AVX512-FP16
>>>> + vcmpsh k5, xmm5, WORD PTR [ecx+254], 123 # AVX512-FP16
>>> Disp8
>>>> + vcmpsh k5{k7}, xmm5, WORD PTR [edx-256], 123 # AVX512-FP16
>>> Disp8
>>>> --- a/gas/testsuite/gas/i386/evex-lig256-intel.d
>>>> +++ b/gas/testsuite/gas/i386/evex-lig256-intel.d
>>>> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
>>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
>>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
>>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
>>> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh k5,xmm5,WORD PTR
>>> \[ecx\],0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh
>>> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR
>>> \[ecx\+0xfe\],0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh
>>> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
>>>> [ ]*[a-f0-9]+: 62 f1 d7 2f 58 f4 vaddsd xmm6\{k7\},xmm5,xmm4
>>>> [ ]*[a-f0-9]+: 62 f1 d7 af 58 f4 vaddsd
>>> xmm6\{k7\}\{z\},xmm5,xmm4
>>>> [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd
>>> xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
>>>> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
>>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
>>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
>>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
>>> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh k5,xmm5,WORD PTR
>>> \[ecx\],0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh
>>> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR
>>> \[ecx\+0xfe\],0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh
>>> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
>>>> #pass
>>>> --- a/gas/testsuite/gas/i386/evex-lig256.d
>>>> +++ b/gas/testsuite/gas/i386/evex-lig256.d
>>>> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
>>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless
>>> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless \$0x7b,-
>>> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-
>>> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
>>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh
>>> \$0x7b,%xmm4,%xmm5,%k5
>>>> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
>>> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
>>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh
>>> \$0x7b,\(%ecx\),%xmm5,%k5
>>>> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-
>>> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
>>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh
>>> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
>>>> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh \$0x7b,-
>>> 0x100\(%edx\),%xmm5,%k5\{%k7\}
>>>> [ ]*[a-f0-9]+: 62 f1 d7 2f 58 f4
>>> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
>>>> [ ]*[a-f0-9]+: 62 f1 d7 af 58 f4
>>> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
>>>> [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd \{rn-
>>> sae\},%xmm4,%xmm5,%xmm6\{%k7\}
>>>> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
>>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless
>>> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless \$0x7b,-
>>> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-
>>> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
>>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh
>>> \$0x7b,%xmm4,%xmm5,%k5
>>>> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
>>> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
>>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh
>>> \$0x7b,\(%ecx\),%xmm5,%k5
>>>> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-
>>> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
>>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh
>>> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
>>>> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh \$0x7b,-
>>> 0x100\(%edx\),%xmm5,%k5\{%k7\}
>>>> #pass
>>>> --- a/gas/testsuite/gas/i386/evex-lig512-intel.d
>>>> +++ b/gas/testsuite/gas/i386/evex-lig512-intel.d
>>>> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
>>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
>>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
>>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
>>> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh k5,xmm5,WORD PTR
>>> \[ecx\],0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh
>>> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR
>>> \[ecx\+0xfe\],0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh
>>> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
>>>> [ ]*[a-f0-9]+: 62 f1 d7 4f 58 f4 vaddsd xmm6\{k7\},xmm5,xmm4
>>>> [ ]*[a-f0-9]+: 62 f1 d7 cf 58 f4 vaddsd
>>> xmm6\{k7\}\{z\},xmm5,xmm4
>>>> [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd
>>> xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
>>>> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
>>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
>>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
>>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
>>> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh k5,xmm5,WORD PTR
>>> \[ecx\],0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh
>>> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR
>>> \[ecx\+0xfe\],0x7b
>>>> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh
>>> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
>>>> #pass
>>>> --- a/gas/testsuite/gas/i386/evex-lig512.d
>>>> +++ b/gas/testsuite/gas/i386/evex-lig512.d
>>>> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
>>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless
>>> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless \$0x7b,-
>>> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-
>>> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
>>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh
>>> \$0x7b,%xmm4,%xmm5,%k5
>>>> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
>>> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
>>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh
>>> \$0x7b,\(%ecx\),%xmm5,%k5
>>>> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-
>>> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
>>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh
>>> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
>>>> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh \$0x7b,-
>>> 0x100\(%edx\),%xmm5,%k5\{%k7\}
>>>> [ ]*[a-f0-9]+: 62 f1 d7 4f 58 f4
>>> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
>>>> [ ]*[a-f0-9]+: 62 f1 d7 cf 58 f4
>>> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
>>>> [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd \{rn-
>>> sae\},%xmm4,%xmm5,%xmm6\{%k7\}
>>>> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
>>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless
>>> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless \$0x7b,-
>>> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-
>>> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
>>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh
>>> \$0x7b,%xmm4,%xmm5,%k5
>>>> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
>>> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
>>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh
>>> \$0x7b,\(%ecx\),%xmm5,%k5
>>>> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-
>>> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
>>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh
>>> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
>>>> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh \$0x7b,-
>>> 0x100\(%edx\),%xmm5,%k5\{%k7\}
>>>> #pass
>>>> --- a/opcodes/i386-opc.tbl
>>>> +++ b/opcodes/i386-opc.tbl
>>>> @@ -3739,10 +3739,10 @@ vcmp<avx_frel>ph, 0xc2, 0x<avx_frel:imm>
>>>> vcmpph, 0xc2, None, CpuAVX512_FP16,
>>>>
>>> Modrm|Masking=2|Space0F3A|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|C
>>> heckRe
>>>> gSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
>>>> RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex,
>>> RegXMM|RegYMM|RegZMM,
>>>> RegMask } vcmpph, 0xc2, None, CpuAVX512_FP16,
>>>>
>>> Modrm|EVex512|Masking=2|Space0F3A|VexVVVV=1|VexW0|No_bSuf|No_
>>> wSuf|No_l
>>>> Suf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegZMM, RegZMM,
>>>> RegMask }
>>>>
>>>> -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
>>>>
>>> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=
>>> 1|No_bSu
>>>> f|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, {
>>>> RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
>>>> -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
>>>>
>>> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wS
>>> uf|No_lSu
>>>> f|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM,
>>> RegMask
>>>> } -vcmpsh, 0xf3c2, None, CpuAVX512_FP16,
>>>>
>>> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=
>>> 1|No_bSu
>>>> f|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
>>>> RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask } -vcmpsh,
>>> 0xf3c2,
>>>> None, CpuAVX512_FP16,
>>>>
>>> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wS
>>> uf|No_lSu
>>>> f|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM,
>>> RegMask
>>>> }
>>>> +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
>>>>
>>> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift
>>> =1|No_bS
>>>> +uf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, {
>>>> +RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
>>>> +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
>>>>
>>> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_w
>>> Suf|No_lS
>>>> +uf|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM,
>>>> +RegMask } vcmpsh, 0xf3c2, None, CpuAVX512_FP16,
>>>>
>>> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift
>>> =1|No_bS
>>>> +uf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
>>>> +RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask } vcmpsh,
>>> 0xf3c2,
>>>> +None, CpuAVX512_FP16,
>>>>
>>> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_w
>>> Suf|No_lS
>>>> +uf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM,
>>>> +RegMask }
>>>>
>>>> vcomish, 0x2f, None, CpuAVX512_FP16,
>>>>
>>> Modrm|EVexLIG|EVexMap5|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|
>>> No_lSuf|N
>>>> o_sSuf|No_qSuf|No_ldSuf, { RegXMM|Word|Unspecified|BaseIndex,
>>> RegXMM }
>>>> vcomish, 0x2f, None, CpuAVX512_FP16,
>>>>
>>> Modrm|EVexLIG|EVexMap5|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|
>>> No_qSuf|N
>>>> o_ldSuf|SAE, { Imm8, RegXMM, RegXMM }
>>>>
>>>
>>> Lili, does it look OK?
>>
>> Hi Jan,
>> I checked this against the Software Developer's Manual, and it really should be LLIG. By the way, I also checked all the AVX512_FP16 instructions: vfpclasssh has the same issue. Could you please fix both together? Thanks!
>>
>
> Thank you, Lili.
>
> This is OK.
Thanks, but I'm afraid I'm a little confused by the reply: Do you mean
"okay with the requested addition" (which I was intending to do) or
"okay as is" (leaving it to a subsequent patch to also fix the other
insn)?
Jan
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] x86: VCMPSH is Evex.LLIG
2022-04-14 16:37 ` Jan Beulich
@ 2022-04-14 16:38 ` H.J. Lu
0 siblings, 0 replies; 6+ messages in thread
From: H.J. Lu @ 2022-04-14 16:38 UTC (permalink / raw)
To: Jan Beulich; +Cc: Binutils, Cui, Lili
On Thu, Apr 14, 2022 at 9:37 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 14.04.2022 18:34, H.J. Lu wrote:
> > On Thu, Apr 14, 2022 at 9:24 AM Cui, Lili <lili.cui@intel.com> wrote:
> >>
> >>
> >>
> >>> -----Original Message-----
> >>> From: H.J. Lu <hjl.tools@gmail.com>
> >>> Sent: Thursday, April 14, 2022 11:22 PM
> >>> To: Beulich, Jan <JBeulich@suse.com>; Cui, Lili <lili.cui@intel.com>
> >>> Cc: Binutils <binutils@sourceware.org>
> >>> Subject: Re: [PATCH] x86: VCMPSH is Evex.LLIG
> >>>
> >>> On Thu, Apr 14, 2022 at 7:12 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>
> >>>> These were mistakenly flagged as Evex.128. Getting the LLIG status
> >>>> right for insns allowing for SAE is a prereq for planned further work.
> >>>>
> >>>> --- a/gas/testsuite/gas/i386/evex-lig.s
> >>>> +++ b/gas/testsuite/gas/i386/evex-lig.s
> >>>> @@ -1703,6 +1703,13 @@ _start:
> >>>> vrndscaless $123, -512(%edx), %xmm5, %xmm6{%k7} # AVX512
> >>> Disp8
> >>>> vrndscaless $123, -516(%edx), %xmm5, %xmm6{%k7} # AVX512
> >>>>
> >>>> + vcmpsh $123, %xmm4, %xmm5, %k5 # AVX512-FP16
> >>>> + vcmpsh $123, {sae}, %xmm4, %xmm5, %k5{%k7} # AVX512-FP16
> >>>> + vcmpsh $123, (%ecx), %xmm5, %k5 # AVX512-FP16
> >>>> + vcmpsh $123, -123456(%esp, %esi, 8), %xmm5, %k5{%k7} # AVX512-
> >>> FP16
> >>>> + vcmpsh $123, 254(%ecx), %xmm5, %k5 # AVX512-FP16 Disp8
> >>>> + vcmpsh $123, -256(%edx), %xmm5, %k5{%k7} # AVX512-FP16
> >>> Disp8
> >>>> +
> >>>> .intel_syntax noprefix
> >>>> vaddsd xmm6{k7}, xmm5, xmm4 # AVX512
> >>>> vaddsd xmm6{k7}{z}, xmm5, xmm4 # AVX512 @@ -3403,3 +3410,9
> >>>> @@ _start:
> >>>> vrndscaless xmm6{k7}, xmm5, DWORD PTR [edx-512], 123 #
> >>> AVX512 Disp8
> >>>> vrndscaless xmm6{k7}, xmm5, DWORD PTR [edx-516], 123 #
> >>> AVX512
> >>>>
> >>>> + vcmpsh k5, xmm5, xmm4, 123 # AVX512-FP16
> >>>> + vcmpsh k5{k7}, xmm5, xmm4, {sae}, 123 # AVX512-FP16
> >>>> + vcmpsh k5, xmm5, WORD PTR [ecx], 123 # AVX512-FP16
> >>>> + vcmpsh k5{k7}, xmm5, WORD PTR [esp+esi*8-123456], 123 #
> >>> AVX512-FP16
> >>>> + vcmpsh k5, xmm5, WORD PTR [ecx+254], 123 # AVX512-FP16
> >>> Disp8
> >>>> + vcmpsh k5{k7}, xmm5, WORD PTR [edx-256], 123 # AVX512-FP16
> >>> Disp8
> >>>> --- a/gas/testsuite/gas/i386/evex-lig256-intel.d
> >>>> +++ b/gas/testsuite/gas/i386/evex-lig256-intel.d
> >>>> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> >>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> >>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> >>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> >>> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh k5,xmm5,WORD PTR
> >>> \[ecx\],0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh
> >>> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR
> >>> \[ecx\+0xfe\],0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh
> >>> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> >>>> [ ]*[a-f0-9]+: 62 f1 d7 2f 58 f4 vaddsd xmm6\{k7\},xmm5,xmm4
> >>>> [ ]*[a-f0-9]+: 62 f1 d7 af 58 f4 vaddsd
> >>> xmm6\{k7\}\{z\},xmm5,xmm4
> >>>> [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd
> >>> xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
> >>>> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> >>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> >>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> >>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> >>> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh k5,xmm5,WORD PTR
> >>> \[ecx\],0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh
> >>> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR
> >>> \[ecx\+0xfe\],0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh
> >>> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> >>>> #pass
> >>>> --- a/gas/testsuite/gas/i386/evex-lig256.d
> >>>> +++ b/gas/testsuite/gas/i386/evex-lig256.d
> >>>> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> >>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless
> >>> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless \$0x7b,-
> >>> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-
> >>> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh
> >>> \$0x7b,%xmm4,%xmm5,%k5
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> >>> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh
> >>> \$0x7b,\(%ecx\),%xmm5,%k5
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-
> >>> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh
> >>> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh \$0x7b,-
> >>> 0x100\(%edx\),%xmm5,%k5\{%k7\}
> >>>> [ ]*[a-f0-9]+: 62 f1 d7 2f 58 f4
> >>> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
> >>>> [ ]*[a-f0-9]+: 62 f1 d7 af 58 f4
> >>> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
> >>>> [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd \{rn-
> >>> sae\},%xmm4,%xmm5,%xmm6\{%k7\}
> >>>> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> >>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 00 02 00 00 7b vrndscaless
> >>> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a 72 80 7b vrndscaless \$0x7b,-
> >>> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>> [ ]*[a-f0-9]+: 62 f3 55 2f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-
> >>> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 ec 7b vcmpsh
> >>> \$0x7b,%xmm4,%xmm5,%k5
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> >>> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 29 7b vcmpsh
> >>> \$0x7b,\(%ecx\),%xmm5,%k5
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-
> >>> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 28 c2 69 7f 7b vcmpsh
> >>> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 2f c2 6a 80 7b vcmpsh \$0x7b,-
> >>> 0x100\(%edx\),%xmm5,%k5\{%k7\}
> >>>> #pass
> >>>> --- a/gas/testsuite/gas/i386/evex-lig512-intel.d
> >>>> +++ b/gas/testsuite/gas/i386/evex-lig512-intel.d
> >>>> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> >>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> >>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> >>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> >>> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh k5,xmm5,WORD PTR
> >>> \[ecx\],0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh
> >>> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR
> >>> \[ecx\+0xfe\],0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh
> >>> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> >>>> [ ]*[a-f0-9]+: 62 f1 d7 4f 58 f4 vaddsd xmm6\{k7\},xmm5,xmm4
> >>>> [ ]*[a-f0-9]+: 62 f1 d7 cf 58 f4 vaddsd
> >>> xmm6\{k7\}\{z\},xmm5,xmm4
> >>>> [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd
> >>> xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
> >>>> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> >>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> >>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> >>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh k5,xmm5,xmm4,0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> >>> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh k5,xmm5,WORD PTR
> >>> \[ecx\],0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh
> >>> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh k5,xmm5,WORD PTR
> >>> \[ecx\+0xfe\],0x7b
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh
> >>> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> >>>> #pass
> >>>> --- a/gas/testsuite/gas/i386/evex-lig512.d
> >>>> +++ b/gas/testsuite/gas/i386/evex-lig512.d
> >>>> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> >>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless
> >>> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless \$0x7b,-
> >>> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-
> >>> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh
> >>> \$0x7b,%xmm4,%xmm5,%k5
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> >>> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh
> >>> \$0x7b,\(%ecx\),%xmm5,%k5
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-
> >>> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh
> >>> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh \$0x7b,-
> >>> 0x100\(%edx\),%xmm5,%k5\{%k7\}
> >>>> [ ]*[a-f0-9]+: 62 f1 d7 4f 58 f4
> >>> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
> >>>> [ ]*[a-f0-9]+: 62 f1 d7 cf 58 f4
> >>> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
> >>>> [ ]*[a-f0-9]+: 62 f1 d7 1f 58 f4 vaddsd \{rn-
> >>> sae\},%xmm4,%xmm5,%xmm6\{%k7\}
> >>>> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> >>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 00 02 00 00 7b vrndscaless
> >>> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a 72 80 7b vrndscaless \$0x7b,-
> >>> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>> [ ]*[a-f0-9]+: 62 f3 55 4f 0a b2 fc fd ff ff 7b vrndscaless \$0x7b,-
> >>> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 ec 7b vcmpsh
> >>> \$0x7b,%xmm4,%xmm5,%k5
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 1f c2 ec 7b vcmpsh
> >>> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 29 7b vcmpsh
> >>> \$0x7b,\(%ecx\),%xmm5,%k5
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 ac f4 c0 1d fe ff 7b vcmpsh \$0x7b,-
> >>> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 48 c2 69 7f 7b vcmpsh
> >>> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> >>>> +[ ]*[a-f0-9]+: 62 f3 56 4f c2 6a 80 7b vcmpsh \$0x7b,-
> >>> 0x100\(%edx\),%xmm5,%k5\{%k7\}
> >>>> #pass
> >>>> --- a/opcodes/i386-opc.tbl
> >>>> +++ b/opcodes/i386-opc.tbl
> >>>> @@ -3739,10 +3739,10 @@ vcmp<avx_frel>ph, 0xc2, 0x<avx_frel:imm>
> >>>> vcmpph, 0xc2, None, CpuAVX512_FP16,
> >>>>
> >>> Modrm|Masking=2|Space0F3A|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|C
> >>> heckRe
> >>>> gSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
> >>>> RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex,
> >>> RegXMM|RegYMM|RegZMM,
> >>>> RegMask } vcmpph, 0xc2, None, CpuAVX512_FP16,
> >>>>
> >>> Modrm|EVex512|Masking=2|Space0F3A|VexVVVV=1|VexW0|No_bSuf|No_
> >>> wSuf|No_l
> >>>> Suf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegZMM, RegZMM,
> >>>> RegMask }
> >>>>
> >>>> -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> >>>>
> >>> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=
> >>> 1|No_bSu
> >>>> f|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, {
> >>>> RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> >>>> -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> >>>>
> >>> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wS
> >>> uf|No_lSu
> >>>> f|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM,
> >>> RegMask
> >>>> } -vcmpsh, 0xf3c2, None, CpuAVX512_FP16,
> >>>>
> >>> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=
> >>> 1|No_bSu
> >>>> f|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
> >>>> RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask } -vcmpsh,
> >>> 0xf3c2,
> >>>> None, CpuAVX512_FP16,
> >>>>
> >>> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wS
> >>> uf|No_lSu
> >>>> f|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM,
> >>> RegMask
> >>>> }
> >>>> +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> >>>>
> >>> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift
> >>> =1|No_bS
> >>>> +uf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, {
> >>>> +RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> >>>> +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> >>>>
> >>> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_w
> >>> Suf|No_lS
> >>>> +uf|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM,
> >>>> +RegMask } vcmpsh, 0xf3c2, None, CpuAVX512_FP16,
> >>>>
> >>> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift
> >>> =1|No_bS
> >>>> +uf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
> >>>> +RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask } vcmpsh,
> >>> 0xf3c2,
> >>>> +None, CpuAVX512_FP16,
> >>>>
> >>> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_w
> >>> Suf|No_lS
> >>>> +uf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM,
> >>>> +RegMask }
> >>>>
> >>>> vcomish, 0x2f, None, CpuAVX512_FP16,
> >>>>
> >>> Modrm|EVexLIG|EVexMap5|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|
> >>> No_lSuf|N
> >>>> o_sSuf|No_qSuf|No_ldSuf, { RegXMM|Word|Unspecified|BaseIndex,
> >>> RegXMM }
> >>>> vcomish, 0x2f, None, CpuAVX512_FP16,
> >>>>
> >>> Modrm|EVexLIG|EVexMap5|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|
> >>> No_qSuf|N
> >>>> o_ldSuf|SAE, { Imm8, RegXMM, RegXMM }
> >>>>
> >>>
> >>> Lili, does it look OK?
> >>
> >> Hi Jan,
> >> I confirmed it with the Software Developer's Manual; it really should be LLIG. By the way, I checked all AVX512_FP16 instructions, and vfpclasssh has the same issue. Could you help fix them together? Thanks!
> >>
> >
> > Thank you, Lili.
> >
> > This is OK.
>
> Thanks, but I'm afraid I'm a little confused by the reply: Do you mean
> "okay with the requested addition" (which I was intending to do) or
Your patch is OK to install.
> "okay as is" (leaving it to a subsequent patch to also fix the other
> insn)?
>
> Jan
>
--
H.J.
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2022-04-14 16:39 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-14 14:12 [PATCH] x86: VCMPSH is Evex.LLIG Jan Beulich
2022-04-14 15:22 ` H.J. Lu
2022-04-14 16:24 ` Cui, Lili
2022-04-14 16:34 ` H.J. Lu
2022-04-14 16:37 ` Jan Beulich
2022-04-14 16:38 ` H.J. Lu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).