public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] x86: VCMPSH is Evex.LLIG
@ 2022-04-14 14:12 Jan Beulich
  2022-04-14 15:22 ` H.J. Lu
  0 siblings, 1 reply; 6+ messages in thread
From: Jan Beulich @ 2022-04-14 14:12 UTC (permalink / raw)
  To: Binutils

These were mistakenly flagged as Evex.128. Getting the LLIG status right
for insns allowing for SAE is a prereq for planned further work.

--- a/gas/testsuite/gas/i386/evex-lig.s
+++ b/gas/testsuite/gas/i386/evex-lig.s
@@ -1703,6 +1703,13 @@ _start:
 	vrndscaless	$123, -512(%edx), %xmm5, %xmm6{%k7}	 # AVX512 Disp8
 	vrndscaless	$123, -516(%edx), %xmm5, %xmm6{%k7}	 # AVX512
 
+	vcmpsh	$123, %xmm4, %xmm5, %k5	# AVX512-FP16
+	vcmpsh	$123, {sae}, %xmm4, %xmm5, %k5{%k7}	# AVX512-FP16
+	vcmpsh	$123, (%ecx), %xmm5, %k5	# AVX512-FP16
+	vcmpsh	$123, -123456(%esp, %esi, 8), %xmm5, %k5{%k7}	# AVX512-FP16
+	vcmpsh	$123, 254(%ecx), %xmm5, %k5	# AVX512-FP16 Disp8
+	vcmpsh	$123, -256(%edx), %xmm5, %k5{%k7}	# AVX512-FP16 Disp8
+
 	.intel_syntax noprefix
 	vaddsd	xmm6{k7}, xmm5, xmm4	 # AVX512
 	vaddsd	xmm6{k7}{z}, xmm5, xmm4	 # AVX512
@@ -3403,3 +3410,9 @@ _start:
 	vrndscaless	xmm6{k7}, xmm5, DWORD PTR [edx-512], 123	 # AVX512 Disp8
 	vrndscaless	xmm6{k7}, xmm5, DWORD PTR [edx-516], 123	 # AVX512
 
+	vcmpsh	k5, xmm5, xmm4, 123	# AVX512-FP16
+	vcmpsh	k5{k7}, xmm5, xmm4, {sae}, 123	# AVX512-FP16
+	vcmpsh	k5, xmm5, WORD PTR [ecx], 123	# AVX512-FP16
+	vcmpsh	k5{k7}, xmm5, WORD PTR [esp+esi*8-123456], 123	# AVX512-FP16
+	vcmpsh	k5, xmm5, WORD PTR [ecx+254], 123	# AVX512-FP16 Disp8
+	vcmpsh	k5{k7}, xmm5, WORD PTR [edx-256], 123	# AVX512-FP16 Disp8
--- a/gas/testsuite/gas/i386/evex-lig256-intel.d
+++ b/gas/testsuite/gas/i386/evex-lig256-intel.d
@@ -1536,6 +1536,12 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	62 f3 55 2f 0a b2 00 02 00 00 7b 	vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
 [ 	]*[a-f0-9]+:	62 f3 55 2f 0a 72 80 7b 	vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
 [ 	]*[a-f0-9]+:	62 f3 55 2f 0a b2 fc fd ff ff 7b 	vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 28 c2 ec 7b 	vcmpsh k5,xmm5,xmm4,0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 1f c2 ec 7b 	vcmpsh k5\{k7\},xmm5,xmm4,\{sae\},0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 28 c2 29 7b 	vcmpsh k5,xmm5,WORD PTR \[ecx\],0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 2f c2 ac f4 c0 1d fe ff 7b 	vcmpsh k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 28 c2 69 7f 7b 	vcmpsh k5,xmm5,WORD PTR \[ecx\+0xfe\],0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 2f c2 6a 80 7b 	vcmpsh k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
 [ 	]*[a-f0-9]+:	62 f1 d7 2f 58 f4    	vaddsd xmm6\{k7\},xmm5,xmm4
 [ 	]*[a-f0-9]+:	62 f1 d7 af 58 f4    	vaddsd xmm6\{k7\}\{z\},xmm5,xmm4
 [ 	]*[a-f0-9]+:	62 f1 d7 1f 58 f4    	vaddsd xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
@@ -3063,4 +3069,10 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	62 f3 55 2f 0a b2 00 02 00 00 7b 	vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
 [ 	]*[a-f0-9]+:	62 f3 55 2f 0a 72 80 7b 	vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
 [ 	]*[a-f0-9]+:	62 f3 55 2f 0a b2 fc fd ff ff 7b 	vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 28 c2 ec 7b 	vcmpsh k5,xmm5,xmm4,0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 1f c2 ec 7b 	vcmpsh k5\{k7\},xmm5,xmm4,\{sae\},0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 28 c2 29 7b 	vcmpsh k5,xmm5,WORD PTR \[ecx\],0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 2f c2 ac f4 c0 1d fe ff 7b 	vcmpsh k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 28 c2 69 7f 7b 	vcmpsh k5,xmm5,WORD PTR \[ecx\+0xfe\],0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 2f c2 6a 80 7b 	vcmpsh k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
 #pass
--- a/gas/testsuite/gas/i386/evex-lig256.d
+++ b/gas/testsuite/gas/i386/evex-lig256.d
@@ -1536,6 +1536,12 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	62 f3 55 2f 0a b2 00 02 00 00 7b 	vrndscaless \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
 [ 	]*[a-f0-9]+:	62 f3 55 2f 0a 72 80 7b 	vrndscaless \$0x7b,-0x200\(%edx\),%xmm5,%xmm6\{%k7\}
 [ 	]*[a-f0-9]+:	62 f3 55 2f 0a b2 fc fd ff ff 7b 	vrndscaless \$0x7b,-0x204\(%edx\),%xmm5,%xmm6\{%k7\}
+[ 	]*[a-f0-9]+:	62 f3 56 28 c2 ec 7b 	vcmpsh \$0x7b,%xmm4,%xmm5,%k5
+[ 	]*[a-f0-9]+:	62 f3 56 1f c2 ec 7b 	vcmpsh \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
+[ 	]*[a-f0-9]+:	62 f3 56 28 c2 29 7b 	vcmpsh \$0x7b,\(%ecx\),%xmm5,%k5
+[ 	]*[a-f0-9]+:	62 f3 56 2f c2 ac f4 c0 1d fe ff 7b 	vcmpsh \$0x7b,-0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
+[ 	]*[a-f0-9]+:	62 f3 56 28 c2 69 7f 7b 	vcmpsh \$0x7b,0xfe\(%ecx\),%xmm5,%k5
+[ 	]*[a-f0-9]+:	62 f3 56 2f c2 6a 80 7b 	vcmpsh \$0x7b,-0x100\(%edx\),%xmm5,%k5\{%k7\}
 [ 	]*[a-f0-9]+:	62 f1 d7 2f 58 f4    	vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
 [ 	]*[a-f0-9]+:	62 f1 d7 af 58 f4    	vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
 [ 	]*[a-f0-9]+:	62 f1 d7 1f 58 f4    	vaddsd \{rn-sae\},%xmm4,%xmm5,%xmm6\{%k7\}
@@ -3063,4 +3069,10 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	62 f3 55 2f 0a b2 00 02 00 00 7b 	vrndscaless \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
 [ 	]*[a-f0-9]+:	62 f3 55 2f 0a 72 80 7b 	vrndscaless \$0x7b,-0x200\(%edx\),%xmm5,%xmm6\{%k7\}
 [ 	]*[a-f0-9]+:	62 f3 55 2f 0a b2 fc fd ff ff 7b 	vrndscaless \$0x7b,-0x204\(%edx\),%xmm5,%xmm6\{%k7\}
+[ 	]*[a-f0-9]+:	62 f3 56 28 c2 ec 7b 	vcmpsh \$0x7b,%xmm4,%xmm5,%k5
+[ 	]*[a-f0-9]+:	62 f3 56 1f c2 ec 7b 	vcmpsh \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
+[ 	]*[a-f0-9]+:	62 f3 56 28 c2 29 7b 	vcmpsh \$0x7b,\(%ecx\),%xmm5,%k5
+[ 	]*[a-f0-9]+:	62 f3 56 2f c2 ac f4 c0 1d fe ff 7b 	vcmpsh \$0x7b,-0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
+[ 	]*[a-f0-9]+:	62 f3 56 28 c2 69 7f 7b 	vcmpsh \$0x7b,0xfe\(%ecx\),%xmm5,%k5
+[ 	]*[a-f0-9]+:	62 f3 56 2f c2 6a 80 7b 	vcmpsh \$0x7b,-0x100\(%edx\),%xmm5,%k5\{%k7\}
 #pass
--- a/gas/testsuite/gas/i386/evex-lig512-intel.d
+++ b/gas/testsuite/gas/i386/evex-lig512-intel.d
@@ -1536,6 +1536,12 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	62 f3 55 4f 0a b2 00 02 00 00 7b 	vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
 [ 	]*[a-f0-9]+:	62 f3 55 4f 0a 72 80 7b 	vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
 [ 	]*[a-f0-9]+:	62 f3 55 4f 0a b2 fc fd ff ff 7b 	vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 48 c2 ec 7b 	vcmpsh k5,xmm5,xmm4,0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 1f c2 ec 7b 	vcmpsh k5\{k7\},xmm5,xmm4,\{sae\},0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 48 c2 29 7b 	vcmpsh k5,xmm5,WORD PTR \[ecx\],0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 4f c2 ac f4 c0 1d fe ff 7b 	vcmpsh k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 48 c2 69 7f 7b 	vcmpsh k5,xmm5,WORD PTR \[ecx\+0xfe\],0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 4f c2 6a 80 7b 	vcmpsh k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
 [ 	]*[a-f0-9]+:	62 f1 d7 4f 58 f4    	vaddsd xmm6\{k7\},xmm5,xmm4
 [ 	]*[a-f0-9]+:	62 f1 d7 cf 58 f4    	vaddsd xmm6\{k7\}\{z\},xmm5,xmm4
 [ 	]*[a-f0-9]+:	62 f1 d7 1f 58 f4    	vaddsd xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
@@ -3063,4 +3069,10 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	62 f3 55 4f 0a b2 00 02 00 00 7b 	vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
 [ 	]*[a-f0-9]+:	62 f3 55 4f 0a 72 80 7b 	vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
 [ 	]*[a-f0-9]+:	62 f3 55 4f 0a b2 fc fd ff ff 7b 	vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 48 c2 ec 7b 	vcmpsh k5,xmm5,xmm4,0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 1f c2 ec 7b 	vcmpsh k5\{k7\},xmm5,xmm4,\{sae\},0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 48 c2 29 7b 	vcmpsh k5,xmm5,WORD PTR \[ecx\],0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 4f c2 ac f4 c0 1d fe ff 7b 	vcmpsh k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 48 c2 69 7f 7b 	vcmpsh k5,xmm5,WORD PTR \[ecx\+0xfe\],0x7b
+[ 	]*[a-f0-9]+:	62 f3 56 4f c2 6a 80 7b 	vcmpsh k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
 #pass
--- a/gas/testsuite/gas/i386/evex-lig512.d
+++ b/gas/testsuite/gas/i386/evex-lig512.d
@@ -1536,6 +1536,12 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	62 f3 55 4f 0a b2 00 02 00 00 7b 	vrndscaless \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
 [ 	]*[a-f0-9]+:	62 f3 55 4f 0a 72 80 7b 	vrndscaless \$0x7b,-0x200\(%edx\),%xmm5,%xmm6\{%k7\}
 [ 	]*[a-f0-9]+:	62 f3 55 4f 0a b2 fc fd ff ff 7b 	vrndscaless \$0x7b,-0x204\(%edx\),%xmm5,%xmm6\{%k7\}
+[ 	]*[a-f0-9]+:	62 f3 56 48 c2 ec 7b 	vcmpsh \$0x7b,%xmm4,%xmm5,%k5
+[ 	]*[a-f0-9]+:	62 f3 56 1f c2 ec 7b 	vcmpsh \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
+[ 	]*[a-f0-9]+:	62 f3 56 48 c2 29 7b 	vcmpsh \$0x7b,\(%ecx\),%xmm5,%k5
+[ 	]*[a-f0-9]+:	62 f3 56 4f c2 ac f4 c0 1d fe ff 7b 	vcmpsh \$0x7b,-0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
+[ 	]*[a-f0-9]+:	62 f3 56 48 c2 69 7f 7b 	vcmpsh \$0x7b,0xfe\(%ecx\),%xmm5,%k5
+[ 	]*[a-f0-9]+:	62 f3 56 4f c2 6a 80 7b 	vcmpsh \$0x7b,-0x100\(%edx\),%xmm5,%k5\{%k7\}
 [ 	]*[a-f0-9]+:	62 f1 d7 4f 58 f4    	vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
 [ 	]*[a-f0-9]+:	62 f1 d7 cf 58 f4    	vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
 [ 	]*[a-f0-9]+:	62 f1 d7 1f 58 f4    	vaddsd \{rn-sae\},%xmm4,%xmm5,%xmm6\{%k7\}
@@ -3063,4 +3069,10 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	62 f3 55 4f 0a b2 00 02 00 00 7b 	vrndscaless \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
 [ 	]*[a-f0-9]+:	62 f3 55 4f 0a 72 80 7b 	vrndscaless \$0x7b,-0x200\(%edx\),%xmm5,%xmm6\{%k7\}
 [ 	]*[a-f0-9]+:	62 f3 55 4f 0a b2 fc fd ff ff 7b 	vrndscaless \$0x7b,-0x204\(%edx\),%xmm5,%xmm6\{%k7\}
+[ 	]*[a-f0-9]+:	62 f3 56 48 c2 ec 7b 	vcmpsh \$0x7b,%xmm4,%xmm5,%k5
+[ 	]*[a-f0-9]+:	62 f3 56 1f c2 ec 7b 	vcmpsh \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
+[ 	]*[a-f0-9]+:	62 f3 56 48 c2 29 7b 	vcmpsh \$0x7b,\(%ecx\),%xmm5,%k5
+[ 	]*[a-f0-9]+:	62 f3 56 4f c2 ac f4 c0 1d fe ff 7b 	vcmpsh \$0x7b,-0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
+[ 	]*[a-f0-9]+:	62 f3 56 48 c2 69 7f 7b 	vcmpsh \$0x7b,0xfe\(%ecx\),%xmm5,%k5
+[ 	]*[a-f0-9]+:	62 f3 56 4f c2 6a 80 7b 	vcmpsh \$0x7b,-0x100\(%edx\),%xmm5,%k5\{%k7\}
 #pass
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -3739,10 +3739,10 @@ vcmp<avx_frel>ph, 0xc2, 0x<avx_frel:imm>
 vcmpph, 0xc2, None, CpuAVX512_FP16, Modrm|Masking=2|Space0F3A|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
 vcmpph, 0xc2, None, CpuAVX512_FP16, Modrm|EVex512|Masking=2|Space0F3A|VexVVVV=1|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegZMM, RegZMM, RegMask }
 
-vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16, Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, { RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
-vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16, Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM, RegMask }
-vcmpsh, 0xf3c2, None, CpuAVX512_FP16, Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
-vcmpsh, 0xf3c2, None, CpuAVX512_FP16, Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM, RegMask }
+vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16, Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, { RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
+vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16, Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM, RegMask }
+vcmpsh, 0xf3c2, None, CpuAVX512_FP16, Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
+vcmpsh, 0xf3c2, None, CpuAVX512_FP16, Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM, RegMask }
 
 vcomish, 0x2f, None, CpuAVX512_FP16, Modrm|EVexLIG|EVexMap5|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Word|Unspecified|BaseIndex, RegXMM }
 vcomish, 0x2f, None, CpuAVX512_FP16, Modrm|EVexLIG|EVexMap5|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, RegXMM, RegXMM }


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] x86: VCMPSH is Evex.LLIG
  2022-04-14 14:12 [PATCH] x86: VCMPSH is Evex.LLIG Jan Beulich
@ 2022-04-14 15:22 ` H.J. Lu
  2022-04-14 16:24   ` Cui, Lili
  0 siblings, 1 reply; 6+ messages in thread
From: H.J. Lu @ 2022-04-14 15:22 UTC (permalink / raw)
  To: Jan Beulich, Lili Cui; +Cc: Binutils

On Thu, Apr 14, 2022 at 7:12 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> These were mistakenly flagged as Evex.128. Getting the LLIG status right
> for insns allowing for SAE is a prereq for planned further work.
>
> --- a/gas/testsuite/gas/i386/evex-lig.s
> +++ b/gas/testsuite/gas/i386/evex-lig.s
> @@ -1703,6 +1703,13 @@ _start:
>         vrndscaless     $123, -512(%edx), %xmm5, %xmm6{%k7}      # AVX512 Disp8
>         vrndscaless     $123, -516(%edx), %xmm5, %xmm6{%k7}      # AVX512
>
> +       vcmpsh  $123, %xmm4, %xmm5, %k5 # AVX512-FP16
> +       vcmpsh  $123, {sae}, %xmm4, %xmm5, %k5{%k7}     # AVX512-FP16
> +       vcmpsh  $123, (%ecx), %xmm5, %k5        # AVX512-FP16
> +       vcmpsh  $123, -123456(%esp, %esi, 8), %xmm5, %k5{%k7}   # AVX512-FP16
> +       vcmpsh  $123, 254(%ecx), %xmm5, %k5     # AVX512-FP16 Disp8
> +       vcmpsh  $123, -256(%edx), %xmm5, %k5{%k7}       # AVX512-FP16 Disp8
> +
>         .intel_syntax noprefix
>         vaddsd  xmm6{k7}, xmm5, xmm4     # AVX512
>         vaddsd  xmm6{k7}{z}, xmm5, xmm4  # AVX512
> @@ -3403,3 +3410,9 @@ _start:
>         vrndscaless     xmm6{k7}, xmm5, DWORD PTR [edx-512], 123         # AVX512 Disp8
>         vrndscaless     xmm6{k7}, xmm5, DWORD PTR [edx-516], 123         # AVX512
>
> +       vcmpsh  k5, xmm5, xmm4, 123     # AVX512-FP16
> +       vcmpsh  k5{k7}, xmm5, xmm4, {sae}, 123  # AVX512-FP16
> +       vcmpsh  k5, xmm5, WORD PTR [ecx], 123   # AVX512-FP16
> +       vcmpsh  k5{k7}, xmm5, WORD PTR [esp+esi*8-123456], 123  # AVX512-FP16
> +       vcmpsh  k5, xmm5, WORD PTR [ecx+254], 123       # AVX512-FP16 Disp8
> +       vcmpsh  k5{k7}, xmm5, WORD PTR [edx-256], 123   # AVX512-FP16 Disp8
> --- a/gas/testsuite/gas/i386/evex-lig256-intel.d
> +++ b/gas/testsuite/gas/i386/evex-lig256-intel.d
> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh k5,xmm5,WORD PTR \[ecx\],0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR \[ecx\+0xfe\],0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
>  [      ]*[a-f0-9]+:    62 f1 d7 2f 58 f4       vaddsd xmm6\{k7\},xmm5,xmm4
>  [      ]*[a-f0-9]+:    62 f1 d7 af 58 f4       vaddsd xmm6\{k7\}\{z\},xmm5,xmm4
>  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh k5,xmm5,WORD PTR \[ecx\],0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR \[ecx\+0xfe\],0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
>  #pass
> --- a/gas/testsuite/gas/i386/evex-lig256.d
> +++ b/gas/testsuite/gas/i386/evex-lig256.d
> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless \$0x7b,-0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh \$0x7b,%xmm4,%xmm5,%k5
> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh \$0x7b,\(%ecx\),%xmm5,%k5
> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh \$0x7b,-0x100\(%edx\),%xmm5,%k5\{%k7\}
>  [      ]*[a-f0-9]+:    62 f1 d7 2f 58 f4       vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
>  [      ]*[a-f0-9]+:    62 f1 d7 af 58 f4       vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
>  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd \{rn-sae\},%xmm4,%xmm5,%xmm6\{%k7\}
> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless \$0x7b,-0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh \$0x7b,%xmm4,%xmm5,%k5
> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh \$0x7b,\(%ecx\),%xmm5,%k5
> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh \$0x7b,-0x100\(%edx\),%xmm5,%k5\{%k7\}
>  #pass
> --- a/gas/testsuite/gas/i386/evex-lig512-intel.d
> +++ b/gas/testsuite/gas/i386/evex-lig512-intel.d
> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh k5,xmm5,WORD PTR \[ecx\],0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR \[ecx\+0xfe\],0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
>  [      ]*[a-f0-9]+:    62 f1 d7 4f 58 f4       vaddsd xmm6\{k7\},xmm5,xmm4
>  [      ]*[a-f0-9]+:    62 f1 d7 cf 58 f4       vaddsd xmm6\{k7\}\{z\},xmm5,xmm4
>  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh k5,xmm5,WORD PTR \[ecx\],0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR \[ecx\+0xfe\],0x7b
> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
>  #pass
> --- a/gas/testsuite/gas/i386/evex-lig512.d
> +++ b/gas/testsuite/gas/i386/evex-lig512.d
> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless \$0x7b,-0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh \$0x7b,%xmm4,%xmm5,%k5
> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh \$0x7b,\(%ecx\),%xmm5,%k5
> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh \$0x7b,-0x100\(%edx\),%xmm5,%k5\{%k7\}
>  [      ]*[a-f0-9]+:    62 f1 d7 4f 58 f4       vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
>  [      ]*[a-f0-9]+:    62 f1 d7 cf 58 f4       vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
>  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd \{rn-sae\},%xmm4,%xmm5,%xmm6\{%k7\}
> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless \$0x7b,-0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh \$0x7b,%xmm4,%xmm5,%k5
> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh \$0x7b,\(%ecx\),%xmm5,%k5
> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh \$0x7b,-0x100\(%edx\),%xmm5,%k5\{%k7\}
>  #pass
> --- a/opcodes/i386-opc.tbl
> +++ b/opcodes/i386-opc.tbl
> @@ -3739,10 +3739,10 @@ vcmp<avx_frel>ph, 0xc2, 0x<avx_frel:imm>
>  vcmpph, 0xc2, None, CpuAVX512_FP16, Modrm|Masking=2|Space0F3A|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegMask }
>  vcmpph, 0xc2, None, CpuAVX512_FP16, Modrm|EVex512|Masking=2|Space0F3A|VexVVVV=1|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegZMM, RegZMM, RegMask }
>
> -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16, Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, { RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16, Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM, RegMask }
> -vcmpsh, 0xf3c2, None, CpuAVX512_FP16, Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> -vcmpsh, 0xf3c2, None, CpuAVX512_FP16, Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM, RegMask }
> +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16, Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, { RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16, Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM, RegMask }
> +vcmpsh, 0xf3c2, None, CpuAVX512_FP16, Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> +vcmpsh, 0xf3c2, None, CpuAVX512_FP16, Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM, RegMask }
>
>  vcomish, 0x2f, None, CpuAVX512_FP16, Modrm|EVexLIG|EVexMap5|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Word|Unspecified|BaseIndex, RegXMM }
>  vcomish, 0x2f, None, CpuAVX512_FP16, Modrm|EVexLIG|EVexMap5|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, RegXMM, RegXMM }
>

Lili, does it look OK?

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH] x86: VCMPSH is Evex.LLIG
  2022-04-14 15:22 ` H.J. Lu
@ 2022-04-14 16:24   ` Cui, Lili
  2022-04-14 16:34     ` H.J. Lu
  0 siblings, 1 reply; 6+ messages in thread
From: Cui, Lili @ 2022-04-14 16:24 UTC (permalink / raw)
  To: H.J. Lu, Beulich, Jan; +Cc: Binutils



> -----Original Message-----
> From: H.J. Lu <hjl.tools@gmail.com>
> Sent: Thursday, April 14, 2022 11:22 PM
> To: Beulich, Jan <JBeulich@suse.com>; Cui, Lili <lili.cui@intel.com>
> Cc: Binutils <binutils@sourceware.org>
> Subject: Re: [PATCH] x86: VCMPSH is Evex.LLIG
> 
> On Thu, Apr 14, 2022 at 7:12 AM Jan Beulich <jbeulich@suse.com> wrote:
> >
> > These were mistakenly flagged as Evex.128. Getting the LLIG status
> > right for insns allowing for SAE is a prereq for planned further work.
> >
> > --- a/gas/testsuite/gas/i386/evex-lig.s
> > +++ b/gas/testsuite/gas/i386/evex-lig.s
> > @@ -1703,6 +1703,13 @@ _start:
> >         vrndscaless     $123, -512(%edx), %xmm5, %xmm6{%k7}      # AVX512
> Disp8
> >         vrndscaless     $123, -516(%edx), %xmm5, %xmm6{%k7}      # AVX512
> >
> > +       vcmpsh  $123, %xmm4, %xmm5, %k5 # AVX512-FP16
> > +       vcmpsh  $123, {sae}, %xmm4, %xmm5, %k5{%k7}     # AVX512-FP16
> > +       vcmpsh  $123, (%ecx), %xmm5, %k5        # AVX512-FP16
> > +       vcmpsh  $123, -123456(%esp, %esi, 8), %xmm5, %k5{%k7}   # AVX512-
> FP16
> > +       vcmpsh  $123, 254(%ecx), %xmm5, %k5     # AVX512-FP16 Disp8
> > +       vcmpsh  $123, -256(%edx), %xmm5, %k5{%k7}       # AVX512-FP16
> Disp8
> > +
> >         .intel_syntax noprefix
> >         vaddsd  xmm6{k7}, xmm5, xmm4     # AVX512
> >         vaddsd  xmm6{k7}{z}, xmm5, xmm4  # AVX512 @@ -3403,3 +3410,9
> > @@ _start:
> >         vrndscaless     xmm6{k7}, xmm5, DWORD PTR [edx-512], 123         #
> AVX512 Disp8
> >         vrndscaless     xmm6{k7}, xmm5, DWORD PTR [edx-516], 123         #
> AVX512
> >
> > +       vcmpsh  k5, xmm5, xmm4, 123     # AVX512-FP16
> > +       vcmpsh  k5{k7}, xmm5, xmm4, {sae}, 123  # AVX512-FP16
> > +       vcmpsh  k5, xmm5, WORD PTR [ecx], 123   # AVX512-FP16
> > +       vcmpsh  k5{k7}, xmm5, WORD PTR [esp+esi*8-123456], 123  #
> AVX512-FP16
> > +       vcmpsh  k5, xmm5, WORD PTR [ecx+254], 123       # AVX512-FP16
> Disp8
> > +       vcmpsh  k5{k7}, xmm5, WORD PTR [edx-256], 123   # AVX512-FP16
> Disp8
> > --- a/gas/testsuite/gas/i386/evex-lig256-intel.d
> > +++ b/gas/testsuite/gas/i386/evex-lig256-intel.d
> > @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh k5,xmm5,WORD PTR
> \[ecx\],0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh
> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR
> \[ecx\+0xfe\],0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh
> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> >  [      ]*[a-f0-9]+:    62 f1 d7 2f 58 f4       vaddsd xmm6\{k7\},xmm5,xmm4
> >  [      ]*[a-f0-9]+:    62 f1 d7 af 58 f4       vaddsd
> xmm6\{k7\}\{z\},xmm5,xmm4
> >  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd
> xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
> > @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh k5,xmm5,WORD PTR
> \[ecx\],0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh
> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR
> \[ecx\+0xfe\],0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh
> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> >  #pass
> > --- a/gas/testsuite/gas/i386/evex-lig256.d
> > +++ b/gas/testsuite/gas/i386/evex-lig256.d
> > @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless
> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless \$0x7b,-
> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-
> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh
> \$0x7b,%xmm4,%xmm5,%k5
> > +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh
> \$0x7b,\(%ecx\),%xmm5,%k5
> > +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-
> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh
> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> > +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh \$0x7b,-
> 0x100\(%edx\),%xmm5,%k5\{%k7\}
> >  [      ]*[a-f0-9]+:    62 f1 d7 2f 58 f4
> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
> >  [      ]*[a-f0-9]+:    62 f1 d7 af 58 f4
> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
> >  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd \{rn-
> sae\},%xmm4,%xmm5,%xmm6\{%k7\}
> > @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless
> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless \$0x7b,-
> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-
> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh
> \$0x7b,%xmm4,%xmm5,%k5
> > +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh
> \$0x7b,\(%ecx\),%xmm5,%k5
> > +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-
> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh
> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> > +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh \$0x7b,-
> 0x100\(%edx\),%xmm5,%k5\{%k7\}
> >  #pass
> > --- a/gas/testsuite/gas/i386/evex-lig512-intel.d
> > +++ b/gas/testsuite/gas/i386/evex-lig512-intel.d
> > @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh k5,xmm5,WORD PTR
> \[ecx\],0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh
> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR
> \[ecx\+0xfe\],0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh
> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> >  [      ]*[a-f0-9]+:    62 f1 d7 4f 58 f4       vaddsd xmm6\{k7\},xmm5,xmm4
> >  [      ]*[a-f0-9]+:    62 f1 d7 cf 58 f4       vaddsd
> xmm6\{k7\}\{z\},xmm5,xmm4
> >  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd
> xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
> > @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless
> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh k5,xmm5,WORD PTR
> \[ecx\],0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh
> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR
> \[ecx\+0xfe\],0x7b
> > +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh
> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> >  #pass
> > --- a/gas/testsuite/gas/i386/evex-lig512.d
> > +++ b/gas/testsuite/gas/i386/evex-lig512.d
> > @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless
> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless \$0x7b,-
> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-
> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh
> \$0x7b,%xmm4,%xmm5,%k5
> > +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh
> \$0x7b,\(%ecx\),%xmm5,%k5
> > +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-
> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh
> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> > +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh \$0x7b,-
> 0x100\(%edx\),%xmm5,%k5\{%k7\}
> >  [      ]*[a-f0-9]+:    62 f1 d7 4f 58 f4
> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
> >  [      ]*[a-f0-9]+:    62 f1 d7 cf 58 f4
> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
> >  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd \{rn-
> sae\},%xmm4,%xmm5,%xmm6\{%k7\}
> > @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless
> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless \$0x7b,-
> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-
> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh
> \$0x7b,%xmm4,%xmm5,%k5
> > +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh
> \$0x7b,\(%ecx\),%xmm5,%k5
> > +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-
> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh
> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> > +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh \$0x7b,-
> 0x100\(%edx\),%xmm5,%k5\{%k7\}
> >  #pass
> > --- a/opcodes/i386-opc.tbl
> > +++ b/opcodes/i386-opc.tbl
> > @@ -3739,10 +3739,10 @@ vcmp<avx_frel>ph, 0xc2, 0x<avx_frel:imm>
> > vcmpph, 0xc2, None, CpuAVX512_FP16,
> >
> Modrm|Masking=2|Space0F3A|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|C
> heckRe
> > gSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
> > RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex,
> RegXMM|RegYMM|RegZMM,
> > RegMask }  vcmpph, 0xc2, None, CpuAVX512_FP16,
> >
> Modrm|EVex512|Masking=2|Space0F3A|VexVVVV=1|VexW0|No_bSuf|No_
> wSuf|No_l
> > Suf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegZMM, RegZMM,
> > RegMask }
> >
> > -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> >
> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=
> 1|No_bSu
> > f|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, {
> > RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> > -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> >
> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wS
> uf|No_lSu
> > f|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM,
> RegMask
> > } -vcmpsh, 0xf3c2, None, CpuAVX512_FP16,
> >
> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=
> 1|No_bSu
> > f|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
> > RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask } -vcmpsh,
> 0xf3c2,
> > None, CpuAVX512_FP16,
> >
> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wS
> uf|No_lSu
> > f|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM,
> RegMask
> > }
> > +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> >
> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift
> =1|No_bS
> > +uf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, {
> > +RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> > +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> >
> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_w
> Suf|No_lS
> > +uf|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM,
> > +RegMask } vcmpsh, 0xf3c2, None, CpuAVX512_FP16,
> >
> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift
> =1|No_bS
> > +uf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
> > +RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask } vcmpsh,
> 0xf3c2,
> > +None, CpuAVX512_FP16,
> >
> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_w
> Suf|No_lS
> > +uf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM,
> > +RegMask }
> >
> >  vcomish, 0x2f, None, CpuAVX512_FP16,
> >
> Modrm|EVexLIG|EVexMap5|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|
> No_lSuf|N
> > o_sSuf|No_qSuf|No_ldSuf, { RegXMM|Word|Unspecified|BaseIndex,
> RegXMM }
> > vcomish, 0x2f, None, CpuAVX512_FP16,
> >
> Modrm|EVexLIG|EVexMap5|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|
> No_qSuf|N
> > o_ldSuf|SAE, { Imm8, RegXMM, RegXMM }
> >
> 
> Lili, does it look OK?

Hi Jan,
I checked this against the Software Developer's Manual, and it really should be LLIG. By the way, I also checked all the AVX512_FP16 instructions: vfpclasssh has the same issue. Could you help fix them together? Thanks!

Lili.
> 
> Thanks.
> 
> --
> H.J.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] x86: VCMPSH is Evex.LLIG
  2022-04-14 16:24   ` Cui, Lili
@ 2022-04-14 16:34     ` H.J. Lu
  2022-04-14 16:37       ` Jan Beulich
  0 siblings, 1 reply; 6+ messages in thread
From: H.J. Lu @ 2022-04-14 16:34 UTC (permalink / raw)
  To: Cui, Lili; +Cc: Beulich, Jan, Binutils

On Thu, Apr 14, 2022 at 9:24 AM Cui, Lili <lili.cui@intel.com> wrote:
>
>
>
> > -----Original Message-----
> > From: H.J. Lu <hjl.tools@gmail.com>
> > Sent: Thursday, April 14, 2022 11:22 PM
> > To: Beulich, Jan <JBeulich@suse.com>; Cui, Lili <lili.cui@intel.com>
> > Cc: Binutils <binutils@sourceware.org>
> > Subject: Re: [PATCH] x86: VCMPSH is Evex.LLIG
> >
> > On Thu, Apr 14, 2022 at 7:12 AM Jan Beulich <jbeulich@suse.com> wrote:
> > >
> > > These were mistakenly flagged as Evex.128. Getting the LLIG status
> > > right for insns allowing for SAE is a prereq for planned further work.
> > >
> > > --- a/gas/testsuite/gas/i386/evex-lig.s
> > > +++ b/gas/testsuite/gas/i386/evex-lig.s
> > > @@ -1703,6 +1703,13 @@ _start:
> > >         vrndscaless     $123, -512(%edx), %xmm5, %xmm6{%k7}      # AVX512
> > Disp8
> > >         vrndscaless     $123, -516(%edx), %xmm5, %xmm6{%k7}      # AVX512
> > >
> > > +       vcmpsh  $123, %xmm4, %xmm5, %k5 # AVX512-FP16
> > > +       vcmpsh  $123, {sae}, %xmm4, %xmm5, %k5{%k7}     # AVX512-FP16
> > > +       vcmpsh  $123, (%ecx), %xmm5, %k5        # AVX512-FP16
> > > +       vcmpsh  $123, -123456(%esp, %esi, 8), %xmm5, %k5{%k7}   # AVX512-
> > FP16
> > > +       vcmpsh  $123, 254(%ecx), %xmm5, %k5     # AVX512-FP16 Disp8
> > > +       vcmpsh  $123, -256(%edx), %xmm5, %k5{%k7}       # AVX512-FP16
> > Disp8
> > > +
> > >         .intel_syntax noprefix
> > >         vaddsd  xmm6{k7}, xmm5, xmm4     # AVX512
> > >         vaddsd  xmm6{k7}{z}, xmm5, xmm4  # AVX512 @@ -3403,3 +3410,9
> > > @@ _start:
> > >         vrndscaless     xmm6{k7}, xmm5, DWORD PTR [edx-512], 123         #
> > AVX512 Disp8
> > >         vrndscaless     xmm6{k7}, xmm5, DWORD PTR [edx-516], 123         #
> > AVX512
> > >
> > > +       vcmpsh  k5, xmm5, xmm4, 123     # AVX512-FP16
> > > +       vcmpsh  k5{k7}, xmm5, xmm4, {sae}, 123  # AVX512-FP16
> > > +       vcmpsh  k5, xmm5, WORD PTR [ecx], 123   # AVX512-FP16
> > > +       vcmpsh  k5{k7}, xmm5, WORD PTR [esp+esi*8-123456], 123  #
> > AVX512-FP16
> > > +       vcmpsh  k5, xmm5, WORD PTR [ecx+254], 123       # AVX512-FP16
> > Disp8
> > > +       vcmpsh  k5{k7}, xmm5, WORD PTR [edx-256], 123   # AVX512-FP16
> > Disp8
> > > --- a/gas/testsuite/gas/i386/evex-lig256-intel.d
> > > +++ b/gas/testsuite/gas/i386/evex-lig256-intel.d
> > > @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> > >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> > >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> > >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> > k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh k5,xmm5,WORD PTR
> > \[ecx\],0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh
> > k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR
> > \[ecx\+0xfe\],0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh
> > k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> > >  [      ]*[a-f0-9]+:    62 f1 d7 2f 58 f4       vaddsd xmm6\{k7\},xmm5,xmm4
> > >  [      ]*[a-f0-9]+:    62 f1 d7 af 58 f4       vaddsd
> > xmm6\{k7\}\{z\},xmm5,xmm4
> > >  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd
> > xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
> > > @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> > >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> > >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> > >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> > k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh k5,xmm5,WORD PTR
> > \[ecx\],0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh
> > k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR
> > \[ecx\+0xfe\],0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh
> > k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> > >  #pass
> > > --- a/gas/testsuite/gas/i386/evex-lig256.d
> > > +++ b/gas/testsuite/gas/i386/evex-lig256.d
> > > @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> > >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless
> > \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless \$0x7b,-
> > 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-
> > 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> > > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh
> > \$0x7b,%xmm4,%xmm5,%k5
> > > +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> > \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> > > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh
> > \$0x7b,\(%ecx\),%xmm5,%k5
> > > +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-
> > 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> > > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh
> > \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> > > +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh \$0x7b,-
> > 0x100\(%edx\),%xmm5,%k5\{%k7\}
> > >  [      ]*[a-f0-9]+:    62 f1 d7 2f 58 f4
> > vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
> > >  [      ]*[a-f0-9]+:    62 f1 d7 af 58 f4
> > vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
> > >  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd \{rn-
> > sae\},%xmm4,%xmm5,%xmm6\{%k7\}
> > > @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> > >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless
> > \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless \$0x7b,-
> > 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > >  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-
> > 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> > > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh
> > \$0x7b,%xmm4,%xmm5,%k5
> > > +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> > \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> > > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh
> > \$0x7b,\(%ecx\),%xmm5,%k5
> > > +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-
> > 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> > > +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh
> > \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> > > +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh \$0x7b,-
> > 0x100\(%edx\),%xmm5,%k5\{%k7\}
> > >  #pass
> > > --- a/gas/testsuite/gas/i386/evex-lig512-intel.d
> > > +++ b/gas/testsuite/gas/i386/evex-lig512-intel.d
> > > @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> > >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> > >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> > >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> > k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh k5,xmm5,WORD PTR
> > \[ecx\],0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh
> > k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR
> > \[ecx\+0xfe\],0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh
> > k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> > >  [      ]*[a-f0-9]+:    62 f1 d7 4f 58 f4       vaddsd xmm6\{k7\},xmm5,xmm4
> > >  [      ]*[a-f0-9]+:    62 f1 d7 cf 58 f4       vaddsd
> > xmm6\{k7\}\{z\},xmm5,xmm4
> > >  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd
> > xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
> > > @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> > >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> > >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> > >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless
> > xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> > k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh k5,xmm5,WORD PTR
> > \[ecx\],0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh
> > k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR
> > \[ecx\+0xfe\],0x7b
> > > +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh
> > k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> > >  #pass
> > > --- a/gas/testsuite/gas/i386/evex-lig512.d
> > > +++ b/gas/testsuite/gas/i386/evex-lig512.d
> > > @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> > >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless
> > \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless \$0x7b,-
> > 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-
> > 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> > > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh
> > \$0x7b,%xmm4,%xmm5,%k5
> > > +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> > \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> > > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh
> > \$0x7b,\(%ecx\),%xmm5,%k5
> > > +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-
> > 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> > > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh
> > \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> > > +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh \$0x7b,-
> > 0x100\(%edx\),%xmm5,%k5\{%k7\}
> > >  [      ]*[a-f0-9]+:    62 f1 d7 4f 58 f4
> > vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
> > >  [      ]*[a-f0-9]+:    62 f1 d7 cf 58 f4
> > vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
> > >  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd \{rn-
> > sae\},%xmm4,%xmm5,%xmm6\{%k7\}
> > > @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> > >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless
> > \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless \$0x7b,-
> > 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> > >  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-
> > 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> > > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh
> > \$0x7b,%xmm4,%xmm5,%k5
> > > +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> > \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> > > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh
> > \$0x7b,\(%ecx\),%xmm5,%k5
> > > +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-
> > 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> > > +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh
> > \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> > > +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh \$0x7b,-
> > 0x100\(%edx\),%xmm5,%k5\{%k7\}
> > >  #pass
> > > --- a/opcodes/i386-opc.tbl
> > > +++ b/opcodes/i386-opc.tbl
> > > @@ -3739,10 +3739,10 @@ vcmp<avx_frel>ph, 0xc2, 0x<avx_frel:imm>
> > > vcmpph, 0xc2, None, CpuAVX512_FP16,
> > >
> > Modrm|Masking=2|Space0F3A|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|C
> > heckRe
> > > gSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
> > > RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex,
> > RegXMM|RegYMM|RegZMM,
> > > RegMask }  vcmpph, 0xc2, None, CpuAVX512_FP16,
> > >
> > Modrm|EVex512|Masking=2|Space0F3A|VexVVVV=1|VexW0|No_bSuf|No_
> > wSuf|No_l
> > > Suf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegZMM, RegZMM,
> > > RegMask }
> > >
> > > -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> > >
> > Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=
> > 1|No_bSu
> > > f|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, {
> > > RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> > > -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> > >
> > Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wS
> > uf|No_lSu
> > > f|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM,
> > RegMask
> > > } -vcmpsh, 0xf3c2, None, CpuAVX512_FP16,
> > >
> > Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=
> > 1|No_bSu
> > > f|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
> > > RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask } -vcmpsh,
> > 0xf3c2,
> > > None, CpuAVX512_FP16,
> > >
> > Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wS
> > uf|No_lSu
> > > f|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM,
> > RegMask
> > > }
> > > +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> > >
> > +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift
> > =1|No_bS
> > > +uf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, {
> > > +RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> > > +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> > >
> > +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_w
> > Suf|No_lS
> > > +uf|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM,
> > > +RegMask } vcmpsh, 0xf3c2, None, CpuAVX512_FP16,
> > >
> > +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift
> > =1|No_bS
> > > +uf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
> > > +RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask } vcmpsh,
> > 0xf3c2,
> > > +None, CpuAVX512_FP16,
> > >
> > +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_w
> > Suf|No_lS
> > > +uf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM,
> > > +RegMask }
> > >
> > >  vcomish, 0x2f, None, CpuAVX512_FP16,
> > >
> > Modrm|EVexLIG|EVexMap5|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|
> > No_lSuf|N
> > > o_sSuf|No_qSuf|No_ldSuf, { RegXMM|Word|Unspecified|BaseIndex,
> > RegXMM }
> > > vcomish, 0x2f, None, CpuAVX512_FP16,
> > >
> > Modrm|EVexLIG|EVexMap5|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|
> > No_qSuf|N
> > > o_ldSuf|SAE, { Imm8, RegXMM, RegXMM }
> > >
> >
> > Lili, does it look OK?
>
> Hi Jan,
> I checked this against the Software Developer's Manual, and it really should be LLIG. By the way, I also checked all the AVX512_FP16 instructions: vfpclasssh has the same issue. Could you help fix them together? Thanks!
>

Thank you, Lili.

This is OK.

-- 
H.J.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] x86: VCMPSH is Evex.LLIG
  2022-04-14 16:34     ` H.J. Lu
@ 2022-04-14 16:37       ` Jan Beulich
  2022-04-14 16:38         ` H.J. Lu
  0 siblings, 1 reply; 6+ messages in thread
From: Jan Beulich @ 2022-04-14 16:37 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Binutils, Cui, Lili

On 14.04.2022 18:34, H.J. Lu wrote:
> On Thu, Apr 14, 2022 at 9:24 AM Cui, Lili <lili.cui@intel.com> wrote:
>>
>>
>>
>>> -----Original Message-----
>>> From: H.J. Lu <hjl.tools@gmail.com>
>>> Sent: Thursday, April 14, 2022 11:22 PM
>>> To: Beulich, Jan <JBeulich@suse.com>; Cui, Lili <lili.cui@intel.com>
>>> Cc: Binutils <binutils@sourceware.org>
>>> Subject: Re: [PATCH] x86: VCMPSH is Evex.LLIG
>>>
>>> On Thu, Apr 14, 2022 at 7:12 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>
>>>> These were mistakenly flagged as Evex.128. Getting the LLIG status
>>>> right for insns allowing for SAE is a prereq for planned further work.
>>>>
>>>> --- a/gas/testsuite/gas/i386/evex-lig.s
>>>> +++ b/gas/testsuite/gas/i386/evex-lig.s
>>>> @@ -1703,6 +1703,13 @@ _start:
>>>>         vrndscaless     $123, -512(%edx), %xmm5, %xmm6{%k7}      # AVX512
>>> Disp8
>>>>         vrndscaless     $123, -516(%edx), %xmm5, %xmm6{%k7}      # AVX512
>>>>
>>>> +       vcmpsh  $123, %xmm4, %xmm5, %k5 # AVX512-FP16
>>>> +       vcmpsh  $123, {sae}, %xmm4, %xmm5, %k5{%k7}     # AVX512-FP16
>>>> +       vcmpsh  $123, (%ecx), %xmm5, %k5        # AVX512-FP16
>>>> +       vcmpsh  $123, -123456(%esp, %esi, 8), %xmm5, %k5{%k7}   # AVX512-
>>> FP16
>>>> +       vcmpsh  $123, 254(%ecx), %xmm5, %k5     # AVX512-FP16 Disp8
>>>> +       vcmpsh  $123, -256(%edx), %xmm5, %k5{%k7}       # AVX512-FP16
>>> Disp8
>>>> +
>>>>         .intel_syntax noprefix
>>>>         vaddsd  xmm6{k7}, xmm5, xmm4     # AVX512
>>>>         vaddsd  xmm6{k7}{z}, xmm5, xmm4  # AVX512 @@ -3403,3 +3410,9
>>>> @@ _start:
>>>>         vrndscaless     xmm6{k7}, xmm5, DWORD PTR [edx-512], 123         #
>>> AVX512 Disp8
>>>>         vrndscaless     xmm6{k7}, xmm5, DWORD PTR [edx-516], 123         #
>>> AVX512
>>>>
>>>> +       vcmpsh  k5, xmm5, xmm4, 123     # AVX512-FP16
>>>> +       vcmpsh  k5{k7}, xmm5, xmm4, {sae}, 123  # AVX512-FP16
>>>> +       vcmpsh  k5, xmm5, WORD PTR [ecx], 123   # AVX512-FP16
>>>> +       vcmpsh  k5{k7}, xmm5, WORD PTR [esp+esi*8-123456], 123  #
>>> AVX512-FP16
>>>> +       vcmpsh  k5, xmm5, WORD PTR [ecx+254], 123       # AVX512-FP16
>>> Disp8
>>>> +       vcmpsh  k5{k7}, xmm5, WORD PTR [edx-256], 123   # AVX512-FP16
>>> Disp8
>>>> --- a/gas/testsuite/gas/i386/evex-lig256-intel.d
>>>> +++ b/gas/testsuite/gas/i386/evex-lig256-intel.d
>>>> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
>>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
>>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
>>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
>>> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh k5,xmm5,WORD PTR
>>> \[ecx\],0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh
>>> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR
>>> \[ecx\+0xfe\],0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh
>>> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
>>>>  [      ]*[a-f0-9]+:    62 f1 d7 2f 58 f4       vaddsd xmm6\{k7\},xmm5,xmm4
>>>>  [      ]*[a-f0-9]+:    62 f1 d7 af 58 f4       vaddsd
>>> xmm6\{k7\}\{z\},xmm5,xmm4
>>>>  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd
>>> xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
>>>> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
>>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
>>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
>>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
>>> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh k5,xmm5,WORD PTR
>>> \[ecx\],0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh
>>> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR
>>> \[ecx\+0xfe\],0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh
>>> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
>>>>  #pass
>>>> --- a/gas/testsuite/gas/i386/evex-lig256.d
>>>> +++ b/gas/testsuite/gas/i386/evex-lig256.d
>>>> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
>>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless
>>> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless \$0x7b,-
>>> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-
>>> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
>>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh
>>> \$0x7b,%xmm4,%xmm5,%k5
>>>> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
>>> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
>>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh
>>> \$0x7b,\(%ecx\),%xmm5,%k5
>>>> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-
>>> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
>>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh
>>> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
>>>> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh \$0x7b,-
>>> 0x100\(%edx\),%xmm5,%k5\{%k7\}
>>>>  [      ]*[a-f0-9]+:    62 f1 d7 2f 58 f4
>>> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
>>>>  [      ]*[a-f0-9]+:    62 f1 d7 af 58 f4
>>> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
>>>>  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd \{rn-
>>> sae\},%xmm4,%xmm5,%xmm6\{%k7\}
>>>> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
>>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless
>>> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless \$0x7b,-
>>> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-
>>> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
>>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh
>>> \$0x7b,%xmm4,%xmm5,%k5
>>>> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
>>> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
>>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh
>>> \$0x7b,\(%ecx\),%xmm5,%k5
>>>> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-
>>> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
>>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh
>>> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
>>>> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh \$0x7b,-
>>> 0x100\(%edx\),%xmm5,%k5\{%k7\}
>>>>  #pass
>>>> --- a/gas/testsuite/gas/i386/evex-lig512-intel.d
>>>> +++ b/gas/testsuite/gas/i386/evex-lig512-intel.d
>>>> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
>>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
>>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
>>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
>>> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh k5,xmm5,WORD PTR
>>> \[ecx\],0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh
>>> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR
>>> \[ecx\+0xfe\],0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh
>>> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
>>>>  [      ]*[a-f0-9]+:    62 f1 d7 4f 58 f4       vaddsd xmm6\{k7\},xmm5,xmm4
>>>>  [      ]*[a-f0-9]+:    62 f1 d7 cf 58 f4       vaddsd
>>> xmm6\{k7\}\{z\},xmm5,xmm4
>>>>  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd
>>> xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
>>>> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
>>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
>>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
>>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless
>>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
>>> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh k5,xmm5,WORD PTR
>>> \[ecx\],0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh
>>> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR
>>> \[ecx\+0xfe\],0x7b
>>>> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh
>>> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
>>>>  #pass
>>>> --- a/gas/testsuite/gas/i386/evex-lig512.d
>>>> +++ b/gas/testsuite/gas/i386/evex-lig512.d
>>>> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
>>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless
>>> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless \$0x7b,-
>>> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-
>>> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
>>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh
>>> \$0x7b,%xmm4,%xmm5,%k5
>>>> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
>>> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
>>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh
>>> \$0x7b,\(%ecx\),%xmm5,%k5
>>>> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-
>>> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
>>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh
>>> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
>>>> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh \$0x7b,-
>>> 0x100\(%edx\),%xmm5,%k5\{%k7\}
>>>>  [      ]*[a-f0-9]+:    62 f1 d7 4f 58 f4
>>> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
>>>>  [      ]*[a-f0-9]+:    62 f1 d7 cf 58 f4
>>> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
>>>>  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd \{rn-
>>> sae\},%xmm4,%xmm5,%xmm6\{%k7\}
>>>> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
>>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless
>>> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless \$0x7b,-
>>> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
>>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-
>>> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
>>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh
>>> \$0x7b,%xmm4,%xmm5,%k5
>>>> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
>>> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
>>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh
>>> \$0x7b,\(%ecx\),%xmm5,%k5
>>>> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-
>>> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
>>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh
>>> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
>>>> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh \$0x7b,-
>>> 0x100\(%edx\),%xmm5,%k5\{%k7\}
>>>>  #pass
>>>> --- a/opcodes/i386-opc.tbl
>>>> +++ b/opcodes/i386-opc.tbl
>>>> @@ -3739,10 +3739,10 @@ vcmp<avx_frel>ph, 0xc2, 0x<avx_frel:imm>
>>>> vcmpph, 0xc2, None, CpuAVX512_FP16,
>>>>
>>> Modrm|Masking=2|Space0F3A|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|C
>>> heckRe
>>>> gSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
>>>> RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex,
>>> RegXMM|RegYMM|RegZMM,
>>>> RegMask }  vcmpph, 0xc2, None, CpuAVX512_FP16,
>>>>
>>> Modrm|EVex512|Masking=2|Space0F3A|VexVVVV=1|VexW0|No_bSuf|No_
>>> wSuf|No_l
>>>> Suf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegZMM, RegZMM,
>>>> RegMask }
>>>>
>>>> -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
>>>>
>>> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=
>>> 1|No_bSu
>>>> f|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, {
>>>> RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
>>>> -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
>>>>
>>> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wS
>>> uf|No_lSu
>>>> f|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM,
>>> RegMask
>>>> } -vcmpsh, 0xf3c2, None, CpuAVX512_FP16,
>>>>
>>> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=
>>> 1|No_bSu
>>>> f|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
>>>> RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask } -vcmpsh,
>>> 0xf3c2,
>>>> None, CpuAVX512_FP16,
>>>>
>>> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wS
>>> uf|No_lSu
>>>> f|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM,
>>> RegMask
>>>> }
>>>> +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
>>>>
>>> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift
>>> =1|No_bS
>>>> +uf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, {
>>>> +RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
>>>> +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
>>>>
>>> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_w
>>> Suf|No_lS
>>>> +uf|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM,
>>>> +RegMask } vcmpsh, 0xf3c2, None, CpuAVX512_FP16,
>>>>
>>> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift
>>> =1|No_bS
>>>> +uf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
>>>> +RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask } vcmpsh,
>>> 0xf3c2,
>>>> +None, CpuAVX512_FP16,
>>>>
>>> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_w
>>> Suf|No_lS
>>>> +uf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM,
>>>> +RegMask }
>>>>
>>>>  vcomish, 0x2f, None, CpuAVX512_FP16,
>>>>
>>> Modrm|EVexLIG|EVexMap5|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|
>>> No_lSuf|N
>>>> o_sSuf|No_qSuf|No_ldSuf, { RegXMM|Word|Unspecified|BaseIndex,
>>> RegXMM }
>>>> vcomish, 0x2f, None, CpuAVX512_FP16,
>>>>
>>> Modrm|EVexLIG|EVexMap5|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|
>>> No_qSuf|N
>>>> o_ldSuf|SAE, { Imm8, RegXMM, RegXMM }
>>>>
>>>
>>> Lili, does it look OK?
>>
>> Hi Jan,
>> I confirmed it with the Software Developer's Manual; it really should be LLIG. By the way, I checked all AVX512_FP16 instructions, and vfpclasssh has the same issue. Could you help fix them together? Thanks!
>>
> 
> Thank you, Lili.
> 
> This is OK.

Thanks, but I'm afraid I'm a little confused by the reply: Do you mean
"okay with the requested addition" (which I was intending to do) or
"okay as is" (leaving it to a subsequent patch to also fix the other
insn)?

Jan


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] x86: VCMPSH is Evex.LLIG
  2022-04-14 16:37       ` Jan Beulich
@ 2022-04-14 16:38         ` H.J. Lu
  0 siblings, 0 replies; 6+ messages in thread
From: H.J. Lu @ 2022-04-14 16:38 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Binutils, Cui, Lili

On Thu, Apr 14, 2022 at 9:37 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 14.04.2022 18:34, H.J. Lu wrote:
> > On Thu, Apr 14, 2022 at 9:24 AM Cui, Lili <lili.cui@intel.com> wrote:
> >>
> >>
> >>
> >>> -----Original Message-----
> >>> From: H.J. Lu <hjl.tools@gmail.com>
> >>> Sent: Thursday, April 14, 2022 11:22 PM
> >>> To: Beulich, Jan <JBeulich@suse.com>; Cui, Lili <lili.cui@intel.com>
> >>> Cc: Binutils <binutils@sourceware.org>
> >>> Subject: Re: [PATCH] x86: VCMPSH is Evex.LLIG
> >>>
> >>> On Thu, Apr 14, 2022 at 7:12 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>
> >>>> These were mistakenly flagged as Evex.128. Getting the LLIG status
> >>>> right for insns allowing for SAE is a prereq for planned further work.
> >>>>
> >>>> --- a/gas/testsuite/gas/i386/evex-lig.s
> >>>> +++ b/gas/testsuite/gas/i386/evex-lig.s
> >>>> @@ -1703,6 +1703,13 @@ _start:
> >>>>         vrndscaless     $123, -512(%edx), %xmm5, %xmm6{%k7}      # AVX512
> >>> Disp8
> >>>>         vrndscaless     $123, -516(%edx), %xmm5, %xmm6{%k7}      # AVX512
> >>>>
> >>>> +       vcmpsh  $123, %xmm4, %xmm5, %k5 # AVX512-FP16
> >>>> +       vcmpsh  $123, {sae}, %xmm4, %xmm5, %k5{%k7}     # AVX512-FP16
> >>>> +       vcmpsh  $123, (%ecx), %xmm5, %k5        # AVX512-FP16
> >>>> +       vcmpsh  $123, -123456(%esp, %esi, 8), %xmm5, %k5{%k7}   # AVX512-
> >>> FP16
> >>>> +       vcmpsh  $123, 254(%ecx), %xmm5, %k5     # AVX512-FP16 Disp8
> >>>> +       vcmpsh  $123, -256(%edx), %xmm5, %k5{%k7}       # AVX512-FP16
> >>> Disp8
> >>>> +
> >>>>         .intel_syntax noprefix
> >>>>         vaddsd  xmm6{k7}, xmm5, xmm4     # AVX512
> >>>>         vaddsd  xmm6{k7}{z}, xmm5, xmm4  # AVX512 @@ -3403,3 +3410,9
> >>>> @@ _start:
> >>>>         vrndscaless     xmm6{k7}, xmm5, DWORD PTR [edx-512], 123         #
> >>> AVX512 Disp8
> >>>>         vrndscaless     xmm6{k7}, xmm5, DWORD PTR [edx-516], 123         #
> >>> AVX512
> >>>>
> >>>> +       vcmpsh  k5, xmm5, xmm4, 123     # AVX512-FP16
> >>>> +       vcmpsh  k5{k7}, xmm5, xmm4, {sae}, 123  # AVX512-FP16
> >>>> +       vcmpsh  k5, xmm5, WORD PTR [ecx], 123   # AVX512-FP16
> >>>> +       vcmpsh  k5{k7}, xmm5, WORD PTR [esp+esi*8-123456], 123  #
> >>> AVX512-FP16
> >>>> +       vcmpsh  k5, xmm5, WORD PTR [ecx+254], 123       # AVX512-FP16
> >>> Disp8
> >>>> +       vcmpsh  k5{k7}, xmm5, WORD PTR [edx-256], 123   # AVX512-FP16
> >>> Disp8
> >>>> --- a/gas/testsuite/gas/i386/evex-lig256-intel.d
> >>>> +++ b/gas/testsuite/gas/i386/evex-lig256-intel.d
> >>>> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> >>> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh k5,xmm5,WORD PTR
> >>> \[ecx\],0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh
> >>> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR
> >>> \[ecx\+0xfe\],0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh
> >>> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> >>>>  [      ]*[a-f0-9]+:    62 f1 d7 2f 58 f4       vaddsd xmm6\{k7\},xmm5,xmm4
> >>>>  [      ]*[a-f0-9]+:    62 f1 d7 af 58 f4       vaddsd
> >>> xmm6\{k7\}\{z\},xmm5,xmm4
> >>>>  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd
> >>> xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
> >>>> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> >>> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh k5,xmm5,WORD PTR
> >>> \[ecx\],0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh
> >>> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR
> >>> \[ecx\+0xfe\],0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh
> >>> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> >>>>  #pass
> >>>> --- a/gas/testsuite/gas/i386/evex-lig256.d
> >>>> +++ b/gas/testsuite/gas/i386/evex-lig256.d
> >>>> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless
> >>> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless \$0x7b,-
> >>> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-
> >>> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh
> >>> \$0x7b,%xmm4,%xmm5,%k5
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> >>> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh
> >>> \$0x7b,\(%ecx\),%xmm5,%k5
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-
> >>> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh
> >>> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh \$0x7b,-
> >>> 0x100\(%edx\),%xmm5,%k5\{%k7\}
> >>>>  [      ]*[a-f0-9]+:    62 f1 d7 2f 58 f4
> >>> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
> >>>>  [      ]*[a-f0-9]+:    62 f1 d7 af 58 f4
> >>> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
> >>>>  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd \{rn-
> >>> sae\},%xmm4,%xmm5,%xmm6\{%k7\}
> >>>> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 00 02 00 00 7b        vrndscaless
> >>> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a 72 80 7b         vrndscaless \$0x7b,-
> >>> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 2f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-
> >>> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 ec 7b    vcmpsh
> >>> \$0x7b,%xmm4,%xmm5,%k5
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> >>> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 29 7b    vcmpsh
> >>> \$0x7b,\(%ecx\),%xmm5,%k5
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-
> >>> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 28 c2 69 7f 7b         vcmpsh
> >>> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 2f c2 6a 80 7b         vcmpsh \$0x7b,-
> >>> 0x100\(%edx\),%xmm5,%k5\{%k7\}
> >>>>  #pass
> >>>> --- a/gas/testsuite/gas/i386/evex-lig512-intel.d
> >>>> +++ b/gas/testsuite/gas/i386/evex-lig512-intel.d
> >>>> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> >>> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh k5,xmm5,WORD PTR
> >>> \[ecx\],0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh
> >>> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR
> >>> \[ecx\+0xfe\],0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh
> >>> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> >>>>  [      ]*[a-f0-9]+:    62 f1 d7 4f 58 f4       vaddsd xmm6\{k7\},xmm5,xmm4
> >>>>  [      ]*[a-f0-9]+:    62 f1 d7 cf 58 f4       vaddsd
> >>> xmm6\{k7\}\{z\},xmm5,xmm4
> >>>>  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd
> >>> xmm6\{k7\},xmm5,xmm4,\{rn-sae\}
> >>>> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx\+0x200\],0x7b
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x200\],0x7b
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless
> >>> xmm6\{k7\},xmm5,DWORD PTR \[edx-0x204\],0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh k5,xmm5,xmm4,0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> >>> k5\{k7\},xmm5,xmm4,\{sae\},0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh k5,xmm5,WORD PTR
> >>> \[ecx\],0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh
> >>> k5\{k7\},xmm5,WORD PTR \[esp\+esi\*8-0x1e240\],0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh k5,xmm5,WORD PTR
> >>> \[ecx\+0xfe\],0x7b
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh
> >>> k5\{k7\},xmm5,WORD PTR \[edx-0x100\],0x7b
> >>>>  #pass
> >>>> --- a/gas/testsuite/gas/i386/evex-lig512.d
> >>>> +++ b/gas/testsuite/gas/i386/evex-lig512.d
> >>>> @@ -1536,6 +1536,12 @@ Disassembly of section .text:
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless
> >>> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless \$0x7b,-
> >>> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-
> >>> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh
> >>> \$0x7b,%xmm4,%xmm5,%k5
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> >>> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh
> >>> \$0x7b,\(%ecx\),%xmm5,%k5
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-
> >>> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh
> >>> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh \$0x7b,-
> >>> 0x100\(%edx\),%xmm5,%k5\{%k7\}
> >>>>  [      ]*[a-f0-9]+:    62 f1 d7 4f 58 f4
> >>> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}
> >>>>  [      ]*[a-f0-9]+:    62 f1 d7 cf 58 f4
> >>> vaddsd %xmm4,%xmm5,%xmm6\{%k7\}\{z\}
> >>>>  [      ]*[a-f0-9]+:    62 f1 d7 1f 58 f4       vaddsd \{rn-
> >>> sae\},%xmm4,%xmm5,%xmm6\{%k7\}
> >>>> @@ -3063,4 +3069,10 @@ Disassembly of section .text:
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 00 02 00 00 7b        vrndscaless
> >>> \$0x7b,0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a 72 80 7b         vrndscaless \$0x7b,-
> >>> 0x200\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>>  [      ]*[a-f0-9]+:    62 f3 55 4f 0a b2 fc fd ff ff 7b        vrndscaless \$0x7b,-
> >>> 0x204\(%edx\),%xmm5,%xmm6\{%k7\}
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 ec 7b    vcmpsh
> >>> \$0x7b,%xmm4,%xmm5,%k5
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 1f c2 ec 7b    vcmpsh
> >>> \$0x7b,\{sae\},%xmm4,%xmm5,%k5\{%k7\}
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 29 7b    vcmpsh
> >>> \$0x7b,\(%ecx\),%xmm5,%k5
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 ac f4 c0 1d fe ff 7b     vcmpsh \$0x7b,-
> >>> 0x1e240\(%esp,%esi,8\),%xmm5,%k5\{%k7\}
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 48 c2 69 7f 7b         vcmpsh
> >>> \$0x7b,0xfe\(%ecx\),%xmm5,%k5
> >>>> +[      ]*[a-f0-9]+:    62 f3 56 4f c2 6a 80 7b         vcmpsh \$0x7b,-
> >>> 0x100\(%edx\),%xmm5,%k5\{%k7\}
> >>>>  #pass
> >>>> --- a/opcodes/i386-opc.tbl
> >>>> +++ b/opcodes/i386-opc.tbl
> >>>> @@ -3739,10 +3739,10 @@ vcmp<avx_frel>ph, 0xc2, 0x<avx_frel:imm>
> >>>> vcmpph, 0xc2, None, CpuAVX512_FP16,
> >>>>
> >>> Modrm|Masking=2|Space0F3A|VexVVVV|VexW0|Broadcast|Disp8ShiftVL|C
> >>> heckRe
> >>>> gSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
> >>>> RegXMM|RegYMM|RegZMM|Word|Unspecified|BaseIndex,
> >>> RegXMM|RegYMM|RegZMM,
> >>>> RegMask }  vcmpph, 0xc2, None, CpuAVX512_FP16,
> >>>>
> >>> Modrm|EVex512|Masking=2|Space0F3A|VexVVVV=1|VexW0|No_bSuf|No_
> >>> wSuf|No_l
> >>>> Suf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegZMM, RegZMM,
> >>>> RegMask }
> >>>>
> >>>> -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> >>>>
> >>> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=
> >>> 1|No_bSu
> >>>> f|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, {
> >>>> RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> >>>> -vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> >>>>
> >>> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wS
> >>> uf|No_lSu
> >>>> f|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM,
> >>> RegMask
> >>>> } -vcmpsh, 0xf3c2, None, CpuAVX512_FP16,
> >>>>
> >>> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift=
> >>> 1|No_bSu
> >>>> f|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
> >>>> RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask } -vcmpsh,
> >>> 0xf3c2,
> >>>> None, CpuAVX512_FP16,
> >>>>
> >>> Modrm|EVex128|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_wS
> >>> uf|No_lSu
> >>>> f|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM,
> >>> RegMask
> >>>> }
> >>>> +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> >>>>
> >>> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift
> >>> =1|No_bS
> >>>> +uf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, {
> >>>> +RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask }
> >>>> +vcmp<avx_frel>sh, 0xf3c2, 0x<avx_frel:imm>, CpuAVX512_FP16,
> >>>>
> >>> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_w
> >>> Suf|No_lS
> >>>> +uf|No_sSuf|No_qSuf|No_ldSuf|ImmExt|SAE, { Imm8, RegXMM, RegXMM,
> >>>> +RegMask } vcmpsh, 0xf3c2, None, CpuAVX512_FP16,
> >>>>
> >>> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|Disp8MemShift
> >>> =1|No_bS
> >>>> +uf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8,
> >>>> +RegXMM|Word|Unspecified|BaseIndex, RegXMM, RegMask } vcmpsh,
> >>> 0xf3c2,
> >>>> +None, CpuAVX512_FP16,
> >>>>
> >>> +Modrm|EVexLIG|Masking=2|Space0F3A|VexVVVV|VexW0|No_bSuf|No_w
> >>> Suf|No_lS
> >>>> +uf|No_sSuf|No_qSuf|No_ldSuf|SAE, { Imm8, Imm8, RegXMM, RegXMM,
> >>>> +RegMask }
> >>>>
> >>>>  vcomish, 0x2f, None, CpuAVX512_FP16,
> >>>>
> >>> Modrm|EVexLIG|EVexMap5|VexW0|Disp8MemShift=1|No_bSuf|No_wSuf|
> >>> No_lSuf|N
> >>>> o_sSuf|No_qSuf|No_ldSuf, { RegXMM|Word|Unspecified|BaseIndex,
> >>> RegXMM }
> >>>> vcomish, 0x2f, None, CpuAVX512_FP16,
> >>>>
> >>> Modrm|EVexLIG|EVexMap5|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|
> >>> No_qSuf|N
> >>>> o_ldSuf|SAE, { Imm8, RegXMM, RegXMM }
> >>>>
> >>>
> >>> Lili, does it look OK?
> >>
> >> Hi Jan,
> >> I confirmed it with the Software Developer's Manual; it really should be LLIG. By the way, I checked all AVX512_FP16 instructions, and vfpclasssh has the same issue. Could you help fix them together? Thanks!
> >>
> >
> > Thank you, Lili.
> >
> > This is OK.
>
> Thanks, but I'm afraid I'm a little confused by the reply: Do you mean
> "okay with the requested addition" (which I was intending to do) or

Your patch is OK to install.

> "okay as is" (leaving it to a subsequent patch to also fix the other
> insn)?
>
> Jan
>


-- 
H.J.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-04-14 16:39 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-14 14:12 [PATCH] x86: VCMPSH is Evex.LLIG Jan Beulich
2022-04-14 15:22 ` H.J. Lu
2022-04-14 16:24   ` Cui, Lili
2022-04-14 16:34     ` H.J. Lu
2022-04-14 16:37       ` Jan Beulich
2022-04-14 16:38         ` H.J. Lu

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).