Re: [PATCH v5 3/8] x86: re-work insn/suffix recognition

public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed

From: Jan Beulich <jbeulich@suse.com>
To: "H.J. Lu" <hjl.tools@gmail.com>
Cc: Binutils <binutils@sourceware.org>
Subject: Re: [PATCH v5 3/8] x86: re-work insn/suffix recognition
Date: Mon, 31 Oct 2022 12:40:11 +0100	[thread overview]
Message-ID: <b90e9111-e6a8-4d57-ba7f-ffe2f9d5663e@suse.com> (raw)
In-Reply-To: <CAMe9rOoyaE7ATG+nbMOCHuChaUeA8dXGkkoqd6LdiF7q=5BhtQ@mail.gmail.com>

On 28.10.2022 18:12, H.J. Lu wrote:
> On Fri, Oct 28, 2022 at 2:00 AM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> On 27.10.2022 19:21, H.J. Lu wrote:
>>> On Tue, Oct 25, 2022 at 12:26 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>> @@ -989,13 +976,13 @@ emms, 0xf77, None, CpuMMX, No_bSuf|No_wS
>>>>  // copying between Reg64/Mem64 and RegXMM/RegMMX, as is mandated by Intel's
>>>>  // spec). AMD's spec, having been in existence for much longer, failed to
>>>>  // recognize that and specified movd for 32- and 64-bit operations.
>>>> -movd, 0x666e, None, CpuAVX, D|Modrm|Vex128|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Reg32|Unspecified|BaseIndex, RegXMM }
>>>> +movd, 0x666e, None, CpuAVX, D|Modrm|Vex128|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX|Pass2, { Reg32|Unspecified|BaseIndex, RegXMM }
>>>>  movd, 0x666e, None, CpuAVX|Cpu64, D|Modrm|Vex=1|Space0F|VexW1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64|SSE2AVX, { Reg64|BaseIndex, RegXMM }
>>>>  movd, 0x660f6e, None, CpuSSE2, D|Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32|Unspecified|BaseIndex, RegXMM }
>>>>  movd, 0x660f6e, None, CpuSSE2|Cpu64, D|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64, { Reg64|BaseIndex, RegXMM }
>>>>  movd, 0xf6e, None, CpuMMX, D|Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32|Unspecified|BaseIndex, RegMMX }
>>>>  movd, 0xf6e, None, CpuMMX|Cpu64, D|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64, { Reg64|BaseIndex, RegMMX }
>>>> -movq, 0xf37e, None, CpuAVX, Load|Modrm|Vex=1|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
>>>> +movq, 0xf37e, None, CpuAVX, Load|Modrm|Vex=1|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX|Pass2, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
>>>>  movq, 0x66d6, None, CpuAVX, Modrm|Vex=1|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex|RegXMM }
>>>>  movq, 0x666e, None, CpuAVX|Cpu64, D|Modrm|Vex=1|Space0F|VexW1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64|SSE2AVX, { Reg64|Unspecified|BaseIndex, RegXMM }
>>>>  movq, 0xf30f7e, None, CpuSSE2, Load|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|Qword|BaseIndex|RegXMM, RegXMM }
>>>> @@ -1159,7 +1146,7 @@ andpd<sse2>, 0x660f54, None, <sse2:cpu>,
>>>>  cmp<frel>pd<sse2>, 0x660fc2, <frel:imm>, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|<frel:comm>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
>>>>  cmp<frel>sd<sse2>, 0xf20fc2, <frel:imm>, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|<frel:comm>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM }
>>>>  cmppd<sse2>, 0x660fc2, None, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
>>>> -cmpsd<sse2>, 0xf20fc2, None, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
>>>> +cmpsd<sse2>, 0xf20fc2, None, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Pass2, { Imm8, Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
>>>>  comisd<sse2>, 0x660f2f, None, <sse2:cpu>, Modrm|<sse2:scal>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
>>>>  cvtpi2pd, 0x660f2a, None, CpuSSE2, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMMX, RegXMM }
>>>>  cvtpi2pd, 0xf3e6, None, CpuAVX, Modrm|Vex|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
>>>> @@ -1184,7 +1171,7 @@ movlpd, 0x6613, None, CpuAVX, Modrm|Vex|
>>>>  movlpd, 0x660f12, None, CpuSSE2, D|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Qword|Unspecified|BaseIndex, RegXMM }
>>>>  movmskpd<sse2>, 0x660f50, None, <sse2:cpu>, Modrm|<sse2:attr>|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { RegXMM, Reg32|Reg64 }
>>>>  movntpd<sse2>, 0x660f2b, None, <sse2:cpu>, Modrm|<sse2:attr>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM, Xmmword|Unspecified|BaseIndex }
>>>> -movsd, 0xf210, None, CpuAVX, D|Modrm|VexLIG|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
>>>> +movsd, 0xf210, None, CpuAVX, D|Modrm|VexLIG|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX|Pass2, { Qword|Unspecified|BaseIndex, RegXMM }
>>>>  movsd, 0xf210, None, CpuAVX, D|Modrm|Vex=3|Space0F|VexVVVV=1|VexW=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { RegXMM, RegXMM }
>>>>  movsd, 0xf20f10, None, CpuSSE2, D|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
>>>>  movupd<sse2>, 0x660f10, None, <sse2:cpu>, D|Modrm|<sse2:attr>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
>>>>
>>>
>>> All these instructions with Pass2 have a vector register operand.  Can you
>>> use it instead?
>>
>> I probably could, but that would go against your request of limiting as
>> much as possible the allocation of a copy of the input line: Far more
>> insns would then also undergo this setup for a possible 2nd parsing pass.
>> It may be possible to limit the set some by taking into account further
>> attributes (like, leaving aside move-with-sign-extend which you object
>> to being corrected, SSE2AVX), but the collection of those checks would
>> likely be clumsy and harder to maintain than the new attribute. For
>> example the consistency check in i386-gen would then not be possible
>> anymore, and hence the whole thing would become _silently_ dependent on
>> the ordering of templates in the file.
> 
> Order dependency is the part of the implementation.
> 
>> Furthermore keying this condition to insn attributes would be less
>> future proof. While it may not be very likely for new colliding
>> mnemonics to appear, if one would which then doesn't fit the chosen
>> pattern, the tweaking of the conditional would be more fragile than
>> the mere adding of Pass2 in the appropriate template. (Even if you
>> continue to object to it, simply consider the last patch of this series
>> an example.)
>>
> 
> I like to avoid the second pass for many common instructions.
> The current assembler may not be ideal.  But it works.

So am I understanding correctly that you'd like me to drop Pass2 again,
accepting that a few too many insns would have the input buffer cloned
then?

Jan

next prev parent reply	other threads:[~2022-10-31 11:40 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-25  7:23 [PATCH v5 0/8] x86: suffix handling changes Jan Beulich
2022-10-25  7:24 ` [PATCH v5 1/8] x86: constify parse_insn()'s input Jan Beulich
2022-10-25  7:25 ` [PATCH v5 1/8] x86: introduce Pass2 insn attribute Jan Beulich
2022-10-25  7:30   ` [PATCH v5 2/8] " Jan Beulich
2022-10-25  7:26 ` [PATCH v5 3/8] x86: re-work insn/suffix recognition Jan Beulich
2022-10-27 17:21   ` H.J. Lu
2022-10-28  9:00     ` Jan Beulich
2022-10-28 16:12       ` H.J. Lu
2022-10-31 11:40         ` Jan Beulich [this message]
2022-10-31 16:59           ` H.J. Lu
2022-10-25  7:26 ` [PATCH v5 4/8] ix86: don't recognize/derive Q suffix in the common case Jan Beulich
2022-10-25  7:27 ` [PATCH v5 5/8] x86-64: allow HLE store of accumulator to absolute 32-bit address Jan Beulich
2022-10-25  7:27 ` [PATCH v5 6/8] x86: move bad-use-of-TLS-reloc check Jan Beulich
2022-10-25  7:28 ` [PATCH v5 7/8] x86: drop (now) stray IsString Jan Beulich
2022-10-25  7:29 ` [PATCH v5 8/8] x86: further re-work insn/suffix recognition to also cover MOVSX Jan Beulich
2022-10-25 17:10   ` H.J. Lu
2022-10-26  9:07     ` Jan Beulich
2022-10-27  0:11       ` H.J. Lu
2022-10-27  6:31         ` Jan Beulich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b90e9111-e6a8-4d57-ba7f-ffe2f9d5663e@suse.com \
    --to=jbeulich@suse.com \
    --cc=binutils@sourceware.org \
    --cc=hjl.tools@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).