public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
From: "H.J. Lu" <hjl.tools@gmail.com>
To: Jan Beulich <jbeulich@suse.com>
Cc: Binutils <binutils@sourceware.org>
Subject: Re: [PATCH v5 3/8] x86: re-work insn/suffix recognition
Date: Mon, 31 Oct 2022 09:59:03 -0700	[thread overview]
Message-ID: <CAMe9rOoJ-BGWNRsu6P1K5OqAdTA-E02UP18H_gtqvZKUHPZt+w@mail.gmail.com> (raw)
In-Reply-To: <b90e9111-e6a8-4d57-ba7f-ffe2f9d5663e@suse.com>

On Mon, Oct 31, 2022 at 4:40 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 28.10.2022 18:12, H.J. Lu wrote:
> > On Fri, Oct 28, 2022 at 2:00 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>
> >> On 27.10.2022 19:21, H.J. Lu wrote:
> >>> On Tue, Oct 25, 2022 at 12:26 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>> @@ -989,13 +976,13 @@ emms, 0xf77, None, CpuMMX, No_bSuf|No_wS
> >>>>  // copying between Reg64/Mem64 and RegXMM/RegMMX, as is mandated by Intel's
> >>>>  // spec). AMD's spec, having been in existence for much longer, failed to
> >>>>  // recognize that and specified movd for 32- and 64-bit operations.
> >>>> -movd, 0x666e, None, CpuAVX, D|Modrm|Vex128|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Reg32|Unspecified|BaseIndex, RegXMM }
> >>>> +movd, 0x666e, None, CpuAVX, D|Modrm|Vex128|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX|Pass2, { Reg32|Unspecified|BaseIndex, RegXMM }
> >>>>  movd, 0x666e, None, CpuAVX|Cpu64, D|Modrm|Vex=1|Space0F|VexW1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64|SSE2AVX, { Reg64|BaseIndex, RegXMM }
> >>>>  movd, 0x660f6e, None, CpuSSE2, D|Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32|Unspecified|BaseIndex, RegXMM }
> >>>>  movd, 0x660f6e, None, CpuSSE2|Cpu64, D|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64, { Reg64|BaseIndex, RegXMM }
> >>>>  movd, 0xf6e, None, CpuMMX, D|Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32|Unspecified|BaseIndex, RegMMX }
> >>>>  movd, 0xf6e, None, CpuMMX|Cpu64, D|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64, { Reg64|BaseIndex, RegMMX }
> >>>> -movq, 0xf37e, None, CpuAVX, Load|Modrm|Vex=1|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
> >>>> +movq, 0xf37e, None, CpuAVX, Load|Modrm|Vex=1|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX|Pass2, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
> >>>>  movq, 0x66d6, None, CpuAVX, Modrm|Vex=1|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex|RegXMM }
> >>>>  movq, 0x666e, None, CpuAVX|Cpu64, D|Modrm|Vex=1|Space0F|VexW1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64|SSE2AVX, { Reg64|Unspecified|BaseIndex, RegXMM }
> >>>>  movq, 0xf30f7e, None, CpuSSE2, Load|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|Qword|BaseIndex|RegXMM, RegXMM }
> >>>> @@ -1159,7 +1146,7 @@ andpd<sse2>, 0x660f54, None, <sse2:cpu>,
> >>>>  cmp<frel>pd<sse2>, 0x660fc2, <frel:imm>, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|<frel:comm>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM }
> >>>>  cmp<frel>sd<sse2>, 0xf20fc2, <frel:imm>, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|<frel:comm>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM }
> >>>>  cmppd<sse2>, 0x660fc2, None, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
> >>>> -cmpsd<sse2>, 0xf20fc2, None, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
> >>>> +cmpsd<sse2>, 0xf20fc2, None, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Pass2, { Imm8, Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
> >>>>  comisd<sse2>, 0x660f2f, None, <sse2:cpu>, Modrm|<sse2:scal>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
> >>>>  cvtpi2pd, 0x660f2a, None, CpuSSE2, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMMX, RegXMM }
> >>>>  cvtpi2pd, 0xf3e6, None, CpuAVX, Modrm|Vex|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
> >>>> @@ -1184,7 +1171,7 @@ movlpd, 0x6613, None, CpuAVX, Modrm|Vex|
> >>>>  movlpd, 0x660f12, None, CpuSSE2, D|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Qword|Unspecified|BaseIndex, RegXMM }
> >>>>  movmskpd<sse2>, 0x660f50, None, <sse2:cpu>, Modrm|<sse2:attr>|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { RegXMM, Reg32|Reg64 }
> >>>>  movntpd<sse2>, 0x660f2b, None, <sse2:cpu>, Modrm|<sse2:attr>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM, Xmmword|Unspecified|BaseIndex }
> >>>> -movsd, 0xf210, None, CpuAVX, D|Modrm|VexLIG|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM }
> >>>> +movsd, 0xf210, None, CpuAVX, D|Modrm|VexLIG|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX|Pass2, { Qword|Unspecified|BaseIndex, RegXMM }
> >>>>  movsd, 0xf210, None, CpuAVX, D|Modrm|Vex=3|Space0F|VexVVVV=1|VexW=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { RegXMM, RegXMM }
> >>>>  movsd, 0xf20f10, None, CpuSSE2, D|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM }
> >>>>  movupd<sse2>, 0x660f10, None, <sse2:cpu>, D|Modrm|<sse2:attr>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
> >>>>
> >>>
> >>> All these instructions with Pass2 have a vector register operand.  Can you
> >>> use it instead?
> >>
> >> I probably could, but that would go against your request of limiting as
> >> much as possible the allocation of a copy of the input line: Far more
> >> insns would then also undergo this setup for a possible 2nd parsing pass.
> >> It may be possible to limit the set some by taking into account further
> >> attributes (like, leaving aside move-with-sign-extend which you object
> >> to being corrected, SSE2AVX), but the collection of those checks would
> >> likely be clumsy and harder to maintain than the new attribute. For
> >> example the consistency check in i386-gen would then not be possible
> >> anymore, and hence the whole thing would become _silently_ dependent on
> >> the ordering of templates in the file.
> >
> > Order dependency is part of the implementation.
> >
> >> Furthermore keying this condition to insn attributes would be less
> >> future proof. While it may not be very likely for new colliding
> >> mnemonics to appear, if one did appear that doesn't fit the chosen
> >> pattern, the tweaking of the conditional would be more fragile than
> >> the mere adding of Pass2 in the appropriate template. (Even if you
> >> continue to object to it, simply consider the last patch of this series
> >> an example.)
> >>
> >
> > I'd like to avoid the second pass for many common instructions.
> > The current assembler may not be ideal.  But it works.
>
> So am I understanding correctly that you'd like me to drop Pass2 again,
> accepting that a few too many insns would have the input buffer cloned
> then?
>

Yes.  It is too much to require the second pass for common instructions.


-- 
H.J.

  reply	other threads:[~2022-10-31 16:59 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-25  7:23 [PATCH v5 0/8] x86: suffix handling changes Jan Beulich
2022-10-25  7:24 ` [PATCH v5 1/8] x86: constify parse_insn()'s input Jan Beulich
2022-10-25  7:25 ` [PATCH v5 1/8] x86: introduce Pass2 insn attribute Jan Beulich
2022-10-25  7:30   ` [PATCH v5 2/8] " Jan Beulich
2022-10-25  7:26 ` [PATCH v5 3/8] x86: re-work insn/suffix recognition Jan Beulich
2022-10-27 17:21   ` H.J. Lu
2022-10-28  9:00     ` Jan Beulich
2022-10-28 16:12       ` H.J. Lu
2022-10-31 11:40         ` Jan Beulich
2022-10-31 16:59           ` H.J. Lu [this message]
2022-10-25  7:26 ` [PATCH v5 4/8] ix86: don't recognize/derive Q suffix in the common case Jan Beulich
2022-10-25  7:27 ` [PATCH v5 5/8] x86-64: allow HLE store of accumulator to absolute 32-bit address Jan Beulich
2022-10-25  7:27 ` [PATCH v5 6/8] x86: move bad-use-of-TLS-reloc check Jan Beulich
2022-10-25  7:28 ` [PATCH v5 7/8] x86: drop (now) stray IsString Jan Beulich
2022-10-25  7:29 ` [PATCH v5 8/8] x86: further re-work insn/suffix recognition to also cover MOVSX Jan Beulich
2022-10-25 17:10   ` H.J. Lu
2022-10-26  9:07     ` Jan Beulich
2022-10-27  0:11       ` H.J. Lu
2022-10-27  6:31         ` Jan Beulich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAMe9rOoJ-BGWNRsu6P1K5OqAdTA-E02UP18H_gtqvZKUHPZt+w@mail.gmail.com \
    --to=hjl.tools@gmail.com \
    --cc=binutils@sourceware.org \
    --cc=jbeulich@suse.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).