From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-lj1-x235.google.com (mail-lj1-x235.google.com [IPv6:2a00:1450:4864:20::235]) by sourceware.org (Postfix) with ESMTPS id 573F938582A2 for ; Fri, 28 Oct 2022 16:12:43 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 573F938582A2 Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=gmail.com Received: by mail-lj1-x235.google.com with SMTP id h12so2210350ljg.9 for ; Fri, 28 Oct 2022 09:12:43 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; h=cc:to:subject:message-id:date:from:in-reply-to:references :mime-version:from:to:cc:subject:date:message-id:reply-to; bh=ulmJTSVmpNPgOyIf37GIAfSHBFl5Vp/QYJSJVI7lbl0=; b=jbTY2bBPhKoVq2qBf4Hzrs3+EKx/D5kI3DJLUofKLfBKWPITnOu5S4buGnf01ewmUP sHBC3GaoteB2sY/npIckkUdZuphkPqD1J+RrSyYNz4COGVpr3wi3z0AEJsD3d1uVgskM aR/OJgUS2NX3cgv6rOF8yqR8RPDlXLzTKwABwXlcXLQad0BCbfTMA49tUbzhkiYcSLID ZjZJqmcjDhk8HA4TUYJxxr0rrU4GOmkMI+79IuaJ9ci69HdasIrjmVAGUx7jwa4jdbu/ HWcxGM0TMV61yJGjYitYmYfPsadKJkNov6nHUm7v8XGt5y7OA37s6M5wUioc+Eh8SI19 4tIA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=cc:to:subject:message-id:date:from:in-reply-to:references :mime-version:x-gm-message-state:from:to:cc:subject:date:message-id :reply-to; bh=ulmJTSVmpNPgOyIf37GIAfSHBFl5Vp/QYJSJVI7lbl0=; b=cM30boyr7FVoSMQTB3VidHBvU2xARInmfNQf3wJpbjZ4KuBZ30v6a9r5xIqbpu1Eb3 nPutSbhbubthyYiApP6vmm78ruX+KQCzqdrfJ4Hz+gcPmzxHANkwBTBOb4Eed1V/DH2D m+ZPYk80T/zYcKs9TBBGx9aDwlJbdxVWWWGxCEUSVzJ1hIIDyHWw4wxPPrlzwWr0dnYa V2Bk3IADsDawzSRd1U/RsGmUFQbVNJ1JBaePzWuvBJWEPUYK9lCMng4Q1prFG5cvOT/g IbXP9mwRE64Rf83/VcxrAJ6QVlqiuKTM50mKJYqA/W0lpX29Vm+LvmphdwCjBb5WrDfE Yc5Q== X-Gm-Message-State: ACrzQf1/xWUFkK82srD47CCGr+Bl8g93HR/tWQQsk3xn8zUnDyYzazoZ Qln5AI94vYPvsD1PmPz/NydLR1J9NQ4FRcCl504f+foC X-Google-Smtp-Source: 
AMsMyM7SWTVs4WTOpaBugQoabvaZvpi52d4c0u6IpFR+nNh0dghvW9mqG7pYymPD7AcFmZbIERy/KO1VJmRUgGDjFL4= X-Received: by 2002:a2e:9d5a:0:b0:25e:2c67:edaf with SMTP id y26-20020a2e9d5a000000b0025e2c67edafmr105025ljj.437.1666973561757; Fri, 28 Oct 2022 09:12:41 -0700 (PDT) MIME-Version: 1.0 References: <7250dab9-e218-e6dd-4c74-23da9f611ab4@suse.com> In-Reply-To: From: "H.J. Lu" Date: Fri, 28 Oct 2022 09:12:05 -0700 Message-ID: Subject: Re: [PATCH v5 3/8] x86: re-work insn/suffix recognition To: Jan Beulich Cc: Binutils Content-Type: text/plain; charset="UTF-8" X-Spam-Status: No, score=-3017.2 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,FREEMAIL_FROM,RCVD_IN_DNSWL_NONE,SPF_HELO_NONE,SPF_PASS,TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: On Fri, Oct 28, 2022 at 2:00 AM Jan Beulich wrote: > > On 27.10.2022 19:21, H.J. Lu wrote: > > On Tue, Oct 25, 2022 at 12:26 AM Jan Beulich wrote: > >> @@ -989,13 +976,13 @@ emms, 0xf77, None, CpuMMX, No_bSuf|No_wS > >> // copying between Reg64/Mem64 and RegXMM/RegMMX, as is mandated by Intel's > >> // spec). AMD's spec, having been in existence for much longer, failed to > >> // recognize that and specified movd for 32- and 64-bit operations. 
> >> -movd, 0x666e, None, CpuAVX, D|Modrm|Vex128|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Reg32|Unspecified|BaseIndex, RegXMM } > >> +movd, 0x666e, None, CpuAVX, D|Modrm|Vex128|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX|Pass2, { Reg32|Unspecified|BaseIndex, RegXMM } > >> movd, 0x666e, None, CpuAVX|Cpu64, D|Modrm|Vex=1|Space0F|VexW1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64|SSE2AVX, { Reg64|BaseIndex, RegXMM } > >> movd, 0x660f6e, None, CpuSSE2, D|Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32|Unspecified|BaseIndex, RegXMM } > >> movd, 0x660f6e, None, CpuSSE2|Cpu64, D|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64, { Reg64|BaseIndex, RegXMM } > >> movd, 0xf6e, None, CpuMMX, D|Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32|Unspecified|BaseIndex, RegMMX } > >> movd, 0xf6e, None, CpuMMX|Cpu64, D|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64, { Reg64|BaseIndex, RegMMX } > >> -movq, 0xf37e, None, CpuAVX, Load|Modrm|Vex=1|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM } > >> +movq, 0xf37e, None, CpuAVX, Load|Modrm|Vex=1|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX|Pass2, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM } > >> movq, 0x66d6, None, CpuAVX, Modrm|Vex=1|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex|RegXMM } > >> movq, 0x666e, None, CpuAVX|Cpu64, D|Modrm|Vex=1|Space0F|VexW1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64|SSE2AVX, { Reg64|Unspecified|BaseIndex, RegXMM } > >> movq, 0xf30f7e, None, CpuSSE2, Load|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|Qword|BaseIndex|RegXMM, RegXMM } > >> @@ -1159,7 +1146,7 @@ andpd, 0x660f54, None, , > >> cmppd, 0x660fc2, , , 
Modrm||||No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM } > >> cmpsd, 0xf20fc2, , , Modrm||||No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM } > >> cmppd, 0x660fc2, None, , Modrm|||No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM } > >> -cmpsd, 0xf20fc2, None, , Modrm|||No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Qword|Unspecified|BaseIndex|RegXMM, RegXMM } > >> +cmpsd, 0xf20fc2, None, , Modrm|||No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Pass2, { Imm8, Qword|Unspecified|BaseIndex|RegXMM, RegXMM } > >> comisd, 0x660f2f, None, , Modrm||No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM } > >> cvtpi2pd, 0x660f2a, None, CpuSSE2, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMMX, RegXMM } > >> cvtpi2pd, 0xf3e6, None, CpuAVX, Modrm|Vex|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM } > >> @@ -1184,7 +1171,7 @@ movlpd, 0x6613, None, CpuAVX, Modrm|Vex| > >> movlpd, 0x660f12, None, CpuSSE2, D|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Qword|Unspecified|BaseIndex, RegXMM } > >> movmskpd, 0x660f50, None, , Modrm||IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { RegXMM, Reg32|Reg64 } > >> movntpd, 0x660f2b, None, , Modrm||No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM, Xmmword|Unspecified|BaseIndex } > >> -movsd, 0xf210, None, CpuAVX, D|Modrm|VexLIG|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM } > >> +movsd, 0xf210, None, CpuAVX, D|Modrm|VexLIG|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX|Pass2, { Qword|Unspecified|BaseIndex, RegXMM } > >> movsd, 0xf210, None, CpuAVX, D|Modrm|Vex=3|Space0F|VexVVVV=1|VexW=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { 
RegXMM, RegXMM } > >> movsd, 0xf20f10, None, CpuSSE2, D|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM } > >> movupd, 0x660f10, None, , D|Modrm||No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Unspecified|BaseIndex, RegXMM } > >> > > > > All these instructions with Pass2 have a vector register operand. Can you > > use it instead? > > I probably could, but that would go against your request of limiting as > much as possible the allocation of a copy of the input line: Far more > insns would then also undergo this setup for a possible 2nd parsing pass. > It may be possible to limit the set some by taking into account further > attributes (like, leaving aside move-with-sign-extend which you object > to being corrected, SSE2AVX), but the collection of those checks would > likely be clumsy and harder to maintain than the new attribute. For > example the consistency check in i386-gen would then not be possible > anymore, and hence the whole thing would become _silently_ dependent on > the ordering of templates in the file. Order dependency is part of the implementation. > Furthermore keying this condition to insn attributes would be less > future proof. While it may not be very likely for new colliding > mnemonics to appear, if one would which then doesn't fit the chosen > pattern, the tweaking of the conditional would be more fragile than > the mere adding of Pass2 in the appropriate template. (Even if you > continue to object to it, simply consider the last patch of this series > an example.) > I'd like to avoid the second pass for many common instructions. The current assembler may not be ideal. But it works. -- H.J.