public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
From: "H.J. Lu" <hjl.tools@gmail.com>
To: "Kong, Lingling" <lingling.kong@intel.com>
Cc: "Beulich, Jan" <JBeulich@suse.com>,
	"Jiang, Haochen" <haochen.jiang@intel.com>,
	 "binutils@sourceware.org" <binutils@sourceware.org>
Subject: Re: [PATCH 03/10] Support Intel AVX-NE-CONVERT
Date: Mon, 24 Oct 2022 12:25:43 -0700	[thread overview]
Message-ID: <CAMe9rOo=OL3YX9DbPq46AWqosmPHArf2=+p1WB3nJnjGMe48pA@mail.gmail.com> (raw)
In-Reply-To: <DM4PR11MB54874F4EA9EB35A8209CBA0FEC2E9@DM4PR11MB5487.namprd11.prod.outlook.com>

On Sun, Oct 23, 2022 at 10:59 PM Kong, Lingling <lingling.kong@intel.com> wrote:
>
>
> Hi Jan,
>
> Thank you so much for reviewing this patch.  I really appreciate it.  I have made some changes in response to these review comments.
> Sorry, I forgot to delete i386-init.h and i386-tbl.h from the attachment in my last email.
>
> > -----Original Message-----
> > From: Jan Beulich <jbeulich@suse.com>
> > Sent: Friday, October 14, 2022 8:59 PM
> > To: Jiang, Haochen <haochen.jiang@intel.com>
> > Cc: hjl.tools@gmail.com; Kong, Lingling <lingling.kong@intel.com>;
> > binutils@sourceware.org
> > Subject: Re: [PATCH 03/10] Support Intel AVX-NE-CONVERT
> >
> > On 14.10.2022 11:12, Haochen Jiang wrote:
> > > --- a/opcodes/i386-dis.c
> > > +++ b/opcodes/i386-dis.c
> > > @@ -937,6 +937,8 @@ enum
> > >    MOD_VEX_0F385E_X86_64_P_3_W_0,
> > >    MOD_VEX_0F388C,
> > >    MOD_VEX_0F388E,
> > > +  MOD_VEX_0F38B0,
> > > +  MOD_VEX_0F38B1,
> > >    MOD_VEX_0F3A30_L_0,
> > >    MOD_VEX_0F3A31_L_0,
> > >    MOD_VEX_0F3A32_L_0,
> > > @@ -1132,6 +1134,9 @@ enum
> > >    PREFIX_VEX_0F3851_W_0,
> > >    PREFIX_VEX_0F385C_X86_64,
> > >    PREFIX_VEX_0F385E_X86_64,
> > > +  PREFIX_VEX_0F3872,
> > > +  PREFIX_VEX_0F38B0,
> > > +  PREFIX_VEX_0F38B1,
> >
> > PREFIX_VEX_0F38B0_M_1_W_0 and PREFIX_VEX_0F38B1_M_1_W_0
> > respectively, please. These enumerators are supposed to show the already
> > decoded components. Plus they need to be unambiguous if insns appear which
> > vary in (here) permitted ModR/M forms or the VEX.W bit.
>
> Yes, I changed it to PREFIX_VEX_0F38B0_M_1_W_0 and PREFIX_VEX_0F38B1_M_1_W_0.
>
> > > @@ -1526,8 +1531,11 @@ enum
> > >    VEX_W_0F385E_X86_64_P_1,
> > >    VEX_W_0F385E_X86_64_P_2,
> > >    VEX_W_0F385E_X86_64_P_3,
> > > +  VEX_W_0F3872_P_1,
> > >    VEX_W_0F3878,
> > >    VEX_W_0F3879,
> > > +  VEX_W_0F38B0,
> > > +  VEX_W_0F38B1,
> >
> > VEX_W_0F38B0_M_1 and VEX_W_0F38B1_M_1 respectively, please.
> >
> > > @@ -7618,6 +7654,14 @@ static const struct dis386 vex_w_table[][2] = {
> > >      /* VEX_W_0F3879 */
> > >      { "vpbroadcastw",      { XM, EXw }, PREFIX_DATA },
> > >    },
> > > +  {
> > > +  /* VEX_W_0F38B0 */
> >
> > Nit: Indentation.
>
>  Fixed.
>
> > > @@ -8428,6 +8472,14 @@ static const struct dis386 mod_table[][2] = {
> > >      /* MOD_VEX_0F388E */
> > >      { "vpmaskmov%DQ",      { Mx, Vex, XM }, PREFIX_DATA },
> > >    },
> > > +  {
> > > +    /* MOD_VEX_0F38B0*/
> > > +    { VEX_W_TABLE (VEX_W_0F38B0) },
> > > +  },
> > > +  {
> > > +    /* MOD_VEX_0F38B1*/
> > > +    { VEX_W_TABLE (VEX_W_0F38B1) },
> > > +  },
> >
> > Nit: Blanks missing in both of the comments.
>
> Fixed.
>
> > > --- a/opcodes/i386-gen.c
> > > +++ b/opcodes/i386-gen.c
> > > @@ -249,6 +249,8 @@ static initializer cpu_flag_init[] =
> > >      "CPU_AVX2_FLAGS|CpuAVX_IFMA" },
> > >    { "CPU_AVX_VNNI_INT8_FLAGS",
> > >      "CPU_AVX2_FLAGS|CpuAVX_VNNI_INT8" },
> > > +  { "CPU_AVX_NE_CONVERT_FLAGS",
> > > +    "CPU_AVX2_FLAGS|CpuAVX_NE_CONVERT" },
> > >    { "CPU_IAMCU_FLAGS",
> > >      "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuIAMCU" },
> > >    { "CPU_ADX_FLAGS",
> > > @@ -447,6 +449,8 @@ static initializer cpu_flag_init[] =
> > >      "CpuAVX_IFMA" },
> > >    { "CPU_ANY_AVX_VNNI_INT8_FLAGS",
> > >      "CpuAVX_VNNI_INT8" },
> > > +  { "CPU_ANY_AVX_NE_CONVERT_FLAGS",
> > > +    "CpuAVX_NE_CONVERT" },
> > >  };
> >
> > Like for patches 1 and 2 - CPU_ANY_AVX2_FLAGS also need adjusting.
>
> Fixed.
>
> > > --- a/opcodes/i386-opc.tbl
> > > +++ b/opcodes/i386-opc.tbl
> > > @@ -3027,6 +3027,21 @@ movdir64b, 0x660f38f8, None, CpuMOVDIR64B,
> > > Modrm|AddrPrefixOpReg, { Unspecified|
> > >
> > >  // MOVEDIR instructions end.
> > >
> > > +// AVX_NE_CONVERT instructions.
> > > +
> > > +vbcstnebf162ps, 0xf3b1, None, CpuAVX_NE_CONVERT,
> > Modrm|Vex|Space0F38|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No
> > _sSuf|No_qSuf|No_ldSuf, {Word|Unspecified|BaseIndex, RegXMM|RegYMM}
> > > +vbcstnesh2ps,   0x66b1, None, CpuAVX_NE_CONVERT,
> > Modrm|Vex|Space0F38|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No
> > _sSuf|No_qSuf|No_ldSuf, {Word|Unspecified|BaseIndex, RegXMM|RegYMM}
> >
> > Why CheckRegSize?
>
> Removed the CheckRegSize.
>
> > > +vcvtneoph2ps,   0xb0,   None,   CpuAVX_NE_CONVERT,
> > Modrm|Vex|Space0F38|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No
> > _sSuf|No_qSuf|No_ldSuf, {Xmmword|Ymmword|Unspecified|BaseIndex,
> > RegXMM|RegYMM}
> > > +vcvtneebf162ps, 0xf3b0, None,   CpuAVX_NE_CONVERT,
> > Modrm|Vex|Space0F38|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No
> > _sSuf|No_qSuf|No_ldSuf, {Xmmword|Ymmword|Unspecified|BaseIndex,
> > RegXMM|RegYMM}
> > > +vcvtneeph2ps,   0x66b0, None,   CpuAVX_NE_CONVERT,
> > Modrm|Vex|Space0F38|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No
> > _sSuf|No_qSuf|No_ldSuf, {Xmmword|Ymmword|Unspecified|BaseIndex,
> > RegXMM|RegYMM}
> > > +vcvtneobf162ps, 0xf2b0, None,   CpuAVX_NE_CONVERT,
> > Modrm|Vex|Space0F38|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No
> > _sSuf|No_qSuf|No_ldSuf, {Xmmword|Ymmword|Unspecified|BaseIndex,
> > RegXMM|RegYMM}
> > > +vcvtneps2bf16,  0xf372, None,   CpuAVX_NE_CONVERT,
> > Modrm|PseudoVexPrefix|Vex128|Space0F38|VexW0|CheckRegSize|No_bSuf|
> > No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf,
> > {Xmmword|RegXMM|Unspecified|BaseIndex, RegXMM}
> > > +vcvtneps2bf16,  0xf372, None,   CpuAVX_NE_CONVERT,
> > Modrm|PseudoVexPrefix|Vex256|Space0F38|VexW0|CheckRegSize|No_bSuf|
> > No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf,
> > {Ymmword|RegYMM|Unspecified|BaseIndex, RegXMM}
> > > +vcvtneps2bf16x, 0xf372, None,   CpuAVX_NE_CONVERT,
> > Modrm|PseudoVexPrefix|Vex128|Space0F38|VexW0|CheckRegSize|No_bSuf|
> > No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ATTSyntax,
> > {Xmmword|RegXMM|Unspecified|BaseIndex, RegXMM}
> > > +vcvtneps2bf16y,  0xf372, None,   CpuAVX_NE_CONVERT,
> > Modrm|PseudoVexPrefix|Vex256|Space0F38|VexW0|CheckRegSize|No_bSuf|
> > No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ATTSyntax,
> > {Ymmword|RegYMM|Unspecified|BaseIndex, RegXMM}
> >
> > Nit: Excess blank after the first comma on the last of the above lines, and excess
> > blanks on all vcvt* lines before CpuAVX_NE_CONVERT.
>
> Fixed.
>
> > Unnecessary Xmmword / Ymmword on the last four lines. And there again - why
> > CheckRegSize? But these would anyway benefit from using the AVX instance of
> > the <xy> template - then AT&T syntax handling for the suffix-less variant would
> > automatically be correct (it isn't in your code). Problem is that it wasn't clear
> > that there'll be a need to re-use xy's AVX instance this late in the file, so the
> > overloading of the name (commit e07ae9a3efee) will need undoing. To keep
> > names short (see commit 390ddd6f6812) I'd recommend using Vxy and Exy (for
> > the VEX and EVEX variants respectively).
> >
> > Like for the earlier patches I disagree with the uses of PseudoVexPrefix here (as
> > said - the attribute should really go away again, and I have a patch doing so), but
> > then ...
>
> Removed the CheckRegSize, Xmmword / Ymmword and PseudoVexPrefix.
> And added <Vxy> and <Exy>.
>
> > > +
> > > +// AVX_NE_CONVERT instructions end.
> > > +
> > >  // AVX512_BF16 instructions.
> > >
> > >  vcvtne2ps2bf16, 0xf272, None, CpuAVX512_BF16,
> > >
> > Modrm|Space0F38|VexVVVV|Masking=3|VexW0|Broadcast|Disp8ShiftVL|Chec
> > kRe
> > > gSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, {
> > > RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex,
> > > RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
> >
> > ... the new insns (at least the ones having AVX512 counterparts) need to be
> > placed after the AVX512 ones (which you'll note use the <xy> template).
> >
> > Jan
>
> Yes, they are now placed after the AVX512 ones.
>
> Thanks,
> Lingling

Please add some comments to explain how <Exy> and <Vxy> are used,
something like: "<Vxy> is used on VEX instructions with x/y suffixes
and <Exy> is used on EVEX instructions with x/y suffixes."

Thanks.

-- 
H.J.

  reply	other threads:[~2022-10-24 19:26 UTC|newest]

Thread overview: 120+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-14  9:12 [PATCH 0/10] Add new Intel Sierra Forest, Grand Ridge, Granite Rapids Instructions Haochen Jiang
2022-10-14  9:12 ` [PATCH 01/10] Support Intel AVX-IFMA Haochen Jiang
2022-10-14  9:52   ` Jan Beulich
2022-10-14 18:10     ` H.J. Lu
2022-10-16  6:39       ` Jan Beulich
2022-10-17 22:23         ` H.J. Lu
2022-10-18  5:33           ` Jan Beulich
2022-10-18 21:28             ` H.J. Lu
2022-10-19  6:01               ` Jan Beulich
2022-10-19 21:27                 ` H.J. Lu
2022-10-20  6:15                   ` Jan Beulich
2022-10-24  2:07     ` Jiang, Haochen
2022-10-24  5:53     ` Jiang, Haochen
2022-10-24 19:09       ` H.J. Lu
2022-10-25  6:29       ` Jan Beulich
2022-10-14  9:12 ` [PATCH 02/10] Support Intel AVX-VNNI-INT8 Haochen Jiang
2022-10-14 10:57   ` Jan Beulich
2022-10-21  3:22     ` Jiang, Haochen
2022-10-25  1:52       ` H.J. Lu
2022-10-14  9:12 ` [PATCH 03/10] Support Intel AVX-NE-CONVERT Haochen Jiang
2022-10-14 12:58   ` Jan Beulich
2022-10-24  5:37     ` Kong, Lingling
2022-10-24  5:59     ` Kong, Lingling
2022-10-24 19:25       ` H.J. Lu [this message]
2022-10-25  6:44       ` Jan Beulich
2022-10-14  9:12 ` [PATCH 04/10] Support Intel CMPccXADD Haochen Jiang
2022-10-14 13:46   ` Jan Beulich
2022-10-14 18:27     ` H.J. Lu
2022-10-14 21:51       ` H.J. Lu
2022-10-16  6:34         ` Jan Beulich
2022-10-17 23:31           ` H.J. Lu
2022-10-16  6:25       ` Jan Beulich
2022-10-17 23:44         ` H.J. Lu
2022-10-16  6:19     ` Jan Beulich
2022-10-24  2:30     ` Jiang, Haochen
2022-10-24 19:12       ` H.J. Lu
2022-10-24  5:55     ` Jiang, Haochen
2022-10-25  6:53       ` Jan Beulich
2022-10-26  3:03         ` Jiang, Haochen
2022-10-26  8:49           ` Jan Beulich
2022-10-27  3:09             ` Jiang, Haochen
2022-10-27  6:37               ` Jan Beulich
2022-10-28  0:59                 ` Jiang, Haochen
2022-10-14  9:12 ` [PATCH 05/10] Add handler for more i386_cpu_flags Haochen Jiang
2022-10-14 13:53   ` Jan Beulich
2022-10-14  9:12 ` [PATCH 06/10] Support Intel RAO-INT Haochen Jiang
2022-10-14 14:38   ` Jan Beulich
2022-10-16  6:15     ` Jan Beulich
2022-10-24  3:12     ` Jiang, Haochen
2022-10-24 19:17       ` H.J. Lu
2022-10-24  5:56     ` Jiang, Haochen
2022-10-25  7:01       ` Jan Beulich
2022-10-26  5:16         ` Jiang, Haochen
2022-10-26  8:56           ` Jan Beulich
2022-10-27  3:50             ` Jiang, Haochen
2022-10-27  6:39               ` Jan Beulich
2022-10-27 18:46                 ` H.J. Lu
2022-10-28  6:52                   ` Jan Beulich
2022-10-28  8:10                     ` Jiang, Haochen
2022-10-28  8:22                       ` Jan Beulich
2022-10-28  8:31                         ` Jiang, Haochen
2022-10-28  8:40                           ` Jan Beulich
2022-10-28 16:08                             ` H.J. Lu
2022-10-31  9:41                               ` Jan Beulich
2022-10-31 16:49                                 ` H.J. Lu
2022-11-06 12:50         ` Kong, Lingling
2022-11-07  9:24           ` Jan Beulich
2022-11-07 13:37             ` Kong, Lingling
2022-11-07 20:03               ` H.J. Lu
2022-10-17 23:23   ` H.J. Lu
2022-10-18  5:38     ` Jan Beulich
2022-10-14  9:12 ` [PATCH 07/10] Support Intel WRMSRNS Haochen Jiang
2022-10-17  7:17   ` Jan Beulich
2022-10-24  2:52     ` Jiang, Haochen
2022-10-24  5:56     ` Jiang, Haochen
2022-10-24 19:14       ` H.J. Lu
2022-10-25  7:04       ` Jan Beulich
2022-10-14  9:12 ` [PATCH 08/10] Support Intel MSRLIST Haochen Jiang
2022-10-17  7:20   ` Jan Beulich
2022-10-24  3:03     ` Jiang, Haochen
2022-10-24  5:56     ` Jiang, Haochen
2022-10-24 19:15       ` H.J. Lu
2022-10-25  7:07       ` Jan Beulich
2022-10-14  9:12 ` [PATCH 09/10] Support Intel AMX-FP16 Haochen Jiang
2022-10-17  7:35   ` Jan Beulich
2022-10-18  9:01     ` Cui, Lili
2022-10-18  9:23       ` Jan Beulich
2022-10-18  9:33         ` Jiang, Haochen
2022-10-19 10:33         ` Cui, Lili
2022-10-19 13:35           ` Jan Beulich
2022-10-19 14:05             ` Cui, Lili
2022-10-19 14:09               ` Jan Beulich
2022-10-19 14:41                 ` Cui, Lili
2022-10-19 15:04                   ` Jan Beulich
2022-10-19 15:21                     ` Cui, Lili
2022-10-19 14:01           ` Jiang, Haochen
2022-10-19 14:13             ` Jan Beulich
2022-10-19 14:58               ` Jiang, Haochen
2022-10-25  6:02         ` Jan Beulich
2022-10-25 13:05           ` Cui, Lili
2022-10-14  9:12 ` [PATCH 10/10] Support Intel PREFETCHI Haochen Jiang
2022-10-17  8:15   ` Jan Beulich
2022-10-25 13:03     ` Cui, Lili
2022-10-25 15:41       ` Jan Beulich
2022-10-25 15:52       ` Jan Beulich
2022-10-25 17:01         ` H.J. Lu
2022-10-26 13:42           ` Cui, Lili
2022-10-26 13:53             ` Jan Beulich
2022-10-27  6:04               ` Cui, Lili
2022-10-27  6:45                 ` Jan Beulich
2022-10-27  7:01                   ` Cui, Lili
2022-10-27  7:15                     ` Jan Beulich
2022-10-27  7:43                       ` Cui, Lili
2022-10-28  9:03                       ` Cui, Lili
2022-10-28 15:54                     ` H.J. Lu
2022-10-31 13:23                       ` Cui, Lili
2022-10-31 14:45                     ` Mike Frysinger
2022-10-31 16:25                       ` H.J. Lu
2022-10-19 14:55 [PATCH v2 0/10] Add new Intel Sierra Forest, Grand Ridge, Granite Rapids Instructions Haochen Jiang
2022-10-19 14:56 ` [PATCH 03/10] Support Intel AVX-NE-CONVERT Haochen Jiang
2022-10-19 15:15 [PATCH v2 0/10] Add new Intel Sierra Forest, Grand Ridge, Granite Rapids Instructions (Resend) Haochen Jiang
2022-10-19 15:15 ` [PATCH 03/10] Support Intel AVX-NE-CONVERT Haochen Jiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAMe9rOo=OL3YX9DbPq46AWqosmPHArf2=+p1WB3nJnjGMe48pA@mail.gmail.com' \
    --to=hjl.tools@gmail.com \
    --cc=JBeulich@suse.com \
    --cc=binutils@sourceware.org \
    --cc=haochen.jiang@intel.com \
    --cc=lingling.kong@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).