Re: [PATCH 03/10] Support Intel AVX-NE-CONVERT

public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed

From: Jan Beulich <jbeulich@suse.com>
To: Haochen Jiang <haochen.jiang@intel.com>
Cc: hjl.tools@gmail.com, Kong Lingling <lingling.kong@intel.com>,
	binutils@sourceware.org
Subject: Re: [PATCH 03/10] Support Intel AVX-NE-CONVERT
Date: Fri, 14 Oct 2022 14:58:33 +0200	[thread overview]
Message-ID: <29239b15-58d7-c57d-caf6-a23c40814f3e@suse.com> (raw)
In-Reply-To: <20221014091248.4920-4-haochen.jiang@intel.com>

On 14.10.2022 11:12, Haochen Jiang wrote:
> --- a/opcodes/i386-dis.c
> +++ b/opcodes/i386-dis.c
> @@ -937,6 +937,8 @@ enum
>    MOD_VEX_0F385E_X86_64_P_3_W_0,
>    MOD_VEX_0F388C,
>    MOD_VEX_0F388E,
> +  MOD_VEX_0F38B0,
> +  MOD_VEX_0F38B1,
>    MOD_VEX_0F3A30_L_0,
>    MOD_VEX_0F3A31_L_0,
>    MOD_VEX_0F3A32_L_0,
> @@ -1132,6 +1134,9 @@ enum
>    PREFIX_VEX_0F3851_W_0,
>    PREFIX_VEX_0F385C_X86_64,
>    PREFIX_VEX_0F385E_X86_64,
> +  PREFIX_VEX_0F3872,
> +  PREFIX_VEX_0F38B0,
> +  PREFIX_VEX_0F38B1,

PREFIX_VEX_0F38B0_M_1_W_0 and PREFIX_VEX_0F38B1_M_1_W_0 respectively,
please. These enumerators are supposed to show the already decoded
components. Plus they need to be unambiguous if insns appear which
vary in (here) permitted ModR/M forms or the VEX.W bit.

> @@ -1526,8 +1531,11 @@ enum
>    VEX_W_0F385E_X86_64_P_1,
>    VEX_W_0F385E_X86_64_P_2,
>    VEX_W_0F385E_X86_64_P_3,
> +  VEX_W_0F3872_P_1,
>    VEX_W_0F3878,
>    VEX_W_0F3879,
> +  VEX_W_0F38B0,
> +  VEX_W_0F38B1,

VEX_W_0F38B0_M_1 and VEX_W_0F38B1_M_1 respectively, please.

> @@ -7618,6 +7654,14 @@ static const struct dis386 vex_w_table[][2] = {
>      /* VEX_W_0F3879 */
>      { "vpbroadcastw",	{ XM, EXw }, PREFIX_DATA },
>    },
> +  {
> +  /* VEX_W_0F38B0 */

Nit: Indentation.

> @@ -8428,6 +8472,14 @@ static const struct dis386 mod_table[][2] = {
>      /* MOD_VEX_0F388E */
>      { "vpmaskmov%DQ",	{ Mx, Vex, XM }, PREFIX_DATA },
>    },
> +  {
> +    /* MOD_VEX_0F38B0*/
> +    { VEX_W_TABLE (VEX_W_0F38B0) },
> +  },
> +  {
> +    /* MOD_VEX_0F38B1*/
> +    { VEX_W_TABLE (VEX_W_0F38B1) },
> +  },

Nit: Blanks missing in both of the comments.

> --- a/opcodes/i386-gen.c
> +++ b/opcodes/i386-gen.c
> @@ -249,6 +249,8 @@ static initializer cpu_flag_init[] =
>      "CPU_AVX2_FLAGS|CpuAVX_IFMA" },
>    { "CPU_AVX_VNNI_INT8_FLAGS",
>      "CPU_AVX2_FLAGS|CpuAVX_VNNI_INT8" },
> +  { "CPU_AVX_NE_CONVERT_FLAGS",
> +    "CPU_AVX2_FLAGS|CpuAVX_NE_CONVERT" },
>    { "CPU_IAMCU_FLAGS",
>      "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuIAMCU" },
>    { "CPU_ADX_FLAGS",
> @@ -447,6 +449,8 @@ static initializer cpu_flag_init[] =
>      "CpuAVX_IFMA" },
>    { "CPU_ANY_AVX_VNNI_INT8_FLAGS",
>      "CpuAVX_VNNI_INT8" },
> +  { "CPU_ANY_AVX_NE_CONVERT_FLAGS",
> +    "CpuAVX_NE_CONVERT" },
>  };

Like for patches 1 and 2 - CPU_ANY_AVX2_FLAGS also need adjusting.

> --- a/opcodes/i386-opc.tbl
> +++ b/opcodes/i386-opc.tbl
> @@ -3027,6 +3027,21 @@ movdir64b, 0x660f38f8, None, CpuMOVDIR64B, Modrm|AddrPrefixOpReg, { Unspecified|
>  
>  // MOVEDIR instructions end.
>  
> +// AVX_NE_CONVERT instructions.
> +
> +vbcstnebf162ps, 0xf3b1, None, CpuAVX_NE_CONVERT, Modrm|Vex|Space0F38|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, {Word|Unspecified|BaseIndex, RegXMM|RegYMM}
> +vbcstnesh2ps,   0x66b1, None, CpuAVX_NE_CONVERT, Modrm|Vex|Space0F38|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, {Word|Unspecified|BaseIndex, RegXMM|RegYMM}

Why CheckRegSize?

> +vcvtneoph2ps,   0xb0,   None,   CpuAVX_NE_CONVERT, Modrm|Vex|Space0F38|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, {Xmmword|Ymmword|Unspecified|BaseIndex, RegXMM|RegYMM}
> +vcvtneebf162ps, 0xf3b0, None,   CpuAVX_NE_CONVERT, Modrm|Vex|Space0F38|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, {Xmmword|Ymmword|Unspecified|BaseIndex, RegXMM|RegYMM}
> +vcvtneeph2ps,   0x66b0, None,   CpuAVX_NE_CONVERT, Modrm|Vex|Space0F38|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, {Xmmword|Ymmword|Unspecified|BaseIndex, RegXMM|RegYMM}
> +vcvtneobf162ps, 0xf2b0, None,   CpuAVX_NE_CONVERT, Modrm|Vex|Space0F38|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, {Xmmword|Ymmword|Unspecified|BaseIndex, RegXMM|RegYMM}
> +vcvtneps2bf16,  0xf372, None,   CpuAVX_NE_CONVERT, Modrm|PseudoVexPrefix|Vex128|Space0F38|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, {Xmmword|RegXMM|Unspecified|BaseIndex, RegXMM}
> +vcvtneps2bf16,  0xf372, None,   CpuAVX_NE_CONVERT, Modrm|PseudoVexPrefix|Vex256|Space0F38|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, {Ymmword|RegYMM|Unspecified|BaseIndex, RegXMM}
> +vcvtneps2bf16x, 0xf372, None,   CpuAVX_NE_CONVERT, Modrm|PseudoVexPrefix|Vex128|Space0F38|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ATTSyntax, {Xmmword|RegXMM|Unspecified|BaseIndex, RegXMM}
> +vcvtneps2bf16y,  0xf372, None,   CpuAVX_NE_CONVERT, Modrm|PseudoVexPrefix|Vex256|Space0F38|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ATTSyntax, {Ymmword|RegYMM|Unspecified|BaseIndex, RegXMM}

Nit: Excess blank after the first comma on the last of the above lines, and
excess blanks on all vcvt* lines before CpuAVX_NE_CONVERT.

Unnecessary Xmmword / Ymmword on the last four lines. And there again - why
CheckRegSize? But these would anyway benefit from using the AVX instance of
the <xy> template - then AT&T syntax handling for the suffix-less variant
would automatically be correct (it isn't in your code). Problem is that it
wasn't clear that there'll be a need to re-use xy's AVX instance this late
in the file, so the overloading of the name (commit e07ae9a3efee) will need
undoing. To keep names short (see commit 390ddd6f6812) I'd recommend using
Vxy and Exy (for the VEX and EVEX variants respectively).

Like for the earlier patches I disagree with the uses of PseudoVexPrefix
here (as said - the attribute should really go away again, and I have a
patch doing so), but then ...

> +
> +// AVX_NE_CONVERT instructions end.
> +
>  // AVX512_BF16 instructions.
>  
>  vcvtne2ps2bf16, 0xf272, None, CpuAVX512_BF16, Modrm|Space0F38|VexVVVV|Masking=3|VexW0|Broadcast|Disp8ShiftVL|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }

... the new insns (at least the ones having AVX512 counterparts) need to be
placed after the AVX512 ones (which you'll note use the <xy> template.

Jan

next prev parent reply	other threads:[~2022-10-14 12:58 UTC|newest]

Thread overview: 120+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-14  9:12 [PATCH 0/10] Add new Intel Sierra Forest, Grand Ridge, Granite Rapids Instructions Haochen Jiang
2022-10-14  9:12 ` [PATCH 01/10] Support Intel AVX-IFMA Haochen Jiang
2022-10-14  9:52   ` Jan Beulich
2022-10-14 18:10     ` H.J. Lu
2022-10-16  6:39       ` Jan Beulich
2022-10-17 22:23         ` H.J. Lu
2022-10-18  5:33           ` Jan Beulich
2022-10-18 21:28             ` H.J. Lu
2022-10-19  6:01               ` Jan Beulich
2022-10-19 21:27                 ` H.J. Lu
2022-10-20  6:15                   ` Jan Beulich
2022-10-24  2:07     ` Jiang, Haochen
2022-10-24  5:53     ` Jiang, Haochen
2022-10-24 19:09       ` H.J. Lu
2022-10-25  6:29       ` Jan Beulich
2022-10-14  9:12 ` [PATCH 02/10] Support Intel AVX-VNNI-INT8 Haochen Jiang
2022-10-14 10:57   ` Jan Beulich
2022-10-21  3:22     ` Jiang, Haochen
2022-10-25  1:52       ` H.J. Lu
2022-10-14  9:12 ` [PATCH 03/10] Support Intel AVX-NE-CONVERT Haochen Jiang
2022-10-14 12:58   ` Jan Beulich [this message]
2022-10-24  5:37     ` Kong, Lingling
2022-10-24  5:59     ` Kong, Lingling
2022-10-24 19:25       ` H.J. Lu
2022-10-25  6:44       ` Jan Beulich
2022-10-14  9:12 ` [PATCH 04/10] Support Intel CMPccXADD Haochen Jiang
2022-10-14 13:46   ` Jan Beulich
2022-10-14 18:27     ` H.J. Lu
2022-10-14 21:51       ` H.J. Lu
2022-10-16  6:34         ` Jan Beulich
2022-10-17 23:31           ` H.J. Lu
2022-10-16  6:25       ` Jan Beulich
2022-10-17 23:44         ` H.J. Lu
2022-10-16  6:19     ` Jan Beulich
2022-10-24  2:30     ` Jiang, Haochen
2022-10-24 19:12       ` H.J. Lu
2022-10-24  5:55     ` Jiang, Haochen
2022-10-25  6:53       ` Jan Beulich
2022-10-26  3:03         ` Jiang, Haochen
2022-10-26  8:49           ` Jan Beulich
2022-10-27  3:09             ` Jiang, Haochen
2022-10-27  6:37               ` Jan Beulich
2022-10-28  0:59                 ` Jiang, Haochen
2022-10-14  9:12 ` [PATCH 05/10] Add handler for more i386_cpu_flags Haochen Jiang
2022-10-14 13:53   ` Jan Beulich
2022-10-14  9:12 ` [PATCH 06/10] Support Intel RAO-INT Haochen Jiang
2022-10-14 14:38   ` Jan Beulich
2022-10-16  6:15     ` Jan Beulich
2022-10-24  3:12     ` Jiang, Haochen
2022-10-24 19:17       ` H.J. Lu
2022-10-24  5:56     ` Jiang, Haochen
2022-10-25  7:01       ` Jan Beulich
2022-10-26  5:16         ` Jiang, Haochen
2022-10-26  8:56           ` Jan Beulich
2022-10-27  3:50             ` Jiang, Haochen
2022-10-27  6:39               ` Jan Beulich
2022-10-27 18:46                 ` H.J. Lu
2022-10-28  6:52                   ` Jan Beulich
2022-10-28  8:10                     ` Jiang, Haochen
2022-10-28  8:22                       ` Jan Beulich
2022-10-28  8:31                         ` Jiang, Haochen
2022-10-28  8:40                           ` Jan Beulich
2022-10-28 16:08                             ` H.J. Lu
2022-10-31  9:41                               ` Jan Beulich
2022-10-31 16:49                                 ` H.J. Lu
2022-11-06 12:50         ` Kong, Lingling
2022-11-07  9:24           ` Jan Beulich
2022-11-07 13:37             ` Kong, Lingling
2022-11-07 20:03               ` H.J. Lu
2022-10-17 23:23   ` H.J. Lu
2022-10-18  5:38     ` Jan Beulich
2022-10-14  9:12 ` [PATCH 07/10] Support Intel WRMSRNS Haochen Jiang
2022-10-17  7:17   ` Jan Beulich
2022-10-24  2:52     ` Jiang, Haochen
2022-10-24  5:56     ` Jiang, Haochen
2022-10-24 19:14       ` H.J. Lu
2022-10-25  7:04       ` Jan Beulich
2022-10-14  9:12 ` [PATCH 08/10] Support Intel MSRLIST Haochen Jiang
2022-10-17  7:20   ` Jan Beulich
2022-10-24  3:03     ` Jiang, Haochen
2022-10-24  5:56     ` Jiang, Haochen
2022-10-24 19:15       ` H.J. Lu
2022-10-25  7:07       ` Jan Beulich
2022-10-14  9:12 ` [PATCH 09/10] Support Intel AMX-FP16 Haochen Jiang
2022-10-17  7:35   ` Jan Beulich
2022-10-18  9:01     ` Cui, Lili
2022-10-18  9:23       ` Jan Beulich
2022-10-18  9:33         ` Jiang, Haochen
2022-10-19 10:33         ` Cui, Lili
2022-10-19 13:35           ` Jan Beulich
2022-10-19 14:05             ` Cui, Lili
2022-10-19 14:09               ` Jan Beulich
2022-10-19 14:41                 ` Cui, Lili
2022-10-19 15:04                   ` Jan Beulich
2022-10-19 15:21                     ` Cui, Lili
2022-10-19 14:01           ` Jiang, Haochen
2022-10-19 14:13             ` Jan Beulich
2022-10-19 14:58               ` Jiang, Haochen
2022-10-25  6:02         ` Jan Beulich
2022-10-25 13:05           ` Cui, Lili
2022-10-14  9:12 ` [PATCH 10/10] Support Intel PREFETCHI Haochen Jiang
2022-10-17  8:15   ` Jan Beulich
2022-10-25 13:03     ` Cui, Lili
2022-10-25 15:41       ` Jan Beulich
2022-10-25 15:52       ` Jan Beulich
2022-10-25 17:01         ` H.J. Lu
2022-10-26 13:42           ` Cui, Lili
2022-10-26 13:53             ` Jan Beulich
2022-10-27  6:04               ` Cui, Lili
2022-10-27  6:45                 ` Jan Beulich
2022-10-27  7:01                   ` Cui, Lili
2022-10-27  7:15                     ` Jan Beulich
2022-10-27  7:43                       ` Cui, Lili
2022-10-28  9:03                       ` Cui, Lili
2022-10-28 15:54                     ` H.J. Lu
2022-10-31 13:23                       ` Cui, Lili
2022-10-31 14:45                     ` Mike Frysinger
2022-10-31 16:25                       ` H.J. Lu
2022-10-19 14:55 [PATCH v2 0/10] Add new Intel Sierra Forest, Grand Ridge, Granite Rapids Instructions Haochen Jiang
2022-10-19 14:56 ` [PATCH 03/10] Support Intel AVX-NE-CONVERT Haochen Jiang
2022-10-19 15:15 [PATCH v2 0/10] Add new Intel Sierra Forest, Grand Ridge, Granite Rapids Instructions (Resend) Haochen Jiang
2022-10-19 15:15 ` [PATCH 03/10] Support Intel AVX-NE-CONVERT Haochen Jiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=29239b15-58d7-c57d-caf6-a23c40814f3e@suse.com \
    --to=jbeulich@suse.com \
    --cc=binutils@sourceware.org \
    --cc=haochen.jiang@intel.com \
    --cc=hjl.tools@gmail.com \
    --cc=lingling.kong@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).