From: Jan Beulich <jbeulich@suse.com>
To: Haochen Jiang <haochen.jiang@intel.com>
Cc: hjl.tools@gmail.com, binutils@sourceware.org
Subject: Re: [PATCH] Support Intel AVX10.1
Date: Thu, 27 Jul 2023 13:23:09 +0200 [thread overview]
Message-ID: <9f3625f9-82b6-3fd4-fd9e-f7c79b10f4b3@suse.com> (raw)
In-Reply-To: <20230727071550.1814187-1-haochen.jiang@intel.com>
On 27.07.2023 09:15, Haochen Jiang wrote:
> @@ -1845,7 +1846,9 @@ cpu_flags_match (const insn_template *t)
> i386_cpu_flags cpu = cpu_arch_flags;
>
> /* AVX512VL is no standalone feature - match it and then strip it. */
> - if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
> + if (x.bitfield.cpuavx512vl
> + && !cpu.bitfield.cpuavx512vl
> + && !cpu.bitfield.cpuavx10_1)
> return match;
> x.bitfield.cpuavx512vl = 0;
I _think_ the code change is correct, but the comment needs updating
(thereby also clarifying what the intention here is).
> @@ -6382,7 +6386,10 @@ check_VecOperands (const insn_template *t)
> cpu = cpu_flags_and (t->cpu_flags, avx512);
> if (!cpu_flags_all_zero (&cpu)
> && !t->cpu_flags.bitfield.cpuavx512vl
> - && !cpu_arch_flags.bitfield.cpuavx512vl)
> + && !cpu_arch_flags.bitfield.cpuavx512vl
> + && (!t->cpu_flags.bitfield.cpuavx10_1
> + || (t->cpu_flags.bitfield.cpuavx10_1
> + && !cpu_arch_flags.bitfield.cpuavx10_1)))
This first of all can be simplified to
if (!cpu_flags_all_zero (&cpu)
&& !t->cpu_flags.bitfield.cpuavx512vl
&& !cpu_arch_flags.bitfield.cpuavx512vl
&& (!t->cpu_flags.bitfield.cpuavx10_1
|| !cpu_arch_flags.bitfield.cpuavx10_1))
which doesn't look quite right. But of course the two features also
aren't symmetric, so I may well be wrong. First of all the remark at
the very bottom of this mail needs resolving, though. Also for ...
> @@ -13794,7 +13801,8 @@ static bool check_register (const reg_entry *r)
> if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
> return false;
>
> - if (!cpu_arch_flags.bitfield.cpuavx512f)
> + if (!cpu_arch_flags.bitfield.cpuavx512f
> + && !cpu_arch_flags.bitfield.cpuavx10_1)
> {
> if (r->reg_type.bitfield.zmmword
> || r->reg_type.bitfield.class == RegMask)
> @@ -13826,7 +13834,8 @@ static bool check_register (const reg_entry *r)
> mode, and require EVEX encoding. */
> if (r->reg_flags & RegVRex)
> {
> - if (!cpu_arch_flags.bitfield.cpuavx512f
> + if ((!cpu_arch_flags.bitfield.cpuavx512f
> + && !cpu_arch_flags.bitfield.cpuavx10_1)
> || flag_code != CODE_64BIT)
> return false;
... the changes to make here.
> --- a/gas/testsuite/gas/i386/i386.exp
> +++ b/gas/testsuite/gas/i386/i386.exp
> @@ -506,6 +506,7 @@ if [gas_32_check] then {
> run_dump_test "sm4"
> run_dump_test "sm4-intel"
> run_list_test "pbndkb-inval"
> + run_list_test "avx10_1-inval"
> run_list_test "sg"
> run_dump_test "clzero"
> run_dump_test "invlpgb"
Only an inval test? I'm inclined to say you either want both here, or
leave it to just the 64-bit testing.
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-avx10_1.s
> @@ -0,0 +1,97 @@
> +# Check AVX10.1 instructions
> +
> + .text
> +_start:
> + .arch .noavx512f
This implies ...
> + kaddd %k1, %k2, %k3
> + kaddb %k1, %k2, %k3
> + kaddw %k1, %k2, %k3
> + kaddq %k1, %k2, %k3
> + kmovb (%ecx), %k5
> + kmovb %k5, -123456(%esp,%esi,8)
> + kmovd -123456(%esp,%esi,8), %k5
> + kmovd %ebp, %k5
> + kmovw %k5, (%ecx)
> + kmovw %k5, %ebp
> + vaddpd %xmm4, %xmm5, %xmm6{%k7}
> + vaddpd (%ecx), %xmm5, %xmm6{%k7}
> + vaddpd (%eax){1to2}, %xmm5, %xmm6{%k7}
> + vaddpd 2048(%edx), %xmm5, %xmm6{%k7}
> + vaddpd -2064(%edx), %xmm5, %xmm6{%k7}
> + vaddpd 1024(%edx){1to2}, %xmm5, %xmm6{%k7}
> + vaddpd -1032(%edx){1to2}, %xmm5, %xmm6{%k7}
> + vaddpd %zmm4, %zmm5, %zmm6{%k7}{z}
> + vaddpd -123456(%esp,%esi,8), %ymm5, %ymm6{%k7}
> + vaddpd 8192(%edx), %zmm5, %zmm6{%k7}
> + vaddpd -4096(%edx), %ymm5, %ymm6{%k7}
> + vaddpd 1016(%edx){1to4}, %ymm5, %ymm6{%k7}
> + vaddpd -2048(%edx){1to8}, %zmm5, %zmm6{%k7}
> + vgf2p8affineqb $0xab, %xmm4, %xmm5, %xmm6{%k7}
> + vgf2p8affineqb $123, -123456(%esp,%esi,8), %ymm5, %ymm6{%k7}
> + vgf2p8affineqb $123, 1016(%edx){1to4}, %ymm5, %ymm6{%k7}
> + vgf2p8affineinvqb $123, 2032(%edx), %xmm5, %xmm6{%k7}
> + vgf2p8affineinvqb $0xab, %ymm4, %ymm5, %ymm6{%k7}{z}
> + vgf2p8mulb %zmm4, %zmm5, %zmm6{%k7}
> + vgf2p8mulb -123456(%esp,%esi,8), %xmm5, %xmm6{%k7}
> + vgf2p8mulb 8192(%edx), %zmm5, %zmm6{%k7}
> + vaesenc %ymm24, %ymm26, %ymm22
> + vaesdec -123456(%esp,%esi,8), %xmm15, %xmm16
> + vaesenclast %xmm24, %xmm26, %xmm27
> + vaesdeclast 4064(%edx), %ymm25, %ymm26
> + vaesdec %zmm24, %zmm26, %zmm22
> + vaesdeclast (%ecx), %zmm26, %zmm27
> + vpclmulqdq $0xab, %xmm22, %xmm22, %xmm23
> + vpclmulqdq $123, 2032(%edx), %xmm22, %xmm23
> + vpclmulqdq $123, -123456(%esp,%esi,8), %ymm16, %ymm14
> + vpclmulhqhqdq %xmm22, %xmm23, %xmm24
> + vpclmullqhqdq %xmm14, %xmm15, %xmm16
> + vpclmulhqlqdq %ymm22, %ymm23, %ymm24
> + vpclmullqlqdq %zmm14, %zmm15, %zmm16
> +
> + .arch .noavx512vl
... this, so for the test to be useful I think the two parts of the
test need to be swapped.
> + kaddd %k1, %k2, %k3
> + kaddb %k1, %k2, %k3
> + kaddw %k1, %k2, %k3
> + kaddq %k1, %k2, %k3
> + kmovb (%ecx), %k5
> + kmovb %k5, -123456(%esp,%esi,8)
> + kmovd -123456(%esp,%esi,8), %k5
> + kmovd %ebp, %k5
> + kmovw %k5, (%ecx)
> + kmovw %k5, %ebp
There's also little point in having these twice. Having them once in
the more restricted case (noavx512f) ought to suffice.
> --- a/gas/testsuite/gas/i386/xmmhi32.s
> +++ b/gas/testsuite/gas/i386/xmmhi32.s
> @@ -26,6 +26,7 @@ xmm:
> vmovdqa ymm24, ymm0
>
> .arch .noavx512f
> + .arch .noavx10.1
> vaddps xmm0, xmm1, xmm8
> vaddps xmm0, xmm1, xmm16
> vaddps xmm0, xmm1, xmm24
This addition (and similar ones) points out another issue: People may be
using .noavx512{f,vl} to make sure they'll know if they wrongly use
certain insns. That protection becomes void with the additions as
you presently make them. This also relates to the first comment below
on i386-gen.c.
> --- a/opcodes/i386-gen.c
> +++ b/opcodes/i386-gen.c
> @@ -168,6 +168,8 @@ static const dependency isa_dependencies[] =
> "AVX2" },
> { "FRED",
> "LKGS" },
> + { "AVX10_1",
> + "AVX2" },
This can't be quite right (as in: is insufficient): There's no
restriction to the low 16 XMM/YMM registers in AVX10.1, so some of
AVX512 is also a prereq.
To also address the earlier comment, maybe we need an artificial (i.e.
not user selectable) feature underlying both AVX10 and AVX512? (But I
haven't properly thought this through, so there may be issues with
such an approach as well.)
> @@ -1217,7 +1220,7 @@ static void
> output_i386_opcode (FILE *table, const char *name, char *str,
> char *last, int lineno)
> {
> - unsigned int i, length, prefix = 0, space = 0;
> + unsigned int i, j, length, prefix = 0, space = 0, k = 0;
> char *base_opcode, *extension_opcode, *end, *ident;
> char *cpu_flags, *opcode_modifier, *operand_types [MAX_OPERANDS];
> unsigned long long opcode;
> @@ -1315,6 +1318,20 @@ output_i386_opcode (FILE *table, const char *name, char *str,
> ident = mkident (name);
> fprintf (table, " { MN_%s, 0x%0*llx%s, %u,",
> ident, 2 * (int)length, opcode, end, i);
> +
> + j = strlen(ident);
> + /* All AVX512F based instructions are usable for AVX10.1 except
> + AVX512PF/ER/4FMAPS/4VNNIW/VP2INTERSECT. */
> + if (strstr (cpu_flags, "AVX512")
> + && !strstr (cpu_flags, "AVX512PF")
> + && !strstr (cpu_flags, "AVX512ER")
> + && !strstr (cpu_flags, "4FMAPS")
> + && !strstr (cpu_flags, "4VNNIW")
> + && !strstr (cpu_flags, "VP2INTERSECT"))
> + {
> + cpu_flags = concat (cpu_flags, "|AVX10_1", NULL);
> + k = 1;
> + }
> free (ident);
Unless you know for sure that there aren't going to be further AVX512
sub-features, this looks pretty fragile.
The doc also lists AVX10.1/256 as a possible mode (see e.g. table 1-3),
which isn't reflected throughout the patch at all.
Jan
next prev parent reply other threads:[~2023-07-27 11:23 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-07-27 7:15 Haochen Jiang
2023-07-27 11:23 ` Jan Beulich [this message]
2023-07-28 2:50 ` Jiang, Haochen
2023-07-28 6:53 ` Jan Beulich
2023-08-01 2:18 ` Jiang, Haochen
2023-08-01 6:49 ` Jan Beulich
2023-08-04 7:45 ` Jiang, Haochen
2023-08-04 7:57 ` Jan Beulich
2023-08-14 6:45 ` [PATCH v2] " Haochen Jiang
2023-08-14 8:19 ` Jan Beulich
2023-08-14 8:46 ` Jiang, Haochen
2023-08-14 10:33 ` Jan Beulich
2023-08-14 10:35 ` Jan Beulich
2023-08-15 8:32 ` Jiang, Haochen
2023-08-15 14:10 ` Jan Beulich
2023-08-16 8:21 ` Jiang, Haochen
2023-08-16 8:59 ` Jan Beulich
2023-08-17 9:08 ` Jan Beulich
2023-08-18 6:53 ` Jan Beulich
2023-08-18 13:03 ` Jan Beulich
2023-08-23 2:20 ` Jiang, Haochen
2023-08-23 3:34 ` [RFC][PATCH v3] " Haochen Jiang
2023-08-23 7:17 ` Jan Beulich
2023-08-23 5:54 ` [PATCH v2] " Jan Beulich
2023-08-23 6:21 ` Jiang, Haochen
2023-08-23 6:24 ` Jan Beulich
2023-08-23 6:25 ` Jiang, Haochen
2023-08-23 6:39 ` Jan Beulich
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=9f3625f9-82b6-3fd4-fd9e-f7c79b10f4b3@suse.com \
--to=jbeulich@suse.com \
--cc=binutils@sourceware.org \
--cc=haochen.jiang@intel.com \
--cc=hjl.tools@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).