From: Jan Beulich <jbeulich@suse.com>
To: Haochen Jiang <haochen.jiang@intel.com>
Cc: hjl.tools@gmail.com, binutils@sourceware.org
Subject: Re: [PATCH] Support Intel AVX10.1
Date: Thu, 27 Jul 2023 13:23:09 +0200 [thread overview]
Message-ID: <9f3625f9-82b6-3fd4-fd9e-f7c79b10f4b3@suse.com> (raw)
In-Reply-To: <20230727071550.1814187-1-haochen.jiang@intel.com>
On 27.07.2023 09:15, Haochen Jiang wrote:
> @@ -1845,7 +1846,9 @@ cpu_flags_match (const insn_template *t)
> i386_cpu_flags cpu = cpu_arch_flags;
>
> /* AVX512VL is no standalone feature - match it and then strip it. */
> - if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
> + if (x.bitfield.cpuavx512vl
> + && !cpu.bitfield.cpuavx512vl
> + && !cpu.bitfield.cpuavx10_1)
> return match;
> x.bitfield.cpuavx512vl = 0;
I _think_ the code change is correct, but the comment needs updating
(thereby also clarifying what the intention here is).
> @@ -6382,7 +6386,10 @@ check_VecOperands (const insn_template *t)
> cpu = cpu_flags_and (t->cpu_flags, avx512);
> if (!cpu_flags_all_zero (&cpu)
> && !t->cpu_flags.bitfield.cpuavx512vl
> - && !cpu_arch_flags.bitfield.cpuavx512vl)
> + && !cpu_arch_flags.bitfield.cpuavx512vl
> + && (!t->cpu_flags.bitfield.cpuavx10_1
> + || (t->cpu_flags.bitfield.cpuavx10_1
> + && !cpu_arch_flags.bitfield.cpuavx10_1)))
This first of all can be simplified to
if (!cpu_flags_all_zero (&cpu)
&& !t->cpu_flags.bitfield.cpuavx512vl
&& !cpu_arch_flags.bitfield.cpuavx512vl
&& (!t->cpu_flags.bitfield.cpuavx10_1
|| !cpu_arch_flags.bitfield.cpuavx10_1))
which doesn't look quite right. But of course the two features also
aren't symmetric, so I may well be wrong. First of all the remark at
the very bottom of this mail needs resolving, though. Also for ...
> @@ -13794,7 +13801,8 @@ static bool check_register (const reg_entry *r)
> if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
> return false;
>
> - if (!cpu_arch_flags.bitfield.cpuavx512f)
> + if (!cpu_arch_flags.bitfield.cpuavx512f
> + && !cpu_arch_flags.bitfield.cpuavx10_1)
> {
> if (r->reg_type.bitfield.zmmword
> || r->reg_type.bitfield.class == RegMask)
> @@ -13826,7 +13834,8 @@ static bool check_register (const reg_entry *r)
> mode, and require EVEX encoding. */
> if (r->reg_flags & RegVRex)
> {
> - if (!cpu_arch_flags.bitfield.cpuavx512f
> + if ((!cpu_arch_flags.bitfield.cpuavx512f
> + && !cpu_arch_flags.bitfield.cpuavx10_1)
> || flag_code != CODE_64BIT)
> return false;
... the changes to make here.
> --- a/gas/testsuite/gas/i386/i386.exp
> +++ b/gas/testsuite/gas/i386/i386.exp
> @@ -506,6 +506,7 @@ if [gas_32_check] then {
> run_dump_test "sm4"
> run_dump_test "sm4-intel"
> run_list_test "pbndkb-inval"
> + run_list_test "avx10_1-inval"
> run_list_test "sg"
> run_dump_test "clzero"
> run_dump_test "invlpgb"
Only an inval test? I'm inclined to say you either want both here, or
leave it to just the 64-bit testing.
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-avx10_1.s
> @@ -0,0 +1,97 @@
> +# Check AVX10.1 instructions
> +
> + .text
> +_start:
> + .arch .noavx512f
This implies ...
> + kaddd %k1, %k2, %k3
> + kaddb %k1, %k2, %k3
> + kaddw %k1, %k2, %k3
> + kaddq %k1, %k2, %k3
> + kmovb (%ecx), %k5
> + kmovb %k5, -123456(%esp,%esi,8)
> + kmovd -123456(%esp,%esi,8), %k5
> + kmovd %ebp, %k5
> + kmovw %k5, (%ecx)
> + kmovw %k5, %ebp
> + vaddpd %xmm4, %xmm5, %xmm6{%k7}
> + vaddpd (%ecx), %xmm5, %xmm6{%k7}
> + vaddpd (%eax){1to2}, %xmm5, %xmm6{%k7}
> + vaddpd 2048(%edx), %xmm5, %xmm6{%k7}
> + vaddpd -2064(%edx), %xmm5, %xmm6{%k7}
> + vaddpd 1024(%edx){1to2}, %xmm5, %xmm6{%k7}
> + vaddpd -1032(%edx){1to2}, %xmm5, %xmm6{%k7}
> + vaddpd %zmm4, %zmm5, %zmm6{%k7}{z}
> + vaddpd -123456(%esp,%esi,8), %ymm5, %ymm6{%k7}
> + vaddpd 8192(%edx), %zmm5, %zmm6{%k7}
> + vaddpd -4096(%edx), %ymm5, %ymm6{%k7}
> + vaddpd 1016(%edx){1to4}, %ymm5, %ymm6{%k7}
> + vaddpd -2048(%edx){1to8}, %zmm5, %zmm6{%k7}
> + vgf2p8affineqb $0xab, %xmm4, %xmm5, %xmm6{%k7}
> + vgf2p8affineqb $123, -123456(%esp,%esi,8), %ymm5, %ymm6{%k7}
> + vgf2p8affineqb $123, 1016(%edx){1to4}, %ymm5, %ymm6{%k7}
> + vgf2p8affineinvqb $123, 2032(%edx), %xmm5, %xmm6{%k7}
> + vgf2p8affineinvqb $0xab, %ymm4, %ymm5, %ymm6{%k7}{z}
> + vgf2p8mulb %zmm4, %zmm5, %zmm6{%k7}
> + vgf2p8mulb -123456(%esp,%esi,8), %xmm5, %xmm6{%k7}
> + vgf2p8mulb 8192(%edx), %zmm5, %zmm6{%k7}
> + vaesenc %ymm24, %ymm26, %ymm22
> + vaesdec -123456(%esp,%esi,8), %xmm15, %xmm16
> + vaesenclast %xmm24, %xmm26, %xmm27
> + vaesdeclast 4064(%edx), %ymm25, %ymm26
> + vaesdec %zmm24, %zmm26, %zmm22
> + vaesdeclast (%ecx), %zmm26, %zmm27
> + vpclmulqdq $0xab, %xmm22, %xmm22, %xmm23
> + vpclmulqdq $123, 2032(%edx), %xmm22, %xmm23
> + vpclmulqdq $123, -123456(%esp,%esi,8), %ymm16, %ymm14
> + vpclmulhqhqdq %xmm22, %xmm23, %xmm24
> + vpclmullqhqdq %xmm14, %xmm15, %xmm16
> + vpclmulhqlqdq %ymm22, %ymm23, %ymm24
> + vpclmullqlqdq %zmm14, %zmm15, %zmm16
> +
> + .arch .noavx512vl
... this, so for the test to be useful I think the two parts of the
test need to be swapped.
> + kaddd %k1, %k2, %k3
> + kaddb %k1, %k2, %k3
> + kaddw %k1, %k2, %k3
> + kaddq %k1, %k2, %k3
> + kmovb (%ecx), %k5
> + kmovb %k5, -123456(%esp,%esi,8)
> + kmovd -123456(%esp,%esi,8), %k5
> + kmovd %ebp, %k5
> + kmovw %k5, (%ecx)
> + kmovw %k5, %ebp
There's also little point in having these twice. Having them once in
the more restricted case (noavx512f) ought to suffice.
> --- a/gas/testsuite/gas/i386/xmmhi32.s
> +++ b/gas/testsuite/gas/i386/xmmhi32.s
> @@ -26,6 +26,7 @@ xmm:
> vmovdqa ymm24, ymm0
>
> .arch .noavx512f
> + .arch .noavx10.1
> vaddps xmm0, xmm1, xmm8
> vaddps xmm0, xmm1, xmm16
> vaddps xmm0, xmm1, xmm24
This addition (and similar ones) points out another issue: People may be
using .noavx512{f,vl} to make sure they'll know if they wrongly use
certain insns. That protection becomes void with the additions as
you presently make them. This also relates to the first comment below
on i386-gen.c.
> --- a/opcodes/i386-gen.c
> +++ b/opcodes/i386-gen.c
> @@ -168,6 +168,8 @@ static const dependency isa_dependencies[] =
> "AVX2" },
> { "FRED",
> "LKGS" },
> + { "AVX10_1",
> + "AVX2" },
This can't be quite right (as in: is insufficient): There's no
restriction to the low 16 XMM/YMM registers in AVX10.1, so some of
AVX512 is also a prereq.
To also address the earlier comment, maybe we need an artificial (i.e.
not user selectable) feature underlying both AVX10 and AVX512? (But I
haven't properly thought this through, so there may be issues with
such an approach as well.)
> @@ -1217,7 +1220,7 @@ static void
> output_i386_opcode (FILE *table, const char *name, char *str,
> char *last, int lineno)
> {
> - unsigned int i, length, prefix = 0, space = 0;
> + unsigned int i, j, length, prefix = 0, space = 0, k = 0;
> char *base_opcode, *extension_opcode, *end, *ident;
> char *cpu_flags, *opcode_modifier, *operand_types [MAX_OPERANDS];
> unsigned long long opcode;
> @@ -1315,6 +1318,20 @@ output_i386_opcode (FILE *table, const char *name, char *str,
> ident = mkident (name);
> fprintf (table, " { MN_%s, 0x%0*llx%s, %u,",
> ident, 2 * (int)length, opcode, end, i);
> +
> + j = strlen(ident);
> + /* All AVX512F based instructions are usable for AVX10.1 except
> + AVX512PF/ER/4FMAPS/4VNNIW/VP2INTERSECT. */
> + if (strstr (cpu_flags, "AVX512")
> + && !strstr (cpu_flags, "AVX512PF")
> + && !strstr (cpu_flags, "AVX512ER")
> + && !strstr (cpu_flags, "4FMAPS")
> + && !strstr (cpu_flags, "4VNNIW")
> + && !strstr (cpu_flags, "VP2INTERSECT"))
> + {
> + cpu_flags = concat (cpu_flags, "|AVX10_1", NULL);
> + k = 1;
> + }
> free (ident);
Unless you know for sure that there aren't going to be further AVX512
sub-features, this looks pretty fragile.
The doc also lists AVX10.1/256 as a possible mode (see e.g. table 1-3),
which isn't reflected throughout the patch at all.
Jan
next prev parent reply other threads:[~2023-07-27 11:23 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-07-27 7:15 Haochen Jiang
2023-07-27 11:23 ` Jan Beulich [this message]
2023-07-28 2:50 ` Jiang, Haochen
2023-07-28 6:53 ` Jan Beulich
2023-08-01 2:18 ` Jiang, Haochen
2023-08-01 6:49 ` Jan Beulich
2023-08-04 7:45 ` Jiang, Haochen
2023-08-04 7:57 ` Jan Beulich
2023-08-14 6:45 ` [PATCH v2] " Haochen Jiang
2023-08-14 8:19 ` Jan Beulich
2023-08-14 8:46 ` Jiang, Haochen
2023-08-14 10:33 ` Jan Beulich
2023-08-14 10:35 ` Jan Beulich
2023-08-15 8:32 ` Jiang, Haochen
2023-08-15 14:10 ` Jan Beulich
2023-08-16 8:21 ` Jiang, Haochen
2023-08-16 8:59 ` Jan Beulich
2023-08-17 9:08 ` Jan Beulich
2023-08-18 6:53 ` Jan Beulich
2023-08-18 13:03 ` Jan Beulich
2023-08-23 2:20 ` Jiang, Haochen
2023-08-23 3:34 ` [RFC][PATCH v3] " Haochen Jiang
2023-08-23 7:17 ` Jan Beulich
2023-08-23 5:54 ` [PATCH v2] " Jan Beulich
2023-08-23 6:21 ` Jiang, Haochen
2023-08-23 6:24 ` Jan Beulich
2023-08-23 6:25 ` Jiang, Haochen
2023-08-23 6:39 ` Jan Beulich
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=9f3625f9-82b6-3fd4-fd9e-f7c79b10f4b3@suse.com \
--to=jbeulich@suse.com \
--cc=binutils@sourceware.org \
--cc=haochen.jiang@intel.com \
--cc=hjl.tools@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).