> On 19.09.2023 17:25, Cui, Lili wrote:

> > --- a/gas/config/tc-i386.c

> > +++ b/gas/config/tc-i386.c

> > @@ -1945,6 +1945,30 @@ cpu_flags_match (const insn_template *t)

> >                         && (!x.bitfield.cpuvpclmulqdq ||

> cpu.bitfield.cpuvpclmulqdq))

> >                       match |= CPU_FLAGS_ARCH_MATCH;

> >             }

> > +        else if (x.bitfield.cpuapx_f)

> > +          {

> > +            if (cpu.bitfield.cpuapx_f

> > +                      && (!x.bitfield.cpumovbe || cpu.bitfield.cpumovbe)

> > +                      && (!x.bitfield.cpuept || cpu.bitfield.cpuept)

> > +                      && (!x.bitfield.cpuinvpcid || cpu.bitfield.cpuinvpcid)

> > +                      && (!x.bitfield.cpusse4_2 || cpu.bitfield.cpusse4_2)

> > +                      && (!x.bitfield.cpubmi2 || cpu.bitfield.cpubmi2)

> > +                      && (!x.bitfield.cpubmi || cpu.bitfield.cpubmi)

> > +                      && (!x.bitfield.cpuadx || cpu.bitfield.cpuadx)

> > +                      && (!x.bitfield.cpusha || cpu.bitfield.cpusha)

> > +                      && (!x.bitfield.cpuavx512bw || cpu.bitfield.cpuavx512bw)

> > +                      && (!x.bitfield.cpuavx512dq || cpu.bitfield.cpuavx512dq)

> > +                      && (!x.bitfield.cpuavx512f || cpu.bitfield.cpuavx512f)

> > +                      && (!x.bitfield.cpushstk || cpu.bitfield.cpushstk)

> > +                      && (!x.bitfield.cpumovdir64b || cpu.bitfield.cpumovdir64b)

> > +                      && (!x.bitfield.cpumovdiri || cpu.bitfield.cpumovdiri)

> > +                      && (!x.bitfield.cpuenqcmd || cpu.bitfield.cpuenqcmd)

> > +                      && (!x.bitfield.cpukl || cpu.bitfield.cpukl)

> > +                      && (!x.bitfield.cpuwidekl || cpu.bitfield.cpuwidekl)

> > +                      && (!x.bitfield.cpucmpccxadd || cpu.bitfield.cpucmpccxadd)

> > +                      && (!x.bitfield.cpurao_int || cpu.bitfield.cpurao_int))

> > +                    match |= CPU_FLAGS_ARCH_MATCH;

> > +          }

> >           else

> >             match |= CPU_FLAGS_ARCH_MATCH;

> >

>

> This is getting unwieldy, so I think we will need to think of a better way of

> expressing both "multiple ISAs need to be enabled" and "one of a set of ISAs

> needs to be enabled". It's only the mix of these expressed in a uniform way in

> the insn table that requires these extra conditionals. With the size of

> i386_cpu_attr greatly shrunk as of recently, I wonder if we couldn't simply add

> a 2nd instance of it to insn_template. One would be "all of these are required",

> while the other would be "any one of these is sufficient".

>



I didn't find a better way to distinguish these two types of requirements in insn_template.

I wrote a new function "cpu_flags_not_or_check" to replace these clumsy judgments.



static INLINE int

cpu_flags_not_or_check (const union i386_cpu_flags *x,

                        const union i386_cpu_flags *y)

{

  switch (ARRAY_SIZE(x->array))

    {

    case 5:

      if ((~x->array[4] | y->array[4]) != 0xffffffff)

        return 0;

      /* Fall through.  */

    case 4:

      if ((~x->array[3] | y->array[3]) != 0xffffffff)

        return 0;

      /* Fall through.  */

    case 3:

      if ((~x->array[2] | y->array[2]) != 0xffffffff)

        return 0;

      /* Fall through.  */

    case 2:

      if ((~x->array[1] | y->array[1]) != 0xffffffff)

        return 0;

      /* Fall through.  */

    case 1:

     return ((~x->array[1] | y->array[1]) == 0Xffffffff);

      break;

    default:

      abort ();

    }

}



> > @@ -3850,7 +3874,10 @@ is_any_vex_encoding (const insn_template *t)

> > static INLINE bool  is_any_apx_encoding (void)  {

> > -  return i.rex2 || i.rex2_encoding;

> > +  return i.rex2

> > +    || i.rex2_encoding

> > +    || (i.vex.register_specifier

> > +      && i.vex.register_specifier->reg_flags & RegRex2);

>

> Nit: For readability as well as for consistency this wants indenting

> differently:

>

>   return i.rex2

>             || i.rex2_encoding

>             || (i.vex.register_specifier

>                 && i.vex.register_specifier->reg_flags & RegRex2);

>

> or possibly (slightly shorter)

>

>   return i.rex2 || i.rex2_encoding

>             || (i.vex.register_specifier

>                 && i.vex.register_specifier->reg_flags & RegRex2);

>

> In any event you want to avoid trailing blanks on any line.

>



Done.



> > @@ -3859,6 +3886,12 @@ is_any_apx_rex2_encoding (void)

> >    return (i.rex2 && i.vex.length == 2) || i.rex2_encoding;  }

> >

> > +static INLINE bool

> > +is_any_apx_evex_encoding (void)

> > +{

> > +  return i.rex2 && i.vex.length == 4; }

>

> This doesn't feel right: {evex} use would demand this encoding even if

> i.rex2 is still zero.

>

> Also - what is "any" in the name (also of the earlier predicate) intending to

> express? is_any_vex_encoding() is named the way it is because it covers both

> VEX and EVEX.

>



Yes, you are right, I found  this feature is redundant. It is only used in output_insn, see below, evex encoding is already included in is_any_vex_encoding(&i.tm) (since we added EVEX128 for all EVEX-promoted instruction in insn template), I removed this function.



if (!is_any_vex_encoding (&i.tm) && !is_any_apx_evex_encoding ())



> > @@ -4129,6 +4162,50 @@ build_rex2_prefix (void)

> >                           | (i.rex2 << 4) | i.rex);

> >  }

> >

> > +/* Build the EVEX prefix (4-byte) for evex insn

> > +   | 62h |

> > +   | `R`X`B`R' | B'mmm |

> > +   | W | v`v`v`v | `x' | pp |

> > +   | z| L'L | b | `v | aaa |

> > +*/

> > +static void

> > +build_evex_insns_with_extend_evex_prefix (void)

>

> The name is somewhat odd and doesn't fit that of other similar functions.

> In particular this function doesn't build an entire insn, but still just the prefix.

> So perhaps build_apx_evex_prefix()?

>



It is better, replaced.



> > +{

> > +  build_evex_prefix ();

> > +  if (i.rex2 & REX_R)

> > +    &= 0xef;

> > +  if (i.vex.register_specifier

> > +      && register_number (i.vex.register_specifier) > 0xf)

> > +    i.vex.bytes[3] &=0xf7;

>

> Nit: Missing blank.

>

> But: Is this needed? Doesn't build_evex_prefix() fill this bit already, which isn't

> new in APX?

>



V4 is used for vector register In build_evex_prefix(), we need to update V4 with GPR32.



      /* The upper 16 registers are encoded in the fourth byte of the

         EVEX prefix.  */

      if (!(i.vex.register_specifier->reg_flags & RegVRex))

        i.vex.bytes[3] = 0x8;



> > +  if (i.rex2 & REX_B)

> > +    i.vex.bytes[1] |= 0x08;

> > +  if (i.rex2 & REX_X)

> > +    i.vex.bytes[2] &= 0xfb;

> > +}

> > +

> > +/* Build the EVEX prefix (4-byte) for legacy insn

> > +   | 62h |

> > +   | `R`X`B`R' | B'100 |

> > +   | W | v`v`v`v | `x' | pp |

> > +   | 000 | ND | `v | NF | 00 |

> > +   For legacy insn without ndd nor nf, [vvvvv] must be all zero.  */

> > +static void build_legacy_insns_with_apx_encoding (void)

>

> As per above, maybe build_extended_evex_prefix()? Or, ...

>

> > +{

> > +  /* map{0,1} of legacy space without ndd or nf could use rex2

> > +prefix.  */

> > +  if (i.tm.opcode_space <= SPACE_0F

> > +      && !i.vex.register_specifier && !i.has_nf && !i.has_zero_upper)

> > +    return build_rex2_prefix ();

>

> ... because of this, build_apx_prefix()? Yet I think the call to this function might

> better remain in the caller.

>



I deleted this function, since we handle rex2 following rex, and handle others following VEX and EVEX.



> > +  if (i.prefix[DATA_PREFIX] != 0)

> > +    {

> > +      i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;

> > +      i.prefix[DATA_PREFIX] = 0;

> > +    }

>

> While this looks to be correct for the case when the prefix was derived from an

> insn template and the use of 16-bit operands, I don't think it is uniformly

> correct when "data16" was used as a prefix explicitly. In such a case either

> REX2 encoding needs to be used, or an error needs emitting.

>

> You may further want to assert that i.tm.opcode_modifier.opcodeprefix is still

> zero ahead of the assignment.

>



For REX2 encoding, we add no special handling, just follow REX.

For EVEX-promoted encoding, such as “data16 aand   %r25d,0x123(%r31,%rax,4)”, the following existing code will report an error.



  if (is_any_vex_encoding (&i.tm)

      || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX

      || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX)

    {

      /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns.  */

      if (i.prefix[DATA_PREFIX])

        {

          as_bad (_("data size prefix invalid with `%s'"), insn_name (&i.tm));

          return;

        }



> > @@ -10057,7 +10136,7 @@ output_insn (void)

> >

> >        /* Since the VEX/EVEX prefix contains the implicit prefix, we

> >          don't need the explicit prefix.  */

> > -      if (!is_any_vex_encoding (&i.tm))

> > +      if (!is_any_vex_encoding (&i.tm) && !is_any_apx_evex_encoding

> > + ())

> >         {

> >           switch (i.tm.opcode_modifier.opcodeprefix)

>

> I'm not convinced the use of this predicate is appropriate here. I'd generally

> have expected is_any_vex_encoding() to be extended to also detect all cases

> of EVEX encodings in APX.



Removed this function, as mentioned before, evex encoding is already included in is_any_vex_encoding(&i.tm) (since we added EVEX128 for all EVEX-promoted instruction in insn template).



>

> > --- a/opcodes/i386-dis-evex-len.h

> > +++ b/opcodes/i386-dis-evex-len.h

>

> As for the earlier patch, I'll look at the disassembler changes separately.

>

> > @@ -1121,6 +1122,15 @@ process_i386_opcode_modifier (FILE *table,

> char *mod, unsigned int space,

> >         fprintf (stderr,

> >                        "%s: %d: W modifier without Word/Dword/Qword

> operand(s)\n",

> >                        filename, lineno);

> > +      if (modifiers[Vex].value

> > +        || (space > SPACE_0F

> > +            && !(space == SPACE_EVEXMAP4

> > +                       || modifiers[EVex].value

> > +                       || modifiers[Disp8MemShift].value

> > +                       || modifiers[Broadcast].value

> > +                       || modifiers[Masking].value

> > +                       || modifiers[SAE].value)))

>

> First of all, this wants simplifying to

>

>       if (modifiers[Vex].value

>              || (space > SPACE_0F

>                  && space != SPACE_EVEXMAP4

>                  && !modifiers[EVex].value

>                  && !modifiers[Disp8MemShift].value

>                  && !modifiers[Broadcast].value

>                  && !modifiers[Masking].value

>                  && !modifiers[SAE].value))

>

> which helps readability and makes more obvious that this parallels tc-

> i386.c:is_evex_encoding(). Such a connection, where updates need to be

> made in sync, needs pointing out in code comments at both sites.

>



Done.



> Yet of course this condition won't hold anymore for combined VEX/EVEX

> templates.

>



I rebased master and as you predicted this doesn't work, one entry contains both VEX and EVEX, VEX requires No_egpr=1 and EVEX requires No_egpr=0,

Finally I chose to add "No_egpr=1" for it. And added the following judgment in check_EgprOperands.



check_EgprOperands (const insn_template *t)

{

-  if (t->opcode_modifier.noegpr)

if (t->opcode_modifier.noegpr && !need_evex_encoding())



> > +      modifiers[No_egpr].value = 1;

> >      }

>

> And then - shouldn't at least part of this already be put in place in patch 1?

>



Done in patch 1.



> Finally, to avoid the split between where this attribute gets set, wouldn't it be

> possible to also handle the XSAVE/XRSTOR variants here rather than directly in

> the opcode table?

>



Done in patch 1.



> > @@ -187,6 +188,7 @@ mov, 0xf24, i386|No64,

> > D|RegMem|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_qSuf, { Te

> >

> >  // Move after swapping the bytes

> >  movbe, 0x0f38f0, Movbe,

> D|Modrm|CheckOperandSize|No_bSuf|No_sSuf, {

> > Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }

> > +movbe, 0x60, Movbe|APX_F|x64,

> > +D|Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVex128|EVexMap4, {

> > +Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex,

> > +Reg16|Reg32|Reg64 }

>

> In new code please omit redundant Word, Dword, and alike.

>



Done.



> I further wonder if it wouldn't help if i386-gen inserted the x64 for all APX

> templates, rather than open-coding that on every single template.

> Or alternatively put

>

> #define APX_F APX_F|x64

>

> earlier in the file.

>



Done.



> > @@ -300,6 +302,9 @@ sbb, 0x18, 0,

> >

> Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseInd

> ex }

> > +not, 0xf6/2, APX_F|x64, W|Modrm|No_sSuf|EVex128|EVexMap4, {

> >

> +Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIn

> dex }

>

> Looking at just the additions up to here, I'm getting the impression that in this

> patch - despite its title - you only add non-ND, non-NF insn forms for

> previously non-VEX-encoded insns. This could do with clarifying, by both

> making the title more concise and by stating the exact scope of the work done

> in the description.

>



Done.



> > @@ -1312,13 +1330,16 @@ getsec, 0xf37, SMX, NoSuf, {}

> >

> >  invept, 0x660f3880, EPT|No64, Modrm|IgnoreSize|NoSuf, {

> > Oword|Unspecified|BaseIndex, Reg32 }  invept, 0x660f3880, EPT|x64,

> > Modrm|NoSuf|NoRex64, { Oword|Unspecified|BaseIndex, Reg64 }

> > +invept, 0xf3f0, APX_F|EPT|x64,

> Modrm|NoSuf|NoRex64|EVex128|EVexMap4,

> > +{ Oword|Unspecified|BaseIndex, Reg64 }

> >  invvpid, 0x660f3881, EPT|No64, Modrm|IgnoreSize|NoSuf, {

> > Oword|Unspecified|BaseIndex, Reg32 }  invvpid, 0x660f3881, EPT|x64,

> > Modrm|NoSuf|NoRex64, { Oword|Unspecified|BaseIndex, Reg64 }

> > +invvpid, 0xf3f1, APX_F|EPT|x64,

> Modrm|NoSuf|NoRex64|EVex128|EVexMap4,

> > +{ Oword|Unspecified|BaseIndex, Reg64 }

> >

> >  // INVPCID instruction

> >

> >  invpcid, 0x660f3882, INVPCID|No64, Modrm|IgnoreSize|NoSuf, {

> > Oword|Unspecified|BaseIndex, Reg32 }  invpcid, 0x660f3882,

> > INVPCID|x64, Modrm|NoSuf|NoRex64, { Oword|Unspecified|BaseIndex,

> Reg64

> > }

> > +invpcid, 0xf3f2, APX_F|INVPCID|x64,

> > +Modrm|NoSuf|NoRex64|EVex128|EVexMap4,

> { Oword|Unspecified|BaseIndex,

> > +Reg64 }

>

> I don't think NoRex64 belongs in any EVEX template.

>



Removed it from APX_F EVEX template.



> >  crc32, 0xf20f38f0, SSE4_2|x64, W|Modrm|No_wSuf|No_lSuf|No_sSuf, {

> > Reg8|Reg64|Unspecified|BaseIndex, Reg64 }

> > +crc32, 0xf0, APX_F|x64,

> > +W|Modrm|No_wSuf|No_lSuf|No_sSuf|EVex128|EVexMap4, {

> > +Reg8|Reg64|Unspecified|BaseIndex, Reg64 }

>

> There's quite a bit of logic in tc-i386.c to get CRC32 right. I wonder if you can

> really get away without adjusting that logic to also take effect on the EVEX

> encodings.

>



Thanks for reminding, checked crc32 logic in tc-i386.c, it mainly focuses on base_opcode and i.rex W bit, for base_opcode part we can , for i.rex.w bit, we need apply it to evex.w bit,

For crc32 case we use else and APX_F only supports 64 bit mode, it can make sure we use i.rex & REX_W for evex.w.  and added some new test case to make sure encode is right.



Thanks for the reminder. I checked the crc32 logic in tc-i386.c, mainly focusing on the base_opcode and i.rex W bits. For the base_opcode part, we can inherit it. For the i.rex.w bit, we need to apply it to the evex.w bit. ,

For the following code in build_evex_prefix(), crc32 takes the else branch and APX_F only supports 64 bit mode, it can ensures we use i.rex.w for evex.w.  And added some new test cases to ensure correct encoding.



  /* Check the REX.W bit and VEXW.  */

  if (i.tm.opcode_modifier.vexw == VEXWIG)

    w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;

  else if (i.tm.opcode_modifier.vexw)

    w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;

  else

    w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;



+      crc32q  %r31, %r22

+      crc32q  (%r31), %r22

+      crc32b  %r19b, %r17

+      crc32b  %r19b, %r21d

+      crc32b (%r19),%ebx

+      crc32l  %r31d, %r23d

+      crc32l  (%r31), %r23d

+      crc32w  %r31w, %r21d

+      crc32w (%r31),%r21d

+      crc32   %rax, %r18

> > @@ -3408,3 +3487,4 @@ erets, 0xf20f01ca, FRED|x64, NoSuf, {}  eretu,

> > 0xf30f01ca, FRED|x64, NoSuf, {}

> >

> >  // FRED instructions end.

> > +

>

> Nit: Stray change.



Done.



Thanks,

Lili.