> On 19.09.2023 17:25, Cui, Lili wrote: > > --- a/gas/config/tc-i386.c > > +++ b/gas/config/tc-i386.c > > @@ -1945,6 +1945,30 @@ cpu_flags_match (const insn_template *t) > > && (!x.bitfield.cpuvpclmulqdq || > cpu.bitfield.cpuvpclmulqdq)) > > match |= CPU_FLAGS_ARCH_MATCH; > > } > > + else if (x.bitfield.cpuapx_f) > > + { > > + if (cpu.bitfield.cpuapx_f > > + && (!x.bitfield.cpumovbe || cpu.bitfield.cpumovbe) > > + && (!x.bitfield.cpuept || cpu.bitfield.cpuept) > > + && (!x.bitfield.cpuinvpcid || cpu.bitfield.cpuinvpcid) > > + && (!x.bitfield.cpusse4_2 || cpu.bitfield.cpusse4_2) > > + && (!x.bitfield.cpubmi2 || cpu.bitfield.cpubmi2) > > + && (!x.bitfield.cpubmi || cpu.bitfield.cpubmi) > > + && (!x.bitfield.cpuadx || cpu.bitfield.cpuadx) > > + && (!x.bitfield.cpusha || cpu.bitfield.cpusha) > > + && (!x.bitfield.cpuavx512bw || cpu.bitfield.cpuavx512bw) > > + && (!x.bitfield.cpuavx512dq || cpu.bitfield.cpuavx512dq) > > + && (!x.bitfield.cpuavx512f || cpu.bitfield.cpuavx512f) > > + && (!x.bitfield.cpushstk || cpu.bitfield.cpushstk) > > + && (!x.bitfield.cpumovdir64b || cpu.bitfield.cpumovdir64b) > > + && (!x.bitfield.cpumovdiri || cpu.bitfield.cpumovdiri) > > + && (!x.bitfield.cpuenqcmd || cpu.bitfield.cpuenqcmd) > > + && (!x.bitfield.cpukl || cpu.bitfield.cpukl) > > + && (!x.bitfield.cpuwidekl || cpu.bitfield.cpuwidekl) > > + && (!x.bitfield.cpucmpccxadd || cpu.bitfield.cpucmpccxadd) > > + && (!x.bitfield.cpurao_int || cpu.bitfield.cpurao_int)) > > + match |= CPU_FLAGS_ARCH_MATCH; > > + } > > else > > match |= CPU_FLAGS_ARCH_MATCH; > > > > This is getting unwieldy, so I think we will need to think of a better way of > expressing both "multiple ISAs need to be enabled" and "one of a set of ISAs > needs to be enabled". It's only the mix of these expressed in a uniform way in > the insn table that requires these extra conditionals. With the size of > i386_cpu_attr greatly shrunk as of recently, I wonder if we couldn't simply add > a 2nd instance of it to insn_template. One would be "all of these are required", > while the other would be "any one of these is sufficient". > I didn't find a better way to distinguish these two types of requirements in insn_template. I wrote a new function "cpu_flags_not_or_check" to replace these clumsy judgments. static INLINE int cpu_flags_not_or_check (const union i386_cpu_flags *x, const union i386_cpu_flags *y) { switch (ARRAY_SIZE(x->array)) { case 5: if ((~x->array[4] | y->array[4]) != 0xffffffff) return 0; /* Fall through. */ case 4: if ((~x->array[3] | y->array[3]) != 0xffffffff) return 0; /* Fall through. */ case 3: if ((~x->array[2] | y->array[2]) != 0xffffffff) return 0; /* Fall through. */ case 2: if ((~x->array[1] | y->array[1]) != 0xffffffff) return 0; /* Fall through. */ case 1: return ((~x->array[1] | y->array[1]) == 0Xffffffff); break; default: abort (); } } > > @@ -3850,7 +3874,10 @@ is_any_vex_encoding (const insn_template *t) > > static INLINE bool is_any_apx_encoding (void) { > > - return i.rex2 || i.rex2_encoding; > > + return i.rex2 > > + || i.rex2_encoding > > + || (i.vex.register_specifier > > + && i.vex.register_specifier->reg_flags & RegRex2); > > Nit: For readability as well as for consistency this wants indenting > differently: > > return i.rex2 > || i.rex2_encoding > || (i.vex.register_specifier > && i.vex.register_specifier->reg_flags & RegRex2); > > or possibly (slightly shorter) > > return i.rex2 || i.rex2_encoding > || (i.vex.register_specifier > && i.vex.register_specifier->reg_flags & RegRex2); > > In any event you want to avoid trailing blanks on any line. > Done. > > @@ -3859,6 +3886,12 @@ is_any_apx_rex2_encoding (void) > > return (i.rex2 && i.vex.length == 2) || i.rex2_encoding; } > > > > +static INLINE bool > > +is_any_apx_evex_encoding (void) > > +{ > > + return i.rex2 && i.vex.length == 4; } > > This doesn't feel right: {evex} use would demand this encoding even if > i.rex2 is still zero. > > Also - what is "any" in the name (also of the earlier predicate) intending to > express? is_any_vex_encoding() is named the way it is because it covers both > VEX and EVEX. > Yes, you are right, I found this feature is redundant. It is only used in output_insn, see below, evex encoding is already included in is_any_vex_encoding(&i.tm) (since we added EVEX128 for all EVEX-promoted instruction in insn template), I removed this function. if (!is_any_vex_encoding (&i.tm) && !is_any_apx_evex_encoding ()) > > @@ -4129,6 +4162,50 @@ build_rex2_prefix (void) > > | (i.rex2 << 4) | i.rex); > > } > > > > +/* Build the EVEX prefix (4-byte) for evex insn > > + | 62h | > > + | `R`X`B`R' | B'mmm | > > + | W | v`v`v`v | `x' | pp | > > + | z| L'L | b | `v | aaa | > > +*/ > > +static void > > +build_evex_insns_with_extend_evex_prefix (void) > > The name is somewhat odd and doesn't fit that of other similar functions. > In particular this function doesn't build an entire insn, but still just the prefix. > So perhaps build_apx_evex_prefix()? > It is better, replaced. > > +{ > > + build_evex_prefix (); > > + if (i.rex2 & REX_R) > > + &= 0xef; > > + if (i.vex.register_specifier > > + && register_number (i.vex.register_specifier) > 0xf) > > + i.vex.bytes[3] &=0xf7; > > Nit: Missing blank. > > But: Is this needed? Doesn't build_evex_prefix() fill this bit already, which isn't > new in APX? > V4 is used for vector register In build_evex_prefix(), we need to update V4 with GPR32. /* The upper 16 registers are encoded in the fourth byte of the EVEX prefix. */ if (!(i.vex.register_specifier->reg_flags & RegVRex)) i.vex.bytes[3] = 0x8; > > + if (i.rex2 & REX_B) > > + i.vex.bytes[1] |= 0x08; > > + if (i.rex2 & REX_X) > > + i.vex.bytes[2] &= 0xfb; > > +} > > + > > +/* Build the EVEX prefix (4-byte) for legacy insn > > + | 62h | > > + | `R`X`B`R' | B'100 | > > + | W | v`v`v`v | `x' | pp | > > + | 000 | ND | `v | NF | 00 | > > + For legacy insn without ndd nor nf, [vvvvv] must be all zero. */ > > +static void build_legacy_insns_with_apx_encoding (void) > > As per above, maybe build_extended_evex_prefix()? Or, ... > > > +{ > > + /* map{0,1} of legacy space without ndd or nf could use rex2 > > +prefix. */ > > + if (i.tm.opcode_space <= SPACE_0F > > + && !i.vex.register_specifier && !i.has_nf && !i.has_zero_upper) > > + return build_rex2_prefix (); > > ... because of this, build_apx_prefix()? Yet I think the call to this function might > better remain in the caller. > I deleted this function, since we handle rex2 following rex, and handle others following VEX and EVEX. > > + if (i.prefix[DATA_PREFIX] != 0) > > + { > > + i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66; > > + i.prefix[DATA_PREFIX] = 0; > > + } > > While this looks to be correct for the case when the prefix was derived from an > insn template and the use of 16-bit operands, I don't think it is uniformly > correct when "data16" was used as a prefix explicitly. In such a case either > REX2 encoding needs to be used, or an error needs emitting. > > You may further want to assert that i.tm.opcode_modifier.opcodeprefix is still > zero ahead of the assignment. > For REX2 encoding, we add no special handling, just follow REX. For EVEX-promoted encoding, such as “data16 aand %r25d,0x123(%r31,%rax,4)”, the following existing code will report an error. if (is_any_vex_encoding (&i.tm) || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX) { /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns. */ if (i.prefix[DATA_PREFIX]) { as_bad (_("data size prefix invalid with `%s'"), insn_name (&i.tm)); return; } > > @@ -10057,7 +10136,7 @@ output_insn (void) > > > > /* Since the VEX/EVEX prefix contains the implicit prefix, we > > don't need the explicit prefix. */ > > - if (!is_any_vex_encoding (&i.tm)) > > + if (!is_any_vex_encoding (&i.tm) && !is_any_apx_evex_encoding > > + ()) > > { > > switch (i.tm.opcode_modifier.opcodeprefix) > > I'm not convinced the use of this predicate is appropriate here. I'd generally > have expected is_any_vex_encoding() to be extended to also detect all cases > of EVEX encodings in APX. Removed this function, as mentioned before, evex encoding is already included in is_any_vex_encoding(&i.tm) (since we added EVEX128 for all EVEX-promoted instruction in insn template). > > > --- a/opcodes/i386-dis-evex-len.h > > +++ b/opcodes/i386-dis-evex-len.h > > As for the earlier patch, I'll look at the disassembler changes separately. > > > @@ -1121,6 +1122,15 @@ process_i386_opcode_modifier (FILE *table, > char *mod, unsigned int space, > > fprintf (stderr, > > "%s: %d: W modifier without Word/Dword/Qword > operand(s)\n", > > filename, lineno); > > + if (modifiers[Vex].value > > + || (space > SPACE_0F > > + && !(space == SPACE_EVEXMAP4 > > + || modifiers[EVex].value > > + || modifiers[Disp8MemShift].value > > + || modifiers[Broadcast].value > > + || modifiers[Masking].value > > + || modifiers[SAE].value))) > > First of all, this wants simplifying to > > if (modifiers[Vex].value > || (space > SPACE_0F > && space != SPACE_EVEXMAP4 > && !modifiers[EVex].value > && !modifiers[Disp8MemShift].value > && !modifiers[Broadcast].value > && !modifiers[Masking].value > && !modifiers[SAE].value)) > > which helps readability and makes more obvious that this parallels tc- > i386.c:is_evex_encoding(). Such a connection, where updates need to be > made in sync, needs pointing out in code comments at both sites. > Done. > Yet of course this condition won't hold anymore for combined VEX/EVEX > templates. > I rebased master and as you predicted this doesn't work, one entry contains both VEX and EVEX, VEX requires No_egpr=1 and EVEX requires No_egpr=0, Finally I chose to add "No_egpr=1" for it. And added the following judgment in check_EgprOperands. check_EgprOperands (const insn_template *t) { - if (t->opcode_modifier.noegpr) if (t->opcode_modifier.noegpr && !need_evex_encoding()) > > + modifiers[No_egpr].value = 1; > > } > > And then - shouldn't at least part of this already be put in place in patch 1? > Done in patch 1. > Finally, to avoid the split between where this attribute gets set, wouldn't it be > possible to also handle the XSAVE/XRSTOR variants here rather than directly in > the opcode table? > Done in patch 1. > > @@ -187,6 +188,7 @@ mov, 0xf24, i386|No64, > > D|RegMem|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_qSuf, { Te > > > > // Move after swapping the bytes > > movbe, 0x0f38f0, Movbe, > D|Modrm|CheckOperandSize|No_bSuf|No_sSuf, { > > Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 } > > +movbe, 0x60, Movbe|APX_F|x64, > > +D|Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVex128|EVexMap4, { > > +Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, > > +Reg16|Reg32|Reg64 } > > In new code please omit redundant Word, Dword, and alike. > Done. > I further wonder if it wouldn't help if i386-gen inserted the x64 for all APX > templates, rather than open-coding that on every single template. > Or alternatively put > > #define APX_F APX_F|x64 > > earlier in the file. > Done. > > @@ -300,6 +302,9 @@ sbb, 0x18, 0, > > > Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseInd > ex } > > +not, 0xf6/2, APX_F|x64, W|Modrm|No_sSuf|EVex128|EVexMap4, { > > > +Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIn > dex } > > Looking at just the additions up to here, I'm getting the impression that in this > patch - despite its title - you only add non-ND, non-NF insn forms for > previously non-VEX-encoded insns. This could do with clarifying, by both > making the title more concise and by stating the exact scope of the work done > in the description. > Done. > > @@ -1312,13 +1330,16 @@ getsec, 0xf37, SMX, NoSuf, {} > > > > invept, 0x660f3880, EPT|No64, Modrm|IgnoreSize|NoSuf, { > > Oword|Unspecified|BaseIndex, Reg32 } invept, 0x660f3880, EPT|x64, > > Modrm|NoSuf|NoRex64, { Oword|Unspecified|BaseIndex, Reg64 } > > +invept, 0xf3f0, APX_F|EPT|x64, > Modrm|NoSuf|NoRex64|EVex128|EVexMap4, > > +{ Oword|Unspecified|BaseIndex, Reg64 } > > invvpid, 0x660f3881, EPT|No64, Modrm|IgnoreSize|NoSuf, { > > Oword|Unspecified|BaseIndex, Reg32 } invvpid, 0x660f3881, EPT|x64, > > Modrm|NoSuf|NoRex64, { Oword|Unspecified|BaseIndex, Reg64 } > > +invvpid, 0xf3f1, APX_F|EPT|x64, > Modrm|NoSuf|NoRex64|EVex128|EVexMap4, > > +{ Oword|Unspecified|BaseIndex, Reg64 } > > > > // INVPCID instruction > > > > invpcid, 0x660f3882, INVPCID|No64, Modrm|IgnoreSize|NoSuf, { > > Oword|Unspecified|BaseIndex, Reg32 } invpcid, 0x660f3882, > > INVPCID|x64, Modrm|NoSuf|NoRex64, { Oword|Unspecified|BaseIndex, > Reg64 > > } > > +invpcid, 0xf3f2, APX_F|INVPCID|x64, > > +Modrm|NoSuf|NoRex64|EVex128|EVexMap4, > { Oword|Unspecified|BaseIndex, > > +Reg64 } > > I don't think NoRex64 belongs in any EVEX template. > Removed it from APX_F EVEX template. > > crc32, 0xf20f38f0, SSE4_2|x64, W|Modrm|No_wSuf|No_lSuf|No_sSuf, { > > Reg8|Reg64|Unspecified|BaseIndex, Reg64 } > > +crc32, 0xf0, APX_F|x64, > > +W|Modrm|No_wSuf|No_lSuf|No_sSuf|EVex128|EVexMap4, { > > +Reg8|Reg64|Unspecified|BaseIndex, Reg64 } > > There's quite a bit of logic in tc-i386.c to get CRC32 right. I wonder if you can > really get away without adjusting that logic to also take effect on the EVEX > encodings. > Thanks for reminding, checked crc32 logic in tc-i386.c, it mainly focuses on base_opcode and i.rex W bit, for base_opcode part we can , for i.rex.w bit, we need apply it to evex.w bit, For crc32 case we use else and APX_F only supports 64 bit mode, it can make sure we use i.rex & REX_W for evex.w. and added some new test case to make sure encode is right. Thanks for the reminder. I checked the crc32 logic in tc-i386.c, mainly focusing on the base_opcode and i.rex W bits. For the base_opcode part, we can inherit it. For the i.rex.w bit, we need to apply it to the evex.w bit. , For the following code in build_evex_prefix(), crc32 takes the else branch and APX_F only supports 64 bit mode, it can ensures we use i.rex.w for evex.w. And added some new test cases to ensure correct encoding. /* Check the REX.W bit and VEXW. */ if (i.tm.opcode_modifier.vexw == VEXWIG) w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0; else if (i.tm.opcode_modifier.vexw) w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0; else w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0; + crc32q %r31, %r22 + crc32q (%r31), %r22 + crc32b %r19b, %r17 + crc32b %r19b, %r21d + crc32b (%r19),%ebx + crc32l %r31d, %r23d + crc32l (%r31), %r23d + crc32w %r31w, %r21d + crc32w (%r31),%r21d + crc32 %rax, %r18 > > @@ -3408,3 +3487,4 @@ erets, 0xf20f01ca, FRED|x64, NoSuf, {} eretu, > > 0xf30f01ca, FRED|x64, NoSuf, {} > > > > // FRED instructions end. > > + > > Nit: Stray change. Done. Thanks, Lili.