public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] aarch64: Enable gating instructions with FEAT_X || FEAT_Y
@ 2024-06-04 13:25 Andrew Carlotti
  2024-06-12 13:31 ` Richard Earnshaw (lists)
  0 siblings, 1 reply; 3+ messages in thread
From: Andrew Carlotti @ 2024-06-04 13:25 UTC (permalink / raw)
  To: binutils; +Cc: Richard Earnshaw

Some aarch64 instructions can be enabled independently by two different
feature sets.  This patch extends aarch64_opcode->avariant to a
2-element array of feature sets.  Checks for opcode enablement now use a
new inline function that tests both feature sets.  These two feature
sets are identical for all currently supported instructions, so there is
no functional change.

This passes all existing tests, and is further tested via its usage in my
subsequent FP8 patches.  I've additionally tested the new behaviour by changing
one or both feature sets in aarch64_feature_dotprod, and verifying that
assembly only fails when neither of the specified feature sets is present.

Is this ok for master?


diff --git a/gas/config/tc-aarch64.c b/gas/config/tc-aarch64.c
index fec17c40a43e60e294b6aab8976e5926a3c49230..c421bfdac1f423344f5ecdb998d1d0b8d24f3ae5 100644
--- a/gas/config/tc-aarch64.c
+++ b/gas/config/tc-aarch64.c
@@ -6567,10 +6567,10 @@ parse_operands (char *str, const aarch64_opcode *opcode)
   clear_error ();
   skip_whitespace (str);
 
-  if (AARCH64_CPU_HAS_FEATURE (*opcode->avariant, SME2))
+  if (AARCH64_CPU_HAS_FEATURE ((*opcode->avariant)[0], SME2))
     imm_reg_type = REG_TYPE_R_ZR_SP_BHSDQ_VZP_PN;
-  else if (AARCH64_CPU_HAS_FEATURE (*opcode->avariant, SVE)
-	   || AARCH64_CPU_HAS_FEATURE (*opcode->avariant, SVE2))
+  else if (AARCH64_CPU_HAS_FEATURE ((*opcode->avariant)[0], SVE)
+	   || AARCH64_CPU_HAS_FEATURE ((*opcode->avariant)[0], SVE2))
     imm_reg_type = REG_TYPE_R_ZR_SP_BHSDQ_VZP;
   else
     imm_reg_type = REG_TYPE_R_ZR_BHSDQ_V;
diff --git a/include/opcode/aarch64.h b/include/opcode/aarch64.h
index 8a21611e3ff59220ae5e25120c375ac2db111b77..9da73a932f7083b0e6d9964eda5e38124808f5da 100644
--- a/include/opcode/aarch64.h
+++ b/include/opcode/aarch64.h
@@ -1183,6 +1183,10 @@ typedef struct aarch64_instr_sequence aarch64_instr_sequence;
 /* Forward declare instruction definition.  */
 typedef struct aarch64_inst aarch64_inst;
 
+/* Maximum number of architecture feature sets that can independently enable
+   an instruction.  */
+#define AARCH64_MAX_AVARIANT_NUM 2
+
 /* This structure holds information for a particular opcode.  */
 
 struct aarch64_opcode
@@ -1207,7 +1211,7 @@ struct aarch64_opcode
   enum aarch64_op op;
 
   /* Which architecture variant provides this instruction.  */
-  const aarch64_feature_set *avariant;
+  const aarch64_feature_set (*avariant)[AARCH64_MAX_AVARIANT_NUM];
 
   /* An array of operand codes.  Each code is an index into the
      operand table.  They appear in the order which the operands must
@@ -1327,6 +1331,14 @@ extern const aarch64_opcode aarch64_opcode_table[];
 #define C_SCAN_MOPS_PME (3U << 2)
 /* Next bit is 4.  */
 
+static inline bool
+cpu_enables_opcode_p (const aarch64_feature_set cpu,
+		      const aarch64_opcode *opcode)
+{
+  return AARCH64_CPU_HAS_ALL_FEATURES (cpu, (*opcode->avariant)[0])
+	 || AARCH64_CPU_HAS_ALL_FEATURES (cpu, (*opcode->avariant)[1]);
+}
+
 static inline bool
 alias_opcode_p (const aarch64_opcode *opcode)
 {
diff --git a/opcodes/aarch64-dis.c b/opcodes/aarch64-dis.c
index 213df616608b3ea3159b4f6b92b65ea319999a0a..d92bc38ab6075a0bc989b576785a125ba67d696c 100644
--- a/opcodes/aarch64-dis.c
+++ b/opcodes/aarch64-dis.c
@@ -3315,7 +3315,7 @@ determine_disassembling_preference (struct aarch64_inst *inst,
 	  continue;
 	}
 
-      if (!AARCH64_CPU_HAS_ALL_FEATURES (arch_variant, *alias->avariant))
+      if (!cpu_enables_opcode_p(arch_variant, alias))
 	{
 	  DEBUG_TRACE ("skip %s: we're missing features", alias->name);
 	  continue;
diff --git a/opcodes/aarch64-opc.c b/opcodes/aarch64-opc.c
index bbe6f09808b6e85307b1fad039b5ebca7b489bf0..29909606dc27798de57d568775efb7e8dc822a70 100644
--- a/opcodes/aarch64-opc.c
+++ b/opcodes/aarch64-opc.c
@@ -5515,8 +5515,8 @@ verify_constraints (const struct aarch64_inst *inst,
 	  /* Check to see if the MOVPRFX SVE instruction is followed by an SVE
 	     instruction for better error messages.  */
 	  if (!opcode->avariant
-	      || (!AARCH64_CPU_HAS_FEATURE (*opcode->avariant, SVE)
-		  && !AARCH64_CPU_HAS_FEATURE (*opcode->avariant, SVE2)))
+	      || (!AARCH64_CPU_HAS_FEATURE ((*opcode->avariant)[0], SVE)
+		  && !AARCH64_CPU_HAS_FEATURE ((*opcode->avariant)[0], SVE2)))
 	    {
 	      mismatch_detail->kind = AARCH64_OPDE_SYNTAX_ERROR;
 	      mismatch_detail->error = _("SVE instruction expected after "
@@ -5761,7 +5761,7 @@ aarch64_cpu_supports_inst_p (aarch64_feature_set cpu_variant,
 			     aarch64_inst *inst)
 {
   if (!inst->opcode->avariant
-      || !AARCH64_CPU_HAS_ALL_FEATURES (cpu_variant, *inst->opcode->avariant))
+      || !cpu_enables_opcode_p (cpu_variant, inst->opcode))
     return false;
 
   if (inst->opcode->iclass == sme_fp_sd
diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h
index 1d12630273e063bc7171b1f8f16dd687f7ec954f..117806621484aad4c2d7907957d4b7c74bfa745f 100644
--- a/opcodes/aarch64-tbl.h
+++ b/opcodes/aarch64-tbl.h
@@ -2588,163 +2588,104 @@
   QLF3(V_4S, V_8H, S_H),	\
 }
 \f
-/* Opcode table.
-
-  Any SVE or SVE2 feature must include AARCH64_FEATURE_{SVE|SVE2} in its
-  bitmask, even if this is implied by other selected feature bits.  This
-  allows verify_constraints to identify SVE instructions when selecting an
-  error message for MOVPRFX constraint violations.  */
-
-static const aarch64_feature_set aarch64_feature_v8 =
-  AARCH64_FEATURE (V8);
-static const aarch64_feature_set aarch64_feature_fp =
-  AARCH64_FEATURE (FP);
-static const aarch64_feature_set aarch64_feature_simd =
-  AARCH64_FEATURE (SIMD);
-static const aarch64_feature_set aarch64_feature_crc =
-  AARCH64_FEATURE (CRC);
-static const aarch64_feature_set aarch64_feature_lse =
-  AARCH64_FEATURE (LSE);
-static const aarch64_feature_set aarch64_feature_lse128 =
-  AARCH64_FEATURES (2, LSE, LSE128);
-static const aarch64_feature_set aarch64_feature_lor =
-  AARCH64_FEATURE (LOR);
-static const aarch64_feature_set aarch64_feature_rdma =
-  AARCH64_FEATURE (RDMA);
-static const aarch64_feature_set aarch64_feature_fp_f16 =
-  AARCH64_FEATURES (2, F16, FP);
-static const aarch64_feature_set aarch64_feature_simd_f16 =
-  AARCH64_FEATURES (2, F16, SIMD);
-static const aarch64_feature_set aarch64_feature_sve =
-  AARCH64_FEATURE (SVE);
-static const aarch64_feature_set aarch64_feature_pauth =
-  AARCH64_FEATURE (PAUTH);
-static const aarch64_feature_set aarch64_feature_compnum =
-  AARCH64_FEATURE (COMPNUM);
-static const aarch64_feature_set aarch64_feature_jscvt =
-  AARCH64_FEATURE (JSCVT);
-static const aarch64_feature_set aarch64_feature_rcpc =
-  AARCH64_FEATURE (RCPC);
-static const aarch64_feature_set aarch64_feature_rcpc2 =
-  AARCH64_FEATURE (RCPC2);
-static const aarch64_feature_set aarch64_feature_dotprod =
-  AARCH64_FEATURE (DOTPROD);
-static const aarch64_feature_set aarch64_feature_sha2 =
-  AARCH64_FEATURES (2, V8, SHA2);
-static const aarch64_feature_set aarch64_feature_aes =
-  AARCH64_FEATURES (2, V8, AES);
-static const aarch64_feature_set aarch64_feature_sm4 =
-  AARCH64_FEATURES (3, SM4, SIMD, FP);
-static const aarch64_feature_set aarch64_feature_sha3 =
-  AARCH64_FEATURES (4, SHA2, SHA3, SIMD, FP);
-static const aarch64_feature_set aarch64_feature_fp_16_v8_2a =
-  AARCH64_FEATURES (3, F16_FML, F16, FP);
-static const aarch64_feature_set aarch64_feature_flagmanip =
-  AARCH64_FEATURE (FLAGMANIP);
-static const aarch64_feature_set aarch64_feature_frintts =
-  AARCH64_FEATURE (FRINTTS);
-static const aarch64_feature_set aarch64_feature_sb =
-  AARCH64_FEATURE (SB);
-static const aarch64_feature_set aarch64_feature_predres =
-  AARCH64_FEATURE (PREDRES);
-static const aarch64_feature_set aarch64_feature_predres2 =
-  AARCH64_FEATURES (2, PREDRES, PREDRES2);
-static const aarch64_feature_set aarch64_feature_memtag =
-  AARCH64_FEATURE (MEMTAG);
-static const aarch64_feature_set aarch64_feature_bfloat16 =
-  AARCH64_FEATURE (BFLOAT16);
-static const aarch64_feature_set aarch64_feature_bfloat16_sve =
-  AARCH64_FEATURES (2, BFLOAT16, SVE);
-static const aarch64_feature_set aarch64_feature_tme =
-  AARCH64_FEATURE (TME);
-static const aarch64_feature_set aarch64_feature_sve2 =
-  AARCH64_FEATURE (SVE2);
-static const aarch64_feature_set aarch64_feature_sve2aes =
-  AARCH64_FEATURES (2, SVE2, SVE2_AES);
-static const aarch64_feature_set aarch64_feature_sve2sha3 =
-  AARCH64_FEATURES (2, SVE2, SVE2_SHA3);
-static const aarch64_feature_set aarch64_feature_sve2sm4 =
-  AARCH64_FEATURES (2, SVE2, SVE2_SM4);
-static const aarch64_feature_set aarch64_feature_sve2bitperm =
-  AARCH64_FEATURES (2, SVE2, SVE2_BITPERM);
-static const aarch64_feature_set aarch64_feature_sme =
-  AARCH64_FEATURES (2, SVE2, SME);
-static const aarch64_feature_set aarch64_feature_sme_f64f64 =
-  AARCH64_FEATURES (3, SVE2, SME, SME_F64F64);
-static const aarch64_feature_set aarch64_feature_sme_i16i64 =
-  AARCH64_FEATURES (3, SVE2, SME, SME_I16I64);
-static const aarch64_feature_set aarch64_feature_sme2 =
-  AARCH64_FEATURES (3, SVE2, SME, SME2);
-static const aarch64_feature_set aarch64_feature_sme2_i16i64 =
-  AARCH64_FEATURES (2, SME2, SME_I16I64);
-static const aarch64_feature_set aarch64_feature_sme2_f64f64 =
-  AARCH64_FEATURES (2, SME2, SME_F64F64);
-static const aarch64_feature_set aarch64_feature_i8mm =
-  AARCH64_FEATURE (I8MM);
-static const aarch64_feature_set aarch64_feature_i8mm_sve =
-  AARCH64_FEATURES (2, I8MM, SVE);
-static const aarch64_feature_set aarch64_feature_f32mm_sve =
-  AARCH64_FEATURES (2, F32MM, SVE);
-static const aarch64_feature_set aarch64_feature_f64mm_sve =
-  AARCH64_FEATURES (2, F64MM, SVE);
-static const aarch64_feature_set aarch64_feature_v8r =
-  AARCH64_FEATURE (V8R);
-static const aarch64_feature_set aarch64_feature_ls64 =
-  AARCH64_FEATURE (LS64);
-static const aarch64_feature_set aarch64_feature_flagm =
-  AARCH64_FEATURE (FLAGM);
-static const aarch64_feature_set aarch64_feature_xs =
-  AARCH64_FEATURE (XS);
-static const aarch64_feature_set aarch64_feature_wfxt =
-  AARCH64_FEATURE (WFXT);
-static const aarch64_feature_set aarch64_feature_mops =
-  AARCH64_FEATURE (MOPS);
-static const aarch64_feature_set aarch64_feature_mops_memtag =
-  AARCH64_FEATURES (2, MOPS, MEMTAG);
-static const aarch64_feature_set aarch64_feature_hbc =
-  AARCH64_FEATURE (HBC);
-static const aarch64_feature_set aarch64_feature_cssc =
-  AARCH64_FEATURE (CSSC);
-static const aarch64_feature_set aarch64_feature_chk =
-  AARCH64_FEATURE (CHK);
-static const aarch64_feature_set aarch64_feature_gcs =
-  AARCH64_FEATURE (GCS);
-static const aarch64_feature_set aarch64_feature_ite =
-  AARCH64_FEATURE (ITE);
-static const aarch64_feature_set aarch64_feature_d128 =
-  AARCH64_FEATURE (D128);
-static const aarch64_feature_set aarch64_feature_the =
-  AARCH64_FEATURE (THE);
-static const aarch64_feature_set aarch64_feature_d128_the =
-  AARCH64_FEATURES (2, D128, THE);
-static const aarch64_feature_set aarch64_feature_b16b16 =
-  AARCH64_FEATURE (B16B16);
-static const aarch64_feature_set aarch64_feature_sme2p1 =
-  AARCH64_FEATURE (SME2p1);
-static const aarch64_feature_set aarch64_feature_sve2p1 =
-  AARCH64_FEATURE (SVE2p1);
-static const aarch64_feature_set aarch64_feature_rcpc3 =
-  AARCH64_FEATURE (RCPC3);
-static const aarch64_feature_set aarch64_feature_cpa =
-  AARCH64_FEATURE (CPA);
-static const aarch64_feature_set aarch64_feature_cpa_sve =
-  AARCH64_FEATURES (2, CPA, SVE);
-static const aarch64_feature_set aarch64_feature_faminmax =
-  AARCH64_FEATURE (FAMINMAX);
-static const aarch64_feature_set aarch64_feature_faminmax_sve2 =
-  AARCH64_FEATURES (2, FAMINMAX, SVE2);
-static const aarch64_feature_set aarch64_feature_faminmax_sme2 =
-  AARCH64_FEATURES (3, SVE2, FAMINMAX, SME2);
-static const aarch64_feature_set aarch64_feature_fp8 =
-  AARCH64_FEATURE (FP8);
-static const aarch64_feature_set aarch64_feature_fp8_sve2 =
-  AARCH64_FEATURES (2, FP8, SVE2);
-static const aarch64_feature_set aarch64_feature_fp8_sme2 =
-  AARCH64_FEATURES (2, FP8, SME2);
-static const aarch64_feature_set aarch64_feature_lut =
-  AARCH64_FEATURE (LUT);
-static const aarch64_feature_set aarch64_feature_lut_sve2 =
-  AARCH64_FEATURES (2, LUT, SVE2);
+
+#define FEATURE_SET_OR_1(NAME,FEATS1) \
+static const aarch64_feature_set \
+aarch64_feature_##NAME[AARCH64_MAX_AVARIANT_NUM] = {FEATS1, FEATS1};
+
+#define FEATURE_SET_OR_2(NAME,FEATS1,FEATS2) \
+static const aarch64_feature_set \
+aarch64_feature_##NAME[AARCH64_MAX_AVARIANT_NUM] = {FEATS1, FEATS2};
+
+
+/* Any SVE or SVE2 feature must include AARCH64_FEATURE_{SVE|SVE2} in its
+   bitmask in the first feature set, even if this is implied by other selected
+   feature bits.  Similarly, any SME feature must include AARCH64_FEATURE_SME.
+
+   This restriction allows parse_operands to select appropriate reg types to
+   exclude when parsing immediate values, and allows verify_constraints to
+   identify SVE instructions when selecting an error message for MOVPRFX
+   constraint violations.
+
+   Only the first feature set is checked in these cases, so that existing
+   opcode parsing is not changed when adding an alternative enabling feature
+   set.  */
+
+FEATURE_SET_OR_1(v8, AARCH64_FEATURE (V8))
+FEATURE_SET_OR_1(fp, AARCH64_FEATURE (FP))
+FEATURE_SET_OR_1(simd, AARCH64_FEATURE (SIMD))
+FEATURE_SET_OR_1(crc, AARCH64_FEATURE (CRC))
+FEATURE_SET_OR_1(lse, AARCH64_FEATURE (LSE))
+FEATURE_SET_OR_1(lse128, AARCH64_FEATURES (2, LSE, LSE128))
+FEATURE_SET_OR_1(lor, AARCH64_FEATURE (LOR))
+FEATURE_SET_OR_1(rdma, AARCH64_FEATURE (RDMA))
+FEATURE_SET_OR_1(fp_f16, AARCH64_FEATURES (2, F16, FP))
+FEATURE_SET_OR_1(simd_f16, AARCH64_FEATURES (2, F16, SIMD))
+FEATURE_SET_OR_1(sve, AARCH64_FEATURE (SVE))
+FEATURE_SET_OR_1(pauth, AARCH64_FEATURE (PAUTH))
+FEATURE_SET_OR_1(compnum, AARCH64_FEATURE (COMPNUM))
+FEATURE_SET_OR_1(jscvt, AARCH64_FEATURE (JSCVT))
+FEATURE_SET_OR_1(rcpc, AARCH64_FEATURE (RCPC))
+FEATURE_SET_OR_1(rcpc2, AARCH64_FEATURE (RCPC2))
+FEATURE_SET_OR_1(dotprod, AARCH64_FEATURE (DOTPROD))
+FEATURE_SET_OR_1(sha2, AARCH64_FEATURES (2, V8, SHA2))
+FEATURE_SET_OR_1(aes, AARCH64_FEATURES (2, V8, AES))
+FEATURE_SET_OR_1(sm4, AARCH64_FEATURES (3, SM4, SIMD, FP))
+FEATURE_SET_OR_1(sha3, AARCH64_FEATURES (4, SHA2, SHA3, SIMD, FP))
+FEATURE_SET_OR_1(fp_16_v8_2a, AARCH64_FEATURES (3, F16_FML, F16, FP))
+FEATURE_SET_OR_1(flagmanip, AARCH64_FEATURE (FLAGMANIP))
+FEATURE_SET_OR_1(frintts, AARCH64_FEATURE (FRINTTS))
+FEATURE_SET_OR_1(sb, AARCH64_FEATURE (SB))
+FEATURE_SET_OR_1(predres, AARCH64_FEATURE (PREDRES))
+FEATURE_SET_OR_1(predres2, AARCH64_FEATURES (2, PREDRES, PREDRES2))
+FEATURE_SET_OR_1(memtag, AARCH64_FEATURE (MEMTAG))
+FEATURE_SET_OR_1(bfloat16, AARCH64_FEATURE (BFLOAT16))
+FEATURE_SET_OR_1(bfloat16_sve, AARCH64_FEATURES (2, BFLOAT16, SVE))
+FEATURE_SET_OR_1(tme, AARCH64_FEATURE (TME))
+FEATURE_SET_OR_1(sve2, AARCH64_FEATURE (SVE2))
+FEATURE_SET_OR_1(sve2aes, AARCH64_FEATURES (2, SVE2, SVE2_AES))
+FEATURE_SET_OR_1(sve2sha3, AARCH64_FEATURES (2, SVE2, SVE2_SHA3))
+FEATURE_SET_OR_1(sve2sm4, AARCH64_FEATURES (2, SVE2, SVE2_SM4))
+FEATURE_SET_OR_1(sve2bitperm, AARCH64_FEATURES (2, SVE2, SVE2_BITPERM))
+FEATURE_SET_OR_1(sme, AARCH64_FEATURES (2, SVE2, SME))
+FEATURE_SET_OR_1(sme_f64f64, AARCH64_FEATURES (3, SVE2, SME, SME_F64F64))
+FEATURE_SET_OR_1(sme_i16i64, AARCH64_FEATURES (3, SVE2, SME, SME_I16I64))
+FEATURE_SET_OR_1(sme2, AARCH64_FEATURES (3, SVE2, SME, SME2))
+FEATURE_SET_OR_1(sme2_i16i64, AARCH64_FEATURES (2, SME2, SME_I16I64))
+FEATURE_SET_OR_1(sme2_f64f64, AARCH64_FEATURES (2, SME2, SME_F64F64))
+FEATURE_SET_OR_1(i8mm, AARCH64_FEATURE (I8MM))
+FEATURE_SET_OR_1(i8mm_sve, AARCH64_FEATURES (2, I8MM, SVE))
+FEATURE_SET_OR_1(f32mm_sve, AARCH64_FEATURES (2, F32MM, SVE))
+FEATURE_SET_OR_1(f64mm_sve, AARCH64_FEATURES (2, F64MM, SVE))
+FEATURE_SET_OR_1(v8r, AARCH64_FEATURE (V8R))
+FEATURE_SET_OR_1(ls64, AARCH64_FEATURE (LS64))
+FEATURE_SET_OR_1(flagm, AARCH64_FEATURE (FLAGM))
+FEATURE_SET_OR_1(xs, AARCH64_FEATURE (XS))
+FEATURE_SET_OR_1(wfxt, AARCH64_FEATURE (WFXT))
+FEATURE_SET_OR_1(mops, AARCH64_FEATURE (MOPS))
+FEATURE_SET_OR_1(mops_memtag, AARCH64_FEATURES (2, MOPS, MEMTAG))
+FEATURE_SET_OR_1(hbc, AARCH64_FEATURE (HBC))
+FEATURE_SET_OR_1(cssc, AARCH64_FEATURE (CSSC))
+FEATURE_SET_OR_1(chk, AARCH64_FEATURE (CHK))
+FEATURE_SET_OR_1(gcs, AARCH64_FEATURE (GCS))
+FEATURE_SET_OR_1(ite, AARCH64_FEATURE (ITE))
+FEATURE_SET_OR_1(d128, AARCH64_FEATURE (D128))
+FEATURE_SET_OR_1(the, AARCH64_FEATURE (THE))
+FEATURE_SET_OR_1(d128_the, AARCH64_FEATURES (2, D128, THE))
+FEATURE_SET_OR_1(b16b16, AARCH64_FEATURE (B16B16))
+FEATURE_SET_OR_1(sme2p1, AARCH64_FEATURE (SME2p1))
+FEATURE_SET_OR_1(sve2p1, AARCH64_FEATURE (SVE2p1))
+FEATURE_SET_OR_1(rcpc3, AARCH64_FEATURE (RCPC3))
+FEATURE_SET_OR_1(cpa, AARCH64_FEATURE (CPA))
+FEATURE_SET_OR_1(cpa_sve, AARCH64_FEATURES (2, CPA, SVE))
+FEATURE_SET_OR_1(faminmax, AARCH64_FEATURE (FAMINMAX))
+FEATURE_SET_OR_1(faminmax_sve2, AARCH64_FEATURES (2, FAMINMAX, SVE2))
+FEATURE_SET_OR_1(faminmax_sme2, AARCH64_FEATURES (3, SVE2, FAMINMAX, SME2))
+FEATURE_SET_OR_1(fp8, AARCH64_FEATURE (FP8))
+FEATURE_SET_OR_1(fp8_sve2, AARCH64_FEATURES (2, FP8, SVE2))
+FEATURE_SET_OR_1(fp8_sme2, AARCH64_FEATURES (2, FP8, SME2))
+FEATURE_SET_OR_1(lut, AARCH64_FEATURE (LUT))
+FEATURE_SET_OR_1(lut_sve2, AARCH64_FEATURES (2, LUT, SVE2))
 
 #define CORE		&aarch64_feature_v8
 #define FP		&aarch64_feature_fp

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] aarch64: Enable gating instructions with FEAT_X || FEAT_Y
  2024-06-04 13:25 [PATCH] aarch64: Enable gating instructions with FEAT_X || FEAT_Y Andrew Carlotti
@ 2024-06-12 13:31 ` Richard Earnshaw (lists)
  2024-06-12 14:51   ` Andrew Carlotti
  0 siblings, 1 reply; 3+ messages in thread
From: Richard Earnshaw (lists) @ 2024-06-12 13:31 UTC (permalink / raw)
  To: Andrew Carlotti, binutils

On 04/06/2024 14:25, Andrew Carlotti wrote:
> Some aarch64 instructions can be enabled independently by two different
> feature sets.  This patch extends aarch64_opcode->avariant to a
> 2-element array of feature sets.  Checks for opcode enablement now use a
> new inline function that tests both feature sets.  These two feature
> sets are identical for all currently supported instructions, so there is
> no functional change.
> 
> This passes all existing tests, and is further tested via its usage in my
> subsequent FP8 patches.  I've additionally tested the new behaviour by changing
> one or both feature sets in aarch64_feature_dotprod, and verifying that
> assembly only fails when neither of the specified feature sets is present.
> 
> Is this ok for master?

Given that we already have some features that are enabled on, for example, variants of armv8 and armv9, don't we already have a mechanism for doing this in general?  That is, to define an internal feature and then use (feata, featb) -> featc? as part of the feata and featb definitions?

R.

> 
> 
> diff --git a/gas/config/tc-aarch64.c b/gas/config/tc-aarch64.c
> index fec17c40a43e60e294b6aab8976e5926a3c49230..c421bfdac1f423344f5ecdb998d1d0b8d24f3ae5 100644
> --- a/gas/config/tc-aarch64.c
> +++ b/gas/config/tc-aarch64.c
> @@ -6567,10 +6567,10 @@ parse_operands (char *str, const aarch64_opcode *opcode)
>    clear_error ();
>    skip_whitespace (str);
>  
> -  if (AARCH64_CPU_HAS_FEATURE (*opcode->avariant, SME2))
> +  if (AARCH64_CPU_HAS_FEATURE ((*opcode->avariant)[0], SME2))
>      imm_reg_type = REG_TYPE_R_ZR_SP_BHSDQ_VZP_PN;
> -  else if (AARCH64_CPU_HAS_FEATURE (*opcode->avariant, SVE)
> -	   || AARCH64_CPU_HAS_FEATURE (*opcode->avariant, SVE2))
> +  else if (AARCH64_CPU_HAS_FEATURE ((*opcode->avariant)[0], SVE)
> +	   || AARCH64_CPU_HAS_FEATURE ((*opcode->avariant)[0], SVE2))
>      imm_reg_type = REG_TYPE_R_ZR_SP_BHSDQ_VZP;
>    else
>      imm_reg_type = REG_TYPE_R_ZR_BHSDQ_V;
> diff --git a/include/opcode/aarch64.h b/include/opcode/aarch64.h
> index 8a21611e3ff59220ae5e25120c375ac2db111b77..9da73a932f7083b0e6d9964eda5e38124808f5da 100644
> --- a/include/opcode/aarch64.h
> +++ b/include/opcode/aarch64.h
> @@ -1183,6 +1183,10 @@ typedef struct aarch64_instr_sequence aarch64_instr_sequence;
>  /* Forward declare instruction definition.  */
>  typedef struct aarch64_inst aarch64_inst;
>  
> +/* Maximum number of architecture feature sets that can independently enable
> +   an instruction.  */
> +#define AARCH64_MAX_AVARIANT_NUM 2
> +
>  /* This structure holds information for a particular opcode.  */
>  
>  struct aarch64_opcode
> @@ -1207,7 +1211,7 @@ struct aarch64_opcode
>    enum aarch64_op op;
>  
>    /* Which architecture variant provides this instruction.  */
> -  const aarch64_feature_set *avariant;
> +  const aarch64_feature_set (*avariant)[AARCH64_MAX_AVARIANT_NUM];
>  
>    /* An array of operand codes.  Each code is an index into the
>       operand table.  They appear in the order which the operands must
> @@ -1327,6 +1331,14 @@ extern const aarch64_opcode aarch64_opcode_table[];
>  #define C_SCAN_MOPS_PME (3U << 2)
>  /* Next bit is 4.  */
>  
> +static inline bool
> +cpu_enables_opcode_p (const aarch64_feature_set cpu,
> +		      const aarch64_opcode *opcode)
> +{
> +  return AARCH64_CPU_HAS_ALL_FEATURES (cpu, (*opcode->avariant)[0])
> +	 || AARCH64_CPU_HAS_ALL_FEATURES (cpu, (*opcode->avariant)[1]);
> +}
> +
>  static inline bool
>  alias_opcode_p (const aarch64_opcode *opcode)
>  {
> diff --git a/opcodes/aarch64-dis.c b/opcodes/aarch64-dis.c
> index 213df616608b3ea3159b4f6b92b65ea319999a0a..d92bc38ab6075a0bc989b576785a125ba67d696c 100644
> --- a/opcodes/aarch64-dis.c
> +++ b/opcodes/aarch64-dis.c
> @@ -3315,7 +3315,7 @@ determine_disassembling_preference (struct aarch64_inst *inst,
>  	  continue;
>  	}
>  
> -      if (!AARCH64_CPU_HAS_ALL_FEATURES (arch_variant, *alias->avariant))
> +      if (!cpu_enables_opcode_p(arch_variant, alias))
>  	{
>  	  DEBUG_TRACE ("skip %s: we're missing features", alias->name);
>  	  continue;
> diff --git a/opcodes/aarch64-opc.c b/opcodes/aarch64-opc.c
> index bbe6f09808b6e85307b1fad039b5ebca7b489bf0..29909606dc27798de57d568775efb7e8dc822a70 100644
> --- a/opcodes/aarch64-opc.c
> +++ b/opcodes/aarch64-opc.c
> @@ -5515,8 +5515,8 @@ verify_constraints (const struct aarch64_inst *inst,
>  	  /* Check to see if the MOVPRFX SVE instruction is followed by an SVE
>  	     instruction for better error messages.  */
>  	  if (!opcode->avariant
> -	      || (!AARCH64_CPU_HAS_FEATURE (*opcode->avariant, SVE)
> -		  && !AARCH64_CPU_HAS_FEATURE (*opcode->avariant, SVE2)))
> +	      || (!AARCH64_CPU_HAS_FEATURE ((*opcode->avariant)[0], SVE)
> +		  && !AARCH64_CPU_HAS_FEATURE ((*opcode->avariant)[0], SVE2)))
>  	    {
>  	      mismatch_detail->kind = AARCH64_OPDE_SYNTAX_ERROR;
>  	      mismatch_detail->error = _("SVE instruction expected after "
> @@ -5761,7 +5761,7 @@ aarch64_cpu_supports_inst_p (aarch64_feature_set cpu_variant,
>  			     aarch64_inst *inst)
>  {
>    if (!inst->opcode->avariant
> -      || !AARCH64_CPU_HAS_ALL_FEATURES (cpu_variant, *inst->opcode->avariant))
> +      || !cpu_enables_opcode_p (cpu_variant, inst->opcode))
>      return false;
>  
>    if (inst->opcode->iclass == sme_fp_sd
> diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h
> index 1d12630273e063bc7171b1f8f16dd687f7ec954f..117806621484aad4c2d7907957d4b7c74bfa745f 100644
> --- a/opcodes/aarch64-tbl.h
> +++ b/opcodes/aarch64-tbl.h
> @@ -2588,163 +2588,104 @@
>    QLF3(V_4S, V_8H, S_H),	\
>  }
>  \f
> -/* Opcode table.
> -
> -  Any SVE or SVE2 feature must include AARCH64_FEATURE_{SVE|SVE2} in its
> -  bitmask, even if this is implied by other selected feature bits.  This
> -  allows verify_constraints to identify SVE instructions when selecting an
> -  error message for MOVPRFX constraint violations.  */
> -
> -static const aarch64_feature_set aarch64_feature_v8 =
> -  AARCH64_FEATURE (V8);
> -static const aarch64_feature_set aarch64_feature_fp =
> -  AARCH64_FEATURE (FP);
> -static const aarch64_feature_set aarch64_feature_simd =
> -  AARCH64_FEATURE (SIMD);
> -static const aarch64_feature_set aarch64_feature_crc =
> -  AARCH64_FEATURE (CRC);
> -static const aarch64_feature_set aarch64_feature_lse =
> -  AARCH64_FEATURE (LSE);
> -static const aarch64_feature_set aarch64_feature_lse128 =
> -  AARCH64_FEATURES (2, LSE, LSE128);
> -static const aarch64_feature_set aarch64_feature_lor =
> -  AARCH64_FEATURE (LOR);
> -static const aarch64_feature_set aarch64_feature_rdma =
> -  AARCH64_FEATURE (RDMA);
> -static const aarch64_feature_set aarch64_feature_fp_f16 =
> -  AARCH64_FEATURES (2, F16, FP);
> -static const aarch64_feature_set aarch64_feature_simd_f16 =
> -  AARCH64_FEATURES (2, F16, SIMD);
> -static const aarch64_feature_set aarch64_feature_sve =
> -  AARCH64_FEATURE (SVE);
> -static const aarch64_feature_set aarch64_feature_pauth =
> -  AARCH64_FEATURE (PAUTH);
> -static const aarch64_feature_set aarch64_feature_compnum =
> -  AARCH64_FEATURE (COMPNUM);
> -static const aarch64_feature_set aarch64_feature_jscvt =
> -  AARCH64_FEATURE (JSCVT);
> -static const aarch64_feature_set aarch64_feature_rcpc =
> -  AARCH64_FEATURE (RCPC);
> -static const aarch64_feature_set aarch64_feature_rcpc2 =
> -  AARCH64_FEATURE (RCPC2);
> -static const aarch64_feature_set aarch64_feature_dotprod =
> -  AARCH64_FEATURE (DOTPROD);
> -static const aarch64_feature_set aarch64_feature_sha2 =
> -  AARCH64_FEATURES (2, V8, SHA2);
> -static const aarch64_feature_set aarch64_feature_aes =
> -  AARCH64_FEATURES (2, V8, AES);
> -static const aarch64_feature_set aarch64_feature_sm4 =
> -  AARCH64_FEATURES (3, SM4, SIMD, FP);
> -static const aarch64_feature_set aarch64_feature_sha3 =
> -  AARCH64_FEATURES (4, SHA2, SHA3, SIMD, FP);
> -static const aarch64_feature_set aarch64_feature_fp_16_v8_2a =
> -  AARCH64_FEATURES (3, F16_FML, F16, FP);
> -static const aarch64_feature_set aarch64_feature_flagmanip =
> -  AARCH64_FEATURE (FLAGMANIP);
> -static const aarch64_feature_set aarch64_feature_frintts =
> -  AARCH64_FEATURE (FRINTTS);
> -static const aarch64_feature_set aarch64_feature_sb =
> -  AARCH64_FEATURE (SB);
> -static const aarch64_feature_set aarch64_feature_predres =
> -  AARCH64_FEATURE (PREDRES);
> -static const aarch64_feature_set aarch64_feature_predres2 =
> -  AARCH64_FEATURES (2, PREDRES, PREDRES2);
> -static const aarch64_feature_set aarch64_feature_memtag =
> -  AARCH64_FEATURE (MEMTAG);
> -static const aarch64_feature_set aarch64_feature_bfloat16 =
> -  AARCH64_FEATURE (BFLOAT16);
> -static const aarch64_feature_set aarch64_feature_bfloat16_sve =
> -  AARCH64_FEATURES (2, BFLOAT16, SVE);
> -static const aarch64_feature_set aarch64_feature_tme =
> -  AARCH64_FEATURE (TME);
> -static const aarch64_feature_set aarch64_feature_sve2 =
> -  AARCH64_FEATURE (SVE2);
> -static const aarch64_feature_set aarch64_feature_sve2aes =
> -  AARCH64_FEATURES (2, SVE2, SVE2_AES);
> -static const aarch64_feature_set aarch64_feature_sve2sha3 =
> -  AARCH64_FEATURES (2, SVE2, SVE2_SHA3);
> -static const aarch64_feature_set aarch64_feature_sve2sm4 =
> -  AARCH64_FEATURES (2, SVE2, SVE2_SM4);
> -static const aarch64_feature_set aarch64_feature_sve2bitperm =
> -  AARCH64_FEATURES (2, SVE2, SVE2_BITPERM);
> -static const aarch64_feature_set aarch64_feature_sme =
> -  AARCH64_FEATURES (2, SVE2, SME);
> -static const aarch64_feature_set aarch64_feature_sme_f64f64 =
> -  AARCH64_FEATURES (3, SVE2, SME, SME_F64F64);
> -static const aarch64_feature_set aarch64_feature_sme_i16i64 =
> -  AARCH64_FEATURES (3, SVE2, SME, SME_I16I64);
> -static const aarch64_feature_set aarch64_feature_sme2 =
> -  AARCH64_FEATURES (3, SVE2, SME, SME2);
> -static const aarch64_feature_set aarch64_feature_sme2_i16i64 =
> -  AARCH64_FEATURES (2, SME2, SME_I16I64);
> -static const aarch64_feature_set aarch64_feature_sme2_f64f64 =
> -  AARCH64_FEATURES (2, SME2, SME_F64F64);
> -static const aarch64_feature_set aarch64_feature_i8mm =
> -  AARCH64_FEATURE (I8MM);
> -static const aarch64_feature_set aarch64_feature_i8mm_sve =
> -  AARCH64_FEATURES (2, I8MM, SVE);
> -static const aarch64_feature_set aarch64_feature_f32mm_sve =
> -  AARCH64_FEATURES (2, F32MM, SVE);
> -static const aarch64_feature_set aarch64_feature_f64mm_sve =
> -  AARCH64_FEATURES (2, F64MM, SVE);
> -static const aarch64_feature_set aarch64_feature_v8r =
> -  AARCH64_FEATURE (V8R);
> -static const aarch64_feature_set aarch64_feature_ls64 =
> -  AARCH64_FEATURE (LS64);
> -static const aarch64_feature_set aarch64_feature_flagm =
> -  AARCH64_FEATURE (FLAGM);
> -static const aarch64_feature_set aarch64_feature_xs =
> -  AARCH64_FEATURE (XS);
> -static const aarch64_feature_set aarch64_feature_wfxt =
> -  AARCH64_FEATURE (WFXT);
> -static const aarch64_feature_set aarch64_feature_mops =
> -  AARCH64_FEATURE (MOPS);
> -static const aarch64_feature_set aarch64_feature_mops_memtag =
> -  AARCH64_FEATURES (2, MOPS, MEMTAG);
> -static const aarch64_feature_set aarch64_feature_hbc =
> -  AARCH64_FEATURE (HBC);
> -static const aarch64_feature_set aarch64_feature_cssc =
> -  AARCH64_FEATURE (CSSC);
> -static const aarch64_feature_set aarch64_feature_chk =
> -  AARCH64_FEATURE (CHK);
> -static const aarch64_feature_set aarch64_feature_gcs =
> -  AARCH64_FEATURE (GCS);
> -static const aarch64_feature_set aarch64_feature_ite =
> -  AARCH64_FEATURE (ITE);
> -static const aarch64_feature_set aarch64_feature_d128 =
> -  AARCH64_FEATURE (D128);
> -static const aarch64_feature_set aarch64_feature_the =
> -  AARCH64_FEATURE (THE);
> -static const aarch64_feature_set aarch64_feature_d128_the =
> -  AARCH64_FEATURES (2, D128, THE);
> -static const aarch64_feature_set aarch64_feature_b16b16 =
> -  AARCH64_FEATURE (B16B16);
> -static const aarch64_feature_set aarch64_feature_sme2p1 =
> -  AARCH64_FEATURE (SME2p1);
> -static const aarch64_feature_set aarch64_feature_sve2p1 =
> -  AARCH64_FEATURE (SVE2p1);
> -static const aarch64_feature_set aarch64_feature_rcpc3 =
> -  AARCH64_FEATURE (RCPC3);
> -static const aarch64_feature_set aarch64_feature_cpa =
> -  AARCH64_FEATURE (CPA);
> -static const aarch64_feature_set aarch64_feature_cpa_sve =
> -  AARCH64_FEATURES (2, CPA, SVE);
> -static const aarch64_feature_set aarch64_feature_faminmax =
> -  AARCH64_FEATURE (FAMINMAX);
> -static const aarch64_feature_set aarch64_feature_faminmax_sve2 =
> -  AARCH64_FEATURES (2, FAMINMAX, SVE2);
> -static const aarch64_feature_set aarch64_feature_faminmax_sme2 =
> -  AARCH64_FEATURES (3, SVE2, FAMINMAX, SME2);
> -static const aarch64_feature_set aarch64_feature_fp8 =
> -  AARCH64_FEATURE (FP8);
> -static const aarch64_feature_set aarch64_feature_fp8_sve2 =
> -  AARCH64_FEATURES (2, FP8, SVE2);
> -static const aarch64_feature_set aarch64_feature_fp8_sme2 =
> -  AARCH64_FEATURES (2, FP8, SME2);
> -static const aarch64_feature_set aarch64_feature_lut =
> -  AARCH64_FEATURE (LUT);
> -static const aarch64_feature_set aarch64_feature_lut_sve2 =
> -  AARCH64_FEATURES (2, LUT, SVE2);
> +
> +#define FEATURE_SET_OR_1(NAME,FEATS1) \
> +static const aarch64_feature_set \
> +aarch64_feature_##NAME[AARCH64_MAX_AVARIANT_NUM] = {FEATS1, FEATS1};
> +
> +#define FEATURE_SET_OR_2(NAME,FEATS1,FEATS2) \
> +static const aarch64_feature_set \
> +aarch64_feature_##NAME[AARCH64_MAX_AVARIANT_NUM] = {FEATS1, FEATS2};
> +
> +
> +/* Any SVE or SVE2 feature must include AARCH64_FEATURE_{SVE|SVE2} in its
> +   bitmask in the first feature set, even if this is implied by other selected
> +   feature bits.  Similarly, any SME feature must include AARCH64_FEATURE_SME.
> +
> +   This restriction allows parse_operands to select appropriate reg types to
> +   exclude when parsing immediate values, and allows verify_constraints to
> +   identify SVE instructions when selecting an error message for MOVPRFX
> +   constraint violations.
> +
> +   Only the first feature set is checked in these cases, so that existing
> +   opcode parsing is not changed when adding an alternative enabling feature
> +   set.  */
> +
> +FEATURE_SET_OR_1(v8, AARCH64_FEATURE (V8))
> +FEATURE_SET_OR_1(fp, AARCH64_FEATURE (FP))
> +FEATURE_SET_OR_1(simd, AARCH64_FEATURE (SIMD))
> +FEATURE_SET_OR_1(crc, AARCH64_FEATURE (CRC))
> +FEATURE_SET_OR_1(lse, AARCH64_FEATURE (LSE))
> +FEATURE_SET_OR_1(lse128, AARCH64_FEATURES (2, LSE, LSE128))
> +FEATURE_SET_OR_1(lor, AARCH64_FEATURE (LOR))
> +FEATURE_SET_OR_1(rdma, AARCH64_FEATURE (RDMA))
> +FEATURE_SET_OR_1(fp_f16, AARCH64_FEATURES (2, F16, FP))
> +FEATURE_SET_OR_1(simd_f16, AARCH64_FEATURES (2, F16, SIMD))
> +FEATURE_SET_OR_1(sve, AARCH64_FEATURE (SVE))
> +FEATURE_SET_OR_1(pauth, AARCH64_FEATURE (PAUTH))
> +FEATURE_SET_OR_1(compnum, AARCH64_FEATURE (COMPNUM))
> +FEATURE_SET_OR_1(jscvt, AARCH64_FEATURE (JSCVT))
> +FEATURE_SET_OR_1(rcpc, AARCH64_FEATURE (RCPC))
> +FEATURE_SET_OR_1(rcpc2, AARCH64_FEATURE (RCPC2))
> +FEATURE_SET_OR_1(dotprod, AARCH64_FEATURE (DOTPROD))
> +FEATURE_SET_OR_1(sha2, AARCH64_FEATURES (2, V8, SHA2))
> +FEATURE_SET_OR_1(aes, AARCH64_FEATURES (2, V8, AES))
> +FEATURE_SET_OR_1(sm4, AARCH64_FEATURES (3, SM4, SIMD, FP))
> +FEATURE_SET_OR_1(sha3, AARCH64_FEATURES (4, SHA2, SHA3, SIMD, FP))
> +FEATURE_SET_OR_1(fp_16_v8_2a, AARCH64_FEATURES (3, F16_FML, F16, FP))
> +FEATURE_SET_OR_1(flagmanip, AARCH64_FEATURE (FLAGMANIP))
> +FEATURE_SET_OR_1(frintts, AARCH64_FEATURE (FRINTTS))
> +FEATURE_SET_OR_1(sb, AARCH64_FEATURE (SB))
> +FEATURE_SET_OR_1(predres, AARCH64_FEATURE (PREDRES))
> +FEATURE_SET_OR_1(predres2, AARCH64_FEATURES (2, PREDRES, PREDRES2))
> +FEATURE_SET_OR_1(memtag, AARCH64_FEATURE (MEMTAG))
> +FEATURE_SET_OR_1(bfloat16, AARCH64_FEATURE (BFLOAT16))
> +FEATURE_SET_OR_1(bfloat16_sve, AARCH64_FEATURES (2, BFLOAT16, SVE))
> +FEATURE_SET_OR_1(tme, AARCH64_FEATURE (TME))
> +FEATURE_SET_OR_1(sve2, AARCH64_FEATURE (SVE2))
> +FEATURE_SET_OR_1(sve2aes, AARCH64_FEATURES (2, SVE2, SVE2_AES))
> +FEATURE_SET_OR_1(sve2sha3, AARCH64_FEATURES (2, SVE2, SVE2_SHA3))
> +FEATURE_SET_OR_1(sve2sm4, AARCH64_FEATURES (2, SVE2, SVE2_SM4))
> +FEATURE_SET_OR_1(sve2bitperm, AARCH64_FEATURES (2, SVE2, SVE2_BITPERM))
> +FEATURE_SET_OR_1(sme, AARCH64_FEATURES (2, SVE2, SME))
> +FEATURE_SET_OR_1(sme_f64f64, AARCH64_FEATURES (3, SVE2, SME, SME_F64F64))
> +FEATURE_SET_OR_1(sme_i16i64, AARCH64_FEATURES (3, SVE2, SME, SME_I16I64))
> +FEATURE_SET_OR_1(sme2, AARCH64_FEATURES (3, SVE2, SME, SME2))
> +FEATURE_SET_OR_1(sme2_i16i64, AARCH64_FEATURES (2, SME2, SME_I16I64))
> +FEATURE_SET_OR_1(sme2_f64f64, AARCH64_FEATURES (2, SME2, SME_F64F64))
> +FEATURE_SET_OR_1(i8mm, AARCH64_FEATURE (I8MM))
> +FEATURE_SET_OR_1(i8mm_sve, AARCH64_FEATURES (2, I8MM, SVE))
> +FEATURE_SET_OR_1(f32mm_sve, AARCH64_FEATURES (2, F32MM, SVE))
> +FEATURE_SET_OR_1(f64mm_sve, AARCH64_FEATURES (2, F64MM, SVE))
> +FEATURE_SET_OR_1(v8r, AARCH64_FEATURE (V8R))
> +FEATURE_SET_OR_1(ls64, AARCH64_FEATURE (LS64))
> +FEATURE_SET_OR_1(flagm, AARCH64_FEATURE (FLAGM))
> +FEATURE_SET_OR_1(xs, AARCH64_FEATURE (XS))
> +FEATURE_SET_OR_1(wfxt, AARCH64_FEATURE (WFXT))
> +FEATURE_SET_OR_1(mops, AARCH64_FEATURE (MOPS))
> +FEATURE_SET_OR_1(mops_memtag, AARCH64_FEATURES (2, MOPS, MEMTAG))
> +FEATURE_SET_OR_1(hbc, AARCH64_FEATURE (HBC))
> +FEATURE_SET_OR_1(cssc, AARCH64_FEATURE (CSSC))
> +FEATURE_SET_OR_1(chk, AARCH64_FEATURE (CHK))
> +FEATURE_SET_OR_1(gcs, AARCH64_FEATURE (GCS))
> +FEATURE_SET_OR_1(ite, AARCH64_FEATURE (ITE))
> +FEATURE_SET_OR_1(d128, AARCH64_FEATURE (D128))
> +FEATURE_SET_OR_1(the, AARCH64_FEATURE (THE))
> +FEATURE_SET_OR_1(d128_the, AARCH64_FEATURES (2, D128, THE))
> +FEATURE_SET_OR_1(b16b16, AARCH64_FEATURE (B16B16))
> +FEATURE_SET_OR_1(sme2p1, AARCH64_FEATURE (SME2p1))
> +FEATURE_SET_OR_1(sve2p1, AARCH64_FEATURE (SVE2p1))
> +FEATURE_SET_OR_1(rcpc3, AARCH64_FEATURE (RCPC3))
> +FEATURE_SET_OR_1(cpa, AARCH64_FEATURE (CPA))
> +FEATURE_SET_OR_1(cpa_sve, AARCH64_FEATURES (2, CPA, SVE))
> +FEATURE_SET_OR_1(faminmax, AARCH64_FEATURE (FAMINMAX))
> +FEATURE_SET_OR_1(faminmax_sve2, AARCH64_FEATURES (2, FAMINMAX, SVE2))
> +FEATURE_SET_OR_1(faminmax_sme2, AARCH64_FEATURES (3, SVE2, FAMINMAX, SME2))
> +FEATURE_SET_OR_1(fp8, AARCH64_FEATURE (FP8))
> +FEATURE_SET_OR_1(fp8_sve2, AARCH64_FEATURES (2, FP8, SVE2))
> +FEATURE_SET_OR_1(fp8_sme2, AARCH64_FEATURES (2, FP8, SME2))
> +FEATURE_SET_OR_1(lut, AARCH64_FEATURE (LUT))
> +FEATURE_SET_OR_1(lut_sve2, AARCH64_FEATURES (2, LUT, SVE2))
>  
>  #define CORE		&aarch64_feature_v8
>  #define FP		&aarch64_feature_fp


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] aarch64: Enable gating instructions with FEAT_X || FEAT_Y
  2024-06-12 13:31 ` Richard Earnshaw (lists)
@ 2024-06-12 14:51   ` Andrew Carlotti
  0 siblings, 0 replies; 3+ messages in thread
From: Andrew Carlotti @ 2024-06-12 14:51 UTC (permalink / raw)
  To: Richard Earnshaw (lists); +Cc: binutils

On Wed, Jun 12, 2024 at 02:31:58PM +0100, Richard Earnshaw (lists) wrote:
> On 04/06/2024 14:25, Andrew Carlotti wrote:
> > Some aarch64 instructions can be enabled independently by two different
> > feature sets.  This patch extends aarch64_opcode->avariant to a
> > 2-element array of feature sets.  Checks for opcode enablement now use a
> > new inline function that tests both feature sets.  These two feature
> > sets are identical for all currently supported instructions, so there is
> > no functional change.
> > 
> > This passes all existing tests, and is further tested via its usage in my
> > subsequent FP8 patches.  I've additionally tested the new behaviour by changing
> > one or both feature sets in aarch64_feature_dotprod, and verifying that
> > assembly only fails when both specified feature sets are not present.
> > 
> > Is this ok for master?
> 
> Given that we already have some features that are enabled on, for example, variants of armv8 and armv9, don't we already have a mechanism for doing this in general?  That is, to define an internal feature and then use (feata, featb) -> featc? as part of the feata and featb definitions?
> 
> R.

A simple implementation with internal feature bits and the existing dependency
mechanism would fail to handle `+nofeat` correctly.  It also wouldn't be able
to support (A && B) || C.  The only way I can see to handle this with extra
internal feature bits is to add a separate post-processing step that runs each
time the feature flags have changed and recomputes all the internal feature bits
from scratch.  This recomputation would need to be either written manually in
code, or represented by adding another table of complicated feature
dependencies.

> > 
> > 
> > diff --git a/gas/config/tc-aarch64.c b/gas/config/tc-aarch64.c
> > index fec17c40a43e60e294b6aab8976e5926a3c49230..c421bfdac1f423344f5ecdb998d1d0b8d24f3ae5 100644
> > --- a/gas/config/tc-aarch64.c
> > +++ b/gas/config/tc-aarch64.c
> > @@ -6567,10 +6567,10 @@ parse_operands (char *str, const aarch64_opcode *opcode)
> >    clear_error ();
> >    skip_whitespace (str);
> >  
> > -  if (AARCH64_CPU_HAS_FEATURE (*opcode->avariant, SME2))
> > +  if (AARCH64_CPU_HAS_FEATURE ((*opcode->avariant)[0], SME2))
> >      imm_reg_type = REG_TYPE_R_ZR_SP_BHSDQ_VZP_PN;
> > -  else if (AARCH64_CPU_HAS_FEATURE (*opcode->avariant, SVE)
> > -	   || AARCH64_CPU_HAS_FEATURE (*opcode->avariant, SVE2))
> > +  else if (AARCH64_CPU_HAS_FEATURE ((*opcode->avariant)[0], SVE)
> > +	   || AARCH64_CPU_HAS_FEATURE ((*opcode->avariant)[0], SVE2))
> >      imm_reg_type = REG_TYPE_R_ZR_SP_BHSDQ_VZP;
> >    else
> >      imm_reg_type = REG_TYPE_R_ZR_BHSDQ_V;
> > diff --git a/include/opcode/aarch64.h b/include/opcode/aarch64.h
> > index 8a21611e3ff59220ae5e25120c375ac2db111b77..9da73a932f7083b0e6d9964eda5e38124808f5da 100644
> > --- a/include/opcode/aarch64.h
> > +++ b/include/opcode/aarch64.h
> > @@ -1183,6 +1183,10 @@ typedef struct aarch64_instr_sequence aarch64_instr_sequence;
> >  /* Forward declare instruction definition.  */
> >  typedef struct aarch64_inst aarch64_inst;
> >  
> > +/* Maximum number of architecture feature sets that can independently enable
> > +   an instruction.  */
> > +#define AARCH64_MAX_AVARIANT_NUM 2
> > +
> >  /* This structure holds information for a particular opcode.  */
> >  
> >  struct aarch64_opcode
> > @@ -1207,7 +1211,7 @@ struct aarch64_opcode
> >    enum aarch64_op op;
> >  
> >    /* Which architecture variant provides this instruction.  */
> > -  const aarch64_feature_set *avariant;
> > +  const aarch64_feature_set (*avariant)[AARCH64_MAX_AVARIANT_NUM];
> >  
> >    /* An array of operand codes.  Each code is an index into the
> >       operand table.  They appear in the order which the operands must
> > @@ -1327,6 +1331,14 @@ extern const aarch64_opcode aarch64_opcode_table[];
> >  #define C_SCAN_MOPS_PME (3U << 2)
> >  /* Next bit is 4.  */
> >  
> > +static inline bool
> > +cpu_enables_opcode_p (const aarch64_feature_set cpu,
> > +		      const aarch64_opcode *opcode)
> > +{
> > +  return AARCH64_CPU_HAS_ALL_FEATURES (cpu, (*opcode->avariant)[0])
> > +	 || AARCH64_CPU_HAS_ALL_FEATURES (cpu, (*opcode->avariant)[1]);
> > +}
> > +
> >  static inline bool
> >  alias_opcode_p (const aarch64_opcode *opcode)
> >  {
> > diff --git a/opcodes/aarch64-dis.c b/opcodes/aarch64-dis.c
> > index 213df616608b3ea3159b4f6b92b65ea319999a0a..d92bc38ab6075a0bc989b576785a125ba67d696c 100644
> > --- a/opcodes/aarch64-dis.c
> > +++ b/opcodes/aarch64-dis.c
> > @@ -3315,7 +3315,7 @@ determine_disassembling_preference (struct aarch64_inst *inst,
> >  	  continue;
> >  	}
> >  
> > -      if (!AARCH64_CPU_HAS_ALL_FEATURES (arch_variant, *alias->avariant))
> > +      if (!cpu_enables_opcode_p(arch_variant, alias))
> >  	{
> >  	  DEBUG_TRACE ("skip %s: we're missing features", alias->name);
> >  	  continue;
> > diff --git a/opcodes/aarch64-opc.c b/opcodes/aarch64-opc.c
> > index bbe6f09808b6e85307b1fad039b5ebca7b489bf0..29909606dc27798de57d568775efb7e8dc822a70 100644
> > --- a/opcodes/aarch64-opc.c
> > +++ b/opcodes/aarch64-opc.c
> > @@ -5515,8 +5515,8 @@ verify_constraints (const struct aarch64_inst *inst,
> >  	  /* Check to see if the MOVPRFX SVE instruction is followed by an SVE
> >  	     instruction for better error messages.  */
> >  	  if (!opcode->avariant
> > -	      || (!AARCH64_CPU_HAS_FEATURE (*opcode->avariant, SVE)
> > -		  && !AARCH64_CPU_HAS_FEATURE (*opcode->avariant, SVE2)))
> > +	      || (!AARCH64_CPU_HAS_FEATURE ((*opcode->avariant)[0], SVE)
> > +		  && !AARCH64_CPU_HAS_FEATURE ((*opcode->avariant)[0], SVE2)))
> >  	    {
> >  	      mismatch_detail->kind = AARCH64_OPDE_SYNTAX_ERROR;
> >  	      mismatch_detail->error = _("SVE instruction expected after "
> > @@ -5761,7 +5761,7 @@ aarch64_cpu_supports_inst_p (aarch64_feature_set cpu_variant,
> >  			     aarch64_inst *inst)
> >  {
> >    if (!inst->opcode->avariant
> > -      || !AARCH64_CPU_HAS_ALL_FEATURES (cpu_variant, *inst->opcode->avariant))
> > +      || !cpu_enables_opcode_p (cpu_variant, inst->opcode))
> >      return false;
> >  
> >    if (inst->opcode->iclass == sme_fp_sd
> > diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h
> > index 1d12630273e063bc7171b1f8f16dd687f7ec954f..117806621484aad4c2d7907957d4b7c74bfa745f 100644
> > --- a/opcodes/aarch64-tbl.h
> > +++ b/opcodes/aarch64-tbl.h
> > @@ -2588,163 +2588,104 @@
> >    QLF3(V_4S, V_8H, S_H),	\
> >  }
> >  \f
> > -/* Opcode table.
> > -
> > -  Any SVE or SVE2 feature must include AARCH64_FEATURE_{SVE|SVE2} in its
> > -  bitmask, even if this is implied by other selected feature bits.  This
> > -  allows verify_constraints to identify SVE instructions when selecting an
> > -  error message for MOVPRFX constraint violations.  */
> > -
> > -static const aarch64_feature_set aarch64_feature_v8 =
> > -  AARCH64_FEATURE (V8);
> > -static const aarch64_feature_set aarch64_feature_fp =
> > -  AARCH64_FEATURE (FP);
> > -static const aarch64_feature_set aarch64_feature_simd =
> > -  AARCH64_FEATURE (SIMD);
> > -static const aarch64_feature_set aarch64_feature_crc =
> > -  AARCH64_FEATURE (CRC);
> > -static const aarch64_feature_set aarch64_feature_lse =
> > -  AARCH64_FEATURE (LSE);
> > -static const aarch64_feature_set aarch64_feature_lse128 =
> > -  AARCH64_FEATURES (2, LSE, LSE128);
> > -static const aarch64_feature_set aarch64_feature_lor =
> > -  AARCH64_FEATURE (LOR);
> > -static const aarch64_feature_set aarch64_feature_rdma =
> > -  AARCH64_FEATURE (RDMA);
> > -static const aarch64_feature_set aarch64_feature_fp_f16 =
> > -  AARCH64_FEATURES (2, F16, FP);
> > -static const aarch64_feature_set aarch64_feature_simd_f16 =
> > -  AARCH64_FEATURES (2, F16, SIMD);
> > -static const aarch64_feature_set aarch64_feature_sve =
> > -  AARCH64_FEATURE (SVE);
> > -static const aarch64_feature_set aarch64_feature_pauth =
> > -  AARCH64_FEATURE (PAUTH);
> > -static const aarch64_feature_set aarch64_feature_compnum =
> > -  AARCH64_FEATURE (COMPNUM);
> > -static const aarch64_feature_set aarch64_feature_jscvt =
> > -  AARCH64_FEATURE (JSCVT);
> > -static const aarch64_feature_set aarch64_feature_rcpc =
> > -  AARCH64_FEATURE (RCPC);
> > -static const aarch64_feature_set aarch64_feature_rcpc2 =
> > -  AARCH64_FEATURE (RCPC2);
> > -static const aarch64_feature_set aarch64_feature_dotprod =
> > -  AARCH64_FEATURE (DOTPROD);
> > -static const aarch64_feature_set aarch64_feature_sha2 =
> > -  AARCH64_FEATURES (2, V8, SHA2);
> > -static const aarch64_feature_set aarch64_feature_aes =
> > -  AARCH64_FEATURES (2, V8, AES);
> > -static const aarch64_feature_set aarch64_feature_sm4 =
> > -  AARCH64_FEATURES (3, SM4, SIMD, FP);
> > -static const aarch64_feature_set aarch64_feature_sha3 =
> > -  AARCH64_FEATURES (4, SHA2, SHA3, SIMD, FP);
> > -static const aarch64_feature_set aarch64_feature_fp_16_v8_2a =
> > -  AARCH64_FEATURES (3, F16_FML, F16, FP);
> > -static const aarch64_feature_set aarch64_feature_flagmanip =
> > -  AARCH64_FEATURE (FLAGMANIP);
> > -static const aarch64_feature_set aarch64_feature_frintts =
> > -  AARCH64_FEATURE (FRINTTS);
> > -static const aarch64_feature_set aarch64_feature_sb =
> > -  AARCH64_FEATURE (SB);
> > -static const aarch64_feature_set aarch64_feature_predres =
> > -  AARCH64_FEATURE (PREDRES);
> > -static const aarch64_feature_set aarch64_feature_predres2 =
> > -  AARCH64_FEATURES (2, PREDRES, PREDRES2);
> > -static const aarch64_feature_set aarch64_feature_memtag =
> > -  AARCH64_FEATURE (MEMTAG);
> > -static const aarch64_feature_set aarch64_feature_bfloat16 =
> > -  AARCH64_FEATURE (BFLOAT16);
> > -static const aarch64_feature_set aarch64_feature_bfloat16_sve =
> > -  AARCH64_FEATURES (2, BFLOAT16, SVE);
> > -static const aarch64_feature_set aarch64_feature_tme =
> > -  AARCH64_FEATURE (TME);
> > -static const aarch64_feature_set aarch64_feature_sve2 =
> > -  AARCH64_FEATURE (SVE2);
> > -static const aarch64_feature_set aarch64_feature_sve2aes =
> > -  AARCH64_FEATURES (2, SVE2, SVE2_AES);
> > -static const aarch64_feature_set aarch64_feature_sve2sha3 =
> > -  AARCH64_FEATURES (2, SVE2, SVE2_SHA3);
> > -static const aarch64_feature_set aarch64_feature_sve2sm4 =
> > -  AARCH64_FEATURES (2, SVE2, SVE2_SM4);
> > -static const aarch64_feature_set aarch64_feature_sve2bitperm =
> > -  AARCH64_FEATURES (2, SVE2, SVE2_BITPERM);
> > -static const aarch64_feature_set aarch64_feature_sme =
> > -  AARCH64_FEATURES (2, SVE2, SME);
> > -static const aarch64_feature_set aarch64_feature_sme_f64f64 =
> > -  AARCH64_FEATURES (3, SVE2, SME, SME_F64F64);
> > -static const aarch64_feature_set aarch64_feature_sme_i16i64 =
> > -  AARCH64_FEATURES (3, SVE2, SME, SME_I16I64);
> > -static const aarch64_feature_set aarch64_feature_sme2 =
> > -  AARCH64_FEATURES (3, SVE2, SME, SME2);
> > -static const aarch64_feature_set aarch64_feature_sme2_i16i64 =
> > -  AARCH64_FEATURES (2, SME2, SME_I16I64);
> > -static const aarch64_feature_set aarch64_feature_sme2_f64f64 =
> > -  AARCH64_FEATURES (2, SME2, SME_F64F64);
> > -static const aarch64_feature_set aarch64_feature_i8mm =
> > -  AARCH64_FEATURE (I8MM);
> > -static const aarch64_feature_set aarch64_feature_i8mm_sve =
> > -  AARCH64_FEATURES (2, I8MM, SVE);
> > -static const aarch64_feature_set aarch64_feature_f32mm_sve =
> > -  AARCH64_FEATURES (2, F32MM, SVE);
> > -static const aarch64_feature_set aarch64_feature_f64mm_sve =
> > -  AARCH64_FEATURES (2, F64MM, SVE);
> > -static const aarch64_feature_set aarch64_feature_v8r =
> > -  AARCH64_FEATURE (V8R);
> > -static const aarch64_feature_set aarch64_feature_ls64 =
> > -  AARCH64_FEATURE (LS64);
> > -static const aarch64_feature_set aarch64_feature_flagm =
> > -  AARCH64_FEATURE (FLAGM);
> > -static const aarch64_feature_set aarch64_feature_xs =
> > -  AARCH64_FEATURE (XS);
> > -static const aarch64_feature_set aarch64_feature_wfxt =
> > -  AARCH64_FEATURE (WFXT);
> > -static const aarch64_feature_set aarch64_feature_mops =
> > -  AARCH64_FEATURE (MOPS);
> > -static const aarch64_feature_set aarch64_feature_mops_memtag =
> > -  AARCH64_FEATURES (2, MOPS, MEMTAG);
> > -static const aarch64_feature_set aarch64_feature_hbc =
> > -  AARCH64_FEATURE (HBC);
> > -static const aarch64_feature_set aarch64_feature_cssc =
> > -  AARCH64_FEATURE (CSSC);
> > -static const aarch64_feature_set aarch64_feature_chk =
> > -  AARCH64_FEATURE (CHK);
> > -static const aarch64_feature_set aarch64_feature_gcs =
> > -  AARCH64_FEATURE (GCS);
> > -static const aarch64_feature_set aarch64_feature_ite =
> > -  AARCH64_FEATURE (ITE);
> > -static const aarch64_feature_set aarch64_feature_d128 =
> > -  AARCH64_FEATURE (D128);
> > -static const aarch64_feature_set aarch64_feature_the =
> > -  AARCH64_FEATURE (THE);
> > -static const aarch64_feature_set aarch64_feature_d128_the =
> > -  AARCH64_FEATURES (2, D128, THE);
> > -static const aarch64_feature_set aarch64_feature_b16b16 =
> > -  AARCH64_FEATURE (B16B16);
> > -static const aarch64_feature_set aarch64_feature_sme2p1 =
> > -  AARCH64_FEATURE (SME2p1);
> > -static const aarch64_feature_set aarch64_feature_sve2p1 =
> > -  AARCH64_FEATURE (SVE2p1);
> > -static const aarch64_feature_set aarch64_feature_rcpc3 =
> > -  AARCH64_FEATURE (RCPC3);
> > -static const aarch64_feature_set aarch64_feature_cpa =
> > -  AARCH64_FEATURE (CPA);
> > -static const aarch64_feature_set aarch64_feature_cpa_sve =
> > -  AARCH64_FEATURES (2, CPA, SVE);
> > -static const aarch64_feature_set aarch64_feature_faminmax =
> > -  AARCH64_FEATURE (FAMINMAX);
> > -static const aarch64_feature_set aarch64_feature_faminmax_sve2 =
> > -  AARCH64_FEATURES (2, FAMINMAX, SVE2);
> > -static const aarch64_feature_set aarch64_feature_faminmax_sme2 =
> > -  AARCH64_FEATURES (3, SVE2, FAMINMAX, SME2);
> > -static const aarch64_feature_set aarch64_feature_fp8 =
> > -  AARCH64_FEATURE (FP8);
> > -static const aarch64_feature_set aarch64_feature_fp8_sve2 =
> > -  AARCH64_FEATURES (2, FP8, SVE2);
> > -static const aarch64_feature_set aarch64_feature_fp8_sme2 =
> > -  AARCH64_FEATURES (2, FP8, SME2);
> > -static const aarch64_feature_set aarch64_feature_lut =
> > -  AARCH64_FEATURE (LUT);
> > -static const aarch64_feature_set aarch64_feature_lut_sve2 =
> > -  AARCH64_FEATURES (2, LUT, SVE2);
> > +
> > +#define FEATURE_SET_OR_1(NAME,FEATS1) \
> > +static const aarch64_feature_set \
> > +aarch64_feature_##NAME[AARCH64_MAX_AVARIANT_NUM] = {FEATS1, FEATS1};
> > +
> > +#define FEATURE_SET_OR_2(NAME,FEATS1,FEATS2) \
> > +static const aarch64_feature_set \
> > +aarch64_feature_##NAME[AARCH64_MAX_AVARIANT_NUM] = {FEATS1, FEATS2};
> > +
> > +
> > +/* Any SVE or SVE2 feature must include AARCH64_FEATURE_{SVE|SVE2} in its
> > +   bitmask in the first feature set, even if this is implied by other selected
> > +   feature bits.  Similarly, any SME feature must include AARCH64_FEATURE_SME.
> > +
> > +   This restriction allows parse operands to select appropriate reg types to
> > +   exclude when parsing immediate values, and allows verify_constraints to
> > +   identify SVE instructions when selecting an error message for MOVPRFX
> > +   constraint violations.
> > +
> > +   Only the first feature set is checked in these cases, so that existing
> > +   opcode parsing is not changed when adding an alternative enabling feature
> > +   set.  */
> > +
> > +FEATURE_SET_OR_1(v8, AARCH64_FEATURE (V8))
> > +FEATURE_SET_OR_1(fp, AARCH64_FEATURE (FP))
> > +FEATURE_SET_OR_1(simd, AARCH64_FEATURE (SIMD))
> > +FEATURE_SET_OR_1(crc, AARCH64_FEATURE (CRC))
> > +FEATURE_SET_OR_1(lse, AARCH64_FEATURE (LSE))
> > +FEATURE_SET_OR_1(lse128, AARCH64_FEATURES (2, LSE, LSE128))
> > +FEATURE_SET_OR_1(lor, AARCH64_FEATURE (LOR))
> > +FEATURE_SET_OR_1(rdma, AARCH64_FEATURE (RDMA))
> > +FEATURE_SET_OR_1(fp_f16, AARCH64_FEATURES (2, F16, FP))
> > +FEATURE_SET_OR_1(simd_f16, AARCH64_FEATURES (2, F16, SIMD))
> > +FEATURE_SET_OR_1(sve, AARCH64_FEATURE (SVE))
> > +FEATURE_SET_OR_1(pauth, AARCH64_FEATURE (PAUTH))
> > +FEATURE_SET_OR_1(compnum, AARCH64_FEATURE (COMPNUM))
> > +FEATURE_SET_OR_1(jscvt, AARCH64_FEATURE (JSCVT))
> > +FEATURE_SET_OR_1(rcpc, AARCH64_FEATURE (RCPC))
> > +FEATURE_SET_OR_1(rcpc2, AARCH64_FEATURE (RCPC2))
> > +FEATURE_SET_OR_1(dotprod, AARCH64_FEATURE (DOTPROD))
> > +FEATURE_SET_OR_1(sha2, AARCH64_FEATURES (2, V8, SHA2))
> > +FEATURE_SET_OR_1(aes, AARCH64_FEATURES (2, V8, AES))
> > +FEATURE_SET_OR_1(sm4, AARCH64_FEATURES (3, SM4, SIMD, FP))
> > +FEATURE_SET_OR_1(sha3, AARCH64_FEATURES (4, SHA2, SHA3, SIMD, FP))
> > +FEATURE_SET_OR_1(fp_16_v8_2a, AARCH64_FEATURES (3, F16_FML, F16, FP))
> > +FEATURE_SET_OR_1(flagmanip, AARCH64_FEATURE (FLAGMANIP))
> > +FEATURE_SET_OR_1(frintts, AARCH64_FEATURE (FRINTTS))
> > +FEATURE_SET_OR_1(sb, AARCH64_FEATURE (SB))
> > +FEATURE_SET_OR_1(predres, AARCH64_FEATURE (PREDRES))
> > +FEATURE_SET_OR_1(predres2, AARCH64_FEATURES (2, PREDRES, PREDRES2))
> > +FEATURE_SET_OR_1(memtag, AARCH64_FEATURE (MEMTAG))
> > +FEATURE_SET_OR_1(bfloat16, AARCH64_FEATURE (BFLOAT16))
> > +FEATURE_SET_OR_1(bfloat16_sve, AARCH64_FEATURES (2, BFLOAT16, SVE))
> > +FEATURE_SET_OR_1(tme, AARCH64_FEATURE (TME))
> > +FEATURE_SET_OR_1(sve2, AARCH64_FEATURE (SVE2))
> > +FEATURE_SET_OR_1(sve2aes, AARCH64_FEATURES (2, SVE2, SVE2_AES))
> > +FEATURE_SET_OR_1(sve2sha3, AARCH64_FEATURES (2, SVE2, SVE2_SHA3))
> > +FEATURE_SET_OR_1(sve2sm4, AARCH64_FEATURES (2, SVE2, SVE2_SM4))
> > +FEATURE_SET_OR_1(sve2bitperm, AARCH64_FEATURES (2, SVE2, SVE2_BITPERM))
> > +FEATURE_SET_OR_1(sme, AARCH64_FEATURES (2, SVE2, SME))
> > +FEATURE_SET_OR_1(sme_f64f64, AARCH64_FEATURES (3, SVE2, SME, SME_F64F64))
> > +FEATURE_SET_OR_1(sme_i16i64, AARCH64_FEATURES (3, SVE2, SME, SME_I16I64))
> > +FEATURE_SET_OR_1(sme2, AARCH64_FEATURES (3, SVE2, SME, SME2))
> > +FEATURE_SET_OR_1(sme2_i16i64, AARCH64_FEATURES (2, SME2, SME_I16I64))
> > +FEATURE_SET_OR_1(sme2_f64f64, AARCH64_FEATURES (2, SME2, SME_F64F64))
> > +FEATURE_SET_OR_1(i8mm, AARCH64_FEATURE (I8MM))
> > +FEATURE_SET_OR_1(i8mm_sve, AARCH64_FEATURES (2, I8MM, SVE))
> > +FEATURE_SET_OR_1(f32mm_sve, AARCH64_FEATURES (2, F32MM, SVE))
> > +FEATURE_SET_OR_1(f64mm_sve, AARCH64_FEATURES (2, F64MM, SVE))
> > +FEATURE_SET_OR_1(v8r, AARCH64_FEATURE (V8R))
> > +FEATURE_SET_OR_1(ls64, AARCH64_FEATURE (LS64))
> > +FEATURE_SET_OR_1(flagm, AARCH64_FEATURE (FLAGM))
> > +FEATURE_SET_OR_1(xs, AARCH64_FEATURE (XS))
> > +FEATURE_SET_OR_1(wfxt, AARCH64_FEATURE (WFXT))
> > +FEATURE_SET_OR_1(mops, AARCH64_FEATURE (MOPS))
> > +FEATURE_SET_OR_1(mops_memtag, AARCH64_FEATURES (2, MOPS, MEMTAG))
> > +FEATURE_SET_OR_1(hbc, AARCH64_FEATURE (HBC))
> > +FEATURE_SET_OR_1(cssc, AARCH64_FEATURE (CSSC))
> > +FEATURE_SET_OR_1(chk, AARCH64_FEATURE (CHK))
> > +FEATURE_SET_OR_1(gcs, AARCH64_FEATURE (GCS))
> > +FEATURE_SET_OR_1(ite, AARCH64_FEATURE (ITE))
> > +FEATURE_SET_OR_1(d128, AARCH64_FEATURE (D128))
> > +FEATURE_SET_OR_1(the, AARCH64_FEATURE (THE))
> > +FEATURE_SET_OR_1(d128_the, AARCH64_FEATURES (2, D128, THE))
> > +FEATURE_SET_OR_1(b16b16, AARCH64_FEATURE (B16B16))
> > +FEATURE_SET_OR_1(sme2p1, AARCH64_FEATURE (SME2p1))
> > +FEATURE_SET_OR_1(sve2p1, AARCH64_FEATURE (SVE2p1))
> > +FEATURE_SET_OR_1(rcpc3, AARCH64_FEATURE (RCPC3))
> > +FEATURE_SET_OR_1(cpa, AARCH64_FEATURE (CPA))
> > +FEATURE_SET_OR_1(cpa_sve, AARCH64_FEATURES (2, CPA, SVE))
> > +FEATURE_SET_OR_1(faminmax, AARCH64_FEATURE (FAMINMAX))
> > +FEATURE_SET_OR_1(faminmax_sve2, AARCH64_FEATURES (2, FAMINMAX, SVE2))
> > +FEATURE_SET_OR_1(faminmax_sme2, AARCH64_FEATURES (3, SVE2, FAMINMAX, SME2))
> > +FEATURE_SET_OR_1(fp8, AARCH64_FEATURE (FP8))
> > +FEATURE_SET_OR_1(fp8_sve2, AARCH64_FEATURES (2, FP8, SVE2))
> > +FEATURE_SET_OR_1(fp8_sme2, AARCH64_FEATURES (2, FP8, SME2))
> > +FEATURE_SET_OR_1(lut, AARCH64_FEATURE (LUT))
> > +FEATURE_SET_OR_1(lut_sve2, AARCH64_FEATURES (2, LUT, SVE2))
> >  
> >  #define CORE		&aarch64_feature_v8
> >  #define FP		&aarch64_feature_fp
> 

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-06-12 14:52 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-06-04 13:25 [PATCH] aarch64: Enable gating instructions with FEAT_X || FEAT_Y Andrew Carlotti
2024-06-12 13:31 ` Richard Earnshaw (lists)
2024-06-12 14:51   ` Andrew Carlotti

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).