diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c index 59cf887e6ba007ed2f516fa6975d66bc88579107..ff17e9391cd9f5770bd7d4c4ad8ba7281b3e8a87 100644 --- a/gas/config/tc-arm.c +++ b/gas/config/tc-arm.c @@ -510,7 +510,10 @@ struct arm_it unsigned isreg : 1; /* Operand was a register. */ unsigned immisreg : 2; /* .imm field is a second register. 0: imm, 1: gpr, 2: MVE Q-register. */ - unsigned isscalar : 1; /* Operand is a (Neon) scalar. */ + unsigned isscalar : 2; /* Operand is a (SIMD) scalar: + 0) not scalar, + 1) Neon scalar, + 2) MVE scalar. */ unsigned immisalign : 1; /* Immediate is an alignment specifier. */ unsigned immisfloat : 1; /* Immediate was parsed as a float. */ /* Note: we abuse "regisimm" to mean "is Neon register" in VMOV @@ -1656,9 +1659,14 @@ parse_typed_reg_or_scalar (char **ccp, enum arm_reg_type type, { if (type != REG_TYPE_VFD && !(type == REG_TYPE_VFS - && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8_2))) + && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8_2)) + && !(type == REG_TYPE_NQ + && ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext))) { - first_error (_("only D registers may be indexed")); + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + first_error (_("only D and Q registers may be indexed")); + else + first_error (_("only D registers may be indexed")); return FAIL; } @@ -1747,27 +1755,41 @@ arm_typed_reg_parse (char **ccp, enum arm_reg_type type, just do easy checks here, and do further checks later. 
*/ static int -parse_scalar (char **ccp, int elsize, struct neon_type_el *type) +parse_scalar (char **ccp, int elsize, struct neon_type_el *type, enum + arm_reg_type reg_type) { int reg; char *str = *ccp; struct neon_typed_alias atype; - enum arm_reg_type reg_type = REG_TYPE_VFD; - - if (elsize == 4) - reg_type = REG_TYPE_VFS; + unsigned reg_size; reg = parse_typed_reg_or_scalar (&str, reg_type, NULL, &atype); + /* REG_SIZE is the bit width of the indexed register; the lane index + must be below REG_SIZE / ELSIZE. */ + switch (reg_type) + { + case REG_TYPE_VFS: + reg_size = 32; + break; + case REG_TYPE_VFD: + reg_size = 64; + break; + case REG_TYPE_MQ: + reg_size = 128; + break; + default: + gas_assert (0); + return FAIL; + } + if (reg == FAIL || (atype.defined & NTA_HASINDEX) == 0) return FAIL; - if (atype.index == NEON_ALL_LANES) + if (reg_type != REG_TYPE_MQ && atype.index == NEON_ALL_LANES) { first_error (_("scalar must have an index")); return FAIL; } - else if (atype.index >= 64 / elsize) + else if (atype.index >= reg_size / elsize) { first_error (_("scalar index out of range")); return FAIL; @@ -6542,7 +6564,61 @@ parse_neon_mov (char **str, int *which_operand) char *ptr = *str; struct neon_type_el optype; - if ((val = parse_scalar (&ptr, 8, &optype)) != FAIL) + if ((val = parse_scalar (&ptr, 8, &optype, REG_TYPE_MQ)) != FAIL) + { + /* Cases 17 or 19. */ + inst.operands[i].reg = val; + inst.operands[i].isvec = 1; + inst.operands[i].isscalar = 2; + inst.operands[i].vectype = optype; + inst.operands[i++].present = 1; + + if (skip_past_comma (&ptr) == FAIL) + goto wanted_comma; + + if ((val = arm_reg_parse (&ptr, REG_TYPE_RN)) != FAIL) + { + /* Case 19: VMOV<c>.<dt>
<Qd[idx]>, <Rt> */ + inst.operands[i].reg = val; + inst.operands[i].isreg = 1; + inst.operands[i].present = 1; + } + else if ((val = parse_scalar (&ptr, 8, &optype, REG_TYPE_MQ)) != FAIL) + { + /* Case 17: VMOV<c> <Qd[idx]>, <Qd[idx2]>, <Rt>, <Rt2> */ + inst.operands[i].reg = val; + inst.operands[i].isvec = 1; + inst.operands[i].isscalar = 2; + inst.operands[i].vectype = optype; + inst.operands[i++].present = 1; + + if (skip_past_comma (&ptr) == FAIL) + goto wanted_comma; + + if ((val = arm_reg_parse (&ptr, REG_TYPE_RN)) == FAIL) + goto wanted_arm; + + inst.operands[i].reg = val; + inst.operands[i].isreg = 1; + inst.operands[i++].present = 1; + + if (skip_past_comma (&ptr) == FAIL) + goto wanted_comma; + + if ((val = arm_reg_parse (&ptr, REG_TYPE_RN)) == FAIL) + goto wanted_arm; + + inst.operands[i].reg = val; + inst.operands[i].isreg = 1; + inst.operands[i].present = 1; + } + else + { + first_error (_("expected ARM or MVE vector register")); + return FAIL; + } + } + else if ((val = parse_scalar (&ptr, 8, &optype, REG_TYPE_VFD)) != FAIL) { /* Case 4: VMOV<c><q>.<size> <Dn[x]>, <Rd>. */ inst.operands[i].reg = val; @@ -6560,8 +6636,10 @@ parse_neon_mov (char **str, int *which_operand) inst.operands[i].isreg = 1; inst.operands[i].present = 1; } - else if ((val = arm_typed_reg_parse (&ptr, REG_TYPE_NSDQ, &rtype, &optype)) - != FAIL) + else if (((val = arm_typed_reg_parse (&ptr, REG_TYPE_NSDQ, &rtype, &optype)) + != FAIL) + || ((val = arm_typed_reg_parse (&ptr, REG_TYPE_MQ, &rtype, &optype)) + != FAIL)) { /* Cases 0, 1, 2, 3, 5 (D only). */ if (skip_past_comma (&ptr) == FAIL) @@ -6658,7 +6736,7 @@ parse_neon_mov (char **str, int *which_operand) } else if ((val = arm_reg_parse (&ptr, REG_TYPE_RN)) != FAIL) { - /* Cases 6, 7. */ + /* Cases 6, 7, 16, 18. 
*/ inst.operands[i].reg = val; inst.operands[i].isreg = 1; inst.operands[i++].present = 1; @@ -6666,7 +6744,15 @@ parse_neon_mov (char **str, int *which_operand) if (skip_past_comma (&ptr) == FAIL) goto wanted_comma; - if ((val = parse_scalar (&ptr, 8, &optype)) != FAIL) + if ((val = parse_scalar (&ptr, 8, &optype, REG_TYPE_MQ)) != FAIL) + { + /* Case 18: VMOV<c>.<dt>
<Rt>, <Qn[idx]> */ + inst.operands[i].reg = val; + inst.operands[i].isscalar = 2; + inst.operands[i].present = 1; + inst.operands[i].vectype = optype; + } + else if ((val = parse_scalar (&ptr, 8, &optype, REG_TYPE_VFD)) != FAIL) { /* Case 6: VMOV.
, */ inst.operands[i].reg = val; @@ -6676,7 +6762,6 @@ parse_neon_mov (char **str, int *which_operand) } else if ((val = arm_reg_parse (&ptr, REG_TYPE_RN)) != FAIL) { - /* Case 7: VMOV , , */ inst.operands[i].reg = val; inst.operands[i].isreg = 1; inst.operands[i++].present = 1; @@ -6685,37 +6770,70 @@ parse_neon_mov (char **str, int *which_operand) goto wanted_comma; if ((val = arm_typed_reg_parse (&ptr, REG_TYPE_VFSD, &rtype, &optype)) - == FAIL) + != FAIL) { - first_error (_(reg_expected_msgs[REG_TYPE_VFSD])); - return FAIL; - } - - inst.operands[i].reg = val; - inst.operands[i].isreg = 1; - inst.operands[i].isvec = 1; - inst.operands[i].issingle = (rtype == REG_TYPE_VFS); - inst.operands[i].vectype = optype; - inst.operands[i].present = 1; + /* Case 7: VMOV , , */ - if (rtype == REG_TYPE_VFS) - { - /* Case 14. */ - i++; - if (skip_past_comma (&ptr) == FAIL) - goto wanted_comma; - if ((val = arm_typed_reg_parse (&ptr, REG_TYPE_VFS, NULL, - &optype)) == FAIL) - { - first_error (_(reg_expected_msgs[REG_TYPE_VFS])); - return FAIL; - } inst.operands[i].reg = val; inst.operands[i].isreg = 1; inst.operands[i].isvec = 1; - inst.operands[i].issingle = 1; + inst.operands[i].issingle = (rtype == REG_TYPE_VFS); inst.operands[i].vectype = optype; inst.operands[i].present = 1; + + if (rtype == REG_TYPE_VFS) + { + /* Case 14. 
*/ + i++; + if (skip_past_comma (&ptr) == FAIL) + goto wanted_comma; + if ((val = arm_typed_reg_parse (&ptr, REG_TYPE_VFS, NULL, + &optype)) == FAIL) + { + first_error (_(reg_expected_msgs[REG_TYPE_VFS])); + return FAIL; + } + inst.operands[i].reg = val; + inst.operands[i].isreg = 1; + inst.operands[i].isvec = 1; + inst.operands[i].issingle = 1; + inst.operands[i].vectype = optype; + inst.operands[i].present = 1; + } + } + else + { + if ((val = parse_scalar (&ptr, 8, &optype, REG_TYPE_MQ)) + != FAIL) + { + /* Case 16: VMOV , , , */ + inst.operands[i].reg = val; + inst.operands[i].isvec = 1; + inst.operands[i].isscalar = 2; + inst.operands[i].vectype = optype; + inst.operands[i++].present = 1; + + if (skip_past_comma (&ptr) == FAIL) + goto wanted_comma; + + if ((val = parse_scalar (&ptr, 8, &optype, REG_TYPE_MQ)) + == FAIL) + { + first_error (_(reg_expected_msgs[REG_TYPE_MQ])); + return FAIL; + } + inst.operands[i].reg = val; + inst.operands[i].isvec = 1; + inst.operands[i].isscalar = 2; + inst.operands[i].vectype = optype; + inst.operands[i].present = 1; + } + else + { + first_error (_("VFP single, double or MVE vector register" + " expected")); + return FAIL; + } } } else if ((val = arm_typed_reg_parse (&ptr, REG_TYPE_VFS, NULL, &optype)) @@ -6990,10 +7108,11 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) } \ while (0) -#define po_scalar_or_goto(elsz, label) \ +#define po_scalar_or_goto(elsz, label, reg_type) \ do \ { \ - val = parse_scalar (& str, elsz, & inst.operands[i].vectype); \ + val = parse_scalar (& str, elsz, & inst.operands[i].vectype, \ + reg_type); \ if (val == FAIL) \ goto label; \ inst.operands[i].reg = val; \ @@ -7141,7 +7260,7 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) break; /* Neon scalar. Using an element size of 8 means that some invalid scalars are accepted here, so deal with those in later code. 
*/ - case OP_RNSC: po_scalar_or_goto (8, failure); break; + case OP_RNSC: po_scalar_or_goto (8, failure, REG_TYPE_VFD); break; case OP_RNDQ_I0: { @@ -7174,7 +7293,7 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) case OP_RR_RNSC: { - po_scalar_or_goto (8, try_rr); + po_scalar_or_goto (8, try_rr, REG_TYPE_VFD); break; try_rr: po_reg_or_fail (REG_TYPE_RN); @@ -7187,19 +7306,21 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) try_rnsdq_rnsc: case OP_RNSDQ_RNSC: { - po_scalar_or_goto (8, try_nsdq); + po_scalar_or_goto (8, try_nsdq, REG_TYPE_VFD); + inst.error = 0; break; try_nsdq: po_reg_or_fail (REG_TYPE_NSDQ); + inst.error = 0; } break; case OP_RNSD_RNSC: { - po_scalar_or_goto (8, try_s_scalar); + po_scalar_or_goto (8, try_s_scalar, REG_TYPE_VFD); break; try_s_scalar: - po_scalar_or_goto (4, try_nsd); + po_scalar_or_goto (4, try_nsd, REG_TYPE_VFS); break; try_nsd: po_reg_or_fail (REG_TYPE_NSD); @@ -7208,7 +7329,7 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) case OP_RNDQ_RNSC: { - po_scalar_or_goto (8, try_ndq); + po_scalar_or_goto (8, try_ndq, REG_TYPE_VFD); break; try_ndq: po_reg_or_fail (REG_TYPE_NDQ); @@ -7217,7 +7338,7 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb) case OP_RND_RNSC: { - po_scalar_or_goto (8, try_vfd); + po_scalar_or_goto (8, try_vfd, REG_TYPE_VFD); break; try_vfd: po_reg_or_fail (REG_TYPE_VFD); @@ -10170,6 +10291,10 @@ do_sxth (void) static void do_vfp_sp_monadic (void) { + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), + _(BAD_FPU)); + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd); encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sm); } @@ -10205,6 +10330,10 @@ do_vfp_sp_dp_cvt (void) static void do_vfp_reg_from_sp (void) { + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), + 
_(BAD_FPU)); + inst.instruction |= inst.operands[0].reg << 12; encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sn); } @@ -10222,6 +10351,10 @@ do_vfp_reg2_from_sp2 (void) static void do_vfp_sp_from_reg (void) { + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1xd) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), + _(BAD_FPU)); + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sn); inst.instruction |= inst.operands[1].reg << 12; } @@ -10324,6 +10457,10 @@ do_vfp_xp_ldstmdb (void) static void do_vfp_dp_rd_rm (void) { + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), + _(BAD_FPU)); + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd); encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dm); } @@ -10345,6 +10482,10 @@ do_vfp_dp_rd_rn (void) static void do_vfp_dp_rd_rn_rm (void) { + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), + _(BAD_FPU)); + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd); encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dn); encode_arm_vfp_reg (inst.operands[2].reg, VFP_REG_Dm); @@ -10359,6 +10500,10 @@ do_vfp_dp_rd (void) static void do_vfp_dp_rm_rd_rn (void) { + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), + _(BAD_FPU)); + encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dm); encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dd); encode_arm_vfp_reg (inst.operands[2].reg, VFP_REG_Dn); @@ -13961,6 +14106,10 @@ do_t_loloop (void) #define M_MNEM_vldrh 0xec100e10 #define M_MNEM_vldrw 0xec100e40 #define M_MNEM_vldrd 0xec100e50 +#define M_MNEM_vmovlt 0xeea01f40 +#define M_MNEM_vmovlb 0xeea00f40 +#define M_MNEM_vmovnt 0xfe311e81 +#define M_MNEM_vmovnb 0xfe310e81 /* Neon instruction encoder helpers. */ @@ -14125,6 +14274,8 @@ NEON_ENC_TAB - a table used to drive neon_select_shape. 
*/ #define NEON_SHAPE_DEF \ + X(4, (R, R, S, S), QUAD), \ + X(4, (S, S, R, R), QUAD), \ X(3, (R, Q, Q), QUAD), \ X(3, (D, D, D), DOUBLE), \ X(3, (Q, Q, Q), QUAD), \ @@ -17853,6 +18004,67 @@ do_neon_dup (void) } } +/* Encode the four-operand MVE VMOV between two general-purpose registers + and two vector lanes. TOQ non-zero selects the form whose first two + operands are the vector lanes; otherwise the general-purpose registers + come first. Does nothing unless mve_ext is available. Rejects equal + general-purpose registers, SP and PC, and requires the first lane + operand's reg value to be exactly two more than the second's. + NOTE(review): the / 32 and % 4 arithmetic presumably unpacks a register + number and lane index packed into .reg by parse_scalar -- confirm. */ +static void +do_mve_mov (int toQ) +{ + if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + return; + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = MVE_UNPREDICABLE_INSN; + + unsigned Rt = 0, Rt2 = 1, Q0 = 2, Q1 = 3; + if (toQ) + { + Q0 = 0; + Q1 = 1; + Rt = 2; + Rt2 = 3; + } + + constraint (inst.operands[Q0].reg != inst.operands[Q1].reg + 2, + _("Index one must be [2,3] and index two must be two less than" + " index one.")); + constraint (inst.operands[Rt].reg == inst.operands[Rt2].reg, + _("General purpose registers may not be the same")); + constraint (inst.operands[Rt].reg == REG_SP + || inst.operands[Rt2].reg == REG_SP, + BAD_SP); + constraint (inst.operands[Rt].reg == REG_PC + || inst.operands[Rt2].reg == REG_PC, + BAD_PC); + + inst.instruction = 0xec000f00; + inst.instruction |= HI1 (inst.operands[Q1].reg / 32) << 23; + inst.instruction |= !!toQ << 20; + inst.instruction |= inst.operands[Rt2].reg << 16; + inst.instruction |= LOW4 (inst.operands[Q1].reg / 32) << 13; + inst.instruction |= (inst.operands[Q1].reg % 4) << 4; + inst.instruction |= inst.operands[Rt].reg; +} + +/* Encode the MVE instructions routed to mve_movn (VMOVNT and VMOVNB in the + opcode table): two Q-register operands with an I16 or I32 key type. + Does nothing unless mve_ext is available. */ +static void +do_mve_movn (void) +{ + if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + return; + + if (inst.cond > COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + else + inst.pred_insn_type = MVE_OUTSIDE_PRED_INSN; + + struct neon_type_el et = neon_check_type (2, NS_QQ, N_EQK, N_I16 | N_I32 + | N_KEY); + + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + inst.instruction |= (neon_logbits (et.size) - 1) << 18; + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.is_neon = 1; + +} + /* VMOV has particularly many variations. It can be one of: 0. 
VMOV<c><q> <Qd>, <Qm> 1. VMOV<c><q>
<Dd>, <Dm> @@ -17882,6 +18094,10 @@ do_neon_dup (void) (Two ARM regs to two VFP singles.) 15. VMOV<c><q> <Rn>, <Rm>, <Sd>, <Se> (Two VFP singles to two ARM regs.) + 16. VMOV<c> <Rt>, <Rt2>, <Qd[idx]>, <Qd[idx2]> + 17. VMOV<c> <Qd[idx]>, <Qd[idx2]>, <Rt>, <Rt2> + 18. VMOV<c>.<dt>
<Rt>, <Qn[idx]> + 19. VMOV<c>.<dt>
<Qd[idx]>, <Rt> These cases can be disambiguated using neon_select_shape, except cases 1/9 and 3/11 which depend on the operand type too. @@ -17897,10 +18113,11 @@ do_neon_dup (void) static void do_neon_mov (void) { - enum neon_shape rs = neon_select_shape (NS_RRFF, NS_FFRR, NS_DRR, NS_RRD, - NS_QQ, NS_DD, NS_QI, NS_DI, NS_SR, - NS_RS, NS_FF, NS_FI, NS_RF, NS_FR, - NS_HR, NS_RH, NS_HI, NS_NULL); + enum neon_shape rs = neon_select_shape (NS_RRSS, NS_SSRR, NS_RRFF, NS_FFRR, + NS_DRR, NS_RRD, NS_QQ, NS_DD, NS_QI, + NS_DI, NS_SR, NS_RS, NS_FF, NS_FI, + NS_RF, NS_FR, NS_HR, NS_RH, NS_HI, + NS_NULL); struct neon_type_el et; const char *ldconst = 0; @@ -17919,7 +18136,7 @@ do_neon_mov (void) case NS_QQ: /* case 0/1. */ { - if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL) + if (check_simd_pred_availability (0, NEON_CHECK_CC | NEON_CHECK_ARCH)) return; /* The architecture manual I have doesn't explicitly state which value the U bit should have for register->register moves, but @@ -17949,7 +18166,7 @@ do_neon_mov (void) /* fall through. */ case NS_QI: /* case 2/3. 
*/ - if (vfp_or_neon_is_neon (NEON_CHECK_CC | NEON_CHECK_ARCH) == FAIL) + if (check_simd_pred_availability (0, NEON_CHECK_CC | NEON_CHECK_ARCH)) return; inst.instruction = 0x0800010; neon_move_immediate (); @@ -17976,12 +18193,31 @@ do_neon_mov (void) et = neon_check_type (2, NS_NULL, N_8 | N_16 | N_32 | N_KEY, N_EQK); logsize = neon_logbits (et.size); - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1), - _(BAD_FPU)); - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_v1) - && et.size != 32, _(BAD_FPU)); + if (et.size != 32) + { + if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext) + && vfp_or_neon_is_neon (NEON_CHECK_ARCH) == FAIL) + return; + } + else + { + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), + _(BAD_FPU)); + } + + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + { + if (inst.operands[1].reg == REG_SP) + as_tsktsk (MVE_BAD_SP); + else if (inst.operands[1].reg == REG_PC) + as_tsktsk (MVE_BAD_PC); + } + unsigned size = inst.operands[0].isscalar == 1 ? 64 : 128; + constraint (et.type == NT_invtype, _("bad type for scalar")); - constraint (x >= 64 / et.size, _("scalar index out of range")); + constraint (x >= size / et.size, _("scalar index out of range")); + switch (et.size) { @@ -17991,7 +18227,7 @@ do_neon_mov (void) default: ; } - bcdebits |= x << logsize; + bcdebits |= (x & ((1 << (3-logsize)) - 1)) << logsize; inst.instruction = 0xe000b10; do_vfp_cond_or_thumb (); @@ -17999,12 +18235,14 @@ do_neon_mov (void) inst.instruction |= HI1 (dn) << 7; inst.instruction |= inst.operands[1].reg << 12; inst.instruction |= (bcdebits & 3) << 5; - inst.instruction |= (bcdebits >> 2) << 21; + inst.instruction |= ((bcdebits >> 2) & 3) << 21; + inst.instruction |= (x >> (3-logsize)) << 16; } break; case NS_DRR: /* case 5 (fmdrr). 
*/ - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2), + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), _(BAD_FPU)); inst.instruction = 0xc400b10; @@ -18036,12 +18274,32 @@ do_neon_mov (void) N_EQK, N_S8 | N_S16 | N_U8 | N_U16 | N_32 | N_KEY); logsize = neon_logbits (et.size); - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1), - _(BAD_FPU)); - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_v1) - && et.size != 32, _(BAD_FPU)); + if (et.size != 32) + { + if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext) + && vfp_or_neon_is_neon (NEON_CHECK_CC + | NEON_CHECK_ARCH) == FAIL) + return; + } + else + { + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v1) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), + _(BAD_FPU)); + } + + if (ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + { + if (inst.operands[0].reg == REG_SP) + as_tsktsk (MVE_BAD_SP); + else if (inst.operands[0].reg == REG_PC) + as_tsktsk (MVE_BAD_PC); + } + + unsigned size = inst.operands[1].isscalar == 1 ? 64 : 128; + constraint (et.type == NT_invtype, _("bad type for scalar")); - constraint (x >= 64 / et.size, _("scalar index out of range")); + constraint (x >= size / et.size, _("scalar index out of range")); switch (et.size) { @@ -18051,7 +18309,7 @@ do_neon_mov (void) default: ; } - abcdebits |= x << logsize; + abcdebits |= (x & ((1 << (3-logsize)) - 1)) << logsize; inst.instruction = 0xe100b10; do_vfp_cond_or_thumb (); inst.instruction |= LOW4 (dn) << 16; @@ -18059,11 +18317,13 @@ do_neon_mov (void) inst.instruction |= inst.operands[0].reg << 12; inst.instruction |= (abcdebits & 3) << 5; inst.instruction |= (abcdebits >> 2) << 21; + inst.instruction |= (x >> (3-logsize)) << 16; } break; case NS_RRD: /* case 7 (fmrrd). 
*/ - constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2), + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), _(BAD_FPU)); inst.instruction = 0xc500b10; @@ -18130,11 +18390,21 @@ do_neon_mov (void) do_scalar_fp16_v82_encode (); break; + case NS_RRSS: /* case 16. */ + do_mve_mov (0); + break; + case NS_SSRR: /* case 17. */ + do_mve_mov (1); + break; + /* The encoders for the fmrrs and fmsrr instructions expect three operands (one of which is a list), but we have parsed four. Do some fiddling to make the operands what do_vfp_reg2_from_sp2 and do_vfp_sp2_from_reg2 expect. */ case NS_RRFF: /* case 14 (fmrrs). */ + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), + _(BAD_FPU)); constraint (inst.operands[3].reg != inst.operands[2].reg + 1, _("VFP registers must be adjacent")); inst.operands[2].imm = 2; @@ -18143,6 +18413,9 @@ do_neon_mov (void) break; case NS_FFRR: /* case 15 (fmsrr). */ + constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v2) + && !ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext), + _(BAD_FPU)); constraint (inst.operands[1].reg != inst.operands[0].reg + 1, _("VFP registers must be adjacent")); inst.operands[1] = inst.operands[2]; @@ -18162,6 +18435,39 @@ do_neon_mov (void) } } +/* Encode the MVE instructions routed to mve_movl (VMOVLT and VMOVLB in the + opcode table) when given exactly two Q-register operands with an S8, U8, + S16 or U16 key type. Any other operand pattern is handed to do_neon_mov + after clearing inst.instruction and forcing inst.cond to 0xb. + NOTE(review): presumably this re-reads the mnemonic as VMOV with an `lt' + condition suffix -- confirm, since the same fallback is taken for both + mnemonics. */ +static void +do_mve_movl (void) +{ + if (!(inst.operands[0].present && inst.operands[0].isquad + && inst.operands[1].present && inst.operands[1].isquad + && !inst.operands[2].present)) + { + inst.instruction = 0; + inst.cond = 0xb; + if (thumb_mode) + set_pred_insn_type (INSIDE_IT_INSN); + do_neon_mov (); + return; + } + + if (!ARM_CPU_HAS_FEATURE (cpu_variant, mve_ext)) + return; + + if (inst.cond != COND_ALWAYS) + inst.pred_insn_type = INSIDE_VPT_INSN; + + struct neon_type_el et = neon_check_type (2, NS_QQ, N_EQK, N_S8 | N_U8 + | N_S16 | N_U16 | N_KEY); + + inst.instruction |= (et.type == NT_unsigned) << 28; + inst.instruction |= HI1 (inst.operands[0].reg) << 22; + 
inst.instruction |= (neon_logbits (et.size) + 1) << 19; + inst.instruction |= LOW4 (inst.operands[0].reg) << 12; + inst.instruction |= HI1 (inst.operands[1].reg) << 5; + inst.instruction |= LOW4 (inst.operands[1].reg); + inst.is_neon = 1; +} + static void do_neon_rshift_round_imm (void) { @@ -21199,6 +21505,10 @@ static struct asm_barrier_opt barrier_opt_names[] = #define cCE(mnem, op, nops, ops, ae) \ { mnem, OPS##nops ops, OT_csuffix, 0x##op, 0xe##op, ARM_VARIANT, ARM_VARIANT, do_##ae, do_##ae, 0 } +/* mov instructions that are shared between coprocessor and MVE. */ +#define mcCE(mnem, op, nops, ops, ae) \ + { #mnem, OPS##nops ops, OT_csuffix, 0x##op, 0xe##op, ARM_VARIANT, THUMB_VARIANT, do_##ae, do_##ae, 0 } + /* Legacy coprocessor instructions where conditional infix and conditional suffix are ambiguous. For consistency this includes all FPA instructions, not just the potentially ambiguous ones. */ @@ -22473,9 +22783,6 @@ static const struct asm_opcode insns[] = #define ARM_VARIANT & fpu_vfp_ext_v1xd /* VFP V1xD (single precision). */ /* Moves and type conversions. */ - cCE("fcpys", eb00a40, 2, (RVS, RVS), vfp_sp_monadic), - cCE("fmrs", e100a10, 2, (RR, RVS), vfp_reg_from_sp), - cCE("fmsr", e000a10, 2, (RVS, RR), vfp_sp_from_reg), cCE("fmstat", ef1fa10, 0, (), noargs), cCE("vmrs", ef00a10, 2, (APSR_RR, RVC), vmrs), cCE("vmsr", ee00a10, 2, (RVC, RR), vmsr), @@ -22547,7 +22854,6 @@ static const struct asm_opcode insns[] = #define ARM_VARIANT & fpu_vfp_ext_v1 /* VFP V1 (Double precision). */ /* Moves and type conversions. 
*/ - cCE("fcpyd", eb00b40, 2, (RVD, RVD), vfp_dp_rd_rm), cCE("fcvtds", eb70ac0, 2, (RVD, RVS), vfp_dp_sp_cvt), cCE("fcvtsd", eb70bc0, 2, (RVS, RVD), vfp_sp_dp_cvt), cCE("fmdhr", e200b10, 2, (RVD, RR), vfp_dp_rn_rd), @@ -22583,14 +22889,6 @@ static const struct asm_opcode insns[] = cCE("fcmped", eb40bc0, 2, (RVD, RVD), vfp_dp_rd_rm), cCE("fcmpezd", eb50bc0, 1, (RVD), vfp_dp_rd), -#undef ARM_VARIANT -#define ARM_VARIANT & fpu_vfp_ext_v2 - - cCE("fmsrr", c400a10, 3, (VRSLST, RR, RR), vfp_sp2_from_reg2), - cCE("fmrrs", c500a10, 3, (RR, RR, VRSLST), vfp_reg2_from_sp2), - cCE("fmdrr", c400b10, 3, (RVD, RR, RR), vfp_dp_rm_rd_rn), - cCE("fmrrd", c500b10, 3, (RR, RR, RVD), vfp_dp_rd_rn_rm), - /* Instructions which may belong to either the Neon or VFP instruction sets. Individual encoder functions perform additional architecture checks. */ #undef ARM_VARIANT @@ -22629,7 +22927,6 @@ static const struct asm_opcode insns[] = /* NOTE: All VMOV encoding is special-cased! */ - NCE(vmov, 0, 1, (VMOV), neon_mov), NCE(vmovq, 0, 1, (VMOV), neon_mov), #undef THUMB_VARIANT @@ -23373,11 +23670,24 @@ static const struct asm_opcode insns[] = mCEF(vldrw, _vldrw, 2, (RMQ, ADDRMVE), mve_vstr_vldr), mCEF(vldrd, _vldrd, 2, (RMQ, ADDRMVE), mve_vstr_vldr), + mCEF(vmovnt, _vmovnt, 2, (RMQ, RMQ), mve_movn), + mCEF(vmovnb, _vmovnb, 2, (RMQ, RMQ), mve_movn), + #undef ARM_VARIANT -#define ARM_VARIANT & fpu_vfp_ext_v1xd +#define ARM_VARIANT & fpu_vfp_ext_v1 #undef THUMB_VARIANT #define THUMB_VARIANT & arm_ext_v6t2 + mcCE(fcpyd, eb00b40, 2, (RVD, RVD), vfp_dp_rd_rm), + +#undef ARM_VARIANT +#define ARM_VARIANT & fpu_vfp_ext_v1xd + + MNCE(vmov, 0, 1, (VMOV), neon_mov), + mcCE(fmrs, e100a10, 2, (RR, RVS), vfp_reg_from_sp), + mcCE(fmsr, e000a10, 2, (RVS, RR), vfp_sp_from_reg), + mcCE(fcpys, eb00a40, 2, (RVS, RVS), vfp_sp_monadic), + mCEF(vmullt, _vmullt, 3, (RNSDQMQ, oRNSDQMQ, RNSDQ_RNSC_MQ), mve_vmull), mnCEF(vadd, _vadd, 3, (RNSDQMQ, oRNSDQMQ, RNSDQMQR), neon_addsub_if_i), mnCEF(vsub, _vsub, 3, (RNSDQMQ, 
oRNSDQMQ, RNSDQMQR), neon_addsub_if_i), @@ -23385,6 +23695,17 @@ static const struct asm_opcode insns[] = MNCEF(vabs, 1b10300, 2, (RNSDQMQ, RNSDQMQ), neon_abs_neg), MNCEF(vneg, 1b10380, 2, (RNSDQMQ, RNSDQMQ), neon_abs_neg), + mCEF(vmovlt, _vmovlt, 1, (VMOV), mve_movl), + mCEF(vmovlb, _vmovlb, 1, (VMOV), mve_movl), + +#undef ARM_VARIANT +#define ARM_VARIANT & fpu_vfp_ext_v2 + + mcCE(fmsrr, c400a10, 3, (VRSLST, RR, RR), vfp_sp2_from_reg2), + mcCE(fmrrs, c500a10, 3, (RR, RR, VRSLST), vfp_reg2_from_sp2), + mcCE(fmdrr, c400b10, 3, (RVD, RR, RR), vfp_dp_rm_rd_rn), + mcCE(fmrrd, c500b10, 3, (RR, RR, RVD), vfp_dp_rd_rn_rm), + #undef ARM_VARIANT #define ARM_VARIANT & fpu_vfp_ext_armv8xd mnUF(vcvta, _vcvta, 2, (RNSDQMQ, oRNSDQMQ), neon_cvta), diff --git a/gas/testsuite/gas/arm/mve-vmov-bad-1.d b/gas/testsuite/gas/arm/mve-vmov-bad-1.d new file mode 100644 index 0000000000000000000000000000000000000000..a1933bf5dff7015f0dff118b8643b9f454c4dddb --- /dev/null +++ b/gas/testsuite/gas/arm/mve-vmov-bad-1.d @@ -0,0 +1,5 @@ +#name: bad MVE VMOV (between general-purpose register and vector lane) +#as: -march=armv8.1-m.main+mve.fp +#error_output: mve-vmov-bad-1.l + +.*: +file format .*arm.* diff --git a/gas/testsuite/gas/arm/mve-vmov-bad-1.l b/gas/testsuite/gas/arm/mve-vmov-bad-1.l new file mode 100644 index 0000000000000000000000000000000000000000..4cff35850df1f4a4b79d9936ce73482ec6220b08 --- /dev/null +++ b/gas/testsuite/gas/arm/mve-vmov-bad-1.l @@ -0,0 +1,24 @@ +[^:]*: Assembler messages: +[^:]*:3: Warning: instruction is UNPREDICTABLE with SP operand +[^:]*:4: Warning: instruction is UNPREDICTABLE with PC operand +[^:]*:5: Error: bad type for scalar -- `vmov.64 q0\[0\],r0' +[^:]*:6: Error: scalar index out of range -- `vmov.8 q0\[16\],r0' +[^:]*:7: Error: scalar index out of range -- `vmov.16 q0\[8\],r0' +[^:]*:8: Error: scalar index out of range -- `vmov.32 q0\[4\],r0' +[^:]*:10: Error: syntax error -- `vmovt.8 q0\[0\],r0' +[^:]*:11: Error: syntax error -- `vmovt.8 q0\[0\],r0' 
+[^:]*:13: Error: instruction not allowed in IT block -- `vmov.8 q0\[0\],r0' +[^:]*:14: Warning: instruction is UNPREDICTABLE with SP operand +[^:]*:15: Warning: instruction is UNPREDICTABLE with PC operand +[^:]*:16: Error: bad type for scalar -- `vmov.u64 r0,q0\[0\]' +[^:]*:17: Error: bad type for scalar -- `vmov.s64 r0,q0\[0\]' +[^:]*:18: Error: bad type for scalar -- `vmov.64 r0,q0\[0\]' +[^:]*:19: Error: bad type for scalar -- `vmov.8 r0,q0\[0\]' +[^:]*:20: Error: bad type for scalar -- `vmov.16 r0,q0\[0\]' +[^:]*:21: Error: bad type for scalar -- `vmov.f16 r0,q0\[0\]' +[^:]*:22: Error: scalar index out of range -- `vmov.u8 r0,q0\[16\]' +[^:]*:23: Error: scalar index out of range -- `vmov.u16 r0,q0\[8\]' +[^:]*:24: Error: scalar index out of range -- `vmov.32 r0,q0\[4\]' +[^:]*:26: Error: syntax error -- `vmovt.u8 r0,q0\[0\]' +[^:]*:27: Error: syntax error -- `vmovt.u8 r0,q0\[0\]' +[^:]*:29: Error: instruction not allowed in IT block -- `vmov.u8 r0,q0\[0\]' diff --git a/gas/testsuite/gas/arm/mve-vmov-bad-1.s b/gas/testsuite/gas/arm/mve-vmov-bad-1.s new file mode 100644 index 0000000000000000000000000000000000000000..5d58d498f28aefbb7af4623d0004ce4a9ce4c4d9 --- /dev/null +++ b/gas/testsuite/gas/arm/mve-vmov-bad-1.s @@ -0,0 +1,29 @@ +.syntax unified +.thumb +vmov.8 q0[0], sp +vmov.8 q0[0], pc +vmov.64 q0[0], r0 +vmov.8 q0[16], r0 +vmov.16 q0[8], r0 +vmov.32 q0[4], r0 +vpst +vmovt.8 q0[0], r0 +vmovt.8 q0[0], r0 +it eq +vmov.8 q0[0], r0 +vmov.u8 sp, q0[0] +vmov.u8 pc, q0[0] +vmov.u64 r0, q0[0] +vmov.s64 r0, q0[0] +vmov.64 r0, q0[0] +vmov.8 r0, q0[0] +vmov.16 r0, q0[0] +vmov.f16 r0, q0[0] +vmov.u8 r0, q0[16] +vmov.u16 r0, q0[8] +vmov.32 r0, q0[4] +vpst +vmovt.u8 r0, q0[0] +vmovt.u8 r0, q0[0] +it eq +vmov.u8 r0, q0[0] diff --git a/gas/testsuite/gas/arm/mve-vmov-bad-2.d b/gas/testsuite/gas/arm/mve-vmov-bad-2.d new file mode 100644 index 0000000000000000000000000000000000000000..c2b02d00100760a8ed1cb7975fc40364b4ed0cc9 --- /dev/null +++ 
b/gas/testsuite/gas/arm/mve-vmov-bad-2.d @@ -0,0 +1,5 @@ +#name: bad MVE VMOV (between two 32-bit vector lanes to two general-purpose registers) +#as: -march=armv8.1-m.main+mve.fp +#error_output: mve-vmov-bad-2.l + +.*: +file format .*arm.* diff --git a/gas/testsuite/gas/arm/mve-vmov-bad-2.l b/gas/testsuite/gas/arm/mve-vmov-bad-2.l new file mode 100644 index 0000000000000000000000000000000000000000..2f4bdc8293a04e3814a9cc80871af51048b23c44 --- /dev/null +++ b/gas/testsuite/gas/arm/mve-vmov-bad-2.l @@ -0,0 +1,10 @@ +[^:]*: Assembler messages: +[^:]*:3: Error: General purpose registers may not be the same -- `vmov r0,r0,q0\[2\],q0\[0\]' +[^:]*:4: Error: r13 not allowed here -- `vmov sp,r0,q0\[2\],q0\[0\]' +[^:]*:5: Error: r13 not allowed here -- `vmov r0,sp,q0\[2\],q0\[0\]' +[^:]*:6: Error: r15 not allowed here -- `vmov pc,r0,q0\[2\],q0\[0\]' +[^:]*:7: Error: r15 not allowed here -- `vmov r0,pc,q0\[2\],q0\[0\]' +[^:]*:8: Error: r13 not allowed here -- `vmov q0\[2\],q0\[0\],sp,r0' +[^:]*:9: Error: r13 not allowed here -- `vmov q0\[2\],q0\[0\],r0,sp' +[^:]*:10: Error: r15 not allowed here -- `vmov q0\[2\],q0\[0\],pc,r0' +[^:]*:11: Error: r15 not allowed here -- `vmov q0\[2\],q0\[0\],r0,pc' diff --git a/gas/testsuite/gas/arm/mve-vmov-bad-2.s b/gas/testsuite/gas/arm/mve-vmov-bad-2.s new file mode 100644 index 0000000000000000000000000000000000000000..20db239cbb9259e680a4a1b2fcc90d61c7aad60e --- /dev/null +++ b/gas/testsuite/gas/arm/mve-vmov-bad-2.s @@ -0,0 +1,11 @@ +.syntax unified +.thumb +vmov r0, r0, q0[2], q0[0] +vmov sp, r0, q0[2], q0[0] +vmov r0, sp, q0[2], q0[0] +vmov pc, r0, q0[2], q0[0] +vmov r0, pc, q0[2], q0[0] +vmov q0[2], q0[0], sp, r0 +vmov q0[2], q0[0], r0, sp +vmov q0[2], q0[0], pc, r0 +vmov q0[2], q0[0], r0, pc