From: Srinath Parvathaneni <srinath.parvathaneni@arm.com>
To: binutils@sourceware.org
Cc: richard.earnshaw@arm.com, nickc@redhat.com
Subject: [PATCH 6/6][Binutils] aarch64: Add SVE2.1 Contiguous load/store instructions.
Date: Mon, 15 Jan 2024 09:40:11 +0000 [thread overview]
Message-ID: <6caee7e1-c16d-402d-9a14-e55b97244128@arm.com> (raw)
In-Reply-To: <73155200-f7c2-4226-b4be-4a320ea82044@arm.com>
[-- Attachment #1: Type: text/plain, Size: 223 bytes --]
Hi,
This patch adds support for the SVE2.1 instructions ld1q,
ld2q, ld3q, ld4q, st1q, st2q, st3q and st4q.
Regression tested on the aarch64-none-elf target; no regressions were found.
Ok for binutils-master?
Regards,
Srinath.
[-- Attachment #2: 6_6.patch --]
[-- Type: text/x-patch, Size: 12646 bytes --]
diff --git a/gas/config/tc-aarch64.c b/gas/config/tc-aarch64.c
index 0665732fe03cc59df4ebd36ee1afbad08c22b72e..5eff6a754adea9c44432e3faacf31d20c4f6fb98 100644
--- a/gas/config/tc-aarch64.c
+++ b/gas/config/tc-aarch64.c
@@ -6749,6 +6749,9 @@ parse_operands (char *str, const aarch64_opcode *opcode)
case AARCH64_OPND_SVE_ZtxN:
case AARCH64_OPND_SME_Zdnx2:
case AARCH64_OPND_SME_Zdnx4:
+ case AARCH64_OPND_SME_Zt2:
+ case AARCH64_OPND_SME_Zt3:
+ case AARCH64_OPND_SME_Zt4:
case AARCH64_OPND_SME_Zmx2:
case AARCH64_OPND_SME_Zmx4:
case AARCH64_OPND_SME_Znx2:
diff --git a/gas/testsuite/gas/aarch64/sve2p1-1-bad.l b/gas/testsuite/gas/aarch64/sve2p1-1-bad.l
index 08aef46de61a6cbbe88ebac77da03ee97c9ebe7c..50a4bacc73c20324ae50b8688dd8cf5123a238ae 100644
--- a/gas/testsuite/gas/aarch64/sve2p1-1-bad.l
+++ b/gas/testsuite/gas/aarch64/sve2p1-1-bad.l
@@ -80,3 +80,17 @@
.*: Error: selected processor does not support `fminqv v4.2d,p3,z2.d'
.*: Error: selected processor does not support `fminqv v8.2d,p4,z1.d'
.*: Error: selected processor does not support `fminqv v16.4s,p7,z0.s'
+.*: Error: selected processor does not support `ld1q Z0.Q,p4/Z,\[Z16.D,x0\]'
+.*: Error: selected processor does not support `ld2q {Z0.Q,Z1.Q},p4/Z,\[x0,#-4,MUL VL\]'
+.*: Error: selected processor does not support `ld3q {Z0.Q,Z1.Q,Z2.Q},p4/Z,\[x0,#-4,MUL VL\]'
+.*: Error: selected processor does not support `ld4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4/Z,\[x0,#-4,MUL VL\]'
+.*: Error: selected processor does not support `ld2q {Z0.Q,Z1.Q},p4/Z,\[x0,x2,lsl#4\]'
+.*: Error: selected processor does not support `ld3q {Z0.Q,Z1.Q,Z2.Q},p4/Z,\[x0,x4,lsl#4\]'
+.*: Error: selected processor does not support `ld4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4/Z,\[x0,x6,lsl#4\]'
+.*: Error: selected processor does not support `st1q Z0.Q,p4,\[Z16.D,x0\]'
+.*: Error: selected processor does not support `st2q {Z0.Q,Z1.Q},p4,\[x0,#-4,MUL VL\]'
+.*: Error: selected processor does not support `st3q {Z0.Q,Z1.Q,Z2.Q},p4,\[x0,#-4,MUL VL\]'
+.*: Error: selected processor does not support `st4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4,\[x0,#-4,MUL VL\]'
+.*: Error: selected processor does not support `st2q {Z0.Q,Z1.Q},p4,\[x0,x2,lsl#4\]'
+.*: Error: selected processor does not support `st3q {Z0.Q,Z1.Q,Z2.Q},p4,\[x0,x4,lsl#4\]'
+.*: Error: selected processor does not support `st4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4,\[x0,x6,lsl#4\]'
diff --git a/gas/testsuite/gas/aarch64/sve2p1-1.d b/gas/testsuite/gas/aarch64/sve2p1-1.d
index 437ce9789834683963910141c1468ad46b273ded..daece899b38bba4daa2ca9e58dba2d551f6cf988 100644
--- a/gas/testsuite/gas/aarch64/sve2p1-1.d
+++ b/gas/testsuite/gas/aarch64/sve2p1-1.d
@@ -89,3 +89,17 @@
.*: 64d7ac44 fminqv v4.2d, p3, z2.d
.*: 64d7b028 fminqv v8.2d, p4, z1.d
.*: 6497bc10 fminqv v16.4s, p7, z0.s
+.*: c400b200 ld1q z0.q, p4/z, \[z16.d, x0\]
+.*: a49ef000 ld2q {z0.q, z1.q}, p4/z, \[x0, #-4, mul vl\]
+.*: a51ef000 ld3q {z0.q, z1.q, z2.q}, p4/z, \[x0, #-4, mul vl\]
+.*: a59ef000 ld4q {z0.q, z1.q, z2.q, z3.q}, p4/z, \[x0, #-4, mul vl\]
+.*: a4a2f000 ld2h {z0.h-z1.h}, p4/z, \[x0, #4, mul vl\]
+.*: a5249000 ld3q {z0.q, z1.q, z2.q}, p4/z, \[x0, x4, lsl #4\]
+.*: a5a69000 ld4q {z0.q, z1.q, z2.q, z3.q}, p4/z, \[x0, x6, lsl #4\]
+.*: e4203200 st1q z0.q, p4, \[z16.d, x0\]
+.*: e44e1000 st2q {z0.q, z1.q}, p4, \[x0, #-4, mul vl\]
+.*: e48e1000 st3q {z0.q, z1.q, z2.q}, p4, \[x0, #-4, mul vl\]
+.*: e4ce1000 st4q {z0.q, z1.q, z2.q, z3.q}, p4, \[x0, #-4, mul vl\]
+.*: e4621000 st2q {z0.q, z1.q}, p4, \[x0, x2, lsl #4\]
+.*: e4a41000 st3q {z0.q, z1.q, z2.q}, p4, \[x0, x4, lsl #4\]
+.*: e4e61000 st4q {z0.q, z1.q, z2.q, z3.q}, p4, \[x0, x6, lsl #4\]
diff --git a/gas/testsuite/gas/aarch64/sve2p1-1.s b/gas/testsuite/gas/aarch64/sve2p1-1.s
index b4908b2be38d927bb61a38e5aba681837d8417e1..2a1c7c107d757ae922cec5566adbace1f03e0dce 100644
--- a/gas/testsuite/gas/aarch64/sve2p1-1.s
+++ b/gas/testsuite/gas/aarch64/sve2p1-1.s
@@ -90,3 +90,18 @@ fminqv v2.4s, p2, z4.s
fminqv v4.2d, p3, z2.d
fminqv v8.2d, p4, z1.d
fminqv v16.4s, p7, z0.s
+ld1q Z0.Q, p4/Z, [Z16.D, x0]
+ld2q {Z0.Q, Z1.Q}, p4/Z, [x0, #-4, MUL VL]
+ld3q {Z0.Q, Z1.Q, Z2.Q}, p4/Z, [x0, #-4, MUL VL]
+ld4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4/Z, [x0, #-4, MUL VL]
+ld2q {Z0.Q, Z1.Q}, p4/Z, [x0, x2, lsl #4]
+ld3q {Z0.Q, Z1.Q, Z2.Q}, p4/Z, [x0, x4, lsl #4]
+ld4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4/Z, [x0, x6, lsl #4]
+
+st1q Z0.Q, p4, [Z16.D, x0]
+st2q {Z0.Q, Z1.Q}, p4, [x0, #-4, MUL VL]
+st3q {Z0.Q, Z1.Q, Z2.Q}, p4, [x0, #-4, MUL VL]
+st4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4, [x0, #-4, MUL VL]
+st2q {Z0.Q, Z1.Q}, p4, [x0, x2, lsl #4]
+st3q {Z0.Q, Z1.Q, Z2.Q}, p4, [x0, x4, lsl #4]
+st4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4, [x0, x6, lsl #4]
diff --git a/include/opcode/aarch64.h b/include/opcode/aarch64.h
index de161db75d509b0ac96c604da7bc9743193d23b2..189bab5a92bcacb1ece30752817f666a34f5d81d 100644
--- a/include/opcode/aarch64.h
+++ b/include/opcode/aarch64.h
@@ -797,6 +797,9 @@ enum aarch64_opnd
AARCH64_OPND_MOPS_WB_Rn, /* Rn!, in bits [5, 9]. */
AARCH64_OPND_CSSC_SIMM8, /* CSSC signed 8-bit immediate. */
AARCH64_OPND_CSSC_UIMM8, /* CSSC unsigned 8-bit immediate. */
+ AARCH64_OPND_SME_Zt2, /* Double SVE vector register list. */
+ AARCH64_OPND_SME_Zt3, /* Triple SVE vector register list. */
+ AARCH64_OPND_SME_Zt4, /* Quad SVE vector register list. */
};
/* Qualifier constrains an operand. It either specifies a variant of an
diff --git a/opcodes/aarch64-dis.h b/opcodes/aarch64-dis.h
index 30212f2ae2c2759b5667e5a007912d22c4a702fc..48bebfea1e146e71d5fcae67c6558a35fe198e3f 100644
--- a/opcodes/aarch64-dis.h
+++ b/opcodes/aarch64-dis.h
@@ -139,6 +139,7 @@ AARCH64_DECL_OPD_EXTRACTOR (ext_imm_rotate2);
AARCH64_DECL_OPD_EXTRACTOR (ext_x0_to_x30);
AARCH64_DECL_OPD_EXTRACTOR (ext_simple_index);
AARCH64_DECL_OPD_EXTRACTOR (ext_plain_shrimm);
+AARCH64_DECL_OPD_EXTRACTOR (ext_sve_reglist_zt);
#undef AARCH64_DECL_OPD_EXTRACTOR
diff --git a/opcodes/aarch64-dis.c b/opcodes/aarch64-dis.c
index 1381e7524402a867cee23becbaa693d1b293c28d..9e96ba35ed45a404426467b897e379ba44e7e51a 100644
--- a/opcodes/aarch64-dis.c
+++ b/opcodes/aarch64-dis.c
@@ -2160,6 +2160,21 @@ aarch64_ext_sve_reglist (const aarch64_operand *self,
return true;
}
+/* Decode a unit-stride register list. SELF->fields[0] is the field of
+ CODE holding the first register number; the operand-specific data of
+ SELF gives the number of registers in the list. Always returns true. */
+bool
+aarch64_ext_sve_reglist_zt (const aarch64_operand *self,
+ aarch64_opnd_info *info, aarch64_insn code,
+ const aarch64_inst *inst ATTRIBUTE_UNUSED,
+ aarch64_operand_error *errors ATTRIBUTE_UNUSED)
+{
+ info->reglist.first_regno = extract_field (self->fields[0], code, 0);
+ info->reglist.num_regs = get_operand_specific_data (self);
+ info->reglist.stride = 1;
+ return true;
+}
+
/* Decode a strided register list. The first field holds the top bit
(0 or 16) and the second field holds the lower bits. The stride is
16 divided by the list length. */
diff --git a/opcodes/aarch64-opc.c b/opcodes/aarch64-opc.c
index 1d8ed26c7090e4b73489b15e74a911e33b54555c..13cd2bcd8a7a79508c340bcf618af61b622bc0fe 100644
--- a/opcodes/aarch64-opc.c
+++ b/opcodes/aarch64-opc.c
@@ -1870,6 +1870,9 @@ operand_general_constraint_met_p (const aarch64_opnd_info *opnds, int idx,
case AARCH64_OPND_SME_Zmx4:
case AARCH64_OPND_SME_Znx2:
case AARCH64_OPND_SME_Znx4:
+ case AARCH64_OPND_SME_Zt2:
+ case AARCH64_OPND_SME_Zt3:
+ case AARCH64_OPND_SME_Zt4:
num = get_operand_specific_data (&aarch64_operands[type]);
if (!check_reglist (opnd, mismatch_detail, idx, num, 1))
return 0;
@@ -3626,7 +3629,10 @@ print_register_list (char *buf, size_t size, const aarch64_opnd_info *opnd,
/* The hyphenated form is preferred for disassembly if there are
more than two registers in the list, and the register numbers
are monotonically increasing in increments of one. */
- if (stride == 1 && num_regs > 1)
+ if (stride == 1 && num_regs > 1
+ && ((opnd->type != AARCH64_OPND_SME_Zt2)
+ && (opnd->type != AARCH64_OPND_SME_Zt3)
+ && (opnd->type != AARCH64_OPND_SME_Zt4)))
snprintf (buf, size, "{%s-%s}%s",
style_reg (styler, "%s%d.%s", prefix, first_reg, qlf_name),
style_reg (styler, "%s%d.%s", prefix, last_reg, qlf_name), tb);
@@ -4071,6 +4077,9 @@ aarch64_print_operand (char *buf, size_t size, bfd_vma pc,
case AARCH64_OPND_SME_Znx4:
case AARCH64_OPND_SME_Ztx2_STRIDED:
case AARCH64_OPND_SME_Ztx4_STRIDED:
+ case AARCH64_OPND_SME_Zt2:
+ case AARCH64_OPND_SME_Zt3:
+ case AARCH64_OPND_SME_Zt4:
print_register_list (buf, size, opnd, "z", styler);
break;
diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h
index 383091ef199310b21a0741527eca50bb4a10e668..c5c5c612e508b29ab99d60e0fae20d2c8fcccde4 100644
--- a/opcodes/aarch64-tbl.h
+++ b/opcodes/aarch64-tbl.h
@@ -1781,6 +1781,14 @@
{ \
QLF3(S_S,P_Z,S_S), \
}
+#define OP_SVE_SZS_QD \
+{ \
+ QLF3(S_Q,P_Z,S_D), \
+}
+#define OP_SVE_SUS_QD \
+{ \
+ QLF3(S_Q,NIL,S_D), \
+}
#define OP_SVE_SBB \
{ \
QLF3(S_S,S_B,S_B), \
@@ -6353,6 +6361,21 @@ const struct aarch64_opcode aarch64_opcode_table[] =
SVE2p1_INSN("dupq",0x05202400, 0xffe0fc00, sve_index1, 0, OP2 (SVE_Zd, SVE_Zn_5_INDEX), OP_SVE_VV_BHSD, 0, 0),
SVE2p1_INSN("extq",0x05602400, 0xfff0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zd, SVE_Zm_imm4), OP_SVE_BBB, 0, 0),
+ SVE2p1_INSNC("ld1q",0xc400a000, 0xffe0e000, sve_misc, 0, OP3 (SVE_Zt, SVE_Pg3, SVE_ADDR_ZX), OP_SVE_SZS_QD, 0, C_SCAN_MOVPRFX, 0),
+ SVE2p1_INSNC("ld2q",0xa490e000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
+ SVE2p1_INSNC("ld3q",0xa510e000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
+ SVE2p1_INSNC("ld4q",0xa590e000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
+ SVE2p1_INSNC("ld2q",0xa4a0e000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
+ SVE2p1_INSNC("ld3q",0xa5208000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
+ SVE2p1_INSNC("ld4q",0xa5a08000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
+
+ SVE2p1_INSNC("st1q",0xe4202000, 0xffe0e000, sve_misc, 0, OP3 (SVE_Zt, SVE_Pg3, SVE_ADDR_ZX), OP_SVE_SUS_QD, 0, C_SCAN_MOVPRFX, 0),
+ SVE2p1_INSNC("st2q",0xe4400000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
+ SVE2p1_INSNC("st3q",0xe4800000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
+ SVE2p1_INSNC("st4q",0xe4c00000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
+ SVE2p1_INSNC("st2q",0xe4600000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
+ SVE2p1_INSNC("st3q",0xe4a00000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
+ SVE2p1_INSNC("st4q",0xe4e00000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
{0, 0, 0, 0, 0, 0, {}, {}, 0, 0, 0, NULL},
};
@@ -6989,4 +7012,13 @@ const struct aarch64_opcode aarch64_opcode_table[] =
Y(IMMEDIATE, imm, "CSSC_SIMM8", OPD_F_SEXT, F(FLD_CSSC_imm8), \
"an 8-bit signed immediate") \
Y(IMMEDIATE, imm, "CSSC_UIMM8", 0, F(FLD_CSSC_imm8), \
- "an 8-bit unsigned immediate")
+ "an 8-bit unsigned immediate") \
+ X(SVE_REGLIST, ins_sve_reglist, ext_sve_reglist_zt, "SME_Zt2", \
+ 2 << OPD_F_OD_LSB, F(FLD_SVE_Zt), \
+ "a list of 2 SVE vector registers") \
+ X(SVE_REGLIST, ins_sve_reglist, ext_sve_reglist_zt, "SME_Zt3", \
+ 3 << OPD_F_OD_LSB, F(FLD_SVE_Zt), \
+ "a list of 3 SVE vector registers") \
+ X(SVE_REGLIST, ins_sve_reglist, ext_sve_reglist_zt, "SME_Zt4", \
+ 4 << OPD_F_OD_LSB, F(FLD_SVE_Zt), \
+ "a list of 4 SVE vector registers")
next prev parent reply other threads:[~2024-01-15 9:40 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-15 9:28 [PATCH 1/6] [Binutils] aarch64: Add support for FEAT_B16B16 instructions Srinath Parvathaneni
2024-01-15 9:34 ` [PATCH 2/6][Binutils] aarch64: Add support for FEAT_SME2p1 instructions Srinath Parvathaneni
2024-01-15 9:35 ` [PATCH 3/6][Binutils] aarch64: Add support for FEAT_SVE2p1 Srinath Parvathaneni
2024-01-15 9:37 ` [PATCH 4/6][Binutils] aarch64: Add SVE2.1 dupq, eorqv and extq instructions Srinath Parvathaneni
2024-01-15 9:38 ` PATCH 5/6][Binutils] aarch64: Add SVE2.1 fmin and fmax instructions Srinath Parvathaneni
2024-01-15 9:40 ` Srinath Parvathaneni [this message]
2024-01-15 11:46 ` [PATCH 1/6] [Binutils] aarch64: Add support for FEAT_B16B16 instructions Nick Clifton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=6caee7e1-c16d-402d-9a14-e55b97244128@arm.com \
--to=srinath.parvathaneni@arm.com \
--cc=binutils@sourceware.org \
--cc=nickc@redhat.com \
--cc=richard.earnshaw@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).