public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
From: Srinath Parvathaneni <srinath.parvathaneni@arm.com>
To: binutils@sourceware.org
Cc: richard.earnshaw@arm.com, nickc@redhat.com
Subject: [PATCH 6/6][Binutils] aarch64: Add SVE2.1 Contiguous load/store instructions.
Date: Mon, 15 Jan 2024 09:40:11 +0000	[thread overview]
Message-ID: <6caee7e1-c16d-402d-9a14-e55b97244128@arm.com> (raw)
In-Reply-To: <73155200-f7c2-4226-b4be-4a320ea82044@arm.com>

[-- Attachment #1: Type: text/plain, Size: 223 bytes --]

Hi,

This patch adds support for the SVE2.1 instructions ld1q,
ld2q, ld3q and ld4q, st1q, st2q, st3q and st4q.

Regression tested for the aarch64-none-elf target and found no regressions.

Ok for binutils-master?

Regards,
Srinath.

[-- Attachment #2: 6_6.patch --]
[-- Type: text/x-patch, Size: 12646 bytes --]

diff --git a/gas/config/tc-aarch64.c b/gas/config/tc-aarch64.c
index 0665732fe03cc59df4ebd36ee1afbad08c22b72e..5eff6a754adea9c44432e3faacf31d20c4f6fb98 100644
--- a/gas/config/tc-aarch64.c
+++ b/gas/config/tc-aarch64.c
@@ -6749,6 +6749,9 @@ parse_operands (char *str, const aarch64_opcode *opcode)
 	case AARCH64_OPND_SVE_ZtxN:
 	case AARCH64_OPND_SME_Zdnx2:
 	case AARCH64_OPND_SME_Zdnx4:
+	case AARCH64_OPND_SME_Zt2:
+	case AARCH64_OPND_SME_Zt3:
+	case AARCH64_OPND_SME_Zt4:
 	case AARCH64_OPND_SME_Zmx2:
 	case AARCH64_OPND_SME_Zmx4:
 	case AARCH64_OPND_SME_Znx2:
diff --git a/gas/testsuite/gas/aarch64/sve2p1-1-bad.l b/gas/testsuite/gas/aarch64/sve2p1-1-bad.l
index 08aef46de61a6cbbe88ebac77da03ee97c9ebe7c..50a4bacc73c20324ae50b8688dd8cf5123a238ae 100644
--- a/gas/testsuite/gas/aarch64/sve2p1-1-bad.l
+++ b/gas/testsuite/gas/aarch64/sve2p1-1-bad.l
@@ -80,3 +80,17 @@
 .*: Error: selected processor does not support `fminqv v4.2d,p3,z2.d'
 .*: Error: selected processor does not support `fminqv v8.2d,p4,z1.d'
 .*: Error: selected processor does not support `fminqv v16.4s,p7,z0.s'
+.*: Error: selected processor does not support `ld1q Z0.Q,p4/Z,\[Z16.D,x0\]'
+.*: Error: selected processor does not support `ld2q {Z0.Q,Z1.Q},p4/Z,\[x0,#-4,MUL VL\]'
+.*: Error: selected processor does not support `ld3q {Z0.Q,Z1.Q,Z2.Q},p4/Z,\[x0,#-4,MUL VL\]'
+.*: Error: selected processor does not support `ld4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4/Z,\[x0,#-4,MUL VL\]'
+.*: Error: selected processor does not support `ld2q {Z0.Q,Z1.Q},p4/Z,\[x0,x2,lsl#4\]'
+.*: Error: selected processor does not support `ld3q {Z0.Q,Z1.Q,Z2.Q},p4/Z,\[x0,x4,lsl#4\]'
+.*: Error: selected processor does not support `ld4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4/Z,\[x0,x6,lsl#4\]'
+.*: Error: selected processor does not support `st1q Z0.Q,p4,\[Z16.D,x0\]'
+.*: Error: selected processor does not support `st2q {Z0.Q,Z1.Q},p4,\[x0,#-4,MUL VL\]'
+.*: Error: selected processor does not support `st3q {Z0.Q,Z1.Q,Z2.Q},p4,\[x0,#-4,MUL VL\]'
+.*: Error: selected processor does not support `st4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4,\[x0,#-4,MUL VL\]'
+.*: Error: selected processor does not support `st2q {Z0.Q,Z1.Q},p4,\[x0,x2,lsl#4\]'
+.*: Error: selected processor does not support `st3q {Z0.Q,Z1.Q,Z2.Q},p4,\[x0,x4,lsl#4\]'
+.*: Error: selected processor does not support `st4q {Z0.Q,Z1.Q,Z2.Q,Z3.Q},p4,\[x0,x6,lsl#4\]'
diff --git a/gas/testsuite/gas/aarch64/sve2p1-1.d b/gas/testsuite/gas/aarch64/sve2p1-1.d
index 437ce9789834683963910141c1468ad46b273ded..daece899b38bba4daa2ca9e58dba2d551f6cf988 100644
--- a/gas/testsuite/gas/aarch64/sve2p1-1.d
+++ b/gas/testsuite/gas/aarch64/sve2p1-1.d
@@ -89,3 +89,17 @@
 .*:	64d7ac44 	fminqv	v4.2d, p3, z2.d
 .*:	64d7b028 	fminqv	v8.2d, p4, z1.d
 .*:	6497bc10 	fminqv	v16.4s, p7, z0.s
+.*:	c400b200 	ld1q	z0.q, p4/z, \[z16.d, x0\]
+.*:	a49ef000 	ld2q	{z0.q, z1.q}, p4/z, \[x0, #-4, mul vl\]
+.*:	a51ef000 	ld3q	{z0.q, z1.q, z2.q}, p4/z, \[x0, #-4, mul vl\]
+.*:	a59ef000 	ld4q	{z0.q, z1.q, z2.q, z3.q}, p4/z, \[x0, #-4, mul vl\]
+.*:	a4a29000 	ld2q	{z0.q, z1.q}, p4/z, \[x0, x2, lsl #4\]
+.*:	a5249000 	ld3q	{z0.q, z1.q, z2.q}, p4/z, \[x0, x4, lsl #4\]
+.*:	a5a69000 	ld4q	{z0.q, z1.q, z2.q, z3.q}, p4/z, \[x0, x6, lsl #4\]
+.*:	e4203200 	st1q	z0.q, p4, \[z16.d, x0\]
+.*:	e44e1000 	st2q	{z0.q, z1.q}, p4, \[x0, #-4, mul vl\]
+.*:	e48e1000 	st3q	{z0.q, z1.q, z2.q}, p4, \[x0, #-4, mul vl\]
+.*:	e4ce1000 	st4q	{z0.q, z1.q, z2.q, z3.q}, p4, \[x0, #-4, mul vl\]
+.*:	e4621000 	st2q	{z0.q, z1.q}, p4, \[x0, x2, lsl #4\]
+.*:	e4a41000 	st3q	{z0.q, z1.q, z2.q}, p4, \[x0, x4, lsl #4\]
+.*:	e4e61000 	st4q	{z0.q, z1.q, z2.q, z3.q}, p4, \[x0, x6, lsl #4\]
diff --git a/gas/testsuite/gas/aarch64/sve2p1-1.s b/gas/testsuite/gas/aarch64/sve2p1-1.s
index b4908b2be38d927bb61a38e5aba681837d8417e1..2a1c7c107d757ae922cec5566adbace1f03e0dce 100644
--- a/gas/testsuite/gas/aarch64/sve2p1-1.s
+++ b/gas/testsuite/gas/aarch64/sve2p1-1.s
@@ -90,3 +90,18 @@ fminqv v2.4s, p2, z4.s
 fminqv v4.2d, p3, z2.d
 fminqv v8.2d, p4, z1.d
 fminqv v16.4s, p7, z0.s
+ld1q Z0.Q, p4/Z, [Z16.D, x0]
+ld2q {Z0.Q, Z1.Q}, p4/Z, [x0,  #-4, MUL VL]
+ld3q {Z0.Q, Z1.Q, Z2.Q}, p4/Z, [x0,  #-4, MUL VL]
+ld4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4/Z, [x0,  #-4, MUL VL]
+ld2q {Z0.Q, Z1.Q}, p4/Z, [x0, x2, lsl  #4]
+ld3q {Z0.Q, Z1.Q, Z2.Q}, p4/Z, [x0, x4, lsl  #4]
+ld4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4/Z, [x0, x6, lsl  #4]
+
+st1q Z0.Q, p4, [Z16.D, x0]
+st2q {Z0.Q, Z1.Q}, p4, [x0,  #-4, MUL VL]
+st3q {Z0.Q, Z1.Q, Z2.Q}, p4, [x0,  #-4, MUL VL]
+st4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4, [x0,  #-4, MUL VL]
+st2q {Z0.Q, Z1.Q}, p4, [x0, x2, lsl  #4]
+st3q {Z0.Q, Z1.Q, Z2.Q}, p4, [x0, x4, lsl  #4]
+st4q {Z0.Q, Z1.Q, Z2.Q, Z3.Q}, p4, [x0, x6, lsl  #4]
diff --git a/include/opcode/aarch64.h b/include/opcode/aarch64.h
index de161db75d509b0ac96c604da7bc9743193d23b2..189bab5a92bcacb1ece30752817f666a34f5d81d 100644
--- a/include/opcode/aarch64.h
+++ b/include/opcode/aarch64.h
@@ -797,6 +797,9 @@ enum aarch64_opnd
   AARCH64_OPND_MOPS_WB_Rn,	/* Rn!, in bits [5, 9].  */
   AARCH64_OPND_CSSC_SIMM8,	/* CSSC signed 8-bit immediate.  */
   AARCH64_OPND_CSSC_UIMM8,	/* CSSC unsigned 8-bit immediate.  */
+  AARCH64_OPND_SME_Zt2,		/* Double SVE vector register list.  */
+  AARCH64_OPND_SME_Zt3,		/* Triple SVE vector register list.  */
+  AARCH64_OPND_SME_Zt4,		/* Quad SVE vector register list.  */
 };
 
 /* Qualifier constrains an operand.  It either specifies a variant of an
diff --git a/opcodes/aarch64-dis.h b/opcodes/aarch64-dis.h
index 30212f2ae2c2759b5667e5a007912d22c4a702fc..48bebfea1e146e71d5fcae67c6558a35fe198e3f 100644
--- a/opcodes/aarch64-dis.h
+++ b/opcodes/aarch64-dis.h
@@ -139,6 +139,7 @@ AARCH64_DECL_OPD_EXTRACTOR (ext_imm_rotate2);
 AARCH64_DECL_OPD_EXTRACTOR (ext_x0_to_x30);
 AARCH64_DECL_OPD_EXTRACTOR (ext_simple_index);
 AARCH64_DECL_OPD_EXTRACTOR (ext_plain_shrimm);
+AARCH64_DECL_OPD_EXTRACTOR (ext_sve_reglist_zt);
 
 #undef AARCH64_DECL_OPD_EXTRACTOR
 
diff --git a/opcodes/aarch64-dis.c b/opcodes/aarch64-dis.c
index 1381e7524402a867cee23becbaa693d1b293c28d..9e96ba35ed45a404426467b897e379ba44e7e51a 100644
--- a/opcodes/aarch64-dis.c
+++ b/opcodes/aarch64-dis.c
@@ -2160,6 +2160,21 @@ aarch64_ext_sve_reglist (const aarch64_operand *self,
   return true;
 }
 
+/* Decode {Zn.<T> , Zm.<T>}.  The fields array specifies which field
+   to use for Zn.  The opcode-dependent value specifies the number
+   of registers in the list.  */
+bool
+aarch64_ext_sve_reglist_zt (const aarch64_operand *self,
+			    aarch64_opnd_info *info, aarch64_insn code,
+			    const aarch64_inst *inst ATTRIBUTE_UNUSED,
+			    aarch64_operand_error *errors ATTRIBUTE_UNUSED)
+{
+  info->reglist.first_regno = extract_field (self->fields[0], code, 0);
+  info->reglist.num_regs = get_operand_specific_data (self);
+  info->reglist.stride = 1;
+  return true;
+}
+
 /* Decode a strided register list.  The first field holds the top bit
    (0 or 16) and the second field holds the lower bits.  The stride is
    16 divided by the list length.  */
diff --git a/opcodes/aarch64-opc.c b/opcodes/aarch64-opc.c
index 1d8ed26c7090e4b73489b15e74a911e33b54555c..13cd2bcd8a7a79508c340bcf618af61b622bc0fe 100644
--- a/opcodes/aarch64-opc.c
+++ b/opcodes/aarch64-opc.c
@@ -1870,6 +1870,9 @@ operand_general_constraint_met_p (const aarch64_opnd_info *opnds, int idx,
 	case AARCH64_OPND_SME_Zmx4:
 	case AARCH64_OPND_SME_Znx2:
 	case AARCH64_OPND_SME_Znx4:
+	case AARCH64_OPND_SME_Zt2:
+	case AARCH64_OPND_SME_Zt3:
+	case AARCH64_OPND_SME_Zt4:
 	  num = get_operand_specific_data (&aarch64_operands[type]);
 	  if (!check_reglist (opnd, mismatch_detail, idx, num, 1))
 	    return 0;
@@ -3626,7 +3629,10 @@ print_register_list (char *buf, size_t size, const aarch64_opnd_info *opnd,
   /* The hyphenated form is preferred for disassembly if there are
      more than two registers in the list, and the register numbers
      are monotonically increasing in increments of one.  */
-  if (stride == 1 && num_regs > 1)
+  if (stride == 1 && num_regs > 1
+      && ((opnd->type != AARCH64_OPND_SME_Zt2)
+	  && (opnd->type != AARCH64_OPND_SME_Zt3)
+	  && (opnd->type != AARCH64_OPND_SME_Zt4)))
     snprintf (buf, size, "{%s-%s}%s",
 	      style_reg (styler, "%s%d.%s", prefix, first_reg, qlf_name),
 	      style_reg (styler, "%s%d.%s", prefix, last_reg, qlf_name), tb);
@@ -4071,6 +4077,9 @@ aarch64_print_operand (char *buf, size_t size, bfd_vma pc,
     case AARCH64_OPND_SME_Znx4:
     case AARCH64_OPND_SME_Ztx2_STRIDED:
     case AARCH64_OPND_SME_Ztx4_STRIDED:
+    case AARCH64_OPND_SME_Zt2:
+    case AARCH64_OPND_SME_Zt3:
+    case AARCH64_OPND_SME_Zt4:
       print_register_list (buf, size, opnd, "z", styler);
       break;
 
diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h
index 383091ef199310b21a0741527eca50bb4a10e668..c5c5c612e508b29ab99d60e0fae20d2c8fcccde4 100644
--- a/opcodes/aarch64-tbl.h
+++ b/opcodes/aarch64-tbl.h
@@ -1781,6 +1781,14 @@
 {                                                       \
   QLF3(S_S,P_Z,S_S),                                    \
 }
+#define OP_SVE_SZS_QD                                   \
+{                                                       \
+  QLF3(S_Q,P_Z,S_D),                                    \
+}
+#define OP_SVE_SUS_QD                                   \
+{                                                       \
+  QLF3(S_Q,NIL,S_D),                                    \
+}
 #define OP_SVE_SBB                                      \
 {                                                       \
   QLF3(S_S,S_B,S_B),                                    \
@@ -6353,6 +6361,21 @@ const struct aarch64_opcode aarch64_opcode_table[] =
 
   SVE2p1_INSN("dupq",0x05202400, 0xffe0fc00, sve_index1, 0, OP2 (SVE_Zd, SVE_Zn_5_INDEX), OP_SVE_VV_BHSD, 0, 0),
   SVE2p1_INSN("extq",0x05602400, 0xfff0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zd, SVE_Zm_imm4), OP_SVE_BBB, 0, 0),
+  SVE2p1_INSNC("ld1q",0xc400a000, 0xffe0e000, sve_misc, 0, OP3 (SVE_Zt, SVE_Pg3, SVE_ADDR_ZX), OP_SVE_SZS_QD, 0, C_SCAN_MOVPRFX, 0),
+  SVE2p1_INSNC("ld2q",0xa490e000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
+  SVE2p1_INSNC("ld3q",0xa510e000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
+  SVE2p1_INSNC("ld4q",0xa590e000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
+  SVE2p1_INSNC("ld2q",0xa4a08000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
+  SVE2p1_INSNC("ld3q",0xa5208000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
+  SVE2p1_INSNC("ld4q",0xa5a08000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QZU, 0, C_SCAN_MOVPRFX, 0),
+
+  SVE2p1_INSNC("st1q",0xe4202000, 0xffe0e000, sve_misc, 0, OP3 (SVE_Zt, SVE_Pg3, SVE_ADDR_ZX), OP_SVE_SUS_QD, 0, C_SCAN_MOVPRFX, 0),
+  SVE2p1_INSNC("st2q",0xe4400000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
+  SVE2p1_INSNC("st3q",0xe4800000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
+  SVE2p1_INSNC("st4q",0xe4c00000, 0xfff0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RI_S4x2xVL), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
+  SVE2p1_INSNC("st2q",0xe4600000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
+  SVE2p1_INSNC("st3q",0xe4a00000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
+  SVE2p1_INSNC("st4q",0xe4e00000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
 
   {0, 0, 0, 0, 0, 0, {}, {}, 0, 0, 0, NULL},
 };
@@ -6989,4 +7012,13 @@ const struct aarch64_opcode aarch64_opcode_table[] =
     Y(IMMEDIATE, imm, "CSSC_SIMM8", OPD_F_SEXT, F(FLD_CSSC_imm8),	\
       "an 8-bit signed immediate")					\
     Y(IMMEDIATE, imm, "CSSC_UIMM8", 0, F(FLD_CSSC_imm8),		\
-      "an 8-bit unsigned immediate")
+      "an 8-bit unsigned immediate")					\
+    X(SVE_REGLIST, ins_sve_reglist, ext_sve_reglist_zt, "SME_Zt2",	\
+      2 << OPD_F_OD_LSB, F(FLD_SVE_Zt),					\
+      "a list of 2 SVE vector registers")				\
+    X(SVE_REGLIST, ins_sve_reglist, ext_sve_reglist_zt, "SME_Zt3",	\
+      3 << OPD_F_OD_LSB, F(FLD_SVE_Zt),					\
+      "a list of 3 SVE vector registers")				\
+    X(SVE_REGLIST, ins_sve_reglist, ext_sve_reglist_zt, "SME_Zt4",	\
+      4 << OPD_F_OD_LSB, F(FLD_SVE_Zt),					\
+      "a list of 4 SVE vector registers")

  parent reply	other threads:[~2024-01-15  9:40 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-15  9:28 [PATCH 1/6] [Binutils] aarch64: Add support for FEAT_B16B16 instructions Srinath Parvathaneni
2024-01-15  9:34 ` [PATCH 2/6][Binutils] aarch64: Add support for FEAT_SME2p1 instructions Srinath Parvathaneni
2024-01-15  9:35   ` [PATCH 3/6][Binutils] aarch64: Add support for FEAT_SVE2p1 Srinath Parvathaneni
2024-01-15  9:37 ` [PATCH 4/6][Binutils] aarch64: Add SVE2.1 dupq, eorqv and extq instructions Srinath Parvathaneni
2024-01-15  9:38 ` PATCH 5/6][Binutils] aarch64: Add SVE2.1 fmin and fmax instructions Srinath Parvathaneni
2024-01-15  9:40 ` Srinath Parvathaneni [this message]
2024-01-15 11:46 ` [PATCH 1/6] [Binutils] aarch64: Add support for FEAT_B16B16 instructions Nick Clifton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6caee7e1-c16d-402d-9a14-e55b97244128@arm.com \
    --to=srinath.parvathaneni@arm.com \
    --cc=binutils@sourceware.org \
    --cc=nickc@redhat.com \
    --cc=richard.earnshaw@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).