* [PATCH 2/2][ARM][gas] Add support for ARMv8.1 Adv.SIMD extension.
2015-05-21 9:48 [PATCH 1/2][ARM][binutils] Add support for ARMv8.1 Adv.SIMD extension Matthew Wahab
@ 2015-05-21 9:50 ` Matthew Wahab
2015-05-29 15:04 ` Nicholas Clifton
2015-05-29 15:04 ` [PATCH 1/2][ARM][binutils] " Nicholas Clifton
1 sibling, 1 reply; 4+ messages in thread
From: Matthew Wahab @ 2015-05-21 9:50 UTC (permalink / raw)
To: binutils
[-- Attachment #1: Type: text/plain, Size: 1047 bytes --]
The ARMv8.1 architecture introduces two instructions, "vqrdmlah" and "vqrdmlsh",
and their variants to the ARM Advanced SIMD instruction set. This patch adds
support to gas for the new instructions, making them available under a new
armv8-a architecture extension "+rdma". It also adds a new -mfpu architecture
name "neon-fp-armv8.1", which is neon-fp-armv8 with the new instructions.
Tested for arm-none-linux-gnueabihf with check-binutils and check-gas.
Ok for trunk?
Matthew
gas/
2015-05-21 Matthew Wahab <matthew.wahab@arm.com>
* config/tc-arm.c (fpu_neon_ext_v8_1): New.
(neon_tab_entry): Add "vqrdmlah" and "vqrdmlsh".
(insns): Add "vqrdmlah", "vqrdmlahq", "vqrdmlsh" and
"vqrdmlshq".
(arm_extensions): Add "rdma".
(arm_fpus): Add "neon-fp-armv8.1".
* doc/c-arm.texi (-mcpu): Add "rdma" to supported extensions.
(-mfpu): Add "neon-fp-armv8.1".
gas/testsuite/
2015-05-21 Matthew Wahab <matthew.wahab@arm.com>
* gas/arm/armv8-a+rdma.d: New.
* gas/arm/armv8-a+rdma.s: New.
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0002-ARM-gas-Support-ARMv8.1-Adv.SIMD-instructions.patch --]
[-- Type: text/x-patch; name=0002-ARM-gas-Support-ARMv8.1-Adv.SIMD-instructions.patch, Size: 8910 bytes --]
From 730312bd1c061bce71600fcfaf4fe352bd389e2a Mon Sep 17 00:00:00 2001
From: Matthew Wahab <matthew.wahab@arm.com>
Date: Thu, 14 May 2015 12:52:24 +0100
Subject: [PATCH 2/2] [ARM][gas] Support ARMv8.1 Adv.SIMD instructions.
Change-Id: I8c196329d4b26e253abc3666de76db6a0ef77835
---
gas/config/tc-arm.c | 13 ++++++
gas/doc/c-arm.texi | 5 ++-
gas/testsuite/gas/arm/armv8-a+rdma.d | 77 ++++++++++++++++++++++++++++++++++++
gas/testsuite/gas/arm/armv8-a+rdma.s | 60 ++++++++++++++++++++++++++++
4 files changed, 154 insertions(+), 1 deletion(-)
create mode 100644 gas/testsuite/gas/arm/armv8-a+rdma.d
create mode 100644 gas/testsuite/gas/arm/armv8-a+rdma.s
diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c
index 683774f..6317cdd 100644
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -258,6 +258,8 @@ static const arm_feature_set fpu_crypto_ext_armv8 =
ARM_FEATURE_COPROC (FPU_CRYPTO_EXT_ARMV8);
static const arm_feature_set crc_ext_armv8 =
ARM_FEATURE_COPROC (CRC_EXT_ARMV8);
+static const arm_feature_set fpu_neon_ext_v8_1 =
+ ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8 | FPU_NEON_EXT_RDMA);
static int mfloat_abi_opt = -1;
/* Record user cpu selection for object attributes. */
@@ -12897,6 +12899,8 @@ struct neon_tab_entry
X(vqdmull, 0x0800d00, N_INV, 0x0800b40), \
X(vqdmulh, 0x0000b00, N_INV, 0x0800c40), \
X(vqrdmulh, 0x1000b00, N_INV, 0x0800d40), \
+ X(vqrdmlah, 0x3000b10, N_INV, 0x0800e40), \
+ X(vqrdmlsh, 0x3000c10, N_INV, 0x0800f40), \
X(vshl, 0x0000400, N_INV, 0x0800510), \
X(vqshl, 0x0000410, N_INV, 0x0800710), \
X(vand, 0x0000110, N_INV, 0x0800030), \
@@ -19789,6 +19793,11 @@ static const struct asm_opcode insns[] =
NUF(vrecpsq, 0000f10, 3, (RNQ, oRNQ, RNQ), neon_step),
NUF(vrsqrts, 0200f10, 3, (RNDQ, oRNDQ, RNDQ), neon_step),
NUF(vrsqrtsq, 0200f10, 3, (RNQ, oRNQ, RNQ), neon_step),
+ /* ARM v8.1 extension. */
+ nUF(vqrdmlah, _vqrdmlah, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qdmulh),
+ nUF(vqrdmlahq, _vqrdmlah, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_qdmulh),
+ nUF(vqrdmlsh, _vqrdmlsh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qdmulh),
+ nUF(vqrdmlshq, _vqrdmlsh, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_qdmulh),
/* Two address, int/float. Types S8 S16 S32 F32. */
NUF(vabsq, 1b10300, 2, (RNQ, RNQ), neon_abs_neg),
@@ -24669,6 +24678,9 @@ static const struct arm_option_extension_value_table arm_extensions[] =
| ARM_EXT_DIV),
ARM_FEATURE_CORE_LOW (ARM_EXT_VIRT),
ARM_FEATURE_CORE_LOW (ARM_EXT_V7A)),
+ ARM_EXT_OPT ("rdma", FPU_ARCH_NEON_VFP_ARMV8,
+ ARM_FEATURE_COPROC (FPU_NEON_ARMV8 | FPU_NEON_EXT_RDMA),
+ ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
ARM_EXT_OPT ("xscale",ARM_FEATURE_COPROC (ARM_CEXT_XSCALE),
ARM_FEATURE_COPROC (ARM_CEXT_XSCALE), ARM_ANY),
{ NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE, ARM_ARCH_NONE }
@@ -24726,6 +24738,7 @@ static const struct arm_option_fpu_value_table arm_fpus[] =
{"neon-fp-armv8", FPU_ARCH_NEON_VFP_ARMV8},
{"crypto-neon-fp-armv8",
FPU_ARCH_CRYPTO_NEON_VFP_ARMV8},
+ {"neon-fp-armv8.1", FPU_ARCH_NEON_VFP_ARMV8_1},
{NULL, ARM_ARCH_NONE}
};
diff --git a/gas/doc/c-arm.texi b/gas/doc/c-arm.texi
index d31ba02..76e4ee9 100644
--- a/gas/doc/c-arm.texi
+++ b/gas/doc/c-arm.texi
@@ -179,6 +179,8 @@ architectures),
@code{simd} (Advanced SIMD Extensions for v8-A architecture, implies @code{fp}),
@code{virt} (Virtualization Extensions for v7-A architecture, implies
@code{idiv}),
+@code{rdma} (ARMv8.1 Advanced SIMD extensions for v8-A architecture, implies
+@code{simd}),
and
@code{xscale}.
@@ -270,8 +272,9 @@ The following format options are recognized:
@code{neon},
@code{neon-vfpv4},
@code{neon-fp-armv8},
-and
@code{crypto-neon-fp-armv8}.
+and
+@code{neon-fp-armv8.1}.
In addition to determining which instructions are assembled, this option
also affects the way in which the @code{.double} assembler directive behaves
diff --git a/gas/testsuite/gas/arm/armv8-a+rdma.d b/gas/testsuite/gas/arm/armv8-a+rdma.d
new file mode 100644
index 0000000..f7e958a
--- /dev/null
+++ b/gas/testsuite/gas/arm/armv8-a+rdma.d
@@ -0,0 +1,77 @@
+#name: Valid v8-a+rdma
+#objdump: -dr
+#skip: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd
+
+.*: +file format .*arm.*
+
+
+Disassembly of section .text:
+
+00000000 <.*>:
+ 0: f3110b12 vqrdmlah.s16 d0, d1, d2
+ 4: f3120b54 vqrdmlah.s16 q0, q1, q2
+ 8: f3210b12 vqrdmlah.s32 d0, d1, d2
+ c: f3220b54 vqrdmlah.s32 q0, q1, q2
+ 10: f3110c12 vqrdmlsh.s16 d0, d1, d2
+ 14: f3120c54 vqrdmlsh.s16 q0, q1, q2
+ 18: f3210c12 vqrdmlsh.s32 d0, d1, d2
+ 1c: f3220c54 vqrdmlsh.s32 q0, q1, q2
+ 20: f2910e42 vqrdmlah.s16 d0, d1, d2\[0\]
+ 24: f2910e4a vqrdmlah.s16 d0, d1, d2\[1\]
+ 28: f2910e62 vqrdmlah.s16 d0, d1, d2\[2\]
+ 2c: f2910e6a vqrdmlah.s16 d0, d1, d2\[3\]
+ 30: f3920e42 vqrdmlah.s16 q0, q1, d2\[0\]
+ 34: f3920e4a vqrdmlah.s16 q0, q1, d2\[1\]
+ 38: f3920e62 vqrdmlah.s16 q0, q1, d2\[2\]
+ 3c: f3920e6a vqrdmlah.s16 q0, q1, d2\[3\]
+ 40: f2a10e42 vqrdmlah.s32 d0, d1, d2\[0\]
+ 44: f2a10e62 vqrdmlah.s32 d0, d1, d2\[1\]
+ 48: f3a20e42 vqrdmlah.s32 q0, q1, d2\[0\]
+ 4c: f3a20e62 vqrdmlah.s32 q0, q1, d2\[1\]
+ 50: f2910f42 vqrdmlsh.s16 d0, d1, d2\[0\]
+ 54: f2910f4a vqrdmlsh.s16 d0, d1, d2\[1\]
+ 58: f2910f62 vqrdmlsh.s16 d0, d1, d2\[2\]
+ 5c: f2910f6a vqrdmlsh.s16 d0, d1, d2\[3\]
+ 60: f3920f42 vqrdmlsh.s16 q0, q1, d2\[0\]
+ 64: f3920f4a vqrdmlsh.s16 q0, q1, d2\[1\]
+ 68: f3920f62 vqrdmlsh.s16 q0, q1, d2\[2\]
+ 6c: f3920f6a vqrdmlsh.s16 q0, q1, d2\[3\]
+ 70: f2a10f42 vqrdmlsh.s32 d0, d1, d2\[0\]
+ 74: f2a10f62 vqrdmlsh.s32 d0, d1, d2\[1\]
+ 78: f3a20f42 vqrdmlsh.s32 q0, q1, d2\[0\]
+ 7c: f3a20f62 vqrdmlsh.s32 q0, q1, d2\[1\]
+
+00000080 <.*>:
+ 80: ff11 0b12 vqrdmlah.s16 d0, d1, d2
+ 84: ff12 0b54 vqrdmlah.s16 q0, q1, q2
+ 88: ff21 0b12 vqrdmlah.s32 d0, d1, d2
+ 8c: ff22 0b54 vqrdmlah.s32 q0, q1, q2
+ 90: ff11 0c12 vqrdmlsh.s16 d0, d1, d2
+ 94: ff12 0c54 vqrdmlsh.s16 q0, q1, q2
+ 98: ff21 0c12 vqrdmlsh.s32 d0, d1, d2
+ 9c: ff22 0c54 vqrdmlsh.s32 q0, q1, q2
+ a0: ef91 0e42 vqrdmlah.s16 d0, d1, d2\[0\]
+ a4: ef91 0e4a vqrdmlah.s16 d0, d1, d2\[1\]
+ a8: ef91 0e62 vqrdmlah.s16 d0, d1, d2\[2\]
+ ac: ef91 0e6a vqrdmlah.s16 d0, d1, d2\[3\]
+ b0: ff92 0e42 vqrdmlah.s16 q0, q1, d2\[0\]
+ b4: ff92 0e4a vqrdmlah.s16 q0, q1, d2\[1\]
+ b8: ff92 0e62 vqrdmlah.s16 q0, q1, d2\[2\]
+ bc: ff92 0e6a vqrdmlah.s16 q0, q1, d2\[3\]
+ c0: efa1 0e42 vqrdmlah.s32 d0, d1, d2\[0\]
+ c4: efa1 0e62 vqrdmlah.s32 d0, d1, d2\[1\]
+ c8: ffa2 0e42 vqrdmlah.s32 q0, q1, d2\[0\]
+ cc: ffa2 0e62 vqrdmlah.s32 q0, q1, d2\[1\]
+ d0: ef91 0f42 vqrdmlsh.s16 d0, d1, d2\[0\]
+ d4: ef91 0f4a vqrdmlsh.s16 d0, d1, d2\[1\]
+ d8: ef91 0f62 vqrdmlsh.s16 d0, d1, d2\[2\]
+ dc: ef91 0f6a vqrdmlsh.s16 d0, d1, d2\[3\]
+ e0: ff92 0f42 vqrdmlsh.s16 q0, q1, d2\[0\]
+ e4: ff92 0f4a vqrdmlsh.s16 q0, q1, d2\[1\]
+ e8: ff92 0f62 vqrdmlsh.s16 q0, q1, d2\[2\]
+ ec: ff92 0f6a vqrdmlsh.s16 q0, q1, d2\[3\]
+ f0: efa1 0f42 vqrdmlsh.s32 d0, d1, d2\[0\]
+ f4: efa1 0f62 vqrdmlsh.s32 d0, d1, d2\[1\]
+ f8: ffa2 0f42 vqrdmlsh.s32 q0, q1, d2\[0\]
+ fc: ffa2 0f62 vqrdmlsh.s32 q0, q1, d2\[1\]
+
diff --git a/gas/testsuite/gas/arm/armv8-a+rdma.s b/gas/testsuite/gas/arm/armv8-a+rdma.s
new file mode 100644
index 0000000..60632f5
--- /dev/null
+++ b/gas/testsuite/gas/arm/armv8-a+rdma.s
@@ -0,0 +1,60 @@
+ .syntax unified
+ .text
+ .arch armv8-a
+ .arch_extension rdma
+
+ .macro vect_inst I T R
+ \I\().\T \R\()0, \R\()1, \R\()2
+ .endm
+
+ .macro scalar_inst I T R N
+ \I\().\T \R\()0, \R\()1, d\()2[\N\()]
+ .endm
+
+ .text
+ .arm
+A1:
+ .irp inst, vqrdmlah, vqrdmlsh
+ .irp type, s16, s32
+ .irp reg, d, q
+ vect_inst \inst \type \reg
+ .endr
+ .endr
+ .endr
+
+ .irp inst, vqrdmlah, vqrdmlsh
+ .irp reg, d, q
+ .irp idx, 0, 1, 2, 3
+ scalar_inst \inst s16 \reg \idx
+ .endr
+ .endr
+ .irp reg, d, q
+ .irp idx, 0, 1
+ scalar_inst \inst s32 \reg \idx
+ .endr
+ .endr
+ .endr
+
+ .text
+ .thumb
+T1:
+ .irp inst, vqrdmlah, vqrdmlsh
+ .irp type, s16, s32
+ .irp reg, d, q
+ vect_inst \inst \type \reg
+ .endr
+ .endr
+ .endr
+
+ .irp inst, vqrdmlah, vqrdmlsh
+ .irp reg, d, q
+ .irp idx, 0, 1, 2, 3
+ scalar_inst \inst s16 \reg \idx
+ .endr
+ .endr
+ .irp reg, d, q
+ .irp idx, 0, 1
+ scalar_inst \inst s32 \reg \idx
+ .endr
+ .endr
+ .endr
--
1.9.1
^ permalink raw reply [flat|nested] 4+ messages in thread