* [PATCH] aarch64 sim uzp1/uzp2 bug fix
@ 2017-01-08 5:14 Jim Wilson
2017-01-09 9:19 ` Nick Clifton
0 siblings, 1 reply; 2+ messages in thread
From: Jim Wilson @ 2017-01-08 5:14 UTC (permalink / raw)
To: gdb-patches; +Cc: Nick Clifton
[-- Attachment #1: Type: text/plain, Size: 427 bytes --]
The uzp1/uzp2 implementation has a number of problems, so I had to
rewrite it. It doesn't get the shifting/masking right. It gets
input1 and input2 wrong. It checks one bit instead of two for the
size field. It doesn't fail for the non-full size==3 (1d) case, which
should be an unallocated instruction.
The new testcase passes with the patch, and fails without. The GCC C
testsuite failures go from 2269 to 2227 (-42).
Jim
[-- Attachment #2: aarch64-sim-uzp2.patch --]
[-- Type: text/x-patch, Size: 7393 bytes --]
2017-01-07 Jim Wilson <jim.wilson@linaro.org>
sim/aarch64/
* simulator.c (do_vec_UZP): Rewrite.
sim/testsuite/sim/aarch64/
* uzp.s: New.
diff --git a/sim/aarch64/simulator.c b/sim/aarch64/simulator.c
index 7b75c6e..36129e5 100644
--- a/sim/aarch64/simulator.c
+++ b/sim/aarch64/simulator.c
@@ -2958,12 +2958,10 @@ do_vec_UZP (sim_cpu *cpu)
uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
- uint64_t val1 = 0;
- uint64_t val2 = 0;
+ uint64_t val1;
+ uint64_t val2;
- uint64_t input1 = upper ? val_n1 : val_m1;
- uint64_t input2 = upper ? val_n2 : val_m2;
- unsigned i;
+ uint64_t input2 = full ? val_n2 : val_m1;
NYI_assert (29, 24, 0x0E);
NYI_assert (21, 21, 0);
@@ -2971,32 +2969,68 @@ do_vec_UZP (sim_cpu *cpu)
NYI_assert (13, 10, 6);
TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
- switch (INSTR (23, 23))
+ switch (INSTR (23, 22))
{
case 0:
- for (i = 0; i < 8; i++)
+ val1 = (val_n1 >> (upper * 8)) & 0xFFULL;
+ val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
+ val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
+ val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
+
+ val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
+ val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
+ val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
+ val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
+
+ if (full)
{
- val1 |= (input1 >> (i * 8)) & (0xFFULL << (i * 8));
- val2 |= (input2 >> (i * 8)) & (0xFFULL << (i * 8));
+ val2 = (val_m1 >> (upper * 8)) & 0xFFULL;
+ val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
+ val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
+ val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
+
+ val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
+ val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
+ val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
+ val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
}
break;
case 1:
- for (i = 0; i < 4; i++)
+ val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL;
+ val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
+
+ val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;;
+ val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
+
+ if (full)
{
- val1 |= (input1 >> (i * 16)) & (0xFFFFULL << (i * 16));
- val2 |= (input2 >> (i * 16)) & (0xFFFFULL << (i * 16));
+ val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL;
+ val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
+
+ val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
+ val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
}
break;
case 2:
- val1 = ((input1 & 0xFFFFFFFF) | ((input1 >> 32) & 0xFFFFFFFF00000000ULL));
- val2 = ((input2 & 0xFFFFFFFF) | ((input2 >> 32) & 0xFFFFFFFF00000000ULL));
+ val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
+ val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
+
+ if (full)
+ {
+ val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
+ val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
+ }
+ break;
case 3:
- val1 = input1;
- val2 = input2;
- break;
+ if (! full)
+ HALT_UNALLOC;
+
+ val1 = upper ? val_n2 : val_n1;
+ val2 = upper ? val_m2 : val_m1;
+ break;
}
aarch64_set_vec_u64 (cpu, vd, 0, val1);
diff --git a/sim/testsuite/sim/aarch64/uzp.s b/sim/testsuite/sim/aarch64/uzp.s
new file mode 100644
index 0000000..268d4fb
--- /dev/null
+++ b/sim/testsuite/sim/aarch64/uzp.s
@@ -0,0 +1,214 @@
+# mach: aarch64
+
+# Check the unzip instructions: uzp1, uzp2.
+
+.include "testutils.inc"
+
+input1:
+ .word 0x04030201
+ .word 0x08070605
+ .word 0x0c0b0a09
+ .word 0x100f0e0d
+input2:
+ .word 0x14131211
+ .word 0x18171615
+ .word 0x1c1b1a19
+ .word 0x201f1e1d
+zl8b:
+ .word 0x07050301
+ .word 0x17151311
+zu8b:
+ .word 0x08060402
+ .word 0x18161412
+zl16b:
+ .word 0x07050301
+ .word 0x0f0d0b09
+ .word 0x17151311
+ .word 0x1f1d1b19
+zu16b:
+ .word 0x08060402
+ .word 0x100e0c0a
+ .word 0x18161412
+ .word 0x201e1c1a
+zl4h:
+ .word 0x06050201
+ .word 0x16151211
+zu4h:
+ .word 0x08070403
+ .word 0x18171413
+zl8h:
+ .word 0x06050201
+ .word 0x0e0d0a09
+ .word 0x16151211
+ .word 0x1e1d1a19
+zu8h:
+ .word 0x08070403
+ .word 0x100f0c0b
+ .word 0x18171413
+ .word 0x201f1c1b
+zl2s:
+ .word 0x04030201
+ .word 0x14131211
+zu2s:
+ .word 0x08070605
+ .word 0x18171615
+zl4s:
+ .word 0x04030201
+ .word 0x0c0b0a09
+ .word 0x14131211
+ .word 0x1c1b1a19
+zu4s:
+ .word 0x08070605
+ .word 0x100f0e0d
+ .word 0x18171615
+ .word 0x201f1e1d
+zl2d:
+ .word 0x04030201
+ .word 0x08070605
+ .word 0x14131211
+ .word 0x18171615
+zu2d:
+ .word 0x0c0b0a09
+ .word 0x100f0e0d
+ .word 0x1c1b1a19
+ .word 0x201f1e1d
+
+ start
+ adrp x0, input1
+ ldr q0, [x0, #:lo12:input1]
+ adrp x0, input2
+ ldr q1, [x0, #:lo12:input2]
+
+ uzp1 v2.8b, v0.8b, v1.8b
+ mov x1, v2.d[0]
+ adrp x3, zl8b
+ ldr x4, [x3, #:lo12:zl8b]
+ cmp x1, x4
+ bne .Lfailure
+
+ uzp2 v2.8b, v0.8b, v1.8b
+ mov x1, v2.d[0]
+ adrp x3, zu8b
+ ldr x4, [x3, #:lo12:zu8b]
+ cmp x1, x4
+ bne .Lfailure
+
+ uzp1 v2.16b, v0.16b, v1.16b
+ mov x1, v2.d[0]
+ mov x2, v2.d[1]
+ adrp x3, zl16b
+ ldr x4, [x3, #:lo12:zl16b]
+ cmp x1, x4
+ bne .Lfailure
+ ldr x5, [x3, #:lo12:zl16b+8]
+ cmp x2, x5
+ bne .Lfailure
+
+ uzp2 v2.16b, v0.16b, v1.16b
+ mov x1, v2.d[0]
+ mov x2, v2.d[1]
+ adrp x3, zu16b
+ ldr x4, [x3, #:lo12:zu16b]
+ cmp x1, x4
+ bne .Lfailure
+ ldr x5, [x3, #:lo12:zu16b+8]
+ cmp x2, x5
+ bne .Lfailure
+
+ uzp1 v2.4h, v0.4h, v1.4h
+ mov x1, v2.d[0]
+ adrp x3, zl4h
+ ldr x4, [x3, #:lo12:zl4h]
+ cmp x1, x4
+ bne .Lfailure
+
+ uzp2 v2.4h, v0.4h, v1.4h
+ mov x1, v2.d[0]
+ adrp x3, zu4h
+ ldr x4, [x3, #:lo12:zu4h]
+ cmp x1, x4
+ bne .Lfailure
+
+ uzp1 v2.8h, v0.8h, v1.8h
+ mov x1, v2.d[0]
+ mov x2, v2.d[1]
+ adrp x3, zl8h
+ ldr x4, [x3, #:lo12:zl8h]
+ cmp x1, x4
+ bne .Lfailure
+ ldr x5, [x3, #:lo12:zl8h+8]
+ cmp x2, x5
+ bne .Lfailure
+
+ uzp2 v2.8h, v0.8h, v1.8h
+ mov x1, v2.d[0]
+ mov x2, v2.d[1]
+ adrp x3, zu8h
+ ldr x4, [x3, #:lo12:zu8h]
+ cmp x1, x4
+ bne .Lfailure
+ ldr x5, [x3, #:lo12:zu8h+8]
+ cmp x2, x5
+ bne .Lfailure
+
+ uzp1 v2.2s, v0.2s, v1.2s
+ mov x1, v2.d[0]
+ adrp x3, zl2s
+ ldr x4, [x3, #:lo12:zl2s]
+ cmp x1, x4
+ bne .Lfailure
+
+ uzp2 v2.2s, v0.2s, v1.2s
+ mov x1, v2.d[0]
+ adrp x3, zu2s
+ ldr x4, [x3, #:lo12:zu2s]
+ cmp x1, x4
+ bne .Lfailure
+
+ uzp1 v2.4s, v0.4s, v1.4s
+ mov x1, v2.d[0]
+ mov x2, v2.d[1]
+ adrp x3, zl4s
+ ldr x4, [x3, #:lo12:zl4s]
+ cmp x1, x4
+ bne .Lfailure
+ ldr x5, [x3, #:lo12:zl4s+8]
+ cmp x2, x5
+ bne .Lfailure
+
+ uzp2 v2.4s, v0.4s, v1.4s
+ mov x1, v2.d[0]
+ mov x2, v2.d[1]
+ adrp x3, zu4s
+ ldr x4, [x3, #:lo12:zu4s]
+ cmp x1, x4
+ bne .Lfailure
+ ldr x5, [x3, #:lo12:zu4s+8]
+ cmp x2, x5
+ bne .Lfailure
+
+ uzp1 v2.2d, v0.2d, v1.2d
+ mov x1, v2.d[0]
+ mov x2, v2.d[1]
+ adrp x3, zl2d
+ ldr x4, [x3, #:lo12:zl2d]
+ cmp x1, x4
+ bne .Lfailure
+ ldr x5, [x3, #:lo12:zl2d+8]
+ cmp x2, x5
+ bne .Lfailure
+
+ uzp2 v2.2d, v0.2d, v1.2d
+ mov x1, v2.d[0]
+ mov x2, v2.d[1]
+ adrp x3, zu2d
+ ldr x4, [x3, #:lo12:zu2d]
+ cmp x1, x4
+ bne .Lfailure
+ ldr x5, [x3, #:lo12:zu2d+8]
+ cmp x2, x5
+ bne .Lfailure
+
+ pass
+.Lfailure:
+ fail
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] aarch64 sim uzp1/uzp2 bug fix
2017-01-08 5:14 [PATCH] aarch64 sim uzp1/uzp2 bug fix Jim Wilson
@ 2017-01-09 9:19 ` Nick Clifton
0 siblings, 0 replies; 2+ messages in thread
From: Nick Clifton @ 2017-01-09 9:19 UTC (permalink / raw)
To: Jim Wilson, gdb-patches
Hi Jim,
> The uzp1/uzp2 implementation has a number of problems I had to
> rewrite it. It doesn't get the shifting/masking right. it gets
> input1 and input2 wrong. It checks one bit instead of two for the
> size field. It doesn't fail for the non-full size==3 (1d) case which
> should be an unallocated instruction.
I apologise - I must have been asleep when I wrote that code.
> The new testcase passes with the patch, and fails without. The GCC C
> testsuite failures go from 2269 to 2227 (-42).
Excellent - patch approved - please apply.
Cheers
Nick
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2017-01-09 9:19 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-08 5:14 [PATCH] aarch64 sim uzp1/uzp2 bug fix Jim Wilson
2017-01-09 9:19 ` Nick Clifton
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).