2017-03-03  Jim Wilson  <jim.wilson@linaro.org>

	sim/aarch64/
	* simulator.c (mul64hi): Shift carry left by 32.
	(smulh): Change signum to negate.  If negate, invert result, and add
	carry bit if low part of multiply result is zero.

	sim/testsuite/sim/aarch64/
	* sumov.s: Correct compare test values.
	* sumulh.s: New.

diff --git a/sim/aarch64/simulator.c b/sim/aarch64/simulator.c
index 1756ba1..8a8df7a 100644
--- a/sim/aarch64/simulator.c
+++ b/sim/aarch64/simulator.c
@@ -13020,6 +13020,8 @@ mul64hi (uint64_t value1, uint64_t value2)
 
   /* Drop lowest 32 bits of middle cross-product.  */
   result = resultmid1 >> 32;
+  /* Move carry bit to just above middle cross-product highest bit.  */
+  carry = carry << 32;
 
   /* Add top cross-product plus and any carry.  */
   result += xproducthi + carry;
@@ -13042,7 +13044,7 @@ smulh (sim_cpu *cpu)
   int64_t  value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
   uint64_t uvalue1;
   uint64_t uvalue2;
-  int64_t  signum = 1;
+  int  negate = 0;
 
   if (ra != R31)
     HALT_UNALLOC;
@@ -13051,7 +13053,7 @@ smulh (sim_cpu *cpu)
      the fix the sign up afterwards.  */
   if (value1 < 0)
     {
-      signum *= -1L;
+      negate = !negate;
       uvalue1 = -value1;
     }
   else
@@ -13061,7 +13063,7 @@ smulh (sim_cpu *cpu)
 
   if (value2 < 0)
     {
-      signum *= -1L;
+      negate = !negate;
       uvalue2 = -value2;
     }
   else
@@ -13070,9 +13072,18 @@ smulh (sim_cpu *cpu)
     }
 
   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
+
   uresult = mul64hi (uvalue1, uvalue2);
   result = uresult;
-  result *= signum;
+
+  if (negate)
+    {
+      /* Multiply 128-bit result by -1, which means highpart gets inverted,
+	 and has carry in added only if low part is 0.  */
+      result = ~result;
+      if ((uvalue1 * uvalue2) == 0)
+	result += 1;
+    }
 
   aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
 }
diff --git a/sim/testsuite/sim/aarch64/sumov.s b/sim/testsuite/sim/aarch64/sumov.s
index 69021cb..7180c6a 100644
--- a/sim/testsuite/sim/aarch64/sumov.s
+++ b/sim/testsuite/sim/aarch64/sumov.s
@@ -34,7 +34,7 @@ input:
 	smov w1, v0.h[4]
 	cmp w0, #0x0201
 	bne .Lfailure
-	cmp w1, #-2315
+	cmp w1, #-3343
 	bne .Lfailure
 
 	smov x0, v0.h[1]
@@ -50,8 +50,9 @@ input:
 	movk x2, #0x0807, lsl #16
 	cmp x0, x2
 	bne .Lfailure
-	mov x3, #0xf6f5
-	movk x3, #0xf8f7, lsl #16
+	mov w3, #0xf6f5
+	movk w3, #0xf8f7, lsl #16
+	sxtw x3, w3
 	cmp x1, x3
 	bne .Lfailure
 
@@ -64,9 +65,10 @@ input:
 
 	umov w0, v0.h[0]
 	umov w1, v0.h[4]
-	cmp w0, #0201
+	cmp w0, #0x0201
 	bne .Lfailure
-	cmp w1, #0xf2f1
+	mov w2, #0xf2f1
+	cmp w1, w2
 	bne .Lfailure
 
 	umov w0, v0.s[0]
diff --git a/sim/testsuite/sim/aarch64/sumulh.s b/sim/testsuite/sim/aarch64/sumulh.s
new file mode 100644
index 0000000..17f1ecd
--- /dev/null
+++ b/sim/testsuite/sim/aarch64/sumulh.s
@@ -0,0 +1,56 @@
+# mach: aarch64
+
+# Check the multiply highpart instructions: smulh (signed), umulh (unsigned).
+
+# Test -2*2, (-1<<32)*(-1<<32), -2*-2, and 2*2; any mismatch goes to .Lfailure.
+
+.include "testutils.inc"
+
+	.data
+	.align 4
+
+	start
+
+	mov x0, #-2		// Case 1: -2 * 2; x0 = 0xfffffffffffffffe
+	mov x1, #2
+	smulh x2, x0, x1	// signed product is -4, so the high 64 bits are all ones
+	cmp x2, #-1
+	bne .Lfailure
+	umulh x3, x0, x1	// unsigned: 0xfffffffffffffffe * 2 = 0x1_fffffffffffffffc
+	cmp x3, #1		// high 64 bits = 1
+	bne .Lfailure
+
+	mov w0, #-1		// Case 2: w0 = 0xffffffff; a w-register write zeroes the upper 32 bits
+	lsl x0, x0, #32 // 0xffffffff00000000, i.e. -(1<<32) when read as signed
+	mov x1, x0
+	smulh x2, x0, x1	// signed: (-(1<<32))^2 = 1<<64, so the high 64 bits = 1
+	cmp x2, #1
+	bne .Lfailure
+	umulh x3, x0, x1	// unsigned: ((1<<32)-1)^2 << 64, high 64 bits = 0xfffffffe00000001
+	mov w4, #-2		// build the expected value: w4 = 0xfffffffe, upper 32 bits zero
+	lsl x4, x4, #32		// x4 = 0xfffffffe00000000
+	add x4, x4, #1  // 0xfffffffe00000001
+	cmp x3, x4
+	bne .Lfailure
+
+	mov x0, #-2		// Case 3: -2 * -2, both operands negative
+	mov x1, #-2
+	smulh x2, x0, x1	// signed product is +4, so the high 64 bits = 0
+	cmp x2, #0
+	bne .Lfailure
+	umulh x3, x0, x1	// unsigned: ((1<<64)-2)^2, high 64 bits = (1<<64)-4
+	cmp x3, #-4		// (1<<64)-4 is the 64-bit pattern of -4
+	bne .Lfailure
+
+	mov x0, #2		// Case 4: 2 * 2, both operands positive
+	mov x1, #2
+	smulh x2, x0, x1	// product is 4 either way, so both high parts are 0
+	cmp x2, #0
+	bne .Lfailure
+	umulh x3, x0, x1
+	cmp x3, #0
+	bne .Lfailure
+
+	pass
+.Lfailure:
+	fail