public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][Aarch64] Add support for overflow add and sub operations
@ 2016-11-30 23:06 Michael Collison
  0 siblings, 0 replies; 8+ messages in thread
From: Michael Collison @ 2016-11-30 23:06 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, rth, James Greenhalgh

[-- Attachment #1: Type: text/plain, Size: 1915 bytes --]

Hi,

This patch improves code generation for builtin arithmetic overflow operations for the aarch64 backend. As an example, for a simple test case such as:

int
f (int x, int y, int *ovf)
{
  int res;
  *ovf = __builtin_sadd_overflow (x, y, &res);
  return res;
}

Current trunk at -O2 generates

f:
	mov	w3, w0
	mov	w4, 0
	add	w0, w0, w1
	tbnz	w1, #31, .L4
	cmp	w0, w3
	blt	.L3
.L2:
	str	w4, [x2]
	ret
	.p2align 3
.L4:
	cmp	w0, w3
	ble	.L2
.L3:
	mov	w4, 1
	b	.L2


With the patch this now generates:

f:
	adds	w0, w0, w1
	cset	w1, vs
	str	w1, [x2]
	ret

Tested on aarch64-linux-gnu with no regressions. Okay for trunk?


2016-11-30  Michael Collison  <michael.collison@arm.com>
	    Richard Henderson <rth@redhat.com>

	* config/aarch64/aarch64-modes.def (CC_V): New.
	* config/aarch64/aarch64.c (aarch64_select_cc_mode): Test
	for signed overflow using CC_Vmode.
	(aarch64_get_condition_code_1): Handle CC_Vmode.
	* config/aarch64/aarch64.md (addv<GPI>4, uaddv<GPI>4): New.
	(addti3): Create simpler code if low part is already known to be 0.
	(addvti4, uaddvti4): New.
	(*add<GPI>3_compareC_cconly_imm): New.
	(*add<GPI>3_compareC_cconly): New.
	(*add<GPI>3_compareC_imm): New.
	(*add<GPI>3_compareC): Rename from add<GPI>3_compare1; do not
	handle constants within this pattern.
	(*add<GPI>3_compareV_cconly_imm): New.
	(*add<GPI>3_compareV_cconly): New.
	(*add<GPI>3_compareV_imm): New.
	(add<GPI>3_compareV): New.
	(add<GPI>3_carryinC, add<GPI>3_carryinV): New.
	(*add<GPI>3_carryinC_zero, *add<GPI>3_carryinV_zero): New.
	(*add<GPI>3_carryinC, *add<GPI>3_carryinV): New.
	(subv<GPI>4, usubv<GPI>4): New.
	(subti3): Handle op1 zero.
	(subvti4, usubvti4): New.
	(*sub<GPI>3_compare1_imm): New.
	(sub<GPI>3_carryinCV): New.
	(*sub<GPI>3_carryinCV_z1_z2, *sub<GPI>3_carryinCV_z1): New.
	(*sub<GPI>3_carryinCV_z2, *sub<GPI>3_carryinCV): New.

[-- Attachment #2: rth_overflow_ipreview1.patch --]
[-- Type: application/octet-stream, Size: 21909 bytes --]

diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index de8227f..71c2069 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -24,6 +24,7 @@ CC_MODE (CC_SWP);
 CC_MODE (CC_NZ);    /* Only N and Z bits of condition flags are valid.  */
 CC_MODE (CC_Z);     /* Only Z bit of condition flags is valid.  */
 CC_MODE (CC_C);     /* Only C bit of condition flags is valid.  */
+CC_MODE (CC_V);     /* Only V bit of condition flags is valid.  */
 
 /* Half-precision floating point for __fp16.  */
 FLOAT_MODE (HF, 2, 0);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 6078b16..e020d24 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4324,6 +4324,13 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
       && GET_CODE (y) == ZERO_EXTEND)
     return CC_Cmode;
 
+  /* A test for signed overflow.  */
+  if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
+      && code == NE
+      && GET_CODE (x) == PLUS
+      && GET_CODE (y) == SIGN_EXTEND)
+    return CC_Vmode;
+
   /* For everything else, return CCmode.  */
   return CCmode;
 }
@@ -4430,6 +4437,15 @@ aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
 	}
       break;
 
+    case CC_Vmode:
+      switch (comp_code)
+	{
+	case NE: return AARCH64_VS;
+	case EQ: return AARCH64_VC;
+	default: return -1;
+	}
+      break;
+
     default:
       return -1;
       break;
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 6afaf90..a074341 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1720,25 +1720,155 @@
   }
 )
 
+(define_expand "addv<mode>4"
+  [(match_operand:GPI 0 "register_operand")
+   (match_operand:GPI 1 "register_operand")
+   (match_operand:GPI 2 "register_operand")
+   (match_operand 3 "")]
+  ""
+{
+  emit_insn (gen_add<mode>3_compareV (operands[0], operands[1], operands[2]));
+
+  rtx x;
+  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[3]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})
+
+(define_expand "uaddv<mode>4"
+  [(match_operand:GPI 0 "register_operand")
+   (match_operand:GPI 1 "register_operand")
+   (match_operand:GPI 2 "register_operand")
+   (match_operand 3 "")]
+  ""
+{
+  emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2]));
+
+  rtx x;
+  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[3]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})
+
+
 (define_expand "addti3"
   [(set (match_operand:TI 0 "register_operand" "")
 	(plus:TI (match_operand:TI 1 "register_operand" "")
-		 (match_operand:TI 2 "register_operand" "")))]
+		 (match_operand:TI 2 "aarch64_reg_or_imm" "")))]
   ""
 {
-  rtx low = gen_reg_rtx (DImode);
-  emit_insn (gen_adddi3_compareC (low, gen_lowpart (DImode, operands[1]),
-				  gen_lowpart (DImode, operands[2])));
+  rtx l0 = gen_reg_rtx (DImode);
+  rtx l1 = gen_lowpart (DImode, operands[1]);
+  rtx l2 = simplify_gen_subreg (DImode, operands[2], TImode,
+				subreg_lowpart_offset (DImode, TImode));
+  rtx h0 = gen_reg_rtx (DImode);
+  rtx h1 = gen_highpart (DImode, operands[1]);
+  rtx h2 = simplify_gen_subreg (DImode, operands[2], TImode,
+				subreg_highpart_offset (DImode, TImode));
 
-  rtx high = gen_reg_rtx (DImode);
-  emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]),
-				 gen_highpart (DImode, operands[2])));
+  if (l2 == const0_rtx)
+    {
+      l0 = l1;
+      if (!aarch64_pluslong_operand (h2, DImode))
+	h2 = force_reg (DImode, h2);
+      emit_insn (gen_adddi3 (h0, h1, h2));
+    }
+  else
+    {
+      emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2)));
+      emit_insn (gen_adddi3_carryin (h0, h1, force_reg (DImode, h2)));
+    }
+
+  emit_move_insn (gen_lowpart (DImode, operands[0]), l0);
+  emit_move_insn (gen_highpart (DImode, operands[0]), h0);
 
-  emit_move_insn (gen_lowpart (DImode, operands[0]), low);
-  emit_move_insn (gen_highpart (DImode, operands[0]), high);
   DONE;
 })
 
+(define_expand "addvti4"
+  [(match_operand:TI 0 "register_operand" "")
+   (match_operand:TI 1 "register_operand" "")
+   (match_operand:TI 2 "aarch64_reg_or_imm" "")
+   (match_operand 3 "")]
+  ""
+{
+  rtx l0 = gen_reg_rtx (DImode);
+  rtx l1 = gen_lowpart (DImode, operands[1]);
+  rtx l2 = simplify_gen_subreg (DImode, operands[2], TImode,
+				subreg_lowpart_offset (DImode, TImode));
+  rtx h0 = gen_reg_rtx (DImode);
+  rtx h1 = gen_highpart (DImode, operands[1]);
+  rtx h2 = simplify_gen_subreg (DImode, operands[2], TImode,
+				subreg_highpart_offset (DImode, TImode));
+
+  if (l2 == const0_rtx)
+    {
+      l0 = l1;
+      emit_insn (gen_adddi3_compareV (h0, h1, force_reg (DImode, h2)));
+    }
+  else
+    {
+      emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2)));
+      emit_insn (gen_adddi3_carryinV (h0, h1, force_reg (DImode, h2)));
+    }
+
+  emit_move_insn (gen_lowpart (DImode, operands[0]), l0);
+  emit_move_insn (gen_highpart (DImode, operands[0]), h0);
+
+  rtx x;
+  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[3]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})
+
+(define_expand "uaddvti4"
+  [(match_operand:TI 0 "register_operand" "")
+   (match_operand:TI 1 "register_operand" "")
+   (match_operand:TI 2 "aarch64_reg_or_imm" "")
+   (match_operand 3 "")]
+  ""
+{
+  rtx l0 = gen_reg_rtx (DImode);
+  rtx l1 = gen_lowpart (DImode, operands[1]);
+  rtx l2 = simplify_gen_subreg (DImode, operands[2], TImode,
+				subreg_lowpart_offset (DImode, TImode));
+  rtx h0 = gen_reg_rtx (DImode);
+  rtx h1 = gen_highpart (DImode, operands[1]);
+  rtx h2 = simplify_gen_subreg (DImode, operands[2], TImode,
+				subreg_highpart_offset (DImode, TImode));
+
+  if (l2 == const0_rtx)
+    {
+      l0 = l1;
+      emit_insn (gen_adddi3_compareC (h0, h1, force_reg (DImode, h2)));
+    }
+  else
+    {
+      emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2)));
+      emit_insn (gen_adddi3_carryinC (h0, h1, force_reg (DImode, h2)));
+    }
+
+  emit_move_insn (gen_lowpart (DImode, operands[0]), l0);
+  emit_move_insn (gen_highpart (DImode, operands[0]), h0);
+
+  rtx x;
+  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[3]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+   DONE;
+ })
+
 (define_insn "add<mode>3_compare0"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ
@@ -1837,6 +1967,66 @@
   [(set_attr "type" "alus_sreg")]
 )
 
+;; Note that since we're sign-extending, match the immediate in GPI
+;; rather than in DWI.  Since CONST_INT is modeless, this works fine.
+(define_insn "*add<mode>3_compareV_cconly_imm"
+  [(set (reg:CC_V CC_REGNUM)
+	(ne:CC_V
+	  (plus:<DWI>
+	    (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r,r"))
+	    (match_operand:GPI 1 "aarch64_plus_immediate" "I,J"))
+	  (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))]
+  ""
+  "@
+  cmn\\t%<w>0, %<w>1
+  cmp\\t%<w>0, #%n1"
+  [(set_attr "type" "alus_imm")]
+)
+
+(define_insn "*add<mode>3_compareV_cconly"
+  [(set (reg:CC_V CC_REGNUM)
+	(ne:CC_V
+	  (plus:<DWI>
+	    (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r"))
+	    (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
+	  (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))]
+  ""
+  "cmn\\t%<w>0, %<w>1"
+  [(set_attr "type" "alus_sreg")]
+)
+
+(define_insn "*add<mode>3_compareV_imm"
+  [(set (reg:CC_V CC_REGNUM)
+	(ne:CC_V
+	  (plus:<DWI>
+	    (sign_extend:<DWI>
+	      (match_operand:GPI 1 "register_operand" "r,r"))
+	    (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))
+	  (sign_extend:<DWI>
+	    (plus:GPI (match_dup 1) (match_dup 2)))))
+   (set (match_operand:GPI 0 "register_operand" "=r,r")
+	(plus:GPI (match_dup 1) (match_dup 2)))]
+   ""
+   "@
+   adds\\t%<w>0, %<w>1, %<w>2
+   subs\\t%<w>0, %<w>1, #%n2"
+  [(set_attr "type" "alus_imm,alus_imm")]
+)
+
+(define_insn "add<mode>3_compareV"
+  [(set (reg:CC_V CC_REGNUM)
+	(ne:CC_V
+	  (plus:<DWI>
+	    (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))
+	    (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r")))
+	  (sign_extend:<DWI> (plus:GPI (match_dup 1) (match_dup 2)))))
+   (set (match_operand:GPI 0 "register_operand" "=r")
+	(plus:GPI (match_dup 1) (match_dup 2)))]
+  ""
+  "adds\\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "type" "alus_sreg")]
+ )
+
 (define_insn "*adds_shift_imm_<mode>"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ
@@ -2196,6 +2386,138 @@
   [(set_attr "type" "adc_reg")]
 )
 
+(define_expand "add<mode>3_carryinC"
+  [(parallel
+     [(set (match_dup 3)
+	   (ne:CC_C
+	     (plus:<DWI>
+	       (plus:<DWI>
+		 (match_dup 4)
+		 (zero_extend:<DWI>
+		   (match_operand:GPI 1 "register_operand" "r")))
+	       (zero_extend:<DWI>
+		 (match_operand:GPI 2 "register_operand" "r")))
+	   (zero_extend:<DWI>
+	     (plus:GPI
+	       (plus:GPI (match_dup 5) (match_dup 1))
+	       (match_dup 2)))))
+      (set (match_operand:GPI 0 "register_operand")
+	   (plus:GPI
+	     (plus:GPI (match_dup 5) (match_dup 1))
+	     (match_dup 2)))])]
+   ""
+{
+  operands[3] = gen_rtx_REG (CC_Cmode, CC_REGNUM);
+  operands[4] = gen_rtx_NE (<DWI>mode, operands[3], const0_rtx);
+  operands[5] = gen_rtx_NE (<MODE>mode, operands[3], const0_rtx);
+})
+
+(define_insn "*add<mode>3_carryinC_zero"
+  [(set (reg:CC_C CC_REGNUM)
+	(ne:CC_C
+	  (plus:<DWI>
+	    (match_operand:<DWI> 2 "aarch64_carry_operation" "")
+	    (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
+	  (zero_extend:<DWI>
+	    (plus:GPI
+	      (match_operand:GPI 3 "aarch64_carry_operation" "")
+	      (match_dup 1)))))
+   (set (match_operand:GPI 0 "register_operand")
+	(plus:GPI (match_dup 3) (match_dup 1)))]
+   ""
+   "adcs\\t%<w>0, %<w>1, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*add<mode>3_carryinC"
+  [(set (reg:CC_C CC_REGNUM)
+	(ne:CC_C
+	  (plus:<DWI>
+	    (plus:<DWI>
+	      (match_operand:<DWI> 3 "aarch64_carry_operation" "")
+	      (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
+	    (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r")))
+	  (zero_extend:<DWI>
+	    (plus:GPI
+	      (plus:GPI
+		(match_operand:GPI 4 "aarch64_carry_operation" "")
+		(match_dup 1))
+	      (match_dup 2)))))
+   (set (match_operand:GPI 0 "register_operand")
+	(plus:GPI
+	  (plus:GPI (match_dup 4) (match_dup 1))
+	  (match_dup 2)))]
+   ""
+   "adcs\\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_expand "add<mode>3_carryinV"
+  [(parallel
+     [(set (reg:CC_V CC_REGNUM)
+	   (ne:CC_V
+	     (plus:<DWI>
+	       (plus:<DWI>
+		 (match_dup 3)
+		 (sign_extend:<DWI>
+		   (match_operand:GPI 1 "register_operand" "r")))
+	       (sign_extend:<DWI>
+		 (match_operand:GPI 2 "register_operand" "r")))
+	   (sign_extend:<DWI>
+	     (plus:GPI
+	       (plus:GPI (match_dup 4) (match_dup 1))
+	       (match_dup 2)))))
+      (set (match_operand:GPI 0 "register_operand")
+	   (plus:GPI
+	     (plus:GPI (match_dup 4) (match_dup 1))
+	     (match_dup 2)))])]
+   ""
+{
+  rtx cc = gen_rtx_REG (CC_Cmode, CC_REGNUM);
+  operands[3] = gen_rtx_NE (<DWI>mode, cc, const0_rtx);
+  operands[4] = gen_rtx_NE (<MODE>mode, cc, const0_rtx);
+})
+
+(define_insn "*add<mode>3_carryinV_zero"
+  [(set (reg:CC_V CC_REGNUM)
+	(ne:CC_V
+	  (plus:<DWI>
+	    (match_operand:<DWI> 2 "aarch64_carry_operation" "")
+	    (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
+	  (sign_extend:<DWI>
+	    (plus:GPI
+	      (match_operand:GPI 3 "aarch64_carry_operation" "")
+	      (match_dup 1)))))
+   (set (match_operand:GPI 0 "register_operand")
+	(plus:GPI (match_dup 3) (match_dup 1)))]
+   ""
+   "adcs\\t%<w>0, %<w>1, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*add<mode>3_carryinV"
+  [(set (reg:CC_V CC_REGNUM)
+	(ne:CC_V
+	  (plus:<DWI>
+	    (plus:<DWI>
+	      (match_operand:<DWI> 3 "aarch64_carry_operation" "")
+	      (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
+	    (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r")))
+	  (sign_extend:<DWI>
+	    (plus:GPI
+	      (plus:GPI
+		(match_operand:GPI 4 "aarch64_carry_operation" "")
+		(match_dup 1))
+	      (match_dup 2)))))
+   (set (match_operand:GPI 0 "register_operand")
+	(plus:GPI
+	  (plus:GPI (match_dup 4) (match_dup 1))
+	  (match_dup 2)))]
+   ""
+   "adcs\\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "type" "adc_reg")]
+)
+
 (define_insn "*add_uxt<mode>_shift2"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(plus:GPI (and:GPI
@@ -2292,22 +2614,158 @@
    (set_attr "simd" "*,yes")]
 )
 
+(define_expand "subv<mode>4"
+  [(match_operand:GPI 0 "register_operand")
+   (match_operand:GPI 1 "aarch64_reg_or_zero")
+   (match_operand:GPI 2 "aarch64_reg_or_zero")
+   (match_operand 3 "")]
+  ""
+{
+  emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2]));
+
+  rtx x;
+  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[3]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})
+
+(define_expand "usubv<mode>4"
+  [(match_operand:GPI 0 "register_operand")
+   (match_operand:GPI 1 "aarch64_reg_or_zero")
+   (match_operand:GPI 2 "aarch64_reg_or_zero")
+   (match_operand 3 "")]
+  ""
+{
+  emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2]));
+
+  rtx x;
+  x = gen_rtx_LTU (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[3]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})
+
 (define_expand "subti3"
   [(set (match_operand:TI 0 "register_operand" "")
-	(minus:TI (match_operand:TI 1 "register_operand" "")
+	(minus:TI (match_operand:TI 1 "aarch64_reg_or_zero" "")
 		  (match_operand:TI 2 "register_operand" "")))]
   ""
 {
-  rtx low = gen_reg_rtx (DImode);
-  emit_insn (gen_subdi3_compare1 (low, gen_lowpart (DImode, operands[1]),
-				  gen_lowpart (DImode, operands[2])));
+  rtx l0 = gen_reg_rtx (DImode);
+  rtx l1 = simplify_gen_subreg (DImode, operands[1], TImode,
+				subreg_lowpart_offset (DImode, TImode));
+  rtx l2 = gen_lowpart (DImode, operands[2]);
+  rtx h0 = gen_reg_rtx (DImode);
+  rtx h1 = simplify_gen_subreg (DImode, operands[1], TImode,
+				subreg_highpart_offset (DImode, TImode));
+  rtx h2 = gen_highpart (DImode, operands[2]);
 
-  rtx high = gen_reg_rtx (DImode);
-  emit_insn (gen_subdi3_carryin (high, gen_highpart (DImode, operands[1]),
-				 gen_highpart (DImode, operands[2])));
+  emit_insn (gen_subdi3_compare1 (l0, l1, l2));
+  emit_insn (gen_subdi3_carryin (h0, h1, h2));
 
-  emit_move_insn (gen_lowpart (DImode, operands[0]), low);
-  emit_move_insn (gen_highpart (DImode, operands[0]), high);
+  emit_move_insn (gen_lowpart (DImode, operands[0]), l0);
+  emit_move_insn (gen_highpart (DImode, operands[0]), h0);
+  DONE;
+})
+
+(define_expand "subvti4"
+  [(match_operand:TI 0 "register_operand")
+   (match_operand:TI 1 "aarch64_reg_or_zero")
+   (match_operand:TI 2 "aarch64_reg_or_imm")
+   (match_operand 3 "")]
+  ""
+{
+  rtx l0 = gen_reg_rtx (DImode);
+  rtx l1 = simplify_gen_subreg (DImode, operands[1], TImode,
+				subreg_lowpart_offset (DImode, TImode));
+  rtx l2 = simplify_gen_subreg (DImode, operands[2], TImode,
+				subreg_lowpart_offset (DImode, TImode));
+  rtx h0 = gen_reg_rtx (DImode);
+  rtx h1 = simplify_gen_subreg (DImode, operands[1], TImode,
+				subreg_highpart_offset (DImode, TImode));
+  rtx h2 = simplify_gen_subreg (DImode, operands[2], TImode,
+				subreg_highpart_offset (DImode, TImode));
+
+  if (l2 == const0_rtx)
+    {
+      l0 = l1;
+      emit_insn (gen_subdi3_compare1 (h0, h1, force_reg (DImode, h2)));
+    }
+  else
+    {
+      if (CONST_INT_P (l2))
+	{
+	  l2 = force_reg (DImode, GEN_INT (-UINTVAL (l2)));
+	  h2 = force_reg (DImode, h2);
+	  emit_insn (gen_adddi3_compareC (l0, l1, l2));
+	}
+      else
+	emit_insn (gen_subdi3_compare1 (l0, l1, l2));
+      emit_insn (gen_subdi3_carryinCV (h0, force_reg (DImode, h1), h2));
+    }
+
+  emit_move_insn (gen_lowpart (DImode, operands[0]), l0);
+  emit_move_insn (gen_highpart (DImode, operands[0]), h0);
+
+  rtx x;
+  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[3]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})
+
+(define_expand "usubvti4"
+  [(match_operand:TI 0 "register_operand")
+   (match_operand:TI 1 "aarch64_reg_or_zero")
+   (match_operand:TI 2 "aarch64_reg_or_imm")
+   (match_operand 3 "")]
+  ""
+{
+  rtx l0 = gen_reg_rtx (DImode);
+  rtx l1 = simplify_gen_subreg (DImode, operands[1], TImode,
+				subreg_lowpart_offset (DImode, TImode));
+  rtx l2 = simplify_gen_subreg (DImode, operands[2], TImode,
+				subreg_lowpart_offset (DImode, TImode));
+  rtx h0 = gen_reg_rtx (DImode);
+  rtx h1 = simplify_gen_subreg (DImode, operands[1], TImode,
+				subreg_highpart_offset (DImode, TImode));
+  rtx h2 = simplify_gen_subreg (DImode, operands[2], TImode,
+				subreg_highpart_offset (DImode, TImode));
+
+  if (l2 == const0_rtx)
+    {
+      l0 = l1;
+      emit_insn (gen_subdi3_compare1 (h0, h1, force_reg (DImode, h2)));
+    }
+  else
+    {
+      if (CONST_INT_P (l2))
+	{
+	  l2 = force_reg (DImode, GEN_INT (-UINTVAL (l2)));
+	  h2 = force_reg (DImode, h2);
+	  emit_insn (gen_adddi3_compareC (l0, l1, l2));
+	}
+      else
+	emit_insn (gen_subdi3_compare1 (l0, l1, l2));
+      emit_insn (gen_subdi3_carryinCV (h0, force_reg (DImode, h1), h2));
+    }
+
+  emit_move_insn (gen_lowpart (DImode, operands[0]), l0);
+  emit_move_insn (gen_highpart (DImode, operands[0]), h0);
+
+  rtx x;
+  x = gen_rtx_LTU (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[3]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
   DONE;
 })
 
@@ -2336,6 +2794,22 @@
   [(set_attr "type" "alus_sreg")]
 )
 
+(define_insn "*sub<mode>3_compare1_imm"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ,rZ")
+	  (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")))
+   (set (match_operand:GPI 0 "register_operand" "=r,r")
+	(plus:GPI
+	  (match_dup 1)
+	  (match_operand:GPI 3 "aarch64_plus_immediate" "J,I")))]
+  "UINTVAL (operands[2]) == -UINTVAL (operands[3])"
+  "@
+  subs\\t%<w>0, %<w>1, %<w>2
+  adds\\t%<w>0, %<w>1, %<w>3"
+  [(set_attr "type" "alus_imm")]
+)
+
 (define_insn "sub<mode>3_compare1"
   [(set (reg:CC CC_REGNUM)
 	(compare:CC
@@ -2563,6 +3037,85 @@
   [(set_attr "type" "adc_reg")]
 )
 
+(define_expand "sub<mode>3_carryinCV"
+  [(parallel
+     [(set (reg:CC CC_REGNUM)
+	   (compare:CC
+	     (sign_extend:<DWI>
+	       (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ"))
+	     (plus:<DWI>
+	       (sign_extend:<DWI>
+		 (match_operand:GPI 2 "register_operand" "r"))
+	       (ltu:<DWI> (reg:CC CC_REGNUM) (const_int 0)))))
+      (set (match_operand:GPI 0 "register_operand" "=r")
+	   (minus:GPI
+	     (minus:GPI (match_dup 1) (match_dup 2))
+	     (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))])]
+   ""
+)
+
+(define_insn "*sub<mode>3_carryinCV_z1_z2"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (const_int 0)
+	  (match_operand:<DWI> 2 "aarch64_borrow_operation" "")))
+   (set (match_operand:GPI 0 "register_operand" "=r")
+	(neg:GPI (match_operand:GPI 1 "aarch64_borrow_operation" "")))]
+   ""
+   "sbcs\\t%<w>0, <w>zr, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*sub<mode>3_carryinCV_z1"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (const_int 0)
+	  (plus:<DWI>
+	    (sign_extend:<DWI>
+	      (match_operand:GPI 1 "register_operand" "r"))
+	    (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))))
+   (set (match_operand:GPI 0 "register_operand" "=r")
+	(minus:GPI
+	  (neg:GPI (match_dup 1))
+	  (match_operand:GPI 3 "aarch64_borrow_operation" "")))]
+   ""
+   "sbcs\\t%<w>0, <w>zr, %<w>1"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*sub<mode>3_carryinCV_z2"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (sign_extend:<DWI>
+	    (match_operand:GPI 1 "register_operand" "r"))
+	  (match_operand:<DWI> 2 "aarch64_borrow_operation" "")))
+   (set (match_operand:GPI 0 "register_operand" "=r")
+	(minus:GPI
+	  (match_dup 1)
+	  (match_operand:GPI 3 "aarch64_borrow_operation" "")))]
+   ""
+   "sbcs\\t%<w>0, %<w>1, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*sub<mode>3_carryinCV"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (sign_extend:<DWI>
+	    (match_operand:GPI 1 "register_operand" "r"))
+	  (plus:<DWI>
+	    (sign_extend:<DWI>
+	      (match_operand:GPI 2 "register_operand" "r"))
+	    (match_operand:<DWI> 3 "aarch64_borrow_operation" ""))))
+   (set (match_operand:GPI 0 "register_operand" "=r")
+	(minus:GPI
+	  (minus:GPI (match_dup 1) (match_dup 2))
+	  (match_operand:GPI 4 "aarch64_borrow_operation" "")))]
+   ""
+   "sbcs\\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "type" "adc_reg")]
+)
+
 (define_insn "*sub_uxt<mode>_shift2"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(minus:GPI (match_operand:GPI 4 "register_operand" "rk")
-- 
1.9.1


^ permalink raw reply	[flat|nested] 8+ messages in thread
* [PATCH][Aarch64] Add support for overflow add and sub operations
@ 2017-05-19  6:27 Michael Collison
  2017-05-19 11:00 ` Christophe Lyon
  0 siblings, 1 reply; 8+ messages in thread
From: Michael Collison @ 2017-05-19  6:27 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd

[-- Attachment #1: Type: text/plain, Size: 3144 bytes --]

Hi,

This patch improves code generation for builtin arithmetic overflow operations for the aarch64 backend. As an example, for a simple test case such as:

int
f (int x, int y, int *ovf)
{
  int res;
  *ovf = __builtin_sadd_overflow (x, y, &res);
  return res;
}

Current trunk at -O2 generates

f:
	mov	w3, w0
	mov	w4, 0
	add	w0, w0, w1
	tbnz	w1, #31, .L4
	cmp	w0, w3
	blt	.L3
.L2:
	str	w4, [x2]
	ret
	.p2align 3
.L4:
	cmp	w0, w3
	ble	.L2
.L3:
	mov	w4, 1
	b	.L2


With the patch this now generates:

f:
	adds	w0, w0, w1
	cset	w1, vs
	str	w1, [x2]
	ret


Original patch from Richard Henderson:

https://gcc.gnu.org/ml/gcc-patches/2016-01/msg01903.html


Okay for trunk?

2017-05-17  Michael Collison  <michael.collison@arm.com>
	    Richard Henderson <rth@redhat.com>

	* config/aarch64/aarch64-modes.def (CC_V): New.
	* config/aarch64/aarch64-protos.h
	(aarch64_add_128bit_scratch_regs): Declare.
	(aarch64_subv_128bit_scratch_regs): Declare.
	(aarch64_expand_subvti): Declare.
	(aarch64_gen_unlikely_cbranch): Declare.
	* config/aarch64/aarch64.c (aarch64_select_cc_mode): Test
	for signed overflow using CC_Vmode.
	(aarch64_get_condition_code_1): Handle CC_Vmode.
	(aarch64_gen_unlikely_cbranch): New function.
	(aarch64_add_128bit_scratch_regs): New function.
	(aarch64_subv_128bit_scratch_regs): New function.
	(aarch64_expand_subvti): New function.
	* config/aarch64/aarch64.md (addv<GPI>4, uaddv<GPI>4): New.
	(addti3): Create simpler code if low part is already known to be 0.
	(addvti4, uaddvti4): New.
	(*add<GPI>3_compareC_cconly_imm): New.
	(*add<GPI>3_compareC_cconly): New.
	(*add<GPI>3_compareC_imm): New.
	(*add<GPI>3_compareC): Rename from add<GPI>3_compare1; do not
	handle constants within this pattern.
	(*add<GPI>3_compareV_cconly_imm): New.
	(*add<GPI>3_compareV_cconly): New.
	(*add<GPI>3_compareV_imm): New.
	(add<GPI>3_compareV): New.
	(add<GPI>3_carryinC, add<GPI>3_carryinV): New.
	(*add<GPI>3_carryinC_zero, *add<GPI>3_carryinV_zero): New.
	(*add<GPI>3_carryinC, *add<GPI>3_carryinV): New.
	(subv<GPI>4, usubv<GPI>4): New.
	(subti3): Handle op1 zero.
	(subvti4, usubvti4): New.
	(*sub<GPI>3_compare1_imm): New.
	(sub<GPI>3_carryinCV): New.
	(*sub<GPI>3_carryinCV_z1_z2, *sub<GPI>3_carryinCV_z1): New.
	(*sub<GPI>3_carryinCV_z2, *sub<GPI>3_carryinCV): New.
	* testsuite/gcc.target/arm/builtin_sadd_128.c: New testcase.
	* testsuite/gcc.target/arm/builtin_saddl.c: New testcase.
	* testsuite/gcc.target/arm/builtin_saddll.c: New testcase.
	* testsuite/gcc.target/arm/builtin_uadd_128.c: New testcase.
	* testsuite/gcc.target/arm/builtin_uaddl.c: New testcase.
	* testsuite/gcc.target/arm/builtin_uaddll.c: New testcase.
	* testsuite/gcc.target/arm/builtin_ssub_128.c: New testcase.
	* testsuite/gcc.target/arm/builtin_ssubl.c: New testcase.
	* testsuite/gcc.target/arm/builtin_ssubll.c: New testcase.
	* testsuite/gcc.target/arm/builtin_usub_128.c: New testcase.
	* testsuite/gcc.target/arm/builtin_usubl.c: New testcase.
	* testsuite/gcc.target/arm/builtin_usubll.c: New testcase.

[-- Attachment #2: PR6308.patch --]
[-- Type: application/octet-stream, Size: 29469 bytes --]

diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 45f7a44..244e490 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -24,6 +24,7 @@ CC_MODE (CC_SWP);
 CC_MODE (CC_NZ);    /* Only N and Z bits of condition flags are valid.  */
 CC_MODE (CC_Z);     /* Only Z bit of condition flags is valid.  */
 CC_MODE (CC_C);     /* Only C bit of condition flags is valid.  */
+CC_MODE (CC_V);     /* Only V bit of condition flags is valid.  */
 
 /* Half-precision floating point for __fp16.  */
 FLOAT_MODE (HF, 2, 0);
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index f55d4ba..f38b2b8 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -388,6 +388,18 @@ void aarch64_relayout_simd_types (void);
 void aarch64_reset_previous_fndecl (void);
 bool aarch64_return_address_signing_enabled (void);
 void aarch64_save_restore_target_globals (tree);
+void aarch64_add_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
+				      rtx *low_in1, rtx *low_in2,
+				      rtx *high_dest, rtx *high_in1,
+				      rtx *high_in2);
+void aarch64_subv_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
+				       rtx *low_in1, rtx *low_in2,
+				       rtx *high_dest, rtx *high_in1,
+				       rtx *high_in2);
+void aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1,
+			    rtx low_in2, rtx high_dest, rtx high_in1,
+			    rtx high_in2);
+
 
 /* Initialize builtins for SIMD intrinsics.  */
 void init_aarch64_simd_builtins (void);
@@ -412,6 +424,8 @@ bool aarch64_float_const_representable_p (rtx);
 
 #if defined (RTX_CODE)
 
+void aarch64_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode,
+				   rtx label_ref);
 bool aarch64_legitimate_address_p (machine_mode, rtx, RTX_CODE, bool);
 machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx);
 rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index f343d92..71a651c 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4716,6 +4716,13 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
       && GET_CODE (y) == ZERO_EXTEND)
     return CC_Cmode;
 
+  /* A test for signed overflow.  */
+  if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
+      && code == NE
+      && GET_CODE (x) == PLUS
+      && GET_CODE (y) == SIGN_EXTEND)
+    return CC_Vmode;
+
   /* For everything else, return CCmode.  */
   return CCmode;
 }
@@ -4822,6 +4829,15 @@ aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
 	}
       break;
 
+    case CC_Vmode:
+      switch (comp_code)
+	{
+	case NE: return AARCH64_VS;
+	case EQ: return AARCH64_VC;
+	default: return -1;
+	}
+      break;
+
     default:
       return -1;
     }
@@ -13630,6 +13646,88 @@ aarch64_split_dimode_const_store (rtx dst, rtx src)
   return true;
 }
 
+/* Generate RTL for a conditional branch with rtx comparison CODE in
+   mode CC_MODE.  The destination of the unlikely conditional branch
+   is LABEL_REF.  */
+
+void
+aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
+			      rtx label_ref)
+{
+  rtx x;
+  x = gen_rtx_fmt_ee (code, VOIDmode,
+		      gen_rtx_REG (cc_mode, CC_REGNUM),
+		      const0_rtx);
+
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, label_ref),
+			    pc_rtx);
+  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+}
+
+void aarch64_add_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
+				      rtx *low_in1, rtx *low_in2,
+				      rtx *high_dest, rtx *high_in1,
+				      rtx *high_in2)
+{
+  *low_dest = gen_reg_rtx (DImode);
+  *low_in1 = gen_lowpart (DImode, op1);
+  *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
+				  subreg_lowpart_offset (DImode, TImode));
+  *high_dest = gen_reg_rtx (DImode);
+  *high_in1 = gen_highpart (DImode, op1);
+  *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
+				   subreg_highpart_offset (DImode, TImode));
+}
+
+void aarch64_subv_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
+				       rtx *low_in1, rtx *low_in2,
+				       rtx *high_dest, rtx *high_in1,
+				       rtx *high_in2)
+{
+  *low_dest = gen_reg_rtx (DImode);
+  *low_in1 = simplify_gen_subreg (DImode, op1, TImode,
+				  subreg_lowpart_offset (DImode, TImode));
+  *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
+				  subreg_lowpart_offset (DImode, TImode));
+  *high_dest = gen_reg_rtx (DImode);
+  *high_in1 = simplify_gen_subreg (DImode, op1, TImode,
+				   subreg_highpart_offset (DImode, TImode));
+  *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
+				   subreg_highpart_offset (DImode, TImode));
+
+}
+
+void aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1,
+			    rtx low_in2, rtx high_dest, rtx high_in1,
+			    rtx high_in2)
+{
+  if (low_in2 == const0_rtx)
+    {
+      low_dest = low_in1;
+      emit_insn (gen_subdi3_compare1 (high_dest, high_in1,
+				      force_reg (DImode, high_in2)));
+    }
+  else
+    {
+      if (CONST_INT_P (low_in2))
+	{
+	  low_in2 = force_reg (DImode, GEN_INT (-UINTVAL (low_in2)));
+	  high_in2 = force_reg (DImode, high_in2);
+	  emit_insn (gen_adddi3_compareC (low_dest, low_in1, low_in2));
+	}
+      else
+	emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2));
+      emit_insn (gen_subdi3_carryinCV (high_dest,
+				       force_reg (DImode, high_in1),
+				       high_in2));
+    }
+
+  emit_move_insn (gen_lowpart (DImode, op0), low_dest);
+  emit_move_insn (gen_highpart (DImode, op0), high_dest);
+
+}
+
 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
 
 static unsigned HOST_WIDE_INT
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index a693a3b..3976ecb 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1711,25 +1711,123 @@
   }
 )
 
+(define_expand "addv<mode>4"
+  [(match_operand:GPI 0 "register_operand")
+   (match_operand:GPI 1 "register_operand")
+   (match_operand:GPI 2 "register_operand")
+   (match_operand 3 "")]
+  ""
+{
+  emit_insn (gen_add<mode>3_compareV (operands[0], operands[1], operands[2]));
+  aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
+
+  DONE;
+})
+
+(define_expand "uaddv<mode>4"
+  [(match_operand:GPI 0 "register_operand")
+   (match_operand:GPI 1 "register_operand")
+   (match_operand:GPI 2 "register_operand")
+   (match_operand 3 "")]
+  ""
+{
+  emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2]));
+  aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]);
+
+  DONE;
+})
+
+
 (define_expand "addti3"
   [(set (match_operand:TI 0 "register_operand" "")
 	(plus:TI (match_operand:TI 1 "register_operand" "")
-		 (match_operand:TI 2 "register_operand" "")))]
+		 (match_operand:TI 2 "aarch64_reg_or_imm" "")))]
   ""
 {
-  rtx low = gen_reg_rtx (DImode);
-  emit_insn (gen_adddi3_compareC (low, gen_lowpart (DImode, operands[1]),
-				  gen_lowpart (DImode, operands[2])));
+  rtx l0,l1,l2,h0,h1,h2;
 
-  rtx high = gen_reg_rtx (DImode);
-  emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]),
-				 gen_highpart (DImode, operands[2])));
+  aarch64_add_128bit_scratch_regs (operands[1], operands[2],
+				   &l0, &l1, &l2, &h0, &h1, &h2);
+
+  if (l2 == const0_rtx)
+    {
+      l0 = l1;
+      if (!aarch64_pluslong_operand (h2, DImode))
+	h2 = force_reg (DImode, h2);
+      emit_insn (gen_adddi3 (h0, h1, h2));
+    }
+  else
+    {
+      emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2)));
+      emit_insn (gen_adddi3_carryin (h0, h1, force_reg (DImode, h2)));
+    }
+
+  emit_move_insn (gen_lowpart (DImode, operands[0]), l0);
+  emit_move_insn (gen_highpart (DImode, operands[0]), h0);
 
-  emit_move_insn (gen_lowpart (DImode, operands[0]), low);
-  emit_move_insn (gen_highpart (DImode, operands[0]), high);
   DONE;
 })
 
+(define_expand "addvti4"
+  [(match_operand:TI 0 "register_operand" "")
+   (match_operand:TI 1 "register_operand" "")
+   (match_operand:TI 2 "aarch64_reg_or_imm" "")
+   (match_operand 3 "")]
+  ""
+{
+  rtx l0,l1,l2,h0,h1,h2;
+
+  aarch64_add_128bit_scratch_regs (operands[1], operands[2],
+				   &l0, &l1, &l2, &h0, &h1, &h2);
+
+  if (l2 == const0_rtx)
+    {
+      l0 = l1;
+      emit_insn (gen_adddi3_compareV (h0, h1, force_reg (DImode, h2)));
+    }
+  else
+    {
+      emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2)));
+      emit_insn (gen_adddi3_carryinV (h0, h1, force_reg (DImode, h2)));
+    }
+
+  emit_move_insn (gen_lowpart (DImode, operands[0]), l0);
+  emit_move_insn (gen_highpart (DImode, operands[0]), h0);
+
+  aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
+  DONE;
+})
+
+(define_expand "uaddvti4"
+  [(match_operand:TI 0 "register_operand" "")
+   (match_operand:TI 1 "register_operand" "")
+   (match_operand:TI 2 "aarch64_reg_or_imm" "")
+   (match_operand 3 "")]
+  ""
+{
+  rtx l0,l1,l2,h0,h1,h2;
+
+  aarch64_add_128bit_scratch_regs (operands[1], operands[2],
+				   &l0, &l1, &l2, &h0, &h1, &h2);
+
+  if (l2 == const0_rtx)
+    {
+      l0 = l1;
+      emit_insn (gen_adddi3_compareC (h0, h1, force_reg (DImode, h2)));
+    }
+  else
+    {
+      emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2)));
+      emit_insn (gen_adddi3_carryinC (h0, h1, force_reg (DImode, h2)));
+    }
+
+  emit_move_insn (gen_lowpart (DImode, operands[0]), l0);
+  emit_move_insn (gen_highpart (DImode, operands[0]), h0);
+
+  aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]);
+  DONE;
+})
+
 (define_insn "add<mode>3_compare0"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ
@@ -1828,10 +1926,70 @@
   [(set_attr "type" "alus_sreg")]
 )
 
+;; Note that since we're sign-extending, match the immediate in GPI
+;; rather than in DWI.  Since CONST_INT is modeless, this works fine.
+(define_insn "*add<mode>3_compareV_cconly_imm"
+  [(set (reg:CC_V CC_REGNUM)
+	(ne:CC_V
+	  (plus:<DWI>
+	    (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r,r"))
+	    (match_operand:GPI 1 "aarch64_plus_immediate" "I,J"))
+	  (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))]
+  ""
+  "@
+  cmn\\t%<w>0, %<w>1
+  cmp\\t%<w>0, #%n1"
+  [(set_attr "type" "alus_imm")]
+)
+
+(define_insn "*add<mode>3_compareV_cconly"
+  [(set (reg:CC_V CC_REGNUM)
+	(ne:CC_V
+	  (plus:<DWI>
+	    (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r"))
+	    (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
+	  (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))]
+  ""
+  "cmn\\t%<w>0, %<w>1"
+  [(set_attr "type" "alus_sreg")]
+)
+
+(define_insn "*add<mode>3_compareV_imm"
+  [(set (reg:CC_V CC_REGNUM)
+	(ne:CC_V
+	  (plus:<DWI>
+	    (sign_extend:<DWI>
+	      (match_operand:GPI 1 "register_operand" "r,r"))
+	    (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))
+	  (sign_extend:<DWI>
+	    (plus:GPI (match_dup 1) (match_dup 2)))))
+   (set (match_operand:GPI 0 "register_operand" "=r,r")
+	(plus:GPI (match_dup 1) (match_dup 2)))]
+   ""
+   "@
+   adds\\t%<w>0, %<w>1, %<w>2
+   subs\\t%<w>0, %<w>1, #%n2"
+  [(set_attr "type" "alus_imm,alus_imm")]
+)
+
+(define_insn "add<mode>3_compareV"
+  [(set (reg:CC_V CC_REGNUM)
+	(ne:CC_V
+	  (plus:<DWI>
+	    (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))
+	    (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r")))
+	  (sign_extend:<DWI> (plus:GPI (match_dup 1) (match_dup 2)))))
+   (set (match_operand:GPI 0 "register_operand" "=r")
+	(plus:GPI (match_dup 1) (match_dup 2)))]
+  ""
+  "adds\\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "type" "alus_sreg")]
+)
+
 (define_insn "*adds_shift_imm_<mode>"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ
-	 (plus:GPI (ASHIFT:GPI 
+	 (plus:GPI (ASHIFT:GPI
 		    (match_operand:GPI 1 "register_operand" "r")
 		    (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n"))
 		   (match_operand:GPI 3 "register_operand" "r"))
@@ -2187,6 +2345,138 @@
   [(set_attr "type" "adc_reg")]
 )
 
+(define_expand "add<mode>3_carryinC"
+  [(parallel
+     [(set (match_dup 3)
+	   (ne:CC_C
+	     (plus:<DWI>
+	       (plus:<DWI>
+		 (match_dup 4)
+		 (zero_extend:<DWI>
+		   (match_operand:GPI 1 "register_operand" "r")))
+	       (zero_extend:<DWI>
+		 (match_operand:GPI 2 "register_operand" "r")))
+	   (zero_extend:<DWI>
+	     (plus:GPI
+	       (plus:GPI (match_dup 5) (match_dup 1))
+	       (match_dup 2)))))
+      (set (match_operand:GPI 0 "register_operand")
+	   (plus:GPI
+	     (plus:GPI (match_dup 5) (match_dup 1))
+	     (match_dup 2)))])]
+   ""
+{
+  operands[3] = gen_rtx_REG (CC_Cmode, CC_REGNUM);
+  operands[4] = gen_rtx_NE (<DWI>mode, operands[3], const0_rtx);
+  operands[5] = gen_rtx_NE (<MODE>mode, operands[3], const0_rtx);
+})
+
+(define_insn "*add<mode>3_carryinC_zero"
+  [(set (reg:CC_C CC_REGNUM)
+	(ne:CC_C
+	  (plus:<DWI>
+	    (match_operand:<DWI> 2 "aarch64_carry_operation" "")
+	    (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
+	  (zero_extend:<DWI>
+	    (plus:GPI
+	      (match_operand:GPI 3 "aarch64_carry_operation" "")
+	      (match_dup 1)))))
+   (set (match_operand:GPI 0 "register_operand")
+	(plus:GPI (match_dup 3) (match_dup 1)))]
+   ""
+   "adcs\\t%<w>0, %<w>1, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*add<mode>3_carryinC"
+  [(set (reg:CC_C CC_REGNUM)
+	(ne:CC_C
+	  (plus:<DWI>
+	    (plus:<DWI>
+	      (match_operand:<DWI> 3 "aarch64_carry_operation" "")
+	      (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
+	    (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r")))
+	  (zero_extend:<DWI>
+	    (plus:GPI
+	      (plus:GPI
+		(match_operand:GPI 4 "aarch64_carry_operation" "")
+		(match_dup 1))
+	      (match_dup 2)))))
+   (set (match_operand:GPI 0 "register_operand")
+	(plus:GPI
+	  (plus:GPI (match_dup 4) (match_dup 1))
+	  (match_dup 2)))]
+   ""
+   "adcs\\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_expand "add<mode>3_carryinV"
+  [(parallel
+     [(set (reg:CC_V CC_REGNUM)
+	   (ne:CC_V
+	     (plus:<DWI>
+	       (plus:<DWI>
+		 (match_dup 3)
+		 (sign_extend:<DWI>
+		   (match_operand:GPI 1 "register_operand" "r")))
+	       (sign_extend:<DWI>
+		 (match_operand:GPI 2 "register_operand" "r")))
+	   (sign_extend:<DWI>
+	     (plus:GPI
+	       (plus:GPI (match_dup 4) (match_dup 1))
+	       (match_dup 2)))))
+      (set (match_operand:GPI 0 "register_operand")
+	   (plus:GPI
+	     (plus:GPI (match_dup 4) (match_dup 1))
+	     (match_dup 2)))])]
+   ""
+{
+  rtx cc = gen_rtx_REG (CC_Cmode, CC_REGNUM);
+  operands[3] = gen_rtx_NE (<DWI>mode, cc, const0_rtx);
+  operands[4] = gen_rtx_NE (<MODE>mode, cc, const0_rtx);
+})
+
+(define_insn "*add<mode>3_carryinV_zero"
+  [(set (reg:CC_V CC_REGNUM)
+	(ne:CC_V
+	  (plus:<DWI>
+	    (match_operand:<DWI> 2 "aarch64_carry_operation" "")
+	    (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
+	  (sign_extend:<DWI>
+	    (plus:GPI
+	      (match_operand:GPI 3 "aarch64_carry_operation" "")
+	      (match_dup 1)))))
+   (set (match_operand:GPI 0 "register_operand")
+	(plus:GPI (match_dup 3) (match_dup 1)))]
+   ""
+   "adcs\\t%<w>0, %<w>1, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*add<mode>3_carryinV"
+  [(set (reg:CC_V CC_REGNUM)
+	(ne:CC_V
+	  (plus:<DWI>
+	    (plus:<DWI>
+	      (match_operand:<DWI> 3 "aarch64_carry_operation" "")
+	      (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
+	    (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r")))
+	  (sign_extend:<DWI>
+	    (plus:GPI
+	      (plus:GPI
+		(match_operand:GPI 4 "aarch64_carry_operation" "")
+		(match_dup 1))
+	      (match_dup 2)))))
+   (set (match_operand:GPI 0 "register_operand")
+	(plus:GPI
+	  (plus:GPI (match_dup 4) (match_dup 1))
+	  (match_dup 2)))]
+   ""
+   "adcs\\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "type" "adc_reg")]
+)
+
 (define_insn "*add_uxt<mode>_shift2"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(plus:GPI (and:GPI
@@ -2283,22 +2573,86 @@
    (set_attr "simd" "*,yes")]
 )
 
+(define_expand "subv<mode>4"
+  [(match_operand:GPI 0 "register_operand")
+   (match_operand:GPI 1 "aarch64_reg_or_zero")
+   (match_operand:GPI 2 "aarch64_reg_or_zero")
+   (match_operand 3 "")]
+  ""
+{
+  emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2]));
+  aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
+
+  DONE;
+})
+
+(define_expand "usubv<mode>4"
+  [(match_operand:GPI 0 "register_operand")
+   (match_operand:GPI 1 "aarch64_reg_or_zero")
+   (match_operand:GPI 2 "aarch64_reg_or_zero")
+   (match_operand 3 "")]
+  ""
+{
+  emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2]));
+  aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]);
+
+  DONE;
+})
+
 (define_expand "subti3"
   [(set (match_operand:TI 0 "register_operand" "")
-	(minus:TI (match_operand:TI 1 "register_operand" "")
+	(minus:TI (match_operand:TI 1 "aarch64_reg_or_zero" "")
 		  (match_operand:TI 2 "register_operand" "")))]
   ""
 {
-  rtx low = gen_reg_rtx (DImode);
-  emit_insn (gen_subdi3_compare1 (low, gen_lowpart (DImode, operands[1]),
-				  gen_lowpart (DImode, operands[2])));
+  rtx l0 = gen_reg_rtx (DImode);
+  rtx l1 = simplify_gen_subreg (DImode, operands[1], TImode,
+				subreg_lowpart_offset (DImode, TImode));
+  rtx l2 = gen_lowpart (DImode, operands[2]);
+  rtx h0 = gen_reg_rtx (DImode);
+  rtx h1 = simplify_gen_subreg (DImode, operands[1], TImode,
+				subreg_highpart_offset (DImode, TImode));
+  rtx h2 = gen_highpart (DImode, operands[2]);
 
-  rtx high = gen_reg_rtx (DImode);
-  emit_insn (gen_subdi3_carryin (high, gen_highpart (DImode, operands[1]),
-				 gen_highpart (DImode, operands[2])));
+  emit_insn (gen_subdi3_compare1 (l0, l1, l2));
+  emit_insn (gen_subdi3_carryin (h0, h1, h2));
 
-  emit_move_insn (gen_lowpart (DImode, operands[0]), low);
-  emit_move_insn (gen_highpart (DImode, operands[0]), high);
+  emit_move_insn (gen_lowpart (DImode, operands[0]), l0);
+  emit_move_insn (gen_highpart (DImode, operands[0]), h0);
+  DONE;
+})
+
+(define_expand "subvti4"
+  [(match_operand:TI 0 "register_operand")
+   (match_operand:TI 1 "aarch64_reg_or_zero")
+   (match_operand:TI 2 "aarch64_reg_or_imm")
+   (match_operand 3 "")]
+  ""
+{
+  rtx l0,l1,l2,h0,h1,h2;
+
+  aarch64_subv_128bit_scratch_regs (operands[1], operands[2],
+				    &l0, &l1, &l2, &h0, &h1, &h2);
+  aarch64_expand_subvti (operands[0], l0, l1, l2, h0, h1, h2);
+
+  aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
+  DONE;
+})
+
+(define_expand "usubvti4"
+  [(match_operand:TI 0 "register_operand")
+   (match_operand:TI 1 "aarch64_reg_or_zero")
+   (match_operand:TI 2 "aarch64_reg_or_imm")
+   (match_operand 3 "")]
+  ""
+{
+  rtx l0,l1,l2,h0,h1,h2;
+
+  aarch64_subv_128bit_scratch_regs (operands[1], operands[2],
+				    &l0, &l1, &l2, &h0, &h1, &h2);
+  aarch64_expand_subvti (operands[0], l0, l1, l2, h0, h1, h2);
+
+  aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]);
   DONE;
 })
 
@@ -2327,6 +2681,22 @@
   [(set_attr "type" "alus_sreg")]
 )
 
+(define_insn "*sub<mode>3_compare1_imm"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ,rZ")
+	  (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")))
+   (set (match_operand:GPI 0 "register_operand" "=r,r")
+	(plus:GPI
+	  (match_dup 1)
+	  (match_operand:GPI 3 "aarch64_plus_immediate" "J,I")))]
+  "UINTVAL (operands[2]) == -UINTVAL (operands[3])"
+  "@
+  subs\\t%<w>0, %<w>1, %<w>2
+  adds\\t%<w>0, %<w>1, %<w>3"
+  [(set_attr "type" "alus_imm")]
+)
+
 (define_insn "sub<mode>3_compare1"
   [(set (reg:CC CC_REGNUM)
 	(compare:CC
@@ -2554,6 +2924,85 @@
   [(set_attr "type" "adc_reg")]
 )
 
+(define_expand "sub<mode>3_carryinCV"
+  [(parallel
+     [(set (reg:CC CC_REGNUM)
+	   (compare:CC
+	     (sign_extend:<DWI>
+	       (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ"))
+	     (plus:<DWI>
+	       (sign_extend:<DWI>
+		 (match_operand:GPI 2 "register_operand" "r"))
+	       (ltu:<DWI> (reg:CC CC_REGNUM) (const_int 0)))))
+      (set (match_operand:GPI 0 "register_operand" "=r")
+	   (minus:GPI
+	     (minus:GPI (match_dup 1) (match_dup 2))
+	     (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))])]
+   ""
+)
+
+(define_insn "*sub<mode>3_carryinCV_z1_z2"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (const_int 0)
+	  (match_operand:<DWI> 2 "aarch64_borrow_operation" "")))
+   (set (match_operand:GPI 0 "register_operand" "=r")
+	(neg:GPI (match_operand:GPI 1 "aarch64_borrow_operation" "")))]
+   ""
+   "sbcs\\t%<w>0, <w>zr, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*sub<mode>3_carryinCV_z1"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (const_int 0)
+	  (plus:<DWI>
+	    (sign_extend:<DWI>
+	      (match_operand:GPI 1 "register_operand" "r"))
+	    (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))))
+   (set (match_operand:GPI 0 "register_operand" "=r")
+	(minus:GPI
+	  (neg:GPI (match_dup 1))
+	  (match_operand:GPI 3 "aarch64_borrow_operation" "")))]
+   ""
+   "sbcs\\t%<w>0, <w>zr, %<w>1"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*sub<mode>3_carryinCV_z2"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (sign_extend:<DWI>
+	    (match_operand:GPI 1 "register_operand" "r"))
+	  (match_operand:<DWI> 2 "aarch64_borrow_operation" "")))
+   (set (match_operand:GPI 0 "register_operand" "=r")
+	(minus:GPI
+	  (match_dup 1)
+	  (match_operand:GPI 3 "aarch64_borrow_operation" "")))]
+   ""
+   "sbcs\\t%<w>0, %<w>1, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*sub<mode>3_carryinCV"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (sign_extend:<DWI>
+	    (match_operand:GPI 1 "register_operand" "r"))
+	  (plus:<DWI>
+	    (sign_extend:<DWI>
+	      (match_operand:GPI 2 "register_operand" "r"))
+	    (match_operand:<DWI> 3 "aarch64_borrow_operation" ""))))
+   (set (match_operand:GPI 0 "register_operand" "=r")
+	(minus:GPI
+	  (minus:GPI (match_dup 1) (match_dup 2))
+	  (match_operand:GPI 4 "aarch64_borrow_operation" "")))]
+   ""
+   "sbcs\\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "type" "adc_reg")]
+)
+
 (define_insn "*sub_uxt<mode>_shift2"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(minus:GPI (match_operand:GPI 4 "register_operand" "rk")
diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c
new file mode 100644
index 0000000..6d84bb6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+
+extern void overflow_handler ();
+
+__int128 overflow_add (__int128 x, __int128 y)
+{
+  __int128 r;
+
+  int ovr = __builtin_add_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* { dg-final { scan-assembler "adds" } } */
+/* { dg-final { scan-assembler "adcs" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c b/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c
new file mode 100644
index 0000000..9768a98
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+
+extern void overflow_handler ();
+
+long overflow_add (long x, long y)
+{
+  long r;
+
+  int ovr = __builtin_saddl_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* { dg-final { scan-assembler "adds" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c b/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c
new file mode 100644
index 0000000..126a526
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+
+extern void overflow_handler ();
+
+long long overflow_add (long long x, long long y)
+{
+  long long r;
+
+  int ovr = __builtin_saddll_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* { dg-final { scan-assembler "adds" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c
new file mode 100644
index 0000000..c1261e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+
+extern void overflow_handler ();
+
+__int128 overflow_sub (__int128 x, __int128 y)
+{
+  __int128 r;
+
+  int ovr = __builtin_sub_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* { dg-final { scan-assembler "subs" } } */
+/* { dg-final { scan-assembler "sbcs" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c b/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c
new file mode 100644
index 0000000..1040464
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+
+extern void overflow_handler ();
+
+long overflow_sub (long x, long y)
+{
+  long r;
+
+  int ovr = __builtin_ssubl_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* { dg-final { scan-assembler "subs" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c b/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c
new file mode 100644
index 0000000..a03df88
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+
+extern void overflow_handler ();
+
+long long overflow_sub (long long x, long long y)
+{
+  long long r;
+
+  int ovr = __builtin_ssubll_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* { dg-final { scan-assembler "subs" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c
new file mode 100644
index 0000000..8c7c998
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+
+extern void overflow_handler ();
+
+unsigned __int128 overflow_add (unsigned __int128 x, unsigned __int128 y)
+{
+  unsigned __int128 r;
+
+  int ovr = __builtin_add_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* { dg-final { scan-assembler "adds" } } */
+/* { dg-final { scan-assembler "adcs" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c b/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c
new file mode 100644
index 0000000..e325591
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+
+extern void overflow_handler ();
+
+unsigned long overflow_add (unsigned long x, unsigned long y)
+{
+  unsigned long r;
+
+  int ovr = __builtin_uaddl_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* { dg-final { scan-assembler "adds" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c b/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c
new file mode 100644
index 0000000..5f42886
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+
+extern void overflow_handler ();
+
+unsigned long long overflow_add (unsigned long long x, unsigned long long y)
+{
+  unsigned long long r;
+
+  int ovr = __builtin_uaddll_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* { dg-final { scan-assembler "adds" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c
new file mode 100644
index 0000000..a84f4a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+
+extern void overflow_handler ();
+
+unsigned __int128 overflow_sub (unsigned __int128 x, unsigned __int128 y)
+{
+  unsigned __int128 r;
+
+  int ovr = __builtin_sub_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* { dg-final { scan-assembler "subs" } } */
+/* { dg-final { scan-assembler "sbcs" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c b/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c
new file mode 100644
index 0000000..ed033da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+
+extern void overflow_handler ();
+
+unsigned long overflow_sub (unsigned long x, unsigned long y)
+{
+  unsigned long r;
+
+  int ovr = __builtin_usubl_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* { dg-final { scan-assembler "subs" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c b/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c
new file mode 100644
index 0000000..a742f0c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+
+extern void overflow_handler ();
+
+unsigned long long overflow_sub (unsigned long long x, unsigned long long y)
+{
+  unsigned long long r;
+
+  int ovr = __builtin_usubll_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* { dg-final { scan-assembler "subs" } } */
+
-- 
1.9.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2017-08-01  6:33 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-11-30 23:06 [PATCH][Aarch64] Add support for overflow add and sub operations Michael Collison
2017-05-19  6:27 Michael Collison
2017-05-19 11:00 ` Christophe Lyon
2017-05-19 21:42   ` Michael Collison
2017-07-05  9:38     ` Richard Earnshaw (lists)
2017-07-06  7:29       ` Michael Collison
2017-07-06  8:22         ` Richard Earnshaw (lists)
2017-08-01  6:33       ` Michael Collison

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).