[ARM] Add support for overflow add, sub, and neg operations

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [ARM] Add support for overflow add, sub, and neg operations
@ 2016-02-24 23:02 Michael Collison
  2016-02-25  9:51 ` Kyrill Tkachov
  0 siblings, 1 reply; 7+ messages in thread
From: Michael Collison @ 2016-02-24 23:02 UTC (permalink / raw)
  To: GCC Patches, Kyrill Tkachov, Ramana Radhakrishnan

[-- Attachment #1: Type: text/plain, Size: 1252 bytes --]

This patch adds support for builtin overflow of add, subtract and 
negate. This patch is targeted for gcc 7 stage 1. It was tested with no 
regressions in arm and thumb modes on the following targets:

arm-non-linux-gnueabi
arm-non-linux-gnuabihf
armeb-none-linux-gnuabihf
arm-non-eabi

2016-02-24  Michael Collison  <michael.collison@arm.com>

     * config/arm/arm-modes.def: Add new condition code mode CC_V
     to represent the overflow bit.
     * config/arm/arm.c (maybe_get_arm_condition_code):
     Add support for CC_Vmode.
     * config/arm/arm.md (addv<mode>4, add<mode>3_compareV,
     addsi3_compareV_upper): New patterns to support signed
     builtin overflow add operations.
     (uaddv<mode>4, add<mode>3_compareC, addsi3_compareV_upper):
     New patterns to support unsigned builtin add overflow operations.
     (subv<mode>4, sub<mode>3_compare1): New patterns to support signed
     builtin overflow subtract operations,
     (usubv<mode>4): New patterns to support unsigned builtin subtract
     overflow operations.
     (negvsi3, negvdi3, negdi2_compre, negsi2_carryin_compare): New patterns
     to support builtin overflow negate operations.


-- 
Michael Collison
Linaro Toolchain Working Group
michael.collison@linaro.org


[-- Attachment #2: bugzilla-69663-upstream-v1.patch --]
[-- Type: text/x-patch, Size: 13638 bytes --]

diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def
index 1819553..69231f2 100644
--- a/gcc/config/arm/arm-modes.def
+++ b/gcc/config/arm/arm-modes.def
@@ -59,6 +59,7 @@ CC_MODE (CC_DGEU);
 CC_MODE (CC_DGTU);
 CC_MODE (CC_C);
 CC_MODE (CC_N);
+CC_MODE (CC_V);
 
 /* Vector modes.  */
 VECTOR_MODES (INT, 4);        /*            V4QI V2HI */
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index d8a2745..e0fbb6f 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -22854,6 +22854,8 @@ maybe_get_arm_condition_code (rtx comparison)
 	{
 	case LTU: return ARM_CS;
 	case GEU: return ARM_CC;
+	case NE: return ARM_CS;
+	case EQ: return ARM_CC;
 	default: return ARM_NV;
 	}
 
@@ -22879,6 +22881,15 @@ maybe_get_arm_condition_code (rtx comparison)
 	default: return ARM_NV;
 	}
 
+    case CC_Vmode:
+      switch (comp_code)
+	{
+	case NE: return ARM_VS;
+	case EQ: return ARM_VC;
+	default: return ARM_NV;
+
+	}
+
     case CCmode:
       switch (comp_code)
 	{
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 64873a2..705fe0b 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -539,6 +539,42 @@
    (set_attr "type" "multiple")]
 )
 
+(define_expand "addv<mode>4"
+  [(match_operand:SIDI 0 "register_operand")
+   (match_operand:SIDI 1 "register_operand")
+   (match_operand:SIDI 2 "register_operand")
+   (match_operand 3 "")]
+  "TARGET_ARM"
+{
+  emit_insn (gen_add<mode>3_compareV (operands[0], operands[1], operands[2]));
+
+  rtx x;
+  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[3]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})
+
+(define_expand "uaddv<mode>4"
+  [(match_operand:SIDI 0 "register_operand")
+   (match_operand:SIDI 1 "register_operand")
+   (match_operand:SIDI 2 "register_operand")
+   (match_operand 3 "")]
+  "TARGET_ARM"
+{
+  emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2]));
+
+  rtx x;
+  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[3]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})
+
 (define_expand "addsi3"
   [(set (match_operand:SI          0 "s_register_operand" "")
 	(plus:SI (match_operand:SI 1 "s_register_operand" "")
@@ -616,6 +652,163 @@
  ]
 )
 
+(define_insn_and_split "adddi3_compareV"
+  [(set (reg:CC_V CC_REGNUM)
+	(ne:CC_V
+	  (plus:TI
+	    (sign_extend:TI (match_operand:DI 1 "register_operand" "r"))
+	    (sign_extend:TI (match_operand:DI 2 "register_operand" "r")))
+	  (sign_extend:TI (plus:DI (match_dup 1) (match_dup 2)))))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(plus:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_ARM"
+  "#"
+  "TARGET_ARM && reload_completed"
+  [(parallel [(set (reg:CC_C CC_REGNUM)
+		   (compare:CC_C (plus:SI (match_dup 1) (match_dup 2))
+				 (match_dup 1)))
+	      (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])
+   (parallel [(set (reg:CC_V CC_REGNUM)
+		   (ne:CC_V
+		    (plus:DI (plus:DI
+			      (sign_extend:DI (match_dup 4))
+			      (sign_extend:DI (match_dup 5)))
+			     (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0)))
+		    (plus:DI (sign_extend:DI
+			      (plus:SI (match_dup 4) (match_dup 5)))
+			     (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0)))))
+	     (set (match_dup 3) (plus:SI (plus:SI
+					  (match_dup 4) (match_dup 5))
+					 (ltu:SI (reg:CC_C CC_REGNUM)
+						 (const_int 0))))])]
+  "
+  {
+    operands[3] = gen_highpart (SImode, operands[0]);
+    operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[4] = gen_highpart (SImode, operands[1]);
+    operands[1] = gen_lowpart (SImode, operands[1]);
+    operands[5] = gen_highpart (SImode, operands[2]);
+    operands[2] = gen_lowpart (SImode, operands[2]);
+  }"
+ [(set_attr "conds" "clob")
+   (set_attr "length" "8")
+   (set_attr "type" "multiple")]
+)
+
+(define_insn "addsi3_compareV"
+  [(set (reg:CC_V CC_REGNUM)
+	(ne:CC_V
+	  (plus:DI
+	    (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
+	    (sign_extend:DI (match_operand:SI 2 "register_operand" "r")))
+	  (sign_extend:DI (plus:SI (match_dup 1) (match_dup 2)))))
+   (set (match_operand:SI 0 "register_operand" "=r")
+	(plus:SI (match_dup 1) (match_dup 2)))]
+  "TARGET_32BIT"
+  "adds%?\\t%0, %1, %2"
+  [(set_attr "type" "alus_sreg")]
+)
+
+(define_insn "*addsi3_compareV_upper"
+  [(set (reg:CC_V CC_REGNUM)
+	(ne:CC_V
+	  (plus:DI
+	   (plus:DI
+	    (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
+	    (sign_extend:DI (match_operand:SI 2 "register_operand" "r")))
+	   (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0)))
+	  (plus:DI (sign_extend:DI
+		    (plus:SI (match_dup 1) (match_dup 2)))
+		   (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0)))))
+   (set (match_operand:SI 0 "register_operand" "=r")
+	(plus:SI
+	 (plus:SI (match_dup 1) (match_dup 2))
+	 (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
+  "TARGET_ARM"
+  "adcs%?\\t%0, %1, %2"
+  [(set_attr "conds" "set")
+   (set_attr "type" "adcs_reg")]
+)
+
+(define_insn_and_split "adddi3_compareC"
+  [(set (reg:CC_C CC_REGNUM)
+	(ne:CC_C
+	  (plus:TI
+	    (zero_extend:TI (match_operand:DI 1 "register_operand" "r"))
+	    (zero_extend:TI (match_operand:DI 2 "register_operand" "r")))
+	  (zero_extend:TI (plus:DI (match_dup 1) (match_dup 2)))))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(plus:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_ARM"
+  "#"
+  "TARGET_ARM && reload_completed"
+  [(parallel [(set (reg:CC_C CC_REGNUM)
+		   (compare:CC_C (plus:SI (match_dup 1) (match_dup 2))
+				 (match_dup 1)))
+	      (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])
+   (parallel [(set (reg:CC_C CC_REGNUM)
+		   (ne:CC_C
+		    (plus:DI (plus:DI
+			      (zero_extend:DI (match_dup 4))
+			      (zero_extend:DI (match_dup 5)))
+			     (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0)))
+		    (plus:DI (zero_extend:DI
+			      (plus:SI (match_dup 4) (match_dup 5)))
+			     (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0)))))
+	     (set (match_dup 3) (plus:SI
+				 (plus:SI (match_dup 4) (match_dup 5))
+				 (ltu:SI (reg:CC_C CC_REGNUM)
+					 (const_int 0))))])]
+  "
+  {
+    operands[3] = gen_highpart (SImode, operands[0]);
+    operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[4] = gen_highpart (SImode, operands[1]);
+    operands[5] = gen_highpart (SImode, operands[2]);
+    operands[1] = gen_lowpart (SImode, operands[1]);
+    operands[2] = gen_lowpart (SImode, operands[2]);
+  }"
+ [(set_attr "conds" "clob")
+   (set_attr "length" "8")
+   (set_attr "type" "multiple")]
+)
+
+(define_insn "*addsi3_compareC_upper"
+  [(set (reg:CC_C CC_REGNUM)
+	(ne:CC_C
+	  (plus:DI
+	   (plus:DI
+	    (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
+	    (zero_extend:DI (match_operand:SI 2 "register_operand" "r")))
+	   (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0)))
+	  (plus:DI (zero_extend:DI
+		    (plus:SI (match_dup 1) (match_dup 2)))
+		   (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0)))))
+   (set (match_operand:SI 0 "register_operand" "=r")
+	(plus:SI
+	 (plus:SI (match_dup 1) (match_dup 2))
+	 (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
+  "TARGET_ARM"
+  "adcs%?\\t%0, %1, %2"
+  [(set_attr "conds" "set")
+   (set_attr "type" "adcs_reg")]
+)
+
+(define_insn "addsi3_compareC"
+   [(set (reg:CC_C CC_REGNUM)
+	 (ne:CC_C
+	  (plus:DI
+	   (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
+	   (zero_extend:DI (match_operand:SI 2 "register_operand" "r")))
+	  (zero_extend:DI
+	   (plus:SI (match_dup 1) (match_dup 2)))))
+    (set (match_operand:SI 0 "register_operand" "=r")
+	 (plus:SI (match_dup 1) (match_dup 2)))]
+  "TARGET_32BIT"
+  "adds%?\\t%0, %1, %2"
+  [(set_attr "type" "alus_sreg")]
+)
+
 (define_insn "addsi3_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
 	(compare:CC_NOOV
@@ -865,6 +1058,84 @@
     (set_attr "type" "adcs_reg")]
 )
 
+(define_expand "subv<mode>4"
+  [(match_operand:SIDI 0 "register_operand")
+   (match_operand:SIDI 1 "register_operand")
+   (match_operand:SIDI 2 "register_operand")
+   (match_operand 3 "")]
+  "TARGET_ARM"
+{
+  emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2]));
+
+  rtx x;
+  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[3]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})
+
+(define_expand "usubv<mode>4"
+  [(match_operand:SIDI 0 "register_operand")
+   (match_operand:SIDI 1 "register_operand")
+   (match_operand:SIDI 2 "register_operand")
+   (match_operand 3 "")]
+  "TARGET_ARM"
+{
+  emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2]));
+
+  rtx x;
+  x = gen_rtx_LTU (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[3]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})
+
+(define_insn_and_split "subdi3_compare1"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (match_operand:DI 1 "register_operand" "r")
+	  (match_operand:DI 2 "register_operand" "r")))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(minus:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_ARM"
+  "#"
+  "TARGET_ARM && reload_completed"
+  [(parallel [(set (reg:CC CC_REGNUM)
+		   (compare:CC (match_dup 1) (match_dup 2)))
+	      (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
+   (parallel [(set (reg:CC CC_REGNUM)
+		   (compare:CC (match_dup 4) (match_dup 5)))
+	     (set (match_dup 3) (minus:SI (minus:SI (match_dup 4) (match_dup 5))
+			       (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))])]
+  {
+    operands[3] = gen_highpart (SImode, operands[0]);
+    operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[4] = gen_highpart (SImode, operands[1]);
+    operands[1] = gen_lowpart (SImode, operands[1]);
+    operands[5] = gen_highpart (SImode, operands[2]);
+    operands[2] = gen_lowpart (SImode, operands[2]);
+   }
+  [(set_attr "conds" "set")
+   (set_attr "length" "8")
+   (set_attr "type" "multiple")]
+)
+
+(define_insn "subsi3_compare1"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (match_operand:SI 1 "register_operand" "r")
+	  (match_operand:SI 2 "register_operand" "r")))
+   (set (match_operand:SI 0 "register_operand" "=r")
+	(minus:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "subs%?\\t%0, %1, %2"
+  [(set_attr "type" "alus_sreg")]
+)
+
 (define_insn "*subsi3_carryin"
   [(set (match_operand:SI 0 "s_register_operand" "=r,r")
         (minus:SI (minus:SI (match_operand:SI 1 "reg_or_int_operand" "r,I")
@@ -4349,6 +4620,73 @@
 \f
 ;; Unary arithmetic insns
 
+(define_expand "negvsi3"
+  [(match_operand:SI 0 "register_operand")
+   (match_operand:SI 1 "register_operand")
+   (match_operand 2 "")]
+  "TARGET_ARM"
+{
+  emit_insn (gen_subsi3_compare (operands[0], const0_rtx, operands[1]));
+
+  rtx x;
+  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[2]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})
+
+(define_expand "negvdi3"
+  [(match_operand:DI 0 "register_operand")
+   (match_operand:DI 1 "register_operand")
+   (match_operand 2 "")]
+  "TARGET_ARM"
+{
+  emit_insn (gen_negdi2_compare (operands[0], operands[1]));
+
+  rtx x;
+  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[2]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})
+
+
+(define_insn_and_split "negdi2_compare"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (const_int 0)
+	  (match_operand:DI 1 "register_operand" "r")))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(minus:DI (const_int 0) (match_dup 1)))]
+  "TARGET_ARM"
+  "#"
+  "TARGET_ARM && reload_completed"
+  [(parallel [(set (reg:CC CC_REGNUM)
+		   (compare:CC (const_int 0) (match_dup 1)))
+	      (set (match_dup 0) (minus:SI (const_int 0)
+					   (match_dup 1)))])
+   (parallel [(set (reg:CC CC_REGNUM)
+		   (compare:CC (const_int 0) (match_dup 3)))
+	     (set (match_dup 2)
+		  (minus:SI
+		   (minus:SI (const_int 0) (match_dup 3))
+		   (ltu:SI (reg:CC_C CC_REGNUM)
+			   (const_int 0))))])]
+  {
+    operands[2] = gen_highpart (SImode, operands[0]);
+    operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[3] = gen_highpart (SImode, operands[1]);
+    operands[1] = gen_lowpart (SImode, operands[1]);
+  }
+  [(set_attr "conds" "set")
+   (set_attr "length" "8")
+   (set_attr "type" "multiple")]
+)
+
 (define_expand "negdi2"
  [(parallel
    [(set (match_operand:DI 0 "s_register_operand" "")
@@ -4389,6 +4727,20 @@
    (set_attr "type" "multiple")]
 )
 
+(define_insn "*negsi2_carryin_compare"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC (const_int 0)
+		    (match_operand:SI 1 "s_register_operand" "r")))
+   (set (match_operand:SI 0 "s_register_operand" "=r")
+	(minus:SI (minus:SI (const_int 0)
+			    (match_dup 1))
+		  (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
+  "TARGET_32BIT"
+  "rscs\\t%0, %1, #0"
+  [(set_attr "conds" "set")
+   (set_attr "type" "alus_imm")]
+)
+
 (define_expand "negsi2"
   [(set (match_operand:SI         0 "s_register_operand" "")
 	(neg:SI (match_operand:SI 1 "s_register_operand" "")))]
-- 
1.9.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [ARM] Add support for overflow add, sub, and neg operations
  2016-02-24 23:02 [ARM] Add support for overflow add, sub, and neg operations Michael Collison
@ 2016-02-25  9:51 ` Kyrill Tkachov
  2016-02-26 10:32   ` Michael Collison
  0 siblings, 1 reply; 7+ messages in thread
From: Kyrill Tkachov @ 2016-02-25  9:51 UTC (permalink / raw)
  To: Michael Collison, GCC Patches, Ramana Radhakrishnan

Hi Michael,

On 24/02/16 23:02, Michael Collison wrote:
> This patch adds support for builtin overflow of add, subtract and negate. This patch is targeted for gcc 7 stage 1. It was tested with no regressions in arm and thumb modes on the following targets:
>
> arm-non-linux-gnueabi
> arm-non-linux-gnuabihf
> armeb-none-linux-gnuabihf
> arm-non-eabi
>

I'll have a deeper look once we're closer to GCC 7 development.
I've got a few comments in the meantime.

> 2016-02-24 Michael Collison  <michael.collison@arm.com>
>
>     * config/arm/arm-modes.def: Add new condition code mode CC_V
>     to represent the overflow bit.
>     * config/arm/arm.c (maybe_get_arm_condition_code):
>     Add support for CC_Vmode.
>     * config/arm/arm.md (addv<mode>4, add<mode>3_compareV,
>     addsi3_compareV_upper): New patterns to support signed
>     builtin overflow add operations.
>     (uaddv<mode>4, add<mode>3_compareC, addsi3_compareV_upper):
>     New patterns to support unsigned builtin add overflow operations.
>     (subv<mode>4, sub<mode>3_compare1): New patterns to support signed
>     builtin overflow subtract operations,
>     (usubv<mode>4): New patterns to support unsigned builtin subtract
>     overflow operations.
>     (negvsi3, negvdi3, negdi2_compre, negsi2_carryin_compare): New patterns
>     to support builtin overflow negate operations.
>
>

Can you please summarise what sequences are generated for these operations, and how
they are better than the default fallback sequences.
Also, we'd need tests for each of these overflow operations, since these are pretty complex
patterns that are being added.

Also, you may want to consider splitting this into a patch series, each adding a single
overflow operation, together with its tests. That way it will be easier to keep track of
which pattern applies to which use case and they can go in independently of each other.

+(define_expand "uaddv<mode>4"
+  [(match_operand:SIDI 0 "register_operand")
+   (match_operand:SIDI 1 "register_operand")
+   (match_operand:SIDI 2 "register_operand")
+   (match_operand 3 "")]
+  "TARGET_ARM"
+{
+  emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2]));
+
+  rtx x;
+  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[3]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})
+

I notice this and many other patterns in this patch are guarded on TARGET_ARM. Is there any reason why they
should be restricted to arm state and not be TARGET_32BIT ?

Thanks,
Kyrill

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [ARM] Add support for overflow add, sub, and neg operations
  2016-02-25  9:51 ` Kyrill Tkachov
@ 2016-02-26 10:32   ` Michael Collison
  2016-02-29 11:13     ` Kyrill Tkachov
  0 siblings, 1 reply; 7+ messages in thread
From: Michael Collison @ 2016-02-26 10:32 UTC (permalink / raw)
  To: Kyrill Tkachov, GCC Patches, Ramana Radhakrishnan



On 02/25/2016 02:51 AM, Kyrill Tkachov wrote:
> Hi Michael,
>
> On 24/02/16 23:02, Michael Collison wrote:
>> This patch adds support for builtin overflow of add, subtract and 
>> negate. This patch is targeted for gcc 7 stage 1. It was tested with 
>> no regressions in arm and thumb modes on the following targets:
>>
>> arm-non-linux-gnueabi
>> arm-non-linux-gnuabihf
>> armeb-none-linux-gnuabihf
>> arm-non-eabi
>>
>
> I'll have a deeper look once we're closer to GCC 7 development.
> I've got a few comments in the meantime.
>
>> 2016-02-24 Michael Collison <michael.collison@arm.com>
>>
>>     * config/arm/arm-modes.def: Add new condition code mode CC_V
>>     to represent the overflow bit.
>>     * config/arm/arm.c (maybe_get_arm_condition_code):
>>     Add support for CC_Vmode.
>>     * config/arm/arm.md (addv<mode>4, add<mode>3_compareV,
>>     addsi3_compareV_upper): New patterns to support signed
>>     builtin overflow add operations.
>>     (uaddv<mode>4, add<mode>3_compareC, addsi3_compareV_upper):
>>     New patterns to support unsigned builtin add overflow operations.
>>     (subv<mode>4, sub<mode>3_compare1): New patterns to support signed
>>     builtin overflow subtract operations,
>>     (usubv<mode>4): New patterns to support unsigned builtin subtract
>>     overflow operations.
>>     (negvsi3, negvdi3, negdi2_compre, negsi2_carryin_compare): New 
>> patterns
>>     to support builtin overflow negate operations.
>>
>>
>
> Can you please summarise what sequences are generated for these 
> operations, and how
> they are better than the default fallback sequences.

Sure for a simple test case such as:

int
fn3 (int x, int y, int *ovf)
{
   int res;
   *ovf = __builtin_sadd_overflow (x, y, &res);
   return res;
}

Current trunk at -O2 generates

fn3:
         @ args = 0, pretend = 0, frame = 0
         @ frame_needed = 0, uses_anonymous_args = 0
         @ link register save eliminated.
         cmp     r1, #0
         mov     r3, #0
         add     r1, r0, r1
         blt     .L4
         cmp     r1, r0
         blt     .L3
.L2:
         str     r3, [r2]
         mov     r0, r1
         bx      lr
.L4:
         cmp     r1, r0
         ble     .L2
.L3:
         mov     r3, #1
         b       .L2

With the overflow patch this now generates:

        adds    r0, r0, r1
        movvs   r3, #1
        movvc   r3, #0
        str     r3, [r2]
        bx      lr

> Also, we'd need tests for each of these overflow operations, since 
> these are pretty complex
> patterns that are being added.

The patterns are tested now most notably by tests in:

c-c++-common/torture/builtin-arith-overflow*.c

I had a few failures I resolved so the builtin overflow arithmetic 
functions are definitely being exercised.
>
> Also, you may want to consider splitting this into a patch series, 
> each adding a single
> overflow operation, together with its tests. That way it will be 
> easier to keep track of
> which pattern applies to which use case and they can go in 
> independently of each other.

Let me know if you still fell the same way given the existing test cases.

>
> +(define_expand "uaddv<mode>4"
> +  [(match_operand:SIDI 0 "register_operand")
> +   (match_operand:SIDI 1 "register_operand")
> +   (match_operand:SIDI 2 "register_operand")
> +   (match_operand 3 "")]
> +  "TARGET_ARM"
> +{
> +  emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], 
> operands[2]));
> +
> +  rtx x;
> +  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REGNUM), 
> const0_rtx);
> +  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
> +                gen_rtx_LABEL_REF (VOIDmode, operands[3]),
> +                pc_rtx);
> +  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
> +  DONE;
> +})
> +
>
> I notice this and many other patterns in this patch are guarded on 
> TARGET_ARM. Is there any reason why they
> should be restricted to arm state and not be TARGET_32BIT ?
I thought about this as well. I will test will TARGET_32BIT and get back 
to you.
>
>
> Thanks,
> Kyrill

-- 
Michael Collison
Linaro Toolchain Working Group
michael.collison@linaro.org

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [ARM] Add support for overflow add, sub, and neg operations
  2016-02-26 10:32   ` Michael Collison
@ 2016-02-29 11:13     ` Kyrill Tkachov
  2016-02-29 11:25       ` Michael Collison
  2016-03-29  0:38       ` Michael Collison
  0 siblings, 2 replies; 7+ messages in thread
From: Kyrill Tkachov @ 2016-02-29 11:13 UTC (permalink / raw)
  To: Michael Collison, GCC Patches, Ramana Radhakrishnan


On 26/02/16 10:32, Michael Collison wrote:
>
>
> On 02/25/2016 02:51 AM, Kyrill Tkachov wrote:
>> Hi Michael,
>>
>> On 24/02/16 23:02, Michael Collison wrote:
>>> This patch adds support for builtin overflow of add, subtract and negate. This patch is targeted for gcc 7 stage 1. It was tested with no regressions in arm and thumb modes on the following targets:
>>>
>>> arm-non-linux-gnueabi
>>> arm-non-linux-gnuabihf
>>> armeb-none-linux-gnuabihf
>>> arm-non-eabi
>>>
>>
>> I'll have a deeper look once we're closer to GCC 7 development.
>> I've got a few comments in the meantime.
>>
>>> 2016-02-24 Michael Collison <michael.collison@arm.com>
>>>
>>>     * config/arm/arm-modes.def: Add new condition code mode CC_V
>>>     to represent the overflow bit.
>>>     * config/arm/arm.c (maybe_get_arm_condition_code):
>>>     Add support for CC_Vmode.
>>>     * config/arm/arm.md (addv<mode>4, add<mode>3_compareV,
>>>     addsi3_compareV_upper): New patterns to support signed
>>>     builtin overflow add operations.
>>>     (uaddv<mode>4, add<mode>3_compareC, addsi3_compareV_upper):
>>>     New patterns to support unsigned builtin add overflow operations.
>>>     (subv<mode>4, sub<mode>3_compare1): New patterns to support signed
>>>     builtin overflow subtract operations,
>>>     (usubv<mode>4): New patterns to support unsigned builtin subtract
>>>     overflow operations.
>>>     (negvsi3, negvdi3, negdi2_compre, negsi2_carryin_compare): New patterns
>>>     to support builtin overflow negate operations.
>>>
>>>
>>
>> Can you please summarise what sequences are generated for these operations, and how
>> they are better than the default fallback sequences.
>
> Sure for a simple test case such as:
>
> int
> fn3 (int x, int y, int *ovf)
> {
>   int res;
>   *ovf = __builtin_sadd_overflow (x, y, &res);
>   return res;
> }
>
> Current trunk at -O2 generates
>
> fn3:
>         @ args = 0, pretend = 0, frame = 0
>         @ frame_needed = 0, uses_anonymous_args = 0
>         @ link register save eliminated.
>         cmp     r1, #0
>         mov     r3, #0
>         add     r1, r0, r1
>         blt     .L4
>         cmp     r1, r0
>         blt     .L3
> .L2:
>         str     r3, [r2]
>         mov     r0, r1
>         bx      lr
> .L4:
>         cmp     r1, r0
>         ble     .L2
> .L3:
>         mov     r3, #1
>         b       .L2
>
> With the overflow patch this now generates:
>
>        adds    r0, r0, r1
>        movvs   r3, #1
>        movvc   r3, #0
>        str     r3, [r2]
>        bx      lr
>

Thanks! That looks much better.

>> Also, we'd need tests for each of these overflow operations, since these are pretty complex
>> patterns that are being added.
>
> The patterns are tested now most notably by tests in:
>
> c-c++-common/torture/builtin-arith-overflow*.c
>
> I had a few failures I resolved so the builtin overflow arithmetic functions are definitely being exercised.

Great, that gives me more confidence on the correctness aspects but...

>>
>> Also, you may want to consider splitting this into a patch series, each adding a single
>> overflow operation, together with its tests. That way it will be easier to keep track of
>> which pattern applies to which use case and they can go in independently of each other.
>
> Let me know if you still fell the same way given the existing test cases.
>

... I'd like us to still have scan-assembler tests. The torture tests exercise the correctness,
but we'd want tests to catch regressions where we stop generating the new patterns due to other
optimisation changes, which would lead to code quality regressions.
So I'd like us to have scan-assembler tests for these sequences to make sure we generate the right
instructions.

Thanks,
Kyrill

>>
>> +(define_expand "uaddv<mode>4"
>> +  [(match_operand:SIDI 0 "register_operand")
>> +   (match_operand:SIDI 1 "register_operand")
>> +   (match_operand:SIDI 2 "register_operand")
>> +   (match_operand 3 "")]
>> +  "TARGET_ARM"
>> +{
>> +  emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2]));
>> +
>> +  rtx x;
>> +  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REGNUM), const0_rtx);
>> +  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
>> +                gen_rtx_LABEL_REF (VOIDmode, operands[3]),
>> +                pc_rtx);
>> +  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
>> +  DONE;
>> +})
>> +
>>
>> I notice this and many other patterns in this patch are guarded on TARGET_ARM. Is there any reason why they
>> should be restricted to arm state and not be TARGET_32BIT ?
> I thought about this as well. I will test will TARGET_32BIT and get back to you.
>>
>>
>> Thanks,
>> Kyrill
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [ARM] Add support for overflow add, sub, and neg operations
  2016-02-29 11:13     ` Kyrill Tkachov
@ 2016-02-29 11:25       ` Michael Collison
  2016-03-29  0:38       ` Michael Collison
  1 sibling, 0 replies; 7+ messages in thread
From: Michael Collison @ 2016-02-29 11:25 UTC (permalink / raw)
  To: Kyrill Tkachov, GCC Patches, Ramana Radhakrishnan



On 2/29/2016 4:13 AM, Kyrill Tkachov wrote:
>
> On 26/02/16 10:32, Michael Collison wrote:
>>
>>
>> On 02/25/2016 02:51 AM, Kyrill Tkachov wrote:
>>> Hi Michael,
>>>
>>> On 24/02/16 23:02, Michael Collison wrote:
>>>> This patch adds support for builtin overflow of add, subtract and 
>>>> negate. This patch is targeted for gcc 7 stage 1. It was tested 
>>>> with no regressions in arm and thumb modes on the following targets:
>>>>
>>>> arm-non-linux-gnueabi
>>>> arm-non-linux-gnuabihf
>>>> armeb-none-linux-gnuabihf
>>>> arm-non-eabi
>>>>
>>>
>>> I'll have a deeper look once we're closer to GCC 7 development.
>>> I've got a few comments in the meantime.
>>>
>>>> 2016-02-24 Michael Collison <michael.collison@arm.com>
>>>>
>>>>     * config/arm/arm-modes.def: Add new condition code mode CC_V
>>>>     to represent the overflow bit.
>>>>     * config/arm/arm.c (maybe_get_arm_condition_code):
>>>>     Add support for CC_Vmode.
>>>>     * config/arm/arm.md (addv<mode>4, add<mode>3_compareV,
>>>>     addsi3_compareV_upper): New patterns to support signed
>>>>     builtin overflow add operations.
>>>>     (uaddv<mode>4, add<mode>3_compareC, addsi3_compareV_upper):
>>>>     New patterns to support unsigned builtin add overflow operations.
>>>>     (subv<mode>4, sub<mode>3_compare1): New patterns to support signed
>>>>     builtin overflow subtract operations,
>>>>     (usubv<mode>4): New patterns to support unsigned builtin subtract
>>>>     overflow operations.
>>>>     (negvsi3, negvdi3, negdi2_compre, negsi2_carryin_compare): New 
>>>> patterns
>>>>     to support builtin overflow negate operations.
>>>>
>>>>
>>>
>>> Can you please summarise what sequences are generated for these 
>>> operations, and how
>>> they are better than the default fallback sequences.
>>
>> Sure for a simple test case such as:
>>
>> int
>> fn3 (int x, int y, int *ovf)
>> {
>>   int res;
>>   *ovf = __builtin_sadd_overflow (x, y, &res);
>>   return res;
>> }
>>
>> Current trunk at -O2 generates
>>
>> fn3:
>>         @ args = 0, pretend = 0, frame = 0
>>         @ frame_needed = 0, uses_anonymous_args = 0
>>         @ link register save eliminated.
>>         cmp     r1, #0
>>         mov     r3, #0
>>         add     r1, r0, r1
>>         blt     .L4
>>         cmp     r1, r0
>>         blt     .L3
>> .L2:
>>         str     r3, [r2]
>>         mov     r0, r1
>>         bx      lr
>> .L4:
>>         cmp     r1, r0
>>         ble     .L2
>> .L3:
>>         mov     r3, #1
>>         b       .L2
>>
>> With the overflow patch this now generates:
>>
>>        adds    r0, r0, r1
>>        movvs   r3, #1
>>        movvc   r3, #0
>>        str     r3, [r2]
>>        bx      lr
>>
>
> Thanks! That looks much better.
>
>>> Also, we'd need tests for each of these overflow operations, since 
>>> these are pretty complex
>>> patterns that are being added.
>>
>> The patterns are tested now most notably by tests in:
>>
>> c-c++-common/torture/builtin-arith-overflow*.c
>>
>> I had a few failures I resolved so the builtin overflow arithmetic 
>> functions are definitely being exercised.
>
> Great, that gives me more confidence on the correctness aspects but...

Not so fast. I went back and changed the TARGET_ARM conditions to 
TARGET_32BIT. When I did this some of the
test cases fail in thumb2 mode. I was a little surprised by this result 
since I generate the same rtl in both modes in almost
all cases. I am investigating.
>
>>>
>>> Also, you may want to consider splitting this into a patch series, 
>>> each adding a single
>>> overflow operation, together with its tests. That way it will be 
>>> easier to keep track of
>>> which pattern applies to which use case and they can go in 
>>> independently of each other.
>>
>> Let me know if you still fell the same way given the existing test 
>> cases.
>>
>
> ... I'd like us to still have scan-assembler tests. The torture tests 
> exercise the correctness,
> but we'd want tests to catch regressions where we stop generating the 
> new patterns due to other
> optimisation changes, which would lead to code quality regressions.
> So I'd like us to have scan-assembler tests for these sequences to 
> make sure we generate the right
> instructions.
I will definitely write some scan-assembler tests. Thanks for the feedback.

>
> Thanks,
> Kyrill
>
>>>
>>> +(define_expand "uaddv<mode>4"
>>> +  [(match_operand:SIDI 0 "register_operand")
>>> +   (match_operand:SIDI 1 "register_operand")
>>> +   (match_operand:SIDI 2 "register_operand")
>>> +   (match_operand 3 "")]
>>> +  "TARGET_ARM"
>>> +{
>>> +  emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], 
>>> operands[2]));
>>> +
>>> +  rtx x;
>>> +  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REGNUM), 
>>> const0_rtx);
>>> +  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
>>> +                gen_rtx_LABEL_REF (VOIDmode, operands[3]),
>>> +                pc_rtx);
>>> +  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
>>> +  DONE;
>>> +})
>>> +
>>>
>>> I notice this and many other patterns in this patch are guarded on 
>>> TARGET_ARM. Is there any reason why they
>>> should be restricted to arm state and not be TARGET_32BIT ?
>> I thought about this as well. I will test will TARGET_32BIT and get 
>> back to you.
>>>
>>>
>>> Thanks,
>>> Kyrill
>>
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [ARM] Add support for overflow add, sub, and neg operations
  2016-02-29 11:13     ` Kyrill Tkachov
  2016-02-29 11:25       ` Michael Collison
@ 2016-03-29  0:38       ` Michael Collison
  2016-05-24  9:11         ` Kyrill Tkachov
  1 sibling, 1 reply; 7+ messages in thread
From: Michael Collison @ 2016-03-29  0:38 UTC (permalink / raw)
  To: Kyrill Tkachov, GCC Patches, Ramana Radhakrishnan

[-- Attachment #1: Type: text/plain, Size: 6487 bytes --]

An updated patch that resolves the issues with thumb2 support and adds 
test cases as requested. Looking to check this into GCC 7 stage1 when it 
opens.

2016-02-24  Michael Collison  <michael.collison@arm.com>

     * config/arm/arm-modes.def: Add new condition code mode CC_V
     to represent the overflow bit.
     * config/arm/arm.c (maybe_get_arm_condition_code):
     Add support for CC_Vmode.
     * config/arm/arm.md (addv<mode>4, add<mode>3_compareV,
     addsi3_compareV_upper): New patterns to support signed
     builtin overflow add operations.
     (uaddv<mode>4, add<mode>3_compareC, addsi3_compareV_upper):
     New patterns to support unsigned builtin add overflow operations.
     (subv<mode>4, sub<mode>3_compare1): New patterns to support signed
     builtin overflow subtract operations,
     (usubv<mode>4): New patterns to support unsigned builtin subtract
     overflow operations.
     (negvsi3, negvdi3, negdi2_compare, negsi2_carryin_compare): New 
patterns
     to support builtin overflow negate operations.
     * gcc.target/arm/builtin_saddl.c: New testcase.
     * gcc.target/arm/builtin_saddll.c: New testcase.
     * gcc.target/arm/builtin_uaddl.c: New testcase.
     * gcc.target/arm/builtin_uaddll.c: New testcase.
     * gcc.target/arm/builtin_ssubl.c: New testcase.
     * gcc.target/arm/builtin_ssubll.c: New testcase.
     * gcc.target/arm/builtin_usubl.c: New testcase.
     * gcc.target/arm/builtin_usubll.c: New testcase.

On 02/29/2016 04:13 AM, Kyrill Tkachov wrote:
>
> On 26/02/16 10:32, Michael Collison wrote:
>>
>>
>> On 02/25/2016 02:51 AM, Kyrill Tkachov wrote:
>>> Hi Michael,
>>>
>>> On 24/02/16 23:02, Michael Collison wrote:
>>>> This patch adds support for builtin overflow of add, subtract and 
>>>> negate. This patch is targeted for gcc 7 stage 1. It was tested 
>>>> with no regressions in arm and thumb modes on the following targets:
>>>>
>>>> arm-non-linux-gnueabi
>>>> arm-non-linux-gnuabihf
>>>> armeb-none-linux-gnuabihf
>>>> arm-non-eabi
>>>>
>>>
>>> I'll have a deeper look once we're closer to GCC 7 development.
>>> I've got a few comments in the meantime.
>>>
>>>> 2016-02-24 Michael Collison <michael.collison@arm.com>
>>>>
>>>>     * config/arm/arm-modes.def: Add new condition code mode CC_V
>>>>     to represent the overflow bit.
>>>>     * config/arm/arm.c (maybe_get_arm_condition_code):
>>>>     Add support for CC_Vmode.
>>>>     * config/arm/arm.md (addv<mode>4, add<mode>3_compareV,
>>>>     addsi3_compareV_upper): New patterns to support signed
>>>>     builtin overflow add operations.
>>>>     (uaddv<mode>4, add<mode>3_compareC, addsi3_compareV_upper):
>>>>     New patterns to support unsigned builtin add overflow operations.
>>>>     (subv<mode>4, sub<mode>3_compare1): New patterns to support signed
>>>>     builtin overflow subtract operations,
>>>>     (usubv<mode>4): New patterns to support unsigned builtin subtract
>>>>     overflow operations.
>>>>     (negvsi3, negvdi3, negdi2_compre, negsi2_carryin_compare): New 
>>>> patterns
>>>>     to support builtin overflow negate operations.
>>>>
>>>>
>>>
>>> Can you please summarise what sequences are generated for these 
>>> operations, and how
>>> they are better than the default fallback sequences.
>>
>> Sure for a simple test case such as:
>>
>> int
>> fn3 (int x, int y, int *ovf)
>> {
>>   int res;
>>   *ovf = __builtin_sadd_overflow (x, y, &res);
>>   return res;
>> }
>>
>> Current trunk at -O2 generates
>>
>> fn3:
>>         @ args = 0, pretend = 0, frame = 0
>>         @ frame_needed = 0, uses_anonymous_args = 0
>>         @ link register save eliminated.
>>         cmp     r1, #0
>>         mov     r3, #0
>>         add     r1, r0, r1
>>         blt     .L4
>>         cmp     r1, r0
>>         blt     .L3
>> .L2:
>>         str     r3, [r2]
>>         mov     r0, r1
>>         bx      lr
>> .L4:
>>         cmp     r1, r0
>>         ble     .L2
>> .L3:
>>         mov     r3, #1
>>         b       .L2
>>
>> With the overflow patch this now generates:
>>
>>        adds    r0, r0, r1
>>        movvs   r3, #1
>>        movvc   r3, #0
>>        str     r3, [r2]
>>        bx      lr
>>
>
> Thanks! That looks much better.
>
>>> Also, we'd need tests for each of these overflow operations, since 
>>> these are pretty complex
>>> patterns that are being added.
>>
>> The patterns are tested now most notably by tests in:
>>
>> c-c++-common/torture/builtin-arith-overflow*.c
>>
>> I had a few failures I resolved so the builtin overflow arithmetic 
>> functions are definitely being exercised.
>
> Great, that gives me more confidence on the correctness aspects but...
>
>>>
>>> Also, you may want to consider splitting this into a patch series, 
>>> each adding a single
>>> overflow operation, together with its tests. That way it will be 
>>> easier to keep track of
>>> which pattern applies to which use case and they can go in 
>>> independently of each other.
>>
>> Let me know if you still fell the same way given the existing test 
>> cases.
>>
>
> ... I'd like us to still have scan-assembler tests. The torture tests 
> exercise the correctness,
> but we'd want tests to catch regressions where we stop generating the 
> new patterns due to other
> optimisation changes, which would lead to code quality regressions.
> So I'd like us to have scan-assembler tests for these sequences to 
> make sure we generate the right
> instructions.
>
> Thanks,
> Kyrill
>
>>>
>>> +(define_expand "uaddv<mode>4"
>>> +  [(match_operand:SIDI 0 "register_operand")
>>> +   (match_operand:SIDI 1 "register_operand")
>>> +   (match_operand:SIDI 2 "register_operand")
>>> +   (match_operand 3 "")]
>>> +  "TARGET_ARM"
>>> +{
>>> +  emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], 
>>> operands[2]));
>>> +
>>> +  rtx x;
>>> +  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REGNUM), 
>>> const0_rtx);
>>> +  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
>>> +                gen_rtx_LABEL_REF (VOIDmode, operands[3]),
>>> +                pc_rtx);
>>> +  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
>>> +  DONE;
>>> +})
>>> +
>>>
>>> I notice this and many other patterns in this patch are guarded on 
>>> TARGET_ARM. Is there any reason why they
>>> should be restricted to arm state and not be TARGET_32BIT ?
>> I thought about this as well. I will test will TARGET_32BIT and get 
>> back to you.
>>>
>>>
>>> Thanks,
>>> Kyrill
>>
>

-- 
Michael Collison
Linaro Toolchain Working Group
michael.collison@linaro.org


[-- Attachment #2: bugzilla-69663-v25-upstream.patch --]
[-- Type: text/x-patch, Size: 18746 bytes --]

diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def
index 1819553..69231f2 100644
--- a/gcc/config/arm/arm-modes.def
+++ b/gcc/config/arm/arm-modes.def
@@ -59,6 +59,7 @@ CC_MODE (CC_DGEU);
 CC_MODE (CC_DGTU);
 CC_MODE (CC_C);
 CC_MODE (CC_N);
+CC_MODE (CC_V);
 
 /* Vector modes.  */
 VECTOR_MODES (INT, 4);        /*            V4QI V2HI */
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index c868490..406b306 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -22906,6 +22906,8 @@ maybe_get_arm_condition_code (rtx comparison)
 	{
 	case LTU: return ARM_CS;
 	case GEU: return ARM_CC;
+	case NE: return ARM_CS;
+	case EQ: return ARM_CC;
 	default: return ARM_NV;
 	}
 
@@ -22931,6 +22933,15 @@ maybe_get_arm_condition_code (rtx comparison)
 	default: return ARM_NV;
 	}
 
+    case CC_Vmode:
+      switch (comp_code)
+	{
+	case NE: return ARM_VS;
+	case EQ: return ARM_VC;
+	default: return ARM_NV;
+
+	}
+
     case CCmode:
       switch (comp_code)
 	{
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 47171b9..812f082 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -539,6 +539,42 @@
    (set_attr "type" "multiple")]
 )
 
+(define_expand "addv<mode>4"
+  [(match_operand:SIDI 0 "register_operand")
+   (match_operand:SIDI 1 "register_operand")
+   (match_operand:SIDI 2 "register_operand")
+   (match_operand 3 "")]
+  "TARGET_32BIT"
+{
+  emit_insn (gen_add<mode>3_compareV (operands[0], operands[1], operands[2]));
+
+  rtx x;
+  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[3]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})
+
+(define_expand "uaddv<mode>4"
+  [(match_operand:SIDI 0 "register_operand")
+   (match_operand:SIDI 1 "register_operand")
+   (match_operand:SIDI 2 "register_operand")
+   (match_operand 3 "")]
+  "TARGET_32BIT"
+{
+  emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2]));
+
+  rtx x;
+  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[3]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})
+
 (define_expand "addsi3"
   [(set (match_operand:SI          0 "s_register_operand" "")
 	(plus:SI (match_operand:SI 1 "s_register_operand" "")
@@ -616,6 +652,165 @@
  ]
 )
 
+(define_insn_and_split "adddi3_compareV"
+  [(set (reg:CC_V CC_REGNUM)
+	(ne:CC_V
+	  (plus:TI
+	    (sign_extend:TI (match_operand:DI 1 "register_operand" "r"))
+	    (sign_extend:TI (match_operand:DI 2 "register_operand" "r")))
+	  (sign_extend:TI (plus:DI (match_dup 1) (match_dup 2)))))
+   (set (match_operand:DI 0 "register_operand" "=&r")
+	(plus:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_32BIT"
+  "#"
+  "TARGET_32BIT && reload_completed"
+  [(parallel [(set (reg:CC_C CC_REGNUM)
+		   (compare:CC_C (plus:SI (match_dup 1) (match_dup 2))
+				 (match_dup 1)))
+	      (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])
+   (parallel [(set (reg:CC_V CC_REGNUM)
+		   (ne:CC_V
+		    (plus:DI (plus:DI
+			      (sign_extend:DI (match_dup 4))
+			      (sign_extend:DI (match_dup 5)))
+			     (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0)))
+		    (plus:DI (sign_extend:DI
+			      (plus:SI (match_dup 4) (match_dup 5)))
+			     (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0)))))
+	     (set (match_dup 3) (plus:SI (plus:SI
+					  (match_dup 4) (match_dup 5))
+					 (ltu:SI (reg:CC_C CC_REGNUM)
+						 (const_int 0))))])]
+  "
+  {
+    operands[3] = gen_highpart (SImode, operands[0]);
+    operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[4] = gen_highpart (SImode, operands[1]);
+    operands[1] = gen_lowpart (SImode, operands[1]);
+    operands[5] = gen_highpart (SImode, operands[2]);
+    operands[2] = gen_lowpart (SImode, operands[2]);
+  }"
+ [(set_attr "conds" "set")
+   (set_attr "length" "8")
+   (set_attr "type" "multiple")]
+)
+
+(define_insn "addsi3_compareV"
+  [(set (reg:CC_V CC_REGNUM)
+	(ne:CC_V
+	  (plus:DI
+	    (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
+	    (sign_extend:DI (match_operand:SI 2 "register_operand" "r")))
+	  (sign_extend:DI (plus:SI (match_dup 1) (match_dup 2)))))
+   (set (match_operand:SI 0 "register_operand" "=r")
+	(plus:SI (match_dup 1) (match_dup 2)))]
+  "TARGET_32BIT"
+  "adds%?\\t%0, %1, %2"
+  [(set_attr "conds" "set")
+   (set_attr "type" "alus_sreg")]
+)
+
+(define_insn "*addsi3_compareV_upper"
+  [(set (reg:CC_V CC_REGNUM)
+	(ne:CC_V
+	  (plus:DI
+	   (plus:DI
+	    (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
+	    (sign_extend:DI (match_operand:SI 2 "register_operand" "r")))
+	   (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0)))
+	  (plus:DI (sign_extend:DI
+		    (plus:SI (match_dup 1) (match_dup 2)))
+		   (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0)))))
+   (set (match_operand:SI 0 "register_operand" "=r")
+	(plus:SI
+	 (plus:SI (match_dup 1) (match_dup 2))
+	 (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
+  "TARGET_32BIT"
+  "adcs%?\\t%0, %1, %2"
+  [(set_attr "conds" "set")
+   (set_attr "type" "adcs_reg")]
+)
+
+(define_insn_and_split "adddi3_compareC"
+  [(set (reg:CC_C CC_REGNUM)
+	(ne:CC_C
+	  (plus:TI
+	    (zero_extend:TI (match_operand:DI 1 "register_operand" "r"))
+	    (zero_extend:TI (match_operand:DI 2 "register_operand" "r")))
+	  (zero_extend:TI (plus:DI (match_dup 1) (match_dup 2)))))
+   (set (match_operand:DI 0 "register_operand" "=&r")
+	(plus:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_32BIT"
+  "#"
+  "TARGET_32BIT && reload_completed"
+  [(parallel [(set (reg:CC_C CC_REGNUM)
+		   (compare:CC_C (plus:SI (match_dup 1) (match_dup 2))
+				 (match_dup 1)))
+	      (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])
+   (parallel [(set (reg:CC_C CC_REGNUM)
+		   (ne:CC_C
+		    (plus:DI (plus:DI
+			      (zero_extend:DI (match_dup 4))
+			      (zero_extend:DI (match_dup 5)))
+			     (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0)))
+		    (plus:DI (zero_extend:DI
+			      (plus:SI (match_dup 4) (match_dup 5)))
+			     (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0)))))
+	     (set (match_dup 3) (plus:SI
+				 (plus:SI (match_dup 4) (match_dup 5))
+				 (ltu:SI (reg:CC_C CC_REGNUM)
+					 (const_int 0))))])]
+  "
+  {
+    operands[3] = gen_highpart (SImode, operands[0]);
+    operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[4] = gen_highpart (SImode, operands[1]);
+    operands[5] = gen_highpart (SImode, operands[2]);
+    operands[1] = gen_lowpart (SImode, operands[1]);
+    operands[2] = gen_lowpart (SImode, operands[2]);
+  }"
+ [(set_attr "conds" "set")
+   (set_attr "length" "8")
+   (set_attr "type" "multiple")]
+)
+
+(define_insn "*addsi3_compareC_upper"
+  [(set (reg:CC_C CC_REGNUM)
+	(ne:CC_C
+	  (plus:DI
+	   (plus:DI
+	    (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
+	    (zero_extend:DI (match_operand:SI 2 "register_operand" "r")))
+	   (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0)))
+	  (plus:DI (zero_extend:DI
+		    (plus:SI (match_dup 1) (match_dup 2)))
+		   (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0)))))
+   (set (match_operand:SI 0 "register_operand" "=r")
+	(plus:SI
+	 (plus:SI (match_dup 1) (match_dup 2))
+	 (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
+  "TARGET_32BIT"
+  "adcs%?\\t%0, %1, %2"
+  [(set_attr "conds" "set")
+   (set_attr "type" "adcs_reg")]
+)
+
+(define_insn "addsi3_compareC"
+   [(set (reg:CC_C CC_REGNUM)
+	 (ne:CC_C
+	  (plus:DI
+	   (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
+	   (zero_extend:DI (match_operand:SI 2 "register_operand" "r")))
+	  (zero_extend:DI
+	   (plus:SI (match_dup 1) (match_dup 2)))))
+    (set (match_operand:SI 0 "register_operand" "=r")
+	 (plus:SI (match_dup 1) (match_dup 2)))]
+  "TARGET_32BIT"
+  "adds%?\\t%0, %1, %2"
+  [(set_attr "conds" "set")
+   (set_attr "type" "alus_sreg")]
+)
+
 (define_insn "addsi3_compare0"
   [(set (reg:CC_NOOV CC_REGNUM)
 	(compare:CC_NOOV
@@ -865,6 +1060,85 @@
     (set_attr "type" "adcs_reg")]
 )
 
+(define_expand "subv<mode>4"
+  [(match_operand:SIDI 0 "register_operand")
+   (match_operand:SIDI 1 "register_operand")
+   (match_operand:SIDI 2 "register_operand")
+   (match_operand 3 "")]
+  "TARGET_32BIT"
+{
+  emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2]));
+
+  rtx x;
+  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[3]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})
+
+(define_expand "usubv<mode>4"
+  [(match_operand:SIDI 0 "register_operand")
+   (match_operand:SIDI 1 "register_operand")
+   (match_operand:SIDI 2 "register_operand")
+   (match_operand 3 "")]
+  "TARGET_32BIT"
+{
+  emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2]));
+
+  rtx x;
+  x = gen_rtx_LTU (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[3]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})
+
+(define_insn_and_split "subdi3_compare1"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (match_operand:DI 1 "register_operand" "r")
+	  (match_operand:DI 2 "register_operand" "r")))
+   (set (match_operand:DI 0 "register_operand" "=&r")
+	(minus:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_32BIT"
+  "#"
+  "TARGET_32BIT && reload_completed"
+  [(parallel [(set (reg:CC CC_REGNUM)
+		   (compare:CC (match_dup 1) (match_dup 2)))
+	      (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
+   (parallel [(set (reg:CC CC_REGNUM)
+		   (compare:CC (match_dup 4) (match_dup 5)))
+	     (set (match_dup 3) (minus:SI (minus:SI (match_dup 4) (match_dup 5))
+			       (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))])]
+  {
+    operands[3] = gen_highpart (SImode, operands[0]);
+    operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[4] = gen_highpart (SImode, operands[1]);
+    operands[1] = gen_lowpart (SImode, operands[1]);
+    operands[5] = gen_highpart (SImode, operands[2]);
+    operands[2] = gen_lowpart (SImode, operands[2]);
+   }
+  [(set_attr "conds" "set")
+   (set_attr "length" "8")
+   (set_attr "type" "multiple")]
+)
+
+(define_insn "subsi3_compare1"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (match_operand:SI 1 "register_operand" "r")
+	  (match_operand:SI 2 "register_operand" "r")))
+   (set (match_operand:SI 0 "register_operand" "=r")
+	(minus:SI (match_dup 1) (match_dup 2)))]
+  "TARGET_32BIT"
+  "subs%?\\t%0, %1, %2"
+  [(set_attr "conds" "set")
+   (set_attr "type" "alus_sreg")]
+)
+
 (define_insn "*subsi3_carryin"
   [(set (match_operand:SI 0 "s_register_operand" "=r,r")
         (minus:SI (minus:SI (match_operand:SI 1 "reg_or_int_operand" "r,I")
@@ -4349,6 +4623,74 @@
 \f
 ;; Unary arithmetic insns
 
+(define_expand "negvsi3"
+  [(match_operand:SI 0 "register_operand")
+   (match_operand:SI 1 "register_operand")
+   (match_operand 2 "")]
+  "TARGET_32BIT"
+{
+  emit_insn (gen_subsi3_compare (operands[0], const0_rtx, operands[1]));
+
+  rtx x;
+  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[2]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})
+
+(define_expand "negvdi3"
+  [(match_operand:DI 0 "register_operand")
+   (match_operand:DI 1 "register_operand")
+   (match_operand 2 "")]
+  "TARGET_ARM"
+{
+
+  emit_insn (gen_negdi2_compare (operands[0], operands[1]));
+
+  rtx x;
+  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[2]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})
+
+
+(define_insn_and_split "negdi2_compare"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (const_int 0)
+	  (match_operand:DI 1 "register_operand" "0,r")))
+   (set (match_operand:DI 0 "register_operand" "=r,&r")
+	(minus:DI (const_int 0) (match_dup 1)))]
+  "TARGET_ARM"
+  "#"
+  "TARGET_ARM && reload_completed"
+  [(parallel [(set (reg:CC CC_REGNUM)
+		   (compare:CC (const_int 0) (match_dup 1)))
+	      (set (match_dup 0) (minus:SI (const_int 0)
+					   (match_dup 1)))])
+   (parallel [(set (reg:CC CC_REGNUM)
+		   (compare:CC (const_int 0) (match_dup 3)))
+	     (set (match_dup 2)
+		  (minus:SI
+		   (minus:SI (const_int 0) (match_dup 3))
+		   (ltu:SI (reg:CC_C CC_REGNUM)
+			   (const_int 0))))])]
+  {
+    operands[2] = gen_highpart (SImode, operands[0]);
+    operands[0] = gen_lowpart (SImode, operands[0]);
+    operands[3] = gen_highpart (SImode, operands[1]);
+    operands[1] = gen_lowpart (SImode, operands[1]);
+  }
+  [(set_attr "conds" "set")
+   (set_attr "length" "8")
+   (set_attr "type" "multiple")]
+)
+
 (define_expand "negdi2"
  [(parallel
    [(set (match_operand:DI 0 "s_register_operand" "")
@@ -4389,6 +4731,20 @@
    (set_attr "type" "multiple")]
 )
 
+(define_insn "*negsi2_carryin_compare"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC (const_int 0)
+		    (match_operand:SI 1 "s_register_operand" "r")))
+   (set (match_operand:SI 0 "s_register_operand" "=r")
+	(minus:SI (minus:SI (const_int 0)
+			    (match_dup 1))
+		  (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))]
+  "TARGET_ARM"
+  "rscs\\t%0, %1, #0"
+  [(set_attr "conds" "set")
+   (set_attr "type" "alus_imm")]
+)
+
 (define_expand "negsi2"
   [(set (match_operand:SI         0 "s_register_operand" "")
 	(neg:SI (match_operand:SI 1 "s_register_operand" "")))]
diff --git a/gcc/testsuite/gcc.target/arm/builtin_saddl.c b/gcc/testsuite/gcc.target/arm/builtin_saddl.c
new file mode 100644
index 0000000..e9dab3c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/builtin_saddl.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+extern void overflow_handler ();
+
+long overflow_add (long x, long y)
+{
+  long r;
+
+  int ovr = __builtin_saddl_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* {dg-final { scan-assembler "adds" }} */
diff --git a/gcc/testsuite/gcc.target/arm/builtin_saddll.c b/gcc/testsuite/gcc.target/arm/builtin_saddll.c
new file mode 100644
index 0000000..c4db30e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/builtin_saddll.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+extern void overflow_handler ();
+
+long long overflow_add (long long x, long long y)
+{
+  long long r;
+
+  int ovr = __builtin_saddll_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* {dg-final { scan-assembler "adds" }} */
+/* {dg-final { scan-assembler "adcs" }} */
diff --git a/gcc/testsuite/gcc.target/arm/builtin_ssubl.c b/gcc/testsuite/gcc.target/arm/builtin_ssubl.c
new file mode 100644
index 0000000..766cddb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/builtin_ssubl.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+extern void overflow_handler ();
+
+long overflow_sub (long x, long y)
+{
+  long r;
+
+  int ovr = __builtin_ssubl_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* {dg-final { scan-assembler "subs" }} */
diff --git a/gcc/testsuite/gcc.target/arm/builtin_ssubll.c b/gcc/testsuite/gcc.target/arm/builtin_ssubll.c
new file mode 100644
index 0000000..085e92a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/builtin_ssubll.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+extern void overflow_handler ();
+
+long long overflow_sub (long long x, long long y)
+{
+  long long r;
+
+  int ovr = __builtin_ssubll_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* {dg-final { scan-assembler "subs" }} */
+/* {dg-final { scan-assembler "sbcs" }} */
diff --git a/gcc/testsuite/gcc.target/arm/builtin_uaddl.c b/gcc/testsuite/gcc.target/arm/builtin_uaddl.c
new file mode 100644
index 0000000..1ea59f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/builtin_uaddl.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+extern void overflow_handler ();
+
+unsigned long overflow_add (unsigned long x, unsigned long y)
+{
+  unsigned long r;
+
+  int ovr = __builtin_uaddl_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* {dg-final { scan-assembler "adds" }} */
diff --git a/gcc/testsuite/gcc.target/arm/builtin_uaddll.c b/gcc/testsuite/gcc.target/arm/builtin_uaddll.c
new file mode 100644
index 0000000..acf21a0c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/builtin_uaddll.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+extern void overflow_handler ();
+
+unsigned long long overflow_add (unsigned long long x, unsigned long long y)
+{
+  unsigned long long r;
+
+  int ovr = __builtin_uaddll_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* {dg-final { scan-assembler "adds" }} */
+/* {dg-final { scan-assembler "adcs" }} */
diff --git a/gcc/testsuite/gcc.target/arm/builtin_usubl.c b/gcc/testsuite/gcc.target/arm/builtin_usubl.c
new file mode 100644
index 0000000..9c508cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/builtin_usubl.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+extern void overflow_handler ();
+
+unsigned long overflow_sub (unsigned long x, unsigned long y)
+{
+  unsigned long r;
+
+  int ovr = __builtin_usubl_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* {dg-final { scan-assembler "subs" }} */
diff --git a/gcc/testsuite/gcc.target/arm/builtin_usubll.c b/gcc/testsuite/gcc.target/arm/builtin_usubll.c
new file mode 100644
index 0000000..8f513f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/builtin_usubll.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+extern void overflow_handler ();
+
+unsigned long long overflow_sub (unsigned long long x, unsigned long long y)
+{
+  unsigned long long r;
+
+  int ovr = __builtin_usubll_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* {dg-final { scan-assembler "subs" }} */
+/* {dg-final { scan-assembler "sbcs" }} */
-- 
1.9.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [ARM] Add support for overflow add, sub, and neg operations
  2016-03-29  0:38       ` Michael Collison
@ 2016-05-24  9:11         ` Kyrill Tkachov
  0 siblings, 0 replies; 7+ messages in thread
From: Kyrill Tkachov @ 2016-05-24  9:11 UTC (permalink / raw)
  To: Michael Collison, GCC Patches, Ramana Radhakrishnan

Hi Michael,

Sorry for the delay in reviewing. A few comments at the bottom.

On 29/03/16 00:19, Michael Collison wrote:
> An updated patch that resolves the issues with thumb2 support and adds test cases as requested. Looking to check this into GCC 7 stage1 when it opens.
>
> 2016-02-24  Michael Collison  <michael.collison@arm.com>
>
>     * config/arm/arm-modes.def: Add new condition code mode CC_V
>     to represent the overflow bit.
>     * config/arm/arm.c (maybe_get_arm_condition_code):
>     Add support for CC_Vmode.
>     * config/arm/arm.md (addv<mode>4, add<mode>3_compareV,
>     addsi3_compareV_upper): New patterns to support signed
>     builtin overflow add operations.
>     (uaddv<mode>4, add<mode>3_compareC, addsi3_compareV_upper):
>     New patterns to support unsigned builtin add overflow operations.
>     (subv<mode>4, sub<mode>3_compare1): New patterns to support signed
>     builtin overflow subtract operations,
>     (usubv<mode>4): New patterns to support unsigned builtin subtract
>     overflow operations.
>     (negvsi3, negvdi3, negdi2_compare, negsi2_carryin_compare): New patterns
>     to support builtin overflow negate operations.
>     * gcc.target/arm/builtin_saddl.c: New testcase.
>     * gcc.target/arm/builtin_saddll.c: New testcase.
>     * gcc.target/arm/builtin_uaddl.c: New testcase.
>     * gcc.target/arm/builtin_uaddll.c: New testcase.
>     * gcc.target/arm/builtin_ssubl.c: New testcase.
>     * gcc.target/arm/builtin_ssubll.c: New testcase.
>     * gcc.target/arm/builtin_usubl.c: New testcase.
>     * gcc.target/arm/builtin_usubll.c: New testcase.
>
> On 02/29/2016 04:13 AM, Kyrill Tkachov wrote:
>>
>> On 26/02/16 10:32, Michael Collison wrote:
>>>
>>>
>>> On 02/25/2016 02:51 AM, Kyrill Tkachov wrote:
>>>> Hi Michael,
>>>>
>>>> On 24/02/16 23:02, Michael Collison wrote:
>>>>> This patch adds support for builtin overflow of add, subtract and negate. This patch is targeted for gcc 7 stage 1. It was tested with no regressions in arm and thumb modes on the following targets:
>>>>>
>>>>> arm-non-linux-gnueabi
>>>>> arm-non-linux-gnuabihf
>>>>> armeb-none-linux-gnuabihf
>>>>> arm-non-eabi
>>>>>
>>>>
>>>> I'll have a deeper look once we're closer to GCC 7 development.
>>>> I've got a few comments in the meantime.
>>>>
>>>>> 2016-02-24 Michael Collison <michael.collison@arm.com>
>>>>>
>>>>>     * config/arm/arm-modes.def: Add new condition code mode CC_V
>>>>>     to represent the overflow bit.
>>>>>     * config/arm/arm.c (maybe_get_arm_condition_code):
>>>>>     Add support for CC_Vmode.
>>>>>     * config/arm/arm.md (addv<mode>4, add<mode>3_compareV,
>>>>>     addsi3_compareV_upper): New patterns to support signed
>>>>>     builtin overflow add operations.
>>>>>     (uaddv<mode>4, add<mode>3_compareC, addsi3_compareV_upper):
>>>>>     New patterns to support unsigned builtin add overflow operations.
>>>>>     (subv<mode>4, sub<mode>3_compare1): New patterns to support signed
>>>>>     builtin overflow subtract operations,
>>>>>     (usubv<mode>4): New patterns to support unsigned builtin subtract
>>>>>     overflow operations.
>>>>>     (negvsi3, negvdi3, negdi2_compre, negsi2_carryin_compare): New patterns
>>>>>     to support builtin overflow negate operations.
>>>>>
>>>>>
>>>>
>>>> Can you please summarise what sequences are generated for these operations, and how
>>>> they are better than the default fallback sequences.
>>>
>>> Sure for a simple test case such as:
>>>
>>> int
>>> fn3 (int x, int y, int *ovf)
>>> {
>>>   int res;
>>>   *ovf = __builtin_sadd_overflow (x, y, &res);
>>>   return res;
>>> }
>>>
>>> Current trunk at -O2 generates
>>>
>>> fn3:
>>>         @ args = 0, pretend = 0, frame = 0
>>>         @ frame_needed = 0, uses_anonymous_args = 0
>>>         @ link register save eliminated.
>>>         cmp     r1, #0
>>>         mov     r3, #0
>>>         add     r1, r0, r1
>>>         blt     .L4
>>>         cmp     r1, r0
>>>         blt     .L3
>>> .L2:
>>>         str     r3, [r2]
>>>         mov     r0, r1
>>>         bx      lr
>>> .L4:
>>>         cmp     r1, r0
>>>         ble     .L2
>>> .L3:
>>>         mov     r3, #1
>>>         b       .L2
>>>
>>> With the overflow patch this now generates:
>>>
>>>        adds    r0, r0, r1
>>>        movvs   r3, #1
>>>        movvc   r3, #0
>>>        str     r3, [r2]
>>>        bx      lr
>>>
>>
>> Thanks! That looks much better.
>>
>>>> Also, we'd need tests for each of these overflow operations, since these are pretty complex
>>>> patterns that are being added.
>>>
>>> The patterns are tested now most notably by tests in:
>>>
>>> c-c++-common/torture/builtin-arith-overflow*.c
>>>
>>> I had a few failures I resolved so the builtin overflow arithmetic functions are definitely being exercised.
>>
>> Great, that gives me more confidence on the correctness aspects but...
>>
>>>>
>>>> Also, you may want to consider splitting this into a patch series, each adding a single
>>>> overflow operation, together with its tests. That way it will be easier to keep track of
>>>> which pattern applies to which use case and they can go in independently of each other.
>>>
>>> Let me know if you still fell the same way given the existing test cases.
>>>
>>
>> ... I'd like us to still have scan-assembler tests. The torture tests exercise the correctness,
>> but we'd want tests to catch regressions where we stop generating the new patterns due to other
>> optimisation changes, which would lead to code quality regressions.
>> So I'd like us to have scan-assembler tests for these sequences to make sure we generate the right
>> instructions.
>>
>> Thanks,
>> Kyrill
>>
>>>>
>>>> +(define_expand "uaddv<mode>4"
>>>> +  [(match_operand:SIDI 0 "register_operand")
>>>> +   (match_operand:SIDI 1 "register_operand")
>>>> +   (match_operand:SIDI 2 "register_operand")
>>>> +   (match_operand 3 "")]
>>>> +  "TARGET_ARM"
>>>> +{
>>>> +  emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2]));
>>>> +
>>>> +  rtx x;
>>>> +  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REGNUM), const0_rtx);
>>>> +  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
>>>> +                gen_rtx_LABEL_REF (VOIDmode, operands[3]),
>>>> +                pc_rtx);
>>>> +  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
>>>> +  DONE;
>>>> +})
>>>> +
>>>>
>>>> I notice this and many other patterns in this patch are guarded on TARGET_ARM. Is there any reason why they
>>>> should be restricted to arm state and not be TARGET_32BIT ?
>>> I thought about this as well. I will test will TARGET_32BIT and get back to you.
>>>>
>>>>
>>>> Thanks,
>>>> Kyrill

+(define_expand "addv<mode>4"
+  [(match_operand:SIDI 0 "register_operand")
+   (match_operand:SIDI 1 "register_operand")
+   (match_operand:SIDI 2 "register_operand")
+   (match_operand 3 "")]
+  "TARGET_32BIT"
+{
+  emit_insn (gen_add<mode>3_compareV (operands[0], operands[1], operands[2]));
+
+  rtx x;
+  x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, CC_REGNUM), const0_rtx);
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, operands[3]),
+			    pc_rtx);
+  emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  DONE;
+})

This sequence (gen_rtx_NE, gen_rtx_IF_THEN_ELSE, emit_jump_insn) is used many times in this patch.
Please factor it out into a helper function in arm.c (and expose it through arm-protos.h) to eliminate
the code duplication. In the implementation of that function you can use emit_unlikely_jump instead of
emit_jump_insn for the branch so that the probability of the branch is set appropriately low since
overflow is expected to be the unlikely outcome in these operations.

  
+(define_insn_and_split "adddi3_compareV"
+  [(set (reg:CC_V CC_REGNUM)
+	(ne:CC_V
+	  (plus:TI
+	    (sign_extend:TI (match_operand:DI 1 "register_operand" "r"))
+	    (sign_extend:TI (match_operand:DI 2 "register_operand" "r")))
+	  (sign_extend:TI (plus:DI (match_dup 1) (match_dup 2)))))
+   (set (match_operand:DI 0 "register_operand" "=&r")
+	(plus:DI (match_dup 1) (match_dup 2)))]
+  "TARGET_32BIT"
+  "#"
+  "TARGET_32BIT && reload_completed"

For the split condition just write "&& reload_completed".
This means that the split condition is the match condition (TARGET_32BIT) with
"&& reload_completed" appended to it. I know there are a few places in arm.md that
don't follow that, but I hope to clean those up some day.
Same with the other splitters in this patch.

diff --git a/gcc/testsuite/gcc.target/arm/builtin_saddl.c b/gcc/testsuite/gcc.target/arm/builtin_saddl.c
new file mode 100644
index 0000000..e9dab3c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/builtin_saddl.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+extern void overflow_handler ();
+
+long overflow_add (long x, long y)
+{
+  long r;
+
+  int ovr = __builtin_saddl_overflow (x, y, &r);
+  if (ovr)
+    overflow_handler ();
+
+  return r;
+}
+
+/* {dg-final { scan-assembler "adds" }} */

IIRC DejaGNU can be picky with spaces between the braces and the dg-* directives
please write this as:

{ dg-final { scan-assembler "adds" } }

I think the patch looks good otherwise.
Please respin with the above addressed and I think it'll be good to go.

Thanks for sticking with it,
Kyrill

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-05-24  9:10 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-24 23:02 [ARM] Add support for overflow add, sub, and neg operations Michael Collison
2016-02-25  9:51 ` Kyrill Tkachov
2016-02-26 10:32   ` Michael Collison
2016-02-29 11:13     ` Kyrill Tkachov
2016-02-29 11:25       ` Michael Collison
2016-03-29  0:38       ` Michael Collison
2016-05-24  9:11         ` Kyrill Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).