public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, aarch64] Fix pr69305 -- addti miscompilation
@ 2016-01-24 11:19 Richard Henderson
  2016-01-27 17:31 ` James Greenhalgh
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Henderson @ 2016-01-24 11:19 UTC (permalink / raw)
  To: gcc-patches; +Cc: Marcus Shawcroft, Richard Earnshaw

[-- Attachment #1: Type: text/plain, Size: 564 bytes --]

As Jakub notes in the PR, the representation for add_compare and sub_compare 
were wrong.  And several of the add_carryin patterns were duplicates.

This adds a CC_Cmode for which only the Carry bit is valid.

The patch appears to generate moderately decent code.  For gcc7 we should look 
into why we'll prefer to mark an output REG_UNUSED instead of matching the 
pattern with that output removed.  This results in continuing to use adds 
(though simplifying adc) after we've proved that there will be no carry into 
the high part of an adds+adc pair.

Ok?


r~

[-- Attachment #2: y --]
[-- Type: text/plain, Size: 16723 bytes --]

	* config/aarch64/aarch64-modes.def (CC_Cmode): New.
	* config/aarch64/aarch64.c (aarch64_select_cc_mode): Add check for
	use of CC_Cmode.
	(aarch64_get_condition_code_1): Handle CC_Cmode.
	* config/aarch64/aarch64.md (addti3): Use adddi3_compare1.
	(*add<mode>3_compare1_cconly): New.
	(add<mode>3_compare1): New.
	(add<mode>3_carryin, *addsi3_carryin_uxtw): Sort compare operand
	to be first.  Use aarch64_carry_operation.
	(*add<mode>3_carryin_alt1, *addsi3_carryin_alt1_uxtw): Remove.
	(*add<mode>3_carryin_alt2, *addsi3_carryin_alt2_uxtw): Remove.
	(*add<mode>3_carryin_alt3, *addsi3_carryin_alt3_uxtw): Remove.
	(subti3): Use subdi3_compare1.
	(*sub<mode>3_compare0): Rename from sub<mode>3_compare0.
	(sub<mode>3_compare1): New.
	(*sub<mode>3_carryin0, *subsi3_carryin_uxtw): New.
	(*sub<mode>3_carryin): Use aarch64_borrow_operation.
	(*subsi3_carryin_uxtw): Likewise.
	(*ngc<mode>, *ngcsi_uxtw): Likewise.
	(*sub<mode>3_carryin_alt, *subsi3_carryin_alt_uxtw): New.
	* config/aarch64/iterators.md (DWI): New.
	* config/aarch64/predicates.md (aarch64_carry_operation): New.
	(aarch64_borrow_operation): New.

testsuite/
	* testsuite/gcc.target/aarch64/ccmp_1.c: Accept wzr for zero.
	* testsuite/gcc.target/aarch64/tst_3.c: Accept ands for tst.



diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 3fab205..7de0b3f 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -25,6 +25,7 @@ CC_MODE (CC_ZESWP); /* zero-extend LHS (but swap to make it RHS).  */
 CC_MODE (CC_SESWP); /* sign-extend LHS (but swap to make it RHS).  */
 CC_MODE (CC_NZ);    /* Only N and Z bits of condition flags are valid.  */
 CC_MODE (CC_Z);     /* Only Z bit of condition flags is valid.  */
+CC_MODE (CC_C);     /* Only C bit of condition flags is valid.  */
 
 /* Half-precision floating point for __fp16.  */
 FLOAT_MODE (HF, 2, 0);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 38c7443..0c18ab2 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4185,6 +4185,13 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
     return ((code == GT || code == GE || code == LE || code == LT)
 	    ? CC_SESWPmode : CC_ZESWPmode);
 
+  /* A test for unsigned overflow.  */
+  if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
+      && code == NE
+      && GET_CODE (x) == PLUS
+      && GET_CODE (y) == ZERO_EXTEND)
+    return CC_Cmode;
+
   /* For everything else, return CCmode.  */
   return CCmode;
 }
@@ -4284,6 +4291,15 @@ aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
 	}
       break;
 
+    case CC_Cmode:
+      switch (comp_code)
+	{
+	case NE: return AARCH64_CS;
+	case EQ: return AARCH64_CC;
+	default: return -1;
+	}
+      break;
+
     default:
       return -1;
       break;
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 71fc514..363785e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1710,7 +1710,7 @@
   ""
 {
   rtx low = gen_reg_rtx (DImode);
-  emit_insn (gen_adddi3_compare0 (low, gen_lowpart (DImode, operands[1]),
+  emit_insn (gen_adddi3_compare1 (low, gen_lowpart (DImode, operands[1]),
 				  gen_lowpart (DImode, operands[2])));
 
   rtx high = gen_reg_rtx (DImode);
@@ -1755,6 +1755,44 @@
   [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
 )
 
+(define_insn "*add<mode>3_compare1_cconly"
+  [(set (reg:CC_C CC_REGNUM)
+	(ne:CC_C
+	  (plus:<DWI>
+	    (zero_extend:<DWI>
+	      (match_operand:GPI 0 "aarch64_reg_or_zero" "%rZ,rZ,rZ"))
+	    (zero_extend:<DWI>
+	      (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J")))
+	  (zero_extend:<DWI>
+	    (plus:GPI (match_dup 0) (match_dup 1)))))]
+  ""
+  "@
+  cmn\\t%<w>0, %<w>1
+  cmn\\t%<w>0, %<w>1
+  cmp\\t%<w>0, #%n1"
+  [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
+)
+
+(define_insn "add<mode>3_compare1"
+  [(set (reg:CC_C CC_REGNUM)
+	(ne:CC_C
+	  (plus:<DWI>
+	    (zero_extend:<DWI>
+	      (match_operand:GPI 1 "aarch64_reg_or_zero" "%rZ,rZ,rZ"))
+	    (zero_extend:<DWI>
+	      (match_operand:GPI 2 "aarch64_plus_operand" "r,I,J")))
+	  (zero_extend:<DWI>
+	    (plus:GPI (match_dup 1) (match_dup 2)))))
+   (set (match_operand:GPI 0 "register_operand" "=r,r,r")
+	(plus:GPI (match_dup 1) (match_dup 2)))]
+  ""
+  "@
+  adds\\t%<w>0, %<w>1, %<w>2
+  adds\\t%<w>0, %<w>1, %<w>2
+  subs\\t%<w>0, %<w>1, #%n2"
+  [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
+)
+
 (define_insn "*adds_shift_imm_<mode>"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ
@@ -2074,105 +2112,42 @@
   [(set_attr "type" "alu_ext")]
 )
 
-(define_insn "add<mode>3_carryin"
-  [(set
-    (match_operand:GPI 0 "register_operand" "=r")
-    (plus:GPI (geu:GPI (reg:CC CC_REGNUM) (const_int 0))
-	      (plus:GPI
-		(match_operand:GPI 1 "register_operand" "r")
-		(match_operand:GPI 2 "register_operand" "r"))))]
-   ""
-   "adc\\t%<w>0, %<w>1, %<w>2"
-  [(set_attr "type" "adc_reg")]
-)
-
-;; zero_extend version of above
-(define_insn "*addsi3_carryin_uxtw"
-  [(set
-    (match_operand:DI 0 "register_operand" "=r")
-    (zero_extend:DI
-     (plus:SI (geu:SI (reg:CC CC_REGNUM) (const_int 0))
-	      (plus:SI
-	       (match_operand:SI 1 "register_operand" "r")
-	       (match_operand:SI 2 "register_operand" "r")))))]
-   ""
-   "adc\\t%w0, %w1, %w2"
-  [(set_attr "type" "adc_reg")]
-)
-
-(define_insn "*add<mode>3_carryin_alt1"
-  [(set
-    (match_operand:GPI 0 "register_operand" "=r")
-    (plus:GPI (plus:GPI
-		(match_operand:GPI 1 "register_operand" "r")
-		(match_operand:GPI 2 "register_operand" "r"))
-              (geu:GPI (reg:CC CC_REGNUM) (const_int 0))))]
+(define_expand "add<mode>3_carryin"
+  [(set (match_operand:GPI 0 "register_operand")
+	(plus:GPI
+	  (plus:GPI
+	    (ne:GPI (reg:CC_C CC_REGNUM) (const_int 0))
+	    (match_operand:GPI 1 "aarch64_reg_or_zero"))
+	  (match_operand:GPI 2 "aarch64_reg_or_zero")))]
    ""
-   "adc\\t%<w>0, %<w>1, %<w>2"
-  [(set_attr "type" "adc_reg")]
-)
-
-;; zero_extend version of above
-(define_insn "*addsi3_carryin_alt1_uxtw"
-  [(set
-    (match_operand:DI 0 "register_operand" "=r")
-    (zero_extend:DI
-     (plus:SI (plus:SI
-	       (match_operand:SI 1 "register_operand" "r")
-	       (match_operand:SI 2 "register_operand" "r"))
-              (geu:SI (reg:CC CC_REGNUM) (const_int 0)))))]
    ""
-   "adc\\t%w0, %w1, %w2"
-  [(set_attr "type" "adc_reg")]
-)
-
-(define_insn "*add<mode>3_carryin_alt2"
-  [(set
-    (match_operand:GPI 0 "register_operand" "=r")
-    (plus:GPI (plus:GPI
-                (geu:GPI (reg:CC CC_REGNUM) (const_int 0))
-		(match_operand:GPI 1 "register_operand" "r"))
-	      (match_operand:GPI 2 "register_operand" "r")))]
-   ""
-   "adc\\t%<w>0, %<w>1, %<w>2"
-  [(set_attr "type" "adc_reg")]
 )
 
-;; zero_extend version of above
-(define_insn "*addsi3_carryin_alt2_uxtw"
-  [(set
-    (match_operand:DI 0 "register_operand" "=r")
-    (zero_extend:DI
-     (plus:SI (plus:SI
-               (geu:SI (reg:CC CC_REGNUM) (const_int 0))
-	       (match_operand:SI 1 "register_operand" "r"))
-	      (match_operand:SI 2 "register_operand" "r"))))]
-   ""
-   "adc\\t%w0, %w1, %w2"
-  [(set_attr "type" "adc_reg")]
-)
+;; Note that a single add with carry is matched by cinc,
+;; and the adc_reg and csel types are matched into the same
+;; pipelines by existing cores.
 
-(define_insn "*add<mode>3_carryin_alt3"
-  [(set
-    (match_operand:GPI 0 "register_operand" "=r")
-    (plus:GPI (plus:GPI
-                (geu:GPI (reg:CC CC_REGNUM) (const_int 0))
-		(match_operand:GPI 2 "register_operand" "r"))
-	      (match_operand:GPI 1 "register_operand" "r")))]
+(define_insn "*add<mode>3_carryin"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(plus:GPI
+	  (plus:GPI
+	    (match_operand:GPI 3 "aarch64_carry_operation" "")
+	    (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ"))
+	  (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")))]
    ""
    "adc\\t%<w>0, %<w>1, %<w>2"
   [(set_attr "type" "adc_reg")]
 )
 
 ;; zero_extend version of above
-(define_insn "*addsi3_carryin_alt3_uxtw"
-  [(set
-    (match_operand:DI 0 "register_operand" "=r")
-    (zero_extend:DI
-     (plus:SI (plus:SI
-               (geu:SI (reg:CC CC_REGNUM) (const_int 0))
-	       (match_operand:SI 2 "register_operand" "r"))
-	      (match_operand:SI 1 "register_operand" "r"))))]
+(define_insn "*addsi3_carryin_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (plus:SI
+	    (plus:SI
+	      (match_operand:SI 3 "aarch64_carry_operation" "")
+	      (match_operand:SI 1 "aarch64_reg_or_zero" "rZ"))
+	    (match_operand:SI 2 "aarch64_reg_or_zero" "rZ"))))]
    ""
    "adc\\t%w0, %w1, %w2"
   [(set_attr "type" "adc_reg")]
@@ -2281,7 +2256,7 @@
   ""
 {
   rtx low = gen_reg_rtx (DImode);
-  emit_insn (gen_subdi3_compare0 (low, gen_lowpart (DImode, operands[1]),
+  emit_insn (gen_subdi3_compare1 (low, gen_lowpart (DImode, operands[1]),
 				  gen_lowpart (DImode, operands[2])));
 
   rtx high = gen_reg_rtx (DImode);
@@ -2293,7 +2268,7 @@
   DONE;
 })
 
-(define_insn "sub<mode>3_compare0"
+(define_insn "*sub<mode>3_compare0"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ (minus:GPI (match_operand:GPI 1 "register_operand" "r")
 				  (match_operand:GPI 2 "register_operand" "r"))
@@ -2318,6 +2293,18 @@
   [(set_attr "type" "alus_sreg")]
 )
 
+(define_insn "sub<mode>3_compare1"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+	  (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")))
+   (set (match_operand:GPI 0 "register_operand" "=r")
+	(minus:GPI (match_dup 1) (match_dup 2)))]
+  ""
+  "subs\\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "type" "alus_sreg")]
+)
+
 (define_insn "*sub_<shift>_<mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(minus:GPI (match_operand:GPI 3 "register_operand" "r")
@@ -2440,13 +2427,53 @@
   [(set_attr "type" "alu_ext")]
 )
 
-(define_insn "sub<mode>3_carryin"
-  [(set
-    (match_operand:GPI 0 "register_operand" "=r")
-    (minus:GPI (minus:GPI
-		(match_operand:GPI 1 "register_operand" "r")
-		(ltu:GPI (reg:CC CC_REGNUM) (const_int 0)))
-	       (match_operand:GPI 2 "register_operand" "r")))]
+;; The hardware description is op1 + ~op2 + C.
+;;                           = op1 + (-op2 + 1) + (1 - !C)
+;;                           = op1 - op2 - 1 + 1 - !C
+;;                           = op1 - op2 - !C.
+;; We describe the later.
+
+(define_insn "*sub<mode>3_carryin0"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(minus:GPI
+	  (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+	  (match_operand:GPI 2 "aarch64_borrow_operation" "")))]
+   ""
+   "sbc\\t%<w>0, %<w>1, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
+;; zero_extend version of the above
+(define_insn "*subsi3_carryin_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (minus:SI
+	    (match_operand:SI 1 "aarch64_reg_or_zero" "rZ")
+	    (match_operand:SI 2 "aarch64_borrow_operation" ""))))]
+   ""
+   "sbc\\t%w0, %w1, wzr"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_expand "sub<mode>3_carryin"
+  [(set (match_operand:GPI 0 "register_operand")
+	(minus:GPI
+	  (minus:GPI
+	    (match_operand:GPI 1 "aarch64_reg_or_zero")
+	    (match_operand:GPI 2 "register_operand"))
+	  (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))]
+   ""
+   ""
+)
+
+(define_insn "*sub<mode>3_carryin"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(minus:GPI
+	  (minus:GPI
+	    (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+	    (match_operand:GPI 2 "register_operand" "r"))
+	  (match_operand:GPI 3 "aarch64_borrow_operation" "")))]
+
    ""
    "sbc\\t%<w>0, %<w>1, %<w>2"
   [(set_attr "type" "adc_reg")]
@@ -2454,13 +2481,40 @@
 
 ;; zero_extend version of the above
 (define_insn "*subsi3_carryin_uxtw"
-  [(set
-    (match_operand:DI 0 "register_operand" "=r")
-    (zero_extend:DI
-     (minus:SI (minus:SI
-		(match_operand:SI 1 "register_operand" "r")
-		(ltu:SI (reg:CC CC_REGNUM) (const_int 0)))
-	       (match_operand:SI 2 "register_operand" "r"))))]
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (minus:SI
+	    (minus:SI
+	      (match_operand:SI 1 "aarch64_reg_or_zero" "rZ")
+	      (match_operand:SI 2 "register_operand" "r"))
+	    (match_operand:SI 3 "aarch64_borrow_operation" ""))))]
+
+   ""
+   "sbc\\t%w0, %w1, %w2"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*sub<mode>3_carryin_alt"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(minus:GPI
+	  (minus:GPI
+	    (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+	    (match_operand:GPI 3 "aarch64_borrow_operation" ""))
+	  (match_operand:GPI 2 "register_operand" "r")))]
+   ""
+   "sbc\\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "type" "adc_reg")]
+)
+
+;; zero_extend version of the above
+(define_insn "*subsi3_carryin_alt_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (minus:SI
+	    (minus:SI
+	      (match_operand:SI 1 "aarch64_reg_or_zero" "rZ")
+	      (match_operand:SI 3 "aarch64_borrow_operation" ""))
+	    (match_operand:SI 2 "register_operand" "r"))))]
    ""
    "sbc\\t%w0, %w1, %w2"
   [(set_attr "type" "adc_reg")]
@@ -2564,8 +2618,9 @@
 
 (define_insn "*ngc<mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
-	(minus:GPI (neg:GPI (ltu:GPI (reg:CC CC_REGNUM) (const_int 0)))
-		   (match_operand:GPI 1 "register_operand" "r")))]
+	(minus:GPI
+	  (neg:GPI (match_operand:GPI 2 "aarch64_borrow_operation" ""))
+	  (match_operand:GPI 1 "register_operand" "r")))]
   ""
   "ngc\\t%<w>0, %<w>1"
   [(set_attr "type" "adc_reg")]
@@ -2574,8 +2629,9 @@
 (define_insn "*ngcsi_uxtw"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
-	 (minus:SI (neg:SI (ltu:SI (reg:CC CC_REGNUM) (const_int 0)))
-		   (match_operand:SI 1 "register_operand" "r"))))]
+	  (minus:SI
+	    (neg:SI (match_operand:SI 2 "aarch64_borrow_operation" ""))
+	    (match_operand:SI 1 "register_operand" "r"))))]
   ""
   "ngc\\t%w0, %w1"
   [(set_attr "type" "adc_reg")]
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 49598a2..d9bd391 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -350,6 +350,9 @@
 ;; For constraints used in scalar immediate vector moves
 (define_mode_attr hq [(HI "h") (QI "q")])
 
+;; For doubling width of an integer mode
+(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI")])
+
 ;; For scalar usage of vector/FP registers
 (define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d")
 		    (SF "s") (DF "d")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 3eb33fa..fa6f96d 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -242,6 +242,25 @@
   return aarch64_get_condition_code (op) >= 0;
 })
 
+(define_special_predicate "aarch64_carry_operation"
+  (match_code "ne,geu")
+{
+  if (XEXP (op, 1) != const0_rtx)
+    return false;
+  machine_mode ccmode = (GET_CODE (op) == NE ? CC_Cmode : CCmode);
+  rtx op0 = XEXP (op, 0);
+  return REG_P (op0) && REGNO (op0) == CC_REGNUM && GET_MODE (op0) == ccmode;
+})
+
+(define_special_predicate "aarch64_borrow_operation"
+  (match_code "eq,ltu")
+{
+  if (XEXP (op, 1) != const0_rtx)
+    return false;
+  machine_mode ccmode = (GET_CODE (op) == EQ ? CC_Cmode : CCmode);
+  rtx op0 = XEXP (op, 0);
+  return REG_P (op0) && REGNO (op0) == CC_REGNUM && GET_MODE (op0) == ccmode;
+})
 
 ;; True if the operand is memory reference suitable for a load/store exclusive.
 (define_predicate "aarch64_sync_memory_operand"
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
index 7c39b61..2b1e87b 100644
--- a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
@@ -85,7 +85,7 @@ f13 (int a, int b)
 /* { dg-final { scan-assembler "cmp\t(.)+34" } } */
 /* { dg-final { scan-assembler "cmp\t(.)+35" } } */
 
-/* { dg-final { scan-assembler-times "\tcmp\tw\[0-9\]+, 0" 4 } } */
+/* { dg-final { scan-assembler-times "\tcmp\tw\[0-9\]+, (0|wzr)" 4 } } */
 /* { dg-final { scan-assembler-times "fcmpe\t(.)+0\\.0" 2 } } */
 /* { dg-final { scan-assembler-times "fcmp\t(.)+0\\.0" 2 } } */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/tst_3.c b/gcc/testsuite/gcc.target/aarch64/tst_3.c
index 2204b33..3fea633 100644
--- a/gcc/testsuite/gcc.target/aarch64/tst_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/tst_3.c
@@ -9,4 +9,4 @@ f1 (int x)
   return x;
 }
 
-/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]*.*1" } } */
+/* { dg-final { scan-assembler "(tst|ands)\t(x|w)\[0-9\]*.*1" } } */

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH, aarch64] Fix pr69305 -- addti miscompilation
  2016-01-24 11:19 [PATCH, aarch64] Fix pr69305 -- addti miscompilation Richard Henderson
@ 2016-01-27 17:31 ` James Greenhalgh
  2016-01-27 18:18   ` Richard Henderson
  0 siblings, 1 reply; 4+ messages in thread
From: James Greenhalgh @ 2016-01-27 17:31 UTC (permalink / raw)
  To: Richard Henderson; +Cc: gcc-patches, Marcus Shawcroft, Richard Earnshaw

On Sun, Jan 24, 2016 at 03:19:35AM -0800, Richard Henderson wrote:
> As Jakub notes in the PR, the representation for add_compare and
> sub_compare were wrong.  And several of the add_carryin patterns
> were duplicates.
> 
> This adds a CC_Cmode for which only the Carry bit is valid.
> 
> The patch appears to generate moderately decent code.  For gcc7 we
> should look into why we'll prefer to mark an output REG_UNUSED
> instead of matching the pattern with that output removed.  This
> results in continuing to use adds (though simplifying adc) after
> we've proved that there will be no carry into the high part of an
> adds+adc pair.
> 
> Ok?
> 
> 
> r~

Hi Richard,

Some tiny nits below:

> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 71fc514..363785e 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1755,6 +1755,44 @@
>    [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
>  )
>  
> +(define_insn "*add<mode>3_compare1_cconly"

I don't understand the naming scheme, it got me a wee bit confused with
add<mode>3_compare0 and friends, where the 0 indicates a comparison with
zero...

> +  [(set (reg:CC_C CC_REGNUM)
> +	(ne:CC_C
> +	  (plus:<DWI>
> +	    (zero_extend:<DWI>
> +	      (match_operand:GPI 0 "aarch64_reg_or_zero" "%rZ,rZ,rZ"))
> +	    (zero_extend:<DWI>
> +	      (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J")))
> +	  (zero_extend:<DWI>
> +	    (plus:GPI (match_dup 0) (match_dup 1)))))]
> +  ""
> +  "@
> +  cmn\\t%<w>0, %<w>1
> +  cmn\\t%<w>0, %<w>1
> +  cmp\\t%<w>0, #%n1"
> +  [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
> +)
> +
> +(define_insn "add<mode>3_compare1"
> +  [(set (reg:CC_C CC_REGNUM)
> +	(ne:CC_C
> +	  (plus:<DWI>
> +	    (zero_extend:<DWI>
> +	      (match_operand:GPI 1 "aarch64_reg_or_zero" "%rZ,rZ,rZ"))
> +	    (zero_extend:<DWI>
> +	      (match_operand:GPI 2 "aarch64_plus_operand" "r,I,J")))
> +	  (zero_extend:<DWI>
> +	    (plus:GPI (match_dup 1) (match_dup 2)))))
> +   (set (match_operand:GPI 0 "register_operand" "=r,r,r")
> +	(plus:GPI (match_dup 1) (match_dup 2)))]
> +  ""
> +  "@
> +  adds\\t%<w>0, %<w>1, %<w>2
> +  adds\\t%<w>0, %<w>1, %<w>2
> +  subs\\t%<w>0, %<w>1, #%n2"
> +  [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
> +)
> +
>  (define_insn "*adds_shift_imm_<mode>"
>    [(set (reg:CC_NZ CC_REGNUM)
>  	(compare:CC_NZ

<snip>

> +;; Note that a single add with carry is matched by cinc,
> +;; and the adc_reg and csel types are matched into the same
> +;; pipelines by existing cores.

I can't see us remembering to update this comment on pipeline models
were it to ever become false. Maybe just drop it?

> @@ -2440,13 +2427,53 @@
>    [(set_attr "type" "alu_ext")]
>  )
>  
> -(define_insn "sub<mode>3_carryin"
> -  [(set
> -    (match_operand:GPI 0 "register_operand" "=r")
> -    (minus:GPI (minus:GPI
> -		(match_operand:GPI 1 "register_operand" "r")
> -		(ltu:GPI (reg:CC CC_REGNUM) (const_int 0)))
> -	       (match_operand:GPI 2 "register_operand" "r")))]
> +;; The hardware description is op1 + ~op2 + C.
> +;;                           = op1 + (-op2 + 1) + (1 - !C)
> +;;                           = op1 - op2 - 1 + 1 - !C
> +;;                           = op1 - op2 - !C.
> +;; We describe the later.

s/later/latter/

Otherwise, this is OK.

Thanks,
James

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH, aarch64] Fix pr69305 -- addti miscompilation
  2016-01-27 17:31 ` James Greenhalgh
@ 2016-01-27 18:18   ` Richard Henderson
  2016-01-28 17:52     ` Richard Henderson
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Henderson @ 2016-01-27 18:18 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: gcc-patches, Marcus Shawcroft, Richard Earnshaw

On 01/27/2016 09:31 AM, James Greenhalgh wrote:
>> +(define_insn "*add<mode>3_compare1_cconly"
> 
> I don't understand the naming scheme, it got me a wee bit confused with
> add<mode>3_compare0 and friends, where the 0 indicates a comparison with
> zero...

Ah, well, I didn't get that 0 indicated comparison with zero.
I thought it was simply zero-th out of a list of alternatives.
Certainly quite a lot of other patterns are like that.

Indeed, the add.*carryin patterns that I delete in this very
patch were marked [0-3].

>> +;; Note that a single add with carry is matched by cinc,
>> +;; and the adc_reg and csel types are matched into the same
>> +;; pipelines by existing cores.
> 
> I can't see us remembering to update this comment on pipeline models
> were it to ever become false. Maybe just drop it?

Fair enough; I'll just mention cinc without the scheduling note.


r~

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH, aarch64] Fix pr69305 -- addti miscompilation
  2016-01-27 18:18   ` Richard Henderson
@ 2016-01-28 17:52     ` Richard Henderson
  0 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2016-01-28 17:52 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: gcc-patches, Marcus Shawcroft, Richard Earnshaw

[-- Attachment #1: Type: text/plain, Size: 186 bytes --]

Here's the patch as committed.

It includes a few name changes from the addv patch I posted,
which correspond nicely with a name clarification that James
requested during feedback.


r~

[-- Attachment #2: z --]
[-- Type: text/plain, Size: 18783 bytes --]

	PR target/69305
	* config/aarch64/aarch64-modes.def (CC_Cmode): New
	* config/aarch64/aarch64-protos.h: Update.
	* config/aarch64/aarch64.c (aarch64_zero_extend_const_eq): New.
	(aarch64_select_cc_mode): Add check for use of CC_Cmode.
	(aarch64_get_condition_code_1): Handle CC_Cmode.
	* config/aarch64/aarch64.md (addti3): Use adddi3_compareC.
	(*add<mode>3_compareC_cconly_imm): New.
	(*add<mode>3_compareC_cconly): New.
	(*add<mode>3_compareC_imm): New.
	(add<mode>3_compareC): New.
	(add<mode>3_carryin, *addsi3_carryin_uxtw): Sort compare operand
	to be first.  Use aarch64_carry_operation.
	(*add<mode>3_carryin_alt1, *addsi3_carryin_alt1_uxtw): Remove.
	(*add<mode>3_carryin_alt2, *addsi3_carryin_alt2_uxtw): Remove.
	(*add<mode>3_carryin_alt3, *addsi3_carryin_alt3_uxtw): Remove.
	(subti3): Use subdi3_compare1.
	(*sub<mode>3_compare0): Rename from sub<mode>3_compare0.
	(sub<mode>3_compare1): New.
	(*sub<mode>3_carryin0, *subsi3_carryin_uxtw): New.
	(*sub<mode>3_carryin): Use aarch64_borrow_operation.
	(*subsi3_carryin_uxtw): Likewise.
	(*ngc<mode>, *ngcsi_uxtw): Likewise.
	(*sub<mode>3_carryin_alt, *subsi3_carryin_alt_uxtw): New.
	* config/aarch64/iterators.md (DWI): New.
	* config/aarch64/predicates.md (aarch64_carry_operation): New.
	(aarch64_borrow_operation): New.


diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 3fab205..7de0b3f 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -25,6 +25,7 @@ CC_MODE (CC_ZESWP); /* zero-extend LHS (but swap to make it RHS).  */
 CC_MODE (CC_SESWP); /* sign-extend LHS (but swap to make it RHS).  */
 CC_MODE (CC_NZ);    /* Only N and Z bits of condition flags are valid.  */
 CC_MODE (CC_Z);     /* Only Z bit of condition flags is valid.  */
+CC_MODE (CC_C);     /* Only C bit of condition flags is valid.  */
 
 /* Half-precision floating point for __fp16.  */
 FLOAT_MODE (HF, 2, 0);
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index bd900c6..78870e2 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -290,6 +290,7 @@ void aarch64_declare_function_name (FILE *, const char*, tree);
 bool aarch64_legitimate_pic_operand_p (rtx);
 bool aarch64_modes_tieable_p (machine_mode mode1,
 			      machine_mode mode2);
+bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
 bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
 bool aarch64_mov_operand_p (rtx, machine_mode);
 int aarch64_simd_attr_length_rglist (enum machine_mode);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index bba7a9b..aee6685 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1493,6 +1493,16 @@ aarch64_split_simd_move (rtx dst, rtx src)
     }
 }
 
+bool
+aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
+			      machine_mode ymode, rtx y)
+{
+  rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
+  gcc_assert (r != NULL);
+  return rtx_equal_p (x, r);
+}
+			      
+
 static rtx
 aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
 {
@@ -4189,6 +4199,13 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
     return ((code == GT || code == GE || code == LE || code == LT)
 	    ? CC_SESWPmode : CC_ZESWPmode);
 
+  /* A test for unsigned overflow.  */
+  if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
+      && code == NE
+      && GET_CODE (x) == PLUS
+      && GET_CODE (y) == ZERO_EXTEND)
+    return CC_Cmode;
+
   /* For everything else, return CCmode.  */
   return CCmode;
 }
@@ -4288,6 +4305,15 @@ aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
 	}
       break;
 
+    case CC_Cmode:
+      switch (comp_code)
+	{
+	case NE: return AARCH64_CS;
+	case EQ: return AARCH64_CC;
+	default: return -1;
+	}
+      break;
+
     default:
       return -1;
       break;
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 5d35261..b42f550 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1710,7 +1710,7 @@
   ""
 {
   rtx low = gen_reg_rtx (DImode);
-  emit_insn (gen_adddi3_compare0 (low, gen_lowpart (DImode, operands[1]),
+  emit_insn (gen_adddi3_compareC (low, gen_lowpart (DImode, operands[1]),
 				  gen_lowpart (DImode, operands[2])));
 
   rtx high = gen_reg_rtx (DImode);
@@ -1755,6 +1755,71 @@
   [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
 )
 
+(define_insn "*add<mode>3_compareC_cconly_imm"
+  [(set (reg:CC_C CC_REGNUM)
+	(ne:CC_C
+	  (plus:<DWI>
+	    (zero_extend:<DWI> (match_operand:GPI 0 "register_operand" "r,r"))
+	    (match_operand:<DWI> 2 "const_scalar_int_operand" ""))
+	  (zero_extend:<DWI>
+	    (plus:GPI
+	      (match_dup 0)
+	      (match_operand:GPI 1 "aarch64_plus_immediate" "I,J")))))]
+  "aarch64_zero_extend_const_eq (<DWI>mode, operands[2],
+				 <MODE>mode, operands[1])"
+  "@
+  cmn\\t%<w>0, %<w>1
+  cmp\\t%<w>0, #%n1"
+  [(set_attr "type" "alus_imm")]
+)
+
+(define_insn "*add<mode>3_compareC_cconly"
+  [(set (reg:CC_C CC_REGNUM)
+	(ne:CC_C
+	  (plus:<DWI>
+	    (zero_extend:<DWI> (match_operand:GPI 0 "register_operand" "r"))
+	    (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
+	  (zero_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))]
+  ""
+  "cmn\\t%<w>0, %<w>1"
+  [(set_attr "type" "alus_sreg")]
+)
+
+(define_insn "*add<mode>3_compareC_imm"
+  [(set (reg:CC_C CC_REGNUM)
+	(ne:CC_C
+	  (plus:<DWI>
+	    (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r,r"))
+	    (match_operand:<DWI> 3 "const_scalar_int_operand" ""))
+	  (zero_extend:<DWI>
+	    (plus:GPI
+	      (match_dup 1)
+	      (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")))))
+   (set (match_operand:GPI 0 "register_operand" "=r,r")
+	(plus:GPI (match_dup 1) (match_dup 2)))]
+  "aarch64_zero_extend_const_eq (<DWI>mode, operands[3],
+                                 <MODE>mode, operands[2])"
+  "@
+  adds\\t%<w>0, %<w>1, %<w>2
+  subs\\t%<w>0, %<w>1, #%n2"
+  [(set_attr "type" "alus_imm")]
+)
+ 
+(define_insn "add<mode>3_compareC"
+  [(set (reg:CC_C CC_REGNUM)
+	(ne:CC_C
+	  (plus:<DWI>
+	    (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))
+	    (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r")))
+	  (zero_extend:<DWI>
+	    (plus:GPI (match_dup 1) (match_dup 2)))))
+   (set (match_operand:GPI 0 "register_operand" "=r")
+	(plus:GPI (match_dup 1) (match_dup 2)))]
+  ""
+  "adds\\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "type" "alus_sreg")]
+)
+
 (define_insn "*adds_shift_imm_<mode>"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ
@@ -2074,105 +2139,41 @@
   [(set_attr "type" "alu_ext")]
 )
 
-(define_insn "add<mode>3_carryin"
-  [(set
-    (match_operand:GPI 0 "register_operand" "=r")
-    (plus:GPI (geu:GPI (reg:CC CC_REGNUM) (const_int 0))
-	      (plus:GPI
-		(match_operand:GPI 1 "register_operand" "r")
-		(match_operand:GPI 2 "register_operand" "r"))))]
-   ""
-   "adc\\t%<w>0, %<w>1, %<w>2"
-  [(set_attr "type" "adc_reg")]
-)
-
-;; zero_extend version of above
-(define_insn "*addsi3_carryin_uxtw"
-  [(set
-    (match_operand:DI 0 "register_operand" "=r")
-    (zero_extend:DI
-     (plus:SI (geu:SI (reg:CC CC_REGNUM) (const_int 0))
-	      (plus:SI
-	       (match_operand:SI 1 "register_operand" "r")
-	       (match_operand:SI 2 "register_operand" "r")))))]
-   ""
-   "adc\\t%w0, %w1, %w2"
-  [(set_attr "type" "adc_reg")]
-)
-
-(define_insn "*add<mode>3_carryin_alt1"
-  [(set
-    (match_operand:GPI 0 "register_operand" "=r")
-    (plus:GPI (plus:GPI
-		(match_operand:GPI 1 "register_operand" "r")
-		(match_operand:GPI 2 "register_operand" "r"))
-              (geu:GPI (reg:CC CC_REGNUM) (const_int 0))))]
+(define_expand "add<mode>3_carryin"
+  [(set (match_operand:GPI 0 "register_operand")
+	(plus:GPI
+	  (plus:GPI
+	    (ne:GPI (reg:CC_C CC_REGNUM) (const_int 0))
+	    (match_operand:GPI 1 "aarch64_reg_or_zero"))
+	  (match_operand:GPI 2 "aarch64_reg_or_zero")))]
    ""
-   "adc\\t%<w>0, %<w>1, %<w>2"
-  [(set_attr "type" "adc_reg")]
-)
-
-;; zero_extend version of above
-(define_insn "*addsi3_carryin_alt1_uxtw"
-  [(set
-    (match_operand:DI 0 "register_operand" "=r")
-    (zero_extend:DI
-     (plus:SI (plus:SI
-	       (match_operand:SI 1 "register_operand" "r")
-	       (match_operand:SI 2 "register_operand" "r"))
-              (geu:SI (reg:CC CC_REGNUM) (const_int 0)))))]
    ""
-   "adc\\t%w0, %w1, %w2"
-  [(set_attr "type" "adc_reg")]
 )
 
-(define_insn "*add<mode>3_carryin_alt2"
-  [(set
-    (match_operand:GPI 0 "register_operand" "=r")
-    (plus:GPI (plus:GPI
-                (geu:GPI (reg:CC CC_REGNUM) (const_int 0))
-		(match_operand:GPI 1 "register_operand" "r"))
-	      (match_operand:GPI 2 "register_operand" "r")))]
-   ""
-   "adc\\t%<w>0, %<w>1, %<w>2"
-  [(set_attr "type" "adc_reg")]
-)
+;; Note that add with carry with two zero inputs is matched by cset,
+;; and that add with carry with one zero input is matched by cinc.
 
-;; zero_extend version of above
-(define_insn "*addsi3_carryin_alt2_uxtw"
-  [(set
-    (match_operand:DI 0 "register_operand" "=r")
-    (zero_extend:DI
-     (plus:SI (plus:SI
-               (geu:SI (reg:CC CC_REGNUM) (const_int 0))
-	       (match_operand:SI 1 "register_operand" "r"))
-	      (match_operand:SI 2 "register_operand" "r"))))]
-   ""
-   "adc\\t%w0, %w1, %w2"
-  [(set_attr "type" "adc_reg")]
-)
-
-(define_insn "*add<mode>3_carryin_alt3"
-  [(set
-    (match_operand:GPI 0 "register_operand" "=r")
-    (plus:GPI (plus:GPI
-                (geu:GPI (reg:CC CC_REGNUM) (const_int 0))
-		(match_operand:GPI 2 "register_operand" "r"))
-	      (match_operand:GPI 1 "register_operand" "r")))]
+(define_insn "*add<mode>3_carryin"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(plus:GPI
+	  (plus:GPI
+	    (match_operand:GPI 3 "aarch64_carry_operation" "")
+	    (match_operand:GPI 1 "register_operand" "r"))
+	  (match_operand:GPI 2 "register_operand" "r")))]
    ""
    "adc\\t%<w>0, %<w>1, %<w>2"
   [(set_attr "type" "adc_reg")]
 )
 
 ;; zero_extend version of above
-(define_insn "*addsi3_carryin_alt3_uxtw"
-  [(set
-    (match_operand:DI 0 "register_operand" "=r")
-    (zero_extend:DI
-     (plus:SI (plus:SI
-               (geu:SI (reg:CC CC_REGNUM) (const_int 0))
-	       (match_operand:SI 2 "register_operand" "r"))
-	      (match_operand:SI 1 "register_operand" "r"))))]
+(define_insn "*addsi3_carryin_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (plus:SI
+	    (plus:SI
+	      (match_operand:SI 3 "aarch64_carry_operation" "")
+	      (match_operand:SI 1 "register_operand" "r"))
+	    (match_operand:SI 2 "register_operand" "r"))))]
    ""
    "adc\\t%w0, %w1, %w2"
   [(set_attr "type" "adc_reg")]
@@ -2281,7 +2282,7 @@
   ""
 {
   rtx low = gen_reg_rtx (DImode);
-  emit_insn (gen_subdi3_compare0 (low, gen_lowpart (DImode, operands[1]),
+  emit_insn (gen_subdi3_compare1 (low, gen_lowpart (DImode, operands[1]),
 				  gen_lowpart (DImode, operands[2])));
 
   rtx high = gen_reg_rtx (DImode);
@@ -2293,7 +2294,7 @@
   DONE;
 })
 
-(define_insn "sub<mode>3_compare0"
+(define_insn "*sub<mode>3_compare0"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ (minus:GPI (match_operand:GPI 1 "register_operand" "r")
 				  (match_operand:GPI 2 "register_operand" "r"))
@@ -2318,6 +2319,18 @@
   [(set_attr "type" "alus_sreg")]
 )
 
+(define_insn "sub<mode>3_compare1"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+	  (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")))
+   (set (match_operand:GPI 0 "register_operand" "=r")
+	(minus:GPI (match_dup 1) (match_dup 2)))]
+  ""
+  "subs\\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "type" "alus_sreg")]
+)
+
 (define_insn "*sub_<shift>_<mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(minus:GPI (match_operand:GPI 3 "register_operand" "r")
@@ -2440,13 +2453,53 @@
   [(set_attr "type" "alu_ext")]
 )
 
-(define_insn "sub<mode>3_carryin"
-  [(set
-    (match_operand:GPI 0 "register_operand" "=r")
-    (minus:GPI (minus:GPI
-		(match_operand:GPI 1 "register_operand" "r")
-		(ltu:GPI (reg:CC CC_REGNUM) (const_int 0)))
-	       (match_operand:GPI 2 "register_operand" "r")))]
+;; The hardware description is op1 + ~op2 + C.
+;;                           = op1 + (-op2 + 1) + (1 - !C)
+;;                           = op1 - op2 - 1 + 1 - !C
+;;                           = op1 - op2 - !C.
+;; We describe the latter.
+
+(define_insn "*sub<mode>3_carryin0"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(minus:GPI
+	  (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+	  (match_operand:GPI 2 "aarch64_borrow_operation" "")))]
+   ""
+   "sbc\\t%<w>0, %<w>1, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
+;; zero_extend version of the above
+(define_insn "*subsi3_carryin_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (minus:SI
+	    (match_operand:SI 1 "aarch64_reg_or_zero" "rZ")
+	    (match_operand:SI 2 "aarch64_borrow_operation" ""))))]
+   ""
+   "sbc\\t%w0, %w1, wzr"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_expand "sub<mode>3_carryin"
+  [(set (match_operand:GPI 0 "register_operand")
+	(minus:GPI
+	  (minus:GPI
+	    (match_operand:GPI 1 "aarch64_reg_or_zero")
+	    (match_operand:GPI 2 "register_operand"))
+	  (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))]
+   ""
+   ""
+)
+
+(define_insn "*sub<mode>3_carryin"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(minus:GPI
+	  (minus:GPI
+	    (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+	    (match_operand:GPI 2 "register_operand" "r"))
+	  (match_operand:GPI 3 "aarch64_borrow_operation" "")))]
+
    ""
    "sbc\\t%<w>0, %<w>1, %<w>2"
   [(set_attr "type" "adc_reg")]
@@ -2454,13 +2507,40 @@
 
 ;; zero_extend version of the above
 (define_insn "*subsi3_carryin_uxtw"
-  [(set
-    (match_operand:DI 0 "register_operand" "=r")
-    (zero_extend:DI
-     (minus:SI (minus:SI
-		(match_operand:SI 1 "register_operand" "r")
-		(ltu:SI (reg:CC CC_REGNUM) (const_int 0)))
-	       (match_operand:SI 2 "register_operand" "r"))))]
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (minus:SI
+	    (minus:SI
+	      (match_operand:SI 1 "aarch64_reg_or_zero" "rZ")
+	      (match_operand:SI 2 "register_operand" "r"))
+	    (match_operand:SI 3 "aarch64_borrow_operation" ""))))]
+
+   ""
+   "sbc\\t%w0, %w1, %w2"
+  [(set_attr "type" "adc_reg")]
+)
+
+(define_insn "*sub<mode>3_carryin_alt"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(minus:GPI
+	  (minus:GPI
+	    (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+	    (match_operand:GPI 3 "aarch64_borrow_operation" ""))
+	  (match_operand:GPI 2 "register_operand" "r")))]
+   ""
+   "sbc\\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "type" "adc_reg")]
+)
+
+;; zero_extend version of the above
+(define_insn "*subsi3_carryin_alt_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (minus:SI
+	    (minus:SI
+	      (match_operand:SI 1 "aarch64_reg_or_zero" "rZ")
+	      (match_operand:SI 3 "aarch64_borrow_operation" ""))
+	    (match_operand:SI 2 "register_operand" "r"))))]
    ""
    "sbc\\t%w0, %w1, %w2"
   [(set_attr "type" "adc_reg")]
@@ -2564,8 +2644,9 @@
 
 (define_insn "*ngc<mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
-	(minus:GPI (neg:GPI (ltu:GPI (reg:CC CC_REGNUM) (const_int 0)))
-		   (match_operand:GPI 1 "register_operand" "r")))]
+	(minus:GPI
+	  (neg:GPI (match_operand:GPI 2 "aarch64_borrow_operation" ""))
+	  (match_operand:GPI 1 "register_operand" "r")))]
   ""
   "ngc\\t%<w>0, %<w>1"
   [(set_attr "type" "adc_reg")]
@@ -2574,8 +2655,9 @@
 (define_insn "*ngcsi_uxtw"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
-	 (minus:SI (neg:SI (ltu:SI (reg:CC CC_REGNUM) (const_int 0)))
-		   (match_operand:SI 1 "register_operand" "r"))))]
+	  (minus:SI
+	    (neg:SI (match_operand:SI 2 "aarch64_borrow_operation" ""))
+	    (match_operand:SI 1 "register_operand" "r"))))]
   ""
   "ngc\\t%w0, %w1"
   [(set_attr "type" "adc_reg")]
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 49598a2..d9bd391 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -350,6 +350,9 @@
 ;; For constraints used in scalar immediate vector moves
 (define_mode_attr hq [(HI "h") (QI "q")])
 
+;; For doubling width of an integer mode
+(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI")])
+
 ;; For scalar usage of vector/FP registers
 (define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d")
 		    (SF "s") (DF "d")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index e96dc00..e80e406 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -242,6 +242,25 @@
   return aarch64_get_condition_code (op) >= 0;
 })
 
+(define_special_predicate "aarch64_carry_operation"
+  (match_code "ne,geu")
+{
+  if (XEXP (op, 1) != const0_rtx)
+    return false;
+  machine_mode ccmode = (GET_CODE (op) == NE ? CC_Cmode : CCmode);
+  rtx op0 = XEXP (op, 0);
+  return REG_P (op0) && REGNO (op0) == CC_REGNUM && GET_MODE (op0) == ccmode;
+})
+
+(define_special_predicate "aarch64_borrow_operation"
+  (match_code "eq,ltu")
+{
+  if (XEXP (op, 1) != const0_rtx)
+    return false;
+  machine_mode ccmode = (GET_CODE (op) == EQ ? CC_Cmode : CCmode);
+  rtx op0 = XEXP (op, 0);
+  return REG_P (op0) && REGNO (op0) == CC_REGNUM && GET_MODE (op0) == ccmode;
+})
 
 ;; True if the operand is memory reference suitable for a load/store exclusive.
 (define_predicate "aarch64_sync_memory_operand"
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
index fd38b2c..7c962cb 100644
--- a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
@@ -85,7 +85,7 @@ f13 (int a, int b)
 /* { dg-final { scan-assembler "cmp\t(.)+34" } } */
 /* { dg-final { scan-assembler "cmp\t(.)+35" } } */
 
-/* { dg-final { scan-assembler-times "\tcmp\tw\[0-9\]+, 0" 4 } } */
+/* { dg-final { scan-assembler-times "\tcmp\tw\[0-9\]+, (0|wzr)" 4 } } */
 /* { dg-final { scan-assembler-times "fcmpe\t(.)+0\\.0" 2 } } */
 /* { dg-final { scan-assembler-times "fcmp\t(.)+0\\.0" 2 } } */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/tst_3.c b/gcc/testsuite/gcc.target/aarch64/tst_3.c
index 2204b33..3fea633 100644
--- a/gcc/testsuite/gcc.target/aarch64/tst_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/tst_3.c
@@ -9,4 +9,4 @@ f1 (int x)
   return x;
 }
 
-/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]*.*1" } } */
+/* { dg-final { scan-assembler "(tst|ands)\t(x|w)\[0-9\]*.*1" } } */

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-01-28 17:52 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-01-24 11:19 [PATCH, aarch64] Fix pr69305 -- addti miscompilation Richard Henderson
2016-01-27 17:31 ` James Greenhalgh
2016-01-27 18:18   ` Richard Henderson
2016-01-28 17:52     ` Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).