* [PATCH][Aarch64] Add support for overflow add and sub operations @ 2016-11-30 23:06 Michael Collison 0 siblings, 0 replies; 8+ messages in thread From: Michael Collison @ 2016-11-30 23:06 UTC (permalink / raw) To: gcc-patches; +Cc: nd, rth, James Greenhalgh [-- Attachment #1: Type: text/plain, Size: 1915 bytes --] Hi, This patch improves code generations for builtin arithmetic overflow operations for the aarch64 backend. As an example for a simple test case such as: int f (int x, int y, int *ovf) { int res; *ovf = __builtin_sadd_overflow (x, y, &res); return res; } Current trunk at -O2 generates f: mov w3, w0 mov w4, 0 add w0, w0, w1 tbnz w1, #31, .L4 cmp w0, w3 blt .L3 .L2: str w4, [x2] ret .p2align 3 .L4: cmp w0, w3 ble .L2 .L3: mov w4, 1 b .L2 With the patch this now generates: f: adds w0, w0, w1 cset w1, vs str w1, [x2] ret Tested on aarch64-linux-gnu with no regressions. Okay for trunk? 2016-11-30 Michael Collison <michael.collison@arm.com> Richard Henderson <rth@redhat.com> * config/aarch64/aarch64-modes.def (CC_V): New. * config/aarch64/aarch64.c (aarch64_select_cc_mode): Test for signed overflow using CC_Vmode. (aarch64_get_condition_code_1): Handle CC_Vmode. * config/aarch64/aarch64.md (addv<GPI>4, uaddv<GPI>4): New. (addti3): Create simpler code if low part is already known to be 0. (addvti4, uaddvti4): New. (*add<GPI>3_compareC_cconly_imm): New. (*add<GPI>3_compareC_cconly): New. (*add<GPI>3_compareC_imm): New. (*add<GPI>3_compareC): Rename from add<GPI>3_compare1; do not handle constants within this pattern. (*add<GPI>3_compareV_cconly_imm): New. (*add<GPI>3_compareV_cconly): New. (*add<GPI>3_compareV_imm): New. (add<GPI>3_compareV): New. (add<GPI>3_carryinC, add<GPI>3_carryinV): New. (*add<GPI>3_carryinC_zero, *add<GPI>3_carryinV_zero): New. (*add<GPI>3_carryinC, *add<GPI>3_carryinV): New. (subv<GPI>4, usubv<GPI>4): New. (subti): Handle op1 zero. (subvti4, usub4ti4): New. (*sub<GPI>3_compare1_imm): New. (sub<GPI>3_carryinCV): New. 
(*sub<GPI>3_carryinCV_z1_z2, *sub<GPI>3_carryinCV_z1): New. (*sub<GPI>3_carryinCV_z2, *sub<GPI>3_carryinCV): New [-- Attachment #2: rth_overflow_ipreview1.patch --] [-- Type: application/octet-stream, Size: 21909 bytes --] diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def index de8227f..71c2069 100644 --- a/gcc/config/aarch64/aarch64-modes.def +++ b/gcc/config/aarch64/aarch64-modes.def @@ -24,6 +24,7 @@ CC_MODE (CC_SWP); CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */ CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */ CC_MODE (CC_C); /* Only C bit of condition flags is valid. */ +CC_MODE (CC_V); /* Only V bit of condition flags is valid. */ /* Half-precision floating point for __fp16. */ FLOAT_MODE (HF, 2, 0); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 6078b16..e020d24 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -4324,6 +4324,13 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) && GET_CODE (y) == ZERO_EXTEND) return CC_Cmode; + /* A test for signed overflow. */ + if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode) + && code == NE + && GET_CODE (x) == PLUS + && GET_CODE (y) == SIGN_EXTEND) + return CC_Vmode; + /* For everything else, return CCmode. 
*/ return CCmode; } @@ -4430,6 +4437,15 @@ aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code) } break; + case CC_Vmode: + switch (comp_code) + { + case NE: return AARCH64_VS; + case EQ: return AARCH64_VC; + default: return -1; + } + break; + default: return -1; break; diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 6afaf90..a074341 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1720,25 +1720,155 @@ } ) +(define_expand "addv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (match_operand 3 "")] + "" +{ + emit_insn (gen_add<mode>3_compareV (operands[0], operands[1], operands[2])); + + rtx x; + x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, CC_REGNUM), const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, operands[3]), + pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, x)); + DONE; +}) + +(define_expand "uaddv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (match_operand 3 "")] + "" +{ + emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2])); + + rtx x; + x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REGNUM), const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, operands[3]), + pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, x)); + DONE; +}) + + (define_expand "addti3" [(set (match_operand:TI 0 "register_operand" "") (plus:TI (match_operand:TI 1 "register_operand" "") - (match_operand:TI 2 "register_operand" "")))] + (match_operand:TI 2 "aarch64_reg_or_imm" "")))] "" { - rtx low = gen_reg_rtx (DImode); - emit_insn (gen_adddi3_compareC (low, gen_lowpart (DImode, operands[1]), - gen_lowpart (DImode, operands[2]))); + rtx l0 = gen_reg_rtx (DImode); + rtx l1 = gen_lowpart (DImode, operands[1]); + rtx l2 = 
simplify_gen_subreg (DImode, operands[2], TImode, + subreg_lowpart_offset (DImode, TImode)); + rtx h0 = gen_reg_rtx (DImode); + rtx h1 = gen_highpart (DImode, operands[1]); + rtx h2 = simplify_gen_subreg (DImode, operands[2], TImode, + subreg_highpart_offset (DImode, TImode)); - rtx high = gen_reg_rtx (DImode); - emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]), - gen_highpart (DImode, operands[2]))); + if (l2 == const0_rtx) + { + l0 = l1; + if (!aarch64_pluslong_operand (h2, DImode)) + h2 = force_reg (DImode, h2); + emit_insn (gen_adddi3 (h0, h1, h2)); + } + else + { + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); + emit_insn (gen_adddi3_carryin (h0, h1, force_reg (DImode, h2))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); - emit_move_insn (gen_lowpart (DImode, operands[0]), low); - emit_move_insn (gen_highpart (DImode, operands[0]), high); DONE; }) +(define_expand "addvti4" + [(match_operand:TI 0 "register_operand" "") + (match_operand:TI 1 "register_operand" "") + (match_operand:TI 2 "aarch64_reg_or_imm" "") + (match_operand 3 "")] + "" +{ + rtx l0 = gen_reg_rtx (DImode); + rtx l1 = gen_lowpart (DImode, operands[1]); + rtx l2 = simplify_gen_subreg (DImode, operands[2], TImode, + subreg_lowpart_offset (DImode, TImode)); + rtx h0 = gen_reg_rtx (DImode); + rtx h1 = gen_highpart (DImode, operands[1]); + rtx h2 = simplify_gen_subreg (DImode, operands[2], TImode, + subreg_highpart_offset (DImode, TImode)); + + if (l2 == const0_rtx) + { + l0 = l1; + emit_insn (gen_adddi3_compareV (h0, h1, force_reg (DImode, h2))); + } + else + { + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); + emit_insn (gen_adddi3_carryinV (h0, h1, force_reg (DImode, h2))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + + rtx x; + x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, 
CC_REGNUM), const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, operands[3]), + pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, x)); + DONE; +}) + +(define_expand "uaddvti4" + [(match_operand:TI 0 "register_operand" "") + (match_operand:TI 1 "register_operand" "") + (match_operand:TI 2 "aarch64_reg_or_imm" "") + (match_operand 3 "")] + "" +{ + rtx l0 = gen_reg_rtx (DImode); + rtx l1 = gen_lowpart (DImode, operands[1]); + rtx l2 = simplify_gen_subreg (DImode, operands[2], TImode, + subreg_lowpart_offset (DImode, TImode)); + rtx h0 = gen_reg_rtx (DImode); + rtx h1 = gen_highpart (DImode, operands[1]); + rtx h2 = simplify_gen_subreg (DImode, operands[2], TImode, + subreg_highpart_offset (DImode, TImode)); + + if (l2 == const0_rtx) + { + l0 = l1; + emit_insn (gen_adddi3_compareC (h0, h1, force_reg (DImode, h2))); + } + else + { + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); + emit_insn (gen_adddi3_carryinC (h0, h1, force_reg (DImode, h2))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + + rtx x; + x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REGNUM), const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, operands[3]), + pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, x)); + DONE; + }) + (define_insn "add<mode>3_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ @@ -1837,6 +1967,66 @@ [(set_attr "type" "alus_sreg")] ) +;; Note that since we're sign-extending, match the immediate in GPI +;; rather than in DWI. Since CONST_INT is modeless, this works fine. 
+(define_insn "*add<mode>3_compareV_cconly_imm" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r,r")) + (match_operand:GPI 1 "aarch64_plus_immediate" "I,J")) + (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] + "" + "@ + cmn\\t%<w>0, %<w>1 + cmp\\t%<w>0, #%n1" + [(set_attr "type" "alus_imm")] +) + +(define_insn "*add<mode>3_compareV_cconly" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r")) + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] + "" + "cmn\\t%<w>0, %<w>1" + [(set_attr "type" "alus_sreg")] +) + +(define_insn "*add<mode>3_compareV_imm" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r,r")) + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")) + (sign_extend:<DWI> + (plus:GPI (match_dup 1) (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand" "=r,r") + (plus:GPI (match_dup 1) (match_dup 2)))] + "" + "@ + adds\\t%<w>0, %<w>1, %<w>2 + subs\\t%<w>0, %<w>1, #%n2" + [(set_attr "type" "alus_imm,alus_imm")] +) + +(define_insn "add<mode>3_compareV" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")) + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) + (sign_extend:<DWI> (plus:GPI (match_dup 1) (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (match_dup 1) (match_dup 2)))] + "" + "adds\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "alus_sreg")] + ) + (define_insn "*adds_shift_imm_<mode>" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ @@ -2196,6 +2386,138 @@ [(set_attr "type" "adc_reg")] ) +(define_expand "add<mode>3_carryinC" + [(parallel + [(set (match_dup 3) + (ne:CC_C + (plus:<DWI> + (plus:<DWI> + (match_dup 4) + (zero_extend:<DWI> + 
(match_operand:GPI 1 "register_operand" "r"))) + (zero_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r"))) + (zero_extend:<DWI> + (plus:GPI + (plus:GPI (match_dup 5) (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 5) (match_dup 1)) + (match_dup 2)))])] + "" +{ + operands[3] = gen_rtx_REG (CC_Cmode, CC_REGNUM); + operands[4] = gen_rtx_NE (<DWI>mode, operands[3], const0_rtx); + operands[5] = gen_rtx_NE (<MODE>mode, operands[3], const0_rtx); +}) + +(define_insn "*add<mode>3_carryinC_zero" + [(set (reg:CC_C CC_REGNUM) + (ne:CC_C + (plus:<DWI> + (match_operand:<DWI> 2 "aarch64_carry_operation" "") + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (zero_extend:<DWI> + (plus:GPI + (match_operand:GPI 3 "aarch64_carry_operation" "") + (match_dup 1))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI (match_dup 3) (match_dup 1)))] + "" + "adcs\\t%<w>0, %<w>1, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add<mode>3_carryinC" + [(set (reg:CC_C CC_REGNUM) + (ne:CC_C + (plus:<DWI> + (plus:<DWI> + (match_operand:<DWI> 3 "aarch64_carry_operation" "") + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) + (zero_extend:<DWI> + (plus:GPI + (plus:GPI + (match_operand:GPI 4 "aarch64_carry_operation" "") + (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))] + "" + "adcs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "adc_reg")] +) + +(define_expand "add<mode>3_carryinV" + [(parallel + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (plus:<DWI> + (match_dup 3) + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r"))) + (sign_extend:<DWI> + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + 
(match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))])] + "" +{ + rtx cc = gen_rtx_REG (CC_Cmode, CC_REGNUM); + operands[3] = gen_rtx_NE (<DWI>mode, cc, const0_rtx); + operands[4] = gen_rtx_NE (<MODE>mode, cc, const0_rtx); +}) + +(define_insn "*add<mode>3_carryinV_zero" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (match_operand:<DWI> 2 "aarch64_carry_operation" "") + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> + (plus:GPI + (match_operand:GPI 3 "aarch64_carry_operation" "") + (match_dup 1))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI (match_dup 3) (match_dup 1)))] + "" + "adcs\\t%<w>0, %<w>1, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add<mode>3_carryinV" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (plus:<DWI> + (match_operand:<DWI> 3 "aarch64_carry_operation" "") + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) + (sign_extend:<DWI> + (plus:GPI + (plus:GPI + (match_operand:GPI 4 "aarch64_carry_operation" "") + (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))] + "" + "adcs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "adc_reg")] +) + (define_insn "*add_uxt<mode>_shift2" [(set (match_operand:GPI 0 "register_operand" "=rk") (plus:GPI (and:GPI @@ -2292,22 +2614,158 @@ (set_attr "simd" "*,yes")] ) +(define_expand "subv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "aarch64_reg_or_zero") + (match_operand:GPI 2 "aarch64_reg_or_zero") + (match_operand 3 "")] + "" +{ + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2])); + + rtx x; + x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, CC_REGNUM), const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + 
gen_rtx_LABEL_REF (VOIDmode, operands[3]), + pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, x)); + DONE; +}) + +(define_expand "usubv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "aarch64_reg_or_zero") + (match_operand:GPI 2 "aarch64_reg_or_zero") + (match_operand 3 "")] + "" +{ + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2])); + + rtx x; + x = gen_rtx_LTU (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, operands[3]), + pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, x)); + DONE; +}) + (define_expand "subti3" [(set (match_operand:TI 0 "register_operand" "") - (minus:TI (match_operand:TI 1 "register_operand" "") + (minus:TI (match_operand:TI 1 "aarch64_reg_or_zero" "") (match_operand:TI 2 "register_operand" "")))] "" { - rtx low = gen_reg_rtx (DImode); - emit_insn (gen_subdi3_compare1 (low, gen_lowpart (DImode, operands[1]), - gen_lowpart (DImode, operands[2]))); + rtx l0 = gen_reg_rtx (DImode); + rtx l1 = simplify_gen_subreg (DImode, operands[1], TImode, + subreg_lowpart_offset (DImode, TImode)); + rtx l2 = gen_lowpart (DImode, operands[2]); + rtx h0 = gen_reg_rtx (DImode); + rtx h1 = simplify_gen_subreg (DImode, operands[1], TImode, + subreg_highpart_offset (DImode, TImode)); + rtx h2 = gen_highpart (DImode, operands[2]); - rtx high = gen_reg_rtx (DImode); - emit_insn (gen_subdi3_carryin (high, gen_highpart (DImode, operands[1]), - gen_highpart (DImode, operands[2]))); + emit_insn (gen_subdi3_compare1 (l0, l1, l2)); + emit_insn (gen_subdi3_carryin (h0, h1, h2)); - emit_move_insn (gen_lowpart (DImode, operands[0]), low); - emit_move_insn (gen_highpart (DImode, operands[0]), high); + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + DONE; +}) + +(define_expand "subvti4" + [(match_operand:TI 0 "register_operand") + (match_operand:TI 1 "aarch64_reg_or_zero") + 
(match_operand:TI 2 "aarch64_reg_or_imm") + (match_operand 3 "")] + "" +{ + rtx l0 = gen_reg_rtx (DImode); + rtx l1 = simplify_gen_subreg (DImode, operands[1], TImode, + subreg_lowpart_offset (DImode, TImode)); + rtx l2 = simplify_gen_subreg (DImode, operands[2], TImode, + subreg_lowpart_offset (DImode, TImode)); + rtx h0 = gen_reg_rtx (DImode); + rtx h1 = simplify_gen_subreg (DImode, operands[1], TImode, + subreg_highpart_offset (DImode, TImode)); + rtx h2 = simplify_gen_subreg (DImode, operands[2], TImode, + subreg_highpart_offset (DImode, TImode)); + + if (l2 == const0_rtx) + { + l0 = l1; + emit_insn (gen_subdi3_compare1 (h0, h1, force_reg (DImode, h2))); + } + else + { + if (CONST_INT_P (l2)) + { + l2 = force_reg (DImode, GEN_INT (-UINTVAL (l2))); + h2 = force_reg (DImode, h2); + emit_insn (gen_adddi3_compareC (l0, l1, l2)); + } + else + emit_insn (gen_subdi3_compare1 (l0, l1, l2)); + emit_insn (gen_subdi3_carryinCV (h0, force_reg (DImode, h1), h2)); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + + rtx x; + x = gen_rtx_NE (VOIDmode, gen_rtx_REG (CC_Vmode, CC_REGNUM), const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, operands[3]), + pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, x)); + DONE; +}) + +(define_expand "usubvti4" + [(match_operand:TI 0 "register_operand") + (match_operand:TI 1 "aarch64_reg_or_zero") + (match_operand:TI 2 "aarch64_reg_or_imm") + (match_operand 3 "")] + "" +{ + rtx l0 = gen_reg_rtx (DImode); + rtx l1 = simplify_gen_subreg (DImode, operands[1], TImode, + subreg_lowpart_offset (DImode, TImode)); + rtx l2 = simplify_gen_subreg (DImode, operands[2], TImode, + subreg_lowpart_offset (DImode, TImode)); + rtx h0 = gen_reg_rtx (DImode); + rtx h1 = simplify_gen_subreg (DImode, operands[1], TImode, + subreg_highpart_offset (DImode, TImode)); + rtx h2 = simplify_gen_subreg (DImode, operands[2], TImode, + subreg_highpart_offset (DImode, 
TImode)); + + if (l2 == const0_rtx) + { + l0 = l1; + emit_insn (gen_subdi3_compare1 (h0, h1, force_reg (DImode, h2))); + } + else + { + if (CONST_INT_P (l2)) + { + l2 = force_reg (DImode, GEN_INT (-UINTVAL (l2))); + h2 = force_reg (DImode, h2); + emit_insn (gen_adddi3_compareC (l0, l1, l2)); + } + else + emit_insn (gen_subdi3_compare1 (l0, l1, l2)); + emit_insn (gen_subdi3_carryinCV (h0, force_reg (DImode, h1), h2)); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + + rtx x; + x = gen_rtx_LTU (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM), const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, operands[3]), + pc_rtx); + emit_jump_insn (gen_rtx_SET (pc_rtx, x)); DONE; }) @@ -2336,6 +2794,22 @@ [(set_attr "type" "alus_sreg")] ) +(define_insn "*sub<mode>3_compare1_imm" + [(set (reg:CC CC_REGNUM) + (compare:CC + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ,rZ") + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))) + (set (match_operand:GPI 0 "register_operand" "=r,r") + (plus:GPI + (match_dup 1) + (match_operand:GPI 3 "aarch64_plus_immediate" "J,I")))] + "UINTVAL (operands[2]) == -UINTVAL (operands[3])" + "@ + subs\\t%<w>0, %<w>1, %<w>2 + adds\\t%<w>0, %<w>1, %<w>3" + [(set_attr "type" "alus_imm")] +) + (define_insn "sub<mode>3_compare1" [(set (reg:CC CC_REGNUM) (compare:CC @@ -2563,6 +3037,85 @@ [(set_attr "type" "adc_reg")] ) +(define_expand "sub<mode>3_carryinCV" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC + (sign_extend:<DWI> + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r")) + (ltu:<DWI> (reg:CC CC_REGNUM) (const_int 0))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) + (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))])] + "" +) + +(define_insn "*sub<mode>3_carryinCV_z1_z2" + [(set (reg:CC CC_REGNUM) + 
(compare:CC + (const_int 0) + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) + (set (match_operand:GPI 0 "register_operand" "=r") + (neg:GPI (match_operand:GPI 1 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, <w>zr, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub<mode>3_carryinCV_z1" + [(set (reg:CC CC_REGNUM) + (compare:CC + (const_int 0) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r")) + (match_operand:<DWI> 2 "aarch64_borrow_operation" "")))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (neg:GPI (match_dup 1)) + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, <w>zr, %<w>1" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub<mode>3_carryinCV_z2" + [(set (reg:CC CC_REGNUM) + (compare:CC + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r")) + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (match_dup 1) + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, %<w>1, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub<mode>3_carryinCV" + [(set (reg:CC CC_REGNUM) + (compare:CC + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r")) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r")) + (match_operand:<DWI> 3 "aarch64_borrow_operation" "")))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) + (match_operand:GPI 4 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "adc_reg")] +) + (define_insn "*sub_uxt<mode>_shift2" [(set (match_operand:GPI 0 "register_operand" "=rk") (minus:GPI (match_operand:GPI 4 "register_operand" "rk") -- 1.9.1 ^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH][Aarch64] Add support for overflow add and sub operations @ 2017-05-19 6:27 Michael Collison 2017-05-19 11:00 ` Christophe Lyon 0 siblings, 1 reply; 8+ messages in thread From: Michael Collison @ 2017-05-19 6:27 UTC (permalink / raw) To: gcc-patches; +Cc: nd [-- Attachment #1: Type: text/plain, Size: 3144 bytes --] Hi, This patch improves code generation for builtin arithmetic overflow operations for the aarch64 backend. As an example, for a simple test case such as: int f (int x, int y, int *ovf) { int res; *ovf = __builtin_sadd_overflow (x, y, &res); return res; } Current trunk at -O2 generates f: mov w3, w0 mov w4, 0 add w0, w0, w1 tbnz w1, #31, .L4 cmp w0, w3 blt .L3 .L2: str w4, [x2] ret .p2align 3 .L4: cmp w0, w3 ble .L2 .L3: mov w4, 1 b .L2 With the patch this now generates: f: adds w0, w0, w1 cset w1, vs str w1, [x2] ret Original patch from Richard Henderson: https://gcc.gnu.org/ml/gcc-patches/2016-01/msg01903.html Okay for trunk? 2017-05-17 Michael Collison <michael.collison@arm.com> Richard Henderson <rth@redhat.com> * config/aarch64/aarch64-modes.def (CC_V): New. * config/aarch64/aarch64-protos.h (aarch64_add_128bit_scratch_regs): Declare. (aarch64_subv_128bit_scratch_regs): Declare. (aarch64_expand_subvti): Declare. (aarch64_gen_unlikely_cbranch): Declare. * config/aarch64/aarch64.c (aarch64_select_cc_mode): Test for signed overflow using CC_Vmode. (aarch64_get_condition_code_1): Handle CC_Vmode. (aarch64_gen_unlikely_cbranch): New function. (aarch64_add_128bit_scratch_regs): New function. (aarch64_subv_128bit_scratch_regs): New function. (aarch64_expand_subvti): New function. * config/aarch64/aarch64.md (addv<GPI>4, uaddv<GPI>4): New. (addti3): Create simpler code if low part is already known to be 0. (addvti4, uaddvti4): New. (*add<GPI>3_compareC_cconly_imm): New. (*add<GPI>3_compareC_cconly): New. (*add<GPI>3_compareC_imm): New. 
(*add<GPI>3_compareC): Rename from add<GPI>3_compare1; do not handle constants within this pattern. (*add<GPI>3_compareV_cconly_imm): New. (*add<GPI>3_compareV_cconly): New. (*add<GPI>3_compareV_imm): New. (add<GPI>3_compareV): New. (add<GPI>3_carryinC, add<GPI>3_carryinV): New. (*add<GPI>3_carryinC_zero, *add<GPI>3_carryinV_zero): New. (*add<GPI>3_carryinC, *add<GPI>3_carryinV): New. (subv<GPI>4, usubv<GPI>4): New. (subti3): Handle op1 zero. (subvti4, usubvti4): New. (*sub<GPI>3_compare1_imm): New. (sub<GPI>3_carryinCV): New. (*sub<GPI>3_carryinCV_z1_z2, *sub<GPI>3_carryinCV_z1): New. (*sub<GPI>3_carryinCV_z2, *sub<GPI>3_carryinCV): New. * testsuite/gcc.target/arm/builtin_sadd_128.c: New testcase. * testsuite/gcc.target/arm/builtin_saddl.c: New testcase. * testsuite/gcc.target/arm/builtin_saddll.c: New testcase. * testsuite/gcc.target/arm/builtin_uadd_128.c: New testcase. * testsuite/gcc.target/arm/builtin_uaddl.c: New testcase. * testsuite/gcc.target/arm/builtin_uaddll.c: New testcase. * testsuite/gcc.target/arm/builtin_ssub_128.c: New testcase. * testsuite/gcc.target/arm/builtin_ssubl.c: New testcase. * testsuite/gcc.target/arm/builtin_ssubll.c: New testcase. * testsuite/gcc.target/arm/builtin_usub_128.c: New testcase. * testsuite/gcc.target/arm/builtin_usubl.c: New testcase. * testsuite/gcc.target/arm/builtin_usubll.c: New testcase. [-- Attachment #2: PR6308.patch --] [-- Type: application/octet-stream, Size: 29469 bytes --] diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def index 45f7a44..244e490 100644 --- a/gcc/config/aarch64/aarch64-modes.def +++ b/gcc/config/aarch64/aarch64-modes.def @@ -24,6 +24,7 @@ CC_MODE (CC_SWP); CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */ CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */ CC_MODE (CC_C); /* Only C bit of condition flags is valid. */ +CC_MODE (CC_V); /* Only V bit of condition flags is valid. */ /* Half-precision floating point for __fp16. 
*/ FLOAT_MODE (HF, 2, 0); diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index f55d4ba..f38b2b8 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -388,6 +388,18 @@ void aarch64_relayout_simd_types (void); void aarch64_reset_previous_fndecl (void); bool aarch64_return_address_signing_enabled (void); void aarch64_save_restore_target_globals (tree); +void aarch64_add_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, + rtx *low_in1, rtx *low_in2, + rtx *high_dest, rtx *high_in1, + rtx *high_in2); +void aarch64_subv_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, + rtx *low_in1, rtx *low_in2, + rtx *high_dest, rtx *high_in1, + rtx *high_in2); +void aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1, + rtx low_in2, rtx high_dest, rtx high_in1, + rtx high_in2); + /* Initialize builtins for SIMD intrinsics. */ void init_aarch64_simd_builtins (void); @@ -412,6 +424,8 @@ bool aarch64_float_const_representable_p (rtx); #if defined (RTX_CODE) +void aarch64_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode, + rtx label_ref); bool aarch64_legitimate_address_p (machine_mode, rtx, RTX_CODE, bool); machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx); rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index f343d92..71a651c 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -4716,6 +4716,13 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) && GET_CODE (y) == ZERO_EXTEND) return CC_Cmode; + /* A test for signed overflow. */ + if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode) + && code == NE + && GET_CODE (x) == PLUS + && GET_CODE (y) == SIGN_EXTEND) + return CC_Vmode; + /* For everything else, return CCmode. 
*/ return CCmode; } @@ -4822,6 +4829,15 @@ aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code) } break; + case CC_Vmode: + switch (comp_code) + { + case NE: return AARCH64_VS; + case EQ: return AARCH64_VC; + default: return -1; + } + break; + default: return -1; } @@ -13630,6 +13646,88 @@ aarch64_split_dimode_const_store (rtx dst, rtx src) return true; } +/* Generate RTL for a conditional branch with rtx comparison CODE in + mode CC_MODE. The destination of the unlikely conditional branch + is LABEL_REF. */ + +void +aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode, + rtx label_ref) +{ + rtx x; + x = gen_rtx_fmt_ee (code, VOIDmode, + gen_rtx_REG (cc_mode, CC_REGNUM), + const0_rtx); + + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, label_ref), + pc_rtx); + aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); +} + +void aarch64_add_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, + rtx *low_in1, rtx *low_in2, + rtx *high_dest, rtx *high_in1, + rtx *high_in2) +{ + *low_dest = gen_reg_rtx (DImode); + *low_in1 = gen_lowpart (DImode, op1); + *low_in2 = simplify_gen_subreg (DImode, op2, TImode, + subreg_lowpart_offset (DImode, TImode)); + *high_dest = gen_reg_rtx (DImode); + *high_in1 = gen_highpart (DImode, op1); + *high_in2 = simplify_gen_subreg (DImode, op2, TImode, + subreg_highpart_offset (DImode, TImode)); +} + +void aarch64_subv_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, + rtx *low_in1, rtx *low_in2, + rtx *high_dest, rtx *high_in1, + rtx *high_in2) +{ + *low_dest = gen_reg_rtx (DImode); + *low_in1 = simplify_gen_subreg (DImode, op1, TImode, + subreg_lowpart_offset (DImode, TImode)); + *low_in2 = simplify_gen_subreg (DImode, op2, TImode, + subreg_lowpart_offset (DImode, TImode)); + *high_dest = gen_reg_rtx (DImode); + *high_in1 = simplify_gen_subreg (DImode, op1, TImode, + subreg_highpart_offset (DImode, TImode)); + *high_in2 = simplify_gen_subreg (DImode, op2, TImode, + 
subreg_highpart_offset (DImode, TImode)); + +} + +void aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1, + rtx low_in2, rtx high_dest, rtx high_in1, + rtx high_in2) +{ + if (low_in2 == const0_rtx) + { + low_dest = low_in1; + emit_insn (gen_subdi3_compare1 (high_dest, high_in1, + force_reg (DImode, high_in2))); + } + else + { + if (CONST_INT_P (low_in2)) + { + low_in2 = force_reg (DImode, GEN_INT (-UINTVAL (low_in2))); + high_in2 = force_reg (DImode, high_in2); + emit_insn (gen_adddi3_compareC (low_dest, low_in1, low_in2)); + } + else + emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2)); + emit_insn (gen_subdi3_carryinCV (high_dest, + force_reg (DImode, high_in1), + high_in2)); + } + + emit_move_insn (gen_lowpart (DImode, op0), low_dest); + emit_move_insn (gen_highpart (DImode, op0), high_dest); + +} + /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ static unsigned HOST_WIDE_INT diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index a693a3b..3976ecb 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1711,25 +1711,123 @@ } ) +(define_expand "addv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (match_operand 3 "")] + "" +{ + emit_insn (gen_add<mode>3_compareV (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + + DONE; +}) + +(define_expand "uaddv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (match_operand 3 "")] + "" +{ + emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); + + DONE; +}) + + (define_expand "addti3" [(set (match_operand:TI 0 "register_operand" "") (plus:TI (match_operand:TI 1 "register_operand" "") - (match_operand:TI 2 "register_operand" "")))] + 
(match_operand:TI 2 "aarch64_reg_or_imm" "")))] "" { - rtx low = gen_reg_rtx (DImode); - emit_insn (gen_adddi3_compareC (low, gen_lowpart (DImode, operands[1]), - gen_lowpart (DImode, operands[2]))); + rtx l0,l1,l2,h0,h1,h2; - rtx high = gen_reg_rtx (DImode); - emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]), - gen_highpart (DImode, operands[2]))); + aarch64_add_128bit_scratch_regs (operands[1], operands[2], + &l0, &l1, &l2, &h0, &h1, &h2); + + if (l2 == const0_rtx) + { + l0 = l1; + if (!aarch64_pluslong_operand (h2, DImode)) + h2 = force_reg (DImode, h2); + emit_insn (gen_adddi3 (h0, h1, h2)); + } + else + { + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); + emit_insn (gen_adddi3_carryin (h0, h1, force_reg (DImode, h2))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); - emit_move_insn (gen_lowpart (DImode, operands[0]), low); - emit_move_insn (gen_highpart (DImode, operands[0]), high); DONE; }) +(define_expand "addvti4" + [(match_operand:TI 0 "register_operand" "") + (match_operand:TI 1 "register_operand" "") + (match_operand:TI 2 "aarch64_reg_or_imm" "") + (match_operand 3 "")] + "" +{ + rtx l0,l1,l2,h0,h1,h2; + + aarch64_add_128bit_scratch_regs (operands[1], operands[2], + &l0, &l1, &l2, &h0, &h1, &h2); + + if (l2 == const0_rtx) + { + l0 = l1; + emit_insn (gen_adddi3_compareV (h0, h1, force_reg (DImode, h2))); + } + else + { + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); + emit_insn (gen_adddi3_carryinV (h0, h1, force_reg (DImode, h2))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + DONE; +}) + +(define_expand "uaddvti4" + [(match_operand:TI 0 "register_operand" "") + (match_operand:TI 1 "register_operand" "") + (match_operand:TI 2 "aarch64_reg_or_imm" "") + (match_operand 3 "")] + "" 
+{ + rtx l0,l1,l2,h0,h1,h2; + + aarch64_add_128bit_scratch_regs (operands[1], operands[2], + &l0, &l1, &l2, &h0, &h1, &h2); + + if (l2 == const0_rtx) + { + l0 = l1; + emit_insn (gen_adddi3_compareC (h0, h1, force_reg (DImode, h2))); + } + else + { + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); + emit_insn (gen_adddi3_carryinC (h0, h1, force_reg (DImode, h2))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + + aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); + DONE; + }) + (define_insn "add<mode>3_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ @@ -1828,10 +1926,70 @@ [(set_attr "type" "alus_sreg")] ) +;; Note that since we're sign-extending, match the immediate in GPI +;; rather than in DWI. Since CONST_INT is modeless, this works fine. +(define_insn "*add<mode>3_compareV_cconly_imm" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r,r")) + (match_operand:GPI 1 "aarch64_plus_immediate" "I,J")) + (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] + "" + "@ + cmn\\t%<w>0, %<w>1 + cmp\\t%<w>0, #%n1" + [(set_attr "type" "alus_imm")] +) + +(define_insn "*add<mode>3_compareV_cconly" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r")) + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] + "" + "cmn\\t%<w>0, %<w>1" + [(set_attr "type" "alus_sreg")] +) + +(define_insn "*add<mode>3_compareV_imm" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r,r")) + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")) + (sign_extend:<DWI> + (plus:GPI (match_dup 1) (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand" "=r,r") + (plus:GPI (match_dup 1) (match_dup 2)))] + "" + "@ 
+ adds\\t%<w>0, %<w>1, %<w>2 + subs\\t%<w>0, %<w>1, #%n2" + [(set_attr "type" "alus_imm,alus_imm")] +) + +(define_insn "add<mode>3_compareV" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")) + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) + (sign_extend:<DWI> (plus:GPI (match_dup 1) (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (match_dup 1) (match_dup 2)))] + "" + "adds\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "alus_sreg")] +) + (define_insn "*adds_shift_imm_<mode>" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (plus:GPI (ASHIFT:GPI + (plus:GPI (ASHIFT:GPI (match_operand:GPI 1 "register_operand" "r") (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n")) (match_operand:GPI 3 "register_operand" "r")) @@ -2187,6 +2345,138 @@ [(set_attr "type" "adc_reg")] ) +(define_expand "add<mode>3_carryinC" + [(parallel + [(set (match_dup 3) + (ne:CC_C + (plus:<DWI> + (plus:<DWI> + (match_dup 4) + (zero_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r"))) + (zero_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r"))) + (zero_extend:<DWI> + (plus:GPI + (plus:GPI (match_dup 5) (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 5) (match_dup 1)) + (match_dup 2)))])] + "" +{ + operands[3] = gen_rtx_REG (CC_Cmode, CC_REGNUM); + operands[4] = gen_rtx_NE (<DWI>mode, operands[3], const0_rtx); + operands[5] = gen_rtx_NE (<MODE>mode, operands[3], const0_rtx); +}) + +(define_insn "*add<mode>3_carryinC_zero" + [(set (reg:CC_C CC_REGNUM) + (ne:CC_C + (plus:<DWI> + (match_operand:<DWI> 2 "aarch64_carry_operation" "") + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (zero_extend:<DWI> + (plus:GPI + (match_operand:GPI 3 "aarch64_carry_operation" "") + (match_dup 1))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI (match_dup 3) (match_dup 1)))] + "" + 
"adcs\\t%<w>0, %<w>1, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add<mode>3_carryinC" + [(set (reg:CC_C CC_REGNUM) + (ne:CC_C + (plus:<DWI> + (plus:<DWI> + (match_operand:<DWI> 3 "aarch64_carry_operation" "") + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) + (zero_extend:<DWI> + (plus:GPI + (plus:GPI + (match_operand:GPI 4 "aarch64_carry_operation" "") + (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))] + "" + "adcs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "adc_reg")] +) + +(define_expand "add<mode>3_carryinV" + [(parallel + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (plus:<DWI> + (match_dup 3) + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r"))) + (sign_extend:<DWI> + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))])] + "" +{ + rtx cc = gen_rtx_REG (CC_Cmode, CC_REGNUM); + operands[3] = gen_rtx_NE (<DWI>mode, cc, const0_rtx); + operands[4] = gen_rtx_NE (<MODE>mode, cc, const0_rtx); +}) + +(define_insn "*add<mode>3_carryinV_zero" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (match_operand:<DWI> 2 "aarch64_carry_operation" "") + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> + (plus:GPI + (match_operand:GPI 3 "aarch64_carry_operation" "") + (match_dup 1))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI (match_dup 3) (match_dup 1)))] + "" + "adcs\\t%<w>0, %<w>1, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add<mode>3_carryinV" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (plus:<DWI> + (match_operand:<DWI> 3 "aarch64_carry_operation" 
"") + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) + (sign_extend:<DWI> + (plus:GPI + (plus:GPI + (match_operand:GPI 4 "aarch64_carry_operation" "") + (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))] + "" + "adcs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "adc_reg")] +) + (define_insn "*add_uxt<mode>_shift2" [(set (match_operand:GPI 0 "register_operand" "=rk") (plus:GPI (and:GPI @@ -2283,22 +2573,86 @@ (set_attr "simd" "*,yes")] ) +(define_expand "subv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "aarch64_reg_or_zero") + (match_operand:GPI 2 "aarch64_reg_or_zero") + (match_operand 3 "")] + "" +{ + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + + DONE; +}) + +(define_expand "usubv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "aarch64_reg_or_zero") + (match_operand:GPI 2 "aarch64_reg_or_zero") + (match_operand 3 "")] + "" +{ + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); + + DONE; +}) + (define_expand "subti3" [(set (match_operand:TI 0 "register_operand" "") - (minus:TI (match_operand:TI 1 "register_operand" "") + (minus:TI (match_operand:TI 1 "aarch64_reg_or_zero" "") (match_operand:TI 2 "register_operand" "")))] "" { - rtx low = gen_reg_rtx (DImode); - emit_insn (gen_subdi3_compare1 (low, gen_lowpart (DImode, operands[1]), - gen_lowpart (DImode, operands[2]))); + rtx l0 = gen_reg_rtx (DImode); + rtx l1 = simplify_gen_subreg (DImode, operands[1], TImode, + subreg_lowpart_offset (DImode, TImode)); + rtx l2 = gen_lowpart (DImode, operands[2]); + rtx h0 = gen_reg_rtx (DImode); + rtx h1 = simplify_gen_subreg (DImode, operands[1], TImode, + 
subreg_highpart_offset (DImode, TImode)); + rtx h2 = gen_highpart (DImode, operands[2]); - rtx high = gen_reg_rtx (DImode); - emit_insn (gen_subdi3_carryin (high, gen_highpart (DImode, operands[1]), - gen_highpart (DImode, operands[2]))); + emit_insn (gen_subdi3_compare1 (l0, l1, l2)); + emit_insn (gen_subdi3_carryin (h0, h1, h2)); - emit_move_insn (gen_lowpart (DImode, operands[0]), low); - emit_move_insn (gen_highpart (DImode, operands[0]), high); + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + DONE; +}) + +(define_expand "subvti4" + [(match_operand:TI 0 "register_operand") + (match_operand:TI 1 "aarch64_reg_or_zero") + (match_operand:TI 2 "aarch64_reg_or_imm") + (match_operand 3 "")] + "" +{ + rtx l0,l1,l2,h0,h1,h2; + + aarch64_subv_128bit_scratch_regs (operands[1], operands[2], + &l0, &l1, &l2, &h0, &h1, &h2); + aarch64_expand_subvti (operands[0], l0, l1, l2, h0, h1, h2); + + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + DONE; +}) + +(define_expand "usubvti4" + [(match_operand:TI 0 "register_operand") + (match_operand:TI 1 "aarch64_reg_or_zero") + (match_operand:TI 2 "aarch64_reg_or_imm") + (match_operand 3 "")] + "" +{ + rtx l0,l1,l2,h0,h1,h2; + + aarch64_subv_128bit_scratch_regs (operands[1], operands[2], + &l0, &l1, &l2, &h0, &h1, &h2); + aarch64_expand_subvti (operands[0], l0, l1, l2, h0, h1, h2); + + aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); DONE; }) @@ -2327,6 +2681,22 @@ [(set_attr "type" "alus_sreg")] ) +(define_insn "*sub<mode>3_compare1_imm" + [(set (reg:CC CC_REGNUM) + (compare:CC + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ,rZ") + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))) + (set (match_operand:GPI 0 "register_operand" "=r,r") + (plus:GPI + (match_dup 1) + (match_operand:GPI 3 "aarch64_plus_immediate" "J,I")))] + "UINTVAL (operands[2]) == -UINTVAL (operands[3])" + "@ + subs\\t%<w>0, %<w>1, %<w>2 + adds\\t%<w>0, %<w>1, %<w>3" + 
[(set_attr "type" "alus_imm")] +) + (define_insn "sub<mode>3_compare1" [(set (reg:CC CC_REGNUM) (compare:CC @@ -2554,6 +2924,85 @@ [(set_attr "type" "adc_reg")] ) +(define_expand "sub<mode>3_carryinCV" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC + (sign_extend:<DWI> + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r")) + (ltu:<DWI> (reg:CC CC_REGNUM) (const_int 0))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) + (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))])] + "" +) + +(define_insn "*sub<mode>3_carryinCV_z1_z2" + [(set (reg:CC CC_REGNUM) + (compare:CC + (const_int 0) + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) + (set (match_operand:GPI 0 "register_operand" "=r") + (neg:GPI (match_operand:GPI 1 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, <w>zr, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub<mode>3_carryinCV_z1" + [(set (reg:CC CC_REGNUM) + (compare:CC + (const_int 0) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r")) + (match_operand:<DWI> 2 "aarch64_borrow_operation" "")))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (neg:GPI (match_dup 1)) + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, <w>zr, %<w>1" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub<mode>3_carryinCV_z2" + [(set (reg:CC CC_REGNUM) + (compare:CC + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r")) + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (match_dup 1) + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, %<w>1, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub<mode>3_carryinCV" + [(set (reg:CC CC_REGNUM) + (compare:CC + (sign_extend:<DWI> + (match_operand:GPI 1 
"register_operand" "r")) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r")) + (match_operand:<DWI> 3 "aarch64_borrow_operation" "")))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) + (match_operand:GPI 4 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "adc_reg")] +) + (define_insn "*sub_uxt<mode>_shift2" [(set (match_operand:GPI 0 "register_operand" "=rk") (minus:GPI (match_operand:GPI 4 "register_operand" "rk") diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c new file mode 100644 index 0000000..6d84bb6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +__int128 overflow_add (__int128 x, __int128 y) +{ + __int128 r; + + int ovr = __builtin_add_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "adds" } } */ +/* { dg-final { scan-assembler "addcs" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c b/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c new file mode 100644 index 0000000..9768a98 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +long overflow_add (long x, long y) +{ + long r; + + int ovr = __builtin_saddl_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "adds" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c b/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c new file mode 100644 index 0000000..126a526 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern 
void overflow_handler (); + +long long overflow_add (long long x, long long y) +{ + long long r; + + int ovr = __builtin_saddll_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "adds" } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c new file mode 100644 index 0000000..c1261e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +__int128 overflow_sub (__int128 x, __int128 y) +{ + __int128 r; + + int ovr = __builtin_sub_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "subs" } } */ +/* { dg-final { scan-assembler "sbcs" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c b/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c new file mode 100644 index 0000000..1040464 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +long overflow_sub (long x, long y) +{ + long r; + + int ovr = __builtin_ssubl_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "subs" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c b/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c new file mode 100644 index 0000000..a03df88 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +long long overflow_sub (long long x, long long y) +{ + long long r; + + int ovr = __builtin_ssubll_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "subs" } } */ + diff --git 
a/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c new file mode 100644 index 0000000..8c7c998 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +unsigned __int128 overflow_add (unsigned __int128 x, unsigned __int128 y) +{ + unsigned __int128 r; + + int ovr = __builtin_add_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "adds" } } */ +/* { dg-final { scan-assembler "addcs" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c b/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c new file mode 100644 index 0000000..e325591 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +unsigned long overflow_add (unsigned long x, unsigned long y) +{ + unsigned long r; + + int ovr = __builtin_uaddl_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "adds" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c b/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c new file mode 100644 index 0000000..5f42886 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +unsigned long long overflow_add (unsigned long long x, unsigned long long y) +{ + unsigned long long r; + + int ovr = __builtin_uaddll_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "adds" } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c new file mode 100644 index 0000000..a84f4a4 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +unsigned __int128 overflow_sub (unsigned __int128 x, unsigned __int128 y) +{ + unsigned __int128 r; + + int ovr = __builtin_sub_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "subs" } } */ +/* { dg-final { scan-assembler "sbcs" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c b/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c new file mode 100644 index 0000000..ed033da --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +unsigned long overflow_sub (unsigned long x, unsigned long y) +{ + unsigned long r; + + int ovr = __builtin_usubl_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "subs" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c b/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c new file mode 100644 index 0000000..a742f0c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +unsigned long long overflow_sub (unsigned long long x, unsigned long long y) +{ + unsigned long long r; + + int ovr = __builtin_usubll_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "subs" } } */ + -- 1.9.1 ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH][Aarch64] Add support for overflow add and sub operations 2017-05-19 6:27 Michael Collison @ 2017-05-19 11:00 ` Christophe Lyon 2017-05-19 21:42 ` Michael Collison 0 siblings, 1 reply; 8+ messages in thread From: Christophe Lyon @ 2017-05-19 11:00 UTC (permalink / raw) To: Michael Collison; +Cc: gcc-patches, nd Hi Michael, On 19 May 2017 at 07:12, Michael Collison <Michael.Collison@arm.com> wrote: > Hi, > > This patch improves code generations for builtin arithmetic overflow operations for the aarch64 backend. As an example for a simple test case such as: > > Sure for a simple test case such as: > > int > f (int x, int y, int *ovf) > { > int res; > *ovf = __builtin_sadd_overflow (x, y, &res); > return res; > } > > Current trunk at -O2 generates > > f: > mov w3, w0 > mov w4, 0 > add w0, w0, w1 > tbnz w1, #31, .L4 > cmp w0, w3 > blt .L3 > .L2: > str w4, [x2] > ret > .p2align 3 > .L4: > cmp w0, w3 > ble .L2 > .L3: > mov w4, 1 > b .L2 > > > With the patch this now generates: > > f: > adds w0, w0, w1 > cset w1, vs > str w1, [x2] > ret > > > Original patch from Richard Henderson: > > https://gcc.gnu.org/ml/gcc-patches/2016-01/msg01903.html > > > Okay for trunk? > > 2017-05-17 Michael Collison <michael.collison@arm.com> > Richard Henderson <rth@redhat.com> > > * config/aarch64/aarch64-modes.def (CC_V): New. > * config/aarch64/aarch64-protos.h > (aarch64_add_128bit_scratch_regs): Declare > (aarch64_add_128bit_scratch_regs): Declare. > (aarch64_expand_subvti): Declare. > (aarch64_gen_unlikely_cbranch): Declare > * config/aarch64/aarch64.c (aarch64_select_cc_mode): Test > for signed overflow using CC_Vmode. > (aarch64_get_condition_code_1): Handle CC_Vmode. > (aarch64_gen_unlikely_cbranch): New function. > (aarch64_add_128bit_scratch_regs): New function. > (aarch64_subv_128bit_scratch_regs): New function. > (aarch64_expand_subvti): New function. > * config/aarch64/aarch64.md (addv<GPI>4, uaddv<GPI>4): New. 
> (addti3): Create simpler code if low part is already known to be 0. > (addvti4, uaddvti4): New. > (*add<GPI>3_compareC_cconly_imm): New. > (*add<GPI>3_compareC_cconly): New. > (*add<GPI>3_compareC_imm): New. > (*add<GPI>3_compareC): Rename from add<GPI>3_compare1; do not > handle constants within this pattern. > (*add<GPI>3_compareV_cconly_imm): New. > (*add<GPI>3_compareV_cconly): New. > (*add<GPI>3_compareV_imm): New. > (add<GPI>3_compareV): New. > (add<GPI>3_carryinC, add<GPI>3_carryinV): New. > (*add<GPI>3_carryinC_zero, *add<GPI>3_carryinV_zero): New. > (*add<GPI>3_carryinC, *add<GPI>3_carryinV): New. > (subv<GPI>4, usubv<GPI>4): New. > (subti): Handle op1 zero. > (subvti4, usub4ti4): New. > (*sub<GPI>3_compare1_imm): New. > (sub<GPI>3_carryinCV): New. > (*sub<GPI>3_carryinCV_z1_z2, *sub<GPI>3_carryinCV_z1): New. > (*sub<GPI>3_carryinCV_z2, *sub<GPI>3_carryinCV): New. > * testsuite/gcc.target/arm/builtin_sadd_128.c: New testcase. > * testsuite/gcc.target/arm/builtin_saddl.c: New testcase. > * testsuite/gcc.target/arm/builtin_saddll.c: New testcase. > * testsuite/gcc.target/arm/builtin_uadd_128.c: New testcase. > * testsuite/gcc.target/arm/builtin_uaddl.c: New testcase. > * testsuite/gcc.target/arm/builtin_uaddll.c: New testcase. > * testsuite/gcc.target/arm/builtin_ssub_128.c: New testcase. > * testsuite/gcc.target/arm/builtin_ssubl.c: New testcase. > * testsuite/gcc.target/arm/builtin_ssubll.c: New testcase. > * testsuite/gcc.target/arm/builtin_usub_128.c: New testcase. > * testsuite/gcc.target/arm/builtin_usubl.c: New testcase. > * testsuite/gcc.target/arm/builtin_usubll.c: New testcase. I've tried your patch, and 2 of the new tests FAIL: gcc.target/aarch64/builtin_sadd_128.c scan-assembler addcs gcc.target/aarch64/builtin_uadd_128.c scan-assembler addcs Am I missing something? Thanks, Christophe ^ permalink raw reply [flat|nested] 8+ messages in thread
* RE: [PATCH][Aarch64] Add support for overflow add and sub operations 2017-05-19 11:00 ` Christophe Lyon @ 2017-05-19 21:42 ` Michael Collison 2017-07-05 9:38 ` Richard Earnshaw (lists) 0 siblings, 1 reply; 8+ messages in thread From: Michael Collison @ 2017-05-19 21:42 UTC (permalink / raw) To: Christophe Lyon; +Cc: gcc-patches, nd [-- Attachment #1: Type: text/plain, Size: 4671 bytes --] Christophe, I had a typo in the two test cases: "addcs" should have been "adcs". I caught this previously but submitted the previous patch incorrectly. Updated patch attached. Okay for trunk? -----Original Message----- From: Christophe Lyon [mailto:christophe.lyon@linaro.org] Sent: Friday, May 19, 2017 3:59 AM To: Michael Collison <Michael.Collison@arm.com> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com> Subject: Re: [PATCH][Aarch64] Add support for overflow add and sub operations Hi Michael, On 19 May 2017 at 07:12, Michael Collison <Michael.Collison@arm.com> wrote: > Hi, > > This patch improves code generations for builtin arithmetic overflow operations for the aarch64 backend. As an example for a simple test case such as: > > Sure for a simple test case such as: > > int > f (int x, int y, int *ovf) > { > int res; > *ovf = __builtin_sadd_overflow (x, y, &res); > return res; > } > > Current trunk at -O2 generates > > f: > mov w3, w0 > mov w4, 0 > add w0, w0, w1 > tbnz w1, #31, .L4 > cmp w0, w3 > blt .L3 > .L2: > str w4, [x2] > ret > .p2align 3 > .L4: > cmp w0, w3 > ble .L2 > .L3: > mov w4, 1 > b .L2 > > > With the patch this now generates: > > f: > adds w0, w0, w1 > cset w1, vs > str w1, [x2] > ret > > > Original patch from Richard Henderson: > > https://gcc.gnu.org/ml/gcc-patches/2016-01/msg01903.html > > > Okay for trunk? > > 2017-05-17 Michael Collison <michael.collison@arm.com> > Richard Henderson <rth@redhat.com> > > * config/aarch64/aarch64-modes.def (CC_V): New. 
> * config/aarch64/aarch64-protos.h > (aarch64_add_128bit_scratch_regs): Declare > (aarch64_add_128bit_scratch_regs): Declare. > (aarch64_expand_subvti): Declare. > (aarch64_gen_unlikely_cbranch): Declare > * config/aarch64/aarch64.c (aarch64_select_cc_mode): Test > for signed overflow using CC_Vmode. > (aarch64_get_condition_code_1): Handle CC_Vmode. > (aarch64_gen_unlikely_cbranch): New function. > (aarch64_add_128bit_scratch_regs): New function. > (aarch64_subv_128bit_scratch_regs): New function. > (aarch64_expand_subvti): New function. > * config/aarch64/aarch64.md (addv<GPI>4, uaddv<GPI>4): New. > (addti3): Create simpler code if low part is already known to be 0. > (addvti4, uaddvti4): New. > (*add<GPI>3_compareC_cconly_imm): New. > (*add<GPI>3_compareC_cconly): New. > (*add<GPI>3_compareC_imm): New. > (*add<GPI>3_compareC): Rename from add<GPI>3_compare1; do not > handle constants within this pattern. > (*add<GPI>3_compareV_cconly_imm): New. > (*add<GPI>3_compareV_cconly): New. > (*add<GPI>3_compareV_imm): New. > (add<GPI>3_compareV): New. > (add<GPI>3_carryinC, add<GPI>3_carryinV): New. > (*add<GPI>3_carryinC_zero, *add<GPI>3_carryinV_zero): New. > (*add<GPI>3_carryinC, *add<GPI>3_carryinV): New. > (subv<GPI>4, usubv<GPI>4): New. > (subti): Handle op1 zero. > (subvti4, usub4ti4): New. > (*sub<GPI>3_compare1_imm): New. > (sub<GPI>3_carryinCV): New. > (*sub<GPI>3_carryinCV_z1_z2, *sub<GPI>3_carryinCV_z1): New. > (*sub<GPI>3_carryinCV_z2, *sub<GPI>3_carryinCV): New. > * testsuite/gcc.target/arm/builtin_sadd_128.c: New testcase. > * testsuite/gcc.target/arm/builtin_saddl.c: New testcase. > * testsuite/gcc.target/arm/builtin_saddll.c: New testcase. > * testsuite/gcc.target/arm/builtin_uadd_128.c: New testcase. > * testsuite/gcc.target/arm/builtin_uaddl.c: New testcase. > * testsuite/gcc.target/arm/builtin_uaddll.c: New testcase. > * testsuite/gcc.target/arm/builtin_ssub_128.c: New testcase. > * testsuite/gcc.target/arm/builtin_ssubl.c: New testcase. 
> * testsuite/gcc.target/arm/builtin_ssubll.c: New testcase. > * testsuite/gcc.target/arm/builtin_usub_128.c: New testcase. > * testsuite/gcc.target/arm/builtin_usubl.c: New testcase. > * testsuite/gcc.target/arm/builtin_usubll.c: New testcase. I've tried your patch, and 2 of the new tests FAIL: gcc.target/aarch64/builtin_sadd_128.c scan-assembler addcs gcc.target/aarch64/builtin_uadd_128.c scan-assembler addcs Am I missing something? Thanks, Christophe [-- Attachment #2: pr6308v2.patch --] [-- Type: application/octet-stream, Size: 29467 bytes --] diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def index 45f7a44..244e490 100644 --- a/gcc/config/aarch64/aarch64-modes.def +++ b/gcc/config/aarch64/aarch64-modes.def @@ -24,6 +24,7 @@ CC_MODE (CC_SWP); CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */ CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */ CC_MODE (CC_C); /* Only C bit of condition flags is valid. */ +CC_MODE (CC_V); /* Only V bit of condition flags is valid. */ /* Half-precision floating point for __fp16. 
*/ FLOAT_MODE (HF, 2, 0); diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index f55d4ba..f38b2b8 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -388,6 +388,18 @@ void aarch64_relayout_simd_types (void); void aarch64_reset_previous_fndecl (void); bool aarch64_return_address_signing_enabled (void); void aarch64_save_restore_target_globals (tree); +void aarch64_add_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, + rtx *low_in1, rtx *low_in2, + rtx *high_dest, rtx *high_in1, + rtx *high_in2); +void aarch64_subv_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, + rtx *low_in1, rtx *low_in2, + rtx *high_dest, rtx *high_in1, + rtx *high_in2); +void aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1, + rtx low_in2, rtx high_dest, rtx high_in1, + rtx high_in2); + /* Initialize builtins for SIMD intrinsics. */ void init_aarch64_simd_builtins (void); @@ -412,6 +424,8 @@ bool aarch64_float_const_representable_p (rtx); #if defined (RTX_CODE) +void aarch64_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode, + rtx label_ref); bool aarch64_legitimate_address_p (machine_mode, rtx, RTX_CODE, bool); machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx); rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index f343d92..71a651c 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -4716,6 +4716,13 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) && GET_CODE (y) == ZERO_EXTEND) return CC_Cmode; + /* A test for signed overflow. */ + if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode) + && code == NE + && GET_CODE (x) == PLUS + && GET_CODE (y) == SIGN_EXTEND) + return CC_Vmode; + /* For everything else, return CCmode. 
*/ return CCmode; } @@ -4822,6 +4829,15 @@ aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code) } break; + case CC_Vmode: + switch (comp_code) + { + case NE: return AARCH64_VS; + case EQ: return AARCH64_VC; + default: return -1; + } + break; + default: return -1; } @@ -13630,6 +13646,88 @@ aarch64_split_dimode_const_store (rtx dst, rtx src) return true; } +/* Generate RTL for a conditional branch with rtx comparison CODE in + mode CC_MODE. The destination of the unlikely conditional branch + is LABEL_REF. */ + +void +aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode, + rtx label_ref) +{ + rtx x; + x = gen_rtx_fmt_ee (code, VOIDmode, + gen_rtx_REG (cc_mode, CC_REGNUM), + const0_rtx); + + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, label_ref), + pc_rtx); + aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); +} + +void aarch64_add_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, + rtx *low_in1, rtx *low_in2, + rtx *high_dest, rtx *high_in1, + rtx *high_in2) +{ + *low_dest = gen_reg_rtx (DImode); + *low_in1 = gen_lowpart (DImode, op1); + *low_in2 = simplify_gen_subreg (DImode, op2, TImode, + subreg_lowpart_offset (DImode, TImode)); + *high_dest = gen_reg_rtx (DImode); + *high_in1 = gen_highpart (DImode, op1); + *high_in2 = simplify_gen_subreg (DImode, op2, TImode, + subreg_highpart_offset (DImode, TImode)); +} + +void aarch64_subv_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, + rtx *low_in1, rtx *low_in2, + rtx *high_dest, rtx *high_in1, + rtx *high_in2) +{ + *low_dest = gen_reg_rtx (DImode); + *low_in1 = simplify_gen_subreg (DImode, op1, TImode, + subreg_lowpart_offset (DImode, TImode)); + *low_in2 = simplify_gen_subreg (DImode, op2, TImode, + subreg_lowpart_offset (DImode, TImode)); + *high_dest = gen_reg_rtx (DImode); + *high_in1 = simplify_gen_subreg (DImode, op1, TImode, + subreg_highpart_offset (DImode, TImode)); + *high_in2 = simplify_gen_subreg (DImode, op2, TImode, + 
subreg_highpart_offset (DImode, TImode)); + +} + +void aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1, + rtx low_in2, rtx high_dest, rtx high_in1, + rtx high_in2) +{ + if (low_in2 == const0_rtx) + { + low_dest = low_in1; + emit_insn (gen_subdi3_compare1 (high_dest, high_in1, + force_reg (DImode, high_in2))); + } + else + { + if (CONST_INT_P (low_in2)) + { + low_in2 = force_reg (DImode, GEN_INT (-UINTVAL (low_in2))); + high_in2 = force_reg (DImode, high_in2); + emit_insn (gen_adddi3_compareC (low_dest, low_in1, low_in2)); + } + else + emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2)); + emit_insn (gen_subdi3_carryinCV (high_dest, + force_reg (DImode, high_in1), + high_in2)); + } + + emit_move_insn (gen_lowpart (DImode, op0), low_dest); + emit_move_insn (gen_highpart (DImode, op0), high_dest); + +} + /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ static unsigned HOST_WIDE_INT diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index a693a3b..3976ecb 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1711,25 +1711,123 @@ } ) +(define_expand "addv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (match_operand 3 "")] + "" +{ + emit_insn (gen_add<mode>3_compareV (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + + DONE; +}) + +(define_expand "uaddv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (match_operand 3 "")] + "" +{ + emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); + + DONE; +}) + + (define_expand "addti3" [(set (match_operand:TI 0 "register_operand" "") (plus:TI (match_operand:TI 1 "register_operand" "") - (match_operand:TI 2 "register_operand" "")))] + 
(match_operand:TI 2 "aarch64_reg_or_imm" "")))] "" { - rtx low = gen_reg_rtx (DImode); - emit_insn (gen_adddi3_compareC (low, gen_lowpart (DImode, operands[1]), - gen_lowpart (DImode, operands[2]))); + rtx l0,l1,l2,h0,h1,h2; - rtx high = gen_reg_rtx (DImode); - emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]), - gen_highpart (DImode, operands[2]))); + aarch64_add_128bit_scratch_regs (operands[1], operands[2], + &l0, &l1, &l2, &h0, &h1, &h2); + + if (l2 == const0_rtx) + { + l0 = l1; + if (!aarch64_pluslong_operand (h2, DImode)) + h2 = force_reg (DImode, h2); + emit_insn (gen_adddi3 (h0, h1, h2)); + } + else + { + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); + emit_insn (gen_adddi3_carryin (h0, h1, force_reg (DImode, h2))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); - emit_move_insn (gen_lowpart (DImode, operands[0]), low); - emit_move_insn (gen_highpart (DImode, operands[0]), high); DONE; }) +(define_expand "addvti4" + [(match_operand:TI 0 "register_operand" "") + (match_operand:TI 1 "register_operand" "") + (match_operand:TI 2 "aarch64_reg_or_imm" "") + (match_operand 3 "")] + "" +{ + rtx l0,l1,l2,h0,h1,h2; + + aarch64_add_128bit_scratch_regs (operands[1], operands[2], + &l0, &l1, &l2, &h0, &h1, &h2); + + if (l2 == const0_rtx) + { + l0 = l1; + emit_insn (gen_adddi3_compareV (h0, h1, force_reg (DImode, h2))); + } + else + { + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); + emit_insn (gen_adddi3_carryinV (h0, h1, force_reg (DImode, h2))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + DONE; +}) + +(define_expand "uaddvti4" + [(match_operand:TI 0 "register_operand" "") + (match_operand:TI 1 "register_operand" "") + (match_operand:TI 2 "aarch64_reg_or_imm" "") + (match_operand 3 "")] + "" 
+{ + rtx l0,l1,l2,h0,h1,h2; + + aarch64_add_128bit_scratch_regs (operands[1], operands[2], + &l0, &l1, &l2, &h0, &h1, &h2); + + if (l2 == const0_rtx) + { + l0 = l1; + emit_insn (gen_adddi3_compareC (h0, h1, force_reg (DImode, h2))); + } + else + { + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); + emit_insn (gen_adddi3_carryinC (h0, h1, force_reg (DImode, h2))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + + aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); + DONE; + }) + (define_insn "add<mode>3_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ @@ -1828,10 +1926,70 @@ [(set_attr "type" "alus_sreg")] ) +;; Note that since we're sign-extending, match the immediate in GPI +;; rather than in DWI. Since CONST_INT is modeless, this works fine. +(define_insn "*add<mode>3_compareV_cconly_imm" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r,r")) + (match_operand:GPI 1 "aarch64_plus_immediate" "I,J")) + (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] + "" + "@ + cmn\\t%<w>0, %<w>1 + cmp\\t%<w>0, #%n1" + [(set_attr "type" "alus_imm")] +) + +(define_insn "*add<mode>3_compareV_cconly" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r")) + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] + "" + "cmn\\t%<w>0, %<w>1" + [(set_attr "type" "alus_sreg")] +) + +(define_insn "*add<mode>3_compareV_imm" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r,r")) + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")) + (sign_extend:<DWI> + (plus:GPI (match_dup 1) (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand" "=r,r") + (plus:GPI (match_dup 1) (match_dup 2)))] + "" + "@ 
+ adds\\t%<w>0, %<w>1, %<w>2 + subs\\t%<w>0, %<w>1, #%n2" + [(set_attr "type" "alus_imm,alus_imm")] +) + +(define_insn "add<mode>3_compareV" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")) + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) + (sign_extend:<DWI> (plus:GPI (match_dup 1) (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (match_dup 1) (match_dup 2)))] + "" + "adds\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "alus_sreg")] +) + (define_insn "*adds_shift_imm_<mode>" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (plus:GPI (ASHIFT:GPI + (plus:GPI (ASHIFT:GPI (match_operand:GPI 1 "register_operand" "r") (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n")) (match_operand:GPI 3 "register_operand" "r")) @@ -2187,6 +2345,138 @@ [(set_attr "type" "adc_reg")] ) +(define_expand "add<mode>3_carryinC" + [(parallel + [(set (match_dup 3) + (ne:CC_C + (plus:<DWI> + (plus:<DWI> + (match_dup 4) + (zero_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r"))) + (zero_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r"))) + (zero_extend:<DWI> + (plus:GPI + (plus:GPI (match_dup 5) (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 5) (match_dup 1)) + (match_dup 2)))])] + "" +{ + operands[3] = gen_rtx_REG (CC_Cmode, CC_REGNUM); + operands[4] = gen_rtx_NE (<DWI>mode, operands[3], const0_rtx); + operands[5] = gen_rtx_NE (<MODE>mode, operands[3], const0_rtx); +}) + +(define_insn "*add<mode>3_carryinC_zero" + [(set (reg:CC_C CC_REGNUM) + (ne:CC_C + (plus:<DWI> + (match_operand:<DWI> 2 "aarch64_carry_operation" "") + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (zero_extend:<DWI> + (plus:GPI + (match_operand:GPI 3 "aarch64_carry_operation" "") + (match_dup 1))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI (match_dup 3) (match_dup 1)))] + "" + 
"adcs\\t%<w>0, %<w>1, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add<mode>3_carryinC" + [(set (reg:CC_C CC_REGNUM) + (ne:CC_C + (plus:<DWI> + (plus:<DWI> + (match_operand:<DWI> 3 "aarch64_carry_operation" "") + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) + (zero_extend:<DWI> + (plus:GPI + (plus:GPI + (match_operand:GPI 4 "aarch64_carry_operation" "") + (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))] + "" + "adcs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "adc_reg")] +) + +(define_expand "add<mode>3_carryinV" + [(parallel + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (plus:<DWI> + (match_dup 3) + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r"))) + (sign_extend:<DWI> + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))])] + "" +{ + rtx cc = gen_rtx_REG (CC_Cmode, CC_REGNUM); + operands[3] = gen_rtx_NE (<DWI>mode, cc, const0_rtx); + operands[4] = gen_rtx_NE (<MODE>mode, cc, const0_rtx); +}) + +(define_insn "*add<mode>3_carryinV_zero" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (match_operand:<DWI> 2 "aarch64_carry_operation" "") + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> + (plus:GPI + (match_operand:GPI 3 "aarch64_carry_operation" "") + (match_dup 1))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI (match_dup 3) (match_dup 1)))] + "" + "adcs\\t%<w>0, %<w>1, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add<mode>3_carryinV" + [(set (reg:CC_V CC_REGNUM) + (ne:CC_V + (plus:<DWI> + (plus:<DWI> + (match_operand:<DWI> 3 "aarch64_carry_operation" 
"") + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) + (sign_extend:<DWI> + (plus:GPI + (plus:GPI + (match_operand:GPI 4 "aarch64_carry_operation" "") + (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))] + "" + "adcs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "adc_reg")] +) + (define_insn "*add_uxt<mode>_shift2" [(set (match_operand:GPI 0 "register_operand" "=rk") (plus:GPI (and:GPI @@ -2283,22 +2573,86 @@ (set_attr "simd" "*,yes")] ) +(define_expand "subv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "aarch64_reg_or_zero") + (match_operand:GPI 2 "aarch64_reg_or_zero") + (match_operand 3 "")] + "" +{ + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + + DONE; +}) + +(define_expand "usubv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "aarch64_reg_or_zero") + (match_operand:GPI 2 "aarch64_reg_or_zero") + (match_operand 3 "")] + "" +{ + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); + + DONE; +}) + (define_expand "subti3" [(set (match_operand:TI 0 "register_operand" "") - (minus:TI (match_operand:TI 1 "register_operand" "") + (minus:TI (match_operand:TI 1 "aarch64_reg_or_zero" "") (match_operand:TI 2 "register_operand" "")))] "" { - rtx low = gen_reg_rtx (DImode); - emit_insn (gen_subdi3_compare1 (low, gen_lowpart (DImode, operands[1]), - gen_lowpart (DImode, operands[2]))); + rtx l0 = gen_reg_rtx (DImode); + rtx l1 = simplify_gen_subreg (DImode, operands[1], TImode, + subreg_lowpart_offset (DImode, TImode)); + rtx l2 = gen_lowpart (DImode, operands[2]); + rtx h0 = gen_reg_rtx (DImode); + rtx h1 = simplify_gen_subreg (DImode, operands[1], TImode, + 
subreg_highpart_offset (DImode, TImode)); + rtx h2 = gen_highpart (DImode, operands[2]); - rtx high = gen_reg_rtx (DImode); - emit_insn (gen_subdi3_carryin (high, gen_highpart (DImode, operands[1]), - gen_highpart (DImode, operands[2]))); + emit_insn (gen_subdi3_compare1 (l0, l1, l2)); + emit_insn (gen_subdi3_carryin (h0, h1, h2)); - emit_move_insn (gen_lowpart (DImode, operands[0]), low); - emit_move_insn (gen_highpart (DImode, operands[0]), high); + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + DONE; +}) + +(define_expand "subvti4" + [(match_operand:TI 0 "register_operand") + (match_operand:TI 1 "aarch64_reg_or_zero") + (match_operand:TI 2 "aarch64_reg_or_imm") + (match_operand 3 "")] + "" +{ + rtx l0,l1,l2,h0,h1,h2; + + aarch64_subv_128bit_scratch_regs (operands[1], operands[2], + &l0, &l1, &l2, &h0, &h1, &h2); + aarch64_expand_subvti (operands[0], l0, l1, l2, h0, h1, h2); + + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + DONE; +}) + +(define_expand "usubvti4" + [(match_operand:TI 0 "register_operand") + (match_operand:TI 1 "aarch64_reg_or_zero") + (match_operand:TI 2 "aarch64_reg_or_imm") + (match_operand 3 "")] + "" +{ + rtx l0,l1,l2,h0,h1,h2; + + aarch64_subv_128bit_scratch_regs (operands[1], operands[2], + &l0, &l1, &l2, &h0, &h1, &h2); + aarch64_expand_subvti (operands[0], l0, l1, l2, h0, h1, h2); + + aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); DONE; }) @@ -2327,6 +2681,22 @@ [(set_attr "type" "alus_sreg")] ) +(define_insn "*sub<mode>3_compare1_imm" + [(set (reg:CC CC_REGNUM) + (compare:CC + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ,rZ") + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))) + (set (match_operand:GPI 0 "register_operand" "=r,r") + (plus:GPI + (match_dup 1) + (match_operand:GPI 3 "aarch64_plus_immediate" "J,I")))] + "UINTVAL (operands[2]) == -UINTVAL (operands[3])" + "@ + subs\\t%<w>0, %<w>1, %<w>2 + adds\\t%<w>0, %<w>1, %<w>3" + 
[(set_attr "type" "alus_imm")] +) + (define_insn "sub<mode>3_compare1" [(set (reg:CC CC_REGNUM) (compare:CC @@ -2554,6 +2924,85 @@ [(set_attr "type" "adc_reg")] ) +(define_expand "sub<mode>3_carryinCV" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC + (sign_extend:<DWI> + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r")) + (ltu:<DWI> (reg:CC CC_REGNUM) (const_int 0))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) + (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))])] + "" +) + +(define_insn "*sub<mode>3_carryinCV_z1_z2" + [(set (reg:CC CC_REGNUM) + (compare:CC + (const_int 0) + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) + (set (match_operand:GPI 0 "register_operand" "=r") + (neg:GPI (match_operand:GPI 1 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, <w>zr, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub<mode>3_carryinCV_z1" + [(set (reg:CC CC_REGNUM) + (compare:CC + (const_int 0) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r")) + (match_operand:<DWI> 2 "aarch64_borrow_operation" "")))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (neg:GPI (match_dup 1)) + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, <w>zr, %<w>1" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub<mode>3_carryinCV_z2" + [(set (reg:CC CC_REGNUM) + (compare:CC + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r")) + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (match_dup 1) + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, %<w>1, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub<mode>3_carryinCV" + [(set (reg:CC CC_REGNUM) + (compare:CC + (sign_extend:<DWI> + (match_operand:GPI 1 
"register_operand" "r")) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r")) + (match_operand:<DWI> 3 "aarch64_borrow_operation" "")))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) + (match_operand:GPI 4 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "adc_reg")] +) + (define_insn "*sub_uxt<mode>_shift2" [(set (match_operand:GPI 0 "register_operand" "=rk") (minus:GPI (match_operand:GPI 4 "register_operand" "rk") diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c new file mode 100644 index 0000000..0b31500 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +__int128 overflow_add (__int128 x, __int128 y) +{ + __int128 r; + + int ovr = __builtin_add_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "adds" } } */ +/* { dg-final { scan-assembler "adcs" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c b/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c new file mode 100644 index 0000000..9768a98 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +long overflow_add (long x, long y) +{ + long r; + + int ovr = __builtin_saddl_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "adds" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c b/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c new file mode 100644 index 0000000..126a526 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern 
void overflow_handler (); + +long long overflow_add (long long x, long long y) +{ + long long r; + + int ovr = __builtin_saddll_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "adds" } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c new file mode 100644 index 0000000..c1261e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +__int128 overflow_sub (__int128 x, __int128 y) +{ + __int128 r; + + int ovr = __builtin_sub_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "subs" } } */ +/* { dg-final { scan-assembler "sbcs" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c b/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c new file mode 100644 index 0000000..1040464 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +long overflow_sub (long x, long y) +{ + long r; + + int ovr = __builtin_ssubl_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "subs" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c b/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c new file mode 100644 index 0000000..a03df88 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +long long overflow_sub (long long x, long long y) +{ + long long r; + + int ovr = __builtin_ssubll_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "subs" } } */ + diff --git 
a/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c new file mode 100644 index 0000000..c573c2a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +unsigned __int128 overflow_add (unsigned __int128 x, unsigned __int128 y) +{ + unsigned __int128 r; + + int ovr = __builtin_add_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "adds" } } */ +/* { dg-final { scan-assembler "adcs" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c b/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c new file mode 100644 index 0000000..e325591 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +unsigned long overflow_add (unsigned long x, unsigned long y) +{ + unsigned long r; + + int ovr = __builtin_uaddl_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "adds" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c b/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c new file mode 100644 index 0000000..5f42886 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +unsigned long long overflow_add (unsigned long long x, unsigned long long y) +{ + unsigned long long r; + + int ovr = __builtin_uaddll_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "adds" } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c new file mode 100644 index 0000000..a84f4a4 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +unsigned __int128 overflow_sub (unsigned __int128 x, unsigned __int128 y) +{ + unsigned __int128 r; + + int ovr = __builtin_sub_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "subs" } } */ +/* { dg-final { scan-assembler "sbcs" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c b/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c new file mode 100644 index 0000000..ed033da --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +unsigned long overflow_sub (unsigned long x, unsigned long y) +{ + unsigned long r; + + int ovr = __builtin_usubl_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "subs" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c b/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c new file mode 100644 index 0000000..a742f0c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +unsigned long long overflow_sub (unsigned long long x, unsigned long long y) +{ + unsigned long long r; + + int ovr = __builtin_usubll_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "subs" } } */ + -- 1.9.1 ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH][Aarch64] Add support for overflow add and sub operations 2017-05-19 21:42 ` Michael Collison @ 2017-07-05 9:38 ` Richard Earnshaw (lists) 2017-07-06 7:29 ` Michael Collison 2017-08-01 6:33 ` Michael Collison 0 siblings, 2 replies; 8+ messages in thread From: Richard Earnshaw (lists) @ 2017-07-05 9:38 UTC (permalink / raw) To: Michael Collison, Christophe Lyon; +Cc: gcc-patches, nd On 19/05/17 22:11, Michael Collison wrote: > Christophe, > > I had a typo in the two test cases: "addcs" should have been "adcs". I caught this previously but submitted the previous patch incorrectly. Updated patch attached. > > Okay for trunk? > Apologies for the delay responding, I've been procrastinating over this one. In part it's due to the size of the patch with very little top-level description of what's the motivation and overall approach to the problem. It would really help review if this could be split into multiple patches with a description of what each stage achieves. Anyway, there are a couple of obvious formatting issues to deal with first, before we get into the details of the patch. > -----Original Message----- > From: Christophe Lyon [mailto:christophe.lyon@linaro.org] > Sent: Friday, May 19, 2017 3:59 AM > To: Michael Collison <Michael.Collison@arm.com> > Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com> > Subject: Re: [PATCH][Aarch64] Add support for overflow add and sub operations > > Hi Michael, > > > On 19 May 2017 at 07:12, Michael Collison <Michael.Collison@arm.com> wrote: >> Hi, >> >> This patch improves code generation for builtin arithmetic overflow operations for the aarch64 backend. 
As an example for a simple test case such as: >> >> Sure for a simple test case such as: >> >> int >> f (int x, int y, int *ovf) >> { >> int res; >> *ovf = __builtin_sadd_overflow (x, y, &res); >> return res; >> } >> >> Current trunk at -O2 generates >> >> f: >> mov w3, w0 >> mov w4, 0 >> add w0, w0, w1 >> tbnz w1, #31, .L4 >> cmp w0, w3 >> blt .L3 >> .L2: >> str w4, [x2] >> ret >> .p2align 3 >> .L4: >> cmp w0, w3 >> ble .L2 >> .L3: >> mov w4, 1 >> b .L2 >> >> >> With the patch this now generates: >> >> f: >> adds w0, w0, w1 >> cset w1, vs >> str w1, [x2] >> ret >> >> >> Original patch from Richard Henderson: >> >> https://gcc.gnu.org/ml/gcc-patches/2016-01/msg01903.html >> >> >> Okay for trunk? >> >> 2017-05-17 Michael Collison <michael.collison@arm.com> >> Richard Henderson <rth@redhat.com> >> >> * config/aarch64/aarch64-modes.def (CC_V): New. >> * config/aarch64/aarch64-protos.h >> (aarch64_add_128bit_scratch_regs): Declare >> (aarch64_add_128bit_scratch_regs): Declare. >> (aarch64_expand_subvti): Declare. >> (aarch64_gen_unlikely_cbranch): Declare >> * config/aarch64/aarch64.c (aarch64_select_cc_mode): Test >> for signed overflow using CC_Vmode. >> (aarch64_get_condition_code_1): Handle CC_Vmode. >> (aarch64_gen_unlikely_cbranch): New function. >> (aarch64_add_128bit_scratch_regs): New function. >> (aarch64_subv_128bit_scratch_regs): New function. >> (aarch64_expand_subvti): New function. >> * config/aarch64/aarch64.md (addv<GPI>4, uaddv<GPI>4): New. >> (addti3): Create simpler code if low part is already known to be 0. >> (addvti4, uaddvti4): New. >> (*add<GPI>3_compareC_cconly_imm): New. >> (*add<GPI>3_compareC_cconly): New. >> (*add<GPI>3_compareC_imm): New. >> (*add<GPI>3_compareC): Rename from add<GPI>3_compare1; do not >> handle constants within this pattern. >> (*add<GPI>3_compareV_cconly_imm): New. >> (*add<GPI>3_compareV_cconly): New. >> (*add<GPI>3_compareV_imm): New. >> (add<GPI>3_compareV): New. >> (add<GPI>3_carryinC, add<GPI>3_carryinV): New. 
>> (*add<GPI>3_carryinC_zero, *add<GPI>3_carryinV_zero): New. >> (*add<GPI>3_carryinC, *add<GPI>3_carryinV): New. >> (subv<GPI>4, usubv<GPI>4): New. >> (subti): Handle op1 zero. >> (subvti4, usub4ti4): New. >> (*sub<GPI>3_compare1_imm): New. >> (sub<GPI>3_carryinCV): New. >> (*sub<GPI>3_carryinCV_z1_z2, *sub<GPI>3_carryinCV_z1): New. >> (*sub<GPI>3_carryinCV_z2, *sub<GPI>3_carryinCV): New. >> * testsuite/gcc.target/arm/builtin_sadd_128.c: New testcase. >> * testsuite/gcc.target/arm/builtin_saddl.c: New testcase. >> * testsuite/gcc.target/arm/builtin_saddll.c: New testcase. >> * testsuite/gcc.target/arm/builtin_uadd_128.c: New testcase. >> * testsuite/gcc.target/arm/builtin_uaddl.c: New testcase. >> * testsuite/gcc.target/arm/builtin_uaddll.c: New testcase. >> * testsuite/gcc.target/arm/builtin_ssub_128.c: New testcase. >> * testsuite/gcc.target/arm/builtin_ssubl.c: New testcase. >> * testsuite/gcc.target/arm/builtin_ssubll.c: New testcase. >> * testsuite/gcc.target/arm/builtin_usub_128.c: New testcase. >> * testsuite/gcc.target/arm/builtin_usubl.c: New testcase. >> * testsuite/gcc.target/arm/builtin_usubll.c: New testcase. > > I've tried your patch, and 2 of the new tests FAIL: > gcc.target/aarch64/builtin_sadd_128.c scan-assembler addcs > gcc.target/aarch64/builtin_uadd_128.c scan-assembler addcs > > Am I missing something? > > Thanks, > > Christophe > > > pr6308v2.patch > > > diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def > index 45f7a44..244e490 100644 > --- a/gcc/config/aarch64/aarch64-modes.def > +++ b/gcc/config/aarch64/aarch64-modes.def > @@ -24,6 +24,7 @@ CC_MODE (CC_SWP); > CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */ > CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */ > CC_MODE (CC_C); /* Only C bit of condition flags is valid. */ > +CC_MODE (CC_V); /* Only V bit of condition flags is valid. */ > > /* Half-precision floating point for __fp16. 
*/ > FLOAT_MODE (HF, 2, 0); > diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h > index f55d4ba..f38b2b8 100644 > --- a/gcc/config/aarch64/aarch64-protos.h > +++ b/gcc/config/aarch64/aarch64-protos.h > @@ -388,6 +388,18 @@ void aarch64_relayout_simd_types (void); > void aarch64_reset_previous_fndecl (void); > bool aarch64_return_address_signing_enabled (void); > void aarch64_save_restore_target_globals (tree); > +void aarch64_add_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, > + rtx *low_in1, rtx *low_in2, > + rtx *high_dest, rtx *high_in1, > + rtx *high_in2); > +void aarch64_subv_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, > + rtx *low_in1, rtx *low_in2, > + rtx *high_dest, rtx *high_in1, > + rtx *high_in2); > +void aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1, > + rtx low_in2, rtx high_dest, rtx high_in1, > + rtx high_in2); > + It's a little bit inconsistent, but the general style in aarch64-protos.h is not to include parameter names in prototypes, just their types. > > /* Initialize builtins for SIMD intrinsics. */ > void init_aarch64_simd_builtins (void); > @@ -412,6 +424,8 @@ bool aarch64_float_const_representable_p (rtx); > > #if defined (RTX_CODE) > > +void aarch64_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode, > + rtx label_ref); > bool aarch64_legitimate_address_p (machine_mode, rtx, RTX_CODE, bool); > machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx); > rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx); > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index f343d92..71a651c 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -4716,6 +4716,13 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) > && GET_CODE (y) == ZERO_EXTEND) > return CC_Cmode; > > + /* A test for signed overflow. 
*/ > + if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode) > + && code == NE > + && GET_CODE (x) == PLUS > + && GET_CODE (y) == SIGN_EXTEND) > + return CC_Vmode; > + > /* For everything else, return CCmode. */ > return CCmode; > } > @@ -4822,6 +4829,15 @@ aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code) > } > break; > > + case CC_Vmode: > + switch (comp_code) > + { > + case NE: return AARCH64_VS; > + case EQ: return AARCH64_VC; > + default: return -1; > + } > + break; > + > default: > return -1; > } > @@ -13630,6 +13646,88 @@ aarch64_split_dimode_const_store (rtx dst, rtx src) > return true; > } > > +/* Generate RTL for a conditional branch with rtx comparison CODE in > + mode CC_MODE. The destination of the unlikely conditional branch > + is LABEL_REF. */ > + > +void > +aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode, > + rtx label_ref) > +{ > + rtx x; > + x = gen_rtx_fmt_ee (code, VOIDmode, > + gen_rtx_REG (cc_mode, CC_REGNUM), > + const0_rtx); > + > + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, > + gen_rtx_LABEL_REF (VOIDmode, label_ref), > + pc_rtx); > + aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); > +} > + > +void aarch64_add_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, Function names must start in column 1, with the return type on the preceding line. All functions should have a top-level comment describing what they do (their contract with the caller). 
> + rtx *low_in1, rtx *low_in2, > + rtx *high_dest, rtx *high_in1, > + rtx *high_in2) > +{ > + *low_dest = gen_reg_rtx (DImode); > + *low_in1 = gen_lowpart (DImode, op1); > + *low_in2 = simplify_gen_subreg (DImode, op2, TImode, > + subreg_lowpart_offset (DImode, TImode)); > + *high_dest = gen_reg_rtx (DImode); > + *high_in1 = gen_highpart (DImode, op1); > + *high_in2 = simplify_gen_subreg (DImode, op2, TImode, > + subreg_highpart_offset (DImode, TImode)); > +} > + > +void aarch64_subv_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, Same here. > + rtx *low_in1, rtx *low_in2, > + rtx *high_dest, rtx *high_in1, > + rtx *high_in2) > +{ > + *low_dest = gen_reg_rtx (DImode); > + *low_in1 = simplify_gen_subreg (DImode, op1, TImode, > + subreg_lowpart_offset (DImode, TImode)); > + *low_in2 = simplify_gen_subreg (DImode, op2, TImode, > + subreg_lowpart_offset (DImode, TImode)); > + *high_dest = gen_reg_rtx (DImode); > + *high_in1 = simplify_gen_subreg (DImode, op1, TImode, > + subreg_highpart_offset (DImode, TImode)); > + *high_in2 = simplify_gen_subreg (DImode, op2, TImode, > + subreg_highpart_offset (DImode, TImode)); > + > +} > + > +void aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1, And here. 
> + rtx low_in2, rtx high_dest, rtx high_in1, > + rtx high_in2) > +{ > + if (low_in2 == const0_rtx) > + { > + low_dest = low_in1; > + emit_insn (gen_subdi3_compare1 (high_dest, high_in1, > + force_reg (DImode, high_in2))); > + } > + else > + { > + if (CONST_INT_P (low_in2)) > + { > + low_in2 = force_reg (DImode, GEN_INT (-UINTVAL (low_in2))); > + high_in2 = force_reg (DImode, high_in2); > + emit_insn (gen_adddi3_compareC (low_dest, low_in1, low_in2)); > + } > + else > + emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2)); > + emit_insn (gen_subdi3_carryinCV (high_dest, > + force_reg (DImode, high_in1), > + high_in2)); > + } > + > + emit_move_insn (gen_lowpart (DImode, op0), low_dest); > + emit_move_insn (gen_highpart (DImode, op0), high_dest); > + > +} > + > /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ > > static unsigned HOST_WIDE_INT > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index a693a3b..3976ecb 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -1711,25 +1711,123 @@ > } > ) > > +(define_expand "addv<mode>4" > + [(match_operand:GPI 0 "register_operand") > + (match_operand:GPI 1 "register_operand") > + (match_operand:GPI 2 "register_operand") > + (match_operand 3 "")] > + "" > +{ > + emit_insn (gen_add<mode>3_compareV (operands[0], operands[1], operands[2])); > + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); > + > + DONE; > +}) > + > +(define_expand "uaddv<mode>4" > + [(match_operand:GPI 0 "register_operand") > + (match_operand:GPI 1 "register_operand") > + (match_operand:GPI 2 "register_operand") > + (match_operand 3 "")] With no rtl in the expand to describe this pattern, it really should have a top-level comment explaining the arguments (reference to the manual is probably OK in this case). 
> + "" > +{ > + emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2])); > + aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); > + > + DONE; > +}) > + > + > (define_expand "addti3" > [(set (match_operand:TI 0 "register_operand" "") > (plus:TI (match_operand:TI 1 "register_operand" "") > - (match_operand:TI 2 "register_operand" "")))] > + (match_operand:TI 2 "aarch64_reg_or_imm" "")))] > "" > { > - rtx low = gen_reg_rtx (DImode); > - emit_insn (gen_adddi3_compareC (low, gen_lowpart (DImode, operands[1]), > - gen_lowpart (DImode, operands[2]))); > + rtx l0,l1,l2,h0,h1,h2; > > - rtx high = gen_reg_rtx (DImode); > - emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]), > - gen_highpart (DImode, operands[2]))); > + aarch64_add_128bit_scratch_regs (operands[1], operands[2], > + &l0, &l1, &l2, &h0, &h1, &h2); > + > + if (l2 == const0_rtx) > + { > + l0 = l1; > + if (!aarch64_pluslong_operand (h2, DImode)) > + h2 = force_reg (DImode, h2); > + emit_insn (gen_adddi3 (h0, h1, h2)); > + } > + else > + { > + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); > + emit_insn (gen_adddi3_carryin (h0, h1, force_reg (DImode, h2))); > + } > + > + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); > + emit_move_insn (gen_highpart (DImode, operands[0]), h0); > > - emit_move_insn (gen_lowpart (DImode, operands[0]), low); > - emit_move_insn (gen_highpart (DImode, operands[0]), high); > DONE; > }) > > +(define_expand "addvti4" > + [(match_operand:TI 0 "register_operand" "") > + (match_operand:TI 1 "register_operand" "") > + (match_operand:TI 2 "aarch64_reg_or_imm" "") > + (match_operand 3 "")] Same here. 
> + "" > +{ > + rtx l0,l1,l2,h0,h1,h2; > + > + aarch64_add_128bit_scratch_regs (operands[1], operands[2], > + &l0, &l1, &l2, &h0, &h1, &h2); > + > + if (l2 == const0_rtx) > + { > + l0 = l1; > + emit_insn (gen_adddi3_compareV (h0, h1, force_reg (DImode, h2))); > + } > + else > + { > + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); > + emit_insn (gen_adddi3_carryinV (h0, h1, force_reg (DImode, h2))); > + } > + > + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); > + emit_move_insn (gen_highpart (DImode, operands[0]), h0); > + > + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); > + DONE; > +}) > + > +(define_expand "uaddvti4" > + [(match_operand:TI 0 "register_operand" "") > + (match_operand:TI 1 "register_operand" "") > + (match_operand:TI 2 "aarch64_reg_or_imm" "") > + (match_operand 3 "")] > + "" > +{ > + rtx l0,l1,l2,h0,h1,h2; > + > + aarch64_add_128bit_scratch_regs (operands[1], operands[2], > + &l0, &l1, &l2, &h0, &h1, &h2); > + > + if (l2 == const0_rtx) > + { > + l0 = l1; > + emit_insn (gen_adddi3_compareC (h0, h1, force_reg (DImode, h2))); > + } > + else > + { > + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); > + emit_insn (gen_adddi3_carryinC (h0, h1, force_reg (DImode, h2))); > + } > + > + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); > + emit_move_insn (gen_highpart (DImode, operands[0]), h0); > + > + aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); > + DONE; > + }) > + > (define_insn "add<mode>3_compare0" > [(set (reg:CC_NZ CC_REGNUM) > (compare:CC_NZ > @@ -1828,10 +1926,70 @@ > [(set_attr "type" "alus_sreg")] > ) > > +;; Note that since we're sign-extending, match the immediate in GPI > +;; rather than in DWI. Since CONST_INT is modeless, this works fine. 
+(define_insn "*add<mode>3_compareV_cconly_imm" > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V > + (plus:<DWI> > + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r,r")) > + (match_operand:GPI 1 "aarch64_plus_immediate" "I,J")) > + (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] > + "" > + "@ > + cmn\\t%<w>0, %<w>1 > + cmp\\t%<w>0, #%n1" > + [(set_attr "type" "alus_imm")] > +) > + > +(define_insn "*add<mode>3_compareV_cconly" > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V Use of ne is wrong here. The condition register should be set to the result of a compare rtl construct. The same applies elsewhere within this patch. NE is then used on the result of the comparison. The mode of the compare then indicates what might or might not be valid in the way the comparison is finally constructed. Note that this issue may go back to the earlier patches that this is based on, but those are equally incorrect and will need fixing as well at some point. We shouldn't perpetuate the issue.
> + (plus:<DWI> > + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r")) > + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) > + (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] > + "" > + "cmn\\t%<w>0, %<w>1" > + [(set_attr "type" "alus_sreg")] > +) > + > +(define_insn "*add<mode>3_compareV_imm" > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V > + (plus:<DWI> > + (sign_extend:<DWI> > + (match_operand:GPI 1 "register_operand" "r,r")) > + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")) > + (sign_extend:<DWI> > + (plus:GPI (match_dup 1) (match_dup 2))))) > + (set (match_operand:GPI 0 "register_operand" "=r,r") > + (plus:GPI (match_dup 1) (match_dup 2)))] > + "" > + "@ > + adds\\t%<w>0, %<w>1, %<w>2 > + subs\\t%<w>0, %<w>1, #%n2" > + [(set_attr "type" "alus_imm,alus_imm")] > +) > + > +(define_insn "add<mode>3_compareV" > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V > + (plus:<DWI> > + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")) > + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) > + (sign_extend:<DWI> (plus:GPI (match_dup 1) (match_dup 2))))) > + (set (match_operand:GPI 0 "register_operand" "=r") > + (plus:GPI (match_dup 1) (match_dup 2)))] > + "" > + "adds\\t%<w>0, %<w>1, %<w>2" > + [(set_attr "type" "alus_sreg")] > +) > + > (define_insn "*adds_shift_imm_<mode>" > [(set (reg:CC_NZ CC_REGNUM) > (compare:CC_NZ > - (plus:GPI (ASHIFT:GPI > + (plus:GPI (ASHIFT:GPI > (match_operand:GPI 1 "register_operand" "r") > (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n")) > (match_operand:GPI 3 "register_operand" "r")) > @@ -2187,6 +2345,138 @@ > [(set_attr "type" "adc_reg")] > ) > > +(define_expand "add<mode>3_carryinC" > + [(parallel > + [(set (match_dup 3) > + (ne:CC_C > + (plus:<DWI> > + (plus:<DWI> > + (match_dup 4) > + (zero_extend:<DWI> > + (match_operand:GPI 1 "register_operand" "r"))) > + (zero_extend:<DWI> > + (match_operand:GPI 2 "register_operand" "r"))) > + (zero_extend:<DWI> > + 
(plus:GPI > + (plus:GPI (match_dup 5) (match_dup 1)) > + (match_dup 2))))) > + (set (match_operand:GPI 0 "register_operand") > + (plus:GPI > + (plus:GPI (match_dup 5) (match_dup 1)) > + (match_dup 2)))])] > + "" > +{ > + operands[3] = gen_rtx_REG (CC_Cmode, CC_REGNUM); > + operands[4] = gen_rtx_NE (<DWI>mode, operands[3], const0_rtx); > + operands[5] = gen_rtx_NE (<MODE>mode, operands[3], const0_rtx); > +}) > + > +(define_insn "*add<mode>3_carryinC_zero" > + [(set (reg:CC_C CC_REGNUM) > + (ne:CC_C > + (plus:<DWI> > + (match_operand:<DWI> 2 "aarch64_carry_operation" "") > + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) > + (zero_extend:<DWI> > + (plus:GPI > + (match_operand:GPI 3 "aarch64_carry_operation" "") > + (match_dup 1))))) > + (set (match_operand:GPI 0 "register_operand") > + (plus:GPI (match_dup 3) (match_dup 1)))] > + "" > + "adcs\\t%<w>0, %<w>1, <w>zr" > + [(set_attr "type" "adc_reg")] > +) > + > +(define_insn "*add<mode>3_carryinC" > + [(set (reg:CC_C CC_REGNUM) > + (ne:CC_C > + (plus:<DWI> > + (plus:<DWI> > + (match_operand:<DWI> 3 "aarch64_carry_operation" "") > + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) > + (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) > + (zero_extend:<DWI> > + (plus:GPI > + (plus:GPI > + (match_operand:GPI 4 "aarch64_carry_operation" "") > + (match_dup 1)) > + (match_dup 2))))) > + (set (match_operand:GPI 0 "register_operand") > + (plus:GPI > + (plus:GPI (match_dup 4) (match_dup 1)) > + (match_dup 2)))] > + "" > + "adcs\\t%<w>0, %<w>1, %<w>2" > + [(set_attr "type" "adc_reg")] > +) > + > +(define_expand "add<mode>3_carryinV" > + [(parallel > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V > + (plus:<DWI> > + (plus:<DWI> > + (match_dup 3) > + (sign_extend:<DWI> > + (match_operand:GPI 1 "register_operand" "r"))) > + (sign_extend:<DWI> > + (match_operand:GPI 2 "register_operand" "r"))) > + (sign_extend:<DWI> > + (plus:GPI > + (plus:GPI (match_dup 4) (match_dup 1)) > + (match_dup 
2))))) > + (set (match_operand:GPI 0 "register_operand") > + (plus:GPI > + (plus:GPI (match_dup 4) (match_dup 1)) > + (match_dup 2)))])] > + "" > +{ > + rtx cc = gen_rtx_REG (CC_Cmode, CC_REGNUM); > + operands[3] = gen_rtx_NE (<DWI>mode, cc, const0_rtx); > + operands[4] = gen_rtx_NE (<MODE>mode, cc, const0_rtx); > +}) > + > +(define_insn "*add<mode>3_carryinV_zero" > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V > + (plus:<DWI> > + (match_operand:<DWI> 2 "aarch64_carry_operation" "") > + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) > + (sign_extend:<DWI> > + (plus:GPI > + (match_operand:GPI 3 "aarch64_carry_operation" "") > + (match_dup 1))))) > + (set (match_operand:GPI 0 "register_operand") > + (plus:GPI (match_dup 3) (match_dup 1)))] > + "" > + "adcs\\t%<w>0, %<w>1, <w>zr" > + [(set_attr "type" "adc_reg")] > +) > + > +(define_insn "*add<mode>3_carryinV" > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V > + (plus:<DWI> > + (plus:<DWI> > + (match_operand:<DWI> 3 "aarch64_carry_operation" "") > + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) > + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) > + (sign_extend:<DWI> > + (plus:GPI > + (plus:GPI > + (match_operand:GPI 4 "aarch64_carry_operation" "") > + (match_dup 1)) > + (match_dup 2))))) > + (set (match_operand:GPI 0 "register_operand") > + (plus:GPI > + (plus:GPI (match_dup 4) (match_dup 1)) > + (match_dup 2)))] > + "" > + "adcs\\t%<w>0, %<w>1, %<w>2" > + [(set_attr "type" "adc_reg")] > +) > + > (define_insn "*add_uxt<mode>_shift2" > [(set (match_operand:GPI 0 "register_operand" "=rk") > (plus:GPI (and:GPI > @@ -2283,22 +2573,86 @@ > (set_attr "simd" "*,yes")] > ) > > +(define_expand "subv<mode>4" > + [(match_operand:GPI 0 "register_operand") > + (match_operand:GPI 1 "aarch64_reg_or_zero") > + (match_operand:GPI 2 "aarch64_reg_or_zero") > + (match_operand 3 "")] > + "" > +{ > + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2])); > + 
aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); > + > + DONE; > +}) > + > +(define_expand "usubv<mode>4" > + [(match_operand:GPI 0 "register_operand") > + (match_operand:GPI 1 "aarch64_reg_or_zero") > + (match_operand:GPI 2 "aarch64_reg_or_zero") > + (match_operand 3 "")] > + "" > +{ > + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2])); > + aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); > + > + DONE; > +}) > + > (define_expand "subti3" > [(set (match_operand:TI 0 "register_operand" "") > - (minus:TI (match_operand:TI 1 "register_operand" "") > + (minus:TI (match_operand:TI 1 "aarch64_reg_or_zero" "") > (match_operand:TI 2 "register_operand" "")))] > "" > { > - rtx low = gen_reg_rtx (DImode); > - emit_insn (gen_subdi3_compare1 (low, gen_lowpart (DImode, operands[1]), > - gen_lowpart (DImode, operands[2]))); > + rtx l0 = gen_reg_rtx (DImode); > + rtx l1 = simplify_gen_subreg (DImode, operands[1], TImode, > + subreg_lowpart_offset (DImode, TImode)); > + rtx l2 = gen_lowpart (DImode, operands[2]); > + rtx h0 = gen_reg_rtx (DImode); > + rtx h1 = simplify_gen_subreg (DImode, operands[1], TImode, > + subreg_highpart_offset (DImode, TImode)); > + rtx h2 = gen_highpart (DImode, operands[2]); > > - rtx high = gen_reg_rtx (DImode); > - emit_insn (gen_subdi3_carryin (high, gen_highpart (DImode, operands[1]), > - gen_highpart (DImode, operands[2]))); > + emit_insn (gen_subdi3_compare1 (l0, l1, l2)); > + emit_insn (gen_subdi3_carryin (h0, h1, h2)); > > - emit_move_insn (gen_lowpart (DImode, operands[0]), low); > - emit_move_insn (gen_highpart (DImode, operands[0]), high); > + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); > + emit_move_insn (gen_highpart (DImode, operands[0]), h0); > + DONE; > +}) > + > +(define_expand "subvti4" > + [(match_operand:TI 0 "register_operand") > + (match_operand:TI 1 "aarch64_reg_or_zero") > + (match_operand:TI 2 "aarch64_reg_or_imm") > + (match_operand 3 "")] > + "" > +{ > + rtx 
l0,l1,l2,h0,h1,h2; > + > + aarch64_subv_128bit_scratch_regs (operands[1], operands[2], > + &l0, &l1, &l2, &h0, &h1, &h2); > + aarch64_expand_subvti (operands[0], l0, l1, l2, h0, h1, h2); > + > + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); > + DONE; > +}) > + > +(define_expand "usubvti4" > + [(match_operand:TI 0 "register_operand") > + (match_operand:TI 1 "aarch64_reg_or_zero") > + (match_operand:TI 2 "aarch64_reg_or_imm") > + (match_operand 3 "")] > + "" > +{ > + rtx l0,l1,l2,h0,h1,h2; > + > + aarch64_subv_128bit_scratch_regs (operands[1], operands[2], > + &l0, &l1, &l2, &h0, &h1, &h2); > + aarch64_expand_subvti (operands[0], l0, l1, l2, h0, h1, h2); > + > + aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); > DONE; > }) > > @@ -2327,6 +2681,22 @@ > [(set_attr "type" "alus_sreg")] > ) > > +(define_insn "*sub<mode>3_compare1_imm" > + [(set (reg:CC CC_REGNUM) > + (compare:CC > + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ,rZ") > + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))) > + (set (match_operand:GPI 0 "register_operand" "=r,r") > + (plus:GPI > + (match_dup 1) > + (match_operand:GPI 3 "aarch64_plus_immediate" "J,I")))] > + "UINTVAL (operands[2]) == -UINTVAL (operands[3])" > + "@ > + subs\\t%<w>0, %<w>1, %<w>2 > + adds\\t%<w>0, %<w>1, %<w>3" > + [(set_attr "type" "alus_imm")] > +) > + > (define_insn "sub<mode>3_compare1" > [(set (reg:CC CC_REGNUM) > (compare:CC > @@ -2554,6 +2924,85 @@ > [(set_attr "type" "adc_reg")] > ) > > +(define_expand "sub<mode>3_carryinCV" > + [(parallel > + [(set (reg:CC CC_REGNUM) > + (compare:CC > + (sign_extend:<DWI> > + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")) > + (plus:<DWI> > + (sign_extend:<DWI> > + (match_operand:GPI 2 "register_operand" "r")) > + (ltu:<DWI> (reg:CC CC_REGNUM) (const_int 0))))) > + (set (match_operand:GPI 0 "register_operand" "=r") > + (minus:GPI > + (minus:GPI (match_dup 1) (match_dup 2)) > + (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))])] > + "" > +) > + > +(define_insn 
"*sub<mode>3_carryinCV_z1_z2" > + [(set (reg:CC CC_REGNUM) > + (compare:CC > + (const_int 0) > + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) > + (set (match_operand:GPI 0 "register_operand" "=r") > + (neg:GPI (match_operand:GPI 1 "aarch64_borrow_operation" "")))] > + "" > + "sbcs\\t%<w>0, <w>zr, <w>zr" > + [(set_attr "type" "adc_reg")] > +) > + > +(define_insn "*sub<mode>3_carryinCV_z1" > + [(set (reg:CC CC_REGNUM) > + (compare:CC > + (const_int 0) > + (plus:<DWI> > + (sign_extend:<DWI> > + (match_operand:GPI 1 "register_operand" "r")) > + (match_operand:<DWI> 2 "aarch64_borrow_operation" "")))) > + (set (match_operand:GPI 0 "register_operand" "=r") > + (minus:GPI > + (neg:GPI (match_dup 1)) > + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] > + "" > + "sbcs\\t%<w>0, <w>zr, %<w>1" > + [(set_attr "type" "adc_reg")] > +) > + > +(define_insn "*sub<mode>3_carryinCV_z2" > + [(set (reg:CC CC_REGNUM) > + (compare:CC > + (sign_extend:<DWI> > + (match_operand:GPI 1 "register_operand" "r")) > + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) > + (set (match_operand:GPI 0 "register_operand" "=r") > + (minus:GPI > + (match_dup 1) > + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] > + "" > + "sbcs\\t%<w>0, %<w>1, <w>zr" > + [(set_attr "type" "adc_reg")] > +) > + > +(define_insn "*sub<mode>3_carryinCV" > + [(set (reg:CC CC_REGNUM) > + (compare:CC > + (sign_extend:<DWI> > + (match_operand:GPI 1 "register_operand" "r")) > + (plus:<DWI> > + (sign_extend:<DWI> > + (match_operand:GPI 2 "register_operand" "r")) > + (match_operand:<DWI> 3 "aarch64_borrow_operation" "")))) > + (set (match_operand:GPI 0 "register_operand" "=r") > + (minus:GPI > + (minus:GPI (match_dup 1) (match_dup 2)) > + (match_operand:GPI 4 "aarch64_borrow_operation" "")))] > + "" > + "sbcs\\t%<w>0, %<w>1, %<w>2" > + [(set_attr "type" "adc_reg")] > +) > + > (define_insn "*sub_uxt<mode>_shift2" > [(set (match_operand:GPI 0 "register_operand" "=rk") > (minus:GPI (match_operand:GPI 4 
"register_operand" "rk") > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c > new file mode 100644 > index 0000000..0b31500 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +__int128 overflow_add (__int128 x, __int128 y) > +{ > + __int128 r; > + > + int ovr = __builtin_add_overflow (x, y, &r); > + if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "adds" } } */ > +/* { dg-final { scan-assembler "adcs" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c b/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c > new file mode 100644 > index 0000000..9768a98 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +long overflow_add (long x, long y) > +{ > + long r; > + > + int ovr = __builtin_saddl_overflow (x, y, &r); > + if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "adds" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c b/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c > new file mode 100644 > index 0000000..126a526 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +long long overflow_add (long long x, long long y) > +{ > + long long r; > + > + int ovr = __builtin_saddll_overflow (x, y, &r); > + if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "adds" } } */ > + > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c > new file mode 
100644 > index 0000000..c1261e3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +__int128 overflow_sub (__int128 x, __int128 y) > +{ > + __int128 r; > + > + int ovr = __builtin_sub_overflow (x, y, &r); > + if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "subs" } } */ > +/* { dg-final { scan-assembler "sbcs" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c b/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c > new file mode 100644 > index 0000000..1040464 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +long overflow_sub (long x, long y) > +{ > + long r; > + > + int ovr = __builtin_ssubl_overflow (x, y, &r); > + if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "subs" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c b/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c > new file mode 100644 > index 0000000..a03df88 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +long long overflow_sub (long long x, long long y) > +{ > + long long r; > + > + int ovr = __builtin_ssubll_overflow (x, y, &r); > + if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "subs" } } */ > + > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c > new file mode 100644 > index 0000000..c573c2a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { 
dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +unsigned __int128 overflow_add (unsigned __int128 x, unsigned __int128 y) > +{ > + unsigned __int128 r; > + > + int ovr = __builtin_add_overflow (x, y, &r); > + if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "adds" } } */ > +/* { dg-final { scan-assembler "adcs" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c b/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c > new file mode 100644 > index 0000000..e325591 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +unsigned long overflow_add (unsigned long x, unsigned long y) > +{ > + unsigned long r; > + > + int ovr = __builtin_uaddl_overflow (x, y, &r); > + if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "adds" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c b/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c > new file mode 100644 > index 0000000..5f42886 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +unsigned long long overflow_add (unsigned long long x, unsigned long long y) > +{ > + unsigned long long r; > + > + int ovr = __builtin_uaddll_overflow (x, y, &r); > + if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "adds" } } */ > + > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c > new file mode 100644 > index 0000000..a84f4a4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void 
overflow_handler (); > + > +unsigned __int128 overflow_sub (unsigned __int128 x, unsigned __int128 y) > +{ > + unsigned __int128 r; > + > + int ovr = __builtin_sub_overflow (x, y, &r); > + if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "subs" } } */ > +/* { dg-final { scan-assembler "sbcs" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c b/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c > new file mode 100644 > index 0000000..ed033da > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +unsigned long overflow_sub (unsigned long x, unsigned long y) > +{ > + unsigned long r; > + > + int ovr = __builtin_usubl_overflow (x, y, &r); > + if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "subs" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c b/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c > new file mode 100644 > index 0000000..a742f0c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +unsigned long long overflow_sub (unsigned long long x, unsigned long long y) > +{ > + unsigned long long r; > + > + int ovr = __builtin_usubll_overflow (x, y, &r); > + if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "subs" } } */ > + > ^ permalink raw reply [flat|nested] 8+ messages in thread
* RE: [PATCH][Aarch64] Add support for overflow add and sub operations 2017-07-05 9:38 ` Richard Earnshaw (lists) @ 2017-07-06 7:29 ` Michael Collison 2017-07-06 8:22 ` Richard Earnshaw (lists) 2017-08-01 6:33 ` Michael Collison 1 sibling, 1 reply; 8+ messages in thread From: Michael Collison @ 2017-07-06 7:29 UTC (permalink / raw) To: Richard Earnshaw, Christophe Lyon; +Cc: gcc-patches, nd Richard, Can you explain "Use of ne is wrong here. The condition register should be set to the result of a compare rtl construct. The same applies elsewhere within this patch. NE is then used on the result of the comparison. The mode of the compare then indicates what might or might not be valid in the way the comparison is finally constructed."? Why is "ne" wrong? I don't doubt you are correct, but I see nothing in the internals manual that forbids it. I want to understand what issues this exposes. As you indicate I used this idiom in the arm port when I added the overflow operations there as well. Additionally other targets seem to use the comparison operators this way (i386 for the umulv). Regards, Michael Collison -----Original Message----- From: Richard Earnshaw (lists) [mailto:Richard.Earnshaw@arm.com] Sent: Wednesday, July 5, 2017 2:38 AM To: Michael Collison <Michael.Collison@arm.com>; Christophe Lyon <christophe.lyon@linaro.org> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com> Subject: Re: [PATCH][Aarch64] Add support for overflow add and sub operations On 19/05/17 22:11, Michael Collison wrote: > Christophe, > > I had a typo in the two test cases: "addcs" should have been "adcs". I caught this previously but submitted the previous patch incorrectly. Updated patch attached. > > Okay for trunk? > Apologies for the delay responding, I've been procrastinating over this one. In part it's due to the size of the patch with very little top-level description of what's the motivation and overall approach to the problem.
It would really help review if this could be split into multiple patches with a description of what each stage achieves. Anyway, there are a couple of obvious formatting issues to deal with first, before we get into the details of the patch. > -----Original Message----- > From: Christophe Lyon [mailto:christophe.lyon@linaro.org] > Sent: Friday, May 19, 2017 3:59 AM > To: Michael Collison <Michael.Collison@arm.com> > Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com> > Subject: Re: [PATCH][Aarch64] Add support for overflow add and sub > operations > > Hi Michael, > > > On 19 May 2017 at 07:12, Michael Collison <Michael.Collison@arm.com> wrote: >> Hi, >> >> This patch improves code generations for builtin arithmetic overflow operations for the aarch64 backend. As an example for a simple test case such as: >> >> Sure for a simple test case such as: >> >> int >> f (int x, int y, int *ovf) >> { >> int res; >> *ovf = __builtin_sadd_overflow (x, y, &res); >> return res; >> } >> >> Current trunk at -O2 generates >> >> f: >> mov w3, w0 >> mov w4, 0 >> add w0, w0, w1 >> tbnz w1, #31, .L4 >> cmp w0, w3 >> blt .L3 >> .L2: >> str w4, [x2] >> ret >> .p2align 3 >> .L4: >> cmp w0, w3 >> ble .L2 >> .L3: >> mov w4, 1 >> b .L2 >> >> >> With the patch this now generates: >> >> f: >> adds w0, w0, w1 >> cset w1, vs >> str w1, [x2] >> ret >> >> >> Original patch from Richard Henderson: >> >> https://gcc.gnu.org/ml/gcc-patches/2016-01/msg01903.html >> >> >> Okay for trunk? >> >> 2017-05-17 Michael Collison <michael.collison@arm.com> >> Richard Henderson <rth@redhat.com> >> >> * config/aarch64/aarch64-modes.def (CC_V): New. >> * config/aarch64/aarch64-protos.h >> (aarch64_add_128bit_scratch_regs): Declare >> (aarch64_add_128bit_scratch_regs): Declare. >> (aarch64_expand_subvti): Declare. >> (aarch64_gen_unlikely_cbranch): Declare >> * config/aarch64/aarch64.c (aarch64_select_cc_mode): Test >> for signed overflow using CC_Vmode. >> (aarch64_get_condition_code_1): Handle CC_Vmode. 
>> (aarch64_gen_unlikely_cbranch): New function. >> (aarch64_add_128bit_scratch_regs): New function. >> (aarch64_subv_128bit_scratch_regs): New function. >> (aarch64_expand_subvti): New function. >> * config/aarch64/aarch64.md (addv<GPI>4, uaddv<GPI>4): New. >> (addti3): Create simpler code if low part is already known to be 0. >> (addvti4, uaddvti4): New. >> (*add<GPI>3_compareC_cconly_imm): New. >> (*add<GPI>3_compareC_cconly): New. >> (*add<GPI>3_compareC_imm): New. >> (*add<GPI>3_compareC): Rename from add<GPI>3_compare1; do not >> handle constants within this pattern. >> (*add<GPI>3_compareV_cconly_imm): New. >> (*add<GPI>3_compareV_cconly): New. >> (*add<GPI>3_compareV_imm): New. >> (add<GPI>3_compareV): New. >> (add<GPI>3_carryinC, add<GPI>3_carryinV): New. >> (*add<GPI>3_carryinC_zero, *add<GPI>3_carryinV_zero): New. >> (*add<GPI>3_carryinC, *add<GPI>3_carryinV): New. >> (subv<GPI>4, usubv<GPI>4): New. >> (subti): Handle op1 zero. >> (subvti4, usub4ti4): New. >> (*sub<GPI>3_compare1_imm): New. >> (sub<GPI>3_carryinCV): New. >> (*sub<GPI>3_carryinCV_z1_z2, *sub<GPI>3_carryinCV_z1): New. >> (*sub<GPI>3_carryinCV_z2, *sub<GPI>3_carryinCV): New. >> * testsuite/gcc.target/arm/builtin_sadd_128.c: New testcase. >> * testsuite/gcc.target/arm/builtin_saddl.c: New testcase. >> * testsuite/gcc.target/arm/builtin_saddll.c: New testcase. >> * testsuite/gcc.target/arm/builtin_uadd_128.c: New testcase. >> * testsuite/gcc.target/arm/builtin_uaddl.c: New testcase. >> * testsuite/gcc.target/arm/builtin_uaddll.c: New testcase. >> * testsuite/gcc.target/arm/builtin_ssub_128.c: New testcase. >> * testsuite/gcc.target/arm/builtin_ssubl.c: New testcase. >> * testsuite/gcc.target/arm/builtin_ssubll.c: New testcase. >> * testsuite/gcc.target/arm/builtin_usub_128.c: New testcase. >> * testsuite/gcc.target/arm/builtin_usubl.c: New testcase. >> * testsuite/gcc.target/arm/builtin_usubll.c: New testcase. 
> > I've tried your patch, and 2 of the new tests FAIL: > gcc.target/aarch64/builtin_sadd_128.c scan-assembler addcs > gcc.target/aarch64/builtin_uadd_128.c scan-assembler addcs > > Am I missing something? > > Thanks, > > Christophe > > > pr6308v2.patch > > > diff --git a/gcc/config/aarch64/aarch64-modes.def > b/gcc/config/aarch64/aarch64-modes.def > index 45f7a44..244e490 100644 > --- a/gcc/config/aarch64/aarch64-modes.def > +++ b/gcc/config/aarch64/aarch64-modes.def > @@ -24,6 +24,7 @@ CC_MODE (CC_SWP); > CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */ > CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */ > CC_MODE (CC_C); /* Only C bit of condition flags is valid. */ > +CC_MODE (CC_V); /* Only V bit of condition flags is valid. */ > > /* Half-precision floating point for __fp16. */ FLOAT_MODE (HF, 2, > 0); diff --git a/gcc/config/aarch64/aarch64-protos.h > b/gcc/config/aarch64/aarch64-protos.h > index f55d4ba..f38b2b8 100644 > --- a/gcc/config/aarch64/aarch64-protos.h > +++ b/gcc/config/aarch64/aarch64-protos.h > @@ -388,6 +388,18 @@ void aarch64_relayout_simd_types (void); void > aarch64_reset_previous_fndecl (void); bool > aarch64_return_address_signing_enabled (void); void > aarch64_save_restore_target_globals (tree); > +void aarch64_add_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, > + rtx *low_in1, rtx *low_in2, > + rtx *high_dest, rtx *high_in1, > + rtx *high_in2); > +void aarch64_subv_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, > + rtx *low_in1, rtx *low_in2, > + rtx *high_dest, rtx *high_in1, > + rtx *high_in2); > +void aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1, > + rtx low_in2, rtx high_dest, rtx high_in1, > + rtx high_in2); > + It's a little bit inconsistent, but the general style in aarch64-protos.h is not to include parameter names in prototypes, just their types. > > /* Initialize builtins for SIMD intrinsics. 
*/ void > init_aarch64_simd_builtins (void); @@ -412,6 +424,8 @@ bool > aarch64_float_const_representable_p (rtx); > > #if defined (RTX_CODE) > > +void aarch64_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode, > + rtx label_ref); > bool aarch64_legitimate_address_p (machine_mode, rtx, RTX_CODE, > bool); machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx); rtx > aarch64_gen_compare_reg (RTX_CODE, rtx, rtx); diff --git > a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index > f343d92..71a651c 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -4716,6 +4716,13 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) > && GET_CODE (y) == ZERO_EXTEND) > return CC_Cmode; > > + /* A test for signed overflow. */ > + if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode) > + && code == NE > + && GET_CODE (x) == PLUS > + && GET_CODE (y) == SIGN_EXTEND) > + return CC_Vmode; > + > /* For everything else, return CCmode. */ > return CCmode; > } > @@ -4822,6 +4829,15 @@ aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code) > } > break; > > + case CC_Vmode: > + switch (comp_code) > + { > + case NE: return AARCH64_VS; > + case EQ: return AARCH64_VC; > + default: return -1; > + } > + break; > + > default: > return -1; > } > @@ -13630,6 +13646,88 @@ aarch64_split_dimode_const_store (rtx dst, rtx src) > return true; > } > > +/* Generate RTL for a conditional branch with rtx comparison CODE in > + mode CC_MODE. The destination of the unlikely conditional branch > + is LABEL_REF. 
*/ > + > +void > +aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode, > + rtx label_ref) > +{ > + rtx x; > + x = gen_rtx_fmt_ee (code, VOIDmode, > + gen_rtx_REG (cc_mode, CC_REGNUM), > + const0_rtx); > + > + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, > + gen_rtx_LABEL_REF (VOIDmode, label_ref), > + pc_rtx); > + aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); } > + > +void aarch64_add_128bit_scratch_regs (rtx op1, rtx op2, rtx > +*low_dest, Function names must start in column 1, with the return type on the preceding line. All functions should have a top-level comment describing what they do (their contract with the caller). > + rtx *low_in1, rtx *low_in2, > + rtx *high_dest, rtx *high_in1, > + rtx *high_in2) > +{ > + *low_dest = gen_reg_rtx (DImode); > + *low_in1 = gen_lowpart (DImode, op1); > + *low_in2 = simplify_gen_subreg (DImode, op2, TImode, > + subreg_lowpart_offset (DImode, TImode)); > + *high_dest = gen_reg_rtx (DImode); > + *high_in1 = gen_highpart (DImode, op1); > + *high_in2 = simplify_gen_subreg (DImode, op2, TImode, > + subreg_highpart_offset (DImode, TImode)); } > + > +void aarch64_subv_128bit_scratch_regs (rtx op1, rtx op2, rtx > +*low_dest, Same here. > + rtx *low_in1, rtx *low_in2, > + rtx *high_dest, rtx *high_in1, > + rtx *high_in2) > +{ > + *low_dest = gen_reg_rtx (DImode); > + *low_in1 = simplify_gen_subreg (DImode, op1, TImode, > + subreg_lowpart_offset (DImode, TImode)); > + *low_in2 = simplify_gen_subreg (DImode, op2, TImode, > + subreg_lowpart_offset (DImode, TImode)); > + *high_dest = gen_reg_rtx (DImode); > + *high_in1 = simplify_gen_subreg (DImode, op1, TImode, > + subreg_highpart_offset (DImode, TImode)); > + *high_in2 = simplify_gen_subreg (DImode, op2, TImode, > + subreg_highpart_offset (DImode, TImode)); > + > +} > + > +void aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1, And here. 
> + rtx low_in2, rtx high_dest, rtx high_in1, > + rtx high_in2) > +{ > + if (low_in2 == const0_rtx) > + { > + low_dest = low_in1; > + emit_insn (gen_subdi3_compare1 (high_dest, high_in1, > + force_reg (DImode, high_in2))); > + } > + else > + { > + if (CONST_INT_P (low_in2)) > + { > + low_in2 = force_reg (DImode, GEN_INT (-UINTVAL (low_in2))); > + high_in2 = force_reg (DImode, high_in2); > + emit_insn (gen_adddi3_compareC (low_dest, low_in1, low_in2)); > + } > + else > + emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2)); > + emit_insn (gen_subdi3_carryinCV (high_dest, > + force_reg (DImode, high_in1), > + high_in2)); > + } > + > + emit_move_insn (gen_lowpart (DImode, op0), low_dest); > + emit_move_insn (gen_highpart (DImode, op0), high_dest); > + > +} > + > /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ > > static unsigned HOST_WIDE_INT > diff --git a/gcc/config/aarch64/aarch64.md > b/gcc/config/aarch64/aarch64.md index a693a3b..3976ecb 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -1711,25 +1711,123 @@ > } > ) > > +(define_expand "addv<mode>4" > + [(match_operand:GPI 0 "register_operand") > + (match_operand:GPI 1 "register_operand") > + (match_operand:GPI 2 "register_operand") > + (match_operand 3 "")] > + "" > +{ > + emit_insn (gen_add<mode>3_compareV (operands[0], operands[1], > +operands[2])); > + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); > + > + DONE; > +}) > + > +(define_expand "uaddv<mode>4" > + [(match_operand:GPI 0 "register_operand") > + (match_operand:GPI 1 "register_operand") > + (match_operand:GPI 2 "register_operand") > + (match_operand 3 "")] With no rtl in the expand to describe this pattern, it really should have a top-level comment explaining the arguments (reference to the manual is probably OK in this case). 
> + "" > +{ > + emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], > +operands[2])); > + aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); > + > + DONE; > +}) > + > + > (define_expand "addti3" > [(set (match_operand:TI 0 "register_operand" "") > (plus:TI (match_operand:TI 1 "register_operand" "") > - (match_operand:TI 2 "register_operand" "")))] > + (match_operand:TI 2 "aarch64_reg_or_imm" "")))] > "" > { > - rtx low = gen_reg_rtx (DImode); > - emit_insn (gen_adddi3_compareC (low, gen_lowpart (DImode, operands[1]), > - gen_lowpart (DImode, operands[2]))); > + rtx l0,l1,l2,h0,h1,h2; > > - rtx high = gen_reg_rtx (DImode); > - emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]), > - gen_highpart (DImode, operands[2]))); > + aarch64_add_128bit_scratch_regs (operands[1], operands[2], > + &l0, &l1, &l2, &h0, &h1, &h2); > + > + if (l2 == const0_rtx) > + { > + l0 = l1; > + if (!aarch64_pluslong_operand (h2, DImode)) > + h2 = force_reg (DImode, h2); > + emit_insn (gen_adddi3 (h0, h1, h2)); > + } > + else > + { > + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); > + emit_insn (gen_adddi3_carryin (h0, h1, force_reg (DImode, h2))); > + } > + > + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); > + emit_move_insn (gen_highpart (DImode, operands[0]), h0); > > - emit_move_insn (gen_lowpart (DImode, operands[0]), low); > - emit_move_insn (gen_highpart (DImode, operands[0]), high); > DONE; > }) > > +(define_expand "addvti4" > + [(match_operand:TI 0 "register_operand" "") > + (match_operand:TI 1 "register_operand" "") > + (match_operand:TI 2 "aarch64_reg_or_imm" "") > + (match_operand 3 "")] Same here. 
> + "" > +{ > + rtx l0,l1,l2,h0,h1,h2; > + > + aarch64_add_128bit_scratch_regs (operands[1], operands[2], > + &l0, &l1, &l2, &h0, &h1, &h2); > + > + if (l2 == const0_rtx) > + { > + l0 = l1; > + emit_insn (gen_adddi3_compareV (h0, h1, force_reg (DImode, h2))); > + } > + else > + { > + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); > + emit_insn (gen_adddi3_carryinV (h0, h1, force_reg (DImode, h2))); > + } > + > + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); > + emit_move_insn (gen_highpart (DImode, operands[0]), h0); > + > + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); > + DONE; > +}) > + > +(define_expand "uaddvti4" > + [(match_operand:TI 0 "register_operand" "") > + (match_operand:TI 1 "register_operand" "") > + (match_operand:TI 2 "aarch64_reg_or_imm" "") > + (match_operand 3 "")] > + "" > +{ > + rtx l0,l1,l2,h0,h1,h2; > + > + aarch64_add_128bit_scratch_regs (operands[1], operands[2], > + &l0, &l1, &l2, &h0, &h1, &h2); > + > + if (l2 == const0_rtx) > + { > + l0 = l1; > + emit_insn (gen_adddi3_compareC (h0, h1, force_reg (DImode, h2))); > + } > + else > + { > + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); > + emit_insn (gen_adddi3_carryinC (h0, h1, force_reg (DImode, h2))); > + } > + > + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); > + emit_move_insn (gen_highpart (DImode, operands[0]), h0); > + > + aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); DONE; > + }) > + > (define_insn "add<mode>3_compare0" > [(set (reg:CC_NZ CC_REGNUM) > (compare:CC_NZ > @@ -1828,10 +1926,70 @@ > [(set_attr "type" "alus_sreg")] > ) > > +;; Note that since we're sign-extending, match the immediate in GPI > +;; rather than in DWI. Since CONST_INT is modeless, this works fine. 
> +(define_insn "*add<mode>3_compareV_cconly_imm" > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V > + (plus:<DWI> > + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r,r")) > + (match_operand:GPI 1 "aarch64_plus_immediate" "I,J")) > + (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] > + "" > + "@ > + cmn\\t%<w>0, %<w>1 > + cmp\\t%<w>0, #%n1" > + [(set_attr "type" "alus_imm")] > +) > + > +(define_insn "*add<mode>3_compareV_cconly" > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V Use of ne is wrong here. The condition register should be set to the result of a compare rtl construct. The same applies elsewhere within this patch. NE is then used on the result of the comparison. The mode of the compare then indicates what might or might not be valid in the way the comparison is finally constructed. Note that this issue may go back to the earlier patches that this is based on, but those are equally incorrect and will need fixing as well at some point. We shouldn't perpetuate the issue. 
> + (plus:<DWI> > + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r")) > + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) > + (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] > + "" > + "cmn\\t%<w>0, %<w>1" > + [(set_attr "type" "alus_sreg")] > +) > + > +(define_insn "*add<mode>3_compareV_imm" > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V > + (plus:<DWI> > + (sign_extend:<DWI> > + (match_operand:GPI 1 "register_operand" "r,r")) > + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")) > + (sign_extend:<DWI> > + (plus:GPI (match_dup 1) (match_dup 2))))) > + (set (match_operand:GPI 0 "register_operand" "=r,r") > + (plus:GPI (match_dup 1) (match_dup 2)))] > + "" > + "@ > + adds\\t%<w>0, %<w>1, %<w>2 > + subs\\t%<w>0, %<w>1, #%n2" > + [(set_attr "type" "alus_imm,alus_imm")] > +) > + > +(define_insn "add<mode>3_compareV" > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V > + (plus:<DWI> > + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")) > + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) > + (sign_extend:<DWI> (plus:GPI (match_dup 1) (match_dup 2))))) > + (set (match_operand:GPI 0 "register_operand" "=r") > + (plus:GPI (match_dup 1) (match_dup 2)))] > + "" > + "adds\\t%<w>0, %<w>1, %<w>2" > + [(set_attr "type" "alus_sreg")] > +) > + > (define_insn "*adds_shift_imm_<mode>" > [(set (reg:CC_NZ CC_REGNUM) > (compare:CC_NZ > - (plus:GPI (ASHIFT:GPI > + (plus:GPI (ASHIFT:GPI > (match_operand:GPI 1 "register_operand" "r") > (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n")) > (match_operand:GPI 3 "register_operand" "r")) @@ -2187,6 > +2345,138 @@ > [(set_attr "type" "adc_reg")] > ) > > +(define_expand "add<mode>3_carryinC" > + [(parallel > + [(set (match_dup 3) > + (ne:CC_C > + (plus:<DWI> > + (plus:<DWI> > + (match_dup 4) > + (zero_extend:<DWI> > + (match_operand:GPI 1 "register_operand" "r"))) > + (zero_extend:<DWI> > + (match_operand:GPI 2 "register_operand" "r"))) > + (zero_extend:<DWI> > + 
(plus:GPI > + (plus:GPI (match_dup 5) (match_dup 1)) > + (match_dup 2))))) > + (set (match_operand:GPI 0 "register_operand") > + (plus:GPI > + (plus:GPI (match_dup 5) (match_dup 1)) > + (match_dup 2)))])] > + "" > +{ > + operands[3] = gen_rtx_REG (CC_Cmode, CC_REGNUM); > + operands[4] = gen_rtx_NE (<DWI>mode, operands[3], const0_rtx); > + operands[5] = gen_rtx_NE (<MODE>mode, operands[3], const0_rtx); > +}) > + > +(define_insn "*add<mode>3_carryinC_zero" > + [(set (reg:CC_C CC_REGNUM) > + (ne:CC_C > + (plus:<DWI> > + (match_operand:<DWI> 2 "aarch64_carry_operation" "") > + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) > + (zero_extend:<DWI> > + (plus:GPI > + (match_operand:GPI 3 "aarch64_carry_operation" "") > + (match_dup 1))))) > + (set (match_operand:GPI 0 "register_operand") > + (plus:GPI (match_dup 3) (match_dup 1)))] > + "" > + "adcs\\t%<w>0, %<w>1, <w>zr" > + [(set_attr "type" "adc_reg")] > +) > + > +(define_insn "*add<mode>3_carryinC" > + [(set (reg:CC_C CC_REGNUM) > + (ne:CC_C > + (plus:<DWI> > + (plus:<DWI> > + (match_operand:<DWI> 3 "aarch64_carry_operation" "") > + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) > + (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) > + (zero_extend:<DWI> > + (plus:GPI > + (plus:GPI > + (match_operand:GPI 4 "aarch64_carry_operation" "") > + (match_dup 1)) > + (match_dup 2))))) > + (set (match_operand:GPI 0 "register_operand") > + (plus:GPI > + (plus:GPI (match_dup 4) (match_dup 1)) > + (match_dup 2)))] > + "" > + "adcs\\t%<w>0, %<w>1, %<w>2" > + [(set_attr "type" "adc_reg")] > +) > + > +(define_expand "add<mode>3_carryinV" > + [(parallel > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V > + (plus:<DWI> > + (plus:<DWI> > + (match_dup 3) > + (sign_extend:<DWI> > + (match_operand:GPI 1 "register_operand" "r"))) > + (sign_extend:<DWI> > + (match_operand:GPI 2 "register_operand" "r"))) > + (sign_extend:<DWI> > + (plus:GPI > + (plus:GPI (match_dup 4) (match_dup 1)) > + (match_dup 
2))))) > + (set (match_operand:GPI 0 "register_operand") > + (plus:GPI > + (plus:GPI (match_dup 4) (match_dup 1)) > + (match_dup 2)))])] > + "" > +{ > + rtx cc = gen_rtx_REG (CC_Cmode, CC_REGNUM); > + operands[3] = gen_rtx_NE (<DWI>mode, cc, const0_rtx); > + operands[4] = gen_rtx_NE (<MODE>mode, cc, const0_rtx); > +}) > + > +(define_insn "*add<mode>3_carryinV_zero" > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V > + (plus:<DWI> > + (match_operand:<DWI> 2 "aarch64_carry_operation" "") > + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) > + (sign_extend:<DWI> > + (plus:GPI > + (match_operand:GPI 3 "aarch64_carry_operation" "") > + (match_dup 1))))) > + (set (match_operand:GPI 0 "register_operand") > + (plus:GPI (match_dup 3) (match_dup 1)))] > + "" > + "adcs\\t%<w>0, %<w>1, <w>zr" > + [(set_attr "type" "adc_reg")] > +) > + > +(define_insn "*add<mode>3_carryinV" > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V > + (plus:<DWI> > + (plus:<DWI> > + (match_operand:<DWI> 3 "aarch64_carry_operation" "") > + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) > + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) > + (sign_extend:<DWI> > + (plus:GPI > + (plus:GPI > + (match_operand:GPI 4 "aarch64_carry_operation" "") > + (match_dup 1)) > + (match_dup 2))))) > + (set (match_operand:GPI 0 "register_operand") > + (plus:GPI > + (plus:GPI (match_dup 4) (match_dup 1)) > + (match_dup 2)))] > + "" > + "adcs\\t%<w>0, %<w>1, %<w>2" > + [(set_attr "type" "adc_reg")] > +) > + > (define_insn "*add_uxt<mode>_shift2" > [(set (match_operand:GPI 0 "register_operand" "=rk") > (plus:GPI (and:GPI > @@ -2283,22 +2573,86 @@ > (set_attr "simd" "*,yes")] > ) > > +(define_expand "subv<mode>4" > + [(match_operand:GPI 0 "register_operand") > + (match_operand:GPI 1 "aarch64_reg_or_zero") > + (match_operand:GPI 2 "aarch64_reg_or_zero") > + (match_operand 3 "")] > + "" > +{ > + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], > +operands[2])); > + 
aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); > + > + DONE; > +}) > + > +(define_expand "usubv<mode>4" > + [(match_operand:GPI 0 "register_operand") > + (match_operand:GPI 1 "aarch64_reg_or_zero") > + (match_operand:GPI 2 "aarch64_reg_or_zero") > + (match_operand 3 "")] > + "" > +{ > + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], > +operands[2])); > + aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); > + > + DONE; > +}) > + > (define_expand "subti3" > [(set (match_operand:TI 0 "register_operand" "") > - (minus:TI (match_operand:TI 1 "register_operand" "") > + (minus:TI (match_operand:TI 1 "aarch64_reg_or_zero" "") > (match_operand:TI 2 "register_operand" "")))] > "" > { > - rtx low = gen_reg_rtx (DImode); > - emit_insn (gen_subdi3_compare1 (low, gen_lowpart (DImode, operands[1]), > - gen_lowpart (DImode, operands[2]))); > + rtx l0 = gen_reg_rtx (DImode); > + rtx l1 = simplify_gen_subreg (DImode, operands[1], TImode, > + subreg_lowpart_offset (DImode, TImode)); > + rtx l2 = gen_lowpart (DImode, operands[2]); > + rtx h0 = gen_reg_rtx (DImode); > + rtx h1 = simplify_gen_subreg (DImode, operands[1], TImode, > + subreg_highpart_offset (DImode, TImode)); > + rtx h2 = gen_highpart (DImode, operands[2]); > > - rtx high = gen_reg_rtx (DImode); > - emit_insn (gen_subdi3_carryin (high, gen_highpart (DImode, operands[1]), > - gen_highpart (DImode, operands[2]))); > + emit_insn (gen_subdi3_compare1 (l0, l1, l2)); emit_insn > + (gen_subdi3_carryin (h0, h1, h2)); > > - emit_move_insn (gen_lowpart (DImode, operands[0]), low); > - emit_move_insn (gen_highpart (DImode, operands[0]), high); > + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); > + emit_move_insn (gen_highpart (DImode, operands[0]), h0); > + DONE; > +}) > + > +(define_expand "subvti4" > + [(match_operand:TI 0 "register_operand") > + (match_operand:TI 1 "aarch64_reg_or_zero") > + (match_operand:TI 2 "aarch64_reg_or_imm") > + (match_operand 3 "")] > + "" > +{ > + rtx 
l0,l1,l2,h0,h1,h2; > + > + aarch64_subv_128bit_scratch_regs (operands[1], operands[2], > + &l0, &l1, &l2, &h0, &h1, &h2); > + aarch64_expand_subvti (operands[0], l0, l1, l2, h0, h1, h2); > + > + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); > + DONE; > +}) > + > +(define_expand "usubvti4" > + [(match_operand:TI 0 "register_operand") > + (match_operand:TI 1 "aarch64_reg_or_zero") > + (match_operand:TI 2 "aarch64_reg_or_imm") > + (match_operand 3 "")] > + "" > +{ > + rtx l0,l1,l2,h0,h1,h2; > + > + aarch64_subv_128bit_scratch_regs (operands[1], operands[2], > + &l0, &l1, &l2, &h0, &h1, &h2); > + aarch64_expand_subvti (operands[0], l0, l1, l2, h0, h1, h2); > + > + aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); > DONE; > }) > > @@ -2327,6 +2681,22 @@ > [(set_attr "type" "alus_sreg")] > ) > > +(define_insn "*sub<mode>3_compare1_imm" > + [(set (reg:CC CC_REGNUM) > + (compare:CC > + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ,rZ") > + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))) > + (set (match_operand:GPI 0 "register_operand" "=r,r") > + (plus:GPI > + (match_dup 1) > + (match_operand:GPI 3 "aarch64_plus_immediate" "J,I")))] > + "UINTVAL (operands[2]) == -UINTVAL (operands[3])" > + "@ > + subs\\t%<w>0, %<w>1, %<w>2 > + adds\\t%<w>0, %<w>1, %<w>3" > + [(set_attr "type" "alus_imm")] > +) > + > (define_insn "sub<mode>3_compare1" > [(set (reg:CC CC_REGNUM) > (compare:CC > @@ -2554,6 +2924,85 @@ > [(set_attr "type" "adc_reg")] > ) > > +(define_expand "sub<mode>3_carryinCV" > + [(parallel > + [(set (reg:CC CC_REGNUM) > + (compare:CC > + (sign_extend:<DWI> > + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")) > + (plus:<DWI> > + (sign_extend:<DWI> > + (match_operand:GPI 2 "register_operand" "r")) > + (ltu:<DWI> (reg:CC CC_REGNUM) (const_int 0))))) > + (set (match_operand:GPI 0 "register_operand" "=r") > + (minus:GPI > + (minus:GPI (match_dup 1) (match_dup 2)) > + (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))])] > + "" > +) > + > +(define_insn 
"*sub<mode>3_carryinCV_z1_z2" > + [(set (reg:CC CC_REGNUM) > + (compare:CC > + (const_int 0) > + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) > + (set (match_operand:GPI 0 "register_operand" "=r") > + (neg:GPI (match_operand:GPI 1 "aarch64_borrow_operation" "")))] > + "" > + "sbcs\\t%<w>0, <w>zr, <w>zr" > + [(set_attr "type" "adc_reg")] > +) > + > +(define_insn "*sub<mode>3_carryinCV_z1" > + [(set (reg:CC CC_REGNUM) > + (compare:CC > + (const_int 0) > + (plus:<DWI> > + (sign_extend:<DWI> > + (match_operand:GPI 1 "register_operand" "r")) > + (match_operand:<DWI> 2 "aarch64_borrow_operation" "")))) > + (set (match_operand:GPI 0 "register_operand" "=r") > + (minus:GPI > + (neg:GPI (match_dup 1)) > + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] > + "" > + "sbcs\\t%<w>0, <w>zr, %<w>1" > + [(set_attr "type" "adc_reg")] > +) > + > +(define_insn "*sub<mode>3_carryinCV_z2" > + [(set (reg:CC CC_REGNUM) > + (compare:CC > + (sign_extend:<DWI> > + (match_operand:GPI 1 "register_operand" "r")) > + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) > + (set (match_operand:GPI 0 "register_operand" "=r") > + (minus:GPI > + (match_dup 1) > + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] > + "" > + "sbcs\\t%<w>0, %<w>1, <w>zr" > + [(set_attr "type" "adc_reg")] > +) > + > +(define_insn "*sub<mode>3_carryinCV" > + [(set (reg:CC CC_REGNUM) > + (compare:CC > + (sign_extend:<DWI> > + (match_operand:GPI 1 "register_operand" "r")) > + (plus:<DWI> > + (sign_extend:<DWI> > + (match_operand:GPI 2 "register_operand" "r")) > + (match_operand:<DWI> 3 "aarch64_borrow_operation" "")))) > + (set (match_operand:GPI 0 "register_operand" "=r") > + (minus:GPI > + (minus:GPI (match_dup 1) (match_dup 2)) > + (match_operand:GPI 4 "aarch64_borrow_operation" "")))] > + "" > + "sbcs\\t%<w>0, %<w>1, %<w>2" > + [(set_attr "type" "adc_reg")] > +) > + > (define_insn "*sub_uxt<mode>_shift2" > [(set (match_operand:GPI 0 "register_operand" "=rk") > (minus:GPI (match_operand:GPI 4 
"register_operand" "rk") diff --git > a/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c > b/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c > new file mode 100644 > index 0000000..0b31500 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +__int128 overflow_add (__int128 x, __int128 y) { > + __int128 r; > + > + int ovr = __builtin_add_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "adds" } } */ > +/* { dg-final { scan-assembler "adcs" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c > b/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c > new file mode 100644 > index 0000000..9768a98 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +long overflow_add (long x, long y) > +{ > + long r; > + > + int ovr = __builtin_saddl_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "adds" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c > b/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c > new file mode 100644 > index 0000000..126a526 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +long long overflow_add (long long x, long long y) { > + long long r; > + > + int ovr = __builtin_saddll_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "adds" } } */ > + > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c > b/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c > new file mode 100644 > 
index 0000000..c1261e3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +__int128 overflow_sub (__int128 x, __int128 y) { > + __int128 r; > + > + int ovr = __builtin_sub_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "subs" } } */ > +/* { dg-final { scan-assembler "sbcs" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c > b/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c > new file mode 100644 > index 0000000..1040464 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +long overflow_sub (long x, long y) > +{ > + long r; > + > + int ovr = __builtin_ssubl_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "subs" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c > b/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c > new file mode 100644 > index 0000000..a03df88 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +long long overflow_sub (long long x, long long y) { > + long long r; > + > + int ovr = __builtin_ssubll_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "subs" } } */ > + > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c > b/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c > new file mode 100644 > index 0000000..c573c2a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ 
> + > +extern void overflow_handler (); > + > +unsigned __int128 overflow_add (unsigned __int128 x, unsigned > +__int128 y) { > + unsigned __int128 r; > + > + int ovr = __builtin_add_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "adds" } } */ > +/* { dg-final { scan-assembler "adcs" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c > b/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c > new file mode 100644 > index 0000000..e325591 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +unsigned long overflow_add (unsigned long x, unsigned long y) { > + unsigned long r; > + > + int ovr = __builtin_uaddl_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "adds" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c > b/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c > new file mode 100644 > index 0000000..5f42886 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +unsigned long long overflow_add (unsigned long long x, unsigned long > +long y) { > + unsigned long long r; > + > + int ovr = __builtin_uaddll_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "adds" } } */ > + > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c > b/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c > new file mode 100644 > index 0000000..a84f4a4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +unsigned __int128 
overflow_sub (unsigned __int128 x, unsigned > +__int128 y) { > + unsigned __int128 r; > + > + int ovr = __builtin_sub_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "subs" } } */ > +/* { dg-final { scan-assembler "sbcs" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c > b/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c > new file mode 100644 > index 0000000..ed033da > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +unsigned long overflow_sub (unsigned long x, unsigned long y) { > + unsigned long r; > + > + int ovr = __builtin_usubl_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "subs" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c > b/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c > new file mode 100644 > index 0000000..a742f0c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +unsigned long long overflow_sub (unsigned long long x, unsigned long > +long y) { > + unsigned long long r; > + > + int ovr = __builtin_usubll_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "subs" } } */ > + > ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH][Aarch64] Add support for overflow add and sub operations 2017-07-06 7:29 ` Michael Collison @ 2017-07-06 8:22 ` Richard Earnshaw (lists) 0 siblings, 0 replies; 8+ messages in thread From: Richard Earnshaw (lists) @ 2017-07-06 8:22 UTC (permalink / raw) To: Michael Collison; +Cc: gcc-patches On 06/07/17 08:29, Michael Collison wrote: > Richard, > > Can you explain "Use of ne is wrong here. The condition register should > be set to the result of a compare rtl construct. The same applies > elsewhere within this patch. NE is then used on the result of the > comparison. The mode of the compare then indicates what might or might > not be valid in the way the comparison is finally constructed."? > > Why is "ne" wrong? I don't doubt you are correct, but I see nothing in > the internals manual that forbids it. I want to understand what issues > this exposes. > Because the idiomatic form on a machine with a flags register is CCreg:mode = COMPARE:mode (A, B) which is then used with <cond-op> (CCreg:mode, 0) where cond-op is NE, EQ, GE, ... as appropriate. > As you indicate I used this idiom in the arm port when I added the > overflow operations there as well. Additionally other targets seem to > use the comparison operators this way (i386 for the umulv). Some targets really have boolean predicate operations that set results explicitly in GP registers as the truth of A < B, etc. On those machines using pred-reg = cond-op (A, B) makes sense, but not on ARM or AArch64. R. 
> > Regards, > > Michael Collison > > -----Original Message----- > From: Richard Earnshaw (lists) [mailto:Richard.Earnshaw@arm.com] > Sent: Wednesday, July 5, 2017 2:38 AM > To: Michael Collison <Michael.Collison@arm.com>; Christophe Lyon > <christophe.lyon@linaro.org> > Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com> > Subject: Re: [PATCH][Aarch64] Add support for overflow add and sub > operations > > On 19/05/17 22:11, Michael Collison wrote: >> Christophe, >> >> I had a typo in the two test cases: "addcs" should have been "adcs". I caught this previously but submitted the previous patch incorrectly. Updated patch attached. >> >> Okay for trunk? >> > > Apologies for the delay responding, I've been procrastinating over this > one. In part it's due to the size of the patch with very little > top-level description of what's the motivation and overall approach to > the problem. > > It would really help review if this could be split into multiple patches > with a description of what each stage achieves. > > Anyway, there are a couple of obvious formatting issues to deal with > first, before we get into the details of the patch. > >> -----Original Message----- >> From: Christophe Lyon [mailto:christophe.lyon@linaro.org] >> Sent: Friday, May 19, 2017 3:59 AM >> To: Michael Collison <Michael.Collison@arm.com> >> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com> >> Subject: Re: [PATCH][Aarch64] Add support for overflow add and sub >> operations >> >> Hi Michael, >> >> >> On 19 May 2017 at 07:12, Michael Collison <Michael.Collison@arm.com> wrote: >>> Hi, >>> >>> This patch improves code generation for builtin arithmetic overflow operations for the aarch64 backend. 
As an example for a simple test case such as: >>> >>> Sure for a simple test case such as: >>> >>> int >>> f (int x, int y, int *ovf) >>> { >>> int res; >>> *ovf = __builtin_sadd_overflow (x, y, &res); >>> return res; >>> } >>> >>> Current trunk at -O2 generates >>> >>> f: >>> mov w3, w0 >>> mov w4, 0 >>> add w0, w0, w1 >>> tbnz w1, #31, .L4 >>> cmp w0, w3 >>> blt .L3 >>> .L2: >>> str w4, [x2] >>> ret >>> .p2align 3 >>> .L4: >>> cmp w0, w3 >>> ble .L2 >>> .L3: >>> mov w4, 1 >>> b .L2 >>> >>> >>> With the patch this now generates: >>> >>> f: >>> adds w0, w0, w1 >>> cset w1, vs >>> str w1, [x2] >>> ret >>> >>> >>> Original patch from Richard Henderson: >>> >>> https://gcc.gnu.org/ml/gcc-patches/2016-01/msg01903.html >>> >>> >>> Okay for trunk? >>> >>> 2017-05-17 Michael Collison <michael.collison@arm.com> >>> Richard Henderson <rth@redhat.com> >>> >>> * config/aarch64/aarch64-modes.def (CC_V): New. >>> * config/aarch64/aarch64-protos.h >>> (aarch64_add_128bit_scratch_regs): Declare >>> (aarch64_add_128bit_scratch_regs): Declare. >>> (aarch64_expand_subvti): Declare. >>> (aarch64_gen_unlikely_cbranch): Declare >>> * config/aarch64/aarch64.c (aarch64_select_cc_mode): Test >>> for signed overflow using CC_Vmode. >>> (aarch64_get_condition_code_1): Handle CC_Vmode. >>> (aarch64_gen_unlikely_cbranch): New function. >>> (aarch64_add_128bit_scratch_regs): New function. >>> (aarch64_subv_128bit_scratch_regs): New function. >>> (aarch64_expand_subvti): New function. >>> * config/aarch64/aarch64.md (addv<GPI>4, uaddv<GPI>4): New. >>> (addti3): Create simpler code if low part is already known to be 0. >>> (addvti4, uaddvti4): New. >>> (*add<GPI>3_compareC_cconly_imm): New. >>> (*add<GPI>3_compareC_cconly): New. >>> (*add<GPI>3_compareC_imm): New. >>> (*add<GPI>3_compareC): Rename from add<GPI>3_compare1; do not >>> handle constants within this pattern. >>> (*add<GPI>3_compareV_cconly_imm): New. >>> (*add<GPI>3_compareV_cconly): New. >>> (*add<GPI>3_compareV_imm): New. 
>>> (add<GPI>3_compareV): New. >>> (add<GPI>3_carryinC, add<GPI>3_carryinV): New. >>> (*add<GPI>3_carryinC_zero, *add<GPI>3_carryinV_zero): New. >>> (*add<GPI>3_carryinC, *add<GPI>3_carryinV): New. >>> (subv<GPI>4, usubv<GPI>4): New. >>> (subti): Handle op1 zero. >>> (subvti4, usub4ti4): New. >>> (*sub<GPI>3_compare1_imm): New. >>> (sub<GPI>3_carryinCV): New. >>> (*sub<GPI>3_carryinCV_z1_z2, *sub<GPI>3_carryinCV_z1): New. >>> (*sub<GPI>3_carryinCV_z2, *sub<GPI>3_carryinCV): New. >>> * testsuite/gcc.target/arm/builtin_sadd_128.c: New testcase. >>> * testsuite/gcc.target/arm/builtin_saddl.c: New testcase. >>> * testsuite/gcc.target/arm/builtin_saddll.c: New testcase. >>> * testsuite/gcc.target/arm/builtin_uadd_128.c: New testcase. >>> * testsuite/gcc.target/arm/builtin_uaddl.c: New testcase. >>> * testsuite/gcc.target/arm/builtin_uaddll.c: New testcase. >>> * testsuite/gcc.target/arm/builtin_ssub_128.c: New testcase. >>> * testsuite/gcc.target/arm/builtin_ssubl.c: New testcase. >>> * testsuite/gcc.target/arm/builtin_ssubll.c: New testcase. >>> * testsuite/gcc.target/arm/builtin_usub_128.c: New testcase. >>> * testsuite/gcc.target/arm/builtin_usubl.c: New testcase. >>> * testsuite/gcc.target/arm/builtin_usubll.c: New testcase. >> >> I've tried your patch, and 2 of the new tests FAIL: >> gcc.target/aarch64/builtin_sadd_128.c scan-assembler addcs >> gcc.target/aarch64/builtin_uadd_128.c scan-assembler addcs >> >> Am I missing something? >> >> Thanks, >> >> Christophe >> >> >> pr6308v2.patch >> >> >> diff --git a/gcc/config/aarch64/aarch64-modes.def >> b/gcc/config/aarch64/aarch64-modes.def >> index 45f7a44..244e490 100644 >> --- a/gcc/config/aarch64/aarch64-modes.def >> +++ b/gcc/config/aarch64/aarch64-modes.def >> @@ -24,6 +24,7 @@ CC_MODE (CC_SWP); >> CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */ >> CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */ >> CC_MODE (CC_C); /* Only C bit of condition flags is valid. 
*/ >> +CC_MODE (CC_V); /* Only V bit of condition flags is valid. */ >> >> /* Half-precision floating point for __fp16. */ FLOAT_MODE (HF, 2, >> 0); diff --git a/gcc/config/aarch64/aarch64-protos.h >> b/gcc/config/aarch64/aarch64-protos.h >> index f55d4ba..f38b2b8 100644 >> --- a/gcc/config/aarch64/aarch64-protos.h >> +++ b/gcc/config/aarch64/aarch64-protos.h >> @@ -388,6 +388,18 @@ void aarch64_relayout_simd_types (void); void >> aarch64_reset_previous_fndecl (void); bool >> aarch64_return_address_signing_enabled (void); void >> aarch64_save_restore_target_globals (tree); >> +void aarch64_add_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, >> + rtx *low_in1, rtx *low_in2, >> + rtx *high_dest, rtx *high_in1, >> + rtx *high_in2); >> +void aarch64_subv_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, >> + rtx *low_in1, rtx *low_in2, >> + rtx *high_dest, rtx *high_in1, >> + rtx *high_in2); >> +void aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1, >> + rtx low_in2, rtx high_dest, rtx high_in1, >> + rtx high_in2); >> + > > It's a little bit inconsistent, but the general style in > aarch64-protos.h is not to include parameter names in prototypes, just > their types. > >> >> /* Initialize builtins for SIMD intrinsics. 
*/ void >> init_aarch64_simd_builtins (void); @@ -412,6 +424,8 @@ bool >> aarch64_float_const_representable_p (rtx); >> >> #if defined (RTX_CODE) >> >> +void aarch64_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode, >> + rtx label_ref); >> bool aarch64_legitimate_address_p (machine_mode, rtx, RTX_CODE, >> bool); machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx); rtx >> aarch64_gen_compare_reg (RTX_CODE, rtx, rtx); diff --git >> a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index >> f343d92..71a651c 100644 >> --- a/gcc/config/aarch64/aarch64.c >> +++ b/gcc/config/aarch64/aarch64.c >> @@ -4716,6 +4716,13 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) >> && GET_CODE (y) == ZERO_EXTEND) >> return CC_Cmode; >> >> + /* A test for signed overflow. */ >> + if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode) >> + && code == NE >> + && GET_CODE (x) == PLUS >> + && GET_CODE (y) == SIGN_EXTEND) >> + return CC_Vmode; >> + >> /* For everything else, return CCmode. */ >> return CCmode; >> } >> @@ -4822,6 +4829,15 @@ aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code) >> } >> break; >> >> + case CC_Vmode: >> + switch (comp_code) >> + { >> + case NE: return AARCH64_VS; >> + case EQ: return AARCH64_VC; >> + default: return -1; >> + } >> + break; >> + >> default: >> return -1; >> } >> @@ -13630,6 +13646,88 @@ aarch64_split_dimode_const_store (rtx dst, rtx src) >> return true; >> } >> >> +/* Generate RTL for a conditional branch with rtx comparison CODE in >> + mode CC_MODE. The destination of the unlikely conditional branch >> + is LABEL_REF. 
*/ >> + >> +void >> +aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode, >> + rtx label_ref) >> +{ >> + rtx x; >> + x = gen_rtx_fmt_ee (code, VOIDmode, >> + gen_rtx_REG (cc_mode, CC_REGNUM), >> + const0_rtx); >> + >> + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, >> + gen_rtx_LABEL_REF (VOIDmode, label_ref), >> + pc_rtx); >> + aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); } >> + >> +void aarch64_add_128bit_scratch_regs (rtx op1, rtx op2, rtx >> +*low_dest, > > Function names must start in column 1, with the return type on the > preceding line. All functions should have a top-level comment > describing what they do (their contract with the caller). > >> + rtx *low_in1, rtx *low_in2, >> + rtx *high_dest, rtx *high_in1, >> + rtx *high_in2) >> +{ >> + *low_dest = gen_reg_rtx (DImode); >> + *low_in1 = gen_lowpart (DImode, op1); >> + *low_in2 = simplify_gen_subreg (DImode, op2, TImode, >> + subreg_lowpart_offset (DImode, TImode)); >> + *high_dest = gen_reg_rtx (DImode); >> + *high_in1 = gen_highpart (DImode, op1); >> + *high_in2 = simplify_gen_subreg (DImode, op2, TImode, >> + subreg_highpart_offset (DImode, TImode)); } >> + >> +void aarch64_subv_128bit_scratch_regs (rtx op1, rtx op2, rtx >> +*low_dest, > > Same here. > >> + rtx *low_in1, rtx *low_in2, >> + rtx *high_dest, rtx *high_in1, >> + rtx *high_in2) >> +{ >> + *low_dest = gen_reg_rtx (DImode); >> + *low_in1 = simplify_gen_subreg (DImode, op1, TImode, >> + subreg_lowpart_offset (DImode, TImode)); >> + *low_in2 = simplify_gen_subreg (DImode, op2, TImode, >> + subreg_lowpart_offset (DImode, TImode)); >> + *high_dest = gen_reg_rtx (DImode); >> + *high_in1 = simplify_gen_subreg (DImode, op1, TImode, >> + subreg_highpart_offset (DImode, TImode)); >> + *high_in2 = simplify_gen_subreg (DImode, op2, TImode, >> + subreg_highpart_offset (DImode, TImode)); >> + >> +} >> + >> +void aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1, > And here. 
> >> + rtx low_in2, rtx high_dest, rtx high_in1, >> + rtx high_in2) >> +{ >> + if (low_in2 == const0_rtx) >> + { >> + low_dest = low_in1; >> + emit_insn (gen_subdi3_compare1 (high_dest, high_in1, >> + force_reg (DImode, high_in2))); >> + } >> + else >> + { >> + if (CONST_INT_P (low_in2)) >> + { >> + low_in2 = force_reg (DImode, GEN_INT (-UINTVAL (low_in2))); >> + high_in2 = force_reg (DImode, high_in2); >> + emit_insn (gen_adddi3_compareC (low_dest, low_in1, low_in2)); >> + } >> + else >> + emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2)); >> + emit_insn (gen_subdi3_carryinCV (high_dest, >> + force_reg (DImode, high_in1), >> + high_in2)); >> + } >> + >> + emit_move_insn (gen_lowpart (DImode, op0), low_dest); >> + emit_move_insn (gen_highpart (DImode, op0), high_dest); >> + >> +} >> + >> /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ >> >> static unsigned HOST_WIDE_INT >> diff --git a/gcc/config/aarch64/aarch64.md >> b/gcc/config/aarch64/aarch64.md index a693a3b..3976ecb 100644 >> --- a/gcc/config/aarch64/aarch64.md >> +++ b/gcc/config/aarch64/aarch64.md >> @@ -1711,25 +1711,123 @@ >> } >> ) >> >> +(define_expand "addv<mode>4" >> + [(match_operand:GPI 0 "register_operand") >> + (match_operand:GPI 1 "register_operand") >> + (match_operand:GPI 2 "register_operand") >> + (match_operand 3 "")] >> + "" >> +{ >> + emit_insn (gen_add<mode>3_compareV (operands[0], operands[1], >> +operands[2])); >> + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); >> + >> + DONE; >> +}) >> + >> +(define_expand "uaddv<mode>4" >> + [(match_operand:GPI 0 "register_operand") >> + (match_operand:GPI 1 "register_operand") >> + (match_operand:GPI 2 "register_operand") >> + (match_operand 3 "")] > > With no rtl in the expand to describe this pattern, it really should > have a top-level comment explaining the arguments (reference to the > manual is probably OK in this case). 
> >> + "" >> +{ >> + emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], >> +operands[2])); >> + aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); >> + >> + DONE; >> +}) >> + >> + >> (define_expand "addti3" >> [(set (match_operand:TI 0 "register_operand" "") >> (plus:TI (match_operand:TI 1 "register_operand" "") >> - (match_operand:TI 2 "register_operand" "")))] >> + (match_operand:TI 2 "aarch64_reg_or_imm" "")))] >> "" >> { >> - rtx low = gen_reg_rtx (DImode); >> - emit_insn (gen_adddi3_compareC (low, gen_lowpart (DImode, operands[1]), >> - gen_lowpart (DImode, operands[2]))); >> + rtx l0,l1,l2,h0,h1,h2; >> >> - rtx high = gen_reg_rtx (DImode); >> - emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]), >> - gen_highpart (DImode, operands[2]))); >> + aarch64_add_128bit_scratch_regs (operands[1], operands[2], >> + &l0, &l1, &l2, &h0, &h1, &h2); >> + >> + if (l2 == const0_rtx) >> + { >> + l0 = l1; >> + if (!aarch64_pluslong_operand (h2, DImode)) >> + h2 = force_reg (DImode, h2); >> + emit_insn (gen_adddi3 (h0, h1, h2)); >> + } >> + else >> + { >> + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); >> + emit_insn (gen_adddi3_carryin (h0, h1, force_reg (DImode, h2))); >> + } >> + >> + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); >> + emit_move_insn (gen_highpart (DImode, operands[0]), h0); >> >> - emit_move_insn (gen_lowpart (DImode, operands[0]), low); >> - emit_move_insn (gen_highpart (DImode, operands[0]), high); >> DONE; >> }) >> >> +(define_expand "addvti4" >> + [(match_operand:TI 0 "register_operand" "") >> + (match_operand:TI 1 "register_operand" "") >> + (match_operand:TI 2 "aarch64_reg_or_imm" "") >> + (match_operand 3 "")] > > Same here. 
> >> + "" >> +{ >> + rtx l0,l1,l2,h0,h1,h2; >> + >> + aarch64_add_128bit_scratch_regs (operands[1], operands[2], >> + &l0, &l1, &l2, &h0, &h1, &h2); >> + >> + if (l2 == const0_rtx) >> + { >> + l0 = l1; >> + emit_insn (gen_adddi3_compareV (h0, h1, force_reg (DImode, h2))); >> + } >> + else >> + { >> + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); >> + emit_insn (gen_adddi3_carryinV (h0, h1, force_reg (DImode, h2))); >> + } >> + >> + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); >> + emit_move_insn (gen_highpart (DImode, operands[0]), h0); >> + >> + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); >> + DONE; >> +}) >> + >> +(define_expand "uaddvti4" >> + [(match_operand:TI 0 "register_operand" "") >> + (match_operand:TI 1 "register_operand" "") >> + (match_operand:TI 2 "aarch64_reg_or_imm" "") >> + (match_operand 3 "")] >> + "" >> +{ >> + rtx l0,l1,l2,h0,h1,h2; >> + >> + aarch64_add_128bit_scratch_regs (operands[1], operands[2], >> + &l0, &l1, &l2, &h0, &h1, &h2); >> + >> + if (l2 == const0_rtx) >> + { >> + l0 = l1; >> + emit_insn (gen_adddi3_compareC (h0, h1, force_reg (DImode, h2))); >> + } >> + else >> + { >> + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); >> + emit_insn (gen_adddi3_carryinC (h0, h1, force_reg (DImode, h2))); >> + } >> + >> + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); >> + emit_move_insn (gen_highpart (DImode, operands[0]), h0); >> + >> + aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); DONE; >> + }) >> + >> (define_insn "add<mode>3_compare0" >> [(set (reg:CC_NZ CC_REGNUM) >> (compare:CC_NZ >> @@ -1828,10 +1926,70 @@ >> [(set_attr "type" "alus_sreg")] >> ) >> >> +;; Note that since we're sign-extending, match the immediate in GPI >> +;; rather than in DWI. Since CONST_INT is modeless, this works fine. 
>> +(define_insn "*add<mode>3_compareV_cconly_imm" >> + [(set (reg:CC_V CC_REGNUM) >> + (ne:CC_V >> + (plus:<DWI> >> + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r,r")) >> + (match_operand:GPI 1 "aarch64_plus_immediate" "I,J")) >> + (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] >> + "" >> + "@ >> + cmn\\t%<w>0, %<w>1 >> + cmp\\t%<w>0, #%n1" >> + [(set_attr "type" "alus_imm")] >> +) >> + >> +(define_insn "*add<mode>3_compareV_cconly" >> + [(set (reg:CC_V CC_REGNUM) >> + (ne:CC_V > > Use of ne is wrong here. The condition register should be set to the > result of a compare rtl construct. The same applies elsewhere within > this patch. NE is then used on the result of the comparison. The mode > of the compare then indicates what might or might not be valid in the > way the comparison is finally constructed. > > Note that this issue may go back to the earlier patches that this is > based on, but those are equally incorrect and will need fixing as well at > some point. We shouldn't perpetuate the issue. 
> >> + (plus:<DWI> >> + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r")) >> + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) >> + (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] >> + "" >> + "cmn\\t%<w>0, %<w>1" >> + [(set_attr "type" "alus_sreg")] >> +) >> + >> +(define_insn "*add<mode>3_compareV_imm" >> + [(set (reg:CC_V CC_REGNUM) >> + (ne:CC_V >> + (plus:<DWI> >> + (sign_extend:<DWI> >> + (match_operand:GPI 1 "register_operand" "r,r")) >> + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")) >> + (sign_extend:<DWI> >> + (plus:GPI (match_dup 1) (match_dup 2))))) >> + (set (match_operand:GPI 0 "register_operand" "=r,r") >> + (plus:GPI (match_dup 1) (match_dup 2)))] >> + "" >> + "@ >> + adds\\t%<w>0, %<w>1, %<w>2 >> + subs\\t%<w>0, %<w>1, #%n2" >> + [(set_attr "type" "alus_imm,alus_imm")] >> +) >> + >> +(define_insn "add<mode>3_compareV" >> + [(set (reg:CC_V CC_REGNUM) >> + (ne:CC_V >> + (plus:<DWI> >> + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")) >> + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) >> + (sign_extend:<DWI> (plus:GPI (match_dup 1) (match_dup 2))))) >> + (set (match_operand:GPI 0 "register_operand" "=r") >> + (plus:GPI (match_dup 1) (match_dup 2)))] >> + "" >> + "adds\\t%<w>0, %<w>1, %<w>2" >> + [(set_attr "type" "alus_sreg")] >> +) >> + >> (define_insn "*adds_shift_imm_<mode>" >> [(set (reg:CC_NZ CC_REGNUM) >> (compare:CC_NZ >> - (plus:GPI (ASHIFT:GPI >> + (plus:GPI (ASHIFT:GPI >> (match_operand:GPI 1 "register_operand" "r") >> (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n")) >> (match_operand:GPI 3 "register_operand" "r")) @@ -2187,6 >> +2345,138 @@ >> [(set_attr "type" "adc_reg")] >> ) >> >> +(define_expand "add<mode>3_carryinC" >> + [(parallel >> + [(set (match_dup 3) >> + (ne:CC_C >> + (plus:<DWI> >> + (plus:<DWI> >> + (match_dup 4) >> + (zero_extend:<DWI> >> + (match_operand:GPI 1 "register_operand" "r"))) >> + (zero_extend:<DWI> >> + 
(match_operand:GPI 2 "register_operand" "r"))) >> + (zero_extend:<DWI> >> + (plus:GPI >> + (plus:GPI (match_dup 5) (match_dup 1)) >> + (match_dup 2))))) >> + (set (match_operand:GPI 0 "register_operand") >> + (plus:GPI >> + (plus:GPI (match_dup 5) (match_dup 1)) >> + (match_dup 2)))])] >> + "" >> +{ >> + operands[3] = gen_rtx_REG (CC_Cmode, CC_REGNUM); >> + operands[4] = gen_rtx_NE (<DWI>mode, operands[3], const0_rtx); >> + operands[5] = gen_rtx_NE (<MODE>mode, operands[3], const0_rtx); >> +}) >> + >> +(define_insn "*add<mode>3_carryinC_zero" >> + [(set (reg:CC_C CC_REGNUM) >> + (ne:CC_C >> + (plus:<DWI> >> + (match_operand:<DWI> 2 "aarch64_carry_operation" "") >> + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) >> + (zero_extend:<DWI> >> + (plus:GPI >> + (match_operand:GPI 3 "aarch64_carry_operation" "") >> + (match_dup 1))))) >> + (set (match_operand:GPI 0 "register_operand") >> + (plus:GPI (match_dup 3) (match_dup 1)))] >> + "" >> + "adcs\\t%<w>0, %<w>1, <w>zr" >> + [(set_attr "type" "adc_reg")] >> +) >> + >> +(define_insn "*add<mode>3_carryinC" >> + [(set (reg:CC_C CC_REGNUM) >> + (ne:CC_C >> + (plus:<DWI> >> + (plus:<DWI> >> + (match_operand:<DWI> 3 "aarch64_carry_operation" "") >> + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) >> + (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) >> + (zero_extend:<DWI> >> + (plus:GPI >> + (plus:GPI >> + (match_operand:GPI 4 "aarch64_carry_operation" "") >> + (match_dup 1)) >> + (match_dup 2))))) >> + (set (match_operand:GPI 0 "register_operand") >> + (plus:GPI >> + (plus:GPI (match_dup 4) (match_dup 1)) >> + (match_dup 2)))] >> + "" >> + "adcs\\t%<w>0, %<w>1, %<w>2" >> + [(set_attr "type" "adc_reg")] >> +) >> + >> +(define_expand "add<mode>3_carryinV" >> + [(parallel >> + [(set (reg:CC_V CC_REGNUM) >> + (ne:CC_V >> + (plus:<DWI> >> + (plus:<DWI> >> + (match_dup 3) >> + (sign_extend:<DWI> >> + (match_operand:GPI 1 "register_operand" "r"))) >> + (sign_extend:<DWI> >> + 
(match_operand:GPI 2 "register_operand" "r"))) >> + (sign_extend:<DWI> >> + (plus:GPI >> + (plus:GPI (match_dup 4) (match_dup 1)) >> + (match_dup 2))))) >> + (set (match_operand:GPI 0 "register_operand") >> + (plus:GPI >> + (plus:GPI (match_dup 4) (match_dup 1)) >> + (match_dup 2)))])] >> + "" >> +{ >> + rtx cc = gen_rtx_REG (CC_Cmode, CC_REGNUM); >> + operands[3] = gen_rtx_NE (<DWI>mode, cc, const0_rtx); >> + operands[4] = gen_rtx_NE (<MODE>mode, cc, const0_rtx); >> +}) >> + >> +(define_insn "*add<mode>3_carryinV_zero" >> + [(set (reg:CC_V CC_REGNUM) >> + (ne:CC_V >> + (plus:<DWI> >> + (match_operand:<DWI> 2 "aarch64_carry_operation" "") >> + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) >> + (sign_extend:<DWI> >> + (plus:GPI >> + (match_operand:GPI 3 "aarch64_carry_operation" "") >> + (match_dup 1))))) >> + (set (match_operand:GPI 0 "register_operand") >> + (plus:GPI (match_dup 3) (match_dup 1)))] >> + "" >> + "adcs\\t%<w>0, %<w>1, <w>zr" >> + [(set_attr "type" "adc_reg")] >> +) >> + >> +(define_insn "*add<mode>3_carryinV" >> + [(set (reg:CC_V CC_REGNUM) >> + (ne:CC_V >> + (plus:<DWI> >> + (plus:<DWI> >> + (match_operand:<DWI> 3 "aarch64_carry_operation" "") >> + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) >> + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) >> + (sign_extend:<DWI> >> + (plus:GPI >> + (plus:GPI >> + (match_operand:GPI 4 "aarch64_carry_operation" "") >> + (match_dup 1)) >> + (match_dup 2))))) >> + (set (match_operand:GPI 0 "register_operand") >> + (plus:GPI >> + (plus:GPI (match_dup 4) (match_dup 1)) >> + (match_dup 2)))] >> + "" >> + "adcs\\t%<w>0, %<w>1, %<w>2" >> + [(set_attr "type" "adc_reg")] >> +) >> + >> (define_insn "*add_uxt<mode>_shift2" >> [(set (match_operand:GPI 0 "register_operand" "=rk") >> (plus:GPI (and:GPI >> @@ -2283,22 +2573,86 @@ >> (set_attr "simd" "*,yes")] >> ) >> >> +(define_expand "subv<mode>4" >> + [(match_operand:GPI 0 "register_operand") >> + 
(match_operand:GPI 1 "aarch64_reg_or_zero") >> + (match_operand:GPI 2 "aarch64_reg_or_zero") >> + (match_operand 3 "")] >> + "" >> +{ >> + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], >> +operands[2])); >> + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); >> + >> + DONE; >> +}) >> + >> +(define_expand "usubv<mode>4" >> + [(match_operand:GPI 0 "register_operand") >> + (match_operand:GPI 1 "aarch64_reg_or_zero") >> + (match_operand:GPI 2 "aarch64_reg_or_zero") >> + (match_operand 3 "")] >> + "" >> +{ >> + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], >> +operands[2])); >> + aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); >> + >> + DONE; >> +}) >> + >> (define_expand "subti3" >> [(set (match_operand:TI 0 "register_operand" "") >> - (minus:TI (match_operand:TI 1 "register_operand" "") >> + (minus:TI (match_operand:TI 1 "aarch64_reg_or_zero" "") >> (match_operand:TI 2 "register_operand" "")))] >> "" >> { >> - rtx low = gen_reg_rtx (DImode); >> - emit_insn (gen_subdi3_compare1 (low, gen_lowpart (DImode, operands[1]), >> - gen_lowpart (DImode, operands[2]))); >> + rtx l0 = gen_reg_rtx (DImode); >> + rtx l1 = simplify_gen_subreg (DImode, operands[1], TImode, >> + subreg_lowpart_offset (DImode, TImode)); >> + rtx l2 = gen_lowpart (DImode, operands[2]); >> + rtx h0 = gen_reg_rtx (DImode); >> + rtx h1 = simplify_gen_subreg (DImode, operands[1], TImode, >> + subreg_highpart_offset (DImode, TImode)); >> + rtx h2 = gen_highpart (DImode, operands[2]); >> >> - rtx high = gen_reg_rtx (DImode); >> - emit_insn (gen_subdi3_carryin (high, gen_highpart (DImode, operands[1]), >> - gen_highpart (DImode, operands[2]))); >> + emit_insn (gen_subdi3_compare1 (l0, l1, l2)); emit_insn >> + (gen_subdi3_carryin (h0, h1, h2)); >> >> - emit_move_insn (gen_lowpart (DImode, operands[0]), low); >> - emit_move_insn (gen_highpart (DImode, operands[0]), high); >> + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); >> + emit_move_insn (gen_highpart 
(DImode, operands[0]), h0); >> + DONE; >> +}) >> + >> +(define_expand "subvti4" >> + [(match_operand:TI 0 "register_operand") >> + (match_operand:TI 1 "aarch64_reg_or_zero") >> + (match_operand:TI 2 "aarch64_reg_or_imm") >> + (match_operand 3 "")] >> + "" >> +{ >> + rtx l0,l1,l2,h0,h1,h2; >> + >> + aarch64_subv_128bit_scratch_regs (operands[1], operands[2], >> + &l0, &l1, &l2, &h0, &h1, &h2); >> + aarch64_expand_subvti (operands[0], l0, l1, l2, h0, h1, h2); >> + >> + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); >> + DONE; >> +}) >> + >> +(define_expand "usubvti4" >> + [(match_operand:TI 0 "register_operand") >> + (match_operand:TI 1 "aarch64_reg_or_zero") >> + (match_operand:TI 2 "aarch64_reg_or_imm") >> + (match_operand 3 "")] >> + "" >> +{ >> + rtx l0,l1,l2,h0,h1,h2; >> + >> + aarch64_subv_128bit_scratch_regs (operands[1], operands[2], >> + &l0, &l1, &l2, &h0, &h1, &h2); >> + aarch64_expand_subvti (operands[0], l0, l1, l2, h0, h1, h2); >> + >> + aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); >> DONE; >> }) >> >> @@ -2327,6 +2681,22 @@ >> [(set_attr "type" "alus_sreg")] >> ) >> >> +(define_insn "*sub<mode>3_compare1_imm" >> + [(set (reg:CC CC_REGNUM) >> + (compare:CC >> + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ,rZ") >> + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))) >> + (set (match_operand:GPI 0 "register_operand" "=r,r") >> + (plus:GPI >> + (match_dup 1) >> + (match_operand:GPI 3 "aarch64_plus_immediate" "J,I")))] >> + "UINTVAL (operands[2]) == -UINTVAL (operands[3])" >> + "@ >> + subs\\t%<w>0, %<w>1, %<w>2 >> + adds\\t%<w>0, %<w>1, %<w>3" >> + [(set_attr "type" "alus_imm")] >> +) >> + >> (define_insn "sub<mode>3_compare1" >> [(set (reg:CC CC_REGNUM) >> (compare:CC >> @@ -2554,6 +2924,85 @@ >> [(set_attr "type" "adc_reg")] >> ) >> >> +(define_expand "sub<mode>3_carryinCV" >> + [(parallel >> + [(set (reg:CC CC_REGNUM) >> + (compare:CC >> + (sign_extend:<DWI> >> + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")) >> + 
(plus:<DWI> >> + (sign_extend:<DWI> >> + (match_operand:GPI 2 "register_operand" "r")) >> + (ltu:<DWI> (reg:CC CC_REGNUM) (const_int 0))))) >> + (set (match_operand:GPI 0 "register_operand" "=r") >> + (minus:GPI >> + (minus:GPI (match_dup 1) (match_dup 2)) >> + (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))])] >> + "" >> +) >> + >> +(define_insn "*sub<mode>3_carryinCV_z1_z2" >> + [(set (reg:CC CC_REGNUM) >> + (compare:CC >> + (const_int 0) >> + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) >> + (set (match_operand:GPI 0 "register_operand" "=r") >> + (neg:GPI (match_operand:GPI 1 "aarch64_borrow_operation" "")))] >> + "" >> + "sbcs\\t%<w>0, <w>zr, <w>zr" >> + [(set_attr "type" "adc_reg")] >> +) >> + >> +(define_insn "*sub<mode>3_carryinCV_z1" >> + [(set (reg:CC CC_REGNUM) >> + (compare:CC >> + (const_int 0) >> + (plus:<DWI> >> + (sign_extend:<DWI> >> + (match_operand:GPI 1 "register_operand" "r")) >> + (match_operand:<DWI> 2 "aarch64_borrow_operation" "")))) >> + (set (match_operand:GPI 0 "register_operand" "=r") >> + (minus:GPI >> + (neg:GPI (match_dup 1)) >> + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] >> + "" >> + "sbcs\\t%<w>0, <w>zr, %<w>1" >> + [(set_attr "type" "adc_reg")] >> +) >> + >> +(define_insn "*sub<mode>3_carryinCV_z2" >> + [(set (reg:CC CC_REGNUM) >> + (compare:CC >> + (sign_extend:<DWI> >> + (match_operand:GPI 1 "register_operand" "r")) >> + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) >> + (set (match_operand:GPI 0 "register_operand" "=r") >> + (minus:GPI >> + (match_dup 1) >> + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] >> + "" >> + "sbcs\\t%<w>0, %<w>1, <w>zr" >> + [(set_attr "type" "adc_reg")] >> +) >> + >> +(define_insn "*sub<mode>3_carryinCV" >> + [(set (reg:CC CC_REGNUM) >> + (compare:CC >> + (sign_extend:<DWI> >> + (match_operand:GPI 1 "register_operand" "r")) >> + (plus:<DWI> >> + (sign_extend:<DWI> >> + (match_operand:GPI 2 "register_operand" "r")) >> + (match_operand:<DWI> 3 
"aarch64_borrow_operation" "")))) >> + (set (match_operand:GPI 0 "register_operand" "=r") >> + (minus:GPI >> + (minus:GPI (match_dup 1) (match_dup 2)) >> + (match_operand:GPI 4 "aarch64_borrow_operation" "")))] >> + "" >> + "sbcs\\t%<w>0, %<w>1, %<w>2" >> + [(set_attr "type" "adc_reg")] >> +) >> + >> (define_insn "*sub_uxt<mode>_shift2" >> [(set (match_operand:GPI 0 "register_operand" "=rk") >> (minus:GPI (match_operand:GPI 4 "register_operand" "rk") diff --git >> a/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c >> b/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c >> new file mode 100644 >> index 0000000..0b31500 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c >> @@ -0,0 +1,18 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2" } */ >> + >> +extern void overflow_handler (); >> + >> +__int128 overflow_add (__int128 x, __int128 y) { >> + __int128 r; >> + >> + int ovr = __builtin_add_overflow (x, y, &r); if (ovr) >> + overflow_handler (); >> + >> + return r; >> +} >> + >> +/* { dg-final { scan-assembler "adds" } } */ >> +/* { dg-final { scan-assembler "adcs" } } */ >> diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c >> b/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c >> new file mode 100644 >> index 0000000..9768a98 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c >> @@ -0,0 +1,17 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2" } */ >> + >> +extern void overflow_handler (); >> + >> +long overflow_add (long x, long y) >> +{ >> + long r; >> + >> + int ovr = __builtin_saddl_overflow (x, y, &r); if (ovr) >> + overflow_handler (); >> + >> + return r; >> +} >> + >> +/* { dg-final { scan-assembler "adds" } } */ >> diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c >> b/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c >> new file mode 100644 >> index 0000000..126a526 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c >> @@ -0,0 +1,18 @@ >> +/* { 
dg-do compile } */ >> +/* { dg-options "-O2" } */ >> + >> +extern void overflow_handler (); >> + >> +long long overflow_add (long long x, long long y) { >> + long long r; >> + >> + int ovr = __builtin_saddll_overflow (x, y, &r); if (ovr) >> + overflow_handler (); >> + >> + return r; >> +} >> + >> +/* { dg-final { scan-assembler "adds" } } */ >> + >> diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c >> b/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c >> new file mode 100644 >> index 0000000..c1261e3 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c >> @@ -0,0 +1,18 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2" } */ >> + >> +extern void overflow_handler (); >> + >> +__int128 overflow_sub (__int128 x, __int128 y) { >> + __int128 r; >> + >> + int ovr = __builtin_sub_overflow (x, y, &r); if (ovr) >> + overflow_handler (); >> + >> + return r; >> +} >> + >> +/* { dg-final { scan-assembler "subs" } } */ >> +/* { dg-final { scan-assembler "sbcs" } } */ >> diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c >> b/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c >> new file mode 100644 >> index 0000000..1040464 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c >> @@ -0,0 +1,17 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2" } */ >> + >> +extern void overflow_handler (); >> + >> +long overflow_sub (long x, long y) >> +{ >> + long r; >> + >> + int ovr = __builtin_ssubl_overflow (x, y, &r); if (ovr) >> + overflow_handler (); >> + >> + return r; >> +} >> + >> +/* { dg-final { scan-assembler "subs" } } */ >> diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c >> b/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c >> new file mode 100644 >> index 0000000..a03df88 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c >> @@ -0,0 +1,18 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2" } */ >> + >> +extern void overflow_handler (); >> + >> +long 
long overflow_sub (long long x, long long y) { >> + long long r; >> + >> + int ovr = __builtin_ssubll_overflow (x, y, &r); if (ovr) >> + overflow_handler (); >> + >> + return r; >> +} >> + >> +/* { dg-final { scan-assembler "subs" } } */ >> + >> diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c >> b/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c >> new file mode 100644 >> index 0000000..c573c2a >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c >> @@ -0,0 +1,18 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2" } */ >> + >> +extern void overflow_handler (); >> + >> +unsigned __int128 overflow_add (unsigned __int128 x, unsigned >> +__int128 y) { >> + unsigned __int128 r; >> + >> + int ovr = __builtin_add_overflow (x, y, &r); if (ovr) >> + overflow_handler (); >> + >> + return r; >> +} >> + >> +/* { dg-final { scan-assembler "adds" } } */ >> +/* { dg-final { scan-assembler "adcs" } } */ >> diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c >> b/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c >> new file mode 100644 >> index 0000000..e325591 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c >> @@ -0,0 +1,17 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2" } */ >> + >> +extern void overflow_handler (); >> + >> +unsigned long overflow_add (unsigned long x, unsigned long y) { >> + unsigned long r; >> + >> + int ovr = __builtin_uaddl_overflow (x, y, &r); if (ovr) >> + overflow_handler (); >> + >> + return r; >> +} >> + >> +/* { dg-final { scan-assembler "adds" } } */ >> diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c >> b/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c >> new file mode 100644 >> index 0000000..5f42886 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c >> @@ -0,0 +1,18 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2" } */ >> + >> +extern void overflow_handler (); >> + >> +unsigned long long overflow_add 
(unsigned long long x, unsigned long >> +long y) { >> + unsigned long long r; >> + >> + int ovr = __builtin_uaddll_overflow (x, y, &r); if (ovr) >> + overflow_handler (); >> + >> + return r; >> +} >> + >> +/* { dg-final { scan-assembler "adds" } } */ >> + >> diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c >> b/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c >> new file mode 100644 >> index 0000000..a84f4a4 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c >> @@ -0,0 +1,18 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2" } */ >> + >> +extern void overflow_handler (); >> + >> +unsigned __int128 overflow_sub (unsigned __int128 x, unsigned >> +__int128 y) { >> + unsigned __int128 r; >> + >> + int ovr = __builtin_sub_overflow (x, y, &r); if (ovr) >> + overflow_handler (); >> + >> + return r; >> +} >> + >> +/* { dg-final { scan-assembler "subs" } } */ >> +/* { dg-final { scan-assembler "sbcs" } } */ >> diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c >> b/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c >> new file mode 100644 >> index 0000000..ed033da >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c >> @@ -0,0 +1,17 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2" } */ >> + >> +extern void overflow_handler (); >> + >> +unsigned long overflow_sub (unsigned long x, unsigned long y) { >> + unsigned long r; >> + >> + int ovr = __builtin_usubl_overflow (x, y, &r); if (ovr) >> + overflow_handler (); >> + >> + return r; >> +} >> + >> +/* { dg-final { scan-assembler "subs" } } */ >> diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c >> b/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c >> new file mode 100644 >> index 0000000..a742f0c >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c >> @@ -0,0 +1,18 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2" } */ >> + >> +extern void overflow_handler (); >> + >> +unsigned long long 
overflow_sub (unsigned long long x, unsigned long >> +long y) { >> + unsigned long long r; >> + >> + int ovr = __builtin_usubll_overflow (x, y, &r); if (ovr) >> + overflow_handler (); >> + >> + return r; >> +} >> + >> +/* { dg-final { scan-assembler "subs" } } */ >> + >> > ^ permalink raw reply [flat|nested] 8+ messages in thread
* RE: [PATCH][Aarch64] Add support for overflow add and sub operations 2017-07-05 9:38 ` Richard Earnshaw (lists) 2017-07-06 7:29 ` Michael Collison @ 2017-08-01 6:33 ` Michael Collison 1 sibling, 0 replies; 8+ messages in thread From: Michael Collison @ 2017-08-01 6:33 UTC (permalink / raw) To: Richard Earnshaw, Christophe Lyon; +Cc: gcc-patches, nd [-- Attachment #1: Type: text/plain, Size: 42133 bytes --] Updated the patch per Richard's comments in particular the issues relating to use of NE: " Use of ne is wrong here. The condition register should be set to the result of a compare rtl construct. The same applies elsewhere within this patch. NE is then used on the result of the comparison. The mode of the compare then indicates what might or might not be valid in the way the comparison is finally constructed." Okay for trunk? 2017-08-01 Michael Collison <michael.collison@arm.com> Richard Henderson <rth@redhat.com> * config/aarch64/aarch64-modes.def (CC_V): New. * config/aarch64/aarch64-protos.h (aarch64_add_128bit_scratch_regs): Declare (aarch64_add_128bit_scratch_regs): Declare. (aarch64_expand_subvti): Declare. (aarch64_gen_unlikely_cbranch): Declare * config/aarch64/aarch64.c (aarch64_select_cc_mode): Test for signed overflow using CC_Vmode. (aarch64_get_condition_code_1): Handle CC_Vmode. (aarch64_gen_unlikely_cbranch): New function. (aarch64_add_128bit_scratch_regs): New function. (aarch64_subv_128bit_scratch_regs): New function. (aarch64_expand_subvti): New function. * config/aarch64/aarch64.md (addv<GPI>4, uaddv<GPI>4): New. (addti3): Create simpler code if low part is already known to be 0. (addvti4, uaddvti4): New. (*add<GPI>3_compareC_cconly_imm): New. (*add<GPI>3_compareC_cconly): New. (*add<GPI>3_compareC_imm): New. (*add<GPI>3_compareC): Rename from add<GPI>3_compare1; do not handle constants within this pattern. (*add<GPI>3_compareV_cconly_imm): New. (*add<GPI>3_compareV_cconly): New. (*add<GPI>3_compareV_imm): New. (add<GPI>3_compareV): New. 
(add<GPI>3_carryinC, add<GPI>3_carryinV): New. (*add<GPI>3_carryinC_zero, *add<GPI>3_carryinV_zero): New. (*add<GPI>3_carryinC, *add<GPI>3_carryinV): New. (subv<GPI>4, usubv<GPI>4): New. (subti): Handle op1 zero. (subvti4, usub4ti4): New. (*sub<GPI>3_compare1_imm): New. (sub<GPI>3_carryinCV): New. (*sub<GPI>3_carryinCV_z1_z2, *sub<GPI>3_carryinCV_z1): New. (*sub<GPI>3_carryinCV_z2, *sub<GPI>3_carryinCV): New. * testsuite/gcc.target/arm/builtin_sadd_128.c: New testcase. * testsuite/gcc.target/arm/builtin_saddl.c: New testcase. * testsuite/gcc.target/arm/builtin_saddll.c: New testcase. * testsuite/gcc.target/arm/builtin_uadd_128.c: New testcase. * testsuite/gcc.target/arm/builtin_uaddl.c: New testcase. * testsuite/gcc.target/arm/builtin_uaddll.c: New testcase. * testsuite/gcc.target/arm/builtin_ssub_128.c: New testcase. * testsuite/gcc.target/arm/builtin_ssubl.c: New testcase. * testsuite/gcc.target/arm/builtin_ssubll.c: New testcase. * testsuite/gcc.target/arm/builtin_usub_128.c: New testcase. * testsuite/gcc.target/arm/builtin_usubl.c: New testcase. * testsuite/gcc.target/arm/builtin_usubll.c: New testcase. -----Original Message----- From: Richard Earnshaw (lists) [mailto:Richard.Earnshaw@arm.com] Sent: Wednesday, July 5, 2017 2:38 AM To: Michael Collison <Michael.Collison@arm.com>; Christophe Lyon <christophe.lyon@linaro.org> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com> Subject: Re: [PATCH][Aarch64] Add support for overflow add and sub operations On 19/05/17 22:11, Michael Collison wrote: > Christophe, > > I had a type in the two test cases: "addcs" should have been "adcs". I caught this previously but submitted the previous patch incorrectly. Updated patch attached. > > Okay for trunk? > Apologies for the delay responding, I've been procrastinating over this one. In part it's due to the size of the patch with very little top-level description of what's the motivation and overall approach to the problem. 
It would really help review if this could be split into multiple patches with a description of what each stage achieves. Anyway, there are a couple of obvious formatting issues to deal with first, before we get into the details of the patch. > -----Original Message----- > From: Christophe Lyon [mailto:christophe.lyon@linaro.org] > Sent: Friday, May 19, 2017 3:59 AM > To: Michael Collison <Michael.Collison@arm.com> > Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com> > Subject: Re: [PATCH][Aarch64] Add support for overflow add and sub > operations > > Hi Michael, > > > On 19 May 2017 at 07:12, Michael Collison <Michael.Collison@arm.com> wrote: >> Hi, >> >> This patch improves code generations for builtin arithmetic overflow operations for the aarch64 backend. As an example for a simple test case such as: >> >> Sure for a simple test case such as: >> >> int >> f (int x, int y, int *ovf) >> { >> int res; >> *ovf = __builtin_sadd_overflow (x, y, &res); >> return res; >> } >> >> Current trunk at -O2 generates >> >> f: >> mov w3, w0 >> mov w4, 0 >> add w0, w0, w1 >> tbnz w1, #31, .L4 >> cmp w0, w3 >> blt .L3 >> .L2: >> str w4, [x2] >> ret >> .p2align 3 >> .L4: >> cmp w0, w3 >> ble .L2 >> .L3: >> mov w4, 1 >> b .L2 >> >> >> With the patch this now generates: >> >> f: >> adds w0, w0, w1 >> cset w1, vs >> str w1, [x2] >> ret >> >> >> Original patch from Richard Henderson: >> >> https://gcc.gnu.org/ml/gcc-patches/2016-01/msg01903.html >> >> >> Okay for trunk? >> >> 2017-05-17 Michael Collison <michael.collison@arm.com> >> Richard Henderson <rth@redhat.com> >> >> * config/aarch64/aarch64-modes.def (CC_V): New. >> * config/aarch64/aarch64-protos.h >> (aarch64_add_128bit_scratch_regs): Declare >> (aarch64_add_128bit_scratch_regs): Declare. >> (aarch64_expand_subvti): Declare. >> (aarch64_gen_unlikely_cbranch): Declare >> * config/aarch64/aarch64.c (aarch64_select_cc_mode): Test >> for signed overflow using CC_Vmode. >> (aarch64_get_condition_code_1): Handle CC_Vmode. 
>> (aarch64_gen_unlikely_cbranch): New function. >> (aarch64_add_128bit_scratch_regs): New function. >> (aarch64_subv_128bit_scratch_regs): New function. >> (aarch64_expand_subvti): New function. >> * config/aarch64/aarch64.md (addv<GPI>4, uaddv<GPI>4): New. >> (addti3): Create simpler code if low part is already known to be 0. >> (addvti4, uaddvti4): New. >> (*add<GPI>3_compareC_cconly_imm): New. >> (*add<GPI>3_compareC_cconly): New. >> (*add<GPI>3_compareC_imm): New. >> (*add<GPI>3_compareC): Rename from add<GPI>3_compare1; do not >> handle constants within this pattern. >> (*add<GPI>3_compareV_cconly_imm): New. >> (*add<GPI>3_compareV_cconly): New. >> (*add<GPI>3_compareV_imm): New. >> (add<GPI>3_compareV): New. >> (add<GPI>3_carryinC, add<GPI>3_carryinV): New. >> (*add<GPI>3_carryinC_zero, *add<GPI>3_carryinV_zero): New. >> (*add<GPI>3_carryinC, *add<GPI>3_carryinV): New. >> (subv<GPI>4, usubv<GPI>4): New. >> (subti): Handle op1 zero. >> (subvti4, usub4ti4): New. >> (*sub<GPI>3_compare1_imm): New. >> (sub<GPI>3_carryinCV): New. >> (*sub<GPI>3_carryinCV_z1_z2, *sub<GPI>3_carryinCV_z1): New. >> (*sub<GPI>3_carryinCV_z2, *sub<GPI>3_carryinCV): New. >> * testsuite/gcc.target/arm/builtin_sadd_128.c: New testcase. >> * testsuite/gcc.target/arm/builtin_saddl.c: New testcase. >> * testsuite/gcc.target/arm/builtin_saddll.c: New testcase. >> * testsuite/gcc.target/arm/builtin_uadd_128.c: New testcase. >> * testsuite/gcc.target/arm/builtin_uaddl.c: New testcase. >> * testsuite/gcc.target/arm/builtin_uaddll.c: New testcase. >> * testsuite/gcc.target/arm/builtin_ssub_128.c: New testcase. >> * testsuite/gcc.target/arm/builtin_ssubl.c: New testcase. >> * testsuite/gcc.target/arm/builtin_ssubll.c: New testcase. >> * testsuite/gcc.target/arm/builtin_usub_128.c: New testcase. >> * testsuite/gcc.target/arm/builtin_usubl.c: New testcase. >> * testsuite/gcc.target/arm/builtin_usubll.c: New testcase. 
> > I've tried your patch, and 2 of the new tests FAIL: > gcc.target/aarch64/builtin_sadd_128.c scan-assembler addcs > gcc.target/aarch64/builtin_uadd_128.c scan-assembler addcs > > Am I missing something? > > Thanks, > > Christophe > > > pr6308v2.patch > > > diff --git a/gcc/config/aarch64/aarch64-modes.def > b/gcc/config/aarch64/aarch64-modes.def > index 45f7a44..244e490 100644 > --- a/gcc/config/aarch64/aarch64-modes.def > +++ b/gcc/config/aarch64/aarch64-modes.def > @@ -24,6 +24,7 @@ CC_MODE (CC_SWP); > CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */ > CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */ > CC_MODE (CC_C); /* Only C bit of condition flags is valid. */ > +CC_MODE (CC_V); /* Only V bit of condition flags is valid. */ > > /* Half-precision floating point for __fp16. */ FLOAT_MODE (HF, 2, > 0); diff --git a/gcc/config/aarch64/aarch64-protos.h > b/gcc/config/aarch64/aarch64-protos.h > index f55d4ba..f38b2b8 100644 > --- a/gcc/config/aarch64/aarch64-protos.h > +++ b/gcc/config/aarch64/aarch64-protos.h > @@ -388,6 +388,18 @@ void aarch64_relayout_simd_types (void); void > aarch64_reset_previous_fndecl (void); bool > aarch64_return_address_signing_enabled (void); void > aarch64_save_restore_target_globals (tree); > +void aarch64_add_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, > + rtx *low_in1, rtx *low_in2, > + rtx *high_dest, rtx *high_in1, > + rtx *high_in2); > +void aarch64_subv_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, > + rtx *low_in1, rtx *low_in2, > + rtx *high_dest, rtx *high_in1, > + rtx *high_in2); > +void aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1, > + rtx low_in2, rtx high_dest, rtx high_in1, > + rtx high_in2); > + It's a little bit inconsistent, but the general style in aarch64-protos.h is not to include parameter names in prototypes, just their types. > > /* Initialize builtins for SIMD intrinsics. 
*/ void > init_aarch64_simd_builtins (void); @@ -412,6 +424,8 @@ bool > aarch64_float_const_representable_p (rtx); > > #if defined (RTX_CODE) > > +void aarch64_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode, > + rtx label_ref); > bool aarch64_legitimate_address_p (machine_mode, rtx, RTX_CODE, > bool); machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx); rtx > aarch64_gen_compare_reg (RTX_CODE, rtx, rtx); diff --git > a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index > f343d92..71a651c 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -4716,6 +4716,13 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) > && GET_CODE (y) == ZERO_EXTEND) > return CC_Cmode; > > + /* A test for signed overflow. */ > + if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode) > + && code == NE > + && GET_CODE (x) == PLUS > + && GET_CODE (y) == SIGN_EXTEND) > + return CC_Vmode; > + > /* For everything else, return CCmode. */ > return CCmode; > } > @@ -4822,6 +4829,15 @@ aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code) > } > break; > > + case CC_Vmode: > + switch (comp_code) > + { > + case NE: return AARCH64_VS; > + case EQ: return AARCH64_VC; > + default: return -1; > + } > + break; > + > default: > return -1; > } > @@ -13630,6 +13646,88 @@ aarch64_split_dimode_const_store (rtx dst, rtx src) > return true; > } > > +/* Generate RTL for a conditional branch with rtx comparison CODE in > + mode CC_MODE. The destination of the unlikely conditional branch > + is LABEL_REF. 
*/ > + > +void > +aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode, > + rtx label_ref) > +{ > + rtx x; > + x = gen_rtx_fmt_ee (code, VOIDmode, > + gen_rtx_REG (cc_mode, CC_REGNUM), > + const0_rtx); > + > + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, > + gen_rtx_LABEL_REF (VOIDmode, label_ref), > + pc_rtx); > + aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); } > + > +void aarch64_add_128bit_scratch_regs (rtx op1, rtx op2, rtx > +*low_dest, Function names must start in column 1, with the return type on the preceding line. All functions should have a top-level comment describing what they do (their contract with the caller). > + rtx *low_in1, rtx *low_in2, > + rtx *high_dest, rtx *high_in1, > + rtx *high_in2) > +{ > + *low_dest = gen_reg_rtx (DImode); > + *low_in1 = gen_lowpart (DImode, op1); > + *low_in2 = simplify_gen_subreg (DImode, op2, TImode, > + subreg_lowpart_offset (DImode, TImode)); > + *high_dest = gen_reg_rtx (DImode); > + *high_in1 = gen_highpart (DImode, op1); > + *high_in2 = simplify_gen_subreg (DImode, op2, TImode, > + subreg_highpart_offset (DImode, TImode)); } > + > +void aarch64_subv_128bit_scratch_regs (rtx op1, rtx op2, rtx > +*low_dest, Same here. > + rtx *low_in1, rtx *low_in2, > + rtx *high_dest, rtx *high_in1, > + rtx *high_in2) > +{ > + *low_dest = gen_reg_rtx (DImode); > + *low_in1 = simplify_gen_subreg (DImode, op1, TImode, > + subreg_lowpart_offset (DImode, TImode)); > + *low_in2 = simplify_gen_subreg (DImode, op2, TImode, > + subreg_lowpart_offset (DImode, TImode)); > + *high_dest = gen_reg_rtx (DImode); > + *high_in1 = simplify_gen_subreg (DImode, op1, TImode, > + subreg_highpart_offset (DImode, TImode)); > + *high_in2 = simplify_gen_subreg (DImode, op2, TImode, > + subreg_highpart_offset (DImode, TImode)); > + > +} > + > +void aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1, And here. 
> + rtx low_in2, rtx high_dest, rtx high_in1, > + rtx high_in2) > +{ > + if (low_in2 == const0_rtx) > + { > + low_dest = low_in1; > + emit_insn (gen_subdi3_compare1 (high_dest, high_in1, > + force_reg (DImode, high_in2))); > + } > + else > + { > + if (CONST_INT_P (low_in2)) > + { > + low_in2 = force_reg (DImode, GEN_INT (-UINTVAL (low_in2))); > + high_in2 = force_reg (DImode, high_in2); > + emit_insn (gen_adddi3_compareC (low_dest, low_in1, low_in2)); > + } > + else > + emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2)); > + emit_insn (gen_subdi3_carryinCV (high_dest, > + force_reg (DImode, high_in1), > + high_in2)); > + } > + > + emit_move_insn (gen_lowpart (DImode, op0), low_dest); > + emit_move_insn (gen_highpart (DImode, op0), high_dest); > + > +} > + > /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ > > static unsigned HOST_WIDE_INT > diff --git a/gcc/config/aarch64/aarch64.md > b/gcc/config/aarch64/aarch64.md index a693a3b..3976ecb 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -1711,25 +1711,123 @@ > } > ) > > +(define_expand "addv<mode>4" > + [(match_operand:GPI 0 "register_operand") > + (match_operand:GPI 1 "register_operand") > + (match_operand:GPI 2 "register_operand") > + (match_operand 3 "")] > + "" > +{ > + emit_insn (gen_add<mode>3_compareV (operands[0], operands[1], > +operands[2])); > + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); > + > + DONE; > +}) > + > +(define_expand "uaddv<mode>4" > + [(match_operand:GPI 0 "register_operand") > + (match_operand:GPI 1 "register_operand") > + (match_operand:GPI 2 "register_operand") > + (match_operand 3 "")] With no rtl in the expand to describe this pattern, it really should have a top-level comment explaining the arguments (reference to the manual is probably OK in this case). 
> + "" > +{ > + emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], > +operands[2])); > + aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); > + > + DONE; > +}) > + > + > (define_expand "addti3" > [(set (match_operand:TI 0 "register_operand" "") > (plus:TI (match_operand:TI 1 "register_operand" "") > - (match_operand:TI 2 "register_operand" "")))] > + (match_operand:TI 2 "aarch64_reg_or_imm" "")))] > "" > { > - rtx low = gen_reg_rtx (DImode); > - emit_insn (gen_adddi3_compareC (low, gen_lowpart (DImode, operands[1]), > - gen_lowpart (DImode, operands[2]))); > + rtx l0,l1,l2,h0,h1,h2; > > - rtx high = gen_reg_rtx (DImode); > - emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]), > - gen_highpart (DImode, operands[2]))); > + aarch64_add_128bit_scratch_regs (operands[1], operands[2], > + &l0, &l1, &l2, &h0, &h1, &h2); > + > + if (l2 == const0_rtx) > + { > + l0 = l1; > + if (!aarch64_pluslong_operand (h2, DImode)) > + h2 = force_reg (DImode, h2); > + emit_insn (gen_adddi3 (h0, h1, h2)); > + } > + else > + { > + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); > + emit_insn (gen_adddi3_carryin (h0, h1, force_reg (DImode, h2))); > + } > + > + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); > + emit_move_insn (gen_highpart (DImode, operands[0]), h0); > > - emit_move_insn (gen_lowpart (DImode, operands[0]), low); > - emit_move_insn (gen_highpart (DImode, operands[0]), high); > DONE; > }) > > +(define_expand "addvti4" > + [(match_operand:TI 0 "register_operand" "") > + (match_operand:TI 1 "register_operand" "") > + (match_operand:TI 2 "aarch64_reg_or_imm" "") > + (match_operand 3 "")] Same here. 
> + "" > +{ > + rtx l0,l1,l2,h0,h1,h2; > + > + aarch64_add_128bit_scratch_regs (operands[1], operands[2], > + &l0, &l1, &l2, &h0, &h1, &h2); > + > + if (l2 == const0_rtx) > + { > + l0 = l1; > + emit_insn (gen_adddi3_compareV (h0, h1, force_reg (DImode, h2))); > + } > + else > + { > + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); > + emit_insn (gen_adddi3_carryinV (h0, h1, force_reg (DImode, h2))); > + } > + > + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); > + emit_move_insn (gen_highpart (DImode, operands[0]), h0); > + > + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); > + DONE; > +}) > + > +(define_expand "uaddvti4" > + [(match_operand:TI 0 "register_operand" "") > + (match_operand:TI 1 "register_operand" "") > + (match_operand:TI 2 "aarch64_reg_or_imm" "") > + (match_operand 3 "")] > + "" > +{ > + rtx l0,l1,l2,h0,h1,h2; > + > + aarch64_add_128bit_scratch_regs (operands[1], operands[2], > + &l0, &l1, &l2, &h0, &h1, &h2); > + > + if (l2 == const0_rtx) > + { > + l0 = l1; > + emit_insn (gen_adddi3_compareC (h0, h1, force_reg (DImode, h2))); > + } > + else > + { > + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); > + emit_insn (gen_adddi3_carryinC (h0, h1, force_reg (DImode, h2))); > + } > + > + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); > + emit_move_insn (gen_highpart (DImode, operands[0]), h0); > + > + aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); DONE; > + }) > + > (define_insn "add<mode>3_compare0" > [(set (reg:CC_NZ CC_REGNUM) > (compare:CC_NZ > @@ -1828,10 +1926,70 @@ > [(set_attr "type" "alus_sreg")] > ) > > +;; Note that since we're sign-extending, match the immediate in GPI > +;; rather than in DWI. Since CONST_INT is modeless, this works fine. 
+(define_insn "*add<mode>3_compareV_cconly_imm" > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V > + (plus:<DWI> > + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r,r")) > + (match_operand:GPI 1 "aarch64_plus_immediate" "I,J")) > + (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] > + "" > + "@ > + cmn\\t%<w>0, %<w>1 > + cmp\\t%<w>0, #%n1" > + [(set_attr "type" "alus_imm")] > +) > + > +(define_insn "*add<mode>3_compareV_cconly" > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V Use of ne is wrong here. The condition register should be set to the result of a compare rtl construct. The same applies elsewhere within this patch. NE is then used on the result of the comparison. The mode of the compare then indicates what might or might not be valid in the way the comparison is finally constructed. Note that this issue may go back to the earlier patches that this is based on, but those are equally incorrect and will need fixing as well at some point. We shouldn't perpetuate the issue. 
> + (plus:<DWI> > + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r")) > + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) > + (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] > + "" > + "cmn\\t%<w>0, %<w>1" > + [(set_attr "type" "alus_sreg")] > +) > + > +(define_insn "*add<mode>3_compareV_imm" > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V > + (plus:<DWI> > + (sign_extend:<DWI> > + (match_operand:GPI 1 "register_operand" "r,r")) > + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")) > + (sign_extend:<DWI> > + (plus:GPI (match_dup 1) (match_dup 2))))) > + (set (match_operand:GPI 0 "register_operand" "=r,r") > + (plus:GPI (match_dup 1) (match_dup 2)))] > + "" > + "@ > + adds\\t%<w>0, %<w>1, %<w>2 > + subs\\t%<w>0, %<w>1, #%n2" > + [(set_attr "type" "alus_imm,alus_imm")] > +) > + > +(define_insn "add<mode>3_compareV" > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V > + (plus:<DWI> > + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")) > + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) > + (sign_extend:<DWI> (plus:GPI (match_dup 1) (match_dup 2))))) > + (set (match_operand:GPI 0 "register_operand" "=r") > + (plus:GPI (match_dup 1) (match_dup 2)))] > + "" > + "adds\\t%<w>0, %<w>1, %<w>2" > + [(set_attr "type" "alus_sreg")] > +) > + > (define_insn "*adds_shift_imm_<mode>" > [(set (reg:CC_NZ CC_REGNUM) > (compare:CC_NZ > - (plus:GPI (ASHIFT:GPI > + (plus:GPI (ASHIFT:GPI > (match_operand:GPI 1 "register_operand" "r") > (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n")) > (match_operand:GPI 3 "register_operand" "r")) @@ -2187,6 > +2345,138 @@ > [(set_attr "type" "adc_reg")] > ) > > +(define_expand "add<mode>3_carryinC" > + [(parallel > + [(set (match_dup 3) > + (ne:CC_C > + (plus:<DWI> > + (plus:<DWI> > + (match_dup 4) > + (zero_extend:<DWI> > + (match_operand:GPI 1 "register_operand" "r"))) > + (zero_extend:<DWI> > + (match_operand:GPI 2 "register_operand" "r"))) > + (zero_extend:<DWI> > + 
(plus:GPI > + (plus:GPI (match_dup 5) (match_dup 1)) > + (match_dup 2))))) > + (set (match_operand:GPI 0 "register_operand") > + (plus:GPI > + (plus:GPI (match_dup 5) (match_dup 1)) > + (match_dup 2)))])] > + "" > +{ > + operands[3] = gen_rtx_REG (CC_Cmode, CC_REGNUM); > + operands[4] = gen_rtx_NE (<DWI>mode, operands[3], const0_rtx); > + operands[5] = gen_rtx_NE (<MODE>mode, operands[3], const0_rtx); > +}) > + > +(define_insn "*add<mode>3_carryinC_zero" > + [(set (reg:CC_C CC_REGNUM) > + (ne:CC_C > + (plus:<DWI> > + (match_operand:<DWI> 2 "aarch64_carry_operation" "") > + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) > + (zero_extend:<DWI> > + (plus:GPI > + (match_operand:GPI 3 "aarch64_carry_operation" "") > + (match_dup 1))))) > + (set (match_operand:GPI 0 "register_operand") > + (plus:GPI (match_dup 3) (match_dup 1)))] > + "" > + "adcs\\t%<w>0, %<w>1, <w>zr" > + [(set_attr "type" "adc_reg")] > +) > + > +(define_insn "*add<mode>3_carryinC" > + [(set (reg:CC_C CC_REGNUM) > + (ne:CC_C > + (plus:<DWI> > + (plus:<DWI> > + (match_operand:<DWI> 3 "aarch64_carry_operation" "") > + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) > + (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) > + (zero_extend:<DWI> > + (plus:GPI > + (plus:GPI > + (match_operand:GPI 4 "aarch64_carry_operation" "") > + (match_dup 1)) > + (match_dup 2))))) > + (set (match_operand:GPI 0 "register_operand") > + (plus:GPI > + (plus:GPI (match_dup 4) (match_dup 1)) > + (match_dup 2)))] > + "" > + "adcs\\t%<w>0, %<w>1, %<w>2" > + [(set_attr "type" "adc_reg")] > +) > + > +(define_expand "add<mode>3_carryinV" > + [(parallel > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V > + (plus:<DWI> > + (plus:<DWI> > + (match_dup 3) > + (sign_extend:<DWI> > + (match_operand:GPI 1 "register_operand" "r"))) > + (sign_extend:<DWI> > + (match_operand:GPI 2 "register_operand" "r"))) > + (sign_extend:<DWI> > + (plus:GPI > + (plus:GPI (match_dup 4) (match_dup 1)) > + (match_dup 
2))))) > + (set (match_operand:GPI 0 "register_operand") > + (plus:GPI > + (plus:GPI (match_dup 4) (match_dup 1)) > + (match_dup 2)))])] > + "" > +{ > + rtx cc = gen_rtx_REG (CC_Cmode, CC_REGNUM); > + operands[3] = gen_rtx_NE (<DWI>mode, cc, const0_rtx); > + operands[4] = gen_rtx_NE (<MODE>mode, cc, const0_rtx); > +}) > + > +(define_insn "*add<mode>3_carryinV_zero" > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V > + (plus:<DWI> > + (match_operand:<DWI> 2 "aarch64_carry_operation" "") > + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) > + (sign_extend:<DWI> > + (plus:GPI > + (match_operand:GPI 3 "aarch64_carry_operation" "") > + (match_dup 1))))) > + (set (match_operand:GPI 0 "register_operand") > + (plus:GPI (match_dup 3) (match_dup 1)))] > + "" > + "adcs\\t%<w>0, %<w>1, <w>zr" > + [(set_attr "type" "adc_reg")] > +) > + > +(define_insn "*add<mode>3_carryinV" > + [(set (reg:CC_V CC_REGNUM) > + (ne:CC_V > + (plus:<DWI> > + (plus:<DWI> > + (match_operand:<DWI> 3 "aarch64_carry_operation" "") > + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) > + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) > + (sign_extend:<DWI> > + (plus:GPI > + (plus:GPI > + (match_operand:GPI 4 "aarch64_carry_operation" "") > + (match_dup 1)) > + (match_dup 2))))) > + (set (match_operand:GPI 0 "register_operand") > + (plus:GPI > + (plus:GPI (match_dup 4) (match_dup 1)) > + (match_dup 2)))] > + "" > + "adcs\\t%<w>0, %<w>1, %<w>2" > + [(set_attr "type" "adc_reg")] > +) > + > (define_insn "*add_uxt<mode>_shift2" > [(set (match_operand:GPI 0 "register_operand" "=rk") > (plus:GPI (and:GPI > @@ -2283,22 +2573,86 @@ > (set_attr "simd" "*,yes")] > ) > > +(define_expand "subv<mode>4" > + [(match_operand:GPI 0 "register_operand") > + (match_operand:GPI 1 "aarch64_reg_or_zero") > + (match_operand:GPI 2 "aarch64_reg_or_zero") > + (match_operand 3 "")] > + "" > +{ > + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], > +operands[2])); > + 
aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); > + > + DONE; > +}) > + > +(define_expand "usubv<mode>4" > + [(match_operand:GPI 0 "register_operand") > + (match_operand:GPI 1 "aarch64_reg_or_zero") > + (match_operand:GPI 2 "aarch64_reg_or_zero") > + (match_operand 3 "")] > + "" > +{ > + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], > +operands[2])); > + aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); > + > + DONE; > +}) > + > (define_expand "subti3" > [(set (match_operand:TI 0 "register_operand" "") > - (minus:TI (match_operand:TI 1 "register_operand" "") > + (minus:TI (match_operand:TI 1 "aarch64_reg_or_zero" "") > (match_operand:TI 2 "register_operand" "")))] > "" > { > - rtx low = gen_reg_rtx (DImode); > - emit_insn (gen_subdi3_compare1 (low, gen_lowpart (DImode, operands[1]), > - gen_lowpart (DImode, operands[2]))); > + rtx l0 = gen_reg_rtx (DImode); > + rtx l1 = simplify_gen_subreg (DImode, operands[1], TImode, > + subreg_lowpart_offset (DImode, TImode)); > + rtx l2 = gen_lowpart (DImode, operands[2]); > + rtx h0 = gen_reg_rtx (DImode); > + rtx h1 = simplify_gen_subreg (DImode, operands[1], TImode, > + subreg_highpart_offset (DImode, TImode)); > + rtx h2 = gen_highpart (DImode, operands[2]); > > - rtx high = gen_reg_rtx (DImode); > - emit_insn (gen_subdi3_carryin (high, gen_highpart (DImode, operands[1]), > - gen_highpart (DImode, operands[2]))); > + emit_insn (gen_subdi3_compare1 (l0, l1, l2)); emit_insn > + (gen_subdi3_carryin (h0, h1, h2)); > > - emit_move_insn (gen_lowpart (DImode, operands[0]), low); > - emit_move_insn (gen_highpart (DImode, operands[0]), high); > + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); > + emit_move_insn (gen_highpart (DImode, operands[0]), h0); > + DONE; > +}) > + > +(define_expand "subvti4" > + [(match_operand:TI 0 "register_operand") > + (match_operand:TI 1 "aarch64_reg_or_zero") > + (match_operand:TI 2 "aarch64_reg_or_imm") > + (match_operand 3 "")] > + "" > +{ > + rtx 
l0,l1,l2,h0,h1,h2; > + > + aarch64_subv_128bit_scratch_regs (operands[1], operands[2], > + &l0, &l1, &l2, &h0, &h1, &h2); > + aarch64_expand_subvti (operands[0], l0, l1, l2, h0, h1, h2); > + > + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); > + DONE; > +}) > + > +(define_expand "usubvti4" > + [(match_operand:TI 0 "register_operand") > + (match_operand:TI 1 "aarch64_reg_or_zero") > + (match_operand:TI 2 "aarch64_reg_or_imm") > + (match_operand 3 "")] > + "" > +{ > + rtx l0,l1,l2,h0,h1,h2; > + > + aarch64_subv_128bit_scratch_regs (operands[1], operands[2], > + &l0, &l1, &l2, &h0, &h1, &h2); > + aarch64_expand_subvti (operands[0], l0, l1, l2, h0, h1, h2); > + > + aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); > DONE; > }) > > @@ -2327,6 +2681,22 @@ > [(set_attr "type" "alus_sreg")] > ) > > +(define_insn "*sub<mode>3_compare1_imm" > + [(set (reg:CC CC_REGNUM) > + (compare:CC > + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ,rZ") > + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))) > + (set (match_operand:GPI 0 "register_operand" "=r,r") > + (plus:GPI > + (match_dup 1) > + (match_operand:GPI 3 "aarch64_plus_immediate" "J,I")))] > + "UINTVAL (operands[2]) == -UINTVAL (operands[3])" > + "@ > + subs\\t%<w>0, %<w>1, %<w>2 > + adds\\t%<w>0, %<w>1, %<w>3" > + [(set_attr "type" "alus_imm")] > +) > + > (define_insn "sub<mode>3_compare1" > [(set (reg:CC CC_REGNUM) > (compare:CC > @@ -2554,6 +2924,85 @@ > [(set_attr "type" "adc_reg")] > ) > > +(define_expand "sub<mode>3_carryinCV" > + [(parallel > + [(set (reg:CC CC_REGNUM) > + (compare:CC > + (sign_extend:<DWI> > + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")) > + (plus:<DWI> > + (sign_extend:<DWI> > + (match_operand:GPI 2 "register_operand" "r")) > + (ltu:<DWI> (reg:CC CC_REGNUM) (const_int 0))))) > + (set (match_operand:GPI 0 "register_operand" "=r") > + (minus:GPI > + (minus:GPI (match_dup 1) (match_dup 2)) > + (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))])] > + "" > +) > + > +(define_insn 
"*sub<mode>3_carryinCV_z1_z2" > + [(set (reg:CC CC_REGNUM) > + (compare:CC > + (const_int 0) > + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) > + (set (match_operand:GPI 0 "register_operand" "=r") > + (neg:GPI (match_operand:GPI 1 "aarch64_borrow_operation" "")))] > + "" > + "sbcs\\t%<w>0, <w>zr, <w>zr" > + [(set_attr "type" "adc_reg")] > +) > + > +(define_insn "*sub<mode>3_carryinCV_z1" > + [(set (reg:CC CC_REGNUM) > + (compare:CC > + (const_int 0) > + (plus:<DWI> > + (sign_extend:<DWI> > + (match_operand:GPI 1 "register_operand" "r")) > + (match_operand:<DWI> 2 "aarch64_borrow_operation" "")))) > + (set (match_operand:GPI 0 "register_operand" "=r") > + (minus:GPI > + (neg:GPI (match_dup 1)) > + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] > + "" > + "sbcs\\t%<w>0, <w>zr, %<w>1" > + [(set_attr "type" "adc_reg")] > +) > + > +(define_insn "*sub<mode>3_carryinCV_z2" > + [(set (reg:CC CC_REGNUM) > + (compare:CC > + (sign_extend:<DWI> > + (match_operand:GPI 1 "register_operand" "r")) > + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) > + (set (match_operand:GPI 0 "register_operand" "=r") > + (minus:GPI > + (match_dup 1) > + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] > + "" > + "sbcs\\t%<w>0, %<w>1, <w>zr" > + [(set_attr "type" "adc_reg")] > +) > + > +(define_insn "*sub<mode>3_carryinCV" > + [(set (reg:CC CC_REGNUM) > + (compare:CC > + (sign_extend:<DWI> > + (match_operand:GPI 1 "register_operand" "r")) > + (plus:<DWI> > + (sign_extend:<DWI> > + (match_operand:GPI 2 "register_operand" "r")) > + (match_operand:<DWI> 3 "aarch64_borrow_operation" "")))) > + (set (match_operand:GPI 0 "register_operand" "=r") > + (minus:GPI > + (minus:GPI (match_dup 1) (match_dup 2)) > + (match_operand:GPI 4 "aarch64_borrow_operation" "")))] > + "" > + "sbcs\\t%<w>0, %<w>1, %<w>2" > + [(set_attr "type" "adc_reg")] > +) > + > (define_insn "*sub_uxt<mode>_shift2" > [(set (match_operand:GPI 0 "register_operand" "=rk") > (minus:GPI (match_operand:GPI 4 
"register_operand" "rk") diff --git > a/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c > b/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c > new file mode 100644 > index 0000000..0b31500 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +__int128 overflow_add (__int128 x, __int128 y) { > + __int128 r; > + > + int ovr = __builtin_add_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "adds" } } */ > +/* { dg-final { scan-assembler "adcs" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c > b/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c > new file mode 100644 > index 0000000..9768a98 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +long overflow_add (long x, long y) > +{ > + long r; > + > + int ovr = __builtin_saddl_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "adds" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c > b/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c > new file mode 100644 > index 0000000..126a526 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +long long overflow_add (long long x, long long y) { > + long long r; > + > + int ovr = __builtin_saddll_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "adds" } } */ > + > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c > b/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c > new file mode 100644 > 
index 0000000..c1261e3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +__int128 overflow_sub (__int128 x, __int128 y) { > + __int128 r; > + > + int ovr = __builtin_sub_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "subs" } } */ > +/* { dg-final { scan-assembler "sbcs" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c > b/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c > new file mode 100644 > index 0000000..1040464 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +long overflow_sub (long x, long y) > +{ > + long r; > + > + int ovr = __builtin_ssubl_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "subs" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c > b/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c > new file mode 100644 > index 0000000..a03df88 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +long long overflow_sub (long long x, long long y) { > + long long r; > + > + int ovr = __builtin_ssubll_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "subs" } } */ > + > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c > b/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c > new file mode 100644 > index 0000000..c573c2a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ 
> + > +extern void overflow_handler (); > + > +unsigned __int128 overflow_add (unsigned __int128 x, unsigned > +__int128 y) { > + unsigned __int128 r; > + > + int ovr = __builtin_add_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "adds" } } */ > +/* { dg-final { scan-assembler "adcs" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c > b/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c > new file mode 100644 > index 0000000..e325591 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +unsigned long overflow_add (unsigned long x, unsigned long y) { > + unsigned long r; > + > + int ovr = __builtin_uaddl_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "adds" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c > b/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c > new file mode 100644 > index 0000000..5f42886 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +unsigned long long overflow_add (unsigned long long x, unsigned long > +long y) { > + unsigned long long r; > + > + int ovr = __builtin_uaddll_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "adds" } } */ > + > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c > b/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c > new file mode 100644 > index 0000000..a84f4a4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +unsigned __int128 
overflow_sub (unsigned __int128 x, unsigned > +__int128 y) { > + unsigned __int128 r; > + > + int ovr = __builtin_sub_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "subs" } } */ > +/* { dg-final { scan-assembler "sbcs" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c > b/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c > new file mode 100644 > index 0000000..ed033da > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c > @@ -0,0 +1,17 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +unsigned long overflow_sub (unsigned long x, unsigned long y) { > + unsigned long r; > + > + int ovr = __builtin_usubl_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "subs" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c > b/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c > new file mode 100644 > index 0000000..a742f0c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +extern void overflow_handler (); > + > +unsigned long long overflow_sub (unsigned long long x, unsigned long > +long y) { > + unsigned long long r; > + > + int ovr = __builtin_usubll_overflow (x, y, &r); if (ovr) > + overflow_handler (); > + > + return r; > +} > + > +/* { dg-final { scan-assembler "subs" } } */ > + > [-- Attachment #2: gnutools-6308-v4.patch --] [-- Type: application/octet-stream, Size: 30944 bytes --] diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def index 45f7a44..244e490 100644 --- a/gcc/config/aarch64/aarch64-modes.def +++ b/gcc/config/aarch64/aarch64-modes.def @@ -24,6 +24,7 @@ CC_MODE (CC_SWP); CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. 
*/ CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */ CC_MODE (CC_C); /* Only C bit of condition flags is valid. */ +CC_MODE (CC_V); /* Only V bit of condition flags is valid. */ /* Half-precision floating point for __fp16. */ FLOAT_MODE (HF, 2, 0); diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index e397ff4..b67d4d1 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -407,6 +407,16 @@ void aarch64_relayout_simd_types (void); void aarch64_reset_previous_fndecl (void); bool aarch64_return_address_signing_enabled (void); void aarch64_save_restore_target_globals (tree); +void aarch64_add_128bit_scratch_regs (rtx, rtx, rtx *, + rtx *, rtx *, + rtx *, rtx *, + rtx *); +void aarch64_subv_128bit_scratch_regs (rtx, rtx, rtx *, + rtx *, rtx *, + rtx *, rtx *, rtx *); +void aarch64_expand_subvti (rtx, rtx, rtx, + rtx, rtx, rtx, rtx); + /* Initialize builtins for SIMD intrinsics. */ void init_aarch64_simd_builtins (void); @@ -431,6 +441,8 @@ bool aarch64_float_const_representable_p (rtx); #if defined (RTX_CODE) +void aarch64_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode, + rtx label_ref); bool aarch64_legitimate_address_p (machine_mode, rtx, RTX_CODE, bool); machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx); rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index ef1b5a8..035543e 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -4882,6 +4882,13 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) && GET_CODE (y) == ZERO_EXTEND) return CC_Cmode; + /* A test for signed overflow. */ + if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode) + && code == NE + && GET_CODE (x) == PLUS + && GET_CODE (y) == SIGN_EXTEND) + return CC_Vmode; + /* For everything else, return CCmode. 
*/ return CCmode; } @@ -4988,6 +4995,15 @@ aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code) } break; + case CC_Vmode: + switch (comp_code) + { + case NE: return AARCH64_VS; + case EQ: return AARCH64_VC; + default: return -1; + } + break; + default: return -1; } @@ -13969,6 +13985,97 @@ aarch64_split_dimode_const_store (rtx dst, rtx src) return true; } +/* Generate RTL for a conditional branch with rtx comparison CODE in + mode CC_MODE. The destination of the unlikely conditional branch + is LABEL_REF. */ + +void +aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode, + rtx label_ref) +{ + rtx x; + x = gen_rtx_fmt_ee (code, VOIDmode, + gen_rtx_REG (cc_mode, CC_REGNUM), + const0_rtx); + + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, + gen_rtx_LABEL_REF (VOIDmode, label_ref), + pc_rtx); + aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); +} + +/* Generate DImode scratch registers for 128-bit (TImode) addition. */ + +void +aarch64_add_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, + rtx *low_in1, rtx *low_in2, + rtx *high_dest, rtx *high_in1, + rtx *high_in2) +{ + *low_dest = gen_reg_rtx (DImode); + *low_in1 = gen_lowpart (DImode, op1); + *low_in2 = simplify_gen_subreg (DImode, op2, TImode, + subreg_lowpart_offset (DImode, TImode)); + *high_dest = gen_reg_rtx (DImode); + *high_in1 = gen_highpart (DImode, op1); + *high_in2 = simplify_gen_subreg (DImode, op2, TImode, + subreg_highpart_offset (DImode, TImode)); +} + +/* Generate DImode scratch registers for 128-bit (TImode) subtraction. 
*/ + +void +aarch64_subv_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest, + rtx *low_in1, rtx *low_in2, + rtx *high_dest, rtx *high_in1, + rtx *high_in2) +{ + *low_dest = gen_reg_rtx (DImode); + *low_in1 = simplify_gen_subreg (DImode, op1, TImode, + subreg_lowpart_offset (DImode, TImode)); + *low_in2 = simplify_gen_subreg (DImode, op2, TImode, + subreg_lowpart_offset (DImode, TImode)); + *high_dest = gen_reg_rtx (DImode); + *high_in1 = simplify_gen_subreg (DImode, op1, TImode, + subreg_highpart_offset (DImode, TImode)); + *high_in2 = simplify_gen_subreg (DImode, op2, TImode, + subreg_highpart_offset (DImode, TImode)); + +} + +/* Generate RTL for 128-bit (TImode) subtraction. */ + +void +aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1, + rtx low_in2, rtx high_dest, rtx high_in1, + rtx high_in2) +{ + if (low_in2 == const0_rtx) + { + low_dest = low_in1; + emit_insn (gen_subdi3_compare1 (high_dest, high_in1, + force_reg (DImode, high_in2))); + } + else + { + if (CONST_INT_P (low_in2)) + { + low_in2 = force_reg (DImode, GEN_INT (-UINTVAL (low_in2))); + high_in2 = force_reg (DImode, high_in2); + emit_insn (gen_adddi3_compareC (low_dest, low_in1, low_in2)); + } + else + emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2)); + emit_insn (gen_subdi3_carryinCV (high_dest, + force_reg (DImode, high_in1), + high_in2)); + } + + emit_move_insn (gen_lowpart (DImode, op0), low_dest); + emit_move_insn (gen_highpart (DImode, op0), high_dest); + +} + /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. 
*/ static unsigned HOST_WIDE_INT diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index e6e7e64..fda7a74 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1606,25 +1606,135 @@ } ) +;; This pattern is used to implement the built-in function implementing signed +;; integer addition with overflow checking for SImode and DImode + +(define_expand "addv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (match_operand 3 "")] + "" +{ + emit_insn (gen_add<mode>3_compareV (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + + DONE; +}) + +;; The pattern is used to implement the built-in functions implementing unsigned +;; integer addition with overflow checking for SImode and DImode + +(define_expand "uaddv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (match_operand 3 "")] + "" +{ + emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); + + DONE; +}) + + (define_expand "addti3" [(set (match_operand:TI 0 "register_operand" "") (plus:TI (match_operand:TI 1 "register_operand" "") - (match_operand:TI 2 "register_operand" "")))] + (match_operand:TI 2 "aarch64_reg_or_imm" "")))] "" { - rtx low = gen_reg_rtx (DImode); - emit_insn (gen_adddi3_compareC (low, gen_lowpart (DImode, operands[1]), - gen_lowpart (DImode, operands[2]))); + rtx l0,l1,l2,h0,h1,h2; - rtx high = gen_reg_rtx (DImode); - emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]), - gen_highpart (DImode, operands[2]))); + aarch64_add_128bit_scratch_regs (operands[1], operands[2], + &l0, &l1, &l2, &h0, &h1, &h2); + + if (l2 == const0_rtx) + { + l0 = l1; + if (!aarch64_pluslong_operand (h2, DImode)) + h2 = force_reg (DImode, h2); + 
emit_insn (gen_adddi3 (h0, h1, h2)); + } + else + { + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); + emit_insn (gen_adddi3_carryin (h0, h1, force_reg (DImode, h2))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); - emit_move_insn (gen_lowpart (DImode, operands[0]), low); - emit_move_insn (gen_highpart (DImode, operands[0]), high); DONE; }) +;; This pattern is used to implement the built-in function implementing signed +;; integer addition with overflow checking for TImode + +(define_expand "addvti4" + [(match_operand:TI 0 "register_operand" "") + (match_operand:TI 1 "register_operand" "") + (match_operand:TI 2 "aarch64_reg_or_imm" "") + (match_operand 3 "")] + "" +{ + rtx l0,l1,l2,h0,h1,h2; + + aarch64_add_128bit_scratch_regs (operands[1], operands[2], + &l0, &l1, &l2, &h0, &h1, &h2); + + if (l2 == const0_rtx) + { + l0 = l1; + emit_insn (gen_adddi3_compareV (h0, h1, force_reg (DImode, h2))); + } + else + { + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, l2))); + emit_insn (gen_adddi3_carryinV (h0, h1, force_reg (DImode, h2))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + DONE; +}) + +;; This pattern is used to implement the built-in function implementing unsigned +;; integer addition with overflow checking for TImode + +(define_expand "uaddvti4" + [(match_operand:TI 0 "register_operand" "") + (match_operand:TI 1 "register_operand" "") + (match_operand:TI 2 "aarch64_reg_or_imm" "") + (match_operand 3 "")] + "" +{ + rtx l0,l1,l2,h0,h1,h2; + + aarch64_add_128bit_scratch_regs (operands[1], operands[2], + &l0, &l1, &l2, &h0, &h1, &h2); + + if (l2 == const0_rtx) + { + l0 = l1; + emit_insn (gen_adddi3_compareC (h0, h1, force_reg (DImode, h2))); + } + else + { + emit_insn (gen_adddi3_compareC (l0, l1, force_reg (DImode, 
l2))); + emit_insn (gen_adddi3_carryinC (h0, h1, force_reg (DImode, h2))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + + aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); + DONE; + }) + (define_insn "add<mode>3_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ @@ -1710,7 +1820,7 @@ (define_insn "add<mode>3_compareC" [(set (reg:CC_C CC_REGNUM) - (ne:CC_C + (compare:CC_C (plus:<DWI> (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")) (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) @@ -1723,10 +1833,70 @@ [(set_attr "type" "alus_sreg")] ) +;; Note that since we're sign-extending, match the immediate in GPI +;; rather than in DWI. Since CONST_INT is modeless, this works fine. +(define_insn "*add<mode>3_compareV_cconly_imm" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (plus:<DWI> + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r,r")) + (match_operand:GPI 1 "aarch64_plus_immediate" "I,J")) + (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] + "" + "@ + cmn\\t%<w>0, %<w>1 + cmp\\t%<w>0, #%n1" + [(set_attr "type" "alus_imm")] +) + +(define_insn "*add<mode>3_compareV_cconly" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (plus:<DWI> + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r")) + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] + "" + "cmn\\t%<w>0, %<w>1" + [(set_attr "type" "alus_sreg")] +) + +(define_insn "*add<mode>3_compareV_imm" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r,r")) + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")) + (sign_extend:<DWI> + (plus:GPI (match_dup 1) (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand" "=r,r") + (plus:GPI (match_dup 1) (match_dup 2)))] + "" + "@ + adds\\t%<w>0, %<w>1, %<w>2 + 
subs\\t%<w>0, %<w>1, #%n2" + [(set_attr "type" "alus_imm,alus_imm")] +) + +(define_insn "add<mode>3_compareV" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (plus:<DWI> + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")) + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) + (sign_extend:<DWI> (plus:GPI (match_dup 1) (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (match_dup 1) (match_dup 2)))] + "" + "adds\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "alus_sreg")] +) + (define_insn "*adds_shift_imm_<mode>" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (plus:GPI (ASHIFT:GPI + (plus:GPI (ASHIFT:GPI (match_operand:GPI 1 "register_operand" "r") (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n")) (match_operand:GPI 3 "register_operand" "r")) @@ -2082,6 +2252,138 @@ [(set_attr "type" "adc_reg")] ) +(define_expand "add<mode>3_carryinC" + [(parallel + [(set (match_dup 3) + (compare:CC_C + (plus:<DWI> + (plus:<DWI> + (match_dup 4) + (zero_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r"))) + (zero_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r"))) + (zero_extend:<DWI> + (plus:GPI + (plus:GPI (match_dup 5) (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 5) (match_dup 1)) + (match_dup 2)))])] + "" +{ + operands[3] = gen_rtx_REG (CC_Cmode, CC_REGNUM); + operands[4] = gen_rtx_NE (<DWI>mode, operands[3], const0_rtx); + operands[5] = gen_rtx_NE (<MODE>mode, operands[3], const0_rtx); +}) + +(define_insn "*add<mode>3_carryinC_zero" + [(set (reg:CC_C CC_REGNUM) + (compare:CC_C + (plus:<DWI> + (match_operand:<DWI> 2 "aarch64_carry_operation" "") + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (zero_extend:<DWI> + (plus:GPI + (match_operand:GPI 3 "aarch64_carry_operation" "") + (match_dup 1))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI (match_dup 3) (match_dup 1)))] + "" + "adcs\\t%<w>0, 
%<w>1, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add<mode>3_carryinC" + [(set (reg:CC_C CC_REGNUM) + (compare:CC_C + (plus:<DWI> + (plus:<DWI> + (match_operand:<DWI> 3 "aarch64_carry_operation" "") + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) + (zero_extend:<DWI> + (plus:GPI + (plus:GPI + (match_operand:GPI 4 "aarch64_carry_operation" "") + (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))] + "" + "adcs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "adc_reg")] +) + +(define_expand "add<mode>3_carryinV" + [(parallel + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (plus:<DWI> + (plus:<DWI> + (match_dup 3) + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r"))) + (sign_extend:<DWI> + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))])] + "" +{ + rtx cc = gen_rtx_REG (CC_Cmode, CC_REGNUM); + operands[3] = gen_rtx_NE (<DWI>mode, cc, const0_rtx); + operands[4] = gen_rtx_NE (<MODE>mode, cc, const0_rtx); +}) + +(define_insn "*add<mode>3_carryinV_zero" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (plus:<DWI> + (match_operand:<DWI> 2 "aarch64_carry_operation" "") + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> + (plus:GPI + (match_operand:GPI 3 "aarch64_carry_operation" "") + (match_dup 1))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI (match_dup 3) (match_dup 1)))] + "" + "adcs\\t%<w>0, %<w>1, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add<mode>3_carryinV" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (plus:<DWI> + (plus:<DWI> + (match_operand:<DWI> 3 
"aarch64_carry_operation" "") + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) + (sign_extend:<DWI> + (plus:GPI + (plus:GPI + (match_operand:GPI 4 "aarch64_carry_operation" "") + (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))] + "" + "adcs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "adc_reg")] +) + (define_insn "*add_uxt<mode>_shift2" [(set (match_operand:GPI 0 "register_operand" "=rk") (plus:GPI (and:GPI @@ -2178,22 +2480,98 @@ (set_attr "simd" "*,yes")] ) +;; This pattern is used to implement the built-in function implementing signed +;; integer subtraction with overflow checking for SImode and DImode + +(define_expand "subv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "aarch64_reg_or_zero") + (match_operand:GPI 2 "aarch64_reg_or_zero") + (match_operand 3 "")] + "" +{ + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + + DONE; +}) + +;; This pattern is used to implement the built-in function implementing unsigned +;; integer subtraction with overflow checking for SImode and DImode + +(define_expand "usubv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "aarch64_reg_or_zero") + (match_operand:GPI 2 "aarch64_reg_or_zero") + (match_operand 3 "")] + "" +{ + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); + + DONE; +}) + (define_expand "subti3" [(set (match_operand:TI 0 "register_operand" "") - (minus:TI (match_operand:TI 1 "register_operand" "") + (minus:TI (match_operand:TI 1 "aarch64_reg_or_zero" "") (match_operand:TI 2 "register_operand" "")))] "" { - rtx low = gen_reg_rtx (DImode); - emit_insn (gen_subdi3_compare1 (low, gen_lowpart (DImode, 
operands[1]), - gen_lowpart (DImode, operands[2]))); + rtx l0 = gen_reg_rtx (DImode); + rtx l1 = simplify_gen_subreg (DImode, operands[1], TImode, + subreg_lowpart_offset (DImode, TImode)); + rtx l2 = gen_lowpart (DImode, operands[2]); + rtx h0 = gen_reg_rtx (DImode); + rtx h1 = simplify_gen_subreg (DImode, operands[1], TImode, + subreg_highpart_offset (DImode, TImode)); + rtx h2 = gen_highpart (DImode, operands[2]); - rtx high = gen_reg_rtx (DImode); - emit_insn (gen_subdi3_carryin (high, gen_highpart (DImode, operands[1]), - gen_highpart (DImode, operands[2]))); + emit_insn (gen_subdi3_compare1 (l0, l1, l2)); + emit_insn (gen_subdi3_carryin (h0, h1, h2)); - emit_move_insn (gen_lowpart (DImode, operands[0]), low); - emit_move_insn (gen_highpart (DImode, operands[0]), high); + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + DONE; +}) + +;; This pattern is used to implement the built-in function implementing signed +;; integer subtraction with overflow checking for TImode + +(define_expand "subvti4" + [(match_operand:TI 0 "register_operand") + (match_operand:TI 1 "aarch64_reg_or_zero") + (match_operand:TI 2 "aarch64_reg_or_imm") + (match_operand 3 "")] + "" +{ + rtx l0,l1,l2,h0,h1,h2; + + aarch64_subv_128bit_scratch_regs (operands[1], operands[2], + &l0, &l1, &l2, &h0, &h1, &h2); + aarch64_expand_subvti (operands[0], l0, l1, l2, h0, h1, h2); + + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + DONE; +}) + +;; This pattern is used to implement the built-in function implementing unsigned +;; integer subtraction with overflow checking for TImode + +(define_expand "usubvti4" + [(match_operand:TI 0 "register_operand") + (match_operand:TI 1 "aarch64_reg_or_zero") + (match_operand:TI 2 "aarch64_reg_or_imm") + (match_operand 3 "")] + "" +{ + rtx l0,l1,l2,h0,h1,h2; + + aarch64_subv_128bit_scratch_regs (operands[1], operands[2], + &l0, &l1, &l2, &h0, &h1, &h2); + aarch64_expand_subvti 
(operands[0], l0, l1, l2, h0, h1, h2); + + aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); DONE; }) @@ -2222,6 +2600,22 @@ [(set_attr "type" "alus_sreg")] ) +(define_insn "*sub<mode>3_compare1_imm" + [(set (reg:CC CC_REGNUM) + (compare:CC + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ,rZ") + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))) + (set (match_operand:GPI 0 "register_operand" "=r,r") + (plus:GPI + (match_dup 1) + (match_operand:GPI 3 "aarch64_plus_immediate" "J,I")))] + "UINTVAL (operands[2]) == -UINTVAL (operands[3])" + "@ + subs\\t%<w>0, %<w>1, #%n3 + adds\\t%<w>0, %<w>1, %3" + [(set_attr "type" "alus_imm")] +) + (define_insn "sub<mode>3_compare1" [(set (reg:CC CC_REGNUM) (compare:CC @@ -2498,6 +2892,85 @@ [(set_attr "type" "adc_reg")] ) +(define_expand "sub<mode>3_carryinCV" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC + (sign_extend:<DWI> + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r")) + (ltu:<DWI> (reg:CC CC_REGNUM) (const_int 0))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) + (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))])] + "" +) + +(define_insn "*sub<mode>3_carryinCV_z1_z2" + [(set (reg:CC CC_REGNUM) + (compare:CC + (const_int 0) + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) + (set (match_operand:GPI 0 "register_operand" "=r") + (neg:GPI (match_operand:GPI 1 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, <w>zr, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub<mode>3_carryinCV_z1" + [(set (reg:CC CC_REGNUM) + (compare:CC + (const_int 0) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r")) + (match_operand:<DWI> 2 "aarch64_borrow_operation" "")))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (neg:GPI (match_dup 1)) + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] + "" + 
"sbcs\\t%<w>0, <w>zr, %<w>1" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub<mode>3_carryinCV_z2" + [(set (reg:CC CC_REGNUM) + (compare:CC + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r")) + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (match_dup 1) + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, %<w>1, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub<mode>3_carryinCV" + [(set (reg:CC CC_REGNUM) + (compare:CC + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r")) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r")) + (match_operand:<DWI> 3 "aarch64_borrow_operation" "")))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) + (match_operand:GPI 4 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "adc_reg")] +) + (define_insn "*sub_uxt<mode>_shift2" [(set (match_operand:GPI 0 "register_operand" "=rk") (minus:GPI (match_operand:GPI 4 "register_operand" "rk") diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c new file mode 100644 index 0000000..0b31500 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_sadd_128.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +__int128 overflow_add (__int128 x, __int128 y) +{ + __int128 r; + + int ovr = __builtin_add_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "adds" } } */ +/* { dg-final { scan-assembler "adcs" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c b/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c new file mode 100644 index 0000000..9768a98 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_saddl.c @@ -0,0 
+1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +long overflow_add (long x, long y) +{ + long r; + + int ovr = __builtin_saddl_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "adds" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c b/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c new file mode 100644 index 0000000..126a526 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_saddll.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +long long overflow_add (long long x, long long y) +{ + long long r; + + int ovr = __builtin_saddll_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "adds" } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c new file mode 100644 index 0000000..c1261e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssub_128.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +__int128 overflow_sub (__int128 x, __int128 y) +{ + __int128 r; + + int ovr = __builtin_sub_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "subs" } } */ +/* { dg-final { scan-assembler "sbcs" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c b/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c new file mode 100644 index 0000000..1040464 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssubl.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +long overflow_sub (long x, long y) +{ + long r; + + int ovr = __builtin_ssubl_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "subs" } } */ diff 
--git a/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c b/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c new file mode 100644 index 0000000..a03df88 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_ssubll.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +long long overflow_sub (long long x, long long y) +{ + long long r; + + int ovr = __builtin_ssubll_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "subs" } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c new file mode 100644 index 0000000..c573c2a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uadd_128.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +unsigned __int128 overflow_add (unsigned __int128 x, unsigned __int128 y) +{ + unsigned __int128 r; + + int ovr = __builtin_add_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "adds" } } */ +/* { dg-final { scan-assembler "adcs" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c b/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c new file mode 100644 index 0000000..e325591 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uaddl.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +unsigned long overflow_add (unsigned long x, unsigned long y) +{ + unsigned long r; + + int ovr = __builtin_uaddl_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "adds" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c b/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c new file mode 100644 index 0000000..5f42886 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_uaddll.c @@ -0,0 
+1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +unsigned long long overflow_add (unsigned long long x, unsigned long long y) +{ + unsigned long long r; + + int ovr = __builtin_uaddll_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "adds" } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c b/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c new file mode 100644 index 0000000..a84f4a4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_usub_128.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +unsigned __int128 overflow_sub (unsigned __int128 x, unsigned __int128 y) +{ + unsigned __int128 r; + + int ovr = __builtin_sub_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "subs" } } */ +/* { dg-final { scan-assembler "sbcs" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c b/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c new file mode 100644 index 0000000..ed033da --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_usubl.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +unsigned long overflow_sub (unsigned long x, unsigned long y) +{ + unsigned long r; + + int ovr = __builtin_usubl_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "subs" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c b/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c new file mode 100644 index 0000000..a742f0c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_usubll.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void overflow_handler (); + +unsigned long long overflow_sub (unsigned long long x, unsigned long long y) +{ + unsigned long 
long r; + + int ovr = __builtin_usubll_overflow (x, y, &r); + if (ovr) + overflow_handler (); + + return r; +} + +/* { dg-final { scan-assembler "subs" } } */ + -- 1.9.1 ^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2017-08-01 6:33 UTC | newest] Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2016-11-30 23:06 [PATCH][Aarch64] Add support for overflow add and sub operations Michael Collison 2017-05-19 6:27 Michael Collison 2017-05-19 11:00 ` Christophe Lyon 2017-05-19 21:42 ` Michael Collison 2017-07-05 9:38 ` Richard Earnshaw (lists) 2017-07-06 7:29 ` Michael Collison 2017-07-06 8:22 ` Richard Earnshaw (lists) 2017-08-01 6:33 ` Michael Collison
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).