public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Add uaddv4_optab, usubv4_optab
@ 2015-11-20 10:28 Richard Henderson
  2015-11-20 10:43 ` Jakub Jelinek
  2015-11-20 10:46 ` Eric Botcazou
  0 siblings, 2 replies; 6+ messages in thread
From: Richard Henderson @ 2015-11-20 10:28 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 516 bytes --]

Toward fixing PR68385.  I'm just starting a full round of testing, but

extern void underflow(void) __attribute__((noreturn));
unsigned sub1(unsigned a, unsigned b)
{
     unsigned r = a - b;
     if (r > a) underflow();
     return r;
}

unsigned sub2(unsigned a, unsigned b)
{
     unsigned r;
     if (__builtin_sub_overflow(a, b, &r)) underflow();
     return r;
}


sub1:
	movl	%edi, %eax
	subl	%esi, %eax
	cmpl	%eax, %edi
	jb	.L7
	rep ret
...
sub2:
	movl	%edi, %eax
	subl	%esi, %eax
	jb	.L16
	rep ret
...


r~

[-- Attachment #2: z --]
[-- Type: text/plain, Size: 10541 bytes --]

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4c5e22a..10a004e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6156,6 +6156,22 @@
 		  (const_string "4")]
 	      (const_string "<MODE_SIZE>")))])
 
+(define_expand "uaddv<mode>4"
+  [(parallel [(set (reg:CCC FLAGS_REG)
+		   (compare:CCC
+		     (plus:SWI (match_dup 1) (match_dup 2))
+		     (match_dup 1)))
+	      (set (match_dup 0)
+		   (plus:SWI (match_dup 1) (match_dup 2)))])
+   (set (pc) (if_then_else
+	       (ne (reg:CCC FLAGS_REG) (const_int 0))
+	       (label_ref (match_operand 3))
+	       (pc)))]
+  ""
+{
+  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
+})
+
 ;; The lea patterns for modes less than 32 bits need to be matched by
 ;; several insns converted to real lea by splitters.
 
@@ -6461,6 +6477,20 @@
 		  (const_string "4")]
 	      (const_string "<MODE_SIZE>")))])
 
+(define_expand "usubv<mode>4"
+  [(parallel [(set (reg:CC FLAGS_REG)
+		   (compare:CC (match_dup 1) (match_dup 2)))
+	      (set (match_dup 0)
+		   (minus:SWI (match_dup 1) (match_dup 2)))])
+   (set (pc) (if_then_else
+	       (ltu (reg:CC FLAGS_REG) (const_int 0))
+	       (label_ref (match_operand 3))
+	       (pc)))]
+  ""
+{
+  ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);
+})
+
 (define_insn "*sub<mode>_3"
   [(set (reg FLAGS_REG)
 	(compare (match_operand:SWI 1 "nonimmediate_operand" "0,0")
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 8b2deaa..9386e62 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -4912,6 +4912,25 @@ address calculations.  @code{add@var{m}3} is used if
 @itemx @samp{and@var{m}3}, @samp{ior@var{m}3}, @samp{xor@var{m}3}
 Similar, for other arithmetic operations.
 
+@cindex @code{addv@var{m}4} instruction pattern
+@item @samp{addv@var{m}4}
+Add operand 2 and operand 1, storing the result in operand 0.  If signed
+overflow occurs during the addition, jump to the label in operand 3.
+
+@cindex @code{subv@var{m}4} instruction pattern
+@cindex @code{mulv@var{m}4} instruction pattern
+@item @samp{subv@var{m}4}, @samp{mulv@var{m}4}
+Similar, for other signed arithmetic operations.
+
+@cindex @code{uaddv@var{m}4} instruction pattern
+@item @samp{uaddv@var{m}4}
+Like @code{addv@var{m}4}, except jump on unsigned overflow.
+
+@cindex @code{usubv@var{m}4} instruction pattern
+@cindex @code{umulv@var{m}4} instruction pattern
+@item @samp{usubv@var{m}4}, @samp{umulv@var{m}4}
+Similar, for other unsigned arithmetic operations.
+
 @cindex @code{fma@var{m}4} instruction pattern
 @item @samp{fma@var{m}4}
 Multiply operand 2 and operand 1, then add operand 3, storing the
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index bc77bdc..b15657f 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -546,6 +546,33 @@ expand_addsub_overflow (location_t loc, tree_code code, tree lhs,
   /* u1 +- u2 -> ur  */
   if (uns0_p && uns1_p && unsr_p)
     {
+      insn_code icode = optab_handler (code == PLUS_EXPR ? uaddv4_optab
+                                       : usubv4_optab, mode);
+      if (icode != CODE_FOR_nothing)
+	{
+	  struct expand_operand ops[4];
+	  rtx_insn *last = get_last_insn ();
+
+	  res = gen_reg_rtx (mode);
+	  create_output_operand (&ops[0], res, mode);
+	  create_input_operand (&ops[1], op0, mode);
+	  create_input_operand (&ops[2], op1, mode);
+	  create_fixed_operand (&ops[3], do_error);
+	  if (maybe_expand_insn (icode, 4, ops))
+	    {
+	      last = get_last_insn ();
+	      if (profile_status_for_fn (cfun) != PROFILE_ABSENT
+		  && JUMP_P (last)
+		  && any_condjump_p (last)
+		  && !find_reg_note (last, REG_BR_PROB, 0))
+		add_int_reg_note (last, REG_BR_PROB, PROB_VERY_UNLIKELY);
+	      emit_jump (done_label);
+	      goto do_error_label;
+	    }
+
+	  delete_insns_since (last);
+	}
+
       /* Compute the operation.  On RTL level, the addition is always
 	 unsigned.  */
       res = expand_binop (mode, code == PLUS_EXPR ? add_optab : sub_optab,
@@ -737,92 +764,88 @@ expand_addsub_overflow (location_t loc, tree_code code, tree lhs,
   gcc_assert (!uns0_p && !uns1_p && !unsr_p);
 
   /* s1 +- s2 -> sr  */
- do_signed: ;
-  enum insn_code icode;
-  icode = optab_handler (code == PLUS_EXPR ? addv4_optab : subv4_optab, mode);
-  if (icode != CODE_FOR_nothing)
-    {
-      struct expand_operand ops[4];
-      rtx_insn *last = get_last_insn ();
-
-      res = gen_reg_rtx (mode);
-      create_output_operand (&ops[0], res, mode);
-      create_input_operand (&ops[1], op0, mode);
-      create_input_operand (&ops[2], op1, mode);
-      create_fixed_operand (&ops[3], do_error);
-      if (maybe_expand_insn (icode, 4, ops))
-	{
-	  last = get_last_insn ();
-	  if (profile_status_for_fn (cfun) != PROFILE_ABSENT
-	      && JUMP_P (last)
-	      && any_condjump_p (last)
-	      && !find_reg_note (last, REG_BR_PROB, 0))
-	    add_int_reg_note (last, REG_BR_PROB, PROB_VERY_UNLIKELY);
-	  emit_jump (done_label);
-        }
-      else
-	{
-	  delete_insns_since (last);
-	  icode = CODE_FOR_nothing;
-	}
-    }
-
-  if (icode == CODE_FOR_nothing)
-    {
-      rtx_code_label *sub_check = gen_label_rtx ();
-      int pos_neg = 3;
-
-      /* Compute the operation.  On RTL level, the addition is always
-	 unsigned.  */
-      res = expand_binop (mode, code == PLUS_EXPR ? add_optab : sub_optab,
-			  op0, op1, NULL_RTX, false, OPTAB_LIB_WIDEN);
-
-      /* If we can prove one of the arguments (for MINUS_EXPR only
-	 the second operand, as subtraction is not commutative) is always
-	 non-negative or always negative, we can do just one comparison
-	 and conditional jump instead of 2 at runtime, 3 present in the
-	 emitted code.  If one of the arguments is CONST_INT, all we
-	 need is to make sure it is op1, then the first
-	 do_compare_rtx_and_jump will be just folded.  Otherwise try
-	 to use range info if available.  */
-      if (code == PLUS_EXPR && CONST_INT_P (op0))
-	std::swap (op0, op1);
-      else if (CONST_INT_P (op1))
-	;
-      else if (code == PLUS_EXPR && TREE_CODE (arg0) == SSA_NAME)
-	{
-	  pos_neg = get_range_pos_neg (arg0);
-	  if (pos_neg != 3)
-	    std::swap (op0, op1);
-	}
-      if (pos_neg == 3 && !CONST_INT_P (op1) && TREE_CODE (arg1) == SSA_NAME)
-	pos_neg = get_range_pos_neg (arg1);
-
-      /* If the op1 is negative, we have to use a different check.  */
-      if (pos_neg == 3)
-	do_compare_rtx_and_jump (op1, const0_rtx, LT, false, mode, NULL_RTX,
-				 NULL, sub_check, PROB_EVEN);
-
-      /* Compare the result of the operation with one of the operands.  */
-      if (pos_neg & 1)
-	do_compare_rtx_and_jump (res, op0, code == PLUS_EXPR ? GE : LE,
-				 false, mode, NULL_RTX, NULL, done_label,
-				 PROB_VERY_LIKELY);
-
-      /* If we get here, we have to print the error.  */
-      if (pos_neg == 3)
-	{
-	  emit_jump (do_error);
+ do_signed:
+  {
+    insn_code icode = optab_handler (code == PLUS_EXPR ? addv4_optab
+				     : subv4_optab, mode);
+    if (icode != CODE_FOR_nothing)
+      {
+	struct expand_operand ops[4];
+	rtx_insn *last = get_last_insn ();
+
+	res = gen_reg_rtx (mode);
+	create_output_operand (&ops[0], res, mode);
+	create_input_operand (&ops[1], op0, mode);
+	create_input_operand (&ops[2], op1, mode);
+	create_fixed_operand (&ops[3], do_error);
+	if (maybe_expand_insn (icode, 4, ops))
+	  {
+	    last = get_last_insn ();
+	    if (profile_status_for_fn (cfun) != PROFILE_ABSENT
+		&& JUMP_P (last)
+		&& any_condjump_p (last)
+		&& !find_reg_note (last, REG_BR_PROB, 0))
+	      add_int_reg_note (last, REG_BR_PROB, PROB_VERY_UNLIKELY);
+	    emit_jump (done_label);
+	    goto do_error_label;
+	  }
+
+	delete_insns_since (last);
+      }
+
+    rtx_code_label *sub_check = gen_label_rtx ();
+    int pos_neg = 3;
+
+    /* Compute the operation.  On RTL level, the addition is always
+       unsigned.  */
+    res = expand_binop (mode, code == PLUS_EXPR ? add_optab : sub_optab,
+			op0, op1, NULL_RTX, false, OPTAB_LIB_WIDEN);
+
+    /* If we can prove one of the arguments (for MINUS_EXPR only
+       the second operand, as subtraction is not commutative) is always
+       non-negative or always negative, we can do just one comparison
+       and conditional jump instead of 2 at runtime, 3 present in the
+       emitted code.  If one of the arguments is CONST_INT, all we
+       need is to make sure it is op1, then the first
+       do_compare_rtx_and_jump will be just folded.  Otherwise try
+       to use range info if available.  */
+    if (code == PLUS_EXPR && CONST_INT_P (op0))
+      std::swap (op0, op1);
+    else if (CONST_INT_P (op1))
+      ;
+    else if (code == PLUS_EXPR && TREE_CODE (arg0) == SSA_NAME)
+      {
+        pos_neg = get_range_pos_neg (arg0);
+        if (pos_neg != 3)
+	  std::swap (op0, op1);
+      }
+    if (pos_neg == 3 && !CONST_INT_P (op1) && TREE_CODE (arg1) == SSA_NAME)
+      pos_neg = get_range_pos_neg (arg1);
+
+    /* If the op1 is negative, we have to use a different check.  */
+    if (pos_neg == 3)
+      do_compare_rtx_and_jump (op1, const0_rtx, LT, false, mode, NULL_RTX,
+			       NULL, sub_check, PROB_EVEN);
+
+    /* Compare the result of the operation with one of the operands.  */
+    if (pos_neg & 1)
+      do_compare_rtx_and_jump (res, op0, code == PLUS_EXPR ? GE : LE,
+			       false, mode, NULL_RTX, NULL, done_label,
+			       PROB_VERY_LIKELY);
 
-	  emit_label (sub_check);
-	}
+    /* If we get here, we have to print the error.  */
+    if (pos_neg == 3)
+      {
+	emit_jump (do_error);
+	emit_label (sub_check);
+      }
 
-      /* We have k = a + b for b < 0 here.  k <= a must hold.  */
-      if (pos_neg & 2)
-	do_compare_rtx_and_jump (res, op0, code == PLUS_EXPR ? LE : GE,
-				 false, mode, NULL_RTX, NULL, done_label,
-				 PROB_VERY_LIKELY);
-    }
+    /* We have k = a + b for b < 0 here.  k <= a must hold.  */
+    if (pos_neg & 2)
+      do_compare_rtx_and_jump (res, op0, code == PLUS_EXPR ? LE : GE,
+			       false, mode, NULL_RTX, NULL, done_label,
+			       PROB_VERY_LIKELY);
+  }
 
  do_error_label:
   emit_label (do_error);
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 0ca2333..c141a3c 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -197,6 +197,8 @@ OPTAB_D (ctrap_optab, "ctrap$a4")
 OPTAB_D (addv4_optab, "addv$I$a4")
 OPTAB_D (subv4_optab, "subv$I$a4")
 OPTAB_D (mulv4_optab, "mulv$I$a4")
+OPTAB_D (uaddv4_optab, "uaddv$I$a4")
+OPTAB_D (usubv4_optab, "usubv$I$a4")
 OPTAB_D (umulv4_optab, "umulv$I$a4")
 OPTAB_D (negv3_optab, "negv$I$a3")
 OPTAB_D (addptr3_optab, "addptr$a3")

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2015-11-20 11:12 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-20 10:28 Add uaddv4_optab, usubv4_optab Richard Henderson
2015-11-20 10:43 ` Jakub Jelinek
2015-11-20 11:01   ` Eric Botcazou
2015-11-20 11:12     ` Richard Henderson
2015-11-20 11:11   ` Richard Henderson
2015-11-20 10:46 ` Eric Botcazou

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).