public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] widening_mul, i386: Improve spaceship expansion on x86 [PR103973]
@ 2022-01-14 22:56 Jakub Jelinek
  2022-01-15  8:29 ` Uros Bizjak
  0 siblings, 1 reply; 8+ messages in thread
From: Jakub Jelinek @ 2022-01-14 22:56 UTC (permalink / raw)
  To: Richard Biener, Uros Bizjak; +Cc: gcc-patches

Hi!

C++20:
#include <compare>
auto cmp4way(double a, double b)
{
  return a <=> b;
}
expands to:
        ucomisd %xmm1, %xmm0
        jp      .L8
        movl    $0, %eax
        jne     .L8
.L2:
        ret
        .p2align 4,,10
        .p2align 3
.L8:
        comisd  %xmm0, %xmm1
        movl    $-1, %eax
        ja      .L2
        ucomisd %xmm1, %xmm0
        setbe   %al
        addl    $1, %eax
        ret
That is 3 comparisons of the same operands.
The following patch improves it to just one comparison:
        comisd  %xmm1, %xmm0
        jp      .L4
        seta    %al
        movl    $0, %edx
        leal    -1(%rax,%rax), %eax
        cmove   %edx, %eax
        ret
.L4:
        movl    $2, %eax
        ret
While a <=> b expands to a == b ? 0 : a < b ? -1 : a > b ? 1 : 2
where the first comparison is equality and this shouldn't raise
exceptions on qNaN operands, if the operands aren't equal (which
includes unordered cases), then it immediately performs < or >
comparison and that raises exceptions even on qNaNs, so we can just
perform a single comparison that raises exceptions on qNaN.
As the 4 different cases are encoded as
ZF CF PF
1  1  1  a unordered b
0  0  0  a > b
0  1  0  a < b
1  0  0  a == b
we can emit optimal sequence of comparions, first jp
for the unordered case, then je for the == case and finally jb
for the < case.

The patch pattern recognizes spaceship-like comparisons during
widening_mul if the spaceship optab is implemented, and replaces
thoose comparisons with comparisons of .SPACESHIP ifn which returns
-1/0/1/2 based on the comparison.  This seems to work well both for the
case of just returning the -1/0/1/2 (when we have just a common
successor with a PHI) or when the different cases are handled with
various other basic blocks.  The testcases cover both of those cases,
the latter with different function calls in those.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-01-14  Jakub Jelinek  <jakub@redhat.com>

	PR target/103973
	* tree-cfg.h (cond_only_block_p): Declare.
	* tree-ssa-phiopt.c (cond_only_block_p): Move function to ...
	* tree-cfg.c (cond_only_block_p): ... here.  No longer static.
	* optabs.def (spaceship_optab): New optab.
	* internal-fn.def (SPACESHIP): New internal function.
	* internal-fn.h (expand_SPACESHIP): Declare.
	* internal-fn.c (expand_PHI): Formatting fix.
	(expand_SPACESHIP): New function.
	* tree-ssa-math-opts.c (optimize_spaceship): New function.
	(math_opts_dom_walker::after_dom_children): Use it.
	* config/i386/i386.md (spaceship<mode>3): New define_expand.
	* config/i386/i386-protos.h (ix86_expand_fp_spaceship): Declare.
	* config/i386/i386-expand.c (ix86_expand_fp_spaceship): New function.
	* doc/md.texi (spaceship@var{m}3): Document.

	* gcc.target/i386/pr103973-1.c: New test.
	* gcc.target/i386/pr103973-2.c: New test.
	* gcc.target/i386/pr103973-3.c: New test.
	* gcc.target/i386/pr103973-4.c: New test.
	* g++.target/i386/pr103973-1.C: New test.
	* g++.target/i386/pr103973-2.C: New test.
	* g++.target/i386/pr103973-3.C: New test.
	* g++.target/i386/pr103973-4.C: New test.

--- gcc/tree-cfg.h.jj	2022-01-13 22:29:07.414943450 +0100
+++ gcc/tree-cfg.h	2022-01-14 12:59:42.147866622 +0100
@@ -111,6 +111,7 @@ extern basic_block gimple_switch_label_b
 extern basic_block gimple_switch_default_bb (function *, gswitch *);
 extern edge gimple_switch_edge (function *, gswitch *, unsigned);
 extern edge gimple_switch_default_edge (function *, gswitch *);
+extern bool cond_only_block_p (basic_block);
 
 /* Return true if the LHS of a call should be removed.  */
 
--- gcc/tree-ssa-phiopt.c.jj	2022-01-13 22:29:07.514942041 +0100
+++ gcc/tree-ssa-phiopt.c	2022-01-14 12:59:42.146866637 +0100
@@ -1958,31 +1958,6 @@ minmax_replacement (basic_block cond_bb,
   return true;
 }
 
-/* Return true if the only executable statement in BB is a GIMPLE_COND.  */
-
-static bool
-cond_only_block_p (basic_block bb)
-{
-  /* BB must have no executable statements.  */
-  gimple_stmt_iterator gsi = gsi_after_labels (bb);
-  if (phi_nodes (bb))
-    return false;
-  while (!gsi_end_p (gsi))
-    {
-      gimple *stmt = gsi_stmt (gsi);
-      if (is_gimple_debug (stmt))
-	;
-      else if (gimple_code (stmt) == GIMPLE_NOP
-	       || gimple_code (stmt) == GIMPLE_PREDICT
-	       || gimple_code (stmt) == GIMPLE_COND)
-	;
-      else
-	return false;
-      gsi_next (&gsi);
-    }
-  return true;
-}
-
 /* Attempt to optimize (x <=> y) cmp 0 and similar comparisons.
    For strong ordering <=> try to match something like:
     <bb 2> :  // cond3_bb (== cond2_bb)
--- gcc/tree-cfg.c.jj	2022-01-13 22:29:07.399943661 +0100
+++ gcc/tree-cfg.c	2022-01-14 12:59:42.148866608 +0100
@@ -9410,6 +9410,31 @@ gimple_switch_default_edge (function *if
   return gimple_switch_edge (ifun, gs, 0);
 }
 
+/* Return true if the only executable statement in BB is a GIMPLE_COND.  */
+
+bool
+cond_only_block_p (basic_block bb)
+{
+  /* BB must have no executable statements.  */
+  gimple_stmt_iterator gsi = gsi_after_labels (bb);
+  if (phi_nodes (bb))
+    return false;
+  while (!gsi_end_p (gsi))
+    {
+      gimple *stmt = gsi_stmt (gsi);
+      if (is_gimple_debug (stmt))
+	;
+      else if (gimple_code (stmt) == GIMPLE_NOP
+	       || gimple_code (stmt) == GIMPLE_PREDICT
+	       || gimple_code (stmt) == GIMPLE_COND)
+	;
+      else
+	return false;
+      gsi_next (&gsi);
+    }
+  return true;
+}
+
 
 /* Emit return warnings.  */
 
--- gcc/optabs.def.jj	2022-01-13 22:29:07.371944055 +0100
+++ gcc/optabs.def	2022-01-14 17:10:45.681029356 +0100
@@ -259,6 +259,7 @@ OPTAB_D (usubv4_optab, "usubv$I$a4")
 OPTAB_D (umulv4_optab, "umulv$I$a4")
 OPTAB_D (negv3_optab, "negv$I$a3")
 OPTAB_D (addptr3_optab, "addptr$a3")
+OPTAB_D (spaceship_optab, "spaceship$a3")
 
 OPTAB_D (smul_highpart_optab, "smul$a3_highpart")
 OPTAB_D (umul_highpart_optab, "umul$a3_highpart")
--- gcc/internal-fn.def.jj	2022-01-13 22:29:07.367944112 +0100
+++ gcc/internal-fn.def	2022-01-14 12:59:42.146866637 +0100
@@ -430,6 +430,9 @@ DEF_INTERNAL_FN (NOP, ECF_CONST | ECF_LE
 /* Temporary vehicle for __builtin_shufflevector.  */
 DEF_INTERNAL_FN (SHUFFLEVECTOR, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 
+/* <=> optimization.  */
+DEF_INTERNAL_FN (SPACESHIP, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
+
 #undef DEF_INTERNAL_INT_FN
 #undef DEF_INTERNAL_FLT_FN
 #undef DEF_INTERNAL_FLT_FLOATN_FN
--- gcc/internal-fn.h.jj	2022-01-13 22:29:15.344831763 +0100
+++ gcc/internal-fn.h	2022-01-14 13:31:47.892386409 +0100
@@ -241,6 +241,7 @@ extern void expand_internal_call (gcall
 extern void expand_internal_call (internal_fn, gcall *);
 extern void expand_PHI (internal_fn, gcall *);
 extern void expand_SHUFFLEVECTOR (internal_fn, gcall *);
+extern void expand_SPACESHIP (internal_fn, gcall *);
 
 extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
 
--- gcc/internal-fn.c.jj	2022-01-13 22:29:15.344831763 +0100
+++ gcc/internal-fn.c	2022-01-14 17:23:04.335568234 +0100
@@ -4425,5 +4425,27 @@ expand_SHUFFLEVECTOR (internal_fn, gcall
 void
 expand_PHI (internal_fn, gcall *)
 {
-    gcc_unreachable ();
+  gcc_unreachable ();
+}
+
+void
+expand_SPACESHIP (internal_fn, gcall *stmt)
+{
+  tree lhs = gimple_call_lhs (stmt);
+  tree rhs1 = gimple_call_arg (stmt, 0);
+  tree rhs2 = gimple_call_arg (stmt, 1);
+  tree type = TREE_TYPE (rhs1);
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  rtx op1 = expand_normal (rhs1);
+  rtx op2 = expand_normal (rhs2);
+
+  class expand_operand ops[3];
+  create_output_operand (&ops[0], target, TYPE_MODE (TREE_TYPE (lhs)));
+  create_input_operand (&ops[1], op1, TYPE_MODE (type));
+  create_input_operand (&ops[2], op2, TYPE_MODE (type));
+  insn_code icode = optab_handler (spaceship_optab, TYPE_MODE (type));
+  expand_insn (icode, 3, ops);
+  if (!rtx_equal_p (target, ops[0].value))
+    emit_move_insn (target, ops[0].value);
 }
--- gcc/tree-ssa-math-opts.c.jj	2022-01-13 22:29:07.491942365 +0100
+++ gcc/tree-ssa-math-opts.c	2022-01-14 20:41:45.000712458 +0100
@@ -4637,6 +4637,195 @@ convert_mult_to_highpart (gassign *stmt,
   return true;
 }
 
+/* If target has spaceship<MODE>3 expander, pattern recognize
+   <bb 2> [local count: 1073741824]:
+   if (a_2(D) == b_3(D))
+     goto <bb 6>; [34.00%]
+   else
+     goto <bb 3>; [66.00%]
+
+   <bb 3> [local count: 708669601]:
+   if (a_2(D) < b_3(D))
+     goto <bb 6>; [1.04%]
+   else
+     goto <bb 4>; [98.96%]
+
+   <bb 4> [local count: 701299439]:
+   if (a_2(D) > b_3(D))
+     goto <bb 5>; [48.89%]
+   else
+     goto <bb 6>; [51.11%]
+
+   <bb 5> [local count: 342865295]:
+
+   <bb 6> [local count: 1073741824]:
+   and turn it into:
+   <bb 2> [local count: 1073741824]:
+   _1 = .SPACESHIP (a_2(D), b_3(D));
+   if (_1 == 0)
+     goto <bb 6>; [34.00%]
+   else
+     goto <bb 3>; [66.00%]
+
+   <bb 3> [local count: 708669601]:
+   if (_1 == -1)
+     goto <bb 6>; [1.04%]
+   else
+     goto <bb 4>; [98.96%]
+
+   <bb 4> [local count: 701299439]:
+   if (_1 == 1)
+     goto <bb 5>; [48.89%]
+   else
+     goto <bb 6>; [51.11%]
+
+   <bb 5> [local count: 342865295]:
+
+   <bb 6> [local count: 1073741824]:
+   so that the backend can emit optimal comparison and
+   conditional jump sequence.  */
+
+static void
+optimize_spaceship (gimple *stmt)
+{
+  enum tree_code code = gimple_cond_code (stmt);
+  if (code != EQ_EXPR && code != NE_EXPR)
+    return;
+  tree arg1 = gimple_cond_lhs (stmt);
+  tree arg2 = gimple_cond_rhs (stmt);
+  if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
+      || !HONOR_NANS (TREE_TYPE (arg1))
+      || optab_handler (spaceship_optab,
+			TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
+      || operand_equal_p (arg1, arg2, 0))
+    return;
+
+  basic_block bb0 = gimple_bb (stmt), bb1, bb2;
+  edge em1 = NULL, e1 = NULL, e2 = NULL;
+  bb1 = EDGE_SUCC (bb0, 1)->dest;
+  if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
+    bb1 = EDGE_SUCC (bb0, 0)->dest;
+
+  gimple *g = last_stmt (bb1);
+  if (g == NULL
+      || gimple_code (g) != GIMPLE_COND
+      || !single_pred_p (bb1)
+      || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
+	  ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
+	  : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
+	     || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
+      || !cond_only_block_p (bb1))
+    return;
+
+  enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
+			  ? LT_EXPR : GT_EXPR);
+  switch (gimple_cond_code (g))
+    {
+    case LT_EXPR:
+    case LE_EXPR:
+      break;
+    case GT_EXPR:
+    case GE_EXPR:
+      ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;
+      break;
+    default:
+      return;
+    }
+
+  /* With NaNs, </<=/>/>= are false, so we need to look for the
+     third comparison on the false edge from whatever non-equality
+     comparison the second comparison is.  */
+  int i = (EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0;
+  bb2 = EDGE_SUCC (bb1, i)->dest;
+  g = last_stmt (bb2);
+  if (g == NULL
+      || gimple_code (g) != GIMPLE_COND
+      || !single_pred_p (bb2)
+      || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
+	  ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
+	  : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
+	     || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
+      || !cond_only_block_p (bb2)
+      || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
+    return;
+
+  enum tree_code ccode2
+    = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
+  switch (gimple_cond_code (g))
+    {
+    case LT_EXPR:
+    case LE_EXPR:
+      break;
+    case GT_EXPR:
+    case GE_EXPR:
+      ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
+      break;
+    default:
+      return;
+    }
+  if (ccode == ccode2)
+    return;
+
+  if (ccode == LT_EXPR)
+    {
+      em1 = EDGE_SUCC (bb1, 1 - i);
+      e1 = EDGE_SUCC (bb2, 0);
+      e2 = EDGE_SUCC (bb2, 1);
+      if ((e1->flags & EDGE_TRUE_VALUE) == 0)
+	std::swap (e1, e2);
+    }
+  else
+    {
+      e1 = EDGE_SUCC (bb1, 1 - i);
+      em1 = EDGE_SUCC (bb2, 0);
+      e2 = EDGE_SUCC (bb2, 1);
+      if ((em1->flags & EDGE_TRUE_VALUE) == 0)
+	std::swap (em1, e2);
+    }
+
+  g = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2);
+  tree lhs = make_ssa_name (integer_type_node);
+  gimple_call_set_lhs (g, lhs);
+  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+
+  gcond *cond = as_a <gcond *> (stmt);
+  gimple_cond_set_lhs (cond, lhs);
+  gimple_cond_set_rhs (cond, integer_zero_node);
+  update_stmt (stmt);
+
+  g = last_stmt (bb1);
+  cond = as_a <gcond *> (g);
+  gimple_cond_set_code (cond, EQ_EXPR);
+  gimple_cond_set_lhs (cond, lhs);
+  if (em1->src == bb1)
+    gimple_cond_set_rhs (cond, integer_minus_one_node);
+  else
+    {
+      gcc_assert (e1->src == bb1);
+      gimple_cond_set_rhs (cond, integer_one_node);
+    }
+  update_stmt (g);
+
+  g = last_stmt (bb2);
+  cond = as_a <gcond *> (g);
+  gimple_cond_set_lhs (cond, lhs);
+  if (em1->src == bb2)
+    gimple_cond_set_rhs (cond, integer_minus_one_node);
+  else
+    {
+      gcc_assert (e1->src == bb2);
+      gimple_cond_set_rhs (cond, integer_one_node);
+    }
+  gimple_cond_set_code (cond,
+			(e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
+  update_stmt (g);
+
+  wide_int wm1 = wi::minus_one (TYPE_PRECISION (integer_type_node));
+  wide_int w2 = wi::two (TYPE_PRECISION (integer_type_node));
+  set_range_info (lhs, VR_RANGE, wm1, w2);
+}
+
 
 /* Find integer multiplications where the operands are extended from
    smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
@@ -4798,6 +4987,8 @@ math_opts_dom_walker::after_dom_children
 	      break;
 	    }
 	}
+      else if (gimple_code (stmt) == GIMPLE_COND)
+	optimize_spaceship (stmt);
       gsi_next (&gsi);
     }
   if (fma_state.m_deferring_p
--- gcc/config/i386/i386.md.jj	2022-01-14 11:51:34.432384170 +0100
+++ gcc/config/i386/i386.md	2022-01-14 18:22:41.140906449 +0100
@@ -23886,6 +23886,18 @@ (define_insn "hreset"
   [(set_attr "type" "other")
    (set_attr "length" "4")])
 
+;; Spaceship optimization
+(define_expand "spaceship<mode>3"
+  [(match_operand:SI 0 "register_operand")
+   (match_operand:MODEF 1 "cmp_fp_expander_operand")
+   (match_operand:MODEF 2 "cmp_fp_expander_operand")]
+  "(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+   && TARGET_CMOVE && TARGET_IEEE_FP"
+{
+  ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
+  DONE;
+})
+
 (include "mmx.md")
 (include "sse.md")
 (include "sync.md")
--- gcc/config/i386/i386-protos.h.jj	2022-01-11 23:11:21.798298473 +0100
+++ gcc/config/i386/i386-protos.h	2022-01-14 17:57:53.570021792 +0100
@@ -150,6 +150,7 @@ extern bool ix86_expand_int_vec_cmp (rtx
 extern bool ix86_expand_fp_vec_cmp (rtx[]);
 extern void ix86_expand_sse_movcc (rtx, rtx, rtx, rtx);
 extern void ix86_expand_sse_unpack (rtx, rtx, bool, bool);
+extern void ix86_expand_fp_spaceship (rtx, rtx, rtx);
 extern bool ix86_expand_int_addcc (rtx[]);
 extern rtx_insn *ix86_expand_call (rtx, rtx, rtx, rtx, rtx, bool);
 extern bool ix86_call_use_plt_p (rtx);
--- gcc/config/i386/i386-expand.c.jj	2022-01-14 11:51:34.429384213 +0100
+++ gcc/config/i386/i386-expand.c	2022-01-14 21:02:09.446437810 +0100
@@ -2879,6 +2879,46 @@ ix86_expand_setcc (rtx dest, enum rtx_co
   emit_insn (gen_rtx_SET (dest, ret));
 }
 
+/* Expand floating point op0 <=> op1 if NaNs are honored.  */
+
+void
+ix86_expand_fp_spaceship (rtx dest, rtx op0, rtx op1)
+{
+  gcc_checking_assert (ix86_fp_comparison_strategy (GT) == IX86_FPCMP_COMI);
+  rtx gt = ix86_expand_fp_compare (GT, op0, op1);
+  rtx l0 = gen_label_rtx ();
+  rtx l1 = gen_label_rtx ();
+  rtx l2 = gen_label_rtx ();
+  rtx lend = gen_label_rtx ();
+  rtx un = gen_rtx_fmt_ee (UNORDERED, VOIDmode,
+			   gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
+  rtx tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, un,
+				  gen_rtx_LABEL_REF (VOIDmode, l2), pc_rtx);
+  rtx_insn *jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
+  add_reg_br_prob_note (jmp, profile_probability:: very_unlikely ());
+  rtx eq = gen_rtx_fmt_ee (UNEQ, VOIDmode,
+			   gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, eq,
+			      gen_rtx_LABEL_REF (VOIDmode, l0), pc_rtx);
+  jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
+  add_reg_br_prob_note (jmp, profile_probability::unlikely ());
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, gt,
+			      gen_rtx_LABEL_REF (VOIDmode, l1), pc_rtx);
+  jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
+  add_reg_br_prob_note (jmp, profile_probability::even ());
+  emit_move_insn (dest, constm1_rtx);
+  emit_jump (lend);
+  emit_label (l0);
+  emit_move_insn (dest, const0_rtx);
+  emit_jump (lend);
+  emit_label (l1);
+  emit_move_insn (dest, const1_rtx);
+  emit_jump (lend);
+  emit_label (l2);
+  emit_move_insn (dest, const2_rtx);
+  emit_label (lend);
+}
+
 /* Expand comparison setting or clearing carry flag.  Return true when
    successful and set pop for the operation.  */
 static bool
--- gcc/doc/md.texi.jj	2022-01-13 22:29:15.329831975 +0100
+++ gcc/doc/md.texi	2022-01-14 21:07:11.533180235 +0100
@@ -8055,6 +8055,15 @@ inclusive and operand 1 exclusive.
 If this pattern is not defined, a call to the library function
 @code{__clear_cache} is used.
 
+@cindex @code{spaceship@var{m}3} instruction pattern
+@item @samp{spaceship@var{m}3}
+Initialize output operand 0 with mode of integer type to -1, 0, 1 or 2
+if operand 1 with mode @var{m} compares less than operand 2, equal to
+operand 2, greater than operand 2 or is unordered with operand 2.
+@var{m} should be a scalar floating point mode.
+
+This pattern is not allowed to @code{FAIL}.
+
 @end table
 
 @end ifset
--- gcc/testsuite/gcc.target/i386/pr103973-1.c.jj	2022-01-14 19:21:42.080770182 +0100
+++ gcc/testsuite/gcc.target/i386/pr103973-1.c	2022-01-14 20:52:22.764713273 +0100
@@ -0,0 +1,98 @@
+/* PR target/103973 */
+/* { dg-do run } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-final { scan-assembler-not "'\tucomisd" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "\tcomisd" 4 { target { ! ia32 } } } } */
+
+__attribute__((noipa)) int m1 (void) { return -1; }
+__attribute__((noipa)) int p0 (void) { return 0; }
+__attribute__((noipa)) int p1 (void) { return 1; }
+__attribute__((noipa)) int p2 (void) { return 2; }
+
+__attribute__((noipa)) int
+foo (double a, double b)
+{
+  if (a == b)
+    return 0;
+  if (a < b)
+    return -1;
+  if (a > b)
+    return 1;
+  return 2;
+}
+
+__attribute__((noipa)) int
+bar (double a, double b)
+{
+  if (a == b)
+    return p0 ();
+  if (a < b)
+    return m1 ();
+  if (a > b)
+    return p1 ();
+  return p2 ();
+}
+
+__attribute__((noipa)) int
+baz (double a, double b)
+{
+  if (a == b)
+    return p0 ();
+  if (b < a)
+    return p1 ();
+  if (a < b)
+    return m1 ();
+  return p2 ();
+}
+
+__attribute__((noipa)) int
+qux (double a)
+{
+  if (a != 0.0f)
+    {
+      if (a <= 0.0f)
+	return -1;
+      if (a >= 0.0f)
+	return 1;
+      return 2;
+    }
+  return 0;
+}
+
+int
+main ()
+{
+  double m5 = -5.0f;
+  double p5 = 5.0f;
+  volatile double p0 = 0.0f;
+  double nan = p0 / p0;
+  if (foo (p5, p5) != 0 || foo (m5, m5) != 0)
+    __builtin_abort ();
+  if (foo (m5, p5) != -1 || foo (p5, m5) != 1)
+    __builtin_abort ();
+  if (foo (m5, nan) != 2 || foo (nan, p5) != 2)
+    __builtin_abort ();
+  if (foo (nan, nan) != 2)
+    __builtin_abort ();
+  if (bar (p5, p5) != 0 || bar (m5, m5) != 0)
+    __builtin_abort ();
+  if (bar (m5, p5) != -1 || bar (p5, m5) != 1)
+    __builtin_abort ();
+  if (bar (m5, nan) != 2 || bar (nan, p5) != 2)
+    __builtin_abort ();
+  if (bar (nan, nan) != 2)
+    __builtin_abort ();
+  if (baz (p5, p5) != 0 || baz (m5, m5) != 0)
+    __builtin_abort ();
+  if (baz (m5, p5) != -1 || baz (p5, m5) != 1)
+    __builtin_abort ();
+  if (baz (m5, nan) != 2 || baz (nan, p5) != 2)
+    __builtin_abort ();
+  if (baz (nan, nan) != 2)
+    __builtin_abort ();
+  if (qux (p0) != 0 || qux (nan) != 2)
+    __builtin_abort ();
+  if (qux (m5) != -1 || qux (p5) != 1)
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.target/i386/pr103973-2.c.jj	2022-01-14 19:21:50.766647425 +0100
+++ gcc/testsuite/gcc.target/i386/pr103973-2.c	2022-01-14 20:43:40.230085717 +0100
@@ -0,0 +1,7 @@
+/* PR target/103973 */
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -march=i686" } */
+/* { dg-final { scan-assembler-not "'\tfucom" } } */
+/* { dg-final { scan-assembler-times "\tfcom" 4 } } */
+
+#include "pr103973-1.c"
--- gcc/testsuite/gcc.target/i386/pr103973-3.c.jj	2022-01-14 19:23:15.645447824 +0100
+++ gcc/testsuite/gcc.target/i386/pr103973-3.c	2022-01-14 20:43:46.415998435 +0100
@@ -0,0 +1,8 @@
+/* PR target/103973 */
+/* { dg-do run } */
+/* { dg-options "-O2 -save-temps" } */
+/* { dg-final { scan-assembler-not "'\tucomiss" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "\tcomiss" 4 { target { ! ia32 } } } } */
+
+#define double float
+#include "pr103973-1.c"
--- gcc/testsuite/gcc.target/i386/pr103973-4.c.jj	2022-01-14 19:25:15.622752173 +0100
+++ gcc/testsuite/gcc.target/i386/pr103973-4.c	2022-01-14 20:43:53.518898224 +0100
@@ -0,0 +1,8 @@
+/* PR target/103973 */
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -march=i686" } */
+/* { dg-final { scan-assembler-not "'\tfucom" } } */
+/* { dg-final { scan-assembler-times "\tfcom" 4 } } */
+
+#define double float
+#include "pr103973-1.c"
--- gcc/testsuite/g++.target/i386/pr103973-1.C.jj	2022-01-14 20:44:35.225309786 +0100
+++ gcc/testsuite/g++.target/i386/pr103973-1.C	2022-01-14 20:55:04.611429778 +0100
@@ -0,0 +1,71 @@
+// PR target/103973
+// { dg-do run }
+// { dg-options "-O2 -std=c++20 -save-temps" }
+// { dg-final { scan-assembler-not "'\tucomisd" { target { ! ia32 } } } }
+// { dg-final { scan-assembler-times "\tcomisd" 2 { target { ! ia32 } } } }
+
+#include <compare>
+
+#ifndef double_type
+#define double_type double
+#endif
+
+__attribute__((noipa)) auto
+foo (double_type a, double_type b)
+{
+  return a <=> b;
+}
+
+__attribute__((noipa)) int
+bar (double_type a, double_type b)
+{
+  auto c = foo (a, b);
+  if (c == std::partial_ordering::less)
+    return -1;
+  if (c == std::partial_ordering::equivalent)
+    return 0;
+  if (c == std::partial_ordering::greater)
+    return 1;
+  return 2;
+}
+
+__attribute__((noipa)) auto
+baz (double_type a)
+{
+  return a <=> 0.0f;
+}
+
+__attribute__((noipa)) int
+qux (double_type a)
+{
+  auto c = baz (a);
+  if (c == std::partial_ordering::less)
+    return -1;
+  if (c == std::partial_ordering::equivalent)
+    return 0;
+  if (c == std::partial_ordering::greater)
+    return 1;
+  return 2;
+}
+
+int
+main ()
+{
+  double_type m5 = -5.0;
+  double_type p5 = 5.0;
+  volatile double_type p0 = 0.0;
+  double_type nan = p0 / p0;
+  if (bar (p5, p5) != 0 || bar (m5, m5) != 0)
+    __builtin_abort ();
+  if (bar (m5, p5) != -1 || bar (p5, m5) != 1)
+    __builtin_abort ();
+  if (bar (m5, nan) != 2 || bar (nan, p5) != 2)
+    __builtin_abort ();
+  if (bar (nan, nan) != 2)
+    __builtin_abort ();
+  if (qux (p0) != 0 || qux (nan) != 2)
+    __builtin_abort ();
+  if (qux (m5) != -1 || qux (p5) != 1)
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/g++.target/i386/pr103973-2.C.jj	2022-01-14 20:52:50.309324647 +0100
+++ gcc/testsuite/g++.target/i386/pr103973-2.C	2022-01-14 20:55:41.967902715 +0100
@@ -0,0 +1,7 @@
+// PR target/103973
+// { dg-do compile { target ia32 } }
+// { dg-options "-O2 -march=i686 -std=c++20" }
+// { dg-final { scan-assembler-not "'\tfucom" } }
+// { dg-final { scan-assembler-times "\tfcom" 2 } }
+
+#include "pr103973-1.C"
--- gcc/testsuite/g++.target/i386/pr103973-3.C.jj	2022-01-14 20:52:53.212283686 +0100
+++ gcc/testsuite/g++.target/i386/pr103973-3.C	2022-01-14 20:55:49.052802755 +0100
@@ -0,0 +1,8 @@
+// PR target/103973
+// { dg-do run }
+// { dg-options "-O2 -save-temps -std=c++20" }
+// { dg-final { scan-assembler-not "'\tucomiss" { target { ! ia32 } } } }
+// { dg-final { scan-assembler-times "\tcomiss" 2 { target { ! ia32 } } } }
+
+#define double_type float
+#include "pr103973-1.C"
--- gcc/testsuite/g++.target/i386/pr103973-4.C.jj	2022-01-14 20:52:55.951245044 +0100
+++ gcc/testsuite/g++.target/i386/pr103973-4.C	2022-01-14 20:55:55.723708635 +0100
@@ -0,0 +1,8 @@
+// PR target/103973
+// { dg-do compile { target ia32 } }
+// { dg-options "-O2 -march=i686 -std=c++20" }
+// { dg-final { scan-assembler-not "'\tfucom" } }
+// { dg-final { scan-assembler-times "\tfcom" 2 } }
+
+#define double_type float
+#include "pr103973-1.C"

	Jakub


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-01-17 12:36 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-14 22:56 [PATCH] widening_mul, i386: Improve spaceship expansion on x86 [PR103973] Jakub Jelinek
2022-01-15  8:29 ` Uros Bizjak
2022-01-15  9:56   ` Jakub Jelinek
2022-01-15 10:42     ` Uros Bizjak
2022-01-15 11:22       ` [PATCH] widening_mul, i386, v2: " Jakub Jelinek
2022-01-15 16:40         ` Uros Bizjak
2022-01-17 12:04         ` Richard Biener
2022-01-17 12:36           ` Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).