public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] i386: Fix _mm_[u]comixx_{ss,sd} codegen and add PF result. [PR106113]
       [not found] <20220714060206.1898798-1-lingling.kong@intel.com>
@ 2022-07-14  6:10 ` Kong, Lingling
  2022-07-15  2:09   ` Hongtao Liu
  0 siblings, 1 reply; 2+ messages in thread
From: Kong, Lingling @ 2022-07-14  6:10 UTC (permalink / raw)
  To: gcc-patches, Liu, Hongtao

Hi,

This patch fixes the _mm_[u]comixx_{ss,sd} codegen and takes the PF result into account.  The documented semantics of these intrinsics have changed over time.  For example, the old operation of `_mm_comieq_ss` was `RETURN ( a[31:0] == b[31:0] ) ? 1 : 0`, whereas the updated operation is `RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] == b[31:0] ) ? 1 : 0`.

OK for master?

gcc/ChangeLog:

	PR target/106113
	* config/i386/i386-builtin.def (BDESC): Fix [u]comi{ss,sd}
	comparison codes to match the updated intrinsic semantics.
	* config/i386/i386-expand.cc (ix86_ssecom_setcc):
	Add unordered check and mode for sse comi codegen.
	(ix86_expand_sse_comi): Add unordered check and check a different
	CCmode.
	(ix86_expand_sse_comi_round): Extract unordered check and mode part
	into ix86_ssecom_setcc.

gcc/testsuite/ChangeLog:

	PR target/106113
	* gcc.target/i386/avx-vcomisd-pr106113-2.c: New test.
	* gcc.target/i386/avx-vcomiss-pr106113-2.c: Ditto.
	* gcc.target/i386/avx-vucomisd-pr106113-2.c: Ditto.
	* gcc.target/i386/avx-vucomiss-pr106113-2.c: Ditto.
	* gcc.target/i386/sse-comiss-pr106113-1.c: Ditto.
	* gcc.target/i386/sse-comiss-pr106113-2.c: Ditto.
	* gcc.target/i386/sse-ucomiss-pr106113-1.c: Ditto.
	* gcc.target/i386/sse-ucomiss-pr106113-2.c: Ditto.
	* gcc.target/i386/sse2-comisd-pr106113-1.c: Ditto.
	* gcc.target/i386/sse2-comisd-pr106113-2.c: Ditto.
	* gcc.target/i386/sse2-ucomisd-pr106113-1.c: Ditto.
	* gcc.target/i386/sse2-ucomisd-pr106113-2.c: Ditto.
---
 gcc/config/i386/i386-builtin.def              |  32 ++--
 gcc/config/i386/i386-expand.cc                | 140 +++++++++++-------
 .../gcc.target/i386/avx-vcomisd-pr106113-2.c  |   8 +
 .../gcc.target/i386/avx-vcomiss-pr106113-2.c  |   8 +
 .../gcc.target/i386/avx-vucomisd-pr106113-2.c |   8 +
 .../gcc.target/i386/avx-vucomiss-pr106113-2.c |   8 +
 .../gcc.target/i386/sse-comiss-pr106113-1.c   |  19 +++
 .../gcc.target/i386/sse-comiss-pr106113-2.c   |  59 ++++++++
 .../gcc.target/i386/sse-ucomiss-pr106113-1.c  |  19 +++
 .../gcc.target/i386/sse-ucomiss-pr106113-2.c  |  59 ++++++++
 .../gcc.target/i386/sse2-comisd-pr106113-1.c  |  19 +++
 .../gcc.target/i386/sse2-comisd-pr106113-2.c  |  59 ++++++++
 .../gcc.target/i386/sse2-ucomisd-pr106113-1.c |  19 +++
 .../gcc.target/i386/sse2-ucomisd-pr106113-2.c |  59 ++++++++
 14 files changed, 450 insertions(+), 66 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx-vcomisd-pr106113-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx-vcomiss-pr106113-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx-vucomisd-pr106113-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx-vucomiss-pr106113-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-2.c

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index fd160935e67..acb7e8ca64b 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -35,30 +35,30 @@
         IX86_BUILTIN__BDESC_##NEXT_KIND##_FIRST - 1.  */
 
 BDESC_FIRST (comi, COMI,
-       OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0)
-BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0)
-BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0)
+       OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0)
+BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0)
+BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0)
 BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0)
 BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0)
-BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0)
-BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0)
-BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0)
-BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0)
+BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0)
+BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0)
+BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0)
+BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0)
 BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0)
 BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0)
-BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0)
+BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0)
 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0)
 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0)
 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0)
 BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0)
-BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0)
 
 BDESC_END (COMI, PCMPESTR)
 
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 6a3fcde5738..40f821e7a11 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -9770,47 +9770,121 @@ ix86_expand_sse_compare (const struct builtin_description *d,
   return target;
 }
 
+/* Subroutine of ix86_expand_sse_comi and ix86_expand_sse_comi_round to take
+   care of ordered EQ or unordered NE by generating a PF jump.  */
+
+static rtx
+ix86_ssecom_setcc (const enum rtx_code comparison,
+		   bool check_unordered, machine_mode mode,
+		   rtx set_dst, rtx target)
+{
+
+  rtx_code_label *label = NULL;
+
+  /* NB: For ordered EQ or unordered NE, check ZF alone isn't sufficient
+     with NAN operands.  */
+  if (check_unordered)
+    {
+      gcc_assert (comparison == EQ || comparison == NE);
+
+      rtx flag = gen_rtx_REG (CCFPmode, FLAGS_REG);
+      label = gen_label_rtx ();
+      rtx tmp = gen_rtx_fmt_ee (UNORDERED, VOIDmode, flag, const0_rtx);
+      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+				  gen_rtx_LABEL_REF (VOIDmode, label),
+				  pc_rtx);
+      emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
+    }
+
+  /* NB: Set CCFPmode and check a different CCmode which is in subset
+     of CCFPmode.  */
+  if (GET_MODE (set_dst) != mode)
+    {
+      gcc_assert (mode == CCAmode || mode == CCCmode
+		  || mode == CCOmode || mode == CCPmode
+		  || mode == CCSmode || mode == CCZmode);
+      set_dst = gen_rtx_REG (mode, FLAGS_REG);
+    }
+
+  emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+			  gen_rtx_fmt_ee (comparison, QImode,
+					  set_dst,
+					  const0_rtx)));
+
+  if (label)
+    emit_label (label);
+
+  return SUBREG_REG (target);
+}
+
 /* Subroutine of ix86_expand_builtin to take care of comi insns.  */
 
 static rtx
 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
 		      rtx target)
 {
-  rtx pat;
+  rtx pat, set_dst;
   tree arg0 = CALL_EXPR_ARG (exp, 0);
   tree arg1 = CALL_EXPR_ARG (exp, 1);
   rtx op0 = expand_normal (arg0);
   rtx op1 = expand_normal (arg1);
-  machine_mode mode0 = insn_data[d->icode].operand[0].mode;
-  machine_mode mode1 = insn_data[d->icode].operand[1].mode;
-  enum rtx_code comparison = d->comparison;
+  enum insn_code icode = d->icode;
+  const struct insn_data_d *insn_p = &insn_data[icode];
+  machine_mode mode0 = insn_p->operand[0].mode;
+  machine_mode mode1 = insn_p->operand[1].mode;
 
   if (VECTOR_MODE_P (mode0))
     op0 = safe_vector_operand (op0, mode0);
   if (VECTOR_MODE_P (mode1))
     op1 = safe_vector_operand (op1, mode1);
 
+  enum rtx_code comparison = d->comparison;
+  rtx const_val = const0_rtx;
+
+  bool check_unordered = false;
+  machine_mode mode = CCFPmode;
+  switch (comparison)
+    {
+    case LE:	/* -> GE  */
+    case LT:	/* -> GT  */
+      std::swap (op0, op1);
+      comparison = swap_condition (comparison);
+      /* FALLTHRU */
+    case GT:
+    case GE:
+      break;
+    case EQ:
+      check_unordered = true;
+      mode = CCZmode;
+      break;
+    case NE:
+      check_unordered = true;
+      mode = CCZmode;
+      const_val = const1_rtx;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
   target = gen_reg_rtx (SImode);
-  emit_move_insn (target, const0_rtx);
+  emit_move_insn (target, const_val);
   target = gen_rtx_SUBREG (QImode, target, 0);
 
   if ((optimize && !register_operand (op0, mode0))
-      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
+      || !insn_p->operand[0].predicate (op0, mode0))
     op0 = copy_to_mode_reg (mode0, op0);
   if ((optimize && !register_operand (op1, mode1))
-      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
+      || !insn_p->operand[1].predicate (op1, mode1))
     op1 = copy_to_mode_reg (mode1, op1);
 
-  pat = GEN_FCN (d->icode) (op0, op1);
+  pat = GEN_FCN (icode) (op0, op1);
   if (! pat)
     return 0;
-  emit_insn (pat);
-  emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
-			  gen_rtx_fmt_ee (comparison, QImode,
-					  SET_DEST (pat),
-					  const0_rtx)));
 
-  return SUBREG_REG (target);
+  set_dst = SET_DEST (pat);
+  emit_insn (pat);
+  return ix86_ssecom_setcc (comparison, check_unordered, mode,
+			    set_dst, target);
 }
 
 /* Subroutines of ix86_expand_args_builtin to take care of round insns.  */
@@ -11410,42 +11484,8 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
 
   emit_insn (pat);
 
-  rtx_code_label *label = NULL;
-
-  /* NB: For ordered EQ or unordered NE, check ZF alone isn't sufficient
-     with NAN operands.  */
-  if (check_unordered)
-    {
-      gcc_assert (comparison == EQ || comparison == NE);
-
-      rtx flag = gen_rtx_REG (CCFPmode, FLAGS_REG);
-      label = gen_label_rtx ();
-      rtx tmp = gen_rtx_fmt_ee (UNORDERED, VOIDmode, flag, const0_rtx);
-      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
-				  gen_rtx_LABEL_REF (VOIDmode, label),
-				  pc_rtx);
-      emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
-    }
-
-  /* NB: Set CCFPmode and check a different CCmode which is in subset
-     of CCFPmode.  */
-  if (GET_MODE (set_dst) != mode)
-    {
-      gcc_assert (mode == CCAmode || mode == CCCmode
-		  || mode == CCOmode || mode == CCPmode
-		  || mode == CCSmode || mode == CCZmode);
-      set_dst = gen_rtx_REG (mode, FLAGS_REG);
-    }
-
-  emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
-			  gen_rtx_fmt_ee (comparison, QImode,
-					  set_dst,
-					  const0_rtx)));
-
-  if (label)
-    emit_label (label);
-
-  return SUBREG_REG (target);
+  return ix86_ssecom_setcc (comparison, check_unordered, mode,
+			    set_dst, target);
 }
 
 static rtx
diff --git a/gcc/testsuite/gcc.target/i386/avx-vcomisd-pr106113-2.c b/gcc/testsuite/gcc.target/i386/avx-vcomisd-pr106113-2.c
new file mode 100644
index 00000000000..9025b1b57b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-vcomisd-pr106113-2.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx" } */
+
+#define CHECK_H "avx-check.h"
+#define TEST avx_test
+
+#include "sse2-comisd-pr106113-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx-vcomiss-pr106113-2.c b/gcc/testsuite/gcc.target/i386/avx-vcomiss-pr106113-2.c
new file mode 100644
index 00000000000..dc0bf514069
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-vcomiss-pr106113-2.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx" } */
+
+#define CHECK_H "avx-check.h"
+#define TEST avx_test
+
+#include "sse-comiss-pr106113-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx-vucomisd-pr106113-2.c b/gcc/testsuite/gcc.target/i386/avx-vucomisd-pr106113-2.c
new file mode 100644
index 00000000000..3b0c5db2332
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-vucomisd-pr106113-2.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx" } */
+
+#define CHECK_H "avx-check.h"
+#define TEST avx_test
+
+#include "sse2-ucomisd-pr106113-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx-vucomiss-pr106113-2.c b/gcc/testsuite/gcc.target/i386/avx-vucomiss-pr106113-2.c
new file mode 100644
index 00000000000..d67e4adffeb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-vucomiss-pr106113-2.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx" } */
+
+#define CHECK_H "avx-check.h"
+#define TEST avx_test
+
+#include "sse-ucomiss-pr106113-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-1.c b/gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-1.c
new file mode 100644
index 00000000000..95621029bf6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-msse -O2" } */
+/* { dg-final { scan-assembler-times "comiss\[ \\t\]+\[^\n\]*\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6  } } */
+/* { dg-final { scan-assembler-times "jp" 2 } } */
+#include <xmmintrin.h>
+
+volatile __m128 x1, x2;
+volatile int res;
+
+void extern
+sse_comi_test (void)
+{
+  res = _mm_comieq_ss (x1, x2);
+  res = _mm_comilt_ss (x1, x2);
+  res = _mm_comile_ss (x1, x2);
+  res = _mm_comigt_ss (x1, x2);
+  res = _mm_comige_ss (x1, x2);
+  res = _mm_comineq_ss (x1, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-2.c b/gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-2.c
new file mode 100644
index 00000000000..a90f3337034
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-2.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse" } */
+/* { dg-require-effective-target sse } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse_test
+#endif
+
+#include CHECK_H
+
+#include <xmmintrin.h>
+
+#define CMP(PRED, EXP) \
+      res = _mm_comi##PRED##_ss (__A, __B);           \
+        if (res != EXP)                               \
+	    abort ();
+static void 
+__attribute__((noinline, unused))
+do_check (float s1, float s2)
+{
+  __m128 __A = _mm_load_ss (&s1);
+  __m128 __B = _mm_load_ss (&s2);
+  int res;
+  
+  CMP (eq, (!__builtin_isunordered (s1, s2) && s1 == s2));
+  CMP (ge, (!__builtin_isunordered (s1, s2) && s1 >= s2));
+  CMP (gt, (!__builtin_isunordered (s1, s2) && s1 > s2));
+  CMP (lt, (!__builtin_isunordered (s1, s2) && s1 < s2));
+  CMP (le, (!__builtin_isunordered (s1, s2) && s1 <= s2));
+  CMP (neq, (__builtin_isunordered (s1, s2) || s1 != s2));
+}
+
+static void
+TEST (void)
+{
+  struct
+    {
+      float x1;
+      float x2;
+    }
+  inputs[] =
+    {
+      { 4.3, 2.18 },
+      { -4.3, 3.18 },
+      { __builtin_nanf (""), -5.8 },
+      { -4.8, __builtin_nansf ("") },
+      { 3.8, __builtin_nansf ("") },
+      { 4.2, 4.2 },
+      { __builtin_nanf (""), __builtin_nansf ("") },
+    };
+  int i;
+
+  for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++)
+    do_check (inputs[i].x1, inputs[i].x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-1.c b/gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-1.c
new file mode 100644
index 00000000000..e337e11a557
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-msse -O2" } */
+/* { dg-final { scan-assembler-times "ucomiss\[ \\t\]+\[^\n\]*\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6  } } */
+/* { dg-final { scan-assembler-times "jp" 2 } } */
+#include <xmmintrin.h>
+
+volatile __m128 x1, x2;
+volatile int res;
+
+void extern
+sse_ucomi_test (void)
+{
+  res = _mm_ucomieq_ss (x1, x2);
+  res = _mm_ucomilt_ss (x1, x2);
+  res = _mm_ucomile_ss (x1, x2);
+  res = _mm_ucomigt_ss (x1, x2);
+  res = _mm_ucomige_ss (x1, x2);
+  res = _mm_ucomineq_ss (x1, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-2.c b/gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-2.c
new file mode 100644
index 00000000000..37d845025c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-2.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse" } */
+/* { dg-require-effective-target sse } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse_test
+#endif
+
+#include CHECK_H
+
+#include <xmmintrin.h>
+
+#define CMP(PRED, EXP) \
+      res = _mm_ucomi##PRED##_ss (__A, __B);           \
+        if (res != EXP)                               \
+	    abort ();
+static void 
+__attribute__((noinline, unused))
+do_check (float s1, float s2)
+{
+  __m128 __A = _mm_load_ss (&s1);
+  __m128 __B = _mm_load_ss (&s2);
+  int res;
+  
+  CMP (eq, (!__builtin_isunordered (s1, s2) && s1 == s2));
+  CMP (ge, (!__builtin_isunordered (s1, s2) && s1 >= s2));
+  CMP (gt, (!__builtin_isunordered (s1, s2) && s1 > s2));
+  CMP (lt, (!__builtin_isunordered (s1, s2) && s1 < s2));
+  CMP (le, (!__builtin_isunordered (s1, s2) && s1 <= s2));
+  CMP (neq, (__builtin_isunordered (s1, s2) || s1 != s2));
+}
+
+static void
+TEST (void)
+{
+  struct
+    {
+      float x1;
+      float x2;
+    }
+  inputs[] =
+    {
+      { 4.3, 2.18 },
+      { -4.3, 3.18 },
+      { __builtin_nanf (""), -5.8 },
+      { -4.8, __builtin_nansf ("") },
+      { 3.8, __builtin_nansf ("") },
+      { 4.2, 4.2 },
+      { __builtin_nanf (""), __builtin_nansf ("") },
+    };
+  int i;
+
+  for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++)
+    do_check (inputs[i].x1, inputs[i].x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-1.c b/gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-1.c
new file mode 100644
index 00000000000..6268977d268
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-msse2 -O2" } */
+/* { dg-final { scan-assembler-times "comisd\[ \\t\]+\[^\n\]*\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6  } } */
+/* { dg-final { scan-assembler-times "jp" 2 } } */
+#include <xmmintrin.h>
+
+volatile __m128d x1, x2;
+volatile int res;
+
+void extern
+sse2_comisd_test (void)
+{
+  res = _mm_comieq_sd (x1, x2);
+  res = _mm_comilt_sd (x1, x2);
+  res = _mm_comile_sd (x1, x2);
+  res = _mm_comigt_sd (x1, x2);
+  res = _mm_comige_sd (x1, x2);
+  res = _mm_comineq_sd (x1, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-2.c b/gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-2.c
new file mode 100644
index 00000000000..f49771c9212
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-2.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+#include <emmintrin.h>
+
+#define CMP(PRED, EXP) \
+      res = _mm_comi##PRED##_sd (__A, __B);           \
+        if (res != EXP)                               \
+	    abort ();
+static void 
+__attribute__((noinline, unused))
+do_check (double s1, double s2)
+{
+  __m128d __A = _mm_load_sd (&s1);
+  __m128d __B = _mm_load_sd (&s2);
+  int res;
+  
+  CMP (eq, (!__builtin_isunordered (s1, s2) && s1 == s2));
+  CMP (ge, (!__builtin_isunordered (s1, s2) && s1 >= s2));
+  CMP (gt, (!__builtin_isunordered (s1, s2) && s1 > s2));
+  CMP (lt, (!__builtin_isunordered (s1, s2) && s1 < s2));
+  CMP (le, (!__builtin_isunordered (s1, s2) && s1 <= s2));
+  CMP (neq, (__builtin_isunordered (s1, s2) || s1 != s2));
+}
+
+static void
+TEST (void)
+{
+  struct
+    {
+      double x1;
+      double x2;
+    }
+  inputs[] =
+    {
+      { 4.3, 2.18 },
+      { -4.3, 3.18 },
+      { __builtin_nan (""), -5.8 },
+      { -4.8, __builtin_nans ("") },
+      { 3.8, __builtin_nans ("") },
+      { 4.2, 4.2 },
+      { __builtin_nan (""), __builtin_nans ("") },
+    };
+  int i;
+
+  for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++)
+    do_check (inputs[i].x1, inputs[i].x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-1.c b/gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-1.c
new file mode 100644
index 00000000000..e64c0ace0cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-msse2 -O2" } */
+/* { dg-final { scan-assembler-times "ucomisd\[ \\t\]+\[^\n\]*\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6  } } */
+/* { dg-final { scan-assembler-times "jp" 2 } } */
+#include <xmmintrin.h>
+
+volatile __m128d x1, x2;
+volatile int res;
+
+void extern
+sse2_ucomisd_test (void)
+{
+  res = _mm_ucomieq_sd (x1, x2);
+  res = _mm_ucomilt_sd (x1, x2);
+  res = _mm_ucomile_sd (x1, x2);
+  res = _mm_ucomigt_sd (x1, x2);
+  res = _mm_ucomige_sd (x1, x2);
+  res = _mm_ucomineq_sd (x1, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-2.c b/gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-2.c
new file mode 100644
index 00000000000..606a8971c26
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-2.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+#include <emmintrin.h>
+
+#define CMP(PRED, EXP) \
+      res = _mm_ucomi##PRED##_sd (__A, __B);           \
+        if (res != EXP)                               \
+	    abort ();
+static void 
+__attribute__((noinline, unused))
+do_check (double s1, double s2)
+{
+  __m128d __A = _mm_load_sd (&s1);
+  __m128d __B = _mm_load_sd (&s2);
+  int res;
+  
+  CMP (eq, (!__builtin_isunordered (s1, s2) && s1 == s2));
+  CMP (ge, (!__builtin_isunordered (s1, s2) && s1 >= s2));
+  CMP (gt, (!__builtin_isunordered (s1, s2) && s1 > s2));
+  CMP (lt, (!__builtin_isunordered (s1, s2) && s1 < s2));
+  CMP (le, (!__builtin_isunordered (s1, s2) && s1 <= s2));
+  CMP (neq, (__builtin_isunordered (s1, s2) || s1 != s2));
+}
+
+static void
+TEST (void)
+{
+  struct
+    {
+      double x1;
+      double x2;
+    }
+  inputs[] =
+    {
+      { 4.3, 2.18 },
+      { -4.3, 3.18 },
+      { __builtin_nan (""), -5.8 },
+      { -4.8, __builtin_nans ("") },
+      { 3.8, __builtin_nans ("") },
+      { 4.2, 4.2 },
+      { __builtin_nan (""), __builtin_nans ("") },
+    };
+  int i;
+
+  for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++)
+    do_check (inputs[i].x1, inputs[i].x2);
+}
-- 
2.18.2


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] i386: Fix _mm_[u]comixx_{ss,sd} codegen and add PF result. [PR106113]
  2022-07-14  6:10 ` [PATCH] i386: Fix _mm_[u]comixx_{ss,sd} codegen and add PF result. [PR106113] Kong, Lingling
@ 2022-07-15  2:09   ` Hongtao Liu
  0 siblings, 0 replies; 2+ messages in thread
From: Hongtao Liu @ 2022-07-15  2:09 UTC (permalink / raw)
  To: Kong, Lingling; +Cc: gcc-patches, Liu, Hongtao

On Thu, Jul 14, 2022 at 2:11 PM Kong, Lingling via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi,
>
> The patch is to fix _mm_[u]comixx_{ss,sd} codegen and add PF result.  These intrinsics have changed over time, like `_mm_comieq_ss ` old operation is `RETURN ( a[31:0] == b[31:0] ) ? 1 : 0`, and new operation update is `RETURN ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] == b[31:0] ) ? 1 : 0`.
>
> OK for master?
All _mm_comiXX_ss intrinsics use an ordered compare, except for
_mm_comineq_ss, which uses an unordered compare; with this patch the
behavior is aligned with the intrinsics guide.
Ok for trunk.
>
> gcc/ChangeLog:
>
>         PR target/106113
>         * config/i386/i386-builtin.def (BDESC): Fix [u]comi{ss,sd}
>         comparison due to intrinsics changed over time.
>         * config/i386/i386-expand.cc (ix86_ssecom_setcc):
>         Add unordered check and mode for sse comi codegen.
>         (ix86_expand_sse_comi): Add unordered check and check a different
>         CCmode.
>         (ix86_expand_sse_comi_round):Extract unordered check and mode part
>         in ix86_ssecom_setcc.
>
> gcc/testsuite/ChangeLog:
>
>         PR target/106113
>         * gcc.target/i386/avx-vcomisd-pr106113-2.c: New test.
>         * gcc.target/i386/avx-vcomiss-pr106113-2.c: Ditto.
>         * gcc.target/i386/avx-vucomisd-pr106113-2.c: Ditto.
>         * gcc.target/i386/avx-vucomiss-pr106113-2.c: Ditto.
>         * gcc.target/i386/sse-comiss-pr106113-1.c: Ditto.
>         * gcc.target/i386/sse-comiss-pr106113-2.c: Ditto.
>         * gcc.target/i386/sse-ucomiss-pr106113-1.c: Ditto.
>         * gcc.target/i386/sse-ucomiss-pr106113-2.c: Ditto.
>         * gcc.target/i386/sse2-comisd-pr106113-1.c: Ditto.
>         * gcc.target/i386/sse2-comisd-pr106113-2.c: Ditto.
>         * gcc.target/i386/sse2-ucomisd-pr106113-1.c: Ditto.
>         * gcc.target/i386/sse2-ucomisd-pr106113-2.c: Ditto.
> ---
>  gcc/config/i386/i386-builtin.def              |  32 ++--
>  gcc/config/i386/i386-expand.cc                | 140 +++++++++++-------
>  .../gcc.target/i386/avx-vcomisd-pr106113-2.c  |   8 +
>  .../gcc.target/i386/avx-vcomiss-pr106113-2.c  |   8 +
>  .../gcc.target/i386/avx-vucomisd-pr106113-2.c |   8 +
>  .../gcc.target/i386/avx-vucomiss-pr106113-2.c |   8 +
>  .../gcc.target/i386/sse-comiss-pr106113-1.c   |  19 +++
>  .../gcc.target/i386/sse-comiss-pr106113-2.c   |  59 ++++++++
>  .../gcc.target/i386/sse-ucomiss-pr106113-1.c  |  19 +++
>  .../gcc.target/i386/sse-ucomiss-pr106113-2.c  |  59 ++++++++
>  .../gcc.target/i386/sse2-comisd-pr106113-1.c  |  19 +++
>  .../gcc.target/i386/sse2-comisd-pr106113-2.c  |  59 ++++++++
>  .../gcc.target/i386/sse2-ucomisd-pr106113-1.c |  19 +++
>  .../gcc.target/i386/sse2-ucomisd-pr106113-2.c |  59 ++++++++
>  14 files changed, 450 insertions(+), 66 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx-vcomisd-pr106113-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx-vcomiss-pr106113-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx-vucomisd-pr106113-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx-vucomiss-pr106113-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-2.c
>
> diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> index fd160935e67..acb7e8ca64b 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -35,30 +35,30 @@
>          IX86_BUILTIN__BDESC_##NEXT_KIND##_FIRST - 1.  */
>
>  BDESC_FIRST (comi, COMI,
> -       OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0)
> -BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0)
> -BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0)
> +       OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0)
> +BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0)
> +BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0)
>  BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0)
>  BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0)
> -BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0)
> -BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0)
> -BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0)
> -BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0)
> +BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0)
> +BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0)
> +BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0)
> +BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0)
>  BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0)
>  BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0)
> -BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0)
> -BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0)
> -BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0)
> -BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0)
> +BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0)
> +BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0)
> +BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0)
> +BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0)
>  BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0)
>  BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0)
> -BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0)
> -BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0)
> -BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0)
> -BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0)
> +BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0)
> +BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0)
> +BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0)
> +BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0)
>  BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0)
>  BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0)
> -BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0)
> +BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0)
>
>  BDESC_END (COMI, PCMPESTR)
>
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index 6a3fcde5738..40f821e7a11 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -9770,47 +9770,121 @@ ix86_expand_sse_compare (const struct builtin_description *d,
>    return target;
>  }
>
> +/* Subroutine of ix86_sse_comi and ix86_sse_comi_round to take care of
> + * ordered EQ or unordered NE, generate PF jump.  */
> +
> +static rtx
> +ix86_ssecom_setcc (const enum rtx_code comparison,
> +                  bool check_unordered, machine_mode mode,
> +                  rtx set_dst, rtx target)
> +{
> +
> +  rtx_code_label *label = NULL;
> +
> +  /* NB: For ordered EQ or unordered NE, check ZF alone isn't sufficient
> +     with NAN operands.  */
> +  if (check_unordered)
> +    {
> +      gcc_assert (comparison == EQ || comparison == NE);
> +
> +      rtx flag = gen_rtx_REG (CCFPmode, FLAGS_REG);
> +      label = gen_label_rtx ();
> +      rtx tmp = gen_rtx_fmt_ee (UNORDERED, VOIDmode, flag, const0_rtx);
> +      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
> +                                 gen_rtx_LABEL_REF (VOIDmode, label),
> +                                 pc_rtx);
> +      emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
> +    }
> +
> +  /* NB: Set CCFPmode and check a different CCmode which is in subset
> +     of CCFPmode.  */
> +  if (GET_MODE (set_dst) != mode)
> +    {
> +      gcc_assert (mode == CCAmode || mode == CCCmode
> +                 || mode == CCOmode || mode == CCPmode
> +                 || mode == CCSmode || mode == CCZmode);
> +      set_dst = gen_rtx_REG (mode, FLAGS_REG);
> +    }
> +
> +  emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
> +                         gen_rtx_fmt_ee (comparison, QImode,
> +                                         set_dst,
> +                                         const0_rtx)));
> +
> +  if (label)
> +    emit_label (label);
> +
> +  return SUBREG_REG (target);
> +}
> +
>  /* Subroutine of ix86_expand_builtin to take care of comi insns.  */
>
>  static rtx
>  ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
>                       rtx target)
>  {
> -  rtx pat;
> +  rtx pat, set_dst;
>    tree arg0 = CALL_EXPR_ARG (exp, 0);
>    tree arg1 = CALL_EXPR_ARG (exp, 1);
>    rtx op0 = expand_normal (arg0);
>    rtx op1 = expand_normal (arg1);
> -  machine_mode mode0 = insn_data[d->icode].operand[0].mode;
> -  machine_mode mode1 = insn_data[d->icode].operand[1].mode;
> -  enum rtx_code comparison = d->comparison;
> +  enum insn_code icode = d->icode;
> +  const struct insn_data_d *insn_p = &insn_data[icode];
> +  machine_mode mode0 = insn_p->operand[0].mode;
> +  machine_mode mode1 = insn_p->operand[1].mode;
>
>    if (VECTOR_MODE_P (mode0))
>      op0 = safe_vector_operand (op0, mode0);
>    if (VECTOR_MODE_P (mode1))
>      op1 = safe_vector_operand (op1, mode1);
>
> +  enum rtx_code comparison = d->comparison;
> +  rtx const_val = const0_rtx;
> +
> +  bool check_unordered = false;
> +  machine_mode mode = CCFPmode;
> +  switch (comparison)
> +    {
> +    case LE:   /* -> GE  */
> +    case LT:   /* -> GT  */
> +      std::swap (op0, op1);
> +      comparison = swap_condition (comparison);
> +      /* FALLTHRU */
> +    case GT:
> +    case GE:
> +      break;
> +    case EQ:
> +      check_unordered = true;
> +      mode = CCZmode;
> +      break;
> +    case NE:
> +      check_unordered = true;
> +      mode = CCZmode;
> +      const_val = const1_rtx;
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +
>    target = gen_reg_rtx (SImode);
> -  emit_move_insn (target, const0_rtx);
> +  emit_move_insn (target, const_val);
>    target = gen_rtx_SUBREG (QImode, target, 0);
>
>    if ((optimize && !register_operand (op0, mode0))
> -      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
> +      || !insn_p->operand[0].predicate (op0, mode0))
>      op0 = copy_to_mode_reg (mode0, op0);
>    if ((optimize && !register_operand (op1, mode1))
> -      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
> +      || !insn_p->operand[1].predicate (op1, mode1))
>      op1 = copy_to_mode_reg (mode1, op1);
>
> -  pat = GEN_FCN (d->icode) (op0, op1);
> +  pat = GEN_FCN (icode) (op0, op1);
>    if (! pat)
>      return 0;
> -  emit_insn (pat);
> -  emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
> -                         gen_rtx_fmt_ee (comparison, QImode,
> -                                         SET_DEST (pat),
> -                                         const0_rtx)));
>
> -  return SUBREG_REG (target);
> +  set_dst = SET_DEST (pat);
> +  emit_insn (pat);
> +  return ix86_ssecom_setcc (comparison, check_unordered, mode,
> +                           set_dst, target);
>  }
>
>  /* Subroutines of ix86_expand_args_builtin to take care of round insns.  */
> @@ -11410,42 +11484,8 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
>
>    emit_insn (pat);
>
> -  rtx_code_label *label = NULL;
> -
> -  /* NB: For ordered EQ or unordered NE, check ZF alone isn't sufficient
> -     with NAN operands.  */
> -  if (check_unordered)
> -    {
> -      gcc_assert (comparison == EQ || comparison == NE);
> -
> -      rtx flag = gen_rtx_REG (CCFPmode, FLAGS_REG);
> -      label = gen_label_rtx ();
> -      rtx tmp = gen_rtx_fmt_ee (UNORDERED, VOIDmode, flag, const0_rtx);
> -      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
> -                                 gen_rtx_LABEL_REF (VOIDmode, label),
> -                                 pc_rtx);
> -      emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
> -    }
> -
> -  /* NB: Set CCFPmode and check a different CCmode which is in subset
> -     of CCFPmode.  */
> -  if (GET_MODE (set_dst) != mode)
> -    {
> -      gcc_assert (mode == CCAmode || mode == CCCmode
> -                 || mode == CCOmode || mode == CCPmode
> -                 || mode == CCSmode || mode == CCZmode);
> -      set_dst = gen_rtx_REG (mode, FLAGS_REG);
> -    }
> -
> -  emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
> -                         gen_rtx_fmt_ee (comparison, QImode,
> -                                         set_dst,
> -                                         const0_rtx)));
> -
> -  if (label)
> -    emit_label (label);
> -
> -  return SUBREG_REG (target);
> +  return ix86_ssecom_setcc (comparison, check_unordered, mode,
> +                           set_dst, target);
>  }
>
>  static rtx
> diff --git a/gcc/testsuite/gcc.target/i386/avx-vcomisd-pr106113-2.c b/gcc/testsuite/gcc.target/i386/avx-vcomisd-pr106113-2.c
> new file mode 100644
> index 00000000000..9025b1b57b6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx-vcomisd-pr106113-2.c
> @@ -0,0 +1,8 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target avx } */
> +/* { dg-options "-O2 -mavx" } */
> +
> +#define CHECK_H "avx-check.h"
> +#define TEST avx_test
> +
> +#include "sse2-comisd-pr106113-2.c"
> diff --git a/gcc/testsuite/gcc.target/i386/avx-vcomiss-pr106113-2.c b/gcc/testsuite/gcc.target/i386/avx-vcomiss-pr106113-2.c
> new file mode 100644
> index 00000000000..dc0bf514069
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx-vcomiss-pr106113-2.c
> @@ -0,0 +1,8 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target avx } */
> +/* { dg-options "-O2 -mavx" } */
> +
> +#define CHECK_H "avx-check.h"
> +#define TEST avx_test
> +
> +#include "sse-comiss-pr106113-2.c"
> diff --git a/gcc/testsuite/gcc.target/i386/avx-vucomisd-pr106113-2.c b/gcc/testsuite/gcc.target/i386/avx-vucomisd-pr106113-2.c
> new file mode 100644
> index 00000000000..3b0c5db2332
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx-vucomisd-pr106113-2.c
> @@ -0,0 +1,8 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target avx } */
> +/* { dg-options "-O2 -mavx" } */
> +
> +#define CHECK_H "avx-check.h"
> +#define TEST avx_test
> +
> +#include "sse2-ucomisd-pr106113-2.c"
> diff --git a/gcc/testsuite/gcc.target/i386/avx-vucomiss-pr106113-2.c b/gcc/testsuite/gcc.target/i386/avx-vucomiss-pr106113-2.c
> new file mode 100644
> index 00000000000..d67e4adffeb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx-vucomiss-pr106113-2.c
> @@ -0,0 +1,8 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target avx } */
> +/* { dg-options "-O2 -mavx" } */
> +
> +#define CHECK_H "avx-check.h"
> +#define TEST avx_test
> +
> +#include "sse-ucomiss-pr106113-2.c"
> diff --git a/gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-1.c b/gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-1.c
> new file mode 100644
> index 00000000000..95621029bf6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-1.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-msse -O2" } */
> +/* { dg-final { scan-assembler-times "comiss\[ \\t\]+\[^\n\]*\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6  } } */
> +/* { dg-final { scan-assembler-times "jp" 2 } } */
> +#include <xmmintrin.h>
> +
> +volatile __m128 x1, x2;
> +volatile int res;
> +
> +void extern
> +sse_comi_test (void)
> +{
> +  res = _mm_comieq_ss (x1, x2);
> +  res = _mm_comilt_ss (x1, x2);
> +  res = _mm_comile_ss (x1, x2);
> +  res = _mm_comigt_ss (x1, x2);
> +  res = _mm_comige_ss (x1, x2);
> +  res = _mm_comineq_ss (x1, x2);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-2.c b/gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-2.c
> new file mode 100644
> index 00000000000..a90f3337034
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-2.c
> @@ -0,0 +1,59 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -msse" } */
> +/* { dg-require-effective-target sse } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "sse-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST sse_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include <xmmintrin.h>
> +
> +#define CMP(PRED, EXP) \
> +      res = _mm_comi##PRED##_ss (__A, __B);           \
> +        if (res != EXP)                               \
> +           abort ();
> +static void
> +__attribute__((noinline, unused))
> +do_check (float s1, float s2)
> +{
> +  __m128 __A = _mm_load_ss (&s1);
> +  __m128 __B = _mm_load_ss (&s2);
> +  int res;
> +
> +  CMP (eq, (!__builtin_isunordered (s1, s2) && s1 == s2));
> +  CMP (ge, (!__builtin_isunordered (s1, s2) && s1 >= s2));
> +  CMP (gt, (!__builtin_isunordered (s1, s2) && s1 > s2));
> +  CMP (lt, (!__builtin_isunordered (s1, s2) && s1 < s2));
> +  CMP (le, (!__builtin_isunordered (s1, s2) && s1 <= s2));
> +  CMP (neq, (__builtin_isunordered (s1, s2) || s1 != s2));
> +}
> +
> +static void
> +TEST (void)
> +{
> +  struct
> +    {
> +      float x1;
> +      float x2;
> +    }
> +  inputs[] =
> +    {
> +      { 4.3, 2.18 },
> +      { -4.3, 3.18 },
> +      { __builtin_nanf (""), -5.8 },
> +      { -4.8, __builtin_nansf ("") },
> +      { 3.8, __builtin_nansf ("") },
> +      { 4.2, 4.2 },
> +      { __builtin_nanf (""), __builtin_nansf ("") },
> +    };
> +  int i;
> +
> +  for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++)
> +    do_check (inputs[i].x1, inputs[i].x2);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-1.c b/gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-1.c
> new file mode 100644
> index 00000000000..e337e11a557
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-1.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-msse -O2" } */
> +/* { dg-final { scan-assembler-times "ucomiss\[ \\t\]+\[^\n\]*\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6  } } */
> +/* { dg-final { scan-assembler-times "jp" 2 } } */
> +#include <xmmintrin.h>
> +
> +volatile __m128 x1, x2;
> +volatile int res;
> +
> +void extern
> +sse_ucomi_test (void)
> +{
> +  res = _mm_ucomieq_ss (x1, x2);
> +  res = _mm_ucomilt_ss (x1, x2);
> +  res = _mm_ucomile_ss (x1, x2);
> +  res = _mm_ucomigt_ss (x1, x2);
> +  res = _mm_ucomige_ss (x1, x2);
> +  res = _mm_ucomineq_ss (x1, x2);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-2.c b/gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-2.c
> new file mode 100644
> index 00000000000..37d845025c8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-2.c
> @@ -0,0 +1,59 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -msse" } */
> +/* { dg-require-effective-target sse } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "sse-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST sse_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include <xmmintrin.h>
> +
> +#define CMP(PRED, EXP) \
> +      res = _mm_ucomi##PRED##_ss (__A, __B);           \
> +        if (res != EXP)                               \
> +           abort ();
> +static void
> +__attribute__((noinline, unused))
> +do_check (float s1, float s2)
> +{
> +  __m128 __A = _mm_load_ss (&s1);
> +  __m128 __B = _mm_load_ss (&s2);
> +  int res;
> +
> +  CMP (eq, (!__builtin_isunordered (s1, s2) && s1 == s2));
> +  CMP (ge, (!__builtin_isunordered (s1, s2) && s1 >= s2));
> +  CMP (gt, (!__builtin_isunordered (s1, s2) && s1 > s2));
> +  CMP (lt, (!__builtin_isunordered (s1, s2) && s1 < s2));
> +  CMP (le, (!__builtin_isunordered (s1, s2) && s1 <= s2));
> +  CMP (neq, (__builtin_isunordered (s1, s2) || s1 != s2));
> +}
> +
> +static void
> +TEST (void)
> +{
> +  struct
> +    {
> +      float x1;
> +      float x2;
> +    }
> +  inputs[] =
> +    {
> +      { 4.3, 2.18 },
> +      { -4.3, 3.18 },
> +      { __builtin_nanf (""), -5.8 },
> +      { -4.8, __builtin_nansf ("") },
> +      { 3.8, __builtin_nansf ("") },
> +      { 4.2, 4.2 },
> +      { __builtin_nanf (""), __builtin_nansf ("") },
> +    };
> +  int i;
> +
> +  for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++)
> +    do_check (inputs[i].x1, inputs[i].x2);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-1.c b/gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-1.c
> new file mode 100644
> index 00000000000..6268977d268
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-1.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-msse2 -O2" } */
> +/* { dg-final { scan-assembler-times "comisd\[ \\t\]+\[^\n\]*\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6  } } */
> +/* { dg-final { scan-assembler-times "jp" 2 } } */
> +#include <xmmintrin.h>
> +
> +volatile __m128d x1, x2;
> +volatile int res;
> +
> +void extern
> +sse2_comisd_test (void)
> +{
> +  res = _mm_comieq_sd (x1, x2);
> +  res = _mm_comilt_sd (x1, x2);
> +  res = _mm_comile_sd (x1, x2);
> +  res = _mm_comigt_sd (x1, x2);
> +  res = _mm_comige_sd (x1, x2);
> +  res = _mm_comineq_sd (x1, x2);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-2.c b/gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-2.c
> new file mode 100644
> index 00000000000..f49771c9212
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-2.c
> @@ -0,0 +1,59 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -msse2" } */
> +/* { dg-require-effective-target sse2 } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "sse2-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST sse2_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include <emmintrin.h>
> +
> +#define CMP(PRED, EXP) \
> +      res = _mm_comi##PRED##_sd (__A, __B);           \
> +        if (res != EXP)                               \
> +           abort ();
> +static void
> +__attribute__((noinline, unused))
> +do_check (double s1, double s2)
> +{
> +  __m128d __A = _mm_load_sd (&s1);
> +  __m128d __B = _mm_load_sd (&s2);
> +  int res;
> +
> +  CMP (eq, (!__builtin_isunordered (s1, s2) && s1 == s2));
> +  CMP (ge, (!__builtin_isunordered (s1, s2) && s1 >= s2));
> +  CMP (gt, (!__builtin_isunordered (s1, s2) && s1 > s2));
> +  CMP (lt, (!__builtin_isunordered (s1, s2) && s1 < s2));
> +  CMP (le, (!__builtin_isunordered (s1, s2) && s1 <= s2));
> +  CMP (neq, (__builtin_isunordered (s1, s2) || s1 != s2));
> +}
> +
> +static void
> +TEST (void)
> +{
> +  struct
> +    {
> +      double x1;
> +      double x2;
> +    }
> +  inputs[] =
> +    {
> +      { 4.3, 2.18 },
> +      { -4.3, 3.18 },
> +      { __builtin_nan (""), -5.8 },
> +      { -4.8, __builtin_nans ("") },
> +      { 3.8, __builtin_nans ("") },
> +      { 4.2, 4.2 },
> +      { __builtin_nan (""), __builtin_nans ("") },
> +    };
> +  int i;
> +
> +  for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++)
> +    do_check (inputs[i].x1, inputs[i].x2);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-1.c b/gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-1.c
> new file mode 100644
> index 00000000000..e64c0ace0cc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-1.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-msse2 -O2" } */
> +/* { dg-final { scan-assembler-times "ucomisd\[ \\t\]+\[^\n\]*\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6  } } */
> +/* { dg-final { scan-assembler-times "jp" 2 } } */
> +#include <xmmintrin.h>
> +
> +volatile __m128d x1, x2;
> +volatile int res;
> +
> +void extern
> +sse2_ucomisd_test (void)
> +{
> +  res = _mm_ucomieq_sd (x1, x2);
> +  res = _mm_ucomilt_sd (x1, x2);
> +  res = _mm_ucomile_sd (x1, x2);
> +  res = _mm_ucomigt_sd (x1, x2);
> +  res = _mm_ucomige_sd (x1, x2);
> +  res = _mm_ucomineq_sd (x1, x2);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-2.c b/gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-2.c
> new file mode 100644
> index 00000000000..606a8971c26
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-2.c
> @@ -0,0 +1,59 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -msse2" } */
> +/* { dg-require-effective-target sse2 } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "sse2-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST sse2_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include <emmintrin.h>
> +
> +#define CMP(PRED, EXP) \
> +      res = _mm_ucomi##PRED##_sd (__A, __B);           \
> +        if (res != EXP)                               \
> +           abort ();
> +static void
> +__attribute__((noinline, unused))
> +do_check (double s1, double s2)
> +{
> +  __m128d __A = _mm_load_sd (&s1);
> +  __m128d __B = _mm_load_sd (&s2);
> +  int res;
> +
> +  CMP (eq, (!__builtin_isunordered (s1, s2) && s1 == s2));
> +  CMP (ge, (!__builtin_isunordered (s1, s2) && s1 >= s2));
> +  CMP (gt, (!__builtin_isunordered (s1, s2) && s1 > s2));
> +  CMP (lt, (!__builtin_isunordered (s1, s2) && s1 < s2));
> +  CMP (le, (!__builtin_isunordered (s1, s2) && s1 <= s2));
> +  CMP (neq, (__builtin_isunordered (s1, s2) || s1 != s2));
> +}
> +
> +static void
> +TEST (void)
> +{
> +  struct
> +    {
> +      double x1;
> +      double x2;
> +    }
> +  inputs[] =
> +    {
> +      { 4.3, 2.18 },
> +      { -4.3, 3.18 },
> +      { __builtin_nan (""), -5.8 },
> +      { -4.8, __builtin_nans ("") },
> +      { 3.8, __builtin_nans ("") },
> +      { 4.2, 4.2 },
> +      { __builtin_nan (""), __builtin_nans ("") },
> +    };
> +  int i;
> +
> +  for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++)
> +    do_check (inputs[i].x1, inputs[i].x2);
> +}
> --
> 2.18.2
>


-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-07-15  2:09 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20220714060206.1898798-1-lingling.kong@intel.com>
2022-07-14  6:10 ` [PATCH] i386: Fix _mm_[u]comixx_{ss,sd} codegen and add PF result. [PR106113] Kong, Lingling
2022-07-15  2:09   ` Hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).