public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Ensure vcond* expansion doesn't fail on x86 (PR target/50310)
@ 2011-09-07 19:25 Jakub Jelinek
  2011-09-07 20:02 ` Uros Bizjak
  0 siblings, 1 reply; 7+ messages in thread
From: Jakub Jelinek @ 2011-09-07 19:25 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

Hi!

The attached testcase ICEs, because the vectorizer assumes that if vcond*
is available, it supports all comparisons, not just a subset of them.
With -mavx vcmpd etc. already support all the needed comparisons (and
several more - we wouldn't even need to swap the arguments), for SSE
the only missing ones (LTGT and UNEQ) can be handled as ORDERED & NE
resp. UNORDERED | EQ.

Bootstrapped/regtested on x86_64-linux and i686-linux (on non-AVX host),
plus regtested on x86_64-linux on AVX box.  Ok for trunk and 4.6?

2011-09-07  Jakub Jelinek  <jakub@redhat.com>

	PR target/50310
	* config/i386/i386.c (ix86_prepare_sse_fp_compare_args): For
	TARGET_AVX return code for LTGT and UNEQ.
	(ix86_expand_fp_vcond): Handle LTGT and UNEQ.

	* gcc.c-torture/execute/ieee/pr50310.c: New test.
	* gcc.dg/pr50310-2.c: New test.

--- gcc/config/i386/i386.c.jj	2011-09-02 16:29:38.000000000 +0200
+++ gcc/config/i386/i386.c	2011-09-07 13:34:17.000000000 +0200
@@ -18308,6 +18308,10 @@ ix86_prepare_sse_fp_compare_args (rtx de
     {
     case LTGT:
     case UNEQ:
+      /* With AVX these are supported directly.  */
+      if (TARGET_AVX)
+	break;
+
       /* We have no LTGT as an operator.  We could implement it with
 	 NE & ORDERED, but this requires an extra temporary.  It's
 	 not clear that it's worth it.  */
@@ -18559,7 +18563,32 @@ ix86_expand_fp_vcond (rtx operands[])
   code = ix86_prepare_sse_fp_compare_args (operands[0], code,
 					   &operands[4], &operands[5]);
   if (code == UNKNOWN)
-    return false;
+    {
+      rtx temp;
+      switch (GET_CODE (operands[3]))
+	{
+	case LTGT:
+	  temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
+				      operands[5], operands[0], operands[0]);
+	  cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
+				     operands[5], operands[1], operands[2]);
+	  code = AND;
+	  break;
+	case UNEQ:
+	  temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
+				      operands[5], operands[0], operands[0]);
+	  cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
+				     operands[5], operands[1], operands[2]);
+	  code = IOR;
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
+				 OPTAB_DIRECT);
+      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
+      return true;
+    }
 
   if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
 				 operands[5], operands[1], operands[2]))
--- gcc/testsuite/gcc.c-torture/execute/ieee/pr50310.c.jj	2011-09-07 14:16:12.000000000 +0200
+++ gcc/testsuite/gcc.c-torture/execute/ieee/pr50310.c	2011-09-07 14:40:57.000000000 +0200
@@ -0,0 +1,73 @@
+/* PR target/50310 */
+
+extern void abort (void);
+double s1[4], s2[4], s3[64];
+
+void
+foo (void)
+{
+  int i;
+  for (i = 0; i < 4; i++)
+    s3[0 * 4 + i] = __builtin_isgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[1 * 4 + i] = (!__builtin_isgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[2 * 4 + i] = __builtin_isgreaterequal (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[3 * 4 + i] = (!__builtin_isgreaterequal (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[4 * 4 + i] = __builtin_isless (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[5 * 4 + i] = (!__builtin_isless (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[6 * 4 + i] = __builtin_islessequal (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[7 * 4 + i] = (!__builtin_islessequal (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[8 * 4 + i] = __builtin_islessgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[9 * 4 + i] = (!__builtin_islessgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[10 * 4 + i] = __builtin_isunordered (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[11 * 4 + i] = (!__builtin_isunordered (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[12 * 4 + i] = s1[i] > s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[13 * 4 + i] = s1[i] <= s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[14 * 4 + i] = s1[i] < s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[15 * 4 + i] = s1[i] >= s2[i] ? -1.0 : 0.0;
+}
+
+int
+main ()
+{
+  int i;
+  s1[0] = 5.0;
+  s1[1] = 6.0;
+  s1[2] = 5.0;
+  s1[3] = __builtin_nan ("");
+  s2[0] = 6.0;
+  s2[1] = 5.0;
+  s2[2] = 5.0;
+  s2[3] = 5.0;
+  asm volatile ("" : : : "memory");
+  foo ();
+  asm volatile ("" : : : "memory");
+  for (i = 0; i < 16 * 4; i++)
+    if (i >= 12 * 4 && (i & 3) == 3)
+      {
+	if (s3[i] != 0.0) abort ();
+      }
+    else
+      {
+        static int masks[] = { 2, 2|4, 1, 1|4, 1|2, 8, 2, 1 };
+        if (s3[i]
+	    != (((1 << (i & 3)) & ((i & 4) ? ~masks[i / 8] : masks[i / 8]))
+		? -1.0 : 0.0))
+	  abort ();
+      }
+  return 0;
+}
--- gcc/testsuite/gcc.dg/pr50310-2.c.jj	2011-09-07 12:53:43.000000000 +0200
+++ gcc/testsuite/gcc.dg/pr50310-2.c	2011-09-07 12:53:16.000000000 +0200
@@ -0,0 +1,47 @@
+/* PR target/50310 */
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+/* { dg-options "-O3 -mavx" { target avx_runtime } } */
+
+double s1[4], s2[4], s3[64];
+
+int
+main (void)
+{
+  int i;
+  asm volatile ("" : : : "memory");
+  for (i = 0; i < 4; i++)
+    s3[0 * 4 + i] = __builtin_isgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[1 * 4 + i] = (!__builtin_isgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[2 * 4 + i] = __builtin_isgreaterequal (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[3 * 4 + i] = (!__builtin_isgreaterequal (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[4 * 4 + i] = __builtin_isless (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[5 * 4 + i] = (!__builtin_isless (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[6 * 4 + i] = __builtin_islessequal (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[7 * 4 + i] = (!__builtin_islessequal (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[8 * 4 + i] = __builtin_islessgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[9 * 4 + i] = (!__builtin_islessgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[10 * 4 + i] = __builtin_isunordered (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[11 * 4 + i] = (!__builtin_isunordered (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[12 * 4 + i] = s1[i] > s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[13 * 4 + i] = s1[i] >= s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[14 * 4 + i] = s1[i] < s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[15 * 4 + i] = s1[i] <= s2[i] ? -1.0 : 0.0;
+  asm volatile ("" : : : "memory");
+  return 0;
+}

	Jakub

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Ensure vcond* expansion doesn't fail on x86 (PR target/50310)
  2011-09-07 19:25 [PATCH] Ensure vcond* expansion doesn't fail on x86 (PR target/50310) Jakub Jelinek
@ 2011-09-07 20:02 ` Uros Bizjak
  2011-09-07 20:32   ` Jakub Jelinek
  0 siblings, 1 reply; 7+ messages in thread
From: Uros Bizjak @ 2011-09-07 20:02 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

On Wed, Sep 7, 2011 at 8:17 PM, Jakub Jelinek <jakub@redhat.com> wrote:

> The attached testcase ICEs, because the vectorizer assumes that if vcond*
> is available, it supports all comparisons, not just a subset of them.
> With -mavx vcmpd etc. already support all the needed comparisons (and
> several more - we wouldn't even need to swap the arguments), for SSE
> the only missing ones (LTGT and UNEQ) can be handled as ORDERED & NE
> resp. UNORDERED | EQ.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux (on non-AVX host),
> plus regtested on x86_64-linux on AVX box.  Ok for trunk and 4.6?
>
> 2011-09-07  Jakub Jelinek  <jakub@redhat.com>
>
>        PR target/50310
>        * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): For
>        TARGET_AVX return code for LTGT and UNEQ.
>        (ix86_expand_fp_vcond): Handle LTGT and UNEQ.
>
>        * gcc.c-torture/execute/ieee/pr50310.c: New test.
>        * gcc.dg/pr50310-2.c: New test.

Please put early exit for TARGET_SSE at the beginning of
ix86_prepare_sse_fp_compare_args function. There is really no need to
swap operands - and to help reload, since AVX instructions are
three-operand instructions.

OK for mainline with this change.

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Ensure vcond* expansion doesn't fail on x86 (PR target/50310)
  2011-09-07 20:02 ` Uros Bizjak
@ 2011-09-07 20:32   ` Jakub Jelinek
  2011-09-07 20:49     ` Uros Bizjak
  2011-09-22 23:25     ` Uros Bizjak
  0 siblings, 2 replies; 7+ messages in thread
From: Jakub Jelinek @ 2011-09-07 20:32 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

On Wed, Sep 07, 2011 at 09:54:03PM +0200, Uros Bizjak wrote:
> > 2011-09-07  Jakub Jelinek  <jakub@redhat.com>
> >
> >        PR target/50310
> >        * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): For
> >        TARGET_AVX return code for LTGT and UNEQ.
> >        (ix86_expand_fp_vcond): Handle LTGT and UNEQ.
> >
> >        * gcc.c-torture/execute/ieee/pr50310.c: New test.
> >        * gcc.dg/pr50310-2.c: New test.
> 
> Please put early exit for TARGET_SSE at the beginning of
> ix86_prepare_sse_fp_compare_args function. There is really no need to

You mean for TARGET_AVX, right?

> swap operands - and to help reload, since AVX instructions are
> three-operand instructions.
> 
> OK for mainline with this change.

Here is the updated patch, I'll bootstrap/regtest it now.

2011-09-07  Jakub Jelinek  <jakub@redhat.com>

	PR target/50310
	* config/i386/i386.c (ix86_prepare_sse_fp_compare_args): Return
	code early if TARGET_AVX.
	(ix86_expand_fp_vcond): Handle LTGT and UNEQ.

	* gcc.c-torture/execute/ieee/pr50310.c: New test.
	* gcc.dg/pr50310-2.c: New test.

--- gcc/config/i386/i386.c.jj	2011-09-02 16:29:38.000000000 +0200
+++ gcc/config/i386/i386.c	2011-09-07 21:57:52.000000000 +0200
@@ -18304,6 +18304,11 @@ ix86_prepare_sse_fp_compare_args (rtx de
 {
   rtx tmp;
 
+  /* AVX supports all the needed comparisons, no need to swap arguments
+     nor help reload.  */
+  if (TARGET_AVX)
+    return code;
+
   switch (code)
     {
     case LTGT:
@@ -18559,7 +18564,32 @@ ix86_expand_fp_vcond (rtx operands[])
   code = ix86_prepare_sse_fp_compare_args (operands[0], code,
 					   &operands[4], &operands[5]);
   if (code == UNKNOWN)
-    return false;
+    {
+      rtx temp;
+      switch (GET_CODE (operands[3]))
+	{
+	case LTGT:
+	  temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
+				      operands[5], operands[0], operands[0]);
+	  cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
+				     operands[5], operands[1], operands[2]);
+	  code = AND;
+	  break;
+	case UNEQ:
+	  temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
+				      operands[5], operands[0], operands[0]);
+	  cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
+				     operands[5], operands[1], operands[2]);
+	  code = IOR;
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
+				 OPTAB_DIRECT);
+      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
+      return true;
+    }
 
   if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
 				 operands[5], operands[1], operands[2]))
--- gcc/testsuite/gcc.c-torture/execute/ieee/pr50310.c.jj	2011-09-07 14:16:12.000000000 +0200
+++ gcc/testsuite/gcc.c-torture/execute/ieee/pr50310.c	2011-09-07 14:40:57.000000000 +0200
@@ -0,0 +1,73 @@
+/* PR target/50310 */
+
+extern void abort (void);
+double s1[4], s2[4], s3[64];
+
+void
+foo (void)
+{
+  int i;
+  for (i = 0; i < 4; i++)
+    s3[0 * 4 + i] = __builtin_isgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[1 * 4 + i] = (!__builtin_isgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[2 * 4 + i] = __builtin_isgreaterequal (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[3 * 4 + i] = (!__builtin_isgreaterequal (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[4 * 4 + i] = __builtin_isless (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[5 * 4 + i] = (!__builtin_isless (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[6 * 4 + i] = __builtin_islessequal (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[7 * 4 + i] = (!__builtin_islessequal (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[8 * 4 + i] = __builtin_islessgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[9 * 4 + i] = (!__builtin_islessgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[10 * 4 + i] = __builtin_isunordered (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[11 * 4 + i] = (!__builtin_isunordered (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[12 * 4 + i] = s1[i] > s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[13 * 4 + i] = s1[i] <= s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[14 * 4 + i] = s1[i] < s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[15 * 4 + i] = s1[i] >= s2[i] ? -1.0 : 0.0;
+}
+
+int
+main ()
+{
+  int i;
+  s1[0] = 5.0;
+  s1[1] = 6.0;
+  s1[2] = 5.0;
+  s1[3] = __builtin_nan ("");
+  s2[0] = 6.0;
+  s2[1] = 5.0;
+  s2[2] = 5.0;
+  s2[3] = 5.0;
+  asm volatile ("" : : : "memory");
+  foo ();
+  asm volatile ("" : : : "memory");
+  for (i = 0; i < 16 * 4; i++)
+    if (i >= 12 * 4 && (i & 3) == 3)
+      {
+	if (s3[i] != 0.0) abort ();
+      }
+    else
+      {
+        static int masks[] = { 2, 2|4, 1, 1|4, 1|2, 8, 2, 1 };
+        if (s3[i]
+	    != (((1 << (i & 3)) & ((i & 4) ? ~masks[i / 8] : masks[i / 8]))
+		? -1.0 : 0.0))
+	  abort ();
+      }
+  return 0;
+}
--- gcc/testsuite/gcc.dg/pr50310-2.c.jj	2011-09-07 12:53:43.000000000 +0200
+++ gcc/testsuite/gcc.dg/pr50310-2.c	2011-09-07 12:53:16.000000000 +0200
@@ -0,0 +1,47 @@
+/* PR target/50310 */
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+/* { dg-options "-O3 -mavx" { target avx_runtime } } */
+
+double s1[4], s2[4], s3[64];
+
+int
+main (void)
+{
+  int i;
+  asm volatile ("" : : : "memory");
+  for (i = 0; i < 4; i++)
+    s3[0 * 4 + i] = __builtin_isgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[1 * 4 + i] = (!__builtin_isgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[2 * 4 + i] = __builtin_isgreaterequal (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[3 * 4 + i] = (!__builtin_isgreaterequal (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[4 * 4 + i] = __builtin_isless (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[5 * 4 + i] = (!__builtin_isless (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[6 * 4 + i] = __builtin_islessequal (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[7 * 4 + i] = (!__builtin_islessequal (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[8 * 4 + i] = __builtin_islessgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[9 * 4 + i] = (!__builtin_islessgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[10 * 4 + i] = __builtin_isunordered (s1[i], s2[i]) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[11 * 4 + i] = (!__builtin_isunordered (s1[i], s2[i])) ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[12 * 4 + i] = s1[i] > s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[13 * 4 + i] = s1[i] >= s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[14 * 4 + i] = s1[i] < s2[i] ? -1.0 : 0.0;
+  for (i = 0; i < 4; i++)
+    s3[15 * 4 + i] = s1[i] <= s2[i] ? -1.0 : 0.0;
+  asm volatile ("" : : : "memory");
+  return 0;
+}


	Jakub

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Ensure vcond* expansion doesn't fail on x86 (PR target/50310)
  2011-09-07 20:32   ` Jakub Jelinek
@ 2011-09-07 20:49     ` Uros Bizjak
  2011-09-22 23:25     ` Uros Bizjak
  1 sibling, 0 replies; 7+ messages in thread
From: Uros Bizjak @ 2011-09-07 20:49 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

On Wed, Sep 7, 2011 at 10:01 PM, Jakub Jelinek <jakub@redhat.com> wrote:

>> > 2011-09-07  Jakub Jelinek  <jakub@redhat.com>
>> >
>> >        PR target/50310
>> >        * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): For
>> >        TARGET_AVX return code for LTGT and UNEQ.
>> >        (ix86_expand_fp_vcond): Handle LTGT and UNEQ.
>> >
>> >        * gcc.c-torture/execute/ieee/pr50310.c: New test.
>> >        * gcc.dg/pr50310-2.c: New test.
>>
>> Please put early exit for TARGET_SSE at the beginning of
>> ix86_prepare_sse_fp_compare_args function. There is really no need to
>
> You mean for TARGET_AVX, right?

Oh, sure.

>> swap operands - and to help reload, since AVX instructions are
>> three-operand instructions.
>>
>> OK for mainline with this change.
>
> Here is the updated patch, I'll bootstrap/regtest it now.

Thanks.

Uros.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Ensure vcond* expansion doesn't fail on x86 (PR target/50310)
  2011-09-07 20:32   ` Jakub Jelinek
  2011-09-07 20:49     ` Uros Bizjak
@ 2011-09-22 23:25     ` Uros Bizjak
  2011-09-23  8:58       ` Jakub Jelinek
  1 sibling, 1 reply; 7+ messages in thread
From: Uros Bizjak @ 2011-09-22 23:25 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

On Wed, Sep 7, 2011 at 10:01 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Wed, Sep 07, 2011 at 09:54:03PM +0200, Uros Bizjak wrote:
>> > 2011-09-07  Jakub Jelinek  <jakub@redhat.com>
>> >
>> >        PR target/50310
>> >        * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): For
>> >        TARGET_AVX return code for LTGT and UNEQ.
>> >        (ix86_expand_fp_vcond): Handle LTGT and UNEQ.
>> >
>> >        * gcc.c-torture/execute/ieee/pr50310.c: New test.
>> >        * gcc.dg/pr50310-2.c: New test.
>>
>> Please put early exit for TARGET_SSE at the beginning of
>> ix86_prepare_sse_fp_compare_args function. There is really no need to
>
> You mean for TARGET_AVX, right?
>
>> swap operands - and to help reload, since AVX instructions are
>> three-operand instructions.
>>
>> OK for mainline with this change.
>
> Here is the updated patch, I'll bootstrap/regtest it now.
>
> 2011-09-07  Jakub Jelinek  <jakub@redhat.com>
>
>        PR target/50310
>        * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): Return
>        code early if TARGET_AVX.
>        (ix86_expand_fp_vcond): Handle LTGT and UNEQ.
>
>        * gcc.c-torture/execute/ieee/pr50310.c: New test.
>        * gcc.dg/pr50310-2.c: New test.
>
> --- gcc/config/i386/i386.c.jj   2011-09-02 16:29:38.000000000 +0200
> +++ gcc/config/i386/i386.c      2011-09-07 21:57:52.000000000 +0200
> @@ -18304,6 +18304,11 @@ ix86_prepare_sse_fp_compare_args (rtx de
>  {
>   rtx tmp;
>
> +  /* AVX supports all the needed comparisons, no need to swap arguments
> +     nor help reload.  */
> +  if (TARGET_AVX)
> +    return code;
> +

Unfortunately, this part prevents generation of vmin/vmax instructions
for TARGET_AVX. In ix86_expand_fp_movcc, we call
ix86_prepare_sse_fp_compare_args, where we previously converted GT
into LT. LT is recognized in ix86_expand_sse_fp_minmax as valid
operand for MIN/MAX, whereas GT is not.

I'm not sure if we can swap operands in ix86_expand_sse_fp_minmax,
there is a scary comment  in front of ix86_expand_sse_fp_minmax w.r.t.
to IEEE safety.

The test is gcc.target/i386/ssefp-?.c, please compile it with "-O2 -mavx".

Uros.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Ensure vcond* expansion doesn't fail on x86 (PR target/50310)
  2011-09-22 23:25     ` Uros Bizjak
@ 2011-09-23  8:58       ` Jakub Jelinek
  2011-09-23 12:51         ` Uros Bizjak
  0 siblings, 1 reply; 7+ messages in thread
From: Jakub Jelinek @ 2011-09-23  8:58 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

On Fri, Sep 23, 2011 at 12:09:18AM +0200, Uros Bizjak wrote:
> > --- gcc/config/i386/i386.c.jj   2011-09-02 16:29:38.000000000 +0200
> > +++ gcc/config/i386/i386.c      2011-09-07 21:57:52.000000000 +0200
> > @@ -18304,6 +18304,11 @@ ix86_prepare_sse_fp_compare_args (rtx de
> >  {
> >   rtx tmp;
> >
> > +  /* AVX supports all the needed comparisons, no need to swap arguments
> > +     nor help reload.  */
> > +  if (TARGET_AVX)
> > +    return code;
> > +
> 
> Unfortunately, this part prevents generation of vmin/vmax instructions
> for TARGET_AVX. In ix86_expand_fp_movcc, we call
> ix86_prepare_sse_fp_compare_args, where we previously converted GT
> into LT. LT is recognized in ix86_expand_sse_fp_minmax as valid
> operand for MIN/MAX, whereas GT is not.
> 
> I'm not sure if we can swap operands in ix86_expand_sse_fp_minmax,
> there is a scary comment  in front of ix86_expand_sse_fp_minmax w.r.t.
> to IEEE safety.

swap_condition is documented to be IEEE safe:
/* Similar, but return the code when two operands of a comparison are swapped.
   This IS safe for IEEE floating-point.  */

So, do you prefer this?

2011-09-23  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/i386.c (ix86_prepare_sse_fp_compare_args): For
	GE/GT/UNLE/UNLT swap arguments and condition even for TARGET_AVX.

	* gcc.target/i386/avxfp-1.c: New test.
	* gcc.target/i386/avxfp-2.c: New test.

--- gcc/config/i386/i386.c.jj	2011-09-22 18:55:45.000000000 +0200
+++ gcc/config/i386/i386.c	2011-09-23 10:04:35.000000000 +0200
@@ -18739,15 +18739,13 @@ ix86_prepare_sse_fp_compare_args (rtx de
 {
   rtx tmp;
 
-  /* AVX supports all the needed comparisons, no need to swap arguments
-     nor help reload.  */
-  if (TARGET_AVX)
-    return code;
-
   switch (code)
     {
     case LTGT:
     case UNEQ:
+      /* AVX supports all the needed comparisons.  */
+      if (TARGET_AVX)
+	break;
       /* We have no LTGT as an operator.  We could implement it with
 	 NE & ORDERED, but this requires an extra temporary.  It's
 	 not clear that it's worth it.  */
@@ -18764,6 +18762,9 @@ ix86_prepare_sse_fp_compare_args (rtx de
     case NE:
     case UNORDERED:
     case ORDERED:
+      /* AVX has 3 operand comparisons, no need to swap anything.  */
+      if (TARGET_AVX)
+	break;
       /* For commutative operators, try to canonicalize the destination
 	 operand to be first in the comparison - this helps reload to
 	 avoid extra moves.  */
@@ -18775,8 +18776,10 @@ ix86_prepare_sse_fp_compare_args (rtx de
     case GT:
     case UNLE:
     case UNLT:
-      /* These are not supported directly.  Swap the comparison operands
-	 to transform into something that is supported.  */
+      /* These are not supported directly before AVX, and furthermore
+	 ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
+	 comparison operands to transform into something that is
+	 supported.  */
       tmp = *pop0;
       *pop0 = *pop1;
       *pop1 = tmp;
--- gcc/testsuite/gcc.target/i386/avxfp-1.c.jj	2011-09-23 10:07:48.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/avxfp-1.c	2011-09-23 10:08:02.000000000 +0200
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx -mfpmath=sse" } */
+/* { dg-final { scan-assembler "vmaxsd" } } */
+/* { dg-final { scan-assembler "vminsd" } } */
+double x;
+t()
+{
+  x=x>5?x:5;
+}
+
+double x;
+q()
+{
+  x=x<5?x:5;
+}
--- gcc/testsuite/gcc.target/i386/avxfp-2.c.jj	2011-09-23 10:07:51.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/avxfp-2.c	2011-09-23 10:08:17.000000000 +0200
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx -mfpmath=sse" } */
+/* { dg-final { scan-assembler "vmaxsd" } } */
+/* { dg-final { scan-assembler "vminsd" } } */
+double x;
+q()
+{
+  x=x<5?5:x;
+}
+
+double x;
+q1()
+{
+  x=x>5?5:x;
+}

	Jakub

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] Ensure vcond* expansion doesn't fail on x86 (PR target/50310)
  2011-09-23  8:58       ` Jakub Jelinek
@ 2011-09-23 12:51         ` Uros Bizjak
  0 siblings, 0 replies; 7+ messages in thread
From: Uros Bizjak @ 2011-09-23 12:51 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

On Fri, Sep 23, 2011 at 10:12 AM, Jakub Jelinek <jakub@redhat.com> wrote:
>> > --- gcc/config/i386/i386.c.jj   2011-09-02 16:29:38.000000000 +0200
>> > +++ gcc/config/i386/i386.c      2011-09-07 21:57:52.000000000 +0200
>> > @@ -18304,6 +18304,11 @@ ix86_prepare_sse_fp_compare_args (rtx de
>> >  {
>> >   rtx tmp;
>> >
>> > +  /* AVX supports all the needed comparisons, no need to swap arguments
>> > +     nor help reload.  */
>> > +  if (TARGET_AVX)
>> > +    return code;
>> > +
>>
>> Unfortunately, this part prevents generation of vmin/vmax instructions
>> for TARGET_AVX. In ix86_expand_fp_movcc, we call
>> ix86_prepare_sse_fp_compare_args, where we previously converted GT
>> into LT. LT is recognized in ix86_expand_sse_fp_minmax as valid
>> operand for MIN/MAX, whereas GT is not.
>>
>> I'm not sure if we can swap operands in ix86_expand_sse_fp_minmax,
>> there is a scary comment  in front of ix86_expand_sse_fp_minmax w.r.t.
>> to IEEE safety.
>
> swap_condition is documented to be IEEE safe:
> /* Similar, but return the code when two operands of a comparison are swapped.
>   This IS safe for IEEE floating-point.  */
>
> So, do you prefer this?


Yes, since the operands can be swapped, this looks OK to me. Also, the
comments are very informative.

> 2011-09-23  Jakub Jelinek  <jakub@redhat.com>
>
>        * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): For
>        GE/GT/UNLE/UNLT swap arguments and condition even for TARGET_AVX.
>
>        * gcc.target/i386/avxfp-1.c: New test.
>        * gcc.target/i386/avxfp-2.c: New test.

OK.

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2011-09-23  9:51 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-07 19:25 [PATCH] Ensure vcond* expansion doesn't fail on x86 (PR target/50310) Jakub Jelinek
2011-09-07 20:02 ` Uros Bizjak
2011-09-07 20:32   ` Jakub Jelinek
2011-09-07 20:49     ` Uros Bizjak
2011-09-22 23:25     ` Uros Bizjak
2011-09-23  8:58       ` Jakub Jelinek
2011-09-23 12:51         ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).