* [PATCH] Ensure vcond* expansion doesn't fail on x86 (PR target/50310)
@ 2011-09-07 19:25 Jakub Jelinek
2011-09-07 20:02 ` Uros Bizjak
0 siblings, 1 reply; 7+ messages in thread
From: Jakub Jelinek @ 2011-09-07 19:25 UTC (permalink / raw)
To: Uros Bizjak; +Cc: gcc-patches
Hi!
The attached testcase ICEs, because the vectorizer assumes that if vcond*
is available, it supports all comparisons, not just a subset of them.
With -mavx vcmpd etc. already support all the needed comparisons (and
several more - we wouldn't even need to swap the arguments), for SSE
the only missing ones (LTGT and UNEQ) can be handled as ORDERED & NE
resp. UNORDERED | EQ.
Bootstrapped/regtested on x86_64-linux and i686-linux (on non-AVX host),
plus regtested on x86_64-linux on AVX box. Ok for trunk and 4.6?
2011-09-07 Jakub Jelinek <jakub@redhat.com>
PR target/50310
* config/i386/i386.c (ix86_prepare_sse_fp_compare_args): For
TARGET_AVX return code for LTGT and UNEQ.
(ix86_expand_fp_vcond): Handle LTGT and UNEQ.
* gcc.c-torture/execute/ieee/pr50310.c: New test.
* gcc.dg/pr50310-2.c: New test.
--- gcc/config/i386/i386.c.jj 2011-09-02 16:29:38.000000000 +0200
+++ gcc/config/i386/i386.c 2011-09-07 13:34:17.000000000 +0200
@@ -18308,6 +18308,10 @@ ix86_prepare_sse_fp_compare_args (rtx de
{
case LTGT:
case UNEQ:
+ /* With AVX these are supported directly. */
+ if (TARGET_AVX)
+ break;
+
/* We have no LTGT as an operator. We could implement it with
NE & ORDERED, but this requires an extra temporary. It's
not clear that it's worth it. */
@@ -18559,7 +18563,32 @@ ix86_expand_fp_vcond (rtx operands[])
code = ix86_prepare_sse_fp_compare_args (operands[0], code,
&operands[4], &operands[5]);
if (code == UNKNOWN)
- return false;
+ {
+ rtx temp;
+ switch (GET_CODE (operands[3]))
+ {
+ case LTGT:
+ temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
+ operands[5], operands[0], operands[0]);
+ cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
+ operands[5], operands[1], operands[2]);
+ code = AND;
+ break;
+ case UNEQ:
+ temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
+ operands[5], operands[0], operands[0]);
+ cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
+ operands[5], operands[1], operands[2]);
+ code = IOR;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
+ OPTAB_DIRECT);
+ ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
+ return true;
+ }
if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
operands[5], operands[1], operands[2]))
--- gcc/testsuite/gcc.c-torture/execute/ieee/pr50310.c.jj 2011-09-07 14:16:12.000000000 +0200
+++ gcc/testsuite/gcc.c-torture/execute/ieee/pr50310.c 2011-09-07 14:40:57.000000000 +0200
@@ -0,0 +1,73 @@
+/* PR target/50310 */
+
+extern void abort (void);
+double s1[4], s2[4], s3[64];
+
+void
+foo (void)
+{
+ int i;
+ for (i = 0; i < 4; i++)
+ s3[0 * 4 + i] = __builtin_isgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[1 * 4 + i] = (!__builtin_isgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[2 * 4 + i] = __builtin_isgreaterequal (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[3 * 4 + i] = (!__builtin_isgreaterequal (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[4 * 4 + i] = __builtin_isless (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[5 * 4 + i] = (!__builtin_isless (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[6 * 4 + i] = __builtin_islessequal (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[7 * 4 + i] = (!__builtin_islessequal (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[8 * 4 + i] = __builtin_islessgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[9 * 4 + i] = (!__builtin_islessgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[10 * 4 + i] = __builtin_isunordered (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[11 * 4 + i] = (!__builtin_isunordered (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[12 * 4 + i] = s1[i] > s2[i] ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[13 * 4 + i] = s1[i] <= s2[i] ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[14 * 4 + i] = s1[i] < s2[i] ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[15 * 4 + i] = s1[i] >= s2[i] ? -1.0 : 0.0;
+}
+
+int
+main ()
+{
+ int i;
+ s1[0] = 5.0;
+ s1[1] = 6.0;
+ s1[2] = 5.0;
+ s1[3] = __builtin_nan ("");
+ s2[0] = 6.0;
+ s2[1] = 5.0;
+ s2[2] = 5.0;
+ s2[3] = 5.0;
+ asm volatile ("" : : : "memory");
+ foo ();
+ asm volatile ("" : : : "memory");
+ for (i = 0; i < 16 * 4; i++)
+ if (i >= 12 * 4 && (i & 3) == 3)
+ {
+ if (s3[i] != 0.0) abort ();
+ }
+ else
+ {
+ static int masks[] = { 2, 2|4, 1, 1|4, 1|2, 8, 2, 1 };
+ if (s3[i]
+ != (((1 << (i & 3)) & ((i & 4) ? ~masks[i / 8] : masks[i / 8]))
+ ? -1.0 : 0.0))
+ abort ();
+ }
+ return 0;
+}
--- gcc/testsuite/gcc.dg/pr50310-2.c.jj 2011-09-07 12:53:43.000000000 +0200
+++ gcc/testsuite/gcc.dg/pr50310-2.c 2011-09-07 12:53:16.000000000 +0200
@@ -0,0 +1,47 @@
+/* PR target/50310 */
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+/* { dg-options "-O3 -mavx" { target avx_runtime } } */
+
+double s1[4], s2[4], s3[64];
+
+int
+main (void)
+{
+ int i;
+ asm volatile ("" : : : "memory");
+ for (i = 0; i < 4; i++)
+ s3[0 * 4 + i] = __builtin_isgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[1 * 4 + i] = (!__builtin_isgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[2 * 4 + i] = __builtin_isgreaterequal (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[3 * 4 + i] = (!__builtin_isgreaterequal (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[4 * 4 + i] = __builtin_isless (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[5 * 4 + i] = (!__builtin_isless (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[6 * 4 + i] = __builtin_islessequal (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[7 * 4 + i] = (!__builtin_islessequal (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[8 * 4 + i] = __builtin_islessgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[9 * 4 + i] = (!__builtin_islessgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[10 * 4 + i] = __builtin_isunordered (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[11 * 4 + i] = (!__builtin_isunordered (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[12 * 4 + i] = s1[i] > s2[i] ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[13 * 4 + i] = s1[i] >= s2[i] ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[14 * 4 + i] = s1[i] < s2[i] ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[15 * 4 + i] = s1[i] <= s2[i] ? -1.0 : 0.0;
+ asm volatile ("" : : : "memory");
+ return 0;
+}
Jakub
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Ensure vcond* expansion doesn't fail on x86 (PR target/50310)
2011-09-07 19:25 [PATCH] Ensure vcond* expansion doesn't fail on x86 (PR target/50310) Jakub Jelinek
@ 2011-09-07 20:02 ` Uros Bizjak
2011-09-07 20:32 ` Jakub Jelinek
0 siblings, 1 reply; 7+ messages in thread
From: Uros Bizjak @ 2011-09-07 20:02 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: gcc-patches
On Wed, Sep 7, 2011 at 8:17 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> The attached testcase ICEs, because the vectorizer assumes that if vcond*
> is available, it supports all comparisons, not just a subset of them.
> With -mavx vcmpd etc. already support all the needed comparisons (and
> several more - we wouldn't even need to swap the arguments), for SSE
> the only missing ones (LTGT and UNEQ) can be handled as ORDERED & NE
> resp. UNORDERED | EQ.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux (on non-AVX host),
> plus regtested on x86_64-linux on AVX box. Ok for trunk and 4.6?
>
> 2011-09-07 Jakub Jelinek <jakub@redhat.com>
>
> PR target/50310
> * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): For
> TARGET_AVX return code for LTGT and UNEQ.
> (ix86_expand_fp_vcond): Handle LTGT and UNEQ.
>
> * gcc.c-torture/execute/ieee/pr50310.c: New test.
> * gcc.dg/pr50310-2.c: New test.
Please put early exit for TARGET_SSE at the beginning of
ix86_prepare_sse_fp_compare_args function. There is really no need to
swap operands - and to help reload, since AVX instructions are
three-operand instructions.
OK for mainline with this change.
Thanks,
Uros.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Ensure vcond* expansion doesn't fail on x86 (PR target/50310)
2011-09-07 20:02 ` Uros Bizjak
@ 2011-09-07 20:32 ` Jakub Jelinek
2011-09-07 20:49 ` Uros Bizjak
2011-09-22 23:25 ` Uros Bizjak
0 siblings, 2 replies; 7+ messages in thread
From: Jakub Jelinek @ 2011-09-07 20:32 UTC (permalink / raw)
To: Uros Bizjak; +Cc: gcc-patches
On Wed, Sep 07, 2011 at 09:54:03PM +0200, Uros Bizjak wrote:
> > 2011-09-07  Jakub Jelinek  <jakub@redhat.com>
> >
> > Â Â Â Â PR target/50310
> > Â Â Â Â * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): For
> > Â Â Â Â TARGET_AVX return code for LTGT and UNEQ.
> > Â Â Â Â (ix86_expand_fp_vcond): Handle LTGT and UNEQ.
> >
> > Â Â Â Â * gcc.c-torture/execute/ieee/pr50310.c: New test.
> > Â Â Â Â * gcc.dg/pr50310-2.c: New test.
>
> Please put early exit for TARGET_SSE at the beginning of
> ix86_prepare_sse_fp_compare_args function. There is really no need to
You mean for TARGET_AVX, right?
> swap operands - and to help reload, since AVX instructions are
> three-operand instructions.
>
> OK for mainline with this change.
Here is the updated patch, I'll bootstrap/regtest it now.
2011-09-07 Jakub Jelinek <jakub@redhat.com>
PR target/50310
* config/i386/i386.c (ix86_prepare_sse_fp_compare_args): Return
code early if TARGET_AVX.
(ix86_expand_fp_vcond): Handle LTGT and UNEQ.
* gcc.c-torture/execute/ieee/pr50310.c: New test.
* gcc.dg/pr50310-2.c: New test.
--- gcc/config/i386/i386.c.jj 2011-09-02 16:29:38.000000000 +0200
+++ gcc/config/i386/i386.c 2011-09-07 21:57:52.000000000 +0200
@@ -18304,6 +18304,11 @@ ix86_prepare_sse_fp_compare_args (rtx de
{
rtx tmp;
+ /* AVX supports all the needed comparisons, no need to swap arguments
+ nor help reload. */
+ if (TARGET_AVX)
+ return code;
+
switch (code)
{
case LTGT:
@@ -18559,7 +18564,32 @@ ix86_expand_fp_vcond (rtx operands[])
code = ix86_prepare_sse_fp_compare_args (operands[0], code,
&operands[4], &operands[5]);
if (code == UNKNOWN)
- return false;
+ {
+ rtx temp;
+ switch (GET_CODE (operands[3]))
+ {
+ case LTGT:
+ temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
+ operands[5], operands[0], operands[0]);
+ cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
+ operands[5], operands[1], operands[2]);
+ code = AND;
+ break;
+ case UNEQ:
+ temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
+ operands[5], operands[0], operands[0]);
+ cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
+ operands[5], operands[1], operands[2]);
+ code = IOR;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
+ OPTAB_DIRECT);
+ ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
+ return true;
+ }
if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
operands[5], operands[1], operands[2]))
--- gcc/testsuite/gcc.c-torture/execute/ieee/pr50310.c.jj 2011-09-07 14:16:12.000000000 +0200
+++ gcc/testsuite/gcc.c-torture/execute/ieee/pr50310.c 2011-09-07 14:40:57.000000000 +0200
@@ -0,0 +1,73 @@
+/* PR target/50310 */
+
+extern void abort (void);
+double s1[4], s2[4], s3[64];
+
+void
+foo (void)
+{
+ int i;
+ for (i = 0; i < 4; i++)
+ s3[0 * 4 + i] = __builtin_isgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[1 * 4 + i] = (!__builtin_isgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[2 * 4 + i] = __builtin_isgreaterequal (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[3 * 4 + i] = (!__builtin_isgreaterequal (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[4 * 4 + i] = __builtin_isless (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[5 * 4 + i] = (!__builtin_isless (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[6 * 4 + i] = __builtin_islessequal (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[7 * 4 + i] = (!__builtin_islessequal (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[8 * 4 + i] = __builtin_islessgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[9 * 4 + i] = (!__builtin_islessgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[10 * 4 + i] = __builtin_isunordered (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[11 * 4 + i] = (!__builtin_isunordered (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[12 * 4 + i] = s1[i] > s2[i] ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[13 * 4 + i] = s1[i] <= s2[i] ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[14 * 4 + i] = s1[i] < s2[i] ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[15 * 4 + i] = s1[i] >= s2[i] ? -1.0 : 0.0;
+}
+
+int
+main ()
+{
+ int i;
+ s1[0] = 5.0;
+ s1[1] = 6.0;
+ s1[2] = 5.0;
+ s1[3] = __builtin_nan ("");
+ s2[0] = 6.0;
+ s2[1] = 5.0;
+ s2[2] = 5.0;
+ s2[3] = 5.0;
+ asm volatile ("" : : : "memory");
+ foo ();
+ asm volatile ("" : : : "memory");
+ for (i = 0; i < 16 * 4; i++)
+ if (i >= 12 * 4 && (i & 3) == 3)
+ {
+ if (s3[i] != 0.0) abort ();
+ }
+ else
+ {
+ static int masks[] = { 2, 2|4, 1, 1|4, 1|2, 8, 2, 1 };
+ if (s3[i]
+ != (((1 << (i & 3)) & ((i & 4) ? ~masks[i / 8] : masks[i / 8]))
+ ? -1.0 : 0.0))
+ abort ();
+ }
+ return 0;
+}
--- gcc/testsuite/gcc.dg/pr50310-2.c.jj 2011-09-07 12:53:43.000000000 +0200
+++ gcc/testsuite/gcc.dg/pr50310-2.c 2011-09-07 12:53:16.000000000 +0200
@@ -0,0 +1,47 @@
+/* PR target/50310 */
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+/* { dg-options "-O3 -mavx" { target avx_runtime } } */
+
+double s1[4], s2[4], s3[64];
+
+int
+main (void)
+{
+ int i;
+ asm volatile ("" : : : "memory");
+ for (i = 0; i < 4; i++)
+ s3[0 * 4 + i] = __builtin_isgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[1 * 4 + i] = (!__builtin_isgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[2 * 4 + i] = __builtin_isgreaterequal (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[3 * 4 + i] = (!__builtin_isgreaterequal (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[4 * 4 + i] = __builtin_isless (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[5 * 4 + i] = (!__builtin_isless (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[6 * 4 + i] = __builtin_islessequal (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[7 * 4 + i] = (!__builtin_islessequal (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[8 * 4 + i] = __builtin_islessgreater (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[9 * 4 + i] = (!__builtin_islessgreater (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[10 * 4 + i] = __builtin_isunordered (s1[i], s2[i]) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[11 * 4 + i] = (!__builtin_isunordered (s1[i], s2[i])) ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[12 * 4 + i] = s1[i] > s2[i] ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[13 * 4 + i] = s1[i] >= s2[i] ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[14 * 4 + i] = s1[i] < s2[i] ? -1.0 : 0.0;
+ for (i = 0; i < 4; i++)
+ s3[15 * 4 + i] = s1[i] <= s2[i] ? -1.0 : 0.0;
+ asm volatile ("" : : : "memory");
+ return 0;
+}
Jakub
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Ensure vcond* expansion doesn't fail on x86 (PR target/50310)
2011-09-07 20:32 ` Jakub Jelinek
@ 2011-09-07 20:49 ` Uros Bizjak
2011-09-22 23:25 ` Uros Bizjak
1 sibling, 0 replies; 7+ messages in thread
From: Uros Bizjak @ 2011-09-07 20:49 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: gcc-patches
On Wed, Sep 7, 2011 at 10:01 PM, Jakub Jelinek <jakub@redhat.com> wrote:
>> > 2011-09-07 Jakub Jelinek <jakub@redhat.com>
>> >
>> > PR target/50310
>> > * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): For
>> > TARGET_AVX return code for LTGT and UNEQ.
>> > (ix86_expand_fp_vcond): Handle LTGT and UNEQ.
>> >
>> > * gcc.c-torture/execute/ieee/pr50310.c: New test.
>> > * gcc.dg/pr50310-2.c: New test.
>>
>> Please put early exit for TARGET_SSE at the beginning of
>> ix86_prepare_sse_fp_compare_args function. There is really no need to
>
> You mean for TARGET_AVX, right?
Oh, sure.
>> swap operands - and to help reload, since AVX instructions are
>> three-operand instructions.
>>
>> OK for mainline with this change.
>
> Here is the updated patch, I'll bootstrap/regtest it now.
Thanks.
Uros.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Ensure vcond* expansion doesn't fail on x86 (PR target/50310)
2011-09-07 20:32 ` Jakub Jelinek
2011-09-07 20:49 ` Uros Bizjak
@ 2011-09-22 23:25 ` Uros Bizjak
2011-09-23 8:58 ` Jakub Jelinek
1 sibling, 1 reply; 7+ messages in thread
From: Uros Bizjak @ 2011-09-22 23:25 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: gcc-patches
On Wed, Sep 7, 2011 at 10:01 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Wed, Sep 07, 2011 at 09:54:03PM +0200, Uros Bizjak wrote:
>> > 2011-09-07 Jakub Jelinek <jakub@redhat.com>
>> >
>> > PR target/50310
>> > * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): For
>> > TARGET_AVX return code for LTGT and UNEQ.
>> > (ix86_expand_fp_vcond): Handle LTGT and UNEQ.
>> >
>> > * gcc.c-torture/execute/ieee/pr50310.c: New test.
>> > * gcc.dg/pr50310-2.c: New test.
>>
>> Please put early exit for TARGET_SSE at the beginning of
>> ix86_prepare_sse_fp_compare_args function. There is really no need to
>
> You mean for TARGET_AVX, right?
>
>> swap operands - and to help reload, since AVX instructions are
>> three-operand instructions.
>>
>> OK for mainline with this change.
>
> Here is the updated patch, I'll bootstrap/regtest it now.
>
> 2011-09-07 Jakub Jelinek <jakub@redhat.com>
>
> PR target/50310
> * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): Return
> code early if TARGET_AVX.
> (ix86_expand_fp_vcond): Handle LTGT and UNEQ.
>
> * gcc.c-torture/execute/ieee/pr50310.c: New test.
> * gcc.dg/pr50310-2.c: New test.
>
> --- gcc/config/i386/i386.c.jj 2011-09-02 16:29:38.000000000 +0200
> +++ gcc/config/i386/i386.c 2011-09-07 21:57:52.000000000 +0200
> @@ -18304,6 +18304,11 @@ ix86_prepare_sse_fp_compare_args (rtx de
> {
> rtx tmp;
>
> + /* AVX supports all the needed comparisons, no need to swap arguments
> + nor help reload. */
> + if (TARGET_AVX)
> + return code;
> +
Unfortunately, this part prevents generation of vmin/vmax instructions
for TARGET_AVX. In ix86_expand_fp_movcc, we call
ix86_prepare_sse_fp_compare_args, where we previously converted GT
into LT. LT is recognized in ix86_expand_sse_fp_minmax as valid
operand for MIN/MAX, whereas GT is not.
I'm not sure if we can swap operands in ix86_expand_sse_fp_minmax,
there is a scary comment in front of ix86_expand_sse_fp_minmax w.r.t.
to IEEE safety.
The test is gcc.target/i386/ssefp-?.c, please compile it with "-O2 -mavx".
Uros.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Ensure vcond* expansion doesn't fail on x86 (PR target/50310)
2011-09-22 23:25 ` Uros Bizjak
@ 2011-09-23 8:58 ` Jakub Jelinek
2011-09-23 12:51 ` Uros Bizjak
0 siblings, 1 reply; 7+ messages in thread
From: Jakub Jelinek @ 2011-09-23 8:58 UTC (permalink / raw)
To: Uros Bizjak; +Cc: gcc-patches
On Fri, Sep 23, 2011 at 12:09:18AM +0200, Uros Bizjak wrote:
> > --- gcc/config/i386/i386.c.jj  2011-09-02 16:29:38.000000000 +0200
> > +++ gcc/config/i386/i386.c    2011-09-07 21:57:52.000000000 +0200
> > @@ -18304,6 +18304,11 @@ ix86_prepare_sse_fp_compare_args (rtx de
> > Â {
> > Â rtx tmp;
> >
> > + Â /* AVX supports all the needed comparisons, no need to swap arguments
> > + Â Â nor help reload. Â */
> > + Â if (TARGET_AVX)
> > + Â Â return code;
> > +
>
> Unfortunately, this part prevents generation of vmin/vmax instructions
> for TARGET_AVX. In ix86_expand_fp_movcc, we call
> ix86_prepare_sse_fp_compare_args, where we previously converted GT
> into LT. LT is recognized in ix86_expand_sse_fp_minmax as valid
> operand for MIN/MAX, whereas GT is not.
>
> I'm not sure if we can swap operands in ix86_expand_sse_fp_minmax,
> there is a scary comment in front of ix86_expand_sse_fp_minmax w.r.t.
> to IEEE safety.
swap_condition is documented to be IEEE safe:
/* Similar, but return the code when two operands of a comparison are swapped.
This IS safe for IEEE floating-point. */
So, do you prefer this?
2011-09-23 Jakub Jelinek <jakub@redhat.com>
* config/i386/i386.c (ix86_prepare_sse_fp_compare_args): For
GE/GT/UNLE/UNLT swap arguments and condition even for TARGET_AVX.
* gcc.target/i386/avxfp-1.c: New test.
* gcc.target/i386/avxfp-2.c: New test.
--- gcc/config/i386/i386.c.jj 2011-09-22 18:55:45.000000000 +0200
+++ gcc/config/i386/i386.c 2011-09-23 10:04:35.000000000 +0200
@@ -18739,15 +18739,13 @@ ix86_prepare_sse_fp_compare_args (rtx de
{
rtx tmp;
- /* AVX supports all the needed comparisons, no need to swap arguments
- nor help reload. */
- if (TARGET_AVX)
- return code;
-
switch (code)
{
case LTGT:
case UNEQ:
+ /* AVX supports all the needed comparisons. */
+ if (TARGET_AVX)
+ break;
/* We have no LTGT as an operator. We could implement it with
NE & ORDERED, but this requires an extra temporary. It's
not clear that it's worth it. */
@@ -18764,6 +18762,9 @@ ix86_prepare_sse_fp_compare_args (rtx de
case NE:
case UNORDERED:
case ORDERED:
+ /* AVX has 3 operand comparisons, no need to swap anything. */
+ if (TARGET_AVX)
+ break;
/* For commutative operators, try to canonicalize the destination
operand to be first in the comparison - this helps reload to
avoid extra moves. */
@@ -18775,8 +18776,10 @@ ix86_prepare_sse_fp_compare_args (rtx de
case GT:
case UNLE:
case UNLT:
- /* These are not supported directly. Swap the comparison operands
- to transform into something that is supported. */
+ /* These are not supported directly before AVX, and furthermore
+ ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
+ comparison operands to transform into something that is
+ supported. */
tmp = *pop0;
*pop0 = *pop1;
*pop1 = tmp;
--- gcc/testsuite/gcc.target/i386/avxfp-1.c.jj 2011-09-23 10:07:48.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/avxfp-1.c 2011-09-23 10:08:02.000000000 +0200
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx -mfpmath=sse" } */
+/* { dg-final { scan-assembler "vmaxsd" } } */
+/* { dg-final { scan-assembler "vminsd" } } */
+double x;
+t()
+{
+ x=x>5?x:5;
+}
+
+double x;
+q()
+{
+ x=x<5?x:5;
+}
--- gcc/testsuite/gcc.target/i386/avxfp-2.c.jj 2011-09-23 10:07:51.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/avxfp-2.c 2011-09-23 10:08:17.000000000 +0200
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx -mfpmath=sse" } */
+/* { dg-final { scan-assembler "vmaxsd" } } */
+/* { dg-final { scan-assembler "vminsd" } } */
+double x;
+q()
+{
+ x=x<5?5:x;
+}
+
+double x;
+q1()
+{
+ x=x>5?5:x;
+}
Jakub
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] Ensure vcond* expansion doesn't fail on x86 (PR target/50310)
2011-09-23 8:58 ` Jakub Jelinek
@ 2011-09-23 12:51 ` Uros Bizjak
0 siblings, 0 replies; 7+ messages in thread
From: Uros Bizjak @ 2011-09-23 12:51 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: gcc-patches
On Fri, Sep 23, 2011 at 10:12 AM, Jakub Jelinek <jakub@redhat.com> wrote:
>> > --- gcc/config/i386/i386.c.jj 2011-09-02 16:29:38.000000000 +0200
>> > +++ gcc/config/i386/i386.c 2011-09-07 21:57:52.000000000 +0200
>> > @@ -18304,6 +18304,11 @@ ix86_prepare_sse_fp_compare_args (rtx de
>> > {
>> > rtx tmp;
>> >
>> > + /* AVX supports all the needed comparisons, no need to swap arguments
>> > + nor help reload. */
>> > + if (TARGET_AVX)
>> > + return code;
>> > +
>>
>> Unfortunately, this part prevents generation of vmin/vmax instructions
>> for TARGET_AVX. In ix86_expand_fp_movcc, we call
>> ix86_prepare_sse_fp_compare_args, where we previously converted GT
>> into LT. LT is recognized in ix86_expand_sse_fp_minmax as valid
>> operand for MIN/MAX, whereas GT is not.
>>
>> I'm not sure if we can swap operands in ix86_expand_sse_fp_minmax,
>> there is a scary comment in front of ix86_expand_sse_fp_minmax w.r.t.
>> to IEEE safety.
>
> swap_condition is documented to be IEEE safe:
> /* Similar, but return the code when two operands of a comparison are swapped.
> This IS safe for IEEE floating-point. */
>
> So, do you prefer this?
Yes, since the operands can be swapped, this looks OK to me. Also, the
comments are very informative.
> 2011-09-23 Jakub Jelinek <jakub@redhat.com>
>
> * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): For
> GE/GT/UNLE/UNLT swap arguments and condition even for TARGET_AVX.
>
> * gcc.target/i386/avxfp-1.c: New test.
> * gcc.target/i386/avxfp-2.c: New test.
OK.
Thanks,
Uros.
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2011-09-23 9:51 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-07 19:25 [PATCH] Ensure vcond* expansion doesn't fail on x86 (PR target/50310) Jakub Jelinek
2011-09-07 20:02 ` Uros Bizjak
2011-09-07 20:32 ` Jakub Jelinek
2011-09-07 20:49 ` Uros Bizjak
2011-09-22 23:25 ` Uros Bizjak
2011-09-23 8:58 ` Jakub Jelinek
2011-09-23 12:51 ` Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).