* [PATCH] Fix for PR 37809 and 37807
@ 2008-10-22 8:36 Ralph Loader
2008-10-22 10:42 ` Richard Guenther
2008-10-22 13:49 ` Paolo Bonzini
0 siblings, 2 replies; 12+ messages in thread
From: Ralph Loader @ 2008-10-22 8:36 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 611 bytes --]
Hi,
This patch has a fix for prs 37807 (exponential time with MMX builtins)
and 37809 (wrong code with MMX builtins).
Could an expert check that I'm not inadvertantly disabling important
optimisations for vector types? I believe the code in question was
only ever intended to be used for scalar types, but it is possible that
I've missed something.
2008-10-19 Ralph Loader <suckfish@ihug.co.nz>
PR middle-end/37807, middle-end/37809
* combine.c (force_to_mode): Do not process vector types.
* rtlanal.c (nonzero_bits1): Do not process vector types.
(num_sign_bit_copies1): Likewise.
Cheers,
Ralph.
[-- Attachment #2: pr.diff --]
[-- Type: text/x-patch, Size: 5375 bytes --]
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 80b318f..0b2ccf1 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2008-10-19 Ralph Loader <suckfish@ihug.co.nz>
+
+ PR middle-end/37807, middle-end/37809
+ * combine.c (force_to_mode): Do not process vector types.
+
+ * rtlanal.c (nonzero_bits1): Do not process vector types.
+ (num_sign_bit_copies1): Likewise.
+
2008-10-17 Andreas Krebbel <krebbel1@de.ibm.com>
* c-parser.c (c_parser_binary_expression): Silence the
diff --git a/gcc/combine.c b/gcc/combine.c
index 55baf37..43f7cdc 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -7321,6 +7321,10 @@ force_to_mode (rtx x, enum machine_mode mode, unsigned HOST_WIDE_INT mask,
&& (GET_MODE_MASK (GET_MODE (x)) & ~mask) == 0)
return gen_lowpart (mode, x);
+ /* The arithmetic simplifications here do the wrong thing on vector modes. */
+ if (VECTOR_MODE_P (mode) || VECTOR_MODE_P (GET_MODE (x)))
+ return gen_lowpart_or_truncate (mode, x);
+
switch (code)
{
case CLOBBER:
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index b2038aa..5d9df2c 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -3681,8 +3681,9 @@ nonzero_bits1 (const_rtx x, enum machine_mode mode, const_rtx known_x,
enum rtx_code code;
unsigned int mode_width = GET_MODE_BITSIZE (mode);
- /* For floating-point values, assume all bits are needed. */
- if (FLOAT_MODE_P (GET_MODE (x)) || FLOAT_MODE_P (mode))
+ /* For floating-point and vector values, assume all bits are needed. */
+ if (FLOAT_MODE_P (GET_MODE (x)) || FLOAT_MODE_P (mode)
+ || VECTOR_MODE_P (GET_MODE (x)) || VECTOR_MODE_P (mode))
return nonzero;
/* If X is wider than MODE, use its mode instead. */
@@ -4195,7 +4196,8 @@ num_sign_bit_copies1 (const_rtx x, enum machine_mode mode, const_rtx known_x,
if (mode == VOIDmode)
mode = GET_MODE (x);
- if (mode == VOIDmode || FLOAT_MODE_P (mode) || FLOAT_MODE_P (GET_MODE (x)))
+ if (mode == VOIDmode || FLOAT_MODE_P (mode) || FLOAT_MODE_P (GET_MODE (x))
+ || VECTOR_MODE_P (GET_MODE (x)) || VECTOR_MODE_P (mode))
return 1;
/* For a smaller object, just ignore the high bits. */
diff --git a/gcc/testsuite/gcc.target/i386/mmx-8.c b/gcc/testsuite/gcc.target/i386/mmx-8.c
new file mode 100644
index 0000000..9d665f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mmx-8.c
@@ -0,0 +1,136 @@
+/* PR middle-end/37809 */
+
+/* { dg-do run } */
+/* { dg-options "-O2 -mmmx" } */
+
+#include <mmintrin.h>
+
+// Various tests of cases where it is incorrect to optimise vectors as if they
+// were integers of the same width.
+
+extern void abort (void);
+
+/* #include <stdio.h> */
+/* #define FAIL(...) printf (__VA_ARGS__); */
+#define FAIL(...) abort();
+
+
+void Sshift()
+{
+ volatile __m64 y = (__m64) 0xffffffffll;
+ __m64 x = y & (__m64) 0xffffffffll;
+ x = _m_psradi (x, 1);
+ x &= (__m64) 0x80000000ll;
+ if (0 == (long long) x)
+ FAIL ("Sshift\n");
+}
+
+#define SHIFTU(F,B,S,T) \
+ void F() \
+ { \
+ volatile __m64 y = (__m64) 0ll; \
+ __m64 x = y | (__m64) (1llu << B); \
+ if (S > 0) \
+ x = _m_pslldi (x, S); \
+ else \
+ x = _m_psrldi (x, -S); \
+ if (T > 0) \
+ x = _m_pslldi (x, T); \
+ else \
+ x = _m_psrldi (x, -T); \
+ x &= (__m64) (1llu << (B + S + T)); \
+ if ((long long) x) \
+ FAIL ("%s\n", #F); \
+ }
+
+SHIFTU (shiftU1, 31, 1, -1)
+SHIFTU (shiftU2, 32, -1, 1)
+SHIFTU (shiftU3, 31, 1, 0)
+SHIFTU (shiftU4, 32, -1, 0)
+
+void add()
+{
+ volatile long long ONE = 1;
+ long long one = ONE;
+
+ __m64 a = (__m64) one;
+ __m64 b = (__m64) -one;
+ __m64 c = a + b;
+ if (0 == (long long) c)
+ FAIL ("add\n");
+}
+
+void add2()
+{
+ volatile long long ONE = 1;
+ long long one = ONE;
+
+ __m64 a = (__m64) one;
+ __m64 b = (__m64) -one;
+ __m64 c = _m_paddd (a, b);
+ if (0 == (long long) c)
+ FAIL ("add2\n");
+}
+
+
+void mult1()
+{
+ volatile __m64 y = (__m64) 0ll;
+ __m64 x = y | (__m64) (1ll << 32);
+ x = x * (__m64) 1ll;
+ x &= (__m64) (1ll << 32);
+ if (0 != (long long) x)
+ FAIL ("multi1\n");
+}
+
+
+void mult2()
+{
+ volatile int foo = 1;
+ unsigned long long one = foo & 1;
+
+ __m64 x = (__m64) (one << 16);
+ x *= x;
+ x &= (__m64) (1ll << 32);
+ if (0 != (long long) x)
+ FAIL ("mult2\n");
+}
+
+
+void mult3()
+{
+ volatile __m64 y = (__m64) (1ll << 32);
+ __m64 a = y;
+ __m64 b = y * (__m64) 1ll;
+ if (((long long) a) == (long long) b)
+ FAIL ("mult3\n");
+}
+
+
+void div()
+{
+ volatile __m64 y = (__m64) 0ll;
+ __m64 x = y | (__m64) (1ull << 32);
+ x |= (__m64) 1ull;
+ x = x / x;
+ if (1ll == (long long) x)
+ FAIL ("div\n");
+}
+
+
+int main()
+{
+ Sshift();
+ shiftU1();
+ shiftU2();
+ shiftU3();
+ shiftU4();
+ add();
+ add2();
+
+ mult1();
+ mult2();
+ mult3();
+ div();
+ return 0;
+}
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Fix for PR 37809 and 37807
2008-10-22 8:36 [PATCH] Fix for PR 37809 and 37807 Ralph Loader
@ 2008-10-22 10:42 ` Richard Guenther
2008-11-05 7:57 ` Ralph Loader
2008-10-22 13:49 ` Paolo Bonzini
1 sibling, 1 reply; 12+ messages in thread
From: Richard Guenther @ 2008-10-22 10:42 UTC (permalink / raw)
To: Ralph Loader; +Cc: gcc-patches
On Wed, Oct 22, 2008 at 7:12 AM, Ralph Loader <suckfish@ihug.co.nz> wrote:
> Hi,
>
> This patch has a fix for prs 37807 (exponential time with MMX builtins)
> and 37809 (wrong code with MMX builtins).
>
> Could an expert check that I'm not inadvertantly disabling important
> optimisations for vector types? I believe the code in question was
> only ever intended to be used for scalar types, but it is possible that
> I've missed something.
The rtlanal.c parts are ok. I wonder if we ever hit the _or_truncate case
with vector modes in force_to_mode though - can you check if using
gen_lowpart instead of gen_lowpart_or_truncate works?
Btw, you missa ChangeLog entry for the testcase for testsuite/ChangeLog.
Thanks,
Richard.
> 2008-10-19 Ralph Loader <suckfish@ihug.co.nz>
>
> PR middle-end/37807, middle-end/37809
> * combine.c (force_to_mode): Do not process vector types.
>
> * rtlanal.c (nonzero_bits1): Do not process vector types.
> (num_sign_bit_copies1): Likewise.
>
> Cheers,
> Ralph.
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Fix for PR 37809 and 37807
2008-10-22 8:36 [PATCH] Fix for PR 37809 and 37807 Ralph Loader
2008-10-22 10:42 ` Richard Guenther
@ 2008-10-22 13:49 ` Paolo Bonzini
1 sibling, 0 replies; 12+ messages in thread
From: Paolo Bonzini @ 2008-10-22 13:49 UTC (permalink / raw)
To: Ralph Loader; +Cc: gcc-patches
> Could an expert check that I'm not inadvertantly disabling important
> optimisations for vector types? I believe the code in question was
> only ever intended to be used for scalar types, but it is possible that
> I've missed something.
I'm sure some optimization, if done correctly, might help on vector
types too. But yes, disabling them like this seems sensible. (Note:
I've not reviewed the patch thoroughly, and cannot approve it anyway).
Paolo
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Fix for PR 37809 and 37807
2008-10-22 10:42 ` Richard Guenther
@ 2008-11-05 7:57 ` Ralph Loader
2008-11-05 9:21 ` Richard Guenther
0 siblings, 1 reply; 12+ messages in thread
From: Ralph Loader @ 2008-11-05 7:57 UTC (permalink / raw)
To: gcc-patches; +Cc: Richard Guenther
[-- Attachment #1: Type: text/plain, Size: 875 bytes --]
[Sorry for the delay on following up on this]
> The rtlanal.c parts are ok. I wonder if we ever hit the _or_truncate
> case with vector modes in force_to_mode though - can you check if
> using gen_lowpart instead of gen_lowpart_or_truncate works?
I checked this and gen_lowpart instead of gen_lowpart_or_truncate
appears fine.
Attached is patch modified to use gen_lowpart, and with the test case
cleaned up & changeloged.
Cheers,
Ralph.
>
> Btw, you missa ChangeLog entry for the testcase for
> testsuite/ChangeLog.
>
> Thanks,
> Richard.
>
> > 2008-10-19 Ralph Loader <suckfish@ihug.co.nz>
> >
> > PR middle-end/37807, middle-end/37809
> > * combine.c (force_to_mode): Do not process vector types.
> >
> > * rtlanal.c (nonzero_bits1): Do not process vector types.
> > (num_sign_bit_copies1): Likewise.
> >
> > Cheers,
> > Ralph.
> >
[-- Attachment #2: gcc.diff --]
[-- Type: text/x-patch, Size: 5640 bytes --]
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 44bb0e5..2dfcce0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2008-11-02 Ralph Loader <suckfish@ihug.co.nz>
+
+ PR middle-end/37807, middle-end/37809
+ * combine.c (force_to_mode): Do not process vector types.
+
+ * rtlanal.c (nonzero_bits1): Do not process vector types.
+ (num_sign_bit_copies1): Likewise.
+
2008-10-25 Richard Sandiford <rdsandiford@googlemail.com>
* config/mips/mips.h (REG_ALLOC_ORDER): Put call-clobbered registers
diff --git a/gcc/combine.c b/gcc/combine.c
index 55baf37..3fb3cad 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -7321,6 +7321,10 @@ force_to_mode (rtx x, enum machine_mode mode, unsigned HOST_WIDE_INT mask,
&& (GET_MODE_MASK (GET_MODE (x)) & ~mask) == 0)
return gen_lowpart (mode, x);
+ /* The arithmetic simplifications here do the wrong thing on vector modes. */
+ if (VECTOR_MODE_P (mode) || VECTOR_MODE_P (GET_MODE (x)))
+ return gen_lowpart (mode, x);
+
switch (code)
{
case CLOBBER:
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index b2038aa..5d9df2c 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -3681,8 +3681,9 @@ nonzero_bits1 (const_rtx x, enum machine_mode mode, const_rtx known_x,
enum rtx_code code;
unsigned int mode_width = GET_MODE_BITSIZE (mode);
- /* For floating-point values, assume all bits are needed. */
- if (FLOAT_MODE_P (GET_MODE (x)) || FLOAT_MODE_P (mode))
+ /* For floating-point and vector values, assume all bits are needed. */
+ if (FLOAT_MODE_P (GET_MODE (x)) || FLOAT_MODE_P (mode)
+ || VECTOR_MODE_P (GET_MODE (x)) || VECTOR_MODE_P (mode))
return nonzero;
/* If X is wider than MODE, use its mode instead. */
@@ -4195,7 +4196,8 @@ num_sign_bit_copies1 (const_rtx x, enum machine_mode mode, const_rtx known_x,
if (mode == VOIDmode)
mode = GET_MODE (x);
- if (mode == VOIDmode || FLOAT_MODE_P (mode) || FLOAT_MODE_P (GET_MODE (x)))
+ if (mode == VOIDmode || FLOAT_MODE_P (mode) || FLOAT_MODE_P (GET_MODE (x))
+ || VECTOR_MODE_P (GET_MODE (x)) || VECTOR_MODE_P (mode))
return 1;
/* For a smaller object, just ignore the high bits. */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 886e542..2f8158b 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2008-11-02 Ralph Loader <suckfish@ihug.co.nz>
+
+ PR middle-end/37807, middle-end/37809
+ * gcc/testsuite/gcc.target/i386/mmx-8.c: New test.
+
2008-10-24 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/37841
diff --git a/gcc/testsuite/gcc.target/i386/mmx-8.c b/gcc/testsuite/gcc.target/i386/mmx-8.c
new file mode 100644
index 0000000..7556693
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mmx-8.c
@@ -0,0 +1,133 @@
+/* PR middle-end/37809 */
+
+/* { dg-do run } */
+/* { dg-options "-O2 -mmmx" } */
+
+#include <mmintrin.h>
+
+#include "mmx-check.h"
+
+// Various tests of cases where it is incorrect to optimise vectors as if they
+// were integers of the same width.
+
+extern void abort (void);
+
+
+void Sshift()
+{
+ volatile __m64 y = (__m64) 0xffffffffll;
+ __m64 x = y & (__m64) 0xffffffffll;
+ x = _m_psradi (x, 1);
+ x &= (__m64) 0x80000000ll;
+ if (0 == (long long) x)
+ abort();
+}
+
+#define SHIFTU(F,B,S,T) \
+ void F() \
+ { \
+ volatile __m64 y = (__m64) 0ll; \
+ __m64 x = y | (__m64) (1llu << B); \
+ if (S > 0) \
+ x = _m_pslldi (x, S); \
+ else \
+ x = _m_psrldi (x, -S); \
+ if (T > 0) \
+ x = _m_pslldi (x, T); \
+ else \
+ x = _m_psrldi (x, -T); \
+ x &= (__m64) (1llu << (B + S + T)); \
+ if ((long long) x) \
+ abort(); \
+ }
+
+SHIFTU (shiftU1, 31, 1, -1)
+SHIFTU (shiftU2, 32, -1, 1)
+SHIFTU (shiftU3, 31, 1, 0)
+SHIFTU (shiftU4, 32, -1, 0)
+
+void add()
+{
+ volatile long long ONE = 1;
+ long long one = ONE;
+
+ __m64 a = (__m64) one;
+ __m64 b = (__m64) -one;
+ __m64 c = a + b;
+ if (0 == (long long) c)
+ abort();
+}
+
+void add2()
+{
+ volatile long long ONE = 1;
+ long long one = ONE;
+
+ __m64 a = (__m64) one;
+ __m64 b = (__m64) -one;
+ __m64 c = _m_paddd (a, b);
+ if (0 == (long long) c)
+ abort();
+}
+
+
+void mult1()
+{
+ volatile __m64 y = (__m64) 0ll;
+ __m64 x = y | (__m64) (1ll << 32);
+ x = x * (__m64) 1ll;
+ x &= (__m64) (1ll << 32);
+ if (0 != (long long) x)
+ abort();
+}
+
+
+void mult2()
+{
+ volatile int foo = 1;
+ unsigned long long one = foo & 1;
+
+ __m64 x = (__m64) (one << 16);
+ x *= x;
+ x &= (__m64) (1ll << 32);
+ if (0 != (long long) x)
+ abort();
+}
+
+
+void mult3()
+{
+ volatile __m64 y = (__m64) (1ll << 32);
+ __m64 a = y;
+ __m64 b = y * (__m64) 1ll;
+ if (((long long) a) == (long long) b)
+ abort();
+}
+
+
+void div()
+{
+ volatile __m64 y = (__m64) 0ll;
+ __m64 x = y | (__m64) (1ull << 32);
+ x |= (__m64) 1ull;
+ x = x / x;
+ if (1ll == (long long) x)
+ abort();
+}
+
+
+void mmx_test (void)
+{
+ Sshift();
+ shiftU1();
+ shiftU2();
+ shiftU3();
+ shiftU4();
+ add();
+ add2();
+
+ mult1();
+ mult2();
+ mult3();
+ div();
+}
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Fix for PR 37809 and 37807
2008-11-05 7:57 ` Ralph Loader
@ 2008-11-05 9:21 ` Richard Guenther
2008-11-13 21:52 ` Kaveh R. GHAZI
0 siblings, 1 reply; 12+ messages in thread
From: Richard Guenther @ 2008-11-05 9:21 UTC (permalink / raw)
To: Ralph Loader; +Cc: gcc-patches
On Wed, Nov 5, 2008 at 8:56 AM, Ralph Loader <suckfish@ihug.co.nz> wrote:
> [Sorry for the delay on following up on this]
>
>> The rtlanal.c parts are ok. I wonder if we ever hit the _or_truncate
>> case with vector modes in force_to_mode though - can you check if
>> using gen_lowpart instead of gen_lowpart_or_truncate works?
>
> I checked this and gen_lowpart instead of gen_lowpart_or_truncate
> appears fine.
>
> Attached is patch modified to use gen_lowpart, and with the test case
> cleaned up & changeloged.
The patch is ok this way.
Thanks,
Richard.
> Cheers,
> Ralph.
>
>
>>
>> Btw, you missa ChangeLog entry for the testcase for
>> testsuite/ChangeLog.
>>
>> Thanks,
>> Richard.
>>
>> > 2008-10-19 Ralph Loader <suckfish@ihug.co.nz>
>> >
>> > PR middle-end/37807, middle-end/37809
>> > * combine.c (force_to_mode): Do not process vector types.
>> >
>> > * rtlanal.c (nonzero_bits1): Do not process vector types.
>> > (num_sign_bit_copies1): Likewise.
>> >
>> > Cheers,
>> > Ralph.
>> >
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Fix for PR 37809 and 37807
2008-11-05 9:21 ` Richard Guenther
@ 2008-11-13 21:52 ` Kaveh R. GHAZI
2008-11-13 22:43 ` Uros Bizjak
2008-11-13 22:54 ` Uros Bizjak
0 siblings, 2 replies; 12+ messages in thread
From: Kaveh R. GHAZI @ 2008-11-13 21:52 UTC (permalink / raw)
To: Richard Guenther; +Cc: Ralph Loader, gcc-patches, ubizjak
On Wed, 5 Nov 2008, Richard Guenther wrote:
> > I checked this and gen_lowpart instead of gen_lowpart_or_truncate
> > appears fine.
> >
> > Attached is patch modified to use gen_lowpart, and with the test case
> > cleaned up & changeloged.
>
> The patch is ok this way.
>
> Thanks,
> Richard.
>
> >> > 2008-10-19 Ralph Loader <suckfish@ihug.co.nz>
> >> >
> >> > PR middle-end/37807, middle-end/37809
> >> > * combine.c (force_to_mode): Do not process vector types.
> >> >
> >> > * rtlanal.c (nonzero_bits1): Do not process vector types.
> >> > (num_sign_bit_copies1): Likewise.
The testcase mmx-8.c crashes on the 4.3/4.2 branches, e.g.
http://gcc.gnu.org/ml/gcc-testresults/2008-11/msg01056.html
Was the patch tested on the branches prior to installation?
--Kaveh
--
Kaveh R. Ghazi ghazi@caip.rutgers.edu
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Fix for PR 37809 and 37807
2008-11-13 21:52 ` Kaveh R. GHAZI
@ 2008-11-13 22:43 ` Uros Bizjak
2008-11-13 22:54 ` Uros Bizjak
1 sibling, 0 replies; 12+ messages in thread
From: Uros Bizjak @ 2008-11-13 22:43 UTC (permalink / raw)
To: Kaveh R. GHAZI; +Cc: Richard Guenther, Ralph Loader, gcc-patches
Kaveh R. GHAZI wrote:
>>> I checked this and gen_lowpart instead of gen_lowpart_or_truncate
>>> appears fine.
>>>
>>> Attached is patch modified to use gen_lowpart, and with the test case
>>> cleaned up & changeloged.
>>>
>> The patch is ok this way.
>>
>> Thanks,
>> Richard.
>>
>>
>>>>> 2008-10-19 Ralph Loader <suckfish@ihug.co.nz>
>>>>>
>>>>> PR middle-end/37807, middle-end/37809
>>>>> * combine.c (force_to_mode): Do not process vector types.
>>>>>
>>>>> * rtlanal.c (nonzero_bits1): Do not process vector types.
>>>>> (num_sign_bit_copies1): Likewise.
>>>>>
>
> The testcase mmx-8.c crashes on the 4.3/4.2 branches, e.g.
> http://gcc.gnu.org/ml/gcc-testresults/2008-11/msg01056.html
>
> Was the patch tested on the branches prior to installation?
>
Yes, but on i686-pc-linux-gnu only. You are testing on x86_64.
Uros.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Fix for PR 37809 and 37807
2008-11-13 21:52 ` Kaveh R. GHAZI
2008-11-13 22:43 ` Uros Bizjak
@ 2008-11-13 22:54 ` Uros Bizjak
2008-11-19 20:32 ` Kaveh R. Ghazi
1 sibling, 1 reply; 12+ messages in thread
From: Uros Bizjak @ 2008-11-13 22:54 UTC (permalink / raw)
To: Kaveh R. GHAZI; +Cc: Richard Guenther, Ralph Loader, gcc-patches
[-- Attachment #1: Type: text/plain, Size: 1320 bytes --]
Kaveh R. GHAZI wrote:
>>>>> 2008-10-19 Ralph Loader <suckfish@ihug.co.nz>
>>>>>
>>>>> PR middle-end/37807, middle-end/37809
>>>>> * combine.c (force_to_mode): Do not process vector types.
>>>>>
>>>>> * rtlanal.c (nonzero_bits1): Do not process vector types.
>>>>> (num_sign_bit_copies1): Likewise.
>>>>>
>
> The testcase mmx-8.c crashes on the 4.3/4.2 branches, e.g.
> http://gcc.gnu.org/ml/gcc-testresults/2008-11/msg01056.html
>
The fix for rtl-optimization/36438 [1], [2] needs to be backported to
4.3 and 4.2 branch.
[1] http://gcc.gnu.org/PR36438
[2] http://gcc.gnu.org/ml/gcc-patches/2008-06/msg00268.html
2008-11-13 Uros Bizjak <ubizjak@gmail.com>
Backport from mainline:
2008-06-06 Uros Bizjak <ubizjak@gmail.com>
PR rtl-optimization/36438
* cse.c (fold_rtx) [ASHIFT, LSHIFTRT, ASHIFTRT]: Break out early
for vector shifts with constant scalar shift operands.
testsuite/ChangeLog:
2008-11-13 Uros Bizjak <ubizjak@gmail.com>
Backport from mainline:
2008-06-06 Uros Bizjak <ubizjak@gmail.com>
PR rtl-optimization/36438
* gcc.target/i386/pr36438.c
Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
{,-m32} and committed to 4.3 branch. It will be committed to 4.2 after
regression test ends.
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 1115 bytes --]
Index: cse.c
===================================================================
--- cse.c (revision 141833)
+++ cse.c (working copy)
@@ -3505,6 +3505,11 @@
&& exact_log2 (- INTVAL (const_arg1)) >= 0)))
break;
+ /* ??? Vector mode shifts by scalar
+ shift operand are not supported yet. */
+ if (is_shift && VECTOR_MODE_P (mode))
+ break;
+
if (is_shift
&& (INTVAL (inner_const) >= GET_MODE_BITSIZE (mode)
|| INTVAL (inner_const) < 0))
Index: testsuite/gcc.target/i386/pr36438.c
===================================================================
--- testsuite/gcc.target/i386/pr36438.c (revision 0)
+++ testsuite/gcc.target/i386/pr36438.c (revision 0)
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mmmx" } */
+
+#include <mmintrin.h>
+
+extern __m64 SetS16 (unsigned short, unsigned short,
+ unsigned short, unsigned short);
+
+void
+foo (__m64 * dest)
+{
+ __m64 mask = SetS16 (0x00FF, 0xFF00, 0x0000, 0x00FF);
+
+ mask = _mm_slli_si64 (mask, 8);
+ mask = _mm_slli_si64 (mask, 8);
+
+ *dest = mask;
+
+ _mm_empty ();
+}
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Fix for PR 37809 and 37807
2008-11-13 22:54 ` Uros Bizjak
@ 2008-11-19 20:32 ` Kaveh R. Ghazi
0 siblings, 0 replies; 12+ messages in thread
From: Kaveh R. Ghazi @ 2008-11-19 20:32 UTC (permalink / raw)
To: Uros Bizjak; +Cc: Richard Guenther, Ralph Loader, gcc-patches
From: "Uros Bizjak" <ubizjak@gmail.com>
> Kaveh R. GHAZI wrote:
>
>>>>>> 2008-10-19 Ralph Loader <suckfish@ihug.co.nz>
>>>>>>
>>>>>> PR middle-end/37807, middle-end/37809
>>>>>> * combine.c (force_to_mode): Do not process vector types.
>>>>>>
>>>>>> * rtlanal.c (nonzero_bits1): Do not process vector types.
>>>>>> (num_sign_bit_copies1): Likewise.
>>>>>>
>>
>> The testcase mmx-8.c crashes on the 4.3/4.2 branches, e.g.
>> http://gcc.gnu.org/ml/gcc-testresults/2008-11/msg01056.html
>>
> The fix for rtl-optimization/36438 [1], [2] needs to be backported to
> 4.3 and 4.2 branch.
>
> [1] http://gcc.gnu.org/PR36438
> [2] http://gcc.gnu.org/ml/gcc-patches/2008-06/msg00268.html
>
> 2008-11-13 Uros Bizjak <ubizjak@gmail.com>
>
> Backport from mainline:
> 2008-06-06 Uros Bizjak <ubizjak@gmail.com>
>
> PR rtl-optimization/36438
> * cse.c (fold_rtx) [ASHIFT, LSHIFTRT, ASHIFTRT]: Break out early
> for vector shifts with constant scalar shift operands.
>
> testsuite/ChangeLog:
>
> 2008-11-13 Uros Bizjak <ubizjak@gmail.com>
>
> Backport from mainline:
> 2008-06-06 Uros Bizjak <ubizjak@gmail.com>
>
> PR rtl-optimization/36438
> * gcc.target/i386/pr36438.c
>
> Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
> {,-m32} and committed to 4.3 branch. It will be committed to 4.2 after
> regression test ends.
>
> Uros.
To confirm, the problem has been resolved on both branches. Thanks for the
quick fix.
--Kaveh
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Fix for PR 37809 and 37807
2008-10-22 17:38 ` Andreas Schwab
@ 2008-10-22 18:26 ` Uros Bizjak
0 siblings, 0 replies; 12+ messages in thread
From: Uros Bizjak @ 2008-10-22 18:26 UTC (permalink / raw)
To: Andreas Schwab; +Cc: Ralph Loader, GCC Patches
Andreas Schwab wrote:
>>> +
>>> +
>>> +void Sshift()
>>> +{
>>> + volatile __m64 y = (__m64) 0xffffffffll;
>>> + __m64 x = y & (__m64) 0xffffffffll;
>>> + x = _m_psradi (x, 1);
>>> + x &= (__m64) 0x80000000ll;
>>> + if (0 == (long long) x)
>>>
>>>
>> Aliasing violation on x variable.
>>
>
> How is that an aliasing violation? There are no pointers involved.
>
Ouch, indeed.
Sorry for the noise...
Uros.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Fix for PR 37809 and 37807
2008-10-22 17:13 Uros Bizjak
@ 2008-10-22 17:38 ` Andreas Schwab
2008-10-22 18:26 ` Uros Bizjak
0 siblings, 1 reply; 12+ messages in thread
From: Andreas Schwab @ 2008-10-22 17:38 UTC (permalink / raw)
To: Uros Bizjak; +Cc: Ralph Loader, GCC Patches
Uros Bizjak <ubizjak@gmail.com> writes:
>> +
>> +
>> +void Sshift()
>> +{
>> + volatile __m64 y = (__m64) 0xffffffffll;
>> + __m64 x = y & (__m64) 0xffffffffll;
>> + x = _m_psradi (x, 1);
>> + x &= (__m64) 0x80000000ll;
>> + if (0 == (long long) x)
>>
>
> Aliasing violation on x variable.
How is that an aliasing violation? There are no pointers involved.
Andreas.
--
Andreas Schwab, SuSE Labs, schwab@suse.de
SuSE Linux Products GmbH, MaxfeldstraÃe 5, 90409 Nürnberg, Germany
PGP key fingerprint = 58CA 54C7 6D53 942B 1756 01D3 44D5 214B 8276 4ED5
"And now for something completely different."
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] Fix for PR 37809 and 37807
@ 2008-10-22 17:13 Uros Bizjak
2008-10-22 17:38 ` Andreas Schwab
0 siblings, 1 reply; 12+ messages in thread
From: Uros Bizjak @ 2008-10-22 17:13 UTC (permalink / raw)
To: Ralph Loader, GCC Patches
Hello!
> diff --git a/gcc/testsuite/gcc.target/i386/mmx-8.c b/gcc/testsuite/gcc.target/i386/mmx-8.c
> new file mode 100644
> index 0000000..9d665f0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/mmx-8.c
> @@ -0,0 +1,136 @@
> +/* PR middle-end/37809 */
> +
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mmmx" } */
> +
> +#include <mmintrin.h>
> +
> +// Various tests of cases where it is incorrect to optimise vectors as if they
> +// were integers of the same width.
> +
> +extern void abort (void);
> +
> +/* #include <stdio.h> */
> +/* #define FAIL(...) printf (__VA_ARGS__); */
> +#define FAIL(...) abort();
>
Please remove the definitions above and simply call abort() at FAIL
site. Alternatively, you can check for DEBUG definition to enable
additional debug information (see i.e. gcc.target/i386/mmx-4.c)
> +
> +
> +void Sshift()
> +{
> + volatile __m64 y = (__m64) 0xffffffffll;
> + __m64 x = y & (__m64) 0xffffffffll;
> + x = _m_psradi (x, 1);
> + x &= (__m64) 0x80000000ll;
> + if (0 == (long long) x)
>
Aliasing violation on x variable. You should use union here and through
the source. You should check your testcase with -Wstrict-aliasing {,=n}.
> +int main()
> +{
> + Sshift();
> + shiftU1();
> + shiftU2();
> + shiftU3();
> + shiftU4();
> + add();
> + add2();
> +
> + mult1();
> + mult2();
> + mult3();
> + div();
> + return 0;
> +}
>
Please note, that since this is MMX runtime testcase, you should check
for MMX support at runtime. Please look into
gcc.target/i386/builtin-apply-mmx.c or gcc.target/i386/mmx-4.c how to
use support for runtime check, as present in mmx-check.h header.
Thanks,
Uros.
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2008-11-19 19:54 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-10-22 8:36 [PATCH] Fix for PR 37809 and 37807 Ralph Loader
2008-10-22 10:42 ` Richard Guenther
2008-11-05 7:57 ` Ralph Loader
2008-11-05 9:21 ` Richard Guenther
2008-11-13 21:52 ` Kaveh R. GHAZI
2008-11-13 22:43 ` Uros Bizjak
2008-11-13 22:54 ` Uros Bizjak
2008-11-19 20:32 ` Kaveh R. Ghazi
2008-10-22 13:49 ` Paolo Bonzini
2008-10-22 17:13 Uros Bizjak
2008-10-22 17:38 ` Andreas Schwab
2008-10-22 18:26 ` Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).