public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix for PR 37809 and 37807
@ 2008-10-22  8:36 Ralph Loader
  2008-10-22 10:42 ` Richard Guenther
  2008-10-22 13:49 ` Paolo Bonzini
  0 siblings, 2 replies; 12+ messages in thread
From: Ralph Loader @ 2008-10-22  8:36 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 611 bytes --]

Hi,

This patch has a fix for PRs 37807 (exponential time with MMX builtins)
and 37809 (wrong code with MMX builtins).

Could an expert check that I'm not inadvertently disabling important
optimisations for vector types?  I believe the code in question was
only ever intended to be used for scalar types, but it is possible that
I've missed something.

2008-10-19  Ralph Loader  <suckfish@ihug.co.nz>

	PR middle-end/37807, middle-end/37809
	* combine.c (force_to_mode): Do not process vector types.

	* rtlanal.c (nonzero_bits1): Do not process vector types.
	(num_sign_bit_copies1): Likewise.

Cheers,
Ralph.

[-- Attachment #2: pr.diff --]
[-- Type: text/x-patch, Size: 5375 bytes --]

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 80b318f..0b2ccf1 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2008-10-19  Ralph Loader  <suckfish@ihug.co.nz>
+
+	PR middle-end/37807, middle-end/37809
+	* combine.c (force_to_mode): Do not process vector types.
+
+	* rtlanal.c (nonzero_bits1): Do not process vector types.
+	(num_sign_bit_copies1): Likewise.
+
 2008-10-17  Andreas Krebbel  <krebbel1@de.ibm.com>
 
 	* c-parser.c (c_parser_binary_expression): Silence the
diff --git a/gcc/combine.c b/gcc/combine.c
index 55baf37..43f7cdc 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -7321,6 +7321,10 @@ force_to_mode (rtx x, enum machine_mode mode, unsigned HOST_WIDE_INT mask,
       && (GET_MODE_MASK (GET_MODE (x)) & ~mask) == 0)
     return gen_lowpart (mode, x);
 
+  /* The arithmetic simplifications here do the wrong thing on vector modes.  */
+  if (VECTOR_MODE_P (mode) || VECTOR_MODE_P (GET_MODE (x)))
+      return gen_lowpart_or_truncate (mode, x);
+
   switch (code)
     {
     case CLOBBER:
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index b2038aa..5d9df2c 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -3681,8 +3681,9 @@ nonzero_bits1 (const_rtx x, enum machine_mode mode, const_rtx known_x,
   enum rtx_code code;
   unsigned int mode_width = GET_MODE_BITSIZE (mode);
 
-  /* For floating-point values, assume all bits are needed.  */
-  if (FLOAT_MODE_P (GET_MODE (x)) || FLOAT_MODE_P (mode))
+  /* For floating-point and vector values, assume all bits are needed.  */
+  if (FLOAT_MODE_P (GET_MODE (x)) || FLOAT_MODE_P (mode)
+      || VECTOR_MODE_P (GET_MODE (x)) || VECTOR_MODE_P (mode))
     return nonzero;
 
   /* If X is wider than MODE, use its mode instead.  */
@@ -4195,7 +4196,8 @@ num_sign_bit_copies1 (const_rtx x, enum machine_mode mode, const_rtx known_x,
   if (mode == VOIDmode)
     mode = GET_MODE (x);
 
-  if (mode == VOIDmode || FLOAT_MODE_P (mode) || FLOAT_MODE_P (GET_MODE (x)))
+  if (mode == VOIDmode || FLOAT_MODE_P (mode) || FLOAT_MODE_P (GET_MODE (x))
+      || VECTOR_MODE_P (GET_MODE (x)) || VECTOR_MODE_P (mode))
     return 1;
 
   /* For a smaller object, just ignore the high bits.  */
diff --git a/gcc/testsuite/gcc.target/i386/mmx-8.c b/gcc/testsuite/gcc.target/i386/mmx-8.c
new file mode 100644
index 0000000..9d665f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mmx-8.c
@@ -0,0 +1,136 @@
+/* PR middle-end/37809 */
+
+/* { dg-do run } */
+/* { dg-options "-O2 -mmmx" } */
+
+#include <mmintrin.h>
+
+// Various tests of cases where it is incorrect to optimise vectors as if they
+// were integers of the same width.
+
+extern void abort (void);
+
+/* #include <stdio.h> */
+/* #define FAIL(...) printf (__VA_ARGS__); */
+#define FAIL(...) abort();
+
+
+void Sshift()
+{
+    volatile __m64 y = (__m64) 0xffffffffll;
+    __m64 x = y & (__m64) 0xffffffffll;
+    x = _m_psradi (x, 1);
+    x &= (__m64) 0x80000000ll;
+    if (0 == (long long) x)
+        FAIL ("Sshift\n");
+}
+
+#define SHIFTU(F,B,S,T)                         \
+    void F()                                    \
+    {                                           \
+        volatile __m64 y = (__m64) 0ll;         \
+        __m64 x = y | (__m64) (1llu << B);      \
+        if (S > 0)                              \
+            x = _m_pslldi (x, S);               \
+        else                                    \
+            x = _m_psrldi (x, -S);              \
+        if (T > 0)                              \
+            x = _m_pslldi (x, T);               \
+        else                                    \
+            x = _m_psrldi (x, -T);              \
+        x &= (__m64) (1llu << (B + S + T));     \
+        if ((long long) x)                      \
+            FAIL ("%s\n", #F);                  \
+    }
+
+SHIFTU (shiftU1, 31, 1, -1)
+SHIFTU (shiftU2, 32, -1, 1)
+SHIFTU (shiftU3, 31, 1, 0)
+SHIFTU (shiftU4, 32, -1, 0)
+
+void add()
+{
+    volatile long long ONE = 1;
+    long long one = ONE;
+
+    __m64 a = (__m64) one;
+    __m64 b = (__m64) -one;
+    __m64 c = a + b;
+    if (0 == (long long) c)
+        FAIL ("add\n");
+}
+
+void add2()
+{
+    volatile long long ONE = 1;
+    long long one = ONE;
+
+    __m64 a = (__m64) one;
+    __m64 b = (__m64) -one;
+    __m64 c = _m_paddd (a, b);
+    if (0 == (long long) c)
+        FAIL ("add2\n");
+}
+
+
+void mult1()
+{
+    volatile __m64 y = (__m64) 0ll;
+    __m64 x = y | (__m64) (1ll << 32);
+    x = x * (__m64) 1ll;
+    x &= (__m64) (1ll << 32);
+    if (0 != (long long) x)
+        FAIL ("multi1\n");
+}
+
+
+void mult2()
+{
+    volatile int foo = 1;
+    unsigned long long one = foo & 1;
+
+    __m64 x = (__m64) (one << 16);
+    x *= x;
+    x &= (__m64) (1ll << 32);
+    if (0 != (long long) x)
+        FAIL ("mult2\n");
+}
+
+
+void mult3()
+{
+    volatile __m64 y = (__m64) (1ll << 32);
+    __m64 a = y;
+    __m64 b = y * (__m64) 1ll;
+    if (((long long) a) == (long long) b)
+        FAIL ("mult3\n");
+}
+
+
+void div()
+{
+    volatile __m64 y = (__m64) 0ll;
+    __m64 x = y | (__m64) (1ull << 32);
+    x |= (__m64) 1ull;
+    x = x / x;
+    if (1ll == (long long) x)
+        FAIL ("div\n");
+}
+
+
+int main()
+{
+    Sshift();
+    shiftU1();
+    shiftU2();
+    shiftU3();
+    shiftU4();
+    add();
+    add2();
+
+    mult1();
+    mult2();
+    mult3();
+    div();
+    return 0;
+}

^ permalink raw reply	[flat|nested] 12+ messages in thread
* Re: [PATCH] Fix for PR 37809 and 37807
@ 2008-10-22 17:13 Uros Bizjak
  2008-10-22 17:38 ` Andreas Schwab
  0 siblings, 1 reply; 12+ messages in thread
From: Uros Bizjak @ 2008-10-22 17:13 UTC (permalink / raw)
  To: Ralph Loader, GCC Patches

Hello!

> diff --git a/gcc/testsuite/gcc.target/i386/mmx-8.c b/gcc/testsuite/gcc.target/i386/mmx-8.c
> new file mode 100644
> index 0000000..9d665f0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/mmx-8.c
> @@ -0,0 +1,136 @@
> +/* PR middle-end/37809 */
> +
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mmmx" } */
> +
> +#include <mmintrin.h>
> +
> +// Various tests of cases where it is incorrect to optimise vectors as if they
> +// were integers of the same width.
> +
> +extern void abort (void);
> +
> +/* #include <stdio.h> */
> +/* #define FAIL(...) printf (__VA_ARGS__); */
> +#define FAIL(...) abort();
>   

Please remove the definitions above and simply call abort() at each FAIL
site. Alternatively, you can check for a DEBUG definition to enable
additional debug information (see e.g. gcc.target/i386/mmx-4.c).

> +
> +
> +void Sshift()
> +{
> +    volatile __m64 y = (__m64) 0xffffffffll;
> +    __m64 x = y & (__m64) 0xffffffffll;
> +    x = _m_psradi (x, 1);
> +    x &= (__m64) 0x80000000ll;
> +    if (0 == (long long) x)
>   

Aliasing violation on the x variable. You should use a union here and
throughout the source. You should check your testcase with -Wstrict-aliasing {,=n}.

> +int main()
> +{
> +    Sshift();
> +    shiftU1();
> +    shiftU2();
> +    shiftU3();
> +    shiftU4();
> +    add();
> +    add2();
> +
> +    mult1();
> +    mult2();
> +    mult3();
> +    div();
> +    return 0;
> +}
>   

Please note that since this is an MMX runtime testcase, you should check
for MMX support at runtime. Please look at
gcc.target/i386/builtin-apply-mmx.c or gcc.target/i386/mmx-4.c to see how to
use the runtime-check support provided by the mmx-check.h header.

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2008-11-19 19:54 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-10-22  8:36 [PATCH] Fix for PR 37809 and 37807 Ralph Loader
2008-10-22 10:42 ` Richard Guenther
2008-11-05  7:57   ` Ralph Loader
2008-11-05  9:21     ` Richard Guenther
2008-11-13 21:52       ` Kaveh R. GHAZI
2008-11-13 22:43         ` Uros Bizjak
2008-11-13 22:54         ` Uros Bizjak
2008-11-19 20:32           ` Kaveh R. Ghazi
2008-10-22 13:49 ` Paolo Bonzini
2008-10-22 17:13 Uros Bizjak
2008-10-22 17:38 ` Andreas Schwab
2008-10-22 18:26   ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).