From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 20951 invoked by alias); 22 Oct 2008 05:13:24 -0000 Received: (qmail 20921 invoked by uid 22791); 22 Oct 2008 05:13:22 -0000 X-Spam-Check-By: sourceware.org Received: from mailfilter11.ihug.co.nz (HELO mailfilter11.ihug.co.nz) (203.109.136.11) by sourceware.org (qpsmtpd/0.31) with ESMTP; Wed, 22 Oct 2008 05:12:39 +0000 X-IronPort-Anti-Spam-Filtered: true X-IronPort-Anti-Spam-Result: Aq8EAART/kh2XIF0/2dsb2JhbACBcsQvg1CBCg X-IronPort-AV: E=Sophos;i="4.33,461,1220184000"; d="diff'?scan'208";a="138631357" Received: from 118-92-129-116.dsl.dyn.ihug.co.nz (HELO i.geek.nz) ([118.92.129.116]) by smtp.mailfilter5.ihug.co.nz with SMTP; 22 Oct 2008 18:12:35 +1300 Date: Wed, 22 Oct 2008 08:36:00 -0000 From: Ralph Loader To: gcc-patches@gcc.gnu.org Subject: [PATCH] Fix for PR 37809 and 37807 Message-ID: <20081022181234.2acf7548@i.geek.nz> X-Mailer: Claws Mail 3.6.0 (GTK+ 2.14.4; x86_64-redhat-linux-gnu) Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="MP_/HJLC91qddcc.aS095i3utWA" X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org X-SW-Source: 2008-10/txt/msg00912.txt.bz2 --MP_/HJLC91qddcc.aS095i3utWA Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Content-Disposition: inline Content-length: 611 Hi, This patch has a fix for prs 37807 (exponential time with MMX builtins) and 37809 (wrong code with MMX builtins). Could an expert check that I'm not inadvertently disabling important optimisations for vector types? I believe the code in question was only ever intended to be used for scalar types, but it is possible that I've missed something. 2008-10-19 Ralph Loader PR middle-end/37807, middle-end/37809 * combine.c (force_to_mode): Do not process vector types. * rtlanal.c (nonzero_bits1): Do not process vector types. 
(num_sign_bit_copies1): Likewise. Cheers, Ralph. --MP_/HJLC91qddcc.aS095i3utWA Content-Type: text/x-patch; name=pr.diff Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename=pr.diff Content-length: 5375 diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 80b318f..0b2ccf1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2008-10-19 Ralph Loader + + PR middle-end/37807, middle-end/37809 + * combine.c (force_to_mode): Do not process vector types. + + * rtlanal.c (nonzero_bits1): Do not process vector types. + (num_sign_bit_copies1): Likewise. + 2008-10-17 Andreas Krebbel * c-parser.c (c_parser_binary_expression): Silence the diff --git a/gcc/combine.c b/gcc/combine.c index 55baf37..43f7cdc 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -7321,6 +7321,10 @@ force_to_mode (rtx x, enum machine_mode mode, unsigned HOST_WIDE_INT mask, && (GET_MODE_MASK (GET_MODE (x)) & ~mask) == 0) return gen_lowpart (mode, x); + /* The arithmetic simplifications here do the wrong thing on vector modes. */ + if (VECTOR_MODE_P (mode) || VECTOR_MODE_P (GET_MODE (x))) + return gen_lowpart_or_truncate (mode, x); + switch (code) { case CLOBBER: diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c index b2038aa..5d9df2c 100644 --- a/gcc/rtlanal.c +++ b/gcc/rtlanal.c @@ -3681,8 +3681,9 @@ nonzero_bits1 (const_rtx x, enum machine_mode mode, const_rtx known_x, enum rtx_code code; unsigned int mode_width = GET_MODE_BITSIZE (mode); - /* For floating-point values, assume all bits are needed. */ - if (FLOAT_MODE_P (GET_MODE (x)) || FLOAT_MODE_P (mode)) + /* For floating-point and vector values, assume all bits are needed. */ + if (FLOAT_MODE_P (GET_MODE (x)) || FLOAT_MODE_P (mode) + || VECTOR_MODE_P (GET_MODE (x)) || VECTOR_MODE_P (mode)) return nonzero; /* If X is wider than MODE, use its mode instead. 
*/ @@ -4195,7 +4196,8 @@ num_sign_bit_copies1 (const_rtx x, enum machine_mode mode, const_rtx known_x, if (mode == VOIDmode) mode = GET_MODE (x); - if (mode == VOIDmode || FLOAT_MODE_P (mode) || FLOAT_MODE_P (GET_MODE (x))) + if (mode == VOIDmode || FLOAT_MODE_P (mode) || FLOAT_MODE_P (GET_MODE (x)) + || VECTOR_MODE_P (GET_MODE (x)) || VECTOR_MODE_P (mode)) return 1; /* For a smaller object, just ignore the high bits. */ diff --git a/gcc/testsuite/gcc.target/i386/mmx-8.c b/gcc/testsuite/gcc.target/i386/mmx-8.c new file mode 100644 index 0000000..9d665f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/mmx-8.c @@ -0,0 +1,136 @@ +/* PR middle-end/37809 */ + +/* { dg-do run } */ +/* { dg-options "-O2 -mmmx" } */ + +#include + +// Various tests of cases where it is incorrect to optimise vectors as if they +// were integers of the same width. + +extern void abort (void); + +/* #include */ +/* #define FAIL(...) printf (__VA_ARGS__); */ +#define FAIL(...) abort(); + + +void Sshift() +{ + volatile __m64 y = (__m64) 0xffffffffll; + __m64 x = y & (__m64) 0xffffffffll; + x = _m_psradi (x, 1); + x &= (__m64) 0x80000000ll; + if (0 == (long long) x) + FAIL ("Sshift\n"); +} + +#define SHIFTU(F,B,S,T) \ + void F() \ + { \ + volatile __m64 y = (__m64) 0ll; \ + __m64 x = y | (__m64) (1llu << B); \ + if (S > 0) \ + x = _m_pslldi (x, S); \ + else \ + x = _m_psrldi (x, -S); \ + if (T > 0) \ + x = _m_pslldi (x, T); \ + else \ + x = _m_psrldi (x, -T); \ + x &= (__m64) (1llu << (B + S + T)); \ + if ((long long) x) \ + FAIL ("%s\n", #F); \ + } + +SHIFTU (shiftU1, 31, 1, -1) +SHIFTU (shiftU2, 32, -1, 1) +SHIFTU (shiftU3, 31, 1, 0) +SHIFTU (shiftU4, 32, -1, 0) + +void add() +{ + volatile long long ONE = 1; + long long one = ONE; + + __m64 a = (__m64) one; + __m64 b = (__m64) -one; + __m64 c = a + b; + if (0 == (long long) c) + FAIL ("add\n"); +} + +void add2() +{ + volatile long long ONE = 1; + long long one = ONE; + + __m64 a = (__m64) one; + __m64 b = (__m64) -one; + __m64 c = _m_paddd 
(a, b); + if (0 == (long long) c) + FAIL ("add2\n"); +} + + +void mult1() +{ + volatile __m64 y = (__m64) 0ll; + __m64 x = y | (__m64) (1ll << 32); + x = x * (__m64) 1ll; + x &= (__m64) (1ll << 32); + if (0 != (long long) x) + FAIL ("multi1\n"); +} + + +void mult2() +{ + volatile int foo = 1; + unsigned long long one = foo & 1; + + __m64 x = (__m64) (one << 16); + x *= x; + x &= (__m64) (1ll << 32); + if (0 != (long long) x) + FAIL ("mult2\n"); +} + + +void mult3() +{ + volatile __m64 y = (__m64) (1ll << 32); + __m64 a = y; + __m64 b = y * (__m64) 1ll; + if (((long long) a) == (long long) b) + FAIL ("mult3\n"); +} + + +void div() +{ + volatile __m64 y = (__m64) 0ll; + __m64 x = y | (__m64) (1ull << 32); + x |= (__m64) 1ull; + x = x / x; + if (1ll == (long long) x) + FAIL ("div\n"); +} + + +int main() +{ + Sshift(); + shiftU1(); + shiftU2(); + shiftU3(); + shiftU4(); + add(); + add2(); + + mult1(); + mult2(); + mult3(); + div(); + return 0; +} --MP_/HJLC91qddcc.aS095i3utWA--