public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug inline-asm/42881]  New: SSE2 intrinsics miscompiled at -O0 -march=k8
@ 2010-01-27 11:03 bugs at 59A2 dot org
  2010-01-27 16:17 ` [Bug middle-end/42881] [4.5 Regression] " rguenth at gcc dot gnu dot org
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: bugs at 59A2 dot org @ 2010-01-27 11:03 UTC (permalink / raw)
  To: gcc-bugs

A simple test program, a.c:

#include <stdio.h>
#include <emmintrin.h>
/*
 * Reproducer for the miscompilation reported here: fill an SSE2 vector with
 * the value 3 via _mm_set1_pd, store it to a[] with _mm_storeu_pd, and print
 * both elements.  A correct build prints "3.000000 3.000000"; the broken
 * -O0 -march=k8 build prints "0.000000 0.000000".
 */
int main(void) {                                                                
  double a[2];                                                                  
  /* This is the statement whose expansion goes wrong at -O0 -march=k8. */
  __m128d x = _mm_set1_pd(3);                                                   
  /* Store through the unaligned-store intrinsic (movupd in the listings). */
  _mm_storeu_pd(a,x);                                                           
  printf("%f %f\n",a[0],a[1]);                                                  
  return 0;                                                                     
}

$ gcc-4.5 -O0 -march=k8 a.c && ./a.out  # broken
0.000000 0.000000
$ gcc-4.5 -O1 -march=k8 a.c && ./a.out  # good
3.000000 3.000000
$ gcc-4.5 -O0 -march=core2 a.c && ./a.out # good
3.000000 3.000000

$ gcc-4.5 -O0 -march=k8 -c a.c && objdump -d -M intel --prefix-addresses a.o | grep main
0000000000000000 <main> push   rbp
0000000000000001 <main+0x1> mov    rbp,rsp
0000000000000004 <main+0x4> sub    rsp,0x40
0000000000000008 <main+0x8> mov    rax,0x4008000000000000
0000000000000012 <main+0x12> mov    QWORD PTR [rbp-0x8],rax
0000000000000016 <main+0x16> movsd  xmm2,xmm1
000000000000001a <main+0x1a> unpcklpd xmm2,xmm2
000000000000001e <main+0x1e> movapd xmm0,xmm2
0000000000000022 <main+0x22> movlpd xmm1,QWORD PTR [rbp-0x8]
0000000000000027 <main+0x27> movaps XMMWORD PTR [rbp-0x40],xmm0
000000000000002b <main+0x2b> movapd xmm0,XMMWORD PTR [rbp-0x40]
0000000000000030 <main+0x30> lea    rax,[rbp-0x30]
0000000000000034 <main+0x34> mov    QWORD PTR [rbp-0x10],rax
0000000000000038 <main+0x38> movaps XMMWORD PTR [rbp-0x20],xmm0
000000000000003c <main+0x3c> mov    rax,QWORD PTR [rbp-0x10]
0000000000000040 <main+0x40> movapd xmm0,XMMWORD PTR [rbp-0x20]
0000000000000045 <main+0x45> movupd XMMWORD PTR [rax],xmm0
0000000000000049 <main+0x49> movlpd xmm1,QWORD PTR [rbp-0x28]
000000000000004e <main+0x4e> movlpd xmm0,QWORD PTR [rbp-0x30]
0000000000000053 <main+0x53> mov    eax,0x0
0000000000000058 <main+0x58> mov    rdi,rax
000000000000005b <main+0x5b> mov    eax,0x2
0000000000000060 <main+0x60> call   0000000000000065 <main+0x65>
0000000000000065 <main+0x65> mov    eax,0x0
000000000000006a <main+0x6a> leave  
000000000000006b <main+0x6b> ret

$ gcc-4.5 -O0 -march=core2 -c a.c && objdump -d -M intel --prefix-addresses a.o | grep main
0000000000000000 <main> push   rbp
0000000000000001 <main+0x1> mov    rbp,rsp
0000000000000004 <main+0x4> sub    rsp,0x40
0000000000000008 <main+0x8> mov    rax,0x4008000000000000
0000000000000012 <main+0x12> mov    QWORD PTR [rbp-0x8],rax
0000000000000016 <main+0x16> movddup xmm0,QWORD PTR [rbp-0x8]
000000000000001b <main+0x1b> movapd XMMWORD PTR [rbp-0x40],xmm0
0000000000000020 <main+0x20> movapd xmm0,XMMWORD PTR [rbp-0x40]
0000000000000025 <main+0x25> lea    rax,[rbp-0x30]
0000000000000029 <main+0x29> mov    QWORD PTR [rbp-0x10],rax
000000000000002d <main+0x2d> movapd XMMWORD PTR [rbp-0x20],xmm0
0000000000000032 <main+0x32> mov    rax,QWORD PTR [rbp-0x10]
0000000000000036 <main+0x36> movapd xmm0,XMMWORD PTR [rbp-0x20]
000000000000003b <main+0x3b> movupd XMMWORD PTR [rax],xmm0
000000000000003f <main+0x3f> mov    rdx,QWORD PTR [rbp-0x28]
0000000000000043 <main+0x43> movsd  xmm0,QWORD PTR [rbp-0x30]
0000000000000048 <main+0x48> mov    eax,0x0
000000000000004d <main+0x4d> movq   xmm1,rdx
0000000000000052 <main+0x52> mov    rdi,rax
0000000000000055 <main+0x55> mov    eax,0x2
000000000000005a <main+0x5a> call   000000000000005f <main+0x5f>
000000000000005f <main+0x5f> mov    eax,0x0
0000000000000064 <main+0x64> leave  
0000000000000065 <main+0x65> ret

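The incorrect bit is the following sequence, in which movsd reads xmm1 before
the movlpd that loads the constant into it: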

0000000000000016 <main+0x16> movsd  xmm2,xmm1
000000000000001a <main+0x1a> unpcklpd xmm2,xmm2
000000000000001e <main+0x1e> movapd xmm0,xmm2
0000000000000022 <main+0x22> movlpd xmm1,QWORD PTR [rbp-0x8]
0000000000000027 <main+0x27> movaps XMMWORD PTR [rbp-0x40],xmm0

which is corrected by -march=core2 to

0000000000000016 <main+0x16> movddup xmm0,QWORD PTR [rbp-0x8]
000000000000001b <main+0x1b> movapd XMMWORD PTR [rbp-0x40],xmm0

Of course all the redundant stores are collapsed at any positive optimization
level, and the result becomes correct regardless of -march.  Unfortunately, the
bug is in the generic x86-64 target so it's highly visible.  This bug is not
present in 4.4.2.

$ gcc-4.5 -v
Using built-in specs.
COLLECT_GCC=gcc-4.5
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-unknown-linux-gnu/4.5.0/lto-wrapper
Target: x86_64-unknown-linux-gnu
Configured with: ../configure --prefix=/usr --enable-languages=c,c++,fortran
--enable-gold --enable-plugin --enable-threads=posix --enable-__cxa_atexit
--enable-clocale=gnu --enable-lto --enable-gnu-unique-object --disable-multilib
--disable-libstdcxx-pch --with-tune=generic --with-system-zlib --with-ppl
--with-cloog --libdir=/usr/lib --libexecdir=/usr/lib --mandir=/usr/share/man
--infodir=/usr/share/info --disable-werror --enable-checking=release
--program-suffix=-4.5 --enable-version-specific-runtime-libs : (reconfigured)
../configure --prefix=/usr --enable-languages=c,c++,fortran --enable-gold
--enable-plugin --enable-threads=posix --enable-__cxa_atexit
--enable-clocale=gnu --enable-lto --enable-gnu-unique-object --disable-multilib
--disable-libstdcxx-pch --with-system-zlib --with-ppl --with-cloog
--libdir=/usr/lib --libexecdir=/usr/lib --mandir=/usr/share/man
--infodir=/usr/share/info --disable-werror --enable-checking=release
--program-suffix=-4.5 --enable-version-specific-runtime-libs
Thread model: posix
gcc version 4.5.0 20100121 (experimental) (GCC)


-- 
           Summary: SSE2 intrinsics miscompiled at -O0 -march=k8
           Product: gcc
           Version: 4.5.0
            Status: UNCONFIRMED
          Severity: major
          Priority: P3
         Component: inline-asm
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: bugs at 59A2 dot org
 GCC build triplet: x86_64-unknown-linux-gnu
  GCC host triplet: x86_64-unknown-linux-gnu
GCC target triplet: x86_64-unknown-linux-gnu


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42881


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Bug middle-end/42881] [4.5 Regression] SSE2 intrinsics miscompiled at -O0 -march=k8
  2010-01-27 11:03 [Bug inline-asm/42881] New: SSE2 intrinsics miscompiled at -O0 -march=k8 bugs at 59A2 dot org
@ 2010-01-27 16:17 ` rguenth at gcc dot gnu dot org
  2010-01-27 17:29 ` [Bug target/42881] " matz at gcc dot gnu dot org
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: rguenth at gcc dot gnu dot org @ 2010-01-27 16:17 UTC (permalink / raw)
  To: gcc-bugs



------- Comment #1 from rguenth at gcc dot gnu dot org  2010-01-27 16:16 -------
At -O0 I see in .optimized:

<bb 2>:
  __F_7 = 3.0e+0;
  D.6259_8 = {__F_7, __F_7};
  D.6258_11 = D.6259_8;
  x.0_1 = D.6258_11;
  x = x.0_1;
  x.1_2 = x;
  __P_9 = &a[0];
  __A_10 = x.1_2;
  __builtin_ia32_storeupd (__P_9, __A_10);
  D.6247_3 = a[1];
  D.6248_4 = a[0];
  D.6249_5 = (const char * restrict) &"%f %f\n"[0];
  printf (D.6249_5, D.6248_4, D.6247_3);
  D.6250_6 = 0;
  return D.6250_6;

Nothing wrong here so far.  But we expand:

;; D.6259_8 = {__F_7, __F_7};

(insn 7 6 8 /usr/lib64/gcc/x86_64-suse-linux/4.5/include/emmintrin.h:65
    (set (reg:V2DF 68)
        (vec_duplicate:V2DF (reg:DF 69))) 1501 {vec_dupv2df} (nil))

(insn 8 7 9 /usr/lib64/gcc/x86_64-suse-linux/4.5/include/emmintrin.h:65
    (set (reg:DF 69)
        (mem/c/i:DF (plus:DI (reg/f:DI 54 virtual-stack-vars)
                (const_int -8 [0xfffffffffffffff8])) [0 __F+0 S8 A64])) -1 (nil))

(insn 9 8 0 /usr/lib64/gcc/x86_64-suse-linux/4.5/include/emmintrin.h:65
    (set (reg:V2DF 64 [ D.6259 ])
        (reg:V2DF 68)) -1 (nil))

Whoops.  We duplicate reg:DF 69 (insn 7) before setting it (insn 8).  That
works OK in 4.4.

My bet is SSA expand.


-- 

rguenth at gcc dot gnu dot org changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |matz at gcc dot gnu dot org
             Status|UNCONFIRMED                 |NEW
          Component|inline-asm                  |middle-end
     Ever Confirmed|0                           |1
           Keywords|                            |wrong-code
           Priority|P3                          |P1
   Last reconfirmed|0000-00-00 00:00:00         |2010-01-27 16:16:46
               date|                            |
            Summary|SSE2 intrinsics miscompiled |[4.5 Regression] SSE2
                   |at -O0 -march=k8            |intrinsics miscompiled at -
                   |                            |O0 -march=k8
   Target Milestone|---                         |4.5.0


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42881


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Bug target/42881] [4.5 Regression] SSE2 intrinsics miscompiled at -O0 -march=k8
  2010-01-27 11:03 [Bug inline-asm/42881] New: SSE2 intrinsics miscompiled at -O0 -march=k8 bugs at 59A2 dot org
  2010-01-27 16:17 ` [Bug middle-end/42881] [4.5 Regression] " rguenth at gcc dot gnu dot org
@ 2010-01-27 17:29 ` matz at gcc dot gnu dot org
  2010-01-28 14:12 ` matz at gcc dot gnu dot org
  2010-01-28 14:40 ` matz at gcc dot gnu dot org
  3 siblings, 0 replies; 5+ messages in thread
From: matz at gcc dot gnu dot org @ 2010-01-27 17:29 UTC (permalink / raw)
  To: gcc-bugs



------- Comment #2 from matz at gcc dot gnu dot org  2010-01-27 17:28 -------
I'm testing a patch.  The bug is in target code, not in SSA expand.


-- 

matz at gcc dot gnu dot org changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
         AssignedTo|unassigned at gcc dot gnu   |matz at gcc dot gnu dot org
                   |dot org                     |
             Status|NEW                         |ASSIGNED
          Component|middle-end                  |target
   Last reconfirmed|2010-01-27 16:16:46         |2010-01-27 17:28:56
               date|                            |


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42881


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Bug target/42881] [4.5 Regression] SSE2 intrinsics miscompiled at -O0 -march=k8
  2010-01-27 11:03 [Bug inline-asm/42881] New: SSE2 intrinsics miscompiled at -O0 -march=k8 bugs at 59A2 dot org
  2010-01-27 16:17 ` [Bug middle-end/42881] [4.5 Regression] " rguenth at gcc dot gnu dot org
  2010-01-27 17:29 ` [Bug target/42881] " matz at gcc dot gnu dot org
@ 2010-01-28 14:12 ` matz at gcc dot gnu dot org
  2010-01-28 14:40 ` matz at gcc dot gnu dot org
  3 siblings, 0 replies; 5+ messages in thread
From: matz at gcc dot gnu dot org @ 2010-01-28 14:12 UTC (permalink / raw)
  To: gcc-bugs



------- Comment #3 from matz at gcc dot gnu dot org  2010-01-28 14:12 -------
Subject: Bug 42881

Author: matz
Date: Thu Jan 28 14:11:34 2010
New Revision: 156320

URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=156320
Log:
        PR target/42881
        * config/i386/i386.c (ix86_expand_vector_init_duplicate):
        Wrap force_reg into a sequence, emit it before user.

testsuite/
        * gcc.target/i386/pr42881.c: New test.

Added:
    trunk/gcc/testsuite/gcc.target/i386/pr42881.c
Modified:
    trunk/gcc/ChangeLog
    trunk/gcc/config/i386/i386.c
    trunk/gcc/testsuite/ChangeLog


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42881


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Bug target/42881] [4.5 Regression] SSE2 intrinsics miscompiled at -O0 -march=k8
  2010-01-27 11:03 [Bug inline-asm/42881] New: SSE2 intrinsics miscompiled at -O0 -march=k8 bugs at 59A2 dot org
                   ` (2 preceding siblings ...)
  2010-01-28 14:12 ` matz at gcc dot gnu dot org
@ 2010-01-28 14:40 ` matz at gcc dot gnu dot org
  3 siblings, 0 replies; 5+ messages in thread
From: matz at gcc dot gnu dot org @ 2010-01-28 14:40 UTC (permalink / raw)
  To: gcc-bugs



------- Comment #4 from matz at gcc dot gnu dot org  2010-01-28 14:40 -------
Fixed.


-- 

matz at gcc dot gnu dot org changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|ASSIGNED                    |RESOLVED
         Resolution|                            |FIXED


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42881


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2010-01-28 14:40 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-01-27 11:03 [Bug inline-asm/42881] New: SSE2 intrinsics miscompiled at -O0 -march=k8 bugs at 59A2 dot org
2010-01-27 16:17 ` [Bug middle-end/42881] [4.5 Regression] " rguenth at gcc dot gnu dot org
2010-01-27 17:29 ` [Bug target/42881] " matz at gcc dot gnu dot org
2010-01-28 14:12 ` matz at gcc dot gnu dot org
2010-01-28 14:40 ` matz at gcc dot gnu dot org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).