public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug c/50444] New: unaligned movdqa instruction after inlining
@ 2011-09-17 17:12 john.salmon at deshaw dot com
  2011-09-25 15:45 ` [Bug c/50444] " john.salmon at deshaw dot com
                   ` (21 more replies)
  0 siblings, 22 replies; 23+ messages in thread
From: john.salmon at deshaw dot com @ 2011-09-17 17:12 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

             Bug #: 50444
           Summary: unaligned movdqa instruction after inlining
    Classification: Unclassified
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
        AssignedTo: unassigned@gcc.gnu.org
        ReportedBy: john.salmon@deshaw.com


Created attachment 25309
  --> http://gcc.gnu.org/bugzilla/attachment.cgi?id=25309
C example

On Linux, x86_64, the attached code segfaults when compiled with
gcc4.6.1 and gcc4.6.0.  There's no problem with gcc-4.5.3, nor
when -O1 is removed.

gdb reports that the instruction at the pc when it segfaults is:

   movdqa %xmm0, 0x28(%rbx)

and that 0x28(%rbx) is only 8-bit aligned.  It looks like the
alignment requirement of the movdqa instruction has somehow been
"forgotten" in the optimization that inlined the call to both caster()
and ssefunc() and eliminated the memcpys.

salmonj@drdlogin0039.en.desres$ desres-cleanenv -m gcc/4.6.1-23A/bin gcc -Wall
-O1 e2.c
salmonj@drdlogin0039.en.desres$ a.out
Segmentation fault (core dumped)
salmonj@drdlogin0039.en.desres$ gdb a.out
GNU gdb (GDB) 7.0.1
Copyright (C) 2009 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-unknown-linux-gnu".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>...
Reading symbols from /d/en/salmonj-0/junk/misalign/a.out...done.
(gdb) r
Starting program: /d/en/salmonj-0/junk/misalign/a.out 
warning: no loadable sections found in added symbol-file system-supplied DSO at
0x2aaaaaaab000

Program received signal SIGSEGV, Segmentation fault.
_mm_xor_si128 (e=0x7fffffffc070) at e2.c:51
51            e->v = caster(*incr(&e->c), e->key);
(gdb) x /10i $pc
0x400504 <method+28>:    movdqa %xmm0,0x28(%rbx)
0x400509 <method+33>:    movq   $0x4,0x20(%rbx)
0x400511 <method+41>:    mov    0x20(%rbx),%rax
0x400515 <method+45>:    lea    -0x1(%rax),%rdx
0x400519 <method+49>:    mov    %rdx,0x20(%rbx)
0x40051d <method+53>:    mov    0x24(%rbx,%rax,4),%eax
0x400521 <method+57>:    pop    %rbx
0x400522 <method+58>:    retq   
0x400523 <main>:    push   %rbx
0x400524 <main+1>:    add    $0xffffffffffffff80,%rsp
(gdb) info reg
rax            0x7fffffffc080    140737488339072
rbx            0x7fffffffc070    140737488339056
rcx            0x400570    4195696
rdx            0x7fffffffc1a8    140737488339368
rsi            0x7fffffffc198    140737488339352
rdi            0x7fffffffc080    140737488339072
rbp            0x0    0x0
rsp            0x7fffffffc020    0x7fffffffc020
r8             0x3f8b3532d0    272918459088
r9             0x3f8ac0d730    272910833456
r10            0x0    0
r11            0x3f8b01d8a0    272915093664
r12            0x0    0
r13            0x7fffffffc190    140737488339344
r14            0x0    0
r15            0x0    0
rip            0x400504    0x400504 <method+28>
eflags         0x10202    [ IF RF ]
cs             0x33    51
ss             0x2b    43
ds             0x0    0
es             0x0    0
fs             0x0    0
gs             0x0    0
fctrl          0x37f    895
fstat          0x0    0
ftag           0xffff    65535
fiseg          0x0    0
fioff          0x0    0
foseg          0x0    0
fooff          0x0    0
fop            0x0    0
mxcsr          0x1f80    [ IM DM ZM OM UM PM ]
(gdb)


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug c/50444] unaligned movdqa instruction after inlining
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
@ 2011-09-25 15:45 ` john.salmon at deshaw dot com
  2011-09-25 18:06 ` rguenth at gcc dot gnu.org
                   ` (20 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: john.salmon at deshaw dot com @ 2011-09-25 15:45 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

--- Comment #1 from John Salmon <john.salmon at deshaw dot com> 2011-09-25 15:22:07 UTC ---
Here's a slightly smaller test case.  The problem is the 'movdqa'.  According
the x86-64 ABI, rsp+8 is 16-bit aligned at the entry to main, and therefore so
is %rdi when we try to execute 
   movdqa %xmm0, (%rdi)
resulting in segv.


thsalmonj@drdlogin0039.en.desres$ cat e2.c
#include <stdint.h>
#include <emmintrin.h>
#include <string.h>
struct a4x32{
      uint32_t v[4];
};
struct a1xm128i{
      __m128i m;
};
static struct a4x32 zero () {
    struct a1xm128i c1x128;
    struct a4x32 c4x32;
    c1x128.m = _mm_setzero_si128();
    memcpy (&c4x32.v[0], &c1x128.m, sizeof (c4x32));
    return c4x32;
}
struct S {
     struct a4x32 v;
};
void method (struct S * e) {
     e->v = zero ();
}
int main (int argc, char **argv) {
    struct S e;
    method(&e);
    return e.v.v[0];
}
salmonj@drdlogin0039.en.desres$ desres-cleanenv -m gcc/4.6.1-23A/bin gcc -Wall
-O -std=c99 -pedantic -S e2.c
salmonj@drdlogin0039.en.desres$ desres-cleanenv -m gcc/4.6.1-23A/bin gcc e2.s
salmonj@drdlogin0039.en.desres$ ./a.out
Segmentation fault (core dumped)
salmonj@drdlogin0039.en.desres$ cat e2.s
    .file    "e2.c"
    .text
    .globl    method
    .type    method, @function
method:
.LFB522:
    .cfi_startproc
    pxor    %xmm0, %xmm0
    movdqa    %xmm0, (%rdi)
    ret
    .cfi_endproc
.LFE522:
    .size    method, .-method
    .globl    main
    .type    main, @function
main:
.LFB523:
    .cfi_startproc
    subq    $16, %rsp
    .cfi_def_cfa_offset 24
    movq    %rsp, %rdi
    call    method
    movl    (%rsp), %eax
    addq    $16, %rsp
    .cfi_def_cfa_offset 8
    ret
    .cfi_endproc
.LFE523:
    .size    main, .-main
    .ident    "GCC: (GNU) 4.6.1"
    .section    .note.GNU-stack,"",@progbits
salmonj@drdlogin0039.en.desres$


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug c/50444] unaligned movdqa instruction after inlining
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
  2011-09-25 15:45 ` [Bug c/50444] " john.salmon at deshaw dot com
@ 2011-09-25 18:06 ` rguenth at gcc dot gnu.org
  2011-09-26 17:46 ` hjl.tools at gmail dot com
                   ` (19 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: rguenth at gcc dot gnu.org @ 2011-09-25 18:06 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

Richard Guenther <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |NEW
   Last reconfirmed|                            |2011-09-25
                 CC|                            |hjl at gcc dot gnu.org,
                   |                            |rguenth at gcc dot gnu.org
     Ever Confirmed|0                           |1


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug c/50444] unaligned movdqa instruction after inlining
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
  2011-09-25 15:45 ` [Bug c/50444] " john.salmon at deshaw dot com
  2011-09-25 18:06 ` rguenth at gcc dot gnu.org
@ 2011-09-26 17:46 ` hjl.tools at gmail dot com
  2011-09-26 17:56 ` [Bug c/50444] -ftree-isa ignores alignment hjl.tools at gmail dot com
                   ` (18 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: hjl.tools at gmail dot com @ 2011-09-26 17:46 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

--- Comment #2 from H.J. Lu <hjl.tools at gmail dot com> 2011-09-26 17:24:47 UTC ---
The problem is the wrong alignment for e->v in method.
The initial RTL has

(insn 17 16 18 4 (set (mem/s/j:V2DI (plus:DI (reg/v/f:DI 66 [ e ]) 
                (const_int 40 [0x28])) [0 MEM[(struct Engine *)e_1(D) +
40B].m+0 S16 A128])
        (reg:V2DI 68)) x.c:36 -1
     (nil))

A128 is wrong.


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug c/50444] -ftree-isa ignores alignment
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
                   ` (2 preceding siblings ...)
  2011-09-26 17:46 ` hjl.tools at gmail dot com
@ 2011-09-26 17:56 ` hjl.tools at gmail dot com
  2011-09-26 20:06 ` hjl.tools at gmail dot com
                   ` (17 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: hjl.tools at gmail dot com @ 2011-09-26 17:56 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

H.J. Lu <hjl.tools at gmail dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |jamborm at gcc dot gnu.org
            Summary|unaligned movdqa            |-ftree-isa ignores
                   |instruction after inlining  |alignment

--- Comment #3 from H.J. Lu <hjl.tools at gmail dot com> 2011-09-26 17:39:41 UTC ---
-ftree-isa ignores alignment and generates:


  SR.11_17 = SR.10_18;
  MEM[(struct Engine *)e_1(D) + 40B].m = SR.11_17;
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

This is wrong.


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug c/50444] -ftree-isa ignores alignment
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
                   ` (3 preceding siblings ...)
  2011-09-26 17:56 ` [Bug c/50444] -ftree-isa ignores alignment hjl.tools at gmail dot com
@ 2011-09-26 20:06 ` hjl.tools at gmail dot com
  2011-09-27  8:59 ` rguenther at suse dot de
                   ` (16 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: hjl.tools at gmail dot com @ 2011-09-26 20:06 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

H.J. Lu <hjl.tools at gmail dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
            Version|unknown                     |4.6.2
   Target Milestone|---                         |4.6.2

--- Comment #4 from H.J. Lu <hjl.tools at gmail dot com> 2011-09-26 20:01:44 UTC ---
It is caused by revision 164515:

http://gcc.gnu.org/ml/gcc-cvs/2010-09/msg00812.html


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug c/50444] -ftree-isa ignores alignment
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
                   ` (4 preceding siblings ...)
  2011-09-26 20:06 ` hjl.tools at gmail dot com
@ 2011-09-27  8:59 ` rguenther at suse dot de
  2011-10-24  7:55 ` [Bug tree-optimization/50444] [4.6/4.7 Regression] -ftree-sra " rguenth at gcc dot gnu.org
                   ` (15 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: rguenther at suse dot de @ 2011-09-27  8:59 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

--- Comment #5 from rguenther at suse dot de <rguenther at suse dot de> 2011-09-27 08:52:06 UTC ---
On Mon, 26 Sep 2011, hjl.tools at gmail dot com wrote:

> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444
> 
> H.J. Lu <hjl.tools at gmail dot com> changed:
> 
>            What    |Removed                     |Added
> ----------------------------------------------------------------------------
>                  CC|                            |jamborm at gcc dot gnu.org
>             Summary|unaligned movdqa            |-ftree-isa ignores
>                    |instruction after inlining  |alignment
> 
> --- Comment #3 from H.J. Lu <hjl.tools at gmail dot com> 2011-09-26 17:39:41 UTC ---
> -ftree-isa ignores alignment and generates:
> 
> 
>   SR.11_17 = SR.10_18;
>   MEM[(struct Engine *)e_1(D) + 40B].m = SR.11_17;
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> 
> This is wrong.

Ah, so we now have the SSE testcase ;)


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug tree-optimization/50444] [4.6/4.7 Regression] -ftree-sra ignores alignment
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
                   ` (5 preceding siblings ...)
  2011-09-27  8:59 ` rguenther at suse dot de
@ 2011-10-24  7:55 ` rguenth at gcc dot gnu.org
  2011-10-26 17:38 ` jakub at gcc dot gnu.org
                   ` (14 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: rguenth at gcc dot gnu.org @ 2011-10-24  7:55 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

Richard Guenther <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Keywords|                            |wrong-code
          Component|c                           |tree-optimization
      Known to work|                            |4.5.3
            Summary|-ftree-isa ignores          |[4.6/4.7 Regression]
                   |alignment                   |-ftree-sra ignores
                   |                            |alignment
      Known to fail|                            |4.6.0

--- Comment #6 from Richard Guenther <rguenth at gcc dot gnu.org> 2011-10-24 07:54:36 UTC ---
Trunk seems to "work", but surely the issue is latent there.  Regression from
4.5.


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug tree-optimization/50444] [4.6/4.7 Regression] -ftree-sra ignores alignment
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
                   ` (6 preceding siblings ...)
  2011-10-24  7:55 ` [Bug tree-optimization/50444] [4.6/4.7 Regression] -ftree-sra " rguenth at gcc dot gnu.org
@ 2011-10-26 17:38 ` jakub at gcc dot gnu.org
  2011-10-27 10:19 ` rguenth at gcc dot gnu.org
                   ` (13 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: jakub at gcc dot gnu.org @ 2011-10-26 17:38 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

Jakub Jelinek <jakub at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
   Target Milestone|4.6.2                       |4.6.3

--- Comment #7 from Jakub Jelinek <jakub at gcc dot gnu.org> 2011-10-26 17:13:39 UTC ---
GCC 4.6.2 is being released.


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug tree-optimization/50444] [4.6/4.7 Regression] -ftree-sra ignores alignment
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
                   ` (7 preceding siblings ...)
  2011-10-26 17:38 ` jakub at gcc dot gnu.org
@ 2011-10-27 10:19 ` rguenth at gcc dot gnu.org
  2011-12-16 19:59 ` jamborm at gcc dot gnu.org
                   ` (12 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: rguenth at gcc dot gnu.org @ 2011-10-27 10:19 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

Richard Guenther <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Priority|P3                          |P1

--- Comment #8 from Richard Guenther <rguenth at gcc dot gnu.org> 2011-10-27 10:17:30 UTC ---
Let's do something about this.


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug tree-optimization/50444] [4.6/4.7 Regression] -ftree-sra ignores alignment
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
                   ` (8 preceding siblings ...)
  2011-10-27 10:19 ` rguenth at gcc dot gnu.org
@ 2011-12-16 19:59 ` jamborm at gcc dot gnu.org
  2011-12-17 15:41 ` ebotcazou at gcc dot gnu.org
                   ` (11 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: jamborm at gcc dot gnu.org @ 2011-12-16 19:59 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

--- Comment #9 from Martin Jambor <jamborm at gcc dot gnu.org> 2011-12-16 19:35:07 UTC ---
I have just discovered this has not been fixed by the patch to dela with PR
50569. I'm moving this to the top of my todo list now.


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug tree-optimization/50444] [4.6/4.7 Regression] -ftree-sra ignores alignment
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
                   ` (9 preceding siblings ...)
  2011-12-16 19:59 ` jamborm at gcc dot gnu.org
@ 2011-12-17 15:41 ` ebotcazou at gcc dot gnu.org
  2012-01-12 13:47 ` jamborm at gcc dot gnu.org
                   ` (10 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: ebotcazou at gcc dot gnu.org @ 2011-12-17 15:41 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

Eric Botcazou <ebotcazou at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |ebotcazou at gcc dot
                   |                            |gnu.org

--- Comment #10 from Eric Botcazou <ebotcazou at gcc dot gnu.org> 2011-12-17 14:54:32 UTC ---
> I have just discovered this has not been fixed by the patch to dela with PR
> 50569. I'm moving this to the top of my todo list now.

This is expected, PR50569 is a different (sub-)problem.  The similar problem is
rather PR51315 (whose fix was restricted to strict-alignment platforms).


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug tree-optimization/50444] [4.6/4.7 Regression] -ftree-sra ignores alignment
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
                   ` (10 preceding siblings ...)
  2011-12-17 15:41 ` ebotcazou at gcc dot gnu.org
@ 2012-01-12 13:47 ` jamborm at gcc dot gnu.org
  2012-01-16 15:43 ` rguenth at gcc dot gnu.org
                   ` (9 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: jamborm at gcc dot gnu.org @ 2012-01-12 13:47 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

--- Comment #11 from Martin Jambor <jamborm at gcc dot gnu.org> 2012-01-12 13:47:04 UTC ---
I think that SRA's part of the fix is what I have just posted to the mailing
list:
http://gcc.gnu.org/ml/gcc-patches/2012-01/msg00613.html


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug tree-optimization/50444] [4.6/4.7 Regression] -ftree-sra ignores alignment
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
                   ` (11 preceding siblings ...)
  2012-01-12 13:47 ` jamborm at gcc dot gnu.org
@ 2012-01-16 15:43 ` rguenth at gcc dot gnu.org
  2012-01-16 15:58 ` rguenth at gcc dot gnu.org
                   ` (8 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: rguenth at gcc dot gnu.org @ 2012-01-16 15:43 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

--- Comment #12 from Richard Guenther <rguenth at gcc dot gnu.org> 2012-01-16 15:25:06 UTC ---
Testcase w/o includes that fails with 4.6 and 4.7:

typedef long long __m128i __attribute__ ((__vector_size__ (16),
__may_alias__));
typedef int __v4si __attribute__ ((__vector_size__ (16)));
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
typedef unsigned int uint32_t;

typedef struct {
    uint32_t v[4];
} a4x32;

a4x32* incr(a4x32* x)
{
  x->v[0] += 1;
  return x;
}

typedef struct {
    __m128i m;
} a1xm128i;

static inline  a1xm128i ssefunc( a1xm128i in,  a1xm128i k)
{
  a1xm128i ret;
  ret.m = (__m128i)__builtin_ia32_pxor128 ((__v2di)in.m, (__v2di)k.m);
  return ret;
}

static  a4x32  caster( a4x32 c4x32,  a1xm128i k)
{
  a1xm128i c1x128;
  if( sizeof(c4x32) != sizeof(c1x128) ) __builtin_abort();
  __builtin_memcpy(&c1x128, &c4x32, sizeof(c1x128));
  c1x128 = ssefunc(c1x128, k);
  __builtin_memcpy(&c4x32, &c1x128, sizeof(c4x32));
  return c4x32;
}

typedef struct  {
    a1xm128i key;
    a4x32 c;
    __SIZE_TYPE__ elem;
    a4x32 v;
} Engine;

void ctor(Engine *e)
{
  e->elem = 0;
  e->key.m = (__m128i)(__v4si){ 0, 0, 0, 0 };
  e->c.v[0] = 0;
  e->c.v[1] = 0;
  e->c.v[2] = 0;
  e->c.v[3] = 0;
}

uint32_t method( Engine *e)
{
  if( e->elem == 0 )
    {
      e->v = caster(*incr(&e->c), e->key);
      e->elem = 4;
    }
  return e->v.v[--e->elem];
}

int main()
{
  Engine e4; ctor(&e4);
  Engine e5; ctor(&e5);
  if(method(&e4)!=method(&e5))
    __builtin_abort ();
  return 0;
}

and the problematic SRA is indeed happening during ESRA in caster () which
looks like (before SRA):

<bb 2>:
  MEM[(char * {ref-all})&c1x128] = MEM[(char * {ref-all})&c4x32];
  in = c1x128;
  k = k;
  D.1785_7 = k.m;
  D.1784_8 = in.m;
  D.1783_9 = __builtin_ia32_pxor128 (D.1784_8, D.1785_7);
  D.1782.m = D.1783_9;
  D.1780 = D.1782;
  c1x128 = D.1780;
  MEM[(char * {ref-all})&c4x32] = MEM[(char * {ref-all})&c1x128];
  D.1760 = c4x32;
  c1x128 ={v} {CLOBBER};
  return D.1760;

and after SRA:

<bb 2>:
  c4x32$m_4 = MEM[(struct  *)&c4x32].m;
  c1x128$m_14 = c4x32$m_4;
  in$m_13 = c1x128$m_14;
  k$m_12 = MEM[(struct  *)&k].m;
  D.1785_7 = k$m_12;
  D.1784_8 = in$m_13;
  D.1783_9 = __builtin_ia32_pxor128 (D.1784_8, D.1785_7);
  SR.6_11 = D.1783_9;
  SR.7_10 = SR.6_11;
  c1x128$m_2 = SR.7_10;
  c4x32$m_15 = c1x128$m_2;
  MEM[(struct  *)&D.1760].m = c4x32$m_15;
  c1x128$m_16 = { 0, 0 };
  return D.1760;

notice that D.1760 is of type a4x32 and thus has the alignment of an
integer.  But SRA constructs in-place the object of type c1x128.
SRA analysis should have seen the alignment breaking copy

  MEM[(char * {ref-all})&c4x32] = MEM[(char * {ref-all})&c1x128];

which uses a properly aligned type for the store.  Similarly the
prevailing store

  D.1760 = c4x32;

has the alignment of D.1760.

D.1760 already has a bogus type in lacc->type.  We can easily avoid
translating across aggregate copies that would transfer bogusly aligned types
to an access via

Index: tree-sra.c
===================================================================
--- tree-sra.c  (revision 183205)
+++ tree-sra.c  (working copy)
@@ -2290,7 +2290,9 @@ propagate_subaccesses_across_link (struc

   if (is_gimple_reg_type (racc->type))
     {
-      if (!lacc->first_child && !racc->first_child)
+      if (!lacc->first_child && !racc->first_child
+         && (get_object_alignment (lacc->expr)
+             >= get_object_alignment (racc->expr)))
        {
          tree t = lacc->base;

or make sure to transfer the alignment to a constructed bare(!) MEM_REF
from lacc->expr before overwriting that (assuming it retains the original
form up until here).


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug tree-optimization/50444] [4.6/4.7 Regression] -ftree-sra ignores alignment
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
                   ` (12 preceding siblings ...)
  2012-01-16 15:43 ` rguenth at gcc dot gnu.org
@ 2012-01-16 15:58 ` rguenth at gcc dot gnu.org
  2012-01-18 11:57 ` jamborm at gcc dot gnu.org
                   ` (7 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: rguenth at gcc dot gnu.org @ 2012-01-16 15:58 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

--- Comment #13 from Richard Guenther <rguenth at gcc dot gnu.org> 2012-01-16 15:41:50 UTC ---
Or, transfering the alignment:

Index: tree-sra.c
===================================================================
--- tree-sra.c  (revision 183205)
+++ tree-sra.c  (working copy)
@@ -2294,15 +2294,17 @@ propagate_subaccesses_across_link (struc
        {
          tree t = lacc->base;

-         lacc->type = racc->type;
-         if (build_user_friendly_ref_for_offset (&t, TREE_TYPE (t),
-                                                 lacc->offset, racc->type))
+         if (lacc->type == racc->type
+             && build_user_friendly_ref_for_offset (&t, TREE_TYPE (t),
+                                                    lacc->offset, lacc->type))
            lacc->expr = t;
          else
            {
-             lacc->expr = build_ref_for_model (EXPR_LOCATION (lacc->base),
-                                               lacc->base, lacc->offset,
-                                               racc, NULL, false);
+             lacc->type = build_aligned_type (racc->type,
+                                              get_object_alignment
(lacc->expr));
+             lacc->expr = build_ref_for_offset (EXPR_LOCATION (lacc->base),
+                                                lacc->base, lacc->offset,
+                                                lacc->type, NULL, false);
              lacc->grp_no_warning = true;
            }
        }

note the build_user_friendly_ref_for_offset which is really bogus as it
would construct component-refs out of thin air.  The above probably
completely disables that path ...

get_object_alignment is a little conservative - we can and need to
do better by mimicking what expand does.  Note that we can't use
build_ref_for_model here.

With the above the testcase passes as well and we still scalarize:

  SR.7_10 = SR.6_11;
  c1x128$m_2 = SR.7_10;
  c4x32_15 = c1x128$m_2;
  MEM[(struct  *)&D.1760] = c4x32_15;


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug tree-optimization/50444] [4.6/4.7 Regression] -ftree-sra ignores alignment
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
                   ` (13 preceding siblings ...)
  2012-01-16 15:58 ` rguenth at gcc dot gnu.org
@ 2012-01-18 11:57 ` jamborm at gcc dot gnu.org
  2012-01-20 14:27 ` rguenth at gcc dot gnu.org
                   ` (6 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: jamborm at gcc dot gnu.org @ 2012-01-18 11:57 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

--- Comment #14 from Martin Jambor <jamborm at gcc dot gnu.org> 2012-01-18 11:23:00 UTC ---
Created attachment 26362
  --> http://gcc.gnu.org/bugzilla/attachment.cgi?id=26362
patch-in-progress

I talked to richi on IRC yesterday and we agreed that because we rely
on build_ref_for_model to create the whole chain of COMPONENT_REFs on
strict alignment platforms, that we would disable some scalarization
in cases like this, after all.

However, in addition to what richi suggested in comment #12, I found
out that that was not enough to make the test pass on i686.  Also,
propagation across assignments is not the only way how these may be
created for the aggregate on the LHS, they might just be there because
of pre-existing accesses elsewhere in the function.

So I looked at the caster dumps again and found out that indeed the
very first assignment 

  MEM[(char * {ref-all})&c1x128] = MEM[(char * {ref-all})&c4x32];

had the LHS replaced by a SRA vector replacement and the RHS by a
reference created by build_ref_for_model with the same problems.  This
lead to a similar situation but with switched sides.

So I looked at where these build_ref_for_model an came up with the
following patch which is fairly straightforward and makes the testcase
pass also on i686.  However, there are two problems with the patch.

1. This is the easy part.  I'm afraid similar problems are lurking in
processing aggregate copies with children accesses.  But those could
be dealt with afterwards.

2. When I bootstrapped gcc and run the testsuite with this patch, I
got a new failure in libstdc++:

  23_containers/forward_list/modifiers/1.cc

The failure persists with -fno-tree-sra, so something went wrong when
compiling the library.  The patch is very simple and only disabling
stuff which then has to through the VIEW_CONVERT_EXPR path so at the
moment I think that either the condition guarding the following

  force_gimple_rhs = true;

is wrong or I have uncovered some other bug :-(


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug tree-optimization/50444] [4.6/4.7 Regression] -ftree-sra ignores alignment
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
                   ` (14 preceding siblings ...)
  2012-01-18 11:57 ` jamborm at gcc dot gnu.org
@ 2012-01-20 14:27 ` rguenth at gcc dot gnu.org
  2012-01-20 15:54 ` rguenth at gcc dot gnu.org
                   ` (5 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: rguenth at gcc dot gnu.org @ 2012-01-20 14:27 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

Richard Guenther <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |ASSIGNED
         AssignedTo|unassigned at gcc dot       |rguenth at gcc dot gnu.org
                   |gnu.org                     |

--- Comment #15 from Richard Guenther <rguenth at gcc dot gnu.org> 2012-01-20 14:16:25 UTC ---
Created attachment 26395
  --> http://gcc.gnu.org/bugzilla/attachment.cgi?id=26395
other candidate patch

I'm testing the following patch instead, which avoids changing access types
for all-scalar across-link propagations (we're going to create proper V_C_Es
later).  I also remove the fancy code that tries to avoid adding V_C_Es,
it looks it will cause more trouble than missed-optimizations.

That way we completely avoid needing to care for alignment at that particular
places.  Whether the aggregate copy across-link propagation is affected in
a similar way remains to be seen.

I'll see if I run into the same issue as you and investigate that.


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug tree-optimization/50444] [4.6/4.7 Regression] -ftree-sra ignores alignment
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
                   ` (15 preceding siblings ...)
  2012-01-20 14:27 ` rguenth at gcc dot gnu.org
@ 2012-01-20 15:54 ` rguenth at gcc dot gnu.org
  2012-01-27 15:12 ` rguenth at gcc dot gnu.org
                   ` (4 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: rguenth at gcc dot gnu.org @ 2012-01-20 15:54 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

--- Comment #16 from Richard Guenther <rguenth at gcc dot gnu.org> 2012-01-20 15:46:40 UTC ---
(In reply to comment #15)
> Created attachment 26395 [details]
> other candidate patch
> 
> I'm testing the following patch instead, which avoids changing access types
> for all-scalar across-link propagations (we're going to create proper V_C_Es
> later).  I also remove the fancy code that tries to avoid adding V_C_Es,
> it looks it will cause more trouble than missed-optimizations.
> 
> That way we completely avoid needing to care for alignment at that particular
> places.  Whether the aggregate copy across-link propagation is affected in
> a similar way remains to be seen.
> 
> I'll see if I run into the same issue as you and investigate that.

gcc.dg/torture/pr47228.c shows that we rely on the build-ref-for-model
path in sra_modify_assign as we scalarize

struct S4
{
  unsigned f0:24;
} __attribute__((__packed__));

to unsigned int : 24, which is of different size, so we refuse to
VIEW_CONVERT the SImode register to BLKmode.  I'm not entirely sure
what's the best cause of action here, but certainly either detecting
the size-mismatch issue at analysis phase or papering over the issue
with build-ref-for-model (which might not always suceed?!).

Other FAILs this patch causes are

Running target unix/
FAIL: gcc.dg/tree-ssa/pr45144.c scan-tree-dump optimized " =
VIEW_CONVERT_EXPR<u
nsigned int>\\(a\\);"

Running target unix/-m32
FAIL: gcc.dg/torture/pr45678-2.c  -Os  execution test
FAIL: gcc.dg/tree-ssa/pr45144.c scan-tree-dump optimized " =
VIEW_CONVERT_EXPR<u
nsigned int>\\(a\\);"

Running target unix/
FAIL: 20_util/hash/chi2_quality.cc execution test
FAIL: 23_containers/forward_list/capacity/resize_size.cc execution test
FAIL: 23_containers/forward_list/modifiers/2.cc execution test
FAIL: 23_containers/list/operations/3.cc execution test
FAIL: 23_containers/list/operations/3_c++0x.cc execution test
FAIL: ext/pb_ds/example/basic_map.cc execution test
FAIL: ext/pb_ds/example/basic_multiset.cc execution test
FAIL: ext/pb_ds/example/basic_set.cc execution test
FAIL: ext/pb_ds/example/erase_if.cc execution test
FAIL: ext/pb_ds/example/tree_intervals.cc execution test
FAIL: ext/pb_ds/example/tree_join.cc execution test
FAIL: ext/pb_ds/example/tree_order_statistics.cc execution test
FAIL: ext/pb_ds/regression/associative_containers.cc execution test
FAIL: ext/pb_ds/regression/tree_map_rand.cc execution test
FAIL: ext/pb_ds/regression/tree_set_rand.cc execution test

Running target unix//-m32
FAIL: 23_containers/list/operations/3_c++0x.cc execution test
FAIL: 25_algorithms/nth_element/2.cc execution test

I'm going to test the two parts of the patch separately now.


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug tree-optimization/50444] [4.6/4.7 Regression] -ftree-sra ignores alignment
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
                   ` (16 preceding siblings ...)
  2012-01-20 15:54 ` rguenth at gcc dot gnu.org
@ 2012-01-27 15:12 ` rguenth at gcc dot gnu.org
  2012-01-27 15:23 ` rguenth at gcc dot gnu.org
                   ` (3 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: rguenth at gcc dot gnu.org @ 2012-01-27 15:12 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

--- Comment #17 from Richard Guenther <rguenth at gcc dot gnu.org> 2012-01-27 14:54:44 UTC ---
Author: rguenth
Date: Fri Jan 27 14:54:37 2012
New Revision: 183629

URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=183629
Log:
2012-01-27  Richard Guenther  <rguenther@suse.de>

    PR tree-optimization/50444
    * expr.c (mem_ref_refers_to_non_mem_p): New function.
    (expand_assignment): Use it.  Properly handle misaligned
    bases when expanding stores to component references.
    (expand_expr_real_1): Use mem_ref_refers_to_non_mem_p and
    refactor that case.

Modified:
    trunk/gcc/ChangeLog
    trunk/gcc/expr.c


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug tree-optimization/50444] [4.6/4.7 Regression] -ftree-sra ignores alignment
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
                   ` (17 preceding siblings ...)
  2012-01-27 15:12 ` rguenth at gcc dot gnu.org
@ 2012-01-27 15:23 ` rguenth at gcc dot gnu.org
  2012-01-27 15:29 ` [Bug tree-optimization/50444] [4.6 " rguenth at gcc dot gnu.org
                   ` (2 subsequent siblings)
  21 siblings, 0 replies; 23+ messages in thread
From: rguenth at gcc dot gnu.org @ 2012-01-27 15:23 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

--- Comment #18 from Richard Guenther <rguenth at gcc dot gnu.org> 2012-01-27 14:57:04 UTC ---
Author: rguenth
Date: Fri Jan 27 14:56:54 2012
New Revision: 183630

URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=183630
Log:
2012-01-27  Richard Guenther  <rguenther@suse.de>

    PR tree-optimization/50444
    * tree-sra.c (build_ref_for_offset): Properly adjust the
    MEM_REF type for unaligned accesses.

    * gcc.dg/torture/pr50444.c: New testcase.

Added:
    trunk/gcc/testsuite/gcc.dg/torture/pr50444.c
Modified:
    trunk/gcc/ChangeLog
    trunk/gcc/testsuite/ChangeLog
    trunk/gcc/tree-sra.c


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug tree-optimization/50444] [4.6 Regression] -ftree-sra ignores alignment
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
                   ` (18 preceding siblings ...)
  2012-01-27 15:23 ` rguenth at gcc dot gnu.org
@ 2012-01-27 15:29 ` rguenth at gcc dot gnu.org
  2012-03-01 15:21 ` jakub at gcc dot gnu.org
  2013-04-12 16:17 ` jakub at gcc dot gnu.org
  21 siblings, 0 replies; 23+ messages in thread
From: rguenth at gcc dot gnu.org @ 2012-01-27 15:29 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

Richard Guenther <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Priority|P1                          |P2
      Known to work|                            |4.7.0
            Summary|[4.6/4.7 Regression]        |[4.6 Regression] -ftree-sra
                   |-ftree-sra ignores          |ignores alignment
                   |alignment                   |

--- Comment #19 from Richard Guenther <rguenth at gcc dot gnu.org> 2012-01-27 15:00:49 UTC ---
Fixed on trunk.


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug tree-optimization/50444] [4.6 Regression] -ftree-sra ignores alignment
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
                   ` (19 preceding siblings ...)
  2012-01-27 15:29 ` [Bug tree-optimization/50444] [4.6 " rguenth at gcc dot gnu.org
@ 2012-03-01 15:21 ` jakub at gcc dot gnu.org
  2013-04-12 16:17 ` jakub at gcc dot gnu.org
  21 siblings, 0 replies; 23+ messages in thread
From: jakub at gcc dot gnu.org @ 2012-03-01 15:21 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

Jakub Jelinek <jakub at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
   Target Milestone|4.6.3                       |4.6.4

--- Comment #20 from Jakub Jelinek <jakub at gcc dot gnu.org> 2012-03-01 14:38:33 UTC ---
GCC 4.6.3 is being released.


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [Bug tree-optimization/50444] [4.6 Regression] -ftree-sra ignores alignment
  2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
                   ` (20 preceding siblings ...)
  2012-03-01 15:21 ` jakub at gcc dot gnu.org
@ 2013-04-12 16:17 ` jakub at gcc dot gnu.org
  21 siblings, 0 replies; 23+ messages in thread
From: jakub at gcc dot gnu.org @ 2013-04-12 16:17 UTC (permalink / raw)
  To: gcc-bugs


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444

Jakub Jelinek <jakub at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|ASSIGNED                    |RESOLVED
         Resolution|                            |FIXED
   Target Milestone|4.6.4                       |4.7.0

--- Comment #21 from Jakub Jelinek <jakub at gcc dot gnu.org> 2013-04-12 16:17:41 UTC ---
The 4.6 branch has been closed, fixed in GCC 4.7.0.


^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2013-04-12 16:17 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-17 17:12 [Bug c/50444] New: unaligned movdqa instruction after inlining john.salmon at deshaw dot com
2011-09-25 15:45 ` [Bug c/50444] " john.salmon at deshaw dot com
2011-09-25 18:06 ` rguenth at gcc dot gnu.org
2011-09-26 17:46 ` hjl.tools at gmail dot com
2011-09-26 17:56 ` [Bug c/50444] -ftree-isa ignores alignment hjl.tools at gmail dot com
2011-09-26 20:06 ` hjl.tools at gmail dot com
2011-09-27  8:59 ` rguenther at suse dot de
2011-10-24  7:55 ` [Bug tree-optimization/50444] [4.6/4.7 Regression] -ftree-sra " rguenth at gcc dot gnu.org
2011-10-26 17:38 ` jakub at gcc dot gnu.org
2011-10-27 10:19 ` rguenth at gcc dot gnu.org
2011-12-16 19:59 ` jamborm at gcc dot gnu.org
2011-12-17 15:41 ` ebotcazou at gcc dot gnu.org
2012-01-12 13:47 ` jamborm at gcc dot gnu.org
2012-01-16 15:43 ` rguenth at gcc dot gnu.org
2012-01-16 15:58 ` rguenth at gcc dot gnu.org
2012-01-18 11:57 ` jamborm at gcc dot gnu.org
2012-01-20 14:27 ` rguenth at gcc dot gnu.org
2012-01-20 15:54 ` rguenth at gcc dot gnu.org
2012-01-27 15:12 ` rguenth at gcc dot gnu.org
2012-01-27 15:23 ` rguenth at gcc dot gnu.org
2012-01-27 15:29 ` [Bug tree-optimization/50444] [4.6 " rguenth at gcc dot gnu.org
2012-03-01 15:21 ` jakub at gcc dot gnu.org
2013-04-12 16:17 ` jakub at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).