From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 29717 invoked by alias); 10 Nov 2012 02:04:16 -0000 Received: (qmail 26601 invoked by uid 48); 10 Nov 2012 02:04:00 -0000 From: "hjl.tools at gmail dot com" To: gcc-bugs@gcc.gnu.org Subject: [Bug target/55258] New: SSE register isn't used for 16byte copy Date: Sat, 10 Nov 2012 02:04:00 -0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: new X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: target X-Bugzilla-Keywords: X-Bugzilla-Severity: normal X-Bugzilla-Who: hjl.tools at gmail dot com X-Bugzilla-Status: UNCONFIRMED X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Changed-Fields: Message-ID: X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated Content-Type: text/plain; charset="UTF-8" MIME-Version: 1.0 Mailing-List: contact gcc-bugs-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-bugs-owner@gcc.gnu.org X-SW-Source: 2012-11/txt/msg00890.txt.bz2 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55258 Bug #: 55258 Summary: SSE register isn't used for 16byte copy Classification: Unclassified Product: gcc Version: 4.8.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: target AssignedTo: unassigned@gcc.gnu.org ReportedBy: hjl.tools@gmail.com CC: ubizjak@gmail.com [hjl@gnu-tools-1 pr55247]$ cat x.i typedef unsigned int uint32_t; typedef uint32_t Elf32_Word; typedef uint32_t Elf32_Addr; typedef struct { Elf32_Word st_name; Elf32_Addr st_value; Elf32_Word st_size; unsigned char st_other; } Elf32_Sym; typedef struct { Elf32_Word r_info; } Elf32_Rela; typedef struct { union { Elf32_Addr d_ptr; } d_un; } Elf32_Dyn; struct link_map { Elf32_Dyn *l_info[34]; }; extern void symbind32 (Elf32_Sym *); void _dl_profile_fixup (struct link_map *l, Elf32_Word reloc_arg) { const Elf32_Sym *const symtab = (const void *) l->l_info[6]->d_un.d_ptr; 
const Elf32_Rela *const reloc = (const void *) (l->l_info[23]->d_un.d_ptr + reloc_arg * sizeof (Elf32_Rela)); Elf32_Sym sym = symtab[(reloc->r_info) >> 8]; symbind32 (&sym); } [hjl@gnu-tools-1 pr55247]$ /export/build/gnu/gcc/build-x86_64-linux/gcc/xgcc -B/export/build/gnu/gcc/build-x86_64-linux/gcc/ -O -Wall -mx32 -maddress-mode=short -S x.i -o short.asm [hjl@gnu-tools-1 pr55247]$ cat short.asm .file "x.i" .text .globl _dl_profile_fixup .type _dl_profile_fixup, @function _dl_profile_fixup: .LFB0: .cfi_startproc subl $24, %esp .cfi_def_cfa_offset 32 movl 24(%edi), %edx movl 92(%edi), %eax movl (%eax), %eax movl (%eax,%esi,4), %eax shrl $8, %eax sall $4, %eax addl (%edx), %eax movq 8(%eax), %rdx movq (%eax), %rax movq %rax, (%esp) movq %rdx, 8(%esp) movl %esp, %edi call symbind32 addl $24, %esp .cfi_def_cfa_offset 8 ret .cfi_endproc .LFE0: .size _dl_profile_fixup, .-_dl_profile_fixup .ident "GCC: (GNU) 4.8.0 20121110 (experimental)" .section .note.GNU-stack,"",@progbits [hjl@gnu-tools-1 pr55247]$ /export/build/gnu/gcc/build-x86_64-linux/gcc/xgcc -B/export/build/gnu/gcc/build-x86_64-linux/gcc/ -O -Wall -mx32 -maddress-mode=long -S x.i -o long.asm [hjl@gnu-tools-1 pr55247]$ cat long.asm .file "x.i" .text .globl _dl_profile_fixup .type _dl_profile_fixup, @function _dl_profile_fixup: .LFB0: .cfi_startproc subq $40, %rsp .cfi_def_cfa_offset 48 movl 24(%rdi), %edx movl 92(%rdi), %eax movl (%rax), %eax movl (%eax,%esi,4), %eax shrl $8, %eax sall $4, %eax addl (%rdx), %eax movdqu (%eax), %xmm0 movdqa %xmm0, (%rsp) movq (%rsp), %rax movq 8(%rsp), %rdx movq %rax, 16(%rsp) movq %rdx, 24(%rsp) leaq 16(%rsp), %rdi call symbind32 addq $40, %rsp .cfi_def_cfa_offset 8 ret .cfi_endproc .LFE0: .size _dl_profile_fixup, .-_dl_profile_fixup .ident "GCC: (GNU) 4.8.0 20121110 (experimental)" .section .note.GNU-stack,"",@progbits [hjl@gnu-tools-1 pr55247]$ For TARGET_SSE_UNALIGNED_LOAD_OPTIMAL/TARGET_SSE_UNALIGNED_STORE_OPTIMAL, we should always generate movdqu (%eax), %xmm0 movdqa %xmm0, 
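(%rsp) for the 16-byte struct copy regardless of -maddress-mode; with -maddress-mode=short the copy is currently done with two 64-bit movq loads and two movq stores through general-purpose registers instead of an SSE register.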