public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/63537] New: Missed optimization: Loop unrolling adds extra copy when returning aggregate
@ 2014-10-14 19:02 tavianator at gmail dot com
  2014-10-15  8:38 ` [Bug tree-optimization/63537] [4.9/5 Regression] " rguenth at gcc dot gnu.org
                   ` (7 more replies)
  0 siblings, 8 replies; 9+ messages in thread
From: tavianator at gmail dot com @ 2014-10-14 19:02 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63537

            Bug ID: 63537
           Summary: Missed optimization: Loop unrolling adds extra copy
                    when returning aggregate
           Product: gcc
           Version: 4.9.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: tavianator at gmail dot com

Created attachment 33715
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=33715&action=edit
Reproducer

At -O2 and above on x86_64, this manually unrolled loop generates much better
code than the automatically unrolled one:

    struct vec {
        double n[3];
    };

    struct vec mul_unrolled(struct vec lhs, double rhs) {
        struct vec ret;
        ret.n[0] = lhs.n[0]*rhs;
        ret.n[1] = lhs.n[1]*rhs;
        ret.n[2] = lhs.n[2]*rhs;
        return ret;
    }

This generates the beautiful code below:

    movsd    16(%rsp), %xmm2
    movq    %rdi, %rax
    movsd    24(%rsp), %xmm1
    mulsd    %xmm0, %xmm2
    mulsd    %xmm0, %xmm1
    mulsd    8(%rsp), %xmm0
    movsd    %xmm2, 8(%rdi)
    movsd    %xmm1, 16(%rdi)
    movsd    %xmm0, (%rdi)
    ret

In contrast, at -O2 this:

    struct vec mul_loop(struct vec lhs, double rhs) {
        struct vec ret;
        for (int i = 0; i < 3; ++i) {
            ret.n[i] = lhs.n[i]*rhs;
        }
        return ret;
    }

generates this:

    movsd    8(%rsp), %xmm1
    movq    %rdi, %rax
    mulsd    %xmm0, %xmm1
    movsd    %xmm1, -40(%rsp)
    movq    -40(%rsp), %rdx
    movsd    16(%rsp), %xmm1
    mulsd    %xmm0, %xmm1
    movq    %rdx, (%rdi)
    mulsd    24(%rsp), %xmm0
    movsd    %xmm1, -32(%rsp)
    movq    -32(%rsp), %rdx
    movsd    %xmm0, -24(%rsp)
    movq    %rdx, 8(%rdi)
    movq    -24(%rsp), %rdx
    movq    %rdx, 16(%rdi)
    ret

which builds the result in a stack temporary at -40(%rsp) and then copies it,
one element at a time, to the return slot at (%rdi).  At -O3 the loop gets
vectorized, but the extra copy is still there:

    movapd    %xmm0, %xmm1
    mulsd    24(%rsp), %xmm0
    movupd    8(%rsp), %xmm2
    movq    %rdi, %rax
    unpcklpd    %xmm1, %xmm1
    mulpd    %xmm1, %xmm2
    movsd    %xmm0, -24(%rsp)
    movaps    %xmm2, -40(%rsp)
    movq    -40(%rsp), %rdx
    movq    %rdx, (%rdi)
    movq    -32(%rsp), %rdx
    movq    %rdx, 8(%rdi)
    movq    -24(%rsp), %rdx
    movq    %rdx, 16(%rdi)


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2015-06-26 20:31 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-14 19:02 [Bug tree-optimization/63537] New: Missed optimization: Loop unrolling adds extra copy when returning aggregate tavianator at gmail dot com
2014-10-15  8:38 ` [Bug tree-optimization/63537] [4.9/5 Regression] " rguenth at gcc dot gnu.org
2014-10-15  8:39 ` [Bug tree-optimization/63537] [4.8/4.9/5 " rguenth at gcc dot gnu.org
2014-10-15 15:20 ` tavianator at gmail dot com
2014-11-24 13:36 ` rguenth at gcc dot gnu.org
2014-12-19 13:33 ` jakub at gcc dot gnu.org
2015-06-23  8:25 ` [Bug tree-optimization/63537] [4.8/4.9/5/6 " rguenth at gcc dot gnu.org
2015-06-26 20:01 ` [Bug tree-optimization/63537] [4.9/5/6 " jakub at gcc dot gnu.org
2015-06-26 20:31 ` jakub at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).