public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
From: "wouter.vermaelen at scarlet dot be" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug tree-optimization/49203] New: missed-optimization: useless expressions not moved out of loop
Date: Fri, 27 May 2011 22:19:00 -0000	[thread overview]
Message-ID: <bug-49203-4@http.gcc.gnu.org/bugzilla/> (raw)

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49203

           Summary: missed-optimization: useless expressions not moved out
                    of loop
           Product: gcc
           Version: 4.7.0
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassigned@gcc.gnu.org
        ReportedBy: wouter.vermaelen@scarlet.be


Hi all,

Below is (a simplified version of) some real code I recently
encountered. The stores to the 'output' array are written in the inner
loop, but the intention was probably to have them in the outer loop.

Gcc is able to 'correct' this programming mistake, but only partly:
the stores themselves are moved to the outer loop, but the instructions
that calculate those values remain in the inner loop.

For this particular example, the best solution is of course to fix the
C code. But maybe this missed-optimization can also occur in other,
more valid, contexts.

Below I've included the x86_64 code generated for this example by
recent versions of both gcc and llvm.

///////////////////////////////////////////////////////////////////

unsigned char input[100];
unsigned char output[100];

// Reproducer for the missed optimization.
//
// For each i in {0, 4, ..., 28}: fold the low two bits of input[i] ..
// input[i + 15] into 'tmp' (input[i] ends up in the most significant
// position), then write 'tmp' to output[i] .. output[i + 3], most
// significant byte first.
//
// The four stores are intentionally left inside the inner loop: that
// placement is the pattern under discussion. Do not "fix" it here, or the
// code no longer demonstrates the issue.
void f() {
    for (int i = 0; i < 32; i += 4) {
        unsigned tmp = 0;
        for (int j = 0; j < 16; ++j) {
            // Shift in two more bits from the next input byte.
            tmp = (tmp << 2) | (input[i + j] & 0x03);
            // Every iteration overwrites the same four bytes, so only the
            // values computed in the last iteration (j == 15) remain in
            // 'output' once the inner loop finishes.
            output[i + 0] = (tmp >> 24) & 0xFF;
            output[i + 1] = (tmp >> 16) & 0xFF;
            output[i + 2] = (tmp >>  8) & 0xFF;
            output[i + 3] = (tmp >>  0) & 0xFF;
        }
    }
}

///////////////////////////////////////////////////////////////////

g++ (GCC) 4.7.0 20110527 (experimental)
g++ -O2 -S

        # Compiler output for f() (AT&T operand order: source, destination).
        # Register use as visible in this listing:
        #   r10 = address of output
        #   r9  = address of the current 4-byte group, i.e. &output[i]
        #   esi = r9 - r10, i.e. i
        #   edx = j (inner loop counter)
        #   eax = tmp at the bottom of the inner loop; reused as the
        #         input index in between
        movl    $output, %r10d
        movq    %r10, %r9
        .p2align 4,,10
        .p2align 3
.L2:
        # Outer loop head: recompute i from the store pointer, then
        # clear j and tmp.
        movl    %r9d, %esi
        xorl    %edx, %edx
        xorl    %eax, %eax
        subl    %r10d, %esi
        .p2align 4,,10
        .p2align 3
.L3:
        # ecx = tmp << 2 (saved before eax is reused as the index)
        leal    0(,%rax,4), %ecx
        # eax = j + i
        leal    (%rdx,%rsi), %eax
        # ++j
        addl    $1, %edx
        # sign-extend the 32-bit index to 64 bits for addressing
        cltq
        # eax = input[i + j], zero-extended
        movzbl  input(%rax), %eax
        # keep the low two bits
        andl    $3, %eax
        # tmp = (tmp << 2) | (input[i + j] & 3)
        orl     %ecx, %eax
        # The three copies and three shifts below produce tmp >> 24,
        # tmp >> 16 and tmp >> 8. They run on every inner iteration,
        # although their results are only read by the stores after the
        # loop exit. This is the work the report says should have left
        # the inner loop together with the stores.
        movl    %eax, %r8d
        movl    %eax, %edi
        movl    %eax, %ecx
        shrl    $24, %r8d
        shrl    $16, %edi
        shrl    $8, %ecx
        # loop until j == 16
        cmpl    $16, %edx
        jne     .L3
        # The stores to output[i + 0] .. output[i + 3] now execute once
        # per outer iteration, after the inner loop.
        movb    %r8b, (%r9)
        movb    %dil, 1(%r9)
        movb    %cl, 2(%r9)
        movb    %al, 3(%r9)
        # advance to the next 4-byte group; stop at output + 32
        addq    $4, %r9
        cmpq    $output+32, %r9
        jne     .L2
        # 'rep' followed by 'ret', exactly as emitted by the compiler
        rep
        ret

///////////////////////////////////////////////////////////////////

clang version 3.0 (http://llvm.org/git/clang.git
855f41963e545172a935d07b4713d079e258a207)
clang++ -O2 -S

# Compiler output for f() (AT&T operand order: source, destination).
# Register use as visible in this listing:
#   rax = i, used directly as a byte offset into input and output
#   rdx = inner loop counter, running from -16 up to 0
#   esi = tmp
#   ecx = value tmp had before the most recent update
# BB#0:                                 # %entry
        # i = 0
        xorl    %eax, %eax
        .align  16, 0x90
.LBB0_1:                                # %for.cond4.preheader
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB0_2 Depth 2
        # tmp = 0; counter = -16
        xorl    %esi, %esi
        movq    $-16, %rdx
        .align  16, 0x90
.LBB0_2:                                # %for.body7
                                        #   Parent Loop BB0_1 Depth=1
                                        # =>  This Inner Loop Header: Depth=2
        # remember tmp as it was before this iteration's update
        movl    %esi, %ecx
        # edi = input[i + (counter + 16)]; the +16 bias lets the counter
        # run from -16 to 0 so that incq/jne ends the loop without a
        # separate compare
        movzbl  input+16(%rdx,%rax), %edi
        andl    $3, %edi
        # tmp = (old tmp << 2) | (input byte & 3)
        leal    (,%rcx,4), %esi
        orl     %edi, %esi
        incq    %rdx
        jne     .LBB0_2
# BB#3:                                 # %for.inc44
                                        #   in Loop: Header=BB0_1 Depth=1
        # Here esi is the final tmp and ecx is tmp before the last update,
        # so final tmp == (ecx << 2) | two new low bits. The upper three
        # output bytes are derived from ecx with shift counts reduced by
        # two (6, 14, 22 instead of 8, 16, 24); only the low byte of each
        # result is stored. All of this runs once per outer iteration.
        movb    %sil, output+3(%rax)
        movl    %ecx, %edx
        shrl    $6, %edx
        movb    %dl, output+2(%rax)
        movl    %ecx, %edx
        shrl    $14, %edx
        movb    %dl, output+1(%rax)
        shrl    $22, %ecx
        movb    %cl, output(%rax)
        # i += 4; loop until i == 32
        addq    $4, %rax
        cmpq    $32, %rax
        jne     .LBB0_1
# BB#4:                                 # %for.end47
        ret


             reply	other threads:[~2011-05-27 22:03 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-05-27 22:19 wouter.vermaelen at scarlet dot be [this message]
2011-05-27 23:53 ` [Bug tree-optimization/49203] " pinskia at gcc dot gnu.org
2011-05-28 12:59 ` rguenth at gcc dot gnu.org
2021-06-08 21:29 ` pinskia at gcc dot gnu.org

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bug-49203-4@http.gcc.gnu.org/bugzilla/ \
    --to=gcc-bugzilla@gcc.gnu.org \
    --cc=gcc-bugs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).