public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/49203] New: missed-optimization: useless expressions not moved out of loop
@ 2011-05-27 22:19 wouter.vermaelen at scarlet dot be
  2011-05-27 23:53 ` [Bug tree-optimization/49203] " pinskia at gcc dot gnu.org
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: wouter.vermaelen at scarlet dot be @ 2011-05-27 22:19 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49203

           Summary: missed-optimization: useless expressions not moved out
                    of loop
           Product: gcc
           Version: 4.7.0
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassigned@gcc.gnu.org
        ReportedBy: wouter.vermaelen@scarlet.be


Hi all,

Below is (a simplified version of) some real code I recently
encountered. The stores to the 'output' array are written in the inner
loop, but the intention was probably to have them in the outer loop.

Gcc is able to 'correct' this programming mistake, but only partly:
the stores themselves are moved to the outer loop, but the instructions that
calculate those values remain in the inner loop.

For this particular example, the best solution is of course to fix the
C code. But maybe this missed-optimization can also occur in other,
more valid, contexts.

Below I've included the generated x86_64 code for this example by
recent versions of both gcc and llvm.

///////////////////////////////////////////////////////////////////

unsigned char input[100];
unsigned char output[100];

void f() {
    for (int i = 0; i < 32; i += 4) {
        unsigned tmp = 0;
        for (int j = 0; j < 16; ++j) {
            tmp = (tmp << 2) | (input[i + j] & 0x03);
            output[i + 0] = (tmp >> 24) & 0xFF;
            output[i + 1] = (tmp >> 16) & 0xFF;
            output[i + 2] = (tmp >>  8) & 0xFF;
            output[i + 3] = (tmp >>  0) & 0xFF;
        }
    }
}

///////////////////////////////////////////////////////////////////

g++ (GCC) 4.7.0 20110527 (experimental)
g++ -O2 -S

        movl    $output, %r10d
        movq    %r10, %r9
        .p2align 4,,10
        .p2align 3
.L2:
        movl    %r9d, %esi
        xorl    %edx, %edx
        xorl    %eax, %eax
        subl    %r10d, %esi
        .p2align 4,,10
        .p2align 3
.L3:
        leal    0(,%rax,4), %ecx
        leal    (%rdx,%rsi), %eax
        addl    $1, %edx
        cltq
        movzbl  input(%rax), %eax
        andl    $3, %eax
        orl     %ecx, %eax
        movl    %eax, %r8d
        movl    %eax, %edi
        movl    %eax, %ecx
        shrl    $24, %r8d
        shrl    $16, %edi
        shrl    $8, %ecx
        cmpl    $16, %edx
        jne     .L3
        movb    %r8b, (%r9)
        movb    %dil, 1(%r9)
        movb    %cl, 2(%r9)
        movb    %al, 3(%r9)
        addq    $4, %r9
        cmpq    $output+32, %r9
        jne     .L2
        rep
        ret

///////////////////////////////////////////////////////////////////

clang version 3.0 (http://llvm.org/git/clang.git
855f41963e545172a935d07b4713d079e258a207)
clang++ -O2 -S

# BB#0:                                 # %entry
        xorl    %eax, %eax
        .align  16, 0x90
.LBB0_1:                                # %for.cond4.preheader
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB0_2 Depth 2
        xorl    %esi, %esi
        movq    $-16, %rdx
        .align  16, 0x90
.LBB0_2:                                # %for.body7
                                        #   Parent Loop BB0_1 Depth=1
                                        # =>  This Inner Loop Header: Depth=2
        movl    %esi, %ecx
        movzbl  input+16(%rdx,%rax), %edi
        andl    $3, %edi
        leal    (,%rcx,4), %esi
        orl     %edi, %esi
        incq    %rdx
        jne     .LBB0_2
# BB#3:                                 # %for.inc44
                                        #   in Loop: Header=BB0_1 Depth=1
        movb    %sil, output+3(%rax)
        movl    %ecx, %edx
        shrl    $6, %edx
        movb    %dl, output+2(%rax)
        movl    %ecx, %edx
        shrl    $14, %edx
        movb    %dl, output+1(%rax)
        shrl    $22, %ecx
        movb    %cl, output(%rax)
        addq    $4, %rax
        cmpq    $32, %rax
        jne     .LBB0_1
# BB#4:                                 # %for.end47
        ret


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Bug tree-optimization/49203] missed-optimization: useless expressions not moved out of loop
  2011-05-27 22:19 [Bug tree-optimization/49203] New: missed-optimization: useless expressions not moved out of loop wouter.vermaelen at scarlet dot be
@ 2011-05-27 23:53 ` pinskia at gcc dot gnu.org
  2011-05-28 12:59 ` rguenth at gcc dot gnu.org
  2021-06-08 21:29 ` pinskia at gcc dot gnu.org
  2 siblings, 0 replies; 4+ messages in thread
From: pinskia at gcc dot gnu.org @ 2011-05-27 23:53 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49203

Andrew Pinski <pinskia at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Keywords|                            |missed-optimization
             Status|UNCONFIRMED                 |NEW
   Last reconfirmed|                            |2011.05.27 22:31:13
     Ever Confirmed|0                           |1

--- Comment #1 from Andrew Pinski <pinskia at gcc dot gnu.org> 2011-05-27 22:31:13 UTC ---
Looks like LIM is only able to hoist invariants rather than push down
invariants.  It can push down invariant stores though.


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Bug tree-optimization/49203] missed-optimization: useless expressions not moved out of loop
  2011-05-27 22:19 [Bug tree-optimization/49203] New: missed-optimization: useless expressions not moved out of loop wouter.vermaelen at scarlet dot be
  2011-05-27 23:53 ` [Bug tree-optimization/49203] " pinskia at gcc dot gnu.org
@ 2011-05-28 12:59 ` rguenth at gcc dot gnu.org
  2021-06-08 21:29 ` pinskia at gcc dot gnu.org
  2 siblings, 0 replies; 4+ messages in thread
From: rguenth at gcc dot gnu.org @ 2011-05-28 12:59 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49203

--- Comment #2 from Richard Guenther <rguenth at gcc dot gnu.org> 2011-05-28 12:57:19 UTC ---
(In reply to comment #1)
> Looks like LIM is only able to hoist invariants rather than push down
> invariants.  It can push down invariant stores though.

Indeed.  Code sinking (in tree-ssa-sink.c) would be able to fix this,
but it is run before loop store motion.


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Bug tree-optimization/49203] missed-optimization: useless expressions not moved out of loop
  2011-05-27 22:19 [Bug tree-optimization/49203] New: missed-optimization: useless expressions not moved out of loop wouter.vermaelen at scarlet dot be
  2011-05-27 23:53 ` [Bug tree-optimization/49203] " pinskia at gcc dot gnu.org
  2011-05-28 12:59 ` rguenth at gcc dot gnu.org
@ 2021-06-08 21:29 ` pinskia at gcc dot gnu.org
  2 siblings, 0 replies; 4+ messages in thread
From: pinskia at gcc dot gnu.org @ 2021-06-08 21:29 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49203

Andrew Pinski <pinskia at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
         Resolution|---                         |FIXED
             Status|NEW                         |RESOLVED
      Known to work|                            |7.0
   Target Milestone|---                         |7.0
      Known to fail|                            |4.8.5

--- Comment #3 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
Fixed in at least GCC 7.0:

.L2:
        leaq    16(%r8), %rsi
        movq    %r8, %rdx
        xorl    %eax, %eax
        .p2align 4,,10
        .p2align 3
.L3:
        movzbl  (%rdx), %ecx
        sall    $2, %eax
        addq    $1, %rdx
        andl    $3, %ecx
        orl     %ecx, %eax
        cmpq    %rsi, %rdx
        jne     .L3
        movl    %eax, %edx
        addq    $4, %r8
        movb    %ah, 2(%rdi)
        shrl    $24, %edx
        movb    %al, 3(%rdi)
        addq    $4, %rdi
        movb    %dl, -4(%rdi)
        movl    %eax, %edx
        shrl    $16, %edx
        movb    %dl, -3(%rdi)
        cmpq    %r8, %r9
        jne     .L2



  <bb 3> [94.12%]:
  # tmp_37 = PHI <tmp_23(3), 0(5)>
  # ivtmp.17_17 = PHI <ivtmp.17_15(3), ivtmp.28_79(5)>
  _1 = tmp_37 << 2;
  _87 = (void *) ivtmp.17_17;
  _3 = MEM[base: _87, offset: 0B];
  _20 = _3 & 3;
  _4 = (unsigned int) _20;
  tmp_23 = _1 | _4;
  ivtmp.17_15 = ivtmp.17_17 + 1;
  if (ivtmp.17_15 != _83)
    goto <bb 3>; [93.75%]
  else
    goto <bb 4>; [6.25%]

  <bb 4> [5.88%]:
  _5 = tmp_23 >> 24;
  _6 = (unsigned char) _5;
  _76 = (void *) ivtmp.27_82;
  MEM[base: _76, offset: 0B] = _6;
  _7 = tmp_23 >> 16;
  _9 = (unsigned char) _7;
  MEM[base: _76, offset: 1B] = _9;
  _10 = tmp_23 >> 8;
  _12 = (unsigned char) _10;
  MEM[base: _76, offset: 2B] = _12;
  _14 = (unsigned char) tmp_23;
  MEM[base: _76, offset: 3B] = _14;
  ivtmp.27_81 = ivtmp.27_82 + 4;
  ivtmp.28_78 = ivtmp.28_79 + 4;
  if (_71 != ivtmp.28_78)
    goto <bb 5>; [87.51%]
  else
    goto <bb 6>; [12.49%]

  <bb 5> [5.88%]:
  # ivtmp.27_82 = PHI <ivtmp.27_81(4), ivtmp.27_80(2)>
  # ivtmp.28_79 = PHI <ivtmp.28_78(4), ivtmp.28_77(2)>
  _83 = ivtmp.28_79 + 16;
  goto <bb 3>; [100.00%]

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-06-08 21:29 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-05-27 22:19 [Bug tree-optimization/49203] New: missed-optimization: useless expressions not moved out of loop wouter.vermaelen at scarlet dot be
2011-05-27 23:53 ` [Bug tree-optimization/49203] " pinskia at gcc dot gnu.org
2011-05-28 12:59 ` rguenth at gcc dot gnu.org
2021-06-08 21:29 ` pinskia at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).