[Bug rtl-optimization/115344] New: Missing loop counter reversal

public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed

From: "cmuellner at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug rtl-optimization/115344] New: Missing loop counter reversal
Date: Tue, 04 Jun 2024 12:47:10 +0000	[thread overview]
Message-ID: <bug-115344-4@http.gcc.gnu.org/bugzilla/> (raw)

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115344

            Bug ID: 115344
           Summary: Missing loop counter reversal
           Product: gcc
           Version: 15.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: cmuellner at gcc dot gnu.org
  Target Milestone: ---

Let's take a simple for-loop with an unknown bound:

void bar ();
void foo1 (int n) {
    for (int i = 0; i < n; i++) {
        bar ();
    }
}

We see that two variables are in the program,
but we could eliminate the loop variable `i` as follows:

void bar ();
void foo2 (int n) {
    while (n) {
        bar ();
        n--;
    }
}

Optimizing the loop as above has the following benefits:
- No need for a register for the loop variable `i`
- No need for an additional slot in the stack frame
- No need for instructions to save/restore the loop variable register in the
prologue/epilogue
- No need for an initialization instruction for the loop variable `i` (to zero)

LLVM does this transformation on (at least) x86-64,  RISC-V (rv64gc),
and AArch64 with -O3, but GCC does not.
Tests have been done with trunk and older GCC releases (I've tested down to GCC
4.4).

Related bug tickets:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=22041 (open - with uses of the
loop counter as an array index)
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=31238 (closed - fixed for GCC
4.5.0)
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=40886 (closed - fixed for GCC
4.5.0)

GCC AArch64 -O3:
foo1:
        cmp     w0, 0
        ble     .L6
        stp     x29, x30, [sp, -32]!
        mov     x29, sp
        stp     x19, x20, [sp, 16]
        mov     w20, w0
        mov     w19, 0
.L3:
        add     w19, w19, 1
        bl      bar
        cmp     w20, w19
        bne     .L3
        ldp     x19, x20, [sp, 16]
        ldp     x29, x30, [sp], 32
        ret
.L6:
        ret
foo2:
        cbz     w0, .L18
        stp     x29, x30, [sp, -32]!
        mov     x29, sp
        str     x19, [sp, 16]
        mov     w19, w0
.L12:
        bl      bar
        subs    w19, w19, #1
        bne     .L12
        ldr     x19, [sp, 16]
        ldp     x29, x30, [sp], 32
        ret
.L18:
        ret

LLVM AArch64 -O3:
foo1:                                   // @foo1
        cmp     w0, #1
        b.lt    .LBB0_4
        stp     x29, x30, [sp, #-32]!           // 16-byte Folded Spill
        str     x19, [sp, #16]                  // 8-byte Folded Spill
        mov     x29, sp
        mov     w19, w0
.LBB0_2:                                // =>This Inner Loop Header: Depth=1
        bl      bar
        subs    w19, w19, #1
        b.ne    .LBB0_2
        ldr     x19, [sp, #16]                  // 8-byte Folded Reload
        ldp     x29, x30, [sp], #32             // 16-byte Folded Reload
.LBB0_4:
        ret
foo2:                                   // @foo2
        cbz     w0, .LBB1_4
        stp     x29, x30, [sp, #-32]!           // 16-byte Folded Spill
        str     x19, [sp, #16]                  // 8-byte Folded Spill
        mov     x29, sp
        mov     w19, w0
.LBB1_2:                                // =>This Inner Loop Header: Depth=1
        bl      bar
        subs    w19, w19, #1
        b.ne    .LBB1_2
        ldr     x19, [sp, #16]                  // 8-byte Folded Reload
        ldp     x29, x30, [sp], #32             // 16-byte Folded Reload
.LBB1_4:
        ret

GCC RISC-V -O3 -march=rv64gc:
foo1:
        ble     a0,zero,.L6
        addi    sp,sp,-32
        sd      s0,16(sp)
        sd      s1,8(sp)
        sd      ra,24(sp)
        mv      s1,a0
        li      s0,0
.L3:
        addiw   s0,s0,1
        call    bar
        bne     s1,s0,.L3
        ld      ra,24(sp)
        ld      s0,16(sp)
        ld      s1,8(sp)
        addi    sp,sp,32
        jr      ra
.L6:
        ret
foo2:
        beq     a0,zero,.L18
        addi    sp,sp,-16
        sd      s0,0(sp)
        sd      ra,8(sp)
        mv      s0,a0
.L12:
        addiw   s0,s0,-1
        call    bar
        bne     s0,zero,.L12
        ld      ra,8(sp)
        ld      s0,0(sp)
        addi    sp,sp,16
        jr      ra
.L18:
        ret

LLVM RISC-V -O3 -march=rv64gc
foo1:                                   # @foo1
        blez    a0, .LBB0_4
        addi    sp, sp, -16
        sd      ra, 8(sp)                       # 8-byte Folded Spill
        sd      s0, 0(sp)                       # 8-byte Folded Spill
        mv      s0, a0
.LBB0_2:                                # =>This Inner Loop Header: Depth=1
        call    bar
        addiw   s0, s0, -1
        bnez    s0, .LBB0_2
        ld      ra, 8(sp)                       # 8-byte Folded Reload
        ld      s0, 0(sp)                       # 8-byte Folded Reload
        addi    sp, sp, 16
.LBB0_4:
        ret
foo2:                                   # @foo2
        beqz    a0, .LBB1_4
        addi    sp, sp, -16
        sd      ra, 8(sp)                       # 8-byte Folded Spill
        sd      s0, 0(sp)                       # 8-byte Folded Spill
        mv      s0, a0
.LBB1_2:                                # =>This Inner Loop Header: Depth=1
        call    bar
        addiw   s0, s0, -1
        bnez    s0, .LBB1_2
        ld      ra, 8(sp)                       # 8-byte Folded Reload
        ld      s0, 0(sp)                       # 8-byte Folded Reload
        addi    sp, sp, 16
.LBB1_4:
        ret

GCC x86-64 -O3:
foo1:
        testl   %edi, %edi
        jle     .L6
        pushq   %rbp
        movl    %edi, %ebp
        pushq   %rbx
        xorl    %ebx, %ebx
        subq    $8, %rsp
.L3:
        xorl    %eax, %eax
        addl    $1, %ebx
        call    bar
        cmpl    %ebx, %ebp
        jne     .L3
        addq    $8, %rsp
        popq    %rbx
        popq    %rbp
        ret
.L6:
        ret
foo2:
        testl   %edi, %edi
        je      .L18
        pushq   %rbx
        movl    %edi, %ebx
.L12:
        xorl    %eax, %eax
        call    bar
        subl    $1, %ebx
        jne     .L12
        popq    %rbx
        ret

LLVM x86-64 -O3:
foo1:                                   # @foo1
        testl   %edi, %edi
        jle     .LBB0_4
        pushq   %rbx
        movl    %edi, %ebx
.LBB0_2:                                # =>This Inner Loop Header: Depth=1
        xorl    %eax, %eax
        callq   bar@PLT
        decl    %ebx
        jne     .LBB0_2
        popq    %rbx
.LBB0_4:
        retq
foo2:                                   # @foo2
        testl   %edi, %edi
        je      .LBB1_4
        pushq   %rbx
        movl    %edi, %ebx
.LBB1_2:                                # =>This Inner Loop Header: Depth=1
        xorl    %eax, %eax
        callq   bar@PLT
        decl    %ebx
        jne     .LBB1_2
        popq    %rbx
.LBB1_4:
        retq

next             reply	other threads:[~2024-06-04 12:47 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-04 12:47 cmuellner at gcc dot gnu.org [this message]
2024-06-04 17:49 ` [Bug tree-optimization/115344] " pinskia at gcc dot gnu.org
2024-06-05  6:50 ` rguenth at gcc dot gnu.org
2024-06-24  6:18 ` andi-gcc at firstfloor dot org
2024-06-24  6:24 ` pinskia at gcc dot gnu.org
2024-06-24 17:04 ` andi-gcc at firstfloor dot org
2024-06-24 17:13 ` andi-gcc at firstfloor dot org

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bug-115344-4@http.gcc.gnu.org/bugzilla/ \
    --to=gcc-bugzilla@gcc.gnu.org \
    --cc=gcc-bugs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).