public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/42972]  New: Very bad
@ 2010-02-05 11:13 steven at gcc dot gnu dot org
  2010-02-05 12:43 ` [Bug tree-optimization/42972] Very bad bit field code steven at gcc dot gnu dot org
                   ` (4 more replies)
  0 siblings, 5 replies; 8+ messages in thread
From: steven at gcc dot gnu dot org @ 2010-02-05 11:13 UTC (permalink / raw)
  To: gcc-bugs

Taken from http://hardwarebug.org/2010/01/30/bit-field-badness/

-----------------------------------
struct bf1_31 {
    unsigned a:1;
    unsigned b:31;
};

void func(struct bf1_31 *p, int n, int a)
{
    int i = 0;
    do {
        if (p[i].a)
            p[i].b += a;
    } while (++i < n);
}
-----------------------------------

GCC produces this dreadful code for arm-elf with options -march=armv5te -O3:

        .file   "t.c"
        .text
        .align  2
        .global func
        .type   func, %function
func:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        mov     r3, #0
        str     r4, [sp, #-4]!
.L3:
        ldrb    ip, [r0, #0]    @ zero_extendqisi2
        add     r3, r3, #1
        tst     ip, #1
        ldrne   ip, [r0, #0]
        addne   r4, r2, ip, lsr #1
        andne   ip, ip, #1
        orrne   ip, ip, r4, asl #1
        strne   ip, [r0, #0]
        cmp     r3, r1
        add     r0, r0, #4
        blt     .L3
        ldmfd   sp!, {r4}
        bx      lr
        .size   func, .-func
        .ident  "GCC: (GNU) 4.5.0 20100204 (experimental) [trunk revision
156492]"


What is wrong with this code:
1) The same value is loaded from memory twice (from [r0, #0])
2) Mask/shift/or operations are used where a simple shifted add would suffice
3) Write-back addressing is not used
4) The loop control counts up and compares instead of counting down

(Re. 4: The loop is clearly invertible. Does GCC fail to see this? Or is this
optimization just not implemented?)


Hand-optimized assembly would look like this according to Hardwarebug:
func:
        ldr     r3,  [r0], #4
        tst     r3,  #1
        add     r3,  r3,  r2,  lsl #1
        strne   r3,  [r0, #-4]
        subs    r1,  r1,  #1
        bgt     func
        bx      lr

Best compiler available produces this:
func:
        mov     r3, #0
        push    {r4, lr}
loop:
        ldr     ip, [r0, r3, lsl #2]
        tst     ip, #1
        addne   ip, ip, r2, lsl #1
        strne   ip, [r0, r3, lsl #2]
        add     r3, r3, #1
        cmp     r3, r1
        blt     loop
        pop     {r4, pc}


Part of the problem seems to be that the redundant load is not exposed in
GIMPLE (altough the RTL optimizers should also be able to see it). The
.139t.optimized dump looks like this:

     1  ;; Function func (func)
     2
     3  func (struct bf1_31 * p, int n, int a)
     4  {
     5    long unsigned int ivtmp.16;
     6    int i;
     7    <unnamed-unsigned:31> D.1973;
     8    unsigned int D.1972;
     9    int D.1971;
    10    int D.1970;
    11    <unnamed-unsigned:31> D.1969;
    12    unsigned char D.1966;
    13    unsigned char D.1965;
    14    struct bf1_31 * D.1964;
    15
    16  <bb 2>:
    17    ivtmp.16_30 = (long unsigned int) p_5(D);
    18
    19  <bb 3>:
    20    # i_1 = PHI <0(2), i_21(5)>
    21    # ivtmp.16_32 = PHI <ivtmp.16_30(2), ivtmp.16_31(5)>
    22    D.1964_38 = (struct bf1_31 *) ivtmp.16_32;
    23    D.1965_7 = BIT_FIELD_REF <*D.1964_38, 8, 0>;
    24    D.1966_8 = D.1965_7 & 1;
    25    if (D.1966_8 != 0)
    26      goto <bb 4>;
    27    else
    28      goto <bb 5>;
    29
    30  <bb 4>:
    31    D.1969_15 = D.1964_38->b;
    32    D.1970_16 = (int) D.1969_15;
    33    D.1971_18 = a_17(D) + D.1970_16;
    34    D.1972_19 = (unsigned int) D.1971_18;
    35    D.1973_20 = (<unnamed-unsigned:31>) D.1972_19;
    36    D.1964_38->b = D.1973_20;
    37
    38  <bb 5>:
    39    i_21 = i_1 + 1;
    40    ivtmp.16_31 = ivtmp.16_32 + 4;
    41    if (i_21 < n_22(D))
    42      goto <bb 3>;
    43    else
    44      goto <bb 6>;
    45
    46  <bb 6>:
    47    return;
    48
    49  }



There are two loads from *D.1964_38 at line 23 and line 31, but one is a
BIT_FIELD_REF and the other is an INDIRECT_REF, so the redundant load is not
noticed.


RTL PRE fails to kill the redundant load because the expressions for the loads
are not the same:

Index 0 (hash value 10)
  (zero_extend:SI (mem/s:QI (reg:SI 158 [ ivtmp.16 ]) [0+0 S1 A32]))
Index 3 (hash value 7)
  (mem/s:SI (reg:SI 158 [ ivtmp.16 ]) [0+0 S4 A32])


CSE also does not see the redundant load although it could notice it:

* It sees the path:
;; Following path with 17 sets: 3 4

* It sees the first load:
(insn 23 22 24 3 t.c:10 (set (reg:SI 169)
        (zero_extend:SI (mem/s:QI (reg:SI 158 [ ivtmp.16 ]) [0+0 S1 A32]))) 148
{*arm_zero_extendqisi2} (nil))

* It sees the second load:
(insn 31 30 32 4 t.c:11 (set (reg:SI 174)
        (mem/s:SI (reg:SI 158 [ ivtmp.16 ]) [0+0 S4 A32])) 166
{*arm_movsi_insn} (nil))

The problem here probably is the same as for RTL PRE, that the zero_extend
hides the redundancy.


-- 
           Summary: Very bad
           Product: gcc
           Version: 4.5.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: steven at gcc dot gnu dot org
GCC target triplet: arm-elf
 BugsThisDependsOn: 16996


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42972


^ permalink raw reply	[flat|nested] 8+ messages in thread
[parent not found: <bug-42972-4@http.gcc.gnu.org/bugzilla/>]

end of thread, other threads:[~2021-07-20 23:59 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-02-05 11:13 [Bug tree-optimization/42972] New: Very bad steven at gcc dot gnu dot org
2010-02-05 12:43 ` [Bug tree-optimization/42972] Very bad bit field code steven at gcc dot gnu dot org
2010-02-05 12:46 ` [Bug middle-end/42972] " steven at gcc dot gnu dot org
2010-02-05 13:17 ` rguenth at gcc dot gnu dot org
2010-02-05 13:26 ` rguenth at gcc dot gnu dot org
2010-02-05 15:36 ` matz at gcc dot gnu dot org
     [not found] <bug-42972-4@http.gcc.gnu.org/bugzilla/>
2012-03-19  4:11 ` steven at gcc dot gnu.org
2021-07-20 23:59 ` pinskia at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).