public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug middle-end/110052] New: useless local variable not optimized away
@ 2023-05-31  8:06 aldot at gcc dot gnu.org
  2023-05-31  8:31 ` [Bug middle-end/110052] " amonakov at gcc dot gnu.org
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: aldot at gcc dot gnu.org @ 2023-05-31  8:06 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110052

            Bug ID: 110052
           Summary: useless local variable not optimized away
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: aldot at gcc dot gnu.org
  Target Milestone: ---

Created attachment 55221
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=55221&action=edit
reduced test, v1

A superfluous local variable is not optimized away.

Consider:
$ cat localvar.c ; echo EOF
#include <stddef.h>
// original flags: gcc-12 -std=gnu99 -malign-data=abi -finline-limit=0
-fno-builtin-strlen -fomit-frame-pointer -ffunction-sections -fdata-sections
-fno-guess-branch-probability -funsigned-char -falign-functions=1
-falign-jumps=1 -falign-labels=1 -falign-loops=1 -fno-unwind-tables
-fno-asynchronous-unwind-tables -fno-builtin-printf -Oz
extern void *realloc(void *ptr, size_t size);
static char * __attribute__((noipa))
myrealloc(void *ptr, int n, int *size)
{
  if (!ptr || n >= *size) {
    *size = n + (n >> 1) + 80;
    ptr = realloc (ptr, *size);
    if (ptr == NULL) /* mimic xrealloc */
      __builtin_abort();
  }
  return ptr;
}
typedef struct foo_s {
  int fd;
  char *buffer;
  int adv;
  int size;
  int pos;
  signed char boo;
} foo_t;

void bloat(foo_t *);
void bloat(foo_t *foo) {
  char *m = foo->buffer;
  /* This is the original code, the local variable is superfluous */
  int size = foo->size;
  if (!m)
    m = myrealloc(m, 256, &size);
  foo->size = size;
}

void ok(foo_t *);
void ok(foo_t *foo) {
  char *m = foo->buffer;
  if (!m)
    m = myrealloc(m, 256, &foo->size);
}
EOF


$ readelf -s localvar.o | grep "FUNC *GLOBAL"
    13: 0000000000000000    52 FUNC    GLOBAL DEFAULT    6 bloat
    14: 0000000000000000    24 FUNC    GLOBAL DEFAULT    8 ok
$ strip -R .comment localvar.o
$ objdump -D localvar.o

localvar.o:     file format elf64-x86-64


Disassembly of section .text.myrealloc:

0000000000000000 <.text.myrealloc>:
   0:   48 89 f8                mov    %rdi,%rax
   3:   48 85 ff                test   %rdi,%rdi
   6:   74 04                   je     0xc
   8:   39 32                   cmp    %esi,(%rdx)
   a:   7f 21                   jg     0x2d
   c:   51                      push   %rcx
   d:   89 f1                   mov    %esi,%ecx
   f:   48 97                   xchg   %rax,%rdi
  11:   d1 f9                   sar    %ecx
  13:   8d 74 0e 50             lea    0x50(%rsi,%rcx,1),%esi
  17:   89 32                   mov    %esi,(%rdx)
  19:   48 63 f6                movslq %esi,%rsi
  1c:   e8 00 00 00 00          callq  0x21
  21:   48 85 c0                test   %rax,%rax
  24:   75 05                   jne    0x2b
  26:   e8 00 00 00 00          callq  0x2b
  2b:   5a                      pop    %rdx
  2c:   c3                      retq   
  2d:   c3                      retq   

Disassembly of section .text.bloat:

0000000000000000 <.text.bloat>:
   0:   53                      push   %rbx
   1:   48 89 fb                mov    %rdi,%rbx
   4:   48 83 ec 10             sub    $0x10,%rsp
   8:   8b 47 14                mov    0x14(%rdi),%eax
   b:   48 83 7f 08 00          cmpq   $0x0,0x8(%rdi)
  10:   89 44 24 0c             mov    %eax,0xc(%rsp)
  14:   75 11                   jne    0x27
  16:   48 8d 54 24 0c          lea    0xc(%rsp),%rdx
  1b:   be 00 01 00 00          mov    $0x100,%esi
  20:   31 ff                   xor    %edi,%edi
  22:   e8 00 00 00 00          callq  0x27
  27:   8b 44 24 0c             mov    0xc(%rsp),%eax
  2b:   89 43 14                mov    %eax,0x14(%rbx)
  2e:   48 83 c4 10             add    $0x10,%rsp
  32:   5b                      pop    %rbx
  33:   c3                      retq   

Disassembly of section .text.ok:

0000000000000000 <.text.ok>:
   0:   48 83 7f 08 00          cmpq   $0x0,0x8(%rdi)
   5:   75 10                   jne    0x17
   7:   48 8d 57 14             lea    0x14(%rdi),%rdx
   b:   be 00 01 00 00          mov    $0x100,%esi
  10:   31 ff                   xor    %edi,%edi
  12:   e9 00 00 00 00          jmpq   0x17
  17:   c3                      retq   

object size with each myrealloc and either bloat or ok:
$ size localvar-*.o
   text    data     bss     dec     hex filename
     70       0       0      70      46 localvar-ok.o
     98       0       0      98      62 localvar-bloat.o

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Bug middle-end/110052] useless local variable not optimized away
  2023-05-31  8:06 [Bug middle-end/110052] New: useless local variable not optimized away aldot at gcc dot gnu.org
@ 2023-05-31  8:31 ` amonakov at gcc dot gnu.org
  2023-05-31  8:51 ` rguenth at gcc dot gnu.org
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: amonakov at gcc dot gnu.org @ 2023-05-31  8:31 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110052

Alexander Monakov <amonakov at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |amonakov at gcc dot gnu.org

--- Comment #1 from Alexander Monakov <amonakov at gcc dot gnu.org> ---
That would be an invalid transform because 'myrealloc' might check its third
argument ('size') for equality against some other pointer (e.g. in some global
variable, or even the first argument) and change behavior based on the outcome.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Bug middle-end/110052] useless local variable not optimized away
  2023-05-31  8:06 [Bug middle-end/110052] New: useless local variable not optimized away aldot at gcc dot gnu.org
  2023-05-31  8:31 ` [Bug middle-end/110052] " amonakov at gcc dot gnu.org
@ 2023-05-31  8:51 ` rguenth at gcc dot gnu.org
  2023-05-31 18:12 ` aldot at gcc dot gnu.org
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-05-31  8:51 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110052

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |WAITING
   Last reconfirmed|                            |2023-05-31
     Ever confirmed|0                           |1

--- Comment #2 from Richard Biener <rguenth at gcc dot gnu.org> ---
  /* This is the original code, the local variable is superfluous */
  int size = foo->size;
  if (!m)
    m = myrealloc(m, 256, &size);
  foo->size = size;

I'm not sure we are allowed to change the &size argument to &foo->size
since it's observable whether the argument is equal to &foo->size or
not since 'foo' is global and myrealloc could have access to it
and also clobber it.  Consider

myrealloc(..., int *size)
{
  *size = 4;
  foo->size = 0;
}

if 'foo' were global.  Can you make the testcase avoid these
considerations?  I think we have no pass doing this kind of transform.

Maybe sth along

  int size1, size2;
  foo (&size1);
  size2 = size1;
  bar (&size2);

of course since size1 escapes to foo() bar() might do the same as myrealloc
above and it would break passing &size1 to bar().

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Bug middle-end/110052] useless local variable not optimized away
  2023-05-31  8:06 [Bug middle-end/110052] New: useless local variable not optimized away aldot at gcc dot gnu.org
  2023-05-31  8:31 ` [Bug middle-end/110052] " amonakov at gcc dot gnu.org
  2023-05-31  8:51 ` rguenth at gcc dot gnu.org
@ 2023-05-31 18:12 ` aldot at gcc dot gnu.org
  2023-05-31 18:24 ` aldot at gcc dot gnu.org
  2023-06-01 12:48 ` amonakov at gcc dot gnu.org
  4 siblings, 0 replies; 6+ messages in thread
From: aldot at gcc dot gnu.org @ 2023-05-31 18:12 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110052

--- Comment #3 from Bernhard Reutner-Fischer <aldot at gcc dot gnu.org> ---
Note that in this particular case myrealloc() is static, maybe i should have
omitted the noipa attribute for it was only meant to simplify analysis and
there is no such attribute in the original code.
Furthermore, i would have hoped that given the placement of the definition of
myrealloc() before the typedef foo_t, it should be unlikely that the usage of
*foo would invalidate such an optimisation _in this particular case_. While the
original code unfortunately does not mark *foo as restrict, adding restrict
would hopefully make it clear that myrealloc() has no business in comparing
foo's struct members, i'd hope?

When i drop the noipa attribute in the v1 testcase to make it similar to the
motivating case, i get:
    9: 0000000000000000    44 FUNC    GLOBAL DEFAULT    4 bloat
   12: 0000000000000000    38 FUNC    GLOBAL DEFAULT    6 ok


0000000000000000 <bloat>:
   0:   53                      push   %rbx
   1:   48 83 7f 08 00          cmpq   $0x0,0x8(%rdi)
   6:   48 89 fb                mov    %rdi,%rbx
   9:   8b 47 14                mov    0x14(%rdi),%eax
   c:   75 19                   jne    27 <bloat+0x27>
   e:   bf d0 01 00 00          mov    $0x1d0,%edi
  13:   e8 00 00 00 00          callq  18 <bloat+0x18>
  18:   48 85 c0                test   %rax,%rax
  1b:   75 05                   jne    22 <bloat+0x22>
  1d:   e8 00 00 00 00          callq  22 <bloat+0x22>
  22:   b8 d0 01 00 00          mov    $0x1d0,%eax
  27:   89 43 14                mov    %eax,0x14(%rbx)
  2a:   5b                      pop    %rbx
  2b:   c3                      retq   

Disassembly of section .text.ok:

0000000000000000 <ok>:
   0:   48 83 7f 08 00          cmpq   $0x0,0x8(%rdi)
   5:   75 1e                   jne    25 <ok+0x25>
   7:   52                      push   %rdx
   8:   c7 47 14 d0 01 00 00    movl   $0x1d0,0x14(%rdi)
   f:   bf d0 01 00 00          mov    $0x1d0,%edi
  14:   e8 00 00 00 00          callq  19 <ok+0x19>
  19:   48 85 c0                test   %rax,%rax
  1c:   75 05                   jne    23 <ok+0x23>
  1e:   e8 00 00 00 00          callq  23 <ok+0x23>
  23:   58                      pop    %rax
  24:   c3                      retq   
  25:   c3                      retq

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Bug middle-end/110052] useless local variable not optimized away
  2023-05-31  8:06 [Bug middle-end/110052] New: useless local variable not optimized away aldot at gcc dot gnu.org
                   ` (2 preceding siblings ...)
  2023-05-31 18:12 ` aldot at gcc dot gnu.org
@ 2023-05-31 18:24 ` aldot at gcc dot gnu.org
  2023-06-01 12:48 ` amonakov at gcc dot gnu.org
  4 siblings, 0 replies; 6+ messages in thread
From: aldot at gcc dot gnu.org @ 2023-05-31 18:24 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110052

--- Comment #4 from Bernhard Reutner-Fischer <aldot at gcc dot gnu.org> ---
(In reply to Bernhard Reutner-Fischer from comment #3)
> Note that in this particular case myrealloc() is static, maybe i should have
> omitted the noipa attribute for it was only meant to simplify analysis and
> there is no such attribute in the original code.
> Furthermore, i would have hoped that given the placement of the definition
> of myrealloc() before the typedef foo_t, it should be unlikely that the
> usage of *foo would invalidate such an optimisation _in this particular
> case_. While the original code unfortunately does not mark *foo as restrict,
> adding restrict would hopefully make it clear that myrealloc() has no
> business in comparing foo's struct members, i'd hope?
> 
> When i drop the noipa attribute in the v1 testcase to make it similar to the
> motivating case, i get:

Correction: When i replace noipa with noinline,noclone (and drop
-ffunction-sections -fdata-sections to improve readability), i get:
    10: 0000000000000030    52 FUNC    GLOBAL DEFAULT    1 bloat
    10: 0000000000000030    21 FUNC    GLOBAL DEFAULT    1 ok

0000000000000030 <bloat>:
  30:   53                      push   %rbx
  31:   48 89 fb                mov    %rdi,%rbx
  34:   48 83 ec 10             sub    $0x10,%rsp
  38:   8b 47 14                mov    0x14(%rdi),%eax
  3b:   48 83 7f 08 00          cmpq   $0x0,0x8(%rdi)
  40:   89 44 24 0c             mov    %eax,0xc(%rsp)
  44:   75 11                   jne    57 <bloat+0x27>
  46:   48 8d 54 24 0c          lea    0xc(%rsp),%rdx
  4b:   be 00 01 00 00          mov    $0x100,%esi
  50:   31 ff                   xor    %edi,%edi
  52:   e8 a9 ff ff ff          callq  0 <myrealloc>
  57:   8b 44 24 0c             mov    0xc(%rsp),%eax
  5b:   89 43 14                mov    %eax,0x14(%rbx)
  5e:   48 83 c4 10             add    $0x10,%rsp
  62:   5b                      pop    %rbx
  63:   c3                      retq   

0000000000000030 <ok>:
  30:   48 83 7f 08 00          cmpq   $0x0,0x8(%rdi)
  35:   75 0d                   jne    44 <ok+0x14>
  37:   48 8d 57 14             lea    0x14(%rdi),%rdx
  3b:   be 00 01 00 00          mov    $0x100,%esi
  40:   31 ff                   xor    %edi,%edi
  42:   eb bc                   jmp    0 <myrealloc>
  44:   c3                      retq

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Bug middle-end/110052] useless local variable not optimized away
  2023-05-31  8:06 [Bug middle-end/110052] New: useless local variable not optimized away aldot at gcc dot gnu.org
                   ` (3 preceding siblings ...)
  2023-05-31 18:24 ` aldot at gcc dot gnu.org
@ 2023-06-01 12:48 ` amonakov at gcc dot gnu.org
  4 siblings, 0 replies; 6+ messages in thread
From: amonakov at gcc dot gnu.org @ 2023-06-01 12:48 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110052

--- Comment #5 from Alexander Monakov <amonakov at gcc dot gnu.org> ---
There are other reasons why it's invalid. For instance, in a multi-threaded
program it could introduce a data race on assignment to foo->size inside of
'myrealloc' where the original program might have aborted without invoking UB.

Can you give more context about the problem you're trying to solve?

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-06-01 12:48 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-31  8:06 [Bug middle-end/110052] New: useless local variable not optimized away aldot at gcc dot gnu.org
2023-05-31  8:31 ` [Bug middle-end/110052] " amonakov at gcc dot gnu.org
2023-05-31  8:51 ` rguenth at gcc dot gnu.org
2023-05-31 18:12 ` aldot at gcc dot gnu.org
2023-05-31 18:24 ` aldot at gcc dot gnu.org
2023-06-01 12:48 ` amonakov at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).