public inbox for gcc@gcc.gnu.org
 help / color / mirror / Atom feed
* Optimiser failure for ternary foo == 0L ? NULL : bar;
@ 2021-07-17 18:54 Stefan Kanthak
  2021-07-17 19:31 ` Richard Biener
  0 siblings, 1 reply; 2+ messages in thread
From: Stefan Kanthak @ 2021-07-17 18:54 UTC (permalink / raw)
  To: gcc

Hi,

GCC 10.2.0 (and GCC 8.3; other versions and targets except i386 and
amd64 not tested) generate rather bad code for the following ternary
expression:

--- dummy.c ---
#define NULL (char *) 0

char *dummy(char *string, long count) {
    return count == 0 ? NULL : string + 1;
}
--- EOF ---

$ gcc -m64 -o- -O3 -S dummy.c

dummy:
        addq    $1, %rdi
        movl    $0, %eax
        testq   %rsi, %rsi
        cmovne  %rdi, %rax
        ret

JFTR: why does GCC NOT generate the shorter "XOR %eax, %eax" here?

$ gcc -m64 -O3 -c dummy.c
$ objdump -D dummy.o

0000000000000000 <dummy>:
   0: 48 83 c7 01           add    $0x1,%rdi
   4: b8 00 00 00 00        mov    $0x0,%eax
   9: 48 85 f6              test   %rsi,%rsi
   c: 48 0f 45 c7           cmovne %rdi,%rax
  10: c3                    retq   


i386 and AMD64 use the ILP32 and LP64 data models, respectively, where a
"long" and a "pointer" have the same size, and 0L and the null pointer have the
same binary representation, so the contents of RSI should be used to
load RAX with 0 conditionally:

dummy:
        leaq    1(%rdi), %rax
        testq   %rsi, %rsi
        cmoveq  %rsi, %rax
        ret

$ gcc -m32 -o- -O3 -S dummy.c

_dummy:
        movl   8(%esp), %edx
        movl   4(%esp), %eax
        addl   $1, %eax
        testl  %edx, %edx
        movl   $0, %edx
        cmove  %edx, %eax    # OUCH: if this executes, EDX was 0 before,
        ret                  #       so the MOV is really a NOP!


$ gcc -m32 -O3 -c dummy.c
$ objdump -D dummy.o

00000000 <_dummy>:
   0:   8b 54 24 08             mov    0x8(%esp),%edx
   4:   8b 44 24 04             mov    0x4(%esp),%eax
   8:   83 c0 01                add    $0x1,%eax
   b:   85 d2                   test   %edx,%edx
   d:   ba 00 00 00 00          mov    $0x0,%edx
  12:   0f 44 c2                cmove  %edx,%eax
  15:   c3                      ret    

Here's what GCC should generate instead:

00000000 <_dummy>:
   0:   8b 44 24 04             mov    0x4(%esp),%eax
   4:   8b 4c 24 08             mov    0x8(%esp),%ecx
   8:   40                      inc    %eax
   9:   f7 d9                   neg    %ecx
   b:   19 c9                   sbb    %ecx,%ecx
   d:   21 c8                   and    %ecx,%eax
   f:   c3                      ret    


For (pre)historic processors which don't support CMOVcc the
following code is generated:

$ gcc -m32 -mtune=i386 -o- -S dummy.c

_dummy:
        movl    8(%esp), %eax
        testl   %eax, %eax
        je      L3
        movl    4(%esp), %eax
        incl    %eax
        ret
        .p2align 2
L3:                        # OUCH: EAX is already 0 here!
        xorl    %eax, %eax
        ret

00000000 <dummy>:
   0:   8b 44 24 08             mov    0x8(%esp),%eax
   4:   85 c0                   test   %eax,%eax
   6:   74 08                   je     10 <dummy+0x10>
   8:   8b 44 24 04             mov    0x4(%esp),%eax
   c:   40                      inc    %eax
   d:   c3                      ret
   e:   66 90                   xchg   %ax,%ax
  10:   31 c0                   xor    %eax,%eax
  12:   c3                      ret


not amused
Stefan Kanthak

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: Optimiser failure for ternary foo == 0L ? NULL : bar;
  2021-07-17 18:54 Optimiser failure for ternary foo == 0L ? NULL : bar; Stefan Kanthak
@ 2021-07-17 19:31 ` Richard Biener
  0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2021-07-17 19:31 UTC (permalink / raw)
  To: gcc, Stefan Kanthak

On July 17, 2021 8:54:38 PM GMT+02:00, Stefan Kanthak <stefan.kanthak@nexgo.de> wrote:
>Hi,
>
>GCC 10.2.0 (and GCC 8.3; other versions and targets except i386 and
>amd64 not tested) generate rather bad code for the following ternary
>expression:
>
>--- dummy.c ---
>#define NULL (char *) 0
>
>char *dummy(char *string, long count) {
>    return count == 0 ? NULL : string + 1;
>}
>--- EOF ---
>
>$ gcc -m64 -o- -O3 -S dummy.c
>
>dummy:
>        addq    $1, %rdi
>        movl    $0, %eax
>        testq   %rsi, %rsi
>        cmovne  %rdi, %rax
>        ret
>
>JFTR: why does GCC NOT generate the shorter "XOR %eax, %eax" here?
>
>$ gcc -m64 -O3 -c dummy.c
>$ objdump -D dummy.o
>
>0000000000000000 <dummy>:
>   0: 48 83 c7 01           add    $0x1,%rdi
>   4: b8 00 00 00 00        mov    $0x0,%eax
>   9: 48 85 f6              test   %rsi,%rsi
>   c: 48 0f 45 c7           cmovne %rdi,%rax
>  10: c3                    retq   
>
>
>i386 and AMD64 use the ILP32 and LP64 data model where a "long" and
>a "pointer" have the same size, and 0L and the null pointer have the
>same binary representation, so the contents of RSI should be used to
>load RAX with 0 conditionally:
>
>dummy:
>        leaq    1(%rdi), %rax
>        testq   %rsi, %rsi
>        cmoveq  %rsi, %rax
>        ret
>
>$ gcc -m32 -o- -O3 -S dummy.c
>
>_dummy:
>        movl   8(%esp), %edx
>        movl   4(%esp), %eax
>        addl   $1, %eax
>        testl  %edx, %edx
>        movl   $0, %edx
>        cmove  %edx, %eax    # OUCH: if this executes, EDX was 0 before,
>        ret                  #       so the MOV is really a NOP!
>
>
>$ gcc -m32 -O3 -c dummy.c
>$ objdump -D dummy.o
>
>00000000 <_dummy>:
>   0:   8b 54 24 08             mov    0x8(%esp),%edx
>   4:   8b 44 24 04             mov    0x4(%esp),%eax
>   8:   83 c0 01                add    $0x1,%eax
>   b:   85 d2                   test   %edx,%edx
>   d:   ba 00 00 00 00          mov    $0x0,%edx
>  12:   0f 44 c2                cmove  %edx,%eax
>  15:   c3                      ret    
>
>Here's what GCC should generate instead:
>
>00000000 <_dummy>:
>   0:   8b 44 24 04             mov    0x4(%esp),%eax
>   4:   8b 4c 24 08             mov    0x8(%esp),%ecx
>   8:   40                      inc    %eax
>   9:   f7 d9                   neg    %ecx
>   b:   19 c9                   sbb    %ecx,%ecx
>   d:   21 c8                   and    %ecx,%eax
>   f:   c3                      ret    
>
>
>For (pre)historic processors which don't support CMOVcc the
>following code is generated:
>
>$ gcc -m32 -mtune=i386 -o- -S dummy.c
>
>_dummy:
>        movl    8(%esp), %eax
>        testl   %eax, %eax
>        je      L3
>        movl    4(%esp), %eax
>        incl    %eax
>        ret
>        .p2align 2
>L3:                        # OUCH: EAX is already 0 here!
>        xorl    %eax, %eax
>        ret
>
>00000000 <dummy>:
>   0:   8b 44 24 08             mov    0x8(%esp),%eax
>   4:   85 c0                   test   %eax,%eax
>   6:   74 08                   je     10 <dummy+0x10>
>   8:   8b 44 24 04             mov    0x4(%esp),%eax
>   c:   40                      inc    %eax
>   d:   c3                      ret
>   e:   66 90                   xchg   %ax,%ax
>  10:   31 c0                   xor    %eax,%eax
>  12:   c3                      ret
>
>
>not amused

Patches welcome.  You might want to file a bugzilla report which has a higher chance of being found after a while.

Richard. 

>Stefan Kanthak


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-07-17 19:31 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-17 18:54 Optimiser failure for ternary foo == 0L ? NULL : bar; Stefan Kanthak
2021-07-17 19:31 ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).