public inbox for gcc@gcc.gnu.org
 help / color / mirror / Atom feed
* B^HDEAD code generation (AMD64)
@ 2023-01-09 11:35 Stefan Kanthak
  2023-01-09 11:48 ` Thomas Koenig
  0 siblings, 1 reply; 6+ messages in thread
From: Stefan Kanthak @ 2023-01-09 11:35 UTC (permalink / raw)
  To: gcc

Hi,

compile the following 128-bit GCD routine for the AMD64 processor
with full optimization:

--- gcdti3.c ---
// Stein's algorithm: greatest common divisor

__uint128_t __gcdti3(__uint128_t p, __uint128_t q)
{
    unsigned    r, s = 0;
    __uint128_t t;

    if (p == 0)
        return q;

    if (q == 0)
        return p;

    if (p == q)
        return p;

    if (((unsigned long long) p | (unsigned long long) q) == 0)
        p >>= 64, q >>= 64, s = 64;

    r = __builtin_ctzll((unsigned long long) p | (unsigned long long) q), p >>= r, q >>= r, s += r;

    if ((unsigned long long) p == 0)
        p >>= 64;

    r = __builtin_ctzll(p), p >>= r;

    do
    {
        if ((unsigned long long) q == 0)
            q >>= 64;

        r = __builtin_ctzll(q), q >>= r;

        if (p < q)
            t = q, q = p, p = t;
    } while (q -= p);

    return p << s;
}
--- EOF ---

GCC 12.2: gcc -O3 gcdti3.c

# https://godbolt.org/z/d1Ma9qnsf
__gcdti3(unsigned __int128, unsigned __int128):
        mov     rax, rsi      # OOPS: GCCs plays six rounds of shell game!
        mov     r8, rdi       #
        mov     rsi, rdi      #
        mov     rdi, rax      #
        mov     rax, rdx      #
        mov     rdx, rcx      #
        mov     rcx, rdi
        or      rcx, r8
        je      .L1
        mov     rcx, r8
        mov     r8, rdi
        xor     rcx, rax
        xor     r8, rdx
        or      rcx, r8
        je      .L9
        mov     rcx, rax
        or      rcx, rdx
        je      .L9
        mov     rcx, rsi
        xor     r10d, r10d
        or      rcx, rax
        jne     .L3
        mov     rsi, rdi
        mov     rax, rdx
        xor     edi, edi
        xor     edx, edx
        mov     rcx, rsi
        mov     r10d, 64
        or      rcx, rax
.L3:
        rep bsf rcx, rcx
        xor     r8d, r8d      # OUCH: BSF and TZCNT return at most 63,
        shrd    rsi, rdi, cl
        shr     rdi, cl
        test    cl, 64        #       so this is dead code!
        cmovne  rsi, rdi      #
        cmovne  rdi, r8       #
        shrd    rax, rdx, cl
        xor     r11d, r11d    # OUCH: BSF and TZCNT return at most 63,
        shr     rdx, cl
        test    cl, 64        #       so this is dead code!
        cmovne  rax, rdx      #
        cmovne  rdx, r11      #
        mov     r8, rsi
        mov     r9, rdi
        add     r10d, ecx
        mov     rcx, r8
        mov     rsi, rax
        mov     rdi, rdx
        test    r8, r8
        je      .L14
.L4:
        rep bsf rcx, rcx
        mov     rax, r8
        mov     rdx, r9
        xor     r11d, r11d    # OUCH: BSF and TZCNT return at most 63,
        shr     rdx, cl
        shrd    rax, r9, cl
        and     ecx, 64       #       (there's also no need to modify ECX)
        cmovne  rax, rdx      #       so this is dead code!
        cmovne  rdx, r11      #
.L7:
        mov     rcx, rsi
        test    rsi, rsi
        jne     .L5
        mov     rsi, rdi
        xor     edi, edi
        mov     rcx, rsi
.L5:
        rep bsf rcx, rcx
        xor     r8d, r8d      # OUCH: BSF and TZCNT return at most 63,
        shrd    rsi, rdi, cl
        shr     rdi, cl
        test    cl, 64        #       so this is dead code,
        mov     rcx, rdx
        cmovne  rsi, rdi      #       and that too!
        cmovne  rdi, r8       #
        cmp     rax, rsi
        sbb     rcx, rdi
        jnc     .L6
        mov     r8, rax
        mov     r9, rdx
        mov     rax, rsi
        mov     rdx, rdi
        mov     rsi, r8
        mov     rdi, r9
.L6:
        sub     rsi, rax
        sbb     rdi, rdx
        mov     rcx, rdi
        or      rcx, rsi
        jne     .L7
        mov     ecx, r10d
        xor     esi, esi
        shld    rdx, rax, cl
        sal     rax, cl
        and     ecx, 64       # Oops: there's no need to modify ECX!
        cmovne  rdx, rax
        cmovne  rax, rsi
        ret
.L9:
        mov     rax, rsi
        mov     rdx, rdi
.L1:
        ret
.L14:
        mov     r8, r9
        xor     r9d, r9d
        mov     rcx, r8
        jmp     .L4

20 superfluous instructions of the total 102 instructions!

NOT AMUSED
Stefan Kanthak

PS: <https://skanthak.homepage.t-online.de/gcc.html#case27-amd64-s>
    shows properly written assembly code.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: B^HDEAD code generation (AMD64)
  2023-01-09 11:35 B^HDEAD code generation (AMD64) Stefan Kanthak
@ 2023-01-09 11:48 ` Thomas Koenig
  2023-01-09 12:17   ` LIU Hao
  2023-01-10  0:34   ` Stefan Kanthak
  0 siblings, 2 replies; 6+ messages in thread
From: Thomas Koenig @ 2023-01-09 11:48 UTC (permalink / raw)
  To: Stefan Kanthak, gcc

On 09.01.23 12:35, Stefan Kanthak wrote:
> 20 superfluous instructions of the total 102 instructions!

The proper place for bug reports is https://gcc.gnu.org/bugzilla/ .
Feel free to submit these cases there.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: B^HDEAD code generation (AMD64)
  2023-01-09 11:48 ` Thomas Koenig
@ 2023-01-09 12:17   ` LIU Hao
  2023-01-10  0:34   ` Stefan Kanthak
  1 sibling, 0 replies; 6+ messages in thread
From: LIU Hao @ 2023-01-09 12:17 UTC (permalink / raw)
  To: Thomas Koenig, Stefan Kanthak, gcc


[-- Attachment #1.1: Type: text/plain, Size: 966 bytes --]

在 2023/1/9 19:48, Thomas Koenig via Gcc 写道:
> On 09.01.23 12:35, Stefan Kanthak wrote:
>> 20 superfluous instructions of the total 102 instructions!
> 
> The proper place for bug reports is https://gcc.gnu.org/bugzilla/ .
> Feel free to submit these cases there.

Created https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108341, although I'm not sure whether it's 
related to this issue.


Given

    ```
    extern int r;

    int
    bz(int value)
      {
        r = __builtin_ctz(value);
        return value != 0;  // always true
      }
    ```

`value` is passed to `__builtin_ctz` and GCC should assume it can't be zero, otherwise the behavior 
is undefined. However GCC fails to optimize it and produces:

    ```
    bz:
      endbr64
      xor	eax, eax
      rep bsf	eax, edi
      mov	DWORD PTR r[rip], eax
      xor	eax, eax
      test	edi, edi
      setne	al
      ret
    ```


-- 
Best regards,
LIU Hao


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 840 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: B^HDEAD code generation (AMD64)
  2023-01-09 11:48 ` Thomas Koenig
  2023-01-09 12:17   ` LIU Hao
@ 2023-01-10  0:34   ` Stefan Kanthak
  2023-01-10  3:22     ` Andrew Pinski
  2023-01-10  7:12     ` Gabriel Ravier
  1 sibling, 2 replies; 6+ messages in thread
From: Stefan Kanthak @ 2023-01-10  0:34 UTC (permalink / raw)
  To: gcc, Thomas Koenig

"Thomas Koenig" <tkoenig@netcologne.de> wrote:

> On 09.01.23 12:35, Stefan Kanthak wrote:
>> 20 superfluous instructions of the total 102 instructions!
> 
> The proper place for bug reports is https://gcc.gnu.org/bugzilla/ .

OUCH: there's NO proper place for bugs at all!

> Feel free to submit these cases there.

I feel free to do whatever I like to do where I do it, for example:

--- bug.cpp ---
int main() {
    __uint128_t long long bug = 0;
}
--- EOF ---

See <https://godbolt.org/z/G7f81Mx5Y>

regards
Stefan

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: B^HDEAD code generation (AMD64)
  2023-01-10  0:34   ` Stefan Kanthak
@ 2023-01-10  3:22     ` Andrew Pinski
  2023-01-10  7:12     ` Gabriel Ravier
  1 sibling, 0 replies; 6+ messages in thread
From: Andrew Pinski @ 2023-01-10  3:22 UTC (permalink / raw)
  To: Stefan Kanthak; +Cc: gcc, Thomas Koenig

On Mon, Jan 9, 2023 at 4:42 PM Stefan Kanthak <stefan.kanthak@nexgo.de> wrote:
>
> "Thomas Koenig" <tkoenig@netcologne.de> wrote:
>
> > On 09.01.23 12:35, Stefan Kanthak wrote:
> >> 20 superfluous instructions of the total 102 instructions!
> >
> > The proper place for bug reports is https://gcc.gnu.org/bugzilla/ .
>
> OUCH: there's NO proper place for bugs at all!

HUH? soon people will ignore emails that are demeaning/ableist like
what you have been recently (saying things like braindead, etc.). And
yes bugzilla is where GCC tracks bug reports.

>
> > Feel free to submit these cases there.
>
> I feel free to do whatever I like to do where I do it, for example:
>
> --- bug.cpp ---
> int main() {
>     __uint128_t long long bug = 0;
> }
> --- EOF ---

With -pedantic-errors we get:

<source>: In function 'int main()':
<source>:2:22: error: 'long long' specified with '__int128 unsigned'
[-Wpedantic]
    2 |     __uint128_t long long bug = 0;
      |                      ^~~~

And also run into https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108099 .

This is a known extension but maybe it is not documented ...
Anyways read that bug report.


>
> See <https://godbolt.org/z/G7f81Mx5Y>
>
> regards
> Stefan

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: B^HDEAD code generation (AMD64)
  2023-01-10  0:34   ` Stefan Kanthak
  2023-01-10  3:22     ` Andrew Pinski
@ 2023-01-10  7:12     ` Gabriel Ravier
  1 sibling, 0 replies; 6+ messages in thread
From: Gabriel Ravier @ 2023-01-10  7:12 UTC (permalink / raw)
  To: Stefan Kanthak, gcc

On 1/10/23 01:34, Stefan Kanthak wrote:
> "Thomas Koenig" <tkoenig@netcologne.de> wrote:
>
>> On 09.01.23 12:35, Stefan Kanthak wrote:
>>> 20 superfluous instructions of the total 102 instructions!
>> The proper place for bug reports is https://gcc.gnu.org/bugzilla/ .
> OUCH: there's NO proper place for bugs at all!
>
>> Feel free to submit these cases there.
> I feel free to do whatever I like to do where I do it, for example:
>
> --- bug.cpp ---
> int main() {
>      __uint128_t long long bug = 0;
> }
> --- EOF ---
>
> See <https://godbolt.org/z/G7f81Mx5Y>
>
> regards
> Stefan

If you're trying to speedrun actually getting banned from this mailing 
list, then sure, I guess you can "do whatever I like to do where I do 
it", but you might find that more difficult after somebody decides to do 
something about it.


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-01-10  7:12 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-09 11:35 B^HDEAD code generation (AMD64) Stefan Kanthak
2023-01-09 11:48 ` Thomas Koenig
2023-01-09 12:17   ` LIU Hao
2023-01-10  0:34   ` Stefan Kanthak
2023-01-10  3:22     ` Andrew Pinski
2023-01-10  7:12     ` Gabriel Ravier

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).