public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug c/106236] New: Not a bug, bad performance (with GCC 11.3.0 - O3) of a small etude in C
@ 2022-07-08 14:24 sanmayce at hotmail dot com
  2022-07-08 18:31 ` [Bug middle-end/106236] " pinskia at gcc dot gnu.org
  0 siblings, 1 reply; 2+ messages in thread
From: sanmayce at hotmail dot com @ 2022-07-08 14:24 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106236

            Bug ID: 106236
           Summary: Not a bug, bad performance (with GCC 11.3.0 - O3) of a
                    small etude in C
           Product: gcc
           Version: og11 (devel/omp/gcc-11)
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: sanmayce at hotmail dot com
  Target Milestone: ---

Created attachment 53279
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=53279&action=edit
The full C source, along with how to compile

The following C etude is not compiled well by GCC 11.3.0 -O3; what is
unpleasant to look at is the unnecessary jumps that GCC generates:

                                        // 'Magnetica' partitioning, mainloop
rev.7p [
                                        for (;;) {
                                                for (;Pivot < *Jndx; Jndx--) {
                                                }
                                                // Jndx could be PR, i.e. PR == Jndx
                                                if (PR == Jndx) break;
                                                //if (PR == Jndx) goto KUH; //]
QS_bench_r14_CLANG_14.0.1_rev5bypass.exe Glupendxr DNA Done in 240/59 seconds.
                                                //PR++;                    
//]<----+
                                                //PR = PR + !!(PR - Jndx); //] 
    |
                                                //PR = PR + (PR != Jndx); 
//]------+ QS_bench_r14_CLANG_14.0.1_rev5bypass.exe Glupendxr DNA Done in
256/63 seconds.
                                                PR++;                           
                                                M18_swapUnconditional (PR,
Jndx);
                                                // In here, Pivot is either == or > *(PR) when PR < Jndx
                                                if (Pivot > *(PR)) {
                                                        *PL=*(PR); PL++;
*(PR)=Pivot;                                                                   
                                                }
                                        }
                                       
//M18_SwapConditional_ifXbY_BUGGY((uint64_t)Jndx, (uint64_t)PR, PR, Jndx); //]
QS_bench_r14_CLANG_14.0.1_rev5bypass.exe Glupendxr DNA Done in 239/57 seconds.
                                        //PR = PR - (PR > Jndx);               
                                   //]
                                        //PR = PR +
M18_SwapConditional_ifXbY_BUGGY_DidWeSwap((uint64_t)Jndx, (uint64_t)PR, PR,
Jndx); //] QS_bench_r14_CLANG_14.0.1_rev5bypass.exe Glupendxr DNA Done in
233/57 seconds.
                                        KUH:;
                                        // 'Magnetica' partitioning, mainloop
rev.7p ]

// QS_bench_r14_CLANG_14.0.1_rev5bypass.exe Glupendxr DNA; Done in 236/33
seconds.
// QS_bench_r14_GCC11.3.0_rev5bypass.exe Glupendxr DNA; Done in 239/37 seconds.
// QS_bench_r14_ICL19.0_rev5bypass.exe Glupendxr DNA; Done in 244/32 seconds.

// clang_14.0.1 -O3 -mavx2 -S -fverbose-asm
/*
// 'Magnetica' partitioning, mainloop rev.7p [
.LBB181_7:                         ]<-----+
        movq    %rax, (%rsi)              |
        movq    %rsi, %rdi                |
        .p2align        4, 0x90           |
.LBB181_8:                        ]<--+   |
        addq    $8, %rcx              |   |
        .p2align        4, 0x90       |   |
.LBB181_9:                       ]<-+ |   |
        movq    -8(%rcx), %rbx      | |   |
        addq    $-8, %rcx           | |   |
        cmpq    %rbx, %rax          | |   |
        jb      .LBB181_9        ]--+ |   |
        cmpq    %rdi, %rcx            |   |
        je      .LBB181_13            |   |
        leaq    8(%rdi), %rsi         |   |
        movq    8(%rdi), %r14         |   |
        movq    %rbx, 8(%rdi)         |   |
        movq    %r14, (%rcx)          |   |
        movq    8(%rdi), %rbx         |   |
        movq    %rsi, %rdi            |   |
        cmpq    %rbx, %rax            |   |
        jbe     .LBB181_8         ]---+   |
        movq    %rbx, (%r11)              |
        addq    $8, %r11                  |
        jmp     .LBB181_7          ]------+
// 'Magnetica' partitioning, mainloop rev.7p ]
*/

// gcc_11.3.0 -S -O3 -mavx2 -m64 -static -fomit-frame-pointer
/*
// 'Magnetica' partitioning, mainloop rev.7p [
.L5597:                        ]<----------------------+<-+
        cmpq    %rcx, %r8                              |  |
        jb      .L5598          ]-------------------+  |  |
.L5609:                       ]<--+                 |  |  |
        leaq    8(%rax), %r11     |                 |  |  |
        cmpq    %rax, %rdx        |                 |  |  |
        je      .L5599            |                 |  |  |
        movq    8(%rax), %r10     |                 |  |  |
        movq    %rcx, 8(%rax)     |                 |  |  |
        movq    %r10, (%rdx)      |                 |  |  |
        movq    8(%rax), %rcx     |                 |  |  |
        cmpq    %r8, %rcx         |                 |  |  |
        jnb     .L5605            | ]---+           |  |  |
        movq    %rcx, (%r9)       |     |           |  |  |
        addq    $8, %r9           |     |           |  |  |
        movq    %r8, 8(%rax)      |     |           |  |  |
        movq    (%rdx), %rcx      |     |           |  |  |
        movq    %r11, %rax        |     |           |  |  |
        cmpq    %rcx, %r8         |     |           |  |  |
        jnb     .L5609        ]---+     |           |  |  |
.L5598:                                 | ]<--------+  |  |
        movq    -8(%rdx), %rcx          |              |  |
        subq    $8, %rdx                |              |  |
        jmp     .L5597                  | ]------------+  |
        .p2align 4,,10                  |                 |
        .p2align 3                      |                 |
.L5605:                        ]<-------+                 |
        movq    %r10, %rcx                                |
        movq    %r11, %rax                                |
        jmp     .L5597         ]--------------------------+
// 'Magnetica' partitioning, mainloop rev.7p ]
*/

^ permalink raw reply	[flat|nested] 2+ messages in thread

* [Bug middle-end/106236] Not a bug, bad performance (with GCC 11.3.0 - O3) of a small etude in C
  2022-07-08 14:24 [Bug c/106236] New: Not a bug, bad performance (with GCC 11.3.0 - O3) of a small etude in C sanmayce at hotmail dot com
@ 2022-07-08 18:31 ` pinskia at gcc dot gnu.org
  0 siblings, 0 replies; 2+ messages in thread
From: pinskia at gcc dot gnu.org @ 2022-07-08 18:31 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106236

--- Comment #1 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
Note the jumps are not the issue here.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-07-08 18:31 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-08 14:24 [Bug c/106236] New: Not a bug, bad performance (with GCC 11.3.0 - O3) of a small etude in C sanmayce at hotmail dot com
2022-07-08 18:31 ` [Bug middle-end/106236] " pinskia at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).