public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
From: "hubicka at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug tree-optimization/99408] s3251 benchmark of TSVC vectorized by clang runs about 7 times faster compared to gcc
Date: Wed, 16 Nov 2022 17:03:34 +0000	[thread overview]
Message-ID: <bug-99408-4-4xRP4o3D8p@http.gcc.gnu.org/bugzilla/> (raw)
In-Reply-To: <bug-99408-4@http.gcc.gnu.org/bugzilla/>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99408

--- Comment #2 from Jan Hubicka <hubicka at gcc dot gnu.org> ---
This also reproduces with zen4 and double.

jh@alberti:~/tsvc/bin> cat tt.c
typedef double real_t;
#define iterations 100000
#define LEN_1D 32000
#define LEN_2D 256
real_t a[LEN_1D],b[LEN_1D],c[LEN_1D],d[LEN_1D],e[LEN_1D];
void
main(void)
{
    for (int nl = 0; nl < iterations; nl++) {
        for (int i = 0; i < LEN_1D-1; i++){
            a[i+1] = b[i]+c[i];
            b[i]   = c[i]*e[i];
            d[i]   = a[i]*e[i];
        }
    }
}
jh@alberti:~/tsvc/bin> ~/trunk-install/bin/gcc -Ofast -march=native tt.c
jh@alberti:~/tsvc/bin> time ./a.out

real    0m3.590s
user    0m3.585s
sys     0m0.004s
jh@alberti:~/tsvc/bin> clang -Ofast -march=native tt.c
tt.c:6:1: warning: return type of 'main' is not 'int' [-Wmain-return-type]
void
^
tt.c:6:1: note: change return type to 'int'
void
^~~~
int
1 warning generated.
jh@alberti:~/tsvc/bin> time ./a.out

real    0m1.538s
user    0m1.538s
sys     0m0.000s

gcc generates:

       │ 60:   vmovapd    0x67e080(%rax),%zmm7                                  
  0.15 │       vmovapd    0x601080(%rax),%zmm2                                  
  1.07 │       add        $0x40,%rax                                            
       │       vaddpd     0x6bc840(%rax),%zmm7,%zmm0                            
  0.00 │       vmovupd    %zmm0,0x6fb048(%rax)                                  
 11.10 │       vmulpd     0x601040(%rax),%zmm7,%zmm0                            
  9.46 │       vmovapd    %zmm0,0x6bc840(%rax)                                  
  0.01 │       vmulpd     0x6fb040(%rax),%zmm2,%zmm0                            
 78.20 │       vmovapd    %zmm0,0x63f840(%rax)                                  
       │       cmp        $0x3e7c0,%rax                                         
       │     ↑ jne        60                                                    

clang generates:
       │       nop                                                              
       │ a0:   vmovupd       (%r9,%rdx,1),%zmm15                                
  0.46 │       vmovupd       (%r9,%rdi,1),%zmm19                                
  0.22 │       vmovupd       0x40(%r9,%rdx,1),%zmm16                            
  0.56 │       vmovupd       0x40(%r9,%rdi,1),%zmm22                            
  0.92 │       vmovupd       0x80(%r9,%rdx,1),%zmm17                            
  1.85 │       vmovupd       0x80(%r9,%rdi,1),%zmm21                            
  1.51 │       vaddpd        (%r9,%rcx,1),%zmm15,%zmm18                         
  0.84 │       vmulpd        %zmm15,%zmm19,%zmm15                               
  0.47 │       vmovupd       %zmm15,(%r9,%rcx,1)                                
  3.37 │       vaddpd        0x40(%r9,%rcx,1),%zmm16,%zmm15                     
  0.56 │       vmulpd        %zmm16,%zmm22,%zmm16                               
  0.69 │       vmovupd       %zmm16,0x40(%r9,%rcx,1)                            
  3.82 │       vmovupd       %zmm18,0x8(%r9,%rsi,1)                             
  3.27 │       vmovapd       %zmm15,%zmm20                                      
       │       vmovupd       %zmm15,0x48(%r9,%rsi,1)                            
  3.60 │       vpermt2pd     %zmm18,%zmm13,%zmm20                               
  0.47 │       vpermt2pd     %zmm14,%zmm13,%zmm18                               
  0.36 │       vmulpd        %zmm19,%zmm18,%zmm18                               
  1.07 │       vmulpd        %zmm22,%zmm20,%zmm14                               
  1.33 │       vmovupd       %zmm18,(%r9,%r8,1)                                 
  6.31 │       vmovupd       %zmm14,0x40(%r9,%r8,1)                             
  8.02 │       vaddpd        0x80(%r9,%rcx,1),%zmm17,%zmm14                     
  0.53 │       vmovapd       %zmm14,%zmm16                                      
  0.05 │       vmovupd       %zmm14,0x88(%r9,%rsi,1)                            
  3.08 │       vpermt2pd     %zmm15,%zmm13,%zmm16                               
  0.41 │       vmulpd        %zmm17,%zmm21,%zmm15                               
  0.20 │       vmovupd       %zmm15,0x80(%r9,%rcx,1)                            
  1.60 │       vmulpd        %zmm21,%zmm16,%zmm15                               
  1.16 │       vmovupd       %zmm15,0x80(%r9,%r8,1)                             
  3.13 │       add           $0xc0,%r9                                          
       │       cmp           $0x3e7c0,%r9                                       
  0.03 │     ↑ jne           a0       

So is there a forward-dependency here?

  parent reply	other threads:[~2022-11-16 17:03 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-05 14:11 [Bug middle-end/99408] New: " hubicka at gcc dot gnu.org
2021-03-08  8:17 ` [Bug middle-end/99408] " rguenth at gcc dot gnu.org
2021-12-22 10:42 ` [Bug tree-optimization/99408] " pinskia at gcc dot gnu.org
2022-11-16 17:03 ` hubicka at gcc dot gnu.org [this message]
2023-01-11 19:03 ` hubicka at gcc dot gnu.org
2023-01-14 22:33 ` hubicka at gcc dot gnu.org

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bug-99408-4-4xRP4o3D8p@http.gcc.gnu.org/bugzilla/ \
    --to=gcc-bugzilla@gcc.gnu.org \
    --cc=gcc-bugs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).