public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
From: "hubicka at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug ipa/103585] fatigue2 requires inlining of perdida to work well
Date: Wed, 08 Dec 2021 18:24:42 +0000	[thread overview]
Message-ID: <bug-103585-4-IOYmRC7HAB@http.gcc.gnu.org/bugzilla/> (raw)
In-Reply-To: <bug-103585-4@http.gcc.gnu.org/bugzilla/>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103585

--- Comment #7 from Jan Hubicka <hubicka at gcc dot gnu.org> ---
Created attachment 51952
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=51952&action=edit
Patch to teach modref about global memory

This patch extends modref so that we eliminate the dead part of the array
descriptors.  This is done by distinguishing between accesses to global memory
and accesses to unknown memory.  The former is less evil, since it cannot
access things that do not escape, and it is possible to arrange for unanalyzed
functions to access only global memory (since everything reaching them escapes).

With the patch I now get
 Performance counter stats for './a.out-badmod':

         27,448.82 msec task-clock                #    0.999 CPUs utilized      
               334      context-switches          #   12.168 /sec               
                10      cpu-migrations            #    0.364 /sec               
                70      page-faults               #    2.550 /sec               
   109,079,862,444      cycles                    #    3.974 GHz                
       130,234,675      stalled-cycles-frontend   #    0.12% frontend cycles idle
    80,559,978,185      stalled-cycles-backend    #   73.85% backend cycles idle
   230,337,342,608      instructions              #    2.11  insn per cycle     
                                                  #    0.35  stalled cycles per insn
     9,808,852,327      branches                  #  357.351 M/sec              
        24,742,926      branch-misses             #    0.25% of all branches    

      27.468971377 seconds time elapsed

      27.445934000 seconds user
       0.003999000 seconds sys


So the 56% slowdown is now only 22%, which is quite a nice improvement.
The call to perdida now looks as follows:
  <bb 70> [local count: 109362591]:
  _818 = (unsigned long) _44;
  _817 = _818 + ivtmp.1050_1737;
  _246 = (real(kind=8) *) _817;
  parm.326.data = _246;
  _811 = (unsigned long) _33;
  _810 = _811 + ivtmp.1050_1737;
  _247 = (real(kind=8) *) _810;
  parm.327.data = _247;
  _712 = (unsigned long) _56;
  _711 = _712 + ivtmp.1050_1737;
  _248 = (real(kind=8) *) _711;
  parm.328.data = _248;
  _612 = (unsigned long) _20;
  _574 = _612 + ivtmp.1050_1737;
  _249 = (real(kind=8) *) _574;
  parm.329.data = _249;
  _573 = (unsigned long) _67;
  _572 = _573 + ivtmp.1050_1737;
  _250 = (real(kind=8) *) _572;
  parm.330.data = _250;
  _251 = (real(kind=8) *) ivtmp.1049_1740;
  _252 = (real(kind=8) *) ivtmp.1048_1753;
  _253 = (real(kind=8) *) ivtmp.1047_1762;
  perdida.constprop (&dt, &lambda, &mu, &yield_stress, &r_infinity, &b,
&x_infinity, &gamma, &eta, &plastic_strain_threshold, &parm.326, &parm.327,
&parm.328, &parm.329, _253, &parm.330, _252, _251, &failure_threshold,
&crack_closure_parameter);
  parm.326 ={v} {CLOBBER};
  parm.327 ={v} {CLOBBER};
  parm.328 ={v} {CLOBBER};
  parm.329 ={v} {CLOBBER};
  parm.330 ={v} {CLOBBER};

and compiles to:
        movq    48(%rsp), %rax
        movq    %r10, 24(%rsp)
        addq    %rbp, %rax
        movq    %rax, 720(%rsp)
        movq    32(%rsp), %rax
        addq    %rbp, %rax
        movq    %rax, 816(%rsp)
        movq    64(%rsp), %rax
        addq    %rbp, %rax
        movq    %rax, 912(%rsp)
        movq    40(%rsp), %rax
        addq    %rbp, %rax
        movq    %rax, 1008(%rsp)
        movq    72(%rsp), %rax
        addq    %rbp, %rax
        addq    $72, %rbp
        movq    %rax, 1104(%rsp)
        leaq    352(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1720
        leaq    384(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1728
        pushq   %r13
        .cfi_def_cfa_offset 1736
        addq    $8, %r13
        pushq   %r12
        .cfi_def_cfa_offset 1744
        addq    $8, %r12
        leaq    1136(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1752
        pushq   %r14
        .cfi_def_cfa_offset 1760
        addq    $8, %r14
        leaq    1056(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1768
        leaq    968(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1776
        leaq    880(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1784
        leaq    792(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1792
        leaq    488(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1800
        leaq    456(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1808
        leaq    480(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1816
        leaq    568(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1824
        leaq    440(%rsp), %r9
        leaq    528(%rsp), %r8
        leaq    584(%rsp), %rcx
        leaq    512(%rsp), %rdx
        leaq    504(%rsp), %rsi
        leaq    472(%rsp), %rdi
        call    __perdida_m_MOD_perdida.constprop.0

  parent reply	other threads:[~2021-12-08 18:24 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-06 16:59 [Bug tree-optimization/103585] New: " hubicka at gcc dot gnu.org
2021-12-06 17:15 ` [Bug tree-optimization/103585] " hubicka at gcc dot gnu.org
2021-12-07  9:31 ` [Bug ipa/103585] " hubicka at gcc dot gnu.org
2021-12-07  9:49 ` hubicka at gcc dot gnu.org
2021-12-07 10:10 ` hubicka at gcc dot gnu.org
2021-12-07 10:17 ` hubicka at gcc dot gnu.org
2021-12-07 10:24 ` marxin at gcc dot gnu.org
2021-12-08 18:24 ` hubicka at gcc dot gnu.org [this message]
2021-12-08 18:28 ` hubicka at gcc dot gnu.org
2021-12-08 18:43 ` hubicka at gcc dot gnu.org
2021-12-12  8:43 ` hubicka at gcc dot gnu.org
2021-12-14 15:51 ` cvs-commit at gcc dot gnu.org
2022-01-31 10:33 ` hubicka at gcc dot gnu.org
2022-12-14  0:05 ` cvs-commit at gcc dot gnu.org
2023-01-19 13:21 ` jamborm at gcc dot gnu.org
2023-01-29  2:23 ` hubicka at gcc dot gnu.org

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bug-103585-4-IOYmRC7HAB@http.gcc.gnu.org/bugzilla/ \
    --to=gcc-bugzilla@gcc.gnu.org \
    --cc=gcc-bugs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).