From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-bugzilla@gcc.gnu.org>
Received: by sourceware.org (Postfix, from userid 48)
 id 8A2C7385841B; Wed,  8 Dec 2021 18:24:42 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 8A2C7385841B
From: "hubicka at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug ipa/103585] fatigue2 requires inlining of peridida to work well
Date: Wed, 08 Dec 2021 18:24:42 +0000
X-Bugzilla-Reason: CC
X-Bugzilla-Type: changed
X-Bugzilla-Watch-Reason: None
X-Bugzilla-Product: gcc
X-Bugzilla-Component: ipa
X-Bugzilla-Version: 12.0
X-Bugzilla-Keywords: missed-optimization
X-Bugzilla-Severity: normal
X-Bugzilla-Who: hubicka at gcc dot gnu.org
X-Bugzilla-Status: NEW
X-Bugzilla-Resolution: 
X-Bugzilla-Priority: P3
X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org
X-Bugzilla-Target-Milestone: ---
X-Bugzilla-Flags: 
X-Bugzilla-Changed-Fields: attachments.created
Message-ID: <bug-103585-4-IOYmRC7HAB@http.gcc.gnu.org/bugzilla/>
In-Reply-To: <bug-103585-4@http.gcc.gnu.org/bugzilla/>
References: <bug-103585-4@http.gcc.gnu.org/bugzilla/>
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/
Auto-Submitted: auto-generated
MIME-Version: 1.0
X-BeenThere: gcc-bugs@gcc.gnu.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Gcc-bugs mailing list <gcc-bugs.gcc.gnu.org>
List-Unsubscribe: <https://gcc.gnu.org/mailman/options/gcc-bugs>,
 <mailto:gcc-bugs-request@gcc.gnu.org?subject=unsubscribe>
List-Archive: <https://gcc.gnu.org/pipermail/gcc-bugs/>
List-Post: <mailto:gcc-bugs@gcc.gnu.org>
List-Help: <mailto:gcc-bugs-request@gcc.gnu.org?subject=help>
List-Subscribe: <https://gcc.gnu.org/mailman/listinfo/gcc-bugs>,
 <mailto:gcc-bugs-request@gcc.gnu.org?subject=subscribe>
X-List-Received-Date: Wed, 08 Dec 2021 18:24:42 -0000

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D103585
--- Comment #7 from Jan Hubicka <hubicka at gcc dot gnu.org> ---
Created attachment 51952
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=3D51952&action=3Dedit
Patch to teach modref about global memory

This patch extends modref so that we eliminate the dead part of array
descriptors.  This is done by distinguishing between accesses to global
memory and accesses to unknown memory.  The former is less evil since it
cannot access things that do not escape, and it is possible to arrange for
unanalyzed functions to access only global memory (since everything
reaching them escapes).

With the patch I now get
 Performance counter stats for './a.out-badmod':

         27,448.82 msec task-clock                #    0.999 CPUs utilized=
=20=20=20=20=20=20
               334      context-switches          #   12.168 /sec=20=20=20=
=20=20=20=20=20=20=20=20=20=20=20=20
                10      cpu-migrations            #    0.364 /sec=20=20=20=
=20=20=20=20=20=20=20=20=20=20=20=20
                70      page-faults               #    2.550 /sec=20=20=20=
=20=20=20=20=20=20=20=20=20=20=20=20
   109,079,862,444      cycles                    #    3.974 GHz=20=20=20=
=20=20=20=20=20=20=20=20=20=20=20=20=20
       130,234,675      stalled-cycles-frontend   #    0.12% frontend cycles
idle=20=20=20
    80,559,978,185      stalled-cycles-backend    #   73.85% backend cycles
idle=20=20=20=20
   230,337,342,608      instructions              #    2.11  insn per cycle=
=20=20=20=20=20
                                                  #    0.35  stalled cycles=
 per
insn
     9,808,852,327      branches                  #  357.351 M/sec=20=20=20=
=20=20=20=20=20=20=20=20=20=20=20
        24,742,926      branch-misses             #    0.25% of all branche=
s=20=20=20=20

      27.468971377 seconds time elapsed

      27.445934000 seconds user
       0.003999000 seconds sys


So the 56% slowdown is now only 22%, which is quite a nice improvement.
The call to perdida now looks as follows:
  <bb 70> [local count: 109362591]:
  _818 =3D (unsigned long) _44;
  _817 =3D _818 + ivtmp.1050_1737;
  _246 =3D (real(kind=3D8) *) _817;
  parm.326.data =3D _246;
  _811 =3D (unsigned long) _33;
  _810 =3D _811 + ivtmp.1050_1737;
  _247 =3D (real(kind=3D8) *) _810;
  parm.327.data =3D _247;
  _712 =3D (unsigned long) _56;
  _711 =3D _712 + ivtmp.1050_1737;
  _248 =3D (real(kind=3D8) *) _711;
  parm.328.data =3D _248;
  _612 =3D (unsigned long) _20;
  _574 =3D _612 + ivtmp.1050_1737;
  _249 =3D (real(kind=3D8) *) _574;
  parm.329.data =3D _249;
  _573 =3D (unsigned long) _67;
  _572 =3D _573 + ivtmp.1050_1737;
  _250 =3D (real(kind=3D8) *) _572;
  parm.330.data =3D _250;
  _251 =3D (real(kind=3D8) *) ivtmp.1049_1740;
  _252 =3D (real(kind=3D8) *) ivtmp.1048_1753;
  _253 =3D (real(kind=3D8) *) ivtmp.1047_1762;
  perdida.constprop (&dt, &lambda, &mu, &yield_stress, &r_infinity, &b,
&x_infinity, &gamma, &eta, &plastic_strain_threshold, &parm.326, &parm.327,
&parm.328, &parm.329, _253, &parm.330, _252, _251, &failure_threshold,
&crack_closure_parameter);
  parm.326 =3D{v} {CLOBBER};
  parm.327 =3D{v} {CLOBBER};
  parm.328 =3D{v} {CLOBBER};
  parm.329 =3D{v} {CLOBBER};
  parm.330 =3D{v} {CLOBBER};

and compiles to:
        movq    48(%rsp), %rax
        movq    %r10, 24(%rsp)
        addq    %rbp, %rax
        movq    %rax, 720(%rsp)
        movq    32(%rsp), %rax
        addq    %rbp, %rax
        movq    %rax, 816(%rsp)
        movq    64(%rsp), %rax
        addq    %rbp, %rax
        movq    %rax, 912(%rsp)
        movq    40(%rsp), %rax
        addq    %rbp, %rax
        movq    %rax, 1008(%rsp)
        movq    72(%rsp), %rax
        addq    %rbp, %rax
        addq    $72, %rbp
        movq    %rax, 1104(%rsp)
        leaq    352(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1720
        leaq    384(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1728
        pushq   %r13
        .cfi_def_cfa_offset 1736
        addq    $8, %r13
        pushq   %r12
        .cfi_def_cfa_offset 1744
        addq    $8, %r12
        leaq    1136(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1752
        pushq   %r14
        .cfi_def_cfa_offset 1760
        addq    $8, %r14
        leaq    1056(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1768
        leaq    968(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1776
        leaq    880(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1784
        leaq    792(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1792
        leaq    488(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1800
        leaq    456(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1808
        leaq    480(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1816
        leaq    568(%rsp), %rax
        pushq   %rax
        .cfi_def_cfa_offset 1824
        leaq    440(%rsp), %r9
        leaq    528(%rsp), %r8
        leaq    584(%rsp), %rcx
        leaq    512(%rsp), %rdx
        leaq    504(%rsp), %rsi
        leaq    472(%rsp), %rdi
        call    __perdida_m_MOD_perdida.constprop.0=