public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] gcov-profile/71672 Fix indirect call inlining with AutoFDO
@ 2021-07-30  7:08 Eugene Rozenfeld
  2021-07-30 16:00 ` Andi Kleen
  2021-08-02  9:56 ` Richard Biener
  0 siblings, 2 replies; 4+ messages in thread
From: Eugene Rozenfeld @ 2021-07-30  7:08 UTC (permalink / raw)
  To: gcc-patches, mliska, Andi Kleen

This patch has the following changes:

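1. The main fix is in auto-profile.c: the histogram value for
   indirect calls was incorrectly set up (three counters in the order
   value, counter, total). It now uses four counters in the order
   total, number of value/counter pairs, value, counter.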

2. Several tests now have -fdump-ipa-afdo-optimized instead of -fdump-ipa-afdo
   in dg-options so that the expected output can be found.

3. I increased the number of iterations in several tests so that perf can have
   enough sampling events.

4. indir-call-prof-2.c has -fno-early-inlining but AutoFDO can't work without
   early inlining (it needs to match the inlining of the profiled binary).
   I changed profopt.exp to always pass -fearly-inlining for AutoFDO.
   With that, the indirect call inlining in indir-call-prof-2.c happens in the early inliner,
   so I changed the dg-final-use-autofdo directives to scan the einline dump instead.

5. The create_gcov tool doesn't currently support DWARF 5, so I made a change in profopt.exp
   to pass -gdwarf-4 when compiling the binary to profile.

6. I updated the invocation of create_gcov in profopt.exp to pass -gcov_version=2.
   I recently made a change to create_gcov to support version 2:
   https://github.com/google/autofdo/pull/117

7. I removed the useless -o perf.data from the invocation of gcc-auto-profile in
   target-supports.exp.

With these changes the tests checking indirect call inlining in gcc.dg and g++.dg
are passing.

gcc/ChangeLog:
        PR gcov-profile/71672
        * auto-profile.c (afdo_indirect_call): Fix the setup of the histogram value for indirect calls.

gcc/testsuite/ChangeLog:
        PR gcov-profile/71672
        * g++.dg/tree-prof/indir-call-prof.C: Fix options, increase the number of iterations.
        * g++.dg/tree-prof/morefunc.C: Fix options, increase the number of iterations.
        * g++.dg/tree-prof/reorder.C: Fix options, increase the number of iterations.
        * gcc.dg/tree-prof/indir-call-prof-2.c: Fix options, fix dg-final-use-autofdo, increase the number of iterations.
        * gcc.dg/tree-prof/indir-call-prof.c: Fix options.
        * lib/profopt.exp: Pass -gdwarf-4 when compiling binary to profile; pass -fearly-inlining when compiling with AutoFDO; pass -gcov_version=2 to create_gcov.
        * lib/target-supports.exp: Remove unnecessary -o perf.data passed to gcc-auto-profile.
---
 gcc/auto-profile.c                                 | 13 +++++++++----
 gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C   |  4 ++--
 gcc/testsuite/g++.dg/tree-prof/morefunc.C          |  7 ++++---
 gcc/testsuite/g++.dg/tree-prof/reorder.C           |  6 +++---
 gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c |  8 ++++----
 gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c   |  2 +-
 gcc/testsuite/lib/profopt.exp                      |  6 +++---
 gcc/testsuite/lib/target-supports.exp              |  2 +-
 8 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c
index b23b82b2df4..4c1fc6b536b 100644
--- a/gcc/auto-profile.c
+++ b/gcc/auto-profile.c
@@ -1009,13 +1009,18 @@ afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,

   histogram_value hist = gimple_alloc_histogram_value (
       cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
-  hist->n_counters = 3;
+  hist->n_counters = 4;
   hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
   gimple_add_histogram_value (cfun, stmt, hist);

-  hist->hvalue.counters[0] = direct_call->profile_id;
-  hist->hvalue.counters[1] = max_iter->second;
-  hist->hvalue.counters[2] = total;
+  // Total counter
+  hist->hvalue.counters[0] = total;
+  // Number of value/counter pairs
+  hist->hvalue.counters[1] = 1;
+  // Value
+  hist->hvalue.counters[2] = direct_call->profile_id;
+  // Counter
+  hist->hvalue.counters[3] = max_iter->second;

   if (!transform)
     return;
diff --git a/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C b/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C
index 3374744613e..b45417106d0 100644
--- a/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C
+++ b/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C
@@ -1,4 +1,4 @@
-/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized -fdump-ipa-afdo" } */
+/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized -fdump-ipa-afdo-optimized" } */

 struct A {
   A () {}
@@ -26,7 +26,7 @@ main (void)

   int i;

-  for (i = 0; i < 1000000; i++)
+  for (i = 0; i < 10000000; i++)
     {
       p = (A *)wrap ((void *)&a);
       p->AA ();
diff --git a/gcc/testsuite/g++.dg/tree-prof/morefunc.C b/gcc/testsuite/g++.dg/tree-prof/morefunc.C
index 621d09aec5b..96e0073ca8f 100644
--- a/gcc/testsuite/g++.dg/tree-prof/morefunc.C
+++ b/gcc/testsuite/g++.dg/tree-prof/morefunc.C
@@ -1,4 +1,5 @@
-/* { dg-options "-O2 -fno-devirtualize --param=profile-func-internal-id=0 -fdump-ipa-profile-optimized -fdump-ipa-afdo -Wno-attributes -Wno-coverage-mismatch -Wno-missing-profile" } */
+/* { dg-options "-O2 -fno-devirtualize --param=profile-func-internal-id=0 -fdump-ipa-profile-optimized -fdump-ipa-afdo-optimized -Wno-attributes -Wno-coverage-mismatch -Wno-missing-profile" } */
+
 #include "reorder_class1.h"
 #include "reorder_class2.h"

@@ -19,7 +20,7 @@ static __attribute__((always_inline))
 void test1 (A *tc)
 {
   int i;
-  for (i = 0; i < 1000; i++)
+  for (i = 0; i < 10000000; i++)
      g += tc->foo();
    if (g<100) g++;
 }
@@ -28,7 +29,7 @@ static __attribute__((always_inline))
 void test2 (B *tc)
 {
   int i;
-  for (i = 0; i < 1000000; i++)
+  for (i = 0; i < 10000000; i++)
      g += tc->foo();
 }

diff --git a/gcc/testsuite/g++.dg/tree-prof/reorder.C b/gcc/testsuite/g++.dg/tree-prof/reorder.C
index 000fb651a69..50490963369 100644
--- a/gcc/testsuite/g++.dg/tree-prof/reorder.C
+++ b/gcc/testsuite/g++.dg/tree-prof/reorder.C
@@ -1,4 +1,4 @@
-/* { dg-options "-O2 -fno-devirtualize --param=profile-func-internal-id=0 -fdump-ipa-profile-optimized -fdump-ipa-afdo -Wno-coverage-mismatch -Wno-attributes" } */
+/* { dg-options "-O2 -fno-devirtualize --param=profile-func-internal-id=0 -fdump-ipa-profile-optimized -fdump-ipa-afdo-optimized -Wno-coverage-mismatch -Wno-attributes" } */

 #ifdef _PROFILE_USE
 #include "reorder_class1.h"
@@ -13,7 +13,7 @@ static __attribute__((always_inline))
 void test1 (A *tc)
 {
   int i;
-  for (i = 0; i < 1000000; i++)
+  for (i = 0; i < 10000000; i++)
      g += tc->foo();
    if (g<100) g++;
 }
@@ -22,7 +22,7 @@ static __attribute__((always_inline))
 void test2 (B *tc)
 {
   int i;
-  for (i = 0; i < 1000000; i++)
+  for (i = 0; i < 10000000; i++)
      g += tc->foo();
 }

diff --git a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c
index bbba0521018..594c3f34d57 100644
--- a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c
+++ b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c
@@ -1,4 +1,4 @@
-/* { dg-options "-O2 -fno-early-inlining -fdump-ipa-profile-optimized -fdump-ipa-afdo" } */
+/* { dg-options "-O2 -fno-early-inlining -fdump-ipa-profile-optimized -fdump-tree-einline-optimized" } */
 volatile int one;
 static int
 add1 (int val)
@@ -22,7 +22,7 @@ int
 main (void)
 {
   int i, val = 0;
-  for (i = 0; i < 100000; i++)
+  for (i = 0; i < 10000000; i++)
     {
       val = do_op (val, add1);
       val = do_op (val, sub1);
@@ -31,5 +31,5 @@ main (void)
 }
 /* { dg-final-use-not-autofdo { scan-ipa-dump "Indirect call -> direct call.* add1 .will resolve by ipa-profile" "profile"} } */
 /* { dg-final-use-not-autofdo { scan-ipa-dump "Indirect call -> direct call.* sub1 .will resolve by ipa-profile" "profile"} } */
-/* { dg-final-use-autofdo { scan-ipa-dump "Indirect call -> direct call.* add1 .will resolve by ipa-profile" "afdo"} } */
-/* { dg-final-use-autofdo { scan-ipa-dump "Indirect call -> direct call.* sub1 .will resolve by ipa-profile" "afdo"} } */
+/* { dg-final-use-autofdo { scan-tree-dump "Inlining add1/1 into main/4." "einline"} } */
+/* { dg-final-use-autofdo { scan-tree-dump "Inlining sub1/2 into main/4." "einline"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c
index 138b85a08d2..702045239f3 100644
--- a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c
+++ b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c
@@ -1,4 +1,4 @@
-/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized -fdump-ipa-afdo" } */
+/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized -fdump-ipa-afdo-optimized" } */

 static int a1 (void)
 {
diff --git a/gcc/testsuite/lib/profopt.exp b/gcc/testsuite/lib/profopt.exp
index 9997eb3bb7e..25f45ecf2de 100644
--- a/gcc/testsuite/lib/profopt.exp
+++ b/gcc/testsuite/lib/profopt.exp
@@ -289,8 +289,8 @@ proc auto-profopt-execute { src } {
         return
     }
     set profile_wrapper [profopt-perf-wrapper]
-    set profile_option "-g -DFOR_AUTOFDO_TESTING"
-    set feedback_option "-fauto-profile -DFOR_AUTOFDO_TESTING"
+    set profile_option "-gdwarf-4 -DFOR_AUTOFDO_TESTING"
+    set feedback_option "-fauto-profile -DFOR_AUTOFDO_TESTING -fearly-inlining"
     set run_autofdo 1
     profopt-execute $src
     unset profile_wrapper
@@ -451,7 +451,7 @@ proc profopt-execute { src } {
            # convert profile
            if { $run_autofdo == 1 } {
                 set bprefix "afdo."
-               set cmd "create_gcov --binary $execname1 --profile=$tmpdir/$base.perf.data -gcov_version=1 --gcov=$tmpdir/$bprefix$base.$ext"
+               set cmd "create_gcov --binary $execname1 --profile=$tmpdir/$base.perf.data -gcov_version=2 --gcov=$tmpdir/$bprefix$base.$ext"
                verbose "Running $cmd"
                set id [remote_spawn "" $cmd]
                if { $id < 0 } {
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 789723fb287..ae22e103883 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -626,7 +626,7 @@ proc check_effective_target_keeps_null_pointer_checks { } {
 # this allows parallelism of 16 and higher of parallel gcc-auto-profile
 proc profopt-perf-wrapper { } {
     global srcdir
-    return "$srcdir/../config/i386/gcc-auto-profile -o perf.data -m8 "
+    return "$srcdir/../config/i386/gcc-auto-profile -m8 "
 }

 # Return true if profiling is supported on the target.
--
2.25.1

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-08-03 21:41 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-30  7:08 [PATCH] gcov-profile/71672 Fix indirect call inlining with AutoFDO Eugene Rozenfeld
2021-07-30 16:00 ` Andi Kleen
2021-08-02  9:56 ` Richard Biener
2021-08-03 21:41   ` [EXTERNAL] " Eugene Rozenfeld

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).