* [PATCH] gcov-profile/71672 Fix indirect call inlining with AutoFDO
@ 2021-07-30 7:08 Eugene Rozenfeld
2021-07-30 16:00 ` Andi Kleen
2021-08-02 9:56 ` Richard Biener
0 siblings, 2 replies; 4+ messages in thread
From: Eugene Rozenfeld @ 2021-07-30 7:08 UTC (permalink / raw)
To: gcc-patches, mliska, Andi Kleen
This patch has the following changes:
1. The main fix is in auto-profile.c: the histogram value for
indirect calls was incorrectly set up. That is fixed now.
2. Several tests now have -fdump-ipa-afdo-optimized instead of -fdump-ipa-afdo
in dg-options so that the expected output can be found.
3. I increased the number of iterations in several tests so that perf can have
enough sampling events.
4. indir-call-prof-2.c has -fno-early-inlining but AutoFDO can't work without
early inlining (it needs to match the inlining of the profiled binary).
I changed profopt.exp to always pass -fearly-inlining for AutoFDO.
With that the indirect call inlining in indir-call-prof-2.c happens in the early inliner
so I changed the dg-final-use-autofdo.
5. create_gcov tool doesn't currently support dwarf 5 so I made a change in profopt.exp
to pass -gdwarf-4 when compiling the binary to profile.
6. I updated the invocation of create_gcov in profopt.exp to pass -gcov_version=2.
I recently made a change to create_gcov to support version 2:
https://github.com/google/autofdo/pull/117
7. I removed useless -o perf.data from the invocation of gcc-auto-profile in
target-supports.exp.
With these changes the tests checking indirect call inlining in gcc.dg and g++.dg
are passing.
gcc/ChangeLog:
PR gcov-profile/71672
* auto-profile.c (afdo_indirect_call): Fix the setup of the histogram value for indirect calls.
gcc/testsuite/ChangeLog:
PR gcov-profile/71672
* g++.dg/tree-prof/indir-call-prof.C: Fix options, increase the number of iterations.
* g++.dg/tree-prof/morefunc.C: Fix options, increase the number of iterations.
* g++.dg/tree-prof/reorder.C: Fix options, increase the number of iterations.
* gcc.dg/tree-prof/indir-call-prof-2.c: Fix options, fix dg-final-use-autofdo, increase the number of iterations.
* gcc.dg/tree-prof/indir-call-prof.c: Fix options.
* lib/profopt.exp: Pass gdwarf-4 when compiling binary to profile; pass -fearly-inlining when compiling with AutoFDO; pass -gcov_version=2 to create_gcov.
* lib/target-supports.exp: Remove unnecessary -o perf.data passed to gcc-auto-profile.
---
gcc/auto-profile.c | 13 +++++++++----
gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C | 4 ++--
gcc/testsuite/g++.dg/tree-prof/morefunc.C | 7 ++++---
gcc/testsuite/g++.dg/tree-prof/reorder.C | 6 +++---
gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c | 8 ++++----
gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c | 2 +-
gcc/testsuite/lib/profopt.exp | 6 +++---
gcc/testsuite/lib/target-supports.exp | 2 +-
8 files changed, 27 insertions(+), 21 deletions(-)
diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c
index b23b82b2df4..4c1fc6b536b 100644
--- a/gcc/auto-profile.c
+++ b/gcc/auto-profile.c
@@ -1009,13 +1009,18 @@ afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
histogram_value hist = gimple_alloc_histogram_value (
cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
- hist->n_counters = 3;
+ hist->n_counters = 4;
hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
gimple_add_histogram_value (cfun, stmt, hist);
- hist->hvalue.counters[0] = direct_call->profile_id;
- hist->hvalue.counters[1] = max_iter->second;
- hist->hvalue.counters[2] = total;
+ // Total counter
+ hist->hvalue.counters[0] = total;
+ // Number of value/counter pairs
+ hist->hvalue.counters[1] = 1;
+ // Value
+ hist->hvalue.counters[2] = direct_call->profile_id;
+ // Counter
+ hist->hvalue.counters[3] = max_iter->second;
if (!transform)
return;
diff --git a/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C b/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C
index 3374744613e..b45417106d0 100644
--- a/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C
+++ b/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C
@@ -1,4 +1,4 @@
-/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized -fdump-ipa-afdo" } */
+/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized -fdump-ipa-afdo-optimized" } */
struct A {
A () {}
@@ -26,7 +26,7 @@ main (void)
int i;
- for (i = 0; i < 1000000; i++)
+ for (i = 0; i < 10000000; i++)
{
p = (A *)wrap ((void *)&a);
p->AA ();
diff --git a/gcc/testsuite/g++.dg/tree-prof/morefunc.C b/gcc/testsuite/g++.dg/tree-prof/morefunc.C
index 621d09aec5b..96e0073ca8f 100644
--- a/gcc/testsuite/g++.dg/tree-prof/morefunc.C
+++ b/gcc/testsuite/g++.dg/tree-prof/morefunc.C
@@ -1,4 +1,5 @@
-/* { dg-options "-O2 -fno-devirtualize --param=profile-func-internal-id=0 -fdump-ipa-profile-optimized -fdump-ipa-afdo -Wno-attributes -Wno-coverage-mismatch -Wno-missing-profile" } */
+/* { dg-options "-O2 -fno-devirtualize --param=profile-func-internal-id=0 -fdump-ipa-profile-optimized -fdump-ipa-afdo-optimized -Wno-attributes -Wno-coverage-mismatch -Wno-missing-profile" } */
+
#include "reorder_class1.h"
#include "reorder_class2.h"
@@ -19,7 +20,7 @@ static __attribute__((always_inline))
void test1 (A *tc)
{
int i;
- for (i = 0; i < 1000; i++)
+ for (i = 0; i < 10000000; i++)
g += tc->foo();
if (g<100) g++;
}
@@ -28,7 +29,7 @@ static __attribute__((always_inline))
void test2 (B *tc)
{
int i;
- for (i = 0; i < 1000000; i++)
+ for (i = 0; i < 10000000; i++)
g += tc->foo();
}
diff --git a/gcc/testsuite/g++.dg/tree-prof/reorder.C b/gcc/testsuite/g++.dg/tree-prof/reorder.C
index 000fb651a69..50490963369 100644
--- a/gcc/testsuite/g++.dg/tree-prof/reorder.C
+++ b/gcc/testsuite/g++.dg/tree-prof/reorder.C
@@ -1,4 +1,4 @@
-/* { dg-options "-O2 -fno-devirtualize --param=profile-func-internal-id=0 -fdump-ipa-profile-optimized -fdump-ipa-afdo -Wno-coverage-mismatch -Wno-attributes" } */
+/* { dg-options "-O2 -fno-devirtualize --param=profile-func-internal-id=0 -fdump-ipa-profile-optimized -fdump-ipa-afdo-optimized -Wno-coverage-mismatch -Wno-attributes" } */
#ifdef _PROFILE_USE
#include "reorder_class1.h"
@@ -13,7 +13,7 @@ static __attribute__((always_inline))
void test1 (A *tc)
{
int i;
- for (i = 0; i < 1000000; i++)
+ for (i = 0; i < 10000000; i++)
g += tc->foo();
if (g<100) g++;
}
@@ -22,7 +22,7 @@ static __attribute__((always_inline))
void test2 (B *tc)
{
int i;
- for (i = 0; i < 1000000; i++)
+ for (i = 0; i < 10000000; i++)
g += tc->foo();
}
diff --git a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c
index bbba0521018..594c3f34d57 100644
--- a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c
+++ b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c
@@ -1,4 +1,4 @@
-/* { dg-options "-O2 -fno-early-inlining -fdump-ipa-profile-optimized -fdump-ipa-afdo" } */
+/* { dg-options "-O2 -fno-early-inlining -fdump-ipa-profile-optimized -fdump-tree-einline-optimized" } */
volatile int one;
static int
add1 (int val)
@@ -22,7 +22,7 @@ int
main (void)
{
int i, val = 0;
- for (i = 0; i < 100000; i++)
+ for (i = 0; i < 10000000; i++)
{
val = do_op (val, add1);
val = do_op (val, sub1);
@@ -31,5 +31,5 @@ main (void)
}
/* { dg-final-use-not-autofdo { scan-ipa-dump "Indirect call -> direct call.* add1 .will resolve by ipa-profile" "profile"} } */
/* { dg-final-use-not-autofdo { scan-ipa-dump "Indirect call -> direct call.* sub1 .will resolve by ipa-profile" "profile"} } */
-/* { dg-final-use-autofdo { scan-ipa-dump "Indirect call -> direct call.* add1 .will resolve by ipa-profile" "afdo"} } */
-/* { dg-final-use-autofdo { scan-ipa-dump "Indirect call -> direct call.* sub1 .will resolve by ipa-profile" "afdo"} } */
+/* { dg-final-use-autofdo { scan-tree-dump "Inlining add1/1 into main/4." "einline"} } */
+/* { dg-final-use-autofdo { scan-tree-dump "Inlining sub1/2 into main/4." "einline"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c
index 138b85a08d2..702045239f3 100644
--- a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c
+++ b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c
@@ -1,4 +1,4 @@
-/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized -fdump-ipa-afdo" } */
+/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized -fdump-ipa-afdo-optimized" } */
static int a1 (void)
{
diff --git a/gcc/testsuite/lib/profopt.exp b/gcc/testsuite/lib/profopt.exp
index 9997eb3bb7e..25f45ecf2de 100644
--- a/gcc/testsuite/lib/profopt.exp
+++ b/gcc/testsuite/lib/profopt.exp
@@ -289,8 +289,8 @@ proc auto-profopt-execute { src } {
return
}
set profile_wrapper [profopt-perf-wrapper]
- set profile_option "-g -DFOR_AUTOFDO_TESTING"
- set feedback_option "-fauto-profile -DFOR_AUTOFDO_TESTING"
+ set profile_option "-gdwarf-4 -DFOR_AUTOFDO_TESTING"
+ set feedback_option "-fauto-profile -DFOR_AUTOFDO_TESTING -fearly-inlining"
set run_autofdo 1
profopt-execute $src
unset profile_wrapper
@@ -451,7 +451,7 @@ proc profopt-execute { src } {
# convert profile
if { $run_autofdo == 1 } {
set bprefix "afdo."
- set cmd "create_gcov --binary $execname1 --profile=$tmpdir/$base.perf.data -gcov_version=1 --gcov=$tmpdir/$bprefix$base.$ext"
+ set cmd "create_gcov --binary $execname1 --profile=$tmpdir/$base.perf.data -gcov_version=2 --gcov=$tmpdir/$bprefix$base.$ext"
verbose "Running $cmd"
set id [remote_spawn "" $cmd]
if { $id < 0 } {
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 789723fb287..ae22e103883 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -626,7 +626,7 @@ proc check_effective_target_keeps_null_pointer_checks { } {
# this allows parallelism of 16 and higher of parallel gcc-auto-profile
proc profopt-perf-wrapper { } {
global srcdir
- return "$srcdir/../config/i386/gcc-auto-profile -o perf.data -m8 "
+ return "$srcdir/../config/i386/gcc-auto-profile -m8 "
}
# Return true if profiling is supported on the target.
--
2.25.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] gcov-profile/71672 Fix indirect call inlining with AutoFDO
2021-07-30 7:08 [PATCH] gcov-profile/71672 Fix indirect call inlining with AutoFDO Eugene Rozenfeld
@ 2021-07-30 16:00 ` Andi Kleen
2021-08-02 9:56 ` Richard Biener
1 sibling, 0 replies; 4+ messages in thread
From: Andi Kleen @ 2021-07-30 16:00 UTC (permalink / raw)
To: Eugene Rozenfeld, gcc-patches, mliska
On 7/30/2021 12:08 AM, Eugene Rozenfeld wrote:
> This patch has the following changes:
Great thanks. Thanks for working on this. Looks all good to me (except I
guess the patches could be split up for commit)
-Andi
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] gcov-profile/71672 Fix indirect call inlining with AutoFDO
2021-07-30 7:08 [PATCH] gcov-profile/71672 Fix indirect call inlining with AutoFDO Eugene Rozenfeld
2021-07-30 16:00 ` Andi Kleen
@ 2021-08-02 9:56 ` Richard Biener
2021-08-03 21:41 ` [EXTERNAL] " Eugene Rozenfeld
1 sibling, 1 reply; 4+ messages in thread
From: Richard Biener @ 2021-08-02 9:56 UTC (permalink / raw)
To: Eugene Rozenfeld; +Cc: gcc-patches, mliska, Andi Kleen
On Fri, Jul 30, 2021 at 9:09 AM Eugene Rozenfeld via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> This patch has the following changes:
>
> 1. The main fix is in auto-profile.c: the histogram value for
> indirect calls was incorrectly set up. That is fixed now.
>
> 2. Several tests now have -fdump-ipa-afdo-optimized instead of -fdump-ipa-afdo
> in dg-options so that the expected output can be found.
>
> 3. I increased the number of iterations in several tests so that perf can have
> enough sampling events.
>
> 4. indir-call-prof-2.c has -fno-early-inlining but AutoFDO can't work without
> early inlining (it needs to match the inlining of the profiled binary).
> I changed profopt.exp to always pass -fearly-inlining for AutoFDO.
> With that the indirect call inlining in indir-call-prof-2.c happens in the early inliner
> so I changed the dg-final-use-autofdo.
>
> 5. create_gcov tool doesn't currently support dwarf 5 so I made a change in profopt.exp
> to pass -gdwarf-4 when compiling the binary to profile.
>
> 6. I updated the invocation of create_gcov in profopt.exp to pass -gcov_version=2.
> I recently made a change to create_gcov to support version 2:
> https://github.com/google/autofdo/pull/117
>
> 7. I removed useless -o perf.data from the invocation of gcc-auto-profile in
> target-supports.exp.
>
> With these changes the tests checking indirect call inlining in gcc.dg and g++.dg
> are passing.
OK.
Thanks,
Richard.
> gcc/ChangeLog:
> PR gcov-profile/71672
> * auto-profile.c (afdo_indirect_call): Fix the setup of the histogram value for indirect calls.
>
> gcc/testsuite/ChangeLog:
> PR gcov-profile/71672
> * g++.dg/tree-prof/indir-call-prof.C: Fix options, increase the number of iterations.
> * g++.dg/tree-prof/morefunc.C: Fix options, increase the number of iterations.
> * g++.dg/tree-prof/reorder.C: Fix options, increase the number of iterations.
> * gcc.dg/tree-prof/indir-call-prof-2.c: Fix options, fix dg-final-use-autofdo, increase the number of iterations.
> * gcc.dg/tree-prof/indir-call-prof.c: Fix options.
> * lib/profopt.exp: Pass gdwarf-4 when compiling binary to profile; pass -fearly-inlining when compiling with AutoFDO; pass -gcov_version=2 to create_gcov.
> * lib/target-supports.exp: Remove unnecessary -o perf.data passed to gcc-auto-profile.
> ---
> gcc/auto-profile.c | 13 +++++++++----
> gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C | 4 ++--
> gcc/testsuite/g++.dg/tree-prof/morefunc.C | 7 ++++---
> gcc/testsuite/g++.dg/tree-prof/reorder.C | 6 +++---
> gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c | 8 ++++----
> gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c | 2 +-
> gcc/testsuite/lib/profopt.exp | 6 +++---
> gcc/testsuite/lib/target-supports.exp | 2 +-
> 8 files changed, 27 insertions(+), 21 deletions(-)
>
> diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c
> index b23b82b2df4..4c1fc6b536b 100644
> --- a/gcc/auto-profile.c
> +++ b/gcc/auto-profile.c
> @@ -1009,13 +1009,18 @@ afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
>
> histogram_value hist = gimple_alloc_histogram_value (
> cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
> - hist->n_counters = 3;
> + hist->n_counters = 4;
> hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
> gimple_add_histogram_value (cfun, stmt, hist);
>
> - hist->hvalue.counters[0] = direct_call->profile_id;
> - hist->hvalue.counters[1] = max_iter->second;
> - hist->hvalue.counters[2] = total;
> + // Total counter
> + hist->hvalue.counters[0] = total;
> + // Number of value/counter pairs
> + hist->hvalue.counters[1] = 1;
> + // Value
> + hist->hvalue.counters[2] = direct_call->profile_id;
> + // Counter
> + hist->hvalue.counters[3] = max_iter->second;
>
> if (!transform)
> return;
> diff --git a/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C b/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C
> index 3374744613e..b45417106d0 100644
> --- a/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C
> +++ b/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C
> @@ -1,4 +1,4 @@
> -/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized -fdump-ipa-afdo" } */
> +/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized -fdump-ipa-afdo-optimized" } */
>
> struct A {
> A () {}
> @@ -26,7 +26,7 @@ main (void)
>
> int i;
>
> - for (i = 0; i < 1000000; i++)
> + for (i = 0; i < 10000000; i++)
> {
> p = (A *)wrap ((void *)&a);
> p->AA ();
> diff --git a/gcc/testsuite/g++.dg/tree-prof/morefunc.C b/gcc/testsuite/g++.dg/tree-prof/morefunc.C
> index 621d09aec5b..96e0073ca8f 100644
> --- a/gcc/testsuite/g++.dg/tree-prof/morefunc.C
> +++ b/gcc/testsuite/g++.dg/tree-prof/morefunc.C
> @@ -1,4 +1,5 @@
> -/* { dg-options "-O2 -fno-devirtualize --param=profile-func-internal-id=0 -fdump-ipa-profile-optimized -fdump-ipa-afdo -Wno-attributes -Wno-coverage-mismatch -Wno-missing-profile" } */
> +/* { dg-options "-O2 -fno-devirtualize --param=profile-func-internal-id=0 -fdump-ipa-profile-optimized -fdump-ipa-afdo-optimized -Wno-attributes -Wno-coverage-mismatch -Wno-missing-profile" } */
> +
> #include "reorder_class1.h"
> #include "reorder_class2.h"
>
> @@ -19,7 +20,7 @@ static __attribute__((always_inline))
> void test1 (A *tc)
> {
> int i;
> - for (i = 0; i < 1000; i++)
> + for (i = 0; i < 10000000; i++)
> g += tc->foo();
> if (g<100) g++;
> }
> @@ -28,7 +29,7 @@ static __attribute__((always_inline))
> void test2 (B *tc)
> {
> int i;
> - for (i = 0; i < 1000000; i++)
> + for (i = 0; i < 10000000; i++)
> g += tc->foo();
> }
>
> diff --git a/gcc/testsuite/g++.dg/tree-prof/reorder.C b/gcc/testsuite/g++.dg/tree-prof/reorder.C
> index 000fb651a69..50490963369 100644
> --- a/gcc/testsuite/g++.dg/tree-prof/reorder.C
> +++ b/gcc/testsuite/g++.dg/tree-prof/reorder.C
> @@ -1,4 +1,4 @@
> -/* { dg-options "-O2 -fno-devirtualize --param=profile-func-internal-id=0 -fdump-ipa-profile-optimized -fdump-ipa-afdo -Wno-coverage-mismatch -Wno-attributes" } */
> +/* { dg-options "-O2 -fno-devirtualize --param=profile-func-internal-id=0 -fdump-ipa-profile-optimized -fdump-ipa-afdo-optimized -Wno-coverage-mismatch -Wno-attributes" } */
>
> #ifdef _PROFILE_USE
> #include "reorder_class1.h"
> @@ -13,7 +13,7 @@ static __attribute__((always_inline))
> void test1 (A *tc)
> {
> int i;
> - for (i = 0; i < 1000000; i++)
> + for (i = 0; i < 10000000; i++)
> g += tc->foo();
> if (g<100) g++;
> }
> @@ -22,7 +22,7 @@ static __attribute__((always_inline))
> void test2 (B *tc)
> {
> int i;
> - for (i = 0; i < 1000000; i++)
> + for (i = 0; i < 10000000; i++)
> g += tc->foo();
> }
>
> diff --git a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c
> index bbba0521018..594c3f34d57 100644
> --- a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c
> @@ -1,4 +1,4 @@
> -/* { dg-options "-O2 -fno-early-inlining -fdump-ipa-profile-optimized -fdump-ipa-afdo" } */
> +/* { dg-options "-O2 -fno-early-inlining -fdump-ipa-profile-optimized -fdump-tree-einline-optimized" } */
> volatile int one;
> static int
> add1 (int val)
> @@ -22,7 +22,7 @@ int
> main (void)
> {
> int i, val = 0;
> - for (i = 0; i < 100000; i++)
> + for (i = 0; i < 10000000; i++)
> {
> val = do_op (val, add1);
> val = do_op (val, sub1);
> @@ -31,5 +31,5 @@ main (void)
> }
> /* { dg-final-use-not-autofdo { scan-ipa-dump "Indirect call -> direct call.* add1 .will resolve by ipa-profile" "profile"} } */
> /* { dg-final-use-not-autofdo { scan-ipa-dump "Indirect call -> direct call.* sub1 .will resolve by ipa-profile" "profile"} } */
> -/* { dg-final-use-autofdo { scan-ipa-dump "Indirect call -> direct call.* add1 .will resolve by ipa-profile" "afdo"} } */
> -/* { dg-final-use-autofdo { scan-ipa-dump "Indirect call -> direct call.* sub1 .will resolve by ipa-profile" "afdo"} } */
> +/* { dg-final-use-autofdo { scan-tree-dump "Inlining add1/1 into main/4." "einline"} } */
> +/* { dg-final-use-autofdo { scan-tree-dump "Inlining sub1/2 into main/4." "einline"} } */
> diff --git a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c
> index 138b85a08d2..702045239f3 100644
> --- a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c
> +++ b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c
> @@ -1,4 +1,4 @@
> -/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized -fdump-ipa-afdo" } */
> +/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized -fdump-ipa-afdo-optimized" } */
>
> static int a1 (void)
> {
> diff --git a/gcc/testsuite/lib/profopt.exp b/gcc/testsuite/lib/profopt.exp
> index 9997eb3bb7e..25f45ecf2de 100644
> --- a/gcc/testsuite/lib/profopt.exp
> +++ b/gcc/testsuite/lib/profopt.exp
> @@ -289,8 +289,8 @@ proc auto-profopt-execute { src } {
> return
> }
> set profile_wrapper [profopt-perf-wrapper]
> - set profile_option "-g -DFOR_AUTOFDO_TESTING"
> - set feedback_option "-fauto-profile -DFOR_AUTOFDO_TESTING"
> + set profile_option "-gdwarf-4 -DFOR_AUTOFDO_TESTING"
> + set feedback_option "-fauto-profile -DFOR_AUTOFDO_TESTING -fearly-inlining"
> set run_autofdo 1
> profopt-execute $src
> unset profile_wrapper
> @@ -451,7 +451,7 @@ proc profopt-execute { src } {
> # convert profile
> if { $run_autofdo == 1 } {
> set bprefix "afdo."
> - set cmd "create_gcov --binary $execname1 --profile=$tmpdir/$base.perf.data -gcov_version=1 --gcov=$tmpdir/$bprefix$base.$ext"
> + set cmd "create_gcov --binary $execname1 --profile=$tmpdir/$base.perf.data -gcov_version=2 --gcov=$tmpdir/$bprefix$base.$ext"
> verbose "Running $cmd"
> set id [remote_spawn "" $cmd]
> if { $id < 0 } {
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index 789723fb287..ae22e103883 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -626,7 +626,7 @@ proc check_effective_target_keeps_null_pointer_checks { } {
> # this allows parallelism of 16 and higher of parallel gcc-auto-profile
> proc profopt-perf-wrapper { } {
> global srcdir
> - return "$srcdir/../config/i386/gcc-auto-profile -o perf.data -m8 "
> + return "$srcdir/../config/i386/gcc-auto-profile -m8 "
> }
>
> # Return true if profiling is supported on the target.
> --
> 2.25.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* RE: [EXTERNAL] Re: [PATCH] gcov-profile/71672 Fix indirect call inlining with AutoFDO
2021-08-02 9:56 ` Richard Biener
@ 2021-08-03 21:41 ` Eugene Rozenfeld
0 siblings, 0 replies; 4+ messages in thread
From: Eugene Rozenfeld @ 2021-08-03 21:41 UTC (permalink / raw)
To: Richard Biener; +Cc: gcc-patches, mliska, Andi Kleen
Thank you for the reviews, Andi and Richard.
I split up the patch into 4 commits and pushed to trunk.
Eugene
-----Original Message-----
From: Richard Biener <richard.guenther@gmail.com>
Sent: Monday, August 2, 2021 2:57 AM
To: Eugene Rozenfeld <Eugene.Rozenfeld@microsoft.com>
Cc: gcc-patches@gcc.gnu.org; mliska@suse.cz; Andi Kleen <ak@linux.intel.com>
Subject: [EXTERNAL] Re: [PATCH] gcov-profile/71672 Fix indirect call inlining with AutoFDO
On Fri, Jul 30, 2021 at 9:09 AM Eugene Rozenfeld via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
>
> This patch has the following changes:
>
> 1. The main fix is in auto-profile.c: the histogram value for
> indirect calls was incorrectly set up. That is fixed now.
>
> 2. Several tests now have -fdump-ipa-afdo-optimized instead of -fdump-ipa-afdo
> in dg-options so that the expected output can be found.
>
> 3. I increased the number of iterations in several tests so that perf can have
> enough sampling events.
>
> 4. indir-call-prof-2.c has -fno-early-inlining but AutoFDO can't work without
> early inlining (it needs to match the inlining of the profiled binary).
> I changed profopt.exp to always pass -fearly-inlining for AutoFDO.
> With that the indirect call inlining in indir-call-prof-2.c happens in the early inliner
> so I changed the dg-final-use-autofdo.
>
> 5. create_gcov tool doesn't currently support dwarf 5 so I made a change in profopt.exp
> to pass -gdwarf-4 when compiling the binary to profile.
>
> 6. I updated the invocation of create_gcov in profopt.exp to pass -gcov_version=2.
> I recently made a change to create_gcov to support version 2:
>
> https://github.com/google/autofdo/pull/117
>
> 7. I removed useless -o perf.data from the invocation of gcc-auto-profile in
> target-supports.exp.
>
> With these changes the tests checking indirect call inlining in gcc.dg
> and g++.dg are passing.
OK.
Thanks,
Richard.
> gcc/ChangeLog:
> PR gcov-profile/71672
> * auto-profile.c (afdo_indirect_call): Fix the setup of the histogram value for indirect calls.
>
> gcc/testsuite/ChangeLog:
> PR gcov-profile/71672
> * g++.dg/tree-prof/indir-call-prof.C: Fix options, increase the number of iterations.
> * g++.dg/tree-prof/morefunc.C: Fix options, increase the number of iterations.
> * g++.dg/tree-prof/reorder.C: Fix options, increase the number of iterations.
> * gcc.dg/tree-prof/indir-call-prof-2.c: Fix options, fix dg-final-use-autofdo, increase the number of iterations.
> * gcc.dg/tree-prof/indir-call-prof.c: Fix options.
> * lib/profopt.exp: Pass gdwarf-4 when compiling binary to profile; pass -fearly-inlining when compiling with AutoFDO; pass -gcov_version=2 to create_gcov.
> * lib/target-supports.exp: Remove unnecessary -o perf.data passed to gcc-auto-profile.
> ---
> gcc/auto-profile.c | 13 +++++++++----
> gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C | 4 ++--
> gcc/testsuite/g++.dg/tree-prof/morefunc.C | 7 ++++---
> gcc/testsuite/g++.dg/tree-prof/reorder.C | 6 +++---
> gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c | 8 ++++----
> gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c | 2 +-
> gcc/testsuite/lib/profopt.exp | 6 +++---
> gcc/testsuite/lib/target-supports.exp | 2 +-
> 8 files changed, 27 insertions(+), 21 deletions(-)
>
> diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c
> index b23b82b2df4..4c1fc6b536b 100644
> --- a/gcc/auto-profile.c
> +++ b/gcc/auto-profile.c
> @@ -1009,13 +1009,18 @@ afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
>
> histogram_value hist = gimple_alloc_histogram_value (
> cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
> - hist->n_counters = 3;
> + hist->n_counters = 4;
> hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
> gimple_add_histogram_value (cfun, stmt, hist);
>
> - hist->hvalue.counters[0] = direct_call->profile_id;
> - hist->hvalue.counters[1] = max_iter->second;
> - hist->hvalue.counters[2] = total;
> + // Total counter
> + hist->hvalue.counters[0] = total;
> + // Number of value/counter pairs
> + hist->hvalue.counters[1] = 1;
> + // Value
> + hist->hvalue.counters[2] = direct_call->profile_id;
> + // Counter
> + hist->hvalue.counters[3] = max_iter->second;
>
> if (!transform)
> return;
> diff --git a/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C
> b/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C
> index 3374744613e..b45417106d0 100644
> --- a/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C
> +++ b/gcc/testsuite/g++.dg/tree-prof/indir-call-prof.C
> @@ -1,4 +1,4 @@
> -/* { dg-options "-O2 -fdump-tree-optimized
> -fdump-ipa-profile-optimized -fdump-ipa-afdo" } */
> +/* { dg-options "-O2 -fdump-tree-optimized
> +-fdump-ipa-profile-optimized -fdump-ipa-afdo-optimized" } */
>
> struct A {
> A () {}
> @@ -26,7 +26,7 @@ main (void)
>
> int i;
>
> - for (i = 0; i < 1000000; i++)
> + for (i = 0; i < 10000000; i++)
> {
> p = (A *)wrap ((void *)&a);
> p->AA ();
> diff --git a/gcc/testsuite/g++.dg/tree-prof/morefunc.C
> b/gcc/testsuite/g++.dg/tree-prof/morefunc.C
> index 621d09aec5b..96e0073ca8f 100644
> --- a/gcc/testsuite/g++.dg/tree-prof/morefunc.C
> +++ b/gcc/testsuite/g++.dg/tree-prof/morefunc.C
> @@ -1,4 +1,5 @@
> -/* { dg-options "-O2 -fno-devirtualize
> --param=profile-func-internal-id=0 -fdump-ipa-profile-optimized
> -fdump-ipa-afdo -Wno-attributes -Wno-coverage-mismatch
> -Wno-missing-profile" } */
> +/* { dg-options "-O2 -fno-devirtualize
> +--param=profile-func-internal-id=0 -fdump-ipa-profile-optimized
> +-fdump-ipa-afdo-optimized -Wno-attributes -Wno-coverage-mismatch
> +-Wno-missing-profile" } */
> +
> #include "reorder_class1.h"
> #include "reorder_class2.h"
>
> @@ -19,7 +20,7 @@ static __attribute__((always_inline)) void test1 (A
> *tc) {
> int i;
> - for (i = 0; i < 1000; i++)
> + for (i = 0; i < 10000000; i++)
> g += tc->foo();
> if (g<100) g++;
> }
> @@ -28,7 +29,7 @@ static __attribute__((always_inline)) void test2 (B
> *tc) {
> int i;
> - for (i = 0; i < 1000000; i++)
> + for (i = 0; i < 10000000; i++)
> g += tc->foo();
> }
>
> diff --git a/gcc/testsuite/g++.dg/tree-prof/reorder.C
> b/gcc/testsuite/g++.dg/tree-prof/reorder.C
> index 000fb651a69..50490963369 100644
> --- a/gcc/testsuite/g++.dg/tree-prof/reorder.C
> +++ b/gcc/testsuite/g++.dg/tree-prof/reorder.C
> @@ -1,4 +1,4 @@
> -/* { dg-options "-O2 -fno-devirtualize
> --param=profile-func-internal-id=0 -fdump-ipa-profile-optimized
> -fdump-ipa-afdo -Wno-coverage-mismatch -Wno-attributes" } */
> +/* { dg-options "-O2 -fno-devirtualize
> +--param=profile-func-internal-id=0 -fdump-ipa-profile-optimized
> +-fdump-ipa-afdo-optimized -Wno-coverage-mismatch -Wno-attributes" }
> +*/
>
> #ifdef _PROFILE_USE
> #include "reorder_class1.h"
> @@ -13,7 +13,7 @@ static __attribute__((always_inline)) void test1 (A
> *tc) {
> int i;
> - for (i = 0; i < 1000000; i++)
> + for (i = 0; i < 10000000; i++)
> g += tc->foo();
> if (g<100) g++;
> }
> @@ -22,7 +22,7 @@ static __attribute__((always_inline)) void test2 (B
> *tc) {
> int i;
> - for (i = 0; i < 1000000; i++)
> + for (i = 0; i < 10000000; i++)
> g += tc->foo();
> }
>
> diff --git a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c
> b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c
> index bbba0521018..594c3f34d57 100644
> --- a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof-2.c
> @@ -1,4 +1,4 @@
> -/* { dg-options "-O2 -fno-early-inlining -fdump-ipa-profile-optimized
> -fdump-ipa-afdo" } */
> +/* { dg-options "-O2 -fno-early-inlining -fdump-ipa-profile-optimized
> +-fdump-tree-einline-optimized" } */
> volatile int one;
> static int
> add1 (int val)
> @@ -22,7 +22,7 @@ int
> main (void)
> {
> int i, val = 0;
> - for (i = 0; i < 100000; i++)
> + for (i = 0; i < 10000000; i++)
> {
> val = do_op (val, add1);
> val = do_op (val, sub1);
> @@ -31,5 +31,5 @@ main (void)
> }
> /* { dg-final-use-not-autofdo { scan-ipa-dump "Indirect call ->
> direct call.* add1 .will resolve by ipa-profile" "profile"} } */
> /* { dg-final-use-not-autofdo { scan-ipa-dump "Indirect call ->
> direct call.* sub1 .will resolve by ipa-profile" "profile"} } */
> -/* { dg-final-use-autofdo { scan-ipa-dump "Indirect call -> direct
> call.* add1 .will resolve by ipa-profile" "afdo"} } */
> -/* { dg-final-use-autofdo { scan-ipa-dump "Indirect call -> direct
> call.* sub1 .will resolve by ipa-profile" "afdo"} } */
> +/* { dg-final-use-autofdo { scan-tree-dump "Inlining add1/1 into
> +main/4." "einline"} } */
> +/* { dg-final-use-autofdo { scan-tree-dump "Inlining sub1/2 into
> +main/4." "einline"} } */
> diff --git a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c
> b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c
> index 138b85a08d2..702045239f3 100644
> --- a/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c
> +++ b/gcc/testsuite/gcc.dg/tree-prof/indir-call-prof.c
> @@ -1,4 +1,4 @@
> -/* { dg-options "-O2 -fdump-tree-optimized
> -fdump-ipa-profile-optimized -fdump-ipa-afdo" } */
> +/* { dg-options "-O2 -fdump-tree-optimized
> +-fdump-ipa-profile-optimized -fdump-ipa-afdo-optimized" } */
>
> static int a1 (void)
> {
> diff --git a/gcc/testsuite/lib/profopt.exp b/gcc/testsuite/lib/profopt.exp
> index 9997eb3bb7e..25f45ecf2de 100644
> --- a/gcc/testsuite/lib/profopt.exp
> +++ b/gcc/testsuite/lib/profopt.exp
> @@ -289,8 +289,8 @@ proc auto-profopt-execute { src } {
> return
> }
> set profile_wrapper [profopt-perf-wrapper]
> - set profile_option "-g -DFOR_AUTOFDO_TESTING"
> - set feedback_option "-fauto-profile -DFOR_AUTOFDO_TESTING"
> + set profile_option "-gdwarf-4 -DFOR_AUTOFDO_TESTING"
> + set feedback_option "-fauto-profile -DFOR_AUTOFDO_TESTING -fearly-inlining"
> set run_autofdo 1
> profopt-execute $src
> unset profile_wrapper
> @@ -451,7 +451,7 @@ proc profopt-execute { src } {
> # convert profile
> if { $run_autofdo == 1 } {
> set bprefix "afdo."
> - set cmd "create_gcov --binary $execname1 --profile=$tmpdir/$base.perf.data -gcov_version=1 --gcov=$tmpdir/$bprefix$base.$ext"
> + set cmd "create_gcov --binary $execname1 --profile=$tmpdir/$base.perf.data -gcov_version=2 --gcov=$tmpdir/$bprefix$base.$ext"
> verbose "Running $cmd"
> set id [remote_spawn "" $cmd]
> if { $id < 0 } {
> diff --git a/gcc/testsuite/lib/target-supports.exp
> b/gcc/testsuite/lib/target-supports.exp
> index 789723fb287..ae22e103883 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -626,7 +626,7 @@ proc check_effective_target_keeps_null_pointer_checks { } {
> # this allows parallelism of 16 and higher of parallel gcc-auto-profile
> proc profopt-perf-wrapper { } {
> global srcdir
> - return "$srcdir/../config/i386/gcc-auto-profile -o perf.data -m8 "
> + return "$srcdir/../config/i386/gcc-auto-profile -m8 "
> }
>
> # Return true if profiling is supported on the target.
> --
> 2.25.1
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-08-03 21:41 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-30 7:08 [PATCH] gcov-profile/71672 Fix indirect call inlining with AutoFDO Eugene Rozenfeld
2021-07-30 16:00 ` Andi Kleen
2021-08-02 9:56 ` Richard Biener
2021-08-03 21:41 ` [EXTERNAL] " Eugene Rozenfeld
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).