public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] predcom: Adjust some unnecessary update_ssa calls
@ 2021-06-02  9:29 Kewen.Lin
  2021-06-07 14:46 ` Richard Biener
  0 siblings, 1 reply; 16+ messages in thread
From: Kewen.Lin @ 2021-06-02  9:29 UTC (permalink / raw)
  To: GCC Patches; +Cc: Richard Biener, Segher Boessenkool, Bill Schmidt

[-- Attachment #1: Type: text/plain, Size: 896 bytes --]

Hi,

As Richi suggested in PR100794, this patch is to remove
some unnecessary update_ssa calls with flag
TODO_update_ssa_only_virtuals, also do some refactoring.

Bootstrapped/regtested on powerpc64le-linux-gnu P9,
x86_64-redhat-linux and aarch64-linux-gnu, built well
on Power9 ppc64le with --with-build-config=bootstrap-O3,
and passed both P8 and P9 SPEC2017 full build with
{-O3, -Ofast} + {,-funroll-loops}.

Is it ok for trunk?

BR,
Kewen
-----
gcc/ChangeLog:

	* tree-predcom.c (execute_pred_commoning): Remove update_ssa call.
	(tree_predictive_commoning_loop): Factor some cleanup stuffs into
	lambda function cleanup, remove scev_reset call, and adjust return
	value.
	(tree_predictive_commoning): Adjust for different changed values,
	only set flag TODO_update_ssa_only_virtuals if changed.
	(pass_data pass_data_predcom): Remove TODO_update_ssa_only_virtuals
	from todo_flags_finish.


[-- Attachment #2: predcom_update_ssa.diff --]
[-- Type: text/plain, Size: 4660 bytes --]

 gcc/tree-predcom.c | 50 ++++++++++++++++++++++++++--------------------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
index 5482f50198a..02f911a08bb 100644
--- a/gcc/tree-predcom.c
+++ b/gcc/tree-predcom.c
@@ -2280,8 +2280,6 @@ execute_pred_commoning (class loop *loop, vec<chain_p> chains,
 	    remove_stmt (a->stmt);
 	}
     }
-
-  update_ssa (TODO_update_ssa_only_virtuals);
 }
 
 /* For each reference in CHAINS, if its defining statement is
@@ -3174,9 +3172,10 @@ insert_init_seqs (class loop *loop, vec<chain_p> chains)
       }
 }
 
-/* Performs predictive commoning for LOOP.  Sets bit 1<<0 of return value
-   if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa
-   form was corrupted.  */
+/* Performs predictive commoning for LOOP.  Sets bit 1<<1 of return value
+   if LOOP was unrolled; Sets bit 1<<2 of return value if loop closed ssa
+   form was corrupted.  Non-zero return value indicates some changes were
+   applied to this loop.  */
 
 static unsigned
 tree_predictive_commoning_loop (class loop *loop)
@@ -3188,7 +3187,6 @@ tree_predictive_commoning_loop (class loop *loop)
   unsigned unroll_factor;
   class tree_niter_desc desc;
   bool unroll = false, loop_closed_ssa = false;
-  edge exit;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "Processing loop %d\n",  loop->num);
@@ -3244,13 +3242,22 @@ tree_predictive_commoning_loop (class loop *loop)
   determine_roots (loop, components, &chains);
   release_components (components);
 
+  auto cleanup = [&]() {
+    release_chains (chains);
+    free_data_refs (datarefs);
+    BITMAP_FREE (looparound_phis);
+    free_affine_expand_cache (&name_expansions);
+  };
+
   if (!chains.exists ())
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file,
 		 "Predictive commoning failed: no suitable chains\n");
-      goto end;
+      cleanup ();
+      return 0;
     }
+
   prepare_initializers (loop, chains);
   loop_closed_ssa = prepare_finalizers (loop, chains);
 
@@ -3268,10 +3275,8 @@ tree_predictive_commoning_loop (class loop *loop)
   /* Determine the unroll factor, and if the loop should be unrolled, ensure
      that its number of iterations is divisible by the factor.  */
   unroll_factor = determine_unroll_factor (chains);
-  scev_reset ();
   unroll = (unroll_factor > 1
 	    && can_unroll_loop_p (loop, unroll_factor, &desc));
-  exit = single_dom_exit (loop);
 
   /* Execute the predictive commoning transformations, and possibly unroll the
      loop.  */
@@ -3285,8 +3290,6 @@ tree_predictive_commoning_loop (class loop *loop)
       dta.chains = chains;
       dta.tmp_vars = tmp_vars;
 
-      update_ssa (TODO_update_ssa_only_virtuals);
-
       /* Cfg manipulations performed in tree_transform_and_unroll_loop before
 	 execute_pred_commoning_cbck is called may cause phi nodes to be
 	 reallocated, which is a problem since CHAINS may point to these
@@ -3295,6 +3298,7 @@ tree_predictive_commoning_loop (class loop *loop)
 	 the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
       replace_phis_by_defined_names (chains);
 
+      edge exit = single_dom_exit (loop);
       tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
 				      execute_pred_commoning_cbck, &dta);
       eliminate_temp_copies (loop, tmp_vars);
@@ -3307,14 +3311,9 @@ tree_predictive_commoning_loop (class loop *loop)
       execute_pred_commoning (loop, chains, tmp_vars);
     }
 
-end: ;
-  release_chains (chains);
-  free_data_refs (datarefs);
-  BITMAP_FREE (looparound_phis);
+  cleanup ();
 
-  free_affine_expand_cache (&name_expansions);
-
-  return (unroll ? 1 : 0) | (loop_closed_ssa ? 2 : 0);
+  return (unroll ? 2 : 1) | (loop_closed_ssa ? 4 : 1);
 }
 
 /* Runs predictive commoning.  */
@@ -3335,12 +3334,19 @@ tree_predictive_commoning (void)
 
   if (changed > 0)
     {
-      scev_reset ();
+      ret = TODO_update_ssa_only_virtuals;
 
+      /* Some loop(s) got unrolled.  */
       if (changed > 1)
-	rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
+	{
+	  scev_reset ();
 
-      ret = TODO_cleanup_cfg;
+	  /* Need to fix up loop closed SSA.  */
+	  if (changed >= 4)
+	    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
+
+	  ret |= TODO_cleanup_cfg;
+	}
     }
 
   return ret;
@@ -3369,7 +3375,7 @@ const pass_data pass_data_predcom =
   0, /* properties_provided */
   0, /* properties_destroyed */
   0, /* todo_flags_start */
-  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
+  0, /* todo_flags_finish */
 };
 
 class pass_predcom : public gimple_opt_pass
-- 
2.17.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] predcom: Adjust some unnecessary update_ssa calls
  2021-06-02  9:29 [PATCH] predcom: Adjust some unnecessary update_ssa calls Kewen.Lin
@ 2021-06-07 14:46 ` Richard Biener
  2021-06-07 15:20   ` Martin Sebor
  2021-06-08  9:30   ` Kewen.Lin
  0 siblings, 2 replies; 16+ messages in thread
From: Richard Biener @ 2021-06-07 14:46 UTC (permalink / raw)
  To: Kewen.Lin; +Cc: GCC Patches, Segher Boessenkool, Bill Schmidt

On Wed, Jun 2, 2021 at 11:29 AM Kewen.Lin <linkw@linux.ibm.com> wrote:
>
> Hi,
>
> As Richi suggested in PR100794, this patch is to remove
> some unnecessary update_ssa calls with flag
> TODO_update_ssa_only_virtuals, also do some refactoring.
>
> Bootstrapped/regtested on powerpc64le-linux-gnu P9,
> x86_64-redhat-linux and aarch64-linux-gnu, built well
> on Power9 ppc64le with --with-build-config=bootstrap-O3,
> and passed both P8 and P9 SPEC2017 full build with
> {-O3, -Ofast} + {,-funroll-loops}.
>
> Is it ok for trunk?

LGTM, minor comment on the fancy C++:

+  auto cleanup = [&]() {
+    release_chains (chains);
+    free_data_refs (datarefs);
+    BITMAP_FREE (looparound_phis);
+    free_affine_expand_cache (&name_expansions);
+  };

+      cleanup ();
+      return 0;

so that could have been

  class cleanup {
     ~cleanup()
        {
          release_chains (chains);
          free_data_refs (datarefs);
          BITMAP_FREE (looparound_phis);
          free_affine_expand_cache (&name_expansions);
        }
  } cleanup;

?  Or some other means of adding registering a RAII-style cleanup?
I mean, we can't wrap it all in

  try {...}
  finally {...}

because C++ doesn't have finally.

OK with this tiny part of the C++ refactoring delayed, but we can also simply
discuss best options.  At least for looparound_phis a good cleanup would
be to pass the bitmap around and use auto_bitmap local to
tree_predictive_commoning_loop ...

Thanks,
Richard.

> BR,
> Kewen
> -----
> gcc/ChangeLog:
>
>         * tree-predcom.c (execute_pred_commoning): Remove update_ssa call.
>         (tree_predictive_commoning_loop): Factor some cleanup stuffs into
>         lambda function cleanup, remove scev_reset call, and adjust return
>         value.
>         (tree_predictive_commoning): Adjust for different changed values,
>         only set flag TODO_update_ssa_only_virtuals if changed.
>         (pass_data pass_data_predcom): Remove TODO_update_ssa_only_virtuals
>         from todo_flags_finish.
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] predcom: Adjust some unnecessary update_ssa calls
  2021-06-07 14:46 ` Richard Biener
@ 2021-06-07 15:20   ` Martin Sebor
  2021-06-08  9:30   ` Kewen.Lin
  1 sibling, 0 replies; 16+ messages in thread
From: Martin Sebor @ 2021-06-07 15:20 UTC (permalink / raw)
  To: Richard Biener, Kewen.Lin; +Cc: Bill Schmidt, GCC Patches, Segher Boessenkool

On 6/7/21 8:46 AM, Richard Biener via Gcc-patches wrote:
> On Wed, Jun 2, 2021 at 11:29 AM Kewen.Lin <linkw@linux.ibm.com> wrote:
>>
>> Hi,
>>
>> As Richi suggested in PR100794, this patch is to remove
>> some unnecessary update_ssa calls with flag
>> TODO_update_ssa_only_virtuals, also do some refactoring.
>>
>> Bootstrapped/regtested on powerpc64le-linux-gnu P9,
>> x86_64-redhat-linux and aarch64-linux-gnu, built well
>> on Power9 ppc64le with --with-build-config=bootstrap-O3,
>> and passed both P8 and P9 SPEC2017 full build with
>> {-O3, -Ofast} + {,-funroll-loops}.
>>
>> Is it ok for trunk?
> 
> LGTM, minor comment on the fancy C++:
> 
> +  auto cleanup = [&]() {
> +    release_chains (chains);
> +    free_data_refs (datarefs);
> +    BITMAP_FREE (looparound_phis);
> +    free_affine_expand_cache (&name_expansions);
> +  };
> 
> +      cleanup ();
> +      return 0;
> 
> so that could have been
> 
>    class cleanup {
>       ~cleanup()
>          {
>            release_chains (chains);
>            free_data_refs (datarefs);
>            BITMAP_FREE (looparound_phis);
>            free_affine_expand_cache (&name_expansions);
>          }
>    } cleanup;
> 
> ?  Or some other means of adding registering a RAII-style cleanup?

I agree this would be better than invoking the cleanup lambda
explicitly.

Going a step further would be to encapsulate all the data in a class
(eliminating the static variables) and make
tree_predictive_commoning_loop() its member function (along with
whatever others it calls), and have the dtor take care of
the cleanup.

Of course, if the data types were classes with ctors and dtors
(including, for example, auto_vec rather than vec for chains)
the cleanup would just happen and none of the explicit calls
would be necessary.

Martin

> I mean, we can't wrap it all in
> 
>    try {...}
>    finally {...}
> 
> because C++ doesn't have finally.
> 
> OK with this tiny part of the C++ refactoring delayed, but we can also simply
> discuss best options.  At least for looparound_phis a good cleanup would
> be to pass the bitmap around and use auto_bitmap local to
> tree_predictive_commoning_loop ...
> 
> Thanks,
> Richard.
> 
>> BR,
>> Kewen
>> -----
>> gcc/ChangeLog:
>>
>>          * tree-predcom.c (execute_pred_commoning): Remove update_ssa call.
>>          (tree_predictive_commoning_loop): Factor some cleanup stuffs into
>>          lambda function cleanup, remove scev_reset call, and adjust return
>>          value.
>>          (tree_predictive_commoning): Adjust for different changed values,
>>          only set flag TODO_update_ssa_only_virtuals if changed.
>>          (pass_data pass_data_predcom): Remove TODO_update_ssa_only_virtuals
>>          from todo_flags_finish.
>>


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] predcom: Adjust some unnecessary update_ssa calls
  2021-06-07 14:46 ` Richard Biener
  2021-06-07 15:20   ` Martin Sebor
@ 2021-06-08  9:30   ` Kewen.Lin
  2021-06-08 11:02     ` Richard Biener
  2021-06-08 14:26     ` [PATCH] predcom: Adjust some unnecessary update_ssa calls Martin Sebor
  1 sibling, 2 replies; 16+ messages in thread
From: Kewen.Lin @ 2021-06-08  9:30 UTC (permalink / raw)
  To: Richard Biener, msebor; +Cc: GCC Patches, Segher Boessenkool, Bill Schmidt

[-- Attachment #1: Type: text/plain, Size: 2204 bytes --]

on 2021/6/7 下午10:46, Richard Biener wrote:
> On Wed, Jun 2, 2021 at 11:29 AM Kewen.Lin <linkw@linux.ibm.com> wrote:
>>
>> Hi,
>>
>> As Richi suggested in PR100794, this patch is to remove
>> some unnecessary update_ssa calls with flag
>> TODO_update_ssa_only_virtuals, also do some refactoring.
>>
>> Bootstrapped/regtested on powerpc64le-linux-gnu P9,
>> x86_64-redhat-linux and aarch64-linux-gnu, built well
>> on Power9 ppc64le with --with-build-config=bootstrap-O3,
>> and passed both P8 and P9 SPEC2017 full build with
>> {-O3, -Ofast} + {,-funroll-loops}.
>>
>> Is it ok for trunk?
> 
> LGTM, minor comment on the fancy C++:
> 
> +  auto cleanup = [&]() {
> +    release_chains (chains);
> +    free_data_refs (datarefs);
> +    BITMAP_FREE (looparound_phis);
> +    free_affine_expand_cache (&name_expansions);
> +  };
> 
> +      cleanup ();
> +      return 0;
> 
> so that could have been
> 
>   class cleanup {
>      ~cleanup()
>         {
>           release_chains (chains);
>           free_data_refs (datarefs);
>           BITMAP_FREE (looparound_phis);
>           free_affine_expand_cache (&name_expansions);
>         }
>   } cleanup;
> 
> ?  Or some other means of adding registering a RAII-style cleanup?
> I mean, we can't wrap it all in
> 
>   try {...}
>   finally {...}
> 
> because C++ doesn't have finally.
> 
> OK with this tiny part of the C++ refactoring delayed, but we can also simply
> discuss best options.  At least for looparound_phis a good cleanup would
> be to pass the bitmap around and use auto_bitmap local to
> tree_predictive_commoning_loop ...
> 

Thanks Richi!  One draft (not ready for review) is attached for the further
discussion.  It follows the idea of RAII-style cleanup.  I noticed that
Martin suggested stepping forward to make tree_predictive_commoning_loop
and its callees into one class (Thanks Martin), since there are not many
this kind of C++-style work functions, I want to double confirm which option
do you guys prefer?

One point you might have seen is that to make tree_predictive_commoning_loop
and its callees as member functions of one class can avoid to pass bitmap
looparound_phis all around what's in the draft.  :)

BR,
Kewen


[-- Attachment #2: draft.diff --]
[-- Type: text/plain, Size: 26196 bytes --]

diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
index ac1674d5486..75acc342c5a 100644
--- a/gcc/tree-predcom.c
+++ b/gcc/tree-predcom.c
@@ -375,13 +375,40 @@ struct component
   struct component *next;
 };
 
-/* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */
+typedef hash_map<tree, name_expansion *> tree_expand_map_t;
+static void release_chains (vec<chain_p> chains);
 
-static bitmap looparound_phis;
+/* Class for predictive commoning data structure for one LOOP.  */
+class loop_pcom_info
+{
+public:
+  loop_pcom_info (loop_p l)
+    : loop (l), datarefs (vNULL), dependences (vNULL), chains (vNULL),
+      cache (NULL)
+  {
+    dependences.create (10);
+    datarefs.create (10);
+  }
 
-/* Cache used by tree_to_aff_combination_expand.  */
+  ~loop_pcom_info ()
+  {
+    free_data_refs (datarefs);
+    free_dependence_relations (dependences);
+    release_chains (chains);
+    free_affine_expand_cache (&cache);
+  }
 
-static hash_map<tree, name_expansion *> *name_expansions;
+  /* The pointer to the given loop.  */
+  loop_p loop;
+  /* All data references.  */
+  vec<data_reference_p> datarefs;
+  /* All data dependences.  */
+  vec<ddr_p> dependences;
+  /* All chains.  */
+  vec<chain_p> chains;
+  /* Cache used by tree_to_aff_combination_expand.  */
+  tree_expand_map_t *cache;
+};
 
 /* Dumps data reference REF to FILE.  */
 
@@ -673,13 +700,13 @@ suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET.  */
 
 static void
-aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
+aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset,
+			   tree_expand_map_t **cache_ptr)
 {
   tree type = TREE_TYPE (DR_OFFSET (dr));
   aff_tree delta;
 
-  tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
-				  &name_expansions);
+  tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset, cache_ptr);
   aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr)));
   aff_combination_add (offset, &delta);
 }
@@ -692,7 +719,7 @@ aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
 
 static bool
 determine_offset (struct data_reference *a, struct data_reference *b,
-		  poly_widest_int *off)
+		  poly_widest_int *off, tree_expand_map_t **cache_ptr)
 {
   aff_tree diff, baseb, step;
   tree typea, typeb;
@@ -720,13 +747,13 @@ determine_offset (struct data_reference *a, struct data_reference *b,
 
   /* Compare the offsets of the addresses, and check whether the difference
      is a multiple of step.  */
-  aff_combination_dr_offset (a, &diff);
-  aff_combination_dr_offset (b, &baseb);
+  aff_combination_dr_offset (a, &diff, cache_ptr);
+  aff_combination_dr_offset (b, &baseb, cache_ptr);
   aff_combination_scale (&baseb, -1);
   aff_combination_add (&diff, &baseb);
 
-  tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
-				  &step, &name_expansions);
+  tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)), &step,
+				  cache_ptr);
   return aff_combination_constant_multiple_p (&diff, &step, off);
 }
 
@@ -750,9 +777,9 @@ last_always_executed_block (class loop *loop)
 /* Splits dependence graph on DATAREFS described by DEPENDS to components.  */
 
 static struct component *
-split_data_refs_to_components (class loop *loop,
-			       vec<data_reference_p> datarefs,
-			       vec<ddr_p> depends)
+split_data_refs_to_components (class loop *loop, vec<data_reference_p> datarefs,
+			       vec<ddr_p> depends,
+			       tree_expand_map_t **cache_ptr)
 {
   unsigned i, n = datarefs.length ();
   unsigned ca, ia, ib, bad;
@@ -827,7 +854,7 @@ split_data_refs_to_components (class loop *loop,
       if (DR_IS_READ (dra) && DR_IS_READ (drb))
 	{
 	  if (ia == bad || ib == bad
-	      || !determine_offset (dra, drb, &dummy_off))
+	      || !determine_offset (dra, drb, &dummy_off, cache_ptr))
 	    continue;
 	}
       /* If A is read and B write or vice versa and there is unsuitable
@@ -842,7 +869,7 @@ split_data_refs_to_components (class loop *loop,
 	      bitmap_set_bit (no_store_store_comps, ib);
 	      continue;
 	    }
-	  else if (!determine_offset (dra, drb, &dummy_off))
+	  else if (!determine_offset (dra, drb, &dummy_off, cache_ptr))
 	    {
 	      bitmap_set_bit (no_store_store_comps, ib);
 	      merge_comps (comp_father, comp_size, bad, ia);
@@ -856,7 +883,7 @@ split_data_refs_to_components (class loop *loop,
 	      bitmap_set_bit (no_store_store_comps, ia);
 	      continue;
 	    }
-	  else if (!determine_offset (dra, drb, &dummy_off))
+	  else if (!determine_offset (dra, drb, &dummy_off, cache_ptr))
 	    {
 	      bitmap_set_bit (no_store_store_comps, ia);
 	      merge_comps (comp_father, comp_size, bad, ib);
@@ -865,7 +892,7 @@ split_data_refs_to_components (class loop *loop,
 	}
       else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)
 	       && ia != bad && ib != bad
-	       && !determine_offset (dra, drb, &dummy_off))
+	       && !determine_offset (dra, drb, &dummy_off, cache_ptr))
 	{
 	  merge_comps (comp_father, comp_size, bad, ia);
 	  merge_comps (comp_father, comp_size, bad, ib);
@@ -949,7 +976,7 @@ end:
    loop.  */
 
 static bool
-suitable_component_p (class loop *loop, struct component *comp)
+suitable_component_p (class loop *loop, struct component *comp, tree_expand_map_t **cache_ptr)
 {
   unsigned i;
   dref a, first;
@@ -980,7 +1007,7 @@ suitable_component_p (class loop *loop, struct component *comp)
       /* Polynomial offsets are no use, since we need to know the
 	 gap between iteration numbers at compile time.  */
       poly_widest_int offset;
-      if (!determine_offset (first->ref, a->ref, &offset)
+      if (!determine_offset (first->ref, a->ref, &offset, cache_ptr)
 	  || !offset.is_constant (&a->offset))
 	return false;
 
@@ -1005,14 +1032,15 @@ suitable_component_p (class loop *loop, struct component *comp)
    the beginning of this file.  LOOP is the current loop.  */
 
 static struct component *
-filter_suitable_components (class loop *loop, struct component *comps)
+filter_suitable_components (class loop *loop, struct component *comps,
+			    tree_expand_map_t **cache_ptr)
 {
   struct component **comp, *act;
 
   for (comp = &comps; *comp; )
     {
       act = *comp;
-      if (suitable_component_p (loop, act))
+      if (suitable_component_p (loop, act, cache_ptr))
 	comp = &act->next;
       else
 	{
@@ -1206,8 +1234,8 @@ name_for_ref (dref ref)
    iterations of the innermost enclosing loop).  */
 
 static bool
-valid_initializer_p (struct data_reference *ref,
-		     unsigned distance, struct data_reference *root)
+valid_initializer_p (struct data_reference *ref, unsigned distance,
+		     struct data_reference *root, tree_expand_map_t **cache_ptr)
 {
   aff_tree diff, base, step;
   poly_widest_int off;
@@ -1228,13 +1256,13 @@ valid_initializer_p (struct data_reference *ref,
 
   /* Verify that this index of REF is equal to the root's index at
      -DISTANCE-th iteration.  */
-  aff_combination_dr_offset (root, &diff);
-  aff_combination_dr_offset (ref, &base);
+  aff_combination_dr_offset (root, &diff, cache_ptr);
+  aff_combination_dr_offset (ref, &base, cache_ptr);
   aff_combination_scale (&base, -1);
   aff_combination_add (&diff, &base);
 
   tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
-				  &step, &name_expansions);
+				  &step, cache_ptr);
   if (!aff_combination_constant_multiple_p (&diff, &step, &off))
     return false;
 
@@ -1250,7 +1278,8 @@ valid_initializer_p (struct data_reference *ref,
    is the root of the current chain.  */
 
 static gphi *
-find_looparound_phi (class loop *loop, dref ref, dref root)
+find_looparound_phi (class loop *loop, dref ref, dref root,
+		     tree_expand_map_t **cache_ptr)
 {
   tree name, init, init_ref;
   gphi *phi = NULL;
@@ -1303,7 +1332,7 @@ find_looparound_phi (class loop *loop, dref ref, dref root)
 			     init_stmt))
     return NULL;
 
-  if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
+  if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref, cache_ptr))
     return NULL;
 
   return phi;
@@ -1339,7 +1368,8 @@ insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
    (also, it may allow us to combine chains together).  */
 
 static void
-add_looparound_copies (class loop *loop, chain_p chain)
+add_looparound_copies (class loop *loop, chain_p chain, bitmap looparound_phis,
+		       tree_expand_map_t **cache_ptr)
 {
   unsigned i;
   dref ref, root = get_chain_root (chain);
@@ -1350,7 +1380,7 @@ add_looparound_copies (class loop *loop, chain_p chain)
 
   FOR_EACH_VEC_ELT (chain->refs, i, ref)
     {
-      phi = find_looparound_phi (loop, ref, root);
+      phi = find_looparound_phi (loop, ref, root, cache_ptr);
       if (!phi)
 	continue;
 
@@ -1364,9 +1394,9 @@ add_looparound_copies (class loop *loop, chain_p chain)
    loop.  */
 
 static void
-determine_roots_comp (class loop *loop,
-		      struct component *comp,
-		      vec<chain_p> *chains)
+determine_roots_comp (class loop *loop, struct component *comp,
+		      vec<chain_p> *chains, bitmap looparound_phis,
+		      tree_expand_map_t **cache_ptr)
 {
   unsigned i;
   dref a;
@@ -1422,7 +1452,7 @@ determine_roots_comp (class loop *loop,
 	{
 	  if (nontrivial_chain_p (chain))
 	    {
-	      add_looparound_copies (loop, chain);
+	      add_looparound_copies (loop, chain, looparound_phis, cache_ptr);
 	      chains->safe_push (chain);
 	    }
 	  else
@@ -1443,7 +1473,7 @@ determine_roots_comp (class loop *loop,
 
   if (nontrivial_chain_p (chain))
     {
-      add_looparound_copies (loop, chain);
+      add_looparound_copies (loop, chain, looparound_phis, cache_ptr);
       chains->safe_push (chain);
     }
   else
@@ -1454,13 +1484,14 @@ determine_roots_comp (class loop *loop,
    separates the references to CHAINS.  LOOP is the current loop.  */
 
 static void
-determine_roots (class loop *loop,
-		 struct component *comps, vec<chain_p> *chains)
+determine_roots (class loop *loop, struct component *comps,
+		 vec<chain_p> *chains, bitmap looparound_phis,
+		 tree_expand_map_t **cache_ptr)
 {
   struct component *comp;
 
   for (comp = comps; comp; comp = comp->next)
-    determine_roots_comp (loop, comp, chains);
+    determine_roots_comp (loop, comp, chains, looparound_phis, cache_ptr);
 }
 
 /* Replace the reference in statement STMT with temporary variable
@@ -2028,7 +2059,7 @@ execute_load_motion (class loop *loop, chain_p chain, bitmap tmp_vars)
    such statement, or more statements, NULL is returned.  */
 
 static gimple *
-single_nonlooparound_use (tree name)
+single_nonlooparound_use (tree name, bitmap looparound_phis)
 {
   use_operand_p use;
   imm_use_iterator it;
@@ -2063,7 +2094,7 @@ single_nonlooparound_use (tree name)
    used.  */
 
 static void
-remove_stmt (gimple *stmt)
+remove_stmt (gimple *stmt, bitmap looparound_phis)
 {
   tree name;
   gimple *next;
@@ -2072,7 +2103,7 @@ remove_stmt (gimple *stmt)
   if (gimple_code (stmt) == GIMPLE_PHI)
     {
       name = PHI_RESULT (stmt);
-      next = single_nonlooparound_use (name);
+      next = single_nonlooparound_use (name, looparound_phis);
       reset_debug_uses (stmt);
       psi = gsi_for_stmt (stmt);
       remove_phi_node (&psi, true);
@@ -2094,7 +2125,7 @@ remove_stmt (gimple *stmt)
       name = gimple_assign_lhs (stmt);
       if (TREE_CODE (name) == SSA_NAME)
 	{
-	  next = single_nonlooparound_use (name);
+	  next = single_nonlooparound_use (name, looparound_phis);
 	  reset_debug_uses (stmt);
 	}
       else
@@ -2122,7 +2153,7 @@ remove_stmt (gimple *stmt)
 
 static void
 execute_pred_commoning_chain (class loop *loop, chain_p chain,
-			      bitmap tmp_vars)
+			      bitmap tmp_vars, bitmap looparound_phis)
 {
   unsigned i;
   dref a;
@@ -2172,7 +2203,7 @@ execute_pred_commoning_chain (class loop *loop, chain_p chain,
 	      if (last_store_p)
 		last_store_p = false;
 	      else
-		remove_stmt (a->stmt);
+		remove_stmt (a->stmt, looparound_phis);
 
 	      continue;
 	    }
@@ -2252,8 +2283,8 @@ determine_unroll_factor (vec<chain_p> chains)
    Uids of the newly created temporary variables are marked in TMP_VARS.  */
 
 static void
-execute_pred_commoning (class loop *loop, vec<chain_p> chains,
-			bitmap tmp_vars)
+execute_pred_commoning (class loop *loop, vec<chain_p> chains, bitmap tmp_vars,
+			bitmap looparound_phis)
 {
   chain_p chain;
   unsigned i;
@@ -2263,7 +2294,7 @@ execute_pred_commoning (class loop *loop, vec<chain_p> chains,
       if (chain->type == CT_INVARIANT)
 	execute_load_motion (loop, chain, tmp_vars);
       else
-	execute_pred_commoning_chain (loop, chain, tmp_vars);
+	execute_pred_commoning_chain (loop, chain, tmp_vars, looparound_phis);
     }
 
   FOR_EACH_VEC_ELT (chains, i, chain)
@@ -2277,7 +2308,7 @@ execute_pred_commoning (class loop *loop, vec<chain_p> chains,
 	  dref a;
 	  unsigned j;
 	  for (j = 1; chain->refs.iterate (j, &a); j++)
-	    remove_stmt (a->stmt);
+	    remove_stmt (a->stmt, looparound_phis);
 	}
     }
 }
@@ -2330,6 +2361,7 @@ struct epcc_data
 {
   vec<chain_p> chains;
   bitmap tmp_vars;
+  bitmap looparound_phis;
 };
 
 static void
@@ -2341,7 +2373,8 @@ execute_pred_commoning_cbck (class loop *loop, void *data)
      tree_transform_and_unroll_loop (see detailed description in
      tree_predictive_commoning_loop).  */
   replace_names_by_phis (dta->chains);
-  execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
+  execute_pred_commoning (loop, dta->chains, dta->tmp_vars,
+			  dta->looparound_phis);
 }
 
 /* Base NAME and all the names in the chain of phi nodes that use it
@@ -2434,7 +2467,7 @@ chain_can_be_combined_p (chain_p chain)
    statement.  */
 
 static gimple *
-find_use_stmt (tree *name)
+find_use_stmt (tree *name, bitmap looparound_phis)
 {
   gimple *stmt;
   tree rhs, lhs;
@@ -2442,7 +2475,7 @@ find_use_stmt (tree *name)
   /* Skip over assignments.  */
   while (1)
     {
-      stmt = single_nonlooparound_use (*name);
+      stmt = single_nonlooparound_use (*name, looparound_phis);
       if (!stmt)
 	return NULL;
 
@@ -2487,7 +2520,7 @@ may_reassociate_p (tree type, enum tree_code code)
    is stored in DISTANCE.  */
 
 static gimple *
-find_associative_operation_root (gimple *stmt, unsigned *distance)
+find_associative_operation_root (gimple *stmt, unsigned *distance, bitmap looparound_phis)
 {
   tree lhs;
   gimple *next;
@@ -2503,7 +2536,7 @@ find_associative_operation_root (gimple *stmt, unsigned *distance)
       lhs = gimple_assign_lhs (stmt);
       gcc_assert (TREE_CODE (lhs) == SSA_NAME);
 
-      next = find_use_stmt (&lhs);
+      next = find_use_stmt (&lhs, looparound_phis);
       if (!next
 	  || gimple_assign_rhs_code (next) != code)
 	break;
@@ -2524,25 +2557,25 @@ find_associative_operation_root (gimple *stmt, unsigned *distance)
    NAME2.  */
 
 static gimple *
-find_common_use_stmt (tree *name1, tree *name2)
+find_common_use_stmt (tree *name1, tree *name2, bitmap looparound_phis)
 {
   gimple *stmt1, *stmt2;
 
-  stmt1 = find_use_stmt (name1);
+  stmt1 = find_use_stmt (name1, looparound_phis);
   if (!stmt1)
     return NULL;
 
-  stmt2 = find_use_stmt (name2);
+  stmt2 = find_use_stmt (name2, looparound_phis);
   if (!stmt2)
     return NULL;
 
   if (stmt1 == stmt2)
     return stmt1;
 
-  stmt1 = find_associative_operation_root (stmt1, NULL);
+  stmt1 = find_associative_operation_root (stmt1, NULL, looparound_phis);
   if (!stmt1)
     return NULL;
-  stmt2 = find_associative_operation_root (stmt2, NULL);
+  stmt2 = find_associative_operation_root (stmt2, NULL, looparound_phis);
   if (!stmt2)
     return NULL;
 
@@ -2555,7 +2588,7 @@ find_common_use_stmt (tree *name1, tree *name2)
 
 static bool
 combinable_refs_p (dref r1, dref r2,
-		   enum tree_code *code, bool *swap, tree *rslt_type)
+		   enum tree_code *code, bool *swap, tree *rslt_type, bitmap looparound_phis)
 {
   enum tree_code acode;
   bool aswap;
@@ -2567,7 +2600,7 @@ combinable_refs_p (dref r1, dref r2,
   name2 = name_for_ref (r2);
   gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
 
-  stmt = find_common_use_stmt (&name1, &name2);
+  stmt = find_common_use_stmt (&name1, &name2, looparound_phis);
 
   if (!stmt
       /* A simple post-dominance check - make sure the combination
@@ -2623,7 +2656,7 @@ remove_name_from_operation (gimple *stmt, tree op)
    are combined in a single statement, and returns this statement.  */
 
 static gimple *
-reassociate_to_the_same_stmt (tree name1, tree name2)
+reassociate_to_the_same_stmt (tree name1, tree name2, bitmap looparound_phis)
 {
   gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
   gassign *new_stmt, *tmp_stmt;
@@ -2633,10 +2666,10 @@ reassociate_to_the_same_stmt (tree name1, tree name2)
   tree type = TREE_TYPE (name1);
   gimple_stmt_iterator bsi;
 
-  stmt1 = find_use_stmt (&name1);
-  stmt2 = find_use_stmt (&name2);
-  root1 = find_associative_operation_root (stmt1, &dist1);
-  root2 = find_associative_operation_root (stmt2, &dist2);
+  stmt1 = find_use_stmt (&name1, looparound_phis);
+  stmt2 = find_use_stmt (&name2, looparound_phis);
+  root1 = find_associative_operation_root (stmt1, &dist1, looparound_phis);
+  root2 = find_associative_operation_root (stmt2, &dist2, looparound_phis);
   code = gimple_assign_rhs_code (stmt1);
 
   gcc_assert (root1 && root2 && root1 == root2
@@ -2651,22 +2684,22 @@ reassociate_to_the_same_stmt (tree name1, tree name2)
 
   while (dist1 > dist2)
     {
-      s1 = find_use_stmt (&r1);
+      s1 = find_use_stmt (&r1, looparound_phis);
       r1 = gimple_assign_lhs (s1);
       dist1--;
     }
   while (dist2 > dist1)
     {
-      s2 = find_use_stmt (&r2);
+      s2 = find_use_stmt (&r2, looparound_phis);
       r2 = gimple_assign_lhs (s2);
       dist2--;
     }
 
   while (s1 != s2)
     {
-      s1 = find_use_stmt (&r1);
+      s1 = find_use_stmt (&r1, looparound_phis);
       r1 = gimple_assign_lhs (s1);
-      s2 = find_use_stmt (&r2);
+      s2 = find_use_stmt (&r2, looparound_phis);
       r2 = gimple_assign_lhs (s2);
     }
 
@@ -2708,25 +2741,25 @@ reassociate_to_the_same_stmt (tree name1, tree name2)
    the expression so that they are used in the same statement.  */
 
 static gimple *
-stmt_combining_refs (dref r1, dref r2)
+stmt_combining_refs (dref r1, dref r2, bitmap looparound_phis)
 {
   gimple *stmt1, *stmt2;
   tree name1 = name_for_ref (r1);
   tree name2 = name_for_ref (r2);
 
-  stmt1 = find_use_stmt (&name1);
-  stmt2 = find_use_stmt (&name2);
+  stmt1 = find_use_stmt (&name1, looparound_phis);
+  stmt2 = find_use_stmt (&name2, looparound_phis);
   if (stmt1 == stmt2)
     return stmt1;
 
-  return reassociate_to_the_same_stmt (name1, name2);
+  return reassociate_to_the_same_stmt (name1, name2, looparound_phis);
 }
 
 /* Tries to combine chains CH1 and CH2 together.  If this succeeds, the
    description of the new chain is returned, otherwise we return NULL.  */
 
 static chain_p
-combine_chains (chain_p ch1, chain_p ch2)
+combine_chains (chain_p ch1, chain_p ch2, bitmap looparound_phis)
 {
   dref r1, r2, nw;
   enum tree_code op = ERROR_MARK;
@@ -2749,7 +2782,7 @@ combine_chains (chain_p ch1, chain_p ch2)
       if (r1->distance != r2->distance)
 	return NULL;
 
-      if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
+      if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type, looparound_phis))
 	return NULL;
     }
 
@@ -2768,7 +2801,7 @@ combine_chains (chain_p ch1, chain_p ch2)
 	       && ch2->refs.iterate (i, &r2)); i++)
     {
       nw = XCNEW (class dref_d);
-      nw->stmt = stmt_combining_refs (r1, r2);
+      nw->stmt = stmt_combining_refs (r1, r2, looparound_phis);
       nw->distance = r1->distance;
 
       new_chain->refs.safe_push (nw);
@@ -2817,7 +2850,8 @@ pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
 /* Try to combine the CHAINS in LOOP.  */
 
 static void
-try_combine_chains (class loop *loop, vec<chain_p> *chains)
+try_combine_chains (class loop *loop, vec<chain_p> *chains,
+		    bitmap looparound_phis)
 {
   unsigned i, j;
   chain_p ch1, ch2, cch;
@@ -2839,7 +2873,7 @@ try_combine_chains (class loop *loop, vec<chain_p> *chains)
 	  if (!chain_can_be_combined_p (ch2))
 	    continue;
 
-	  cch = combine_chains (ch1, ch2);
+	  cch = combine_chains (ch1, ch2, looparound_phis);
 	  if (cch)
 	    {
 	      worklist.safe_push (cch);
@@ -3172,6 +3206,7 @@ insert_init_seqs (class loop *loop, vec<chain_p> chains)
       }
 }
 
+
 /* Performs predictive commoning for LOOP.  Sets bit 1<<1 of return value
    if LOOP was unrolled; Sets bit 1<<2 of return value if loop closed ssa
    form was corrupted.  Non-zero return value indicates some changes were
@@ -3180,10 +3215,7 @@ insert_init_seqs (class loop *loop, vec<chain_p> chains)
 static unsigned
 tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p)
 {
-  vec<data_reference_p> datarefs;
-  vec<ddr_p> dependences;
   struct component *components;
-  vec<chain_p> chains = vNULL;
   unsigned unroll_factor = 0;
   class tree_niter_desc desc;
   bool unroll = false, loop_closed_ssa = false;
@@ -3202,31 +3234,23 @@ tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p)
 
   /* Find the data references and split them into components according to their
      dependence relations.  */
+  loop_pcom_info info (loop);
   auto_vec<loop_p, 3> loop_nest;
-  dependences.create (10);
-  datarefs.create (10);
-  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
-					   &dependences))
+  if (!compute_data_dependences_for_loop (loop, true, &loop_nest,
+					  &info.datarefs, &info.dependences))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "Cannot analyze data dependencies\n");
-      free_data_refs (datarefs);
-      free_dependence_relations (dependences);
       return 0;
     }
 
   if (dump_file && (dump_flags & TDF_DETAILS))
-    dump_data_dependence_relations (dump_file, dependences);
+    dump_data_dependence_relations (dump_file, info.dependences);
 
-  components = split_data_refs_to_components (loop, datarefs, dependences);
-  loop_nest.release ();
-  free_dependence_relations (dependences);
+  components = split_data_refs_to_components (loop, info.datarefs,
+					      info.dependences, &info.cache);
   if (!components)
-    {
-      free_data_refs (datarefs);
-      free_affine_expand_cache (&name_expansions);
       return 0;
-    }
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
@@ -3235,47 +3259,40 @@ tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p)
     }
 
   /* Find the suitable components and split them into chains.  */
-  components = filter_suitable_components (loop, components);
+  components = filter_suitable_components (loop, components, &info.cache);
 
   auto_bitmap tmp_vars;
-  looparound_phis = BITMAP_ALLOC (NULL);
-  determine_roots (loop, components, &chains);
+  /* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */
+  auto_bitmap looparound_phis;
+  determine_roots (loop, components, &info.chains, looparound_phis, &info.cache);
   release_components (components);
 
-  auto cleanup = [&]() {
-    release_chains (chains);
-    free_data_refs (datarefs);
-    BITMAP_FREE (looparound_phis);
-    free_affine_expand_cache (&name_expansions);
-  };
-
-  if (!chains.exists ())
+  if (!info.chains.exists ())
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file,
 		 "Predictive commoning failed: no suitable chains\n");
-      cleanup ();
       return 0;
     }
 
-  prepare_initializers (loop, chains);
-  loop_closed_ssa = prepare_finalizers (loop, chains);
+  prepare_initializers (loop, info.chains);
+  loop_closed_ssa = prepare_finalizers (loop, info.chains);
 
   /* Try to combine the chains that are always worked with together.  */
-  try_combine_chains (loop, &chains);
+  try_combine_chains (loop, &info.chains, looparound_phis);
 
-  insert_init_seqs (loop, chains);
+  insert_init_seqs (loop, info.chains);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
       fprintf (dump_file, "Before commoning:\n\n");
-      dump_chains (dump_file, chains);
+      dump_chains (dump_file, info.chains);
     }
 
   if (allow_unroll_p)
     /* Determine the unroll factor, and if the loop should be unrolled, ensure
        that its number of iterations is divisible by the factor.  */
-    unroll_factor = determine_unroll_factor (chains);
+    unroll_factor = determine_unroll_factor (info.chains);
 
   if (unroll_factor > 1)
     unroll = can_unroll_loop_p (loop, unroll_factor, &desc);
@@ -3289,8 +3306,9 @@ tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p)
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
 
-      dta.chains = chains;
+      dta.chains = info.chains;
       dta.tmp_vars = tmp_vars;
+      dta.looparound_phis = looparound_phis;
 
       /* Cfg manipulations performed in tree_transform_and_unroll_loop before
 	 execute_pred_commoning_cbck is called may cause phi nodes to be
@@ -3298,7 +3316,7 @@ tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p)
 	 statements.  To fix this, we store the ssa names defined by the
 	 phi nodes here instead of the phi nodes themselves, and restore
 	 the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
-      replace_phis_by_defined_names (chains);
+      replace_phis_by_defined_names (info.chains);
 
       edge exit = single_dom_exit (loop);
       tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
@@ -3310,11 +3328,9 @@ tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p)
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file,
 		 "Executing predictive commoning without unrolling.\n");
-      execute_pred_commoning (loop, chains, tmp_vars);
+      execute_pred_commoning (loop, info.chains, tmp_vars, looparound_phis);
     }
 
-  cleanup ();
-
   return (unroll ? 2 : 1) | (loop_closed_ssa ? 4 : 1);
 }
 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] predcom: Adjust some unnecessary update_ssa calls
  2021-06-08  9:30   ` Kewen.Lin
@ 2021-06-08 11:02     ` Richard Biener
  2021-06-08 11:09       ` Richard Biener
  2021-06-22  2:35       ` predcom: Refactor more by encapsulating global states Kewen.Lin
  2021-06-08 14:26     ` [PATCH] predcom: Adjust some unnecessary update_ssa calls Martin Sebor
  1 sibling, 2 replies; 16+ messages in thread
From: Richard Biener @ 2021-06-08 11:02 UTC (permalink / raw)
  To: Kewen.Lin; +Cc: Martin Sebor, GCC Patches, Segher Boessenkool, Bill Schmidt

On Tue, Jun 8, 2021 at 11:31 AM Kewen.Lin <linkw@linux.ibm.com> wrote:
>
> on 2021/6/7 下午10:46, Richard Biener wrote:
> > On Wed, Jun 2, 2021 at 11:29 AM Kewen.Lin <linkw@linux.ibm.com> wrote:
> >>
> >> Hi,
> >>
> >> As Richi suggested in PR100794, this patch is to remove
> >> some unnecessary update_ssa calls with flag
> >> TODO_update_ssa_only_virtuals, also do some refactoring.
> >>
> >> Bootstrapped/regtested on powerpc64le-linux-gnu P9,
> >> x86_64-redhat-linux and aarch64-linux-gnu, built well
> >> on Power9 ppc64le with --with-build-config=bootstrap-O3,
> >> and passed both P8 and P9 SPEC2017 full build with
> >> {-O3, -Ofast} + {,-funroll-loops}.
> >>
> >> Is it ok for trunk?
> >
> > LGTM, minor comment on the fancy C++:
> >
> > +  auto cleanup = [&]() {
> > +    release_chains (chains);
> > +    free_data_refs (datarefs);
> > +    BITMAP_FREE (looparound_phis);
> > +    free_affine_expand_cache (&name_expansions);
> > +  };
> >
> > +      cleanup ();
> > +      return 0;
> >
> > so that could have been
> >
> >   class cleanup {
> >      ~cleanup()
> >         {
> >           release_chains (chains);
> >           free_data_refs (datarefs);
> >           BITMAP_FREE (looparound_phis);
> >           free_affine_expand_cache (&name_expansions);
> >         }
> >   } cleanup;
> >
> > ?  Or some other means of adding registering a RAII-style cleanup?
> > I mean, we can't wrap it all in
> >
> >   try {...}
> >   finally {...}
> >
> > because C++ doesn't have finally.
> >
> > OK with this tiny part of the C++ refactoring delayed, but we can also simply
> > discuss best options.  At least for looparound_phis a good cleanup would
> > be to pass the bitmap around and use auto_bitmap local to
> > tree_predictive_commoning_loop ...
> >
>
> Thanks Richi!  One draft (not ready for review) is attached for the further
> discussion.  It follows the idea of RAII-style cleanup.  I noticed that
> Martin suggested stepping forward to make tree_predictive_commoning_loop
> and its callees into one class (Thanks Martin), since there are not many
> this kind of C++-style work functions, I want to double confirm which option
> do you guys prefer?
>
> One point you might have seen is that to make tree_predictive_commoning_loop
> and its callees as member functions of one class can avoid to pass bitmap
> looparound_phis all around what's in the draft.  :)

Such general cleanup is of course desired - Giuliano started some of it within
GSoC two years ago in the attempt to thread the compilation process.  The
cleanup then helps to get rid of global state which of course interferes here
(and avoids unnecessary use of TLS vars).

So yes, encapsulating global state into a class and making accessors
member functions is something that is desired (but a lot of mechanical
work).

Thanks
Richard.

> BR,
> Kewen
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] predcom: Adjust some unnecessary update_ssa calls
  2021-06-08 11:02     ` Richard Biener
@ 2021-06-08 11:09       ` Richard Biener
  2021-06-22  2:35       ` predcom: Refactor more by encapsulating global states Kewen.Lin
  1 sibling, 0 replies; 16+ messages in thread
From: Richard Biener @ 2021-06-08 11:09 UTC (permalink / raw)
  To: Kewen.Lin; +Cc: Martin Sebor, GCC Patches, Segher Boessenkool, Bill Schmidt

On Tue, Jun 8, 2021 at 1:02 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Tue, Jun 8, 2021 at 11:31 AM Kewen.Lin <linkw@linux.ibm.com> wrote:
> >
> > on 2021/6/7 下午10:46, Richard Biener wrote:
> > > On Wed, Jun 2, 2021 at 11:29 AM Kewen.Lin <linkw@linux.ibm.com> wrote:
> > >>
> > >> Hi,
> > >>
> > >> As Richi suggested in PR100794, this patch is to remove
> > >> some unnecessary update_ssa calls with flag
> > >> TODO_update_ssa_only_virtuals, also do some refactoring.
> > >>
> > >> Bootstrapped/regtested on powerpc64le-linux-gnu P9,
> > >> x86_64-redhat-linux and aarch64-linux-gnu, built well
> > >> on Power9 ppc64le with --with-build-config=bootstrap-O3,
> > >> and passed both P8 and P9 SPEC2017 full build with
> > >> {-O3, -Ofast} + {,-funroll-loops}.
> > >>
> > >> Is it ok for trunk?
> > >
> > > LGTM, minor comment on the fancy C++:
> > >
> > > +  auto cleanup = [&]() {
> > > +    release_chains (chains);
> > > +    free_data_refs (datarefs);
> > > +    BITMAP_FREE (looparound_phis);
> > > +    free_affine_expand_cache (&name_expansions);
> > > +  };
> > >
> > > +      cleanup ();
> > > +      return 0;
> > >
> > > so that could have been
> > >
> > >   class cleanup {
> > >      ~cleanup()
> > >         {
> > >           release_chains (chains);
> > >           free_data_refs (datarefs);
> > >           BITMAP_FREE (looparound_phis);
> > >           free_affine_expand_cache (&name_expansions);
> > >         }
> > >   } cleanup;
> > >
> > > ?  Or some other means of adding registering a RAII-style cleanup?
> > > I mean, we can't wrap it all in
> > >
> > >   try {...}
> > >   finally {...}
> > >
> > > because C++ doesn't have finally.
> > >
> > > OK with this tiny part of the C++ refactoring delayed, but we can also simply
> > > discuss best options.  At least for looparound_phis a good cleanup would
> > > be to pass the bitmap around and use auto_bitmap local to
> > > tree_predictive_commoning_loop ...
> > >
> >
> > Thanks Richi!  One draft (not ready for review) is attached for the further
> > discussion.  It follows the idea of RAII-style cleanup.  I noticed that
> > Martin suggested stepping forward to make tree_predictive_commoning_loop
> > and its callees into one class (Thanks Martin), since there are not many
> > this kind of C++-style work functions, I want to double confirm which option
> > do you guys prefer?
> >
> > One point you might have seen is that to make tree_predictive_commoning_loop
> > and its callees as member functions of one class can avoid to pass bitmap
> > looparound_phis all around what's in the draft.  :)
>
> Such general cleanup is of course desired - Giuliano started some of it within
> GSoC two years ago in the attempt to thread the compilation process.  The
> cleanup then helps to get rid of global state which of course interferes here
> (and avoids unnecessary use of TLS vars).
>
> So yes, encapsulating global state into a class and making accessors
> member functions is something that is desired (but a lot of mechanical
> work).

Btw, the patch you posted is OK with me as well, it achieves the global
state removal, too.

Richard.

> Thanks
> Richard.
>
> > BR,
> > Kewen
> >

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] predcom: Adjust some unnecessary update_ssa calls
  2021-06-08  9:30   ` Kewen.Lin
  2021-06-08 11:02     ` Richard Biener
@ 2021-06-08 14:26     ` Martin Sebor
  1 sibling, 0 replies; 16+ messages in thread
From: Martin Sebor @ 2021-06-08 14:26 UTC (permalink / raw)
  To: Kewen.Lin, Richard Biener; +Cc: GCC Patches, Segher Boessenkool, Bill Schmidt

On 6/8/21 3:30 AM, Kewen.Lin wrote:
> on 2021/6/7 下午10:46, Richard Biener wrote:
>> On Wed, Jun 2, 2021 at 11:29 AM Kewen.Lin <linkw@linux.ibm.com> wrote:
>>>
>>> Hi,
>>>
>>> As Richi suggested in PR100794, this patch is to remove
>>> some unnecessary update_ssa calls with flag
>>> TODO_update_ssa_only_virtuals, also do some refactoring.
>>>
>>> Bootstrapped/regtested on powerpc64le-linux-gnu P9,
>>> x86_64-redhat-linux and aarch64-linux-gnu, built well
>>> on Power9 ppc64le with --with-build-config=bootstrap-O3,
>>> and passed both P8 and P9 SPEC2017 full build with
>>> {-O3, -Ofast} + {,-funroll-loops}.
>>>
>>> Is it ok for trunk?
>>
>> LGTM, minor comment on the fancy C++:
>>
>> +  auto cleanup = [&]() {
>> +    release_chains (chains);
>> +    free_data_refs (datarefs);
>> +    BITMAP_FREE (looparound_phis);
>> +    free_affine_expand_cache (&name_expansions);
>> +  };
>>
>> +      cleanup ();
>> +      return 0;
>>
>> so that could have been
>>
>>    class cleanup {
>>       ~cleanup()
>>          {
>>            release_chains (chains);
>>            free_data_refs (datarefs);
>>            BITMAP_FREE (looparound_phis);
>>            free_affine_expand_cache (&name_expansions);
>>          }
>>    } cleanup;
>>
>> ?  Or some other means of adding registering a RAII-style cleanup?
>> I mean, we can't wrap it all in
>>
>>    try {...}
>>    finally {...}
>>
>> because C++ doesn't have finally.
>>
>> OK with this tiny part of the C++ refactoring delayed, but we can also simply
>> discuss best options.  At least for looparound_phis a good cleanup would
>> be to pass the bitmap around and use auto_bitmap local to
>> tree_predictive_commoning_loop ...
>>
> 
> Thanks Richi!  One draft (not ready for review) is attached for the further
> discussion.  It follows the idea of RAII-style cleanup.  I noticed that
> Martin suggested stepping forward to make tree_predictive_commoning_loop
> and its callees into one class (Thanks Martin), since there are not many
> this kind of C++-style work functions, I want to double confirm which option
> do you guys prefer?

I meant that not necessarily as something to include in this patch
but as a suggestion for a future improvement.  If you'd like to
tackle it at any point that would be great of course :)  In any
event, thanks for double-checking!

The attached patch looks good to me as well (more for the sake of
style than anything else, declaring the class copy ctor and copy 
assignment = delete would make it clear it's not meant to be
copied, although in this case it's unlikely to make a practical
difference).

> 
> One point you might have seen is that to make tree_predictive_commoning_loop
> and its callees as member functions of one class can avoid to pass bitmap
> looparound_phis all around what's in the draft.  :)

Yes, that would simplify the interfaces of all the functions that
the info members are passed to as arguments.

Martin

> 
> BR,
> Kewen
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* predcom: Refactor more by encapsulating global states
  2021-06-08 11:02     ` Richard Biener
  2021-06-08 11:09       ` Richard Biener
@ 2021-06-22  2:35       ` Kewen.Lin
  2021-06-22 16:14         ` Martin Sebor
  2021-06-23  7:22         ` predcom: Refactor more by encapsulating global states Richard Biener
  1 sibling, 2 replies; 16+ messages in thread
From: Kewen.Lin @ 2021-06-22  2:35 UTC (permalink / raw)
  To: Richard Biener, Martin Sebor
  Cc: GCC Patches, Segher Boessenkool, Bill Schmidt

[-- Attachment #1: Type: text/plain, Size: 5031 bytes --]

Hi Richi and Martin,

>>
>> Thanks Richi!  One draft (not ready for review) is attached for the further
>> discussion.  It follows the idea of RAII-style cleanup.  I noticed that
>> Martin suggested stepping forward to make tree_predictive_commoning_loop
>> and its callees into one class (Thanks Martin), since there are not many
>> this kind of C++-style work functions, I want to double confirm which option
>> do you guys prefer?
>>
> 
> Such general cleanup is of course desired - Giuliano started some of it within
> GSoC two years ago in the attempt to thread the compilation process.  The
> cleanup then helps to get rid of global state which of course interferes here
> (and avoids unnecessary use of TLS vars).
> 
> So yes, encapsulating global state into a class and making accessors
> member functions is something that is desired (but a lot of mechanical
> work).
> 
> Thanks
> Richard.
> 
> I meant that not necessarily as something to include in this patch
> but as a suggestion for a future improvement.  If you'd like to
> tackle it at any point that would be great of course   In any
> event, thanks for double-checking!
>
> The attached patch looks good to me as well (more for the sake of
> style than anything else, declaring the class copy ctor and copy assignment = delete would > make it clear it's not meant to be
> copied, although in this case it's unlikely to make a practical
> difference). 
> 
> Martin.


Thanks for your explanation!  Sorry for the late response.
As the way to encapsulate global state into a class and making accessors
member functions looks more complete, I gave up the RAII draft and
switched onto this way.

This patch is to encapsulate global states into a class and
making their accessors as member functions, remove some
consequent useless clean up code, and do some clean up with
RAII.

Bootstrapped/regtested on powerpc64le-linux-gnu P9,
x86_64-redhat-linux and aarch64-linux-gnu, also
bootstrapped on ppc64le P9 with bootstrap-O3 config.

Is it ok for trunk?

BR,
Kewen
-----
gcc/ChangeLog:

	* tree-predcom.c (class pcom_worker): New class.
	(release_chain): Renamed to...
	(pcom_worker::release_chain): ...this.
	(release_chains): Renamed to...
	(pcom_worker::release_chains): ...this.
	(aff_combination_dr_offset): Renamed to...
	(pcom_worker::aff_combination_dr_offset): ...this.
	(determine_offset): Renamed to...
	(pcom_worker::determine_offset): ...this.
	(class comp_ptrs): New class.
	(split_data_refs_to_components): Renamed to...
	(pcom_worker::split_data_refs_to_components): ...this,
	and update with class comp_ptrs.
	(suitable_component_p): Renamed to...
	(pcom_worker::suitable_component_p): ...this.
	(filter_suitable_components): Renamed to...
	(pcom_worker::filter_suitable_components): ...this.
	(valid_initializer_p): Renamed to...
	(pcom_worker::valid_initializer_p): ...this.
	(find_looparound_phi): Renamed to...
	(pcom_worker::find_looparound_phi): ...this.
	(add_looparound_copies): Renamed to...
	(pcom_worker::add_looparound_copies): ...this.
	(determine_roots_comp): Renamed to...
	(pcom_worker::determine_roots_comp): ...this.
	(determine_roots): Renamed to...
	(pcom_worker::determine_roots): ...this.
	(single_nonlooparound_use): Renamed to...
	(pcom_worker::single_nonlooparound_use): ...this.
	(remove_stmt): Renamed to...
	(pcom_worker::remove_stmt): ...this.
	(execute_pred_commoning_chain): Renamed to...
	(pcom_worker::execute_pred_commoning_chain): ...this.
	(execute_pred_commoning): Renamed to...
	(pcom_worker::execute_pred_commoning): ...this.
	(struct epcc_data): New member worker.
	(execute_pred_commoning_cbck): Call execute_pred_commoning
	with pcom_worker pointer.
	(find_use_stmt): Renamed to...
	(pcom_worker::find_use_stmt): ...this.
	(find_associative_operation_root): Renamed to...
	(pcom_worker::find_associative_operation_root): ...this.
	(find_common_use_stmt): Renamed to...
	(pcom_worker::find_common_use_stmt): ...this.
	(combinable_refs_p): Renamed to...
	(pcom_worker::combinable_refs_p): ...this.
	(reassociate_to_the_same_stmt): Renamed to...
	(pcom_worker::reassociate_to_the_same_stmt): ...this.
	(stmt_combining_refs): Renamed to...
	(pcom_worker::stmt_combining_refs): ...this.
	(combine_chains): Renamed to...
	(pcom_worker::combine_chains): ...this.
	(try_combine_chains): Renamed to...
	(pcom_worker::try_combine_chains): ...this.
	(prepare_initializers_chain): Renamed to...
	(pcom_worker::prepare_initializers_chain): ...this.
	(prepare_initializers): Renamed to...
	(pcom_worker::prepare_initializers): ...this.
	(prepare_finalizers_chain): Renamed to...
	(pcom_worker::prepare_finalizers_chain): ...this.
	(prepare_finalizers): Renamed to...
	(pcom_worker::prepare_finalizers): ...this.
	(tree_predictive_commoning_loop): Renamed to...
	(pcom_worker::tree_predictive_commoning_loop): ...this, adjust
	some calls and remove some cleanup code.
	(tree_predictive_commoning): Adjusted to use pcom_worker instance.
	(static variable looparound_phis): Remove.
	(static variable name_expansions): Remove.

[-- Attachment #2: predcom_refactoring.diff --]
[-- Type: text/plain, Size: 34408 bytes --]

 gcc/tree-predcom.c | 483 +++++++++++++++++++++++++++++----------------
 1 file changed, 309 insertions(+), 174 deletions(-)

diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
index ac1674d5486..a4ebf2261b0 100644
--- a/gcc/tree-predcom.c
+++ b/gcc/tree-predcom.c
@@ -375,13 +375,156 @@ struct component
   struct component *next;
 };
 
-/* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */
+/* A class to encapsulate the global states used for predictive
+   commoning work on top of one given LOOP.  */
 
-static bitmap looparound_phis;
+class pcom_worker
+{
+public:
+  pcom_worker (loop_p l) : loop (l), chains (vNULL), cache (NULL)
+  {
+    dependences.create (10);
+    datarefs.create (10);
+  }
+
+  ~pcom_worker ()
+  {
+    free_data_refs (datarefs);
+    free_dependence_relations (dependences);
+    free_affine_expand_cache (&cache);
+    release_chains ();
+  }
+
+  pcom_worker (const pcom_worker &) = delete;
+  pcom_worker &operator= (const pcom_worker &) = delete;
+
+  /* Performs predictive commoning.  */
+  unsigned tree_predictive_commoning_loop (bool allow_unroll_p);
+
+  /* Perform the predictive commoning optimization for chains, make this
+     public for being called in callback execute_pred_commoning_cbck.  */
+  void execute_pred_commoning (bitmap tmp_vars);
+
+private:
+  /* The pointer to the given loop.  */
+  loop_p loop;
+
+  /* All data references.  */
+  vec<data_reference_p> datarefs;
+
+  /* All data dependences.  */
+  vec<ddr_p> dependences;
+
+  /* All chains.  */
+  vec<chain_p> chains;
+
+  /* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */
+  auto_bitmap looparound_phis;
+
+  typedef hash_map<tree, name_expansion *> tree_expand_map_t;
+  /* Cache used by tree_to_aff_combination_expand.  */
+  tree_expand_map_t *cache;
+  /* Splits dependence graph to components.  */
+  struct component *split_data_refs_to_components ();
+
+  /* Check the conditions on references inside each of components COMPS,
+     and remove the unsuitable components from the list.  */
+  struct component *filter_suitable_components (struct component *comps);
+
+  /* Find roots of the values and determine distances in components COMPS,
+     and separates the references to chains.  */
+  void determine_roots (struct component *comps);
+
+  /* Prepare initializers for chains, and free chains that cannot
+     be used because the initializers might trap.  */
+  void prepare_initializers ();
+
+  /* Generates finalizer memory reference for chains.  Returns true if
+     finalizer code generation for chains breaks loop closed ssa form.  */
+  bool prepare_finalizers ();
+
+  /* Try to combine the chains.  */
+  void try_combine_chains ();
+
+  /* Frees CHAINS.  */
+  void release_chains ();
+
+  /* Frees a chain CHAIN.  */
+  void release_chain (chain_p chain);
 
-/* Cache used by tree_to_aff_combination_expand.  */
+  /* Prepare initializers for CHAIN.  Returns false if this is impossible
+     because one of these initializers may trap, true otherwise.  */
+  bool prepare_initializers_chain (chain_p chain);
 
-static hash_map<tree, name_expansion *> *name_expansions;
+  /* Generates finalizer memory references for CHAIN.  Returns true
+     if finalizer code for CHAIN can be generated, otherwise false.  */
+  bool prepare_finalizers_chain (chain_p chain);
+
+  /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET.  */
+  void aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset);
+
+  /* Determines number of iterations of the innermost enclosing loop before
+     B refers to exactly the same location as A and stores it to OFF.  */
+  bool determine_offset (struct data_reference *a, struct data_reference *b,
+			 poly_widest_int *off);
+
+  /* Returns true if the component COMP satisfies the conditions
+     described in 2) at the beginning of this file.  */
+  bool suitable_component_p (struct component *comp);
+
+  /* Returns true if REF is a valid initializer for ROOT with given
+     DISTANCE (in iterations of the innermost enclosing loop).  */
+  bool valid_initializer_p (struct data_reference *ref, unsigned distance,
+			    struct data_reference *root);
+
+  /* Finds looparound phi node of loop that copies the value of REF.  */
+  gphi *find_looparound_phi (dref ref, dref root);
+
+  /* Find roots of the values and determine distances in the component
+     COMP.  The references are redistributed into chains.  */
+  void determine_roots_comp (struct component *comp);
+
+  /* For references in CHAIN that are copied around the loop, add the
+     results of such copies to the chain.  */
+  void add_looparound_copies (chain_p chain);
+
+  /* Returns the single statement in that NAME is used, excepting
+     the looparound phi nodes contained in one of the chains.  */
+  gimple *single_nonlooparound_use (tree name);
+
+  /* Remove statement STMT, as well as the chain of assignments in that
+     it is used.  */
+  void remove_stmt (gimple *stmt);
+
+  /* Perform the predictive commoning optimization for a chain CHAIN.  */
+  void execute_pred_commoning_chain (chain_p chain, bitmap tmp_vars);
+
+  /* Returns the modify statement that uses NAME.  */
+  gimple *find_use_stmt (tree *name);
+
+  /* If the operation used in STMT is associative and commutative, go
+     through the tree of the same operations and returns its root.  */
+  gimple *find_associative_operation_root (gimple *stmt, unsigned *distance);
+
+  /* Returns the common statement in that NAME1 and NAME2 have a use.  */
+  gimple *find_common_use_stmt (tree *name1, tree *name2);
+
+  /* Checks whether R1 and R2 are combined together using CODE, with the
+     result in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order
+     R2 CODE R1 if it is true.  */
+  bool combinable_refs_p (dref r1, dref r2, enum tree_code *code, bool *swap,
+			  tree *rslt_type);
+
+  /* Reassociates the expression in that NAME1 and NAME2 are used so that
+     they are combined in a single statement, and returns this statement.  */
+  gimple *reassociate_to_the_same_stmt (tree name1, tree name2);
+
+  /* Returns the statement that combines references R1 and R2.  */
+  gimple *stmt_combining_refs (dref r1, dref r2);
+
+  /* Tries to combine chains CH1 and CH2 together.  */
+  chain_p combine_chains (chain_p ch1, chain_p ch2);
+};
 
 /* Dumps data reference REF to FILE.  */
 
@@ -540,8 +683,8 @@ dump_components (FILE *file, struct component *comps)
 
 /* Frees a chain CHAIN.  */
 
-static void
-release_chain (chain_p chain)
+void
+pcom_worker::release_chain (chain_p chain)
 {
   dref ref;
   unsigned i;
@@ -567,8 +710,8 @@ release_chain (chain_p chain)
 
 /* Frees CHAINS.  */
 
-static void
-release_chains (vec<chain_p> chains)
+void
+pcom_worker::release_chains ()
 {
   unsigned i;
   chain_p chain;
@@ -672,14 +815,14 @@ suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
 
 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET.  */
 
-static void
-aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
+void
+pcom_worker::aff_combination_dr_offset (struct data_reference *dr,
+					aff_tree *offset)
 {
   tree type = TREE_TYPE (DR_OFFSET (dr));
   aff_tree delta;
 
-  tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
-				  &name_expansions);
+  tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset, &cache);
   aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr)));
   aff_combination_add (offset, &delta);
 }
@@ -690,9 +833,9 @@ aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
    returns false, otherwise returns true.  Both A and B are assumed to
    satisfy suitable_reference_p.  */
 
-static bool
-determine_offset (struct data_reference *a, struct data_reference *b,
-		  poly_widest_int *off)
+bool
+pcom_worker::determine_offset (struct data_reference *a,
+			       struct data_reference *b, poly_widest_int *off)
 {
   aff_tree diff, baseb, step;
   tree typea, typeb;
@@ -726,7 +869,7 @@ determine_offset (struct data_reference *a, struct data_reference *b,
   aff_combination_add (&diff, &baseb);
 
   tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
-				  &step, &name_expansions);
+				  &step, &cache);
   return aff_combination_constant_multiple_p (&diff, &step, off);
 }
 
@@ -747,17 +890,39 @@ last_always_executed_block (class loop *loop)
   return last;
 }
 
-/* Splits dependence graph on DATAREFS described by DEPENDS to components.  */
+/* RAII class for comp_father and comp_size usage.  */
+
+class comp_ptrs
+{
+public:
+  unsigned *comp_father;
+  unsigned *comp_size;
+
+  comp_ptrs (unsigned n)
+  {
+    comp_father = XNEWVEC (unsigned, n + 1);
+    comp_size = XNEWVEC (unsigned, n + 1);
+  }
+
+  ~comp_ptrs ()
+  {
+    free (comp_father);
+    free (comp_size);
+  }
+
+  comp_ptrs (const comp_ptrs &) = delete;
+  comp_ptrs &operator= (const comp_ptrs &) = delete;
+};
+
+/* Splits dependence graph on DATAREFS described by DEPENDENCES to
+   components.  */
 
-static struct component *
-split_data_refs_to_components (class loop *loop,
-			       vec<data_reference_p> datarefs,
-			       vec<ddr_p> depends)
+struct component *
+pcom_worker::split_data_refs_to_components ()
 {
   unsigned i, n = datarefs.length ();
   unsigned ca, ia, ib, bad;
-  unsigned *comp_father = XNEWVEC (unsigned, n + 1);
-  unsigned *comp_size = XNEWVEC (unsigned, n + 1);
+  comp_ptrs ptrs (n);
   struct component **comps;
   struct data_reference *dr, *dra, *drb;
   struct data_dependence_relation *ddr;
@@ -771,22 +936,20 @@ split_data_refs_to_components (class loop *loop,
   FOR_EACH_VEC_ELT (datarefs, i, dr)
     {
       if (!DR_REF (dr))
-	{
 	  /* A fake reference for call or asm_expr that may clobber memory;
 	     just fail.  */
-	  goto end;
-	}
+	  return NULL;
       /* predcom pass isn't prepared to handle calls with data references.  */
       if (is_gimple_call (DR_STMT (dr)))
-	goto end;
+	return NULL;
       dr->aux = (void *) (size_t) i;
-      comp_father[i] = i;
-      comp_size[i] = 1;
+      ptrs.comp_father[i] = i;
+      ptrs.comp_size[i] = 1;
     }
 
   /* A component reserved for the "bad" data references.  */
-  comp_father[n] = n;
-  comp_size[n] = 1;
+  ptrs.comp_father[n] = n;
+  ptrs.comp_size[n] = 1;
 
   FOR_EACH_VEC_ELT (datarefs, i, dr)
     {
@@ -795,11 +958,11 @@ split_data_refs_to_components (class loop *loop,
       if (!suitable_reference_p (dr, &dummy))
 	{
 	  ia = (unsigned) (size_t) dr->aux;
-	  merge_comps (comp_father, comp_size, n, ia);
+	  merge_comps (ptrs.comp_father, ptrs.comp_size, n, ia);
 	}
     }
 
-  FOR_EACH_VEC_ELT (depends, i, ddr)
+  FOR_EACH_VEC_ELT (dependences, i, ddr)
     {
       poly_widest_int dummy_off;
 
@@ -816,12 +979,12 @@ split_data_refs_to_components (class loop *loop,
 	      || DDR_NUM_DIST_VECTS (ddr) == 0))
 	eliminate_store_p = false;
 
-      ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
-      ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
+      ia = component_of (ptrs.comp_father, (unsigned) (size_t) dra->aux);
+      ib = component_of (ptrs.comp_father, (unsigned) (size_t) drb->aux);
       if (ia == ib)
 	continue;
 
-      bad = component_of (comp_father, n);
+      bad = component_of (ptrs.comp_father, n);
 
       /* If both A and B are reads, we may ignore unsuitable dependences.  */
       if (DR_IS_READ (dra) && DR_IS_READ (drb))
@@ -845,7 +1008,7 @@ split_data_refs_to_components (class loop *loop,
 	  else if (!determine_offset (dra, drb, &dummy_off))
 	    {
 	      bitmap_set_bit (no_store_store_comps, ib);
-	      merge_comps (comp_father, comp_size, bad, ia);
+	      merge_comps (ptrs.comp_father, ptrs.comp_size, bad, ia);
 	      continue;
 	    }
 	}
@@ -859,7 +1022,7 @@ split_data_refs_to_components (class loop *loop,
 	  else if (!determine_offset (dra, drb, &dummy_off))
 	    {
 	      bitmap_set_bit (no_store_store_comps, ia);
-	      merge_comps (comp_father, comp_size, bad, ib);
+	      merge_comps (ptrs.comp_father, ptrs.comp_size, bad, ib);
 	      continue;
 	    }
 	}
@@ -867,12 +1030,12 @@ split_data_refs_to_components (class loop *loop,
 	       && ia != bad && ib != bad
 	       && !determine_offset (dra, drb, &dummy_off))
 	{
-	  merge_comps (comp_father, comp_size, bad, ia);
-	  merge_comps (comp_father, comp_size, bad, ib);
+	  merge_comps (ptrs.comp_father, ptrs.comp_size, bad, ia);
+	  merge_comps (ptrs.comp_father, ptrs.comp_size, bad, ib);
 	  continue;
 	}
 
-      merge_comps (comp_father, comp_size, ia, ib);
+      merge_comps (ptrs.comp_father, ptrs.comp_size, ia, ib);
     }
 
   if (eliminate_store_p)
@@ -886,11 +1049,11 @@ split_data_refs_to_components (class loop *loop,
     }
 
   comps = XCNEWVEC (struct component *, n);
-  bad = component_of (comp_father, n);
+  bad = component_of (ptrs.comp_father, n);
   FOR_EACH_VEC_ELT (datarefs, i, dr)
     {
       ia = (unsigned) (size_t) dr->aux;
-      ca = component_of (comp_father, ia);
+      ca = component_of (ptrs.comp_father, ia);
       if (ca == bad)
 	continue;
 
@@ -898,7 +1061,7 @@ split_data_refs_to_components (class loop *loop,
       if (!comp)
 	{
 	  comp = XCNEW (struct component);
-	  comp->refs.create (comp_size[ca]);
+	  comp->refs.create (ptrs.comp_size[ca]);
 	  comp->eliminate_store_p = eliminate_store_p;
 	  comps[ca] = comp;
 	}
@@ -921,7 +1084,7 @@ split_data_refs_to_components (class loop *loop,
       bitmap_iterator bi;
       EXECUTE_IF_SET_IN_BITMAP (no_store_store_comps, 0, ia, bi)
 	{
-	  ca = component_of (comp_father, ia);
+	  ca = component_of (ptrs.comp_father, ia);
 	  if (ca != bad)
 	    comps[ca]->eliminate_store_p = false;
 	}
@@ -937,19 +1100,14 @@ split_data_refs_to_components (class loop *loop,
 	}
     }
   free (comps);
-
-end:
-  free (comp_father);
-  free (comp_size);
   return comp_list;
 }
 
 /* Returns true if the component COMP satisfies the conditions
-   described in 2) at the beginning of this file.  LOOP is the current
-   loop.  */
+   described in 2) at the beginning of this file.  */
 
-static bool
-suitable_component_p (class loop *loop, struct component *comp)
+bool
+pcom_worker::suitable_component_p (struct component *comp)
 {
   unsigned i;
   dref a, first;
@@ -1002,17 +1160,17 @@ suitable_component_p (class loop *loop, struct component *comp)
 /* Check the conditions on references inside each of components COMPS,
    and remove the unsuitable components from the list.  The new list
    of components is returned.  The conditions are described in 2) at
-   the beginning of this file.  LOOP is the current loop.  */
+   the beginning of this file.  */
 
-static struct component *
-filter_suitable_components (class loop *loop, struct component *comps)
+struct component *
+pcom_worker::filter_suitable_components (struct component *comps)
 {
   struct component **comp, *act;
 
   for (comp = &comps; *comp; )
     {
       act = *comp;
-      if (suitable_component_p (loop, act))
+      if (suitable_component_p (act))
 	comp = &act->next;
       else
 	{
@@ -1205,9 +1363,9 @@ name_for_ref (dref ref)
 /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
    iterations of the innermost enclosing loop).  */
 
-static bool
-valid_initializer_p (struct data_reference *ref,
-		     unsigned distance, struct data_reference *root)
+bool
+pcom_worker::valid_initializer_p (struct data_reference *ref, unsigned distance,
+				  struct data_reference *root)
 {
   aff_tree diff, base, step;
   poly_widest_int off;
@@ -1234,7 +1392,7 @@ valid_initializer_p (struct data_reference *ref,
   aff_combination_add (&diff, &base);
 
   tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
-				  &step, &name_expansions);
+				  &step, &cache);
   if (!aff_combination_constant_multiple_p (&diff, &step, &off))
     return false;
 
@@ -1244,13 +1402,13 @@ valid_initializer_p (struct data_reference *ref,
   return true;
 }
 
-/* Finds looparound phi node of LOOP that copies the value of REF, and if its
+/* Finds looparound phi node of loop that copies the value of REF, and if its
    initial value is correct (equal to initial value of REF shifted by one
    iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
    is the root of the current chain.  */
 
-static gphi *
-find_looparound_phi (class loop *loop, dref ref, dref root)
+gphi *
+pcom_worker::find_looparound_phi (dref ref, dref root)
 {
   tree name, init, init_ref;
   gphi *phi = NULL;
@@ -1333,13 +1491,13 @@ insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
     }
 }
 
-/* For references in CHAIN that are copied around the LOOP (created previously
+/* For references in CHAIN that are copied around the loop (created previously
    by PRE, or by user), add the results of such copies to the chain.  This
    enables us to remove the copies by unrolling, and may need less registers
    (also, it may allow us to combine chains together).  */
 
-static void
-add_looparound_copies (class loop *loop, chain_p chain)
+void
+pcom_worker::add_looparound_copies (chain_p chain)
 {
   unsigned i;
   dref ref, root = get_chain_root (chain);
@@ -1350,7 +1508,7 @@ add_looparound_copies (class loop *loop, chain_p chain)
 
   FOR_EACH_VEC_ELT (chain->refs, i, ref)
     {
-      phi = find_looparound_phi (loop, ref, root);
+      phi = find_looparound_phi (ref, root);
       if (!phi)
 	continue;
 
@@ -1360,13 +1518,10 @@ add_looparound_copies (class loop *loop, chain_p chain)
 }
 
 /* Find roots of the values and determine distances in the component COMP.
-   The references are redistributed into CHAINS.  LOOP is the current
-   loop.  */
+   The references are redistributed into chains.  */
 
-static void
-determine_roots_comp (class loop *loop,
-		      struct component *comp,
-		      vec<chain_p> *chains)
+void
+pcom_worker::determine_roots_comp (struct component *comp)
 {
   unsigned i;
   dref a;
@@ -1378,7 +1533,7 @@ determine_roots_comp (class loop *loop,
   if (comp->comp_step == RS_INVARIANT)
     {
       chain = make_invariant_chain (comp);
-      chains->safe_push (chain);
+      chains.safe_push (chain);
       return;
     }
 
@@ -1422,8 +1577,8 @@ determine_roots_comp (class loop *loop,
 	{
 	  if (nontrivial_chain_p (chain))
 	    {
-	      add_looparound_copies (loop, chain);
-	      chains->safe_push (chain);
+	      add_looparound_copies (chain);
+	      chains.safe_push (chain);
 	    }
 	  else
 	    release_chain (chain);
@@ -1443,24 +1598,23 @@ determine_roots_comp (class loop *loop,
 
   if (nontrivial_chain_p (chain))
     {
-      add_looparound_copies (loop, chain);
-      chains->safe_push (chain);
+      add_looparound_copies (chain);
+      chains.safe_push (chain);
     }
   else
     release_chain (chain);
 }
 
 /* Find roots of the values and determine distances in components COMPS, and
-   separates the references to CHAINS.  LOOP is the current loop.  */
+   separates the references to chains.  */
 
-static void
-determine_roots (class loop *loop,
-		 struct component *comps, vec<chain_p> *chains)
+void
+pcom_worker::determine_roots (struct component *comps)
 {
   struct component *comp;
 
   for (comp = comps; comp; comp = comp->next)
-    determine_roots_comp (loop, comp, chains);
+    determine_roots_comp (comp);
 }
 
 /* Replace the reference in statement STMT with temporary variable
@@ -2027,8 +2181,8 @@ execute_load_motion (class loop *loop, chain_p chain, bitmap tmp_vars)
    the looparound phi nodes contained in one of the chains.  If there is no
    such statement, or more statements, NULL is returned.  */
 
-static gimple *
-single_nonlooparound_use (tree name)
+gimple *
+pcom_worker::single_nonlooparound_use (tree name)
 {
   use_operand_p use;
   imm_use_iterator it;
@@ -2062,8 +2216,8 @@ single_nonlooparound_use (tree name)
 /* Remove statement STMT, as well as the chain of assignments in that it is
    used.  */
 
-static void
-remove_stmt (gimple *stmt)
+void
+pcom_worker::remove_stmt (gimple *stmt)
 {
   tree name;
   gimple *next;
@@ -2120,8 +2274,8 @@ remove_stmt (gimple *stmt)
 /* Perform the predictive commoning optimization for a chain CHAIN.
    Uids of the newly created temporary variables are marked in TMP_VARS.*/
 
-static void
-execute_pred_commoning_chain (class loop *loop, chain_p chain,
+void
+pcom_worker::execute_pred_commoning_chain (chain_p chain,
 			      bitmap tmp_vars)
 {
   unsigned i;
@@ -2248,12 +2402,11 @@ determine_unroll_factor (vec<chain_p> chains)
   return factor;
 }
 
-/* Perform the predictive commoning optimization for CHAINS.
+/* Perform the predictive commoning optimization for chains.
    Uids of the newly created temporary variables are marked in TMP_VARS.  */
 
-static void
-execute_pred_commoning (class loop *loop, vec<chain_p> chains,
-			bitmap tmp_vars)
+void
+pcom_worker::execute_pred_commoning (bitmap tmp_vars)
 {
   chain_p chain;
   unsigned i;
@@ -2263,7 +2416,7 @@ execute_pred_commoning (class loop *loop, vec<chain_p> chains,
       if (chain->type == CT_INVARIANT)
 	execute_load_motion (loop, chain, tmp_vars);
       else
-	execute_pred_commoning_chain (loop, chain, tmp_vars);
+	execute_pred_commoning_chain (chain, tmp_vars);
     }
 
   FOR_EACH_VEC_ELT (chains, i, chain)
@@ -2330,18 +2483,20 @@ struct epcc_data
 {
   vec<chain_p> chains;
   bitmap tmp_vars;
+  pcom_worker *worker;
 };
 
 static void
-execute_pred_commoning_cbck (class loop *loop, void *data)
+execute_pred_commoning_cbck (class loop *loop ATTRIBUTE_UNUSED, void *data)
 {
   struct epcc_data *const dta = (struct epcc_data *) data;
+  pcom_worker *worker = dta->worker;
 
   /* Restore phi nodes that were replaced by ssa names before
      tree_transform_and_unroll_loop (see detailed description in
      tree_predictive_commoning_loop).  */
   replace_names_by_phis (dta->chains);
-  execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
+  worker->execute_pred_commoning (dta->tmp_vars);
 }
 
 /* Base NAME and all the names in the chain of phi nodes that use it
@@ -2433,8 +2588,8 @@ chain_can_be_combined_p (chain_p chain)
    statements, NAME is replaced with the actual name used in the returned
    statement.  */
 
-static gimple *
-find_use_stmt (tree *name)
+gimple *
+pcom_worker::find_use_stmt (tree *name)
 {
   gimple *stmt;
   tree rhs, lhs;
@@ -2486,8 +2641,8 @@ may_reassociate_p (tree type, enum tree_code code)
    tree of the same operations and returns its root.  Distance to the root
    is stored in DISTANCE.  */
 
-static gimple *
-find_associative_operation_root (gimple *stmt, unsigned *distance)
+gimple *
+pcom_worker::find_associative_operation_root (gimple *stmt, unsigned *distance)
 {
   tree lhs;
   gimple *next;
@@ -2523,8 +2678,8 @@ find_associative_operation_root (gimple *stmt, unsigned *distance)
    tree formed by this operation instead of the statement that uses NAME1 or
    NAME2.  */
 
-static gimple *
-find_common_use_stmt (tree *name1, tree *name2)
+gimple *
+pcom_worker::find_common_use_stmt (tree *name1, tree *name2)
 {
   gimple *stmt1, *stmt2;
 
@@ -2553,8 +2708,8 @@ find_common_use_stmt (tree *name1, tree *name2)
    in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
    if it is true.  If CODE is ERROR_MARK, set these values instead.  */
 
-static bool
-combinable_refs_p (dref r1, dref r2,
+bool
+pcom_worker::combinable_refs_p (dref r1, dref r2,
 		   enum tree_code *code, bool *swap, tree *rslt_type)
 {
   enum tree_code acode;
@@ -2622,8 +2777,8 @@ remove_name_from_operation (gimple *stmt, tree op)
 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
    are combined in a single statement, and returns this statement.  */
 
-static gimple *
-reassociate_to_the_same_stmt (tree name1, tree name2)
+gimple *
+pcom_worker::reassociate_to_the_same_stmt (tree name1, tree name2)
 {
   gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
   gassign *new_stmt, *tmp_stmt;
@@ -2707,8 +2862,8 @@ reassociate_to_the_same_stmt (tree name1, tree name2)
    associative and commutative operation in the same expression, reassociate
    the expression so that they are used in the same statement.  */
 
-static gimple *
-stmt_combining_refs (dref r1, dref r2)
+gimple *
+pcom_worker::stmt_combining_refs (dref r1, dref r2)
 {
   gimple *stmt1, *stmt2;
   tree name1 = name_for_ref (r1);
@@ -2725,8 +2880,8 @@ stmt_combining_refs (dref r1, dref r2)
 /* Tries to combine chains CH1 and CH2 together.  If this succeeds, the
    description of the new chain is returned, otherwise we return NULL.  */
 
-static chain_p
-combine_chains (chain_p ch1, chain_p ch2)
+chain_p
+pcom_worker::combine_chains (chain_p ch1, chain_p ch2)
 {
   dref r1, r2, nw;
   enum tree_code op = ERROR_MARK;
@@ -2814,17 +2969,17 @@ pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
   return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
 }
 
-/* Try to combine the CHAINS in LOOP.  */
+/* Try to combine the chains.  */
 
-static void
-try_combine_chains (class loop *loop, vec<chain_p> *chains)
+void
+pcom_worker::try_combine_chains ()
 {
   unsigned i, j;
   chain_p ch1, ch2, cch;
   auto_vec<chain_p> worklist;
   bool combined_p = false;
 
-  FOR_EACH_VEC_ELT (*chains, i, ch1)
+  FOR_EACH_VEC_ELT (chains, i, ch1)
     if (chain_can_be_combined_p (ch1))
       worklist.safe_push (ch1);
 
@@ -2834,7 +2989,7 @@ try_combine_chains (class loop *loop, vec<chain_p> *chains)
       if (!chain_can_be_combined_p (ch1))
 	continue;
 
-      FOR_EACH_VEC_ELT (*chains, j, ch2)
+      FOR_EACH_VEC_ELT (chains, j, ch2)
 	{
 	  if (!chain_can_be_combined_p (ch2))
 	    continue;
@@ -2843,7 +2998,7 @@ try_combine_chains (class loop *loop, vec<chain_p> *chains)
 	  if (cch)
 	    {
 	      worklist.safe_push (cch);
-	      chains->safe_push (cch);
+	      chains.safe_push (cch);
 	      combined_p = true;
 	      break;
 	    }
@@ -2867,7 +3022,7 @@ try_combine_chains (class loop *loop, vec<chain_p> *chains)
 
      We first update position information for all combined chains.  */
   dref ref;
-  for (i = 0; chains->iterate (i, &ch1); ++i)
+  for (i = 0; chains.iterate (i, &ch1); ++i)
     {
       if (ch1->type != CT_COMBINATION || ch1->combined)
 	continue;
@@ -2878,7 +3033,7 @@ try_combine_chains (class loop *loop, vec<chain_p> *chains)
       update_pos_for_combined_chains (ch1);
     }
   /* Then sort references according to newly updated position information.  */
-  for (i = 0; chains->iterate (i, &ch1); ++i)
+  for (i = 0; chains.iterate (i, &ch1); ++i)
     {
       if (ch1->type != CT_COMBINATION && !ch1->combined)
 	continue;
@@ -2990,11 +3145,11 @@ prepare_initializers_chain_store_elim (class loop *loop, chain_p chain)
   return true;
 }
 
-/* Prepare initializers for CHAIN in LOOP.  Returns false if this is
-   impossible because one of these initializers may trap, true otherwise.  */
+/* Prepare initializers for CHAIN.  Returns false if this is impossible
+   because one of these initializers may trap, true otherwise.  */
 
-static bool
-prepare_initializers_chain (class loop *loop, chain_p chain)
+bool
+pcom_worker::prepare_initializers_chain (chain_p chain)
 {
   unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
   struct data_reference *dr = get_chain_root (chain)->ref;
@@ -3046,11 +3201,11 @@ prepare_initializers_chain (class loop *loop, chain_p chain)
   return true;
 }
 
-/* Prepare initializers for CHAINS in LOOP, and free chains that cannot
+/* Prepare initializers for chains, and free chains that cannot
    be used because the initializers might trap.  */
 
-static void
-prepare_initializers (class loop *loop, vec<chain_p> chains)
+void
+pcom_worker::prepare_initializers ()
 {
   chain_p chain;
   unsigned i;
@@ -3058,7 +3213,7 @@ prepare_initializers (class loop *loop, vec<chain_p> chains)
   for (i = 0; i < chains.length (); )
     {
       chain = chains[i];
-      if (prepare_initializers_chain (loop, chain))
+      if (prepare_initializers_chain (chain))
 	i++;
       else
 	{
@@ -3068,11 +3223,11 @@ prepare_initializers (class loop *loop, vec<chain_p> chains)
     }
 }
 
-/* Generates finalizer memory references for CHAIN in LOOP.  Returns true
+/* Generates finalizer memory references for CHAIN.  Returns true
    if finalizer code for CHAIN can be generated, otherwise false.  */
 
-static bool
-prepare_finalizers_chain (class loop *loop, chain_p chain)
+bool
+pcom_worker::prepare_finalizers_chain (chain_p chain)
 {
   unsigned i, n = chain->length;
   struct data_reference *dr = get_chain_root (chain)->ref;
@@ -3116,11 +3271,11 @@ prepare_finalizers_chain (class loop *loop, chain_p chain)
   return true;
 }
 
-/* Generates finalizer memory reference for CHAINS in LOOP.  Returns true
-   if finalizer code generation for CHAINS breaks loop closed ssa form.  */
+/* Generates finalizer memory reference for chains.  Returns true if
+   finalizer code generation for chains breaks loop closed ssa form.  */
 
-static bool
-prepare_finalizers (class loop *loop, vec<chain_p> chains)
+bool
+pcom_worker::prepare_finalizers ()
 {
   chain_p chain;
   unsigned i;
@@ -3138,7 +3293,7 @@ prepare_finalizers (class loop *loop, vec<chain_p> chains)
 	  continue;
 	}
 
-      if (prepare_finalizers_chain (loop, chain))
+      if (prepare_finalizers_chain (chain))
 	{
 	  i++;
 	  /* Be conservative, assume loop closed ssa form is corrupted
@@ -3156,7 +3311,7 @@ prepare_finalizers (class loop *loop, vec<chain_p> chains)
   return loop_closed_ssa;
 }
 
-/* Insert all initializing gimple stmts into loop's entry edge.  */
+/* Insert all initializing gimple stmts into LOOP's entry edge.  */
 
 static void
 insert_init_seqs (class loop *loop, vec<chain_p> chains)
@@ -3177,19 +3332,16 @@ insert_init_seqs (class loop *loop, vec<chain_p> chains)
    form was corrupted.  Non-zero return value indicates some changes were
    applied to this loop.  */
 
-static unsigned
-tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p)
+unsigned
+pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p)
 {
-  vec<data_reference_p> datarefs;
-  vec<ddr_p> dependences;
   struct component *components;
-  vec<chain_p> chains = vNULL;
   unsigned unroll_factor = 0;
   class tree_niter_desc desc;
   bool unroll = false, loop_closed_ssa = false;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
-    fprintf (dump_file, "Processing loop %d\n",  loop->num);
+    fprintf (dump_file, "Processing loop %d\n", loop->num);
 
   /* Nothing for predicitive commoning if loop only iterates 1 time.  */
   if (get_max_loop_iterations_int (loop) == 0)
@@ -3203,30 +3355,22 @@ tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p)
   /* Find the data references and split them into components according to their
      dependence relations.  */
   auto_vec<loop_p, 3> loop_nest;
-  dependences.create (10);
-  datarefs.create (10);
-  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
-					   &dependences))
+  if (!compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
+					  &dependences))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "Cannot analyze data dependencies\n");
-      free_data_refs (datarefs);
-      free_dependence_relations (dependences);
       return 0;
     }
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     dump_data_dependence_relations (dump_file, dependences);
 
-  components = split_data_refs_to_components (loop, datarefs, dependences);
+  components = split_data_refs_to_components ();
+
   loop_nest.release ();
-  free_dependence_relations (dependences);
   if (!components)
-    {
-      free_data_refs (datarefs);
-      free_affine_expand_cache (&name_expansions);
-      return 0;
-    }
+    return 0;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
@@ -3235,34 +3379,25 @@ tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p)
     }
 
   /* Find the suitable components and split them into chains.  */
-  components = filter_suitable_components (loop, components);
+  components = filter_suitable_components (components);
 
   auto_bitmap tmp_vars;
-  looparound_phis = BITMAP_ALLOC (NULL);
-  determine_roots (loop, components, &chains);
+  determine_roots (components);
   release_components (components);
 
-  auto cleanup = [&]() {
-    release_chains (chains);
-    free_data_refs (datarefs);
-    BITMAP_FREE (looparound_phis);
-    free_affine_expand_cache (&name_expansions);
-  };
-
   if (!chains.exists ())
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file,
 		 "Predictive commoning failed: no suitable chains\n");
-      cleanup ();
       return 0;
     }
 
-  prepare_initializers (loop, chains);
-  loop_closed_ssa = prepare_finalizers (loop, chains);
+  prepare_initializers ();
+  loop_closed_ssa = prepare_finalizers ();
 
   /* Try to combine the chains that are always worked with together.  */
-  try_combine_chains (loop, &chains);
+  try_combine_chains ();
 
   insert_init_seqs (loop, chains);
 
@@ -3289,8 +3424,9 @@ tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p)
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
 
-      dta.chains = chains;
       dta.tmp_vars = tmp_vars;
+      dta.chains = chains;
+      dta.worker = this;
 
       /* Cfg manipulations performed in tree_transform_and_unroll_loop before
 	 execute_pred_commoning_cbck is called may cause phi nodes to be
@@ -3310,11 +3446,9 @@ tree_predictive_commoning_loop (class loop *loop, bool allow_unroll_p)
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file,
 		 "Executing predictive commoning without unrolling.\n");
-      execute_pred_commoning (loop, chains, tmp_vars);
+      execute_pred_commoning (tmp_vars);
     }
 
-  cleanup ();
-
   return (unroll ? 2 : 1) | (loop_closed_ssa ? 4 : 1);
 }
 
@@ -3330,7 +3464,8 @@ tree_predictive_commoning (bool allow_unroll_p)
   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
     if (optimize_loop_for_speed_p (loop))
       {
-	changed |= tree_predictive_commoning_loop (loop, allow_unroll_p);
+	pcom_worker w(loop);
+	changed |= w.tree_predictive_commoning_loop (allow_unroll_p);
       }
   free_original_copy_tables ();
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: predcom: Refactor more by encapsulating global states
  2021-06-22  2:35       ` predcom: Refactor more by encapsulating global states Kewen.Lin
@ 2021-06-22 16:14         ` Martin Sebor
  2021-06-24  9:26           ` Kewen.Lin
  2021-06-23  7:22         ` predcom: Refactor more by encapsulating global states Richard Biener
  1 sibling, 1 reply; 16+ messages in thread
From: Martin Sebor @ 2021-06-22 16:14 UTC (permalink / raw)
  To: Kewen.Lin, Richard Biener; +Cc: GCC Patches, Segher Boessenkool, Bill Schmidt

On 6/21/21 8:35 PM, Kewen.Lin wrote:
> Hi Richi and Martin,
> 
>>>
>>> Thanks Richi!  One draft (not ready for review) is attached for the further
>>> discussion.  It follows the idea of RAII-style cleanup.  I noticed that
>>> Martin suggested stepping forward to make tree_predictive_commoning_loop
>>> and its callees into one class (Thanks Martin), since there are not many
>>> this kind of C++-style work functions, I want to double confirm which option
>>> do you guys prefer?
>>>
>>
>> Such general cleanup is of course desired - Giuliano started some of it within
>> GSoC two years ago in the attempt to thread the compilation process.  The
>> cleanup then helps to get rid of global state which of course interferes here
>> (and avoids unnecessary use of TLS vars).
>>
>> So yes, encapsulating global state into a class and making accessors
>> member functions is something that is desired (but a lot of mechanical
>> work).
>>
>> Thanks
>> Richard.
>>
>> I meant that not necessarily as something to include in this patch
>> but as a suggestion for a future improvement.  If you'd like to
>> tackle it at any point that would be great of course   In any
>> event, thanks for double-checking!
>>
>> The attached patch looks good to me as well (more for the sake of
>> style than anything else, declaring the class copy ctor and copy assignment = delete would > make it clear it's not meant to be
>> copied, although in this case it's unlikely to make a practical
>> difference).
>>
>> Martin.
> 
> 
> Thanks for your explanation!  Sorry for the late response.
> As the way to encapsulate global state into a class and making accessors
> member functions looks more complete, I gave up the RAII draft and
> switched onto this way.
> 
> This patch is to encapsulate global states into a class and
> making their accessors as member functions, remove some
> consequent useless clean up code, and do some clean up with
> RAII.

Nice!

A further improvement worth considering (if you're so inclined :)
is replacing the pcom_worker vec members with auto_vec (obviating
having to explicitly release them) and for the same reason also
replacing the comp_ptrs bare pointer members with auto_vecs.
There may be other opportunities to do the same in individual
functions (I'd look to get rid of as many calls to functions
like XNEW()/XNEWVEC() and free() use auto_vec instead).

An unrelated but worthwhile change is to replace the FOR_EACH_
loops with C++ 11 range loops, analogously to:
https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572315.html

Finally, the only loosely followed naming convention for member
variables is to start them with the m_ prefix.

These just suggestions that could be done in a followup, not
something I would consider prerequisite for accepting the patch
as is if I were in a position to make such a decision.

Martin

> 
> Bootstrapped/regtested on powerpc64le-linux-gnu P9,
> x86_64-redhat-linux and aarch64-linux-gnu, also
> bootstrapped on ppc64le P9 with bootstrap-O3 config.
> 
> Is it ok for trunk?
> 
> BR,
> Kewen
> -----
> gcc/ChangeLog:
> 
> 	* tree-predcom.c (class pcom_worker): New class.
> 	(release_chain): Renamed to...
> 	(pcom_worker::release_chain): ...this.
> 	(release_chains): Renamed to...
> 	(pcom_worker::release_chains): ...this.
> 	(aff_combination_dr_offset): Renamed to...
> 	(pcom_worker::aff_combination_dr_offset): ...this.
> 	(determine_offset): Renamed to...
> 	(pcom_worker::determine_offset): ...this.
> 	(class comp_ptrs): New class.
> 	(split_data_refs_to_components): Renamed to...
> 	(pcom_worker::split_data_refs_to_components): ...this,
> 	and update with class comp_ptrs.
> 	(suitable_component_p): Renamed to...
> 	(pcom_worker::suitable_component_p): ...this.
> 	(filter_suitable_components): Renamed to...
> 	(pcom_worker::filter_suitable_components): ...this.
> 	(valid_initializer_p): Renamed to...
> 	(pcom_worker::valid_initializer_p): ...this.
> 	(find_looparound_phi): Renamed to...
> 	(pcom_worker::find_looparound_phi): ...this.
> 	(add_looparound_copies): Renamed to...
> 	(pcom_worker::add_looparound_copies): ...this.
> 	(determine_roots_comp): Renamed to...
> 	(pcom_worker::determine_roots_comp): ...this.
> 	(determine_roots): Renamed to...
> 	(pcom_worker::determine_roots): ...this.
> 	(single_nonlooparound_use): Renamed to...
> 	(pcom_worker::single_nonlooparound_use): ...this.
> 	(remove_stmt): Renamed to...
> 	(pcom_worker::remove_stmt): ...this.
> 	(execute_pred_commoning_chain): Renamed to...
> 	(pcom_worker::execute_pred_commoning_chain): ...this.
> 	(execute_pred_commoning): Renamed to...
> 	(pcom_worker::execute_pred_commoning): ...this.
> 	(struct epcc_data): New member worker.
> 	(execute_pred_commoning_cbck): Call execute_pred_commoning
> 	with pcom_worker pointer.
> 	(find_use_stmt): Renamed to...
> 	(pcom_worker::find_use_stmt): ...this.
> 	(find_associative_operation_root): Renamed to...
> 	(pcom_worker::find_associative_operation_root): ...this.
> 	(find_common_use_stmt): Renamed to...
> 	(pcom_worker::find_common_use_stmt): ...this.
> 	(combinable_refs_p): Renamed to...
> 	(pcom_worker::combinable_refs_p): ...this.
> 	(reassociate_to_the_same_stmt): Renamed to...
> 	(pcom_worker::reassociate_to_the_same_stmt): ...this.
> 	(stmt_combining_refs): Renamed to...
> 	(pcom_worker::stmt_combining_refs): ...this.
> 	(combine_chains): Renamed to...
> 	(pcom_worker::combine_chains): ...this.
> 	(try_combine_chains): Renamed to...
> 	(pcom_worker::try_combine_chains): ...this.
> 	(prepare_initializers_chain): Renamed to...
> 	(pcom_worker::prepare_initializers_chain): ...this.
> 	(prepare_initializers): Renamed to...
> 	(pcom_worker::prepare_initializers): ...this.
> 	(prepare_finalizers_chain): Renamed to...
> 	(pcom_worker::prepare_finalizers_chain): ...this.
> 	(prepare_finalizers): Renamed to...
> 	(pcom_worker::prepare_finalizers): ...this.
> 	(tree_predictive_commoning_loop): Renamed to...
> 	(pcom_worker::tree_predictive_commoning_loop): ...this, adjust
> 	some calls and remove some cleanup code.
> 	(tree_predictive_commoning): Adjusted to use pcom_worker instance.
> 	(static variable looparound_phis): Remove.
> 	(static variable name_expansions): Remove.
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: predcom: Refactor more by encapsulating global states
  2021-06-22  2:35       ` predcom: Refactor more by encapsulating global states Kewen.Lin
  2021-06-22 16:14         ` Martin Sebor
@ 2021-06-23  7:22         ` Richard Biener
  2021-06-24  9:28           ` Kewen.Lin
  1 sibling, 1 reply; 16+ messages in thread
From: Richard Biener @ 2021-06-23  7:22 UTC (permalink / raw)
  To: Kewen.Lin; +Cc: Martin Sebor, GCC Patches, Segher Boessenkool, Bill Schmidt

On Tue, Jun 22, 2021 at 4:35 AM Kewen.Lin <linkw@linux.ibm.com> wrote:
>
> Hi Richi and Martin,
>
> >>
> >> Thanks Richi!  One draft (not ready for review) is attached for the further
> >> discussion.  It follows the idea of RAII-style cleanup.  I noticed that
> >> Martin suggested stepping forward to make tree_predictive_commoning_loop
> >> and its callees into one class (Thanks Martin), since there are not many
> >> this kind of C++-style work functions, I want to double confirm which option
> >> do you guys prefer?
> >>
> >
> > Such general cleanup is of course desired - Giuliano started some of it within
> > GSoC two years ago in the attempt to thread the compilation process.  The
> > cleanup then helps to get rid of global state which of course interferes here
> > (and avoids unnecessary use of TLS vars).
> >
> > So yes, encapsulating global state into a class and making accessors
> > member functions is something that is desired (but a lot of mechanical
> > work).
> >
> > Thanks
> > Richard.
> >
> > I meant that not necessarily as something to include in this patch
> > but as a suggestion for a future improvement.  If you'd like to
> > tackle it at any point that would be great of course   In any
> > event, thanks for double-checking!
> >
> > The attached patch looks good to me as well (more for the sake of
> > style than anything else, declaring the class copy ctor and copy assignment = delete would > make it clear it's not meant to be
> > copied, although in this case it's unlikely to make a practical
> > difference).
> >
> > Martin.
>
>
> Thanks for your explanation!  Sorry for the late response.
> As the way to encapsulate global state into a class and making accessors
> member functions looks more complete, I gave up the RAII draft and
> switched onto this way.
>
> This patch is to encapsulate global states into a class and
> making their accessors as member functions, remove some
> consequent useless clean up code, and do some clean up with
> RAII.
>
> Bootstrapped/regtested on powerpc64le-linux-gnu P9,
> x86_64-redhat-linux and aarch64-linux-gnu, also
> bootstrapped on ppc64le P9 with bootstrap-O3 config.
>
> Is it ok for trunk?

OK, thanks for the work - Martins suggestions are good but indeed can be
handled as followup.

Richard.

> BR,
> Kewen
> -----
> gcc/ChangeLog:
>
>         * tree-predcom.c (class pcom_worker): New class.
>         (release_chain): Renamed to...
>         (pcom_worker::release_chain): ...this.
>         (release_chains): Renamed to...
>         (pcom_worker::release_chains): ...this.
>         (aff_combination_dr_offset): Renamed to...
>         (pcom_worker::aff_combination_dr_offset): ...this.
>         (determine_offset): Renamed to...
>         (pcom_worker::determine_offset): ...this.
>         (class comp_ptrs): New class.
>         (split_data_refs_to_components): Renamed to...
>         (pcom_worker::split_data_refs_to_components): ...this,
>         and update with class comp_ptrs.
>         (suitable_component_p): Renamed to...
>         (pcom_worker::suitable_component_p): ...this.
>         (filter_suitable_components): Renamed to...
>         (pcom_worker::filter_suitable_components): ...this.
>         (valid_initializer_p): Renamed to...
>         (pcom_worker::valid_initializer_p): ...this.
>         (find_looparound_phi): Renamed to...
>         (pcom_worker::find_looparound_phi): ...this.
>         (add_looparound_copies): Renamed to...
>         (pcom_worker::add_looparound_copies): ...this.
>         (determine_roots_comp): Renamed to...
>         (pcom_worker::determine_roots_comp): ...this.
>         (determine_roots): Renamed to...
>         (pcom_worker::determine_roots): ...this.
>         (single_nonlooparound_use): Renamed to...
>         (pcom_worker::single_nonlooparound_use): ...this.
>         (remove_stmt): Renamed to...
>         (pcom_worker::remove_stmt): ...this.
>         (execute_pred_commoning_chain): Renamed to...
>         (pcom_worker::execute_pred_commoning_chain): ...this.
>         (execute_pred_commoning): Renamed to...
>         (pcom_worker::execute_pred_commoning): ...this.
>         (struct epcc_data): New member worker.
>         (execute_pred_commoning_cbck): Call execute_pred_commoning
>         with pcom_worker pointer.
>         (find_use_stmt): Renamed to...
>         (pcom_worker::find_use_stmt): ...this.
>         (find_associative_operation_root): Renamed to...
>         (pcom_worker::find_associative_operation_root): ...this.
>         (find_common_use_stmt): Renamed to...
>         (pcom_worker::find_common_use_stmt): ...this.
>         (combinable_refs_p): Renamed to...
>         (pcom_worker::combinable_refs_p): ...this.
>         (reassociate_to_the_same_stmt): Renamed to...
>         (pcom_worker::reassociate_to_the_same_stmt): ...this.
>         (stmt_combining_refs): Renamed to...
>         (pcom_worker::stmt_combining_refs): ...this.
>         (combine_chains): Renamed to...
>         (pcom_worker::combine_chains): ...this.
>         (try_combine_chains): Renamed to...
>         (pcom_worker::try_combine_chains): ...this.
>         (prepare_initializers_chain): Renamed to...
>         (pcom_worker::prepare_initializers_chain): ...this.
>         (prepare_initializers): Renamed to...
>         (pcom_worker::prepare_initializers): ...this.
>         (prepare_finalizers_chain): Renamed to...
>         (pcom_worker::prepare_finalizers_chain): ...this.
>         (prepare_finalizers): Renamed to...
>         (pcom_worker::prepare_finalizers): ...this.
>         (tree_predictive_commoning_loop): Renamed to...
>         (pcom_worker::tree_predictive_commoning_loop): ...this, adjust
>         some calls and remove some cleanup code.
>         (tree_predictive_commoning): Adjusted to use pcom_worker instance.
>         (static variable looparound_phis): Remove.
>         (static variable name_expansions): Remove.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: predcom: Refactor more by encapsulating global states
  2021-06-22 16:14         ` Martin Sebor
@ 2021-06-24  9:26           ` Kewen.Lin
  2021-07-19  6:28             ` [PATCH] predcom: Refactor more using auto_vec Kewen.Lin
  0 siblings, 1 reply; 16+ messages in thread
From: Kewen.Lin @ 2021-06-24  9:26 UTC (permalink / raw)
  To: Martin Sebor
  Cc: GCC Patches, Segher Boessenkool, Bill Schmidt, Richard Biener

Hi Martin,

on 2021/6/23 上午12:14, Martin Sebor wrote:
> On 6/21/21 8:35 PM, Kewen.Lin wrote:
>> Hi Richi and Martin,
>>
>>>>
>>>> Thanks Richi!  One draft (not ready for review) is attached for the further
>>>> discussion.  It follows the idea of RAII-style cleanup.  I noticed that
>>>> Martin suggested stepping forward to make tree_predictive_commoning_loop
>>>> and its callees into one class (Thanks Martin), since there are not many
>>>> this kind of C++-style work functions, I want to double confirm which option
>>>> do you guys prefer?
>>>>
>>>
>>> Such general cleanup is of course desired - Giuliano started some of it within
>>> GSoC two years ago in the attempt to thread the compilation process.  The
>>> cleanup then helps to get rid of global state which of course interferes here
>>> (and avoids unnecessary use of TLS vars).
>>>
>>> So yes, encapsulating global state into a class and making accessors
>>> member functions is something that is desired (but a lot of mechanical
>>> work).
>>>
>>> Thanks
>>> Richard.
>>>
>>> I meant that not necessarily as something to include in this patch
>>> but as a suggestion for a future improvement.  If you'd like to
>>> tackle it at any point that would be great of course   In any
>>> event, thanks for double-checking!
>>>
>>> The attached patch looks good to me as well (more for the sake of
>>> style than anything else, declaring the class copy ctor and copy assignment = delete would > make it clear it's not meant to be
>>> copied, although in this case it's unlikely to make a practical
>>> difference).
>>>
>>> Martin.
>>
>>
>> Thanks for your explanation!  Sorry for the late response.
>> As the way to encapsulate global state into a class and making accessors
>> member functions looks more complete, I gave up the RAII draft and
>> switched onto this way.
>>
>> This patch is to encapsulate global states into a class and
>> making their accessors as member functions, remove some
>> consequent useless clean up code, and do some clean up with
>> RAII.
> 
> Nice!
> 
> A further improvement worth considering (if you're so inclined :)
> is replacing the pcom_worker vec members with auto_vec (obviating
> having to explicitly release them) and for the same reason also
> replacing the comp_ptrs bare pointer members with auto_vecs.
> There may be other opportunities to do the same in individual
> functions (I'd look to get rid of as many calls to functions
> like XNEW()/XNEWVEC() and free() use auto_vec instead).
> 
> An unrelated but worthwhile change is to replace the FOR_EACH_
> loops with C++ 11 range loops, analogously to:
> https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572315.html
> 
> Finally, the only loosely followed naming convention for member
> variables is to start them with the m_ prefix.
> 
> These just suggestions that could be done in a followup, not
> something I would consider prerequisite for accepting the patch
> as is if I were in a position to make such a decision.
> 

Many thanks for all the great suggestions!  I'll deal with them
in a follow up patch.


BR,
Kewen

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: predcom: Refactor more by encapsulating global states
  2021-06-23  7:22         ` predcom: Refactor more by encapsulating global states Richard Biener
@ 2021-06-24  9:28           ` Kewen.Lin
  0 siblings, 0 replies; 16+ messages in thread
From: Kewen.Lin @ 2021-06-24  9:28 UTC (permalink / raw)
  To: Richard Biener
  Cc: Martin Sebor, GCC Patches, Segher Boessenkool, Bill Schmidt

on 2021/6/23 下午3:22, Richard Biener wrote:
> On Tue, Jun 22, 2021 at 4:35 AM Kewen.Lin <linkw@linux.ibm.com> wrote:
>>
>> Hi Richi and Martin,
>>
>>>>
>>>> Thanks Richi!  One draft (not ready for review) is attached for the further
>>>> discussion.  It follows the idea of RAII-style cleanup.  I noticed that
>>>> Martin suggested stepping forward to make tree_predictive_commoning_loop
>>>> and its callees into one class (Thanks Martin), since there are not many
>>>> this kind of C++-style work functions, I want to double confirm which option
>>>> do you guys prefer?
>>>>
>>>
>>> Such general cleanup is of course desired - Giuliano started some of it within
>>> GSoC two years ago in the attempt to thread the compilation process.  The
>>> cleanup then helps to get rid of global state which of course interferes here
>>> (and avoids unnecessary use of TLS vars).
>>>
>>> So yes, encapsulating global state into a class and making accessors
>>> member functions is something that is desired (but a lot of mechanical
>>> work).
>>>
>>> Thanks
>>> Richard.
>>>
>>> I meant that not necessarily as something to include in this patch
>>> but as a suggestion for a future improvement.  If you'd like to
>>> tackle it at any point that would be great of course   In any
>>> event, thanks for double-checking!
>>>
>>> The attached patch looks good to me as well (more for the sake of
>>> style than anything else, declaring the class copy ctor and copy assignment = delete would > make it clear it's not meant to be
>>> copied, although in this case it's unlikely to make a practical
>>> difference).
>>>
>>> Martin.
>>
>>
>> Thanks for your explanation!  Sorry for the late response.
>> As the way to encapsulate global state into a class and making accessors
>> member functions looks more complete, I gave up the RAII draft and
>> switched onto this way.
>>
>> This patch is to encapsulate global states into a class and
>> making their accessors as member functions, remove some
>> consequent useless clean up code, and do some clean up with
>> RAII.
>>
>> Bootstrapped/regtested on powerpc64le-linux-gnu P9,
>> x86_64-redhat-linux and aarch64-linux-gnu, also
>> bootstrapped on ppc64le P9 with bootstrap-O3 config.
>>
>> Is it ok for trunk?
> 
> OK, thanks for the work - Martins suggestions are good but indeed can be
> handled as followup.
> 

Thanks Richi!  Re-tested and committed in r12-1767, I will make up a
follow up patch for Martin's suggestions.

BR,
Kewen

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH] predcom: Refactor more using auto_vec
  2021-06-24  9:26           ` Kewen.Lin
@ 2021-07-19  6:28             ` Kewen.Lin
  2021-07-19 20:45               ` Martin Sebor
  2021-07-20 11:19               ` Richard Biener
  0 siblings, 2 replies; 16+ messages in thread
From: Kewen.Lin @ 2021-07-19  6:28 UTC (permalink / raw)
  To: Martin Sebor, Richard Biener
  Cc: Bill Schmidt, GCC Patches, Segher Boessenkool

[-- Attachment #1: Type: text/plain, Size: 4192 bytes --]

Hi Martin & Richard,

>> A further improvement worth considering (if you're so inclined :)
>> is replacing the pcom_worker vec members with auto_vec (obviating
>> having to explicitly release them) and for the same reason also
>> replacing the comp_ptrs bare pointer members with auto_vecs.
>> There may be other opportunities to do the same in individual
>> functions (I'd look to get rid of as many calls to functions
>> like XNEW()/XNEWVEC() and free() use auto_vec instead).
>>
>> An unrelated but worthwhile change is to replace the FOR_EACH_
>> loops with C++ 11 range loops, analogously to:
>> https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572315.html
>>
>> Finally, the only loosely followed naming convention for member
>> variables is to start them with the m_ prefix.
>>
>> These just suggestions that could be done in a followup, not
>> something I would consider prerequisite for accepting the patch
>> as is if I were in a position to make such a decision.
>>

Sorry for the late update, this patch follows your previous
advices to refactor it more by:
  - Adding m_ prefix for class pcom_worker member variables.
  - Using auto_vec instead of vec among class pcom_worker,
    chain, component and comp_ptrs.

btw, the changes in tree-data-ref.[ch] is required, without
it the destruction of auto_vec instance could try to double
free the memory pointed by m_vec.

The suggestion on range loops is addressed by one separated
patch: https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575536.html

Bootstrapped and regtested on powerpc64le-linux-gnu P9,
x86_64-redhat-linux and aarch64-linux-gnu, also
bootstrapped on ppc64le P9 with bootstrap-O3 config.

Is it ok for trunk?

BR,
Kewen
-----
gcc/ChangeLog:

	* tree-data-ref.c (free_dependence_relations): Adjust to pass vec by
	reference.
	(free_data_refs): Likewise.
	* tree-data-ref.h (free_dependence_relations): Likewise.
	(free_data_refs): Likewise.
	* tree-predcom.c (struct chain): Use auto_vec instead of vec for
	members.
	(struct component): Likewise.
	(pcom_worker::pcom_worker): Adjust for auto_vec and renaming changes.
	(pcom_worker::~pcom_worker): Likewise.
	(pcom_worker::release_chain): Adjust as auto_vec changes.
	(pcom_worker::loop): Rename to ...
	(pcom_worker::m_loop): ... this.
	(pcom_worker::datarefs): Rename to ...
	(pcom_worker::m_datarefs): ... this.  Use auto_vec instead of vec.
	(pcom_worker::dependences): Rename to ...
	(pcom_worker::m_dependences): ... this.  Use auto_vec instead of vec.
	(pcom_worker::chains): Rename to ...
	(pcom_worker::m_chains): ... this.  Use auto_vec instead of vec.
	(pcom_worker::looparound_phis): Rename to ...
	(pcom_worker::m_looparound_phis): ... this.  Use auto_vec instead of
	vec.
	(pcom_worker::cache): Rename to ...
	(pcom_worker::m_cache): ... this.  Use auto_vec instead of vec.
	(pcom_worker::release_chain): Adjust for auto_vec changes.
	(pcom_worker::release_chains): Adjust for auto_vec and renaming
	changes.
	(release_component): Remove.
	(release_components): Adjust for release_component removal.
	(component_of): Adjust to use vec.
	(merge_comps): Likewise.
	(pcom_worker::aff_combination_dr_offset): Adjust for renaming changes.
	(pcom_worker::determine_offset): Likewise.
	(class comp_ptrs): Remove.
	(pcom_worker::split_data_refs_to_components): Adjust for renaming
	changes, for comp_ptrs removal with auto_vec.
	(pcom_worker::suitable_component_p): Adjust for renaming changes.
	(pcom_worker::filter_suitable_components): Adjust for release_component
	removal.
	(pcom_worker::valid_initializer_p): Adjust for renaming changes.
	(pcom_worker::find_looparound_phi): Likewise.
	(pcom_worker::add_looparound_copies): Likewise.
	(pcom_worker::determine_roots_comp): Likewise.
	(pcom_worker::single_nonlooparound_use): Likewise.
	(pcom_worker::execute_pred_commoning_chain): Likewise.
	(pcom_worker::execute_pred_commoning): Likewise.
	(pcom_worker::try_combine_chains): Likewise.
	(pcom_worker::prepare_initializers_chain): Likewise.
	(pcom_worker::prepare_initializers): Likewise.
	(pcom_worker::prepare_finalizers_chain): Likewise.
	(pcom_worker::prepare_finalizers): Likewise.
	(pcom_worker::tree_predictive_commoning_loop): Likewise.

[-- Attachment #2: 0001-predcom-Refactor-more-using-auto_vec.patch --]
[-- Type: text/plain, Size: 26272 bytes --]

---
 gcc/tree-data-ref.c |   4 +-
 gcc/tree-data-ref.h |   4 +-
 gcc/tree-predcom.c  | 248 +++++++++++++++++++-------------------------
 3 files changed, 108 insertions(+), 148 deletions(-)

diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index b6abd8b8de7..d78ddb0472e 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -6208,7 +6208,7 @@ free_dependence_relation (struct data_dependence_relation *ddr)
    DEPENDENCE_RELATIONS.  */
 
 void
-free_dependence_relations (vec<ddr_p> dependence_relations)
+free_dependence_relations (vec<ddr_p>& dependence_relations)
 {
   for (data_dependence_relation *ddr : dependence_relations)
     if (ddr)
@@ -6220,7 +6220,7 @@ free_dependence_relations (vec<ddr_p> dependence_relations)
 /* Free the memory used by the data references from DATAREFS.  */
 
 void
-free_data_refs (vec<data_reference_p> datarefs)
+free_data_refs (vec<data_reference_p>& datarefs)
 {
   for (data_reference *dr : datarefs)
     free_data_ref (dr);
diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
index 8001cc54f51..a43cd52df8c 100644
--- a/gcc/tree-data-ref.h
+++ b/gcc/tree-data-ref.h
@@ -534,9 +534,9 @@ extern void debug (vec<ddr_p> &ref);
 extern void debug (vec<ddr_p> *ptr);
 extern void debug_data_dependence_relations (vec<ddr_p> );
 extern void free_dependence_relation (struct data_dependence_relation *);
-extern void free_dependence_relations (vec<ddr_p> );
+extern void free_dependence_relations (vec<ddr_p>& );
 extern void free_data_ref (data_reference_p);
-extern void free_data_refs (vec<data_reference_p> );
+extern void free_data_refs (vec<data_reference_p>& );
 extern opt_result find_data_references_in_stmt (class loop *, gimple *,
 						vec<data_reference_p> *);
 extern bool graphite_find_data_references_in_stmt (edge, loop_p, gimple *,
diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
index a4ebf2261b0..cf85517e1c7 100644
--- a/gcc/tree-predcom.c
+++ b/gcc/tree-predcom.c
@@ -306,19 +306,19 @@ typedef struct chain
   struct chain *ch1, *ch2;
 
   /* The references in the chain.  */
-  vec<dref> refs;
+  auto_vec<dref> refs;
 
   /* The maximum distance of the reference in the chain from the root.  */
   unsigned length;
 
   /* The variables used to copy the value throughout iterations.  */
-  vec<tree> vars;
+  auto_vec<tree> vars;
 
   /* Initializers for the variables.  */
-  vec<tree> inits;
+  auto_vec<tree> inits;
 
   /* Finalizers for the eliminated stores.  */
-  vec<tree> finis;
+  auto_vec<tree> finis;
 
   /* gimple stmts intializing the initial variables of the chain.  */
   gimple_seq init_seq;
@@ -362,7 +362,7 @@ enum ref_step_type
 struct component
 {
   /* The references in the component.  */
-  vec<dref> refs;
+  auto_vec<dref> refs;
 
   /* What we know about the step of the references in the component.  */
   enum ref_step_type comp_step;
@@ -381,17 +381,13 @@ struct component
 class pcom_worker
 {
 public:
-  pcom_worker (loop_p l) : loop (l), chains (vNULL), cache (NULL)
-  {
-    dependences.create (10);
-    datarefs.create (10);
-  }
+  pcom_worker (loop_p l) : m_loop (l), m_cache (NULL) {}
 
   ~pcom_worker ()
   {
-    free_data_refs (datarefs);
-    free_dependence_relations (dependences);
-    free_affine_expand_cache (&cache);
+    free_data_refs (m_datarefs);
+    free_dependence_relations (m_dependences);
+    free_affine_expand_cache (&m_cache);
     release_chains ();
   }
 
@@ -407,23 +403,24 @@ public:
 
 private:
   /* The pointer to the given loop.  */
-  loop_p loop;
+  loop_p m_loop;
 
   /* All data references.  */
-  vec<data_reference_p> datarefs;
+  auto_vec<data_reference_p, 10> m_datarefs;
 
   /* All data dependences.  */
-  vec<ddr_p> dependences;
+  auto_vec<ddr_p, 10> m_dependences;
 
   /* All chains.  */
-  vec<chain_p> chains;
+  auto_vec<chain_p> m_chains;
 
   /* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */
-  auto_bitmap looparound_phis;
+  auto_bitmap m_looparound_phis;
 
   typedef hash_map<tree, name_expansion *> tree_expand_map_t;
   /* Cache used by tree_to_aff_combination_expand.  */
-  tree_expand_map_t *cache;
+  tree_expand_map_t *m_cache;
+
   /* Splits dependence graph to components.  */
   struct component *split_data_refs_to_components ();
 
@@ -695,13 +692,9 @@ pcom_worker::release_chain (chain_p chain)
   FOR_EACH_VEC_ELT (chain->refs, i, ref)
     free (ref);
 
-  chain->refs.release ();
-  chain->vars.release ();
-  chain->inits.release ();
   if (chain->init_seq)
     gimple_seq_discard (chain->init_seq);
 
-  chain->finis.release ();
   if (chain->fini_seq)
     gimple_seq_discard (chain->fini_seq);
 
@@ -716,18 +709,8 @@ pcom_worker::release_chains ()
   unsigned i;
   chain_p chain;
 
-  FOR_EACH_VEC_ELT (chains, i, chain)
+  FOR_EACH_VEC_ELT (m_chains, i, chain)
     release_chain (chain);
-  chains.release ();
-}
-
-/* Frees a component COMP.  */
-
-static void
-release_component (struct component *comp)
-{
-  comp->refs.release ();
-  free (comp);
 }
 
 /* Frees list of components COMPS.  */
@@ -740,7 +723,7 @@ release_components (struct component *comps)
   for (act = comps; act; act = next)
     {
       next = act->next;
-      release_component (act);
+      XDELETE (act);
     }
 }
 
@@ -748,7 +731,7 @@ release_components (struct component *comps)
    shortening.  */
 
 static unsigned
-component_of (unsigned fathers[], unsigned a)
+component_of (vec<unsigned> &fathers, unsigned a)
 {
   unsigned root, n;
 
@@ -768,7 +751,8 @@ component_of (unsigned fathers[], unsigned a)
    components, A and B are components to merge.  */
 
 static void
-merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
+merge_comps (vec<unsigned> &fathers, vec<unsigned> &sizes,
+	     unsigned a, unsigned b)
 {
   unsigned ca = component_of (fathers, a);
   unsigned cb = component_of (fathers, b);
@@ -822,7 +806,7 @@ pcom_worker::aff_combination_dr_offset (struct data_reference *dr,
   tree type = TREE_TYPE (DR_OFFSET (dr));
   aff_tree delta;
 
-  tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset, &cache);
+  tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset, &m_cache);
   aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr)));
   aff_combination_add (offset, &delta);
 }
@@ -869,7 +853,7 @@ pcom_worker::determine_offset (struct data_reference *a,
   aff_combination_add (&diff, &baseb);
 
   tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
-				  &step, &cache);
+				  &step, &m_cache);
   return aff_combination_constant_multiple_p (&diff, &step, off);
 }
 
@@ -890,50 +874,28 @@ last_always_executed_block (class loop *loop)
   return last;
 }
 
-/* RAII class for comp_father and comp_size usage.  */
-
-class comp_ptrs
-{
-public:
-  unsigned *comp_father;
-  unsigned *comp_size;
-
-  comp_ptrs (unsigned n)
-  {
-    comp_father = XNEWVEC (unsigned, n + 1);
-    comp_size = XNEWVEC (unsigned, n + 1);
-  }
-
-  ~comp_ptrs ()
-  {
-    free (comp_father);
-    free (comp_size);
-  }
-
-  comp_ptrs (const comp_ptrs &) = delete;
-  comp_ptrs &operator= (const comp_ptrs &) = delete;
-};
-
 /* Splits dependence graph on DATAREFS described by DEPENDENCES to
    components.  */
 
 struct component *
 pcom_worker::split_data_refs_to_components ()
 {
-  unsigned i, n = datarefs.length ();
+  unsigned i, n = m_datarefs.length ();
   unsigned ca, ia, ib, bad;
-  comp_ptrs ptrs (n);
-  struct component **comps;
   struct data_reference *dr, *dra, *drb;
   struct data_dependence_relation *ddr;
   struct component *comp_list = NULL, *comp;
   dref dataref;
   /* Don't do store elimination if loop has multiple exit edges.  */
-  bool eliminate_store_p = single_exit (loop) != NULL;
-  basic_block last_always_executed = last_always_executed_block (loop);
+  bool eliminate_store_p = single_exit (m_loop) != NULL;
+  basic_block last_always_executed = last_always_executed_block (m_loop);
   auto_bitmap no_store_store_comps;
+  auto_vec<unsigned> comp_father (n + 1);
+  auto_vec<unsigned> comp_size (n + 1);
+  comp_father.quick_grow (n + 1);
+  comp_size.quick_grow (n + 1);
 
-  FOR_EACH_VEC_ELT (datarefs, i, dr)
+  FOR_EACH_VEC_ELT (m_datarefs, i, dr)
     {
       if (!DR_REF (dr))
 	  /* A fake reference for call or asm_expr that may clobber memory;
@@ -943,26 +905,26 @@ pcom_worker::split_data_refs_to_components ()
       if (is_gimple_call (DR_STMT (dr)))
 	return NULL;
       dr->aux = (void *) (size_t) i;
-      ptrs.comp_father[i] = i;
-      ptrs.comp_size[i] = 1;
+      comp_father[i] = i;
+      comp_size[i] = 1;
     }
 
   /* A component reserved for the "bad" data references.  */
-  ptrs.comp_father[n] = n;
-  ptrs.comp_size[n] = 1;
+  comp_father[n] = n;
+  comp_size[n] = 1;
 
-  FOR_EACH_VEC_ELT (datarefs, i, dr)
+  FOR_EACH_VEC_ELT (m_datarefs, i, dr)
     {
       enum ref_step_type dummy;
 
       if (!suitable_reference_p (dr, &dummy))
 	{
 	  ia = (unsigned) (size_t) dr->aux;
-	  merge_comps (ptrs.comp_father, ptrs.comp_size, n, ia);
+	  merge_comps (comp_father, comp_size, n, ia);
 	}
     }
 
-  FOR_EACH_VEC_ELT (dependences, i, ddr)
+  FOR_EACH_VEC_ELT (m_dependences, i, ddr)
     {
       poly_widest_int dummy_off;
 
@@ -979,12 +941,12 @@ pcom_worker::split_data_refs_to_components ()
 	      || DDR_NUM_DIST_VECTS (ddr) == 0))
 	eliminate_store_p = false;
 
-      ia = component_of (ptrs.comp_father, (unsigned) (size_t) dra->aux);
-      ib = component_of (ptrs.comp_father, (unsigned) (size_t) drb->aux);
+      ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
+      ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
       if (ia == ib)
 	continue;
 
-      bad = component_of (ptrs.comp_father, n);
+      bad = component_of (comp_father, n);
 
       /* If both A and B are reads, we may ignore unsuitable dependences.  */
       if (DR_IS_READ (dra) && DR_IS_READ (drb))
@@ -1008,7 +970,7 @@ pcom_worker::split_data_refs_to_components ()
 	  else if (!determine_offset (dra, drb, &dummy_off))
 	    {
 	      bitmap_set_bit (no_store_store_comps, ib);
-	      merge_comps (ptrs.comp_father, ptrs.comp_size, bad, ia);
+	      merge_comps (comp_father, comp_size, bad, ia);
 	      continue;
 	    }
 	}
@@ -1022,7 +984,7 @@ pcom_worker::split_data_refs_to_components ()
 	  else if (!determine_offset (dra, drb, &dummy_off))
 	    {
 	      bitmap_set_bit (no_store_store_comps, ia);
-	      merge_comps (ptrs.comp_father, ptrs.comp_size, bad, ib);
+	      merge_comps (comp_father, comp_size, bad, ib);
 	      continue;
 	    }
 	}
@@ -1030,17 +992,17 @@ pcom_worker::split_data_refs_to_components ()
 	       && ia != bad && ib != bad
 	       && !determine_offset (dra, drb, &dummy_off))
 	{
-	  merge_comps (ptrs.comp_father, ptrs.comp_size, bad, ia);
-	  merge_comps (ptrs.comp_father, ptrs.comp_size, bad, ib);
+	  merge_comps (comp_father, comp_size, bad, ia);
+	  merge_comps (comp_father, comp_size, bad, ib);
 	  continue;
 	}
 
-      merge_comps (ptrs.comp_father, ptrs.comp_size, ia, ib);
+      merge_comps (comp_father, comp_size, ia, ib);
     }
 
   if (eliminate_store_p)
     {
-      tree niters = number_of_latch_executions (loop);
+      tree niters = number_of_latch_executions (m_loop);
 
       /* Don't do store elimination if niters info is unknown because stores
 	 in the last iteration can't be eliminated and we need to recover it
@@ -1048,12 +1010,13 @@ pcom_worker::split_data_refs_to_components ()
       eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know);
     }
 
-  comps = XCNEWVEC (struct component *, n);
-  bad = component_of (ptrs.comp_father, n);
-  FOR_EACH_VEC_ELT (datarefs, i, dr)
+  auto_vec<struct component *> comps;
+  comps.safe_grow_cleared (n, true);
+  bad = component_of (comp_father, n);
+  FOR_EACH_VEC_ELT (m_datarefs, i, dr)
     {
       ia = (unsigned) (size_t) dr->aux;
-      ca = component_of (ptrs.comp_father, ia);
+      ca = component_of (comp_father, ia);
       if (ca == bad)
 	continue;
 
@@ -1061,7 +1024,7 @@ pcom_worker::split_data_refs_to_components ()
       if (!comp)
 	{
 	  comp = XCNEW (struct component);
-	  comp->refs.create (ptrs.comp_size[ca]);
+	  comp->refs.create (comp_size[ca]);
 	  comp->eliminate_store_p = eliminate_store_p;
 	  comps[ca] = comp;
 	}
@@ -1084,7 +1047,7 @@ pcom_worker::split_data_refs_to_components ()
       bitmap_iterator bi;
       EXECUTE_IF_SET_IN_BITMAP (no_store_store_comps, 0, ia, bi)
 	{
-	  ca = component_of (ptrs.comp_father, ia);
+	  ca = component_of (comp_father, ia);
 	  if (ca != bad)
 	    comps[ca]->eliminate_store_p = false;
 	}
@@ -1099,7 +1062,6 @@ pcom_worker::split_data_refs_to_components ()
 	  comp_list = comp;
 	}
     }
-  free (comps);
   return comp_list;
 }
 
@@ -1111,14 +1073,14 @@ pcom_worker::suitable_component_p (struct component *comp)
 {
   unsigned i;
   dref a, first;
-  basic_block ba, bp = loop->header;
+  basic_block ba, bp = m_loop->header;
   bool ok, has_write = false;
 
   FOR_EACH_VEC_ELT (comp->refs, i, a)
     {
       ba = gimple_bb (a->stmt);
 
-      if (!just_once_each_iteration_p (loop, ba))
+      if (!just_once_each_iteration_p (m_loop, ba))
 	return false;
 
       gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
@@ -1180,7 +1142,7 @@ pcom_worker::filter_suitable_components (struct component *comps)
 	  *comp = act->next;
 	  FOR_EACH_VEC_ELT (act->refs, i, ref)
 	    free (ref);
-	  release_component (act);
+	  XDELETE (act);
 	}
     }
 
@@ -1392,7 +1354,7 @@ pcom_worker::valid_initializer_p (struct data_reference *ref, unsigned distance,
   aff_combination_add (&diff, &base);
 
   tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
-				  &step, &cache);
+				  &step, &m_cache);
   if (!aff_combination_constant_multiple_p (&diff, &step, &off))
     return false;
 
@@ -1413,7 +1375,7 @@ pcom_worker::find_looparound_phi (dref ref, dref root)
   tree name, init, init_ref;
   gphi *phi = NULL;
   gimple *init_stmt;
-  edge latch = loop_latch_edge (loop);
+  edge latch = loop_latch_edge (m_loop);
   struct data_reference init_dr;
   gphi_iterator psi;
 
@@ -1429,7 +1391,7 @@ pcom_worker::find_looparound_phi (dref ref, dref root)
   if (!name)
     return NULL;
 
-  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
+  for (psi = gsi_start_phis (m_loop->header); !gsi_end_p (psi); gsi_next (&psi))
     {
       phi = psi.phi ();
       if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
@@ -1439,7 +1401,7 @@ pcom_worker::find_looparound_phi (dref ref, dref root)
   if (gsi_end_p (psi))
     return NULL;
 
-  init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
+  init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
   if (TREE_CODE (init) != SSA_NAME)
     return NULL;
   init_stmt = SSA_NAME_DEF_STMT (init);
@@ -1457,7 +1419,7 @@ pcom_worker::find_looparound_phi (dref ref, dref root)
   memset (&init_dr, 0, sizeof (struct data_reference));
   DR_REF (&init_dr) = init_ref;
   DR_STMT (&init_dr) = phi;
-  if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop,
+  if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, m_loop,
 			     init_stmt))
     return NULL;
 
@@ -1512,7 +1474,7 @@ pcom_worker::add_looparound_copies (chain_p chain)
       if (!phi)
 	continue;
 
-      bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
+      bitmap_set_bit (m_looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
       insert_looparound_copy (chain, ref, phi);
     }
 }
@@ -1533,7 +1495,7 @@ pcom_worker::determine_roots_comp (struct component *comp)
   if (comp->comp_step == RS_INVARIANT)
     {
       chain = make_invariant_chain (comp);
-      chains.safe_push (chain);
+      m_chains.safe_push (chain);
       return;
     }
 
@@ -1578,7 +1540,7 @@ pcom_worker::determine_roots_comp (struct component *comp)
 	  if (nontrivial_chain_p (chain))
 	    {
 	      add_looparound_copies (chain);
-	      chains.safe_push (chain);
+	      m_chains.safe_push (chain);
 	    }
 	  else
 	    release_chain (chain);
@@ -1599,7 +1561,7 @@ pcom_worker::determine_roots_comp (struct component *comp)
   if (nontrivial_chain_p (chain))
     {
       add_looparound_copies (chain);
-      chains.safe_push (chain);
+      m_chains.safe_push (chain);
     }
   else
     release_chain (chain);
@@ -2196,7 +2158,7 @@ pcom_worker::single_nonlooparound_use (tree name)
 	{
 	  /* Ignore uses in looparound phi nodes.  Uses in other phi nodes
 	     could not be processed anyway, so just fail for them.  */
-	  if (bitmap_bit_p (looparound_phis,
+	  if (bitmap_bit_p (m_looparound_phis,
 			    SSA_NAME_VERSION (PHI_RESULT (stmt))))
 	    continue;
 
@@ -2305,14 +2267,14 @@ pcom_worker::execute_pred_commoning_chain (chain_p chain,
 	      /* If dead stores in this chain store loop variant values,
 		 we need to set up the variables by loading from memory
 		 before loop and propagating it with PHI nodes.  */
-	      initialize_root_vars_store_elim_2 (loop, chain, tmp_vars);
+	      initialize_root_vars_store_elim_2 (m_loop, chain, tmp_vars);
 	    }
 
 	  /* For inter-iteration store elimination chain, stores at each
 	     distance in loop's last (chain->length - 1) iterations can't
 	     be eliminated, because there is no following killing store.
 	     We need to generate these stores after loop.  */
-	  finalize_eliminated_stores (loop, chain);
+	  finalize_eliminated_stores (m_loop, chain);
 	}
 
       bool last_store_p = true;
@@ -2342,7 +2304,7 @@ pcom_worker::execute_pred_commoning_chain (chain_p chain,
   else
     {
       /* For non-combined chains, set up the variables that hold its value.  */
-      initialize_root_vars (loop, chain, tmp_vars);
+      initialize_root_vars (m_loop, chain, tmp_vars);
       a = get_chain_root (chain);
       in_lhs = (chain->type == CT_STORE_LOAD
 		|| chain->type == CT_COMBINATION);
@@ -2411,15 +2373,15 @@ pcom_worker::execute_pred_commoning (bitmap tmp_vars)
   chain_p chain;
   unsigned i;
 
-  FOR_EACH_VEC_ELT (chains, i, chain)
+  FOR_EACH_VEC_ELT (m_chains, i, chain)
     {
       if (chain->type == CT_INVARIANT)
-	execute_load_motion (loop, chain, tmp_vars);
+	execute_load_motion (m_loop, chain, tmp_vars);
       else
 	execute_pred_commoning_chain (chain, tmp_vars);
     }
 
-  FOR_EACH_VEC_ELT (chains, i, chain)
+  FOR_EACH_VEC_ELT (m_chains, i, chain)
     {
       if (chain->type == CT_INVARIANT)
 	;
@@ -2979,7 +2941,7 @@ pcom_worker::try_combine_chains ()
   auto_vec<chain_p> worklist;
   bool combined_p = false;
 
-  FOR_EACH_VEC_ELT (chains, i, ch1)
+  FOR_EACH_VEC_ELT (m_chains, i, ch1)
     if (chain_can_be_combined_p (ch1))
       worklist.safe_push (ch1);
 
@@ -2989,7 +2951,7 @@ pcom_worker::try_combine_chains ()
       if (!chain_can_be_combined_p (ch1))
 	continue;
 
-      FOR_EACH_VEC_ELT (chains, j, ch2)
+      FOR_EACH_VEC_ELT (m_chains, j, ch2)
 	{
 	  if (!chain_can_be_combined_p (ch2))
 	    continue;
@@ -2998,7 +2960,7 @@ pcom_worker::try_combine_chains ()
 	  if (cch)
 	    {
 	      worklist.safe_push (cch);
-	      chains.safe_push (cch);
+	      m_chains.safe_push (cch);
 	      combined_p = true;
 	      break;
 	    }
@@ -3008,8 +2970,8 @@ pcom_worker::try_combine_chains ()
     return;
 
   /* Setup UID for all statements in dominance order.  */
-  basic_block *bbs = get_loop_body_in_dom_order (loop);
-  renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes);
+  basic_block *bbs = get_loop_body_in_dom_order (m_loop);
+  renumber_gimple_stmt_uids_in_blocks (bbs, m_loop->num_nodes);
   free (bbs);
 
   /* Re-association in combined chains may generate statements different to
@@ -3022,7 +2984,7 @@ pcom_worker::try_combine_chains ()
 
      We first update position information for all combined chains.  */
   dref ref;
-  for (i = 0; chains.iterate (i, &ch1); ++i)
+  for (i = 0; m_chains.iterate (i, &ch1); ++i)
     {
       if (ch1->type != CT_COMBINATION || ch1->combined)
 	continue;
@@ -3033,7 +2995,7 @@ pcom_worker::try_combine_chains ()
       update_pos_for_combined_chains (ch1);
     }
   /* Then sort references according to newly updated position information.  */
-  for (i = 0; chains.iterate (i, &ch1); ++i)
+  for (i = 0; m_chains.iterate (i, &ch1); ++i)
     {
       if (ch1->type != CT_COMBINATION && !ch1->combined)
 	continue;
@@ -3155,10 +3117,10 @@ pcom_worker::prepare_initializers_chain (chain_p chain)
   struct data_reference *dr = get_chain_root (chain)->ref;
   tree init;
   dref laref;
-  edge entry = loop_preheader_edge (loop);
+  edge entry = loop_preheader_edge (m_loop);
 
   if (chain->type == CT_STORE_STORE)
-    return prepare_initializers_chain_store_elim (loop, chain);
+    return prepare_initializers_chain_store_elim (m_loop, chain);
 
   /* Find the initializers for the variables, and check that they cannot
      trap.  */
@@ -3210,15 +3172,15 @@ pcom_worker::prepare_initializers ()
   chain_p chain;
   unsigned i;
 
-  for (i = 0; i < chains.length (); )
+  for (i = 0; i < m_chains.length (); )
     {
-      chain = chains[i];
+      chain = m_chains[i];
       if (prepare_initializers_chain (chain))
 	i++;
       else
 	{
 	  release_chain (chain);
-	  chains.unordered_remove (i);
+	  m_chains.unordered_remove (i);
 	}
     }
 }
@@ -3231,7 +3193,7 @@ pcom_worker::prepare_finalizers_chain (chain_p chain)
 {
   unsigned i, n = chain->length;
   struct data_reference *dr = get_chain_root (chain)->ref;
-  tree fini, niters = number_of_latch_executions (loop);
+  tree fini, niters = number_of_latch_executions (m_loop);
 
   /* For now we can't eliminate stores if some of them are conditional
      executed.  */
@@ -3281,9 +3243,9 @@ pcom_worker::prepare_finalizers ()
   unsigned i;
   bool loop_closed_ssa = false;
 
-  for (i = 0; i < chains.length ();)
+  for (i = 0; i < m_chains.length ();)
     {
-      chain = chains[i];
+      chain = m_chains[i];
 
       /* Finalizer is only necessary for inter-iteration store elimination
 	 chains.  */
@@ -3305,7 +3267,7 @@ pcom_worker::prepare_finalizers ()
       else
 	{
 	  release_chain (chain);
-	  chains.unordered_remove (i);
+	  m_chains.unordered_remove (i);
 	}
     }
   return loop_closed_ssa;
@@ -3341,10 +3303,10 @@ pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p)
   bool unroll = false, loop_closed_ssa = false;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
-    fprintf (dump_file, "Processing loop %d\n", loop->num);
+    fprintf (dump_file, "Processing loop %d\n", m_loop->num);
 
   /* Nothing for predicitive commoning if loop only iterates 1 time.  */
-  if (get_max_loop_iterations_int (loop) == 0)
+  if (get_max_loop_iterations_int (m_loop) == 0)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");
@@ -3355,8 +3317,8 @@ pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p)
   /* Find the data references and split them into components according to their
      dependence relations.  */
   auto_vec<loop_p, 3> loop_nest;
-  if (!compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
-					  &dependences))
+  if (!compute_data_dependences_for_loop (m_loop, true, &loop_nest, &m_datarefs,
+					  &m_dependences))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "Cannot analyze data dependencies\n");
@@ -3364,7 +3326,7 @@ pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p)
     }
 
   if (dump_file && (dump_flags & TDF_DETAILS))
-    dump_data_dependence_relations (dump_file, dependences);
+    dump_data_dependence_relations (dump_file, m_dependences);
 
   components = split_data_refs_to_components ();
 
@@ -3385,7 +3347,7 @@ pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p)
   determine_roots (components);
   release_components (components);
 
-  if (!chains.exists ())
+  if (!m_chains.exists ())
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file,
@@ -3399,21 +3361,21 @@ pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p)
   /* Try to combine the chains that are always worked with together.  */
   try_combine_chains ();
 
-  insert_init_seqs (loop, chains);
+  insert_init_seqs (m_loop, m_chains);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
       fprintf (dump_file, "Before commoning:\n\n");
-      dump_chains (dump_file, chains);
+      dump_chains (dump_file, m_chains);
     }
 
   if (allow_unroll_p)
     /* Determine the unroll factor, and if the loop should be unrolled, ensure
        that its number of iterations is divisible by the factor.  */
-    unroll_factor = determine_unroll_factor (chains);
+    unroll_factor = determine_unroll_factor (m_chains);
 
   if (unroll_factor > 1)
-    unroll = can_unroll_loop_p (loop, unroll_factor, &desc);
+    unroll = can_unroll_loop_p (m_loop, unroll_factor, &desc);
 
   /* Execute the predictive commoning transformations, and possibly unroll the
      loop.  */
@@ -3425,7 +3387,7 @@ pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p)
 	fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
 
       dta.tmp_vars = tmp_vars;
-      dta.chains = chains;
+      dta.chains = m_chains;
       dta.worker = this;
 
       /* Cfg manipulations performed in tree_transform_and_unroll_loop before
@@ -3434,12 +3396,12 @@ pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p)
 	 statements.  To fix this, we store the ssa names defined by the
 	 phi nodes here instead of the phi nodes themselves, and restore
 	 the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
-      replace_phis_by_defined_names (chains);
+      replace_phis_by_defined_names (m_chains);
 
-      edge exit = single_dom_exit (loop);
-      tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
+      edge exit = single_dom_exit (m_loop);
+      tree_transform_and_unroll_loop (m_loop, unroll_factor, exit, &desc,
 				      execute_pred_commoning_cbck, &dta);
-      eliminate_temp_copies (loop, tmp_vars);
+      eliminate_temp_copies (m_loop, tmp_vars);
     }
   else
     {
@@ -3554,5 +3516,3 @@ make_pass_predcom (gcc::context *ctxt)
 {
   return new pass_predcom (ctxt);
 }
-
-


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] predcom: Refactor more using auto_vec
  2021-07-19  6:28             ` [PATCH] predcom: Refactor more using auto_vec Kewen.Lin
@ 2021-07-19 20:45               ` Martin Sebor
  2021-07-20  2:04                 ` Kewen.Lin
  2021-07-20 11:19               ` Richard Biener
  1 sibling, 1 reply; 16+ messages in thread
From: Martin Sebor @ 2021-07-19 20:45 UTC (permalink / raw)
  To: Kewen.Lin, Richard Biener; +Cc: Bill Schmidt, GCC Patches, Segher Boessenkool

On 7/19/21 12:28 AM, Kewen.Lin wrote:
> Hi Martin & Richard,
> 
>>> A further improvement worth considering (if you're so inclined :)
>>> is replacing the pcom_worker vec members with auto_vec (obviating
>>> having to explicitly release them) and for the same reason also
>>> replacing the comp_ptrs bare pointer members with auto_vecs.
>>> There may be other opportunities to do the same in individual
>>> functions (I'd look to get rid of as many calls to functions
>>> like XNEW()/XNEWVEC() and free() use auto_vec instead).
>>>
>>> An unrelated but worthwhile change is to replace the FOR_EACH_
>>> loops with C++ 11 range loops, analogously to:
>>> https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572315.html
>>>
>>> Finally, the only loosely followed naming convention for member
>>> variables is to start them with the m_ prefix.
>>>
>>> These just suggestions that could be done in a followup, not
>>> something I would consider prerequisite for accepting the patch
>>> as is if I were in a position to make such a decision.
>>>
> 
> Sorry for the late update, this patch follows your previous
> advices to refactor it more by:
>    - Adding m_ prefix for class pcom_worker member variables.
>    - Using auto_vec instead of vec among class pcom_worker,
>      chain, component and comp_ptrs.
> 
> btw, the changes in tree-data-ref.[ch] is required, without
> it the destruction of auto_vec instance could try to double
> free the memory pointed by m_vec.

Making the vec parameters in tree-data-ref.[ch] references is in line
with other changes like those that we have agreed on in a separate
review recently, so they look good to me.

> 
> The suggestion on range loops is addressed by one separated
> patch: https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575536.html
> 
> Bootstrapped and regtested on powerpc64le-linux-gnu P9,
> x86_64-redhat-linux and aarch64-linux-gnu, also
> bootstrapped on ppc64le P9 with bootstrap-O3 config.
> 
> Is it ok for trunk?

Thanks for taking the suggestions!  At a high-level the patch looks
good.  I spotted a couple of new instances of XDELETE that I couldn't
find corresponding XNEW() calls for but I'll leave that to someone
more familiar with the code, along with a formal review and approval.

Martin

> 
> BR,
> Kewen
> -----
> gcc/ChangeLog:
> 
> 	* tree-data-ref.c (free_dependence_relations): Adjust to pass vec by
> 	reference.
> 	(free_data_refs): Likewise.
> 	* tree-data-ref.h (free_dependence_relations): Likewise.
> 	(free_data_refs): Likewise.
> 	* tree-predcom.c (struct chain): Use auto_vec instead of vec for
> 	members.
> 	(struct component): Likewise.
> 	(pcom_worker::pcom_worker): Adjust for auto_vec and renaming changes.
> 	(pcom_worker::~pcom_worker): Likewise.
> 	(pcom_worker::release_chain): Adjust as auto_vec changes.
> 	(pcom_worker::loop): Rename to ...
> 	(pcom_worker::m_loop): ... this.
> 	(pcom_worker::datarefs): Rename to ...
> 	(pcom_worker::m_datarefs): ... this.  Use auto_vec instead of vec.
> 	(pcom_worker::dependences): Rename to ...
> 	(pcom_worker::m_dependences): ... this.  Use auto_vec instead of vec.
> 	(pcom_worker::chains): Rename to ...
> 	(pcom_worker::m_chains): ... this.  Use auto_vec instead of vec.
> 	(pcom_worker::looparound_phis): Rename to ...
> 	(pcom_worker::m_looparound_phis): ... this.  Use auto_vec instead of
> 	vec.
> 	(pcom_worker::cache): Rename to ...
> 	(pcom_worker::m_cache): ... this.  Use auto_vec instead of vec.
> 	(pcom_worker::release_chain): Adjust for auto_vec changes.
> 	(pcom_worker::release_chains): Adjust for auto_vec and renaming
> 	changes.
> 	(release_component): Remove.
> 	(release_components): Adjust for release_component removal.
> 	(component_of): Adjust to use vec.
> 	(merge_comps): Likewise.
> 	(pcom_worker::aff_combination_dr_offset): Adjust for renaming changes.
> 	(pcom_worker::determine_offset): Likewise.
> 	(class comp_ptrs): Remove.
> 	(pcom_worker::split_data_refs_to_components): Adjust for renaming
> 	changes, for comp_ptrs removal with auto_vec.
> 	(pcom_worker::suitable_component_p): Adjust for renaming changes.
> 	(pcom_worker::filter_suitable_components): Adjust for release_component
> 	removal.
> 	(pcom_worker::valid_initializer_p): Adjust for renaming changes.
> 	(pcom_worker::find_looparound_phi): Likewise.
> 	(pcom_worker::add_looparound_copies): Likewise.
> 	(pcom_worker::determine_roots_comp): Likewise.
> 	(pcom_worker::single_nonlooparound_use): Likewise.
> 	(pcom_worker::execute_pred_commoning_chain): Likewise.
> 	(pcom_worker::execute_pred_commoning): Likewise.
> 	(pcom_worker::try_combine_chains): Likewise.
> 	(pcom_worker::prepare_initializers_chain): Likewise.
> 	(pcom_worker::prepare_initializers): Likewise.
> 	(pcom_worker::prepare_finalizers_chain): Likewise.
> 	(pcom_worker::prepare_finalizers): Likewise.
> 	(pcom_worker::tree_predictive_commoning_loop): Likewise.
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] predcom: Refactor more using auto_vec
  2021-07-19 20:45               ` Martin Sebor
@ 2021-07-20  2:04                 ` Kewen.Lin
  0 siblings, 0 replies; 16+ messages in thread
From: Kewen.Lin @ 2021-07-20  2:04 UTC (permalink / raw)
  To: Martin Sebor, Richard Biener
  Cc: Bill Schmidt, GCC Patches, Segher Boessenkool

on 2021/7/20 上午4:45, Martin Sebor wrote:
> On 7/19/21 12:28 AM, Kewen.Lin wrote:
>> Hi Martin & Richard,
>>
>>>> A further improvement worth considering (if you're so inclined :)
>>>> is replacing the pcom_worker vec members with auto_vec (obviating
>>>> having to explicitly release them) and for the same reason also
>>>> replacing the comp_ptrs bare pointer members with auto_vecs.
>>>> There may be other opportunities to do the same in individual
>>>> functions (I'd look to get rid of as many calls to functions
>>>> like XNEW()/XNEWVEC() and free() use auto_vec instead).
>>>>
>>>> An unrelated but worthwhile change is to replace the FOR_EACH_
>>>> loops with C++ 11 range loops, analogously to:
>>>> https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572315.html
>>>>
>>>> Finally, the only loosely followed naming convention for member
>>>> variables is to start them with the m_ prefix.
>>>>
>>>> These just suggestions that could be done in a followup, not
>>>> something I would consider prerequisite for accepting the patch
>>>> as is if I were in a position to make such a decision.
>>>>
>>
>> Sorry for the late update, this patch follows your previous
>> advices to refactor it more by:
>>    - Adding m_ prefix for class pcom_worker member variables.
>>    - Using auto_vec instead of vec among class pcom_worker,
>>      chain, component and comp_ptrs.
>>
>> btw, the changes in tree-data-ref.[ch] is required, without
>> it the destruction of auto_vec instance could try to double
>> free the memory pointed by m_vec.
> 
> Making the vec parameters in tree-data-ref.[ch] references is in line
> with other changes like those that we have agreed on in a separate
> review recently, so they look good to me.
> 

Nice, thanks for the information.

>>
>> The suggestion on range loops is addressed by one separated
>> patch: https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575536.html
>>
>> Bootstrapped and regtested on powerpc64le-linux-gnu P9,
>> x86_64-redhat-linux and aarch64-linux-gnu, also
>> bootstrapped on ppc64le P9 with bootstrap-O3 config.
>>
>> Is it ok for trunk?
> 
> Thanks for taking the suggestions!  At a high-level the patch looks
> good.  I spotted a couple of new instances of XDELETE that I couldn't
> find corresponding XNEW() calls for but I'll leave that to someone
> more familiar with the code, along with a formal review and approval.
> 

The new XDELETEs are for struct component releasing, which uses "free"
in removed function release_component before.  As its original "new"
adopts "XCNEW" as below:

  comp = XCNEW (struct component);

I thought it might be good to use XDELETE to match when I touched it.

BR,
Kewen

> Martin
> 
>>
>> BR,
>> Kewen
>> -----
>> gcc/ChangeLog:
>>
>>     * tree-data-ref.c (free_dependence_relations): Adjust to pass vec by
>>     reference.
>>     (free_data_refs): Likewise.
>>     * tree-data-ref.h (free_dependence_relations): Likewise.
>>     (free_data_refs): Likewise.
>>     * tree-predcom.c (struct chain): Use auto_vec instead of vec for
>>     members.
>>     (struct component): Likewise.
>>     (pcom_worker::pcom_worker): Adjust for auto_vec and renaming changes.
>>     (pcom_worker::~pcom_worker): Likewise.
>>     (pcom_worker::release_chain): Adjust as auto_vec changes.
>>     (pcom_worker::loop): Rename to ...
>>     (pcom_worker::m_loop): ... this.
>>     (pcom_worker::datarefs): Rename to ...
>>     (pcom_worker::m_datarefs): ... this.  Use auto_vec instead of vec.
>>     (pcom_worker::dependences): Rename to ...
>>     (pcom_worker::m_dependences): ... this.  Use auto_vec instead of vec.
>>     (pcom_worker::chains): Rename to ...
>>     (pcom_worker::m_chains): ... this.  Use auto_vec instead of vec.
>>     (pcom_worker::looparound_phis): Rename to ...
>>     (pcom_worker::m_looparound_phis): ... this.  Use auto_vec instead of
>>     vec.
>>     (pcom_worker::cache): Rename to ...
>>     (pcom_worker::m_cache): ... this.  Use auto_vec instead of vec.
>>     (pcom_worker::release_chain): Adjust for auto_vec changes.
>>     (pcom_worker::release_chains): Adjust for auto_vec and renaming
>>     changes.
>>     (release_component): Remove.
>>     (release_components): Adjust for release_component removal.
>>     (component_of): Adjust to use vec.
>>     (merge_comps): Likewise.
>>     (pcom_worker::aff_combination_dr_offset): Adjust for renaming changes.
>>     (pcom_worker::determine_offset): Likewise.
>>     (class comp_ptrs): Remove.
>>     (pcom_worker::split_data_refs_to_components): Adjust for renaming
>>     changes, for comp_ptrs removal with auto_vec.
>>     (pcom_worker::suitable_component_p): Adjust for renaming changes.
>>     (pcom_worker::filter_suitable_components): Adjust for release_component
>>     removal.
>>     (pcom_worker::valid_initializer_p): Adjust for renaming changes.
>>     (pcom_worker::find_looparound_phi): Likewise.
>>     (pcom_worker::add_looparound_copies): Likewise.
>>     (pcom_worker::determine_roots_comp): Likewise.
>>     (pcom_worker::single_nonlooparound_use): Likewise.
>>     (pcom_worker::execute_pred_commoning_chain): Likewise.
>>     (pcom_worker::execute_pred_commoning): Likewise.
>>     (pcom_worker::try_combine_chains): Likewise.
>>     (pcom_worker::prepare_initializers_chain): Likewise.
>>     (pcom_worker::prepare_initializers): Likewise.
>>     (pcom_worker::prepare_finalizers_chain): Likewise.
>>     (pcom_worker::prepare_finalizers): Likewise.
>>     (pcom_worker::tree_predictive_commoning_loop): Likewise.
>>
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] predcom: Refactor more using auto_vec
  2021-07-19  6:28             ` [PATCH] predcom: Refactor more using auto_vec Kewen.Lin
  2021-07-19 20:45               ` Martin Sebor
@ 2021-07-20 11:19               ` Richard Biener
  1 sibling, 0 replies; 16+ messages in thread
From: Richard Biener @ 2021-07-20 11:19 UTC (permalink / raw)
  To: Kewen.Lin; +Cc: Martin Sebor, Bill Schmidt, GCC Patches, Segher Boessenkool

On Mon, Jul 19, 2021 at 8:29 AM Kewen.Lin <linkw@linux.ibm.com> wrote:
>
> Hi Martin & Richard,
>
> >> A further improvement worth considering (if you're so inclined :)
> >> is replacing the pcom_worker vec members with auto_vec (obviating
> >> having to explicitly release them) and for the same reason also
> >> replacing the comp_ptrs bare pointer members with auto_vecs.
> >> There may be other opportunities to do the same in individual
> >> functions (I'd look to get rid of as many calls to functions
> >> like XNEW()/XNEWVEC() and free() use auto_vec instead).
> >>
> >> An unrelated but worthwhile change is to replace the FOR_EACH_
> >> loops with C++ 11 range loops, analogously to:
> >> https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572315.html
> >>
> >> Finally, the only loosely followed naming convention for member
> >> variables is to start them with the m_ prefix.
> >>
> >> These just suggestions that could be done in a followup, not
> >> something I would consider prerequisite for accepting the patch
> >> as is if I were in a position to make such a decision.
> >>
>
> Sorry for the late update, this patch follows your previous
> advices to refactor it more by:
>   - Adding m_ prefix for class pcom_worker member variables.
>   - Using auto_vec instead of vec among class pcom_worker,
>     chain, component and comp_ptrs.
>
> btw, the changes in tree-data-ref.[ch] is required, without
> it the destruction of auto_vec instance could try to double
> free the memory pointed by m_vec.
>
> The suggestion on range loops is addressed by one separated
> patch: https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575536.html
>
> Bootstrapped and regtested on powerpc64le-linux-gnu P9,
> x86_64-redhat-linux and aarch64-linux-gnu, also
> bootstrapped on ppc64le P9 with bootstrap-O3 config.
>
> Is it ok for trunk?

OK.

Thanks,
Richard.

> BR,
> Kewen
> -----
> gcc/ChangeLog:
>
>         * tree-data-ref.c (free_dependence_relations): Adjust to pass vec by
>         reference.
>         (free_data_refs): Likewise.
>         * tree-data-ref.h (free_dependence_relations): Likewise.
>         (free_data_refs): Likewise.
>         * tree-predcom.c (struct chain): Use auto_vec instead of vec for
>         members.
>         (struct component): Likewise.
>         (pcom_worker::pcom_worker): Adjust for auto_vec and renaming changes.
>         (pcom_worker::~pcom_worker): Likewise.
>         (pcom_worker::release_chain): Adjust as auto_vec changes.
>         (pcom_worker::loop): Rename to ...
>         (pcom_worker::m_loop): ... this.
>         (pcom_worker::datarefs): Rename to ...
>         (pcom_worker::m_datarefs): ... this.  Use auto_vec instead of vec.
>         (pcom_worker::dependences): Rename to ...
>         (pcom_worker::m_dependences): ... this.  Use auto_vec instead of vec.
>         (pcom_worker::chains): Rename to ...
>         (pcom_worker::m_chains): ... this.  Use auto_vec instead of vec.
>         (pcom_worker::looparound_phis): Rename to ...
>         (pcom_worker::m_looparound_phis): ... this.  Use auto_vec instead of
>         vec.
>         (pcom_worker::cache): Rename to ...
>         (pcom_worker::m_cache): ... this.  Use auto_vec instead of vec.
>         (pcom_worker::release_chain): Adjust for auto_vec changes.
>         (pcom_worker::release_chains): Adjust for auto_vec and renaming
>         changes.
>         (release_component): Remove.
>         (release_components): Adjust for release_component removal.
>         (component_of): Adjust to use vec.
>         (merge_comps): Likewise.
>         (pcom_worker::aff_combination_dr_offset): Adjust for renaming changes.
>         (pcom_worker::determine_offset): Likewise.
>         (class comp_ptrs): Remove.
>         (pcom_worker::split_data_refs_to_components): Adjust for renaming
>         changes, for comp_ptrs removal with auto_vec.
>         (pcom_worker::suitable_component_p): Adjust for renaming changes.
>         (pcom_worker::filter_suitable_components): Adjust for release_component
>         removal.
>         (pcom_worker::valid_initializer_p): Adjust for renaming changes.
>         (pcom_worker::find_looparound_phi): Likewise.
>         (pcom_worker::add_looparound_copies): Likewise.
>         (pcom_worker::determine_roots_comp): Likewise.
>         (pcom_worker::single_nonlooparound_use): Likewise.
>         (pcom_worker::execute_pred_commoning_chain): Likewise.
>         (pcom_worker::execute_pred_commoning): Likewise.
>         (pcom_worker::try_combine_chains): Likewise.
>         (pcom_worker::prepare_initializers_chain): Likewise.
>         (pcom_worker::prepare_initializers): Likewise.
>         (pcom_worker::prepare_finalizers_chain): Likewise.
>         (pcom_worker::prepare_finalizers): Likewise.
>         (pcom_worker::tree_predictive_commoning_loop): Likewise.

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2021-07-20 11:20 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-02  9:29 [PATCH] predcom: Adjust some unnecessary update_ssa calls Kewen.Lin
2021-06-07 14:46 ` Richard Biener
2021-06-07 15:20   ` Martin Sebor
2021-06-08  9:30   ` Kewen.Lin
2021-06-08 11:02     ` Richard Biener
2021-06-08 11:09       ` Richard Biener
2021-06-22  2:35       ` predcom: Refactor more by encapsulating global states Kewen.Lin
2021-06-22 16:14         ` Martin Sebor
2021-06-24  9:26           ` Kewen.Lin
2021-07-19  6:28             ` [PATCH] predcom: Refactor more using auto_vec Kewen.Lin
2021-07-19 20:45               ` Martin Sebor
2021-07-20  2:04                 ` Kewen.Lin
2021-07-20 11:19               ` Richard Biener
2021-06-23  7:22         ` predcom: Refactor more by encapsulating global states Richard Biener
2021-06-24  9:28           ` Kewen.Lin
2021-06-08 14:26     ` [PATCH] predcom: Adjust some unnecessary update_ssa calls Martin Sebor

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).