From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1011) id 22D1E3857716; Wed, 26 Apr 2023 19:26:02 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 22D1E3857716 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1682537162; bh=IMfwWMDeQwQ8vT0lZrHoGqJ06DiVxDEs4ZHoCs4yOCs=; h=From:To:Subject:Date:From; b=RdR3xUbGSYp5Mva+YfvzojtPlFk2SYnOW+7dLS4ZdclafJwKva67nTnkg2iOLtEsC ToF+a6Sfq1PwBB49WHBR7CzdZvZNyS608Hbhpa9ivsMl3yWL861/8+KpeoNu+nM7PJ AJglRwPqfPJXG8HjuwU2NUeDMQFAv/I23ldZAki0= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Andrew Macleod To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-273] Add sbr_lazy_vector and adjust (e)vrp sparse cache X-Act-Checkin: gcc X-Git-Author: Andrew MacLeod X-Git-Refname: refs/heads/master X-Git-Oldrev: bf50499a14943b64f19bd72c60422683f7ecd0ee X-Git-Newrev: b6dea04fca6f249c553cb18d670a0845cd0579f8 Message-Id: <20230426192602.22D1E3857716@sourceware.org> Date: Wed, 26 Apr 2023 19:26:02 +0000 (GMT) List-Id: https://gcc.gnu.org/g:b6dea04fca6f249c553cb18d670a0845cd0579f8 commit r14-273-gb6dea04fca6f249c553cb18d670a0845cd0579f8 Author: Andrew MacLeod Date: Thu Apr 13 14:47:47 2023 -0400 Add sbr_lazy_vector and adjust (e)vrp sparse cache Add a sparse vector class for cache and use if by default. Rename the evrp_* params to vrp_*, and add a param for small CFGS which use just the original basic vector. * gimple-range-cache.cc (sbr_vector::sbr_vector): Add parameter and local to optionally zero memory. (br_vector::grow): Only zero memory if flag is set. (class sbr_lazy_vector): New. (sbr_lazy_vector::sbr_lazy_vector): New. (sbr_lazy_vector::set_bb_range): New. (sbr_lazy_vector::get_bb_range): New. (sbr_lazy_vector::bb_range_p): New. (block_range_cache::set_bb_range): Check flags and Use sbr_lazy_vector. * gimple-range-gori.cc (gori_map::calculate_gori): Use param_vrp_switch_limit. (gori_compute::gori_compute): Use param_vrp_switch_limit. * params.opt (vrp_sparse_threshold): Rename from evrp_sparse_threshold. (vrp_switch_limit): Rename from evrp_switch_limit. (vrp_vector_threshold): New. Diff: --- gcc/gimple-range-cache.cc | 72 +++++++++++++++++++++++++++++++++++++++++------ gcc/gimple-range-gori.cc | 4 +-- gcc/params.opt | 20 +++++++------ 3 files changed, 78 insertions(+), 18 deletions(-) diff --git a/gcc/gimple-range-cache.cc b/gcc/gimple-range-cache.cc index 2314478d558..868d2dda424 100644 --- a/gcc/gimple-range-cache.cc +++ b/gcc/gimple-range-cache.cc @@ -79,7 +79,7 @@ ssa_block_ranges::dump (FILE *f) class sbr_vector : public ssa_block_ranges { public: - sbr_vector (tree t, vrange_allocator *allocator); + sbr_vector (tree t, vrange_allocator *allocator, bool zero_p = true); virtual bool set_bb_range (const_basic_block bb, const vrange &r) override; virtual bool get_bb_range (vrange &r, const_basic_block bb) override; @@ -91,22 +91,25 @@ protected: vrange *m_undefined; tree m_type; vrange_allocator *m_range_allocator; + bool m_zero_p; void grow (); }; // Initialize a block cache for an ssa_name of type T. -sbr_vector::sbr_vector (tree t, vrange_allocator *allocator) +sbr_vector::sbr_vector (tree t, vrange_allocator *allocator, bool zero_p) : ssa_block_ranges (t) { gcc_checking_assert (TYPE_P (t)); m_type = t; + m_zero_p = zero_p; m_range_allocator = allocator; m_tab_size = last_basic_block_for_fn (cfun) + 1; m_tab = static_cast (allocator->alloc (m_tab_size * sizeof (vrange *))); - memset (m_tab, 0, m_tab_size * sizeof (vrange *)); + if (zero_p) + memset (m_tab, 0, m_tab_size * sizeof (vrange *)); // Create the cached type range. m_varying = m_range_allocator->alloc_vrange (t); @@ -132,7 +135,8 @@ sbr_vector::grow () vrange **t = static_cast (m_range_allocator->alloc (new_size * sizeof (vrange *))); memcpy (t, m_tab, m_tab_size * sizeof (vrange *)); - memset (t + m_tab_size, 0, (new_size - m_tab_size) * sizeof (vrange *)); + if (m_zero_p) + memset (t + m_tab_size, 0, (new_size - m_tab_size) * sizeof (vrange *)); m_tab = t; m_tab_size = new_size; @@ -183,6 +187,50 @@ sbr_vector::bb_range_p (const_basic_block bb) return false; } +// Like an sbr_vector, except it uses a bitmap to manage whetehr vale is set +// or not rather than cleared memory. + +class sbr_lazy_vector : public sbr_vector +{ +public: + sbr_lazy_vector (tree t, vrange_allocator *allocator, bitmap_obstack *bm); + + virtual bool set_bb_range (const_basic_block bb, const vrange &r) override; + virtual bool get_bb_range (vrange &r, const_basic_block bb) override; + virtual bool bb_range_p (const_basic_block bb) override; +protected: + bitmap m_has_value; +}; + +sbr_lazy_vector::sbr_lazy_vector (tree t, vrange_allocator *allocator, + bitmap_obstack *bm) + : sbr_vector (t, allocator, false) +{ + m_has_value = BITMAP_ALLOC (bm); +} + +bool +sbr_lazy_vector::set_bb_range (const_basic_block bb, const vrange &r) +{ + sbr_vector::set_bb_range (bb, r); + bitmap_set_bit (m_has_value, bb->index); + return true; +} + +bool +sbr_lazy_vector::get_bb_range (vrange &r, const_basic_block bb) +{ + if (bitmap_bit_p (m_has_value, bb->index)) + return sbr_vector::get_bb_range (r, bb); + return false; +} + +bool +sbr_lazy_vector::bb_range_p (const_basic_block bb) +{ + return bitmap_bit_p (m_has_value, bb->index); +} + // This class implements the on entry cache via a sparse bitmap. // It uses the quad bit routines to access 4 bits at a time. // A value of 0 (the default) means there is no entry, and a value of @@ -347,21 +395,29 @@ block_range_cache::set_bb_range (tree name, const_basic_block bb, if (!m_ssa_ranges[v]) { - // Use sparse representation if there are too many basic blocks. - if (last_basic_block_for_fn (cfun) > param_evrp_sparse_threshold) + // Use sparse bitmap representation if there are too many basic blocks. + if (last_basic_block_for_fn (cfun) > param_vrp_sparse_threshold) { void *r = m_range_allocator->alloc (sizeof (sbr_sparse_bitmap)); m_ssa_ranges[v] = new (r) sbr_sparse_bitmap (TREE_TYPE (name), m_range_allocator, &m_bitmaps); } - else + else if (last_basic_block_for_fn (cfun) < param_vrp_vector_threshold) { - // Otherwise use the default vector implementation. + // For small CFGs use the basic vector implemntation. void *r = m_range_allocator->alloc (sizeof (sbr_vector)); m_ssa_ranges[v] = new (r) sbr_vector (TREE_TYPE (name), m_range_allocator); } + else + { + // Otherwise use the sparse vector implementation. + void *r = m_range_allocator->alloc (sizeof (sbr_lazy_vector)); + m_ssa_ranges[v] = new (r) sbr_lazy_vector (TREE_TYPE (name), + m_range_allocator, + &m_bitmaps); + } } return m_ssa_ranges[v]->set_bb_range (bb, r); } diff --git a/gcc/gimple-range-gori.cc b/gcc/gimple-range-gori.cc index 5bba77c7b7b..9d0cc97bf8c 100644 --- a/gcc/gimple-range-gori.cc +++ b/gcc/gimple-range-gori.cc @@ -486,7 +486,7 @@ gori_map::calculate_gori (basic_block bb) else { // Do not process switches if they are too large. - if (EDGE_COUNT (bb->succs) > (unsigned)param_evrp_switch_limit) + if (EDGE_COUNT (bb->succs) > (unsigned)param_vrp_switch_limit) return; gswitch *gs = as_a(stmt); name = gimple_range_ssa_p (gimple_switch_index (gs)); @@ -558,7 +558,7 @@ debug (gori_map &g) // Construct a gori_compute object. gori_compute::gori_compute (int not_executable_flag) - : outgoing (param_evrp_switch_limit), tracer ("GORI ") + : outgoing (param_vrp_switch_limit), tracer ("GORI ") { m_not_executable_flag = not_executable_flag; // Create a boolean_type true and false range. diff --git a/gcc/params.opt b/gcc/params.opt index 823cdb2ff85..66f1c99036a 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -130,14 +130,6 @@ Maximum size (in bytes) of objects tracked bytewise by dead store elimination. Common Joined UInteger Var(param_early_inlining_insns) Init(6) Optimization Param Maximal estimated growth of function body caused by early inlining of single call. --param=evrp-sparse-threshold= -Common Joined UInteger Var(param_evrp_sparse_threshold) Init(800) Optimization Param -Maximum number of basic blocks before EVRP uses a sparse cache. - --param=evrp-switch-limit= -Common Joined UInteger Var(param_evrp_switch_limit) Init(50) Optimization Param -Maximum number of outgoing edges in a switch before EVRP will not process it. - -param=fsm-scale-path-stmts= Common Joined UInteger Var(param_fsm_scale_path_stmts) Init(2) IntegerRange(1, 10) Param Optimization Scale factor to apply to the number of statements in a threading path crossing a loop backedge when comparing to max-jump-thread-duplication-stmts. @@ -1182,4 +1174,16 @@ The maximum factor which the loop vectorizer applies to the cost of statements i Common Joined UInteger Var(param_vect_induction_float) Init(1) IntegerRange(0, 1) Param Optimization Enable loop vectorization of floating point inductions. +-param=vrp-sparse-threshold= +Common Joined UInteger Var(param_vrp_sparse_threshold) Init(3000) Optimization Param +Maximum number of basic blocks before VRP uses a sparse bitmap cache. + +-param=vrp-switch-limit= +Common Joined UInteger Var(param_vrp_switch_limit) Init(50) Optimization Param +Maximum number of outgoing edges in a switch before VRP will not process it. + +-param=vrp-vector-threshold= +Common Joined UInteger Var(param_vrp_vector_threshold) Init(250) Optimization Param +Maximum number of basic blocks for VRP to use a basic cache vector. + ; This comment is to ensure we retain the blank line above.