From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-ed1-x52a.google.com (mail-ed1-x52a.google.com [IPv6:2a00:1450:4864:20::52a]) by sourceware.org (Postfix) with ESMTPS id AC56F3858030 for ; Mon, 8 Nov 2021 11:02:30 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org AC56F3858030 Received: by mail-ed1-x52a.google.com with SMTP id w1so61068951edd.10 for ; Mon, 08 Nov 2021 03:02:30 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:mime-version:references:in-reply-to:from:date :message-id:subject:to; bh=px5w9Gmj1l/bL+qm+ItF+2nuax7ySJgN9E00cPwGrSc=; b=BeONtOjhnk6dVEt8G+J1SBV9iAV9juVnoHNBceMXA0ZsqSWczSMskOZTfocXg/tdey DR6Np7gEgjgSnr0Y2mvwHKG5xaESz9u9kPh3lfzCz/wGClM8aLSiKDD1MbGT8w2llpkU texjlPyopFsKnkF4Z1Pm4Z3/8ekuayl4+Gc5WrfQdFgAmXJEz3OTFwa/UAsJ+mLLxG91 zvLxyuKHKf/j8rmC4iWlXuqguDpCCc1BZsdf0/tVrnuhfAFsQC+RNormm979ETWiElec 7pQ2IGntX3m8rXyu3RA7q04m37tZYj2uJ0CBZ+xd6V6z8s4L2+Y+0hMKm7AxsePeCOd0 ylHw== X-Gm-Message-State: AOAM532LWOSkS4q+yBXAfe0mAz3FkS7qIyCXpS8GpmhMJc8eHwAKINXH lYfFNr3nlYxuQwaaW986fju44Hi6aEI7PUtul+M= X-Google-Smtp-Source: ABdhPJxppdiRxQeyPbUUfGpF3sV3h1o9Pdm4+jTYiZOBt2mhSX5x59H0Qt6mDf7E1YadPXxmLB1TWpf0N5bVVJLNqbM= X-Received: by 2002:a05:6402:50d0:: with SMTP id h16mr30856166edb.70.1636369349485; Mon, 08 Nov 2021 03:02:29 -0800 (PST) MIME-Version: 1.0 References: In-Reply-To: From: Richard Biener Date: Mon, 8 Nov 2021 12:02:18 +0100 Message-ID: Subject: Re: [PATCH] vect: Keep scalar costs around longer To: Richard Sandiford , GCC Patches Content-Type: text/plain; charset="UTF-8" X-Spam-Status: No, score=-8.3 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.29 
Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 08 Nov 2021 11:02:32 -0000 On Mon, Nov 8, 2021 at 11:47 AM Richard Sandiford via Gcc-patches wrote: > > The scalar costs for a loop are fleeting, with only the final > single_scalar_iteration_cost being kept for later comparison. > This patch replaces single_scalar_iteration_cost with the cost > structure, so that (with later patches) it's possible for targets > to examine other target-specific cost properties as well. This will > be done by passing the scalar costs to hooks where appropriate; > targets shouldn't try to read the information directly from > loop_vec_infos. > > Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? OK. I wondered whether we could put this cost into vec_info_shared, but we seem to look at per-stmt info in vect_compute_single_scalar_iteration_cost, though quite possibly the relevant bits should not change. So we could eventually compute it lazily once. Something to think about later. Richard. > Richard > > > gcc/ > * tree-vectorizer.h (_loop_vec_info::scalar_costs): New member > variable. > (_loop_vec_info::single_scalar_iteration_cost): Delete. > (LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST): Delete. > (vector_costs::total_cost): New function. > * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Update > after above changes. > (_loop_vec_info::~_loop_vec_info): Delete scalar_costs. > (vect_compute_single_scalar_iteration_cost): Store the costs > in loop_vinfo->scalar_costs. > (vect_estimate_min_profitable_iters): Get the scalar cost from > loop_vinfo->scalar_costs. 
> --- > gcc/tree-vect-loop.c | 17 ++++++----------- > gcc/tree-vectorizer.h | 17 +++++++++++++---- > 2 files changed, 19 insertions(+), 15 deletions(-) > > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c > index c9ee2e15e35..887275a5071 100644 > --- a/gcc/tree-vect-loop.c > +++ b/gcc/tree-vect-loop.c > @@ -822,6 +822,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) > num_iters_unchanged (NULL_TREE), > num_iters_assumptions (NULL_TREE), > vector_costs (nullptr), > + scalar_costs (nullptr), > th (0), > versioning_threshold (0), > vectorization_factor (0), > @@ -839,7 +840,6 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) > ivexpr_map (NULL), > scan_map (NULL), > slp_unrolling_factor (1), > - single_scalar_iteration_cost (0), > inner_loop_cost_factor (param_vect_inner_loop_cost_factor), > vectorizable (false), > can_use_partial_vectors_p (param_vect_partial_vector_usage != 0), > @@ -931,6 +931,7 @@ _loop_vec_info::~_loop_vec_info () > delete ivexpr_map; > delete scan_map; > epilogue_vinfos.release (); > + delete scalar_costs; > delete vector_costs; > > /* When we release an epiloge vinfo that we do not intend to use > @@ -1292,20 +1293,15 @@ vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo) > } > > /* Now accumulate cost. 
*/ > - vector_costs *target_cost_data = init_cost (loop_vinfo, true); > + loop_vinfo->scalar_costs = init_cost (loop_vinfo, true); > stmt_info_for_cost *si; > int j; > FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), > j, si) > - (void) add_stmt_cost (target_cost_data, si->count, > + (void) add_stmt_cost (loop_vinfo->scalar_costs, si->count, > si->kind, si->stmt_info, si->vectype, > si->misalign, si->where); > - unsigned prologue_cost = 0, body_cost = 0, epilogue_cost = 0; > - finish_cost (target_cost_data, &prologue_cost, &body_cost, > - &epilogue_cost); > - delete target_cost_data; > - LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo) > - = prologue_cost + body_cost + epilogue_cost; > + loop_vinfo->scalar_costs->finish_cost (); > } > > > @@ -3868,8 +3864,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, > TODO: Consider assigning different costs to different scalar > statements. */ > > - scalar_single_iter_cost > - = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo); > + scalar_single_iter_cost = loop_vinfo->scalar_costs->total_cost (); > > /* Add additional cost for the peeled instructions in prologue and epilogue > loop. (For fully-masked loops there will be no peeling.) > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index 0e3aad590e8..8dba3a34aa9 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -590,6 +590,9 @@ public: > /* The cost of the vector code. */ > class vector_costs *vector_costs; > > + /* The cost of the scalar code. */ > + class vector_costs *scalar_costs; > + > /* Threshold of number of iterations below which vectorization will not be > performed. It is calculated from MIN_PROFITABLE_ITERS and > param_min_vect_loop_bound. */ > @@ -721,9 +724,6 @@ public: > applied to the loop, i.e., no unrolling is needed, this is 1. */ > poly_uint64 slp_unrolling_factor; > > - /* Cost of a single scalar iteration. 
*/ > - int single_scalar_iteration_cost; > - > /* The factor used to over weight those statements in an inner loop > relative to the loop being vectorized. */ > unsigned int inner_loop_cost_factor; > @@ -843,7 +843,6 @@ public: > #define LOOP_VINFO_SCALAR_LOOP_SCALING(L) (L)->scalar_loop_scaling > #define LOOP_VINFO_HAS_MASK_STORE(L) (L)->has_mask_store > #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec > -#define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost > #define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info > #define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond > #define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor > @@ -1438,6 +1437,7 @@ public: > unsigned int body_cost () const; > unsigned int epilogue_cost () const; > unsigned int outside_cost () const; > + unsigned int total_cost () const; > > protected: > unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location, > @@ -1508,6 +1508,15 @@ vector_costs::outside_cost () const > return prologue_cost () + epilogue_cost (); > } > > +/* Return the cost of the prologue, body and epilogue code > + (in abstract units). */ > + > +inline unsigned int > +vector_costs::total_cost () const > +{ > + return body_cost () + outside_cost (); > +} > + > #define VECT_MAX_COST 1000 > > /* The maximum number of intermediate steps required in multi-step type > -- > 2.25.1 >