From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 27510 invoked by alias); 23 Oct 2019 09:42:59 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 27501 invoked by uid 89); 23 Oct 2019 09:42:59 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-8.5 required=5.0 tests=AWL,BAYES_00,FREEMAIL_FROM,GIT_PATCH_2,GIT_PATCH_3,RCVD_IN_DNSWL_NONE,SPF_PASS autolearn=ham version=3.3.1 spammy=COST, rat X-HELO: mail-il1-f194.google.com Received: from mail-il1-f194.google.com (HELO mail-il1-f194.google.com) (209.85.166.194) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 23 Oct 2019 09:42:57 +0000 Received: by mail-il1-f194.google.com with SMTP id s75so8530845ilc.3 for ; Wed, 23 Oct 2019 02:42:56 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=mime-version:references:in-reply-to:from:date:message-id:subject:to :cc; bh=LRxb+1mdQV8F5hjt8Kyuhwvm23nbzXLbAnNitnfVn7M=; b=XGtVk6MNBos6xnuiP7Kt52K8wkzcyG1Aj9e2nPlN//JHyMqcAHyFOeX0n7DuT5ZfKH wYtnOPp1vFc27+/1V5xbtsgg9HUEDyj1WgJATVUEWVrotguOv7DQ4gzurUsdVUnaV7s+ pubJFW5jhOZiwGwcyLibjsLSN1lGo2EsRAVKSK8lMzMuOnZbBQNHbgcBMc2dgV9IDCr1 IRPkiYYXcCqZHlUmxmvUSGYPYrgES+vF4D3qN6FvZsilJjK3jdU4gfAvwtwgqo0qyIcM RaFhn2KUXElIm4Zd2Qg5SgKUg+WH7K9tkCY+Y2gCK2P8tAFVTNKX+7oZsWM4ms0JNITy NYCQ== MIME-Version: 1.0 References: <20191019062731.GL2116@tucnak> <20191021112430.GT2116@tucnak> <20191022073248.GY2116@tucnak> In-Reply-To: <20191022073248.GY2116@tucnak> From: "Bin.Cheng" Date: Wed, 23 Oct 2019 09:43:00 -0000 Message-ID: Subject: Re: [PATCH] Improve debug info in ivopts optimized loops (PR debug/90231) To: Jakub Jelinek Cc: Richard Biener , Alexandre Oliva , gcc-patches List Content-Type: text/plain; charset="UTF-8" X-IsSubscribed: yes X-SW-Source: 2019-10/txt/msg01630.txt.bz2 On Tue, Oct 22, 2019 at 3:32 PM Jakub Jelinek wrote: > > On Mon, Oct 21, 2019 at 01:24:30PM +0200, Jakub Jelinek wrote: > > So I wonder if for correctness I don't need to add: > > > > if (!use->iv->no_overflow > > && !cand->iv->no_overflow > > && !integer_pow2p (cstep)) > > return NULL_TREE; > > > > with some of the above as comment explaining why. > > > > On the other side, if cand->iv->no_overflow, couldn't we bypass the extra > > precision test? > > Here are these two in patch form. > > 2019-10-22 Jakub Jelinek > > PR debug/90231 > * tree-ssa-loop-ivopts.c (get_debug_computation_at): New function. > (remove_unused_ivs): Use it instead of get_computation_at. When > choosing best candidate, only consider candidates where > get_debug_computation_at actually returns non-NULL. > > --- gcc/tree-ssa-loop-ivopts.c.jj 2019-10-21 14:17:57.598198162 +0200 > +++ gcc/tree-ssa-loop-ivopts.c 2019-10-22 09:30:09.782238157 +0200 > @@ -4089,6 +4089,94 @@ get_computation_at (class loop *loop, gi > return fold_convert (type, aff_combination_to_tree (&aff)); > } > > +/* Like get_computation_at, but try harder, even if the computation > + is more expensive. Intended for debug stmts. */ > + > +static tree > +get_debug_computation_at (class loop *loop, gimple *at, > + struct iv_use *use, struct iv_cand *cand) > +{ > + if (tree ret = get_computation_at (loop, at, use, cand)) > + return ret; > + > + tree ubase = use->iv->base, ustep = use->iv->step; > + tree cbase = cand->iv->base, cstep = cand->iv->step; > + tree var; > + tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase); > + widest_int rat; > + > + /* We must have a precision to express the values of use. */ > + if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype)) > + return NULL_TREE; > + > + /* Try to handle the case that get_computation_at doesn't, > + try to express > + use = ubase + (var - cbase) / ratio. */ > + if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep), > + &rat)) > + return NULL_TREE; > + > + bool neg_p = false; > + if (wi::neg_p (rat)) > + { > + if (TYPE_UNSIGNED (ctype)) > + return NULL_TREE; > + neg_p = true; > + rat = wi::neg (rat); > + } > + > + /* If both IVs can wrap around and CAND doesn't have a power of two step, > + it is unsafe. Consider uint16_t CAND with step 9, when wrapping around, > + the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say > + uint8_t with step 3, those values divided by 3 cast to uint8_t will be > + ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */ Interesting, so we can still get correct debug info for iter in such special cases. > + if (!use->iv->no_overflow > + && !cand->iv->no_overflow > + && !integer_pow2p (cstep)) > + return NULL_TREE; > + > + int bits = wi::exact_log2 (rat); > + if (bits == -1) > + bits = wi::floor_log2 (rat) + 1; > + if (!cand->iv->no_overflow > + && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype)) > + return NULL_TREE; The patch is fine for me. Just for the record, guess we may try to find (by recording information in early phase) the correct/bijection candidate in computing the iv in debuginfo in the future, then those checks would be unnecessary. Thanks, bin > + > + var = var_at_stmt (loop, cand, at); > + > + if (POINTER_TYPE_P (ctype)) > + { > + ctype = unsigned_type_for (ctype); > + cbase = fold_convert (ctype, cbase); > + cstep = fold_convert (ctype, cstep); > + var = fold_convert (ctype, var); > + } > + > + ubase = unshare_expr (ubase); > + cbase = unshare_expr (cbase); > + if (stmt_after_increment (loop, cand, at)) > + var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, > + unshare_expr (cstep)); > + > + var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase); > + var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var, > + wide_int_to_tree (TREE_TYPE (var), rat)); > + if (POINTER_TYPE_P (utype)) > + { > + var = fold_convert (sizetype, var); > + if (neg_p) > + var = fold_build1 (NEGATE_EXPR, sizetype, var); > + var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var); > + } > + else > + { > + var = fold_convert (utype, var); > + var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype, > + ubase, var); > + } > + return var; > +} > + > /* Adjust the cost COST for being in loop setup rather than loop body. > If we're optimizing for space, the loop setup overhead is constant; > if we're optimizing for speed, amortize it over the per-iteration cost. > @@ -7523,6 +7611,7 @@ remove_unused_ivs (struct ivopts_data *d > struct iv_use dummy_use; > struct iv_cand *best_cand = NULL, *cand; > unsigned i, best_pref = 0, cand_pref; > + tree comp = NULL_TREE; > > memset (&dummy_use, 0, sizeof (dummy_use)); > dummy_use.iv = info->iv; > @@ -7543,20 +7632,22 @@ remove_unused_ivs (struct ivopts_data *d > ? 1 : 0; > if (best_cand == NULL || best_pref < cand_pref) > { > - best_cand = cand; > - best_pref = cand_pref; > + tree this_comp > + = get_debug_computation_at (data->current_loop, > + SSA_NAME_DEF_STMT (def), > + &dummy_use, cand); > + if (this_comp) > + { > + best_cand = cand; > + best_pref = cand_pref; > + comp = this_comp; > + } > } > } > > if (!best_cand) > continue; > > - tree comp = get_computation_at (data->current_loop, > - SSA_NAME_DEF_STMT (def), > - &dummy_use, best_cand); > - if (!comp) > - continue; > - > if (count > 1) > { > tree vexpr = make_node (DEBUG_EXPR_DECL); > > > Jakub >