From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 35095 invoked by alias); 22 Oct 2019 07:33:02 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 34957 invoked by uid 89); 22 Oct 2019 07:33:02 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-7.6 required=5.0 tests=AWL,BAYES_00,GIT_PATCH_2,GIT_PATCH_3 autolearn=ham version=3.3.1 spammy= X-HELO: us-smtp-1.mimecast.com Received: from us-smtp-delivery-1.mimecast.com (HELO us-smtp-1.mimecast.com) (205.139.110.120) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Tue, 22 Oct 2019 07:33:00 +0000 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1571729578; h=from:from:reply-to:reply-to:subject:subject:date:date: message-id:message-id:to:to:cc:cc:mime-version:mime-version: content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=UFIpjkYX++yYJYJkyR6tQfOFPpgbGoM4GSxV5GrescU=; b=eiF00IjfnCIOGjv3J8+zO9lDzeg2KxWCNj3UfknGA09Rp4sVBRzWCTuXNtHZ6kwHupaEkO IZQGmUTlDk6xliqHGJGMybwm2Owyj00NqPxCcy2bpkgT4UfYcgyxiLbuPTpaWTNuYlO6Nc nsVFXBt+eG7mzRUKTwIbSIgrozpuQSs= Received: from mimecast-mx01.redhat.com (mimecast-mx01.redhat.com [209.132.183.4]) (Using TLS) by relay.mimecast.com with ESMTP id us-mta-74-TU176cXQOAKLm0VKCKUVTw-1; Tue, 22 Oct 2019 03:32:55 -0400 Received: from smtp.corp.redhat.com (int-mx06.intmail.prod.int.phx2.redhat.com [10.5.11.16]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx01.redhat.com (Postfix) with ESMTPS id 7F25A1005509; Tue, 22 Oct 2019 07:32:54 +0000 (UTC) Received: from tucnak.zalov.cz (unknown [10.36.118.135]) by smtp.corp.redhat.com (Postfix) with ESMTPS id 0CAA05C1D4; Tue, 22 Oct 2019 07:32:53 +0000 (UTC) Received: from tucnak.zalov.cz (localhost [127.0.0.1]) by tucnak.zalov.cz (8.15.2/8.15.2) with ESMTP id x9M7WpLD013262; Tue, 22 Oct 2019 09:32:51 +0200 Received: (from jakub@localhost) by tucnak.zalov.cz (8.15.2/8.15.2/Submit) id x9M7WnES013261; Tue, 22 Oct 2019 09:32:49 +0200 Date: Tue, 22 Oct 2019 07:43:00 -0000 From: Jakub Jelinek To: "Bin.Cheng" Cc: Richard Biener , Alexandre Oliva , gcc-patches List Subject: Re: [PATCH] Improve debug info in ivopts optimized loops (PR debug/90231) Message-ID: <20191022073248.GY2116@tucnak> Reply-To: Jakub Jelinek References: <20191019062731.GL2116@tucnak> <20191021112430.GT2116@tucnak> MIME-Version: 1.0 In-Reply-To: <20191021112430.GT2116@tucnak> User-Agent: Mutt/1.11.3 (2019-02-01) X-Mimecast-Spam-Score: 0 Content-Type: text/plain; charset=WINDOWS-1252 Content-Transfer-Encoding: quoted-printable Content-Disposition: inline X-IsSubscribed: yes X-SW-Source: 2019-10/txt/msg01543.txt.bz2 On Mon, Oct 21, 2019 at 01:24:30PM +0200, Jakub Jelinek wrote: > So I wonder if for correctness I don't need to add: >=20 > if (!use->iv->no_overflow > && !cand->iv->no_overflow > && !integer_pow2p (cstep)) > return NULL_TREE; >=20 > with some of the above as comment explaining why. >=20 > On the other side, if cand->iv->no_overflow, couldn't we bypass the extra > precision test? Here are these two in patch form. 2019-10-22 Jakub Jelinek PR debug/90231 * tree-ssa-loop-ivopts.c (get_debug_computation_at): New function. (remove_unused_ivs): Use it instead of get_computation_at. When choosing best candidate, only consider candidates where get_debug_computation_at actually returns non-NULL. --- gcc/tree-ssa-loop-ivopts.c.jj 2019-10-21 14:17:57.598198162 +0200 +++ gcc/tree-ssa-loop-ivopts.c 2019-10-22 09:30:09.782238157 +0200 @@ -4089,6 +4089,94 @@ get_computation_at (class loop *loop, gi return fold_convert (type, aff_combination_to_tree (&aff)); } =20 +/* Like get_computation_at, but try harder, even if the computation + is more expensive. Intended for debug stmts. */ + +static tree +get_debug_computation_at (class loop *loop, gimple *at, + struct iv_use *use, struct iv_cand *cand) +{ + if (tree ret =3D get_computation_at (loop, at, use, cand)) + return ret; + + tree ubase =3D use->iv->base, ustep =3D use->iv->step; + tree cbase =3D cand->iv->base, cstep =3D cand->iv->step; + tree var; + tree utype =3D TREE_TYPE (ubase), ctype =3D TREE_TYPE (cbase); + widest_int rat; + + /* We must have a precision to express the values of use. */ + if (TYPE_PRECISION (utype) >=3D TYPE_PRECISION (ctype)) + return NULL_TREE; + + /* Try to handle the case that get_computation_at doesn't, + try to express + use =3D ubase + (var - cbase) / ratio. */ + if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep= ), + &rat)) + return NULL_TREE; + + bool neg_p =3D false; + if (wi::neg_p (rat)) + { + if (TYPE_UNSIGNED (ctype)) + return NULL_TREE; + neg_p =3D true; + rat =3D wi::neg (rat); + } + + /* If both IVs can wrap around and CAND doesn't have a power of two step, + it is unsafe. Consider uint16_t CAND with step 9, when wrapping arou= nd, + the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say + uint8_t with step 3, those values divided by 3 cast to uint8_t will be + ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59.= */ + if (!use->iv->no_overflow + && !cand->iv->no_overflow + && !integer_pow2p (cstep)) + return NULL_TREE; + + int bits =3D wi::exact_log2 (rat); + if (bits =3D=3D -1) + bits =3D wi::floor_log2 (rat) + 1; + if (!cand->iv->no_overflow + && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype)) + return NULL_TREE; + + var =3D var_at_stmt (loop, cand, at); + + if (POINTER_TYPE_P (ctype)) + { + ctype =3D unsigned_type_for (ctype); + cbase =3D fold_convert (ctype, cbase); + cstep =3D fold_convert (ctype, cstep); + var =3D fold_convert (ctype, var); + } + + ubase =3D unshare_expr (ubase); + cbase =3D unshare_expr (cbase); + if (stmt_after_increment (loop, cand, at)) + var =3D fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, + unshare_expr (cstep)); + + var =3D fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase); + var =3D fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var, + wide_int_to_tree (TREE_TYPE (var), rat)); + if (POINTER_TYPE_P (utype)) + { + var =3D fold_convert (sizetype, var); + if (neg_p) + var =3D fold_build1 (NEGATE_EXPR, sizetype, var); + var =3D fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var); + } + else + { + var =3D fold_convert (utype, var); + var =3D fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype, + ubase, var); + } + return var; +} + /* Adjust the cost COST for being in loop setup rather than loop body. If we're optimizing for space, the loop setup overhead is constant; if we're optimizing for speed, amortize it over the per-iteration cost. @@ -7523,6 +7611,7 @@ remove_unused_ivs (struct ivopts_data *d struct iv_use dummy_use; struct iv_cand *best_cand =3D NULL, *cand; unsigned i, best_pref =3D 0, cand_pref; + tree comp =3D NULL_TREE; =20 memset (&dummy_use, 0, sizeof (dummy_use)); dummy_use.iv =3D info->iv; @@ -7543,20 +7632,22 @@ remove_unused_ivs (struct ivopts_data *d ? 1 : 0; if (best_cand =3D=3D NULL || best_pref < cand_pref) { - best_cand =3D cand; - best_pref =3D cand_pref; + tree this_comp + =3D get_debug_computation_at (data->current_loop, + SSA_NAME_DEF_STMT (def), + &dummy_use, cand); + if (this_comp) + { + best_cand =3D cand; + best_pref =3D cand_pref; + comp =3D this_comp; + } } } =20 if (!best_cand) continue; =20 - tree comp =3D get_computation_at (data->current_loop, - SSA_NAME_DEF_STMT (def), - &dummy_use, best_cand); - if (!comp) - continue; - if (count > 1) { tree vexpr =3D make_node (DEBUG_EXPR_DECL); Jakub