From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 17464 invoked by alias); 10 May 2011 09:56:02 -0000 Received: (qmail 17443 invoked by uid 22791); 10 May 2011 09:55:56 -0000 X-SWARE-Spam-Status: No, hits=-5.8 required=5.0 tests=AWL,BAYES_00,RCVD_IN_DNSWL_HI,TW_CF,TW_TM,T_FRT_FREE,T_RP_MATCHES_RCVD X-Spam-Check-By: sourceware.org Received: from cantor.suse.de (HELO mx1.suse.de) (195.135.220.2) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Tue, 10 May 2011 09:55:34 +0000 Received: from relay2.suse.de (charybdis-ext.suse.de [195.135.221.2]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.suse.de (Postfix) with ESMTP id 72998947EC for ; Tue, 10 May 2011 11:55:32 +0200 (CEST) Date: Tue, 10 May 2011 11:52:00 -0000 From: Richard Guenther To: gcc-patches@gcc.gnu.org Subject: [PATCH] Teach forwprop how to optimize successive conversions Message-ID: User-Agent: Alpine 2.00 (LNX 1167 2008-08-23) MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org X-SW-Source: 2011-05/txt/msg00723.txt.bz2 This moves fold-const's conversion folding to forwprop, our tree pattern matcher & optimizer. In addition to what fold-const does, this makes us canonicalize unsigned truncations to use bitwise and of a mask; this form is more readily handled by passes like VRP and this also makes it easier to combine with existing bitwise operations (or those I will introduce with bitfield lowering which this patch incidentally will be required for). Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. Richard. 2011-05-10 Richard Guenther * tree-ssa-forwprop.c (combine_conversions): Pattern-match a series of conversions and apply foldings similar to what fold-const does. (tree_ssa_forward_propagate_single_use_vars): Call it. 
* gcc.dg/tree-ssa/ssa-fre-2.c: Disable forwprop. * gcc.dg/tree-ssa/ssa-fre-3.c: Likewise. * gcc.dg/tree-ssa/ssa-fre-4.c: Likewise. * gcc.dg/tree-ssa/ssa-fre-5.c: Likewise. * gcc.dg/tree-ssa/scev-cast.c: Adjust. Note what transformation applies. Index: gcc/tree-ssa-forwprop.c =================================================================== *** gcc/tree-ssa-forwprop.c (revision 173575) --- gcc/tree-ssa-forwprop.c (working copy) *************** out: *** 1938,1943 **** --- 1938,2103 ---- return false; } + /* Combine two conversions in a row for the second conversion at *GSI. + Returns true if there were any changes made. */ + + static bool + combine_conversions (gimple_stmt_iterator *gsi) + { + gimple stmt = gsi_stmt (*gsi); + gimple def_stmt; + tree op0, lhs; + enum tree_code code = gimple_assign_rhs_code (stmt); + + gcc_checking_assert (CONVERT_EXPR_CODE_P (code) + || code == FLOAT_EXPR + || code == FIX_TRUNC_EXPR); + + lhs = gimple_assign_lhs (stmt); + op0 = gimple_assign_rhs1 (stmt); + if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (op0))) + { + gimple_assign_set_rhs_code (stmt, TREE_CODE (op0)); + return true; + } + + if (TREE_CODE (op0) != SSA_NAME) + return false; + + def_stmt = SSA_NAME_DEF_STMT (op0); + if (!is_gimple_assign (def_stmt)) + return false; + + if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt))) + { + tree defop0 = gimple_assign_rhs1 (def_stmt); + tree type = TREE_TYPE (lhs); + tree inside_type = TREE_TYPE (defop0); + tree inter_type = TREE_TYPE (op0); + int inside_int = INTEGRAL_TYPE_P (inside_type); + int inside_ptr = POINTER_TYPE_P (inside_type); + int inside_float = FLOAT_TYPE_P (inside_type); + int inside_vec = TREE_CODE (inside_type) == VECTOR_TYPE; + unsigned int inside_prec = TYPE_PRECISION (inside_type); + int inside_unsignedp = TYPE_UNSIGNED (inside_type); + int inter_int = INTEGRAL_TYPE_P (inter_type); + int inter_ptr = POINTER_TYPE_P (inter_type); + int inter_float = FLOAT_TYPE_P (inter_type); + int inter_vec = 
TREE_CODE (inter_type) == VECTOR_TYPE; + unsigned int inter_prec = TYPE_PRECISION (inter_type); + int inter_unsignedp = TYPE_UNSIGNED (inter_type); + int final_int = INTEGRAL_TYPE_P (type); + int final_ptr = POINTER_TYPE_P (type); + int final_float = FLOAT_TYPE_P (type); + int final_vec = TREE_CODE (type) == VECTOR_TYPE; + unsigned int final_prec = TYPE_PRECISION (type); + int final_unsignedp = TYPE_UNSIGNED (type); + + /* In addition to the cases of two conversions in a row + handled below, if we are converting something to its own + type via an object of identical or wider precision, neither + conversion is needed. */ + if (useless_type_conversion_p (type, inside_type) + && (((inter_int || inter_ptr) && final_int) + || (inter_float && final_float)) + && inter_prec >= final_prec) + { + gimple_assign_set_rhs1 (stmt, unshare_expr (defop0)); + gimple_assign_set_rhs_code (stmt, TREE_CODE (defop0)); + update_stmt (stmt); + return true; + } + + /* Likewise, if the intermediate and initial types are either both + float or both integer, we don't need the middle conversion if the + former is wider than the latter and doesn't change the signedness + (for integers). Avoid this if the final type is a pointer since + then we sometimes need the middle conversion. Likewise if the + final type has a precision not equal to the size of its mode. */ + if (((inter_int && inside_int) + || (inter_float && inside_float) + || (inter_vec && inside_vec)) + && inter_prec >= inside_prec + && (inter_float || inter_vec + || inter_unsignedp == inside_unsignedp) + && ! (final_prec != GET_MODE_BITSIZE (TYPE_MODE (type)) + && TYPE_MODE (type) == TYPE_MODE (inter_type)) + && ! final_ptr + && (! final_vec || inter_prec == inside_prec)) + { + gimple_assign_set_rhs1 (stmt, defop0); + update_stmt (stmt); + return true; + } + + /* If we have a sign-extension of a zero-extended value, we can + replace that by a single zero-extension. 
*/ + if (inside_int && inter_int && final_int + && inside_prec < inter_prec && inter_prec < final_prec + && inside_unsignedp && !inter_unsignedp) + { + gimple_assign_set_rhs1 (stmt, defop0); + update_stmt (stmt); + return true; + } + + /* Two conversions in a row are not needed unless: + - some conversion is floating-point (overstrict for now), or + - some conversion is a vector (overstrict for now), or + - the intermediate type is narrower than both initial and + final, or + - the intermediate type and innermost type differ in signedness, + and the outermost type is wider than the intermediate, or + - the initial type is a pointer type and the precisions of the + intermediate and final types differ, or + - the final type is a pointer type and the precisions of the + initial and intermediate types differ. */ + if (! inside_float && ! inter_float && ! final_float + && ! inside_vec && ! inter_vec && ! final_vec + && (inter_prec >= inside_prec || inter_prec >= final_prec) + && ! (inside_int && inter_int + && inter_unsignedp != inside_unsignedp + && inter_prec < final_prec) + && ((inter_unsignedp && inter_prec > inside_prec) + == (final_unsignedp && final_prec > inter_prec)) + && ! (inside_ptr && inter_prec != final_prec) + && ! (final_ptr && inside_prec != inter_prec) + && ! (final_prec != GET_MODE_BITSIZE (TYPE_MODE (type)) + && TYPE_MODE (type) == TYPE_MODE (inter_type))) + { + gimple_assign_set_rhs1 (stmt, defop0); + update_stmt (stmt); + return true; + } + + /* A truncation to an unsigned type should be canonicalized as + bitwise and of a mask. 
*/ + if (final_int && inter_int && inside_int + && final_prec == inside_prec + && final_prec > inter_prec + && inter_unsignedp) + { + tree tem; + tem = fold_build2 (BIT_AND_EXPR, inside_type, + defop0, + double_int_to_tree + (inside_type, double_int_mask (inter_prec))); + if (!useless_type_conversion_p (type, inside_type)) + { + tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE, true, + GSI_SAME_STMT); + gimple_assign_set_rhs1 (stmt, tem); + } + else + gimple_assign_set_rhs_from_tree (gsi, tem); + update_stmt (gsi_stmt (*gsi)); + return true; + } + } + + return false; + } + /* Main entry point for the forward propagation optimizer. */ static unsigned int *************** tree_ssa_forward_propagate_single_use_va *** 2061,2066 **** --- 2221,2233 ---- cfg_changed |= associate_plusminus (stmt); gsi_next (&gsi); } + else if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)) + || gimple_assign_rhs_code (stmt) == FLOAT_EXPR + || gimple_assign_rhs_code (stmt) == FIX_TRUNC_EXPR) + { + if (!combine_conversions (&gsi)) + gsi_next (&gsi); + } else gsi_next (&gsi); } Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-2.c =================================================================== *** gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-2.c (revision 173575) --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-2.c (working copy) *************** *** 1,5 **** /* { dg-do compile } */ ! /* { dg-options "-O -fdump-tree-fre1-details" } */ /* From PR14287. */ --- 1,5 ---- /* { dg-do compile } */ ! /* { dg-options "-O -fno-tree-forwprop -fdump-tree-fre1-details" } */ /* From PR14287. */ Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c =================================================================== *** gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c (revision 173575) --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c (working copy) *************** *** 6,12 **** When the condition is true, we distribute "(int) (a + b)" as "(int) a + (int) b", otherwise we keep the original. */ /* { dg-do compile { target { { ! 
mips64 } && { ! spu-*-* } } } } */ ! /* { dg-options "-O -fwrapv -fdump-tree-fre1-details" } */ /* From PR14844. */ --- 6,12 ---- When the condition is true, we distribute "(int) (a + b)" as "(int) a + (int) b", otherwise we keep the original. */ /* { dg-do compile { target { { ! mips64 } && { ! spu-*-* } } } } */ ! /* { dg-options "-O -fno-tree-forwprop -fwrapv -fdump-tree-fre1-details" } */ /* From PR14844. */ Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-4.c =================================================================== *** gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-4.c (revision 173575) --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-4.c (working copy) *************** *** 1,7 **** /* If the target returns false for TARGET_PROMOTE_PROTOTYPES, then there will be no casts for FRE to eliminate and the test will fail. */ /* { dg-do compile { target i?86-*-* x86_64-*-* hppa*-*-* mips*-*-* m68k*-*-* } } */ ! /* { dg-options "-O -fdump-tree-fre1-details" } */ /* From PR21608. */ --- 1,7 ---- /* If the target returns false for TARGET_PROMOTE_PROTOTYPES, then there will be no casts for FRE to eliminate and the test will fail. */ /* { dg-do compile { target i?86-*-* x86_64-*-* hppa*-*-* mips*-*-* m68k*-*-* } } */ ! /* { dg-options "-O -fno-tree-forwprop -fdump-tree-fre1-details" } */ /* From PR21608. */ Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-5.c =================================================================== *** gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-5.c (revision 173575) --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-5.c (working copy) *************** *** 1,5 **** /* { dg-do compile } */ ! /* { dg-options "-O -fdump-tree-fre1-details" } */ /* From PR19792. */ --- 1,5 ---- /* { dg-do compile } */ ! /* { dg-options "-O -fno-tree-forwprop -fdump-tree-fre1-details" } */ /* From PR19792. 
*/ Index: gcc/testsuite/gcc.dg/tree-ssa/scev-cast.c =================================================================== *** gcc/testsuite/gcc.dg/tree-ssa/scev-cast.c (revision 173575) --- gcc/testsuite/gcc.dg/tree-ssa/scev-cast.c (working copy) *************** *** 3,26 **** /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -fdump-tree-optimized" } */ ! void blas (char xxx); void blau (unsigned char xxx); void tst(void) { unsigned i; ! for (i = 0; i < 128; i++) /* This cast to char has to be preserved. */ ! blas ((char) i); ! for (i = 0; i < 127; i++) /* And this one does not. */ ! blas ((char) i); ! for (i = 0; i < 255; i++) /* This cast is not necessary. */ blau ((unsigned char) i); - for (i = 0; i < 256; i++) - blau ((unsigned char) i); /* This one is necessary. */ } ! /* { dg-final { scan-tree-dump-times "= \\(unsigned char\\)" 1 "optimized" } } */ ! /* { dg-final { scan-tree-dump-times "= \\(char\\)" 1 "optimized" } } */ /* { dg-final { cleanup-tree-dump "optimized" } } */ --- 3,28 ---- /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O2 -fdump-tree-optimized" } */ ! void blas (signed char xxx); void blau (unsigned char xxx); void tst(void) { unsigned i; ! for (i = 0; i < 129; i++) /* This truncation to char has to be preserved. */ ! blas ((signed char) i); ! for (i = 0; i < 128; i++) /* This one is not necessary, but nothing eliminates it. */ ! blas ((signed char) i); ! for (i = 0; i < 127; i++) /* This one is not necessary, IVOPTS eliminates it. */ ! blas ((signed char) i); ! for (i = 0; i < 256; i++) /* This one is not necessary, VRP eliminates it. */ ! blau ((unsigned char) i); ! for (i = 0; i < 257; i++) /* This one is necessary. */ blau ((unsigned char) i); } ! /* { dg-final { scan-tree-dump-times "& 255" 1 "optimized" } } */ ! /* { dg-final { scan-tree-dump-times "= \\(signed char\\)" 2 "optimized" } } */ /* { dg-final { cleanup-tree-dump "optimized" } } */