From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 2266 invoked by alias); 27 Oct 2011 23:36:12 -0000 Received: (qmail 3978 invoked by uid 22791); 27 Oct 2011 23:33:58 -0000 X-SWARE-Spam-Status: No, hits=0.6 required=5.0 tests=AWL,BAYES_50,KAM_STOCKGEN,RP_MATCHES_RCVD,TW_TM X-Spam-Check-By: sourceware.org Received: from smtp25.services.sfr.fr (HELO smtp25.services.sfr.fr) (93.17.128.120) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Thu, 27 Oct 2011 23:33:41 +0000 Received: from filter.sfr.fr (localhost [127.0.0.1]) by msfrf2512.sfr.fr (SMTP Server) with ESMTP id 879957000044; Fri, 28 Oct 2011 01:33:40 +0200 (CEST) Received: from gimli.local (145.15.72.86.rev.sfr.net [86.72.15.145]) by msfrf2512.sfr.fr (SMTP Server) with ESMTP id E1BD97000040; Fri, 28 Oct 2011 01:33:39 +0200 (CEST) X-SFR-UUID: 20111027233339924.E1BD97000040@msfrf2512.sfr.fr Content-Type: multipart/mixed; boundary="===============6664296882618565386==" MIME-Version: 1.0 From: Mikael Morin To: gfortran , GCC patches Message-ID: <20111027233339.18581.86093@gimli.local> In-Reply-To: <20111027232818.18581.901@gimli.local> References: <20111027232818.18581.901@gimli.local> Subject: [Patch, fortran] [62..66/66] inline sum and product: Inline sum. Date: Thu, 27 Oct 2011 23:36:00 -0000 X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org X-SW-Source: 2011-10/txt/msg02566.txt.bz2 --===============6664296882618565386== Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-length: 442 This is the purpose of all the serie of patches: inline sum and product. Patches 62..65, possibly don't deserve a patch of their own, they just make the changes in gfc_conv_intrinsic_arith easier to read. The combined diff (context, ignoring indent changes) also attached here. Patch 62: Change conditions. Patch 63: Update argument handling. Patch 64: Change loop usage. Patch 65: Change gfc_se structs initializers. Patch 66: Inline sum. --===============6664296882618565386== Content-Type: text/x-diff; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="pr43829-62..66.diff" Content-length: 13917 diff --git a/trans-array.c b/trans-array.c index acd9aec18fefc6631ad443c5dff2d3014a9d6565..262743d0d3779b4f02a63d604bd9a621401ae84e 100644 *** a/trans-array.c --- b/trans-array.c *************** gfc_conv_expr_descriptor (gfc_se * se, g *** 6187,6193 **** gcc_assert ((expr->value.function.esym != NULL && expr->value.function.esym->attr.elemental) || (expr->value.function.isym != NULL ! && expr->value.function.isym->elemental)); else gcc_assert (ss_type == GFC_SS_INTRINSIC); --- 6187,6194 ---- gcc_assert ((expr->value.function.esym != NULL && expr->value.function.esym->attr.elemental) || (expr->value.function.isym != NULL ! && expr->value.function.isym->elemental) ! || gfc_inline_intrinsic_function_p (expr)); else gcc_assert (ss_type == GFC_SS_INTRINSIC); diff --git a/trans-intrinsic.c b/trans-intrinsic.c index 3cdc1e0970aa4f787a60c96e50572a0bb1cb2a6e..973f912a624bdd442b3851471432c358118438d6 100644 *** a/trans-intrinsic.c --- b/trans-intrinsic.c *************** gfc_conv_intrinsic_count (gfc_se * se, g *** 2557,2562 **** --- 2557,2576 ---- se->expr = resvar; } + + /* Update given gfc_se to have ss component pointing to the nested gfc_ss + struct and return the corresponding loopinfo. */ + + static gfc_loopinfo * + enter_nested_loop (gfc_se *se) + { + se->ss = se->ss->nested_ss; + gcc_assert (se->ss == se->ss->loop->ss); + + return se->ss->loop; + } + + /* Inline implementation of the sum and product intrinsics. */ static void gfc_conv_intrinsic_arith (gfc_se * se, gfc_expr * expr, enum tree_code op, *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2568,2587 **** stmtblock_t body; stmtblock_t block; tree tmp; ! gfc_loopinfo loop; ! gfc_actual_arglist *actual; ! gfc_ss *arrayss; ! gfc_ss *maskss; gfc_se arrayse; gfc_se maskse; gfc_expr *arrayexpr; gfc_expr *maskexpr; ! if (se->ss) { ! gfc_conv_intrinsic_funcall (se, expr); ! return; } type = gfc_typenode_for_spec (&expr->ts); /* Initialize the result. */ --- 2582,2604 ---- stmtblock_t body; stmtblock_t block; tree tmp; ! gfc_loopinfo loop, *ploop; ! gfc_actual_arglist *arg_array, *arg_mask; ! gfc_ss *arrayss = NULL; ! gfc_ss *maskss = NULL; gfc_se arrayse; gfc_se maskse; + gfc_se *parent_se; gfc_expr *arrayexpr; gfc_expr *maskexpr; ! if (expr->rank > 0) { ! gcc_assert (gfc_inline_intrinsic_function_p (expr)); ! parent_se = se; } + else + parent_se = NULL; type = gfc_typenode_for_spec (&expr->ts); /* Initialize the result. */ *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2608,2630 **** gfc_add_modify (&se->pre, resvar, tmp); ! /* Walk the arguments. */ ! actual = expr->value.function.actual; ! arrayexpr = actual->expr; ! arrayss = gfc_walk_expr (arrayexpr); ! gcc_assert (arrayss != gfc_ss_terminator); if (op == NE_EXPR || norm2) /* PARITY and NORM2. */ maskexpr = NULL; else { ! actual = actual->next->next; ! gcc_assert (actual); ! maskexpr = actual->expr; } ! if (maskexpr && maskexpr->rank != 0) { maskss = gfc_walk_expr (maskexpr); gcc_assert (maskss != gfc_ss_terminator); --- 2625,2651 ---- gfc_add_modify (&se->pre, resvar, tmp); ! arg_array = expr->value.function.actual; ! ! arrayexpr = arg_array->expr; if (op == NE_EXPR || norm2) /* PARITY and NORM2. */ maskexpr = NULL; else { ! arg_mask = arg_array->next->next; ! gcc_assert (arg_mask != NULL); ! maskexpr = arg_mask->expr; } ! if (expr->rank == 0) ! { ! /* Walk the arguments. */ ! arrayss = gfc_walk_expr (arrayexpr); ! gcc_assert (arrayss != gfc_ss_terminator); ! ! if (maskexpr && maskexpr->rank > 0) { maskss = gfc_walk_expr (maskexpr); gcc_assert (maskss != gfc_ss_terminator); *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2635,2641 **** /* Initialize the scalarizer. */ gfc_init_loopinfo (&loop); gfc_add_ss_to_loop (&loop, arrayss); ! if (maskss) gfc_add_ss_to_loop (&loop, maskss); /* Initialize the loop. */ --- 2656,2662 ---- /* Initialize the scalarizer. */ gfc_init_loopinfo (&loop); gfc_add_ss_to_loop (&loop, arrayss); ! if (maskexpr && maskexpr->rank > 0) gfc_add_ss_to_loop (&loop, maskss); /* Initialize the loop. */ *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2643,2658 **** gfc_conv_loop_setup (&loop, &expr->where); gfc_mark_ss_chain_used (arrayss, 1); ! if (maskss) gfc_mark_ss_chain_used (maskss, 1); /* Generate the loop body. */ ! gfc_start_scalarized_body (&loop, &body); /* If we have a mask, only add this element if the mask is set. */ ! if (maskss) { ! gfc_init_se (&maskse, NULL); ! gfc_copy_loopinfo_to_se (&maskse, &loop); maskse.ss = maskss; gfc_conv_expr_val (&maskse, maskexpr); gfc_add_block_to_block (&body, &maskse.pre); --- 2664,2689 ---- gfc_conv_loop_setup (&loop, &expr->where); gfc_mark_ss_chain_used (arrayss, 1); ! if (maskexpr && maskexpr->rank > 0) gfc_mark_ss_chain_used (maskss, 1); + + ploop = &loop; + } + else + /* All the work has been done in the parent loops. */ + ploop = enter_nested_loop (se); + + gcc_assert (ploop); + /* Generate the loop body. */ ! gfc_start_scalarized_body (ploop, &body); /* If we have a mask, only add this element if the mask is set. */ ! if (maskexpr && maskexpr->rank > 0) { ! gfc_init_se (&maskse, parent_se); ! gfc_copy_loopinfo_to_se (&maskse, ploop); ! if (expr->rank == 0) maskse.ss = maskss; gfc_conv_expr_val (&maskse, maskexpr); gfc_add_block_to_block (&body, &maskse.pre); *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2663,2670 **** gfc_init_block (&block); /* Do the actual summation/product. */ ! gfc_init_se (&arrayse, NULL); ! gfc_copy_loopinfo_to_se (&arrayse, &loop); arrayse.ss = arrayss; gfc_conv_expr_val (&arrayse, arrayexpr); gfc_add_block_to_block (&block, &arrayse.pre); --- 2694,2702 ---- gfc_init_block (&block); /* Do the actual summation/product. */ ! gfc_init_se (&arrayse, parent_se); ! gfc_copy_loopinfo_to_se (&arrayse, ploop); ! if (expr->rank == 0) arrayse.ss = arrayss; gfc_conv_expr_val (&arrayse, arrayexpr); gfc_add_block_to_block (&block, &arrayse.pre); *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2740,2746 **** gfc_add_block_to_block (&block, &arrayse.post); ! if (maskss) { /* We enclose the above in if (mask) {...} . */ --- 2772,2778 ---- gfc_add_block_to_block (&block, &arrayse.post); ! if (maskexpr && maskexpr->rank > 0) { /* We enclose the above in if (mask) {...} . */ *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2752,2781 **** tmp = gfc_finish_block (&block); gfc_add_expr_to_block (&body, tmp); ! gfc_trans_scalarizing_loops (&loop, &body); /* For a scalar mask, enclose the loop in an if statement. */ ! if (maskexpr && maskss == NULL) { - gfc_init_se (&maskse, NULL); - gfc_conv_expr_val (&maskse, maskexpr); gfc_init_block (&block); ! gfc_add_block_to_block (&block, &loop.pre); ! gfc_add_block_to_block (&block, &loop.post); tmp = gfc_finish_block (&block); tmp = build3_v (COND_EXPR, maskse.expr, tmp, build_empty_stmt (input_location)); gfc_add_expr_to_block (&block, tmp); gfc_add_block_to_block (&se->pre, &block); } else { ! gfc_add_block_to_block (&se->pre, &loop.pre); ! gfc_add_block_to_block (&se->pre, &loop.post); } ! gfc_cleanup_loop (&loop); if (norm2) { --- 2784,2826 ---- tmp = gfc_finish_block (&block); gfc_add_expr_to_block (&body, tmp); ! gfc_trans_scalarizing_loops (ploop, &body); /* For a scalar mask, enclose the loop in an if statement. */ ! if (maskexpr && maskexpr->rank == 0) { gfc_init_block (&block); ! gfc_add_block_to_block (&block, &ploop->pre); ! gfc_add_block_to_block (&block, &ploop->post); tmp = gfc_finish_block (&block); + if (expr->rank > 0) + { + tmp = build3_v (COND_EXPR, se->ss->info->data.scalar.value, tmp, + build_empty_stmt (input_location)); + gfc_advance_se_ss_chain (se); + } + else + { + gcc_assert (expr->rank == 0); + gfc_init_se (&maskse, NULL); + gfc_conv_expr_val (&maskse, maskexpr); tmp = build3_v (COND_EXPR, maskse.expr, tmp, build_empty_stmt (input_location)); + } + gfc_add_expr_to_block (&block, tmp); gfc_add_block_to_block (&se->pre, &block); + gcc_assert (se->post.head == NULL); } else { ! gfc_add_block_to_block (&se->pre, &ploop->pre); ! gfc_add_block_to_block (&se->pre, &ploop->post); } ! if (expr->rank == 0) ! gfc_cleanup_loop (ploop); if (norm2) { *************** walk_inline_intrinsic_transpose (gfc_ss *** 6795,6806 **** --- 6840,6966 ---- } + /* Move the given dimension of the given gfc_ss list to a nested gfc_ss list. + This has the side effect of reversing the nested list, so there is no + need to call gfc_reverse_ss on it (the given list is assumed not to be + reversed yet). */ + + static gfc_ss * + nest_loop_dimension (gfc_ss *ss, int dim) + { + int ss_dim, i; + gfc_ss *new_ss, *prev_ss = gfc_ss_terminator; + gfc_loopinfo *new_loop; + + gcc_assert (ss != gfc_ss_terminator); + + for (; ss != gfc_ss_terminator; ss = ss->next) + { + new_ss = gfc_get_ss (); + new_ss->next = prev_ss; + new_ss->parent = ss; + new_ss->info = ss->info; + new_ss->info->refcount++; + if (ss->dimen != 0) + { + gcc_assert (ss->info->type != GFC_SS_SCALAR + && ss->info->type != GFC_SS_REFERENCE); + + new_ss->dimen = 1; + new_ss->dim[0] = ss->dim[dim]; + + gcc_assert (dim < ss->dimen); + + ss_dim = --ss->dimen; + for (i = dim; i < ss_dim; i++) + ss->dim[i] = ss->dim[i + 1]; + + ss->dim[ss_dim] = 0; + } + prev_ss = new_ss; + + if (ss->nested_ss) + { + ss->nested_ss->parent = new_ss; + new_ss->nested_ss = ss->nested_ss; + } + ss->nested_ss = new_ss; + } + + new_loop = gfc_get_loopinfo (); + gfc_init_loopinfo (new_loop); + + gcc_assert (prev_ss != NULL); + gcc_assert (prev_ss != gfc_ss_terminator); + gfc_add_ss_to_loop (new_loop, prev_ss); + return new_ss->parent; + } + + + /* Create the gfc_ss list for the SUM/PRODUCT arguments when the function + is to be inlined. */ + + static gfc_ss * + walk_inline_intrinsic_arith (gfc_ss *ss, gfc_expr *expr) + { + gfc_ss *tmp_ss, *tail, *array_ss; + gfc_actual_arglist *arg1, *arg2, *arg3; + int sum_dim; + bool scalar_mask = false; + + /* The rank of the result will be determined later. */ + arg1 = expr->value.function.actual; + arg2 = arg1->next; + arg3 = arg2->next; + gcc_assert (arg3 != NULL); + + if (expr->rank == 0) + return ss; + + tmp_ss = gfc_ss_terminator; + + if (arg3->expr) + { + gfc_ss *mask_ss; + + mask_ss = gfc_walk_subexpr (tmp_ss, arg3->expr); + if (mask_ss == tmp_ss) + scalar_mask = 1; + + tmp_ss = mask_ss; + } + + array_ss = gfc_walk_subexpr (tmp_ss, arg1->expr); + gcc_assert (array_ss != tmp_ss); + + /* Odd thing: If the mask is scalar, it is used by the frontend after + the array (to make it array around the nested loop). Thus it shall + be after array_ss once the gfc_ss list is reversed. */ + if (scalar_mask) + tmp_ss = gfc_get_scalar_ss (array_ss, arg3->expr); + else + tmp_ss = array_ss; + + /* "Hide" the dimension on which we will sum in the first arg's scalarization + chain. */ + sum_dim = mpz_get_si (arg2->expr->value.integer) - 1; + tail = nest_loop_dimension (tmp_ss, sum_dim); + tail->next = ss; + + return tmp_ss; + } + + static gfc_ss * walk_inline_intrinsic_function (gfc_ss * ss, gfc_expr * expr) { switch (expr->value.function.isym->id) { + case GFC_ISYM_PRODUCT: + case GFC_ISYM_SUM: + return walk_inline_intrinsic_arith (ss, expr); + case GFC_ISYM_TRANSPOSE: return walk_inline_intrinsic_transpose (ss, expr); *************** gfc_walk_intrinsic_libfunc (gfc_ss * ss, *** 6862,6872 **** --- 7022,7047 ---- bool gfc_inline_intrinsic_function_p (gfc_expr *expr) { + gfc_actual_arglist *args; + if (!expr->value.function.isym) return false; switch (expr->value.function.isym->id) { + case GFC_ISYM_PRODUCT: + case GFC_ISYM_SUM: + /* Disable inline expansion if code size matters. */ + if (optimize_size) + return false; + + args = expr->value.function.actual; + /* We need to be able to subset the SUM argument at compile-time. */ + if (args->next->expr && args->next->expr->expr_type != EXPR_CONSTANT) + return false; + + return true; + case GFC_ISYM_TRANSPOSE: return true; diff --git a/trans.h b/trans.h index 5757865b3a180a32e5baa320c56d235924df68dc..22033d38d157f5c85eba6fcb8ee92ab28dc22535 100644 *** a/trans.h --- b/trans.h *************** typedef struct gfc_loopinfo *** 310,315 **** --- 310,316 ---- } gfc_loopinfo; + #define gfc_get_loopinfo() XCNEW (gfc_loopinfo) /* Information about a symbol that has been shadowed by a temporary. */ typedef struct --===============6664296882618565386==--