diff --git a/trans-array.c b/trans-array.c index acd9aec18fefc6631ad443c5dff2d3014a9d6565..262743d0d3779b4f02a63d604bd9a621401ae84e 100644 *** a/trans-array.c --- b/trans-array.c *************** gfc_conv_expr_descriptor (gfc_se * se, g *** 6187,6193 **** gcc_assert ((expr->value.function.esym != NULL && expr->value.function.esym->attr.elemental) || (expr->value.function.isym != NULL ! && expr->value.function.isym->elemental)); else gcc_assert (ss_type == GFC_SS_INTRINSIC); --- 6187,6194 ---- gcc_assert ((expr->value.function.esym != NULL && expr->value.function.esym->attr.elemental) || (expr->value.function.isym != NULL ! && expr->value.function.isym->elemental) ! || gfc_inline_intrinsic_function_p (expr)); else gcc_assert (ss_type == GFC_SS_INTRINSIC); diff --git a/trans-intrinsic.c b/trans-intrinsic.c index 25c54fb6db9cbc7e80e3b3adca77b3bb13b15304..973f912a624bdd442b3851471432c358118438d6 100644 *** a/trans-intrinsic.c --- b/trans-intrinsic.c *************** gfc_conv_intrinsic_count (gfc_se * se, g *** 2557,2562 **** --- 2557,2576 ---- se->expr = resvar; } + + /* Update given gfc_se to have ss component pointing to the nested gfc_ss + struct and return the corresponding loopinfo. */ + + static gfc_loopinfo * + enter_nested_loop (gfc_se *se) + { + se->ss = se->ss->nested_ss; + gcc_assert (se->ss == se->ss->loop->ss); + + return se->ss->loop; + } + + /* Inline implementation of the sum and product intrinsics. */ static void gfc_conv_intrinsic_arith (gfc_se * se, gfc_expr * expr, enum tree_code op, *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2570,2587 **** tree tmp; gfc_loopinfo loop, *ploop; gfc_actual_arglist *arg_array, *arg_mask; ! gfc_ss *arrayss; ! gfc_ss *maskss; gfc_se arrayse; gfc_se maskse; gfc_se *parent_se; gfc_expr *arrayexpr; gfc_expr *maskexpr; ! if (se->ss) { ! gfc_conv_intrinsic_funcall (se, expr); ! return; } else parent_se = NULL; --- 2584,2601 ---- tree tmp; gfc_loopinfo loop, *ploop; gfc_actual_arglist *arg_array, *arg_mask; ! 
gfc_ss *arrayss = NULL; ! gfc_ss *maskss = NULL; gfc_se arrayse; gfc_se maskse; gfc_se *parent_se; gfc_expr *arrayexpr; gfc_expr *maskexpr; ! if (expr->rank > 0) { ! gcc_assert (gfc_inline_intrinsic_function_p (expr)); ! parent_se = se; } else parent_se = NULL; *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2613,2622 **** arg_array = expr->value.function.actual; - /* Walk the arguments. */ arrayexpr = arg_array->expr; - arrayss = gfc_walk_expr (arrayexpr); - gcc_assert (arrayss != gfc_ss_terminator); if (op == NE_EXPR || norm2) /* PARITY and NORM2. */ --- 2627,2633 ---- *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2628,2633 **** --- 2639,2650 ---- maskexpr = arg_mask->expr; } + if (expr->rank == 0) + { + /* Walk the arguments. */ + arrayss = gfc_walk_expr (arrayexpr); + gcc_assert (arrayss != gfc_ss_terminator); + if (maskexpr && maskexpr->rank > 0) { maskss = gfc_walk_expr (maskexpr); *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2651,2656 **** --- 2668,2680 ---- gfc_mark_ss_chain_used (maskss, 1); ploop = &loop; + } + else + /* All the work has been done in the parent loops. */ + ploop = enter_nested_loop (se); + + gcc_assert (ploop); + /* Generate the loop body. */ gfc_start_scalarized_body (ploop, &body); *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2659,2664 **** --- 2683,2689 ---- { gfc_init_se (&maskse, parent_se); gfc_copy_loopinfo_to_se (&maskse, ploop); + if (expr->rank == 0) maskse.ss = maskss; gfc_conv_expr_val (&maskse, maskexpr); gfc_add_block_to_block (&body, &maskse.pre); *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2671,2676 **** --- 2696,2702 ---- /* Do the actual summation/product. 
*/ gfc_init_se (&arrayse, parent_se); gfc_copy_loopinfo_to_se (&arrayse, ploop); + if (expr->rank == 0) arrayse.ss = arrayss; gfc_conv_expr_val (&arrayse, arrayexpr); gfc_add_block_to_block (&block, &arrayse.pre); *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2763,2779 **** /* For a scalar mask, enclose the loop in an if statement. */ if (maskexpr && maskexpr->rank == 0) { - gfc_init_se (&maskse, NULL); - gfc_conv_expr_val (&maskse, maskexpr); gfc_init_block (&block); gfc_add_block_to_block (&block, &ploop->pre); gfc_add_block_to_block (&block, &ploop->post); tmp = gfc_finish_block (&block); tmp = build3_v (COND_EXPR, maskse.expr, tmp, build_empty_stmt (input_location)); gfc_add_expr_to_block (&block, tmp); gfc_add_block_to_block (&se->pre, &block); } else { --- 2789,2817 ---- /* For a scalar mask, enclose the loop in an if statement. */ if (maskexpr && maskexpr->rank == 0) { gfc_init_block (&block); gfc_add_block_to_block (&block, &ploop->pre); gfc_add_block_to_block (&block, &ploop->post); tmp = gfc_finish_block (&block); + if (expr->rank > 0) + { + tmp = build3_v (COND_EXPR, se->ss->info->data.scalar.value, tmp, + build_empty_stmt (input_location)); + gfc_advance_se_ss_chain (se); + } + else + { + gcc_assert (expr->rank == 0); + gfc_init_se (&maskse, NULL); + gfc_conv_expr_val (&maskse, maskexpr); tmp = build3_v (COND_EXPR, maskse.expr, tmp, build_empty_stmt (input_location)); + } + gfc_add_expr_to_block (&block, tmp); gfc_add_block_to_block (&se->pre, &block); + gcc_assert (se->post.head == NULL); } else { *************** gfc_conv_intrinsic_arith (gfc_se * se, g *** 2781,2786 **** --- 2819,2825 ---- gfc_add_block_to_block (&se->pre, &ploop->post); } + if (expr->rank == 0) gfc_cleanup_loop (ploop); if (norm2) *************** walk_inline_intrinsic_transpose (gfc_ss *** 6801,6812 **** --- 6840,6966 ---- } + /* Move the given dimension of the given gfc_ss list to a nested gfc_ss list. 
+ This has the side effect of reversing the nested list, so there is no + need to call gfc_reverse_ss on it (the given list is assumed not to be + reversed yet). */ + + static gfc_ss * + nest_loop_dimension (gfc_ss *ss, int dim) + { + int ss_dim, i; + gfc_ss *new_ss, *prev_ss = gfc_ss_terminator; + gfc_loopinfo *new_loop; + + gcc_assert (ss != gfc_ss_terminator); + + for (; ss != gfc_ss_terminator; ss = ss->next) + { + new_ss = gfc_get_ss (); + new_ss->next = prev_ss; + new_ss->parent = ss; + new_ss->info = ss->info; + new_ss->info->refcount++; + if (ss->dimen != 0) + { + gcc_assert (ss->info->type != GFC_SS_SCALAR + && ss->info->type != GFC_SS_REFERENCE); + + new_ss->dimen = 1; + new_ss->dim[0] = ss->dim[dim]; + + gcc_assert (dim < ss->dimen); + + ss_dim = --ss->dimen; + for (i = dim; i < ss_dim; i++) + ss->dim[i] = ss->dim[i + 1]; + + ss->dim[ss_dim] = 0; + } + prev_ss = new_ss; + + if (ss->nested_ss) + { + ss->nested_ss->parent = new_ss; + new_ss->nested_ss = ss->nested_ss; + } + ss->nested_ss = new_ss; + } + + new_loop = gfc_get_loopinfo (); + gfc_init_loopinfo (new_loop); + + gcc_assert (prev_ss != NULL); + gcc_assert (prev_ss != gfc_ss_terminator); + gfc_add_ss_to_loop (new_loop, prev_ss); + return new_ss->parent; + } + + + /* Create the gfc_ss list for the SUM/PRODUCT arguments when the function + is to be inlined. */ + + static gfc_ss * + walk_inline_intrinsic_arith (gfc_ss *ss, gfc_expr *expr) + { + gfc_ss *tmp_ss, *tail, *array_ss; + gfc_actual_arglist *arg1, *arg2, *arg3; + int sum_dim; + bool scalar_mask = false; + + /* The rank of the result will be determined later. 
*/ + arg1 = expr->value.function.actual; + arg2 = arg1->next; + arg3 = arg2->next; + gcc_assert (arg3 != NULL); + + if (expr->rank == 0) + return ss; + + tmp_ss = gfc_ss_terminator; + + if (arg3->expr) + { + gfc_ss *mask_ss; + + mask_ss = gfc_walk_subexpr (tmp_ss, arg3->expr); + if (mask_ss == tmp_ss) + scalar_mask = 1; + + tmp_ss = mask_ss; + } + + array_ss = gfc_walk_subexpr (tmp_ss, arg1->expr); + gcc_assert (array_ss != tmp_ss); + + /* Odd thing: If the mask is scalar, it is used by the frontend after + the array (to make an if around the nested loop). Thus it shall + be after array_ss once the gfc_ss list is reversed. */ + if (scalar_mask) + tmp_ss = gfc_get_scalar_ss (array_ss, arg3->expr); + else + tmp_ss = array_ss; + + /* "Hide" the dimension on which we will sum in the first arg's scalarization + chain. */ + sum_dim = mpz_get_si (arg2->expr->value.integer) - 1; + tail = nest_loop_dimension (tmp_ss, sum_dim); + tail->next = ss; + + return tmp_ss; + } + + + static gfc_ss * walk_inline_intrinsic_function (gfc_ss * ss, gfc_expr * expr) { switch (expr->value.function.isym->id) { + case GFC_ISYM_PRODUCT: + case GFC_ISYM_SUM: + return walk_inline_intrinsic_arith (ss, expr); + case GFC_ISYM_TRANSPOSE: return walk_inline_intrinsic_transpose (ss, expr); *************** gfc_walk_intrinsic_libfunc (gfc_ss * ss, *** 6868,6878 **** --- 7022,7047 ---- bool gfc_inline_intrinsic_function_p (gfc_expr *expr) { + gfc_actual_arglist *args; + if (!expr->value.function.isym) return false; switch (expr->value.function.isym->id) { + case GFC_ISYM_PRODUCT: + case GFC_ISYM_SUM: + /* Disable inline expansion if code size matters. */ + if (optimize_size) + return false; + + args = expr->value.function.actual; + /* We need to be able to subset the SUM argument at compile-time. 
*/ + if (args->next->expr && args->next->expr->expr_type != EXPR_CONSTANT) + return false; + + return true; + case GFC_ISYM_TRANSPOSE: return true; diff --git a/trans.h b/trans.h index 5757865b3a180a32e5baa320c56d235924df68dc..22033d38d157f5c85eba6fcb8ee92ab28dc22535 100644 *** a/trans.h --- b/trans.h *************** typedef struct gfc_loopinfo *** 310,315 **** --- 310,316 ---- } gfc_loopinfo; + #define gfc_get_loopinfo() XCNEW (gfc_loopinfo) /* Information about a symbol that has been shadowed by a temporary. */ typedef struct