diff --git a/trans-array.c b/trans-array.c index acd9aec..262743d 100644 --- a/trans-array.c +++ b/trans-array.c @@ -6187,7 +6187,8 @@ gfc_conv_expr_descriptor (gfc_se * se, gfc_expr * expr, gfc_ss * ss) gcc_assert ((expr->value.function.esym != NULL && expr->value.function.esym->attr.elemental) || (expr->value.function.isym != NULL - && expr->value.function.isym->elemental)); + && expr->value.function.isym->elemental) + || gfc_inline_intrinsic_function_p (expr)); else gcc_assert (ss_type == GFC_SS_INTRINSIC); diff --git a/trans-intrinsic.c b/trans-intrinsic.c index 25c54fb..973f912 100644 --- a/trans-intrinsic.c +++ b/trans-intrinsic.c @@ -2557,6 +2557,20 @@ gfc_conv_intrinsic_count (gfc_se * se, gfc_expr * expr) se->expr = resvar; } + +/* Update given gfc_se to have ss component pointing to the nested gfc_ss + struct and return the corresponding loopinfo. */ + +static gfc_loopinfo * +enter_nested_loop (gfc_se *se) +{ + se->ss = se->ss->nested_ss; + gcc_assert (se->ss == se->ss->loop->ss); + + return se->ss->loop; +} + + /* Inline implementation of the sum and product intrinsics. */ static void gfc_conv_intrinsic_arith (gfc_se * se, gfc_expr * expr, enum tree_code op, @@ -2570,18 +2584,18 @@ gfc_conv_intrinsic_arith (gfc_se * se, gfc_expr * expr, enum tree_code op, tree tmp; gfc_loopinfo loop, *ploop; gfc_actual_arglist *arg_array, *arg_mask; - gfc_ss *arrayss; - gfc_ss *maskss; + gfc_ss *arrayss = NULL; + gfc_ss *maskss = NULL; gfc_se arrayse; gfc_se maskse; gfc_se *parent_se; gfc_expr *arrayexpr; gfc_expr *maskexpr; - if (se->ss) + if (expr->rank > 0) { - gfc_conv_intrinsic_funcall (se, expr); - return; + gcc_assert (gfc_inline_intrinsic_function_p (expr)); + parent_se = se; } else parent_se = NULL; @@ -2613,10 +2627,7 @@ gfc_conv_intrinsic_arith (gfc_se * se, gfc_expr * expr, enum tree_code op, arg_array = expr->value.function.actual; - /* Walk the arguments. 
*/ arrayexpr = arg_array->expr; - arrayss = gfc_walk_expr (arrayexpr); - gcc_assert (arrayss != gfc_ss_terminator); if (op == NE_EXPR || norm2) /* PARITY and NORM2. */ @@ -2628,29 +2639,42 @@ gfc_conv_intrinsic_arith (gfc_se * se, gfc_expr * expr, enum tree_code op, maskexpr = arg_mask->expr; } - if (maskexpr && maskexpr->rank > 0) + if (expr->rank == 0) { - maskss = gfc_walk_expr (maskexpr); - gcc_assert (maskss != gfc_ss_terminator); - } - else - maskss = NULL; + /* Walk the arguments. */ + arrayss = gfc_walk_expr (arrayexpr); + gcc_assert (arrayss != gfc_ss_terminator); - /* Initialize the scalarizer. */ - gfc_init_loopinfo (&loop); - gfc_add_ss_to_loop (&loop, arrayss); - if (maskexpr && maskexpr->rank > 0) - gfc_add_ss_to_loop (&loop, maskss); + if (maskexpr && maskexpr->rank > 0) + { + maskss = gfc_walk_expr (maskexpr); + gcc_assert (maskss != gfc_ss_terminator); + } + else + maskss = NULL; - /* Initialize the loop. */ - gfc_conv_ss_startstride (&loop); - gfc_conv_loop_setup (&loop, &expr->where); + /* Initialize the scalarizer. */ + gfc_init_loopinfo (&loop); + gfc_add_ss_to_loop (&loop, arrayss); + if (maskexpr && maskexpr->rank > 0) + gfc_add_ss_to_loop (&loop, maskss); - gfc_mark_ss_chain_used (arrayss, 1); - if (maskexpr && maskexpr->rank > 0) - gfc_mark_ss_chain_used (maskss, 1); + /* Initialize the loop. */ + gfc_conv_ss_startstride (&loop); + gfc_conv_loop_setup (&loop, &expr->where); + + gfc_mark_ss_chain_used (arrayss, 1); + if (maskexpr && maskexpr->rank > 0) + gfc_mark_ss_chain_used (maskss, 1); + + ploop = &loop; + } + else + /* All the work has been done in the parent loops. */ + ploop = enter_nested_loop (se); + + gcc_assert (ploop); - ploop = &loop; /* Generate the loop body. 
*/ gfc_start_scalarized_body (ploop, &body); @@ -2659,7 +2683,8 @@ gfc_conv_intrinsic_arith (gfc_se * se, gfc_expr * expr, enum tree_code op, { gfc_init_se (&maskse, parent_se); gfc_copy_loopinfo_to_se (&maskse, ploop); - maskse.ss = maskss; + if (expr->rank == 0) + maskse.ss = maskss; gfc_conv_expr_val (&maskse, maskexpr); gfc_add_block_to_block (&body, &maskse.pre); @@ -2671,7 +2696,8 @@ gfc_conv_intrinsic_arith (gfc_se * se, gfc_expr * expr, enum tree_code op, /* Do the actual summation/product. */ gfc_init_se (&arrayse, parent_se); gfc_copy_loopinfo_to_se (&arrayse, ploop); - arrayse.ss = arrayss; + if (expr->rank == 0) + arrayse.ss = arrayss; gfc_conv_expr_val (&arrayse, arrayexpr); gfc_add_block_to_block (&block, &arrayse.pre); @@ -2763,17 +2789,29 @@ gfc_conv_intrinsic_arith (gfc_se * se, gfc_expr * expr, enum tree_code op, /* For a scalar mask, enclose the loop in an if statement. */ if (maskexpr && maskexpr->rank == 0) { - gfc_init_se (&maskse, NULL); - gfc_conv_expr_val (&maskse, maskexpr); gfc_init_block (&block); gfc_add_block_to_block (&block, &ploop->pre); gfc_add_block_to_block (&block, &ploop->post); tmp = gfc_finish_block (&block); - tmp = build3_v (COND_EXPR, maskse.expr, tmp, - build_empty_stmt (input_location)); + if (expr->rank > 0) + { + tmp = build3_v (COND_EXPR, se->ss->info->data.scalar.value, tmp, + build_empty_stmt (input_location)); + gfc_advance_se_ss_chain (se); + } + else + { + gcc_assert (expr->rank == 0); + gfc_init_se (&maskse, NULL); + gfc_conv_expr_val (&maskse, maskexpr); + tmp = build3_v (COND_EXPR, maskse.expr, tmp, + build_empty_stmt (input_location)); + } + gfc_add_expr_to_block (&block, tmp); gfc_add_block_to_block (&se->pre, &block); + gcc_assert (se->post.head == NULL); } else { @@ -2781,7 +2819,8 @@ gfc_conv_intrinsic_arith (gfc_se * se, gfc_expr * expr, enum tree_code op, gfc_add_block_to_block (&se->pre, &ploop->post); } - gfc_cleanup_loop (ploop); + if (expr->rank == 0) + gfc_cleanup_loop (ploop); if (norm2) { @@ 
-6801,12 +6840,127 @@ walk_inline_intrinsic_transpose (gfc_ss *ss, gfc_expr *expr) } +/* Move the given dimension of the given gfc_ss list to a nested gfc_ss list. + This has the side effect of reversing the nested list, so there is no + need to call gfc_reverse_ss on it (the given list is assumed not to be + reversed yet). */ + +static gfc_ss * +nest_loop_dimension (gfc_ss *ss, int dim) +{ + int ss_dim, i; + gfc_ss *new_ss, *prev_ss = gfc_ss_terminator; + gfc_loopinfo *new_loop; + + gcc_assert (ss != gfc_ss_terminator); + + for (; ss != gfc_ss_terminator; ss = ss->next) + { + new_ss = gfc_get_ss (); + new_ss->next = prev_ss; + new_ss->parent = ss; + new_ss->info = ss->info; + new_ss->info->refcount++; + if (ss->dimen != 0) + { + gcc_assert (ss->info->type != GFC_SS_SCALAR + && ss->info->type != GFC_SS_REFERENCE); + + new_ss->dimen = 1; + new_ss->dim[0] = ss->dim[dim]; + + gcc_assert (dim < ss->dimen); + + ss_dim = --ss->dimen; + for (i = dim; i < ss_dim; i++) + ss->dim[i] = ss->dim[i + 1]; + + ss->dim[ss_dim] = 0; + } + prev_ss = new_ss; + + if (ss->nested_ss) + { + ss->nested_ss->parent = new_ss; + new_ss->nested_ss = ss->nested_ss; + } + ss->nested_ss = new_ss; + } + + new_loop = gfc_get_loopinfo (); + gfc_init_loopinfo (new_loop); + + gcc_assert (prev_ss != NULL); + gcc_assert (prev_ss != gfc_ss_terminator); + gfc_add_ss_to_loop (new_loop, prev_ss); + return new_ss->parent; +} + + +/* Create the gfc_ss list for the SUM/PRODUCT arguments when the function + is to be inlined. */ + +static gfc_ss * +walk_inline_intrinsic_arith (gfc_ss *ss, gfc_expr *expr) +{ + gfc_ss *tmp_ss, *tail, *array_ss; + gfc_actual_arglist *arg1, *arg2, *arg3; + int sum_dim; + bool scalar_mask = false; + + /* The rank of the result will be determined later. 
*/ + arg1 = expr->value.function.actual; + arg2 = arg1->next; + arg3 = arg2->next; + gcc_assert (arg3 != NULL); + + if (expr->rank == 0) + return ss; + + tmp_ss = gfc_ss_terminator; + + if (arg3->expr) + { + gfc_ss *mask_ss; + + mask_ss = gfc_walk_subexpr (tmp_ss, arg3->expr); + if (mask_ss == tmp_ss) + scalar_mask = 1; + + tmp_ss = mask_ss; + } + + array_ss = gfc_walk_subexpr (tmp_ss, arg1->expr); + gcc_assert (array_ss != tmp_ss); + + /* Odd thing: If the mask is scalar, it is used by the frontend after + the array (to make an if around the nested loop). Thus it shall + be after array_ss once the gfc_ss list is reversed. */ + if (scalar_mask) + tmp_ss = gfc_get_scalar_ss (array_ss, arg3->expr); + else + tmp_ss = array_ss; + + /* "Hide" the dimension on which we will sum in the first arg's scalarization + chain. */ + sum_dim = mpz_get_si (arg2->expr->value.integer) - 1; + tail = nest_loop_dimension (tmp_ss, sum_dim); + tail->next = ss; + + return tmp_ss; +} + + static gfc_ss * walk_inline_intrinsic_function (gfc_ss * ss, gfc_expr * expr) { switch (expr->value.function.isym->id) { + case GFC_ISYM_PRODUCT: + case GFC_ISYM_SUM: + return walk_inline_intrinsic_arith (ss, expr); + case GFC_ISYM_TRANSPOSE: return walk_inline_intrinsic_transpose (ss, expr); @@ -6868,11 +7022,26 @@ gfc_walk_intrinsic_libfunc (gfc_ss * ss, gfc_expr * expr) bool gfc_inline_intrinsic_function_p (gfc_expr *expr) { + gfc_actual_arglist *args; + if (!expr->value.function.isym) return false; switch (expr->value.function.isym->id) { + case GFC_ISYM_PRODUCT: + case GFC_ISYM_SUM: + /* Disable inline expansion if code size matters. */ + if (optimize_size) + return false; + + args = expr->value.function.actual; + /* We need to be able to subset the SUM argument at compile-time. 
*/ + if (args->next->expr && args->next->expr->expr_type != EXPR_CONSTANT) + return false; + + return true; + case GFC_ISYM_TRANSPOSE: return true; diff --git a/trans.h b/trans.h index 5757865..22033d3 100644 --- a/trans.h +++ b/trans.h @@ -310,6 +310,7 @@ typedef struct gfc_loopinfo } gfc_loopinfo; +#define gfc_get_loopinfo() XCNEW (gfc_loopinfo) /* Information about a symbol that has been shadowed by a temporary. */ typedef struct