From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2153) id 2983D3858D29; Mon, 23 Aug 2021 08:23:57 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 2983D3858D29 MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Jakub Jelinek To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-3066] openmp: Add support for strict modifier on grainsize/num_tasks clauses X-Act-Checkin: gcc X-Git-Author: Jakub Jelinek X-Git-Refname: refs/heads/master X-Git-Oldrev: 6f1a3668f5ee5152bdcca59843802e587339eda1 X-Git-Newrev: 3bc75533d1f87f0617be6c1af98804f9127ec637 Message-Id: <20210823082357.2983D3858D29@sourceware.org> Date: Mon, 23 Aug 2021 08:23:57 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 23 Aug 2021 08:23:57 -0000 https://gcc.gnu.org/g:3bc75533d1f87f0617be6c1af98804f9127ec637 commit r12-3066-g3bc75533d1f87f0617be6c1af98804f9127ec637 Author: Jakub Jelinek Date: Mon Aug 23 10:16:24 2021 +0200 openmp: Add support for strict modifier on grainsize/num_tasks clauses With strict: modifier on these clauses, the standard is explicit about how many iterations (and which) each generated task of taskloop directive should contain. For num_tasks it actually matches what we were already implementing, but for grainsize it does not (and even violates the old rule - without strict it requires that the number of iterations (unspecified which exactly) handled by each generated task is >= grainsize argument and < 2 * grainsize argument, with strict: it requires that each generated task handles exactly == grainsize argument iterations, except for the generated task handling the last iteration which can handles <= grainsize iterations). The following patch implements it for C and C++. 2021-08-23 Jakub Jelinek gcc/ * tree.h (OMP_CLAUSE_GRAINSIZE_STRICT): Define. (OMP_CLAUSE_NUM_TASKS_STRICT): Define. * tree-pretty-print.c (dump_omp_clause) : Print strict: modifier. * omp-expand.c (expand_task_call): Use GOMP_TASK_FLAG_STRICT in iflags if either grainsize or num_tasks clause has the strict modifier. gcc/c/ * c-parser.c (c_parser_omp_clause_num_tasks, c_parser_omp_clause_grainsize): Parse the optional strict: modifier. gcc/cp/ * parser.c (cp_parser_omp_clause_num_tasks, cp_parser_omp_clause_grainsize): Parse the optional strict: modifier. include/ * gomp-constants.h (GOMP_TASK_FLAG_STRICT): Define. libgomp/ * taskloop.c (GOMP_taskloop): Handle GOMP_TASK_FLAG_STRICT. * testsuite/libgomp.c-c++-common/taskloop-4.c (main): Fix up comment. * testsuite/libgomp.c-c++-common/taskloop-5.c: New test. Diff: --- gcc/c/c-parser.c | 34 +++++- gcc/cp/parser.c | 38 +++++- gcc/omp-expand.c | 8 +- gcc/tree-pretty-print.c | 4 + gcc/tree.h | 5 + include/gomp-constants.h | 1 + libgomp/taskloop.c | 27 ++++- .../testsuite/libgomp.c-c++-common/taskloop-4.c | 3 +- .../testsuite/libgomp.c-c++-common/taskloop-5.c | 135 +++++++++++++++++++++ 9 files changed, 245 insertions(+), 10 deletions(-) diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index c5783075a99..356cf2504d4 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -13786,7 +13786,10 @@ c_parser_omp_clause_num_threads (c_parser *parser, tree list) } /* OpenMP 4.5: - num_tasks ( expression ) */ + num_tasks ( expression ) + + OpenMP 5.1: + num_tasks ( strict : expression ) */ static tree c_parser_omp_clause_num_tasks (c_parser *parser, tree list) @@ -13795,6 +13798,17 @@ c_parser_omp_clause_num_tasks (c_parser *parser, tree list) matching_parens parens; if (parens.require_open (parser)) { + bool strict = false; + if (c_parser_next_token_is (parser, CPP_NAME) + && c_parser_peek_2nd_token (parser)->type == CPP_COLON + && strcmp (IDENTIFIER_POINTER (c_parser_peek_token (parser)->value), + "strict") == 0) + { + strict = true; + c_parser_consume_token (parser); + c_parser_consume_token (parser); + } + location_t expr_loc = c_parser_peek_token (parser)->location; c_expr expr = c_parser_expr_no_commas (parser, NULL); expr = convert_lvalue_to_rvalue (expr_loc, expr, false, true); @@ -13824,6 +13838,7 @@ c_parser_omp_clause_num_tasks (c_parser *parser, tree list) c = build_omp_clause (num_tasks_loc, OMP_CLAUSE_NUM_TASKS); OMP_CLAUSE_NUM_TASKS_EXPR (c) = t; + OMP_CLAUSE_NUM_TASKS_STRICT (c) = strict; OMP_CLAUSE_CHAIN (c) = list; list = c; } @@ -13832,7 +13847,10 @@ c_parser_omp_clause_num_tasks (c_parser *parser, tree list) } /* OpenMP 4.5: - grainsize ( expression ) */ + grainsize ( expression ) + + OpenMP 5.1: + grainsize ( strict : expression ) */ static tree c_parser_omp_clause_grainsize (c_parser *parser, tree list) @@ -13841,6 +13859,17 @@ c_parser_omp_clause_grainsize (c_parser *parser, tree list) matching_parens parens; if (parens.require_open (parser)) { + bool strict = false; + if (c_parser_next_token_is (parser, CPP_NAME) + && c_parser_peek_2nd_token (parser)->type == CPP_COLON + && strcmp (IDENTIFIER_POINTER (c_parser_peek_token (parser)->value), + "strict") == 0) + { + strict = true; + c_parser_consume_token (parser); + c_parser_consume_token (parser); + } + location_t expr_loc = c_parser_peek_token (parser)->location; c_expr expr = c_parser_expr_no_commas (parser, NULL); expr = convert_lvalue_to_rvalue (expr_loc, expr, false, true); @@ -13870,6 +13899,7 @@ c_parser_omp_clause_grainsize (c_parser *parser, tree list) c = build_omp_clause (grainsize_loc, OMP_CLAUSE_GRAINSIZE); OMP_CLAUSE_GRAINSIZE_EXPR (c) = t; + OMP_CLAUSE_GRAINSIZE_STRICT (c) = strict; OMP_CLAUSE_CHAIN (c) = list; list = c; } diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 63c95039854..a959c71dfa3 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -37237,7 +37237,10 @@ cp_parser_omp_clause_num_threads (cp_parser *parser, tree list, } /* OpenMP 4.5: - num_tasks ( expression ) */ + num_tasks ( expression ) + + OpenMP 5.1: + num_tasks ( strict : expression ) */ static tree cp_parser_omp_clause_num_tasks (cp_parser *parser, tree list, @@ -37249,6 +37252,19 @@ cp_parser_omp_clause_num_tasks (cp_parser *parser, tree list, if (!parens.require_open (parser)) return list; + bool strict = false; + if (cp_lexer_next_token_is (parser->lexer, CPP_NAME) + && cp_lexer_nth_token_is (parser->lexer, 2, CPP_COLON)) + { + tree id = cp_lexer_peek_token (parser->lexer)->u.value; + if (!strcmp (IDENTIFIER_POINTER (id), "strict")) + { + strict = true; + cp_lexer_consume_token (parser->lexer); + cp_lexer_consume_token (parser->lexer); + } + } + t = cp_parser_assignment_expression (parser); if (t == error_mark_node @@ -37262,13 +37278,17 @@ cp_parser_omp_clause_num_tasks (cp_parser *parser, tree list, c = build_omp_clause (location, OMP_CLAUSE_NUM_TASKS); OMP_CLAUSE_NUM_TASKS_EXPR (c) = t; + OMP_CLAUSE_NUM_TASKS_STRICT (c) = strict; OMP_CLAUSE_CHAIN (c) = list; return c; } /* OpenMP 4.5: - grainsize ( expression ) */ + grainsize ( expression ) + + OpenMP 5.1: + grainsize ( strict : expression ) */ static tree cp_parser_omp_clause_grainsize (cp_parser *parser, tree list, @@ -37280,6 +37300,19 @@ cp_parser_omp_clause_grainsize (cp_parser *parser, tree list, if (!parens.require_open (parser)) return list; + bool strict = false; + if (cp_lexer_next_token_is (parser->lexer, CPP_NAME) + && cp_lexer_nth_token_is (parser->lexer, 2, CPP_COLON)) + { + tree id = cp_lexer_peek_token (parser->lexer)->u.value; + if (!strcmp (IDENTIFIER_POINTER (id), "strict")) + { + strict = true; + cp_lexer_consume_token (parser->lexer); + cp_lexer_consume_token (parser->lexer); + } + } + t = cp_parser_assignment_expression (parser); if (t == error_mark_node @@ -37293,6 +37326,7 @@ cp_parser_omp_clause_grainsize (cp_parser *parser, tree list, c = build_omp_clause (location, OMP_CLAUSE_GRAINSIZE); OMP_CLAUSE_GRAINSIZE_EXPR (c) = t; + OMP_CLAUSE_GRAINSIZE_STRICT (c) = strict; OMP_CLAUSE_CHAIN (c) = list; return c; diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c index c868b8c3d3c..66c64f5a37b 100644 --- a/gcc/omp-expand.c +++ b/gcc/omp-expand.c @@ -791,13 +791,19 @@ expand_task_call (struct omp_region *region, basic_block bb, tree tclauses = gimple_omp_for_clauses (g); num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS); if (num_tasks) - num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks); + { + if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks)) + iflags |= GOMP_TASK_FLAG_STRICT; + num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks); + } else { num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE); if (num_tasks) { iflags |= GOMP_TASK_FLAG_GRAINSIZE; + if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks)) + iflags |= GOMP_TASK_FLAG_STRICT; num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks); } else diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index 0570fdcf890..e103d2c6bd4 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -1066,6 +1066,8 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags) case OMP_CLAUSE_GRAINSIZE: pp_string (pp, "grainsize("); + if (OMP_CLAUSE_GRAINSIZE_STRICT (clause)) + pp_string (pp, "strict:"); dump_generic_node (pp, OMP_CLAUSE_GRAINSIZE_EXPR (clause), spc, flags, false); pp_right_paren (pp); @@ -1073,6 +1075,8 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags) case OMP_CLAUSE_NUM_TASKS: pp_string (pp, "num_tasks("); + if (OMP_CLAUSE_NUM_TASKS_STRICT (clause)) + pp_string (pp, "strict:"); dump_generic_node (pp, OMP_CLAUSE_NUM_TASKS_EXPR (clause), spc, flags, false); pp_right_paren (pp); diff --git a/gcc/tree.h b/gcc/tree.h index 905417fd17b..060a41f6991 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -1612,6 +1612,11 @@ class auto_suppress_location_wrappers #define OMP_CLAUSE_PRIORITY_EXPR(NODE) \ OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_PRIORITY),0) +#define OMP_CLAUSE_GRAINSIZE_STRICT(NODE) \ + TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_GRAINSIZE)) +#define OMP_CLAUSE_NUM_TASKS_STRICT(NODE) \ + TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_NUM_TASKS)) + /* OpenACC clause expressions */ #define OMP_CLAUSE_EXPR(NODE, CLAUSE) \ OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, CLAUSE), 0) diff --git a/include/gomp-constants.h b/include/gomp-constants.h index 6e163b02560..ebd08013430 100644 --- a/include/gomp-constants.h +++ b/include/gomp-constants.h @@ -222,6 +222,7 @@ enum gomp_map_kind #define GOMP_TASK_FLAG_NOGROUP (1 << 11) #define GOMP_TASK_FLAG_REDUCTION (1 << 12) #define GOMP_TASK_FLAG_DETACH (1 << 13) +#define GOMP_TASK_FLAG_STRICT (1 << 14) /* GOMP_target{_ext,update_ext,enter_exit_data} flags argument. */ #define GOMP_TARGET_FLAG_NOWAIT (1 << 0) diff --git a/libgomp/taskloop.c b/libgomp/taskloop.c index 791178a1fd7..9d27dd05f6c 100644 --- a/libgomp/taskloop.c +++ b/libgomp/taskloop.c @@ -97,6 +97,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), #endif TYPE task_step = step; + TYPE nfirst_task_step = step; unsigned long nfirst = n; if (flags & GOMP_TASK_FLAG_GRAINSIZE) { @@ -109,7 +110,22 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), if (num_tasks != ndiv) num_tasks = ~0UL; #endif - if (num_tasks <= 1) + if ((flags & GOMP_TASK_FLAG_STRICT) + && num_tasks != ~0ULL) + { + UTYPE mod = n % grainsize; + task_step = (TYPE) grainsize * step; + if (mod) + { + num_tasks++; + nfirst_task_step = (TYPE) mod * step; + if (num_tasks == 1) + task_step = nfirst_task_step; + else + nfirst = num_tasks - 2; + } + } + else if (num_tasks <= 1) { num_tasks = 1; task_step = end - start; @@ -124,6 +140,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), task_step = (TYPE) grainsize * step; if (mul != n) { + nfirst_task_step = task_step; task_step += step; nfirst = n - mul - 1; } @@ -135,6 +152,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), task_step = (TYPE) div * step; if (mod) { + nfirst_task_step = task_step; task_step += step; nfirst = mod - 1; } @@ -153,6 +171,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), task_step = (TYPE) div * step; if (mod) { + nfirst_task_step = task_step; task_step += step; nfirst = mod - 1; } @@ -225,7 +244,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), start += task_step; ((TYPE *)arg)[1] = start; if (i == nfirst) - task_step -= step; + task_step = nfirst_task_step; fn (arg); arg += arg_size; if (!priority_queue_empty_p (&task[i].children_queue, @@ -258,7 +277,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), start += task_step; ((TYPE *)data)[1] = start; if (i == nfirst) - task_step -= step; + task_step = nfirst_task_step; fn (data); if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED)) @@ -303,7 +322,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), start += task_step; ((TYPE *)arg)[1] = start; if (i == nfirst) - task_step -= step; + task_step = nfirst_task_step; thr->task = parent; task->kind = GOMP_TASK_WAITING; task->fn = fn; diff --git a/libgomp/testsuite/libgomp.c-c++-common/taskloop-4.c b/libgomp/testsuite/libgomp.c-c++-common/taskloop-4.c index 4ac1b5a3751..b9499387d11 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/taskloop-4.c +++ b/libgomp/testsuite/libgomp.c-c++-common/taskloop-4.c @@ -85,7 +85,8 @@ main () if (test (7, 21, 2, 15, grainsize, &ntasks, &min_iters, &max_iters) != 7 || ntasks != 1 || min_iters != 7 || max_iters != 7) __builtin_abort (); - /* If num_tasks is present, # of task loop iters is min (# of loop iters, num_tasks). */ + /* If num_tasks is present, # of tasks is min (# of loop iters, num_tasks) + and each task has at least one iteration. */ if (test (-51, 2500, 48, 9, num_tasks, &ntasks, &min_iters, &max_iters) != 54 || ntasks != 9) __builtin_abort (); diff --git a/libgomp/testsuite/libgomp.c-c++-common/taskloop-5.c b/libgomp/testsuite/libgomp.c-c++-common/taskloop-5.c new file mode 100644 index 00000000000..1b64a6dc4f7 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/taskloop-5.c @@ -0,0 +1,135 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +int u[64], v, w[64]; + +__attribute__((noinline, noclone)) int +test (int a, int b, int c, int d, void (*fn) (int, int, int, int), + int *num_tasks, int *min_iters, int *max_iters, int *sep) +{ + int i, j, t = 0; + __builtin_memset (u, 0, sizeof u); + v = 0; + fn (a, b, c, d); + *min_iters = 0; + *max_iters = 0; + *num_tasks = v; + *sep = v; + if (v) + { + *min_iters = u[0]; + *max_iters = u[0]; + t = u[0]; + for (i = 1; i < v; i++) + { + if (*min_iters > u[i]) + *min_iters = u[i]; + if (*max_iters < u[i]) + *max_iters = u[i]; + t += u[i]; + } + if (*min_iters != *max_iters) + { + for (i = 0; i < v - 1; i++) + { + int min_idx = i; + for (j = i + 1; j < v; j++) + if (w[min_idx] > w[j]) + min_idx = j; + if (min_idx != i) + { + int tem = u[i]; + u[i] = u[min_idx]; + u[min_idx] = tem; + tem = w[i]; + w[i] = w[min_idx]; + w[min_idx] = tem; + } + } + if (u[0] != *max_iters) + __builtin_abort (); + for (i = 1; i < v; i++) + if (u[i] != u[i - 1]) + { + if (*sep != v || u[i] != *min_iters) + __builtin_abort (); + *sep = i; + } + } + } + return t; +} + +void +grainsize (int a, int b, int c, int d) +{ + int i, j = 0, k = 0; + #pragma omp taskloop firstprivate (j, k) grainsize(strict:d) + for (i = a; i < b; i += c) + { + if (j == 0) + { + #pragma omp atomic capture + k = v++; + if (k >= 64) + __builtin_abort (); + w[k] = i; + } + u[k] = ++j; + } +} + +void +num_tasks (int a, int b, int c, int d) +{ + int i, j = 0, k = 0; + #pragma omp taskloop firstprivate (j, k) num_tasks(strict:d) + for (i = a; i < b; i += c) + { + if (j == 0) + { + #pragma omp atomic capture + k = v++; + if (k >= 64) + __builtin_abort (); + w[k] = i; + } + u[k] = ++j; + } +} + +int +main () +{ + #pragma omp parallel + #pragma omp single + { + int min_iters, max_iters, ntasks, sep; + /* If grainsize is present and has strict modifier, # of task loop iters is == grainsize, + except that it can be smaller on the last task. */ + if (test (0, 79, 1, 17, grainsize, &ntasks, &min_iters, &max_iters, &sep) != 79 + || ntasks != 5 || min_iters != 11 || max_iters != 17 || sep != 4) + __builtin_abort (); + if (test (-49, 2541, 7, 28, grainsize, &ntasks, &min_iters, &max_iters, &sep) != 370 + || ntasks != 14 || min_iters != 6 || max_iters != 28 || sep != 13) + __builtin_abort (); + if (test (7, 21, 2, 15, grainsize, &ntasks, &min_iters, &max_iters, &sep) != 7 + || ntasks != 1 || min_iters != 7 || max_iters != 7 || sep != 1) + __builtin_abort (); + /* If num_tasks is present, # of tasks is min (# of loop iters, num_tasks) + and each task has at least one iteration. If strict modifier is present, + first set of tasks has ceil (# of loop iters / num_tasks) iterations, + followed by possibly empty set of tasks with floor (# of loop iters / num_tasks) + iterations. */ + if (test (-51, 2500, 48, 9, num_tasks, &ntasks, &min_iters, &max_iters, &sep) != 54 + || ntasks != 9 || min_iters != 6 || max_iters != 6 || sep != 9) + __builtin_abort (); + if (test (0, 57, 1, 9, num_tasks, &ntasks, &min_iters, &max_iters, &sep) != 57 + || ntasks != 9 || min_iters != 6 || max_iters != 7 || sep != 3) + __builtin_abort (); + if (test (0, 25, 2, 17, num_tasks, &ntasks, &min_iters, &max_iters, &sep) != 13 + || ntasks != 13 || min_iters != 1 || max_iters != 1 || sep != 13) + __builtin_abort (); + } + return 0; +}