public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-3066] openmp: Add support for strict modifier on grainsize/num_tasks clauses
@ 2021-08-23 8:23 Jakub Jelinek
0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2021-08-23 8:23 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:3bc75533d1f87f0617be6c1af98804f9127ec637
commit r12-3066-g3bc75533d1f87f0617be6c1af98804f9127ec637
Author: Jakub Jelinek <jakub@redhat.com>
Date: Mon Aug 23 10:16:24 2021 +0200
openmp: Add support for strict modifier on grainsize/num_tasks clauses
With strict: modifier on these clauses, the standard is explicit about
how many iterations (and which) each generated task of taskloop directive
should contain. For num_tasks it actually matches what we were already
implementing, but for grainsize it does not (and even violates the old
rule - without strict it requires that the number of iterations (unspecified
which exactly) handled by each generated task is >= grainsize argument and
< 2 * grainsize argument, with strict: it requires that each generated
task handles exactly == grainsize argument iterations, except for the
generated task handling the last iteration which can handles <= grainsize
iterations).
The following patch implements it for C and C++.
2021-08-23 Jakub Jelinek <jakub@redhat.com>
gcc/
* tree.h (OMP_CLAUSE_GRAINSIZE_STRICT): Define.
(OMP_CLAUSE_NUM_TASKS_STRICT): Define.
* tree-pretty-print.c (dump_omp_clause) <case OMP_CLAUSE_GRAINSIZE,
case OMP_CLAUSE_NUM_TASKS>: Print strict: modifier.
* omp-expand.c (expand_task_call): Use GOMP_TASK_FLAG_STRICT in iflags
if either grainsize or num_tasks clause has the strict modifier.
gcc/c/
* c-parser.c (c_parser_omp_clause_num_tasks,
c_parser_omp_clause_grainsize): Parse the optional strict: modifier.
gcc/cp/
* parser.c (cp_parser_omp_clause_num_tasks,
cp_parser_omp_clause_grainsize): Parse the optional strict: modifier.
include/
* gomp-constants.h (GOMP_TASK_FLAG_STRICT): Define.
libgomp/
* taskloop.c (GOMP_taskloop): Handle GOMP_TASK_FLAG_STRICT.
* testsuite/libgomp.c-c++-common/taskloop-4.c (main): Fix up comment.
* testsuite/libgomp.c-c++-common/taskloop-5.c: New test.
Diff:
---
gcc/c/c-parser.c | 34 +++++-
gcc/cp/parser.c | 38 +++++-
gcc/omp-expand.c | 8 +-
gcc/tree-pretty-print.c | 4 +
gcc/tree.h | 5 +
include/gomp-constants.h | 1 +
libgomp/taskloop.c | 27 ++++-
.../testsuite/libgomp.c-c++-common/taskloop-4.c | 3 +-
.../testsuite/libgomp.c-c++-common/taskloop-5.c | 135 +++++++++++++++++++++
9 files changed, 245 insertions(+), 10 deletions(-)
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index c5783075a99..356cf2504d4 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -13786,7 +13786,10 @@ c_parser_omp_clause_num_threads (c_parser *parser, tree list)
}
/* OpenMP 4.5:
- num_tasks ( expression ) */
+ num_tasks ( expression )
+
+ OpenMP 5.1:
+ num_tasks ( strict : expression ) */
static tree
c_parser_omp_clause_num_tasks (c_parser *parser, tree list)
@@ -13795,6 +13798,17 @@ c_parser_omp_clause_num_tasks (c_parser *parser, tree list)
matching_parens parens;
if (parens.require_open (parser))
{
+ bool strict = false;
+ if (c_parser_next_token_is (parser, CPP_NAME)
+ && c_parser_peek_2nd_token (parser)->type == CPP_COLON
+ && strcmp (IDENTIFIER_POINTER (c_parser_peek_token (parser)->value),
+ "strict") == 0)
+ {
+ strict = true;
+ c_parser_consume_token (parser);
+ c_parser_consume_token (parser);
+ }
+
location_t expr_loc = c_parser_peek_token (parser)->location;
c_expr expr = c_parser_expr_no_commas (parser, NULL);
expr = convert_lvalue_to_rvalue (expr_loc, expr, false, true);
@@ -13824,6 +13838,7 @@ c_parser_omp_clause_num_tasks (c_parser *parser, tree list)
c = build_omp_clause (num_tasks_loc, OMP_CLAUSE_NUM_TASKS);
OMP_CLAUSE_NUM_TASKS_EXPR (c) = t;
+ OMP_CLAUSE_NUM_TASKS_STRICT (c) = strict;
OMP_CLAUSE_CHAIN (c) = list;
list = c;
}
@@ -13832,7 +13847,10 @@ c_parser_omp_clause_num_tasks (c_parser *parser, tree list)
}
/* OpenMP 4.5:
- grainsize ( expression ) */
+ grainsize ( expression )
+
+ OpenMP 5.1:
+ grainsize ( strict : expression ) */
static tree
c_parser_omp_clause_grainsize (c_parser *parser, tree list)
@@ -13841,6 +13859,17 @@ c_parser_omp_clause_grainsize (c_parser *parser, tree list)
matching_parens parens;
if (parens.require_open (parser))
{
+ bool strict = false;
+ if (c_parser_next_token_is (parser, CPP_NAME)
+ && c_parser_peek_2nd_token (parser)->type == CPP_COLON
+ && strcmp (IDENTIFIER_POINTER (c_parser_peek_token (parser)->value),
+ "strict") == 0)
+ {
+ strict = true;
+ c_parser_consume_token (parser);
+ c_parser_consume_token (parser);
+ }
+
location_t expr_loc = c_parser_peek_token (parser)->location;
c_expr expr = c_parser_expr_no_commas (parser, NULL);
expr = convert_lvalue_to_rvalue (expr_loc, expr, false, true);
@@ -13870,6 +13899,7 @@ c_parser_omp_clause_grainsize (c_parser *parser, tree list)
c = build_omp_clause (grainsize_loc, OMP_CLAUSE_GRAINSIZE);
OMP_CLAUSE_GRAINSIZE_EXPR (c) = t;
+ OMP_CLAUSE_GRAINSIZE_STRICT (c) = strict;
OMP_CLAUSE_CHAIN (c) = list;
list = c;
}
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 63c95039854..a959c71dfa3 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -37237,7 +37237,10 @@ cp_parser_omp_clause_num_threads (cp_parser *parser, tree list,
}
/* OpenMP 4.5:
- num_tasks ( expression ) */
+ num_tasks ( expression )
+
+ OpenMP 5.1:
+ num_tasks ( strict : expression ) */
static tree
cp_parser_omp_clause_num_tasks (cp_parser *parser, tree list,
@@ -37249,6 +37252,19 @@ cp_parser_omp_clause_num_tasks (cp_parser *parser, tree list,
if (!parens.require_open (parser))
return list;
+ bool strict = false;
+ if (cp_lexer_next_token_is (parser->lexer, CPP_NAME)
+ && cp_lexer_nth_token_is (parser->lexer, 2, CPP_COLON))
+ {
+ tree id = cp_lexer_peek_token (parser->lexer)->u.value;
+ if (!strcmp (IDENTIFIER_POINTER (id), "strict"))
+ {
+ strict = true;
+ cp_lexer_consume_token (parser->lexer);
+ cp_lexer_consume_token (parser->lexer);
+ }
+ }
+
t = cp_parser_assignment_expression (parser);
if (t == error_mark_node
@@ -37262,13 +37278,17 @@ cp_parser_omp_clause_num_tasks (cp_parser *parser, tree list,
c = build_omp_clause (location, OMP_CLAUSE_NUM_TASKS);
OMP_CLAUSE_NUM_TASKS_EXPR (c) = t;
+ OMP_CLAUSE_NUM_TASKS_STRICT (c) = strict;
OMP_CLAUSE_CHAIN (c) = list;
return c;
}
/* OpenMP 4.5:
- grainsize ( expression ) */
+ grainsize ( expression )
+
+ OpenMP 5.1:
+ grainsize ( strict : expression ) */
static tree
cp_parser_omp_clause_grainsize (cp_parser *parser, tree list,
@@ -37280,6 +37300,19 @@ cp_parser_omp_clause_grainsize (cp_parser *parser, tree list,
if (!parens.require_open (parser))
return list;
+ bool strict = false;
+ if (cp_lexer_next_token_is (parser->lexer, CPP_NAME)
+ && cp_lexer_nth_token_is (parser->lexer, 2, CPP_COLON))
+ {
+ tree id = cp_lexer_peek_token (parser->lexer)->u.value;
+ if (!strcmp (IDENTIFIER_POINTER (id), "strict"))
+ {
+ strict = true;
+ cp_lexer_consume_token (parser->lexer);
+ cp_lexer_consume_token (parser->lexer);
+ }
+ }
+
t = cp_parser_assignment_expression (parser);
if (t == error_mark_node
@@ -37293,6 +37326,7 @@ cp_parser_omp_clause_grainsize (cp_parser *parser, tree list,
c = build_omp_clause (location, OMP_CLAUSE_GRAINSIZE);
OMP_CLAUSE_GRAINSIZE_EXPR (c) = t;
+ OMP_CLAUSE_GRAINSIZE_STRICT (c) = strict;
OMP_CLAUSE_CHAIN (c) = list;
return c;
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
index c868b8c3d3c..66c64f5a37b 100644
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -791,13 +791,19 @@ expand_task_call (struct omp_region *region, basic_block bb,
tree tclauses = gimple_omp_for_clauses (g);
num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
if (num_tasks)
- num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
+ {
+ if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
+ iflags |= GOMP_TASK_FLAG_STRICT;
+ num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
+ }
else
{
num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
if (num_tasks)
{
iflags |= GOMP_TASK_FLAG_GRAINSIZE;
+ if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
+ iflags |= GOMP_TASK_FLAG_STRICT;
num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
}
else
diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
index 0570fdcf890..e103d2c6bd4 100644
--- a/gcc/tree-pretty-print.c
+++ b/gcc/tree-pretty-print.c
@@ -1066,6 +1066,8 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags)
case OMP_CLAUSE_GRAINSIZE:
pp_string (pp, "grainsize(");
+ if (OMP_CLAUSE_GRAINSIZE_STRICT (clause))
+ pp_string (pp, "strict:");
dump_generic_node (pp, OMP_CLAUSE_GRAINSIZE_EXPR (clause),
spc, flags, false);
pp_right_paren (pp);
@@ -1073,6 +1075,8 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags)
case OMP_CLAUSE_NUM_TASKS:
pp_string (pp, "num_tasks(");
+ if (OMP_CLAUSE_NUM_TASKS_STRICT (clause))
+ pp_string (pp, "strict:");
dump_generic_node (pp, OMP_CLAUSE_NUM_TASKS_EXPR (clause),
spc, flags, false);
pp_right_paren (pp);
diff --git a/gcc/tree.h b/gcc/tree.h
index 905417fd17b..060a41f6991 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -1612,6 +1612,11 @@ class auto_suppress_location_wrappers
#define OMP_CLAUSE_PRIORITY_EXPR(NODE) \
OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_PRIORITY),0)
+#define OMP_CLAUSE_GRAINSIZE_STRICT(NODE) \
+ TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_GRAINSIZE))
+#define OMP_CLAUSE_NUM_TASKS_STRICT(NODE) \
+ TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_NUM_TASKS))
+
/* OpenACC clause expressions */
#define OMP_CLAUSE_EXPR(NODE, CLAUSE) \
OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, CLAUSE), 0)
diff --git a/include/gomp-constants.h b/include/gomp-constants.h
index 6e163b02560..ebd08013430 100644
--- a/include/gomp-constants.h
+++ b/include/gomp-constants.h
@@ -222,6 +222,7 @@ enum gomp_map_kind
#define GOMP_TASK_FLAG_NOGROUP (1 << 11)
#define GOMP_TASK_FLAG_REDUCTION (1 << 12)
#define GOMP_TASK_FLAG_DETACH (1 << 13)
+#define GOMP_TASK_FLAG_STRICT (1 << 14)
/* GOMP_target{_ext,update_ext,enter_exit_data} flags argument. */
#define GOMP_TARGET_FLAG_NOWAIT (1 << 0)
diff --git a/libgomp/taskloop.c b/libgomp/taskloop.c
index 791178a1fd7..9d27dd05f6c 100644
--- a/libgomp/taskloop.c
+++ b/libgomp/taskloop.c
@@ -97,6 +97,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
#endif
TYPE task_step = step;
+ TYPE nfirst_task_step = step;
unsigned long nfirst = n;
if (flags & GOMP_TASK_FLAG_GRAINSIZE)
{
@@ -109,7 +110,22 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
if (num_tasks != ndiv)
num_tasks = ~0UL;
#endif
- if (num_tasks <= 1)
+ if ((flags & GOMP_TASK_FLAG_STRICT)
+ && num_tasks != ~0ULL)
+ {
+ UTYPE mod = n % grainsize;
+ task_step = (TYPE) grainsize * step;
+ if (mod)
+ {
+ num_tasks++;
+ nfirst_task_step = (TYPE) mod * step;
+ if (num_tasks == 1)
+ task_step = nfirst_task_step;
+ else
+ nfirst = num_tasks - 2;
+ }
+ }
+ else if (num_tasks <= 1)
{
num_tasks = 1;
task_step = end - start;
@@ -124,6 +140,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
task_step = (TYPE) grainsize * step;
if (mul != n)
{
+ nfirst_task_step = task_step;
task_step += step;
nfirst = n - mul - 1;
}
@@ -135,6 +152,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
task_step = (TYPE) div * step;
if (mod)
{
+ nfirst_task_step = task_step;
task_step += step;
nfirst = mod - 1;
}
@@ -153,6 +171,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
task_step = (TYPE) div * step;
if (mod)
{
+ nfirst_task_step = task_step;
task_step += step;
nfirst = mod - 1;
}
@@ -225,7 +244,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
start += task_step;
((TYPE *)arg)[1] = start;
if (i == nfirst)
- task_step -= step;
+ task_step = nfirst_task_step;
fn (arg);
arg += arg_size;
if (!priority_queue_empty_p (&task[i].children_queue,
@@ -258,7 +277,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
start += task_step;
((TYPE *)data)[1] = start;
if (i == nfirst)
- task_step -= step;
+ task_step = nfirst_task_step;
fn (data);
if (!priority_queue_empty_p (&task.children_queue,
MEMMODEL_RELAXED))
@@ -303,7 +322,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
start += task_step;
((TYPE *)arg)[1] = start;
if (i == nfirst)
- task_step -= step;
+ task_step = nfirst_task_step;
thr->task = parent;
task->kind = GOMP_TASK_WAITING;
task->fn = fn;
diff --git a/libgomp/testsuite/libgomp.c-c++-common/taskloop-4.c b/libgomp/testsuite/libgomp.c-c++-common/taskloop-4.c
index 4ac1b5a3751..b9499387d11 100644
--- a/libgomp/testsuite/libgomp.c-c++-common/taskloop-4.c
+++ b/libgomp/testsuite/libgomp.c-c++-common/taskloop-4.c
@@ -85,7 +85,8 @@ main ()
if (test (7, 21, 2, 15, grainsize, &ntasks, &min_iters, &max_iters) != 7
|| ntasks != 1 || min_iters != 7 || max_iters != 7)
__builtin_abort ();
- /* If num_tasks is present, # of task loop iters is min (# of loop iters, num_tasks). */
+ /* If num_tasks is present, # of tasks is min (# of loop iters, num_tasks)
+ and each task has at least one iteration. */
if (test (-51, 2500, 48, 9, num_tasks, &ntasks, &min_iters, &max_iters) != 54
|| ntasks != 9)
__builtin_abort ();
diff --git a/libgomp/testsuite/libgomp.c-c++-common/taskloop-5.c b/libgomp/testsuite/libgomp.c-c++-common/taskloop-5.c
new file mode 100644
index 00000000000..1b64a6dc4f7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/taskloop-5.c
@@ -0,0 +1,135 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+int u[64], v, w[64];
+
+__attribute__((noinline, noclone)) int
+test (int a, int b, int c, int d, void (*fn) (int, int, int, int),
+ int *num_tasks, int *min_iters, int *max_iters, int *sep)
+{
+ int i, j, t = 0;
+ __builtin_memset (u, 0, sizeof u);
+ v = 0;
+ fn (a, b, c, d);
+ *min_iters = 0;
+ *max_iters = 0;
+ *num_tasks = v;
+ *sep = v;
+ if (v)
+ {
+ *min_iters = u[0];
+ *max_iters = u[0];
+ t = u[0];
+ for (i = 1; i < v; i++)
+ {
+ if (*min_iters > u[i])
+ *min_iters = u[i];
+ if (*max_iters < u[i])
+ *max_iters = u[i];
+ t += u[i];
+ }
+ if (*min_iters != *max_iters)
+ {
+ for (i = 0; i < v - 1; i++)
+ {
+ int min_idx = i;
+ for (j = i + 1; j < v; j++)
+ if (w[min_idx] > w[j])
+ min_idx = j;
+ if (min_idx != i)
+ {
+ int tem = u[i];
+ u[i] = u[min_idx];
+ u[min_idx] = tem;
+ tem = w[i];
+ w[i] = w[min_idx];
+ w[min_idx] = tem;
+ }
+ }
+ if (u[0] != *max_iters)
+ __builtin_abort ();
+ for (i = 1; i < v; i++)
+ if (u[i] != u[i - 1])
+ {
+ if (*sep != v || u[i] != *min_iters)
+ __builtin_abort ();
+ *sep = i;
+ }
+ }
+ }
+ return t;
+}
+
+void
+grainsize (int a, int b, int c, int d)
+{
+ int i, j = 0, k = 0;
+ #pragma omp taskloop firstprivate (j, k) grainsize(strict:d)
+ for (i = a; i < b; i += c)
+ {
+ if (j == 0)
+ {
+ #pragma omp atomic capture
+ k = v++;
+ if (k >= 64)
+ __builtin_abort ();
+ w[k] = i;
+ }
+ u[k] = ++j;
+ }
+}
+
+void
+num_tasks (int a, int b, int c, int d)
+{
+ int i, j = 0, k = 0;
+ #pragma omp taskloop firstprivate (j, k) num_tasks(strict:d)
+ for (i = a; i < b; i += c)
+ {
+ if (j == 0)
+ {
+ #pragma omp atomic capture
+ k = v++;
+ if (k >= 64)
+ __builtin_abort ();
+ w[k] = i;
+ }
+ u[k] = ++j;
+ }
+}
+
+int
+main ()
+{
+ #pragma omp parallel
+ #pragma omp single
+ {
+ int min_iters, max_iters, ntasks, sep;
+ /* If grainsize is present and has strict modifier, # of task loop iters is == grainsize,
+ except that it can be smaller on the last task. */
+ if (test (0, 79, 1, 17, grainsize, &ntasks, &min_iters, &max_iters, &sep) != 79
+ || ntasks != 5 || min_iters != 11 || max_iters != 17 || sep != 4)
+ __builtin_abort ();
+ if (test (-49, 2541, 7, 28, grainsize, &ntasks, &min_iters, &max_iters, &sep) != 370
+ || ntasks != 14 || min_iters != 6 || max_iters != 28 || sep != 13)
+ __builtin_abort ();
+ if (test (7, 21, 2, 15, grainsize, &ntasks, &min_iters, &max_iters, &sep) != 7
+ || ntasks != 1 || min_iters != 7 || max_iters != 7 || sep != 1)
+ __builtin_abort ();
+ /* If num_tasks is present, # of tasks is min (# of loop iters, num_tasks)
+ and each task has at least one iteration. If strict modifier is present,
+ first set of tasks has ceil (# of loop iters / num_tasks) iterations,
+ followed by possibly empty set of tasks with floor (# of loop iters / num_tasks)
+ iterations. */
+ if (test (-51, 2500, 48, 9, num_tasks, &ntasks, &min_iters, &max_iters, &sep) != 54
+ || ntasks != 9 || min_iters != 6 || max_iters != 6 || sep != 9)
+ __builtin_abort ();
+ if (test (0, 57, 1, 9, num_tasks, &ntasks, &min_iters, &max_iters, &sep) != 57
+ || ntasks != 9 || min_iters != 6 || max_iters != 7 || sep != 3)
+ __builtin_abort ();
+ if (test (0, 25, 2, 17, num_tasks, &ntasks, &min_iters, &max_iters, &sep) != 13
+ || ntasks != 13 || min_iters != 1 || max_iters != 1 || sep != 13)
+ __builtin_abort ();
+ }
+ return 0;
+}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2021-08-23 8:23 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-23 8:23 [gcc r12-3066] openmp: Add support for strict modifier on grainsize/num_tasks clauses Jakub Jelinek
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).