commit d37f644faa16f40b05cc4cd90f9dd6f6b868b5ea Author: Aldy Hernandez Date: Fri Nov 6 14:57:29 2015 -0800 * Makefile.am (libgomp_la_SOURCES): Add priority_queue.c. * Makefile.in: Regenerate. * libgomp.h: Shuffle prototypes and forward definitions around so priority queues can be defined. (struct gomp_task): Remove children, next_child, prev_child, next_queue, prev_queue, next_taskgroup, prev_taskgroup. Add pnode field. (struct gomp_taskgroup): Remove children. Add taskgroup_queue. (struct gomp_team): Change task_queue type to a priority queue. (splay_compare): Define inline. (priority_queue_offset): New. (priority_node_to_task): New. (task_to_priority_node): New. * oacc-mem.c: Do not include splay-tree.h. * priority_queue.c: New file. * priority_queue.h: New file. * splay-tree.c: Do not include splay-tree.h. (splay_tree_foreach_internal): New. (splay_tree_foreach): New. * splay-tree.h: Become re-entrant if splay_tree_prefix is defined. (splay_tree_callback): Define typedef. * target.c (splay_compare): Move to libgomp.h. * task.c (gomp_init_task): Initialize children_queue. (gomp_clear_parent_in_list): New. (gomp_clear_parent_in_tree): New. (gomp_clear_parent): Handle priorities. (GOMP_task): Same. (gomp_create_target_task): Use priority queues. (verify_children_queue): Remove. (priority_list_upgrade_task): New. (priority_queue_upgrade_task): New. (verify_task_queue): Remove. (priority_list_downgrade_task): New. (priority_queue_downgrade_task): New. (gomp_task_run_pre): Use priority queues. Abstract code out to priority_queue_downgrade_task. (gomp_task_run_post_handle_dependers): Use priority queues. (gomp_task_run_post_remove_parent): Same. (gomp_task_run_post_remove_taskgroup): Same. (gomp_barrier_handle_tasks): Same. (GOMP_taskwait): Same. (gomp_task_maybe_wait_for_dependencies): Same. Abstract code to priority-queue_upgrade_task. (GOMP_taskgroup_start): Use priority queues. (GOMP_taskgroup_end): Same. * taskloop.c (GOMP_taskloop): Handle priorities. * team.c (gomp_new_team): Call priority_queue_init. (free_team): Call priority_queue_free. * testsuite/libgomp.c/priority.c: New test. diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index 5411278..a3e1c2b 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -63,7 +63,7 @@ libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \ time.c fortran.c affinity.c target.c splay-tree.c libgomp-plugin.c \ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c oacc-async.c \ - oacc-plugin.c oacc-cuda.c + oacc-plugin.c oacc-cuda.c priority_queue.c include $(top_srcdir)/plugin/Makefrag.am diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index 79745ce..7a1c976 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -168,7 +168,7 @@ am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \ fortran.lo affinity.lo target.lo splay-tree.lo \ libgomp-plugin.lo oacc-parallel.lo oacc-host.lo oacc-init.lo \ oacc-mem.lo oacc-async.lo oacc-plugin.lo oacc-cuda.lo \ - $(am__objects_1) + priority_queue.lo $(am__objects_1) libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) DEFAULT_INCLUDES = -I.@am__isrc@ depcomp = $(SHELL) $(top_srcdir)/../depcomp @@ -415,7 +415,7 @@ libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ bar.c ptrlock.c time.c fortran.c affinity.c target.c \ splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \ oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \ - $(am__append_2) + priority_queue.c $(am__append_2) # Nvidia PTX OpenACC plugin. @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION) @@ -589,6 +589,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ptrlock.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@ diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 23b516e..d8c8080 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -50,6 +50,22 @@ #include #include +/* Needed for memset in priority_queue.c. */ +#if _LIBGOMP_CHECKING_ +# ifdef STRING_WITH_STRINGS +# include +# include +# else +# ifdef HAVE_STRING_H +# include +# else +# ifdef HAVE_STRINGS_H +# include +# endif +# endif +# endif +#endif + #ifdef HAVE_ATTRIBUTE_VISIBILITY # pragma GCC visibility push(hidden) #endif @@ -65,6 +81,44 @@ enum memmodel MEMMODEL_SEQ_CST = 5 }; +/* alloc.c */ + +extern void *gomp_malloc (size_t) __attribute__((malloc)); +extern void *gomp_malloc_cleared (size_t) __attribute__((malloc)); +extern void *gomp_realloc (void *, size_t); + +/* Avoid conflicting prototypes of alloca() in system headers by using + GCC's builtin alloca(). */ +#define gomp_alloca(x) __builtin_alloca(x) + +/* error.c */ + +extern void gomp_vdebug (int, const char *, va_list); +extern void gomp_debug (int, const char *, ...) + __attribute__ ((format (printf, 2, 3))); +#define gomp_vdebug(KIND, FMT, VALIST) \ + do { \ + if (__builtin_expect (gomp_debug_var, 0)) \ + (gomp_vdebug) ((KIND), (FMT), (VALIST)); \ + } while (0) +#define gomp_debug(KIND, ...) \ + do { \ + if (__builtin_expect (gomp_debug_var, 0)) \ + (gomp_debug) ((KIND), __VA_ARGS__); \ + } while (0) +extern void gomp_verror (const char *, va_list); +extern void gomp_error (const char *, ...) + __attribute__ ((format (printf, 1, 2))); +extern void gomp_vfatal (const char *, va_list) + __attribute__ ((noreturn)); +extern void gomp_fatal (const char *, ...) + __attribute__ ((noreturn, format (printf, 1, 2))); + +struct gomp_task; +struct gomp_taskgroup; +struct htab; + +#include "priority_queue.h" #include "sem.h" #include "mutex.h" #include "bar.h" @@ -298,6 +352,7 @@ extern gomp_mutex_t gomp_managed_threads_lock; #endif extern unsigned long gomp_max_active_levels_var; extern bool gomp_cancel_var; +extern int gomp_max_task_priority_var; extern unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var; extern unsigned long gomp_available_cpus, gomp_managed_threads; extern unsigned long *gomp_nthreads_var_list, gomp_nthreads_var_list_len; @@ -321,10 +376,6 @@ enum gomp_task_kind GOMP_TASK_TIED }; -struct gomp_task; -struct gomp_taskgroup; -struct htab; - struct gomp_task_depend_entry { /* Address of dependency. */ @@ -352,8 +403,8 @@ struct gomp_taskwait { bool in_taskwait; bool in_depend_wait; + /* Number of tasks we are waiting for. */ size_t n_depend; - struct gomp_task *last_parent_depends_on; gomp_sem_t taskwait_sem; }; @@ -361,26 +412,10 @@ struct gomp_taskwait struct gomp_task { - /* Parent circular list. See children description below. */ + /* Parent of this task. */ struct gomp_task *parent; - /* Circular list representing the children of this task. - - In this list we first have parent_depends_on ready to run tasks, - then !parent_depends_on ready to run tasks, and finally already - running tasks. */ - struct gomp_task *children; - struct gomp_task *next_child; - struct gomp_task *prev_child; - /* Circular task_queue in `struct gomp_team'. - - GOMP_TASK_WAITING tasks come before GOMP_TASK_TIED tasks. */ - struct gomp_task *next_queue; - struct gomp_task *prev_queue; - /* Circular queue in gomp_taskgroup->children. - - GOMP_TASK_WAITING tasks come before GOMP_TASK_TIED tasks. */ - struct gomp_task *next_taskgroup; - struct gomp_task *prev_taskgroup; + /* Children of this task. */ + struct priority_queue children_queue; /* Taskgroup this task belongs in. */ struct gomp_taskgroup *taskgroup; /* Tasks that depend on this task. */ @@ -389,8 +424,19 @@ struct gomp_task struct gomp_taskwait *taskwait; /* Number of items in DEPEND. */ size_t depend_count; - /* Number of tasks in the DEPENDERS field above. */ + /* Number of tasks this task depends on. Once this counter reaches + 0, we have no unsatisfied dependencies, and this task can be put + into the various queues to be scheduled. */ size_t num_dependees; + + /* Priority of this task. */ + int priority; + /* The priority node for this task in each of the different queues. + We put this here to avoid allocating space for each priority + node. Then we play offsetof() games to convert between pnode[] + entries and the gomp_task in which they reside. */ + struct priority_node pnode[3]; + struct gomp_task_icv icv; void (*fn) (void *); void *fn_data; @@ -410,12 +456,8 @@ struct gomp_task struct gomp_taskgroup { struct gomp_taskgroup *prev; - /* Circular list of tasks that belong in this taskgroup. - - Tasks are chained by next/prev_taskgroup within gomp_task, and - are sorted by GOMP_TASK_WAITING tasks, and then GOMP_TASK_TIED - tasks. */ - struct gomp_task *children; + /* Queue of tasks that belong in this taskgroup. */ + struct priority_queue taskgroup_queue; bool in_taskgroup_wait; bool cancelled; gomp_sem_t taskgroup_sem; @@ -495,9 +537,8 @@ struct gomp_team struct gomp_work_share work_shares[8]; gomp_mutex_t task_lock; - /* Scheduled tasks. Chain fields are next/prev_queue within a - gomp_task. */ - struct gomp_task *task_queue; + /* Scheduled tasks. */ + struct priority_queue task_queue; /* Number of all GOMP_TASK_{WAITING,TIED} tasks in the team. */ unsigned int task_count; /* Number of GOMP_TASK_WAITING tasks currently waiting to be scheduled. */ @@ -627,39 +668,6 @@ extern bool gomp_affinity_init_level (int, unsigned long, bool); extern void gomp_affinity_print_place (void *); extern void gomp_get_place_proc_ids_8 (int, int64_t *); -/* alloc.c */ - -extern void *gomp_malloc (size_t) __attribute__((malloc)); -extern void *gomp_malloc_cleared (size_t) __attribute__((malloc)); -extern void *gomp_realloc (void *, size_t); - -/* Avoid conflicting prototypes of alloca() in system headers by using - GCC's builtin alloca(). */ -#define gomp_alloca(x) __builtin_alloca(x) - -/* error.c */ - -extern void gomp_vdebug (int, const char *, va_list); -extern void gomp_debug (int, const char *, ...) - __attribute__ ((format (printf, 2, 3))); -#define gomp_vdebug(KIND, FMT, VALIST) \ - do { \ - if (__builtin_expect (gomp_debug_var, 0)) \ - (gomp_vdebug) ((KIND), (FMT), (VALIST)); \ - } while (0) -#define gomp_debug(KIND, ...) \ - do { \ - if (__builtin_expect (gomp_debug_var, 0)) \ - (gomp_debug) ((KIND), __VA_ARGS__); \ - } while (0) -extern void gomp_verror (const char *, va_list); -extern void gomp_error (const char *, ...) - __attribute__ ((format (printf, 1, 2))); -extern void gomp_vfatal (const char *, va_list) - __attribute__ ((noreturn)); -extern void gomp_fatal (const char *, ...) - __attribute__ ((noreturn, format (printf, 1, 2))); - /* iter.c */ extern int gomp_iter_static_next (long *, long *); @@ -741,6 +749,7 @@ extern void gomp_init_targets_once (void); extern int gomp_get_num_devices (void); extern void gomp_target_task_fn (void *); +/* Splay tree definitions. */ typedef struct splay_tree_node_s *splay_tree_node; typedef struct splay_tree_s *splay_tree; typedef struct splay_tree_key_s *splay_tree_key; @@ -800,6 +809,21 @@ struct splay_tree_key_s { uintptr_t async_refcount; }; +/* The comparison function. */ + +static inline int +splay_compare (splay_tree_key x, splay_tree_key y) +{ + if (x->host_start == x->host_end + && y->host_start == y->host_end) + return 0; + if (x->host_end <= y->host_start) + return -1; + if (x->host_start >= y->host_end) + return 1; + return 0; +} + #include "splay-tree.h" typedef struct acc_dispatch_t @@ -1016,4 +1040,34 @@ extern int gomp_test_nest_lock_25 (omp_nest_lock_25_t *) __GOMP_NOTHROW; # define ialias_call(fn) fn #endif +/* Helper function for priority_node_to_task() and + task_to_priority_node(). + + Return the offset from a task to its priority_node entry. The + priority_node entry is has a type of TYPE. */ + +static inline size_t +priority_queue_offset (enum priority_queue_type type) +{ + return offsetof (struct gomp_task, pnode[(int) type]); +} + +/* Return the task associated with a priority NODE of type TYPE. */ + +static inline struct gomp_task * +priority_node_to_task (enum priority_queue_type type, + struct priority_node *node) +{ + return (struct gomp_task *) ((char *) node - priority_queue_offset (type)); +} + +/* Return the priority node of type TYPE for a given TASK. */ + +static inline struct priority_node * +task_to_priority_node (enum priority_queue_type type, + struct gomp_task *task) +{ + return (struct priority_node *) ((char *) task + + priority_queue_offset (type)); +} #endif /* LIBGOMP_H */ diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 5410906..2488480 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -31,7 +31,6 @@ #include "libgomp.h" #include "gomp-constants.h" #include "oacc-int.h" -#include "splay-tree.h" #include #include diff --git a/libgomp/priority_queue.c b/libgomp/priority_queue.c new file mode 100644 index 0000000..c08fdae --- /dev/null +++ b/libgomp/priority_queue.c @@ -0,0 +1,300 @@ +/* Copyright (C) 2015 Free Software Foundation, Inc. + Contributed by Aldy Hernandez . + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* Priority queue implementation of GOMP tasks. */ + +#include "libgomp.h" + +#if _LIBGOMP_CHECKING_ +#include + +/* Sanity check to verify whether a TASK is in LIST. Return TRUE if + found, FALSE otherwise. + + TYPE is the type of priority queue this task resides in. */ + +static inline bool +priority_queue_task_in_list_p (enum priority_queue_type type, + struct priority_list *list, + struct gomp_task *task) +{ + struct priority_node *p = list->tasks; + do + { + if (priority_node_to_task (type, p) == task) + return true; + p = p->next; + } + while (p != list->tasks); + return false; +} + +/* Tree version of priority_queue_task_in_list_p. */ + +static inline bool +priority_queue_task_in_tree_p (enum priority_queue_type type, + struct priority_queue *head, + struct gomp_task *task) +{ + struct priority_list *list + = priority_queue_lookup_priority (head, task->priority); + if (!list) + return false; + return priority_queue_task_in_list_p (type, list, task); +} + +/* Generic version of priority_queue_task_in_list_p that works for + trees or lists. */ + +bool +priority_queue_task_in_queue_p (enum priority_queue_type type, + struct priority_queue *head, + struct gomp_task *task) +{ + if (priority_queue_empty_p (head, MEMMODEL_RELAXED)) + return false; + if (priority_queue_multi_p (head)) + return priority_queue_task_in_tree_p (type, head, task); + else + return priority_queue_task_in_list_p (type, &head->l, task); +} + +/* Sanity check LIST to make sure the tasks therein are in the right + order. LIST is a priority list of type TYPE. + + The expected order is that GOMP_TASK_WAITING tasks come before + GOMP_TASK_TIED ones. + + If CHECK_DEPS is TRUE, we also check that parent_depends_on WAITING + tasks come before !parent_depends_on WAITING tasks. This is only + applicable to the children queue, and the caller is expected to + ensure that we are verifying the children queue. */ + +static void +priority_list_verify (enum priority_queue_type type, + struct priority_list *list, bool check_deps) +{ + bool seen_tied = false; + bool seen_plain_waiting = false; + struct priority_node *p = list->tasks; + while (1) + { + struct gomp_task *t = priority_node_to_task (type, p); + if (seen_tied && t->kind == GOMP_TASK_WAITING) + gomp_fatal ("priority_queue_verify: WAITING task after TIED"); + if (t->kind == GOMP_TASK_TIED) + seen_tied = true; + else if (check_deps && t->kind == GOMP_TASK_WAITING) + { + if (t->parent_depends_on) + { + if (seen_plain_waiting) + gomp_fatal ("priority_queue_verify: " + "parent_depends_on after !parent_depends_on"); + } + else + seen_plain_waiting = true; + } + p = p->next; + if (p == list->tasks) + break; + } +} + +/* Callback type for priority_tree_verify_callback. */ +struct cbtype +{ + enum priority_queue_type type; + bool check_deps; +}; + +/* Verify every task in NODE. + + Callback for splay_tree_foreach. */ + +static void +priority_tree_verify_callback (prio_splay_tree_key key, void *data) +{ + struct cbtype *cb = (struct cbtype *) data; + priority_list_verify (cb->type, &key->l, cb->check_deps); +} + +/* Generic version of priority_list_verify. + + Sanity check HEAD to make sure the tasks therein are in the right + order. The priority_queue holds tasks of type TYPE. + + If CHECK_DEPS is TRUE, we also check that parent_depends_on WAITING + tasks come before !parent_depends_on WAITING tasks. This is only + applicable to the children queue, and the caller is expected to + ensure that we are verifying the children queue. */ + +void +priority_queue_verify (enum priority_queue_type type, + struct priority_queue *head, bool check_deps) +{ + if (priority_queue_empty_p (head, MEMMODEL_RELAXED)) + return; + if (priority_queue_multi_p (head)) + { + struct cbtype cb = { type, check_deps }; + prio_splay_tree_foreach (&head->t, + priority_tree_verify_callback, &cb); + } + else + priority_list_verify (type, &head->l, check_deps); +} +#endif /* _LIBGOMP_CHECKING_ */ + +/* Remove NODE from priority queue HEAD, wherever it may be inside the + tree. HEAD contains tasks of type TYPE. */ + +void +priority_tree_remove (enum priority_queue_type type, + struct priority_queue *head, + struct priority_node *node) +{ + /* ?? The only reason this function is not inlined is because we + need to find the priority within gomp_task (which has not been + completely defined in the header file). If the lack of inlining + is a concern, we could pass the priority number as a + parameter, or we could move this to libgomp.h. */ + int priority = priority_node_to_task (type, node)->priority; + + /* ?? We could avoid this lookup by keeping a pointer to the key in + the priority_node. */ + struct priority_list *list + = priority_queue_lookup_priority (head, priority); +#if _LIBGOMP_CHECKING_ + if (!list) + gomp_fatal ("Unable to find priority %d", priority); +#endif + /* If NODE was the last in its priority, clean up the priority. */ + if (priority_list_remove (list, node, MEMMODEL_RELAXED)) + { + prio_splay_tree_remove (&head->t, (prio_splay_tree_key) list); + list->tasks = NULL; +#if _LIBGOMP_CHECKING_ + memset (list, 0xaf, sizeof (*list)); +#endif + free (list); + } +} + +/* Return the highest priority WAITING task in a splay tree NODE. If + there are no WAITING tasks available, return NULL. + + NODE is a priority list containing tasks of type TYPE. + + The right most node in a tree contains the highest priority. + Recurse down to find such a node. If the task at that max node is + not WAITING, bubble back up and look at the remaining tasks + in-order. */ + +static struct gomp_task * +priority_tree_next_task_1 (enum priority_queue_type type, + prio_splay_tree_node node) +{ + again: + if (!node) + return NULL; + struct gomp_task *ret = priority_tree_next_task_1 (type, node->right); + if (ret) + return ret; + ret = priority_node_to_task (type, node->key.l.tasks); + if (ret->kind == GOMP_TASK_WAITING) + return ret; + node = node->left; + goto again; +} + +/* Return the highest priority WAITING task from within Q1 and Q2, + while giving preference to tasks from Q1. Q1 is a queue containing + items of type TYPE1. Q2 is a queue containing items of type TYPE2. + + Since we are mostly interested in Q1, if there are no WAITING tasks + in Q1, we don't bother checking Q2, and just return NULL. + + As a special case, Q2 can be NULL, in which case, we just choose + the highest priority WAITING task in Q1. This is an optimization + to speed up looking through only one queue. + + If the returned task is chosen from Q1, *Q1_CHOSEN_P is set to + TRUE, otherwise it is set to FALSE. */ + +struct gomp_task * +priority_tree_next_task (enum priority_queue_type type1, + struct priority_queue *q1, + enum priority_queue_type type2, + struct priority_queue *q2, + bool *q1_chosen_p) +{ + struct gomp_task *t1 = priority_tree_next_task_1 (type1, q1->t.root); + if (!t1 + /* Special optimization when only searching through one queue. */ + || !q2) + { + *q1_chosen_p = true; + return t1; + } + struct gomp_task *t2 = priority_tree_next_task_1 (type2, q2->t.root); + if (!t2 || t1->priority > t2->priority) + { + *q1_chosen_p = true; + return t1; + } + if (t2->priority > t1->priority) + { + *q1_chosen_p = false; + return t2; + } + /* If we get here, the priorities are the same, so we must look at + parent_depends_on to make our decision. */ +#if _LIBGOMP_CHECKING_ + if (t1 != t2) + gomp_fatal ("priority_tree_next_task: t1 != t2"); +#endif + if (t2->parent_depends_on && !t1->parent_depends_on) + { + *q1_chosen_p = false; + return t2; + } + *q1_chosen_p = true; + return t1; +} + +/* Priority splay trees comparison function. */ +static inline int +prio_splay_compare (prio_splay_tree_key x, prio_splay_tree_key y) +{ + if (x->l.priority == y->l.priority) + return 0; + return x->l.priority < y->l.priority ? -1 : 1; +} + +/* Define another splay tree instantiation, for priority_list's. */ +#define splay_tree_prefix prio +#define splay_tree_c +#include "splay-tree.h" diff --git a/libgomp/priority_queue.h b/libgomp/priority_queue.h new file mode 100644 index 0000000..e9c369b --- /dev/null +++ b/libgomp/priority_queue.h @@ -0,0 +1,485 @@ +/* Copyright (C) 2015 Free Software Foundation, Inc. + Contributed by Aldy Hernandez . + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* Header file for a priority queue of GOMP tasks. */ + +/* ?? Perhaps all the priority_tree_* functions are complex and rare + enough to go out-of-line and be moved to priority_queue.c. ?? */ + +#ifndef _PRIORITY_QUEUE_H_ +#define _PRIORITY_QUEUE_H_ + +/* One task. */ + +struct priority_node +{ + /* Next and previous chains in a circular doubly linked list for + tasks within this task's priority. */ + struct priority_node *next, *prev; +}; + +/* All tasks within the same priority. */ + +struct priority_list +{ + /* Priority of the tasks in this set. */ + int priority; + + /* Tasks. */ + struct priority_node *tasks; + + /* This points to the last of the higher priority WAITING tasks. + Remember that for the children queue, we have: + + parent_depends_on WAITING tasks. + !parent_depends_on WAITING tasks. + TIED tasks. + + This is a pointer to the last of the parent_depends_on WAITING + tasks which are essentially, higher priority items within their + priority. */ + struct priority_node *last_parent_depends_on; +}; + +/* Another splay tree instantiation, for priority_list's. */ +typedef struct prio_splay_tree_node_s *prio_splay_tree_node; +typedef struct prio_splay_tree_s *prio_splay_tree; +typedef struct prio_splay_tree_key_s *prio_splay_tree_key; +struct prio_splay_tree_key_s { + /* This structure must only containing a priority_list, as we cast + prio_splay_tree_key to priority_list throughout. */ + struct priority_list l; +}; +#define splay_tree_prefix prio +#include "splay-tree.h" + +/* The entry point into a priority queue of tasks. + + There are two alternate implementations with which to store tasks: + as a balanced tree of sorts, or as a simple list of tasks. If + there are only priority-0 items (ROOT is NULL), we use the simple + list, otherwise (ROOT is non-NULL) we use the tree. */ + +struct priority_queue +{ + /* If t.root != NULL, this is a splay tree of priority_lists to hold + all tasks. This is only used if multiple priorities are in play, + otherwise we use the priority_list `l' below to hold all + (priority-0) tasks. */ + struct prio_splay_tree_s t; + + /* If T above is NULL, only priority-0 items exist, so keep them + in a simple list. */ + struct priority_list l; +}; + +enum priority_insert_type { + /* Insert at the beginning of a priority list. */ + PRIORITY_INSERT_BEGIN, + /* Insert at the end of a priority list. */ + PRIORITY_INSERT_END +}; + +/* Used to determine in which queue a given priority node belongs in. + See pnode field of gomp_task. */ + +enum priority_queue_type +{ + PQ_TEAM, /* Node belongs in gomp_team's task_queue. */ + PQ_CHILDREN, /* Node belongs in parent's children_queue. */ + PQ_TASKGROUP, /* Node belongs in taskgroup->taskgroup_queue. */ + PQ_IGNORED = 999 +}; + +/* Priority queue implementation prototypes. */ + +extern bool priority_queue_task_in_queue_p (enum priority_queue_type, + struct priority_queue *, + struct gomp_task *); +extern void priority_queue_dump (enum priority_queue_type, + struct priority_queue *); +extern void priority_queue_verify (enum priority_queue_type, + struct priority_queue *, bool); +extern void priority_tree_remove (enum priority_queue_type, + struct priority_queue *, + struct priority_node *); +extern struct gomp_task *priority_tree_next_task (enum priority_queue_type, + struct priority_queue *, + enum priority_queue_type, + struct priority_queue *, + bool *); + +/* Return TRUE if there is more than one priority in HEAD. This is + used throughout to to choose between the fast path (priority 0 only + items) and a world with multiple priorities. */ + +static inline bool +priority_queue_multi_p (struct priority_queue *head) +{ + return __builtin_expect (head->t.root != NULL, 0); +} + +/* Initialize a priority queue. */ + +static inline void +priority_queue_init (struct priority_queue *head) +{ + head->t.root = NULL; + /* To save a few microseconds, we don't initialize head->l.priority + to 0 here. It is implied that priority will be 0 if head->t.root + == NULL. + + priority_tree_insert() will fix this when we encounter multiple + priorities. */ + head->l.tasks = NULL; + head->l.last_parent_depends_on = NULL; +} + +static inline void +priority_queue_free (struct priority_queue *head) +{ + /* There's nothing to do, as tasks were freed as they were removed + in priority_queue_remove. */ +} + +/* Forward declarations. */ +static inline size_t priority_queue_offset (enum priority_queue_type); +static inline struct gomp_task *priority_node_to_task + (enum priority_queue_type, + struct priority_node *); +static inline struct priority_node *task_to_priority_node + (enum priority_queue_type, + struct gomp_task *); + +/* Return TRUE if priority queue HEAD is empty. + + MODEL IS MEMMODEL_ACQUIRE if we should use an acquire atomic to + read from the root of the queue, otherwise MEMMODEL_RELAXED if we + should use a plain load. */ + +static inline _Bool +priority_queue_empty_p (struct priority_queue *head, enum memmodel model) +{ + /* Note: The acquire barriers on the loads here synchronize with + the write of a NULL in gomp_task_run_post_remove_parent. It is + not necessary that we synchronize with other non-NULL writes at + this point, but we must ensure that all writes to memory by a + child thread task work function are seen before we exit from + GOMP_taskwait. */ + if (priority_queue_multi_p (head)) + { + if (model == MEMMODEL_ACQUIRE) + return __atomic_load_n (&head->t.root, MEMMODEL_ACQUIRE) == NULL; + return head->t.root == NULL; + } + if (model == MEMMODEL_ACQUIRE) + return __atomic_load_n (&head->l.tasks, MEMMODEL_ACQUIRE) == NULL; + return head->l.tasks == NULL; +} + +/* Look for a given PRIORITY in HEAD. Return it if found, otherwise + return NULL. This only applies to the tree variant in HEAD. There + is no point in searching for priorities in HEAD->L. */ + +static inline struct priority_list * +priority_queue_lookup_priority (struct priority_queue *head, int priority) +{ + if (head->t.root == NULL) + return NULL; + struct prio_splay_tree_key_s k; + k.l.priority = priority; + return (struct priority_list *) + prio_splay_tree_lookup (&head->t, &k); +} + +/* Insert task in DATA, with PRIORITY, in the priority list in LIST. + LIST contains items of type TYPE. + + If POS is PRIORITY_INSERT_BEGIN, the new task is inserted at the + top of its respective priority. If POS is PRIORITY_INSERT_END, the + task is inserted at the end of its priority. + + If ADJUST_PARENT_DEPENDS_ON is TRUE, LIST is a children queue, and + we must keep track of higher and lower priority WAITING tasks by + keeping the queue's last_parent_depends_on field accurate. This + only applies to the children queue, and the caller must ensure LIST + is a children queue in this case. + + If ADJUST_PARENT_DEPENDS_ON is TRUE, TASK_IS_PARENT_DEPENDS_ON is + set to the task's parent_depends_on field. If + ADJUST_PARENT_DEPENDS_ON is FALSE, this field is irrelevant. + + Return the new priority_node. */ + +static inline void +priority_list_insert (enum priority_queue_type type, + struct priority_list *list, + struct gomp_task *task, + int priority, + enum priority_insert_type pos, + bool adjust_parent_depends_on, + bool task_is_parent_depends_on) +{ + struct priority_node *node = task_to_priority_node (type, task); + if (list->tasks) + { + /* If we are keeping track of higher/lower priority items, + but this is a lower priority WAITING task + (parent_depends_on != NULL), put it after all ready to + run tasks. See the comment in + priority_queue_upgrade_task for a visual on how tasks + should be organized. */ + if (adjust_parent_depends_on + && pos == PRIORITY_INSERT_BEGIN + && list->last_parent_depends_on + && !task_is_parent_depends_on) + { + struct priority_node *last_parent_depends_on + = list->last_parent_depends_on; + node->next = last_parent_depends_on->next; + node->prev = last_parent_depends_on; + } + /* Otherwise, put it at the top/bottom of the queue. */ + else + { + node->next = list->tasks; + node->prev = list->tasks->prev; + if (pos == PRIORITY_INSERT_BEGIN) + list->tasks = node; + } + node->next->prev = node; + node->prev->next = node; + } + else + { + node->next = node; + node->prev = node; + list->tasks = node; + } + if (adjust_parent_depends_on + && list->last_parent_depends_on == NULL + && task_is_parent_depends_on) + list->last_parent_depends_on = node; +} + +/* Tree version of priority_list_insert. */ + +static inline void +priority_tree_insert (enum priority_queue_type type, + struct priority_queue *head, + struct gomp_task *task, + int priority, + enum priority_insert_type pos, + bool adjust_parent_depends_on, + bool task_is_parent_depends_on) +{ + if (__builtin_expect (head->t.root == NULL, 0)) + { + /* The first time around, transfer any priority 0 items to the + tree. */ + if (head->l.tasks != NULL) + { + prio_splay_tree_node k = gomp_malloc (sizeof (*k)); + k->left = NULL; + k->right = NULL; + k->key.l.priority = 0; + k->key.l.tasks = head->l.tasks; + k->key.l.last_parent_depends_on = head->l.last_parent_depends_on; + prio_splay_tree_insert (&head->t, k); + head->l.tasks = NULL; + } + } + struct priority_list *list + = priority_queue_lookup_priority (head, priority); + if (!list) + { + prio_splay_tree_node k = gomp_malloc (sizeof (*k)); + k->left = NULL; + k->right = NULL; + k->key.l.priority = priority; + k->key.l.tasks = NULL; + k->key.l.last_parent_depends_on = NULL; + prio_splay_tree_insert (&head->t, k); + list = &k->key.l; + } + priority_list_insert (type, list, task, priority, pos, + adjust_parent_depends_on, + task_is_parent_depends_on); +} + +/* Generic version of priority_*_insert. */ + +static inline void +priority_queue_insert (enum priority_queue_type type, + struct priority_queue *head, + struct gomp_task *task, + int priority, + enum priority_insert_type pos, + bool adjust_parent_depends_on, + bool task_is_parent_depends_on) +{ +#if _LIBGOMP_CHECKING_ + if (priority_queue_task_in_queue_p (type, head, task)) + gomp_fatal ("Attempt to insert existing task %p", task); +#endif + if (priority_queue_multi_p (head) || __builtin_expect (priority > 0, 0)) + priority_tree_insert (type, head, task, priority, pos, + adjust_parent_depends_on, + task_is_parent_depends_on); + else + priority_list_insert (type, &head->l, task, priority, pos, + adjust_parent_depends_on, + task_is_parent_depends_on); +} + +/* If multiple priorities are in play, return the highest priority + task from within Q1 and Q2, while giving preference to tasks from + Q1. If the returned task is chosen from Q1, *Q1_CHOSEN_P is set to + TRUE, otherwise it is set to FALSE. + + If multiple priorities are not in play (only 0 priorities are + available), the next task is chosen exclusively from Q1. + + As a special case, Q2 can be NULL, in which case, we just choose + the highest priority WAITING task in Q1. This is an optimization + to speed up looking through only one queue. + + We assume Q1 has at least one item. */ + +static inline struct gomp_task * +priority_queue_next_task (enum priority_queue_type t1, + struct priority_queue *q1, + enum priority_queue_type t2, + struct priority_queue *q2, + bool *q1_chosen_p) +{ +#if _LIBGOMP_CHECKING_ + if (priority_queue_empty_p (q1, MEMMODEL_RELAXED)) + gomp_fatal ("priority_queue_next_task: Q1 is empty"); +#endif + if (priority_queue_multi_p (q1)) + { + struct gomp_task *t + = priority_tree_next_task (t1, q1, t2, q2, q1_chosen_p); + /* If T is NULL, there are no WAITING tasks in Q1. In which + case, return any old (non-waiting) task which will cause the + caller to do the right thing when checking T->KIND == + GOMP_TASK_WAITING. */ + if (!t) + { +#if _LIBGOMP_CHECKING_ + if (*q1_chosen_p == false) + gomp_fatal ("priority_queue_next_task inconsistency"); +#endif + return priority_node_to_task (t1, q1->t.root->key.l.tasks); + } + return t; + } + else + { + *q1_chosen_p = true; + return priority_node_to_task (t1, q1->l.tasks); + } +} + +/* Remove NODE from LIST. + + If we are removing the one and only item in the list, and MODEL is + MEMMODEL_RELEASE, use an atomic release to clear the list. + + If the list becomes empty after the remove, return TRUE. */ + +static inline bool +priority_list_remove (struct priority_list *list, + struct priority_node *node, + enum memmodel model) +{ + bool empty = false; + node->prev->next = node->next; + node->next->prev = node->prev; + if (list->tasks == node) + { + if (node->next != node) + list->tasks = node->next; + else + { + /* We access task->children in GOMP_taskwait outside of + the task lock mutex region, so need a release barrier + here to ensure memory written by child_task->fn above + is flushed before the NULL is written. */ + if (model == MEMMODEL_RELEASE) + __atomic_store_n (&list->tasks, NULL, MEMMODEL_RELEASE); + else + list->tasks = NULL; + empty = true; + goto remove_out; + } + } +remove_out: +#if _LIBGOMP_CHECKING_ + memset (node, 0xaf, sizeof (*node)); +#endif + return empty; +} + +/* This is the generic version of priority_list_remove. + + Remove NODE from priority queue HEAD. HEAD contains tasks of type TYPE. + + If we are removing the one and only item in the priority queue and + MODEL is MEMMODEL_RELEASE, use an atomic release to clear the queue. + + If the queue becomes empty after the remove, return TRUE. */ + +static inline bool +priority_queue_remove (enum priority_queue_type type, + struct priority_queue *head, + struct gomp_task *task, + enum memmodel model) +{ +#if _LIBGOMP_CHECKING_ + if (!priority_queue_task_in_queue_p (type, head, task)) + gomp_fatal ("Attempt to remove missing task %p", task); +#endif + if (priority_queue_multi_p (head)) + { + priority_tree_remove (type, head, task_to_priority_node (type, task)); + if (head->t.root == NULL) + { + if (model == MEMMODEL_RELEASE) + /* Errr, we store NULL twice, the alternative would be to + use an atomic release directly in the splay tree + routines. Worth it? */ + __atomic_store_n (&head->t.root, NULL, MEMMODEL_RELEASE); + return true; + } + return false; + } + else + return priority_list_remove (&head->l, + task_to_priority_node (type, task), model); +} + +#endif /* _PRIORITY_QUEUE_H_ */ diff --git a/libgomp/splay-tree.c b/libgomp/splay-tree.c index 030ca8f..862bbb8 100644 --- a/libgomp/splay-tree.c +++ b/libgomp/splay-tree.c @@ -37,9 +37,6 @@ are amortized O(log n) time for a tree with n nodes. */ #include "libgomp.h" -#include "splay-tree.h" - -extern int splay_compare (splay_tree_key, splay_tree_key); /* Rotate the edge joining the left child N with its parent P. PP is the grandparents' pointer to P. */ @@ -215,3 +212,27 @@ splay_tree_lookup (splay_tree sp, splay_tree_key key) else return NULL; } + +/* Helper function for splay_tree_foreach. + + Run FUNC on every node in KEY. */ + +static void +splay_tree_foreach_internal (splay_tree_node node, splay_tree_callback func, + void *data) +{ + if (!node) + return; + func (&node->key, data); + splay_tree_foreach_internal (node->left, func, data); + /* Yeah, whatever. GCC can fix my tail recursion. */ + splay_tree_foreach_internal (node->right, func, data); +} + +/* Run FUNC on each of the nodes in SP. */ + +attribute_hidden void +splay_tree_foreach (splay_tree sp, splay_tree_callback func, void *data) +{ + splay_tree_foreach_internal (sp->root, func, data); +} diff --git a/libgomp/splay-tree.h b/libgomp/splay-tree.h index 085021c..92c51bf 100644 --- a/libgomp/splay-tree.h +++ b/libgomp/splay-tree.h @@ -33,7 +33,17 @@ typedef struct splay_tree_node_s *splay_tree_node; typedef struct splay_tree_s *splay_tree; typedef struct splay_tree_key_s *splay_tree_key; define splay_tree_key_s structure, and define - splay_compare inline function. */ + splay_compare inline function. + + Alternatively, they can define splay_tree_prefix macro before + including this header and then all the above types, the + splay_compare function and the splay_tree_{lookup,insert_remove} + function will be prefixed by that prefix. If splay_tree_prefix + macro is defined, this header must be included twice: once where + you need the header file definitions, and once where you need the + .c implementation routines. In the latter case, you must also + define the macro splay_tree_c. See the include of splay-tree.h in + priority_queue.[hc] for an example. */ /* For an easily readable description of splay-trees, see: @@ -43,8 +53,37 @@ typedef struct splay_tree_key_s *splay_tree_key; The major feature of splay trees is that all basic tree operations are amortized O(log n) time for a tree with n nodes. */ -#ifndef _SPLAY_TREE_H -#define _SPLAY_TREE_H 1 +#ifdef splay_tree_prefix +# define splay_tree_name_1(prefix, name) prefix ## _ ## name +# define splay_tree_name(prefix, name) splay_tree_name_1 (prefix, name) +# define splay_tree_node_s \ + splay_tree_name (splay_tree_prefix, splay_tree_node_s) +# define splay_tree_s \ + splay_tree_name (splay_tree_prefix, splay_tree_s) +# define splay_tree_key_s \ + splay_tree_name (splay_tree_prefix, splay_tree_key_s) +# define splay_tree_node \ + splay_tree_name (splay_tree_prefix, splay_tree_node) +# define splay_tree \ + splay_tree_name (splay_tree_prefix, splay_tree) +# define splay_tree_key \ + splay_tree_name (splay_tree_prefix, splay_tree_key) +# define splay_compare \ + splay_tree_name (splay_tree_prefix, splay_compare) +# define splay_tree_lookup \ + splay_tree_name (splay_tree_prefix, splay_tree_lookup) +# define splay_tree_insert \ + splay_tree_name (splay_tree_prefix, splay_tree_insert) +# define splay_tree_remove \ + splay_tree_name (splay_tree_prefix, splay_tree_remove) +# define splay_tree_foreach \ + splay_tree_name (splay_tree_prefix, splay_tree_foreach) +# define splay_tree_callback \ + splay_tree_name (splay_tree_prefix, splay_tree_callback) +#endif + +#ifndef splay_tree_c +/* Header file definitions and prototypes. */ /* The nodes in the splay tree. */ struct splay_tree_node_s { @@ -59,8 +98,33 @@ struct splay_tree_s { splay_tree_node root; }; +typedef void (*splay_tree_callback) (splay_tree_key, void *); + extern splay_tree_key splay_tree_lookup (splay_tree, splay_tree_key); extern void splay_tree_insert (splay_tree, splay_tree_node); extern void splay_tree_remove (splay_tree, splay_tree_key); +extern void splay_tree_foreach (splay_tree, splay_tree_callback, void *); +#else /* splay_tree_c */ +# ifdef splay_tree_prefix +# include "splay-tree.c" +# undef splay_tree_name_1 +# undef splay_tree_name +# undef splay_tree_node_s +# undef splay_tree_s +# undef splay_tree_key_s +# undef splay_tree_node +# undef splay_tree +# undef splay_tree_key +# undef splay_compare +# undef splay_tree_lookup +# undef splay_tree_insert +# undef splay_tree_remove +# undef splay_tree_foreach +# undef splay_tree_callback +# undef splay_tree_c +# endif +#endif /* #ifndef splay_tree_c */ -#endif /* _SPLAY_TREE_H */ +#ifdef splay_tree_prefix +# undef splay_tree_prefix +#endif diff --git a/libgomp/target.c b/libgomp/target.c index 1bddc6f..142e4dd 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -92,23 +92,6 @@ gomp_realloc_unlock (void *old, size_t size) return ret; } -/* The comparison function. */ - -attribute_hidden int -splay_compare (splay_tree_key x, splay_tree_key y) -{ - if (x->host_start == x->host_end - && y->host_start == y->host_end) - return 0; - if (x->host_end <= y->host_start) - return -1; - if (x->host_start >= y->host_end) - return 1; - return 0; -} - -#include "splay-tree.h" - attribute_hidden void gomp_init_targets_once (void) { diff --git a/libgomp/task.c b/libgomp/task.c index 1246c6a..0fb59d1 100644 --- a/libgomp/task.c +++ b/libgomp/task.c @@ -65,6 +65,14 @@ void gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task, struct gomp_task_icv *prev_icv) { + /* It would seem that using memset here would be a win, but it turns + out that partially filling gomp_task allows us to keep the + overhead of task creation low. In the nqueens-1.c test, for a + sufficiently large N, we drop the overhead from 5-6% to 1%. + + Note, the nqueens-1.c test in serial mode is a good test to + benchmark the overhead of creating tasks as there are millions of + tiny tasks created that all run undeferred. */ task->parent = parent_task; task->icv = *prev_icv; task->kind = GOMP_TASK_IMPLICIT; @@ -73,7 +81,7 @@ gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task, task->final_task = false; task->copy_ctors_done = false; task->parent_depends_on = false; - task->children = NULL; + priority_queue_init (&task->children_queue); task->taskgroup = NULL; task->dependers = NULL; task->depend_hash = NULL; @@ -92,24 +100,66 @@ gomp_end_task (void) thr->task = task->parent; } -/* Orphan the task in CHILDREN and all its siblings. */ +/* Clear the parent field of every task in LIST. */ static inline void -gomp_clear_parent (struct gomp_task *children) +gomp_clear_parent_in_list (struct priority_list *list) { - struct gomp_task *task = children; - - if (task) + struct priority_node *p = list->tasks; + if (p) do { - task->parent = NULL; - task = task->next_child; + priority_node_to_task (PQ_CHILDREN, p)->parent = NULL; + p = p->next; } - while (task != children); + while (p != list->tasks); } -/* Helper function for GOMP_task and gomp_create_target_task. Depend clause - handling for undeferred task creation. */ +/* Splay tree version of gomp_clear_parent_in_list. + + Clear the parent field of every task in NODE within SP, and free + the node when done. */ + +static void +gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node) +{ + if (!node) + return; + prio_splay_tree_node left = node->left, right = node->right; + gomp_clear_parent_in_list (&node->key.l); +#if _LIBGOMP_CHECKING_ + memset (node, 0xaf, sizeof (*node)); +#endif + /* No need to remove the node from the tree. We're nuking + everything, so just free the nodes and our caller can clear the + entire splay tree. */ + free (node); + gomp_clear_parent_in_tree (sp, left); + gomp_clear_parent_in_tree (sp, right); +} + +/* Clear the parent field of every task in Q and remove every task + from Q. */ + +static inline void +gomp_clear_parent (struct priority_queue *q) +{ + if (priority_queue_multi_p (q)) + { + gomp_clear_parent_in_tree (&q->t, q->t.root); + /* All the nodes have been cleared in gomp_clear_parent_in_tree. + No need to remove anything. We can just nuke everything. */ + q->t.root = NULL; + } + else + gomp_clear_parent_in_list (&q->l); +} + +/* Helper function for GOMP_task and gomp_create_target_task. + + For a TASK with in/out dependencies, fill in the various dependency + queues. PARENT is the parent of said task. DEPEND is as in + GOMP_task. */ static void gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent, @@ -260,8 +310,8 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0) priority = 0; - /* FIXME, use priority. */ - (void) priority; + else if (priority > gomp_max_task_priority_var) + priority = gomp_max_task_priority_var; if (!if_clause || team == NULL || (thr->task && thr->task->final_task) @@ -283,6 +333,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), task.kind = GOMP_TASK_UNDEFERRED; task.final_task = (thr->task && thr->task->final_task) || (flags & GOMP_TASK_FLAG_FINAL); + task.priority = priority; if (thr->task) { task.in_tied_task = thr->task->in_tied_task; @@ -308,10 +359,10 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), child thread, but seeing a stale non-NULL value is not a problem. Once past the task_lock acquisition, this thread will see the real value of task.children. */ - if (task.children != NULL) + if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED)) { gomp_mutex_lock (&team->task_lock); - gomp_clear_parent (task.children); + gomp_clear_parent (&task.children_queue); gomp_mutex_unlock (&team->task_lock); } gomp_end_task (); @@ -333,6 +384,7 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1) & ~(uintptr_t) (arg_align - 1)); gomp_init_task (task, parent, gomp_icv (false)); + task->priority = priority; task->kind = GOMP_TASK_UNDEFERRED; task->in_tied_task = parent->in_tied_task; task->taskgroup = taskgroup; @@ -368,53 +420,36 @@ GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), gomp_task_handle_depend (task, parent, depend); if (task->num_dependees) { + /* Tasks that depend on other tasks are not put into the + various waiting queues, so we are done for now. Said + tasks are instead put into the queues via + gomp_task_run_post_handle_dependers() after their + dependencies have been satisfied. After which, they + can be picked up by the various scheduling + points. */ gomp_mutex_unlock (&team->task_lock); return; } } - if (parent->children) - { - task->next_child = parent->children; - task->prev_child = parent->children->prev_child; - task->next_child->prev_child = task; - task->prev_child->next_child = task; - } - else - { - task->next_child = task; - task->prev_child = task; - } - parent->children = task; + + priority_queue_insert (PQ_CHILDREN, &parent->children_queue, + task, priority, + PRIORITY_INSERT_BEGIN, + /*adjust_parent_depends_on=*/false, + task->parent_depends_on); if (taskgroup) - { - /* If applicable, place task into its taskgroup. */ - if (taskgroup->children) - { - task->next_taskgroup = taskgroup->children; - task->prev_taskgroup = taskgroup->children->prev_taskgroup; - task->next_taskgroup->prev_taskgroup = task; - task->prev_taskgroup->next_taskgroup = task; - } - else - { - task->next_taskgroup = task; - task->prev_taskgroup = task; - } - taskgroup->children = task; - } - if (team->task_queue) - { - task->next_queue = team->task_queue; - task->prev_queue = team->task_queue->prev_queue; - task->next_queue->prev_queue = task; - task->prev_queue->next_queue = task; - } - else - { - task->next_queue = task; - task->prev_queue = task; - team->task_queue = task; - } + priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, + task, priority, + PRIORITY_INSERT_BEGIN, + /*adjust_parent_depends_on=*/false, + task->parent_depends_on); + + priority_queue_insert (PQ_TEAM, &team->task_queue, + task, priority, + PRIORITY_INSERT_END, + /*adjust_parent_depends_on=*/false, + task->parent_depends_on); + ++team->task_count; ++team->task_queued_count; gomp_team_barrier_set_task_pending (&team->barrier); @@ -514,49 +549,19 @@ gomp_create_target_task (struct gomp_device_descr *devicep, return; } } - if (parent->children) - { - task->next_child = parent->children; - task->prev_child = parent->children->prev_child; - task->next_child->prev_child = task; - task->prev_child->next_child = task; - } - else - { - task->next_child = task; - task->prev_child = task; - } - parent->children = task; + priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0, + PRIORITY_INSERT_BEGIN, + /*adjust_parent_depends_on=*/false, + task->parent_depends_on); if (taskgroup) - { - /* If applicable, place task into its taskgroup. */ - if (taskgroup->children) - { - task->next_taskgroup = taskgroup->children; - task->prev_taskgroup = taskgroup->children->prev_taskgroup; - task->next_taskgroup->prev_taskgroup = task; - task->prev_taskgroup->next_taskgroup = task; - } - else - { - task->next_taskgroup = task; - task->prev_taskgroup = task; - } - taskgroup->children = task; - } - if (team->task_queue) - { - task->next_queue = team->task_queue; - task->prev_queue = team->task_queue->prev_queue; - task->next_queue->prev_queue = task; - task->prev_queue->next_queue = task; - } - else - { - task->next_queue = task; - task->prev_queue = task; - team->task_queue = task; - } + priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0, + PRIORITY_INSERT_BEGIN, + /*adjust_parent_depends_on=*/false, + task->parent_depends_on); + priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0, + PRIORITY_INSERT_END, + /*adjust_parent_depends_on=*/false, + task->parent_depends_on); ++team->task_count; ++team->task_queued_count; gomp_team_barrier_set_task_pending (&team->barrier); @@ -567,208 +572,211 @@ gomp_create_target_task (struct gomp_device_descr *devicep, gomp_team_barrier_wake (&team->barrier, 1); } -#if _LIBGOMP_CHECKING -/* Sanity check TASK to make sure it is in its parent's children - queue, and that the tasks therein are in the right order. +/* Given a parent_depends_on task in LIST, move it to the front of its + priority so it is run as soon as possible. - The expected order is: - parent_depends_on WAITING tasks - !parent_depends_on WAITING tasks - TIED tasks + Care is taken to update the list's LAST_PARENT_DEPENDS_ON field. - PARENT is the alleged parent of TASK. */ + We rearrange the queue such that all parent_depends_on tasks are + first, and last_parent_depends_on points to the last such task we + rearranged. For example, given the following tasks in a queue + where PD[123] are the parent_depends_on tasks: -static void -verify_children_queue (struct gomp_task *task, struct gomp_task *parent) -{ - if (task->parent != parent) - gomp_fatal ("verify_children_queue: incompatible parents"); - /* It's OK, Annie was an orphan and she turned out all right. */ - if (!parent) - return; + task->children + | + V + C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4 - bool seen_tied = false; - bool seen_plain_waiting = false; - bool found = false; - struct gomp_task *t = parent->children; - while (1) + We rearrange such that: + + task->children + | +--- last_parent_depends_on + | | + V V + PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4. */ + +static void inline +priority_list_upgrade_task (struct priority_list *list, + struct priority_node *node) +{ + struct priority_node *last_parent_depends_on + = list->last_parent_depends_on; + if (last_parent_depends_on) { - if (t == task) - found = true; - if (seen_tied && t->kind == GOMP_TASK_WAITING) - gomp_fatal ("verify_children_queue: WAITING task after TIED"); - if (t->kind == GOMP_TASK_TIED) - seen_tied = true; - else if (t->kind == GOMP_TASK_WAITING) - { - if (t->parent_depends_on) - { - if (seen_plain_waiting) - gomp_fatal ("verify_children_queue: parent_depends_on after " - "!parent_depends_on"); - } - else - seen_plain_waiting = true; - } - t = t->next_child; - if (t == parent->children) - break; + node->prev->next = node->next; + node->next->prev = node->prev; + node->prev = last_parent_depends_on; + node->next = last_parent_depends_on->next; + node->prev->next = node; + node->next->prev = node; } - if (!found) - gomp_fatal ("verify_children_queue: child not found in parent queue"); + else if (node != list->tasks) + { + node->prev->next = node->next; + node->next->prev = node->prev; + node->prev = list->tasks->prev; + node->next = list->tasks; + list->tasks = node; + node->prev->next = node; + node->next->prev = node; + } + list->last_parent_depends_on = node; } -/* Sanity check TASK to make sure it is in its taskgroup queue (if - applicable), and that the tasks therein are in the right order. +/* Given a parent_depends_on TASK in its parent's children_queue, move + it to the front of its priority so it is run as soon as possible. - The expected order is that GOMP_TASK_WAITING tasks must come before - GOMP_TASK_TIED tasks. + PARENT is passed as an optimization. - TASK is the task. */ + (This function could be defined in priority_queue.c, but we want it + inlined, and putting it in priority_queue.h is not an option, given + that gomp_task has not been properly defined at that point). */ -static void -verify_taskgroup_queue (struct gomp_task *task) +static void inline +priority_queue_upgrade_task (struct gomp_task *task, + struct gomp_task *parent) { - struct gomp_taskgroup *taskgroup = task->taskgroup; - if (!taskgroup) - return; - - bool seen_tied = false; - bool found = false; - struct gomp_task *t = taskgroup->children; - while (1) + struct priority_queue *head = &parent->children_queue; + struct priority_node *node = &task->pnode[PQ_CHILDREN]; +#if _LIBGOMP_CHECKING_ + if (!task->parent_depends_on) + gomp_fatal ("priority_queue_upgrade_task: task must be a " + "parent_depends_on task"); + if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task)) + gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task); +#endif + if (priority_queue_multi_p (head)) { - if (t == task) - found = true; - if (t->kind == GOMP_TASK_WAITING && seen_tied) - gomp_fatal ("verify_taskgroup_queue: WAITING task after TIED"); - if (t->kind == GOMP_TASK_TIED) - seen_tied = true; - t = t->next_taskgroup; - if (t == taskgroup->children) - break; + struct priority_list *list + = priority_queue_lookup_priority (head, task->priority); + priority_list_upgrade_task (list, node); } - if (!found) - gomp_fatal ("verify_taskgroup_queue: child not found in parent queue"); + else + priority_list_upgrade_task (&head->l, node); } -/* Verify that TASK is in the team's task queue. */ +/* Given a CHILD_TASK in LIST that is about to be executed, move it out of + the way in LIST so that other tasks can be considered for + execution. LIST contains tasks of type TYPE. -static void -verify_task_queue (struct gomp_task *task, struct gomp_team *team) + Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field + if applicable. */ + +static void inline +priority_list_downgrade_task (enum priority_queue_type type, + struct priority_list *list, + struct gomp_task *child_task) { - struct gomp_task *t = team->task_queue; - if (team) - while (1) - { - if (t == task) - return; - t = t->next_queue; - if (t == team->task_queue) - break; - } - gomp_fatal ("verify_team_queue: child not in team"); + struct priority_node *node = task_to_priority_node (type, child_task); + if (list->tasks == node) + list->tasks = node->next; + else if (node->next != list->tasks) + { + /* The task in NODE is about to become TIED and TIED tasks + cannot come before WAITING tasks. If we're about to + leave the queue in such an indeterminate state, rewire + things appropriately. However, a TIED task at the end is + perfectly fine. */ + struct gomp_task *next_task = priority_node_to_task (type, node->next); + if (next_task->kind == GOMP_TASK_WAITING) + { + /* Remove from list. */ + node->prev->next = node->next; + node->next->prev = node->prev; + /* Rewire at the end. */ + node->next = list->tasks; + node->prev = list->tasks->prev; + list->tasks->prev->next = node; + list->tasks->prev = node; + } + } + + /* If the current task is the last_parent_depends_on for its + priority, adjust last_parent_depends_on appropriately. */ + if (__builtin_expect (child_task->parent_depends_on, 0) + && list->last_parent_depends_on == node) + { + struct gomp_task *prev_child = priority_node_to_task (type, node->prev); + if (node->prev != node + && prev_child->kind == GOMP_TASK_WAITING + && prev_child->parent_depends_on) + list->last_parent_depends_on = node->prev; + else + { + /* There are no more parent_depends_on entries waiting + to run, clear the list. */ + list->last_parent_depends_on = NULL; + } + } } + +/* Given a TASK in HEAD that is about to be executed, move it out of + the way so that other tasks can be considered for execution. HEAD + contains tasks of type TYPE. + + Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field + if applicable. + + (This function could be defined in priority_queue.c, but we want it + inlined, and putting it in priority_queue.h is not an option, given + that gomp_task has not been properly defined at that point). */ + +static void inline +priority_queue_downgrade_task (enum priority_queue_type type, + struct priority_queue *head, + struct gomp_task *task) +{ +#if _LIBGOMP_CHECKING_ + if (!priority_queue_task_in_queue_p (type, head, task)) + gomp_fatal ("Attempt to downgrade missing task %p", task); #endif + if (priority_queue_multi_p (head)) + { + struct priority_list *list + = priority_queue_lookup_priority (head, task->priority); + priority_list_downgrade_task (type, list, task); + } + else + priority_list_downgrade_task (type, &head->l, task); +} + +/* Setup CHILD_TASK to execute. This is done by setting the task to + TIED, and updating all relevant queues so that CHILD_TASK is no + longer chosen for scheduling. Also, remove CHILD_TASK from the + overall team task queue entirely. + + Return TRUE if task or its containing taskgroup has been + cancelled. */ static inline bool gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent, struct gomp_team *team) { -#if _LIBGOMP_CHECKING - verify_children_queue (child_task, parent); - verify_taskgroup_queue (child_task); - verify_task_queue (child_task, team); +#if _LIBGOMP_CHECKING_ + if (child_task->parent) + priority_queue_verify (PQ_CHILDREN, + &child_task->parent->children_queue, true); + if (child_task->taskgroup) + priority_queue_verify (PQ_TASKGROUP, + &child_task->taskgroup->taskgroup_queue, false); + priority_queue_verify (PQ_TEAM, &team->task_queue, false); #endif + /* Task is about to go tied, move it out of the way. */ if (parent) - { - /* Adjust children such that it will point to a next child, - while the current one is scheduled to be executed. This way, - GOMP_taskwait (and others) can schedule a next task while - waiting. - - Do not remove it entirely from the circular list, as it is - still a child, though not one we should consider first (say - by GOMP_taskwait). */ - if (parent->children == child_task) - parent->children = child_task->next_child; - /* TIED tasks cannot come before WAITING tasks. If we're about - to make this task TIED, rewire things appropriately. - However, a TIED task at the end is perfectly fine. */ - else if (child_task->next_child->kind == GOMP_TASK_WAITING - && child_task->next_child != parent->children) - { - /* Remove from the list. */ - child_task->prev_child->next_child = child_task->next_child; - child_task->next_child->prev_child = child_task->prev_child; - /* Rewire at the end of its siblings. */ - child_task->next_child = parent->children; - child_task->prev_child = parent->children->prev_child; - parent->children->prev_child->next_child = child_task; - parent->children->prev_child = child_task; - } + priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue, + child_task); - /* If the current task (child_task) is at the top of the - parent's last_parent_depends_on, it's about to be removed - from it. Adjust last_parent_depends_on appropriately. */ - if (__builtin_expect (child_task->parent_depends_on, 0) - && parent->taskwait->last_parent_depends_on == child_task) - { - /* The last_parent_depends_on list was built with all - parent_depends_on entries linked to the prev_child. Grab - the next last_parent_depends_on head from this prev_child if - available... */ - if (child_task->prev_child->kind == GOMP_TASK_WAITING - && child_task->prev_child->parent_depends_on) - parent->taskwait->last_parent_depends_on = child_task->prev_child; - else - { - /* ...otherwise, there are no more parent_depends_on - entries waiting to run. In which case, clear the - list. */ - parent->taskwait->last_parent_depends_on = NULL; - } - } - } - - /* Adjust taskgroup to point to the next taskgroup. See note above - regarding adjustment of children as to why the child_task is not - removed entirely from the circular list. */ + /* Task is about to go tied, move it out of the way. */ struct gomp_taskgroup *taskgroup = child_task->taskgroup; if (taskgroup) - { - if (taskgroup->children == child_task) - taskgroup->children = child_task->next_taskgroup; - /* TIED tasks cannot come before WAITING tasks. If we're about - to make this task TIED, rewire things appropriately. - However, a TIED task at the end is perfectly fine. */ - else if (child_task->next_taskgroup->kind == GOMP_TASK_WAITING - && child_task->next_taskgroup != taskgroup->children) - { - /* Remove from the list. */ - child_task->prev_taskgroup->next_taskgroup - = child_task->next_taskgroup; - child_task->next_taskgroup->prev_taskgroup - = child_task->prev_taskgroup; - /* Rewire at the end of its taskgroup. */ - child_task->next_taskgroup = taskgroup->children; - child_task->prev_taskgroup = taskgroup->children->prev_taskgroup; - taskgroup->children->prev_taskgroup->next_taskgroup = child_task; - taskgroup->children->prev_taskgroup = child_task; - } - } + priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue, + child_task); - /* Remove child_task from the task_queue. */ - child_task->prev_queue->next_queue = child_task->next_queue; - child_task->next_queue->prev_queue = child_task->prev_queue; - if (team->task_queue == child_task) - { - if (child_task->next_queue != child_task) - team->task_queue = child_task->next_queue; - else - team->task_queue = NULL; - } + priority_queue_remove (PQ_TEAM, &team->task_queue, child_task, + MEMMODEL_RELAXED); + child_task->pnode[PQ_TEAM].next = NULL; + child_task->pnode[PQ_TEAM].prev = NULL; child_task->kind = GOMP_TASK_TIED; if (--team->task_queued_count == 0) @@ -808,8 +816,11 @@ gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task) } } -/* After CHILD_TASK has been run, adjust the various task queues to - give higher priority to the tasks that depend on CHILD_TASK. +/* After a CHILD_TASK has been run, adjust the dependency queue for + each task that depends on CHILD_TASK, to record the fact that there + is one less dependency to worry about. If a task that depended on + CHILD_TASK now has no dependencies, place it in the various queues + so it gets scheduled to run. TEAM is the team to which CHILD_TASK belongs to. */ @@ -822,99 +833,60 @@ gomp_task_run_post_handle_dependers (struct gomp_task *child_task, for (i = 0; i < count; i++) { struct gomp_task *task = child_task->dependers->elem[i]; + + /* CHILD_TASK satisfies a dependency for TASK. Keep track of + TASK's remaining dependencies. Once TASK has no other + depenencies, put it into the various queues so it will get + scheduled for execution. */ if (--task->num_dependees != 0) continue; struct gomp_taskgroup *taskgroup = task->taskgroup; if (parent) { - if (parent->children) - { - /* If parent is in gomp_task_maybe_wait_for_dependencies - and it doesn't need to wait for this task, put it after - all ready to run tasks it needs to wait for. */ - if (parent->taskwait && parent->taskwait->last_parent_depends_on - && !task->parent_depends_on) - { - /* Put depender in last_parent_depends_on. */ - struct gomp_task *last_parent_depends_on - = parent->taskwait->last_parent_depends_on; - task->next_child = last_parent_depends_on->next_child; - task->prev_child = last_parent_depends_on; - } - else - { - /* Make depender a sibling of child_task, and place - it at the top of said sibling list. */ - task->next_child = parent->children; - task->prev_child = parent->children->prev_child; - parent->children = task; - } - task->next_child->prev_child = task; - task->prev_child->next_child = task; - } - else - { - /* Make depender a sibling of child_task. */ - task->next_child = task; - task->prev_child = task; - parent->children = task; - } + priority_queue_insert (PQ_CHILDREN, &parent->children_queue, + task, task->priority, + PRIORITY_INSERT_BEGIN, + /*adjust_parent_depends_on=*/true, + task->parent_depends_on); if (parent->taskwait) { if (parent->taskwait->in_taskwait) { + /* One more task has had its dependencies met. + Inform any waiters. */ parent->taskwait->in_taskwait = false; gomp_sem_post (&parent->taskwait->taskwait_sem); } else if (parent->taskwait->in_depend_wait) { + /* One more task has had its dependencies met. + Inform any waiters. */ parent->taskwait->in_depend_wait = false; gomp_sem_post (&parent->taskwait->taskwait_sem); } - if (parent->taskwait->last_parent_depends_on == NULL - && task->parent_depends_on) - parent->taskwait->last_parent_depends_on = task; } } - /* If depender is in a taskgroup, put it at the TOP of its - taskgroup. */ if (taskgroup) { - if (taskgroup->children) - { - task->next_taskgroup = taskgroup->children; - task->prev_taskgroup = taskgroup->children->prev_taskgroup; - task->next_taskgroup->prev_taskgroup = task; - task->prev_taskgroup->next_taskgroup = task; - } - else - { - task->next_taskgroup = task; - task->prev_taskgroup = task; - } - taskgroup->children = task; + priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, + task, task->priority, + PRIORITY_INSERT_BEGIN, + /*adjust_parent_depends_on=*/false, + task->parent_depends_on); if (taskgroup->in_taskgroup_wait) { + /* One more task has had its dependencies met. + Inform any waiters. */ taskgroup->in_taskgroup_wait = false; gomp_sem_post (&taskgroup->taskgroup_sem); } } - /* Put depender of child_task at the END of the team's - task_queue. */ - if (team->task_queue) - { - task->next_queue = team->task_queue; - task->prev_queue = team->task_queue->prev_queue; - task->next_queue->prev_queue = task; - task->prev_queue->next_queue = task; - } - else - { - task->next_queue = task; - task->prev_queue = task; - team->task_queue = task; - } + priority_queue_insert (PQ_TEAM, &team->task_queue, + task, task->priority, + PRIORITY_INSERT_END, + /*adjust_parent_depends_on=*/false, + task->parent_depends_on); ++team->task_count; ++team->task_queued_count; ++ret; @@ -964,27 +936,15 @@ gomp_task_run_post_remove_parent (struct gomp_task *child_task) gomp_sem_post (&parent->taskwait->taskwait_sem); } - /* Remove CHILD_TASK from its sibling list. */ - child_task->prev_child->next_child = child_task->next_child; - child_task->next_child->prev_child = child_task->prev_child; - if (parent->children != child_task) - return; - if (child_task->next_child != child_task) - parent->children = child_task->next_child; - else + if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue, + child_task, MEMMODEL_RELEASE) + && parent->taskwait && parent->taskwait->in_taskwait) { - /* We access task->children in GOMP_taskwait - outside of the task lock mutex region, so - need a release barrier here to ensure memory - written by child_task->fn above is flushed - before the NULL is written. */ - __atomic_store_n (&parent->children, NULL, MEMMODEL_RELEASE); - if (parent->taskwait && parent->taskwait->in_taskwait) - { - parent->taskwait->in_taskwait = false; - gomp_sem_post (&parent->taskwait->taskwait_sem); - } + parent->taskwait->in_taskwait = false; + gomp_sem_post (&parent->taskwait->taskwait_sem); } + child_task->pnode[PQ_CHILDREN].next = NULL; + child_task->pnode[PQ_CHILDREN].prev = NULL; } /* Remove CHILD_TASK from its taskgroup. */ @@ -995,8 +955,11 @@ gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task) struct gomp_taskgroup *taskgroup = child_task->taskgroup; if (taskgroup == NULL) return; - child_task->prev_taskgroup->next_taskgroup = child_task->next_taskgroup; - child_task->next_taskgroup->prev_taskgroup = child_task->prev_taskgroup; + bool empty = priority_queue_remove (PQ_TASKGROUP, + &taskgroup->taskgroup_queue, + child_task, MEMMODEL_RELAXED); + child_task->pnode[PQ_TASKGROUP].next = NULL; + child_task->pnode[PQ_TASKGROUP].prev = NULL; if (taskgroup->num_children > 1) --taskgroup->num_children; else @@ -1008,18 +971,10 @@ gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task) before the NULL is written. */ __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE); } - if (taskgroup->children != child_task) - return; - if (child_task->next_taskgroup != child_task) - taskgroup->children = child_task->next_taskgroup; - else + if (empty && taskgroup->in_taskgroup_wait) { - taskgroup->children = NULL; - if (taskgroup->in_taskgroup_wait) - { - taskgroup->in_taskgroup_wait = false; - gomp_sem_post (&taskgroup->taskgroup_sem); - } + taskgroup->in_taskgroup_wait = false; + gomp_sem_post (&taskgroup->taskgroup_sem); } } @@ -1049,9 +1004,13 @@ gomp_barrier_handle_tasks (gomp_barrier_state_t state) while (1) { bool cancelled = false; - if (team->task_queue != NULL) + if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED)) { - child_task = team->task_queue; + bool ignored; + child_task + = priority_queue_next_task (PQ_TEAM, &team->task_queue, + PQ_IGNORED, NULL, + &ignored); cancelled = gomp_task_run_pre (child_task, child_task->parent, team); if (__builtin_expect (cancelled, 0)) @@ -1094,7 +1053,7 @@ gomp_barrier_handle_tasks (gomp_barrier_state_t state) size_t new_tasks = gomp_task_run_post_handle_depend (child_task, team); gomp_task_run_post_remove_parent (child_task); - gomp_clear_parent (child_task->children); + gomp_clear_parent (&child_task->children_queue); gomp_task_run_post_remove_taskgroup (child_task); to_free = child_task; child_task = NULL; @@ -1140,15 +1099,16 @@ GOMP_taskwait (void) child thread task work function are seen before we exit from GOMP_taskwait. */ if (task == NULL - || __atomic_load_n (&task->children, MEMMODEL_ACQUIRE) == NULL) + || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE)) return; memset (&taskwait, 0, sizeof (taskwait)); + bool child_q = false; gomp_mutex_lock (&team->task_lock); while (1) { bool cancelled = false; - if (task->children == NULL) + if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED)) { bool destroy_taskwait = task->taskwait != NULL; task->taskwait = NULL; @@ -1162,9 +1122,12 @@ GOMP_taskwait (void) gomp_sem_destroy (&taskwait.taskwait_sem); return; } - if (task->children->kind == GOMP_TASK_WAITING) + struct gomp_task *next_task + = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, + PQ_TEAM, &team->task_queue, &child_q); + if (next_task->kind == GOMP_TASK_WAITING) { - child_task = task->children; + child_task = next_task; cancelled = gomp_task_run_pre (child_task, task, team); if (__builtin_expect (cancelled, 0)) @@ -1180,8 +1143,10 @@ GOMP_taskwait (void) } else { - /* All tasks we are waiting for are already running - in other threads. Wait for them. */ + /* All tasks we are waiting for are either running in other + threads, or they are tasks that have not had their + dependencies met (so they're not even in the queue). Wait + for them. */ if (task->taskwait == NULL) { taskwait.in_depend_wait = false; @@ -1217,21 +1182,16 @@ GOMP_taskwait (void) size_t new_tasks = gomp_task_run_post_handle_depend (child_task, team); - /* Remove child_task from children list, and set up the next - sibling to be run. */ - child_task->prev_child->next_child = child_task->next_child; - child_task->next_child->prev_child = child_task->prev_child; - if (task->children == child_task) + if (child_q) { - if (child_task->next_child != child_task) - task->children = child_task->next_child; - else - task->children = NULL; + priority_queue_remove (PQ_CHILDREN, &task->children_queue, + child_task, MEMMODEL_RELAXED); + child_task->pnode[PQ_CHILDREN].next = NULL; + child_task->pnode[PQ_CHILDREN].prev = NULL; } - /* Orphan all the children of CHILD_TASK. */ - gomp_clear_parent (child_task->children); - /* Remove CHILD_TASK from its taskgroup. */ + gomp_clear_parent (&child_task->children_queue); + gomp_task_run_post_remove_taskgroup (child_task); to_free = child_task; @@ -1248,8 +1208,16 @@ GOMP_taskwait (void) } } -/* This is like GOMP_taskwait, but we only wait for tasks that the - upcoming task depends on. +/* An undeferred task is about to run. Wait for all tasks that this + undeferred task depends on. + + This is done by first putting all known ready dependencies + (dependencies that have their own dependencies met) at the top of + the scheduling queues. Then we iterate through these imminently + ready tasks (and possibly other high priority tasks), and run them. + If we run out of ready dependencies to execute, we either wait for + the reamining dependencies to finish, or wait for them to get + scheduled so we can run them. DEPEND is as in GOMP_task. */ @@ -1261,7 +1229,6 @@ gomp_task_maybe_wait_for_dependencies (void **depend) struct gomp_team *team = thr->ts.team; struct gomp_task_depend_entry elem, *ent = NULL; struct gomp_taskwait taskwait; - struct gomp_task *last_parent_depends_on = NULL; size_t ndepend = (uintptr_t) depend[0]; size_t nout = (uintptr_t) depend[1]; size_t i; @@ -1285,54 +1252,11 @@ gomp_task_maybe_wait_for_dependencies (void **depend) { tsk->parent_depends_on = true; ++num_awaited; - /* If a task we need to wait for is not already - running and is ready to be scheduled, move it to - front, so that we run it as soon as possible. - - We rearrange the children queue such that all - parent_depends_on tasks are first, and - last_parent_depends_on points to the last such task - we rearranged. For example, given the following - children where PD[123] are the parent_depends_on - tasks: - - task->children - | - V - C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4 - - We rearrange such that: - - task->children - | +--- last_parent_depends_on - | | - V V - PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4 - */ - + /* If depenency TSK itself has no dependencies and is + ready to run, move it up front so that we run it as + soon as possible. */ if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING) - { - if (last_parent_depends_on) - { - tsk->prev_child->next_child = tsk->next_child; - tsk->next_child->prev_child = tsk->prev_child; - tsk->prev_child = last_parent_depends_on; - tsk->next_child = last_parent_depends_on->next_child; - tsk->prev_child->next_child = tsk; - tsk->next_child->prev_child = tsk; - } - else if (tsk != task->children) - { - tsk->prev_child->next_child = tsk->next_child; - tsk->next_child->prev_child = tsk->prev_child; - tsk->prev_child = task->children->prev_child; - tsk->next_child = task->children; - task->children = tsk; - tsk->prev_child->next_child = tsk; - tsk->next_child->prev_child = tsk; - } - last_parent_depends_on = tsk; - } + priority_queue_upgrade_task (tsk, task); } } } @@ -1344,7 +1268,6 @@ gomp_task_maybe_wait_for_dependencies (void **depend) memset (&taskwait, 0, sizeof (taskwait)); taskwait.n_depend = num_awaited; - taskwait.last_parent_depends_on = last_parent_depends_on; gomp_sem_init (&taskwait.taskwait_sem, 0); task->taskwait = &taskwait; @@ -1363,9 +1286,28 @@ gomp_task_maybe_wait_for_dependencies (void **depend) gomp_sem_destroy (&taskwait.taskwait_sem); return; } - if (task->children->kind == GOMP_TASK_WAITING) + + /* Theoretically when we have multiple priorities, we should + chose between the highest priority item in + task->children_queue and team->task_queue here, so we should + use priority_queue_next_task(). However, since we are + running an undeferred task, perhaps that makes all tasks it + depends on undeferred, thus a priority of INF? This would + make it unnecessary to take anything into account here, + but the dependencies. + + On the other hand, if we want to use priority_queue_next_task(), + care should be taken to only use priority_queue_remove() + below if the task was actually removed from the children + queue. */ + bool ignored; + struct gomp_task *next_task + = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, + PQ_IGNORED, NULL, &ignored); + + if (next_task->kind == GOMP_TASK_WAITING) { - child_task = task->children; + child_task = next_task; cancelled = gomp_task_run_pre (child_task, task, team); if (__builtin_expect (cancelled, 0)) @@ -1380,8 +1322,10 @@ gomp_task_maybe_wait_for_dependencies (void **depend) } } else - /* All tasks we are waiting for are already running - in other threads. Wait for them. */ + /* All tasks we are waiting for are either running in other + threads, or they are tasks that have not had their + dependencies met (so they're not even in the queue). Wait + for them. */ taskwait.in_depend_wait = true; gomp_mutex_unlock (&team->task_lock); if (do_wake) @@ -1412,18 +1356,12 @@ gomp_task_maybe_wait_for_dependencies (void **depend) if (child_task->parent_depends_on) --taskwait.n_depend; - /* Remove child_task from sibling list. */ - child_task->prev_child->next_child = child_task->next_child; - child_task->next_child->prev_child = child_task->prev_child; - if (task->children == child_task) - { - if (child_task->next_child != child_task) - task->children = child_task->next_child; - else - task->children = NULL; - } + priority_queue_remove (PQ_CHILDREN, &task->children_queue, + child_task, MEMMODEL_RELAXED); + child_task->pnode[PQ_CHILDREN].next = NULL; + child_task->pnode[PQ_CHILDREN].prev = NULL; - gomp_clear_parent (child_task->children); + gomp_clear_parent (&child_task->children_queue); gomp_task_run_post_remove_taskgroup (child_task); to_free = child_task; child_task = NULL; @@ -1463,7 +1401,7 @@ GOMP_taskgroup_start (void) return; taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup)); taskgroup->prev = task->taskgroup; - taskgroup->children = NULL; + priority_queue_init (&taskgroup->taskgroup_queue); taskgroup->in_taskgroup_wait = false; taskgroup->cancelled = false; taskgroup->num_children = 0; @@ -1495,17 +1433,23 @@ GOMP_taskgroup_end (void) if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0) goto finish; + bool unused; gomp_mutex_lock (&team->task_lock); while (1) { bool cancelled = false; - if (taskgroup->children == NULL) + if (priority_queue_empty_p (&taskgroup->taskgroup_queue, + MEMMODEL_RELAXED)) { if (taskgroup->num_children) { - if (task->children == NULL) + if (priority_queue_empty_p (&task->children_queue, + MEMMODEL_RELAXED)) goto do_wait; - child_task = task->children; + child_task + = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, + PQ_TEAM, &team->task_queue, + &unused); } else { @@ -1519,7 +1463,9 @@ GOMP_taskgroup_end (void) } } else - child_task = taskgroup->children; + child_task + = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue, + PQ_TEAM, &team->task_queue, &unused); if (child_task->kind == GOMP_TASK_WAITING) { cancelled @@ -1539,8 +1485,10 @@ GOMP_taskgroup_end (void) { child_task = NULL; do_wait: - /* All tasks we are waiting for are already running - in other threads. Wait for them. */ + /* All tasks we are waiting for are either running in other + threads, or they are tasks that have not had their + dependencies met (so they're not even in the queue). Wait + for them. */ taskgroup->in_taskgroup_wait = true; } gomp_mutex_unlock (&team->task_lock); @@ -1570,7 +1518,7 @@ GOMP_taskgroup_end (void) size_t new_tasks = gomp_task_run_post_handle_depend (child_task, team); gomp_task_run_post_remove_parent (child_task); - gomp_clear_parent (child_task->children); + gomp_clear_parent (&child_task->children_queue); gomp_task_run_post_remove_taskgroup (child_task); to_free = child_task; child_task = NULL; diff --git a/libgomp/taskloop.c b/libgomp/taskloop.c index f57a5a1..bcee326 100644 --- a/libgomp/taskloop.c +++ b/libgomp/taskloop.c @@ -155,8 +155,8 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), else ialias_call (GOMP_taskgroup_start) (); - /* FIXME, use priority. */ - (void) priority; + if (priority > gomp_max_task_priority_var) + priority = gomp_max_task_priority_var; if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL || (thr->task && thr->task->final_task) @@ -175,6 +175,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), for (i = 0; i < num_tasks; i++) { gomp_init_task (&task[i], parent, gomp_icv (false)); + task[i].priority = priority; task[i].kind = GOMP_TASK_UNDEFERRED; task[i].final_task = (thr->task && thr->task->final_task) || (flags & GOMP_TASK_FLAG_FINAL); @@ -198,10 +199,11 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), task_step -= step; fn (arg); arg += arg_size; - if (task[i].children != NULL) + if (!priority_queue_empty_p (&task[i].children_queue, + MEMMODEL_RELAXED)) { gomp_mutex_lock (&team->task_lock); - gomp_clear_parent (task[i].children); + gomp_clear_parent (&task[i].children_queue); gomp_mutex_unlock (&team->task_lock); } gomp_end_task (); @@ -213,6 +215,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), struct gomp_task task; gomp_init_task (&task, thr->task, gomp_icv (false)); + task.priority = priority; task.kind = GOMP_TASK_UNDEFERRED; task.final_task = (thr->task && thr->task->final_task) || (flags & GOMP_TASK_FLAG_FINAL); @@ -228,10 +231,11 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), if (i == nfirst) task_step -= step; fn (data); - if (task.children != NULL) + if (!priority_queue_empty_p (&task.children_queue, + MEMMODEL_RELAXED)) { gomp_mutex_lock (&team->task_lock); - gomp_clear_parent (task.children); + gomp_clear_parent (&task.children_queue); gomp_mutex_unlock (&team->task_lock); } gomp_end_task (); @@ -254,6 +258,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1) & ~(uintptr_t) (arg_align - 1)); gomp_init_task (task, parent, gomp_icv (false)); + task->priority = priority; task->kind = GOMP_TASK_UNDEFERRED; task->in_tied_task = parent->in_tied_task; task->taskgroup = taskgroup; @@ -298,48 +303,20 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), for (i = 0; i < num_tasks; i++) { struct gomp_task *task = tasks[i]; - if (parent->children) - { - task->next_child = parent->children; - task->prev_child = parent->children->prev_child; - task->next_child->prev_child = task; - task->prev_child->next_child = task; - } - else - { - task->next_child = task; - task->prev_child = task; - } - parent->children = task; + priority_queue_insert (PQ_CHILDREN, &parent->children_queue, + task, priority, + PRIORITY_INSERT_BEGIN, + /*last_parent_depends_on=*/false, + task->parent_depends_on); if (taskgroup) - { - if (taskgroup->children) - { - task->next_taskgroup = taskgroup->children; - task->prev_taskgroup = taskgroup->children->prev_taskgroup; - task->next_taskgroup->prev_taskgroup = task; - task->prev_taskgroup->next_taskgroup = task; - } - else - { - task->next_taskgroup = task; - task->prev_taskgroup = task; - } - taskgroup->children = task; - } - if (team->task_queue) - { - task->next_queue = team->task_queue; - task->prev_queue = team->task_queue->prev_queue; - task->next_queue->prev_queue = task; - task->prev_queue->next_queue = task; - } - else - { - task->next_queue = task; - task->prev_queue = task; - team->task_queue = task; - } + priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, + task, priority, PRIORITY_INSERT_BEGIN, + /*last_parent_depends_on=*/false, + task->parent_depends_on); + priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority, + PRIORITY_INSERT_END, + /*last_parent_depends_on=*/false, + task->parent_depends_on); ++team->task_count; ++team->task_queued_count; } diff --git a/libgomp/team.c b/libgomp/team.c index 67e25b3..4eadca0 100644 --- a/libgomp/team.c +++ b/libgomp/team.c @@ -193,7 +193,7 @@ gomp_new_team (unsigned nthreads) team->ordered_release = (void *) &team->implicit_task[nthreads]; team->ordered_release[0] = &team->master_release; - team->task_queue = NULL; + priority_queue_init (&team->task_queue); team->task_count = 0; team->task_queued_count = 0; team->task_running_count = 0; @@ -214,6 +214,7 @@ free_team (struct gomp_team *team) #endif gomp_barrier_destroy (&team->barrier); gomp_mutex_destroy (&team->task_lock); + priority_queue_free (&team->task_queue); free (team); } diff --git a/libgomp/testsuite/libgomp.c/priority.c b/libgomp/testsuite/libgomp.c/priority.c new file mode 100644 index 0000000..012f09d --- /dev/null +++ b/libgomp/testsuite/libgomp.c/priority.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-set-target-env-var OMP_MAX_TASK_PRIORITY "10" } */ + +/* This test verifies that the "priority" clause of omp task works as + advertised. + + Testing the OpenMP task scheduler is a bit tricky, especially when + trying to determine what ran first (without explicitly calling + time() and/or synchronizing between threads). What we do here is + run in single threaded mode which guarantees that we won't run into + data races while accessing the "prio" array. + + We give each task a priority from 0..63, while setting + OMP_MAX_TASK_PRIORITY to 10, which basically gives us 10 lower + priority tasks, and the rest scheduled to run earlier. We verify + that the priority < 10 tasks run last. */ + +#include +#include + +#define N 64 + +int main() +{ + int tsknum=0, prio[N]; + int max_priority = omp_get_max_task_priority (); + int saved_tsknum = -1; + int i; + +#pragma omp parallel num_threads(1) +#pragma omp single private (i) + { + for (i = 0; i < N; i++) + #pragma omp task priority(i ^ 1) + { + int t; + #pragma omp atomic capture seq_cst + t = tsknum++; + prio[t] = i ^ 1; + } + #pragma omp atomic read seq_cst + saved_tsknum = tsknum; + } + + /* If any of the tasks have run before all tasks were created, don't + make any assumption on the task order. Otherwise, we should have + tasks with >= max_priority scheduled first in arbitrary order, + followed by the rest of tasks in decreasing priority order, as + there is only one thread that can schedule them. */ + if (saved_tsknum == 0) + { + for (i = 0; i < N; i++) + if (i < N - max_priority) + { + if (prio[i] < max_priority) + abort (); + } + else if (i != N - prio[i] - 1) + abort (); + } + return 0; +}