public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-5191] openmp: Honor OpenMP 5.1 num_teams lower bound
@ 2021-11-12 11:42 Jakub Jelinek
  0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2021-11-12 11:42 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:7d6da11fce054b25b50d0dec7f8d49cf22852680

commit r12-5191-g7d6da11fce054b25b50d0dec7f8d49cf22852680
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Fri Nov 12 12:41:22 2021 +0100

    openmp: Honor OpenMP 5.1 num_teams lower bound
    
    The following patch implements what I've been talking about earlier,
    honor that for explicit num_teams clause we create at least the
    lower-bound (if not specified, upper-bound) teams in the league.
    For host fallback, it still means we only have one thread doing all the
    teams, sequentially one after another.
    For PTX and GCN, I think the new teams-2.c test and maybe teams-4.c too
    will or might fail.
    For these offloads, I think it is ok to remove symbols no longer used
    from libgomp.a.
    If num_teams_lower is bigger than the provided num_blocks or num_workgroups,
    we should arrange for gomp_num_teams_var to be num_teams_lower - 1,
    stop using the %ctaid.x or __builtin_gcn_dim_pos (0) for omp_get_team_num ()
    and instead use for it some .shared var that GOMP_teams4 initializes to
    %ctaid.x or __builtin_gcn_dim_pos (0) when first and for !first
    increment that by num_blocks or num_workgroups each time and only
    return false when we are above num_teams_lower.
    Any help with actually implementing this for the 2 architectures highly
    appreciated.
    
    2021-11-12  Jakub Jelinek  <jakub@redhat.com>
    
    gcc/
            * omp-builtins.def (BUILT_IN_GOMP_TEAMS): Remove.
            (BUILT_IN_GOMP_TEAMS4): New.
            * builtin-types.def (BT_FN_VOID_UINT_UINT): Remove.
            (BT_FN_BOOL_UINT_UINT_UINT_BOOL): New.
            * omp-low.c (lower_omp_teams): Use GOMP_teams4 instead of
            GOMP_teams, pass to it also num_teams lower-bound expression
            or a dup of upper-bound if it is missing and a flag whether
            it is the first call or not.
    gcc/fortran/
            * types.def (BT_FN_VOID_UINT_UINT): Remove.
            (BT_FN_BOOL_UINT_UINT_UINT_BOOL): New.
    libgomp/
            * libgomp_g.h (GOMP_teams4): Declare.
            * libgomp.map (GOMP_5.1): Export GOMP_teams4.
            * target.c (GOMP_teams4): New function.
            * config/nvptx/target.c (GOMP_teams): Remove.
            (GOMP_teams4): New function.
            * config/gcn/target.c (GOMP_teams): Remove.
            (GOMP_teams4): New function.
            * testsuite/libgomp.c/teams-4.c (main): Expect exactly 2
            teams instead of <= 2.
            * testsuite/libgomp.c-c++-common/teams-2.c: New test.

Diff:
---
 gcc/builtin-types.def                            |  3 +-
 gcc/fortran/types.def                            |  3 +-
 gcc/omp-builtins.def                             |  4 +-
 gcc/omp-low.c                                    | 42 ++++++++++++--
 libgomp/config/gcn/target.c                      | 24 ++++----
 libgomp/config/nvptx/target.c                    | 24 ++++----
 libgomp/libgomp.map                              |  1 +
 libgomp/libgomp_g.h                              |  1 +
 libgomp/target.c                                 | 26 +++++++++
 libgomp/testsuite/libgomp.c-c++-common/teams-2.c | 70 ++++++++++++++++++++++++
 libgomp/testsuite/libgomp.c/teams-4.c            |  2 +-
 11 files changed, 169 insertions(+), 31 deletions(-)

diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def
index d160826e1d4..3f1c81bd7a8 100644
--- a/gcc/builtin-types.def
+++ b/gcc/builtin-types.def
@@ -489,7 +489,6 @@ DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_VPTR_INT, BT_BOOL, BT_VOLATILE_PTR, BT_INT)
 DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_SIZE_CONST_VPTR, BT_BOOL, BT_SIZE,
 		     BT_CONST_VOLATILE_PTR)
 DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_INT_BOOL, BT_BOOL, BT_INT, BT_BOOL)
-DEF_FUNCTION_TYPE_2 (BT_FN_VOID_UINT_UINT, BT_VOID, BT_UINT, BT_UINT)
 DEF_FUNCTION_TYPE_2 (BT_FN_UINT_UINT_PTR, BT_UINT, BT_UINT, BT_PTR)
 DEF_FUNCTION_TYPE_2 (BT_FN_UINT_UINT_CONST_PTR, BT_UINT, BT_UINT, BT_CONST_PTR)
 DEF_FUNCTION_TYPE_2 (BT_FN_PTR_CONST_PTR_SIZE, BT_PTR, BT_CONST_PTR, BT_SIZE)
@@ -680,6 +679,8 @@ DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_ULLPTR_ULLPTR_ULLPTR,
 		     BT_PTR_ULONGLONG)
 DEF_FUNCTION_TYPE_4 (BT_FN_VOID_UINT_PTR_INT_PTR, BT_VOID, BT_INT, BT_PTR,
 		     BT_INT, BT_PTR)
+DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_UINT_UINT_BOOL,
+		     BT_BOOL, BT_UINT, BT_UINT, BT_UINT, BT_BOOL)
 
 DEF_FUNCTION_TYPE_5 (BT_FN_INT_STRING_INT_SIZE_CONST_STRING_VALIST_ARG,
 		     BT_INT, BT_STRING, BT_INT, BT_SIZE, BT_CONST_STRING,
diff --git a/gcc/fortran/types.def b/gcc/fortran/types.def
index 85b85ed0580..850fe97c6d3 100644
--- a/gcc/fortran/types.def
+++ b/gcc/fortran/types.def
@@ -117,7 +117,6 @@ DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_VPTR_INT, BT_BOOL, BT_VOLATILE_PTR, BT_INT)
 DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_SIZE_CONST_VPTR, BT_BOOL, BT_SIZE,
 		     BT_CONST_VOLATILE_PTR)
 DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_INT_BOOL, BT_BOOL, BT_INT, BT_BOOL)
-DEF_FUNCTION_TYPE_2 (BT_FN_VOID_UINT_UINT, BT_VOID, BT_UINT, BT_UINT)
 DEF_FUNCTION_TYPE_2 (BT_FN_VOID_PTR_PTRMODE,
 		     BT_VOID, BT_PTR, BT_PTRMODE)
 DEF_FUNCTION_TYPE_2 (BT_FN_VOID_CONST_PTR_SIZE, BT_VOID, BT_CONST_PTR, BT_SIZE)
@@ -173,6 +172,8 @@ DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_ULLPTR_ULLPTR_ULLPTR,
 		     BT_PTR_ULONGLONG)
 DEF_FUNCTION_TYPE_4 (BT_FN_VOID_UINT_PTR_INT_PTR, BT_VOID, BT_INT, BT_PTR,
 		     BT_INT, BT_PTR)
+DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_UINT_UINT_BOOL,
+		     BT_BOOL, BT_UINT, BT_UINT, BT_UINT, BT_BOOL)
 
 DEF_FUNCTION_TYPE_5 (BT_FN_VOID_OMPFN_PTR_UINT_UINT_UINT,
 		     BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT,
diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def
index 4520dc01b93..295081318c8 100644
--- a/gcc/omp-builtins.def
+++ b/gcc/omp-builtins.def
@@ -442,8 +442,8 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_UPDATE, "GOMP_target_update_ext",
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA,
 		  "GOMP_target_enter_exit_data",
 		  BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_UINT_PTR, ATTR_NOTHROW_LIST)
-DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS, "GOMP_teams",
-		  BT_FN_VOID_UINT_UINT, ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS4, "GOMP_teams4",
+		  BT_FN_BOOL_UINT_UINT_UINT_BOOL, ATTR_NOTHROW_LIST)
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS_REG, "GOMP_teams_reg",
 		  BT_FN_VOID_OMPFN_PTR_UINT_UINT_UINT, ATTR_NOTHROW_LIST)
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TASKGROUP_REDUCTION_REGISTER,
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index d5841ea7313..5b6aa30eb9c 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -13902,14 +13902,24 @@ lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 
   tree num_teams = omp_find_clause (gimple_omp_teams_clauses (teams_stmt),
 				    OMP_CLAUSE_NUM_TEAMS);
+  tree num_teams_lower = NULL_TREE;
   if (num_teams == NULL_TREE)
     num_teams = build_int_cst (unsigned_type_node, 0);
   else
     {
+      num_teams_lower = OMP_CLAUSE_NUM_TEAMS_LOWER_EXPR (num_teams);
+      if (num_teams_lower)
+	{
+	  num_teams_lower = fold_convert (unsigned_type_node, num_teams_lower);
+	  gimplify_expr (&num_teams_lower, &bind_body, NULL, is_gimple_val,
+			 fb_rvalue);
+	}
       num_teams = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (num_teams);
       num_teams = fold_convert (unsigned_type_node, num_teams);
       gimplify_expr (&num_teams, &bind_body, NULL, is_gimple_val, fb_rvalue);
     }
+  if (num_teams_lower == NULL_TREE)
+    num_teams_lower = num_teams;
   tree thread_limit = omp_find_clause (gimple_omp_teams_clauses (teams_stmt),
 				       OMP_CLAUSE_THREAD_LIMIT);
   if (thread_limit == NULL_TREE)
@@ -13921,6 +13931,30 @@ lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx)
       gimplify_expr (&thread_limit, &bind_body, NULL, is_gimple_val,
 		     fb_rvalue);
     }
+  location_t loc = gimple_location (teams_stmt);
+  tree decl = builtin_decl_explicit (BUILT_IN_GOMP_TEAMS4);
+  tree rettype = TREE_TYPE (TREE_TYPE (decl));
+  tree first = create_tmp_var (rettype);
+  gimple_seq_add_stmt (&bind_body,
+		       gimple_build_assign (first, build_one_cst (rettype)));
+  tree llabel = create_artificial_label (loc);
+  gimple_seq_add_stmt (&bind_body, gimple_build_label (llabel));
+  gimple *call
+    = gimple_build_call (decl, 4, num_teams_lower, num_teams, thread_limit,
+			 first);
+  gimple_set_location (call, loc);
+  tree temp = create_tmp_var (rettype);
+  gimple_call_set_lhs (call, temp);
+  gimple_seq_add_stmt (&bind_body, call);
+
+  tree tlabel = create_artificial_label (loc);
+  tree flabel = create_artificial_label (loc);
+  gimple *cond = gimple_build_cond (NE_EXPR, temp, build_zero_cst (rettype),
+				    tlabel, flabel);
+  gimple_seq_add_stmt (&bind_body, cond);
+  gimple_seq_add_stmt (&bind_body, gimple_build_label (tlabel));
+  gimple_seq_add_stmt (&bind_body,
+		       gimple_build_assign (first, build_zero_cst (rettype)));
 
   lower_rec_input_clauses (gimple_omp_teams_clauses (teams_stmt),
 			   &bind_body, &dlist, ctx, NULL);
@@ -13929,17 +13963,13 @@ lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 			   NULL, ctx);
   gimple_seq_add_stmt (&bind_body, teams_stmt);
 
-  location_t loc = gimple_location (teams_stmt);
-  tree decl = builtin_decl_explicit (BUILT_IN_GOMP_TEAMS);
-  gimple *call = gimple_build_call (decl, 2, num_teams, thread_limit);
-  gimple_set_location (call, loc);
-  gimple_seq_add_stmt (&bind_body, call);
-
   gimple_seq_add_seq (&bind_body, gimple_omp_body (teams_stmt));
   gimple_omp_set_body (teams_stmt, NULL);
   gimple_seq_add_seq (&bind_body, olist);
   gimple_seq_add_seq (&bind_body, dlist);
   gimple_seq_add_stmt (&bind_body, gimple_build_omp_return (true));
+  gimple_seq_add_stmt (&bind_body, gimple_build_goto (llabel));
+  gimple_seq_add_stmt (&bind_body, gimple_build_label (flabel));
   gimple_bind_set_body (bind, bind_body);
 
   pop_gimplify_context (bind);
diff --git a/libgomp/config/gcn/target.c b/libgomp/config/gcn/target.c
index a93ecc90d44..ba830006453 100644
--- a/libgomp/config/gcn/target.c
+++ b/libgomp/config/gcn/target.c
@@ -26,9 +26,12 @@
 #include "libgomp.h"
 #include <limits.h>
 
-void
-GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
+bool
+GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper,
+	     unsigned int thread_limit, bool first)
 {
+  if (!first)
+    return false;
   if (thread_limit)
     {
       struct gomp_task_icv *icv = gomp_icv (true);
@@ -38,14 +41,15 @@ GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
   unsigned int num_workgroups, workgroup_id;
   num_workgroups = __builtin_gcn_dim_size (0);
   workgroup_id = __builtin_gcn_dim_pos (0);
-  if (!num_teams || num_teams >= num_workgroups)
-    num_teams = num_workgroups;
-  else if (workgroup_id >= num_teams)
-    {
-      gomp_free_thread (gcn_thrs ());
-      exit (0);
-    }
-  gomp_num_teams_var = num_teams - 1;
+  /* FIXME: If num_teams_lower > num_workgroups, we want to loop
+     multiple times at least for some workgroups.  */
+  (void) num_teams_lower;
+  if (!num_teams_upper || num_teams_upper >= num_workgroups)
+    num_teams_upper = num_workgroups;
+  else if (workgroup_id >= num_teams_upper)
+    return false;
+  gomp_num_teams_var = num_teams_upper - 1;
+  return true;
 }
 
 int
diff --git a/libgomp/config/nvptx/target.c b/libgomp/config/nvptx/target.c
index e4140e48296..3d815bb0cb6 100644
--- a/libgomp/config/nvptx/target.c
+++ b/libgomp/config/nvptx/target.c
@@ -26,9 +26,12 @@
 #include "libgomp.h"
 #include <limits.h>
 
-void
-GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
+bool
+GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper,
+	     unsigned int thread_limit, bool first)
 {
+  if (!first)
+    return false;
   if (thread_limit)
     {
       struct gomp_task_icv *icv = gomp_icv (true);
@@ -38,14 +41,15 @@ GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
   unsigned int num_blocks, block_id;
   asm ("mov.u32 %0, %%nctaid.x;" : "=r" (num_blocks));
   asm ("mov.u32 %0, %%ctaid.x;" : "=r" (block_id));
-  if (!num_teams || num_teams >= num_blocks)
-    num_teams = num_blocks;
-  else if (block_id >= num_teams)
-    {
-      gomp_free_thread (nvptx_thrs);
-      asm ("exit;");
-    }
-  gomp_num_teams_var = num_teams - 1;
+  /* FIXME: If num_teams_lower > num_blocks, we want to loop multiple
+     times for some CTAs.  */
+  (void) num_teams_lower;
+  if (!num_teams_upper || num_teams_upper >= num_blocks)
+    num_teams_upper = num_blocks;
+  else if (block_id >= num_teams_upper)
+    return false;
+  gomp_num_teams_var = num_teams_upper - 1;
+  return true;
 }
 
 int
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 4e5397a39a5..2ac58094169 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -399,6 +399,7 @@ GOMP_5.1 {
 	GOMP_error;
 	GOMP_scope_start;
 	GOMP_warning;
+	GOMP_teams4;
 } GOMP_5.0.1;
 
 OACC_2.0 {
diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h
index 40e5cf04907..ab50f6542dc 100644
--- a/libgomp/libgomp_g.h
+++ b/libgomp/libgomp_g.h
@@ -355,6 +355,7 @@ extern void GOMP_target_enter_exit_data (int, size_t, void **, size_t *,
 					 unsigned short *, unsigned int,
 					 void **);
 extern void GOMP_teams (unsigned int, unsigned int);
+extern bool GOMP_teams4 (unsigned int, unsigned int, unsigned int, bool);
 
 /* teams.c */
 
diff --git a/libgomp/target.c b/libgomp/target.c
index 196dba4f08c..ecf09f91312 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -3088,6 +3088,32 @@ GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
   (void) num_teams;
 }
 
+bool
+GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
+	     unsigned int thread_limit, bool first)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  if (first)
+    {
+      if (thread_limit)
+	{
+	  struct gomp_task_icv *icv = gomp_icv (true);
+	  icv->thread_limit_var
+	    = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
+	}
+      (void) num_teams_high;
+      if (num_teams_low == 0)
+	num_teams_low = 1;
+      thr->num_teams = num_teams_low - 1;
+      thr->team_num = 0;
+    }
+  else if (thr->team_num == thr->num_teams)
+    return false;
+  else
+    ++thr->team_num;
+  return true;
+}
+
 void *
 omp_target_alloc (size_t size, int device_num)
 {
diff --git a/libgomp/testsuite/libgomp.c-c++-common/teams-2.c b/libgomp/testsuite/libgomp.c-c++-common/teams-2.c
new file mode 100644
index 00000000000..316bcfe5848
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/teams-2.c
@@ -0,0 +1,70 @@
+#include <omp.h>
+#include <stdlib.h>
+
+int
+foo ()
+{
+  return 934;
+}
+
+int
+main ()
+{
+  int a[934] = {};
+  int k, e;
+  #pragma omp target map(a)
+  #pragma omp teams num_teams (foo ())
+  {
+    int i = omp_get_team_num ();
+    if (omp_get_num_teams () != 934
+	|| (unsigned) i >= 934U
+	|| a[i] != 0)
+      abort ();
+    ++a[i];
+  }
+  #pragma omp target map(a)
+  #pragma omp teams num_teams (foo () - 50 : foo ())
+  {
+    int i = omp_get_team_num ();
+    int j = omp_get_num_teams ();
+    if (j < 884
+	|| j > 934
+	|| (unsigned) i >= (unsigned) j
+	|| a[i] != 1)
+      abort ();
+    ++a[i];
+  }
+  #pragma omp target teams map(a) num_teams (foo () / 2)
+  {
+    int i = omp_get_team_num ();
+    if (omp_get_num_teams () != 467
+	|| (unsigned) i >= 467U
+	|| a[i] != 2)
+      abort ();
+    ++a[i];
+  }
+  #pragma omp target teams map(a) num_teams (foo () / 2 - 50 : foo () / 2)
+  {
+    int i = omp_get_team_num ();
+    int j = omp_get_num_teams ();
+    if (j < 417
+	|| j > 467
+	|| (unsigned) i >= (unsigned) j
+	|| a[i] != 3)
+      abort ();
+    ++a[i];
+  }
+  e = 4;
+  for (k = 0; k < 934; k++)
+    {
+      if (k >= 417 && k < 467 && a[k] == 3)
+	e = 3;
+      else if (k == 467)
+	e = 2;
+      else if (k >= 884 && a[k] == 1)
+	e = 1;
+      if (a[k] != e)
+	abort ();
+    }
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/teams-4.c b/libgomp/testsuite/libgomp.c/teams-4.c
index c1f5356c622..6e065c51948 100644
--- a/libgomp/testsuite/libgomp.c/teams-4.c
+++ b/libgomp/testsuite/libgomp.c/teams-4.c
@@ -20,7 +20,7 @@ main ()
     #pragma omp parallel if (0)
     #pragma omp target
     #pragma omp teams num_teams (2)
-    if (omp_get_num_teams () > 2
+    if (omp_get_num_teams () != 2
 	|| (unsigned) omp_get_team_num () >= 2U)
       abort ();
     if (omp_get_num_teams () != 4 || (unsigned) team >= 4U)


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-11-12 11:42 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-12 11:42 [gcc r12-5191] openmp: Honor OpenMP 5.1 num_teams lower bound Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).