public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Jakub Jelinek <jakub@redhat.com>
To: Tom de Vries <tdevries@suse.de>, Alexander Monakov <amonakov@ispras.ru>
Cc: gcc-patches@gcc.gnu.org, Tobias Burnus <tobias@codesourcery.com>
Subject: [PATCH] libgomp, nvptx, v3: Honor OpenMP 5.1 num_teams lower bound
Date: Fri, 12 Nov 2021 18:58:04 +0100	[thread overview]
Message-ID: <20211112175804.GJ2710@tucnak> (raw)
In-Reply-To: <20211112132716.GD2710@tucnak>

On Fri, Nov 12, 2021 at 02:27:16PM +0100, Jakub Jelinek via Gcc-patches wrote:
> On Fri, Nov 12, 2021 at 02:20:23PM +0100, Jakub Jelinek via Gcc-patches wrote:
> > This patch assumes that .shared variables are initialized to 0,
> > https://docs.nvidia.com/cuda/parallel-thread-execution/index.html lists
> > in Table 7. .shared as non-initializable.  If that isn't the case,
> > we need to initialize it somewhere for the case of #pragma omp target
> > without #pragma omp teams in it, maybe in libgcc/config/nvptx/crt0.c ?
> 
> A quick look at libgcc/config/nvptx/crt0.c shows the target supports
> __attribute__((shared)), so perhaps either following instead, or, if
> .shared isn't preinitialized to zero, defining the variable in
> libgcc/config/nvptx/crt0.c , adding there __gomp_team_num = 0;
> and adding extern keyword before int __gomp_team_num __attribute__((shared));
> in libgomp/config/nvptx/target.c.

And finally here is a third version, which fixes a typo in the previous
patch (in instead of int) and actually initializes the shared var because
PTX documentation doesn't say anything about how the shared vars are
initialized.

Tested on x86_64-linux with nvptx-none offloading, ok for trunk?

2021-11-12  Jakub Jelinek  <jakub@redhat.com>

	* config/nvptx/team.c (__gomp_team_num): Define as
	__attribute__((shared)) var.
	(gomp_nvptx_main): Initialize __gomp_team_num to 0.
	* config/nvptx/target.c (__gomp_team_num): Declare as
	extern __attribute__((shared)) var.
	(GOMP_teams4): Use __gomp_team_num as the team number instead of
	%ctaid.x.  If first, initialize it to %ctaid.x.  If num_teams_lower
	is bigger than num_blocks, use num_teams_lower teams and arrange for
	bumping of __gomp_team_num if !first and returning false once we run
	out of teams.
	* config/nvptx/teams.c (__gomp_team_num): Declare as
	extern __attribute__((shared)) var.
	(omp_get_team_num): Return __gomp_team_num value instead of %ctaid.x.

--- libgomp/config/nvptx/team.c.jj	2021-05-25 13:43:02.793121350 +0200
+++ libgomp/config/nvptx/team.c	2021-11-12 17:49:02.847341650 +0100
@@ -32,6 +32,7 @@
 #include <string.h>
 
 struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon));
+int __gomp_team_num __attribute__((shared));
 
 static void gomp_thread_start (struct gomp_thread_pool *);
 
@@ -57,6 +58,7 @@ gomp_nvptx_main (void (*fn) (void *), vo
       /* Starting additional threads is not supported.  */
       gomp_global_icv.dyn_var = true;
 
+      __gomp_team_num = 0;
       nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
       memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));
 
--- libgomp/config/nvptx/target.c.jj	2021-11-12 15:57:29.400632875 +0100
+++ libgomp/config/nvptx/target.c	2021-11-12 17:47:39.499533296 +0100
@@ -26,28 +26,41 @@
 #include "libgomp.h"
 #include <limits.h>
 
+extern int __gomp_team_num __attribute__((shared));
+
 bool
 GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper,
 	     unsigned int thread_limit, bool first)
 {
+  unsigned int num_blocks, block_id;
+  asm ("mov.u32 %0, %%nctaid.x;" : "=r" (num_blocks));
   if (!first)
-    return false;
+    {
+      unsigned int team_num;
+      if (num_blocks > gomp_num_teams_var)
+	return false;
+      team_num = __gomp_team_num;
+      if (team_num > gomp_num_teams_var - num_blocks)
+	return false;
+      __gomp_team_num = team_num + num_blocks;
+      return true;
+    }
   if (thread_limit)
     {
       struct gomp_task_icv *icv = gomp_icv (true);
       icv->thread_limit_var
 	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
     }
-  unsigned int num_blocks, block_id;
-  asm ("mov.u32 %0, %%nctaid.x;" : "=r" (num_blocks));
-  asm ("mov.u32 %0, %%ctaid.x;" : "=r" (block_id));
-  /* FIXME: If num_teams_lower > num_blocks, we want to loop multiple
-     times for some CTAs.  */
-  (void) num_teams_lower;
-  if (!num_teams_upper || num_teams_upper >= num_blocks)
+  if (!num_teams_upper)
     num_teams_upper = num_blocks;
-  else if (block_id >= num_teams_upper)
+  else if (num_blocks < num_teams_lower)
+    num_teams_upper = num_teams_lower;
+  else if (num_blocks < num_teams_upper)
+    num_teams_upper = num_blocks;
+  asm ("mov.u32 %0, %%ctaid.x;" : "=r" (block_id));
+  if (block_id >= num_teams_upper)
     return false;
+  __gomp_team_num = block_id;
   gomp_num_teams_var = num_teams_upper - 1;
   return true;
 }
--- libgomp/config/nvptx/teams.c.jj	2021-05-25 13:43:02.793121350 +0200
+++ libgomp/config/nvptx/teams.c	2021-11-12 17:37:18.933361024 +0100
@@ -28,6 +28,8 @@
 
 #include "libgomp.h"
 
+extern int __gomp_team_num __attribute__((shared));
+
 void
 GOMP_teams_reg (void (*fn) (void *), void *data, unsigned int num_teams,
 		unsigned int thread_limit, unsigned int flags)
@@ -48,9 +50,7 @@ omp_get_num_teams (void)
 int
 omp_get_team_num (void)
 {
-  int ctaid;
-  asm ("mov.u32 %0, %%ctaid.x;" : "=r" (ctaid));
-  return ctaid;
+  return __gomp_team_num;
 }
 
 ialias (omp_get_num_teams)


	Jakub


  reply	other threads:[~2021-11-12 17:58 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-12 13:20 [PATCH] libgomp, nvptx: " Jakub Jelinek
2021-11-12 13:27 ` [PATCH] libgomp, nvptx, v2: " Jakub Jelinek
2021-11-12 17:58   ` Jakub Jelinek [this message]
2021-11-12 19:16     ` [PATCH] libgomp, nvptx, v3: " Alexander Monakov
2021-11-12 19:47       ` Jakub Jelinek
2021-11-12 19:49         ` Jakub Jelinek
2021-11-12 21:21           ` Alexander Monakov
2021-11-12 21:08     ` Alexander Monakov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211112175804.GJ2710@tucnak \
    --to=jakub@redhat.com \
    --cc=amonakov@ispras.ru \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=tdevries@suse.de \
    --cc=tobias@codesourcery.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).