public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-6958] [libgomp, testsuite] Fix insufficient resources in test-cases
@ 2022-02-01  7:19 Tom de Vries
  0 siblings, 0 replies; only message in thread
From: Tom de Vries @ 2022-02-01  7:19 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:d43fbc7d3f7621e1c8f153c1471d2a5cd20bfdc8

commit r12-6958-gd43fbc7d3f7621e1c8f153c1471d2a5cd20bfdc8
Author: Tom de Vries <tdevries@suse.de>
Date:   Sun Jan 23 06:29:58 2022 +0100

    [libgomp, testsuite] Fix insufficient resources in test-cases
    
    When running libgomp test-case broadcast-many.c on an nvptx accelerator
    (T400, driver version 470.86), I run into:
    ...
    libgomp: The Nvidia accelerator has insufficient resources to launch \
      'main$_omp_fn$0' with num_workers = 32 and vector_length = 32; \
      recompile the program with 'num_workers = x and vector_length = y' on \
      that offloaded region or '-fopenacc-dim=:x:y' where x * y <= 896.
    
    FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/broadcast-many.c \
      -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  \
      -O0  execution test
    ...
    
    The error does not occur when using GOMP_NVPTX_JIT=-O0.
    
    Fix this by reducing num_workers for ACC_DEVICE_TYPE_nvidia to 28, that
    is, the reported limit of 896 divided by the vector_length of 32.
    
    Likewise for test-cases par-loop-comb-reduction-4.c (reduced to 24
    workers) and reduction-7.c (reduced to 28 workers).
    
    Tested libgomp on x86_64 with nvptx accelerator.
    
    libgomp/ChangeLog:
    
    2022-01-27  Tom de Vries  <tdevries@suse.de>
    
            * testsuite/libgomp.oacc-c-c++-common/broadcast-many.c: Reduce
            num_workers for nvidia accelerator to fix libgomp error 'insufficient
            resources'.
            * testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c:
            Same.
            * testsuite/libgomp.oacc-c-c++-common/reduction-7.c: Same.

Diff:
---
 libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-many.c   |  9 ++++++++-
 .../libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c      | 10 +++++++++-
 libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c      |  9 ++++++++-
 3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-many.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-many.c
index 37839edfb09..08e026960e6 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-many.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/broadcast-many.c
@@ -5,6 +5,13 @@
 #include <assert.h>
 #include <stdio.h>
 
+#if ACC_DEVICE_TYPE_nvidia
+/* To avoid 'libgomp: The Nvidia accelerator has insufficient resources'.  */
+#define NUM_WORKERS 28
+#else
+#define NUM_WORKERS 32
+#endif
+
 #define LOCAL(n) double n = input;
 #define LOCALS(n) LOCAL(n##1) LOCAL(n##2) LOCAL(n##3) LOCAL(n##4) \
 		  LOCAL(n##5) LOCAL(n##6) LOCAL(n##7) LOCAL(n##8)
@@ -23,7 +30,7 @@ int main (void)
   int ret;
   int input = 1;
 
-  #pragma acc parallel num_gangs(1) num_workers(32) copyout(ret)
+  #pragma acc parallel num_gangs(1) num_workers(NUM_WORKERS) copyout(ret)
   {
     int w = 0;
     LOCALS2(h);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c
index c3cc12fa953..4c66dc7bfea 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c
@@ -1,5 +1,12 @@
 #include <assert.h>
 
+#if ACC_DEVICE_TYPE_nvidia
+/* To avoid 'libgomp: The Nvidia accelerator has insufficient resources'.  */
+#define NUM_WORKERS 24
+#else
+#define NUM_WORKERS 32
+#endif
+
 /* Test of reduction on both parallel and loop directives (workers and vectors
    together in gang-partitioned mode, float type, multiple reductions).  */
 
@@ -13,7 +20,8 @@ main (int argc, char *argv[])
   for (i = 0; i < 32768; i++)
     arr[i] = i % (32768 / 64);
 
-  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+  #pragma acc parallel \
+    num_gangs(32) num_workers(NUM_WORKERS) vector_length(32) \
     reduction(+:res) reduction(max:mres) copy(res, mres)
   {
     #pragma acc loop gang /* { dg-warning "nested loop in reduction needs reduction clause for 'm\?res'" "TODO" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c
index c2fb922a7f1..b4fe2300581 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c
@@ -181,6 +181,12 @@ void gwv_np_3()
   assert (res == hres);
 }
 
+#if ACC_DEVICE_TYPE_nvidia
+/* To avoid 'libgomp: The Nvidia accelerator has insufficient resources'.  */
+#define NUM_WORKERS 28
+#else
+#define NUM_WORKERS 32
+#endif
 
 /* Test of reduction on loop directive (gangs, workers and vectors, multiple
    non-private reduction variables, float type).  */
@@ -194,7 +200,7 @@ void gwv_np_4()
   for (i = 0; i < 32768; i++)
     arr[i] = i % (32768 / 64);
 
-  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
+  #pragma acc parallel num_gangs(32) num_workers(NUM_WORKERS) vector_length(32)
   {
     #pragma acc loop gang reduction(+:res) reduction(max:mres)
     for (j = 0; j < 32; j++)
@@ -235,6 +241,7 @@ void gwv_np_4()
   assert (mres == hmres);
 }
 
+#undef NUM_WORKERS
 
 /* Test of reduction on loop directive (vectors, private reduction
    variable).  */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-02-01  7:19 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-01  7:19 [gcc r12-6958] [libgomp, testsuite] Fix insufficient resources in test-cases Tom de Vries

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).