From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 21504 invoked by alias); 10 Nov 2015 00:48:18 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 21494 invoked by uid 89); 10 Nov 2015 00:48:17 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.8 required=5.0 tests=BAYES_00,FREEMAIL_FROM,KAM_ASCII_DIVIDERS,RCVD_IN_DNSWL_LOW,SPF_PASS autolearn=no version=3.3.2 X-HELO: mail-qk0-f173.google.com Received: from mail-qk0-f173.google.com (HELO mail-qk0-f173.google.com) (209.85.220.173) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-GCM-SHA256 encrypted) ESMTPS; Tue, 10 Nov 2015 00:48:12 +0000 Received: by qkas77 with SMTP id s77so75908388qka.0 for ; Mon, 09 Nov 2015 16:48:09 -0800 (PST) X-Received: by 10.55.73.150 with SMTP id w144mr921663qka.80.1447116489903; Mon, 09 Nov 2015 16:48:09 -0800 (PST) Received: from ?IPv6:2601:181:c000:c497:a2a8:cdff:fe3e:b48? ([2601:181:c000:c497:a2a8:cdff:fe3e:b48]) by smtp.googlemail.com with ESMTPSA id 86sm325941qky.24.2015.11.09.16.48.09 (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Mon, 09 Nov 2015 16:48:09 -0800 (PST) Subject: Re: [1/2] OpenACC routine support To: Jakub Jelinek References: <5637B1CF.5060408@acm.org> <5637B7C7.70901@acm.org> <20151103153533.GQ478@tucnak.redhat.com> <56413AF1.8070101@acm.org> Cc: GCC Patches , Cesar Philippidis From: Nathan Sidwell Message-ID: <56413EC8.8040907@acm.org> Date: Tue, 10 Nov 2015 00:48:00 -0000 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.3.0 MIME-Version: 1.0 In-Reply-To: <56413AF1.8070101@acm.org> Content-Type: multipart/mixed; boundary="------------060407010306040207090703" X-SW-Source: 2015-11/txt/msg01144.txt.bz2 This is a multi-part message in MIME format. 
--------------060407010306040207090703 Content-Type: text/plain; charset=windows-1252; format=flowed Content-Transfer-Encoding: 7bit Content-length: 167 And these are the new tests. Cesar, c-c++-common/goacc/routine-5.c will need adjusting with your C++ parser patch. You'll see the two cases I've #if'd out. nathan --------------060407010306040207090703 Content-Type: text/x-patch; name="trunk-routine-tests-1109.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="trunk-routine-tests-1109.patch" Content-length: 14874 2015-11-09 Nathan Sidwell gcc/testsuite/ * c-c++-common/goacc/routine-1.c: New. * c-c++-common/goacc/routine-2.c: New. * c-c++-common/goacc/routine-3.c: New. * c-c++-common/goacc/routine-4.c: New. * c-c++-common/goacc/routine-5.c: New. libgomp/ * testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c: New. * testsuite/libgomp.oacc-c-c++-common/routine-g-1.c: New. * testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c: New. * testsuite/libgomp.oacc-c-c++-common/routine-v-1.c: New. * testsuite/libgomp.oacc-c-c++-common/routine-w-1.c: New. * testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c: New. 
Index: gcc/testsuite/c-c++-common/goacc/routine-1.c =================================================================== --- gcc/testsuite/c-c++-common/goacc/routine-1.c (revision 0) +++ gcc/testsuite/c-c++-common/goacc/routine-1.c (working copy) @@ -0,0 +1,34 @@ + +#pragma acc routine gang +void gang (void) +{ +} + +#pragma acc routine worker +void worker (void) +{ +} + +#pragma acc routine vector +void vector (void) +{ +} + +#pragma acc routine seq +void seq (void) +{ +} + +int main () +{ + +#pragma acc parallel num_gangs (32) num_workers (32) vector_length (32) + { + gang (); + worker (); + vector (); + seq (); + } + + return 0; +} Index: gcc/testsuite/c-c++-common/goacc/routine-2.c =================================================================== --- gcc/testsuite/c-c++-common/goacc/routine-2.c (revision 0) +++ gcc/testsuite/c-c++-common/goacc/routine-2.c (working copy) @@ -0,0 +1,21 @@ +#pragma acc routine gang worker /* { dg-error "multiple loop axes" } */ +void gang (void) +{ +} + +#pragma acc routine worker vector /* { dg-error "multiple loop axes" } */ +void worker (void) +{ +} + +#pragma acc routine vector seq /* { dg-error "multiple loop axes" } */ +void vector (void) +{ +} + +#pragma acc routine seq gang /* { dg-error "multiple loop axes" } */ +void seq (void) +{ +} + +#pragma acc routine (nothing) gang /* { dg-error "not been declared" } */ Index: gcc/testsuite/c-c++-common/goacc/routine-3.c =================================================================== --- gcc/testsuite/c-c++-common/goacc/routine-3.c (revision 0) +++ gcc/testsuite/c-c++-common/goacc/routine-3.c (working copy) @@ -0,0 +1,53 @@ +#pragma acc routine gang +void gang (void) /* { dg-message "declared here" 3 } */ +{ +} + +#pragma acc routine worker +void worker (void) /* { dg-message "declared here" 2 } */ +{ +} + +#pragma acc routine vector +void vector (void) /* { dg-message "declared here" 1 } */ +{ +} + +#pragma acc routine seq +void seq (void) +{ +} + +int main () +{ + +#pragma 
acc parallel num_gangs (32) num_workers (32) vector_length (32) + { + #pragma acc loop gang /* { dg-message "loop here" 1 } */ + for (int i = 0; i < 10; i++) + { + gang (); /* { dg-error "routine call uses same" } */ + worker (); + vector (); + seq (); + } + #pragma acc loop worker /* { dg-message "loop here" 2 } */ + for (int i = 0; i < 10; i++) + { + gang (); /* { dg-error "routine call uses same" } */ + worker (); /* { dg-error "routine call uses same" } */ + vector (); + seq (); + } + #pragma acc loop vector /* { dg-message "loop here" 3 } */ + for (int i = 0; i < 10; i++) + { + gang (); /* { dg-error "routine call uses same" } */ + worker (); /* { dg-error "routine call uses same" } */ + vector (); /* { dg-error "routine call uses same" } */ + seq (); + } + } + + return 0; +} Index: gcc/testsuite/c-c++-common/goacc/routine-4.c =================================================================== --- gcc/testsuite/c-c++-common/goacc/routine-4.c (revision 0) +++ gcc/testsuite/c-c++-common/goacc/routine-4.c (working copy) @@ -0,0 +1,41 @@ + +void gang (void); +void worker (void); +void vector (void); + +#pragma acc routine (gang) gang +#pragma acc routine (worker) worker +#pragma acc routine (vector) vector + +#pragma acc routine seq +void seq (void) +{ + gang (); /* { dg-error "routine call uses" } */ + worker (); /* { dg-error "routine call uses" } */ + vector (); /* { dg-error "routine call uses" } */ + seq (); +} + +void vector (void) /* { dg-message "declared here" 1 } */ +{ + gang (); /* { dg-error "routine call uses" } */ + worker (); /* { dg-error "routine call uses" } */ + vector (); + seq (); +} + +void worker (void) /* { dg-message "declared here" 2 } */ +{ + gang (); /* { dg-error "routine call uses" } */ + worker (); + vector (); + seq (); +} + +void gang (void) /* { dg-message "declared here" 3 } */ +{ + gang (); + worker (); + vector (); + seq (); +} Index: gcc/testsuite/c-c++-common/goacc/routine-5.c 
=================================================================== --- gcc/testsuite/c-c++-common/goacc/routine-5.c (revision 0) +++ gcc/testsuite/c-c++-common/goacc/routine-5.c (working copy) @@ -0,0 +1,51 @@ +/* { dg-do compile } */ + +#pragma acc routine /* { dg-error "not followed by" } */ +int a; + +#if 0 /* Disable for the moment. */ +#pragma acc routine /* dg-error "not followed by" */ +void fn1 (void), fn1b (void); +#endif + +#pragma acc routine /* { dg-error "not followed by" } */ +int b, fn2 (void); + +#if 0 /* Disable for the moment. */ +#pragma acc routine /* dg-error "not followed by" */ +int fn3 (void), b2; +#endif + +#pragma acc routine /* { dg-error "not followed by" } */ +typedef struct c c; + +#pragma acc routine /* { dg-error "not followed by" } */ +struct d {} d; + +#pragma acc routine /* { dg-error "not followed by" } */ +#pragma acc routine +int fn4 (void); + +int fn5a (void); + +#pragma acc routine /* { dg-error "not followed by" } */ +#pragma acc routine (fn5a) +int fn5 (void); + +#ifdef __cplusplus + +#pragma acc routine /* { dg-error "not followed by" "" { target c++ } } */ +namespace f {} + +namespace g {} + +#pragma acc routine /* { dg-error "not followed by" "" { target c++ } } */ +using namespace g; + +#pragma acc routine (g) /* { dg-error "does not refer to" "" { target c++ } } */ + +#endif + +#pragma acc routine (a) /* { dg-error "does not refer to" } */ + +#pragma acc routine (c) /* { dg-error "does not refer to" } */ Index: libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c (revision 0) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c (working copy) @@ -0,0 +1,41 @@ +/* { dg-do run } */ + +#include <openacc.h> + +int main () +{ + int ok = 1; + int val = 2; + int ary[32]; + int ondev = 0; + + for (int i = 0; i < 32; i++) + ary[i] = ~0; + +#pragma acc parallel num_gangs (32) copy (ok) 
firstprivate (val) copy(ary, ondev) + { + ondev = acc_on_device (acc_device_not_host); +#pragma acc loop gang(static:1) + for (unsigned i = 0; i < 32; i++) + { + if (val != 2) + ok = 0; + val += i; + ary[i] = val; + } + } + + if (ondev) + { + if (!ok) + return 1; + if (val != 2) + return 1; + + for (int i = 0; i < 32; i++) + if (ary[i] != 2 + i) + return 1; + } + + return 0; +} Index: libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c (revision 0) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c (working copy) @@ -0,0 +1,64 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2" } */ + +#include <stdio.h> + +#define N (32*32*32+17) + +#pragma acc routine gang +void __attribute__ ((noinline)) gang (int ary[N]) +{ +#pragma acc loop gang + for (unsigned ix = 0; ix < N; ix++) + { + if (__builtin_acc_on_device (5)) + { + int g = 0, w = 0, v = 0; + + __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); + __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); + __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + ary[ix] = (g << 16) | (w << 8) | v; + } + else + ary[ix] = ix; + } +} + +int main () +{ + int ary[N]; + int ix; + int exit = 0; + int ondev = 0; + + for (ix = 0; ix < N;ix++) + ary[ix] = -1; + +#pragma acc parallel num_gangs(32) vector_length(32) copy(ary) copy(ondev) + { + ondev = __builtin_acc_on_device (5); + gang (ary); + } + + for (ix = 0; ix < N; ix++) + { + int expected = ix; + if(ondev) + { + int g = ix / ((N + 31) / 32); + int w = 0; + int v = 0; + + expected = (g << 16) | (w << 8) | v; + } + + if (ary[ix] != expected) + { + exit = 1; + printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected); + } + } + + return exit; +} Index: libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c =================================================================== --- 
libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c (revision 0) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c (working copy) @@ -0,0 +1,66 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2" } */ + +#include <stdio.h> + +#define N (32*32*32+17) + +#pragma acc routine gang +void __attribute__ ((noinline)) gang (int ary[N]) +{ +#pragma acc loop gang worker vector + for (unsigned ix = 0; ix < N; ix++) + { + if (__builtin_acc_on_device (5)) + { + int g = 0, w = 0, v = 0; + + __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); + __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); + __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + ary[ix] = (g << 16) | (w << 8) | v; + } + else + ary[ix] = ix; + } +} + +int main () +{ + int ary[N]; + int ix; + int exit = 0; + int ondev = 0; + + for (ix = 0; ix < N;ix++) + ary[ix] = -1; + +#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev) + { + ondev = __builtin_acc_on_device (5); + gang (ary); + } + + for (ix = 0; ix < N; ix++) + { + int expected = ix; + if(ondev) + { + int chunk_size = (N + 32*32*32 - 1) / (32*32*32); + + int g = ix / (chunk_size * 32 * 32); + int w = ix / 32 % 32; + int v = ix % 32; + + expected = (g << 16) | (w << 8) | v; + } + + if (ary[ix] != expected) + { + exit = 1; + printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected); + } + } + + return exit; +} Index: libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c (revision 0) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c (working copy) @@ -0,0 +1,64 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2" } */ + +#include <stdio.h> + +#define N (32*32*32+17) + +#pragma acc routine vector +void __attribute__ ((noinline)) vector (int ary[N]) +{ +#pragma acc loop vector + for (unsigned ix = 0; ix < N; ix++) + { + if (__builtin_acc_on_device (5)) + { + int g = 
0, w = 0, v = 0; + + __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); + __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); + __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + ary[ix] = (g << 16) | (w << 8) | v; + } + else + ary[ix] = ix; + } +} + +int main () +{ + int ary[N]; + int ix; + int exit = 0; + int ondev = 0; + + for (ix = 0; ix < N;ix++) + ary[ix] = -1; + +#pragma acc parallel vector_length(32) copy(ary) copy(ondev) + { + ondev = __builtin_acc_on_device (5); + vector (ary); + } + + for (ix = 0; ix < N; ix++) + { + int expected = ix; + if(ondev) + { + int g = 0; + int w = 0; + int v = ix % 32; + + expected = (g << 16) | (w << 8) | v; + } + + if (ary[ix] != expected) + { + exit = 1; + printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected); + } + } + + return exit; +} Index: libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c (revision 0) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c (working copy) @@ -0,0 +1,64 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2" } */ + +#include <stdio.h> + +#define N (32*32*32+17) + +#pragma acc routine worker +void __attribute__ ((noinline)) worker (int ary[N]) +{ +#pragma acc loop worker + for (unsigned ix = 0; ix < N; ix++) + { + if (__builtin_acc_on_device (5)) + { + int g = 0, w = 0, v = 0; + + __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); + __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); + __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + ary[ix] = (g << 16) | (w << 8) | v; + } + else + ary[ix] = ix; + } +} + +int main () +{ + int ary[N]; + int ix; + int exit = 0; + int ondev = 0; + + for (ix = 0; ix < N;ix++) + ary[ix] = -1; + +#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) + { + ondev = __builtin_acc_on_device (5); + worker (ary); + } + + for (ix = 0; ix < N; ix++) + { + int expected = ix; + if(ondev) + { + 
int g = 0; + int w = ix % 32; + int v = 0; + + expected = (g << 16) | (w << 8) | v; + } + + if (ary[ix] != expected) + { + exit = 1; + printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected); + } + } + + return exit; +} Index: libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c (revision 0) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c (working copy) @@ -0,0 +1,64 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2" } */ + +#include <stdio.h> + +#define N (32*32*32+17) + +#pragma acc routine worker +void __attribute__ ((noinline)) worker (int ary[N]) +{ +#pragma acc loop worker vector + for (unsigned ix = 0; ix < N; ix++) + { + if (__builtin_acc_on_device (5)) + { + int g = 0, w = 0, v = 0; + + __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); + __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); + __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + ary[ix] = (g << 16) | (w << 8) | v; + } + else + ary[ix] = ix; + } +} + +int main () +{ + int ary[N]; + int ix; + int exit = 0; + int ondev = 0; + + for (ix = 0; ix < N;ix++) + ary[ix] = -1; + +#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) + { + ondev = __builtin_acc_on_device (5); + worker (ary); + } + + for (ix = 0; ix < N; ix++) + { + int expected = ix; + if(ondev) + { + int g = 0; + int w = (ix / 32) % 32; + int v = ix % 32; + + expected = (g << 16) | (w << 8) | v; + } + + if (ary[ix] != expected) + { + exit = 1; + printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected); + } + } + + return exit; +} --------------060407010306040207090703--