#include <assert.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Workload shape.  Both configurations perform the same number of element
   increments per thread (BLOCKSIZE * REPS == 2^28); FEWER_KERNELS trades
   many small kernel launches for a few large ones.  */
#ifdef FEWER_KERNELS
#define THREADS 8
#define BLOCKSIZE (8 * 1024 * 1024)
#define REPS 32
#else
#define THREADS 8
#define BLOCKSIZE (8 * 256)
#define REPS (32 * 1024 * 4)
#endif

/* Report a failed setup step on stderr and abort.  ERR is an errno-style
   code (as returned by the pthread_* functions), or 0 when no code is
   available.  */
static void
die (const char *what, int err)
{
  if (err != 0)
    fprintf (stderr, "%s failed: %s\n", what, strerror (err));
  else
    fprintf (stderr, "%s failed\n", what);
  abort ();
}

/* A worker thread.  Launch a pile of offload kernels and do some work.

   ARG points to the first of BLOCKSIZE ints owned by this thread.  Each of
   the REPS target regions increments every element once, so every element
   has grown by REPS when the thread finishes.  Always returns NULL.  */
void *
target_incr (void *arg)
{
  int *myarr = (int *) arg;

  for (int r = 0; r < REPS; r++)
    {
      /* main maps each array with 'target enter data' before starting the
         threads and only copies it back with 'target exit data' after
         joining them.  */
#pragma omp target map(tofrom: myarr[0:BLOCKSIZE])
      {
#pragma omp for
        for (int i = 0; i < BLOCKSIZE; i++)
          {
            myarr[i]++;
          }
      }
    }

  return NULL;
}

/* Allocate one zeroed block per thread, map the blocks for offloading, run
   THREADS workers concurrently, copy the blocks back and check that every
   element equals REPS.  Returns 0 on success; aborts on any failure.  */
int
main (int argc, char *argv[])
{
  int *arr[THREADS];
  pthread_t workerthread[THREADS];
  int i;

  (void) argc;
  (void) argv;

  for (i = 0; i < THREADS; i++)
    {
      arr[i] = malloc (BLOCKSIZE * sizeof (int));
      if (arr[i] == NULL)
        die ("malloc", 0);
      memset (arr[i], 0, BLOCKSIZE * sizeof (int));
    }

  /* Map every block up front, before any worker starts.  */
  for (i = 0; i < THREADS; i++)
    {
      int *tmp = arr[i];
#pragma omp target enter data map(to: tmp[0:BLOCKSIZE])
    }

  for (i = 0; i < THREADS; i++)
    {
      int rc = pthread_create (&workerthread[i], NULL, target_incr,
                               (void *) arr[i]);
      /* Joining a pthread_t that was never initialised is undefined, so
         stop here rather than carrying on.  */
      if (rc != 0)
        die ("pthread_create", rc);
    }

  for (i = 0; i < THREADS; i++)
    {
      void *rv;
      int rc = pthread_join (workerthread[i], &rv);
      if (rc != 0)
        die ("pthread_join", rc);
      assert (rv == NULL);
    }

  /* Copy the results back now that all workers have finished.  */
  for (i = 0; i < THREADS; i++)
    {
      int *tmp = arr[i];
#pragma omp target exit data map(from: tmp[0:BLOCKSIZE])
    }

  for (i = 0; i < THREADS; i++)
    {
      for (int j = 0; j < BLOCKSIZE; j++)
        {
          assert (arr[i][j] == REPS);
        }
    }

  for (i = 0; i < THREADS; i++)
    free (arr[i]);

  return 0;
}