diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc index ba612e5c67d..cdadd6f0c96 100644 --- a/gcc/omp-low.cc +++ b/gcc/omp-low.cc @@ -15097,6 +15097,180 @@ make_pass_diagnose_omp_blocks (gcc::context *ctxt) { return new pass_diagnose_omp_blocks (ctxt); } + +/* Provide transformation required for using unified shared memory + by replacing calls to standard memory allocation functions with + function provided by the libgomp. */ + +static tree +usm_transform (gimple_stmt_iterator *gsi_p, bool *, + struct walk_stmt_info *wi) +{ + gimple *stmt = gsi_stmt (*gsi_p); + /* ompx_unified_shared_mem_alloc is 10. */ + const unsigned int unified_shared_mem_alloc = 10; + + switch (gimple_code (stmt)) + { + case GIMPLE_CALL: + { + gcall *gs = as_a (stmt); + tree fndecl = gimple_call_fndecl (gs); + if (fndecl) + { + tree allocator = build_int_cst (pointer_sized_int_node, + unified_shared_mem_alloc); + const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); + if ((strcmp (name, "malloc") == 0) + || (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL) + && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MALLOC) + || DECL_IS_REPLACEABLE_OPERATOR_NEW_P (fndecl) + || strcmp (name, "omp_target_alloc") == 0) + { + tree omp_alloc_type + = build_function_type_list (ptr_type_node, size_type_node, + pointer_sized_int_node, + NULL_TREE); + tree repl = build_fn_decl ("omp_alloc", omp_alloc_type); + tree size = gimple_call_arg (gs, 0); + gimple *g = gimple_build_call (repl, 2, size, allocator); + gimple_call_set_lhs (g, gimple_call_lhs (gs)); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi_p, g, true); + } + else if (strcmp (name, "aligned_alloc") == 0) + { + /* May be we can also use this for new operator with + std::align_val_t parameter. */ + tree omp_alloc_type + = build_function_type_list (ptr_type_node, size_type_node, + size_type_node, + pointer_sized_int_node, + NULL_TREE); + tree repl = build_fn_decl ("omp_aligned_alloc", + omp_alloc_type); + tree align = gimple_call_arg (gs, 0); + tree size = gimple_call_arg (gs, 1); + gimple *g = gimple_build_call (repl, 3, align, size, + allocator); + gimple_call_set_lhs (g, gimple_call_lhs (gs)); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi_p, g, true); + } + else if ((strcmp (name, "calloc") == 0) + || (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL) + && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CALLOC)) + { + tree omp_calloc_type + = build_function_type_list (ptr_type_node, size_type_node, + size_type_node, + pointer_sized_int_node, + NULL_TREE); + tree repl = build_fn_decl ("omp_calloc", omp_calloc_type); + tree num = gimple_call_arg (gs, 0); + tree size = gimple_call_arg (gs, 1); + gimple *g = gimple_build_call (repl, 3, num, size, allocator); + gimple_call_set_lhs (g, gimple_call_lhs (gs)); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi_p, g, true); + } + else if ((strcmp (name, "realloc") == 0) + || (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL) + && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_REALLOC)) + { + tree omp_realloc_type + = build_function_type_list (ptr_type_node, ptr_type_node, + size_type_node, + pointer_sized_int_node, + pointer_sized_int_node, + NULL_TREE); + tree repl = build_fn_decl ("omp_realloc", omp_realloc_type); + tree ptr = gimple_call_arg (gs, 0); + tree size = gimple_call_arg (gs, 1); + gimple *g = gimple_build_call (repl, 4, ptr, size, allocator, + allocator); + gimple_call_set_lhs (g, gimple_call_lhs (gs)); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi_p, g, true); + } + else if ((strcmp (name, "free") == 0) + || (fndecl_built_in_p (fndecl, BUILT_IN_NORMAL) + && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_FREE) + || (DECL_IS_OPERATOR_DELETE_P (fndecl) + && DECL_IS_REPLACEABLE_OPERATOR (fndecl)) + || strcmp (name, "omp_target_free") == 0) + { + tree omp_free_type + = build_function_type_list (void_type_node, ptr_type_node, + pointer_sized_int_node, + NULL_TREE); + tree repl = build_fn_decl ("omp_free", omp_free_type); + tree ptr = gimple_call_arg (gs, 0); + gimple *g = gimple_build_call (repl, 2, ptr, allocator); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi_p, g, true); + } + } + } + break; + + default: + break; + } + + return NULL_TREE; +} + +namespace { + +const pass_data pass_data_usm_transform = +{ + GIMPLE_PASS, /* type */ + "usm_transform", /* name */ + OPTGROUP_OMP, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_gimple_any, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_usm_transform : public gimple_opt_pass +{ +public: + pass_usm_transform (gcc::context *ctxt) + : gimple_opt_pass (pass_data_usm_transform, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return (flag_openmp || flag_openmp_simd) + && (flag_offload_memory == OFFLOAD_MEMORY_UNIFIED + || omp_requires_mask & OMP_REQUIRES_UNIFIED_SHARED_MEMORY + || omp_requires_mask & OMP_REQUIRES_UNIFIED_ADDRESS); + } + virtual unsigned int execute (function *) + { + struct walk_stmt_info wi; + gimple_seq body = gimple_body (current_function_decl); + + memset (&wi, 0, sizeof (wi)); + walk_gimple_seq (body, usm_transform, NULL, &wi); + + return 0; + } + +}; // class pass_usm_transform + +} // anon namespace + +gimple_opt_pass * +make_pass_usm_transform (gcc::context *ctxt) +{ + return new pass_usm_transform (ctxt); +} #include "gt-omp-low.h" diff --git a/gcc/passes.def b/gcc/passes.def index 375d3d62d51..7f838bfc96a 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -36,6 +36,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_diagnose_tm_blocks); NEXT_PASS (pass_omp_oacc_kernels_decompose); NEXT_PASS (pass_lower_omp); + NEXT_PASS (pass_usm_transform); NEXT_PASS (pass_lower_cf); NEXT_PASS (pass_lower_tm); NEXT_PASS (pass_refactor_eh); diff --git a/gcc/testsuite/c-c++-common/gomp/usm-2.c b/gcc/testsuite/c-c++-common/gomp/usm-2.c new file mode 100644 index 00000000000..8c20ef94e69 --- /dev/null +++ b/gcc/testsuite/c-c++-common/gomp/usm-2.c @@ -0,0 +1,46 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-fdump-tree-usm_transform" } */ + +#pragma omp requires unified_shared_memory + +#ifdef __cplusplus +extern "C" { +#endif + +void *malloc (__SIZE_TYPE__); +void *aligned_alloc (__SIZE_TYPE__, __SIZE_TYPE__); +void *calloc(__SIZE_TYPE__, __SIZE_TYPE__); +void *realloc(void *, __SIZE_TYPE__); +void free (void *); +void *omp_target_alloc (__SIZE_TYPE__, int); +void omp_target_free (void *, int); + +#ifdef __cplusplus +} +#endif + +void +foo () +{ + void *p1 = malloc(20); + void *p2 = realloc(p1, 30); + void *p3 = calloc(4, 15); + void *p4 = aligned_alloc(16, 40); + void *p5 = omp_target_alloc(50, 1); + free (p2); + free (p3); + free (p4); + omp_target_free (p5, 1); +} + +/* { dg-final { scan-tree-dump-times "omp_alloc \\(20, 10\\)" 1 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-times "omp_realloc \\(.*, 30, 10, 10\\)" 1 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-times "omp_calloc \\(4, 15, 10\\)" 1 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-times "omp_aligned_alloc \\(16, 40, 10\\)" 1 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-times "omp_alloc \\(50, 10\\)" 1 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-times "omp_free" 4 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-not " free" "usm_transform" } } */ +/* { dg-final { scan-tree-dump-not " aligned_alloc" "usm_transform" } } */ +/* { dg-final { scan-tree-dump-not " malloc" "usm_transform" } } */ +/* { dg-final { scan-tree-dump-not " omp_target_alloc" "usm_transform" } } */ +/* { dg-final { scan-tree-dump-not " omp_target_free" "usm_transform" } } */ diff --git a/gcc/testsuite/c-c++-common/gomp/usm-3.c b/gcc/testsuite/c-c++-common/gomp/usm-3.c new file mode 100644 index 00000000000..2b0cbb45e27 --- /dev/null +++ b/gcc/testsuite/c-c++-common/gomp/usm-3.c @@ -0,0 +1,44 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-foffload-memory=unified -fdump-tree-usm_transform" } */ + +#ifdef __cplusplus +extern "C" { +#endif + +void *malloc (__SIZE_TYPE__); +void *aligned_alloc (__SIZE_TYPE__, __SIZE_TYPE__); +void *calloc(__SIZE_TYPE__, __SIZE_TYPE__); +void *realloc(void *, __SIZE_TYPE__); +void free (void *); +void *omp_target_alloc (__SIZE_TYPE__, int); +void omp_target_free (void *, int); + +#ifdef __cplusplus +} +#endif + +void +foo () +{ + void *p1 = malloc(20); + void *p2 = realloc(p1, 30); + void *p3 = calloc(4, 15); + void *p4 = aligned_alloc(16, 40); + void *p5 = omp_target_alloc(50, 1); + free (p2); + free (p3); + free (p4); + omp_target_free (p5, 1); +} + +/* { dg-final { scan-tree-dump-times "omp_alloc \\(20, 10\\)" 1 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-times "omp_realloc \\(.*, 30, 10, 10\\)" 1 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-times "omp_calloc \\(4, 15, 10\\)" 1 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-times "omp_aligned_alloc \\(16, 40, 10\\)" 1 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-times "omp_alloc \\(50, 10\\)" 1 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-times "omp_free" 4 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-not " free" "usm_transform" } } */ +/* { dg-final { scan-tree-dump-not " aligned_alloc" "usm_transform" } } */ +/* { dg-final { scan-tree-dump-not " malloc" "usm_transform" } } */ +/* { dg-final { scan-tree-dump-not " omp_target_alloc" "usm_transform" } } */ +/* { dg-final { scan-tree-dump-not " omp_target_free" "usm_transform" } } */ diff --git a/gcc/testsuite/g++.dg/gomp/usm-1.C b/gcc/testsuite/g++.dg/gomp/usm-1.C new file mode 100644 index 00000000000..bd70a81b5bb --- /dev/null +++ b/gcc/testsuite/g++.dg/gomp/usm-1.C @@ -0,0 +1,32 @@ +// { dg-do compile } +// { dg-options "-fopenmp -fdump-tree-usm_transform" } + +#pragma omp requires unified_shared_memory + +struct t1 +{ + int a; + int b; +}; + +typedef unsigned char uint8_t; + +void +foo (__SIZE_TYPE__ x, __SIZE_TYPE__ y) +{ + uint8_t *p1 = new uint8_t; + uint8_t *p2 = new uint8_t[20]; + t1 *p3 = new t1; + t1 *p4 = new t1[y]; + delete p1; + delete p3; + delete [] p2; + delete [] p4; +} + +/* { dg-final { scan-tree-dump-times "omp_alloc \\(1, 10\\)" 1 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-times "omp_alloc \\(20, 10\\)" 1 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-times "omp_alloc" 4 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-times "omp_free" 4 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-not "operator new" "usm_transform" } } */ +/* { dg-final { scan-tree-dump-not "operator delete" "usm_transform" } } */ diff --git a/gcc/testsuite/g++.dg/gomp/usm-2.C b/gcc/testsuite/g++.dg/gomp/usm-2.C new file mode 100644 index 00000000000..f6ab155c6de --- /dev/null +++ b/gcc/testsuite/g++.dg/gomp/usm-2.C @@ -0,0 +1,30 @@ +// { dg-do compile } +// { dg-options "-fopenmp -foffload-memory=unified -fdump-tree-usm_transform" } + +struct t1 +{ + int a; + int b; +}; + +typedef unsigned char uint8_t; + +void +foo (__SIZE_TYPE__ x, __SIZE_TYPE__ y) +{ + uint8_t *p1 = new uint8_t; + uint8_t *p2 = new uint8_t[20]; + t1 *p3 = new t1; + t1 *p4 = new t1[y]; + delete p1; + delete p3; + delete [] p2; + delete [] p4; +} + +/* { dg-final { scan-tree-dump-times "omp_alloc \\(1, 10\\)" 1 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-times "omp_alloc \\(20, 10\\)" 1 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-times "omp_alloc" 4 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-times "omp_free" 4 "usm_transform" } } */ +/* { dg-final { scan-tree-dump-not "operator new" "usm_transform" } } */ +/* { dg-final { scan-tree-dump-not "operator delete" "usm_transform" } } */ diff --git a/gcc/testsuite/g++.dg/gomp/usm-3.C b/gcc/testsuite/g++.dg/gomp/usm-3.C new file mode 100644 index 00000000000..50ac9302c8b --- /dev/null +++ b/gcc/testsuite/g++.dg/gomp/usm-3.C @@ -0,0 +1,38 @@ +// { dg-do compile } +// { dg-options "-fopenmp -fdump-tree-usm_transform" } + +#pragma omp requires unified_shared_memory + +#include + + +struct X { + static void* operator new(std::size_t count) + { + static char buf[10]; + return &buf[0]; + } + static void* operator new[](std::size_t count) + { + static char buf[10]; + return &buf[0]; + } + static void operator delete(void*) + { + } + static void operator delete[](void*) + { + } +}; +void foo() { + X* p1 = new X; + delete p1; + X* p2 = new X[10]; + delete[] p2; + unsigned char buf[24] ; + int *p3 = new (buf) int(3); + p3[0] = 1; +} + +/* { dg-final { scan-tree-dump-not "omp_alloc" "usm_transform" } } */ +/* { dg-final { scan-tree-dump-not "omp_free" "usm_transform" } } */ diff --git a/gcc/testsuite/gfortran.dg/gomp/usm-2.f90 b/gcc/testsuite/gfortran.dg/gomp/usm-2.f90 new file mode 100644 index 00000000000..dc775260cb7 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/usm-2.f90 @@ -0,0 +1,16 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-usm_transform" } + +!$omp requires unified_shared_memory +end + +subroutine foo() + implicit none + integer, allocatable :: var1 + + allocate(var1) + +end subroutine + +! { dg-final { scan-tree-dump-times "omp_alloc" 1 "usm_transform" } } +! { dg-final { scan-tree-dump-times "omp_free" 1 "usm_transform" } } \ No newline at end of file diff --git a/gcc/testsuite/gfortran.dg/gomp/usm-3.f90 b/gcc/testsuite/gfortran.dg/gomp/usm-3.f90 new file mode 100644 index 00000000000..7983444ebff --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/usm-3.f90 @@ -0,0 +1,13 @@ +! { dg-do compile } +! { dg-additional-options "-foffload-memory=unified -fdump-tree-usm_transform" } + +subroutine foo() + implicit none + integer, allocatable :: var1 + + allocate(var1) + +end subroutine + +! { dg-final { scan-tree-dump-times "omp_alloc" 1 "usm_transform" } } +! { dg-final { scan-tree-dump-times "omp_free" 1 "usm_transform" } } \ No newline at end of file diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 606d1d60b85..494a9662afa 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -424,6 +424,7 @@ extern gimple_opt_pass *make_pass_lower_vector_ssa (gcc::context *ctxt); extern gimple_opt_pass *make_pass_omp_oacc_kernels_decompose (gcc::context *ctxt); extern gimple_opt_pass *make_pass_lower_omp (gcc::context *ctxt); extern gimple_opt_pass *make_pass_diagnose_omp_blocks (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_usm_transform (gcc::context *ctxt); extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt); extern gimple_opt_pass *make_pass_expand_omp_ssa (gcc::context *ctxt); extern gimple_opt_pass *make_pass_omp_target_link (gcc::context *ctxt); diff --git a/libgomp/testsuite/libgomp.c++/usm-1.C b/libgomp/testsuite/libgomp.c++/usm-1.C new file mode 100644 index 00000000000..fea25e5f10b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/usm-1.C @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-skip-if "Only valid for nvptx" { ! offload_target_nvptx } } */ +#include + +#pragma omp requires unified_shared_memory + +int g1 = 0; + +struct s1 +{ + s1() { a = g1++;} + ~s1() { g1--;} + int a; +}; + +int +main () +{ + s1 *p1 = new s1; + s1 *p2 = new s1[10]; + + if (!p1 || !p2 || p1->a != 0) + __builtin_abort (); + + for (int i = 0; i < 10; i++) + if (p2[i].a != i+1) + __builtin_abort (); + + uintptr_t pp1 = (uintptr_t)p1; + uintptr_t pp2 = (uintptr_t)p2; + +#pragma omp target firstprivate(pp1, pp2) + { + s1 *t1 = (s1*)pp1; + s1 *t2 = (s1*)pp2; + if (t1->a != 0) + __builtin_abort (); + + for (int i = 0; i < 10; i++) + if (t2[i].a != i+1) + __builtin_abort (); + + t1->a = 42; + } + + if (p1->a != 42) + __builtin_abort (); + + delete [] p2; + delete p1; + if (g1 != 0) + __builtin_abort (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/usm-6.c b/libgomp/testsuite/libgomp.c/usm-6.c new file mode 100644 index 00000000000..c207140092a --- /dev/null +++ b/libgomp/testsuite/libgomp.c/usm-6.c @@ -0,0 +1,92 @@ +/* { dg-do run } */ +/* { dg-skip-if "Only valid for nvptx" { ! offload_target_nvptx } } */ + +#include +#include + +#include + +/* On old systems, the declaraition may not be present in stdlib.h which + will generate a warning. This function is going to be replaced with + omp_aligned_alloc so the purpose of this declaration is to avoid that + warning. */ +void *aligned_alloc(size_t alignment, size_t size); + +#pragma omp requires unified_shared_memory + +int +main () +{ + int *a = (int *) malloc(sizeof(int)*2); + int *b = (int *) calloc(sizeof(int), 3); + int *c = (int *) realloc(NULL, sizeof(int) * 4); + int *d = (int *) aligned_alloc(32, sizeof(int)); + int *e = (int *) omp_target_alloc(sizeof(int), 1); + if (!a || !b || !c || !d || !e) + __builtin_abort (); + + a[0] = 42; + a[1] = 43; + b[0] = 52; + b[1] = 53; + b[2] = 54; + c[0] = 62; + c[1] = 63; + c[2] = 64; + c[3] = 65; + + uintptr_t a_p = (uintptr_t)a; + uintptr_t b_p = (uintptr_t)b; + uintptr_t c_p = (uintptr_t)c; + uintptr_t d_p = (uintptr_t)d; + uintptr_t e_p = (uintptr_t)e; + + if (d_p & 31 != 0) + __builtin_abort (); + +#pragma omp target enter data map(to:a[0:2]) + +#pragma omp target is_device_ptr(c) + { + if (a[0] != 42 || a_p != (uintptr_t)a) + __builtin_abort (); + if (b[0] != 52 || b[2] != 54 || b_p != (uintptr_t)b) + __builtin_abort (); + if (c[0] != 62 || c[3] != 65 || c_p != (uintptr_t)c) + __builtin_abort (); + if (d_p != (uintptr_t)d) + __builtin_abort (); + if (e_p != (uintptr_t)e) + __builtin_abort (); + a[0] = 72; + b[0] = 82; + c[0] = 92; + e[0] = 102; + } + +#pragma omp target + { + if (a[1] != 43 || a_p != (uintptr_t)a) + __builtin_abort (); + if (b[1] != 53 || b_p != (uintptr_t)b) + __builtin_abort (); + if (c[1] != 63 || c[2] != 64 || c_p != (uintptr_t)c) + __builtin_abort (); + a[1] = 73; + b[1] = 83; + c[1] = 93; + } + +#pragma omp target exit data map(delete:a[0:2]) + + if (a[0] != 72 || a[1] != 73 + || b[0] != 82 || b[1] != 83 + || c[0] != 92 || c[1] != 93 + || e[0] != 102) + __builtin_abort (); + free(a); + free(b); + free(c); + omp_target_free(e, 1); + return 0; +}