commit 15114e33ecb6cb687dbdfb30d69d7dcbeeb87fca Author: Julian Brown Date: Thu Dec 6 04:38:59 2018 -0800 Enable GOMP_MAP_FIRSTPRIVATE_INT for OpenACC gcc/ * omp-low.c (maybe_lookup_field_in_outer_ctx): New function. (convert_to_firstprivate_int): New function. (convert_from_firstprivate_int): New function. (lower_omp_target): Enable GOMP_MAP_FIRSTPRIVATE_INT in OpenACC. libgomp/ * oacc-parallel.c (GOACC_parallel_keyed): Handle GOMP_MAP_FIRSTPRIVATE_INT host addresses. * plugin/plugin-nvptx.c (nvptx_exec): Handle GOMP_MAP_FIRSTPRIVATE_INT host addresses. * testsuite/libgomp.oacc-c++/firstprivate-int.C: New test. * testsuite/libgomp.oacc-c-c++-common/firstprivate-int.c: New test. * testsuite/libgomp.oacc-fortran/firstprivate-int.f90: New test. diff --git a/gcc/omp-low.c b/gcc/omp-low.c index b406ce7..adc686c 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -3497,6 +3497,19 @@ maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx) return t ? t : decl; } +/* Returns true if DECL is present inside a field that encloses CTX. */ + +static bool +maybe_lookup_field_in_outer_ctx (tree decl, omp_context *ctx) +{ + omp_context *up; + + for (up = ctx->outer; up; up = up->outer) + if (maybe_lookup_field (decl, up)) + return true; + + return false; +} /* Construct the initialization value for reduction operation OP. */ @@ -9052,6 +9065,87 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) } } +/* Helper function for lower_omp_target. Converts VAR to something that can + be represented by a POINTER_SIZED_INT_NODE. Any new instructions are + appended to GS. This is used to optimize firstprivate variables, so that + small types (less precision than POINTER_SIZE) do not require additional + data mappings. */ + +static tree +convert_to_firstprivate_int (tree var, gimple_seq *gs) +{ + tree type = TREE_TYPE (var), new_type = NULL_TREE; + tree tmp = NULL_TREE; + + if (omp_is_reference (var)) + type = TREE_TYPE (type); + + if (INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type)) + { + if (omp_is_reference (var)) + { + tmp = create_tmp_var (type); + gimplify_assign (tmp, build_simple_mem_ref (var), gs); + var = tmp; + } + + return fold_convert (pointer_sized_int_node, var); + } + + gcc_assert (tree_to_uhwi (TYPE_SIZE (type)) <= POINTER_SIZE); + + new_type = lang_hooks.types.type_for_size (tree_to_uhwi (TYPE_SIZE (type)), + true); + + if (omp_is_reference (var)) + { + tmp = create_tmp_var (type); + gimplify_assign (tmp, build_simple_mem_ref (var), gs); + var = tmp; + } + + tmp = create_tmp_var (new_type); + var = fold_build1 (VIEW_CONVERT_EXPR, new_type, var); + gimplify_assign (tmp, var, gs); + + return fold_convert (pointer_sized_int_node, tmp); +} + +/* Like convert_to_firstprivate_int, but restore the original type. */ + +static tree +convert_from_firstprivate_int (tree var, bool is_ref, gimple_seq *gs) +{ + tree type = TREE_TYPE (var); + tree new_type = NULL_TREE; + tree tmp = NULL_TREE; + + gcc_assert (TREE_CODE (var) == MEM_REF); + var = TREE_OPERAND (var, 0); + + if (INTEGRAL_TYPE_P (var) || POINTER_TYPE_P (type)) + return fold_convert (type, var); + + gcc_assert (tree_to_uhwi (TYPE_SIZE (type)) <= POINTER_SIZE); + + new_type = lang_hooks.types.type_for_size (tree_to_uhwi (TYPE_SIZE (type)), + true); + + tmp = create_tmp_var (new_type); + var = fold_convert (new_type, var); + gimplify_assign (tmp, var, gs); + var = fold_build1 (VIEW_CONVERT_EXPR, type, tmp); + + if (is_ref) + { + tmp = create_tmp_var (build_pointer_type (type)); + gimplify_assign (tmp, build_fold_addr_expr (var), gs); + var = tmp; + } + + return var; +} + /* Lower the GIMPLE_OMP_TARGET in the current statement in GSI_P. CTX holds context information for the directive. */ @@ -9213,25 +9307,43 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (offloaded && !(OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && OMP_CLAUSE_MAP_IN_REDUCTION (c))) { - x = build_receiver_ref (var, true, ctx); + tree var_type = TREE_TYPE (var); tree new_var = lookup_decl (var, ctx); + tree inner_type = omp_is_reference (new_var) + ? TREE_TYPE (var_type) : var_type; + + x = build_receiver_ref (var, true, ctx); + + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE + && (FLOAT_TYPE_P (inner_type) + || ANY_INTEGRAL_TYPE_P (inner_type)) + && tree_to_uhwi (TYPE_SIZE (inner_type)) <= POINTER_SIZE + && !maybe_lookup_field_in_outer_ctx (var, ctx)) + { + gcc_assert (is_gimple_omp_oacc (ctx->stmt)); + x = convert_from_firstprivate_int (x, omp_is_reference (var), + &fplist); + gimplify_assign (new_var, x, &fplist); + map_cnt++; + break; + } if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c) - && TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE) + && TREE_CODE (var_type) == ARRAY_TYPE) x = build_simple_mem_ref (x); + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { gcc_assert (is_gimple_omp_oacc (ctx->stmt)); if (omp_is_reference (new_var) - && TREE_CODE (TREE_TYPE (new_var)) != POINTER_TYPE) + && TREE_CODE (var_type) != POINTER_TYPE) { /* Create a local object to hold the instance value. */ - tree type = TREE_TYPE (TREE_TYPE (new_var)); const char *id = IDENTIFIER_POINTER (DECL_NAME (new_var)); - tree inst = create_tmp_var (type, id); + tree inst = create_tmp_var (TREE_TYPE (var_type), id); gimplify_assign (inst, fold_indirect_ref (x), &fplist); x = build_fold_addr_expr (inst); } @@ -9386,6 +9498,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) { tree ovar, nc, s, purpose, var, x, type; unsigned int talign; + bool oacc_firstprivate_int; default: break; @@ -9394,6 +9507,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) case OMP_CLAUSE_TO: case OMP_CLAUSE_FROM: oacc_firstprivate_map: + oacc_firstprivate_int = false; nc = c; ovar = OMP_CLAUSE_DECL (c); if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP @@ -9459,8 +9573,22 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) } else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { - gcc_assert (is_gimple_omp_oacc (ctx->stmt)); - if (!omp_is_reference (var)) + gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt)); + tree type = TREE_TYPE (var); + tree inner_type + = omp_is_reference (var) ? TREE_TYPE (type) : type; + if ((FLOAT_TYPE_P (inner_type) + || ANY_INTEGRAL_TYPE_P (inner_type)) + && tree_to_uhwi (TYPE_SIZE (inner_type)) <= POINTER_SIZE + && !maybe_lookup_field_in_outer_ctx (var, ctx)) + { + oacc_firstprivate_int = true; + if (is_gimple_reg (var) + && OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c)) + TREE_NO_WARNING (var) = 1; + var = convert_to_firstprivate_int (var, &ilist); + } + else if (!omp_is_reference (var)) { if (is_gimple_reg (var) && OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c)) @@ -9512,10 +9640,15 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt)); - s = TREE_TYPE (ovar); - if (TREE_CODE (s) == REFERENCE_TYPE) - s = TREE_TYPE (s); - s = TYPE_SIZE_UNIT (s); + if (oacc_firstprivate_int) + s = size_int (0); + else + { + s = TREE_TYPE (ovar); + if (TREE_CODE (s) == REFERENCE_TYPE) + s = TREE_TYPE (s); + s = TYPE_SIZE_UNIT (s); + } } else s = OMP_CLAUSE_SIZE (c); @@ -9565,7 +9698,10 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) break; case OMP_CLAUSE_FIRSTPRIVATE: gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt)); - tkind = GOMP_MAP_TO; + if (oacc_firstprivate_int) + tkind = GOMP_MAP_FIRSTPRIVATE_INT; + else + tkind = GOMP_MAP_TO; tkind_zero = tkind; break; case OMP_CLAUSE_TO: diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 861f3df..5127af5 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -231,9 +231,12 @@ GOACC_parallel_keyed (int device, void (*fn) (void *), devaddrs = gomp_alloca (sizeof (void *) * mapnum); for (i = 0; i < mapnum; i++) - devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start - + tgt->list[i].key->tgt_offset - + tgt->list[i].offset); + if (tgt->list[i].key != NULL) + devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start + + tgt->list[i].key->tgt_offset + + tgt->list[i].offset); + else + devaddrs[i] = NULL; acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, async, dims, tgt); diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 6492e5f..a6e20bf 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -1314,7 +1314,7 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, /* Copy the array of arguments to the mapped page. */ hp = alloca(sizeof(void *) * mapnum); for (i = 0; i < mapnum; i++) - ((void **) hp)[i] = devaddrs[i]; + ((void **) hp)[i] = devaddrs[i] != 0 ? devaddrs[i] : hostaddrs[i]; /* Copy the (device) pointers to arguments to the device */ CUDA_CALL_ASSERT (cuMemcpyHtoD, dp, hp, diff --git a/libgomp/testsuite/libgomp.oacc-c++/firstprivate-int.C b/libgomp/testsuite/libgomp.oacc-c++/firstprivate-int.C new file mode 100644 index 0000000..86b8722 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/firstprivate-int.C @@ -0,0 +1,83 @@ +/* Verify the GOMP_MAP_FIRSTPRIVATE_INT optimization on various types. + This test is similer to the test in libgomp.oacc-c-c++-common, but + it focuses on reference types. */ + +#include +#include +#include + +void test_ref (int8_t &i8i, int8_t &i8o, int16_t &i16i, int16_t &i16o, + int32_t &i32i, int32_t &i32o, int64_t &i64i, int64_t &i64o, + uint8_t &u8i, uint8_t &u8o, uint16_t &u16i, uint16_t &u16o, + uint32_t &u32i, uint32_t &u32o, uint64_t &u64i, uint64_t &u64o, + float &r32i, float &r32o, double &r64i, double &r64o, + int _Complex &cii, int _Complex &cio, + float _Complex &cfi, float _Complex &cfo, + double _Complex &cdi, double _Complex &cdo) +{ +#pragma acc parallel firstprivate (i8i,i16i,i32i,i64i,u8i,u16i,u32i,u64i) \ + firstprivate(r32i,r64i,cii,cfi,cdi) copyout(i8o,i16o,i32o,i64o) \ + copyout(u8o,u16o,u32o,u64o,r32o,r64o,cio,cfo,cdo) num_gangs(1) + { + i8o = i8i; + i16o = i16i; + i32o = i32i; + i64o = i64i; + + u8o = u8i; + u16o = u16i; + u32o = u32i; + u64o = u64i; + + r32o = r32i; + r64o = r64i; + + cio = cii; + cfo = cfi; + cdo = cdi; + } +} + +int +main () +{ + int8_t i8i = -1, i8o; + int16_t i16i = -2, i16o; + int32_t i32i = -3, i32o; + int64_t i64i = -4, i64o; + + uint8_t u8i = 1, u8o; + uint16_t u16i = 2, u16o; + uint32_t u32i = 3, u32o; + uint64_t u64i = 4, u64o; + + float r32i = .5, r32o; + double r64i = .25, r64o; + + int _Complex cii = 2, cio; + float _Complex cfi = 4, cfo; + double _Complex cdi = 8, cdo; + + test_ref (i8i, i8o, i16i, i16o, i32i, i32o, i64i, i64o, u8i, u8o, u16i, + u16o, u32i, u32o, u64i, u64o, r32i, r32o, r64i, r64o, cii, cio, + cfi, cfo, cdi, cdo); + + assert (i8o == i8i); + assert (i16o == i16i); + assert (i32o == i32i); + assert (i64o == i64i); + + assert (u8o == u8i); + assert (u16o == u16i); + assert (u32o == u32i); + assert (u64o == u64i); + + assert (r32o == r32i); + assert (r64o == r64i); + + assert (cio == cii); + assert (cfo == cfi); + assert (cdo == cdi); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-int.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-int.c new file mode 100644 index 0000000..6d14599 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-int.c @@ -0,0 +1,67 @@ +/* Verify the GOMP_MAP_FIRSTPRIVATE_INT optimization on various types. */ + +#include +#include +#include + +int +main () +{ + int8_t i8i = -1, i8o; + int16_t i16i = -2, i16o; + int32_t i32i = -3, i32o; + int64_t i64i = -4, i64o; + + uint8_t u8i = 1, u8o; + uint16_t u16i = 2, u16o; + uint32_t u32i = 3, u32o; + uint64_t u64i = 4, u64o; + + float r32i = .5, r32o; + double r64i = .25, r64o; + + int _Complex cii = 2, cio; + float _Complex cfi = 4, cfo; + double _Complex cdi = 8, cdo; + +#pragma acc parallel firstprivate (i8i,i16i,i32i,i64i,u8i,u16i,u32i,u64i) \ + firstprivate(r32i,r64i,cii,cfi,cdi) copyout(i8o,i16o,i32o,i64o) \ + copyout(u8o,u16o,u32o,u64o,r32o,r64o,cio,cfo,cdo) num_gangs(1) + { + i8o = i8i; + i16o = i16i; + i32o = i32i; + i64o = i64i; + + u8o = u8i; + u16o = u16i; + u32o = u32i; + u64o = u64i; + + r32o = r32i; + r64o = r64i; + + cio = cii; + cfo = cfi; + cdo = cdi; + } + + assert (i8o == i8i); + assert (i16o == i16i); + assert (i32o == i32i); + assert (i64o == i64i); + + assert (u8o == u8i); + assert (u16o == u16i); + assert (u32o == u32i); + assert (u64o == u64i); + + assert (r32o == r32i); + assert (r64o == r64i); + + assert (cio == cii); + assert (cfo == cfi); + assert (cdo == cdi); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/firstprivate-int.f90 b/libgomp/testsuite/libgomp.oacc-fortran/firstprivate-int.f90 new file mode 100644 index 0000000..3b148ce --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/firstprivate-int.f90 @@ -0,0 +1,205 @@ +! Verify the GOMP_MAP_FIRSTPRIVATE_INT optimziation on various types. + +! { dg-do run } + +program test + implicit none + + integer (kind=1) :: i1i, i1o + integer (kind=2) :: i2i, i2o + integer (kind=4) :: i4i, i4o + integer (kind=8) :: i8i, i8o + integer (kind=16) :: i16i, i16o + + logical (kind=1) :: l1i, l1o + logical (kind=2) :: l2i, l2o + logical (kind=4) :: l4i, l4o + logical (kind=8) :: l8i, l8o + logical (kind=16) :: l16i, l16o + + real (kind=4) :: r4i, r4o + real (kind=8) :: r8i, r8o + + complex (kind=4) :: c4i, c4o + complex (kind=8) :: c8i, c8o + + character (kind=1) :: ch1i, ch1o + character (kind=4) :: ch4i, ch4o + + i1i = 1 + i2i = 2 + i4i = 3 + i8i = 4 + i16i = 5 + + l1i = .true. + l2i = .false. + l4i = .true. + l8i = .true. + l16i = .false. + + r4i = .5 + r8i = .25 + + c4i = (2, -2) + c8i = (4, -4) + + ch1i = "a" + ch4i = "b" + + !$acc parallel firstprivate(i1i, i2i, i4i, i8i, i16i) & + !$acc copyout(i1o, i2o, i4o, i8o, i16o) & + !$acc firstprivate(l1i, l2i, l4i, l8i, l16i) & + !$acc copyout(l1o, l2o, l4o, l8o, l16o) & + !$acc firstprivate(r4i, r8i) copyout(r4o, r8o) & + !$acc firstprivate(c4i, c8i) copyout(c4o, c8o) & + !$acc firstprivate(ch1i, ch4i) & + !$acc copyout(ch1o, ch4o) + i1o = i1i + i2o = i2i + i4o = i4i + i8o = i8i + i16o = i16i + + l1o = l1i + l2o = l2i + l4o = l4i + l8o = l8i + l16o = l16i + + r4o = r4i + r8o = r8i + + c4o = c4i + c8o = c8i + + ch1o = ch1i + ch4o = ch4i + !$acc end parallel + + if (i1i /= i1o) stop 1 + if (i2i /= i2o) stop 2 + if (i4i /= i4o) stop 3 + if (i8i /= i8o) stop 4 + if (i16i /= i16o) stop 5 + + if (l1i .neqv. l1o) stop 6 + if (l2i .neqv. l2o) stop 7 + if (l4i .neqv. l4o) stop 8 + if (l8i .neqv. l8o) stop 9 + if (l16i .neqv. l16o) stop 10 + + if (r4i /= r4o) stop 11 + if (r8i /= r8o) stop 12 + + if (c4i /= c4o) stop 13 + if (c8i /= c8o) stop 14 + + if (ch1i /= ch1o) stop 15 + if (ch4i /= ch4o) stop 16 + + call subtest(i1i, i2i, i4i, i8i, i16i, i1o, i2o, i4o, i8o, i16o, & + l1i, l2i, l4i, l8i, l16i, l1o, l2o, l4o, l8o, l16o, & + r4i, r8i, r4o, r8o, c4i, c8i, c4o, c8o, & + ch1i, ch4i, ch1o, ch4o) +end program test + +subroutine subtest(i1i, i2i, i4i, i8i, i16i, i1o, i2o, i4o, i8o, i16o, & + l1i, l2i, l4i, l8i, l16i, l1o, l2o, l4o, l8o, l16o, & + r4i, r8i, r4o, r8o, c4i, c8i, c4o, c8o, & + ch1i, ch4i, ch1o, ch4o) + implicit none + + integer (kind=1) :: i1i, i1o + integer (kind=2) :: i2i, i2o + integer (kind=4) :: i4i, i4o + integer (kind=8) :: i8i, i8o + integer (kind=16) :: i16i, i16o + + logical (kind=1) :: l1i, l1o + logical (kind=2) :: l2i, l2o + logical (kind=4) :: l4i, l4o + logical (kind=8) :: l8i, l8o + logical (kind=16) :: l16i, l16o + + real (kind=4) :: r4i, r4o + real (kind=8) :: r8i, r8o + + complex (kind=4) :: c4i, c4o + complex (kind=8) :: c8i, c8o + + character (kind=1) :: ch1i, ch1o + character (kind=4) :: ch4i, ch4o + + i1i = -i1i + i2i = -i2i + i4i = -i4i + i8i = -i8i + i16i = -i16i + + l1i = .not. l1i + l2i = .not. l2i + l4i = .not. l4i + l8i = .not. l8i + l16i = .not. l16i + + r4i = -r4i + r8i = -r8i + + c4i = -c4i + c8i = -c8i + + ch1i = "z" + ch4i = "y" + + !$acc parallel firstprivate(i1i, i2i, i4i, i8i, i16i) & + !$acc copyout(i1o, i2o, i4o, i8o, i16o) & + !$acc firstprivate(l1i, l2i, l4i, l8i, l16i) & + !$acc copyout(l1o, l2o, l4o, l8o, l16o) & + !$acc firstprivate(r4i, r8i) copyout(r4o, r8o) & + !$acc firstprivate(c4i, c8i) copyout(c4o, c8o) & + !$acc firstprivate(ch1i, ch4i) & + !$acc copyout(ch1o, ch4o) + i1o = i1i + i2o = i2i + i4o = i4i + i8o = i8i + i16o = i16i + + l1o = l1i + l2o = l2i + l4o = l4i + l8o = l8i + l16o = l16i + + r4o = r4i + r8o = r8i + + c4o = c4i + c8o = c8i + + ch1o = ch1i + ch4o = ch4i + !$acc end parallel + + if (i1i /= i1o) stop 17 + if (i2i /= i2o) stop 18 + if (i4i /= i4o) stop 19 + if (i8i /= i8o) stop 20 + if (i16i /= i16o) stop 21 + + if (l1i .neqv. l1o) stop 22 + if (l2i .neqv. l2o) stop 23 + if (l4i .neqv. l4o) stop 24 + if (l8i .neqv. l8o) stop 25 + if (l16i .neqv. l16o) stop 26 + + if (r4i /= r4o) stop 27 + if (r8i /= r8o) stop 28 + + if (c4i /= c4o) stop 29 + if (c8i /= c8o) stop 30 + + if (ch1i /= ch1o) stop 31 + if (ch4i /= ch4o) stop 32 +end subroutine subtest