diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 4df05453604..88cc505597e 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -68,6 +68,11 @@ static bool ext_gcn_constants_init = 0; enum gcn_isa gcn_isa = ISA_GCN3; /* Default to GCN3. */ +/* Record whether the host compiler added "omp unified memory" attributes to + any functions. We can then pass this on to mkoffload to ensure xnack is + compatible there too. */ +static bool unified_shared_memory_enabled = false; + /* Reserve this much space for LDS (for propagating variables from worker-single mode to worker-partitioned mode), per workgroup. Global analysis could calculate an exact bound, but we don't do that yet. @@ -2542,6 +2547,25 @@ gcn_init_cumulative_args (CUMULATIVE_ARGS *cum /* Argument info to init */ , if (!caller && cfun->machine->normal_function) gcn_detect_incoming_pointer_arg (fndecl); + if (fndecl && lookup_attribute ("omp unified memory", + DECL_ATTRIBUTES (fndecl))) + { + unified_shared_memory_enabled = true; + + switch (gcn_arch) + { + case PROCESSOR_FIJI: + case PROCESSOR_VEGA10: + case PROCESSOR_VEGA20: + error ("GPU architecture does not support Unified Shared Memory"); + default: + ; + } + + if (flag_xnack == HSACO_ATTR_OFF) + error ("Unified Shared Memory is enabled, but XNACK is disabled"); + } + reinit_regs (); } @@ -5458,12 +5482,14 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree) assemble_name (file, name); fputs (":\n", file); - /* This comment is read by mkoffload. */ + /* These comments are read by mkoffload. */ if (flag_openacc) fprintf (file, "\t;; OPENACC-DIMS: %d, %d, %d : %s\n", oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_GANG), oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_WORKER), oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_VECTOR), name); + if (unified_shared_memory_enabled) + fprintf (file, "\t;; MKOFFLOAD OPTIONS: USM+\n"); } /* Implement TARGET_ASM_SELECT_SECTION. 
diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc index cb8903c27cb..5741d0a917b 100644 --- a/gcc/config/gcn/mkoffload.cc +++ b/gcc/config/gcn/mkoffload.cc @@ -80,6 +80,8 @@ == EF_AMDGPU_FEATURE_XNACK_ANY_V4) #define TEST_XNACK_ON(VAR) ((VAR & EF_AMDGPU_FEATURE_XNACK_V4) \ == EF_AMDGPU_FEATURE_XNACK_ON_V4) +#define TEST_XNACK_OFF(VAR) ((VAR & EF_AMDGPU_FEATURE_XNACK_V4) \ + == EF_AMDGPU_FEATURE_XNACK_OFF_V4) #define SET_SRAM_ECC_ON(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \ | EF_AMDGPU_FEATURE_SRAMECC_ON_V4) @@ -474,6 +476,7 @@ static void process_asm (FILE *in, FILE *out, FILE *cfile) { int fn_count = 0, var_count = 0, dims_count = 0, regcount_count = 0; + bool unified_shared_memory_enabled = false; struct obstack fns_os, dims_os, regcounts_os; obstack_init (&fns_os); obstack_init (&dims_os); @@ -498,6 +501,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile) fn_count += 2; char buf[1000]; + char dummy; enum { IN_CODE, IN_METADATA, @@ -517,6 +521,9 @@ process_asm (FILE *in, FILE *out, FILE *cfile) dims_count++; } + if (sscanf (buf, " ;; MKOFFLOAD OPTIONS: USM+%c", &dummy) > 0) + unified_shared_memory_enabled = true; + break; } case IN_METADATA: @@ -565,7 +572,6 @@ process_asm (FILE *in, FILE *out, FILE *cfile) } } - char dummy; if (sscanf (buf, " .section .gnu.offload_vars%c", &dummy) > 0) { state = IN_VARS; @@ -617,6 +623,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile) fprintf (cfile, "#include \n"); fprintf (cfile, "#include \n"); fprintf (cfile, "#include \n\n"); + fprintf (cfile, "#include \n\n"); fprintf (cfile, "static const int gcn_num_vars = %d;\n\n", var_count); @@ -657,6 +664,34 @@ process_asm (FILE *in, FILE *out, FILE *cfile) } fprintf (cfile, "\n};\n\n"); + /* Emit a constructor function to set the HSA_XNACK environment variable. This must be done before the ROCr runtime library is loaded. 
+ We never override a user value (except an empty string), but we do emit a + useful diagnostic in the wrong mode (the ROCr message is not good). */ + if (TEST_XNACK_OFF (elf_flags) && unified_shared_memory_enabled) + fatal_error (input_location, + "conflicting settings; XNACK is forced off but Unified " + "Shared Memory is on"); + if (!TEST_XNACK_ANY (elf_flags) || unified_shared_memory_enabled) + fprintf (cfile, + "static __attribute__((constructor))\n" + "void configure_xnack (void)\n" + "{\n" + " const char *val = getenv (\"HSA_XNACK\");\n" + " if (!val || val[0] == '\\0')\n" + " setenv (\"HSA_XNACK\", \"%d\", true);\n" + " else if (%s)\n" + " {\n" + " fprintf (stderr, \"error: HSA_XNACK=%%s is incompatible; " + "please unset\\n\", val);\n" + " exit (1);\n" + " }\n" + "}\n\n", + unified_shared_memory_enabled || TEST_XNACK_ON (elf_flags), + (unified_shared_memory_enabled || TEST_XNACK_ON (elf_flags) + ? "val[0] != '1' || val[1] != '\\0'" + : "val[0] == '1' && val[1] == '\\0'")); + obstack_free (&fns_os, NULL); for (i = 0; i < dims_count; i++) free (dims[i].name); diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc index 7d1a2a0d795..239446beb52 100644 --- a/gcc/omp-low.cc +++ b/gcc/omp-low.cc @@ -2107,6 +2107,10 @@ create_omp_child_function (omp_context *ctx, bool task_copy) DECL_ATTRIBUTES (decl) = tree_cons (get_identifier (target_attr), NULL_TREE, DECL_ATTRIBUTES (decl)); + if (flag_offload_memory == OFFLOAD_MEMORY_UNIFIED) + DECL_ATTRIBUTES (decl) + = tree_cons (get_identifier ("omp unified memory"), + NULL_TREE, DECL_ATTRIBUTES (decl)); } t = build_decl (DECL_SOURCE_LOCATION (decl),