commit eb1fe25b125cd153b2c661f1c54e61fcecf4754b Author: Sandra Loosemore Date: Sat Oct 15 02:03:03 2022 +0000 OpenMP: Generate SIMD clones for functions with "declare target" This patch causes the IPA simdclone pass to generate clones for functions with the "omp declare target" attribute as if they had "omp declare simd", provided the function appears to be suitable for SIMD execution. The filter is conservative, rejecting functions that write memory or that call other functions not known to be safe. A new option -fopenmp-target-simd-clone is added to control this transformation; it's enabled at -O2 and higher. gcc/ChangeLog: * common.opt (fopenmp-target-simd-clone): New option. * config/aarch64/aarch64.cc (aarch64_simd_clone_compute_vecsize_and_simdlen): Update. * config/gcn/gcn.cc (gcn_simd_clone_compute_vecsize_and_simdlen): Update. * config/i386/i386.cc (ix86_simd_clone_compute_vecsize_and_simdlen): Update. * doc/invoke.texi (-fopenmp-target-simd-clone): Document. * doc/tm.texi: Regenerated. * function.h (struct function): Add auto_simd_clone_candidate and auto_simd_call_candidate bitfields. * lto-streamer-in.cc (input_struct_function_base): Handle the new bitfields. * lto-streamer-out.cc (output_struct_function_base): Likewise. * omp-low.cc (auto_declare_simd_signature_ok): New. (check_auto_declare_simd_stmt): New. (check_auto_declare_simd_op): New. (maybe_mark_auto_declare_simd): New. (auto_declare_simd_scan_for_calls): New. (auto_declare_simd_scan_for_loops): New. (execute_auto_declare_simd): New. (pass_data_omp_auto_declare_simd): New. (pass_omp_auto_declare_simd): New. (make_pass_omp_auto_declare_simd): New. * omp-simd-clone.cc (simd_clone_create): Add force_local argument, make the symbol have internal linkage if it is true. (expand_simd_clones): Also check for cloneable functions with "omp declare target". Pass explicit_p argument to simd_clone.compute_vecsize_and_simdlen target hook. * opts.cc (default_options_table): Add -fopenmp-target-simd-clone. * passes.def: Add pass_omp_auto_declare_simd. * target.def (TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN): Add bool explicit_p argument. * tree-pass.h: (make_pass_omp_auto_declare_simd): Declare. gcc/testsuite/ChangeLog: * g++.dg/gomp/target-simd-clone-1.C: New. * g++.dg/gomp/target-simd-clone-2.C: New. * gcc.dg/gomp/target-simd-clone-1.c: New. * gcc.dg/gomp/target-simd-clone-2.c: New. * gcc.dg/gomp/target-simd-clone-3.c: New. * gcc.dg/gomp/target-simd-clone-4.c: New. * gcc.dg/gomp/target-simd-clone-5.c: New. * gcc.dg/gomp/target-simd-clone-6.c: New. * gcc.dg/gomp/target-simd-clone-7.c: New. * gcc.dg/gomp/target-simd-clone-8.c: New. diff --git a/gcc/common.opt b/gcc/common.opt index bce3e514f65..5ee7d96d65d 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2214,6 +2214,10 @@ fomit-frame-pointer Common Var(flag_omit_frame_pointer) Optimization When possible do not generate stack frames. +fopenmp-target-simd-clone +Common Var(flag_openmp_target_simd_clone) Optimization +Generate SIMD clones for functions with the OpenMP declare target directive. + fopt-info Common Var(flag_opt_info) Optimization Enable all optimization info dumps on stderr. diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 1d0f994f281..2aafd0d2dd7 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -26698,7 +26698,8 @@ currently_supported_simd_type (tree t, tree b) static int aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, struct cgraph_simd_clone *clonei, - tree base_type, int num) + tree base_type, int num, + bool explicit_p) { tree t, ret_type; unsigned int elt_bits, count; @@ -26716,8 +26717,9 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, || const_simdlen > 1024 || (const_simdlen & (const_simdlen - 1)) != 0)) { - warning_at (DECL_SOURCE_LOCATION (node->decl), 0, - "unsupported simdlen %wd", const_simdlen); + if (explicit_p) + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported simdlen %wd", const_simdlen); return 0; } @@ -26725,7 +26727,9 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, if (TREE_CODE (ret_type) != VOID_TYPE && !currently_supported_simd_type (ret_type, base_type)) { - if (TYPE_SIZE (ret_type) != TYPE_SIZE (base_type)) + if (!explicit_p) + ; + else if (TYPE_SIZE (ret_type) != TYPE_SIZE (base_type)) warning_at (DECL_SOURCE_LOCATION (node->decl), 0, "GCC does not currently support mixed size types " "for % functions"); @@ -26752,7 +26756,9 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, if (clonei->args[i].arg_type != SIMD_CLONE_ARG_TYPE_UNIFORM && !currently_supported_simd_type (arg_type, base_type)) { - if (TYPE_SIZE (arg_type) != TYPE_SIZE (base_type)) + if (!explicit_p) + ; + else if (TYPE_SIZE (arg_type) != TYPE_SIZE (base_type)) warning_at (DECL_SOURCE_LOCATION (node->decl), 0, "GCC does not currently support mixed size types " "for % functions"); @@ -26782,9 +26788,11 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, if (clonei->simdlen.is_constant (&const_simdlen) && maybe_ne (vec_bits, 64U) && maybe_ne (vec_bits, 128U)) { - warning_at (DECL_SOURCE_LOCATION (node->decl), 0, - "GCC does not currently support simdlen %wd for type %qT", - const_simdlen, base_type); + if (explicit_p) + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "GCC does not currently support simdlen %wd for " + "type %qT", + const_simdlen, base_type); return 0; } } diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 3dc294c2d2f..d7059a2091c 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -5024,7 +5024,8 @@ static int gcn_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *ARG_UNUSED (node), struct cgraph_simd_clone *clonei, tree base_type, - int ARG_UNUSED (num)) + int ARG_UNUSED (num), + bool explicit_p) { if (known_eq (clonei->simdlen, 0U)) clonei->simdlen = 64; @@ -5032,9 +5033,10 @@ gcn_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *ARG_UNUSED (node { /* Note that x86 has a similar message that is likely to trigger on sizes that are OK for gcn; the user can't win. */ - warning_at (DECL_SOURCE_LOCATION (node->decl), 0, - "unsupported simdlen %wd (amdgcn)", - clonei->simdlen.to_constant ()); + if (explicit_p) + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported simdlen %wd (amdgcn)", + clonei->simdlen.to_constant ()); return 0; } diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 480db35f6cd..5d8bb693ca9 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -23626,7 +23626,8 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val) static int ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, struct cgraph_simd_clone *clonei, - tree base_type, int num) + tree base_type, int num, + bool explicit_p) { int ret = 1; @@ -23635,8 +23636,9 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, || clonei->simdlen > 1024 || (clonei->simdlen & (clonei->simdlen - 1)) != 0)) { - warning_at (DECL_SOURCE_LOCATION (node->decl), 0, - "unsupported simdlen %wd", clonei->simdlen.to_constant ()); + if (explicit_p) + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported simdlen %wd", clonei->simdlen.to_constant ()); return 0; } @@ -23656,8 +23658,9 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, break; /* FALLTHRU */ default: - warning_at (DECL_SOURCE_LOCATION (node->decl), 0, - "unsupported return type %qT for simd", ret_type); + if (explicit_p) + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported return type %qT for simd", ret_type); return 0; } @@ -23686,13 +23689,14 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, default: if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM) break; - warning_at (DECL_SOURCE_LOCATION (node->decl), 0, - "unsupported argument type %qT for simd", arg_type); + if (explicit_p) + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported argument type %qT for simd", arg_type); return 0; } } - if (!TREE_PUBLIC (node->decl)) + if (!TREE_PUBLIC (node->decl) || !explicit_p) { /* If the function isn't exported, we can pick up just one ISA for the clones. */ @@ -23763,9 +23767,10 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, cnt /= clonei->vecsize_float; if (cnt > (TARGET_64BIT ? 16 : 8)) { - warning_at (DECL_SOURCE_LOCATION (node->decl), 0, - "unsupported simdlen %wd", - clonei->simdlen.to_constant ()); + if (explicit_p) + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported simdlen %wd", + clonei->simdlen.to_constant ()); return 0; } } diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index c176e2dc646..75ed54e8cce 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -204,7 +204,7 @@ in the following sections. -flax-vector-conversions -fms-extensions @gol -foffload=@var{arg} -foffload-options=@var{arg} @gol -fopenacc -fopenacc-dim=@var{geom} @gol --fopenmp -fopenmp-simd @gol +-fopenmp -fopenmp-simd -fopenmp-target-simd-clone @gol -fpermitted-flt-eval-methods=@var{standard} @gol -fplan9-extensions -fsigned-bitfields -funsigned-bitfields @gol -fsigned-char -funsigned-char -fstrict-flex-arrays[=@var{n}] @gol @@ -2761,6 +2761,16 @@ Enable handling of OpenMP's @code{simd}, @code{declare simd}, @code{[[omp::directive(...)]]} and @code{[[omp::sequence(...)]]} in C++ and @code{!$omp} in Fortran. Other OpenMP directives are ignored. +@item -fopenmp-target-simd-clone +@opindex fopenmp-target-simd-clone +@cindex OpenMP target SIMD clone +In addition to generating SIMD clones for functions marked with the +@code{declare simd} directive, GCC also generates clones +for functions marked with the OpenMP @code{declare target} directive +that are suitable for vectorization when this option is in effect. +It is enabled by default at @option{-O2} and higher (but not @option{-Os} +or @option{-Og}). + @item -fpermitted-flt-eval-methods=@var{style} @opindex fpermitted-flt-eval-methods @opindex fpermitted-flt-eval-methods=c11 diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 110f8dfa0a9..78b8871fb87 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -6272,7 +6272,7 @@ The default is @code{NULL_TREE} which means to not vectorize scatter stores. @end deftypefn -@deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int}) +@deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int}, @var{bool}) This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float} fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also @var{simdlen} field if it was previously 0. diff --git a/gcc/function.h b/gcc/function.h index 098613766be..a706fdd0476 100644 --- a/gcc/function.h +++ b/gcc/function.h @@ -438,6 +438,15 @@ struct GTY(()) function { /* Set if there are any OMP_TARGET regions in the function. */ unsigned int has_omp_target : 1; + + /* Set if this function is suitable for auto-creation of OpenMP SIMD + clones. */ + unsigned int auto_simd_clone_candidate : 1; + + /* Set if this function is called in a context where an OpenMP SIMD + clone might be used. Only useful on functions that are also identified + as auto_simd_clone_candidate. */ + unsigned int auto_simd_call_candidate : 1; }; /* Add the decl D to the local_decls list of FUN. */ diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc index fa896340daf..f4c054c2c53 100644 --- a/gcc/lto-streamer-in.cc +++ b/gcc/lto-streamer-in.cc @@ -1318,6 +1318,8 @@ input_struct_function_base (struct function *fn, class data_in *data_in, fn->calls_eh_return = bp_unpack_value (&bp, 1); fn->has_force_vectorize_loops = bp_unpack_value (&bp, 1); fn->has_simduid_loops = bp_unpack_value (&bp, 1); + fn->auto_simd_clone_candidate = bp_unpack_value (&bp, 1); + fn->auto_simd_call_candidate = bp_unpack_value (&bp, 1); fn->va_list_fpr_size = bp_unpack_value (&bp, 8); fn->va_list_gpr_size = bp_unpack_value (&bp, 8); fn->last_clique = bp_unpack_value (&bp, sizeof (short) * 8); diff --git a/gcc/lto-streamer-out.cc b/gcc/lto-streamer-out.cc index 2e7af03888b..a434e221664 100644 --- a/gcc/lto-streamer-out.cc +++ b/gcc/lto-streamer-out.cc @@ -2278,6 +2278,8 @@ output_struct_function_base (struct output_block *ob, struct function *fn) bp_pack_value (&bp, fn->calls_eh_return, 1); bp_pack_value (&bp, fn->has_force_vectorize_loops, 1); bp_pack_value (&bp, fn->has_simduid_loops, 1); + bp_pack_value (&bp, fn->auto_simd_clone_candidate, 1); + bp_pack_value (&bp, fn->auto_simd_call_candidate, 1); bp_pack_value (&bp, fn->va_list_fpr_size, 8); bp_pack_value (&bp, fn->va_list_gpr_size, 8); bp_pack_value (&bp, fn->last_clique, sizeof (short) * 8); diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc index dc42c752017..c7b8b0b894d 100644 --- a/gcc/omp-low.cc +++ b/gcc/omp-low.cc @@ -15055,4 +15055,335 @@ make_pass_diagnose_omp_blocks (gcc::context *ctxt) } +/* Simple pass to identify functions marked "omp declare target" that are + also candidates for "omp declare simd". The simd_clone IPA pass + generates the clones, here we just mark candidates. It's easier to do + the semantic checks on both functions and call sites before OMP + lowering. */ + + +/* Predicate to check whether the type signature and attributes of + FNDECL appear to be suitable for auto-declare-simd. This does not + check its body, only that it has one. */ +static bool +auto_declare_simd_signature_ok (tree fndecl) +{ + + /* Ignore functions without a definition. */ + if (!gimple_has_body_p (fndecl)) + return false; + + /* Ignore functions that are marked "noclone", "omp declare simd", + or that aren't marked "omp declare target". */ + tree attribs = DECL_ATTRIBUTES (fndecl); + if (lookup_attribute ("noclone", attribs) + || lookup_attribute ("omp declare simd", attribs) + || !lookup_attribute ("omp declare target", attribs)) + return false; + + /* FIXME: OpenACC "#pragma acc routine" translates into + "omp declare target", but appears also to have some other effects + that conflict with generating SIMD clones, causing ICEs. So don't + do this if we've got OpenACC instead of OpenMP. */ + if (oacc_get_fn_attrib (fndecl)) + return false; + + /* Filter out functions with argument types that are not vectorizable. + Backends will check for vectorizable arguments/return types in a + target-specific way when clones are expanded in the simd_clone pass, + but we can immediately filter out functions that have non-scalar + arguments/return types. Also, atomic types trigger warnings in + simd_clone_clauses_extract. */ + if (TYPE_ARG_TYPES (TREE_TYPE (fndecl)) == NULL_TREE) + return false; + tree t = TREE_TYPE (TREE_TYPE (fndecl)); + machine_mode m = TYPE_MODE (t); + if (!(VOID_TYPE_P (t) || is_a (m)) || TYPE_ATOMIC (t)) + return false; + + if (TYPE_ARG_TYPES (TREE_TYPE (fndecl))) + { + for (tree temp = TYPE_ARG_TYPES (TREE_TYPE (fndecl)); + temp; temp = TREE_CHAIN (temp)) + { + t = TREE_VALUE (temp); + m = TYPE_MODE (t); + if (!(VOID_TYPE_P (t) || is_a (m)) || TYPE_ATOMIC (t)) + return false; + } + } + else + { + for (tree temp = DECL_ARGUMENTS (fndecl); temp; temp = DECL_CHAIN (temp)) + { + t = TREE_TYPE (temp); + m = TYPE_MODE (t); + if (!(VOID_TYPE_P (t) || is_a (m)) || TYPE_ATOMIC (t)) + return false; + } + } + + /* Success! */ + return true; +} + +/* This code walker is used on the body of a function that is potentially + auto-declare-simd, to filter out things that are not suitable for + SIMD cloning. */ +static tree +check_auto_declare_simd_stmt (gimple_stmt_iterator *gsi_p, + bool *handled_ops_p, + struct walk_stmt_info *wi ATTRIBUTE_UNUSED) +{ + gimple *stmt = gsi_stmt (*gsi_p); + tree decl, attribs; + + /* Arbitrary non-null value to return to indicate failed check. */ + tree retval = integer_one_node; + + switch (gimple_code (stmt)) + { + case GIMPLE_CALL: + decl = gimple_call_fndecl (stmt); + + /* We can't know whether indirect calls are safe. */ + if (decl == NULL_TREE) + { + *handled_ops_p = true; + return retval; + } + + /* Calls to functions that are CONST or PURE are ok. */ + if (gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)) + break; + + /* Calls to functions that are already marked "omp declare simd" are + OK. */ + attribs = DECL_ATTRIBUTES (decl); + if (lookup_attribute ("omp declare simd", attribs) + || lookup_attribute ("omp declare auto-simd", attribs)) + break; + + /* Other function calls are not permitted. */ + *handled_ops_p = true; + return retval; + + /* OpenMP directives are not permitted. */ + CASE_GIMPLE_OMP: + *handled_ops_p = true; + return retval; + + /* Conservatively reject all EH-related constructs. */ + case GIMPLE_CATCH: + case GIMPLE_EH_FILTER: + case GIMPLE_EH_MUST_NOT_THROW: + case GIMPLE_EH_ELSE: + case GIMPLE_EH_DISPATCH: + case GIMPLE_RESX: + case GIMPLE_TRY: + *handled_ops_p = true; + return retval; + + /* Asms are not permitted since we don't know what they do. */ + case GIMPLE_ASM: + *handled_ops_p = true; + return retval; + + default: + break; + } + + /* Memory writes are not permitted, nor are statements with volatile + operands. + FIXME: this could be relaxed a little to permit writes to + function-local variables that could not alias other instances + of the function running in parallel. */ + if (gimple_store_p (stmt) || gimple_has_volatile_ops (stmt)) + { + *handled_ops_p = true; + return retval; + } + else + return NULL_TREE; +} + +/* Walker to detect operands that would be invalid in an + auto-declare-simd function, specifically volatile memory accesses. + It seems that gimple_has_volatile_ops has not been + initialized yet when this pass runs, so we have to do our own scan + for this. */ +static tree +check_auto_declare_simd_op (tree *op, int *walk_subtrees ATTRIBUTE_UNUSED, + void *data ATTRIBUTE_UNUSED) +{ + if (TREE_THIS_VOLATILE (*op)) + return *op; + else + return NULL_TREE; +} + + +/* Check whether FNDECL is suitable for auto declare simd, and set + the attribute on it if so. */ +void +maybe_mark_auto_declare_simd (tree fndecl) +{ + /* Ignore functions that already have been identified as auto-simd + candidates. */ + struct function *fn = DECL_STRUCT_FUNCTION (fndecl); + gcc_assert (fn); + if (fn->auto_simd_clone_candidate) + return; + + /* Check the type signature and attributes. */ + if (!auto_declare_simd_signature_ok (fndecl)) + return; + + /* Walk the function body to check for things that would interfere with + vectorization, or be invalid in an "omp declare simd" function. */ + gimple_seq body = gimple_body (fndecl); + struct walk_stmt_info wi; + memset (&wi, 0, sizeof (wi)); + if (walk_gimple_seq (body, check_auto_declare_simd_stmt, + check_auto_declare_simd_op, &wi)) + return; + + /* Mark the function as a candidate, for further processing by the + simd_clone pass. */ + fn->auto_simd_clone_candidate = 1; +} + +/* This code walker is used to scan for calls to functions in contexts + where the vectorizer might be able to make use of a SIMD clone. */ + +tree +auto_declare_simd_scan_for_calls (gimple_stmt_iterator *gsi_p, + bool *handled_ops_p ATTRIBUTE_UNUSED, + struct walk_stmt_info *wi ATTRIBUTE_UNUSED) +{ + gimple *stmt = gsi_stmt (*gsi_p); + switch (gimple_code (stmt)) + { + case GIMPLE_CALL: + { + tree fndecl = gimple_call_fndecl (stmt); + if (fndecl && auto_declare_simd_signature_ok (fndecl)) + { + struct function *fn = DECL_STRUCT_FUNCTION (fndecl); + if (fn) + fn->auto_simd_call_candidate = 1; + } + } + break; + default: + break; + } + return NULL_TREE; +} + +/* This code walker is used to scan for OMP loops where we might find + calls to functions that are candidates for SIMD cloning. */ + +tree +auto_declare_simd_scan_for_loops (gimple_stmt_iterator *gsi_p, + bool *handled_ops_p ATTRIBUTE_UNUSED, + struct walk_stmt_info *wi ATTRIBUTE_UNUSED) +{ + gimple *stmt = gsi_stmt (*gsi_p); + switch (gimple_code (stmt)) + { + case GIMPLE_OMP_FOR: + { + struct walk_stmt_info inner_wi; + memset (&inner_wi, 0, sizeof (inner_wi)); + walk_gimple_seq (gimple_omp_body (stmt), + auto_declare_simd_scan_for_calls, NULL, &inner_wi); + } + *handled_ops_p = true; + break; + + default: + break; + } + return NULL_TREE; +} + + +/* Main function for this pass. */ + +static unsigned int +execute_auto_declare_simd (void) +{ + gimple_seq body = gimple_body (current_function_decl); + struct walk_stmt_info wi; + memset (&wi, 0, sizeof (wi)); + + /* First scan the current function to see if it is a candidate for auto + declare simd. */ + maybe_mark_auto_declare_simd (current_function_decl); + + /* Now scan the current function to see if it contains calls to things that + might be candidates for use of auto declare simd clones, in a context + where such a clone might actually be used -- namely, if this function is + already marked as "declare simd", or has calls inside an OMP loop. + + Note that the caller and callee may be compiled in either order, that + is why we have to use two different bits in the struct function and + combine them later in the simd_clone IPA pass. */ + + tree attribs = DECL_ATTRIBUTES (current_function_decl); + if (lookup_attribute ("omp declare simd", attribs)) + walk_gimple_seq (body, auto_declare_simd_scan_for_calls, NULL, &wi); + else + walk_gimple_seq (body, auto_declare_simd_scan_for_loops, NULL, &wi); + return 0; +} + + +namespace { + +const pass_data pass_data_omp_auto_declare_simd = +{ + GIMPLE_PASS, /* type */ + "*omp_auto_declare_simd", /* name */ + OPTGROUP_OMP, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_gimple_any, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_omp_auto_declare_simd : public gimple_opt_pass +{ +public: + pass_omp_auto_declare_simd (gcc::context *ctxt) + : gimple_opt_pass (pass_data_omp_auto_declare_simd, ctxt) + {} + + /* opt_pass methods: */ + bool gate (function *) final override + { + return ((flag_openacc || flag_openmp || flag_openmp_simd) + && targetm.simd_clone.compute_vecsize_and_simdlen != NULL + && flag_openmp_target_simd_clone + && flag_tree_loop_vectorize); + } + unsigned int execute (function *) final override + { + return execute_auto_declare_simd (); + } + +}; // class pass_omp_auto_declare_simd + +} // anon namespace + +gimple_opt_pass * +make_pass_omp_auto_declare_simd (gcc::context *ctxt) +{ + return new pass_omp_auto_declare_simd (ctxt); +} + + #include "gt-omp-low.h" diff --git a/gcc/omp-simd-clone.cc b/gcc/omp-simd-clone.cc index 34cbee5afcd..a1ab05600dd 100644 --- a/gcc/omp-simd-clone.cc +++ b/gcc/omp-simd-clone.cc @@ -51,6 +51,8 @@ along with GCC; see the file COPYING3. If not see #include "stringpool.h" #include "attribs.h" #include "omp-simd-clone.h" +#include "omp-low.h" +#include "omp-general.h" /* Return the number of elements in vector type VECTYPE, which is associated with a SIMD clone. At present these always have a constant length. */ @@ -430,10 +432,12 @@ simd_clone_mangle (struct cgraph_node *node, return get_identifier (str); } -/* Create a simd clone of OLD_NODE and return it. */ +/* Create a simd clone of OLD_NODE and return it. If FORCE_LOCAL is true, + create it as a local symbol, otherwise copy the symbol linkage and + visibility attributes from OLD_NODE. */ static struct cgraph_node * -simd_clone_create (struct cgraph_node *old_node) +simd_clone_create (struct cgraph_node *old_node, bool force_local) { struct cgraph_node *new_node; if (old_node->definition) @@ -463,23 +467,38 @@ simd_clone_create (struct cgraph_node *old_node) return new_node; set_decl_built_in_function (new_node->decl, NOT_BUILT_IN, 0); - TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl); - DECL_COMDAT (new_node->decl) = DECL_COMDAT (old_node->decl); - DECL_WEAK (new_node->decl) = DECL_WEAK (old_node->decl); - DECL_EXTERNAL (new_node->decl) = DECL_EXTERNAL (old_node->decl); - DECL_VISIBILITY_SPECIFIED (new_node->decl) - = DECL_VISIBILITY_SPECIFIED (old_node->decl); - DECL_VISIBILITY (new_node->decl) = DECL_VISIBILITY (old_node->decl); - DECL_DLLIMPORT_P (new_node->decl) = DECL_DLLIMPORT_P (old_node->decl); - if (DECL_ONE_ONLY (old_node->decl)) - make_decl_one_only (new_node->decl, DECL_ASSEMBLER_NAME (new_node->decl)); - - /* The method cgraph_version_clone_with_body () will force the new - symbol local. Undo this, and inherit external visibility from - the old node. */ - new_node->local = old_node->local; - new_node->externally_visible = old_node->externally_visible; - new_node->calls_declare_variant_alt = old_node->calls_declare_variant_alt; + if (force_local) + { + TREE_PUBLIC (new_node->decl) = 0; + DECL_COMDAT (new_node->decl) = 0; + DECL_WEAK (new_node->decl) = 0; + DECL_EXTERNAL (new_node->decl) = 0; + DECL_VISIBILITY_SPECIFIED (new_node->decl) = 0; + DECL_VISIBILITY (new_node->decl) = VISIBILITY_DEFAULT; + DECL_DLLIMPORT_P (new_node->decl) = 0; + } + else + { + TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl); + DECL_COMDAT (new_node->decl) = DECL_COMDAT (old_node->decl); + DECL_WEAK (new_node->decl) = DECL_WEAK (old_node->decl); + DECL_EXTERNAL (new_node->decl) = DECL_EXTERNAL (old_node->decl); + DECL_VISIBILITY_SPECIFIED (new_node->decl) + = DECL_VISIBILITY_SPECIFIED (old_node->decl); + DECL_VISIBILITY (new_node->decl) = DECL_VISIBILITY (old_node->decl); + DECL_DLLIMPORT_P (new_node->decl) = DECL_DLLIMPORT_P (old_node->decl); + if (DECL_ONE_ONLY (old_node->decl)) + make_decl_one_only (new_node->decl, + DECL_ASSEMBLER_NAME (new_node->decl)); + + /* The method cgraph_version_clone_with_body () will force the new + symbol local. Undo this, and inherit external visibility from + the old node. */ + new_node->local = old_node->local; + new_node->externally_visible = old_node->externally_visible; + new_node->calls_declare_variant_alt + = old_node->calls_declare_variant_alt; + } return new_node; } @@ -1683,13 +1702,34 @@ simd_clone_adjust (struct cgraph_node *node) void expand_simd_clones (struct cgraph_node *node) { - tree attr = lookup_attribute ("omp declare simd", - DECL_ATTRIBUTES (node->decl)); - if (attr == NULL_TREE - || node->inlined_to + tree attr; + bool explicit_p = true; + + if (node->inlined_to || lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl))) return; + attr = lookup_attribute ("omp declare simd", + DECL_ATTRIBUTES (node->decl)); + + /* See if we can add an "omp declare simd" directive implicitly + before giving up. */ + if (attr == NULL_TREE + && flag_openmp_target_simd_clone) + { + struct function *fn = DECL_STRUCT_FUNCTION (node->decl); + if (fn && fn->auto_simd_clone_candidate + && (fn->auto_simd_call_candidate || in_lto_p)) + { + attr = tree_cons (get_identifier ("omp declare simd"), NULL, + DECL_ATTRIBUTES (node->decl)); + DECL_ATTRIBUTES (node->decl) = attr; + explicit_p = false; + } + } + if (attr == NULL_TREE) + return; + /* Ignore #pragma omp declare simd extern int foo (); @@ -1714,13 +1754,15 @@ expand_simd_clones (struct cgraph_node *node) poly_uint64 orig_simdlen = clone_info->simdlen; tree base_type = simd_clone_compute_base_data_type (node, clone_info); + /* The target can return 0 (no simd clones should be created), 1 (just one ISA of simd clones should be created) or higher count of ISA variants. In that case, clone_info is initialized for the first ISA variant. */ int count = targetm.simd_clone.compute_vecsize_and_simdlen (node, clone_info, - base_type, 0); + base_type, 0, + explicit_p); if (count == 0) continue; @@ -1745,7 +1787,8 @@ expand_simd_clones (struct cgraph_node *node) /* And call the target hook again to get the right ISA. */ targetm.simd_clone.compute_vecsize_and_simdlen (node, clone, base_type, - i / 2); + i / 2, + explicit_p); if ((i & 1) != 0) clone->inbranch = 1; } @@ -1763,7 +1806,7 @@ expand_simd_clones (struct cgraph_node *node) /* Only when we are sure we want to create the clone actually clone the function (or definitions) or create another extern FUNCTION_DECL (for prototypes without definitions). */ - struct cgraph_node *n = simd_clone_create (node); + struct cgraph_node *n = simd_clone_create (node, !explicit_p); if (n == NULL) { if (i == 0) diff --git a/gcc/opts.cc b/gcc/opts.cc index ae079fcd20e..8d5eb2bbbce 100644 --- a/gcc/opts.cc +++ b/gcc/opts.cc @@ -658,6 +658,7 @@ static const struct default_options default_options_table[] = REORDER_BLOCKS_ALGORITHM_STC }, { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_ftree_loop_vectorize, NULL, 1 }, { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_ftree_slp_vectorize, NULL, 1 }, + { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_fopenmp_target_simd_clone, NULL, 1 }, #ifdef INSN_SCHEDULING /* Only run the pre-regalloc scheduling pass if optimizing for speed. */ { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_fschedule_insns, NULL, 1 }, diff --git a/gcc/passes.def b/gcc/passes.def index 939ec3e29c8..4b29a2c5800 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_diagnose_omp_blocks); NEXT_PASS (pass_diagnose_tm_blocks); NEXT_PASS (pass_omp_oacc_kernels_decompose); + NEXT_PASS (pass_omp_auto_declare_simd); NEXT_PASS (pass_lower_omp); NEXT_PASS (pass_lower_cf); NEXT_PASS (pass_lower_tm); diff --git a/gcc/target.def b/gcc/target.def index a3d3b04a165..0e41a3796ec 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1634,7 +1634,7 @@ fields in @var{simd_clone} structure pointed by @var{clone_info} argument and al not determined by the bitsize (in which case @var{simdlen} is always used).\n\ The hook should return 0 if SIMD clones shouldn't be emitted,\n\ or number of @var{vecsize_mangle} variants that should be emitted.", -int, (struct cgraph_node *, struct cgraph_simd_clone *, tree, int), NULL) +int, (struct cgraph_node *, struct cgraph_simd_clone *, tree, int, bool), NULL) DEFHOOK (adjust, diff --git a/gcc/testsuite/g++.dg/gomp/target-simd-clone-1.C b/gcc/testsuite/g++.dg/gomp/target-simd-clone-1.C new file mode 100644 index 00000000000..85eaffc5e84 --- /dev/null +++ b/gcc/testsuite/g++.dg/gomp/target-simd-clone-1.C @@ -0,0 +1,26 @@ +/* { dg-options "-fopenmp -O2" } */ + +/* Test that simd clones are generated for functions with "declare target". */ + +#pragma omp declare target +__attribute__ ((__noinline__)) int addit (int a, int b) +{ + return a + b; +} +#pragma omp end declare target + +void callit (int *a, int *b, int *c) +{ + int i; + #pragma omp for simd + for (i = 0; i < 16; i++) + c[i] = addit (a[i], b[i]); +} + +/* Although addit has external linkage, we expect clones to be generated as + for a function with internal linkage. */ + +/* { dg-final { scan-assembler "\\.type\\t_ZGVbN4vv__Z5additii, @function" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-assembler "\\.type\\t_ZGVbM4vv__Z5additii, @function" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-assembler-not "\\.globl\\t_ZGVbN4vv__Z5additii" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-assembler-not "\\.globl\\t_ZGVbM4vv__Z5additii" { target i?86-*-* x86_64-*-* } } } */ diff --git a/gcc/testsuite/g++.dg/gomp/target-simd-clone-2.C b/gcc/testsuite/g++.dg/gomp/target-simd-clone-2.C new file mode 100644 index 00000000000..cff5bfdcef3 --- /dev/null +++ b/gcc/testsuite/g++.dg/gomp/target-simd-clone-2.C @@ -0,0 +1,23 @@ +/* { dg-options "-fopenmp -O2" } */ + +/* Test that simd clones are not generated for "declare target" + functions that throw. */ + +#pragma omp declare target +__attribute__ ((__noinline__)) int addit (int a, int b) +{ + if (a < 0) throw -1; + return a + b; +} +#pragma omp end declare target + +void callit (int *a, int *b, int *c) +{ + int i; + #pragma omp for simd + for (i = 0; i < 16; i++) + c[i] = addit (a[i], b[i]); +} + +/* { dg-final { scan-assembler-not "\\.type\\t_ZGVbN4vv__Z5additii, @function" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-assembler-not "\\.type\\t_ZGVbM4vv__Z5additii, @function" { target i?86-*-* x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/target-simd-clone-1.c b/gcc/testsuite/gcc.dg/gomp/target-simd-clone-1.c new file mode 100644 index 00000000000..6b5a196169e --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/target-simd-clone-1.c @@ -0,0 +1,26 @@ +/* { dg-options "-fopenmp -O2" } */ + +/* Test that simd clones are generated for functions with "declare target". */ + +#pragma omp declare target +__attribute__ ((__noinline__)) int addit (int a, int b) +{ + return a + b; +} +#pragma omp end declare target + +void callit (int *a, int *b, int *c) +{ + int i; + #pragma omp for simd + for (i = 0; i < 16; i++) + c[i] = addit (a[i], b[i]); +} + +/* Although addit has external linkage, we expect clones to be generated as + for a function with internal linkage. */ + +/* { dg-final { scan-assembler "\\.type\\t_ZGVbN4vv_addit, @function" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-assembler "\\.type\\t_ZGVbM4vv_addit, @function" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-assembler-not "\\.globl\\t_ZGVbN4vv_addit" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-assembler-not "\\.globl\\t_ZGVbM4vv_addit" { target i?86-*-* x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/target-simd-clone-2.c b/gcc/testsuite/gcc.dg/gomp/target-simd-clone-2.c new file mode 100644 index 00000000000..4049bb33784 --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/target-simd-clone-2.c @@ -0,0 +1,26 @@ +/* { dg-options "-fopenmp -O2" } */ + +/* Test that simd clones are not generated for functions with + "declare target" but unsuitable arguments. */ + +struct s { + int a; + int b; +}; + +#pragma omp declare target +__attribute__ ((__noinline__)) int addit (struct s x) +{ + return x.a + x.b; +} +#pragma omp end declare target + +void callit (struct s *ss, int *c) +{ + int i; + #pragma omp for simd + for (i = 0; i < 16; i++) + c[i] = addit (ss[i]); +} + +/* { dg-final { scan-assembler-not "_Z.*_addit" { target i?86-*-* x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/target-simd-clone-3.c b/gcc/testsuite/gcc.dg/gomp/target-simd-clone-3.c new file mode 100644 index 00000000000..a03e4764f8f --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/target-simd-clone-3.c @@ -0,0 +1,25 @@ +/* { dg-options "-fopenmp -O2" } */ + +/* Test that simd clones are not generated for functions with + "declare target" but that call possibly side-effecting functions + in the body. */ + +extern int f (int); + +#pragma omp declare target +__attribute__ ((__noinline__)) int addit(int a, int b) +{ + return f(a) + b; +} +#pragma omp end declare target + +void callit (int *a, int *b, int *c) +{ + int i; + #pragma omp for simd + for (i = 0; i < 16; i++) + c[i] = addit (a[i], b[i]); +} + +/* { dg-final { scan-assembler-not "_Z.*_addit" { target i?86-*-* x86_64-*-* } } } */ + diff --git a/gcc/testsuite/gcc.dg/gomp/target-simd-clone-4.c b/gcc/testsuite/gcc.dg/gomp/target-simd-clone-4.c new file mode 100644 index 00000000000..7161b927d7b --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/target-simd-clone-4.c @@ -0,0 +1,24 @@ +/* { dg-options "-fopenmp -O2" } */ + +/* Test that simd clones are not generated for functions with + "declare target" but that write memory in the body. */ + +extern int save; + +#pragma omp declare target +__attribute__ ((__noinline__)) int addit(int a, int b) +{ + save = a; + return a + b; +} +#pragma omp end declare target + +void callit (int *a, int *b, int *c) +{ + int i; + #pragma omp for simd + for (i = 0; i < 16; i++) + c[i] = addit (a[i], b[i]); +} + +/* { dg-final { scan-assembler-not "_Z.*_addit" { target i?86-*-* x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/target-simd-clone-5.c b/gcc/testsuite/gcc.dg/gomp/target-simd-clone-5.c new file mode 100644 index 00000000000..f32ff9cd5d0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/target-simd-clone-5.c @@ -0,0 +1,21 @@ +/* { dg-options "-fopenmp -Os" } */ + +/* Test that simd clones are not generated for functions with + "declare target" at -Os. */ + +#pragma omp declare target +__attribute__ ((__noinline__)) int addit (int a, int b) +{ + return a + b; +} +#pragma omp end declare target + +void callit (int *a, int *b, int *c) +{ + int i; + #pragma omp for simd + for (i = 0; i < 16; i++) + c[i] = addit (a[i], b[i]); +} + +/* { dg-final { scan-assembler-not "_Z.*_addit" { target i?86-*-* x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/target-simd-clone-6.c b/gcc/testsuite/gcc.dg/gomp/target-simd-clone-6.c new file mode 100644 index 00000000000..2a69d2b16e8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/target-simd-clone-6.c @@ -0,0 +1,21 @@ +/* { dg-options "-fopenmp -Og" } */ + +/* Test that simd clones are not generated for functions with + "declare target" at -Og. */ + +#pragma omp declare target +__attribute__ ((__noinline__)) int addit (int a, int b) +{ + return a + b; +} +#pragma omp end declare target + +void callit (int *a, int *b, int *c) +{ + int i; + #pragma omp for simd + for (i = 0; i < 16; i++) + c[i] = addit (a[i], b[i]); +} + +/* { dg-final { scan-assembler-not "_Z.*_addit" { target i?86-*-* x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/target-simd-clone-7.c b/gcc/testsuite/gcc.dg/gomp/target-simd-clone-7.c new file mode 100644 index 00000000000..40627d69857 --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/target-simd-clone-7.c @@ -0,0 +1,22 @@ +/* { dg-options "-fopenmp -O2" } */ + +/* Test that simd clones are not generated for functions with + "declare target" when there is no call site likely to use them. */ + +#pragma omp declare target +__attribute__ ((__noinline__)) int addit (int a, int b) +{ + return a + b; +} +#pragma omp end declare target + +void callit (int *a, int *b, int *c) +{ + int i; + for (i = 0; i < 16; i++) + /* This call is not in an OMP loop. */ + c[i] = addit (a[i], b[i]); +} + +/* { dg-final { scan-assembler-not "\\.type\\t_ZGVbN4vv_addit, @function" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-assembler-not "\\.type\\t_ZGVbM4vv_addit, @function" { target i?86-*-* x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/target-simd-clone-8.c b/gcc/testsuite/gcc.dg/gomp/target-simd-clone-8.c new file mode 100644 index 00000000000..4a05ade1d0e --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/target-simd-clone-8.c @@ -0,0 +1,23 @@ +/* { dg-options "-fopenmp -O2" } */ + +/* Test that simd clones are not generated for functions with + "declare target" but that read volatile memory in the body. */ + +extern volatile int save; + +#pragma omp declare target +__attribute__ ((__noinline__)) int addit(int a, int b) +{ + return save + a + b; +} +#pragma omp end declare target + +void callit (int *a, int *b, int *c) +{ + int i; + #pragma omp for simd + for (i = 0; i < 16; i++) + c[i] = addit (a[i], b[i]); +} + +/* { dg-final { scan-assembler-not "_Z.*_addit" { target i?86-*-* x86_64-*-* } } } */ diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 4dfe05ed8e0..c6ac85522fa 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -424,6 +424,7 @@ extern gimple_opt_pass *make_pass_lower_vector_ssa (gcc::context *ctxt); extern gimple_opt_pass *make_pass_omp_oacc_kernels_decompose (gcc::context *ctxt); extern gimple_opt_pass *make_pass_lower_omp (gcc::context *ctxt); extern gimple_opt_pass *make_pass_diagnose_omp_blocks (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_omp_auto_declare_simd (gcc::context *ctxt); extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt); extern gimple_opt_pass *make_pass_expand_omp_ssa (gcc::context *ctxt); extern gimple_opt_pass *make_pass_omp_target_link (gcc::context *ctxt);