diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index f75eb892f3daa7c2576efcedc8d944ab1e895cdb..122a473770eb4526ecce326f02d843608d088b5b 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -995,6 +995,8 @@ namespace aarch64_sve { #ifdef GCC_TARGET_H bool verify_type_context (location_t, type_context_kind, const_tree, bool); #endif + void add_sve_type_attribute (tree, unsigned int, unsigned int, + const char *, const char *); } extern void aarch64_split_combinev16qi (rtx operands[3]); diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index 161a14edde7c9fb1b13b146cf50463e2d78db264..6f99c438d10daa91b7e3b623c995489f1a8a0f4c 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -569,14 +569,16 @@ static bool reported_missing_registers_p; /* Record that TYPE is an ABI-defined SVE type that contains NUM_ZR SVE vectors and NUM_PR SVE predicates. MANGLED_NAME, if nonnull, is the ABI-defined mangling of the type. ACLE_NAME is the name of the type. */ -static void +void add_sve_type_attribute (tree type, unsigned int num_zr, unsigned int num_pr, const char *mangled_name, const char *acle_name) { tree mangled_name_tree = (mangled_name ? get_identifier (mangled_name) : NULL_TREE); + tree acle_name_tree + = (acle_name ? 
get_identifier (acle_name) : NULL_TREE); - tree value = tree_cons (NULL_TREE, get_identifier (acle_name), NULL_TREE); + tree value = tree_cons (NULL_TREE, acle_name_tree, NULL_TREE); value = tree_cons (NULL_TREE, mangled_name_tree, value); value = tree_cons (NULL_TREE, size_int (num_pr), value); value = tree_cons (NULL_TREE, size_int (num_zr), value); diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 5c40b6ed22a508723bd535a7460762c3a243d441..ef93a4e9d43799df4410f152cdd798db285e8897 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -4015,13 +4015,13 @@ aarch64_takes_arguments_in_sve_regs_p (const_tree fntype) static const predefined_function_abi & aarch64_fntype_abi (const_tree fntype) { - if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype))) - return aarch64_simd_abi (); - if (aarch64_returns_value_in_sve_regs_p (fntype) || aarch64_takes_arguments_in_sve_regs_p (fntype)) return aarch64_sve_abi (); + if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype))) + return aarch64_simd_abi (); + return default_function_abi; } @@ -26968,14 +26968,21 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, } } - clonei->vecsize_mangle = 'n'; clonei->mask_mode = VOIDmode; elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type)); if (known_eq (clonei->simdlen, 0U)) { - count = 2; - vec_bits = (num == 0 ? 64 : 128); - clonei->simdlen = exact_div (vec_bits, elt_bits); + if (num >= 2) + { + vec_bits = poly_uint64 (128, 128); + clonei->simdlen = exact_div (vec_bits, elt_bits); + } + else + { + count = 3; + vec_bits = (num == 0 ? 
64 : 128); + clonei->simdlen = exact_div (vec_bits, elt_bits); + } } else { @@ -26994,6 +27001,15 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, return 0; } } + + if (num >= 2) + { + clonei->vecsize_mangle = 's'; + clonei->inbranch = 1; + } + else + clonei->vecsize_mangle = 'n'; + clonei->vecsize_int = vec_bits; clonei->vecsize_float = vec_bits; return count; @@ -27010,17 +27026,28 @@ aarch64_simd_clone_adjust (struct cgraph_node *node) tree t = TREE_TYPE (node->decl); TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default", TYPE_ATTRIBUTES (t)); + if (node->simdclone->vecsize_mangle == 's') + { + tree target = build_string (strlen ("+sve"), "+sve"); + aarch64_option_valid_attribute_p (node->decl, NULL_TREE, target, 0); + } } /* Implement TARGET_SIMD_CLONE_USABLE. */ static int -aarch64_simd_clone_usable (struct cgraph_node *node) +aarch64_simd_clone_usable (struct cgraph_node *node, machine_mode vector_mode) { switch (node->simdclone->vecsize_mangle) { case 'n': - if (!TARGET_SIMD) + if (!TARGET_SIMD + || aarch64_sve_mode_p (vector_mode)) + return -1; + return 0; + case 's': + if (!TARGET_SVE + || !aarch64_sve_mode_p (vector_mode)) return -1; return 0; default: @@ -27028,6 +27055,61 @@ aarch64_simd_clone_usable (struct cgraph_node *node) } } +/* Implement TARGET_SIMD_CLONE_ADJUST_RET_OR_PARAM. 
*/ + +static tree +aarch64_simd_clone_adjust_ret_or_param (struct cgraph_node *node, tree type, + bool is_mask) +{ + if (type + && VECTOR_TYPE_P (type) + && node->simdclone->vecsize_mangle == 's') + { + cl_target_option cur_target; + cl_target_option_save (&cur_target, &global_options, &global_options_set); + tree new_target = DECL_FUNCTION_SPECIFIC_TARGET (node->decl); + cl_target_option_restore (&global_options, &global_options_set, + TREE_TARGET_OPTION (new_target)); + aarch64_override_options_internal (&global_options); + bool m_old_have_regs_of_mode[MAX_MACHINE_MODE]; + memcpy (m_old_have_regs_of_mode, have_regs_of_mode, + sizeof (have_regs_of_mode)); + for (int i = 0; i < NUM_MACHINE_MODES; ++i) + if (aarch64_sve_mode_p ((machine_mode) i)) + have_regs_of_mode[i] = true; + poly_uint16 old_sve_vg = aarch64_sve_vg; + if (!node->simdclone->simdlen.is_constant ()) + aarch64_sve_vg = poly_uint16 (2, 2); + unsigned int num_zr = 0; + unsigned int num_pr = 0; + if (is_mask) + { + type = truth_type_for (type); + num_pr = 1; + } + else + { + num_zr = 1; + tree base_type = TREE_TYPE (type); + if (POINTER_TYPE_P (base_type)) + base_type = pointer_sized_int_node; + poly_int64 vec_size = tree_to_poly_int64 (TYPE_SIZE (type)); + scalar_mode base_mode = as_a <scalar_mode> (TYPE_MODE (base_type)); + machine_mode vec_mode + = aarch64_simd_container_mode (base_mode, vec_size); + type = build_vector_type_for_mode (base_type, vec_mode); + } + + aarch64_sve::add_sve_type_attribute (type, num_zr, num_pr, NULL, NULL); + cl_target_option_restore (&global_options, &global_options_set, &cur_target); + aarch64_override_options_internal (&global_options); + memcpy (have_regs_of_mode, m_old_have_regs_of_mode, + sizeof (have_regs_of_mode)); + aarch64_sve_vg = old_sve_vg; + } + return type; +} + /* Implement TARGET_COMP_TYPE_ATTRIBUTES */ static int @@ -28048,6 +28130,10 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_SIMD_CLONE_USABLE #define TARGET_SIMD_CLONE_USABLE aarch64_simd_clone_usable 
+#undef TARGET_SIMD_CLONE_ADJUST_RET_OR_PARAM +#define TARGET_SIMD_CLONE_ADJUST_RET_OR_PARAM \ + aarch64_simd_clone_adjust_ret_or_param + #undef TARGET_COMP_TYPE_ATTRIBUTES #define TARGET_COMP_TYPE_ATTRIBUTES aarch64_comp_type_attributes diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index c6c891972d1e58cd163b259ba96a599d62326865..ed12271027305a0017cb9b2ff821bad403c52836 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -6306,11 +6306,16 @@ This hook should add implicit @code{attribute(target("..."))} attribute to SIMD clone @var{node} if needed. @end deftypefn -@deftypefn {Target Hook} int TARGET_SIMD_CLONE_USABLE (struct cgraph_node *@var{}) +@deftypefn {Target Hook} int TARGET_SIMD_CLONE_USABLE (struct cgraph_node *@var{}, @var{machine_mode}) This hook should return -1 if SIMD clone @var{node} shouldn't be used -in vectorized loops in current function, or non-negative number if it is -usable. In that case, the smaller the number is, the more desirable it is -to use it. +in vectorized loops being vectorized with mode @var{m} in current function, or +non-negative number if it is usable. In that case, the smaller the number is, +the more desirable it is to use it. +@end deftypefn + +@deftypefn {Target Hook} tree TARGET_SIMD_CLONE_ADJUST_RET_OR_PARAM (struct cgraph_node *@var{}, @var{tree}, @var{bool}) +If defined, this hook should adjust the type of the return or parameter +@var{type} to be used by the simd clone @var{node}. @end deftypefn @deftypefn {Target Hook} int TARGET_SIMT_VF (void) diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 613b2534149415f442163d599503efaf423b673b..fd0d2c8d0dcc2fd249b34745d77749d99c49d13d 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4205,6 +4205,8 @@ address; but often a machine-dependent strategy can generate better code. 
@hook TARGET_SIMD_CLONE_USABLE +@hook TARGET_SIMD_CLONE_ADJUST_RET_OR_PARAM + @hook TARGET_SIMT_VF @hook TARGET_OMP_DEVICE_KIND_ARCH_ISA diff --git a/gcc/omp-simd-clone.cc b/gcc/omp-simd-clone.cc index 48b480e7556d9ad8e5502e10e513ec36b17b9cbb..4808608b7a1c06802ee231480c2003cf41c11799 100644 --- a/gcc/omp-simd-clone.cc +++ b/gcc/omp-simd-clone.cc @@ -378,8 +378,9 @@ simd_clone_clauses_extract (struct cgraph_node *node, tree clauses, arg_type = SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP; clone_info->args[argno].arg_type = arg_type; clone_info->args[argno].linear_step = tree_to_shwi (step); + int nargs = clone_info->nargs; gcc_assert (clone_info->args[argno].linear_step >= 0 - && clone_info->args[argno].linear_step < n); + && clone_info->args[argno].linear_step < nargs); } else { @@ -541,9 +542,12 @@ simd_clone_mangle (struct cgraph_node *node, pp_string (&pp, "_ZGV"); pp_character (&pp, vecsize_mangle); pp_character (&pp, mask); - /* For now, simdlen is always constant, while variable simdlen pp 'n'. 
*/ - unsigned int len = simdlen.to_constant (); - pp_decimal_int (&pp, (len)); + + unsigned long long len = 0; + if (simdlen.is_constant (&len)) + pp_decimal_int (&pp, (int) (len)); + else + pp_character (&pp, 'x'); for (n = 0; n < clone_info->nargs; ++n) { @@ -736,6 +740,7 @@ simd_clone_adjust_return_type (struct cgraph_node *node) t = build_array_type_nelts (t, exact_div (node->simdclone->simdlen, veclen)); } + t = targetm.simd_clone.adjust_ret_or_param (node, t, false); TREE_TYPE (TREE_TYPE (fndecl)) = t; if (!node->definition) return NULL_TREE; @@ -748,6 +753,7 @@ simd_clone_adjust_return_type (struct cgraph_node *node) tree atype = build_array_type_nelts (orig_rettype, node->simdclone->simdlen); + atype = targetm.simd_clone.adjust_ret_or_param (node, atype, false); if (maybe_ne (veclen, node->simdclone->simdlen)) return build1 (VIEW_CONVERT_EXPR, atype, t); @@ -807,8 +813,14 @@ simd_clone_adjust_argument_types (struct cgraph_node *node) { ipa_adjusted_param adj; memset (&adj, 0, sizeof (adj)); - tree parm = args[i]; - tree parm_type = node->definition ? TREE_TYPE (parm) : parm; + tree parm = NULL_TREE; + tree parm_type = NULL_TREE; + if(i < args.length()) + { + parm = args[i]; + parm_type = node->definition ? TREE_TYPE (parm) : parm; + } + adj.base_index = i; adj.prev_clone_index = i; @@ -874,6 +886,8 @@ simd_clone_adjust_argument_types (struct cgraph_node *node) ? 
IDENTIFIER_POINTER (DECL_NAME (parm)) : NULL, parm_type, sc->simdlen); } + adj.type = targetm.simd_clone.adjust_ret_or_param (node, adj.type, + false); vec_safe_push (new_params, adj); } @@ -906,6 +920,8 @@ simd_clone_adjust_argument_types (struct cgraph_node *node) adj.type = build_vector_type (pointer_sized_int_node, veclen); else adj.type = build_vector_type (base_type, veclen); + adj.type = targetm.simd_clone.adjust_ret_or_param (node, adj.type, + true); vec_safe_push (new_params, adj); k = vector_unroll_factor (sc->simdlen, veclen); @@ -931,6 +947,7 @@ simd_clone_adjust_argument_types (struct cgraph_node *node) sc->args[i].simd_array = NULL_TREE; } sc->args[i].orig_type = base_type; + sc->args[i].vector_type = adj.type; sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK; } @@ -1485,8 +1502,8 @@ simd_clone_adjust (struct cgraph_node *node) below). */ loop = alloc_loop (); cfun->has_force_vectorize_loops = true; - /* For now, simlen is always constant. */ - loop->safelen = node->simdclone->simdlen.to_constant (); + /* We can assert that safelen is the 'minimum' simdlen. */ + loop->safelen = constant_lower_bound (node->simdclone->simdlen); loop->force_vectorize = true; loop->header = body_bb; } @@ -1546,7 +1563,7 @@ simd_clone_adjust (struct cgraph_node *node) mask = gimple_assign_lhs (g); g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)), BIT_AND_EXPR, mask, - build_int_cst (TREE_TYPE (mask), 1)); + build_one_cst (TREE_TYPE (mask))); gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING); mask = gimple_assign_lhs (g); } diff --git a/gcc/target.def b/gcc/target.def index db8af0cbe81624513f114fc9bbd8be61d855f409..ffa12aa9023bb8f26a647a9848800c77f34afc67 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1645,10 +1645,18 @@ void, (struct cgraph_node *), NULL) DEFHOOK (usable, "This hook should return -1 if SIMD clone @var{node} shouldn't be used\n\ -in vectorized loops in current function, or non-negative number if it is\n\ -usable. 
In that case, the smaller the number is, the more desirable it is\n\ -to use it.", -int, (struct cgraph_node *), NULL) +in vectorized loops being vectorized with mode @var{m} in current function, or\n\ +non-negative number if it is usable. In that case, the smaller the number is,\n\ +the more desirable it is to use it.", +int, (struct cgraph_node *, machine_mode), NULL) + +DEFHOOK +(adjust_ret_or_param, +"If defined, this hook should adjust the type of the return or parameter\n\ +@var{type} to be used by the simd clone @var{node}.", +tree, (struct cgraph_node *, tree, bool), +default_simd_clone_adjust_ret_or_param) + HOOK_VECTOR_END (simd_clone) diff --git a/gcc/targhooks.h b/gcc/targhooks.h index a1df260f5483dc84f18d8f12c5202484a32d5bb7..860fb8ccbf1ab00c43dc4b4d32808c1f488406e4 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -73,6 +73,9 @@ extern void default_print_operand (FILE *, rtx, int); extern void default_print_operand_address (FILE *, machine_mode, rtx); extern bool default_print_operand_punct_valid_p (unsigned char); extern tree default_mangle_assembler_name (const char *); +extern tree default_simd_clone_adjust_ret_or_param + (struct cgraph_node *, tree, bool); + extern machine_mode default_translate_mode_attribute (machine_mode); extern bool default_scalar_mode_supported_p (scalar_mode); diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc index fe0116521feaf32187e7bc113bf93b1805852c79..4e54ceb0297828cf13e418dfa113651670a6f112 100644 --- a/gcc/targhooks.cc +++ b/gcc/targhooks.cc @@ -398,6 +398,17 @@ default_mangle_assembler_name (const char *name ATTRIBUTE_UNUSED) return get_identifier (stripped); } +/* The default implementation of TARGET_SIMD_CLONE_ADJUST_RET_OR_PARAM: + return TYPE unchanged. IS_MASK is true if TYPE is for a mask parameter. */ + +tree +default_simd_clone_adjust_ret_or_param (struct cgraph_node *node ATTRIBUTE_UNUSED, + tree type, + bool is_mask ATTRIBUTE_UNUSED) +{ + return type; +} + /* The default implementation of TARGET_TRANSLATE_MODE_ATTRIBUTE. 
*/ machine_mode diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index c85b6babc4bc5bc3111ef326dcc8f32bb25333f6..da6aa3f193bd52a1e40bb6dbe3d483f92ecd7896 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -2759,7 +2759,8 @@ vect_build_all_ones_mask (vec_info *vinfo, { if (TREE_CODE (masktype) == INTEGER_TYPE) return build_int_cst (masktype, -1); - else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE) + else if (VECTOR_BOOLEAN_TYPE_P (masktype) + || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE) { tree mask = build_int_cst (TREE_TYPE (masktype), -1); mask = build_vector_from_val (masktype, mask); @@ -4136,14 +4137,6 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, } poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - if (!vf.is_constant ()) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not considering SIMD clones; not yet supported" - " for variable-width vectors.\n"); - return false; - } unsigned int badness = 0; struct cgraph_node *bestn = NULL; @@ -4156,20 +4149,17 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int this_badness = 0; unsigned int num_calls; if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls) - || n->simdclone->nargs != nargs) + || n->simdclone->nargs != (nargs + n->simdclone->inbranch)) continue; if (num_calls != 1) this_badness += exact_log2 (num_calls) * 4096; if (n->simdclone->inbranch) this_badness += 8192; - int target_badness = targetm.simd_clone.usable (n); + int target_badness = targetm.simd_clone.usable (n, vinfo->vector_mode); if (target_badness < 0) continue; this_badness += target_badness * 512; - /* FORNOW: Have to add code to add the mask argument. 
*/ - if (n->simdclone->inbranch) - continue; - for (i = 0; i < nargs; i++) + for (i = 0; i < n->simdclone->nargs; i++) { switch (n->simdclone->args[i].arg_type) { @@ -4206,16 +4196,22 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, i = -1; break; case SIMD_CLONE_ARG_TYPE_MASK: - gcc_unreachable (); + /* Penalize using a predicated SIMD clone in a non-masked loop, + as we'd have to needlessly construct an all-true mask. */ + if (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) + this_badness += 64; + break; } if (i == (size_t) -1) break; - if (n->simdclone->args[i].alignment > arginfo[i].align) + if (i < nargs + && n->simdclone->args[i].alignment > arginfo[i].align) { i = -1; break; } - if (arginfo[i].align) + if (i < nargs + && arginfo[i].align) this_badness += (exact_log2 (arginfo[i].align) - exact_log2 (n->simdclone->args[i].alignment)); } @@ -4248,6 +4244,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, fndecl = bestn->decl; nunits = bestn->simdclone->simdlen; ncopies = vector_unroll_factor (vf, nunits); + nargs = bestn->simdclone->nargs; /* If the function isn't const, only allow it in simd loops where user has asserted that at least nunits consecutive iterations can be @@ -4331,11 +4328,45 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, for (i = 0; i < nargs; i++) { - unsigned int k, l, m, o; + unsigned long long k, l, m, o; tree atype; - op = gimple_call_arg (stmt, i); + if (i < gimple_call_num_args (stmt)) + op = gimple_call_arg (stmt, i); + else + op = NULL_TREE; + switch (bestn->simdclone->args[i].arg_type) { + case SIMD_CLONE_ARG_TYPE_MASK: + { + tree mask; + atype = bestn->simdclone->args[i].vector_type; + if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) + { + vec_loop_masks *loop_masks + = &LOOP_VINFO_MASKS (loop_vinfo); + mask = vect_get_loop_mask (gsi, loop_masks, ncopies, + vectype, j); + } + else + { + tree mask_type = 
bestn->simdclone->args[i].vector_type; + mask + = vect_build_all_ones_mask (vinfo, stmt_info, + mask_type); + } + if (!useless_type_conversion_p (TREE_TYPE (mask), atype)) + { + mask = build1 (VIEW_CONVERT_EXPR, atype, mask); + gassign *new_stmt + = gimple_build_assign (make_ssa_name (atype), mask); + vect_finish_stmt_generation (vinfo, stmt_info, + new_stmt, gsi); + mask = gimple_assign_lhs (new_stmt); + } + vargs.safe_push (mask); + } + break; case SIMD_CLONE_ARG_TYPE_VECTOR: atype = bestn->simdclone->args[i].vector_type; o = vector_unroll_factor (nunits,