diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 70303d6fd953e0c397b9138ede8858c2db2e53db..d7888c95a4999fad1a4c55d5cd2287c2040302c8 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1001,6 +1001,8 @@ namespace aarch64_sve { #ifdef GCC_TARGET_H bool verify_type_context (location_t, type_context_kind, const_tree, bool); #endif + void add_sve_type_attribute (tree, unsigned int, unsigned int, + const char *, const char *); } extern void aarch64_split_combinev16qi (rtx operands[3]); diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index 161a14edde7c9fb1b13b146cf50463e2d78db264..6f99c438d10daa91b7e3b623c995489f1a8a0f4c 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -569,14 +569,16 @@ static bool reported_missing_registers_p; /* Record that TYPE is an ABI-defined SVE type that contains NUM_ZR SVE vectors and NUM_PR SVE predicates. MANGLED_NAME, if nonnull, is the ABI-defined mangling of the type. ACLE_NAME is the name of the type. */ -static void +void add_sve_type_attribute (tree type, unsigned int num_zr, unsigned int num_pr, const char *mangled_name, const char *acle_name) { tree mangled_name_tree = (mangled_name ? get_identifier (mangled_name) : NULL_TREE); + tree acle_name_tree + = (acle_name ? get_identifier (acle_name) : NULL_TREE); - tree value = tree_cons (NULL_TREE, get_identifier (acle_name), NULL_TREE); + tree value = tree_cons (NULL_TREE, acle_name_tree, NULL_TREE); value = tree_cons (NULL_TREE, mangled_name_tree, value); value = tree_cons (NULL_TREE, size_int (num_pr), value); value = tree_cons (NULL_TREE, size_int (num_zr), value); diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index a13d3fba05f9f9d2989b36c681bc77d71e943e0d..492acb9ce081866162faa8dfca777e4cb943797f 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -4034,13 +4034,13 @@ aarch64_takes_arguments_in_sve_regs_p (const_tree fntype) static const predefined_function_abi & aarch64_fntype_abi (const_tree fntype) { - if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype))) - return aarch64_simd_abi (); - if (aarch64_returns_value_in_sve_regs_p (fntype) || aarch64_takes_arguments_in_sve_regs_p (fntype)) return aarch64_sve_abi (); + if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype))) + return aarch64_simd_abi (); + return default_function_abi; } @@ -27327,7 +27327,7 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, int num, bool explicit_p) { tree t, ret_type; - unsigned int nds_elt_bits; + unsigned int nds_elt_bits, wds_elt_bits; int count; unsigned HOST_WIDE_INT const_simdlen; poly_uint64 vec_bits; @@ -27374,10 +27374,14 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, if (TREE_CODE (ret_type) != VOID_TYPE) { nds_elt_bits = lane_size (SIMD_CLONE_ARG_TYPE_VECTOR, ret_type); + wds_elt_bits = nds_elt_bits; vec_elts.safe_push (std::make_pair (ret_type, nds_elt_bits)); } else - nds_elt_bits = POINTER_SIZE; + { + nds_elt_bits = POINTER_SIZE; + wds_elt_bits = 0; + } int i; tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl)); @@ -27385,30 +27389,36 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0; t && t != void_list_node; t = TREE_CHAIN (t), i++) { - tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t); + tree type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t); if (clonei->args[i].arg_type != SIMD_CLONE_ARG_TYPE_UNIFORM - && !supported_simd_type (arg_type)) + && !supported_simd_type (type)) { if (!explicit_p) ; - else if (COMPLEX_FLOAT_TYPE_P (ret_type)) + else if (COMPLEX_FLOAT_TYPE_P (type)) warning_at (DECL_SOURCE_LOCATION (node->decl), 0, "GCC does not currently support argument type %qT " - "for simd", arg_type); + "for simd", type); else warning_at (DECL_SOURCE_LOCATION (node->decl), 0, "unsupported argument type %qT for simd", - arg_type); + type); return 0; } - unsigned lane_bits = lane_size (clonei->args[i].arg_type, arg_type); + unsigned lane_bits = lane_size (clonei->args[i].arg_type, type); if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) - vec_elts.safe_push (std::make_pair (arg_type, lane_bits)); + vec_elts.safe_push (std::make_pair (type, lane_bits)); if (nds_elt_bits > lane_bits) nds_elt_bits = lane_bits; + else if (wds_elt_bits < lane_bits) + wds_elt_bits = lane_bits; } - clonei->vecsize_mangle = 'n'; + /* If we could not determine the WDS type from available parameters/return, + then fallback to using uintptr_t. */ + if (wds_elt_bits == 0) + wds_elt_bits = POINTER_SIZE; + clonei->mask_mode = VOIDmode; poly_uint64 simdlen; auto_vec simdlens (2); @@ -27419,6 +27429,11 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, simdlen = exact_div (poly_uint64 (64), nds_elt_bits); simdlens.safe_push (simdlen); simdlens.safe_push (simdlen * 2); + /* Only create a SVE simd clone if we aren't dealing with an unprototyped + function. */ + if (DECL_ARGUMENTS (node->decl) != 0 + || type_arg_types != 0) + simdlens.safe_push (exact_div (poly_uint64 (128, 128), wds_elt_bits)); } else simdlens.safe_push (clonei->simdlen); @@ -27439,19 +27454,20 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, while (j < count && !simdlens.is_empty ()) { bool remove_simdlen = false; - for (auto elt : vec_elts) - if (known_gt (simdlens[j] * elt.second, 128U)) - { - /* Don't issue a warning for every simdclone when there is no - specific simdlen clause. */ - if (explicit_p && known_ne (clonei->simdlen, 0U)) - warning_at (DECL_SOURCE_LOCATION (node->decl), 0, - "GCC does not currently support simdlen %wd for " - "type %qT", - constant_lower_bound (simdlens[j]), elt.first); - remove_simdlen = true; - break; - } + if (simdlens[j].is_constant ()) + for (auto elt : vec_elts) + if (known_gt (simdlens[j] * elt.second, 128U)) + { + /* Don't issue a warning for every simdclone when there is no + specific simdlen clause. */ + if (explicit_p && known_ne (clonei->simdlen, 0U)) + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "GCC does not currently support simdlen %wd for " + "type %qT", + constant_lower_bound (simdlens[j]), elt.first); + remove_simdlen = true; + break; + } if (remove_simdlen) { count--; @@ -27479,6 +27495,13 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, gcc_assert (num < count); clonei->simdlen = simdlens[num]; + if (clonei->simdlen.is_constant ()) + clonei->vecsize_mangle = 'n'; + else + { + clonei->vecsize_mangle = 's'; + clonei->inbranch = 1; + } return count; } @@ -27493,6 +27516,11 @@ aarch64_simd_clone_adjust (struct cgraph_node *node) tree t = TREE_TYPE (node->decl); TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default", TYPE_ATTRIBUTES (t)); + if (node->simdclone->vecsize_mangle == 's') + { + tree target = build_string (strlen ("+sve"), "+sve"); + aarch64_option_valid_attribute_p (node->decl, NULL_TREE, target, 0); + } } /* Implement TARGET_SIMD_CLONE_USABLE. */ @@ -27517,6 +27545,57 @@ aarch64_simd_clone_usable (struct cgraph_node *node, machine_mode vector_mode) } } +/* Implement TARGET_SIMD_CLONE_ADJUST_RET_OR_PARAM. */ + +static tree +aarch64_simd_clone_adjust_ret_or_param (cgraph_node *node, tree type, + bool is_mask) +{ + if (type + && VECTOR_TYPE_P (type) + && node->simdclone->vecsize_mangle == 's') + { + cl_target_option cur_target; + cl_target_option_save (&cur_target, &global_options, &global_options_set); + tree new_target = DECL_FUNCTION_SPECIFIC_TARGET (node->decl); + cl_target_option_restore (&global_options, &global_options_set, + TREE_TARGET_OPTION (new_target)); + aarch64_override_options_internal (&global_options); + bool m_old_have_regs_of_mode[MAX_MACHINE_MODE]; + memcpy (m_old_have_regs_of_mode, have_regs_of_mode, + sizeof (have_regs_of_mode)); + for (int i = 0; i < NUM_MACHINE_MODES; ++i) + if (aarch64_sve_mode_p ((machine_mode) i)) + have_regs_of_mode[i] = true; + poly_uint16 old_sve_vg = aarch64_sve_vg; + if (!node->simdclone->simdlen.is_constant ()) + aarch64_sve_vg = poly_uint16 (2, 2); + unsigned int num_zr = 0; + unsigned int num_pr = 0; + type = TREE_TYPE (type); + type = build_vector_type (type, node->simdclone->simdlen); + if (is_mask) + { + type = truth_type_for (type); + num_pr = 1; + } + else + num_zr = 1; + + aarch64_sve::add_sve_type_attribute (type, num_zr, num_pr, NULL, NULL); + cl_target_option_restore (&global_options, &global_options_set, &cur_target); + aarch64_override_options_internal (&global_options); + memcpy (have_regs_of_mode, m_old_have_regs_of_mode, + sizeof (have_regs_of_mode)); + aarch64_sve_vg = old_sve_vg; + } + else if (type + && VECTOR_TYPE_P (type) + && is_mask) + type = truth_type_for (type); + return type; +} + /* Implement TARGET_COMP_TYPE_ATTRIBUTES */ static int @@ -28590,6 +28669,10 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_SIMD_CLONE_ADJUST #define TARGET_SIMD_CLONE_ADJUST aarch64_simd_clone_adjust +#undef TARGET_SIMD_CLONE_ADJUST_RET_OR_PARAM +#define TARGET_SIMD_CLONE_ADJUST_RET_OR_PARAM \ + aarch64_simd_clone_adjust_ret_or_param + #undef TARGET_SIMD_CLONE_USABLE #define TARGET_SIMD_CLONE_USABLE aarch64_simd_clone_usable diff --git a/gcc/omp-simd-clone.cc b/gcc/omp-simd-clone.cc index c2fd4d3be878e56b6394e34097d2de826a0ba1ff..091f194f1829fb9f70827d8674fd4dae44282d55 100644 --- a/gcc/omp-simd-clone.cc +++ b/gcc/omp-simd-clone.cc @@ -541,9 +541,12 @@ simd_clone_mangle (struct cgraph_node *node, pp_string (&pp, "_ZGV"); pp_character (&pp, vecsize_mangle); pp_character (&pp, mask); - /* For now, simdlen is always constant, while variable simdlen pp 'n'. */ - unsigned int len = simdlen.to_constant (); - pp_decimal_int (&pp, (len)); + + unsigned long long len = 0; + if (simdlen.is_constant (&len)) + pp_decimal_int (&pp, (int) (len)); + else + pp_character (&pp, 'x'); for (n = 0; n < clone_info->nargs; ++n) { @@ -1499,8 +1502,8 @@ simd_clone_adjust (struct cgraph_node *node) below). */ loop = alloc_loop (); cfun->has_force_vectorize_loops = true; - /* For now, simlen is always constant. */ - loop->safelen = node->simdclone->simdlen.to_constant (); + /* We can assert that safelen is the 'minimum' simdlen. */ + loop->safelen = constant_lower_bound (node->simdclone->simdlen); loop->force_vectorize = true; loop->header = body_bb; } diff --git a/gcc/testsuite/c-c++-common/gomp/declare-variant-14.c b/gcc/testsuite/c-c++-common/gomp/declare-variant-14.c index e3668893afe33a58c029cddd433d9bf43cce2bfa..12f8b3b839b7f3ff9e4f99768e59c0e1c5339062 100644 --- a/gcc/testsuite/c-c++-common/gomp/declare-variant-14.c +++ b/gcc/testsuite/c-c++-common/gomp/declare-variant-14.c @@ -21,7 +21,7 @@ test1 (int x) shall call f01 with score 8. */ /* { dg-final { scan-tree-dump-not "f04 \\\(x" "optimized" } } */ /* { dg-final { scan-tree-dump-times "f03 \\\(x" 14 "optimized" { target { !aarch64*-*-* } } } } */ - /* { dg-final { scan-tree-dump-times "f03 \\\(x" 10 "optimized" { target { aarch64*-*-* } } } } */ + /* { dg-final { scan-tree-dump-times "f03 \\\(x" 12 "optimized" { target { aarch64*-*-* } } } } */ /* { dg-final { scan-tree-dump-times "f01 \\\(x" 4 "optimized" { target { !aarch64*-*-* } } } } */ /* { dg-final { scan-tree-dump-times "f01 \\\(x" 0 "optimized" { target { aarch64*-*-* } } } } */ int a = f04 (x); diff --git a/gcc/testsuite/gcc.target/aarch64/declare-simd-1.c b/gcc/testsuite/gcc.target/aarch64/declare-simd-1.c index aab8c17f0c442a7cda4dce23cc18162a0b7f676e..add6e7c93019834fbd5bed5ead18b52d4cdd0a37 100644 --- a/gcc/testsuite/gcc.target/aarch64/declare-simd-1.c +++ b/gcc/testsuite/gcc.target/aarch64/declare-simd-1.c @@ -4,28 +4,39 @@ extern "C" { #endif #pragma omp declare simd -int __attribute__ ((const)) f00 (int a , char b) /* { dg-warning {GCC does not currently support a simdclone with simdlens 8 and 16 for these types.} } */ +int __attribute__ ((const)) f00 (int a , char b) { return a + b; } +/* { dg-final { scan-assembler-not {_ZGVn[a-z0-9]+_f00} } } */ +/* { dg-final { scan-assembler {_ZGVsMxvv_f00} } } */ + #pragma omp declare simd -long long __attribute__ ((const)) f01 (int a , short b) /* { dg-warning {GCC does not currently support a simdclone with simdlens 4 and 8 for these types.} } */ +long long __attribute__ ((const)) f01 (int a , short b) { return a + b; } +/* { dg-final { scan-assembler-not {_ZGVn[a-z0-9]+_f01} } } */ +/* { dg-final { scan-assembler {_ZGVsMxvv_f01} } } */ #pragma omp declare simd linear(b) -long long __attribute__ ((const)) f02 (short *b, int a) /* { dg-warning {GCC does not currently support a simdclone with simdlens 4 and 8 for these types.} } */ +long long __attribute__ ((const)) f02 (short *b, int a) { return a + *b; } +/* { dg-final { scan-assembler-not {_ZGVn[a-z0-9]+_f02} } } */ +/* { dg-final { scan-assembler {_ZGVsMxl2v_f02} } } */ + #pragma omp declare simd uniform(b) -void f03 (char b, int a) /* { dg-warning {GCC does not currently support a simdclone with simdlens 8 and 16 for these types.} } */ +void f03 (char b, int a) { } +/* { dg-final { scan-assembler-not {_ZGVn[a-z0-9]+_f03} } } */ +/* { dg-final { scan-assembler {_ZGVsMxuv_f03} } } */ + #pragma omp declare simd simdlen(4) double f04 (void) /* { dg-warning {GCC does not currently support simdlen 4 for type 'double'} } */ { diff --git a/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c b/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c index abb128ffc9cd2c1353b99eb38aae72377746e6d6..604869a30456e4db988bba86e059a27f19dda589 100644 --- a/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c +++ b/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c @@ -10,6 +10,7 @@ short __attribute__ ((const)) f00 (short a , char b) } /* { dg-final { scan-assembler {_ZGVnN8vv_f00:} } } */ /* { dg-final { scan-assembler {_ZGVnM8vv_f00:} } } */ +/* { dg-final { scan-assembler {_ZGVsMxvv_f00:} } } */ #pragma omp declare simd notinbranch short __attribute__ ((const)) f01 (int a , short b) @@ -17,6 +18,7 @@ short __attribute__ ((const)) f01 (int a , short b) return a + b; } /* { dg-final { scan-assembler {_ZGVnN4vv_f01:} } } */ +/* { dg-final { scan-assembler {_ZGVsMxvv_f01:} } } */ #pragma omp declare simd linear(b) inbranch int __attribute__ ((const)) f02 (int a, short *b) @@ -24,6 +26,7 @@ int __attribute__ ((const)) f02 (int a, short *b) return a + *b; } /* { dg-final { scan-assembler {_ZGVnM4vl2_f02:} } } */ +/* { dg-final { scan-assembler {_ZGVsMxvl2_f02:} } } */ #pragma omp declare simd uniform(a) notinbranch void f03 (char b, int a) @@ -31,6 +34,7 @@ void f03 (char b, int a) } /* { dg-final { scan-assembler {_ZGVnN8vu_f03:} } } */ /* { dg-final { scan-assembler {_ZGVnN16vu_f03:} } } */ +/* { dg-final { scan-assembler {_ZGVsMxvu_f03:} } } */ #pragma omp declare simd simdlen(2) float f04 (double a) @@ -39,6 +43,7 @@ float f04 (double a) } /* { dg-final { scan-assembler {_ZGVnN2v_f04:} } } */ /* { dg-final { scan-assembler {_ZGVnM2v_f04:} } } */ +/* { dg-final { scan-assembler-not {_ZGVs[0-9a-z]*_f04:} } } */ #pragma omp declare simd uniform(a) linear (b) void f05 (short a, short *b, short c) @@ -50,6 +55,7 @@ void f05 (short a, short *b, short c) /* { dg-final { scan-assembler {_ZGVnN4ul2v_f05:} } } */ /* { dg-final { scan-assembler {_ZGVnM8ul2v_f05:} } } */ /* { dg-final { scan-assembler {_ZGVnM8ul2v_f05:} } } */ +/* { dg-final { scan-assembler {_ZGVsMxul2v_f05:} } } */ #ifdef __cplusplus } #endif diff --git a/gcc/testsuite/gfortran.dg/gomp/declare-variant-14.f90 b/gcc/testsuite/gfortran.dg/gomp/declare-variant-14.f90 index 6319df0558f37b95f1b2eb17374bdb4ecbc33295..38677b8f7a76b960ce9363b1c0cabf6fc5086ab6 100644 --- a/gcc/testsuite/gfortran.dg/gomp/declare-variant-14.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/declare-variant-14.f90 @@ -41,7 +41,7 @@ contains ! shall call f01 with score 8. ! { dg-final { scan-tree-dump-not "f04 \\\(x" "optimized" } } ! { dg-final { scan-tree-dump-times "f03 \\\(x" 14 "optimized" { target { !aarch64*-*-* } } } } - ! { dg-final { scan-tree-dump-times "f03 \\\(x" 6 "optimized" { target { aarch64*-*-* } } } } + ! { dg-final { scan-tree-dump-times "f03 \\\(x" 8 "optimized" { target { aarch64*-*-* } } } } ! { dg-final { scan-tree-dump-times "f01 \\\(x" 4 "optimized" { target { !aarch64*-*-* } } } } ! { dg-final { scan-tree-dump-times "f01 \\\(x" 0 "optimized" { target { aarch64*-*-* } } } } a = f04 (x)