From 0eda8e538c7f7d4036d9decceb714acf3314f885 Mon Sep 17 00:00:00 2001 From: Zhijin Zeng Date: Thu, 31 Oct 2024 18:13:19 +0800 Subject: [PATCH] RISC-V: support vector math library for risc-v Add risc-v vector function mangling rules as follows: _ZGV<x>N<y>v_<func_name> 'x' is the LMUL: when the LMUL is 1/2/4/8, 'x' is 1/2/4/8 respectively. 'y' is the element count, i.e. 'simdlen' in GCC. 'func_name' is the scalar function name. gcc/ChangeLog: * config/riscv/riscv.cc (INCLUDE_STRING): Include <cmath> and cgraph.h. (riscv_vector_type_p): Return false for a null type and also accept VECTOR_TYPE_P types. (supported_simd_type): New function. (lane_size): New function. (riscv_simd_clone_compute_vecsize_and_simdlen): New function. (riscv_simd_clone_adjust): New function. (riscv_simd_clone_usable): New function. (TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN): Define. (TARGET_SIMD_CLONE_ADJUST): Define. (TARGET_SIMD_CLONE_USABLE): Define. --- gcc/config/riscv/riscv.cc | 241 +++++++++++++++++++++++++++++++++++++- 1 file changed, 240 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 4f8e3ab931a..9b44d36b171 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see #define IN_TARGET_CODE 1 #define INCLUDE_STRING +#include <cmath> #include "config.h" #include "system.h" #include "coretypes.h" @@ -33,6 +34,7 @@ along with GCC; see the file COPYING3. If not see #include "insn-config.h" #include "insn-attr.h" #include "recog.h" +#include "cgraph.h" #include "output.h" #include "alias.h" #include "tree.h" @@ -5197,7 +5199,9 @@ riscv_vector_type_p (const_tree type) { /* Currently, only builtin scalabler vector type is allowed, in the future, more vector types may be allowed, such as GNU vector type, etc. */ - return riscv_vector::builtin_type_p (type); + if (!type) + return false; + return riscv_vector::builtin_type_p (type) || VECTOR_TYPE_P (type); } static unsigned int @@ -11099,6 +11103,231 @@ riscv_get_raw_result_mode (int regno) return default_get_reg_raw_mode (regno); } +/* Return true for types that could be supported as SIMD return or + argument types. 
*/
+
+static bool
+supported_simd_type (tree t)
+{
+  if (SCALAR_FLOAT_TYPE_P (t) || INTEGRAL_TYPE_P (t))
+    {
+      /* Only scalars that occupy 1, 2, 4 or 8 bytes are accepted.  */
+      HOST_WIDE_INT s = tree_to_shwi (TYPE_SIZE_UNIT (t));
+      return s == 1 || s == 2 || s == 4 || s == 8;
+    }
+  return false;
+}
+
+/* Return the lane width in bits for a return value or argument of type
+   TYPE whose simd-clone kind is CLONE_ARG_TYPE, which must not be
+   SIMD_CLONE_ARG_TYPE_MASK.
+
+   The width is TYPE_PRECISION (TYPE) when TYPE is an integer or scalar
+   floating-point type and TYPE_PRECISION / BITS_PER_UNIT (rounded down)
+   is 1, 2, 4 or 8.  Return 0 for every other type so that the caller can
+   reject the clone; aborting here would turn user code such as a uniform
+   pointer argument into an internal compiler error.  */
+
+static unsigned
+lane_size (cgraph_simd_clone_arg_type clone_arg_type, tree type)
+{
+  gcc_assert (clone_arg_type != SIMD_CLONE_ARG_TYPE_MASK);
+
+  if (INTEGRAL_TYPE_P (type)
+      || SCALAR_FLOAT_TYPE_P (type))
+    switch (TYPE_PRECISION (type) / BITS_PER_UNIT)
+      {
+      default:
+        break;
+      case 1:
+      case 2:
+      case 4:
+      case 8:
+        return TYPE_PRECISION (type);
+      }
+  return 0;
+}
+
+/* Implement TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN.
+
+   Decide which simd clones of NODE can be generated and fill in CLONEI
+   (vecsize_mangle, simdlen, mask_mode, vecsize_int, vecsize_float) for
+   clone number NUM.  Return the number of clones available, or 0 when no
+   clone can be generated.  Warnings are only issued when EXPLICIT_P.
+
+   The return type and every argument must share one lane width; the
+   candidate simdlens are TARGET_MIN_VLEN * LMUL / lane width for each
+   LMUL up to TARGET_MAX_LMUL, unless the user supplied a simdlen.  */
+
+static int
+riscv_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
+                                              struct cgraph_simd_clone *clonei,
+                                              tree base_type ATTRIBUTE_UNUSED,
+                                              int num, bool explicit_p)
+{
+  tree t, ret_type;
+  unsigned int elt_bit = 0;
+  unsigned HOST_WIDE_INT const_simdlen;
+
+  if (!TARGET_VECTOR)
+    return 0;
+
+  /* A constant simdlen must be a power of two in the range [2, 1024].  */
+  if (maybe_ne (clonei->simdlen, 0U)
+      && clonei->simdlen.is_constant (&const_simdlen)
+      && (const_simdlen < 2
+          || const_simdlen > 1024
+          || (const_simdlen & (const_simdlen - 1)) != 0))
+    {
+      if (explicit_p)
+        warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+                    "unsupported simdlen %wd", const_simdlen);
+      return 0;
+    }
+
+  ret_type = TREE_TYPE (TREE_TYPE (node->decl));
+  if (TREE_CODE (ret_type) != VOID_TYPE)
+    {
+      /* A type can pass supported_simd_type and still have no lane width
+         (its precision may be narrower than its size), so check both.  */
+      if (supported_simd_type (ret_type))
+        elt_bit = lane_size (SIMD_CLONE_ARG_TYPE_VECTOR, ret_type);
+      if (!elt_bit)
+        {
+          if (!explicit_p)
+            ;
+          else if (COMPLEX_FLOAT_TYPE_P (ret_type))
+            warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+                        "GCC does not currently support return type %qT "
+                        "for simd", ret_type);
+          else
+            warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+                        "unsupported return type %qT for simd",
+                        ret_type);
+          return 0;
+        }
+    }
+
+  /* Types that are passed in vectors, paired with their lane width.  */
+  auto_vec<std::pair<tree, unsigned int>> vec_elts (clonei->nargs + 1);
+  if (TREE_CODE (ret_type) != VOID_TYPE)
+    vec_elts.safe_push (std::make_pair (ret_type, elt_bit));
+
+  int i;
+  tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
+  bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
+  for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
+       t && t != void_list_node; t = TREE_CHAIN (t), i++)
+    {
+      tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
+
+      /* Every argument, uniform ones included, takes part in the lane
+         width check below, so every argument needs a lane width.  */
+      unsigned lane_bits = 0;
+      if (supported_simd_type (arg_type))
+        lane_bits = lane_size (clonei->args[i].arg_type, arg_type);
+      if (!lane_bits)
+        {
+          if (!explicit_p)
+            ;
+          else if (COMPLEX_FLOAT_TYPE_P (arg_type))
+            warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+                        "GCC does not currently support argument type %qT "
+                        "for simd", arg_type);
+          else
+            warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+                        "unsupported argument type %qT for simd",
+                        arg_type);
+          return 0;
+        }
+      if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
+        vec_elts.safe_push (std::make_pair (arg_type, lane_bits));
+      if (!elt_bit)
+        elt_bit = lane_bits;
+      /* Mixed lane widths are not supported.  */
+      if (elt_bit != lane_bits)
+        return 0;
+    }
+
+  /* No return value and no arguments: nothing to vectorize.  */
+  if (!elt_bit)
+    return 0;
+
+  /* Placeholder; replaced by the LMUL digit once a clone is selected.  */
+  clonei->vecsize_mangle = 'n';
+  clonei->mask_mode = VOIDmode;
+  clonei->vecsize_int = 0;
+  clonei->vecsize_float = 0;
+
+  /* Give up when the minimum vector length does not exceed one lane.  */
+  if ((unsigned int) TARGET_MIN_VLEN <= elt_bit)
+    return 0;
+
+  /* Keep track of the possible simdlens the clones of this function can
+     have, and check them later to see if we support them.  */
+  auto_vec<poly_uint64> simdlens (4);
+  if (known_eq (clonei->simdlen, 0U))
+    {
+      /* One candidate per LMUL that TARGET_MAX_LMUL allows, in increasing
+         order, so that candidate K corresponds to LMUL 1 << K.  */
+      static const int lmuls[] = { RVV_M1, RVV_M2, RVV_M4, RVV_M8 };
+      for (int lmul : lmuls)
+        if (TARGET_MAX_LMUL >= lmul)
+          simdlens.safe_push (
+            exact_div (poly_uint64 (TARGET_MIN_VLEN * lmul), elt_bit));
+    }
+  else
+    simdlens.safe_push (clonei->simdlen);
+
+  /* Drop every candidate for which some vector operand would need more
+     than TARGET_MIN_VLEN * TARGET_MAX_LMUL bits.  */
+  unsigned j = 0;
+  while (j < simdlens.length ())
+    {
+      bool remove_simdlen = false;
+      for (const auto &elt : vec_elts)
+        if (known_gt (simdlens[j] * elt.second,
+                      TARGET_MIN_VLEN * TARGET_MAX_LMUL))
+          {
+            /* Don't issue a warning for every simdclone when there is no
+               specific simdlen clause.  */
+            if (explicit_p && maybe_ne (clonei->simdlen, 0U))
+              warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+                          "GCC does not currently support simdlen %wd for "
+                          "type %qT",
+                          constant_lower_bound (simdlens[j]), elt.first);
+            remove_simdlen = true;
+            break;
+          }
+      if (remove_simdlen)
+        simdlens.ordered_remove (j);
+      else
+        j++;
+    }
+
+  int count = simdlens.length ();
+  if (count == 0)
+    {
+      if (explicit_p && known_eq (clonei->simdlen, 0U))
+        {
+          /* Warn the user if we can't generate any simdclone.  Report the
+             LMUL=1 lane count and its double, the two smallest candidates;
+             the variable must be initialized before it is printed.  */
+          poly_uint64 simdlen
+            = exact_div (poly_uint64 (TARGET_MIN_VLEN * RVV_M1), elt_bit);
+          warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+                      "GCC does not currently support a simdclone with simdlens"
+                      " %wd and %wd for these types.",
+                      constant_lower_bound (simdlen),
+                      constant_lower_bound (simdlen * 2));
+        }
+      return 0;
+    }
+
+  gcc_assert (num < count);
+  /* The mangle character is the LMUL digit '1', '2', '4' or '8'; compute
+     it with integer arithmetic rather than floating-point exp2.
+     NOTE(review): with an explicit simdlen clause only one candidate
+     exists, so NUM is 0 and the digit is '1' whatever LMUL that simdlen
+     implies -- confirm this matches the intended mangling.  */
+  clonei->vecsize_mangle = '0' + (1 << num);
+  clonei->simdlen = simdlens[num];
+  return count;
+}
+
+/* Implement TARGET_SIMD_CLONE_ADJUST.  
*/
+
+static void
+riscv_simd_clone_adjust (struct cgraph_node *node)
+{
+  /* Add a riscv_vector_cc attribute with argument "default" to the
+     attributes of the clone's function type.  */
+  tree fntype = TREE_TYPE (node->decl);
+  tree attrs = make_attribute ("riscv_vector_cc", "default",
+                               TYPE_ATTRIBUTES (fntype));
+  TYPE_ATTRIBUTES (fntype) = attrs;
+}
+
+/* Implement TARGET_SIMD_CLONE_USABLE.  Return 0 for a clone whose mangle
+   character is one of the LMUL digits when the vector extension is
+   enabled and -1 when it is not; any other mangle character is a bug.  */
+
+static int
+riscv_simd_clone_usable (struct cgraph_node *node)
+{
+  switch (node->simdclone->vecsize_mangle)
+    {
+    case '1':
+    case '2':
+    case '4':
+    case '8':
+      return TARGET_VECTOR ? 0 : -1;
+    default:
+      gcc_unreachable ();
+    }
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -11451,6 +11680,16 @@ riscv_get_raw_result_mode (int regno)
 #undef TARGET_GET_RAW_RESULT_MODE
 #define TARGET_GET_RAW_RESULT_MODE riscv_get_raw_result_mode
 
+#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
+#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
+  riscv_simd_clone_compute_vecsize_and_simdlen
+
+#undef TARGET_SIMD_CLONE_ADJUST
+#define TARGET_SIMD_CLONE_ADJUST riscv_simd_clone_adjust
+
+#undef TARGET_SIMD_CLONE_USABLE
+#define TARGET_SIMD_CLONE_USABLE riscv_simd_clone_usable
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-riscv.h"
-- 
2.25.1