diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index f2e3d905dbbeb2949f2947f5cfd68208c94c9272..47368e09b106e5b43640bd4f113abd0b9a15b9c8 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -7564,12 +7564,18 @@
    (set_attr "speculation_barrier" "true")]
 )
 
+(define_expand "ftrunc<mode><frintnz_mode>2"
+  [(set (match_operand:VSFDF 0 "register_operand" "=w")
+	(unspec:VSFDF [(match_operand:VSFDF 1 "register_operand" "w")]
+		      FRINTNZ))]
+  "TARGET_FRINT"
+)
+
 (define_insn "aarch64_<frintnzs_op><mode>"
   [(set (match_operand:VSFDF 0 "register_operand" "=w")
 	(unspec:VSFDF [(match_operand:VSFDF 1 "register_operand" "w")]
 		      FRINTNZX))]
-  "TARGET_FRINT && TARGET_FLOAT
-   && !(VECTOR_MODE_P (<MODE>mode) && !TARGET_SIMD)"
+  "TARGET_FRINT"
   "<frintnzs_op>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
   [(set_attr "type" "f_rint<stype>")]
 )
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index a8ad4e5ff215ade06c3ca13a24ef18d259afcb6c..b1f78d87fbe6118e792b00580c6beb23ce63e27c 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -173,7 +173,11 @@
 		    SF DF])
 
 ;; Scalar and vector modes for SF, DF.
-(define_mode_iterator VSFDF [V2SF V4SF V2DF DF SF])
+(define_mode_iterator VSFDF [(V2SF "TARGET_SIMD")
+			     (V4SF "TARGET_SIMD")
+			     (V2DF "TARGET_SIMD")
+			     (DF "TARGET_FLOAT")
+			     (SF "TARGET_FLOAT")])
 
 ;; Advanced SIMD single Float modes.
 (define_mode_iterator VDQSF [V2SF V4SF])
@@ -3136,6 +3140,8 @@
 (define_int_iterator FRINTNZX [UNSPEC_FRINT32Z UNSPEC_FRINT32X
 			       UNSPEC_FRINT64Z UNSPEC_FRINT64X])
 
+(define_int_iterator FRINTNZ [UNSPEC_FRINT32Z UNSPEC_FRINT64Z])
+
 (define_int_iterator SVE_BRK_UNARY [UNSPEC_BRKA UNSPEC_BRKB])
 
 (define_int_iterator SVE_BRKP [UNSPEC_BRKPA UNSPEC_BRKPB])
@@ -3545,6 +3551,8 @@
 (define_int_attr frintnzs_op [(UNSPEC_FRINT32Z "frint32z") (UNSPEC_FRINT32X "frint32x")
 			      (UNSPEC_FRINT64Z "frint64z") (UNSPEC_FRINT64X "frint64x")])
 
+(define_int_attr frintnz_mode [(UNSPEC_FRINT32Z "si") (UNSPEC_FRINT64Z "di")])
+
 ;; The condition associated with an UNSPEC_COND_<xyz>.
 (define_int_attr cmp_op [(UNSPEC_COND_CMPEQ_WIDE "eq")
 			 (UNSPEC_COND_CMPGE_WIDE "ge")
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index d0a71ecbb806de3a6564c6ffe973fec5da5c597b..722a03de79004c9d2f291882b346fecb74f9df1b 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6076,6 +6076,14 @@
 operands; otherwise, it may not.
 
 This pattern is not allowed to @code{FAIL}.
 
+@cindex @code{ftrunc@var{m}@var{n}2} instruction pattern
+@item @samp{ftrunc@var{m}@var{n}2}
+Truncate operand 1 towards zero to an @var{n}-mode signed integer and store
+the result in operand 0.  Both operands have mode @var{m}, which is a scalar
+or vector floating-point mode.  An exception must be raised if operand 1 does
+not fit in an @var{n}-mode signed integer, just as it would be if the
+truncation happened through a separate floating-point to integer conversion.
+
 @cindex @code{round@var{m}2} instruction pattern
 @item @samp{round@var{m}2}
 Round operand 1 to the nearest integer, rounding away from zero in the
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index a12175b649848c7dd7802ae960f1360cd9261b88..926f1859becfe32b5a157eba8031d9ed2f7fd249 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2317,6 +2317,10 @@
 Like @code{aarch64_sve_hw}, but also test for an exact hardware vector length.
 
 @item aarch64_fjcvtzs_hw
 AArch64 target that is able to generate and execute armv8.3-a FJCVTZS
 instruction.
+
+@item aarch64_frintnzx_ok
+AArch64 target that is able to generate the Armv8.5-A FRINT32Z, FRINT64Z,
+FRINT32X and FRINT64X instructions.
 @end table
 
 @subsubsection MIPS-specific attributes
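
An illustrative sketch, not part of the patch: with <mode> = V4SF the new
define_expand instantiates as below.  FRINTNZ iterates over UNSPEC_FRINT32Z
and UNSPEC_FRINT64Z, and <frintnz_mode> maps those to "si" and "di", so the
generated pattern names are ftruncv4sfsi2, ftruncv4sfdi2, and so on for the
other VSFDF modes, matching the ftrunc$a$b2 convert optab added to optabs.def
further down:

  (define_expand "ftruncv4sfsi2"
    [(set (match_operand:V4SF 0 "register_operand" "=w")
	  (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
		       UNSPEC_FRINT32Z))]
    "TARGET_FRINT"
  )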
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 9471f543191edf0aea125ff0fc426511b2306169..cce4cd153cb59751f54dfdf82eee3bdd4fc394fd 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -105,32 +105,33 @@ init_internal_fns ()
 /* Create static initializers for the information returned by
    direct_internal_fn.  */
-#define not_direct { -2, -2, false }
-#define mask_load_direct { -1, 2, false }
-#define load_lanes_direct { -1, -1, false }
-#define mask_load_lanes_direct { -1, -1, false }
-#define gather_load_direct { 3, 1, false }
-#define len_load_direct { -1, -1, false }
-#define mask_store_direct { 3, 2, false }
-#define store_lanes_direct { 0, 0, false }
-#define mask_store_lanes_direct { 0, 0, false }
-#define vec_cond_mask_direct { 1, 0, false }
-#define vec_cond_direct { 2, 0, false }
-#define scatter_store_direct { 3, 1, false }
-#define len_store_direct { 3, 3, false }
-#define vec_set_direct { 3, 3, false }
-#define unary_direct { 0, 0, true }
-#define unary_convert_direct { -1, 0, true }
-#define binary_direct { 0, 0, true }
-#define ternary_direct { 0, 0, true }
-#define cond_unary_direct { 1, 1, true }
-#define cond_binary_direct { 1, 1, true }
-#define cond_ternary_direct { 1, 1, true }
-#define while_direct { 0, 2, false }
-#define fold_extract_direct { 2, 2, false }
-#define fold_left_direct { 1, 1, false }
-#define mask_fold_left_direct { 1, 1, false }
-#define check_ptrs_direct { 0, 0, false }
+#define not_direct { -2, -2, false, false }
+#define mask_load_direct { -1, 2, false, false }
+#define load_lanes_direct { -1, -1, false, false }
+#define mask_load_lanes_direct { -1, -1, false, false }
+#define gather_load_direct { 3, 1, false, false }
+#define len_load_direct { -1, -1, false, false }
+#define mask_store_direct { 3, 2, false, false }
+#define store_lanes_direct { 0, 0, false, false }
+#define mask_store_lanes_direct { 0, 0, false, false }
+#define vec_cond_mask_direct { 1, 0, false, false }
+#define vec_cond_direct { 2, 0, false, false }
+#define scatter_store_direct { 3, 1, false, false }
+#define len_store_direct { 3, 3, false, false }
+#define vec_set_direct { 3, 3, false, false }
+#define unary_direct { 0, 0, false, true }
+#define unary_convert_direct { -1, 0, false, true }
+#define binary_direct { 0, 0, false, true }
+#define ternary_direct { 0, 0, false, true }
+#define cond_unary_direct { 1, 1, false, true }
+#define cond_binary_direct { 1, 1, false, true }
+#define cond_ternary_direct { 1, 1, false, true }
+#define while_direct { 0, 2, false, false }
+#define fold_extract_direct { 2, 2, false, false }
+#define fold_left_direct { 1, 1, false, false }
+#define mask_fold_left_direct { 1, 1, false, false }
+#define check_ptrs_direct { 0, 0, false, false }
+#define ftrunc_int_direct { 0, 1, true, true }
 
 const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
@@ -237,6 +238,29 @@ get_multi_vector_move (tree array_type, convert_optab optab)
   return convert_optab_handler (optab, imode, vmode);
 }
 
+/* Expand FTRUNC_INT call STMT using optab OPTAB.  */
+
+static void
+expand_ftrunc_int_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+{
+  class expand_operand ops[2];
+  tree lhs, float_type, int_type;
+  rtx target, op;
+
+  lhs = gimple_call_lhs (stmt);
+  target = expand_normal (lhs);
+  op = expand_normal (gimple_call_arg (stmt, 0));
+
+  float_type = TREE_TYPE (lhs);
+  int_type = TREE_TYPE (gimple_call_arg (stmt, 1));
+
+  create_output_operand (&ops[0], target, TYPE_MODE (float_type));
+  create_input_operand (&ops[1], op, TYPE_MODE (float_type));
+
+  expand_insn (convert_optab_handler (optab, TYPE_MODE (float_type),
+				      TYPE_MODE (int_type)), 2, ops);
+}
+
 /* Expand LOAD_LANES call STMT using optab OPTAB.  */
 
 static void
@@ -3848,6 +3872,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p
 #define direct_check_ptrs_optab_supported_p direct_optab_supported_p
 #define direct_vec_set_optab_supported_p direct_optab_supported_p
+#define direct_ftrunc_int_optab_supported_p convert_optab_supported_p
 
 /* Return the optab used by internal function FN.  */
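
An illustrative sketch, not part of the patch (SSA names hypothetical): the
expander consumes calls of the form

  y_2 = .FTRUNC_INT (x_1, 2147483647);

where the lhs and the first argument share a floating-point mode (SFmode
here) and the second, dummy argument merely carries the target integer type.
expand_ftrunc_int_optab_fn therefore looks up
convert_optab_handler (optab, SFmode, SImode), i.e. the ftruncsfsi2 pattern,
and expands it with the lhs as operand 0 and the first argument as operand 1;
the dummy argument itself is never expanded.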
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 61516dab66dc90e016622c47e832b790db8ea867..976869e5dba2a3d067b6eb64c7bddde04ba6fb78 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -69,6 +69,9 @@ along with GCC; see the file COPYING3.  If not see
 
    - fold_left: for scalar = FN (scalar, vector), keyed off the vector mode
    - check_ptrs: used for check_{raw,war}_ptrs
+   - ftrunc_int: a unary conversion optab that takes and returns values of the
+     same mode, but internally converts via another mode.  This second mode is
+     specified using a dummy final function argument.
 
    DEF_INTERNAL_SIGNED_OPTAB_FN defines an internal function that maps to one
    of two optabs, depending on the signedness of an input.
@@ -298,6 +301,7 @@ DEF_INTERNAL_FLT_FLOATN_FN (RINT, ECF_CONST, rint, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (ROUND, ECF_CONST, round, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (ROUNDEVEN, ECF_CONST, roundeven, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (TRUNC, ECF_CONST, btrunc, unary)
+DEF_INTERNAL_OPTAB_FN (FTRUNC_INT, ECF_CONST, ftruncint, ftrunc_int)
 
 /* Binary math functions.  */
 DEF_INTERNAL_FLT_FN (ATAN2, ECF_CONST, atan2, binary)
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 21b1ce43df6a926a59e4b9eaf9ce06d2440845e7..416b5fe42e356acf97c90efaebaa207d29551769 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -144,12 +144,14 @@ struct direct_internal_fn_info
      function isn't directly mapped to an optab.  */
   signed int type0 : 8;
   signed int type1 : 8;
+  /* Indicates whether type1 is a scalar type.  */
+  unsigned int type1_is_scalar_p : 1;
   /* True if the function is pointwise, so that it can be vectorized by
      converting the return type and all argument types to vectors of the
      same number of elements.  E.g. we can vectorize an IFN_SQRT on
      floats as an IFN_SQRT on vectors of N floats.
 
-     This only needs 1 bit, but occupies the full 16 to ensure a nice
+     This only needs 1 bit, but occupies the full 15 to ensure a nice
      layout.  */
-  unsigned int vectorizable : 16;
+  unsigned int vectorizable : 15;
 };
diff --git a/gcc/match.pd b/gcc/match.pd
index 194ba8f5188e17056b9c9af790e9725e3e65bff4..c835a3922115c775131160679060fadccbdf1633 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4022,12 +4022,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    trapping behaviour, so require !flag_trapping_math.  */
 #if GIMPLE
 (simplify
-  (float (fix_trunc @0))
-  (if (!flag_trapping_math
-       && types_match (type, TREE_TYPE (@0))
-       && direct_internal_fn_supported_p (IFN_TRUNC, type,
-					  OPTIMIZE_FOR_BOTH))
-   (IFN_TRUNC @0)))
+  (float (fix_trunc@1 @0))
+  (if (types_match (type, TREE_TYPE (@0)))
+   (with {
+     tree int_type = element_type (@1);
+    }
+    (if (TYPE_SIGN (TREE_TYPE (@1)) == SIGNED
+	 && direct_internal_fn_supported_p (IFN_FTRUNC_INT, type, int_type,
+					    OPTIMIZE_FOR_BOTH))
+     (IFN_FTRUNC_INT @0 {
+      wide_int_to_tree (int_type, wi::max_value (TYPE_PRECISION (int_type),
+						 SIGNED)); })
+     (if (!flag_trapping_math
+	  && direct_internal_fn_supported_p (IFN_TRUNC, type,
+					     OPTIMIZE_FOR_BOTH))
+      (IFN_TRUNC @0))))))
 #endif
 
 /* If we have a narrowing conversion to an integral type that is fed by a
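
An illustrative before/after for the rewritten match.pd rule, assuming a
32-bit int and a target that implements ftruncsfsi2:

  _1 = (int) x_3(D);
  _2 = (float) _1;

becomes

  _2 = .FTRUNC_INT (x_3(D), 2147483647);

where the dummy argument is wi::max_value of the 32-bit signed type.  Unlike
the IFN_TRUNC fallback, this arm does not test !flag_trapping_math: per the
md.texi description above, ftruncmn2 must raise the same exception the
separate conversions would raise when the value of operand 1 does not fit.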
diff --git a/gcc/optabs.def b/gcc/optabs.def
index a6db2342bed6baf13ecbd84112c8432c6972e6fe..8c1c681a39b5aad4ee2058739b7676c0c5829ace 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -63,6 +63,7 @@ OPTAB_CX(fractuns_optab, "fractuns$Q$b$I$a2")
 OPTAB_CL(satfract_optab, "satfract$b$Q$a2", SAT_FRACT, "satfract", gen_satfract_conv_libfunc)
 OPTAB_CL(satfractuns_optab, "satfractuns$I$b$Q$a2", UNSIGNED_SAT_FRACT, "satfractuns", gen_satfractuns_conv_libfunc)
 
+OPTAB_CD(ftruncint_optab, "ftrunc$a$b2")
 OPTAB_CD(sfixtrunc_optab, "fix_trunc$F$b$I$a2")
 OPTAB_CD(ufixtrunc_optab, "fixuns_trunc$F$b$I$a2")
diff --git a/gcc/stor-layout.h b/gcc/stor-layout.h
index 22c915909385fd4bc1c68a4f58479322e9e90666..6f78491a8fa6dbb6798c637277f71f4b99eea5cb 100644
--- a/gcc/stor-layout.h
+++ b/gcc/stor-layout.h
@@ -36,7 +36,6 @@ extern void place_field (record_layout_info, tree);
 extern void compute_record_mode (tree);
 extern void finish_bitfield_layout (tree);
 extern void finish_record_layout (record_layout_info, int);
-extern unsigned int element_precision (const_tree);
 extern void finalize_size_functions (void);
 extern void fixup_unsigned_type (tree);
 extern void initialize_sizetypes (void);
diff --git a/gcc/testsuite/gcc.target/aarch64/frintnz.c b/gcc/testsuite/gcc.target/aarch64/frintnz.c
new file mode 100644
index 0000000000000000000000000000000000000000..7a8e53e221e09d3da297f064fa3f4970ad0a4539
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/frintnz.c
@@ -0,0 +1,92 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c99 -O2" } */
+/* { dg-require-effective-target aarch64_frintnzx_ok } */
+/* { dg-add-options aarch64_frintnzx } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** f1:
+**	frint32z	s0, s0
+**	ret
+*/
+float
+f1 (float x)
+{
+  int y = x;
+  return (float) y;
+}
+
+/*
+** f2:
+**	frint64z	s0, s0
+**	ret
+*/
+float
+f2 (float x)
+{
+  long long int y = x;
+  return (float) y;
+}
+
+/*
+** f3:
+**	frint32z	d0, d0
+**	ret
+*/
+double
+f3 (double x)
+{
+  int y = x;
+  return (double) y;
+}
+
+/*
+** f4:
+**	frint64z	d0, d0
+**	ret
+*/
+double
+f4 (double x)
+{
+  long long int y = x;
+  return (double) y;
+}
+
+float
+f1_dont (float x)
+{
+  unsigned int y = x;
+  return (float) y;
+}
+
+float
+f2_dont (float x)
+{
+  unsigned long long int y = x;
+  return (float) y;
+}
+
+double
+f3_dont (double x)
+{
+  unsigned int y = x;
+  return (double) y;
+}
+
+double
+f4_dont (double x)
+{
+  unsigned long long int y = x;
+  return (double) y;
+}
+
+double
+f5_dont (double x)
+{
+  signed short y = x;
+  return (double) y;
+}
+
+/* Make sure the 'dont's don't generate any frintNz.  */
+/* { dg-final { scan-assembler-times {frint32z} 2 } } */
+/* { dg-final { scan-assembler-times {frint64z} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/frintnz_slp.c b/gcc/testsuite/gcc.target/aarch64/frintnz_slp.c
new file mode 100644
index 0000000000000000000000000000000000000000..208a328ce84df3c3ae7654c3db254e81d027c231
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/frintnz_slp.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c99 -O3" } */
+/* { dg-require-effective-target aarch64_frintnzx_ok } */
+/* { dg-add-options aarch64_frintnzx } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define TEST(name,float_type,int_type)					\
+void									\
+name (float_type * __restrict__ x, float_type * __restrict__ y, int n)	\
+{									\
+  for (int i = 0; i < n; i += 2)					\
+    {									\
+      int_type x_i0 = x[i];						\
+      int_type x_i1 = x[i + 1];						\
+      y[i] = (float_type) x_i1;						\
+      y[i + 1] = (float_type) x_i0;					\
+    }									\
+}
+
+/*
+** f1:
+** ...
+**	frint32z	v[0-9]+\.4s, v[0-9]+\.4s
+** ...
+*/
+TEST(f1, float, int)
+
+/*
+** f2:
+** ...
+**	frint64z	v[0-9]+\.4s, v[0-9]+\.4s
+** ...
+*/
+TEST(f2, float, long long)
+
+/*
+** f3:
+** ...
+**	frint32z	v[0-9]+\.2d, v[0-9]+\.2d
+** ...
+*/
+TEST(f3, double, int)
+
+/*
+** f4:
+** ...
+**	frint64z	v[0-9]+\.2d, v[0-9]+\.2d
+** ...
+*/
+TEST(f4, double, long long)
diff --git a/gcc/testsuite/gcc.target/aarch64/frintnz_vec.c b/gcc/testsuite/gcc.target/aarch64/frintnz_vec.c
new file mode 100644
index 0000000000000000000000000000000000000000..52232cb02649a3c3f65ab2fad13fdbd7ff9a0524
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/frintnz_vec.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c99 -O3" } */
+/* { dg-require-effective-target aarch64_frintnzx_ok } */
+/* { dg-add-options aarch64_frintnzx } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define TEST(name,float_type,int_type)					\
+void									\
+name (float_type * __restrict__ x, float_type * __restrict__ y, int n)	\
+{									\
+  for (int i = 0; i < n; ++i)						\
+    {									\
+      int_type x_i = x[i];						\
+      y[i] = (float_type) x_i;						\
+    }									\
+}
+
+/*
+** f1:
+** ...
+**	frint32z	v[0-9]+\.4s, v[0-9]+\.4s
+** ...
+*/
+TEST(f1, float, int)
+
+/*
+** f2:
+** ...
+**	frint64z	v[0-9]+\.4s, v[0-9]+\.4s
+** ...
+*/
+TEST(f2, float, long long)
+
+/*
+** f3:
+** ...
+**	frint32z	v[0-9]+\.2d, v[0-9]+\.2d
+** ...
+*/
+TEST(f3, double, int)
+
+/*
+** f4:
+** ...
+**	frint64z	v[0-9]+\.2d, v[0-9]+\.2d
+** ...
+*/
+TEST(f4, double, long long)
diff --git a/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c b/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
index 07217064e2ba54fcf4f5edc440e6ec19ddae66e1..3d80871c4cebd5fb5cac0714b3feee27038f05fd 100644
--- a/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
+++ b/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -ffast-math" } */
+/* { dg-skip-if "" { aarch64_frintnzx_ok } } */
 
 float
 f1 (float x)
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 750897d085480d791010c593b81e6910df246169..b76e7371d5c0c37d0b79eabd374ea8178af0c5dc 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -11729,6 +11729,45 @@ proc check_effective_target_arm_v8_3a_bkey_directive { } {
     }]
 }
 
+
+proc add_options_for_aarch64_frintnzx { flags } {
+    if { ! [check_effective_target_aarch64_frintnzx_ok] } {
+	return "$flags"
+    }
+    global et_aarch64_frintnzx_flags
+    return "$flags $et_aarch64_frintnzx_flags"
+}
+
+# Return 1 if the target supports Armv8.5-A scalar and Advanced SIMD
+# FRINT32[ZX] and FRINT64[ZX] instructions, 0 otherwise.  The test is valid
+# for AArch64.
+proc check_effective_target_aarch64_frintnzx_ok_nocache { } {
+    global et_aarch64_frintnzx_flags
+    set et_aarch64_frintnzx_flags ""
+    if { ![istarget aarch64*-*-*] } {
+	return 0;
+    }
+
+    foreach flags {"" "-march=armv8.5-a"} {
+	if { [check_no_compiler_messages_nocache \
+		  aarch64_frintnzx_ok assembly {
+		      #if !defined (__ARM_FEATURE_FRINT)
+		      #error "__ARM_FEATURE_FRINT not defined"
+		      #endif
+		  } $flags ] } {
+	    set et_aarch64_frintnzx_flags $flags
+	    return 1;
+	}
+    }
+
+    return 0;
+}
+
+proc check_effective_target_aarch64_frintnzx_ok { } {
+    return [check_cached_effective_target aarch64_frintnzx_ok \
+		check_effective_target_aarch64_frintnzx_ok_nocache]
+}
+
 # Return 1 if the target supports executing the Armv8.1-M Mainline Low
 # Overhead Loop, 0 otherwise.  The test is valid for ARM.
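
A usage sketch: a test gates and enables these instructions the same way the
new tests above do,

  /* { dg-require-effective-target aarch64_frintnzx_ok } */
  /* { dg-add-options aarch64_frintnzx } */

where add_options_for_aarch64_frintnzx appends -march=armv8.5-a only if the
default flags do not already define __ARM_FEATURE_FRINT.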
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index e54414f6befadcea95419bf9b84904b9cb4245b9..3d01e0506699b48b6e64c9fea7d37571292cbe68 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -6081,6 +6081,35 @@ vect_prologue_cost_for_slp (slp_tree node,
     }
 }
 
+/* Check whether this NODE contains statements with an expected scalar
+   argument at INDEX.  */
+
+static bool
+check_scalar_arg_ok (slp_tree node, int index)
+{
+  if (index != 1)
+    return false;
+
+  enum stmt_vec_info_type stmt_type
+    = STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node));
+
+  if (stmt_type == shift_vec_info_type)
+    return true;
+  else if (stmt_type == call_vec_info_type)
+    {
+      combined_fn cfn
+	= gimple_call_combined_fn (SLP_TREE_REPRESENTATIVE (node)->stmt);
+      if (!internal_fn_p (cfn))
+	return false;
+      internal_fn ifn = as_internal_fn (cfn);
+
+      return direct_internal_fn_p (ifn)
+	     && direct_internal_fn (ifn).type1_is_scalar_p;
+    }
+
+  return false;
+}
+
 /* Analyze statements contained in SLP tree NODE after recursively analyzing
    the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
 
@@ -6180,10 +6209,10 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
 	{
 	  /* For shifts with a scalar argument we don't need
 	     to cost or code-generate anything.
+	     The same is true for internal functions where
+	     type1_is_scalar_p.
 	     ??? Represent this more explicitly.  */
-	  gcc_assert ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node))
-		       == shift_vec_info_type)
-		      && j == 1);
+	  gcc_assert (check_scalar_arg_ok (node, j));
 	  continue;
 	}
       unsigned group_size = SLP_TREE_LANES (child);
@@ -8064,6 +8093,7 @@ vect_get_slp_defs (vec_info *,
   if (n == -1U)
     n = SLP_TREE_CHILDREN (slp_node).length ();
 
+  vec_oprnds->reserve (n);
   for (unsigned i = 0; i < n; ++i)
     {
       slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
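
An illustrative sketch, not part of the patch: in an SLP node built from a
group of calls such as

  y_1 = .FTRUNC_INT (a_1, 2147483647);
  y_2 = .FTRUNC_INT (a_2, 2147483647);

the child holding the dummy second arguments needs no costing or code
generation, exactly like the scalar shift amount of a vector shift; the
reworked assert accepts it because IFN_FTRUNC_INT is a direct internal
function with type1_is_scalar_p set.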
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 4e0d75e0d7586ad57a37850d8a70f6182ecb13d0..e77c43efdffba7d7d8b5c625acd6eb333b1ebd11 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1654,13 +1654,16 @@ vect_finish_stmt_generation (vec_info *vinfo,
 }
 
 /* We want to vectorize a call to combined function CFN with function
-   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
-   as the types of all inputs.  Check whether this is possible using
-   an internal function, returning its code if so or IFN_LAST if not.  */
+   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPES to
+   find the type of each argument, as recorded in the direct_internal_fn_info;
+   where no type is recorded there, use VECTYPE_IN instead.  Check whether
+   vectorizing this call is possible using an internal function, returning
+   its code if so or IFN_LAST if not.  */
 
 static internal_fn
 vectorizable_internal_function (combined_fn cfn, tree fndecl,
-				tree vectype_out, tree vectype_in)
+				tree vectype_out, tree vectype_in,
+				tree *vectypes)
 {
   internal_fn ifn;
   if (internal_fn_p (cfn))
@@ -1672,8 +1675,12 @@ vectorizable_internal_function (combined_fn cfn, tree fndecl,
       const direct_internal_fn_info &info = direct_internal_fn (ifn);
       if (info.vectorizable)
	{
-	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
-	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
+	  tree type0 = (info.type0 < 0 ? vectype_out : vectypes[info.type0]);
+	  if (!type0)
+	    type0 = vectype_in;
+	  tree type1 = (info.type1 < 0 ? vectype_out : vectypes[info.type1]);
+	  if (!type1)
+	    type1 = vectype_in;
	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
					      OPTIMIZE_FOR_SPEED))
	    return ifn;
@@ -3259,6 +3266,23 @@ simple_integer_narrowing (tree vectype_out, tree vectype_in,
   return true;
 }
 
+/* Function vect_get_scalar_oprnds.
+
+   This is a helper function for vectorizable_call to fill VEC_CSTS with the
+   ARGNO'th argument of the calls in SLP_NODE.  */
+
+static void
+vect_get_scalar_oprnds (slp_tree slp_node, int argno, vec<tree> *vec_csts)
+{
+  unsigned j;
+  stmt_vec_info stmt_vinfo;
+  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), j, stmt_vinfo)
+    {
+      gcc_assert (gimple_code (stmt_vinfo->stmt) == GIMPLE_CALL);
+      vec_csts->safe_push (gimple_call_arg (stmt_vinfo->stmt, argno));
+    }
+}
+
 /* Function vectorizable_call.
 
    Check if STMT_INFO performs a function call that can be vectorized.
@@ -3340,9 +3364,20 @@ vectorizable_call (vec_info *vinfo,
       rhs_type = unsigned_type_node;
     }
 
+  /* The argument that is not of the same type as the others.  */
   int mask_opno = -1;
+  int scalar_opno = -1;
   if (internal_fn_p (cfn))
-    mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
+    {
+      internal_fn ifn = as_internal_fn (cfn);
+      if (direct_internal_fn_p (ifn)
+	  && direct_internal_fn (ifn).type1_is_scalar_p)
+	scalar_opno = direct_internal_fn (ifn).type1;
+      else
+	/* For masked operations this represents the argument that carries the
+	   mask.  */
+	mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
+    }
 
   for (i = 0; i < nargs; i++)
     {
@@ -3353,6 +3388,11 @@ vectorizable_call (vec_info *vinfo,
 	    return false;
 	  continue;
 	}
+      else if ((int) i == scalar_opno)
+	{
+	  vectypes[i] = TREE_TYPE (gimple_call_arg (stmt, i));
+	  continue;
+	}
 
       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, i,
 			       &op, &slp_op[i], &dt[i], &vectypes[i]))
@@ -3467,8 +3507,8 @@ vectorizable_call (vec_info *vinfo,
       || (modifier == NARROW
	  && simple_integer_narrowing (vectype_out, vectype_in,
				       &convert_code))))
-    ifn = vectorizable_internal_function (cfn, callee, vectype_out,
-					  vectype_in);
+    ifn = vectorizable_internal_function (cfn, callee, vectype_out, vectype_in,
+					  &vectypes[0]);
 
   /* If that fails, try asking for a target-specific built-in function.  */
   if (ifn == IFN_LAST)
@@ -3608,6 +3648,10 @@ vectorizable_call (vec_info *vinfo,
 	  vect_get_slp_defs (vinfo, slp_node, &vec_defs);
 	  vec_oprnds0 = vec_defs[0];
+	  unsigned int children_n = SLP_TREE_CHILDREN (slp_node).length ();
+	  auto_vec<tree> scalar_defs (children_n);
+	  if (scalar_opno > -1)
+	    vect_get_scalar_oprnds (slp_node, scalar_opno, &scalar_defs);
 
 	  /* Arguments are ready.  Create the new vector stmt.  */
 	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
@@ -3624,8 +3668,15 @@ vectorizable_call (vec_info *vinfo,
 	      size_t k;
 	      for (k = 0; k < nargs; k++)
 		{
-		  vec<tree> vec_oprndsk = vec_defs[k];
-		  vargs[varg++] = vec_oprndsk[i];
+		  tree operand;
+		  if (scalar_opno == (int) k)
+		    operand = scalar_defs[i];
+		  else
+		    {
+		      vec<tree> vec_oprndsk = vec_defs[k];
+		      operand = vec_oprndsk[i];
+		    }
+		  vargs[varg++] = operand;
 		}
 	      if (masked_loop_p && reduc_idx >= 0)
 		vargs[varg++] = vargs[reduc_idx + 1];
diff --git a/gcc/tree.h b/gcc/tree.h
index d6a5fdf6d81bf10044249c015083e6db8b35b519..42b2ad74d260041118f079f05083f7498a60fba4 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -6698,4 +6698,12 @@ extern tree get_attr_nonstring_decl (tree, tree * = NULL);
 
 extern int get_target_clone_attr_len (tree);
 
+/* Return the type, or for a complex or vector type the type of its
+   elements.  */
+extern tree element_type (tree);
+
+/* Return the precision of the type, or for a complex or vector type the
+   precision of the type of its elements.  */
+extern unsigned int element_precision (const_tree);
+
 #endif /* GCC_TREE_H */
diff --git a/gcc/tree.cc b/gcc/tree.cc
index 172098787dd924ec23101e7495cf0e67ca47d787..127c2d1fad3fe488ff4d60119ce0ec8c13a78528 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -6699,11 +6699,11 @@ valid_constant_size_p (const_tree size, cst_size_error *perr /* = NULL */)
   return true;
 }
 
-/* Return the precision of the type, or for a complex or vector type the
-   precision of the type of its elements.  */
+/* Return the type, or for a complex or vector type the type of its
+   elements.  */
 
-unsigned int
-element_precision (const_tree type)
+tree
+element_type (tree type)
 {
   if (!TYPE_P (type))
     type = TREE_TYPE (type);
@@ -6711,7 +6711,16 @@ element_precision (const_tree type)
   if (code == COMPLEX_TYPE || code == VECTOR_TYPE)
     type = TREE_TYPE (type);
 
-  return TYPE_PRECISION (type);
+  return type;
+}
+
+/* Return the precision of the type, or for a complex or vector type the
+   precision of the type of its elements.  */
+
+unsigned int
+element_precision (const_tree type)
+{
+  return TYPE_PRECISION (element_type (const_cast<tree> (type)));
+}
 
 /* Return true if CODE represents an associative tree code.  Otherwise