diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index bfc98a8d943467b33390defab9682f44efab5907..ffbbecb9409e1c2835d658c2a8855cd0e955c0f2 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -4626,7 +4626,7 @@ [(set_attr "type" "neon__long")] ) -(define_expand "vec_widen_addl_lo_" +(define_expand "vec_widen_add_lo_" [(match_operand: 0 "register_operand") (ANY_EXTEND: (match_operand:VQW 1 "register_operand")) (ANY_EXTEND: (match_operand:VQW 2 "register_operand"))] @@ -4638,7 +4638,7 @@ DONE; }) -(define_expand "vec_widen_addl_hi_" +(define_expand "vec_widen_add_hi_" [(match_operand: 0 "register_operand") (ANY_EXTEND: (match_operand:VQW 1 "register_operand")) (ANY_EXTEND: (match_operand:VQW 2 "register_operand"))] @@ -4650,7 +4650,7 @@ DONE; }) -(define_expand "vec_widen_subl_lo_" +(define_expand "vec_widen_sub_lo_" [(match_operand: 0 "register_operand") (ANY_EXTEND: (match_operand:VQW 1 "register_operand")) (ANY_EXTEND: (match_operand:VQW 2 "register_operand"))] @@ -4662,7 +4662,7 @@ DONE; }) -(define_expand "vec_widen_subl_hi_" +(define_expand "vec_widen_sub_hi_" [(match_operand: 0 "register_operand") (ANY_EXTEND: (match_operand:VQW 1 "register_operand")) (ANY_EXTEND: (match_operand:VQW 2 "register_operand"))] diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi index 8b2882da4fe7da07d22b4e5384d049ba7d3907bf..5e36dac2b1a10257616f12cdfb0b12d0f2879ae9 100644 --- a/gcc/doc/generic.texi +++ b/gcc/doc/generic.texi @@ -1811,10 +1811,16 @@ a value from @code{enum annot_expr_kind}, the third is an @code{INTEGER_CST}. 
@tindex VEC_RSHIFT_EXPR @tindex VEC_WIDEN_MULT_HI_EXPR @tindex VEC_WIDEN_MULT_LO_EXPR -@tindex VEC_WIDEN_PLUS_HI_EXPR -@tindex VEC_WIDEN_PLUS_LO_EXPR -@tindex VEC_WIDEN_MINUS_HI_EXPR -@tindex VEC_WIDEN_MINUS_LO_EXPR +@tindex IFN_VEC_WIDEN_PLUS +@tindex IFN_VEC_WIDEN_PLUS_HI +@tindex IFN_VEC_WIDEN_PLUS_LO +@tindex IFN_VEC_WIDEN_PLUS_EVEN +@tindex IFN_VEC_WIDEN_PLUS_ODD +@tindex IFN_VEC_WIDEN_MINUS +@tindex IFN_VEC_WIDEN_MINUS_HI +@tindex IFN_VEC_WIDEN_MINUS_LO +@tindex IFN_VEC_WIDEN_MINUS_EVEN +@tindex IFN_VEC_WIDEN_MINUS_ODD @tindex VEC_UNPACK_HI_EXPR @tindex VEC_UNPACK_LO_EXPR @tindex VEC_UNPACK_FLOAT_HI_EXPR @@ -1861,6 +1867,82 @@ vector of @code{N/2} products. In the case of @code{VEC_WIDEN_MULT_LO_EXPR} the low @code{N/2} elements of the two vector are multiplied to produce the vector of @code{N/2} products. +@item IFN_VEC_WIDEN_PLUS +This internal function represents widening vector addition of two input +vectors. Its operands are vectors that contain the same number of elements +(@code{N}) of the same integral type. The result is a vector that contains +the same number (@code{N}) of elements as the input vectors, of an integral +type whose size is twice as wide. If the current target does not implement the +corresponding optabs the vectorizer may choose to split it into either a pair +of @code{IFN_VEC_WIDEN_PLUS_HI} and @code{IFN_VEC_WIDEN_PLUS_LO} or +@code{IFN_VEC_WIDEN_PLUS_EVEN} and @code{IFN_VEC_WIDEN_PLUS_ODD}, depending +on what optabs the target implements. + +@item IFN_VEC_WIDEN_PLUS_HI +@itemx IFN_VEC_WIDEN_PLUS_LO +These internal functions represent widening vector addition of the high and low +parts of the two input vectors, respectively. Their operands are vectors that +contain the same number of elements (@code{N}) of the same integral type. The +result is a vector that contains half as many elements, of an integral type +whose size is twice as wide.
In the case of @code{IFN_VEC_WIDEN_PLUS_HI} the +high @code{N/2} elements of the two vectors are added to produce the vector of +@code{N/2} additions. In the case of @code{IFN_VEC_WIDEN_PLUS_LO} the low +@code{N/2} elements of the two vectors are added to produce the vector of +@code{N/2} additions. + +@item IFN_VEC_WIDEN_PLUS_EVEN +@itemx IFN_VEC_WIDEN_PLUS_ODD +These internal functions represent widening vector addition of the even and odd +elements of the two input vectors, respectively. Their operands are vectors +that contain the same number of elements (@code{N}) of the same integral type. +The result is a vector that contains half as many elements, of an integral type +whose size is twice as wide. In the case of @code{IFN_VEC_WIDEN_PLUS_EVEN} the +even @code{N/2} elements of the two vectors are added to produce the vector of +@code{N/2} additions. In the case of @code{IFN_VEC_WIDEN_PLUS_ODD} the odd +@code{N/2} elements of the two vectors are added to produce the vector of +@code{N/2} additions. + +@item IFN_VEC_WIDEN_MINUS +This internal function represents widening vector subtraction of two input +vectors. Its operands are vectors that contain the same number of elements +(@code{N}) of the same integral type. The result is a vector that contains +the same number (@code{N}) of elements as the input vectors, of an integral +type whose size is twice as wide. If the current target does not implement the +corresponding optabs the vectorizer may choose to split it into either a pair +of @code{IFN_VEC_WIDEN_MINUS_HI} and @code{IFN_VEC_WIDEN_MINUS_LO} or +@code{IFN_VEC_WIDEN_MINUS_EVEN} and @code{IFN_VEC_WIDEN_MINUS_ODD}, depending +on what optabs the target implements. + +@item IFN_VEC_WIDEN_MINUS_HI +@itemx IFN_VEC_WIDEN_MINUS_LO +These internal functions represent widening vector subtraction of the high and +low parts of the two input vectors, respectively.
Their operands are vectors +that contain the same number of elements (@code{N}) of the same integral type. +The high/low elements of the second vector are subtracted from the high/low +elements of the first. The result is a vector that contains half as many +elements, of an integral type whose size is twice as wide. In the case of +@code{IFN_VEC_WIDEN_MINUS_HI} the high @code{N/2} elements of the second +vector are subtracted from the high @code{N/2} of the first to produce the +vector of @code{N/2} subtractions. In the case of +@code{IFN_VEC_WIDEN_MINUS_LO} the low @code{N/2} elements of the second +vector are subtracted from the low @code{N/2} of the first to produce the +vector of @code{N/2} subtractions. + +@item IFN_VEC_WIDEN_MINUS_EVEN +@itemx IFN_VEC_WIDEN_MINUS_ODD +These internal functions represent widening vector subtraction of the even and +odd parts of the two input vectors, respectively. Their operands are vectors +that contain the same number of elements (@code{N}) of the same integral type. +The even/odd elements of the second vector are subtracted from the even/odd +elements of the first. The result is a vector that contains half as many +elements, of an integral type whose size is twice as wide. In the case of +@code{IFN_VEC_WIDEN_MINUS_EVEN} the even @code{N/2} elements of the second +vector are subtracted from the even @code{N/2} of the first to produce the +vector of @code{N/2} subtractions. In the case of +@code{IFN_VEC_WIDEN_MINUS_ODD} the odd @code{N/2} elements of the second +vector are subtracted from the odd @code{N/2} of the first to produce the +vector of @code{N/2} subtractions. 
+ @item VEC_WIDEN_PLUS_HI_EXPR @itemx VEC_WIDEN_PLUS_LO_EXPR These nodes represent widening vector addition of the high and low parts of diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc index 594bd3043f0e944299ddfff219f757ef15a3dd61..33f4b7064a2a22aad49f27b24b409e91a5b89c69 100644 --- a/gcc/gimple-range-op.cc +++ b/gcc/gimple-range-op.cc @@ -1187,6 +1187,7 @@ gimple_range_op_handler::maybe_non_standard () { range_operator *signed_op = ptr_op_widen_mult_signed; range_operator *unsigned_op = ptr_op_widen_mult_unsigned; + bool signed1, signed2, signed_ret; if (gimple_code (m_stmt) == GIMPLE_ASSIGN) switch (gimple_assign_rhs_code (m_stmt)) { @@ -1202,32 +1203,55 @@ gimple_range_op_handler::maybe_non_standard () m_op1 = gimple_assign_rhs1 (m_stmt); m_op2 = gimple_assign_rhs2 (m_stmt); tree ret = gimple_assign_lhs (m_stmt); - bool signed1 = TYPE_SIGN (TREE_TYPE (m_op1)) == SIGNED; - bool signed2 = TYPE_SIGN (TREE_TYPE (m_op2)) == SIGNED; - bool signed_ret = TYPE_SIGN (TREE_TYPE (ret)) == SIGNED; - - /* Normally these operands should all have the same sign, but - some passes and violate this by taking mismatched sign args. At - the moment the only one that's possible is mismatch inputs and - unsigned output. Once ranger supports signs for the operands we - can properly fix it, for now only accept the case we can do - correctly. 
*/ - if ((signed1 ^ signed2) && signed_ret) - return; - - m_valid = true; - if (signed2 && !signed1) - std::swap (m_op1, m_op2); - - if (signed1 || signed2) - m_int = signed_op; - else - m_int = unsigned_op; + signed1 = TYPE_SIGN (TREE_TYPE (m_op1)) == SIGNED; + signed2 = TYPE_SIGN (TREE_TYPE (m_op2)) == SIGNED; + signed_ret = TYPE_SIGN (TREE_TYPE (ret)) == SIGNED; break; } default: - break; + return; + } + else if (gimple_code (m_stmt) == GIMPLE_CALL + && gimple_call_internal_p (m_stmt) + && gimple_get_lhs (m_stmt) != NULL_TREE) + switch (gimple_call_internal_fn (m_stmt)) + { + case IFN_VEC_WIDEN_PLUS_LO: + case IFN_VEC_WIDEN_PLUS_HI: + { + signed_op = ptr_op_widen_plus_signed; + unsigned_op = ptr_op_widen_plus_unsigned; + m_valid = false; + m_op1 = gimple_call_arg (m_stmt, 0); + m_op2 = gimple_call_arg (m_stmt, 1); + tree ret = gimple_get_lhs (m_stmt); + signed1 = TYPE_SIGN (TREE_TYPE (m_op1)) == SIGNED; + signed2 = TYPE_SIGN (TREE_TYPE (m_op2)) == SIGNED; + signed_ret = TYPE_SIGN (TREE_TYPE (ret)) == SIGNED; + break; + } + default: + return; } + else + return; + + /* Normally these operands should all have the same sign, but some passes + violate this by taking mismatched sign args. At the moment the only + one that's possible is mismatched inputs and unsigned output. Once ranger + supports signs for the operands we can properly fix it; for now only + accept the case we can do correctly. */ + if ((signed1 ^ signed2) && signed_ret) + return; + + m_valid = true; + if (signed2 && !signed1) + std::swap (m_op1, m_op2); + + if (signed1 || signed2) + m_int = signed_op; + else + m_int = unsigned_op; } // Set up a gimple_range_op_handler for any built in function which can be diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 5c9da73ea11f8060b18dcf513599c9694fa4f2ad..348bee35a35ae4ed9a8652f5349f430c2733e1cb 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -90,6 +90,71 @@ lookup_internal_fn (const char *name) return entry ?
*entry : IFN_LAST; } +/* Given an internal_fn IFN that is either a widening or narrowing function, return its + corresponding LO and HI internal_fns. */ + +extern void +lookup_hilo_internal_fn (internal_fn ifn, internal_fn *lo, internal_fn *hi) +{ + gcc_assert (widening_fn_p (ifn) || narrowing_fn_p (ifn)); + + switch (ifn) + { + default: + gcc_unreachable (); +#undef DEF_INTERNAL_FN +#undef DEF_INTERNAL_WIDENING_OPTAB_FN +#undef DEF_INTERNAL_NARROWING_OPTAB_FN +#define DEF_INTERNAL_FN(NAME, FLAGS, TYPE) +#define DEF_INTERNAL_WIDENING_OPTAB_FN(NAME, F, S, SO, UO, T) \ + case IFN_##NAME: \ + *lo = internal_fn (IFN_##NAME##_LO); \ + *hi = internal_fn (IFN_##NAME##_HI); \ + break; +#define DEF_INTERNAL_NARROWING_OPTAB_FN(NAME, F, O, T) \ + case IFN_##NAME: \ + *lo = internal_fn (IFN_##NAME##_LO); \ + *hi = internal_fn (IFN_##NAME##_HI); \ + break; +#include "internal-fn.def" +#undef DEF_INTERNAL_FN +#undef DEF_INTERNAL_WIDENING_OPTAB_FN +#undef DEF_INTERNAL_NARROWING_OPTAB_FN + } +} + +extern void +lookup_evenodd_internal_fn (internal_fn ifn, internal_fn *even, + internal_fn *odd) +{ + gcc_assert (widening_fn_p (ifn) || narrowing_fn_p (ifn)); + + switch (ifn) + { + default: + gcc_unreachable (); +#undef DEF_INTERNAL_FN +#undef DEF_INTERNAL_WIDENING_OPTAB_FN +#undef DEF_INTERNAL_NARROWING_OPTAB_FN +#define DEF_INTERNAL_FN(NAME, FLAGS, TYPE) +#define DEF_INTERNAL_WIDENING_OPTAB_FN(NAME, F, S, SO, UO, T) \ + case IFN_##NAME: \ + *even = internal_fn (IFN_##NAME##_EVEN); \ + *odd = internal_fn (IFN_##NAME##_ODD); \ + break; +#define DEF_INTERNAL_NARROWING_OPTAB_FN(NAME, F, O, T) \ + case IFN_##NAME: \ + *even = internal_fn (IFN_##NAME##_EVEN); \ + *odd = internal_fn (IFN_##NAME##_ODD); \ + break; +#include "internal-fn.def" +#undef DEF_INTERNAL_FN +#undef DEF_INTERNAL_WIDENING_OPTAB_FN +#undef DEF_INTERNAL_NARROWING_OPTAB_FN + } +} + + /* Fnspec of each internal function, indexed by function number. 
*/ const_tree internal_fn_fnspec_array[IFN_LAST + 1]; @@ -3852,7 +3917,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, /* Return the optab used by internal function FN. */ -static optab +optab direct_internal_fn_optab (internal_fn fn, tree_pair types) { switch (fn) @@ -3971,6 +4036,9 @@ commutative_binary_fn_p (internal_fn fn) case IFN_UBSAN_CHECK_MUL: case IFN_ADD_OVERFLOW: case IFN_MUL_OVERFLOW: + case IFN_VEC_WIDEN_PLUS: + case IFN_VEC_WIDEN_PLUS_LO: + case IFN_VEC_WIDEN_PLUS_HI: return true; default: @@ -4044,6 +4112,68 @@ first_commutative_argument (internal_fn fn) } } +/* Return true if this CODE describes an internal_fn that returns a vector with + elements twice as wide as the element size of the input vectors. */ + +bool +widening_fn_p (code_helper code) +{ + if (!code.is_fn_code ()) + return false; + + if (!internal_fn_p ((combined_fn) code)) + return false; + + internal_fn fn = as_internal_fn ((combined_fn) code); + switch (fn) + { + #undef DEF_INTERNAL_WIDENING_OPTAB_FN + #define DEF_INTERNAL_WIDENING_OPTAB_FN(NAME, F, S, SO, UO, T) \ + case IFN_##NAME: \ + case IFN_##NAME##_HI: \ + case IFN_##NAME##_LO: \ + case IFN_##NAME##_EVEN: \ + case IFN_##NAME##_ODD: \ + return true; + #include "internal-fn.def" + #undef DEF_INTERNAL_WIDENING_OPTAB_FN + + default: + return false; + } +} + +/* Return true if this CODE describes an internal_fn that returns a vector with + elements twice as narrow as the element size of the input vectors. 
*/ + +bool +narrowing_fn_p (code_helper code) +{ + if (!code.is_fn_code ()) + return false; + + if (!internal_fn_p ((combined_fn) code)) + return false; + + internal_fn fn = as_internal_fn ((combined_fn) code); + switch (fn) + { + #undef DEF_INTERNAL_NARROWING_OPTAB_FN + #define DEF_INTERNAL_NARROWING_OPTAB_FN(NAME, F, O, T) \ + case IFN_##NAME: \ + case IFN_##NAME##_HI: \ + case IFN_##NAME##_LO: \ + case IFN_##NAME##_EVEN: \ + case IFN_##NAME##_ODD: \ + return true; + #include "internal-fn.def" + #undef DEF_INTERNAL_NARROWING_OPTAB_FN + + default: + return false; + } +} + /* Return true if IFN_SET_EDOM is supported. */ bool @@ -4072,6 +4202,8 @@ set_edom_supported_p (void) expand_##TYPE##_optab_fn (fn, stmt, which_optab); \ } #include "internal-fn.def" +#undef DEF_INTERNAL_OPTAB_FN +#undef DEF_INTERNAL_SIGNED_OPTAB_FN /* Routines to expand each internal function, indexed by function number. Each routine has the prototype: @@ -4080,6 +4212,7 @@ set_edom_supported_p (void) where STMT is the statement that performs the call. */ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = { + #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) expand_##CODE, #include "internal-fn.def" 0 diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 7fe742c2ae713e7152ab05cfdfba86e4e0aa3456..e9edaa201ad4ad171a49119efa9d6bff49add9f4 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -85,6 +85,34 @@ along with GCC; see the file COPYING3. If not see says that the function extends the C-level BUILT_IN_{,L,LL,IMAX} group of functions to any integral mode (including vector modes).
+ DEF_INTERNAL_WIDENING_OPTAB_FN is a wrapper that defines five internal + functions with DEF_INTERNAL_SIGNED_OPTAB_FN: + - one that describes a widening operation with the same number of elements + in the output and input vectors, + - two that describe a pair of high-low widening operations where the output + vectors each have half the number of elements of the input vectors, + corresponding to the result of the widening operation on the top half and + bottom half, these have the suffixes _HI and _LO, + - and two that describe a pair of even-odd widening operations where the + output vectors each have half the number of elements of the input vectors, + corresponding to the result of the widening operation on the even and odd + elements, these have the suffixes _EVEN and _ODD. + These five internal functions will require two optabs each, a SIGNED_OPTAB + and an UNSIGNED_OPTAB. + + DEF_INTERNAL_NARROWING_OPTAB_FN is a wrapper that defines five internal + functions with DEF_INTERNAL_OPTAB_FN: + - one that describes a narrowing operation with the same number of elements + in the output and input vectors, + - two that describe a pair of high-low narrowing operations where the output + vector has the same number of elements in the top or bottom halves as the + full input vectors, these have the suffixes _HI and _LO, + - and two that describe a pair of even-odd narrowing operations where the + output vector has the same number of elements, in the even or odd positions, + as the full input vectors, these have the suffixes _EVEN and _ODD. + These five internal functions will require an optab each. + + Each entry must have a corresponding expander of the form: void expand_NAME (gimple_call stmt) @@ -123,6 +151,24 @@ along with GCC; see the file COPYING3.
If not see DEF_INTERNAL_OPTAB_FN (NAME, FLAGS, OPTAB, TYPE) #endif +#ifndef DEF_INTERNAL_WIDENING_OPTAB_FN +#define DEF_INTERNAL_WIDENING_OPTAB_FN(NAME, FLAGS, SELECTOR, SOPTAB, UOPTAB, TYPE) \ + DEF_INTERNAL_SIGNED_OPTAB_FN (NAME, FLAGS, SELECTOR, SOPTAB, UOPTAB, TYPE) \ + DEF_INTERNAL_SIGNED_OPTAB_FN (NAME ## _LO, FLAGS, SELECTOR, SOPTAB##_lo, UOPTAB##_lo, TYPE) \ + DEF_INTERNAL_SIGNED_OPTAB_FN (NAME ## _HI, FLAGS, SELECTOR, SOPTAB##_hi, UOPTAB##_hi, TYPE) \ + DEF_INTERNAL_SIGNED_OPTAB_FN (NAME ## _EVEN, FLAGS, SELECTOR, SOPTAB##_even, UOPTAB##_even, TYPE) \ + DEF_INTERNAL_SIGNED_OPTAB_FN (NAME ## _ODD, FLAGS, SELECTOR, SOPTAB##_odd, UOPTAB##_odd, TYPE) +#endif + +#ifndef DEF_INTERNAL_NARROWING_OPTAB_FN +#define DEF_INTERNAL_NARROWING_OPTAB_FN(NAME, FLAGS, OPTAB, TYPE) \ + DEF_INTERNAL_OPTAB_FN (NAME, FLAGS, OPTAB, TYPE) \ + DEF_INTERNAL_OPTAB_FN (NAME ## _LO, FLAGS, OPTAB##_lo, TYPE) \ + DEF_INTERNAL_OPTAB_FN (NAME ## _HI, FLAGS, OPTAB##_hi, TYPE) \ + DEF_INTERNAL_OPTAB_FN (NAME ## _EVEN, FLAGS, OPTAB##_even, TYPE) \ + DEF_INTERNAL_OPTAB_FN (NAME ## _ODD, FLAGS, OPTAB##_odd, TYPE) +#endif + DEF_INTERNAL_OPTAB_FN (MASK_LOAD, ECF_PURE, maskload, mask_load) DEF_INTERNAL_OPTAB_FN (LOAD_LANES, ECF_CONST, vec_load_lanes, load_lanes) DEF_INTERNAL_OPTAB_FN (MASK_LOAD_LANES, ECF_PURE, @@ -315,6 +361,16 @@ DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT270, ECF_CONST, cadd270, binary) DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL, ECF_CONST, cmul, binary) DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL_CONJ, ECF_CONST, cmul_conj, binary) DEF_INTERNAL_OPTAB_FN (VEC_ADDSUB, ECF_CONST, vec_addsub, binary) +DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_PLUS, + ECF_CONST | ECF_NOTHROW, + first, + vec_widen_sadd, vec_widen_uadd, + binary) +DEF_INTERNAL_WIDENING_OPTAB_FN (VEC_WIDEN_MINUS, + ECF_CONST | ECF_NOTHROW, + first, + vec_widen_ssub, vec_widen_usub, + binary) DEF_INTERNAL_OPTAB_FN (VEC_FMADDSUB, ECF_CONST, vec_fmaddsub, ternary) DEF_INTERNAL_OPTAB_FN (VEC_FMSUBADD, ECF_CONST, vec_fmsubadd, ternary) diff 
--git a/gcc/internal-fn.h b/gcc/internal-fn.h index 08922ed4254898f5fffca3f33973e96ed9ce772f..3904ba3ca36949d844532a6a9303f550533311a4 100644 --- a/gcc/internal-fn.h +++ b/gcc/internal-fn.h @@ -20,6 +20,10 @@ along with GCC; see the file COPYING3. If not see #ifndef GCC_INTERNAL_FN_H #define GCC_INTERNAL_FN_H +#include "insn-codes.h" +#include "insn-opinit.h" + + /* INTEGER_CST values for IFN_UNIQUE function arg-0. UNSPEC: Undifferentiated UNIQUE. @@ -112,6 +116,10 @@ internal_fn_name (enum internal_fn fn) } extern internal_fn lookup_internal_fn (const char *); +extern void lookup_hilo_internal_fn (internal_fn, internal_fn *, internal_fn *); +extern void lookup_evenodd_internal_fn (internal_fn, internal_fn *, + internal_fn *); +extern optab direct_internal_fn_optab (internal_fn, tree_pair); /* Return the ECF_* flags for function FN. */ @@ -210,6 +218,8 @@ extern bool commutative_binary_fn_p (internal_fn); extern bool commutative_ternary_fn_p (internal_fn); extern int first_commutative_argument (internal_fn); extern bool associative_binary_fn_p (internal_fn); +extern bool widening_fn_p (code_helper); +extern bool narrowing_fn_p (code_helper); extern bool set_edom_supported_p (void); diff --git a/gcc/optabs.cc b/gcc/optabs.cc index c8e39c82d57a7d726e7da33d247b80f32ec9236c..5a08d91e550b2d92e9572211f811fdba99a33a38 100644 --- a/gcc/optabs.cc +++ b/gcc/optabs.cc @@ -1314,7 +1314,15 @@ commutative_optab_p (optab binoptab) || binoptab == smul_widen_optab || binoptab == umul_widen_optab || binoptab == smul_highpart_optab - || binoptab == umul_highpart_optab); + || binoptab == umul_highpart_optab + || binoptab == vec_widen_saddl_hi_optab + || binoptab == vec_widen_saddl_lo_optab + || binoptab == vec_widen_uaddl_hi_optab + || binoptab == vec_widen_uaddl_lo_optab + || binoptab == vec_widen_sadd_hi_optab + || binoptab == vec_widen_sadd_lo_optab + || binoptab == vec_widen_uadd_hi_optab + || binoptab == vec_widen_uadd_lo_optab); } /* X is to be used in mode MODE as operand OPN 
to BINOPTAB. If we're diff --git a/gcc/optabs.def b/gcc/optabs.def index 695f5911b300c9ca5737de9be809fa01aabe5e01..d41ed6e1afaddd019c7470f965c0ad21c8b2b9d7 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -410,6 +410,16 @@ OPTAB_D (vec_widen_ssubl_hi_optab, "vec_widen_ssubl_hi_$a") OPTAB_D (vec_widen_ssubl_lo_optab, "vec_widen_ssubl_lo_$a") OPTAB_D (vec_widen_saddl_hi_optab, "vec_widen_saddl_hi_$a") OPTAB_D (vec_widen_saddl_lo_optab, "vec_widen_saddl_lo_$a") +OPTAB_D (vec_widen_ssub_optab, "vec_widen_ssub_$a") +OPTAB_D (vec_widen_ssub_hi_optab, "vec_widen_ssub_hi_$a") +OPTAB_D (vec_widen_ssub_lo_optab, "vec_widen_ssub_lo_$a") +OPTAB_D (vec_widen_ssub_odd_optab, "vec_widen_ssub_odd_$a") +OPTAB_D (vec_widen_ssub_even_optab, "vec_widen_ssub_even_$a") +OPTAB_D (vec_widen_sadd_optab, "vec_widen_sadd_$a") +OPTAB_D (vec_widen_sadd_hi_optab, "vec_widen_sadd_hi_$a") +OPTAB_D (vec_widen_sadd_lo_optab, "vec_widen_sadd_lo_$a") +OPTAB_D (vec_widen_sadd_odd_optab, "vec_widen_sadd_odd_$a") +OPTAB_D (vec_widen_sadd_even_optab, "vec_widen_sadd_even_$a") OPTAB_D (vec_widen_sshiftl_hi_optab, "vec_widen_sshiftl_hi_$a") OPTAB_D (vec_widen_sshiftl_lo_optab, "vec_widen_sshiftl_lo_$a") OPTAB_D (vec_widen_umult_even_optab, "vec_widen_umult_even_$a") @@ -422,6 +432,16 @@ OPTAB_D (vec_widen_usubl_hi_optab, "vec_widen_usubl_hi_$a") OPTAB_D (vec_widen_usubl_lo_optab, "vec_widen_usubl_lo_$a") OPTAB_D (vec_widen_uaddl_hi_optab, "vec_widen_uaddl_hi_$a") OPTAB_D (vec_widen_uaddl_lo_optab, "vec_widen_uaddl_lo_$a") +OPTAB_D (vec_widen_usub_optab, "vec_widen_usub_$a") +OPTAB_D (vec_widen_usub_hi_optab, "vec_widen_usub_hi_$a") +OPTAB_D (vec_widen_usub_lo_optab, "vec_widen_usub_lo_$a") +OPTAB_D (vec_widen_usub_odd_optab, "vec_widen_usub_odd_$a") +OPTAB_D (vec_widen_usub_even_optab, "vec_widen_usub_even_$a") +OPTAB_D (vec_widen_uadd_optab, "vec_widen_uadd_$a") +OPTAB_D (vec_widen_uadd_hi_optab, "vec_widen_uadd_hi_$a") +OPTAB_D (vec_widen_uadd_lo_optab, "vec_widen_uadd_lo_$a") +OPTAB_D 
(vec_widen_uadd_odd_optab, "vec_widen_uadd_odd_$a") +OPTAB_D (vec_widen_uadd_even_optab, "vec_widen_uadd_even_$a") OPTAB_D (vec_addsub_optab, "vec_addsub$a3") OPTAB_D (vec_fmaddsub_optab, "vec_fmaddsub$a4") OPTAB_D (vec_fmsubadd_optab, "vec_fmsubadd$a4") diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-add.c b/gcc/testsuite/gcc.target/aarch64/vect-widen-add.c index 220bd9352a4c7acd2e3713e441d74898d3e92b30..7037673d32bd780e1c9b58a51e58e2bac3b30b7e 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-widen-add.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-add.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O3 -save-temps" } */ +/* { dg-options "-O3 -save-temps -fdump-tree-vect-all" } */ #include #include @@ -86,6 +86,8 @@ main() return 0; } +/* { dg-final { scan-tree-dump "add new stmt.*VEC_WIDEN_PLUS_LO" "vect" } } */ +/* { dg-final { scan-tree-dump "add new stmt.*VEC_WIDEN_PLUS_HI" "vect" } } */ /* { dg-final { scan-assembler-times {\tuaddl\t} 1} } */ /* { dg-final { scan-assembler-times {\tuaddl2\t} 1} } */ /* { dg-final { scan-assembler-times {\tsaddl\t} 1} } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-sub.c b/gcc/testsuite/gcc.target/aarch64/vect-widen-sub.c index a2bed63affbd091977df95a126da1f5b8c1d41d2..83bc1edb6105f47114b665e24a13e6194b2179a2 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect-widen-sub.c +++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-sub.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O3 -save-temps" } */ +/* { dg-options "-O3 -save-temps -fdump-tree-vect-all" } */ #include #include @@ -86,6 +86,8 @@ main() return 0; } +/* { dg-final { scan-tree-dump "add new stmt.*VEC_WIDEN_MINUS_LO" "vect" } } */ +/* { dg-final { scan-tree-dump "add new stmt.*VEC_WIDEN_MINUS_HI" "vect" } } */ /* { dg-final { scan-assembler-times {\tusubl\t} 1} } */ /* { dg-final { scan-assembler-times {\tusubl2\t} 1} } */ /* { dg-final { scan-assembler-times {\tssubl\t} 1} } */ diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc 
index 0aeebb67fac864db284985f4a6f0653af281d62b..0e847cd04ca6e33f67a86a78a36d35d42aba2627 100644 --- a/gcc/tree-cfg.cc +++ b/gcc/tree-cfg.cc @@ -65,6 +65,7 @@ along with GCC; see the file COPYING3. If not see #include "asan.h" #include "profile.h" #include "sreal.h" +#include "internal-fn.h" /* This file contains functions for building the Control Flow Graph (CFG) for a function tree. */ @@ -3411,6 +3412,40 @@ verify_gimple_call (gcall *stmt) debug_generic_stmt (fn); return true; } + internal_fn ifn = gimple_call_internal_fn (stmt); + if (ifn == IFN_LAST) + { + error ("gimple call has an invalid IFN"); + debug_generic_stmt (fn); + return true; + } + else if (widening_fn_p (ifn) + || narrowing_fn_p (ifn)) + { + tree lhs = gimple_get_lhs (stmt); + if (!lhs) + { + error ("vector IFN call with no lhs"); + debug_generic_stmt (fn); + return true; + } + + bool non_vector_operands = false; + for (unsigned i = 0; i < gimple_call_num_args (stmt); ++i) + if (!VECTOR_TYPE_P (TREE_TYPE (gimple_call_arg (stmt, i)))) + { + non_vector_operands = true; + break; + } + + if (non_vector_operands + || !VECTOR_TYPE_P (TREE_TYPE (lhs))) + { + error ("invalid non-vector operands in vector IFN call"); + debug_generic_stmt (fn); + return true; + } + } } else { diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc index 63a19f8d1d89c6bd5d8e55a299cbffaa324b4b84..d74d8db2173b1ab117250fea89de5212d5e354ec 100644 --- a/gcc/tree-inline.cc +++ b/gcc/tree-inline.cc @@ -4433,7 +4433,20 @@ estimate_num_insns (gimple *stmt, eni_weights *weights) tree decl; if (gimple_call_internal_p (stmt)) - return 0; + { + internal_fn fn = gimple_call_internal_fn (stmt); + switch (fn) + { + case IFN_VEC_WIDEN_PLUS_HI: + case IFN_VEC_WIDEN_PLUS_LO: + case IFN_VEC_WIDEN_MINUS_HI: + case IFN_VEC_WIDEN_MINUS_LO: + return 1; + + default: + return 0; + } + } else if ((decl = gimple_call_fndecl (stmt)) && fndecl_built_in_p (decl)) { diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 
1778af0242898e3dc73d94d22a5b8505628a53b5..dcd4b5561600346a2c10bd5133507329206e8837 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -562,21 +562,30 @@ vect_joust_widened_type (tree type, tree new_type, tree *common_type) static unsigned int vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code, - tree_code widened_code, bool shift_p, + code_helper widened_code, bool shift_p, unsigned int max_nops, vect_unpromoted_value *unprom, tree *common_type, enum optab_subtype *subtype = NULL) { /* Check for an integer operation with the right code. */ - gassign *assign = dyn_cast (stmt_info->stmt); - if (!assign) + gimple* stmt = stmt_info->stmt; + if (!(is_gimple_assign (stmt) || is_gimple_call (stmt))) + return 0; + + code_helper rhs_code; + if (is_gimple_assign (stmt)) + rhs_code = gimple_assign_rhs_code (stmt); + else if (is_gimple_call (stmt)) + rhs_code = gimple_call_combined_fn (stmt); + else return 0; - tree_code rhs_code = gimple_assign_rhs_code (assign); - if (rhs_code != code && rhs_code != widened_code) + if (rhs_code != code + && rhs_code != widened_code) return 0; - tree type = TREE_TYPE (gimple_assign_lhs (assign)); + tree lhs = gimple_get_lhs (stmt); + tree type = TREE_TYPE (lhs); if (!INTEGRAL_TYPE_P (type)) return 0; @@ -589,7 +598,7 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code, { vect_unpromoted_value *this_unprom = &unprom[next_op]; unsigned int nops = 1; - tree op = gimple_op (assign, i + 1); + tree op = gimple_arg (stmt, i); if (i == 1 && TREE_CODE (op) == INTEGER_CST) { /* We already have a common type from earlier operands. @@ -1343,7 +1352,8 @@ vect_recog_sad_pattern (vec_info *vinfo, /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi inside the loop (in case we are analyzing an outer-loop). 
*/ vect_unpromoted_value unprom[2]; - if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR, + if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, + IFN_VEC_WIDEN_MINUS, false, 2, unprom, &half_type)) return NULL; @@ -1395,14 +1405,16 @@ static gimple * vect_recog_widen_op_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info, tree *type_out, tree_code orig_code, code_helper wide_code, - bool shift_p, const char *name) + bool shift_p, const char *name, + optab_subtype *subtype = NULL) { gimple *last_stmt = last_stmt_info->stmt; vect_unpromoted_value unprom[2]; tree half_type; if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code, - shift_p, 2, unprom, &half_type)) + shift_p, 2, unprom, &half_type, subtype)) + return NULL; /* Pattern detected. */ @@ -1468,6 +1480,20 @@ vect_recog_widen_op_pattern (vec_info *vinfo, type, pattern_stmt, vecctype); } +static gimple * +vect_recog_widen_op_pattern (vec_info *vinfo, + stmt_vec_info last_stmt_info, tree *type_out, + tree_code orig_code, internal_fn wide_ifn, + bool shift_p, const char *name, + optab_subtype *subtype = NULL) +{ + combined_fn ifn = as_combined_fn (wide_ifn); + return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out, + orig_code, ifn, shift_p, name, + subtype); +} + + /* Try to detect multiplication on widened inputs, converting MULT_EXPR to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */ @@ -1481,26 +1507,30 @@ vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info, } /* Try to detect addition on widened inputs, converting PLUS_EXPR - to WIDEN_PLUS_EXPR. See vect_recog_widen_op_pattern for details. */ + to IFN_VEC_WIDEN_PLUS. See vect_recog_widen_op_pattern for details. 
*/ static gimple * vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info, tree *type_out) { + optab_subtype subtype; return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out, - PLUS_EXPR, WIDEN_PLUS_EXPR, false, - "vect_recog_widen_plus_pattern"); + PLUS_EXPR, IFN_VEC_WIDEN_PLUS, + false, "vect_recog_widen_plus_pattern", + &subtype); } /* Try to detect subtraction on widened inputs, converting MINUS_EXPR - to WIDEN_MINUS_EXPR. See vect_recog_widen_op_pattern for details. */ + to IFN_VEC_WIDEN_MINUS. See vect_recog_widen_op_pattern for details. */ static gimple * vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info, tree *type_out) { + optab_subtype subtype; return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out, - MINUS_EXPR, WIDEN_MINUS_EXPR, false, - "vect_recog_widen_minus_pattern"); + MINUS_EXPR, IFN_VEC_WIDEN_MINUS, + false, "vect_recog_widen_minus_pattern", + &subtype); } /* Function vect_recog_ctz_ffs_pattern @@ -3078,7 +3108,7 @@ vect_recog_average_pattern (vec_info *vinfo, vect_unpromoted_value unprom[3]; tree new_type; unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR, - WIDEN_PLUS_EXPR, false, 3, + IFN_VEC_WIDEN_PLUS, false, 3, unprom, &new_type); if (nops == 0) return NULL; @@ -6469,6 +6499,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = { { vect_recog_mask_conversion_pattern, "mask_conversion" }, { vect_recog_widen_plus_pattern, "widen_plus" }, { vect_recog_widen_minus_pattern, "widen_minus" }, + /* These must come after the double widening ones. 
*/ }; const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs); diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index d152ae9ab10b361b88c0f839d6951c43b954750a..132c0337b7f541bfb114c0a3d2abbeffdad79880 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -5038,7 +5038,8 @@ vectorizable_conversion (vec_info *vinfo, bool widen_arith = (code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR || code == WIDEN_MULT_EXPR - || code == WIDEN_LSHIFT_EXPR); + || code == WIDEN_LSHIFT_EXPR + || widening_fn_p (code)); if (!widen_arith && !CONVERT_EXPR_CODE_P (code) @@ -5088,8 +5089,8 @@ vectorizable_conversion (vec_info *vinfo, gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR || code == WIDEN_PLUS_EXPR - || code == WIDEN_MINUS_EXPR); - + || code == WIDEN_MINUS_EXPR + || widening_fn_p (code)); op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) : gimple_call_arg (stmt, 0); @@ -12478,26 +12479,69 @@ supportable_widening_operation (vec_info *vinfo, optab1 = vec_unpacks_sbool_lo_optab; optab2 = vec_unpacks_sbool_hi_optab; } - else - { - optab1 = optab_for_tree_code (c1, vectype, optab_default); - optab2 = optab_for_tree_code (c2, vectype, optab_default); + + vec_mode = TYPE_MODE (vectype); + if (widening_fn_p (code)) + { + /* If this is an internal fn then we must check whether the target + supports either a low-high split or an even-odd split. */ + internal_fn ifn = as_internal_fn ((combined_fn) code); + + internal_fn lo, hi, even, odd; + lookup_hilo_internal_fn (ifn, &lo, &hi); + *code1 = as_combined_fn (lo); + *code2 = as_combined_fn (hi); + optab1 = direct_internal_fn_optab (lo, {vectype, vectype}); + optab2 = direct_internal_fn_optab (hi, {vectype, vectype}); + + /* If we don't support low-high, then check for even-odd. 
*/ + if (!optab1 + || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing + || !optab2 + || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing) + { + lookup_evenodd_internal_fn (ifn, &even, &odd); + *code1 = as_combined_fn (even); + *code2 = as_combined_fn (odd); + optab1 = direct_internal_fn_optab (even, {vectype, vectype}); + optab2 = direct_internal_fn_optab (odd, {vectype, vectype}); + } + } + else if (code.is_tree_code ()) + { + if (code == FIX_TRUNC_EXPR) + { + /* The signedness is determined from output operand. */ + optab1 = optab_for_tree_code (c1, vectype_out, optab_default); + optab2 = optab_for_tree_code (c2, vectype_out, optab_default); + } + else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ()) + && VECTOR_BOOLEAN_TYPE_P (wide_vectype) + && VECTOR_BOOLEAN_TYPE_P (vectype) + && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype) + && SCALAR_INT_MODE_P (TYPE_MODE (vectype))) + { + /* If the input and result modes are the same, a different optab + is needed where we pass in the number of units in vectype. 
*/ + optab1 = vec_unpacks_sbool_lo_optab; + optab2 = vec_unpacks_sbool_hi_optab; + } + else + { + optab1 = optab_for_tree_code (c1, vectype, optab_default); + optab2 = optab_for_tree_code (c2, vectype, optab_default); + } + *code1 = c1; + *code2 = c2; } if (!optab1 || !optab2) return false; - vec_mode = TYPE_MODE (vectype); if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing) return false; - if (code.is_tree_code ()) - { - *code1 = c1; - *code2 = c2; - } - if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype) && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype)) diff --git a/gcc/tree.def b/gcc/tree.def index 90ceeec0b512bfa5f983359c0af03cc71de32007..b37b0b35927b92a6536e5c2d9805ffce8319a240 100644 --- a/gcc/tree.def +++ b/gcc/tree.def @@ -1374,15 +1374,16 @@ DEFTREECODE (DOT_PROD_EXPR, "dot_prod_expr", tcc_expression, 3) DEFTREECODE (WIDEN_SUM_EXPR, "widen_sum_expr", tcc_binary, 2) /* Widening sad (sum of absolute differences). - The first two arguments are of type t1 which should be integer. - The third argument and the result are of type t2, such that t2 is at least - twice the size of t1. Like DOT_PROD_EXPR, SAD_EXPR (arg1,arg2,arg3) is + The first two arguments are of type t1 which should be a vector of integers. + The third argument and the result are of type t2, such that the size of + the elements of t2 is at least twice the size of the elements of t1. + Like DOT_PROD_EXPR, SAD_EXPR (arg1,arg2,arg3) is equivalent to: - tmp = WIDEN_MINUS_EXPR (arg1, arg2) + tmp = IFN_VEC_WIDEN_MINUS (arg1, arg2) tmp2 = ABS_EXPR (tmp) arg3 = PLUS_EXPR (tmp2, arg3) or: - tmp = WIDEN_MINUS_EXPR (arg1, arg2) + tmp = IFN_VEC_WIDEN_MINUS (arg1, arg2) tmp2 = ABS_EXPR (tmp) arg3 = WIDEN_SUM_EXPR (tmp2, arg3) */