diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index d166a0debedf4d8edf55c842bcf4ff4690b3e9ce..baf20416e63745097825fc30fdf2e66bc80d7d23 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5440,11 +5440,13 @@ Like @samp{fold_left_plus_@var{m}}, but takes an additional mask operand @item @samp{sdot_prod@var{m}} @cindex @code{udot_prod@var{m}} instruction pattern @itemx @samp{udot_prod@var{m}} +@cindex @code{usdot_prod@var{m}} instruction pattern +@itemx @samp{usdot_prod@var{m}} Compute the sum of the products of two signed/unsigned elements. -Operand 1 and operand 2 are of the same mode. Their product, which is of a -wider mode, is computed and added to operand 3. Operand 3 is of a mode equal or -wider than the mode of the product. The result is placed in operand 0, which -is of the same mode as operand 3. +Operand 1 and operand 2 are of the same mode but may differ in signs. Their +product, which is of a wider mode, is computed and added to operand 3. +Operand 3 is of a mode equal or wider than the mode of the product. The +result is placed in operand 0, which is of the same mode as operand 3. @cindex @code{ssad@var{m}} instruction pattern @item @samp{ssad@var{m}} diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h index c3aaa1a416991e856d3e24da45968a92ebada82c..ebc23ac86fe99057f375781c2f1990e0548ba08d 100644 --- a/gcc/optabs-tree.h +++ b/gcc/optabs-tree.h @@ -27,11 +27,29 @@ along with GCC; see the file COPYING3. If not see shift amount vs. machines that take a vector for the shift amount. */ enum optab_subtype { - optab_default, - optab_scalar, - optab_vector + optab_default = 1 << 0, + optab_scalar = 1 << 1, + optab_vector = 1 << 2, + optab_signed_to_unsigned = 1 << 3, + optab_unsigned_to_signed = 1 << 4 }; +/* Override the OrEqual-operator so we can use optab_subtype as a bit flag. 
*/ +inline enum optab_subtype& +operator |= (enum optab_subtype& a, enum optab_subtype b) +{ + return a = static_cast<optab_subtype>(static_cast<int>(a) + | static_cast<int>(b)); +} + +/* Override the Or-operator so we can use optab_subtype as a bit flag. */ +inline enum optab_subtype +operator | (enum optab_subtype a, enum optab_subtype b) +{ + return static_cast<optab_subtype>(static_cast<int>(a) + | static_cast<int>(b)); +} + /* Return the optab used for computing the given operation on the type given by the second argument. The third argument distinguishes between the types of vector shifts and rotates. */ diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c index 95ffe397c23e80c105afea52e9d47216bf52f55a..2f60004545defc53182e004eea1e5c22b7453072 100644 --- a/gcc/optabs-tree.c +++ b/gcc/optabs-tree.c @@ -127,7 +127,17 @@ optab_for_tree_code (enum tree_code code, const_tree type, return TYPE_UNSIGNED (type) ? usum_widen_optab : ssum_widen_optab; case DOT_PROD_EXPR: - return TYPE_UNSIGNED (type) ? udot_prod_optab : sdot_prod_optab; + { + gcc_assert (subtype & optab_default + || subtype & optab_vector + || subtype & optab_signed_to_unsigned + || subtype & optab_unsigned_to_signed); + + if (subtype & (optab_unsigned_to_signed | optab_signed_to_unsigned)) + return usdot_prod_optab; + + return (TYPE_UNSIGNED (type) ? udot_prod_optab : sdot_prod_optab); + } case SAD_EXPR: return TYPE_UNSIGNED (type) ? 
usad_optab : ssad_optab; diff --git a/gcc/optabs.c b/gcc/optabs.c index f4614a394587787293dc8b680a38901f7906f61c..2e18b76de1412eab71971753ac678597c0d00098 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -262,6 +262,11 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op, bool sbool = false; oprnd0 = ops->op0; + if (nops >= 2) + oprnd1 = ops->op1; + if (nops >= 3) + oprnd2 = ops->op2; + tmode0 = TYPE_MODE (TREE_TYPE (oprnd0)); if (ops->code == VEC_UNPACK_FIX_TRUNC_HI_EXPR || ops->code == VEC_UNPACK_FIX_TRUNC_LO_EXPR) @@ -285,6 +290,27 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op, ? vec_unpacks_sbool_hi_optab : vec_unpacks_sbool_lo_optab); sbool = true; } + else if (ops->code == DOT_PROD_EXPR) + { + enum optab_subtype subtype = optab_default; + signop sign1 = TYPE_SIGN (TREE_TYPE (oprnd0)); + signop sign2 = TYPE_SIGN (TREE_TYPE (oprnd1)); + if (sign1 == sign2) + ; + else if (sign1 == SIGNED && sign2 == UNSIGNED) + { + subtype |= optab_signed_to_unsigned; + /* Same as optab_unsigned_to_signed but flip the operands. 
*/ + std::swap (op0, op1); + } + else if (sign1 == UNSIGNED && sign2 == SIGNED) + subtype |= optab_unsigned_to_signed; + else + gcc_unreachable (); + + widen_pattern_optab + = optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), subtype); + } else widen_pattern_optab = optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default); @@ -298,10 +324,7 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op, gcc_assert (icode != CODE_FOR_nothing); if (nops >= 2) - { - oprnd1 = ops->op1; - tmode1 = TYPE_MODE (TREE_TYPE (oprnd1)); - } + tmode1 = TYPE_MODE (TREE_TYPE (oprnd1)); else if (sbool) { nops = 2; @@ -316,7 +339,6 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op, { gcc_assert (tmode1 == tmode0); gcc_assert (op1); - oprnd2 = ops->op2; wmode = TYPE_MODE (TREE_TYPE (oprnd2)); } diff --git a/gcc/optabs.def b/gcc/optabs.def index b192a9d070b8aa72e5676b2eaa020b5bdd7ffcc8..f470c2168378cec840edf7fbdb7c18615baae928 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -352,6 +352,7 @@ OPTAB_D (uavg_ceil_optab, "uavg$a3_ceil") OPTAB_D (sdot_prod_optab, "sdot_prod$I$a") OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3") OPTAB_D (udot_prod_optab, "udot_prod$I$a") +OPTAB_D (usdot_prod_optab, "usdot_prod$I$a") OPTAB_D (usum_widen_optab, "widen_usum$I$a3") OPTAB_D (usad_optab, "usad$I$a") OPTAB_D (ssad_optab, "ssad$I$a") diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index 7e3aae5f9c28a49feedc7cc66e8ac0d476b9f28a..58b55bb648ad97d514f1fa18bb00808fd2678b42 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -4421,7 +4421,8 @@ verify_gimple_assign_ternary (gassign *stmt) && !SCALAR_FLOAT_TYPE_P (rhs1_type)) || (!INTEGRAL_TYPE_P (lhs_type) && !SCALAR_FLOAT_TYPE_P (lhs_type)))) - || !types_compatible_p (rhs1_type, rhs2_type) + || (!types_compatible_p (rhs1_type, rhs2_type) + && TYPE_SIGN (rhs1_type) == TYPE_SIGN (rhs2_type)) || !useless_type_conversion_p (lhs_type, rhs3_type) || maybe_lt (GET_MODE_SIZE (element_mode (rhs3_type)), 2 * GET_MODE_SIZE 
(element_mode (rhs1_type)))) diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 93fa2928e001c154bd4a9a73ac1dbbbf73c456df..cb8f5fbb6abca181c4171194d19fec29ec6e4176 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -6401,6 +6401,33 @@ build_vect_cond_expr (enum tree_code code, tree vop[3], tree mask, } } +/* Determine the optab_subtype to use for the given CODE and STMT_VINFO. For + most CODE this will be optab_vector, however for certain operations such as + DOT_PROD_EXPR where the operands can have different signs we + need to be able to pick the right optabs. */ + +static enum optab_subtype +vect_determine_dot_kind (tree_code code, stmt_vec_info stmt_vinfo) +{ + enum optab_subtype subtype = optab_vector; + switch (code) + { + case DOT_PROD_EXPR: + { + gassign *stmt = as_a <gassign *> (STMT_VINFO_STMT (stmt_vinfo)); + signop rhs1_sign = TYPE_SIGN (TREE_TYPE (gimple_assign_rhs1 (stmt))); + signop rhs2_sign = TYPE_SIGN (TREE_TYPE (gimple_assign_rhs2 (stmt))); + if (rhs1_sign != rhs2_sign) + subtype |= optab_unsigned_to_signed; + break; + } + default: + break; + } + + return subtype; +} + /* Function vectorizable_reduction. Check if STMT_INFO performs a reduction operation that can be vectorized. @@ -7189,7 +7216,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo, bool ok = true; /* 4.1. 
check support for the operation in the loop */ - optab optab = optab_for_tree_code (code, vectype_in, optab_vector); + enum optab_subtype subtype = vect_determine_dot_kind (code, stmt_info); + optab optab = optab_for_tree_code (code, vectype_in, subtype); if (!optab) { if (dump_enabled_p ()) diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index 441d6cd28c4eaded7abd756164890dbcffd2f3b8..943c001fb13777b4d1513841fa84942316846d5e 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -201,7 +201,8 @@ vect_get_external_def_edge (vec_info *vinfo, tree var) static bool vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code, tree itype, tree *vecotype_out, - tree *vecitype_out = NULL) + tree *vecitype_out = NULL, + enum optab_subtype subtype = optab_default) { tree vecitype = get_vectype_for_scalar_type (vinfo, itype); if (!vecitype) @@ -211,7 +212,7 @@ vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code, if (!vecotype) return false; - optab optab = optab_for_tree_code (code, vecitype, optab_default); + optab optab = optab_for_tree_code (code, vecitype, subtype); if (!optab) return false; @@ -487,14 +488,31 @@ vect_joust_widened_integer (tree type, bool shift_p, tree op, } /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE - is narrower than type, storing the supertype in *COMMON_TYPE if so. */ + is narrower than type, storing the supertype in *COMMON_TYPE if so. + If ALLOW_SHORT_SIGN_MISMATCH then accept that *COMMON_TYPE and NEW_TYPE + may be of different signs but equal precision. */ static bool -vect_joust_widened_type (tree type, tree new_type, tree *common_type) +vect_joust_widened_type (tree type, tree new_type, tree *common_type, + bool allow_short_sign_mismatch = false) { if (types_compatible_p (*common_type, new_type)) return true; + /* Check if the mismatch is only in the sign and if we have + allow_short_sign_mismatch then allow it. 
*/ + if (allow_short_sign_mismatch + && TYPE_SIGN (*common_type) != TYPE_SIGN (new_type)) + { + bool sign = TYPE_SIGN (*common_type) == UNSIGNED; + tree eq_type + = build_nonstandard_integer_type (TYPE_PRECISION (new_type), + sign); + + if (types_compatible_p (*common_type, eq_type)) + return true; + } + /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */ if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type)) && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type))) @@ -532,6 +550,9 @@ vect_joust_widened_type (tree type, tree new_type, tree *common_type) to a type that (a) is narrower than the result of STMT_INFO and (b) can hold all leaf operand values. + If ALLOW_SHORT_SIGN_MISMATCH is true, the leaf operands may differ in + sign but not in precision. + Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE exists. */ @@ -539,7 +560,8 @@ static unsigned int vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code, tree_code widened_code, bool shift_p, unsigned int max_nops, - vect_unpromoted_value *unprom, tree *common_type) + vect_unpromoted_value *unprom, tree *common_type, + bool allow_short_sign_mismatch = false) { /* Check for an integer operation with the right code. 
*/ + gassign *assign = dyn_cast <gassign *> (stmt_info->stmt); @@ -600,7 +622,8 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code, = vinfo->lookup_def (this_unprom->op); nops = vect_widened_op_tree (vinfo, def_stmt_info, code, widened_code, shift_p, max_nops, - this_unprom, common_type); + this_unprom, common_type, + allow_short_sign_mismatch); if (nops == 0) return 0; @@ -617,7 +640,8 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code, if (i == 0) *common_type = this_unprom->type; else if (!vect_joust_widened_type (type, this_unprom->type, - common_type)) + common_type, + allow_short_sign_mismatch)) return 0; } } @@ -888,21 +912,24 @@ vect_reassociating_reduction_p (vec_info *vinfo, Try to find the following pattern: - type x_t, y_t; + type1a x_t; + type1b y_t; TYPE1 prod; TYPE2 sum = init; loop: sum_0 = phi <init, sum_1> S1 x_t = ... S2 y_t = ... - S3 x_T = (TYPE1) x_t; - S4 y_T = (TYPE1) y_t; + S3 x_T = (TYPE3) x_t; + S4 y_T = (TYPE4) y_t; S5 prod = x_T * y_T; [S6 prod = (TYPE2) prod; #optional] S7 sum_1 = prod + sum_0; - where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the - same size of 'TYPE1' or bigger. This is a special case of a reduction + where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b', + the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of + 'type1a' and 'type1b' can differ. 'TYPE2' is the same size of 'TYPE1' or + bigger and must be the same sign. This is a special case of a reduction computation. Input: @@ -939,15 +966,16 @@ vect_recog_dot_prod_pattern (vec_info *vinfo, /* Look for the following pattern DX = (TYPE1) X; - DY = (TYPE1) Y; + DY = (TYPE2) Y; DPROD = DX * DY; - DDPROD = (TYPE2) DPROD; + DDPROD = (TYPE3) DPROD; sum_1 = DDPROD + sum_0; In which - DX is double the size of X - DY is double the size of Y - DX, DY, DPROD all have the same type but the sign - between DX, DY and DPROD can differ. 
The sign of DPROD + is one of the signs of DX or DY. - sum is the same size of DPROD or bigger - sum has been recognized as a reduction variable. @@ -986,14 +1014,41 @@ vect_recog_dot_prod_pattern (vec_info *vinfo, inside the loop (in case we are analyzing an outer-loop). */ vect_unpromoted_value unprom0[2]; if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR, - false, 2, unprom0, &half_type)) + false, 2, unprom0, &half_type, true)) return NULL; + /* Check to see if there is a sign change happening in the operands of the + multiplication and pick the appropriate optab subtype. */ + enum optab_subtype subtype; + tree rhs_type1 = unprom0[0].type; + tree rhs_type2 = unprom0[1].type; + if (TYPE_SIGN (rhs_type1) == TYPE_SIGN (rhs_type2)) + subtype = optab_default; + else if (TYPE_SIGN (rhs_type1) == SIGNED + && TYPE_SIGN (rhs_type2) == UNSIGNED) + subtype = optab_signed_to_unsigned; + else if (TYPE_SIGN (rhs_type1) == UNSIGNED + && TYPE_SIGN (rhs_type2) == SIGNED) + subtype = optab_unsigned_to_signed; + else + gcc_unreachable (); + + /* If we have a sign changing dot product we need to check that the + promoted type if unsigned has at least the same precision as the final + type of the dot-product. */ + if (subtype != optab_default) + { + tree mult_type = TREE_TYPE (unprom_mult.op); + if (TYPE_SIGN (mult_type) == UNSIGNED + && TYPE_PRECISION (mult_type) < TYPE_PRECISION (type)) + return NULL; + } + vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt); tree half_vectype; if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type, - type_out, &half_vectype)) + type_out, &half_vectype, subtype)) return NULL; /* Get the inputs in the appropriate types. 
*/ @@ -1002,8 +1057,22 @@ vect_recog_dot_prod_pattern (vec_info *vinfo, unprom0, half_vectype); var = vect_recog_temp_ssa_var (type, NULL); + + /* If we have a sign changing dot-product the dot-product itself does any + sign conversions, so consume the type and use the unpromoted types. */ + tree mult_arg1, mult_arg2; + if (subtype == optab_default) + { + mult_arg1 = mult_oprnd[0]; + mult_arg2 = mult_oprnd[1]; + } + else + { + mult_arg1 = unprom0[0].op; + mult_arg2 = unprom0[1].op; + } pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR, - mult_oprnd[0], mult_oprnd[1], oprnd1); + mult_arg1, mult_arg2, oprnd1); return pattern_stmt; }