Index: tree-vectorizer.h =================================================================== --- tree-vectorizer.h (revision 174475) +++ tree-vectorizer.h (working copy) @@ -896,7 +896,7 @@ extern void vect_slp_transform_bb (basic_block); /* Pattern recognition functions. Additional pattern recognition functions can (and will) be added in the future. */ -typedef gimple (* vect_recog_func_ptr) (gimple, tree *, tree *); +typedef gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *); #define NUM_PATTERNS 4 void vect_pattern_recog (loop_vec_info); Index: tree-vect-patterns.c =================================================================== --- tree-vect-patterns.c (revision 174475) +++ tree-vect-patterns.c (working copy) @@ -38,16 +38,11 @@ along with GCC; see the file COPYING3. If not see #include "recog.h" #include "diagnostic-core.h" -/* Function prototypes */ -static void vect_pattern_recog_1 - (gimple (* ) (gimple, tree *, tree *), gimple_stmt_iterator); -static bool widened_name_p (tree, gimple, tree *, gimple *); - /* Pattern recognition functions */ -static gimple vect_recog_widen_sum_pattern (gimple, tree *, tree *); -static gimple vect_recog_widen_mult_pattern (gimple, tree *, tree *); -static gimple vect_recog_dot_prod_pattern (gimple, tree *, tree *); -static gimple vect_recog_pow_pattern (gimple, tree *, tree *); +static gimple vect_recog_widen_sum_pattern (gimple *, tree *, tree *); +static gimple vect_recog_widen_mult_pattern (gimple *, tree *, tree *); +static gimple vect_recog_dot_prod_pattern (gimple *, tree *, tree *); +static gimple vect_recog_pow_pattern (gimple *, tree *, tree *); static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { vect_recog_widen_mult_pattern, vect_recog_widen_sum_pattern, @@ -61,10 +56,12 @@ static vect_recog_func_ptr vect_vect_recog_func_pt is a result of a type-promotion, such that: DEF_STMT: NAME = NOP (name0) where the type of name0 (HALF_TYPE) is smaller than the type of NAME. 
-*/ + If CHECK_SIGN is TRUE, check that either both types are signed or both are + unsigned. */ static bool -widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt) +widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt, + bool check_sign) { tree dummy; gimple dummy_gimple; @@ -98,7 +95,7 @@ static bool *half_type = TREE_TYPE (oprnd0); if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type) - || (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) + || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign) || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2))) return false; @@ -168,12 +165,12 @@ vect_recog_temp_ssa_var (tree type, gimple stmt) inner-loop nested in an outer-loop that us being vectorized). */ static gimple -vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out) +vect_recog_dot_prod_pattern (gimple *last_stmt, tree *type_in, tree *type_out) { gimple stmt; tree oprnd0, oprnd1; tree oprnd00, oprnd01; - stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); + stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt); tree type, half_type; gimple pattern_stmt; tree prod_type; @@ -181,10 +178,10 @@ static gimple struct loop *loop = LOOP_VINFO_LOOP (loop_info); tree var; - if (!is_gimple_assign (last_stmt)) + if (!is_gimple_assign (*last_stmt)) return NULL; - type = gimple_expr_type (last_stmt); + type = gimple_expr_type (*last_stmt); /* Look for the following pattern DX = (TYPE1) X; @@ -210,7 +207,7 @@ static gimple /* Starting from LAST_STMT, follow the defs of its uses in search of the above pattern. 
*/ - if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) + if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR) return NULL; if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) @@ -231,14 +228,14 @@ static gimple if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) return NULL; - oprnd0 = gimple_assign_rhs1 (last_stmt); - oprnd1 = gimple_assign_rhs2 (last_stmt); + oprnd0 = gimple_assign_rhs1 (*last_stmt); + oprnd1 = gimple_assign_rhs2 (*last_stmt); if (!types_compatible_p (TREE_TYPE (oprnd0), type) || !types_compatible_p (TREE_TYPE (oprnd1), type)) return NULL; - stmt = last_stmt; + stmt = *last_stmt; - if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt)) + if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true)) { stmt = def_stmt; oprnd0 = gimple_assign_rhs1 (stmt); @@ -247,7 +244,7 @@ static gimple half_type = type; } - /* So far so good. Since last_stmt was detected as a (summation) reduction, + /* So far so good. Since *last_stmt was detected as a (summation) reduction, we know that oprnd1 is the reduction variable (defined by a loop-header phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. Left to check that oprnd0 is defined by a (widen_)mult_expr */ @@ -293,10 +290,10 @@ static gimple if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type) || !types_compatible_p (TREE_TYPE (oprnd1), prod_type)) return NULL; - if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt)) + if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true)) return NULL; oprnd00 = gimple_assign_rhs1 (def_stmt); - if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt)) + if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true)) return NULL; oprnd01 = gimple_assign_rhs1 (def_stmt); if (!types_compatible_p (half_type0, half_type1)) @@ -322,7 +319,7 @@ static gimple /* We don't allow changing the order of the computation in the inner-loop when doing outer-loop vectorization. 
*/ - gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); + gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt)); return pattern_stmt; } @@ -342,24 +339,47 @@ static gimple where type 'TYPE' is at least double the size of type 'type'. + Also detect unsigned cases: + + unsigned type a_t, b_t; + unsigned TYPE u_prod_T; + TYPE a_T, b_T, prod_T; + + S1 a_t = ; + S2 b_t = ; + S3 a_T = (TYPE) a_t; + S4 b_T = (TYPE) b_t; + S5 prod_T = a_T * b_T; + S6 u_prod_T = (unsigned TYPE) prod_T; + + and multiplication by constants: + + type a_t; + TYPE a_T, prod_T; + + S1 a_t = ; + S3 a_T = (TYPE) a_t; + S5 prod_T = a_T * CONST; + Input: - * LAST_STMT: A stmt from which the pattern search begins. In the example, - when this function is called with S5, the pattern {S3,S4,S5} is be detected. + * LAST_STMT: A stmt from which the pattern search begins. In the example, + when this function is called with S5, the pattern {S3,S4,S5,(S6)} is + detected. Output: * TYPE_IN: The type of the input arguments to the pattern. - * TYPE_OUT: The type of the output of this pattern. + * TYPE_OUT: The type of the output of this pattern. * Return value: A new stmt that will be used to replace the sequence of - stmts that constitute the pattern. 
In this case it will be: WIDEN_MULT */ static gimple -vect_recog_widen_mult_pattern (gimple last_stmt, +vect_recog_widen_mult_pattern (gimple *last_stmt, tree *type_in, tree *type_out) { @@ -367,40 +387,111 @@ static gimple tree oprnd0, oprnd1; tree type, half_type0, half_type1; gimple pattern_stmt; - tree vectype, vectype_out; + tree vectype, vectype_out = NULL_TREE; tree dummy; tree var; enum tree_code dummy_code; int dummy_int; VEC (tree, heap) *dummy_vec; + bool op0_ok, op1_ok; - if (!is_gimple_assign (last_stmt)) + if (!is_gimple_assign (*last_stmt)) return NULL; - type = gimple_expr_type (last_stmt); + type = gimple_expr_type (*last_stmt); /* Starting from LAST_STMT, follow the defs of its uses in search of the above pattern. */ - if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR) + if (gimple_assign_rhs_code (*last_stmt) != MULT_EXPR) return NULL; - oprnd0 = gimple_assign_rhs1 (last_stmt); - oprnd1 = gimple_assign_rhs2 (last_stmt); + oprnd0 = gimple_assign_rhs1 (*last_stmt); + oprnd1 = gimple_assign_rhs2 (*last_stmt); if (!types_compatible_p (TREE_TYPE (oprnd0), type) || !types_compatible_p (TREE_TYPE (oprnd1), type)) return NULL; - /* Check argument 0 */ - if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0)) - return NULL; - oprnd0 = gimple_assign_rhs1 (def_stmt0); + /* Check argument 0. */ + op0_ok = widened_name_p (oprnd0, *last_stmt, &half_type0, &def_stmt0, false); + /* Check argument 1. */ + op1_ok = widened_name_p (oprnd1, *last_stmt, &half_type1, &def_stmt1, false); - /* Check argument 1 */ - if (!widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1)) + /* In case of multiplication by a constant one of the operands may not match + the pattern, but not both. 
*/ + if (!op0_ok && !op1_ok) return NULL; - oprnd1 = gimple_assign_rhs1 (def_stmt1); + if (op0_ok && op1_ok) + { + oprnd0 = gimple_assign_rhs1 (def_stmt0); + oprnd1 = gimple_assign_rhs1 (def_stmt1); + } + else if (!op0_ok) + { + if (CONSTANT_CLASS_P (oprnd0) + && TREE_CODE (half_type1) == INTEGER_TYPE + && tree_int_cst_lt (oprnd0, TYPE_MAXVAL (half_type1)) + && tree_int_cst_lt (TYPE_MINVAL (half_type1), oprnd0)) + { + /* OPRND0 is a constant of HALF_TYPE1. */ + half_type0 = half_type1; + oprnd1 = gimple_assign_rhs1 (def_stmt1); + } + else + return NULL; + } + else if (!op1_ok) + { + if (CONSTANT_CLASS_P (oprnd1) + && TREE_CODE (half_type0) == INTEGER_TYPE + && tree_int_cst_lt (oprnd1, TYPE_MAXVAL (half_type0)) + && tree_int_cst_lt (TYPE_MINVAL (half_type0), oprnd1)) + { + /* OPRND1 is a constant of HALF_TYPE0. */ + half_type1 = half_type0; + oprnd0 = gimple_assign_rhs1 (def_stmt0); + } + else + return NULL; + } + + /* Handle unsigned case. Look for + S6 u_prod_T = (unsigned TYPE) prod_T; + Use unsigned TYPE as the type for WIDEN_MULT_EXPR. 
*/ + if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) + { + tree lhs = gimple_assign_lhs (*last_stmt), use_lhs; + imm_use_iterator imm_iter; + use_operand_p use_p; + int nuses = 0; + gimple use_stmt = NULL; + tree use_type; + + if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1)) + return NULL; + + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) + { + use_stmt = USE_STMT (use_p); + nuses++; + } + + if (nuses != 1 || !is_gimple_assign (use_stmt) + || gimple_assign_rhs_code (use_stmt) != NOP_EXPR) + return NULL; + + use_lhs = gimple_assign_lhs (use_stmt); + use_type = TREE_TYPE (use_lhs); + if (!INTEGRAL_TYPE_P (use_type) + || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type)) + || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type))) + return NULL; + + type = use_type; + *last_stmt = use_stmt; + } + if (!types_compatible_p (half_type0, half_type1)) return NULL; @@ -413,7 +504,7 @@ static gimple vectype_out = get_vectype_for_scalar_type (type); if (!vectype || !vectype_out - || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, + || !supportable_widening_operation (WIDEN_MULT_EXPR, *last_stmt, vectype_out, vectype, &dummy, &dummy, &dummy_code, &dummy_code, &dummy_int, &dummy_vec)) @@ -462,16 +553,16 @@ static gimple */ static gimple -vect_recog_pow_pattern (gimple last_stmt, tree *type_in, tree *type_out) +vect_recog_pow_pattern (gimple *last_stmt, tree *type_in, tree *type_out) { tree fn, base, exp = NULL; gimple stmt; tree var; - if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL) + if (!is_gimple_call (*last_stmt) || gimple_call_lhs (*last_stmt) == NULL) return NULL; - fn = gimple_call_fndecl (last_stmt); + fn = gimple_call_fndecl (*last_stmt); if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL) return NULL; @@ -481,8 +572,8 @@ static gimple case BUILT_IN_POWI: case BUILT_IN_POWF: case BUILT_IN_POW: - base = gimple_call_arg (last_stmt, 0); - exp = gimple_call_arg (last_stmt, 1); + base = gimple_call_arg 
(*last_stmt, 0); + exp = gimple_call_arg (*last_stmt, 1); if (TREE_CODE (exp) != REAL_CST && TREE_CODE (exp) != INTEGER_CST) return NULL; @@ -574,21 +665,21 @@ static gimple inner-loop nested in an outer-loop that us being vectorized). */ static gimple -vect_recog_widen_sum_pattern (gimple last_stmt, tree *type_in, tree *type_out) +vect_recog_widen_sum_pattern (gimple *last_stmt, tree *type_in, tree *type_out) { gimple stmt; tree oprnd0, oprnd1; - stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); + stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt); tree type, half_type; gimple pattern_stmt; loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); struct loop *loop = LOOP_VINFO_LOOP (loop_info); tree var; - if (!is_gimple_assign (last_stmt)) + if (!is_gimple_assign (*last_stmt)) return NULL; - type = gimple_expr_type (last_stmt); + type = gimple_expr_type (*last_stmt); /* Look for the following pattern DX = (TYPE) X; @@ -600,25 +691,25 @@ static gimple /* Starting from LAST_STMT, follow the defs of its uses in search of the above pattern. */ - if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) + if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR) return NULL; if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) return NULL; - oprnd0 = gimple_assign_rhs1 (last_stmt); - oprnd1 = gimple_assign_rhs2 (last_stmt); + oprnd0 = gimple_assign_rhs1 (*last_stmt); + oprnd1 = gimple_assign_rhs2 (*last_stmt); if (!types_compatible_p (TREE_TYPE (oprnd0), type) || !types_compatible_p (TREE_TYPE (oprnd1), type)) return NULL; - /* So far so good. Since last_stmt was detected as a (summation) reduction, + /* So far so good. Since *last_stmt was detected as a (summation) reduction, we know that oprnd1 is the reduction variable (defined by a loop-header phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. Left to check that oprnd0 is defined by a cast from type 'type' to type 'TYPE'. 
*/ - if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt)) + if (!widened_name_p (oprnd0, *last_stmt, &half_type, &stmt, true)) return NULL; oprnd0 = gimple_assign_rhs1 (stmt); @@ -639,7 +730,7 @@ static gimple /* We don't allow changing the order of the computation in the inner-loop when doing outer-loop vectorization. */ - gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); + gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt)); return pattern_stmt; } @@ -669,23 +760,27 @@ static gimple static void vect_pattern_recog_1 ( - gimple (* vect_recog_func) (gimple, tree *, tree *), + gimple (* vect_recog_func) (gimple *, tree *, tree *), gimple_stmt_iterator si) { gimple stmt = gsi_stmt (si), pattern_stmt; - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + stmt_vec_info stmt_info; stmt_vec_info pattern_stmt_info; - loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + loop_vec_info loop_vinfo; tree pattern_vectype; tree type_in, type_out; enum tree_code code; int i; gimple next; - pattern_stmt = (* vect_recog_func) (stmt, &type_in, &type_out); + pattern_stmt = (* vect_recog_func) (&stmt, &type_in, &type_out); if (!pattern_stmt) return; + si = gsi_for_stmt (stmt); + stmt_info = vinfo_for_stmt (stmt); + loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + if (VECTOR_MODE_P (TYPE_MODE (type_in))) { /* No need to check target support (already checked by the pattern @@ -832,7 +927,7 @@ vect_pattern_recog (loop_vec_info loop_vinfo) unsigned int nbbs = loop->num_nodes; gimple_stmt_iterator si; unsigned int i, j; - gimple (* vect_recog_func_ptr) (gimple, tree *, tree *); + gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *); if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_pattern_recog ==="); Index: tree-vect-stmts.c =================================================================== --- tree-vect-stmts.c (revision 174475) +++ tree-vect-stmts.c (working copy) @@ -3232,6 +3232,33 @@ vectorizable_type_promotion (gimple stmt, 
gimple_s fprintf (vect_dump, "use not simple."); return false; } + + op_type = TREE_CODE_LENGTH (code); + if (op_type == binary_op) + { + bool ok; + + op1 = gimple_assign_rhs2 (stmt); + if (code == WIDEN_MULT_EXPR) + { + /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of + OP1. */ + if (CONSTANT_CLASS_P (op0)) + ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL, + &def_stmt, &def, &dt[1], &vectype_in); + else + ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, + &dt[1]); + + if (!ok) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "use not simple."); + return false; + } + } + } + /* If op0 is an external or constant def use a vector type with the same size as the output vector type. */ if (!vectype_in) @@ -3264,18 +3291,6 @@ vectorizable_type_promotion (gimple stmt, gimple_s gcc_assert (ncopies >= 1); - op_type = TREE_CODE_LENGTH (code); - if (op_type == binary_op) - { - op1 = gimple_assign_rhs2 (stmt); - if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1])) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "use not simple."); - return false; - } - } - /* Supportable by target? */ if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in, &decl1, &decl2, &code1, &code2, @@ -3301,6 +3316,14 @@ vectorizable_type_promotion (gimple stmt, gimple_s fprintf (vect_dump, "transform type promotion operation. ncopies = %d.", ncopies); + if (code == WIDEN_MULT_EXPR) + { + if (CONSTANT_CLASS_P (op0)) + op0 = fold_convert (TREE_TYPE (op1), op0); + else if (CONSTANT_CLASS_P (op1)) + op1 = fold_convert (TREE_TYPE (op0), op1); + } + /* Handle def. */ /* In case of multi-step promotion, we first generate promotion operations to the intermediate types, and then from that types to the final one.