diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index c5006afc00d..f53068c5c53 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -6088,13 +6088,13 @@ for the given scalar type @var{type}. @var{is_packed} is false if the scalar access using @var{type} is known to be naturally aligned. @end deftypefn -@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST (machine_mode @var{mode}, rtx @var{output}, rtx @var{in0}, rtx @var{in1}, const vec_perm_indices @var{&sel}) +@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST (machine_mode @var{mode}, machine_mode @var{op_mode}, rtx @var{output}, rtx @var{in0}, rtx @var{in1}, const vec_perm_indices @var{&sel}) This hook is used to test whether the target can permute up to two -vectors of mode @var{mode} using the permutation vector @code{sel}, and +vectors of mode @var{op_mode} using the permutation vector @code{sel}, and also to emit such a permutation. In the former case @var{in0}, @var{in1} and @var{out} are all null. In the latter case @var{in0} and @var{in1} are -the source vectors and @var{out} is the destination vector; all three are -operands of mode @var{mode}. @var{in1} is the same as @var{in0} if +the source vectors of mode @var{op_mode} and @var{out} is the destination vector +of mode @var{mode}. @var{in1} is the same as @var{in0} if @var{sel} describes a permutation on one vector instead of two. Return true if the operation is possible, emitting instructions for it diff --git a/gcc/match.pd b/gcc/match.pd index f5efa77560c..b06d7013603 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -7703,12 +7703,14 @@ and, 2-argument version. */ tree oldop2 = op2; if (sel.ninputs () == 2 - || can_vec_perm_const_p (TYPE_MODE (type), sel, false)) + || can_vec_perm_const_p (TYPE_MODE (type), TYPE_MODE (type), + sel, false)) op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel); else { vec_perm_indices sel2 (builder, 2, nelts); - if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false)) + if (can_vec_perm_const_p (TYPE_MODE (type), TYPE_MODE (type), + sel2, false)) op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel2); else /* Not directly supported with either encoding, diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc index 68dc679cc6a..809482b8092 100644 --- a/gcc/optabs-query.cc +++ b/gcc/optabs-query.cc @@ -407,9 +407,9 @@ can_vec_perm_var_p (machine_mode mode) } /* Return true if the target directly supports VEC_PERM_EXPRs on vectors - of mode MODE using the selector SEL. ALLOW_VARIABLE_P is true if it - is acceptable to force the selector into a register and use a variable - permute (if the target supports that). + of mode OP_MODE and result vector of mode MODE using the selector SEL. + ALLOW_VARIABLE_P is true if it is acceptable to force the selector into a + register and use a variable permute (if the target supports that). Note that additional permutations representing whole-vector shifts may also be handled via the vec_shr or vec_shl optab, but only where the @@ -417,8 +417,8 @@ can_vec_perm_var_p (machine_mode mode) with here. */ bool -can_vec_perm_const_p (machine_mode mode, const vec_perm_indices &sel, - bool allow_variable_p) +can_vec_perm_const_p (machine_mode mode, machine_mode op_mode, + const vec_perm_indices &sel, bool allow_variable_p) { /* If the target doesn't implement a vector mode for the vector type, then no operations are supported. */ @@ -448,7 +448,7 @@ can_vec_perm_const_p (machine_mode mode, const vec_perm_indices &sel, if (targetm.vectorize.vec_perm_const != NULL) { - if (targetm.vectorize.vec_perm_const (mode, NULL_RTX, NULL_RTX, + if (targetm.vectorize.vec_perm_const (mode, op_mode, NULL_RTX, NULL_RTX, NULL_RTX, sel)) return true; @@ -534,7 +534,7 @@ can_mult_highpart_p (machine_mode mode, bool uns_p) + (i & ~1) + ((i & 1) ? nunits : 0)); vec_perm_indices indices (sel, 2, nunits); - if (can_vec_perm_const_p (mode, indices)) + if (can_vec_perm_const_p (mode, mode, indices)) return 2; } } @@ -550,7 +550,7 @@ can_mult_highpart_p (machine_mode mode, bool uns_p) for (unsigned int i = 0; i < 3; ++i) sel.quick_push (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1)); vec_perm_indices indices (sel, 2, nunits); - if (can_vec_perm_const_p (mode, indices)) + if (can_vec_perm_const_p (mode, mode, indices)) return 3; } } diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h index b9c9fd6f64d..32e52b20109 100644 --- a/gcc/optabs-query.h +++ b/gcc/optabs-query.h @@ -178,7 +178,7 @@ bool can_conditionally_move_p (machine_mode mode); opt_machine_mode qimode_for_vec_perm (machine_mode); bool selector_fits_mode_p (machine_mode, const vec_perm_indices &); bool can_vec_perm_var_p (machine_mode); -bool can_vec_perm_const_p (machine_mode, const vec_perm_indices &, +bool can_vec_perm_const_p (machine_mode, machine_mode, const vec_perm_indices &, bool = true); /* Find a widening optab even if it doesn't widen as much as we want. */ #define find_widening_optab_handler(A, B, C) \ diff --git a/gcc/optabs.cc b/gcc/optabs.cc index 3d8fa3abdfe..c0a68471d2d 100644 --- a/gcc/optabs.cc +++ b/gcc/optabs.cc @@ -6250,7 +6250,10 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1, if (single_arg_p) v1 = v0; - if (targetm.vectorize.vec_perm_const (mode, target, v0, v1, indices)) + gcc_checking_assert (GET_MODE (v0) == GET_MODE (v1)); + machine_mode op_mode = GET_MODE (v0); + if (targetm.vectorize.vec_perm_const (mode, op_mode, target, v0, v1, + indices)) return target; } @@ -6264,7 +6267,7 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1, v0_qi = gen_lowpart (qimode, v0); v1_qi = gen_lowpart (qimode, v1); if (targetm.vectorize.vec_perm_const != NULL - && targetm.vectorize.vec_perm_const (qimode, target_qi, v0_qi, + && targetm.vectorize.vec_perm_const (qimode, qimode, target_qi, v0_qi, v1_qi, qimode_indices)) return gen_lowpart (mode, target_qi); } diff --git a/gcc/target.def b/gcc/target.def index d85adf36a39..e8537b3eaf4 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1878,11 +1878,11 @@ access using @var{type} is known to be naturally aligned.", DEFHOOK (vec_perm_const, "This hook is used to test whether the target can permute up to two\n\ -vectors of mode @var{mode} using the permutation vector @code{sel}, and\n\ +vectors of mode @var{op_mode} using the permutation vector @code{sel}, and\n\ also to emit such a permutation. In the former case @var{in0}, @var{in1}\n\ and @var{out} are all null. In the latter case @var{in0} and @var{in1} are\n\ -the source vectors and @var{out} is the destination vector; all three are\n\ -operands of mode @var{mode}. @var{in1} is the same as @var{in0} if\n\ +the source vectors of mode @var{op_mode} and @var{out} is the destination vector\n\ +of mode @var{mode}. @var{in1} is the same as @var{in0} if\n\ @var{sel} describes a permutation on one vector instead of two.\n\ \n\ Return true if the operation is possible, emitting instructions for it\n\ @@ -1894,7 +1894,7 @@ try the equivalent byte operation. If that also fails, it will try forcing\n\ the selector into a register and using the @var{vec_perm@var{mode}}\n\ instruction pattern. There is no need for the hook to handle these two\n\ implementation approaches itself.", - bool, (machine_mode mode, rtx output, rtx in0, rtx in1, + bool, (machine_mode mode, machine_mode op_mode, rtx output, rtx in0, rtx in1, const vec_perm_indices &sel), NULL) diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index 48cab5844e0..e253e848796 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -3016,7 +3016,8 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) : (elts[0].second == 0 && elts[0].first == 0 ? 0 : refnelts) + i); vec_perm_indices indices (sel, orig[1] ? 2 : 1, refnelts); - if (!can_vec_perm_const_p (TYPE_MODE (perm_type), indices)) + machine_mode vmode = TYPE_MODE (perm_type); + if (!can_vec_perm_const_p (vmode, vmode, indices)) return false; mask_type = build_vector_type (build_nonstandard_integer_type (elem_size, 1), @@ -3065,7 +3066,8 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) sel.quick_push (elts[i].first ? elts[i].second + nelts : i); vec_perm_indices indices (sel, 2, nelts); - if (!can_vec_perm_const_p (TYPE_MODE (type), indices)) + machine_mode vmode = TYPE_MODE (type); + if (!can_vec_perm_const_p (vmode, vmode, indices)) return false; mask_type = build_vector_type (build_nonstandard_integer_type (elem_size, 1), diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index 09223baf718..d20a10a1524 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -5347,7 +5347,7 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) sel[3 * i + nelt2] = 0; } indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (mode, indices)) + if (!can_vec_perm_const_p (mode, mode, indices)) { if (dump_enabled_p ()) dump_printf (MSG_MISSED_OPTIMIZATION, @@ -5365,7 +5365,7 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) sel[3 * i + nelt2] = nelt + j2++; } indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (mode, indices)) + if (!can_vec_perm_const_p (mode, mode, indices)) { if (dump_enabled_p ()) dump_printf (MSG_MISSED_OPTIMIZATION, @@ -5390,12 +5390,12 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) sel[i * 2 + 1] = i + nelt; } vec_perm_indices indices (sel, 2, nelt); - if (can_vec_perm_const_p (mode, indices)) + if (can_vec_perm_const_p (mode, mode, indices)) { for (i = 0; i < 6; i++) sel[i] += exact_div (nelt, 2); indices.new_vector (sel, 2, nelt); - if (can_vec_perm_const_p (mode, indices)) + if (can_vec_perm_const_p (mode, mode, indices)) return true; } } @@ -5963,7 +5963,7 @@ vect_grouped_load_supported (tree vectype, bool single_element_p, else sel[i] = 0; indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (mode, indices)) + if (!can_vec_perm_const_p (mode, mode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5977,7 +5977,7 @@ vect_grouped_load_supported (tree vectype, bool single_element_p, else sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++); indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (mode, indices)) + if (!can_vec_perm_const_p (mode, mode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6000,12 +6000,12 @@ vect_grouped_load_supported (tree vectype, bool single_element_p, for (i = 0; i < 3; i++) sel[i] = i * 2; vec_perm_indices indices (sel, 2, nelt); - if (can_vec_perm_const_p (mode, indices)) + if (can_vec_perm_const_p (mode, mode, indices)) { for (i = 0; i < 3; i++) sel[i] = i * 2 + 1; indices.new_vector (sel, 2, nelt); - if (can_vec_perm_const_p (mode, indices)) + if (can_vec_perm_const_p (mode, mode, indices)) return true; } } @@ -6327,6 +6327,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec dr_chain, gimple *perm_stmt; tree vectype = STMT_VINFO_VECTYPE (stmt_info); + machine_mode vmode = TYPE_MODE (vectype); unsigned int i; loop_vec_info loop_vinfo = dyn_cast (vinfo); @@ -6351,7 +6352,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec dr_chain, for (i = 0; i < nelt / 2; ++i) sel[nelt / 2 + i] = i * 2 + 1; vec_perm_indices indices (sel, 2, nelt); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6366,7 +6367,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec dr_chain, for (i = 0; i < nelt / 2; ++i) sel[nelt / 2 + i] = i * 2; indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6381,7 +6382,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec dr_chain, for (i = 0; i < nelt; i++) sel[i] = nelt / 2 + i; indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6397,7 +6398,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec dr_chain, for (i = nelt / 2; i < nelt; i++) sel[i] = nelt + i; indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6461,7 +6462,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec dr_chain, k++; } vec_perm_indices indices (sel, 2, nelt); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6476,7 +6477,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec dr_chain, for (i = 0; i < nelt; i++) sel[i] = 2 * (nelt / 3) + (nelt % 3) + i; indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6490,7 +6491,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec dr_chain, for (i = 0; i < nelt; i++) sel[i] = 2 * (nelt / 3) + 1 + i; indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6504,7 +6505,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec dr_chain, for (i = 0; i < nelt; i++) sel[i] = (nelt / 3) + (nelt % 3) / 2 + i; indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6518,7 +6519,7 @@ vect_shift_permute_load_chain (vec_info *vinfo, vec dr_chain, for (i = 0; i < nelt; i++) sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i; indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc index d99e3207fbe..17de3375127 100644 --- a/gcc/tree-vect-generic.cc +++ b/gcc/tree-vect-generic.cc @@ -1527,7 +1527,8 @@ lower_vec_perm (gimple_stmt_iterator *gsi) && tree_to_vec_perm_builder (&sel_int, mask)) { vec_perm_indices indices (sel_int, 2, elements); - if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices)) + machine_mode vmode = TYPE_MODE (vect_type); + if (can_vec_perm_const_p (vmode, vmode, indices)) { gimple_assign_set_rhs3 (stmt, mask); update_stmt (stmt); diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 1d4337eb261..11dc6cbf576 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -312,7 +312,8 @@ interleave_supported_p (vec_perm_indices *indices, tree vectype, sel.quick_push (base + i + nelts); } indices->new_vector (sel, 2, nelts); - return can_vec_perm_const_p (TYPE_MODE (vectype), *indices); + return can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype), + *indices); } /* Try to use permutes to define the masks in DEST_RGM using the masks diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index ab7dade1c74..7ac938b3c7b 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -4527,7 +4527,7 @@ have_whole_vector_shift (machine_mode mode) { calc_vec_perm_mask_for_shift (i, nelt, &sel); indices.new_vector (sel, 2, nelt); - if (!can_vec_perm_const_p (mode, indices, false)) + if (!can_vec_perm_const_p (mode, mode, indices, false)) return false; } return true; diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 8c61eb965a6..51803ec97c2 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -2643,7 +2643,8 @@ vect_recog_rotate_pattern (vec_info *vinfo, vec_perm_indices indices (elts, 1, TYPE_VECTOR_SUBPARTS (char_vectype)); - if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices)) + machine_mode vmode = TYPE_MODE (char_vectype); + if (can_vec_perm_const_p (vmode, vmode, indices)) { /* vectorizable_bswap can handle the __builtin_bswap16 if we undo the argument promotion. */ diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index cdfff1ab9f6..fe9361c338e 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -420,8 +420,9 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count, } vec_perm_indices indices1 (sel1, 2, nelts); vec_perm_indices indices2 (sel2, 2, nelts); - if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1) - && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2)) + machine_mode vmode = TYPE_MODE (vector_type); + if (can_vec_perm_const_p (vmode, vmode, indices1) + && can_vec_perm_const_p (vmode, vmode, indices2)) { if (nvectors_out) *nvectors_out = nvectors; @@ -6762,7 +6763,7 @@ vect_transform_slp_perm_load (vec_info *vinfo, if (index == count && !noop_p) { indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits); - if (!can_vec_perm_const_p (mode, indices)) + if (!can_vec_perm_const_p (mode, mode, indices)) { if (dump_enabled_p ()) { @@ -7122,8 +7123,9 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, { indices.new_vector (mask, second_vec.first == -1U ? 1 : 2, nunits); bool identity_p = indices.series_p (0, 1, 0, 1); + machine_mode vmode = TYPE_MODE (vectype); if (!identity_p - && !can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + && !can_vec_perm_const_p (vmode, vmode, indices)) { if (dump_enabled_p ()) { diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 8327e9d047e..207d1097d5b 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -2016,7 +2016,7 @@ perm_mask_for_reverse (tree vectype) sel.quick_push (nunits - 1 - i); vec_perm_indices indices (sel, 1, nunits); - if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) + if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype), indices)) return NULL_TREE; return vect_gen_perm_mask_checked (vectype, indices); } @@ -3168,7 +3168,8 @@ vectorizable_bswap (vec_info *vinfo, elts.quick_push ((i + 1) * word_bytes - j - 1); vec_perm_indices indices (elts, 1, num_bytes); - if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices)) + machine_mode vmode = TYPE_MODE (char_vectype); + if (!can_vec_perm_const_p (vmode, vmode, indices)) return false; if (! vec_stmt) @@ -6712,7 +6713,7 @@ scan_store_can_perm_p (tree vectype, tree init, sel[j] = nunits + k; } vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits); - if (!can_vec_perm_const_p (vec_mode, indices)) + if (!can_vec_perm_const_p (vec_mode, vec_mode, indices)) { if (i == units_log2) return -1; @@ -8582,7 +8583,8 @@ vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel) tree vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel) { - gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel)); + machine_mode vmode = TYPE_MODE (vectype); + gcc_assert (can_vec_perm_const_p (vmode, vmode, sel)); return vect_gen_perm_mask_any (vectype, sel); }