This patch splits the variable handling out of expand_vec_perm into a subroutine, so that the next patch can use a different interface for expanding constant permutes. expand_vec_perm now does all the CONST_VECTOR handling directly and defers to expand_vec_perm_var for other rtx codes. Handling CONST_VECTORs includes handling the fallback to variable permutes. The patch also adds an assert for valid optab modes to expand_vec_perm_1, so that we get it when using optabs for CONST_VECTORs. The MODE_VECTOR_INT part was previously in expand_vec_perm and the mode_for_int_vector part is new. Most of the patch is just reindentation, so I've attached a -b version. 2017-12-06 Richard Sandiford gcc/ * optabs.c (expand_vec_perm_1): Assert that SEL has an integer vector mode and that that mode matches the mode of the data being permuted. (expand_vec_perm): Split handling of non-CONST_VECTOR selectors out into expand_vec_perm_var. Do all CONST_VECTOR handling here, directly using expand_vec_perm_1 when forcing selectors into registers. (expand_vec_perm_var): New function, split out from expand_vec_perm. Index: gcc/optabs.c =================================================================== --- gcc/optabs.c 2017-12-09 22:47:14.731310077 +0000 +++ gcc/optabs.c 2017-12-09 22:47:23.878315657 +0000 @@ -5405,6 +5405,8 @@ expand_vec_perm_1 (enum insn_code icode, machine_mode smode = GET_MODE (sel); struct expand_operand ops[4]; + gcc_assert (GET_MODE_CLASS (smode) == MODE_VECTOR_INT + || mode_for_int_vector (tmode).require () == smode); create_output_operand (&ops[0], target, tmode); create_input_operand (&ops[3], sel, smode); @@ -5431,8 +5433,13 @@ expand_vec_perm_1 (enum insn_code icode, return NULL_RTX; } -/* Generate instructions for vec_perm optab given its mode - and three operands. */ +static rtx expand_vec_perm_var (machine_mode, rtx, rtx, rtx, rtx); + +/* Implement a permutation of vectors v0 and v1 using the permutation + vector in SEL and return the result. 
Use TARGET to hold the result + if nonnull and convenient. + + MODE is the mode of the vectors being permuted (V0 and V1). */ rtx expand_vec_perm (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) @@ -5443,6 +5450,9 @@ expand_vec_perm (machine_mode mode, rtx rtx tmp, sel_qi = NULL; rtvec vec; + if (GET_CODE (sel) != CONST_VECTOR) + return expand_vec_perm_var (mode, v0, v1, sel, target); + if (!target || GET_MODE (target) != mode) target = gen_reg_rtx (mode); @@ -5455,86 +5465,125 @@ expand_vec_perm (machine_mode mode, rtx if (!qimode_for_vec_perm (mode).exists (&qimode)) qimode = VOIDmode; - /* If the input is a constant, expand it specially. */ - gcc_assert (GET_MODE_CLASS (GET_MODE (sel)) == MODE_VECTOR_INT); - if (GET_CODE (sel) == CONST_VECTOR) - { - /* See if this can be handled with a vec_shr. We only do this if the - second vector is all zeroes. */ - enum insn_code shift_code = optab_handler (vec_shr_optab, mode); - enum insn_code shift_code_qi = ((qimode != VOIDmode && qimode != mode) - ? optab_handler (vec_shr_optab, qimode) - : CODE_FOR_nothing); - rtx shift_amt = NULL_RTX; - if (v1 == CONST0_RTX (GET_MODE (v1)) - && (shift_code != CODE_FOR_nothing - || shift_code_qi != CODE_FOR_nothing)) + /* See if this can be handled with a vec_shr. We only do this if the + second vector is all zeroes. */ + insn_code shift_code = optab_handler (vec_shr_optab, mode); + insn_code shift_code_qi = ((qimode != VOIDmode && qimode != mode) + ? 
optab_handler (vec_shr_optab, qimode) + : CODE_FOR_nothing); + + if (v1 == CONST0_RTX (GET_MODE (v1)) + && (shift_code != CODE_FOR_nothing + || shift_code_qi != CODE_FOR_nothing)) + { + rtx shift_amt = shift_amt_for_vec_perm_mask (sel); + if (shift_amt) { - shift_amt = shift_amt_for_vec_perm_mask (sel); - if (shift_amt) + struct expand_operand ops[3]; + if (shift_code != CODE_FOR_nothing) { - struct expand_operand ops[3]; - if (shift_code != CODE_FOR_nothing) - { - create_output_operand (&ops[0], target, mode); - create_input_operand (&ops[1], v0, mode); - create_convert_operand_from_type (&ops[2], shift_amt, - sizetype); - if (maybe_expand_insn (shift_code, 3, ops)) - return ops[0].value; - } - if (shift_code_qi != CODE_FOR_nothing) - { - tmp = gen_reg_rtx (qimode); - create_output_operand (&ops[0], tmp, qimode); - create_input_operand (&ops[1], gen_lowpart (qimode, v0), - qimode); - create_convert_operand_from_type (&ops[2], shift_amt, - sizetype); - if (maybe_expand_insn (shift_code_qi, 3, ops)) - return gen_lowpart (mode, ops[0].value); - } + create_output_operand (&ops[0], target, mode); + create_input_operand (&ops[1], v0, mode); + create_convert_operand_from_type (&ops[2], shift_amt, sizetype); + if (maybe_expand_insn (shift_code, 3, ops)) + return ops[0].value; + } + if (shift_code_qi != CODE_FOR_nothing) + { + rtx tmp = gen_reg_rtx (qimode); + create_output_operand (&ops[0], tmp, qimode); + create_input_operand (&ops[1], gen_lowpart (qimode, v0), qimode); + create_convert_operand_from_type (&ops[2], shift_amt, sizetype); + if (maybe_expand_insn (shift_code_qi, 3, ops)) + return gen_lowpart (mode, ops[0].value); } } + } - icode = direct_optab_handler (vec_perm_const_optab, mode); - if (icode != CODE_FOR_nothing) + icode = direct_optab_handler (vec_perm_const_optab, mode); + if (icode != CODE_FOR_nothing) + { + tmp = expand_vec_perm_1 (icode, target, v0, v1, sel); + if (tmp) + return tmp; + } + + /* Fall back to a constant byte-based permutation. 
*/
+  if (qimode != VOIDmode)
+    {
+      vec = rtvec_alloc (w);
+      for (i = 0; i < e; ++i)
 	{
-	  tmp = expand_vec_perm_1 (icode, target, v0, v1, sel);
-	  if (tmp)
-	    return tmp;
+	  unsigned int j, this_e;
+
+	  this_e = INTVAL (CONST_VECTOR_ELT (sel, i));
+	  this_e &= 2 * e - 1;
+	  this_e *= u;
+
+	  for (j = 0; j < u; ++j)
+	    RTVEC_ELT (vec, i * u + j) = GEN_INT (this_e + j);
 	}
+      sel_qi = gen_rtx_CONST_VECTOR (qimode, vec);
 
-      /* Fall back to a constant byte-based permutation.  */
-      if (qimode != VOIDmode)
+      icode = direct_optab_handler (vec_perm_const_optab, qimode);
+      if (icode != CODE_FOR_nothing)
 	{
-	  vec = rtvec_alloc (w);
-	  for (i = 0; i < e; ++i)
-	    {
-	      unsigned int j, this_e;
+	  tmp = gen_reg_rtx (qimode);
+	  tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0),
+				   gen_lowpart (qimode, v1), sel_qi);
+	  if (tmp)
+	    return gen_lowpart (mode, tmp);
+	}
+    }
 
-	      this_e = INTVAL (CONST_VECTOR_ELT (sel, i));
-	      this_e &= 2 * e - 1;
-	      this_e *= u;
+  /* Otherwise expand as a fully variable permutation.  */
 
-	      for (j = 0; j < u; ++j)
-		RTVEC_ELT (vec, i * u + j) = GEN_INT (this_e + j);
-	    }
-	  sel_qi = gen_rtx_CONST_VECTOR (qimode, vec);
+  icode = direct_optab_handler (vec_perm_optab, mode);
+  if (icode != CODE_FOR_nothing)
+    {
+      rtx tmp = expand_vec_perm_1 (icode, target, v0, v1, sel);
+      if (tmp)
+	return tmp;
+    }
 
-	  icode = direct_optab_handler (vec_perm_const_optab, qimode);
-	  if (icode != CODE_FOR_nothing)
-	    {
-	      tmp = mode != qimode ? gen_reg_rtx (qimode) : target;
-	      tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0),
-				       gen_lowpart (qimode, v1), sel_qi);
-	      if (tmp)
-		return gen_lowpart (mode, tmp);
-	    }
+  if (qimode != VOIDmode)
+    {
+      icode = direct_optab_handler (vec_perm_optab, qimode);
+      if (icode != CODE_FOR_nothing)
+	{
+	  rtx tmp = gen_reg_rtx (qimode);
+	  tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0),
+				   gen_lowpart (qimode, v1), sel_qi);
+	  if (tmp)
+	    return gen_lowpart (mode, tmp);
 	}
     }
 
-  /* Otherwise expand as a fully variable permuation. 
*/
+  return NULL_RTX;
+}
+
+/* Implement a permutation of vectors v0 and v1 using the permutation
+   vector in SEL and return the result.  Use TARGET to hold the result
+   if nonnull and convenient.
+
+   MODE is the mode of the vectors being permuted (V0 and V1).
+   SEL must have the integer equivalent of MODE and is known to be
+   unsuitable for permutes with a constant permutation vector.  */
+
+static rtx
+expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
+{
+  enum insn_code icode;
+  unsigned int i, w, u;
+  rtx tmp, sel_qi;
+  rtvec vec;
+
+  w = GET_MODE_SIZE (mode);
+  u = GET_MODE_UNIT_SIZE (mode);
+
+  if (!target || GET_MODE (target) != mode)
+    target = gen_reg_rtx (mode);
+
   icode = direct_optab_handler (vec_perm_optab, mode);
   if (icode != CODE_FOR_nothing)
     {
@@ -5545,50 +5594,47 @@ expand_vec_perm (machine_mode mode, rtx
 
   /* As a special case to aid several targets, lower the element-based
      permutation to a byte-based permutation and try again.  */
-  if (qimode == VOIDmode)
+  machine_mode qimode;
+  if (!qimode_for_vec_perm (mode).exists (&qimode))
     return NULL_RTX;
   icode = direct_optab_handler (vec_perm_optab, qimode);
   if (icode == CODE_FOR_nothing)
     return NULL_RTX;
 
-  if (sel_qi == NULL)
+  /* Multiply each element by its byte size.  */
+  machine_mode selmode = GET_MODE (sel);
+  if (u == 2)
+    sel = expand_simple_binop (selmode, PLUS, sel, sel,
+			       NULL, 0, OPTAB_DIRECT);
+  else
+    sel = expand_simple_binop (selmode, ASHIFT, sel, GEN_INT (exact_log2 (u)),
+			       NULL, 0, OPTAB_DIRECT);
+  gcc_assert (sel != NULL);
+
+  /* Broadcast the low byte of each element into each of its bytes.  */
+  vec = rtvec_alloc (w);
+  for (i = 0; i < w; ++i)
    {
-      /* Multiply each element by its byte size. 
*/
-      machine_mode selmode = GET_MODE (sel);
-      if (u == 2)
-	sel = expand_simple_binop (selmode, PLUS, sel, sel,
-				   NULL, 0, OPTAB_DIRECT);
-      else
-	sel = expand_simple_binop (selmode, ASHIFT, sel,
-				   GEN_INT (exact_log2 (u)),
-				   NULL, 0, OPTAB_DIRECT);
-      gcc_assert (sel != NULL);
-
-      /* Broadcast the low byte each element into each of its bytes. */
-      vec = rtvec_alloc (w);
-      for (i = 0; i < w; ++i)
-	{
-	  int this_e = i / u * u;
-	  if (BYTES_BIG_ENDIAN)
-	    this_e += u - 1;
-	  RTVEC_ELT (vec, i) = GEN_INT (this_e);
-	}
-      tmp = gen_rtx_CONST_VECTOR (qimode, vec);
-      sel = gen_lowpart (qimode, sel);
-      sel = expand_vec_perm (qimode, sel, sel, tmp, NULL);
-      gcc_assert (sel != NULL);
-
-      /* Add the byte offset to each byte element. */
-      /* Note that the definition of the indicies here is memory ordering,
-	 so there should be no difference between big and little endian. */
-      vec = rtvec_alloc (w);
-      for (i = 0; i < w; ++i)
-	RTVEC_ELT (vec, i) = GEN_INT (i % u);
-      tmp = gen_rtx_CONST_VECTOR (qimode, vec);
-      sel_qi = expand_simple_binop (qimode, PLUS, sel, tmp,
-				    sel, 0, OPTAB_DIRECT);
-      gcc_assert (sel_qi != NULL);
+      int this_e = i / u * u;
+      if (BYTES_BIG_ENDIAN)
+	this_e += u - 1;
+      RTVEC_ELT (vec, i) = GEN_INT (this_e);
    }
+  tmp = gen_rtx_CONST_VECTOR (qimode, vec);
+  sel = gen_lowpart (qimode, sel);
+  sel = expand_vec_perm (qimode, sel, sel, tmp, NULL);
+  gcc_assert (sel != NULL);
+
+  /* Add the byte offset to each byte element. */
+  /* Note that the definition of the indices here is memory ordering,
+     so there should be no difference between big and little endian. */
+  vec = rtvec_alloc (w);
+  for (i = 0; i < w; ++i)
+    RTVEC_ELT (vec, i) = GEN_INT (i % u);
+  tmp = gen_rtx_CONST_VECTOR (qimode, vec);
+  sel_qi = expand_simple_binop (qimode, PLUS, sel, tmp,
+				sel, 0, OPTAB_DIRECT);
+  gcc_assert (sel_qi != NULL);
 
   tmp = mode != qimode ? gen_reg_rtx (qimode) : target;
   tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0),