diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 6a17ef4..e22aa57 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -129,6 +129,9 @@ extern bool ix86_expand_fp_vcond (rtx[]);
 extern bool ix86_expand_int_vcond (rtx[]);
 extern void ix86_expand_vec_perm (rtx[]);
 extern bool ix86_expand_vec_perm_const (rtx[]);
+extern bool ix86_expand_mask_vec_cmp (rtx[]);
+extern bool ix86_expand_int_vec_cmp (rtx[]);
+extern bool ix86_expand_fp_vec_cmp (rtx[]);
 extern void ix86_expand_sse_unpack (rtx, rtx, bool, bool);
 extern bool ix86_expand_int_addcc (rtx[]);
 extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, bool);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 070605f..d17c350 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21440,8 +21440,8 @@ ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
   cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
 
   if (optimize
-      || reg_overlap_mentioned_p (dest, op_true)
-      || reg_overlap_mentioned_p (dest, op_false))
+      || (op_true && reg_overlap_mentioned_p (dest, op_true))
+      || (op_false && reg_overlap_mentioned_p (dest, op_false)))
     dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
 
   /* Compare patterns for int modes are unspec in AVX512F only.  */
@@ -21713,34 +21713,128 @@ ix86_expand_fp_movcc (rtx operands[])
   return true;
 }
 
-/* Expand a floating-point vector conditional move; a vcond operation
-   rather than a movcc operation.  */
+/* Helper for ix86_cmp_code_to_pcmp_immediate for int modes.  */
+
+static int
+ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
+{
+  switch (code)
+    {
+    case EQ:
+      return 0;
+    case LT:
+    case LTU:
+      return 1;
+    case LE:
+    case LEU:
+      return 2;
+    case NE:
+      return 4;
+    case GE:
+    case GEU:
+      return 5;
+    case GT:
+    case GTU:
+      return 6;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes.  */
+
+static int
+ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
+{
+  switch (code)
+    {
+    case EQ:
+      return 0x08;
+    case NE:
+      return 0x04;
+    case GT:
+      return 0x16;
+    case LE:
+      return 0x1a;
+    case GE:
+      return 0x15;
+    case LT:
+      return 0x19;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Return immediate value to be used in UNSPEC_PCMP
+   for comparison CODE in MODE.  */
+
+static int
+ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
+{
+  if (FLOAT_MODE_P (mode))
+    return ix86_fp_cmp_code_to_pcmp_immediate (code);
+  return ix86_int_cmp_code_to_pcmp_immediate (code);
+}
+
+/* Expand AVX-512 vector comparison.  */
+
 bool
-ix86_expand_fp_vcond (rtx operands[])
+ix86_expand_mask_vec_cmp (rtx operands[])
 {
-  enum rtx_code code = GET_CODE (operands[3]);
+  machine_mode mask_mode = GET_MODE (operands[0]);
+  machine_mode cmp_mode = GET_MODE (operands[2]);
+  enum rtx_code code = GET_CODE (operands[1]);
+  rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
+  int unspec_code;
+  rtx unspec;
+
+  switch (code)
+    {
+    case LEU:
+    case GTU:
+    case GEU:
+    case LTU:
+      unspec_code = UNSPEC_UNSIGNED_PCMP;
+      break;
+    default:
+      unspec_code = UNSPEC_PCMP;
+    }
+
+  unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
+                                                 operands[3], imm),
+                           unspec_code);
+  emit_insn (gen_rtx_SET (operands[0], unspec));
+
+  return true;
+}
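As an illustration of what this expander produces (a hypothetical test, not part of the patch): with -O3 -mavx512f a plain comparison loop such as the one below can now be vectorized so that the V16SI compare yields an HImode mask in a k-register through a single vpcmpd with immediate 1 (the LT encoding returned by ix86_int_cmp_code_to_pcmp_immediate), and the 0/1 result vector is then materialized from that mask.

/* Hypothetical example, not part of the patch.  */
void
lt_mask (int *restrict r, const int *restrict a, const int *restrict b)
{
  for (int i = 0; i < 1024; i++)
    r[i] = a[i] < b[i];  /* vec_cmp -> vpcmpd $1, producing a k-mask.  */
}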
+
+/* Expand fp vector comparison.  */
+
+bool
+ix86_expand_fp_vec_cmp (rtx operands[])
+{
+  enum rtx_code code = GET_CODE (operands[1]);
   rtx cmp;
 
   code = ix86_prepare_sse_fp_compare_args (operands[0], code,
-					   &operands[4], &operands[5]);
+					   &operands[2], &operands[3]);
   if (code == UNKNOWN)
     {
       rtx temp;
-      switch (GET_CODE (operands[3]))
+      switch (GET_CODE (operands[1]))
 	{
 	case LTGT:
-	  temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
-				      operands[5], operands[0], operands[0]);
-	  cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
-				     operands[5], operands[1], operands[2]);
+	  temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
+				      operands[3], NULL, NULL);
+	  cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
+				     operands[3], NULL, NULL);
 	  code = AND;
 	  break;
 	case UNEQ:
-	  temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
-				      operands[5], operands[0], operands[0]);
-	  cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
-				     operands[5], operands[1], operands[2]);
+	  temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
+				      operands[3], NULL, NULL);
+	  cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
+				     operands[3], NULL, NULL);
 	  code = IOR;
 	  break;
 	default:
@@ -21748,72 +21842,26 @@ ix86_expand_fp_vcond (rtx operands[])
 	}
       cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
 				 OPTAB_DIRECT);
-      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
-      return true;
     }
+  else
+    cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
+			       operands[1], operands[2]);
 
-  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
-				 operands[5], operands[1], operands[2]))
-    return true;
+  if (operands[0] != cmp)
+    emit_move_insn (operands[0], cmp);
 
-  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
-			     operands[1], operands[2]);
-  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
   return true;
 }
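The LTGT and UNEQ cases above have no single SSE compare predicate, so they are assembled from two compares combined with AND/IOR. A scalar sanity check of the LTGT decomposition used here, LTGT(a,b) = ORDERED(a,b) && NE(a,b) — an illustration only, using the standard C99 classification macros:

#include <assert.h>
#include <math.h>

int
main (void)
{
  double v[] = { -1.0, 0.0, 2.5, NAN };
  for (int i = 0; i < 4; i++)
    for (int j = 0; j < 4; j++)
      {
        int ltgt = __builtin_islessgreater (v[i], v[j]);  /* quiet LTGT */
        int ord_ne = !isnan (v[i]) && !isnan (v[j]) && v[i] != v[j];
        assert (!ltgt == !ord_ne);
      }
  return 0;
}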
 
-/* Expand a signed/unsigned integral vector conditional move.  */
-
-bool
-ix86_expand_int_vcond (rtx operands[])
+static rtx
+ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
+			 rtx op_true, rtx op_false, bool *negate)
 {
-  machine_mode data_mode = GET_MODE (operands[0]);
-  machine_mode mode = GET_MODE (operands[4]);
-  enum rtx_code code = GET_CODE (operands[3]);
-  bool negate = false;
-  rtx x, cop0, cop1;
-
-  cop0 = operands[4];
-  cop1 = operands[5];
+  machine_mode data_mode = GET_MODE (dest);
+  machine_mode mode = GET_MODE (cop0);
+  rtx x;
 
-  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
-     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
-  if ((code == LT || code == GE)
-      && data_mode == mode
-      && cop1 == CONST0_RTX (mode)
-      && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
-      && GET_MODE_UNIT_SIZE (data_mode) > 1
-      && GET_MODE_UNIT_SIZE (data_mode) <= 8
-      && (GET_MODE_SIZE (data_mode) == 16
-	  || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
-    {
-      rtx negop = operands[2 - (code == LT)];
-      int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
-      if (negop == CONST1_RTX (data_mode))
-	{
-	  rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
-					 operands[0], 1, OPTAB_DIRECT);
-	  if (res != operands[0])
-	    emit_move_insn (operands[0], res);
-	  return true;
-	}
-      else if (GET_MODE_INNER (data_mode) != DImode
-	       && vector_all_ones_operand (negop, data_mode))
-	{
-	  rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
-					 operands[0], 0, OPTAB_DIRECT);
-	  if (res != operands[0])
-	    emit_move_insn (operands[0], res);
-	  return true;
-	}
-    }
-
-  if (!nonimmediate_operand (cop1, mode))
-    cop1 = force_reg (mode, cop1);
-  if (!general_operand (operands[1], data_mode))
-    operands[1] = force_reg (data_mode, operands[1]);
-  if (!general_operand (operands[2], data_mode))
-    operands[2] = force_reg (data_mode, operands[2]);
+  *negate = false;
 
   /* XOP supports all of the comparisons on all 128-bit vector int types.  */
   if (TARGET_XOP
@@ -21834,13 +21882,13 @@
 	case LE:
 	case LEU:
 	  code = reverse_condition (code);
-	  negate = true;
+	  *negate = true;
 	  break;
 
 	case GE:
 	case GEU:
 	  code = reverse_condition (code);
-	  negate = true;
+	  *negate = true;
 	  /* FALLTHRU */
 
 	case LT:
@@ -21861,14 +21909,14 @@
 	case EQ:
 	  /* SSE4.1 supports EQ.  */
 	  if (!TARGET_SSE4_1)
-	    return false;
+	    return NULL;
 	  break;
 
 	case GT:
 	case GTU:
 	  /* SSE4.2 supports GT/GTU.  */
 	  if (!TARGET_SSE4_2)
-	    return false;
+	    return NULL;
 	  break;
 
 	default:
@@ -21929,12 +21977,13 @@
 	case V8HImode:
 	  /* Perform a parallel unsigned saturating subtraction.  */
 	  x = gen_reg_rtx (mode);
-	  emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, cop1)));
+	  emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0,
+						       cop1)));
 
 	  cop0 = x;
 	  cop1 = CONST0_RTX (mode);
 	  code = EQ;
-	  negate = !negate;
+	  *negate = !*negate;
 	  break;
 
 	default:
@@ -21943,22 +21992,162 @@ ix86_expand_int_vcond (rtx operands[])
 	}
     }
 
+  if (*negate)
+    std::swap (op_true, op_false);
+
   /* Allow the comparison to be done in one mode, but the movcc to
      happen in another mode.  */
   if (data_mode == mode)
     {
-      x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
-			       operands[1+negate], operands[2-negate]);
+      x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
+			       op_true, op_false);
     }
   else
     {
       gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
       x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
-			       operands[1+negate], operands[2-negate]);
+			       op_true, op_false);
       if (GET_MODE (x) == mode)
 	x = gen_lowpart (data_mode, x);
     }
 
+  return x;
+}
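For narrow unsigned elements (V16QI/V8HI) the helper above rewrites the comparison with a saturating subtraction: a >u b becomes (a -us b) == 0 plus a final negation. The identity behind that rewrite can be checked exhaustively for bytes — an illustration only, where us_minus models what psubusb computes:

#include <assert.h>

int
main (void)
{
  for (unsigned a = 0; a < 256; a++)
    for (unsigned b = 0; b < 256; b++)
      {
        unsigned d = a > b ? a - b : 0;  /* unsigned saturating subtraction */
        assert ((a > b) == !(d == 0));   /* EQ against zero, then negate */
      }
  return 0;
}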
+
+/* Expand integer vector comparison.  */
+
+bool
+ix86_expand_int_vec_cmp (rtx operands[])
+{
+  rtx_code code = GET_CODE (operands[1]);
+  bool negate = false;
+  rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
+				     operands[3], NULL, NULL, &negate);
+
+  if (!cmp)
+    return false;
+
+  if (negate)
+    cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
+				   CONST0_RTX (GET_MODE (cmp)),
+				   NULL, NULL, &negate);
+
+  gcc_assert (!negate);
+
+  if (operands[0] != cmp)
+    emit_move_insn (operands[0], cmp);
+
+  return true;
+}
+
+/* Expand a floating-point vector conditional move; a vcond operation
+   rather than a movcc operation.  */
+
+bool
+ix86_expand_fp_vcond (rtx operands[])
+{
+  enum rtx_code code = GET_CODE (operands[3]);
+  rtx cmp;
+
+  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
+					   &operands[4], &operands[5]);
+  if (code == UNKNOWN)
+    {
+      rtx temp;
+      switch (GET_CODE (operands[3]))
+	{
+	case LTGT:
+	  temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
+				      operands[5], operands[0], operands[0]);
+	  cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
+				     operands[5], operands[1], operands[2]);
+	  code = AND;
+	  break;
+	case UNEQ:
+	  temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
+				      operands[5], operands[0], operands[0]);
+	  cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
+				     operands[5], operands[1], operands[2]);
+	  code = IOR;
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
+				 OPTAB_DIRECT);
+      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
+      return true;
+    }
+
+  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
+				 operands[5], operands[1], operands[2]))
+    return true;
+
+  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
+			     operands[1], operands[2]);
+  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
+  return true;
+}
+
+/* Expand a signed/unsigned integral vector conditional move.  */
+
+bool
+ix86_expand_int_vcond (rtx operands[])
+{
+  machine_mode data_mode = GET_MODE (operands[0]);
+  machine_mode mode = GET_MODE (operands[4]);
+  enum rtx_code code = GET_CODE (operands[3]);
+  bool negate = false;
+  rtx x, cop0, cop1;
+
+  cop0 = operands[4];
+  cop1 = operands[5];
+
+  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
+     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
+  if ((code == LT || code == GE)
+      && data_mode == mode
+      && cop1 == CONST0_RTX (mode)
+      && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
+      && GET_MODE_UNIT_SIZE (data_mode) > 1
+      && GET_MODE_UNIT_SIZE (data_mode) <= 8
+      && (GET_MODE_SIZE (data_mode) == 16
+	  || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
+    {
+      rtx negop = operands[2 - (code == LT)];
+      int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
+      if (negop == CONST1_RTX (data_mode))
+	{
+	  rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
+					 operands[0], 1, OPTAB_DIRECT);
+	  if (res != operands[0])
+	    emit_move_insn (operands[0], res);
+	  return true;
+	}
+      else if (GET_MODE_INNER (data_mode) != DImode
+	       && vector_all_ones_operand (negop, data_mode))
+	{
+	  rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
+					 operands[0], 0, OPTAB_DIRECT);
+	  if (res != operands[0])
+	    emit_move_insn (operands[0], res);
+	  return true;
+	}
+    }
+
+  if (!nonimmediate_operand (cop1, mode))
+    cop1 = force_reg (mode, cop1);
+  if (!general_operand (operands[1], data_mode))
+    operands[1] = force_reg (data_mode, operands[1]);
+  if (!general_operand (operands[2], data_mode))
+    operands[2] = force_reg (data_mode, operands[2]);
+
+  x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
+			       operands[1], operands[2], &negate);
+
+  if (!x)
+    return false;
+
   ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
 			 operands[2-negate]);
 
   return true;
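The early-out kept at the top of ix86_expand_int_vcond maps x < 0 ? -1 : 0 to an arithmetic right shift and x < 0 ? 1 : 0 to a logical one. A scalar check of both identities for 32-bit elements — an illustration only; GCC implements >> of a negative signed value as an arithmetic shift:

#include <assert.h>
#include <limits.h>

int
main (void)
{
  int v[] = { INT_MIN, -5, -1, 0, 1, INT_MAX };
  for (int i = 0; i < 6; i++)
    {
      int x = v[i];
      assert ((x < 0 ? -1 : 0) == (x >> 31));                  /* psrad $31 */
      assert ((x < 0 ? 1 : 0) == (int) ((unsigned) x >> 31));  /* psrld $31 */
    }
  return 0;
}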
@@ -51678,6 +51867,30 @@ ix86_autovectorize_vector_sizes (void)
     (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
 }
 
+/* Implementation of targetm.vectorize.get_mask_mode.  */
+
+static machine_mode
+ix86_get_mask_mode (unsigned nunits, unsigned vector_size)
+{
+  unsigned elem_size = vector_size / nunits;
+
+  /* Scalar mask case; byte and word elements also require AVX512BW.  */
+  if ((TARGET_AVX512F && vector_size == 64)
+      || (TARGET_AVX512VL
+	  && (vector_size == 32 || vector_size == 16)))
+    {
+      if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
+	return smallest_mode_for_size (nunits, MODE_INT);
+    }
+
+  machine_mode elem_mode
+    = smallest_mode_for_size (elem_size * BITS_PER_UNIT, MODE_INT);
+
+  gcc_assert (elem_size * nunits == vector_size);
+
+  return mode_for_vector (elem_mode, nunits);
+}
+
 
 /* Return class of registers which could be used for pseudo of MODE
@@ -52612,6 +52825,8 @@ ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
   ix86_autovectorize_vector_sizes
+#undef TARGET_VECTORIZE_GET_MASK_MODE
+#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
 #undef TARGET_VECTORIZE_INIT_COST
 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
 #undef TARGET_VECTORIZE_ADD_STMT_COST
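The hook returns a scalar (k-register) mask mode only when the target can really keep the mask in a mask register; everything else falls back to an integer vector mask of the same width as the data. A standalone model of that decision with the target flags turned into plain parameters — a sketch, not GCC code:

#include <stdio.h>

/* Returns the number of mask bits when a scalar k-register mask would
   be chosen, or 0 when a full-width vector mask is used instead.  */
static unsigned
mask_bits (unsigned nunits, unsigned vector_size,
           int avx512f, int avx512vl, int avx512bw)
{
  unsigned elem_size = vector_size / nunits;
  if (((avx512f && vector_size == 64)
       || (avx512vl && (vector_size == 32 || vector_size == 16)))
      && (elem_size == 4 || elem_size == 8 || avx512bw))
    return nunits;
  return 0;
}

int
main (void)
{
  printf ("V16SI, AVX512F:       %u\n", mask_bits (16, 64, 1, 0, 0)); /* 16 -> HImode  */
  printf ("V64QI, AVX512F only:  %u\n", mask_bits (64, 64, 1, 0, 0)); /* 0: needs BW   */
  printf ("V64QI, with AVX512BW: %u\n", mask_bits (64, 64, 1, 0, 1)); /* 64 -> DImode  */
  printf ("V8SI, no AVX512VL:    %u\n", mask_bits (8, 32, 1, 0, 0));  /* 0 -> V8SImode */
  return 0;
}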
"vec_cmp" + [(set (match_operand: 0 "register_operand") + (match_operator: 1 "" + [(match_operand:V48_AVX512VL 2 "register_operand") + (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))] + "TARGET_AVX512F" +{ + bool ok = ix86_expand_mask_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmp" + [(set (match_operand: 0 "register_operand") + (match_operator: 1 "" + [(match_operand:VI12_AVX512VL 2 "register_operand") + (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))] + "TARGET_AVX512BW" +{ + bool ok = ix86_expand_mask_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmp" + [(set (match_operand: 0 "register_operand") + (match_operator: 1 "" + [(match_operand:VI_256 2 "register_operand") + (match_operand:VI_256 3 "nonimmediate_operand")]))] + "TARGET_AVX2" +{ + bool ok = ix86_expand_int_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmp" + [(set (match_operand: 0 "register_operand") + (match_operator: 1 "" + [(match_operand:VI124_128 2 "register_operand") + (match_operand:VI124_128 3 "nonimmediate_operand")]))] + "TARGET_SSE2" +{ + bool ok = ix86_expand_int_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmpv2div2di" + [(set (match_operand:V2DI 0 "register_operand") + (match_operator:V2DI 1 "" + [(match_operand:V2DI 2 "register_operand") + (match_operand:V2DI 3 "nonimmediate_operand")]))] + "TARGET_SSE4_2" +{ + bool ok = ix86_expand_int_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmp" + [(set (match_operand: 0 "register_operand") + (match_operator: 1 "" + [(match_operand:VF_256 2 "register_operand") + (match_operand:VF_256 3 "nonimmediate_operand")]))] + "TARGET_AVX" +{ + bool ok = ix86_expand_fp_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmp" + [(set (match_operand: 0 "register_operand") + (match_operator: 1 "" + [(match_operand:VF_128 2 "register_operand") + (match_operand:VF_128 3 "nonimmediate_operand")]))] + "TARGET_SSE" +{ + bool ok = ix86_expand_fp_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmpu" + [(set (match_operand: 0 "register_operand") + (match_operator: 1 "" + [(match_operand:VI48_AVX512VL 2 "register_operand") + (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))] + "TARGET_AVX512F" +{ + bool ok = ix86_expand_mask_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmpu" + [(set (match_operand: 0 "register_operand") + (match_operator: 1 "" + [(match_operand:VI12_AVX512VL 2 "register_operand") + (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))] + "TARGET_AVX512BW" +{ + bool ok = ix86_expand_mask_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmpu" + [(set (match_operand: 0 "register_operand") + (match_operator: 1 "" + [(match_operand:VI_256 2 "register_operand") + (match_operand:VI_256 3 "nonimmediate_operand")]))] + "TARGET_AVX2" +{ + bool ok = ix86_expand_int_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmpu" + [(set (match_operand: 0 "register_operand") + (match_operator: 1 "" + [(match_operand:VI124_128 2 "register_operand") + (match_operand:VI124_128 3 "nonimmediate_operand")]))] + "TARGET_SSE2" +{ + bool ok = ix86_expand_int_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmpuv2div2di" + [(set (match_operand:V2DI 0 "register_operand") + (match_operator:V2DI 1 "" + [(match_operand:V2DI 2 "register_operand") + (match_operand:V2DI 3 
"nonimmediate_operand")]))] + "TARGET_SSE4_2" +{ + bool ok = ix86_expand_int_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + (define_expand "vcond" [(set (match_operand:V_512 0 "register_operand") (if_then_else:V_512 @@ -17895,7 +18048,7 @@ (set_attr "btver2_decode" "vector") (set_attr "mode" "")]) -(define_expand "maskload" +(define_expand "maskload" [(set (match_operand:V48_AVX2 0 "register_operand") (unspec:V48_AVX2 [(match_operand: 2 "register_operand") @@ -17903,7 +18056,23 @@ UNSPEC_MASKMOV))] "TARGET_AVX") -(define_expand "maskstore" +(define_expand "maskload" + [(set (match_operand:V48_AVX512VL 0 "register_operand") + (vec_merge:V48_AVX512VL + (match_operand:V48_AVX512VL 1 "memory_operand") + (match_dup 0) + (match_operand: 2 "register_operand")))] + "TARGET_AVX512F") + +(define_expand "maskload" + [(set (match_operand:VI12_AVX512VL 0 "register_operand") + (vec_merge:VI12_AVX512VL + (match_operand:VI12_AVX512VL 1 "memory_operand") + (match_dup 0) + (match_operand: 2 "register_operand")))] + "TARGET_AVX512BW") + +(define_expand "maskstore" [(set (match_operand:V48_AVX2 0 "memory_operand") (unspec:V48_AVX2 [(match_operand: 2 "register_operand") @@ -17912,6 +18081,22 @@ UNSPEC_MASKMOV))] "TARGET_AVX") +(define_expand "maskstore" + [(set (match_operand:V48_AVX512VL 0 "memory_operand") + (vec_merge:V48_AVX512VL + (match_operand:V48_AVX512VL 1 "register_operand") + (match_dup 0) + (match_operand: 2 "register_operand")))] + "TARGET_AVX512F") + +(define_expand "maskstore" + [(set (match_operand:VI12_AVX512VL 0 "memory_operand") + (vec_merge:VI12_AVX512VL + (match_operand:VI12_AVX512VL 1 "register_operand") + (match_dup 0) + (match_operand: 2 "register_operand")))] + "TARGET_AVX512BW") + (define_insn_and_split "avx__" [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m") (unspec:AVX256MODE2P