diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index a613291..5f0b725 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -11707,6 +11707,8 @@ ix86_expand_args_builtin (const struct builtin_description *d, tree arg = CALL_EXPR_ARG (exp, i); rtx op = expand_normal (arg); machine_mode mode = insn_p->operand[i + 1].mode; + /* Need to fixup modeless constant before testing predicate. */ + op = fixup_modeless_constant (op, mode); bool match = insn_p->operand[i + 1].predicate (op, mode); if (second_arg_count && i == 1) @@ -11873,13 +11875,15 @@ ix86_expand_args_builtin (const struct builtin_description *d, /* If we aren't optimizing, only allow one memory operand to be generated. */ if (memory_operand (op, mode)) - num_memory++; - - op = fixup_modeless_constant (op, mode); + { + num_memory++; + if (!optimize && num_memory > 1) + op = copy_to_mode_reg (mode, op); + } if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) { - if (optimize || !match || num_memory > 1) + if (!match) op = copy_to_mode_reg (mode, op); } else @@ -25480,4 +25484,548 @@ ix86_expand_fast_convert_bf_to_sf (rtx val) return ret; } +/* Attempt to convert a CONST_VECTOR into a bcst_mem_operand. + Returns NULL_RTX if X is cannot be expressed as a suitable + VEC_DUPLICATE in mode MODE. */ + +static rtx +ix86_gen_bcst_mem (machine_mode mode, rtx x) +{ + if (!TARGET_AVX512F + || GET_CODE (x) != CONST_VECTOR + || (!TARGET_AVX512VL + && (GET_MODE_SIZE (mode) != 64 || !TARGET_EVEX512)) + || !VALID_BCST_MODE_P (GET_MODE_INNER (mode)) + /* Disallow HFmode broadcast. */ + || GET_MODE_SIZE (GET_MODE_INNER (mode)) < 4) + return NULL_RTX; + + rtx cst = CONST_VECTOR_ELT (x, 0); + if (!CONST_SCALAR_INT_P (cst) + && !CONST_DOUBLE_P (cst) + && !CONST_FIXED_P (cst)) + return NULL_RTX; + + int n_elts = GET_MODE_NUNITS (mode); + if (CONST_VECTOR_NUNITS (x) != n_elts) + return NULL_RTX; + + for (int i = 1; i < n_elts; i++) + if (!rtx_equal_p (cst, CONST_VECTOR_ELT (x, i))) + return NULL_RTX; + + rtx mem = force_const_mem (GET_MODE_INNER (mode), cst); + return gen_rtx_VEC_DUPLICATE (mode, validize_mem (mem)); +} + +/* Determine the ternlog immediate index that implements 3-operand + ternary logic expression OP. This uses and modifies the 3 element + array ARGS to record and check the leaves, either 3 REGs, or 2 REGs + and MEM. Returns an index between 0 and 255 for a valid ternlog, + or -1 if the expression isn't suitable. */ + +int +ix86_ternlog_idx (rtx op, rtx *args) +{ + int idx0, idx1; + + if (!op) + return -1; + + switch (GET_CODE (op)) + { + case REG: + if (!args[0]) + { + args[0] = op; + return 0xf0; + } + if (REGNO (op) == REGNO (args[0])) + return 0xf0; + if (!args[1]) + { + args[1] = op; + return 0xcc; + } + if (REGNO (op) == REGNO (args[1])) + return 0xcc; + if (!args[2]) + { + args[2] = op; + return 0xaa; + } + if (REG_P (args[2]) && REGNO (op) == REGNO (args[2])) + return 0xaa; + return -1; + + case VEC_DUPLICATE: + if (!bcst_mem_operand (op, GET_MODE (op))) + return -1; + /* FALLTHRU */ + + case MEM: + if (MEM_P (op) + && MEM_VOLATILE_P (op) + && !volatile_ok) + return -1; + /* FALLTHRU */ + + case CONST_VECTOR: + if (!args[2]) + { + args[2] = op; + return 0xaa; + } + /* Maximum of one volatile memory reference per expression. */ + if (side_effects_p (op) && side_effects_p (args[2])) + return -1; + if (rtx_equal_p (op, args[2])) + return 0xaa; + /* Check if one CONST_VECTOR is the ones-complement of the other. */ + if (GET_CODE (op) == CONST_VECTOR + && GET_CODE (args[2]) == CONST_VECTOR + && rtx_equal_p (simplify_const_unary_operation (NOT, GET_MODE (op), + op, GET_MODE (op)), + args[2])) + return 0x55; + return -1; + + case SUBREG: + if (GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) + != GET_MODE_SIZE (GET_MODE (op))) + return -1; + return ix86_ternlog_idx (SUBREG_REG (op), args); + + case NOT: + idx0 = ix86_ternlog_idx (XEXP (op, 0), args); + return (idx0 >= 0) ? idx0 ^ 0xff : -1; + + case AND: + idx0 = ix86_ternlog_idx (XEXP (op, 0), args); + if (idx0 < 0) + return -1; + idx1 = ix86_ternlog_idx (XEXP (op, 1), args); + return (idx1 >= 0) ? idx0 & idx1 : -1; + + case IOR: + idx0 = ix86_ternlog_idx (XEXP (op, 0), args); + if (idx0 < 0) + return -1; + idx1 = ix86_ternlog_idx (XEXP (op, 1), args); + return (idx1 >= 0) ? idx0 | idx1 : -1; + + case XOR: + idx0 = ix86_ternlog_idx (XEXP (op, 0), args); + if (idx0 < 0) + return -1; + if (vector_all_ones_operand (XEXP (op, 1), GET_MODE (op))) + return idx0 ^ 0xff; + idx1 = ix86_ternlog_idx (XEXP (op, 1), args); + return (idx1 >= 0) ? idx0 ^ idx1 : -1; + + case UNSPEC: + if (XINT (op, 1) != UNSPEC_VTERNLOG + || XVECLEN (op, 0) != 4 + || !CONST_INT_P (XVECEXP (op, 0, 3))) + return -1; + + /* TODO: Handle permuted operands. */ + if (ix86_ternlog_idx (XVECEXP (op, 0, 0), args) != 0xf0 + || ix86_ternlog_idx (XVECEXP (op, 0, 1), args) != 0xcc + || ix86_ternlog_idx (XVECEXP (op, 0, 2), args) != 0xaa) + return -1; + return INTVAL (XVECEXP (op, 0, 3)); + + default: + return -1; + } +} + +/* Return TRUE if OP (in mode MODE) is the leaf of a ternary logic + expression, such as a register or a memory reference. */ + +bool +ix86_ternlog_leaf_p (rtx op, machine_mode mode) +{ + /* We can't use memory_operand here, as it may return a different + value before and after reload (for volatile MEMs) which creates + problems splitting instructions. */ + return register_operand (op, mode) + || MEM_P (op) + || GET_CODE (op) == CONST_VECTOR + || bcst_mem_operand (op, mode); +} + +/* Test whether OP is a 3-operand ternary logic expression suitable + for use in a ternlog instruction. */ + +bool +ix86_ternlog_operand_p (rtx op) +{ + rtx op0, op1; + rtx args[3]; + + args[0] = NULL_RTX; + args[1] = NULL_RTX; + args[2] = NULL_RTX; + int idx = ix86_ternlog_idx (op, args); + if (idx < 0) + return false; + + /* Don't match simple (binary or unary) expressions. */ + machine_mode mode = GET_MODE (op); + switch (GET_CODE (op)) + { + case AND: + op0 = XEXP (op, 0); + op1 = XEXP (op, 1); + + /* Prefer pand. */ + if (ix86_ternlog_leaf_p (op0, mode) + && ix86_ternlog_leaf_p (op1, mode)) + return false; + /* Prefer pandn. */ + if (GET_CODE (op0) == NOT + && register_operand (XEXP (op0, 0), mode) + && ix86_ternlog_leaf_p (op1, mode)) + return false; + break; + + case IOR: + /* Prefer por. */ + if (ix86_ternlog_leaf_p (XEXP (op, 0), mode) + && ix86_ternlog_leaf_p (XEXP (op, 1), mode)) + return false; + break; + + case XOR: + op1 = XEXP (op, 1); + /* Prefer pxor, or one_cmpl2. */ + if (ix86_ternlog_leaf_p (XEXP (op, 0), mode) + && (ix86_ternlog_leaf_p (op1, mode) + || vector_all_ones_operand (op1, mode))) + return false; + break; + + default: + break; + } + return true; +} + +/* Helper function for ix86_expand_ternlog. */ +static rtx +ix86_expand_ternlog_binop (enum rtx_code code, machine_mode mode, + rtx op0, rtx op1, rtx target) +{ + if (GET_MODE (op0) != mode) + op0 = gen_lowpart (mode, op0); + if (GET_MODE (op1) != mode) + op1 = gen_lowpart (mode, op1); + + if (GET_CODE (op0) == CONST_VECTOR) + op0 = validize_mem (force_const_mem (mode, op0)); + if (GET_CODE (op1) == CONST_VECTOR) + op1 = validize_mem (force_const_mem (mode, op1)); + + if (memory_operand (op0, mode)) + { + if (memory_operand (op1, mode)) + op0 = force_reg (mode, op0); + else + std::swap (op0, op1); + } + rtx ops[3] = { target, op0, op1 }; + ix86_expand_vector_logical_operator (code, mode, ops); + return target; +} + + +/* Helper function for ix86_expand_ternlog. */ +static rtx +ix86_expand_ternlog_andnot (machine_mode mode, rtx op0, rtx op1, rtx target) +{ + if (GET_MODE (op0) != mode) + op0 = gen_lowpart (mode, op0); + op0 = gen_rtx_NOT (mode, op0); + if (GET_MODE (op1) != mode) + op1 = gen_lowpart (mode, op1); + emit_move_insn (target, gen_rtx_AND (mode, op0, op1)); + return target; +} + +/* Expand a 3-operand ternary logic expression. Return TARGET. */ +rtx +ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx, + rtx target) +{ + rtx tmp0, tmp1, tmp2; + + if (!target) + target = gen_reg_rtx (mode); + + /* Canonicalize ternlog index for degenerate (duplicated) operands. */ + if (rtx_equal_p (op0, op1) && rtx_equal_p (op0, op2)) + switch (idx & 0x81) + { + case 0x00: + idx = 0x00; + break; + case 0x01: + idx = 0x0f; + break; + case 0x80: + idx = 0xf0; + break; + case 0x81: + idx = 0xff; + break; + } + + switch (idx & 0xff) + { + case 0x00: + if ((!op0 || !side_effects_p (op0)) + && (!op1 || !side_effects_p (op1)) + && (!op2 || !side_effects_p (op2))) + { + emit_move_insn (target, CONST0_RTX (mode)); + return target; + } + break; + + case 0x0a: /* ~a&c */ + if ((!op1 || !side_effects_p (op1)) + && op0 && register_operand (op0, mode) + && op2 && register_operand (op2, mode)) + return ix86_expand_ternlog_andnot (mode, op0, op2, target); + break; + + case 0x0c: /* ~a&b */ + if ((!op2 || !side_effects_p (op2)) + && op0 && register_operand (op0, mode) + && op1 && register_operand (op1, mode)) + return ix86_expand_ternlog_andnot (mode, op0, op1, target); + break; + + case 0x0f: /* ~a */ + if ((!op1 || !side_effects_p (op1)) + && (!op2 || !side_effects_p (op2)) + && op0) + { + if (GET_MODE (op0) != mode) + op0 = gen_lowpart (mode, op0); + if (!TARGET_64BIT && !register_operand (op0, mode)) + op0 = force_reg (mode, op0); + emit_move_insn (target, gen_rtx_XOR (mode, op0, CONSTM1_RTX (mode))); + return target; + } + break; + + case 0x22: /* ~b&c */ + if ((!op0 || !side_effects_p (op0)) + && op1 && register_operand (op1, mode) + && op2 && register_operand (op2, mode)) + return ix86_expand_ternlog_andnot (mode, op1, op2, target); + break; + + case 0x30: /* ~b&a */ + if ((!op2 || !side_effects_p (op2)) + && op0 && register_operand (op0, mode) + && op1 && register_operand (op1, mode)) + return ix86_expand_ternlog_andnot (mode, op1, op0, target); + break; + + case 0x33: /* ~b */ + if ((!op0 || !side_effects_p (op0)) + && (!op2 || !side_effects_p (op2)) + && op1) + { + if (GET_MODE (op1) != mode) + op1 = gen_lowpart (mode, op1); + if (!TARGET_64BIT && !register_operand (op1, mode)) + op1 = force_reg (mode, op1); + emit_move_insn (target, gen_rtx_XOR (mode, op1, CONSTM1_RTX (mode))); + return target; + } + break; + + case 0x3c: /* a^b */ + if (op0 && op1 + && (!op2 || !side_effects_p (op2))) + return ix86_expand_ternlog_binop (XOR, mode, op0, op1, target); + break; + + case 0x44: /* ~c&b */ + if ((!op0 || !side_effects_p (op0)) + && op1 && register_operand (op1, mode) + && op2 && register_operand (op2, mode)) + return ix86_expand_ternlog_andnot (mode, op2, op1, target); + break; + + case 0x50: /* ~c&a */ + if ((!op1 || !side_effects_p (op1)) + && op0 && register_operand (op0, mode) + && op2 && register_operand (op2, mode)) + return ix86_expand_ternlog_andnot (mode, op2, op0, target); + break; + + case 0x55: /* ~c */ + if ((!op0 || !side_effects_p (op0)) + && (!op1 || !side_effects_p (op1)) + && op2) + { + if (GET_MODE (op2) != mode) + op2 = gen_lowpart (mode, op2); + if (!TARGET_64BIT && !register_operand (op2, mode)) + op2 = force_reg (mode, op2); + emit_move_insn (target, gen_rtx_XOR (mode, op2, CONSTM1_RTX (mode))); + return target; + } + break; + + case 0x5a: /* a^c */ + if (op0 && op2 + && (!op1 || !side_effects_p (op1))) + return ix86_expand_ternlog_binop (XOR, mode, op0, op2, target); + break; + + case 0x66: /* b^c */ + if ((!op0 || !side_effects_p (op0)) + && op1 && op2) + return ix86_expand_ternlog_binop (XOR, mode, op1, op2, target); + break; + + case 0x88: /* b&c */ + if ((!op0 || !side_effects_p (op0)) + && op1 && op2) + return ix86_expand_ternlog_binop (AND, mode, op1, op2, target); + break; + + case 0xa0: /* a&c */ + if ((!op1 || !side_effects_p (op1)) + && op0 && op2) + return ix86_expand_ternlog_binop (AND, mode, op0, op2, target); + break; + + case 0xaa: /* c */ + if ((!op0 || !side_effects_p (op0)) + && (!op1 || !side_effects_p (op1)) + && op2) + { + if (GET_MODE (op2) != mode) + op2 = gen_lowpart (mode, op2); + emit_move_insn (target, op2); + return target; + } + break; + + case 0xc0: /* a&b */ + if (op0 && op1 + && (!op2 || !side_effects_p (op2))) + return ix86_expand_ternlog_binop (AND, mode, op0, op1, target); + break; + + case 0xcc: /* b */ + if ((!op0 || !side_effects_p (op0)) + && op1 + && (!op2 || !side_effects_p (op2))) + { + if (GET_MODE (op1) != mode) + op1 = gen_lowpart (mode, op1); + emit_move_insn (target, op1); + return target; + } + break; + + case 0xee: /* b|c */ + if ((!op0 || !side_effects_p (op0)) + && op1 && op2) + return ix86_expand_ternlog_binop (IOR, mode, op1, op2, target); + break; + + case 0xf0: /* a */ + if (op0 + && (!op1 || !side_effects_p (op1)) + && (!op2 || !side_effects_p (op2))) + { + if (GET_MODE (op0) != mode) + op0 = gen_lowpart (mode, op0); + emit_move_insn (target, op0); + return target; + } + break; + + case 0xfa: /* a|c */ + if (op0 && op2 + && (!op1 || !side_effects_p (op1))) + return ix86_expand_ternlog_binop (IOR, mode, op0, op2, target); + break; + + case 0xfc: /* a|b */ + if (op0 && op1 + && (!op2 || !side_effects_p (op2))) + return ix86_expand_ternlog_binop (IOR, mode, op0, op1, target); + break; + + case 0xff: + if ((!op0 || !side_effects_p (op0)) + && (!op1 || !side_effects_p (op1)) + && (!op2 || !side_effects_p (op2))) + { + emit_move_insn (target, CONSTM1_RTX (mode)); + return target; + } + break; + } + + tmp0 = register_operand (op0, mode) ? op0 : force_reg (mode, op0); + if (GET_MODE (tmp0) != mode) + tmp0 = gen_lowpart (mode, tmp0); + + if (!op1 || rtx_equal_p (op0, op1)) + tmp1 = copy_rtx (tmp0); + else if (!register_operand (op1, mode)) + tmp1 = force_reg (mode, op1); + else + tmp1 = op1; + if (GET_MODE (tmp1) != mode) + tmp1 = gen_lowpart (mode, tmp1); + + if (!op2 || rtx_equal_p (op0, op2)) + tmp2 = copy_rtx (tmp0); + else if (rtx_equal_p (op1, op2)) + tmp2 = copy_rtx (tmp1); + else if (GET_CODE (op2) == CONST_VECTOR) + { + if (GET_MODE (op2) != mode) + op2 = gen_lowpart (mode, op2); + tmp2 = ix86_gen_bcst_mem (mode, op2); + if (!tmp2) + { + tmp2 = validize_mem (force_const_mem (mode, op2)); + rtx bcast = ix86_broadcast_from_constant (mode, tmp2); + if (bcast) + { + rtx reg2 = gen_reg_rtx (mode); + bool ok = ix86_expand_vector_init_duplicate (false, mode, + reg2, bcast); + if (ok) + tmp2 = reg2; + } + } + } + else + tmp2 = op2; + if (GET_MODE (tmp2) != mode) + tmp2 = gen_lowpart (mode, tmp2); + /* Some memory_operands are not vector_memory_operands. */ + if (!bcst_vector_operand (tmp2, mode)) + tmp2 = force_reg (mode, tmp2); + + rtvec vec = gen_rtvec (4, tmp0, tmp1, tmp2, GEN_INT (idx)); + emit_move_insn (target, gen_rtx_UNSPEC (mode, vec, UNSPEC_VTERNLOG)); + return target; +} + #include "gt-i386-expand.h" diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 46214a6..9a3e183 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -245,6 +245,11 @@ extern rtx ix86_expand_fast_convert_bf_to_sf (rtx); extern rtx ix86_memtag_untagged_pointer (rtx, rtx); extern bool ix86_memtag_can_tag_addresses (void); +extern int ix86_ternlog_idx (rtx op, rtx *args); +extern bool ix86_ternlog_operand_p (rtx op); +extern rtx ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, + int idx, rtx target); + #ifdef TREE_CODE extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); #endif /* TREE_CODE */ diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 2a97776..7afe310 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1098,6 +1098,11 @@ (and (match_code "not") (match_test "nonimmediate_operand (XEXP (op, 0), mode)")))) +;; True for expressions valid for 3-operand ternlog instructions. +(define_predicate "ternlog_operand" + (and (match_code "not,and,ior,xor") + (match_test "ix86_ternlog_operand_p (op)"))) + ;; True if OP is acceptable as operand of DImode shift expander. (define_predicate "shiftdi_operand" (if_then_else (match_test "TARGET_64BIT") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 1bf5072..3148651 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -12940,6 +12940,26 @@ ;; ;; and so on. +(define_insn_and_split "*_vpternlog_0" + [(set (match_operand:V 0 "register_operand") + (match_operand:V 1 "ternlog_operand"))] + "( == 64 || TARGET_AVX512VL + || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ + rtx args[3]; + args[0] = NULL_RTX; + args[1] = NULL_RTX; + args[2] = NULL_RTX; + int idx = ix86_ternlog_idx (operands[1], args); + ix86_expand_ternlog (mode, args[0], args[1], args[2], idx, + operands[0]); + DONE; +}) + (define_code_iterator any_logic1 [and ior xor]) (define_code_iterator any_logic2 [and ior xor]) (define_code_attr logic_op [(and "&") (ior "|") (xor "^")]) @@ -13160,7 +13180,33 @@ }) -(define_insn "_vternlog_mask" +(define_expand "_vternlog_mask" + [(set (match_operand:VI48_AVX512VL 0 "register_operand") + (vec_merge:VI48_AVX512VL + (unspec:VI48_AVX512VL + [(match_operand:VI48_AVX512VL 1 "register_operand") + (match_operand:VI48_AVX512VL 2 "register_operand") + (match_operand:VI48_AVX512VL 3 "bcst_vector_operand") + (match_operand:SI 4 "const_0_to_255_operand")] + UNSPEC_VTERNLOG) + (match_dup 1) + (match_operand: 5 "general_operand")))] + "TARGET_AVX512F" +{ + unsigned HOST_WIDE_INT mode_mask = GET_MODE_MASK (mode); + if (CONST_INT_P (operands[5]) + && (UINTVAL (operands[5]) & mode_mask) == mode_mask) + { + ix86_expand_ternlog (mode, operands[1], operands[2], + operands[3], INTVAL (operands[4]), + operands[0]); + DONE; + } + if (!register_operand (operands[5], mode)) + operands[5] = force_reg (mode, operands[5]); +}) + +(define_insn "*_vternlog_mask" [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") (vec_merge:VI48_AVX512VL (unspec:VI48_AVX512VL diff --git a/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c index 4ebb30f..24f3d6c 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */ -/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\\\$80, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ /* { dg-final { scan-assembler-not "vpbroadcast" } } */ #define type __m512i diff --git a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c index 86e7ebe..1f5e72d 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$80, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ /* { dg-final { scan-assembler-not "vpbroadcast" } } */ #define type __m512i diff --git a/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c index 7d02f03..d21f48f 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */ -/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0xdd, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$245, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ /* { dg-final { scan-assembler-not "vpbroadcast" } } */ #define type __m512i diff --git a/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c index c793083..5359200 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */ -/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0xbb, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$175, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ /* { dg-final { scan-assembler-not "vpbroadcast" } } */ #define type __m512i diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-1.c index a88153a..b098487 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpternlogd-1.c @@ -1,6 +1,5 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpternlogq-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpternlogq-1.c index ef30246..8e5d22f 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpternlogq-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpternlogq-1.c @@ -1,6 +1,5 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogd-1.c index 045a266..dd53563 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogd-1.c @@ -1,7 +1,5 @@ /* { dg-do compile } */ /* { dg-options "-mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogq-1.c index 3a6707c..31fec3e 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogq-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogq-1.c @@ -1,7 +1,5 @@ /* { dg-do compile } */ /* { dg-options "-mavx512vl -O2" } */ -/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ -/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100711-3.c b/gcc/testsuite/gcc.target/i386/pr100711-3.c index 98cc1c3..ea60190 100644 --- a/gcc/testsuite/gcc.target/i386/pr100711-3.c +++ b/gcc/testsuite/gcc.target/i386/pr100711-3.c @@ -39,4 +39,4 @@ v8di foo_v8di (long long a, v8di b) /* { dg-final { scan-assembler-times "vpandn" 4 { target { ! ia32 } } } } */ /* { dg-final { scan-assembler-times "vpandn" 2 { target { ia32 } } } } */ -/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x44" 2 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$80" 2 { target { ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100711-4.c b/gcc/testsuite/gcc.target/i386/pr100711-4.c index 3ca524f..a33f0a1 100644 --- a/gcc/testsuite/gcc.target/i386/pr100711-4.c +++ b/gcc/testsuite/gcc.target/i386/pr100711-4.c @@ -37,6 +37,6 @@ v8di foo_v8di (long long a, v8di b) return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b; } -/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xbb" 4 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xbb" 2 { target { ia32 } } } } */ -/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xdd" 2 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$207" 4 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$207" 2 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$245" 2 { target { ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100711-5.c b/gcc/testsuite/gcc.target/i386/pr100711-5.c index 161fbfc..99cafc1 100644 --- a/gcc/testsuite/gcc.target/i386/pr100711-5.c +++ b/gcc/testsuite/gcc.target/i386/pr100711-5.c @@ -37,4 +37,4 @@ v8di foo_v8di (long long a, v8di b) return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) ^ b; } -/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x99" 4 } } */ +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$1\[69\]5" 4 } } */