diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 4ae3e75..3677aef 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -563,183 +563,195 @@ general_scalar_chain::compute_convert_gain ()
       else if (MEM_P (src) && REG_P (dst))
         igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx];
       else
-        switch (GET_CODE (src))
-          {
-          case ASHIFT:
-          case ASHIFTRT:
-          case LSHIFTRT:
-            if (m == 2)
-              {
-                if (INTVAL (XEXP (src, 1)) >= 32)
-                  igain += ix86_cost->add;
-                /* Gain for extend highpart case.  */
-                else if (GET_CODE (XEXP (src, 0)) == ASHIFT)
-                  igain += ix86_cost->shift_const - ix86_cost->sse_op;
-                else
-                  igain += ix86_cost->shift_const;
-              }
-
-            igain += ix86_cost->shift_const - ix86_cost->sse_op;
+        {
+          /* For operations on memory operands, include the overhead
+             of explicit load and store instructions.  */
+          if (MEM_P (dst))
+            igain += !optimize_insn_for_size_p ()
+                     ? (m * (ix86_cost->int_load[2]
+                             + ix86_cost->int_store[2])
+                        - (ix86_cost->sse_load[sse_cost_idx]
+                           + ix86_cost->sse_store[sse_cost_idx]))
+                     : -COSTS_N_BYTES (8);

-            if (CONST_INT_P (XEXP (src, 0)))
-              igain -= vector_const_cost (XEXP (src, 0));
-            break;
+          switch (GET_CODE (src))
+            {
+            case ASHIFT:
+            case ASHIFTRT:
+            case LSHIFTRT:
+              if (m == 2)
+                {
+                  if (INTVAL (XEXP (src, 1)) >= 32)
+                    igain += ix86_cost->add;
+                  /* Gain for extend highpart case.  */
+                  else if (GET_CODE (XEXP (src, 0)) == ASHIFT)
+                    igain += ix86_cost->shift_const - ix86_cost->sse_op;
+                  else
+                    igain += ix86_cost->shift_const;
+                }

-          case ROTATE:
-          case ROTATERT:
-            igain += m * ix86_cost->shift_const;
-            if (TARGET_AVX512VL)
-              igain -= ix86_cost->sse_op;
-            else if (smode == DImode)
-              {
-                int bits = INTVAL (XEXP (src, 1));
-                if ((bits & 0x0f) == 0)
-                  igain -= ix86_cost->sse_op;
-                else if ((bits & 0x07) == 0)
-                  igain -= 2 * ix86_cost->sse_op;
-                else
-                  igain -= 3 * ix86_cost->sse_op;
-              }
-            else if (INTVAL (XEXP (src, 1)) == 16)
-              igain -= ix86_cost->sse_op;
-            else
-              igain -= 2 * ix86_cost->sse_op;
-            break;
+              igain += ix86_cost->shift_const - ix86_cost->sse_op;

-          case AND:
-          case IOR:
-          case XOR:
-          case PLUS:
-          case MINUS:
-            igain += m * ix86_cost->add - ix86_cost->sse_op;
-            /* Additional gain for andnot for targets without BMI.  */
-            if (GET_CODE (XEXP (src, 0)) == NOT
-                && !TARGET_BMI)
-              igain += m * ix86_cost->add;
-
-            if (CONST_INT_P (XEXP (src, 0)))
-              igain -= vector_const_cost (XEXP (src, 0));
-            if (CONST_INT_P (XEXP (src, 1)))
-              igain -= vector_const_cost (XEXP (src, 1));
-            if (MEM_P (XEXP (src, 1)))
-              {
-                if (optimize_insn_for_size_p ())
-                  igain -= COSTS_N_BYTES (m == 2 ? 3 : 5);
-                else
-                  igain += m * ix86_cost->int_load[2]
-                           - ix86_cost->sse_load[sse_cost_idx];
-              }
-            break;
+              if (CONST_INT_P (XEXP (src, 0)))
+                igain -= vector_const_cost (XEXP (src, 0));
+              break;

-          case NEG:
-          case NOT:
-            igain -= ix86_cost->sse_op + COSTS_N_INSNS (1);
+            case ROTATE:
+            case ROTATERT:
+              igain += m * ix86_cost->shift_const;
+              if (TARGET_AVX512VL)
+                igain -= ix86_cost->sse_op;
+              else if (smode == DImode)
+                {
+                  int bits = INTVAL (XEXP (src, 1));
+                  if ((bits & 0x0f) == 0)
+                    igain -= ix86_cost->sse_op;
+                  else if ((bits & 0x07) == 0)
+                    igain -= 2 * ix86_cost->sse_op;
+                  else
+                    igain -= 3 * ix86_cost->sse_op;
+                }
+              else if (INTVAL (XEXP (src, 1)) == 16)
+                igain -= ix86_cost->sse_op;
+              else
+                igain -= 2 * ix86_cost->sse_op;
+              break;

-            if (GET_CODE (XEXP (src, 0)) != ABS)
-              {
+            case AND:
+            case IOR:
+            case XOR:
+            case PLUS:
+            case MINUS:
+              igain += m * ix86_cost->add - ix86_cost->sse_op;
+              /* Additional gain for andnot for targets without BMI.  */
+              if (GET_CODE (XEXP (src, 0)) == NOT
+                  && !TARGET_BMI)
                igain += m * ix86_cost->add;
-              break;
-            }
-          /* FALLTHRU */
-
-          case ABS:
-          case SMAX:
-          case SMIN:
-          case UMAX:
-          case UMIN:
-            /* We do not have any conditional move cost, estimate it as a
-               reg-reg move.  Comparisons are costed as adds.  */
-            igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
-            /* Integer SSE ops are all costed the same.  */
-            igain -= ix86_cost->sse_op;
-            break;

-          case COMPARE:
-            if (XEXP (src, 1) != const0_rtx)
-              {
-                /* cmp vs. pxor;pshufd;ptest.  */
-                igain += COSTS_N_INSNS (m - 3);
-              }
-            else if (GET_CODE (XEXP (src, 0)) != AND)
-              {
-                /* test vs. pshufd;ptest.  */
-                igain += COSTS_N_INSNS (m - 2);
-              }
-            else if (GET_CODE (XEXP (XEXP (src, 0), 0)) != NOT)
-              {
-                /* and;test vs. pshufd;ptest.  */
-                igain += COSTS_N_INSNS (2 * m - 2);
-              }
-            else if (TARGET_BMI)
-              {
-                /* andn;test vs. pandn;pshufd;ptest.  */
-                igain += COSTS_N_INSNS (2 * m - 3);
-              }
-            else
-              {
-                /* not;and;test vs. pandn;pshufd;ptest.  */
-                igain += COSTS_N_INSNS (3 * m - 3);
-              }
-            break;
+              if (CONST_INT_P (XEXP (src, 0)))
+                igain -= vector_const_cost (XEXP (src, 0));
+              if (CONST_INT_P (XEXP (src, 1)))
+                igain -= vector_const_cost (XEXP (src, 1));
+              if (MEM_P (XEXP (src, 1)))
+                {
+                  if (optimize_insn_for_size_p ())
+                    igain -= COSTS_N_BYTES (m == 2 ? 3 : 5);
+                  else
+                    igain += m * ix86_cost->int_load[2]
+                             - ix86_cost->sse_load[sse_cost_idx];
+                }
+              break;

-          case CONST_INT:
-            if (REG_P (dst))
-              {
-                if (optimize_insn_for_size_p ())
-                  {
-                    /* xor (2 bytes) vs. xorps (3 bytes).  */
-                    if (src == const0_rtx)
-                      igain -= COSTS_N_BYTES (1);
-                    /* movdi_internal vs. movv2di_internal.  */
-                    /* => mov (5 bytes) vs. movaps (7 bytes).  */
-                    else if (x86_64_immediate_operand (src, SImode))
-                      igain -= COSTS_N_BYTES (2);
-                    else
-                      /* ??? Larger immediate constants are placed in the
-                         constant pool, where the size benefit/impact of
-                         STV conversion is affected by whether and how
-                         often each constant pool entry is shared/reused.
-                         The value below is empirically derived from the
-                         CSiBE benchmark (and the optimal value may drift
-                         over time).  */
-                      igain += COSTS_N_BYTES (0);
-                  }
-                else
-                  {
-                    /* DImode can be immediate for TARGET_64BIT
-                       and SImode always.  */
-                    igain += m * COSTS_N_INSNS (1);
-                    igain -= vector_const_cost (src);
-                  }
-              }
-            else if (MEM_P (dst))
-              {
-                igain += (m * ix86_cost->int_store[2]
-                          - ix86_cost->sse_store[sse_cost_idx]);
-                igain -= vector_const_cost (src);
-              }
-            break;
+            case NEG:
+            case NOT:
+              igain -= ix86_cost->sse_op + COSTS_N_INSNS (1);

-          case VEC_SELECT:
-            if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx)
-              {
-                // movd (4 bytes) replaced with movdqa (4 bytes).
-                if (!optimize_insn_for_size_p ())
-                  igain += ix86_cost->sse_to_integer - ix86_cost->xmm_move;
-              }
-            else
-              {
-                // pshufd; movd replaced with pshufd.
-                if (optimize_insn_for_size_p ())
-                  igain += COSTS_N_BYTES (4);
-                else
-                  igain += ix86_cost->sse_to_integer;
-              }
-            break;
+              if (GET_CODE (XEXP (src, 0)) != ABS)
+                {
+                  igain += m * ix86_cost->add;
+                  break;
+                }
+              /* FALLTHRU */
+
+            case ABS:
+            case SMAX:
+            case SMIN:
+            case UMAX:
+            case UMIN:
+              /* We do not have any conditional move cost, estimate it as a
+                 reg-reg move.  Comparisons are costed as adds.  */
+              igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
+              /* Integer SSE ops are all costed the same.  */
+              igain -= ix86_cost->sse_op;
+              break;

-          default:
-            gcc_unreachable ();
-          }
+            case COMPARE:
+              if (XEXP (src, 1) != const0_rtx)
+                {
+                  /* cmp vs. pxor;pshufd;ptest.  */
+                  igain += COSTS_N_INSNS (m - 3);
+                }
+              else if (GET_CODE (XEXP (src, 0)) != AND)
+                {
+                  /* test vs. pshufd;ptest.  */
+                  igain += COSTS_N_INSNS (m - 2);
+                }
+              else if (GET_CODE (XEXP (XEXP (src, 0), 0)) != NOT)
+                {
+                  /* and;test vs. pshufd;ptest.  */
+                  igain += COSTS_N_INSNS (2 * m - 2);
+                }
+              else if (TARGET_BMI)
+                {
+                  /* andn;test vs. pandn;pshufd;ptest.  */
+                  igain += COSTS_N_INSNS (2 * m - 3);
+                }
+              else
+                {
+                  /* not;and;test vs. pandn;pshufd;ptest.  */
+                  igain += COSTS_N_INSNS (3 * m - 3);
+                }
+              break;
+
+            case CONST_INT:
+              if (REG_P (dst))
+                {
+                  if (optimize_insn_for_size_p ())
+                    {
+                      /* xor (2 bytes) vs. xorps (3 bytes).  */
+                      if (src == const0_rtx)
+                        igain -= COSTS_N_BYTES (1);
+                      /* movdi_internal vs. movv2di_internal.  */
+                      /* => mov (5 bytes) vs. movaps (7 bytes).  */
+                      else if (x86_64_immediate_operand (src, SImode))
+                        igain -= COSTS_N_BYTES (2);
+                      else
+                        /* ??? Larger immediate constants are placed in the
+                           constant pool, where the size benefit/impact of
+                           STV conversion is affected by whether and how
+                           often each constant pool entry is shared/reused.
+                           The value below is empirically derived from the
+                           CSiBE benchmark (and the optimal value may drift
+                           over time).  */
+                        igain += COSTS_N_BYTES (0);
+                    }
+                  else
+                    {
+                      /* DImode can be immediate for TARGET_64BIT
+                         and SImode always.  */
+                      igain += m * COSTS_N_INSNS (1);
+                      igain -= vector_const_cost (src);
+                    }
+                }
+              else if (MEM_P (dst))
+                {
+                  igain += (m * ix86_cost->int_store[2]
+                            - ix86_cost->sse_store[sse_cost_idx]);
+                  igain -= vector_const_cost (src);
+                }
+              break;
+
+            case VEC_SELECT:
+              if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx)
+                {
+                  // movd (4 bytes) replaced with movdqa (4 bytes).
+                  if (!optimize_insn_for_size_p ())
+                    igain += ix86_cost->sse_to_integer - ix86_cost->xmm_move;
+                }
+              else
+                {
+                  // pshufd; movd replaced with pshufd.
+                  if (optimize_insn_for_size_p ())
+                    igain += COSTS_N_BYTES (4);
+                  else
+                    igain += ix86_cost->sse_to_integer;
+                }
+              break;
+
+            default:
+              gcc_unreachable ();
+            }
+        }

       if (igain != 0 && dump_file)
         {
diff --git a/gcc/testsuite/gcc.target/i386/pr113231.c b/gcc/testsuite/gcc.target/i386/pr113231.c
new file mode 100644
index 0000000..f9dcd9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113231.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-Os" } */
+
+void foo(int *i) { *i *= 2; }
+void bar(int *i) { *i <<= 2; }
+void baz(int *i) { *i >>= 2; }
+
+/* { dg-final { scan-assembler-not "movd" } } */
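
For reference, the new test exercises exactly the pattern the cost change targets: read-modify-write operations on an int held in memory, compiled at -Os, where converting the scalar insn to a vector one also requires the explicit load/store (movd) transfers that compute_convert_gain now accounts for when the destination is a MEM. A minimal way to check this by hand is sketched below; the gcc invocation and output file name are illustrative assumptions, not part of the patch, which only adds the DejaGnu test above.

    /* pr113231.c sketch -- e.g. "gcc -Os -S pr113231.c", then inspect pr113231.s.
       With the memory-operand overhead included in the STV gain computation,
       the test's scan-assembler-not directive expects no movd instructions.  */
    void foo (int *i) { *i *= 2; }   /* multiply of a memory operand  */
    void bar (int *i) { *i <<= 2; }  /* left shift of a memory operand  */
    void baz (int *i) { *i >>= 2; }  /* right shift of a memory operand  */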