Summary of this patch (i386 back end):

  * i386-expand.cc: ix86_expand_branch sends DImode/TImode EQ/NE compares
    down the "simple" path instead of splitting them at expand time,
    except when optimizing for size; the old XOR/IOR expansion is removed.
  * i386-features.cc, i386-features.h: add
    general_scalar_chain::convert_compare, which emits an optional XOR, a
    lane broadcast and a PTEST unspec; convertible_comparison_p now accepts
    REG/MEM/CONST_INT operands of the double-word mode (TImode on 64-bit,
    DImode otherwise).
  * i386.md: cstore<mode>4 accepts the double-word mode for EQ/NE only; the
    new pre-reload splitter *cmp<dwi>_doubleword lowers a double-word
    compare to an IOR of the (XORed) halves compared against zero.
  * sse.md: add V1TI/V2TI entries to V_AVX and to the mode attribute list
    near line 890.
  * testsuite: new gcc.target/i386/pr70321.c and sse4_1-stv-1.c.

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 5cd7b99..8e9d2b6 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -2317,21 +2317,15 @@ ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
     case E_DImode:
       if (TARGET_64BIT)
 	goto simple;
-      /* For 32-bit target DI comparison may be performed on
-	 SSE registers.  To allow this we should avoid split
-	 to SI mode which is achieved by doing xor in DI mode
-	 and then comparing with zero (which is recognized by
-	 STV pass).  We don't compare using xor when optimizing
-	 for size.  */
-      if (!optimize_insn_for_size_p ()
-	  && TARGET_STV
-	  && (code == EQ || code == NE))
-	{
-	  op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1));
-	  op1 = const0_rtx;
-	}
       /* FALLTHRU */
     case E_TImode:
+      /* DI and TI mode equality/inequality comparisons may be performed
+	 on SSE registers.  Avoid splitting them, except when optimizing
+	 for size.  */
+      if ((code == EQ || code == NE)
+	  && !optimize_insn_for_size_p ())
+	goto simple;
+
       /* Expand DImode branch into multiple compare+branch.  */
       {
 	rtx lo[2], hi[2];
@@ -2350,34 +2344,7 @@ ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
 
 	submode = mode == DImode ? SImode : DImode;
 
-	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
-	   avoid two branches.  This costs one extra insn, so disable when
-	   optimizing for size.  */
-
-	if ((code == EQ || code == NE)
-	    && (!optimize_insn_for_size_p ()
-		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
-	  {
-	    rtx xor0, xor1;
-
-	    xor1 = hi[0];
-	    if (hi[1] != const0_rtx)
-	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
-				   NULL_RTX, 0, OPTAB_WIDEN);
-
-	    xor0 = lo[0];
-	    if (lo[1] != const0_rtx)
-	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
-				   NULL_RTX, 0, OPTAB_WIDEN);
-
-	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
-				NULL_RTX, 0, OPTAB_WIDEN);
-
-	    ix86_expand_branch (code, tmp, const0_rtx, label);
-	    return;
-	  }
-
-	/* Otherwise, if we are doing less-than or greater-or-equal-than,
+	/* If we are doing less-than or greater-or-equal-than,
 	   op1 is a constant and the low word is zero, then we can just
 	   examine the high word.  Similarly for low word -1 and
 	   less-or-equal-than or greater-than.  */
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 6fe41c3..8908e42 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -711,8 +711,7 @@ gen_gpr_to_xmm_move_src (enum machine_mode vmode, rtx gpr)
   switch (GET_MODE_NUNITS (vmode))
     {
     case 1:
-      /* We are not using this case currently.  */
-      gcc_unreachable ();
+      return gen_rtx_SUBREG (vmode, gpr, 0);
     case 2:
       return gen_rtx_VEC_CONCAT (vmode, gpr,
 				 CONST0_RTX (GET_MODE_INNER (vmode)));
@@ -932,6 +931,48 @@ general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
     }
 }
 
+/* Convert COMPARE to vector mode.  */
+
+rtx
+general_scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn)
+{
+  rtx tmp = gen_reg_rtx (vmode);
+  rtx src;
+  convert_op (&op1, insn);
+  /* Comparison against anything other than zero, requires an XOR.  */
+  if (op2 != const0_rtx)
+    {
+      convert_op (&op2, insn);
+      /* If both operands are MEMs, explicitly load the OP1 into TMP.  */
+      if (MEM_P (op1) && MEM_P (op2))
+	{
+	  emit_insn_before (gen_rtx_SET (tmp, op1), insn);
+	  src = tmp;
+	}
+      else
+	src = op1;
+      src = gen_rtx_XOR (vmode, src, op2);
+    }
+  else
+    src = op1;
+  emit_insn_before (gen_rtx_SET (tmp, src), insn);
+
+  if (vmode == V2DImode)
+    emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (tmp),
+						  copy_rtx_if_shared (tmp),
+						  copy_rtx_if_shared (tmp)),
+		      insn);
+  else if (vmode == V4SImode)
+    emit_insn_before (gen_sse2_pshufd (copy_rtx_if_shared (tmp),
+				       copy_rtx_if_shared (tmp),
+				       const0_rtx),
+		      insn);
+
+  return gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (tmp),
+					       copy_rtx_if_shared (tmp)),
+			 UNSPEC_PTEST);
+}
+
 /* Convert INSN to vector mode.  */
 
 void
@@ -1090,19 +1131,8 @@ general_scalar_chain::convert_insn (rtx_insn *insn)
       break;
 
     case COMPARE:
-      src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
-
-      gcc_assert (REG_P (src) && GET_MODE (src) == DImode);
-      subreg = gen_rtx_SUBREG (V2DImode, src, 0);
-      emit_insn_before (gen_vec_interleave_lowv2di
-			(copy_rtx_if_shared (subreg),
-			 copy_rtx_if_shared (subreg),
-			 copy_rtx_if_shared (subreg)),
-			insn);
       dst = gen_rtx_REG (CCmode, FLAGS_REG);
-      src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (subreg),
-					       copy_rtx_if_shared (subreg)),
-			    UNSPEC_PTEST);
+      src = convert_compare (XEXP (src, 0), XEXP (src, 1), insn);
       break;
 
     case CONST_INT:
@@ -1339,20 +1369,14 @@ pseudo_reg_set (rtx_insn *insn)
   return set;
 }
 
-/* Check if comparison INSN may be transformed
-   into vector comparison.  Currently we transform
-   zero checks only which look like:
-
-   (set (reg:CCZ 17 flags)
-	(compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
-			     (subreg:SI (reg:DI x) 0))
-		     (const_int 0 [0])))  */
+/* Check if comparison INSN may be transformed into vector comparison.
+   Currently we transform equality/inequality checks which look like:
+   (set (reg:CCZ 17 flags) (compare:CCZ (reg:TI x) (reg:TI y)))  */
 
 static bool
 convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
 {
-  /* ??? Currently convertible for double-word DImode chain only.  */
-  if (TARGET_64BIT || mode != DImode)
+  if (mode != (TARGET_64BIT ? TImode : DImode))
     return false;
 
   if (!TARGET_SSE4_1)
@@ -1375,31 +1399,14 @@ convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
   rtx op1 = XEXP (src, 0);
   rtx op2 = XEXP (src, 1);
 
-  if (op2 != CONST0_RTX (GET_MODE (op2)))
+  if (!CONST_INT_P (op1)
+      && ((!REG_P (op1) && !MEM_P (op1))
+	  || GET_MODE (op1) != mode))
     return false;
 
-  if (GET_CODE (op1) != IOR)
-    return false;
-
-  op2 = XEXP (op1, 1);
-  op1 = XEXP (op1, 0);
-
-  if (!SUBREG_P (op1)
-      || !SUBREG_P (op2)
-      || GET_MODE (op1) != SImode
-      || GET_MODE (op2) != SImode
-      || ((SUBREG_BYTE (op1) != 0
-	   || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
-	  && (SUBREG_BYTE (op2) != 0
-	      || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
-    return false;
-
-  op1 = SUBREG_REG (op1);
-  op2 = SUBREG_REG (op2);
-
-  if (op1 != op2
-      || !REG_P (op1)
-      || GET_MODE (op1) != DImode)
+  if (!CONST_INT_P (op2)
+      && ((!REG_P (op2) && !MEM_P (op2))
+	  || GET_MODE (op2) != mode))
     return false;
 
   return true;
diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h
index 5c30760..891cb46 100644
--- a/gcc/config/i386/i386-features.h
+++ b/gcc/config/i386/i386-features.h
@@ -181,6 +181,7 @@ class general_scalar_chain : public scalar_chain
   void convert_reg (rtx_insn *insn, rtx dst, rtx src);
   void make_vector_copies (rtx_insn *, rtx);
   void convert_registers ();
+  rtx convert_compare (rtx op1, rtx op2, rtx_insn *insn);
   int vector_const_cost (rtx exp);
 };
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 602dfa7..12c1dee 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1357,14 +1357,20 @@
 
 (define_expand "cstore<mode>4"
   [(set (reg:CC FLAGS_REG)
-	(compare:CC (match_operand:SWIM 2 "nonimmediate_operand")
-		    (match_operand:SWIM 3 "<general_operand>")))
+	(compare:CC (match_operand:SDWIM 2 "nonimmediate_operand")
+		    (match_operand:SDWIM 3 "<general_operand>")))
    (set (match_operand:QI 0 "register_operand")
 	(match_operator 1 "ordered_comparison_operator"
 	  [(reg:CC FLAGS_REG) (const_int 0)]))]
   ""
 {
-  if (MEM_P (operands[2]) && MEM_P (operands[3]))
+  if (<MODE>mode == (TARGET_64BIT ? TImode : DImode))
+    {
+      if (GET_CODE (operands[1]) != EQ
+	  && GET_CODE (operands[1]) != NE)
+	FAIL;
+    }
+  else if (MEM_P (operands[2]) && MEM_P (operands[3]))
     operands[2] = force_reg (<MODE>mode, operands[2]);
   ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
 		     operands[2], operands[3]);
@@ -1500,6 +1506,52 @@
   [(set_attr "type" "icmp")
    (set_attr "mode" "QI")])
 
+(define_insn_and_split "*cmp<dwi>_doubleword"
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ (match_operand:<DWI> 0 "nonimmediate_operand")
+		     (match_operand:<DWI> 1 "x86_64_general_operand")))]
+  "ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel [(set (reg:CCZ FLAGS_REG)
+		   (compare:CCZ (ior:DWIH (match_dup 4) (match_dup 5))
+				(const_int 0)))
+	      (set (match_dup 4) (ior:DWIH (match_dup 4) (match_dup 5)))])]
+{
+  split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);
+  /* Placing the SUBREG pieces in pseudos helps reload.  */
+  for (int i = 0; i < 4; i++)
+    if (SUBREG_P (operands[i]))
+      operands[i] = force_reg (<MODE>mode, operands[i]);
+
+  operands[4] = gen_reg_rtx (<MODE>mode);
+  if (operands[1] == const0_rtx)
+    emit_move_insn (operands[4], operands[0]);
+  else if (operands[0] == const0_rtx)
+    emit_move_insn (operands[4], operands[1]);
+  else if (operands[1] == constm1_rtx)
+    emit_insn (gen_one_cmpl<mode>2 (operands[4], operands[0]));
+  else if (operands[0] == constm1_rtx)
+    emit_insn (gen_one_cmpl<mode>2 (operands[4], operands[1]));
+  else
+    emit_insn (gen_xor<mode>3 (operands[4], operands[0], operands[1]));
+
+  if (operands[3] == const0_rtx)
+    operands[5] = operands[2];
+  else if (operands[2] == const0_rtx)
+    operands[5] = operands[3];
+  else
+    {
+      operands[5] = gen_reg_rtx (<MODE>mode);
+      if (operands[3] == constm1_rtx)
+	emit_insn (gen_one_cmpl<mode>2 (operands[5], operands[2]));
+      else if (operands[2] == constm1_rtx)
+	emit_insn (gen_one_cmpl<mode>2 (operands[5], operands[3]));
+      else
+	emit_insn (gen_xor<mode>3 (operands[5], operands[2], operands[3]));
+    }
+})
+
 ;; These implement float point compares.
 ;; %%% See if we can get away with VOIDmode operands on the actual insns,
 ;; which would allow mix and match FP modes on the compares.  Which is what
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8b2602b..4183adb 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -466,9 +466,9 @@
 
 ;; All DImode vector integer modes
 (define_mode_iterator V_AVX
-  [V16QI V8HI V4SI V2DI V4SF V2DF
+  [V16QI V8HI V4SI V2DI V1TI V4SF V2DF
    (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
-   (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
+   (V8SI "TARGET_AVX") (V4DI "TARGET_AVX") (V2TI "TARGET_AVX")
    (V8SF "TARGET_AVX") (V4DF"TARGET_AVX")])
 
 (define_mode_iterator VI48_AVX
@@ -890,6 +890,7 @@
   [(V4SF "sse4_1") (V2DF "sse4_1")
    (V8SF "avx") (V4DF "avx")
    (V8DF "avx512f")
+   (V2TI "avx") (V1TI "sse4_1")
    (V4DI "avx") (V2DI "sse4_1")
    (V8SI "avx") (V4SI "sse4_1")
    (V16QI "sse4_1") (V32QI "avx")
diff --git a/gcc/testsuite/gcc.target/i386/pr70321.c b/gcc/testsuite/gcc.target/i386/pr70321.c
new file mode 100644
index 0000000..eaba728
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70321.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2" } */
+
+void foo (long long ixi)
+{
+  if (ixi != 14348907)
+    __builtin_abort ();
+}
+
+/* { dg-final { scan-assembler-times "mov" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-stv-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-stv-1.c
new file mode 100644
index 0000000..9486d0c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_1-stv-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -msse4.1" } */
+long long a[1024];
+long long b[1024];
+
+int foo()
+{
+  for (int i=0; i<1024; i++)
+  {
+    long long t = (a[i]<<8) | (b[i]<<24);
+    if (t == 0)
+      return 1;
+  }
+  return 0;
+}
+
+/* { dg-final { scan-assembler "ptest" } } */
+/* { dg-final { scan-assembler-not "pxor" } } */