diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index def060a..e844467 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -10222,6 +10222,18 @@ ix86_expand_sse_ptest (const struct builtin_description *d, tree exp, machine_mode mode1 = insn_data[d->icode].operand[1].mode; enum rtx_code comparison = d->comparison; + /* ptest reg, reg sets the carry flag. */ + if (comparison == LTU + && (d->code == IX86_BUILTIN_PTESTC + || d->code == IX86_BUILTIN_PTESTC256) + && rtx_equal_p (op0, op1)) + { + if (!target) + target = gen_reg_rtx (SImode); + emit_move_insn (target, const1_rtx); + return target; + } + if (VECTOR_MODE_P (mode0)) op0 = safe_vector_operand (op0, mode0); if (VECTOR_MODE_P (mode1)) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 3a1444d..3e99e23 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -21423,16 +21423,23 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, else if (XINT (x, 1) == UNSPEC_PTEST) { *total = cost->sse_op; - if (XVECLEN (x, 0) == 2 - && GET_CODE (XVECEXP (x, 0, 0)) == AND) + rtx test_op0 = XVECEXP (x, 0, 0); + if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1))) + return false; + if (GET_CODE (test_op0) == AND) { - rtx andop = XVECEXP (x, 0, 0); - *total += rtx_cost (XEXP (andop, 0), GET_MODE (andop), - AND, opno, speed) - + rtx_cost (XEXP (andop, 1), GET_MODE (andop), - AND, opno, speed); - return true; + rtx and_op0 = XEXP (test_op0, 0); + if (GET_CODE (and_op0) == NOT) + and_op0 = XEXP (and_op0, 0); + *total += rtx_cost (and_op0, GET_MODE (and_op0), + AND, 0, speed) + + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0), + AND, 1, speed); } + else + *total = rtx_cost (test_op0, GET_MODE (test_op0), + UNSPEC, 0, speed); + return true; } return false; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 9bec09d..ce90ae4 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -23147,6 +23147,70 @@ [(set (reg:CCZ FLAGS_REG) (unspec:CCZ [(match_dup 0) (match_dup 1)] UNSPEC_PTEST))]) +;; ptest reg,reg sets the carry flag. +(define_split + [(set (reg:CCC FLAGS_REG) + (unspec:CCC [(match_operand:V_AVX 0 "register_operand") + (match_operand:V_AVX 1 "register_operand")] + UNSPEC_PTEST))] + "TARGET_SSE4_1 + && rtx_equal_p (operands[0], operands[1])" + [(set (reg:CCC FLAGS_REG) + (unspec:CCC [(const_int 0)] UNSPEC_STC))]) + +;; Changing the CCmode of FLAGS_REG requires updating both def and use. +;; pandn/ptestz/set{n?}e -> ptestc/set{n?}c +(define_split + [(set (match_operand:SWI 0 "register_operand") + (match_operator:SWI 3 "bt_comparison_operator" + [(unspec:CCZ [ + (and:V_AVX (not:V_AVX (match_operand:V_AVX 1 "register_operand")) + (match_operand:V_AVX 2 "register_operand")) + (and:V_AVX (not:V_AVX (match_dup 1)) (match_dup 2))] + UNSPEC_PTEST) + (const_int 0)]))] + "TARGET_SSE4_1" + [(set (reg:CCC FLAGS_REG) + (unspec:CCC [(match_dup 1) (match_dup 2)] UNSPEC_PTEST)) + (set (match_dup 0) + (match_op_dup 3 [(reg:CCC FLAGS_REG) (const_int 0)]))]) + +(define_split + [(set (strict_low_part (subreg:QI (match_operand:SI 0 "register_operand") 0)) + (match_operator:QI 3 "bt_comparison_operator" + [(unspec:CCZ [ + (and:V_AVX (not:V_AVX (match_operand:V_AVX 1 "register_operand")) + (match_operand:V_AVX 2 "register_operand")) + (and:V_AVX (not:V_AVX (match_dup 1)) (match_dup 2))] + UNSPEC_PTEST) + (const_int 0)]))] + "TARGET_SSE4_1" + [(set (reg:CCC FLAGS_REG) + (unspec:CCC [(match_dup 1) (match_dup 2)] UNSPEC_PTEST)) + (set (strict_low_part (subreg:QI (match_dup 0) 0)) + (match_op_dup 3 [(reg:CCC FLAGS_REG) (const_int 0)]))]) + +;; pandn/ptestz/j{n?}e -> ptestc/j{n?}c +(define_split + [(set (pc) + (if_then_else + (match_operator 3 "bt_comparison_operator" + [(unspec:CCZ [ + (and:V_AVX + (not:V_AVX (match_operand:V_AVX 1 "register_operand")) + (match_operand:V_AVX 2 "register_operand")) + (and:V_AVX (not:V_AVX (match_dup 1)) (match_dup 2))] + UNSPEC_PTEST) + (const_int 0)]) + (match_operand 0) + (pc)))] + "TARGET_SSE4_1" + [(set (reg:CCC FLAGS_REG) + (unspec:CCC [(match_dup 1) (match_dup 2)] UNSPEC_PTEST)) + (set (pc) (if_then_else (match_op_dup 3 [(reg:CCC FLAGS_REG) (const_int 0)]) + (match_dup 0) + (pc)))]) + (define_expand "nearbyint2" [(set (match_operand:VFH 0 "register_operand") (unspec:VFH diff --git a/gcc/testsuite/gcc.target/i386/avx-vptest-4.c b/gcc/testsuite/gcc.target/i386/avx-vptest-4.c new file mode 100644 index 0000000..0a234e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vptest-4.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ + +typedef long long __m256i __attribute__ ((__vector_size__ (32))); + +int foo (__m256i x, __m256i y) +{ + __m256i a = x & ~y; + return __builtin_ia32_ptestz256 (a, a); +} + +int bar (__m256i x, __m256i y) +{ + __m256i a = ~x & y; + return __builtin_ia32_ptestz256 (a, a); +} + +/* { dg-final { scan-assembler-times "vptest\[ \\t\]+%" 2 } } */ +/* { dg-final { scan-assembler-times "setc" 2 } } */ +/* { dg-final { scan-assembler-not "vpandn" } } */ +/* { dg-final { scan-assembler-not "sete" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-vptest-5.c b/gcc/testsuite/gcc.target/i386/avx-vptest-5.c new file mode 100644 index 0000000..fd0e5e2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vptest-5.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ + +typedef long long __m256i __attribute__ ((__vector_size__ (32))); + +int foo (__m256i x, __m256i y) +{ + __m256i a = x & ~y; + return !__builtin_ia32_ptestz256 (a, a); +} + +int bar (__m256i x, __m256i y) +{ + __m256i a = ~x & y; + return !__builtin_ia32_ptestz256 (a, a); +} + +/* { dg-final { scan-assembler-times "vptest\[ \\t\]+%" 2} } */ +/* { dg-final { scan-assembler-times "setnc" 2 } } */ +/* { dg-final { scan-assembler-not "vpandn" } } */ +/* { dg-final { scan-assembler-not "setne" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-vptest-6.c b/gcc/testsuite/gcc.target/i386/avx-vptest-6.c new file mode 100644 index 0000000..5821a92 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vptest-6.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ + +typedef long long __m256i __attribute__ ((__vector_size__ (32))); + +extern void ext (void); + +void foo (__m256i x, __m256i y) +{ + __m256i a = x & ~y; + if (__builtin_ia32_ptestz256 (a, a)) + ext(); +} + +void bar (__m256i x, __m256i y) +{ + __m256i a = ~x & y; + if (__builtin_ia32_ptestz256 (a, a)) + ext(); +} + +void foo2 (__m256i x, __m256i y) +{ + __m256i a = x & ~y; + if (__builtin_ia32_ptestz256 (a, a)) + ext(); +} + +void bar2 (__m256i x, __m256i y) +{ + __m256i a = ~x & y; + if (__builtin_ia32_ptestz256 (a, a)) + ext(); +} + +/* { dg-final { scan-assembler-times "ptest\[ \\t\]+%" 4 } } */ +/* { dg-final { scan-assembler-times "jn?c" 4 } } */ +/* { dg-final { scan-assembler-not "pandn" } } */ +/* { dg-final { scan-assembler-not "jne" } } */ +/* { dg-final { scan-assembler-not "je" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr109973-1.c b/gcc/testsuite/gcc.target/i386/pr109973-1.c index a1b6136b..1d812dd 100644 --- a/gcc/testsuite/gcc.target/i386/pr109973-1.c +++ b/gcc/testsuite/gcc.target/i386/pr109973-1.c @@ -10,4 +10,4 @@ foo (__m256i x, __m256i y) return __builtin_ia32_ptestc256 (a, a); } -/* { dg-final { scan-assembler "vpand" } } */ +/* { dg-final { scan-assembler "movl\[ \\t]*\\\$1, %eax" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr109973-2.c b/gcc/testsuite/gcc.target/i386/pr109973-2.c index 167f6ee..1068c3e 100644 --- a/gcc/testsuite/gcc.target/i386/pr109973-2.c +++ b/gcc/testsuite/gcc.target/i386/pr109973-2.c @@ -10,4 +10,4 @@ foo (__m128i x, __m128i y) return __builtin_ia32_ptestc128 (a, a); } -/* { dg-final { scan-assembler "pand" } } */ +/* { dg-final { scan-assembler "movl\[ \\t]*\\\$1, %eax" } } */ diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-ptest-4.c b/gcc/testsuite/gcc.target/i386/sse4_1-ptest-4.c new file mode 100644 index 0000000..e74ddb3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-ptest-4.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4.1" } */ + +typedef long long __m128i __attribute__ ((__vector_size__ (16))); + +int foo (__m128i x, __m128i y) +{ + __m128i a = x & ~y; + return __builtin_ia32_ptestz128 (a, a); +} + +int bar (__m128i x, __m128i y) +{ + __m128i a = ~x & y; + return __builtin_ia32_ptestz128 (a, a); +} + +/* { dg-final { scan-assembler-times "ptest\[ \\t\]+%" 2 } } */ +/* { dg-final { scan-assembler-times "setc" 2 } } */ +/* { dg-final { scan-assembler-not "pandn" } } */ +/* { dg-final { scan-assembler-not "sete" } } */ + diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-ptest-5.c b/gcc/testsuite/gcc.target/i386/sse4_1-ptest-5.c new file mode 100644 index 0000000..74b0a8c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-ptest-5.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4.1" } */ + +typedef long long __m128i __attribute__ ((__vector_size__ (16))); + +int foo (__m128i x, __m128i y) +{ + __m128i a = x & ~y; + return !__builtin_ia32_ptestz128 (a, a); +} + +int bar (__m128i x, __m128i y) +{ + __m128i a = ~x & y; + return !__builtin_ia32_ptestz128 (a, a); +} + +/* { dg-final { scan-assembler-times "ptest\[ \\t\]+%" 2 } } */ +/* { dg-final { scan-assembler-times "setnc" 2 } } */ +/* { dg-final { scan-assembler-not "pandn" } } */ +/* { dg-final { scan-assembler-not "setne" } } */ + diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-ptest-6.c b/gcc/testsuite/gcc.target/i386/sse4_1-ptest-6.c new file mode 100644 index 0000000..d9114bb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-ptest-6.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4.1" } */ + +typedef long long __m128i __attribute__ ((__vector_size__ (16))); + +extern void ext (void); + +void foo (__m128i x, __m128i y) +{ + __m128i a = x & ~y; + if (__builtin_ia32_ptestz128 (a, a)) + ext(); +} + +void bar (__m128i x, __m128i y) +{ + __m128i a = ~x & y; + if (__builtin_ia32_ptestz128 (a, a)) + ext(); +} + +void foo2 (__m128i x, __m128i y) +{ + __m128i a = x & ~y; + if (__builtin_ia32_ptestz128 (a, a)) + ext(); +} + +void bar2 (__m128i x, __m128i y) +{ + __m128i a = ~x & y; + if (__builtin_ia32_ptestz128 (a, a)) + ext(); +} + +/* { dg-final { scan-assembler-times "ptest\[ \\t\]+%" 4 } } */ +/* { dg-final { scan-assembler-times "jn?c" 4 } } */ +/* { dg-final { scan-assembler-not "pandn" } } */ +/* { dg-final { scan-assembler-not "jne" } } */ +/* { dg-final { scan-assembler-not "je" } } */