i386: give [v]ptest results explicit CCZmode / CCCmode / CCmode flag modes
(the added tests are named pr109973-1.c, pr109973-2.c and pr110083.c).

What the hunks below do:
  * i386-builtin.def: __builtin_ia32_ptestz{128,256} and
    __builtin_ia32_ptestc{128,256} now expand through the ptestz / ptestc
    expanders; the ptestnzc builtins keep the CCmode ptest expander.
  * i386-expand.cc, i386-features.cc: the UNSPEC_PTEST emitted by
    ix86_expand_branch and scalar_chain::convert_compare is CCZmode, and the
    COMPARE cases of both convert_insn functions set a CCZmode FLAGS_REG.
    convert_compare also looks up the insn's note with
    find_reg_equal_equiv_note: a CCZmode COMPARE of a REG against a scalar
    integer constant has the constant rewritten as a vmode constant, a
    REG/REG CCZmode COMPARE is kept, and any other note is removed.
  * i386.cc, i386-protos.h: new ix86_match_ptest_ccmode, true only when the
    UNSPEC_PTEST source is CCZmode, CCCmode or CCmode and the SET destination
    has that same mode.
  * sse.md: the ptest insn no longer fixes the flags mode and is guarded by
    ix86_match_ptest_ccmode; ptestz / ptestc / ptest expanders are added; the
    UNSPEC_MOVMSK split and the ptest_and pattern use CCZ.

NOTE(review): this patch text was recovered from a copy whose line breaks had
been collapsed and whose <...> tokens had been dropped.  Line structure was
rebuilt so that every hunk matches the counts in its @@ header.  Leading
whitespace (tabs), the two-space sentence spacing inside comments, and the
sse.md iterator tokens are reconstructions: <sse4_1> and <mode> in the ptest
pattern names follow from the CODE_FOR_sse4_1_ptestzv2di and
CODE_FOR_avx_ptestcv4di references in i386-builtin.def, while <vi1avx2const>,
<MODE>, <sseinsnmode>, "*ptest<mode>_and" and "nearbyint<mode>2" are presumed
and must be confirmed against the tree.  If context fails to match, retry with
whitespace-insensitive matching (patch -l, or git apply --ignore-whitespace).

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 6dae697..37df018 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -1004,8 +1004,8 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF)
 
-BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST)
-BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST)
+BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_ptestzv2di, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST)
+BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_ptestcv2di, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST)
 
 /* SSE4.2 */
@@ -1164,8 +1164,8 @@ BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzc
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST)
+BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_ptestzv4di, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST)
+BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_ptestcv4di, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST)
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST)
 
 BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF )
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 0d817fc..7719449 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -2370,8 +2370,8 @@ ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
       tmp = gen_reg_rtx (mode);
       emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1)));
       tmp = gen_lowpart (p_mode, tmp);
-      emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG),
-			      gen_rtx_UNSPEC (CCmode,
+      emit_insn (gen_rtx_SET (gen_rtx_REG (CCZmode, FLAGS_REG),
+			      gen_rtx_UNSPEC (CCZmode,
 					      gen_rtvec (2, tmp, tmp),
 					      UNSPEC_PTEST)));
       tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx);
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index a0a7348..4a3b07a 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -974,12 +974,45 @@ general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
     }
 }
 
-/* Convert COMPARE to vector mode.  */
+/* Convert CCZmode COMPARE to vector mode.  */
 
 rtx
 scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn)
 {
   rtx src, tmp;
+
+  /* Handle any REG_EQUAL notes.  */
+  tmp = find_reg_equal_equiv_note (insn);
+  if (tmp)
+    {
+      if (GET_CODE (XEXP (tmp, 0)) == COMPARE
+	  && GET_MODE (XEXP (tmp, 0)) == CCZmode
+	  && REG_P (XEXP (XEXP (tmp, 0), 0)))
+	{
+	  rtx *op = &XEXP (XEXP (tmp, 0), 1);
+	  if (CONST_SCALAR_INT_P (*op))
+	    {
+	      if (constm1_operand (*op, GET_MODE (*op)))
+		*op = CONSTM1_RTX (vmode);
+	      else
+		{
+		  unsigned n = GET_MODE_NUNITS (vmode);
+		  rtx *v = XALLOCAVEC (rtx, n);
+		  v[0] = *op;
+		  for (unsigned i = 1; i < n; ++i)
+		    v[i] = const0_rtx;
+		  *op = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
+		}
+	      tmp = NULL_RTX;
+	    }
+	  else if (REG_P (*op))
+	    tmp = NULL_RTX;
+	}
+
+      if (tmp)
+	remove_note (insn, tmp);
+    }
+
   /* Comparison against anything other than zero, requires an XOR.  */
   if (op2 != const0_rtx)
     {
@@ -1023,7 +1056,7 @@ scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn)
 	  emit_insn_before (gen_rtx_SET (tmp, op11), insn);
 	  op11 = tmp;
 	}
-      return gen_rtx_UNSPEC (CCmode, gen_rtvec (2, op11, op12),
+      return gen_rtx_UNSPEC (CCZmode, gen_rtvec (2, op11, op12),
 			     UNSPEC_PTEST);
     }
   else
@@ -1052,7 +1085,7 @@ scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn)
       src = tmp;
     }
 
-  return gen_rtx_UNSPEC (CCmode, gen_rtvec (2, src, src), UNSPEC_PTEST);
+  return gen_rtx_UNSPEC (CCZmode, gen_rtvec (2, src, src), UNSPEC_PTEST);
 }
 
 /* Helper function for converting INSN to vector mode.  */
@@ -1219,7 +1252,7 @@ general_scalar_chain::convert_insn (rtx_insn *insn)
       break;
 
     case COMPARE:
-      dst = gen_rtx_REG (CCmode, FLAGS_REG);
+      dst = gen_rtx_REG (CCZmode, FLAGS_REG);
       src = convert_compare (XEXP (src, 0), XEXP (src, 1), insn);
       break;
 
@@ -1726,7 +1759,7 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)
       break;
 
     case COMPARE:
-      dst = gen_rtx_REG (CCmode, FLAGS_REG);
+      dst = gen_rtx_REG (CCZmode, FLAGS_REG);
       src = convert_compare (XEXP (src, 0), XEXP (src, 1), insn);
       break;
 
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 71ae95f..b00756b 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -140,6 +140,7 @@ extern void ix86_expand_copysign (rtx []);
 extern void ix86_expand_xorsign (rtx []);
 extern bool ix86_unary_operator_ok (enum rtx_code, machine_mode, rtx[2]);
 extern bool ix86_match_ccmode (rtx, machine_mode);
+extern bool ix86_match_ptest_ccmode (rtx);
 extern void ix86_expand_branch (enum rtx_code, rtx, rtx, rtx);
 extern void ix86_expand_setcc (rtx, enum rtx_code, rtx, rtx);
 extern bool ix86_expand_int_movcc (rtx[]);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index fbd33a6..30fc552 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -15985,6 +15985,29 @@ ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
     }
 }
 
+/* Return TRUE or FALSE depending on whether the ptest instruction
+   INSN has source and destination with suitable matching CC modes.  */
+
+bool
+ix86_match_ptest_ccmode (rtx insn)
+{
+  rtx set, src;
+  machine_mode set_mode;
+
+  set = PATTERN (insn);
+  gcc_assert (GET_CODE (set) == SET);
+  src = SET_SRC (set);
+  gcc_assert (GET_CODE (src) == UNSPEC
+	      && XINT (src, 1) == UNSPEC_PTEST);
+
+  set_mode = GET_MODE (src);
+  if (set_mode != CCZmode
+      && set_mode != CCCmode
+      && set_mode != CCmode)
+    return false;
+  return GET_MODE (SET_DEST (set)) == set_mode;
+}
+
 /* Return the fixed registers used for condition codes.  */
 
 static bool
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 513960e..e8d50a1 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -20441,10 +20441,10 @@
 			UNSPEC_MOVMSK)
 		     (match_operand 2 "const_int_operand")))]
   "TARGET_SSE4_1 && (INTVAL (operands[2]) == (int) (<vi1avx2const>))"
-  [(set (reg:CC FLAGS_REG)
-	(unspec:CC [(match_dup 0)
-		    (match_dup 0)]
-		   UNSPEC_PTEST))])
+  [(set (reg:CCZ FLAGS_REG)
+	(unspec:CCZ [(match_dup 0)
+		     (match_dup 0)]
+		    UNSPEC_PTEST))])
 
 (define_expand "sse2_maskmovdqu"
   [(set (match_operand:V16QI 0 "memory_operand")
@@ -23096,13 +23096,13 @@
    (set_attr "mode" "<MODE>")])
 
 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
-;; But it is not a really compare instruction.
-(define_insn "<sse4_1>_ptest<mode>"
-  [(set (reg:CC FLAGS_REG)
-	(unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
-		    (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
-		   UNSPEC_PTEST))]
-  "TARGET_SSE4_1"
+;; But it is not really a compare instruction.
+(define_insn "*<sse4_1>_ptest<mode>"
+  [(set (reg FLAGS_REG)
+	(unspec [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
+		 (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
+		UNSPEC_PTEST))]
+  "TARGET_SSE4_1 && ix86_match_ptest_ccmode (insn)"
   "%vptest\t{%1, %0|%0, %1}"
   [(set_attr "isa" "noavx,noavx,avx")
    (set_attr "type" "ssecomi")
@@ -23115,6 +23115,30 @@
          (const_string "*")))
    (set_attr "mode" "<sseinsnmode>")])
 
+;; Expand a ptest to set the Z flag.
+(define_expand "<sse4_1>_ptestz<mode>"
+  [(set (reg:CCZ FLAGS_REG)
+	(unspec:CCZ [(match_operand:V_AVX 0 "register_operand")
+		     (match_operand:V_AVX 1 "vector_operand")]
+		    UNSPEC_PTEST))]
+  "TARGET_SSE4_1")
+
+;; Expand a ptest to set the C flag
+(define_expand "<sse4_1>_ptestc<mode>"
+  [(set (reg:CCC FLAGS_REG)
+	(unspec:CCC [(match_operand:V_AVX 0 "register_operand")
+		     (match_operand:V_AVX 1 "vector_operand")]
+		    UNSPEC_PTEST))]
+  "TARGET_SSE4_1")
+
+;; Expand a ptest to set both the Z and C flags
+(define_expand "<sse4_1>_ptest<mode>"
+  [(set (reg:CC FLAGS_REG)
+	(unspec:CC [(match_operand:V_AVX 0 "register_operand")
+		    (match_operand:V_AVX 1 "vector_operand")]
+		   UNSPEC_PTEST))]
+  "TARGET_SSE4_1")
+
 (define_insn "ptesttf2"
   [(set (reg:CC FLAGS_REG)
 	(unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
@@ -23129,17 +23153,17 @@
    (set_attr "mode" "TI")])
 
 (define_insn_and_split "*ptest<mode>_and"
-  [(set (reg:CC FLAGS_REG)
-	(unspec:CC [(and:V_AVX (match_operand:V_AVX 0 "register_operand")
-			       (match_operand:V_AVX 1 "vector_operand"))
-		    (and:V_AVX (match_dup 0) (match_dup 1))]
+  [(set (reg:CCZ FLAGS_REG)
+	(unspec:CCZ [(and:V_AVX (match_operand:V_AVX 0 "register_operand")
+				(match_operand:V_AVX 1 "vector_operand"))
+		     (and:V_AVX (match_dup 0) (match_dup 1))]
 		   UNSPEC_PTEST))]
   "TARGET_SSE4_1
    && ix86_pre_reload_split ()"
   "#"
   "&& 1"
-  [(set (reg:CC FLAGS_REG)
-	(unspec:CC [(match_dup 0) (match_dup 1)] UNSPEC_PTEST))])
+  [(set (reg:CCZ FLAGS_REG)
+	(unspec:CCZ [(match_dup 0) (match_dup 1)] UNSPEC_PTEST))])
 
 (define_expand "nearbyint<mode>2"
   [(set (match_operand:VFH 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/i386/pr109973-1.c b/gcc/testsuite/gcc.target/i386/pr109973-1.c
new file mode 100644
index 0000000..a1b6136b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109973-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2" } */
+
+typedef long long __m256i __attribute__ ((__vector_size__ (32)));
+
+int
+foo (__m256i x, __m256i y)
+{
+  __m256i a = x & y;
+  return __builtin_ia32_ptestc256 (a, a);
+}
+
+/* { dg-final { scan-assembler "vpand" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr109973-2.c b/gcc/testsuite/gcc.target/i386/pr109973-2.c
new file mode 100644
index 0000000..167f6ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109973-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1" } */
+
+typedef long long __m128i __attribute__ ((__vector_size__ (16)));
+
+int
+foo (__m128i x, __m128i y)
+{
+  __m128i a = x & y;
+  return __builtin_ia32_ptestc128 (a, a);
+}
+
+/* { dg-final { scan-assembler "pand" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr110083.c b/gcc/testsuite/gcc.target/i386/pr110083.c
new file mode 100644
index 0000000..4b38ca8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110083.c
@@ -0,0 +1,26 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse4 -mstv -mno-stackrealign" } */
+typedef int TItype __attribute__ ((mode (TI)));
+typedef unsigned int UTItype __attribute__ ((mode (TI)));
+
+void foo (void)
+{
+  static volatile TItype ivin, ivout;
+  static volatile float fv1, fv2;
+  ivin = ((TItype) (UTItype) ~ (((UTItype) ~ (UTItype) 0) >> 1));
+  fv1 = ((TItype) (UTItype) ~ (((UTItype) ~ (UTItype) 0) >> 1));
+  fv2 = ivin;
+  ivout = fv2;
+  if (ivin != ((TItype) (UTItype) ~ (((UTItype) ~ (UTItype) 0) >> 1))
+      || ((((128) > sizeof (TItype) * 8 - 1)) && ivout != ivin)
+      || ((((128) > sizeof (TItype) * 8 - 1))
+	  && ivout !=
+	  ((TItype) (UTItype) ~ (((UTItype) ~ (UTItype) 0) >> 1)))
+      || fv1 !=
+      (float) ((TItype) (UTItype) ~ (((UTItype) ~ (UTItype) 0) >> 1))
+      || fv2 !=
+      (float) ((TItype) (UTItype) ~ (((UTItype) ~ (UTItype) 0) >> 1))
+      || fv1 != fv2)
+    __builtin_abort ();
+}
+