Hi, I am new to this and would really appreciate your advice, thanks.

I noticed PR71716, and I would like to enable the ATOMIC_COMPARE_EXCHANGE
internal-fn optimization for floating-point types and for types that contain
padding bits (e.g., long double).  Please correct me if I have made any
mistakes, thanks!

Firstly, regarding the concern about sNaN float/double values, the
optimization seems to work correctly there, and that case should already be
covered by testsuite/gcc.dg/atomic/c11-atomic-exec-5.c.

Secondly, since ATOMIC_COMPARE_EXCHANGE is only enabled when the expected var
is addressable solely because of this call, its padding bits cannot be
modified by any other stmts.  So we can save all the bits after the
ATOMIC_COMPARE_EXCHANGE call and extract the padding bits from them.  After
the first iteration, the extracted padding bits can be mixed back into the
expected var (a small standalone sketch of the bit-mixing idea follows the
patch).

Bootstrapped/regtested on x86_64-linux.  I also ran some benchmarks: there is
a significant speedup for float/double types and no regression for long
double.

Thanks,
xndcn

gcc/ChangeLog:

	* gimple-fold.cc (optimize_atomic_compare_exchange_p): Enable for
	SCALAR_FLOAT_TYPE_P expected vars and for types whose TYPE_PRECISION
	differs from the mode's bitsize.
	(fold_builtin_atomic_compare_exchange): If TYPE_PRECISION differs from
	the mode's bitsize, keep track of all the bits and mix them with
	VIEW_CONVERT_EXPR (expected).

Signed-off-by: xndcn
---
 gcc/gimple-fold.cc | 77 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 71 insertions(+), 6 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index cb4b57250..321ff4f41 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -5306,12 +5306,7 @@ optimize_atomic_compare_exchange_p (gimple *stmt)
       || !auto_var_in_fn_p (TREE_OPERAND (expected, 0), current_function_decl)
       || TREE_THIS_VOLATILE (etype)
       || VECTOR_TYPE_P (etype)
-      || TREE_CODE (etype) == COMPLEX_TYPE
-      /* Don't optimize floating point expected vars, VIEW_CONVERT_EXPRs
-	 might not preserve all the bits.  See PR71716.  */
-      || SCALAR_FLOAT_TYPE_P (etype)
-      || maybe_ne (TYPE_PRECISION (etype),
-		   GET_MODE_BITSIZE (TYPE_MODE (etype))))
+      || TREE_CODE (etype) == COMPLEX_TYPE)
     return false;
 
   tree weak = gimple_call_arg (stmt, 3);
@@ -5350,8 +5345,10 @@ fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *gsi)
   tree itype = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (parmt)));
   tree ctype = build_complex_type (itype);
   tree expected = TREE_OPERAND (gimple_call_arg (stmt, 1), 0);
+  tree etype = TREE_TYPE (expected);
   bool throws = false;
   edge e = NULL;
+  tree allbits = NULL_TREE;
   gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (expected)),
 				   expected);
   gsi_insert_before (gsi, g, GSI_SAME_STMT);
@@ -5362,6 +5359,67 @@ fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *gsi)
 			       build1 (VIEW_CONVERT_EXPR, itype,
 				       gimple_assign_lhs (g)));
       gsi_insert_before (gsi, g, GSI_SAME_STMT);
+
+      // VIEW_CONVERT_EXPRs might not preserve all the bits (see PR71716),
+      // so we have to keep track of all the bits here.
+      if (maybe_ne (TYPE_PRECISION (etype),
+		    GET_MODE_BITSIZE (TYPE_MODE (etype))))
+	{
+	  gimple_stmt_iterator cgsi
+	    = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
+	  allbits = create_tmp_var (itype);
+	  // allbits is initialized to 0, which can be ignored the first time.
+	  gimple *init_stmt
+	    = gimple_build_assign (allbits, build_int_cst (itype, 0));
+	  gsi_insert_before (&cgsi, init_stmt, GSI_SAME_STMT);
+	  tree maskbits = create_tmp_var (itype);
+	  // maskbits is initialized to all ones (0xFF...FF).
+	  init_stmt = gimple_build_assign (maskbits, build1 (BIT_NOT_EXPR,
+							     itype, allbits));
+	  gsi_insert_before (&cgsi, init_stmt, GSI_SAME_STMT);
+
+	  // g = g & maskbits
+	  g = gimple_build_assign (make_ssa_name (itype),
+				   build2 (BIT_AND_EXPR, itype,
+					   gimple_assign_lhs (g), maskbits));
+	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
+
+	  gimple *def_mask = gimple_build_assign (
+	    make_ssa_name (itype),
+	    build2 (LSHIFT_EXPR, itype, build_int_cst (itype, 1),
+		    build_int_cst (itype, TYPE_PRECISION (etype))));
+	  gsi_insert_before (gsi, def_mask, GSI_SAME_STMT);
+	  def_mask = gimple_build_assign (make_ssa_name (itype),
+					  build2 (MINUS_EXPR, itype,
+						  gimple_assign_lhs (def_mask),
+						  build_int_cst (itype, 1)));
+	  gsi_insert_before (gsi, def_mask, GSI_SAME_STMT);
+	  // maskbits = (1 << TYPE_PRECISION (etype)) - 1
+	  def_mask = gimple_build_assign (maskbits, SSA_NAME,
+					  gimple_assign_lhs (def_mask));
+	  gsi_insert_before (gsi, def_mask, GSI_SAME_STMT);
+
+	  // paddingbits = (~maskbits) & allbits
+	  def_mask
+	    = gimple_build_assign (make_ssa_name (itype),
+				   build1 (BIT_NOT_EXPR, itype,
+					   gimple_assign_lhs (def_mask)));
+	  gsi_insert_before (gsi, def_mask, GSI_SAME_STMT);
+	  def_mask
+	    = gimple_build_assign (make_ssa_name (itype),
+				   build2 (BIT_AND_EXPR, itype, allbits,
+					   gimple_assign_lhs (def_mask)));
+	  gsi_insert_before (gsi, def_mask, GSI_SAME_STMT);
+
+	  // g = g | paddingbits, i.e.
+	  // g = (VIEW_CONVERT_EXPR (expected) & maskbits)
+	  //     | (allbits & ~maskbits)
+	  g = gimple_build_assign (make_ssa_name (itype),
+				   build2 (BIT_IOR_EXPR, itype,
+					   gimple_assign_lhs (g),
+					   gimple_assign_lhs (def_mask)));
+	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
+	}
     }
   int flag = (integer_onep (gimple_call_arg (stmt, 3)) ? 256 : 0)
 	     + int_size_in_bytes (itype);
@@ -5410,6 +5468,13 @@ fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *gsi)
   gsi_insert_after (gsi, g, GSI_NEW_STMT);
   if (!useless_type_conversion_p (TREE_TYPE (expected), itype))
     {
+      // Save all the bits here.
+      if (maybe_ne (TYPE_PRECISION (etype),
+		    GET_MODE_BITSIZE (TYPE_MODE (etype))))
+	{
+	  g = gimple_build_assign (allbits, SSA_NAME, gimple_assign_lhs (g));
+	  gsi_insert_after (gsi, g, GSI_NEW_STMT);
+	}
       g = gimple_build_assign (make_ssa_name (TREE_TYPE (expected)),
 			       VIEW_CONVERT_EXPR,
 			       build1 (VIEW_CONVERT_EXPR, TREE_TYPE (expected),
-- 
2.25.1
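
P.S. In case it helps review, here is a minimal standalone sketch (illustration
only, not part of the patch) of the mask/mix logic that the inserted gimple
sequence builds.  The function and parameter names are made up, and uint64_t
stands in for the wider integer mode (itype) that actually carries the padded
image:

#include <stdint.h>

/* Mix the value bits of the converted expected var with the padding
   bits saved from the previous ATOMIC_COMPARE_EXCHANGE result.  */
static uint64_t
mix_padding_bits (uint64_t converted, /* VIEW_CONVERT_EXPR (expected) */
		  uint64_t allbits,   /* full bit image saved after the call */
		  unsigned precision) /* TYPE_PRECISION (etype) */
{
  /* maskbits = (1 << precision) - 1 covers the value bits only.  */
  uint64_t maskbits = precision < 64 ? ((uint64_t) 1 << precision) - 1
				     : ~(uint64_t) 0;
  /* paddingbits = allbits & ~maskbits are the padding bits observed
     after the last call.  */
  uint64_t paddingbits = allbits & ~maskbits;
  /* Value bits come from the converted expected var, padding bits from
     the saved image.  */
  return (converted & maskbits) | paddingbits;
}

Before the first iteration allbits is 0 (matching its initialization in the
patch), so the padding bits are simply zero; after each call the full result
image is saved back into allbits, and its padding bits are carried into the
next expected value.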