diff --git a/gcc/testsuite/gcc.target/aarch64/sve/combine_vcond_mask_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve/combine_vcond_mask_store_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..64f6b7b00f58ee45bd4a2f91c1a9404911f1a09f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/combine_vcond_mask_store_1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize --param allow-store-data-races=1 -fdump-tree-vect-details" } */
+
+void test ()
+{
+  static int array[100];
+  for (unsigned i = 1; i < 16; ++i)
+    {
+      int a = array[i];
+      if (a & 1)
+	array[i] = a + 1;
+      if (array[i] > 10)
+	array[i] = a + 2;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "Combining VEC_COND_EXPR and MASK_STORE" 1 "vect" } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 177b284e9c617a41c33d1387ba5afbed51d8ed00..9e1a167d03ea5bf640e58b3426d42b4e3c74da56 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -8539,6 +8539,215 @@ vect_transform_loop (loop_vec_info loop_vinfo)
   return epilogue;
 }
 
+/* Return true if MASK_LOAD and MASK_STORE, both statements in LOOP, access
+   the same memory according to their data references.  The data references
+   created for the comparison are freed before returning.  */
+
+static bool
+masked_load_store_same_ref_p (struct loop *loop, gimple *mask_load,
+			      gimple *mask_store)
+{
+  auto_vec<data_reference_p, 2> refs;
+  bool same_p = false;
+
+  /* Each masked access is expected to yield exactly one reference.  */
+  if (find_data_references_in_stmt (loop, mask_store, &refs)
+      && refs.length () == 1
+      && find_data_references_in_stmt (loop, mask_load, &refs)
+      && refs.length () == 2)
+    same_p = same_data_refs (refs[0], refs[1]);
+
+  unsigned int ix;
+  data_reference_p dr;
+  FOR_EACH_VEC_ELT (refs, ix, dr)
+    free_data_ref (dr);
+
+  return same_p;
+}
+
+/* NAME is an SSA name that may have become unused.  If so, remove its
+   defining statement and repeat the check for every SSA operand of each
+   statement removed.  */
+
+static void
+remove_dead_defs (tree name)
+{
+  auto_vec<tree> worklist;
+  worklist.safe_push (name);
+  while (!worklist.is_empty ())
+    {
+      tree val = worklist.pop ();
+
+      /* An operand can be queued more than once, so skip names whose
+	 definition has already been removed and released.  */
+      if (TREE_CODE (val) != SSA_NAME
+	  || SSA_NAME_IN_FREE_LIST (val)
+	  || !has_zero_uses (val))
+	continue;
+
+      /* Default definitions and PHIs are not ordinary statements in a
+	 block's statement list, and side effects must be preserved.  */
+      gimple *def = SSA_NAME_DEF_STMT (val);
+      if (gimple_nop_p (def)
+	  || gimple_code (def) == GIMPLE_PHI
+	  || gimple_has_side_effects (def))
+	continue;
+
+      ssa_op_iter iter;
+      tree op;
+      FOR_EACH_SSA_TREE_OPERAND (op, def, iter, SSA_OP_USE)
+	worklist.safe_push (op);
+
+      gimple_stmt_iterator def_gsi = gsi_for_stmt (def);
+      gsi_remove (&def_gsi, true);
+      release_defs (def);
+    }
+}
+
+/* When allow-store-data-races=1, if-conversion will convert certain if
+   statements into:
+     A[i] = cond ? val : A[i].
+   If the loop is successfully vectorized,
+     MASK_LOAD + VEC_COND_EXPR + MASK_STORE
+   will be generated.
+
+   This pattern can be combined into a single MASK_STORE with a new mask.
+   The new mask is the combination of the original mask and the value
+   selection mask in VEC_COND_EXPR.
+
+   Scan every statement of LOOP for the pattern and rewrite each match.
+   After the transformation the MASK_LOAD and VEC_COND_EXPR might be dead,
+   in which case they are removed.  */
+
+void
+combine_sel_mask_store (struct loop *loop)
+{
+  basic_block *bbs = get_loop_body (loop);
+  unsigned nbbs = loop->num_nodes;
+
+  vect_location = find_loop_location (loop);
+  for (unsigned i = 0; i < nbbs; i++)
+    {
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
+	   !gsi_end_p (gsi); gsi_next (&gsi))
+	{
+	  gimple *mask_store = gsi_stmt (gsi);
+	  if (!gimple_call_internal_p (mask_store, IFN_MASK_STORE))
+	    continue;
+
+	  /* Look for:
+	       X = MASK_LOAD (PTR, -, MASK)
+	       VAL = ...
+	       Y = VEC_COND (cond, VAL, X)
+	       MASK_STORE (PTR, -, MASK, Y)  */
+	  tree vec_op = gimple_call_arg (mask_store, 3);
+	  tree store_mask = gimple_call_arg (mask_store, 2);
+	  if (TREE_CODE (vec_op) != SSA_NAME)
+	    continue;
+
+	  gassign *assign
+	    = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (vec_op));
+	  if (!assign || gimple_assign_rhs_code (assign) != VEC_COND_EXPR)
+	    continue;
+
+	  tree sel_cond = gimple_assign_rhs1 (assign);
+	  tree true_val = gimple_assign_rhs2 (assign);
+	  tree false_val = gimple_assign_rhs3 (assign);
+	  gimple *mask_load = NULL;
+
+	  /* A[i] = cond ? val : A[i]  */
+	  if (TREE_CODE (false_val) == SSA_NAME)
+	    {
+	      gimple *def = SSA_NAME_DEF_STMT (false_val);
+	      if (gimple_call_internal_p (def, IFN_MASK_LOAD))
+		mask_load = def;
+	    }
+
+	  /* A[i] = cond ? A[i] : val
+	     Transform into:
+	     A[i] = !cond ? val : A[i]  */
+	  if (mask_load == NULL && TREE_CODE (true_val) == SSA_NAME)
+	    {
+	      gimple *def = SSA_NAME_DEF_STMT (true_val);
+	      if (gimple_call_internal_p (def, IFN_MASK_LOAD))
+		{
+		  /* Only a comparison can be inverted here; the condition
+		     might instead be a mask held in an SSA name.  */
+		  if (!COMPARISON_CLASS_P (sel_cond))
+		    continue;
+
+		  enum tree_code code = TREE_CODE (sel_cond);
+		  tree op_type = TREE_TYPE (TREE_OPERAND (sel_cond, 0));
+		  code = invert_tree_comparison (code, HONOR_NANS (op_type));
+		  if (code == ERROR_MARK)
+		    continue;
+		  sel_cond = build2_loc (EXPR_LOCATION (sel_cond), code,
+					 TREE_TYPE (sel_cond),
+					 TREE_OPERAND (sel_cond, 0),
+					 TREE_OPERAND (sel_cond, 1));
+		  mask_load = def;
+		  true_val = false_val;
+		}
+	    }
+
+	  /* The pair must be in the same basic block, use the same mask,
+	     have the same virtual use and access the same memory.  The
+	     selection mask must also be compatible with the store mask so
+	     that the two can be ANDed together.  */
+	  if (mask_load == NULL
+	      || gimple_bb (mask_store) != gimple_bb (mask_load)
+	      || store_mask != gimple_call_arg (mask_load, 2)
+	      || gimple_vuse (mask_store) != gimple_vuse (mask_load)
+	      || !types_compatible_p (TREE_TYPE (sel_cond),
+				      TREE_TYPE (store_mask))
+	      || !masked_load_store_same_ref_p (loop, mask_load, mask_store))
+	    continue;
+
+	  /* Merge the pattern: AND the selection mask into the store mask
+	     and store the selected value directly.  */
+	  tree sel_mask
+	    = force_gimple_operand_gsi (&gsi, unshare_expr (sel_cond),
+					true, NULL_TREE,
+					true, GSI_SAME_STMT);
+
+	  tree and_mask = make_temp_ssa_name (TREE_TYPE (store_mask),
+					      NULL, "vec_mask_and");
+	  gimple *and_stmt = gimple_build_assign (and_mask, BIT_AND_EXPR,
+						  sel_mask, store_mask);
+	  gsi_insert_before (&gsi, and_stmt, GSI_SAME_STMT);
+
+	  gcall *new_stmt
+	    = gimple_build_call_internal (IFN_MASK_STORE, 4,
+					  gimple_call_arg (mask_store, 0),
+					  gimple_call_arg (mask_store, 1),
+					  and_mask, true_val);
+	  gimple_call_set_nothrow (new_stmt, true);
+
+	  /* Hand the virtual operands of the old store over to its
+	     replacement so that virtual SSA form stays up to date.  */
+	  if (tree vuse = gimple_vuse (mask_store))
+	    gimple_set_vuse (new_stmt, vuse);
+	  tree vdef = gimple_vdef (mask_store);
+	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
+	    {
+	      gimple_set_vdef (new_stmt, vdef);
+	      SSA_NAME_DEF_STMT (vdef) = new_stmt;
+	    }
+	  gsi_replace (&gsi, new_stmt, true);
+
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "Combining VEC_COND_EXPR and MASK_STORE:\n"
+			     "%G%G", assign, new_stmt);
+
+	  /* Remove the VEC_COND_EXPR and MASK_LOAD if they are now dead.  */
+	  remove_dead_defs (vec_op);
+	}
+    }
+
+  free (bbs);
+}
+
 /* The code below is trying to perform simple optimization - revert
    if-conversion for masked stores, i.e. if the mask of a store is zero
    do not perform it and all stored value producers also if possible.
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 988456808318dabd0058f6b0d038f8c272e75c6b..ea661ec609df56f96cb54c3f2996646c05870667 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1510,6 +1510,7 @@ extern void vect_get_store_cost (stmt_vec_info, int,
 extern bool vect_supportable_shift (enum tree_code, tree);
 extern tree vect_gen_perm_mask_any (tree, const vec_perm_indices &);
 extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &);
+extern void combine_sel_mask_store (struct loop*);
 extern void optimize_mask_stores (struct loop*);
 extern gcall *vect_gen_while (tree, tree, tree);
 extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree);
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 12bf0fcd5bde4b889fb74342c4e7dd52327efa57..a1145cdfbeb826669071dd077420908ba67bdecc 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -1143,6 +1143,7 @@ vectorize_loops (void)
       loop_vinfo = (loop_vec_info) loop->aux;
       has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
       delete loop_vinfo;
+      combine_sel_mask_store (loop);  /* Fold selects into store masks.  */
       if (has_mask_store
 	  && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
 	optimize_mask_stores (loop);