diff --git a/gcc/testsuite/gcc.dg/associate_division_1.c b/gcc/testsuite/gcc.dg/associate_division_1.c new file mode 100644 index 0000000000000000000000000000000000000000..187d3597af8825a6a43f29bfaa68b089d2d5bbfe --- /dev/null +++ b/gcc/testsuite/gcc.dg/associate_division_1.c @@ -0,0 +1,46 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -fdump-tree-optimized" } */ + +float +extract_square (float x, float y, float *a, float *b) +{ + *a = 3 / (y * y); + *b = 5 / (y * y); + + return x / (y * y); +} + +/* Don't expect the 'powmult' (calculation of y * y) + to be deleted until a later pass, so look for one + more multiplication than strictly necessary. */ +float +extract_recip (float *w, float x, float y, float z) +{ + *w = 7 / y; + + return x / (y * y) + z / y; +} + +float +neg_recip (float x, float y, float z) +{ + return (x / y) + (z / -y); +} + +float +const_divisor (float *w, float x, float y, float z) +{ + *w = 5 / y; + return x / (y * 3.0f) + z / y; +} + +/* 4 For the pointers to a, b, w and w, 4 multiplications in 'extract_square', + 5 multiplications in 'extract_recip', 0 multiplications + in 'neg_recip', 3 multiplcations in 'const_divisor' expected. */ +/* { dg-final { scan-tree-dump-times " \\* " 14 "optimized" } } */ + +/* 1 division in 'extract_square', 1 division in 'extract_recip', + 1 division in 'neg_recip', 1 division in 'const_divisor'. */ +/* { dg-final { scan-tree-dump-times " / " 4 "optimized" } } */ + + diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index 7ac1659fa0670b7080685f3f9513939807073a63..0db160f68001ffb90942c4002703625430128b9f 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -340,6 +340,38 @@ is_division_by (gimple *use_stmt, tree def) && gimple_assign_rhs1 (use_stmt) != def; } +/* Return whether USE_STMT is DEF * DEF. */ +static inline bool +is_square_of (gimple *use_stmt, tree def) +{ + if (gimple_code (use_stmt) == GIMPLE_ASSIGN + && gimple_assign_rhs_code (use_stmt) == MULT_EXPR) + { + tree op0 = gimple_assign_rhs1 (use_stmt); + tree op1 = gimple_assign_rhs2 (use_stmt); + + return op0 == op1 && op0 == def; + } + return 0; +} + +/* Return whether USE_STMT is a floating-point division by + DEF * DEF. */ +static inline bool +is_division_by_square (gimple *use_stmt, tree def) +{ + if (gimple_code (use_stmt) == GIMPLE_ASSIGN + && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR) + { + tree denominator = gimple_assign_rhs2 (use_stmt); + if (TREE_CODE (denominator) == SSA_NAME) + { + return is_square_of (SSA_NAME_DEF_STMT (denominator), def); + } + } + return 0; +} + /* Walk the subset of the dominator tree rooted at OCC, setting the RECIP_DEF field to a definition of 1.0 / DEF that can be used in the given basic block. The field may be left NULL, of course, @@ -369,14 +401,16 @@ insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ, build_one_cst (type), def); if (occ->bb_has_division) - { - /* Case 1: insert before an existing division. */ - gsi = gsi_after_labels (occ->bb); - while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def)) + { + /* Case 1: insert before an existing division. */ + gsi = gsi_after_labels (occ->bb); + while (!gsi_end_p (gsi) + && (!is_division_by (gsi_stmt (gsi), def)) + && (!is_division_by_square (gsi_stmt (gsi), def))) gsi_next (&gsi); - gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); - } + gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); + } else if (def_gsi && occ->bb == def_gsi->bb) { /* Case 2: insert right after the definition. Note that this will @@ -403,6 +437,80 @@ insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ, } +/* Walk the tree until we find the first division by a square. Insert + the definition of the reciprocal there. This returns that definition, + or if there is an alternate definition earlier, then it returns that + instead. */ + +static tree +insert_square_reciprocals (struct occurrence *occ, tree def) +{ + gimple_stmt_iterator gsi = gsi_after_labels (occ->bb); + + while (!gsi_end_p (gsi) + && !is_division_by (gsi_stmt (gsi), def) + /* Check to see if a calculation statement has already + been inserted. */ + && !is_square_of (gsi_stmt (gsi), occ->recip_def)) + gsi_next (&gsi); + + if (gsi_end_p (gsi)) + return NULL; + else if (is_square_of (gsi_stmt (gsi), occ->recip_def)) + return gimple_assign_lhs (gsi_stmt (gsi)); + else + { + tree type = TREE_TYPE (def); + tree recip_square_def = create_tmp_reg (type, "powmult_reciptmp"); + gassign *new_stmt = gimple_build_assign (recip_square_def, MULT_EXPR, + occ->recip_def, occ->recip_def); + gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); + return recip_square_def; + } +} + +/* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)). + Unlike replace_reciprocals, we expect use_p to be a square definition + (i.e. use_p is _ = x * x). Iterate though all uses of this square + and replace them. */ +static inline void +replace_reciprocal_squares (use_operand_p use_p) +{ + gimple *use_stmt = USE_STMT (use_p); + basic_block bb = gimple_bb (use_stmt); + struct occurrence *occ = (struct occurrence *) bb->aux; + imm_use_iterator use_iter; + tree def_name = gimple_assign_lhs (use_stmt); + use_operand_p square_use_p; + tree squared_reciprocal = insert_square_reciprocals (occ, def_name); + + if (optimize_bb_for_speed_p (bb) && squared_reciprocal + && occ->recip_def && use_stmt != occ->recip_def_stmt) + { + + /* Find all occurrences of the use name and replace + them by multiplications of this new inverse. */ + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def_name) + { + FOR_EACH_IMM_USE_ON_STMT (square_use_p, use_iter) + { + gimple *square_use = USE_STMT (square_use_p); + + if (gimple_assign_rhs_code (square_use) == RDIV_EXPR) + { + gimple_assign_set_rhs_code (square_use, MULT_EXPR); + gimple_assign_set_rhs2 (square_use, squared_reciprocal); + SET_USE (square_use_p, squared_reciprocal); + + reciprocal_stats.rdivs_inserted++; + update_stmt (square_use); + } + } + } + } +} + + /* Replace the division at USE_P with a multiplication by the reciprocal, if possible. */ @@ -460,10 +568,12 @@ free_bb (struct occurrence *occ) static void execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def) { - use_operand_p use_p; - imm_use_iterator use_iter; + use_operand_p use_p, square_use_p; + imm_use_iterator use_iter, square_use_iter; + tree square_def; struct occurrence *occ; int count = 0, threshold; + int square_recip_count = 0; gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def)); @@ -475,11 +585,26 @@ execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def) register_division_in (gimple_bb (use_stmt)); count++; } + + if (is_square_of (use_stmt, def)) + { + square_def = gimple_assign_lhs (use_stmt); + FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def) + { + gimple *square_use_stmt = USE_STMT (square_use_p); + if (is_division_by (square_use_stmt, square_def)) + { + register_division_in (gimple_bb (square_use_stmt)); + square_recip_count++; + } + } + } } /* Do the expensive part only if we can hope to optimize something. */ threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def))); - if (count >= threshold) + if (count + square_recip_count >= threshold + && count >= 1) { gimple *use_stmt; for (occ = occ_head; occ; occ = occ->next) @@ -495,6 +620,11 @@ execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def) FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) replace_reciprocal (use_p); } + else if (is_square_of (use_stmt, def)) + { + FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) + replace_reciprocal_squares (use_p); + } } }