From: Ajit Agarwal <aagarwa1@linux.ibm.com>
To: gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Segher Boessenkool <segher@kernel.crashing.org>,
Peter Bergner <bergner@linux.ibm.com>,
Richard Biener <richard.guenther@gmail.com>,
Jeff Law <jeffreyalaw@gmail.com>,
Raphael Zinsly <rzinsly@ventanamicro.com>
Subject: [PATCH v2] rtl-optimization: ppc backend generates unnecessary extension.
Date: Tue, 28 Mar 2023 22:19:27 +0530 [thread overview]
Message-ID: <a22c34d6-07c5-80a5-f6d5-8aa49869a03d@linux.ibm.com> (raw)
Hello All:
This patch makes REE pass as a default pass in rs6000 target. And
add necessary subroutines to eliminate extensions across basic blocks.
Bootstrapped and regtested on powerpc64-linu-gnu.
Thanks & Regards
Ajit
rtl-optimization: ppc backend generates unnecessary
extension.
Eliminate unnecessary redundant zero extension across basic
blocks.
2023-03-28 Ajit Kumar Agarwal <aagarwa1@linux.ibm.com>
gcc/ChangeLog:
* ree.cc(insn_s_zext_p): New function.
* ree.cc(is_feasible_elim_across_basic_blocks):
New function.
* ree.cc(merge_def_and_ext): Add call to
is_feasible_elim_across_basic_blocks to check feasibility
of extension elimination across basic blocks.
* common/config/rs6000/rs6000-common.cc: Add free pass
as default pass in rs6000 target.
---
gcc/common/config/rs6000/rs6000-common.cc | 3 +-
gcc/ree.cc | 269 +++++++++++++++++-----
2 files changed, 209 insertions(+), 63 deletions(-)
diff --git a/gcc/common/config/rs6000/rs6000-common.cc b/gcc/common/config/rs6000/rs6000-common.cc
index 2140c442ba9..e7780dc0c5d 100644
--- a/gcc/common/config/rs6000/rs6000-common.cc
+++ b/gcc/common/config/rs6000/rs6000-common.cc
@@ -30,6 +30,8 @@
/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */
static const struct default_options rs6000_option_optimization_table[] =
{
+ /* Enable -free for zero extension and sign extension elimination.*/
+ { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
/* Split multi-word types early. */
{ OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
/* Enable -fsched-pressure for first pass instruction scheduling. */
@@ -42,7 +44,6 @@ static const struct default_options rs6000_option_optimization_table[] =
/* -frename-registers leads to non-optimal codegen and performance
on rs6000, turn it off by default. */
{ OPT_LEVELS_ALL, OPT_frename_registers, NULL, 0 },
-
/* Double growth factor to counter reduced min jump length. */
{ OPT_LEVELS_ALL, OPT__param_max_grow_copy_bb_insns_, NULL, 16 },
{ OPT_LEVELS_NONE, 0, NULL, 0 }
diff --git a/gcc/ree.cc b/gcc/ree.cc
index 63d8cf9f237..d05d37f9a23 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -253,6 +253,53 @@ struct ext_cand
static int max_insn_uid;
+/* Identify instruction AND with identical zero extension. */
+
+static unsigned int
+insn_is_zext_p(rtx insn)
+{
+ if (GET_CODE (insn) == AND)
+ {
+ rtx set = XEXP (insn, 0);
+ if (REG_P(set))
+ {
+ if (CONST_INT_P (XEXP (insn, 1))
+ && INTVAL (XEXP (insn, 1)) == 1)
+ return 1;
+ }
+ else
+ return 0;
+ }
+
+ return 0;
+}
+
+/* Identify instruction AND with identical zero extension. */
+
+static unsigned int
+insn_is_zext_p(rtx_insn * insn)
+{
+ rtx body = PATTERN (insn);
+
+ if (GET_CODE (body) == PARALLEL) return 0;
+
+ if (GET_CODE(body) == SET && GET_CODE (SET_SRC (body)) == AND)
+ {
+ rtx set = XEXP (SET_SRC(body), 0);
+
+ if (REG_P(set) && GET_MODE(SET_DEST(body))
+ == GET_MODE(set))
+ {
+ if (CONST_INT_P (XEXP (SET_SRC (body), 1))
+ && INTVAL (XEXP (SET_SRC (body), 1)) == 1)
+ return 1;
+ }
+ else
+ return 0;
+ }
+ return 0;
+}
+
/* Update or remove REG_EQUAL or REG_EQUIV notes for INSN. */
static bool
@@ -297,6 +344,31 @@ update_reg_equal_equiv_notes (rtx_insn *insn, machine_mode new_mode,
return true;
}
+/* Return true if INSN is
+ (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2)))
+ and store x1 and x2 in REG_1 and REG_2. */
+
+static bool
+is_cond_copy_insn (rtx_insn *insn, rtx *reg1, rtx *reg2)
+{
+ rtx expr = single_set (insn);
+
+ if (expr != NULL_RTX
+ && GET_CODE (expr) == SET
+ && GET_CODE (SET_DEST (expr)) == REG
+ && GET_CODE (SET_SRC (expr)) == IF_THEN_ELSE
+ && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
+ && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG)
+ {
+ *reg1 = XEXP (SET_SRC (expr), 1);
+ *reg2 = XEXP (SET_SRC (expr), 2);
+ return true;
+ }
+
+ return false;
+}
+
+
/* Given a insn (CURR_INSN), an extension candidate for removal (CAND)
and a pointer to the SET rtx (ORIG_SET) that needs to be modified,
this code modifies the SET rtx to a new SET rtx that extends the
@@ -321,6 +393,9 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, rtx *orig_set)
machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
rtx new_set = NULL_RTX;
rtx cand_pat = single_set (cand->insn);
+ if (insn_is_zext_p(cand->insn)
+ && CONST_INT_P (orig_src) && INTVAL (orig_src) != 0)
+ return false;
/* If the extension's source/destination registers are not the same
then we need to change the original load to reference the destination
@@ -359,8 +434,14 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, rtx *orig_set)
else if (GET_CODE (orig_src) == cand->code)
{
/* Here is a sequence of two extensions. Try to merge them. */
- rtx temp_extension
- = gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
+ rtx temp_extension = NULL_RTX;
+ if (GET_CODE (SET_SRC (cand_pat)) == AND)
+ temp_extension
+ = gen_rtx_fmt_ee (cand->code, cand->mode, XEXP (orig_src, 0),
+ XEXP (orig_src, 1));
+ else
+ temp_extension
+ = gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
rtx simplified_temp_extension = simplify_rtx (temp_extension);
if (simplified_temp_extension)
temp_extension = simplified_temp_extension;
@@ -370,8 +451,9 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, rtx *orig_set)
else if (GET_CODE (orig_src) == IF_THEN_ELSE)
{
/* Only IF_THEN_ELSE of phi-type copies are combined. Otherwise,
- in general, IF_THEN_ELSE should not be combined. */
- return true;
+ in general, IF_THEN_ELSE should not be combined. Relaxed
+ cases with IF_THEN_ELSE across basic blocls */
+ return true;
}
else
{
@@ -541,30 +623,6 @@ get_uses (rtx_insn *insn, rtx reg)
return ref_chain;
}
-/* Return true if INSN is
- (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2)))
- and store x1 and x2 in REG_1 and REG_2. */
-
-static bool
-is_cond_copy_insn (rtx_insn *insn, rtx *reg1, rtx *reg2)
-{
- rtx expr = single_set (insn);
-
- if (expr != NULL_RTX
- && GET_CODE (expr) == SET
- && GET_CODE (SET_DEST (expr)) == REG
- && GET_CODE (SET_SRC (expr)) == IF_THEN_ELSE
- && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
- && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG)
- {
- *reg1 = XEXP (SET_SRC (expr), 1);
- *reg2 = XEXP (SET_SRC (expr), 2);
- return true;
- }
-
- return false;
-}
-
enum ext_modified_kind
{
/* The insn hasn't been modified by ree pass yet. */
@@ -709,6 +767,90 @@ get_sub_rtx (rtx_insn *def_insn)
return sub_rtx;
}
+/* Find feasibility of extension elimination
+ across basic blocks. */
+
+static bool
+is_feasible_elim_across_basic_blocks (ext_cand *cand,
+ rtx_insn *def_insn)
+{
+ basic_block bb = BLOCK_FOR_INSN (cand->insn);
+ edge fallthru_edge;
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ {
+ rtx_insn *insn = BB_END (e->src) ? PREV_INSN (BB_END (e->src)) : NULL;
+
+ if (insn && NONDEBUG_INSN_P (insn)
+ && GET_CODE (PATTERN (insn)) == SET && SET_SRC (PATTERN(insn))
+ && GET_CODE (SET_SRC (PATTERN (insn))) == IF_THEN_ELSE)
+ {
+ if (e->dest == bb)
+ {
+ basic_block jump_block = e->dest;
+ if (jump_block == bb)
+ {
+ if (single_succ_p (BLOCK_FOR_INSN (def_insn)))
+ {
+ fallthru_edge = single_succ_edge (BLOCK_FOR_INSN (def_insn));
+ if (BB_END (fallthru_edge->dest) && (bb != fallthru_edge->dest
+ || e->dest != fallthru_edge->dest))
+ return false;
+ }
+ else
+ return false;
+ }
+ else return false;
+ }
+ else
+ {
+ if (single_succ_p (e->dest))
+ {
+ fallthru_edge = single_succ_edge (e->dest);
+ if (BB_END (fallthru_edge->dest) && bb != fallthru_edge->dest)
+ return false;
+ }
+ }
+ }
+ }
+ if (single_succ_p (BLOCK_FOR_INSN (def_insn)))
+ {
+ fallthru_edge = single_succ_edge (BLOCK_FOR_INSN (def_insn));
+ if (BB_END (fallthru_edge->dest) && bb != fallthru_edge->dest)
+ return false;
+ }
+ else
+ return false;
+
+ rtx set = single_set(cand->insn);
+ /* The destination register of the extension insn must not be
+ used or set between the def_insn and cand->insn exclusive. */
+ if (INSN_CHAIN_CODE_P (GET_CODE (def_insn))
+ && INSN_CHAIN_CODE_P (cand->code))
+ if ((cand->code == ZERO_EXTEND)
+ && REG_P (SET_DEST (set)) && NEXT_INSN (def_insn)
+ && (reg_used_between_p (SET_DEST (set), def_insn, cand->insn)
+ || reg_set_between_p (SET_DEST (set), def_insn, cand->insn)))
+ return false;
+
+ if (cand->code == ZERO_EXTEND && (bb != BLOCK_FOR_INSN (def_insn)
+ || DF_INSN_LUID (def_insn) > DF_INSN_LUID (cand->insn)))
+ return false;
+
+ if (insn_is_zext_p (cand->insn)
+ && GET_CODE (PATTERN (BB_END (bb))) != USE)
+ return false;
+
+ if (insn_is_zext_p (cand->insn)
+ && GET_CODE (PATTERN (BB_END (bb))) == USE
+ && REGNO (XEXP (PATTERN (BB_END (bb)), 0)) != REGNO (SET_DEST (cand->expr)))
+ return false;
+
+ return true;
+}
+
/* Merge the DEF_INSN with an extension. Calls combine_set_extension
on the SET pattern. */
@@ -727,12 +869,18 @@ merge_def_and_ext (ext_cand *cand, rtx_insn *def_insn, ext_state *state)
bool copy_needed
= (REGNO (SET_DEST (cand->expr)) != REGNO (XEXP (SET_SRC (cand->expr), 0)));
- if (!copy_needed || (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
- || ((state->modified[INSN_UID (def_insn)].kind
- == (cand->code == ZERO_EXTEND || cand->code == AND
+ bool feasible = is_feasible_elim_across_basic_blocks (cand, def_insn);
+
+ if (!feasible) return false;
+
+ if (((!copy_needed && (insn_is_zext_p (cand->insn))
+ && (GET_MODE (SET_DEST (*sub_rtx)) != ext_src_mode
+ && state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE))
+ || ((state->modified[INSN_UID (def_insn)].kind
+ == (cand->code == ZERO_EXTEND
? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT))
- && state->modified[INSN_UID (def_insn)].mode
- == ext_src_mode)))
+ && state->modified[INSN_UID (def_insn)].mode
+ == ext_src_mode)))
{
if (GET_MODE_UNIT_SIZE (GET_MODE (SET_DEST (*sub_rtx)))
>= GET_MODE_UNIT_SIZE (cand->mode))
@@ -759,7 +907,7 @@ static inline rtx
get_extended_src_reg (rtx src)
{
while (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND
- || GET_CODE (src) == AND)
+ || insn_is_zext_p(src))
src = XEXP (src, 0);
gcc_assert (REG_P (src));
return src;
@@ -1008,7 +1156,7 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
machine_mode mode;
if (state->modified[INSN_UID (cand->insn)].kind
- != (cand->code == ZERO_EXTEND || cand->code == AND
+ != (cand->code == ZERO_EXTEND
? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT)
|| state->modified[INSN_UID (cand->insn)].mode != cand->mode
|| (set == NULL_RTX))
@@ -1019,7 +1167,7 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
cand->mode = mode;
}
- merge_successful = true;
+ merge_successful = false;
/* Go through the defs vector and try to merge all the definitions
in this vector. */
@@ -1027,7 +1175,10 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
FOR_EACH_VEC_ELT (state->defs_list, defs_ix, def_insn)
{
if (merge_def_and_ext (cand, def_insn, state))
- state->modified_list.safe_push (def_insn);
+ {
+ merge_successful = true;
+ state->modified_list.safe_push (def_insn);
+ }
else
{
merge_successful = false;
@@ -1058,14 +1209,7 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
cannot be merged, we entirely give up. In the future, we should allow
extensions to be partially eliminated along those paths where the
definitions could be merged. */
- int num_clobbers = 0;
- int icode = recog (cand->insn, cand->insn,
- (GET_CODE (cand->expr) == SET
- && ! reload_completed
- && ! reload_in_progress)
- ? &num_clobbers : 0);
-
- if (apply_change_group () || (icode < 0))
+ if (apply_change_group ())
{
if (dump_file)
fprintf (dump_file, "All merges were successful.\n");
@@ -1074,7 +1218,7 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
{
ext_modified *modified = &state->modified[INSN_UID (def_insn)];
if (modified->kind == EXT_MODIFIED_NONE)
- modified->kind = (cand->code == ZERO_EXTEND || cand->code == AND ? EXT_MODIFIED_ZEXT
+ modified->kind = (cand->code == ZERO_EXTEND ? EXT_MODIFIED_ZEXT
: EXT_MODIFIED_SEXT);
if (copy_needed)
@@ -1082,19 +1226,19 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
}
return true;
}
- else
- {
- /* Changes need not be cancelled explicitly as apply_change_group
- does it. Print list of definitions in the dump_file for debug
- purposes. This extension cannot be deleted. */
- if (dump_file)
- {
- fprintf (dump_file,
- "Merge cancelled, non-mergeable definitions:\n");
- FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
- print_rtl_single (dump_file, def_insn);
- }
- }
+ else
+ {
+ /* Changes need not be cancelled explicitly as apply_change_group
+ does it. Print list of definitions in the dump_file for debug
+ purposes. This extension cannot be deleted. */
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "Merge cancelled, non-mergeable definitions:\n");
+ FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
+ print_rtl_single (dump_file, def_insn);
+ }
+ }
}
else
{
@@ -1128,7 +1272,7 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
mode = GET_MODE (dest);
if (REG_P (dest)
- && (code == SIGN_EXTEND || code == ZERO_EXTEND || code == AND)
+ && (code == SIGN_EXTEND || code == ZERO_EXTEND || insn_is_zext_p(src))
&& REG_P (XEXP (src, 0)))
{
rtx reg = XEXP (src, 0);
@@ -1140,7 +1284,7 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
a path from the entry to this zero-extension that leaves this register
uninitialized, removing the extension could change the behavior of
correct programs. So first, check it is not the case. */
- if (code == ZERO_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg)))
+ if ((code == ZERO_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg))))
{
if (dump_file)
{
@@ -1390,9 +1534,10 @@ find_and_remove_re (void)
reinsn_list.release ();
XDELETEVEC (state.modified);
- if (dump_file && num_re_opportunities > 0)
+ if (dump_file && num_re_opportunities > 0)
fprintf (dump_file, "Elimination opportunities = %d realized = %d\n",
num_re_opportunities, num_realized);
+
}
/* Find and remove redundant extensions. */
--
2.31.1
next reply other threads:[~2023-03-28 16:49 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-28 16:49 Ajit Agarwal [this message]
2023-03-30 12:00 ` Ajit Agarwal
2023-03-30 14:28 ` Bernhard Reutner-Fischer
2023-03-30 15:28 ` Segher Boessenkool
2023-03-30 13:41 ` Segher Boessenkool
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=a22c34d6-07c5-80a5-f6d5-8aa49869a03d@linux.ibm.com \
--to=aagarwa1@linux.ibm.com \
--cc=bergner@linux.ibm.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=jeffreyalaw@gmail.com \
--cc=richard.guenther@gmail.com \
--cc=rzinsly@ventanamicro.com \
--cc=segher@kernel.crashing.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).