From: Ajit Agarwal <aagarwa1@linux.ibm.com>
To: gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Segher Boessenkool <segher@kernel.crashing.org>, bergner@linux.ibm.com
Subject: [PATCH] rs6000: suboptimal code for returning bool value on target ppc
Date: Thu, 16 Mar 2023 10:50:21 +0530 [thread overview]
Message-ID: <86cf8475-4353-52ca-869c-75f40bd7d06f@linux.ibm.com> (raw)
Hello All:
This patch eliminates an unnecessary zero-extension instruction from the assembly generated for Power.
Bootstrapped and regtested on powerpc64-linux-gnu.
Thanks & Regards
Ajit
rs6000: suboptimal code for returning bool value on target ppc.
Add a new pass to eliminate unnecessary zero extensions. The pass
is registered after the first cse rtl pass.
2023-03-16 Ajit Kumar Agarwal <aagarwa1@linux.ibm.com>
gcc/ChangeLog:
* config/rs6000/rs6000-passes.def: Register zero-extension
elimination pass.
* config/rs6000/rs6000-zext-elim.cc: Add new pass.
* config.gcc: Add new object file rs6000-zext-elim.o.
* config/rs6000/rs6000-protos.h: Add new prototype for
zero-extension elimination pass.
* config/rs6000/rs6000.cc: Add new prototype for
zero-extension elimination pass.
* config/rs6000/t-rs6000: Add new rule.
* expr.cc (emit_move_insn): Relax gcc_assert to also accept DImode.
* explow.cc (copy_to_mode_reg): Relax gcc_assert to also accept DImode.
* optabs.cc (maybe_legitimize_operand): Relax gcc_assert to also
accept DImode.
---
gcc/config.gcc | 4 +-
gcc/config/rs6000/rs6000-passes.def | 2 +
gcc/config/rs6000/rs6000-protos.h | 1 +
gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++++++++++++++++++++++++++
gcc/config/rs6000/rs6000.cc | 2 +
gcc/config/rs6000/t-rs6000 | 5 +
gcc/explow.cc | 3 +-
gcc/expr.cc | 4 +-
gcc/optabs.cc | 3 +-
9 files changed, 379 insertions(+), 6 deletions(-)
create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc
diff --git a/gcc/config.gcc b/gcc/config.gcc
index da3a6d3ba1f..e8ac9d882f0 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -503,7 +503,7 @@ or1k*-*-*)
;;
powerpc*-*-*)
cpu_type=rs6000
- extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
+ extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
@@ -538,7 +538,7 @@ riscv*)
;;
rs6000*-*-*)
extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
- extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
+ extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.cc \$(srcdir)/config/rs6000/rs6000-call.cc"
target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def
index ca899d5f7af..d7500feddf1 100644
--- a/gcc/config/rs6000/rs6000-passes.def
+++ b/gcc/config/rs6000/rs6000-passes.def
@@ -28,6 +28,8 @@ along with GCC; see the file COPYING3. If not see
The power8 does not have instructions that automaticaly do the byte swaps
for loads and stores. */
INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
+ INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
+
/* Pass to do the PCREL_OPT optimization that combines the load of an
external symbol's address along with a single load or store using that
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 1a4fc1df668..f6cf2d673d4 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -340,6 +340,7 @@ namespace gcc { class context; }
class rtl_opt_pass;
extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
+extern rtl_opt_pass *make_pass_analyze_zext (gcc::context *);
extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
extern bool rs6000_quadword_masked_address_p (const_rtx exp);
diff --git a/gcc/config/rs6000/rs6000-zext-elim.cc b/gcc/config/rs6000/rs6000-zext-elim.cc
new file mode 100644
index 00000000000..777c7a5a387
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-zext-elim.cc
@@ -0,0 +1,361 @@
+/* Subroutine to eliminate redundant zero extend for power architecture.
+ Copyright (C) 1991-2023 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* This pass removes unnecessary zero-extension instructions from
+   the assembly generated for Power.  This pass is registered after
+   the cse pass.
+   It identifies the following sequence of instructions after the
+   cse rtl pass:
+
+   set compare (subreg)
+   set if_then_else
+   set SImode -> QImode
+   set zero_extend to DImode from QImode
+   set return value 0 in one path of cfg.
+   set return value 1 in other path of cfg.
+
+   In the cfgexpand pass a QImode register is generated for the
+   bool value, and this pass treats such QImode values as
+   64-bit registers.
+
+   This pass replaces the extension from QImode to DImode with a
+   copy operation and returns the appropriate return values.  */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "tree.h"
+#include "memmodel.h"
+#include "df.h"
+#include "tm_p.h"
+#include "ira.h"
+#include "print-tree.h"
+#include "varasm.h"
+#include "explow.h"
+#include "expr.h"
+#include "output.h"
+#include "tree-pass.h"
+
+/* Per-insn record used by this pass; the array of records is indexed
+   by INSN_UID.  This is based on the union-find logic in web.cc.
+   web_entry_base is defined in df.h.  */
+class zext_web_entry : public web_entry_base
+{
+public:
+  /* Pointer to the insn.  */
+  rtx_insn *insn;
+  /* Set for every insn that has at least one def, and for every
+     zero-extension insn reached while walking a def's chain.  */
+  unsigned int is_relevant : 1;
+  /* Set if insn is a load.  Nothing in this file sets it.  */
+  unsigned int is_load : 1;
+  /* Set if insn is a store, i.e. it satisfies insn_is_store_p.  */
+  unsigned int is_store : 1;
+  /* Set if insn zero-extends a QImode register to DImode.  */
+  unsigned int is_zext : 1;
+  /* Set on a zero-extension insn once a second register-copy use with
+     the same source register as the first recorded one is seen.  */
+  unsigned int is_move : 1;
+  /* Set on a zero-extension insn (already marked will_delete) when its
+     first register-copy use has been recorded.  */
+  unsigned int is_delete_move : 1;
+  /* Set if this insn should be deleted.  */
+  unsigned int will_delete : 1;
+  /* Set while the compare -> if_then_else -> QImode-set chain leading
+     to this insn has matched so far.  */
+  unsigned int will_delete_chances : 1;
+};
+
+/* Return 1 if the pattern of INSN is a SET whose destination has
+   DImode and whose source is a ZERO_EXTEND of a QImode register;
+   return 0 otherwise.  */
+static unsigned int
+insn_is_zext_p (rtx insn)
+{
+  rtx pat = PATTERN (insn);
+
+  if (GET_CODE (pat) != SET)
+    return 0;
+
+  if (GET_MODE (SET_DEST (pat)) != DImode)
+    return 0;
+
+  rtx src = SET_SRC (pat);
+  if (GET_CODE (src) != ZERO_EXTEND)
+    return 0;
+
+  /* Only an extension whose operand is a QImode register qualifies.  */
+  rtx operand = XEXP (src, 0);
+  return (REG_P (operand) && GET_MODE (operand) == QImode) ? 1 : 0;
+}
+
+/* Return 1 if the pattern of INSN is a SET whose source is a QImode
+   SUBREG of an SImode expression; return 0 otherwise.  */
+static unsigned int
+insn_is_store_p (rtx insn)
+{
+  rtx pat = PATTERN (insn);
+  if (GET_CODE (pat) != SET)
+    return 0;
+
+  /* An rtx that is a SUBREG cannot also be a CONST_INT, so the
+     SUBREG test alone is sufficient here.  */
+  rtx src = SET_SRC (pat);
+  if (!SUBREG_P (src))
+    return 0;
+
+  if (GET_MODE (XEXP (src, 0)) != SImode)
+    return 0;
+
+  return GET_MODE (src) == QImode ? 1 : 0;
+}
+
+/* Find zero-extension removal candidates by walking the chain of refs
+ linked from DEF, a def belonging to INSN. INSN_ENTRY is the
+ per-insn record array indexed by INSN_UID; this function only
+ updates flags in it (and unions web entries), it does not change
+ any insn.
+
+ For each chained ref that belongs to a real insn (USE_INSN) the
+ following independent checks are made:
+ - INSN is a COMPARE of a SUBREG and USE_INSN is an IF_THEN_ELSE
+ set testing INSN's destination register: USE_INSN becomes a
+ candidate (will_delete_chances).
+ - INSN is an SImode IF_THEN_ELSE set and USE_INSN satisfies
+ insn_is_store_p: the candidate flag is propagated to USE_INSN.
+ - INSN is a zero extension and USE_INSN copies a register: the
+ first such use is remembered, a later one with the same source
+ register marks INSN as is_move and ends the walk.
+ - USE_INSN is a zero extension: it is flagged, and if INSN is a
+ candidate insn_is_store_p insn both are marked will_delete. */
+static void
+find_zero_ext_elimination_candidate (zext_web_entry *insn_entry,
+ rtx insn, df_ref def)
+{
+ struct df_link *link = DF_REF_CHAIN (def);
+
+ /* First register-copy use recorded for a zero-extension INSN. */
+ rtx move_insn = NULL_RTX;
+ rtx compare_insn = NULL_RTX;
+
+ while (link)
+ {
+ /* A chained ref without insn info disqualifies INSN as a
+ candidate. */
+ if (!DF_REF_INSN_INFO (link->ref))
+ insn_entry[INSN_UID(insn)].will_delete_chances = 0;
+
+ if (DF_REF_INSN_INFO (link->ref))
+ {
+ rtx use_insn = DF_REF_INSN (link->ref);
+
+ /* Case 1: compare (subreg ...) feeding an if_then_else. */
+ if (GET_CODE (PATTERN (use_insn)) == SET
+ && (GET_CODE (SET_SRC (PATTERN (use_insn))) == IF_THEN_ELSE))
+ {
+ if (GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (SET_SRC (PATTERN (insn))) == COMPARE)
+ {
+ /* First operand of the COMPARE. */
+ rtx body = XEXP (SET_SRC (PATTERN (insn)), 0);
+
+ if (SUBREG_P (body))
+ {
+ compare_insn = use_insn;
+ /* Operand 0 of the IF_THEN_ELSE, i.e. its condition. */
+ rtx compare_body = XEXP (SET_SRC (PATTERN (compare_insn)), 0);
+
+ /* NOTE(review): compare_insn was assigned from use_insn
+ just above, so the first test looks always true; REGNO
+ is applied to both operands without a REG_P check --
+ confirm both are always registers here. */
+ if (compare_insn
+ && ((REGNO (XEXP (compare_body, 0)))
+ == REGNO (SET_DEST (PATTERN (insn)))))
+ insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
+ }
+ }
+ }
+
+ /* Case 2: SImode if_then_else result feeding a QImode subreg
+ set; pass the candidate flag from INSN on to USE_INSN. */
+ if (insn_is_store_p(use_insn)
+ && GET_CODE (PATTERN (insn)) == SET
+ && (GET_CODE (SET_SRC (PATTERN(insn))) == IF_THEN_ELSE))
+ {
+ if (GET_MODE (SET_DEST (PATTERN (insn))) == SImode)
+ {
+ if (insn_entry[INSN_UID(insn)].will_delete_chances)
+ insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
+ }
+ }
+
+ /* Case 3: INSN is the zero extension and USE_INSN is a plain
+ register copy. */
+ if (insn_is_zext_p (insn))
+ {
+ if (GET_CODE (PATTERN (use_insn)) == SET
+ && REG_P (SET_SRC (PATTERN (use_insn))))
+ {
+ /* A second copy from the same source register as the
+ recorded one: mark INSN as a move and stop walking. */
+ if (move_insn
+ && REGNO (SET_SRC (PATTERN (use_insn)))
+ == REGNO (SET_SRC (PATTERN (move_insn)))
+ && insn_entry[INSN_UID(insn)].is_delete_move)
+ {
+ insn_entry[INSN_UID (insn)].is_move = 1;
+ break;
+ }
+ /* Otherwise, if INSN is already marked for deletion, record
+ this copy (a later matching copy replaces nothing; a
+ non-matching one overwrites the record). */
+ else if (insn_entry[INSN_UID (insn)].will_delete)
+ {
+ move_insn = use_insn;
+ insn_entry[INSN_UID(insn)].is_delete_move= 1;
+ }
+ }
+ }
+
+ /* Case 4: USE_INSN is the zero extension. */
+ if (insn_is_zext_p (use_insn))
+ {
+ insn_entry[INSN_UID (use_insn)].is_zext = 1;
+ insn_entry[INSN_UID(use_insn)].is_relevant = 1;
+
+ /* INSN is a candidate QImode subreg set: mark both it and the
+ zero extension for replacement. */
+ if (insn_is_store_p (insn)
+ && insn_entry[INSN_UID (insn)].will_delete_chances)
+ {
+ insn_entry[INSN_UID (use_insn)].will_delete = 1;
+ insn_entry[INSN_UID (insn)].will_delete = 1;
+ insn_entry[INSN_UID( insn)].is_store = 1;
+ }
+
+ /* Put INSN and USE_INSN into the same web. */
+ if (NONDEBUG_INSN_P (use_insn))
+ unionfind_union (insn_entry + INSN_UID (insn),
+ insn_entry + INSN_UID (use_insn));
+ }
+ }
+
+ link = link->next;
+ }
+}
+
+/* Replace the insn recorded in INSN_ENTRY[I] with a DImode register
+   copy.  Operand 0 of the SET source and, when it is not already
+   DImode, the SET destination are switched to DImode; a new SET of
+   the destination from that operand is emitted before the old insn,
+   and the old insn is then deleted.  */
+static void
+replace_marked_insns (zext_web_entry *insn_entry, unsigned i)
+{
+  rtx_insn *old_insn = insn_entry[i].insn;
+  rtx pat = PATTERN (old_insn);
+
+  /* The expression wrapped by the SET source (the callers pass insns
+     whose source is a subreg or a zero_extend).
+     NOTE(review): set_mode_and_regno modifies the rtx in place --
+     confirm this rtx is not shared with other insns.  */
+  rtx inner = XEXP (SET_SRC (pat), 0);
+  set_mode_and_regno (inner, DImode, REGNO (inner));
+
+  rtx dest = SET_DEST (pat);
+  if (GET_MODE (dest) != DImode)
+    set_mode_and_regno (dest, DImode, REGNO (dest));
+
+  /* Emit the plain copy in front of the insn being replaced and make
+     the dataflow framework aware of it.  */
+  rtx_insn *copy_insn
+    = emit_insn_before (gen_rtx_SET (dest, inner), old_insn);
+  set_block_for_insn (copy_insn, BLOCK_FOR_INSN (old_insn));
+  df_insn_rescan (copy_insn);
+
+  /* Drop the original insn from df and from the insn chain.  */
+  df_insn_delete (old_insn);
+  remove_insn (old_insn);
+  old_insn->set_deleted ();
+}
+
+/* Main entry point for this pass.  Build du- and ud-chains for FUN,
+   walk every non-debug insn to classify it and to mark replacement
+   candidates through find_zero_ext_elimination_candidate, then
+   rewrite each marked QImode-subreg set / zero-extension pair as
+   register copies.  Always returns 0.  */
+unsigned int
+rs6000_analyze_zext (function *fun)
+{
+  zext_web_entry *insn_entry;
+  basic_block bb;
+  rtx_insn *insn, *curr_insn = 0;
+
+  /* Dataflow analysis for use-def chains.  No insn is changed between
+     this analysis and the walk below, so the chains are built only
+     once.  */
+  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
+  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
+  df_analyze ();
+  df_set_flags (DF_DEFER_INSN_RESCAN);
+
+  /* Allocate structure to represent webs of insns.  Insns emitted
+     later by replace_marked_insns get UIDs beyond MAX_UID and are
+     never looked up in this array.  */
+  const unsigned int max_uid = get_max_uid ();
+  insn_entry = XCNEWVEC (zext_web_entry, max_uid);
+
+  /* Walk the insns to gather basic data.  */
+  FOR_ALL_BB_FN (bb, fun)
+    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
+      {
+        unsigned int uid = INSN_UID (insn);
+        if (NONDEBUG_INSN_P (insn))
+          {
+            insn_entry[uid].insn = insn;
+
+            /* Record whether this is a QImode subreg set.  The result
+               of the predicate is tested directly; it must not be
+               compared against the rtx code of the insn.  */
+            const unsigned int store_p = insn_is_store_p (insn);
+            insn_entry[uid].is_store = store_p;
+            if (store_p)
+              insn_entry[uid].is_relevant = 1;
+
+            /* Walk the defs to identify the optimization
+               candidates.  */
+            struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+            df_ref mention;
+
+            FOR_EACH_INSN_INFO_DEF (mention, insn_info)
+              {
+                insn_entry[uid].is_relevant = 1;
+                find_zero_ext_elimination_candidate (insn_entry, insn,
+                                                     mention);
+              }
+          }
+      }
+
+  /* UID of the most recent marked subreg set, or -1 if none is
+     pending.  */
+  int store_index = -1;
+
+  /* Replace with copy operation.  */
+  for (unsigned int i = 0; i < max_uid; ++i)
+    {
+      if (insn_entry[i].is_store && insn_entry[i].will_delete)
+        store_index = i;
+
+      if (store_index != -1
+          && insn_entry[i].is_move && insn_entry[i].will_delete)
+        {
+          replace_marked_insns (insn_entry, store_index);
+          replace_marked_insns (insn_entry, i);
+
+          /* The subreg set has just been deleted; forget it so that a
+             later marked move cannot rewrite the deleted insn a
+             second time.  */
+          store_index = -1;
+        }
+    }
+
+  /* Clean up.  */
+  free (insn_entry);
+
+  return 0;
+}
+
+/* Static description of the zero-extension elimination pass: an RTL
+ pass named "zext" with no timevar and no required, provided or
+ destroyed properties, which requests TODO_df_finish when it
+ finishes. */
+const pass_data pass_data_analyze_zext =
+{
+ RTL_PASS, /* type */
+ "zext", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_df_finish, /* todo_flags_finish */
+};
+
+/* RTL pass object wrapping rs6000_analyze_zext.  */
+class pass_analyze_zext : public rtl_opt_pass
+{
+public:
+  pass_analyze_zext (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_analyze_zext, ctxt)
+  {
+  }
+
+  /* opt_pass methods: */
+
+  /* The pass runs only when optimizing.  */
+  virtual bool gate (function *)
+  {
+    return optimize > 0;
+  }
+
+  /* Run the zero-extension elimination on FUN.  */
+  virtual unsigned int execute (function *fun)
+  {
+    return rs6000_analyze_zext (fun);
+  }
+
+  /* Create another instance of this pass in the same context.  */
+  opt_pass *clone ()
+  {
+    return new pass_analyze_zext (m_ctxt);
+  }
+
+}; // class pass_analyze_zext
+
+/* Create a new instance of the zero-extension elimination pass for
+   CTXT.  */
+rtl_opt_pass *
+make_pass_analyze_zext (gcc::context *ctxt)
+{
+  rtl_opt_pass *pass = new pass_analyze_zext (ctxt);
+  return pass;
+}
+
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 8e0b0d022db..6541334bf2d 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1178,6 +1178,8 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
+rtl_opt_pass *make_pass_analyze_zext (gcc::context*);
+
/* Hash table stuff for keeping track of TOC entries. */
struct GTY((for_user)) toc_hash_struct
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index f183b42ce1d..c1f61591d2f 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -35,6 +35,11 @@ rs6000-p8swap.o: $(srcdir)/config/rs6000/rs6000-p8swap.cc
$(COMPILE) $<
$(POSTCOMPILE)
+rs6000-zext-elim.o: $(srcdir)/config/rs6000/rs6000-zext-elim.cc
+ $(COMPILE) $<
+ $(POSTCOMPILE)
+
+
rs6000-d.o: $(srcdir)/config/rs6000/rs6000-d.cc
$(COMPILE) $<
$(POSTCOMPILE)
diff --git a/gcc/explow.cc b/gcc/explow.cc
index 32e9498ee07..316aa975e40 100644
--- a/gcc/explow.cc
+++ b/gcc/explow.cc
@@ -654,7 +654,8 @@ copy_to_mode_reg (machine_mode mode, rtx x)
if (! general_operand (x, VOIDmode))
x = force_operand (x, temp);
- gcc_assert (GET_MODE (x) == mode || GET_MODE (x) == VOIDmode);
+ gcc_assert (mode == DImode || GET_MODE (x) == mode
+ || GET_MODE (x) == VOIDmode);
if (x != temp)
emit_move_insn (temp, x);
return temp;
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 15be1c8db99..6162ef92b88 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -4223,9 +4223,9 @@ emit_move_insn (rtx x, rtx y)
rtx y_cst = NULL_RTX;
rtx_insn *last_insn;
rtx set;
-
gcc_assert (mode != BLKmode
- && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
+ && (mode == DImode || GET_MODE (y) == mode
+ || GET_MODE (y) == VOIDmode));
/* If we have a copy that looks like one of the following patterns:
(set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 4c641cab192..9d22fadc7ef 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -7902,7 +7902,8 @@ maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
input:
gcc_assert (mode != VOIDmode);
gcc_assert (GET_MODE (op->value) == VOIDmode
- || GET_MODE (op->value) == mode);
+ || GET_MODE (op->value) == mode
+ || mode == DImode);
if (maybe_legitimize_operand_same_code (icode, opno, op))
return true;
--
2.31.1
next reply other threads:[~2023-03-16 5:20 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-16 5:20 Ajit Agarwal [this message]
2023-03-16 7:40 ` Richard Biener
2023-03-16 8:11 ` Ajit Agarwal
2023-03-16 8:14 ` Richard Biener
2023-03-16 8:19 ` Ajit Agarwal
2023-03-16 9:52 ` Richard Biener
2023-03-16 10:11 ` Ajit Agarwal
2023-03-16 10:30 ` Richard Biener
2023-03-16 10:43 ` Ajit Agarwal
2023-03-16 10:56 ` Richard Biener
2023-03-16 11:43 ` Ajit Agarwal
2023-03-16 14:48 ` Jeff Law
2023-03-17 11:49 ` Ajit Agarwal
2023-03-17 3:37 ` Surya Kumari Jangala
2023-03-17 21:20 ` Peter Bergner
2023-03-18 3:53 ` Peter Bergner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=86cf8475-4353-52ca-869c-75f40bd7d06f@linux.ibm.com \
--to=aagarwa1@linux.ibm.com \
--cc=bergner@linux.ibm.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=segher@kernel.crashing.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).