public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] rs6000: suboptimal code for returning bool value on target ppc
@ 2023-03-16  5:20 Ajit Agarwal
  2023-03-16  7:40 ` Richard Biener
  2023-03-17  3:37 ` Surya Kumari Jangala
  0 siblings, 2 replies; 16+ messages in thread
From: Ajit Agarwal @ 2023-03-16  5:20 UTC (permalink / raw)
  To: gcc-patches; +Cc: Segher Boessenkool, bergner

Hello All:


This patch eliminates an unnecessary zero-extension instruction from the assembly generated for Power.
Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

	rs6000: suboptimal code for returning bool value on target ppc.

	Add a new pass that eliminates an unnecessary zero extension.  The
	pass is registered after the cse rtl pass.

	2023-03-16  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>

gcc/ChangeLog:

	* config/rs6000/rs6000-passes.def: Register zero-extension
	elimination pass.
	* config/rs6000/rs6000-zext-elim.cc: Add new pass.
	* config.gcc: Add new object file rs6000-zext-elim.o.
	* config/rs6000/rs6000-protos.h: Add new prototype for
	zero-extension elimination pass.
	* config/rs6000/rs6000.cc: Add new prototype for
	zero-extension elimination pass.
	* config/rs6000/t-rs6000: Add new rule.
	* expr.cc (emit_move_insn): Relax mode assert to accept DImode.
	* explow.cc (copy_to_mode_reg): Relax mode assert to accept DImode.
	* optabs.cc (maybe_legitimize_operand): Relax mode assert to accept DImode.
---
 gcc/config.gcc                        |   4 +-
 gcc/config/rs6000/rs6000-passes.def   |   2 +
 gcc/config/rs6000/rs6000-protos.h     |   1 +
 gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++++++++++++++++++++++++++
 gcc/config/rs6000/rs6000.cc           |   2 +
 gcc/config/rs6000/t-rs6000            |   5 +
 gcc/explow.cc                         |   3 +-
 gcc/expr.cc                           |   4 +-
 gcc/optabs.cc                         |   3 +-
 9 files changed, 379 insertions(+), 6 deletions(-)
 create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc

diff --git a/gcc/config.gcc b/gcc/config.gcc
index da3a6d3ba1f..e8ac9d882f0 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -503,7 +503,7 @@ or1k*-*-*)
 	;;
 powerpc*-*-*)
 	cpu_type=rs6000
-	extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
+	extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
 	extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
 	extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
 	extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
@@ -538,7 +538,7 @@ riscv*)
 	;;
 rs6000*-*-*)
 	extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
-	extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
+	extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o rs6000-logue.o"
 	extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
 	target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.cc \$(srcdir)/config/rs6000/rs6000-call.cc"
 	target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def
index ca899d5f7af..d7500feddf1 100644
--- a/gcc/config/rs6000/rs6000-passes.def
+++ b/gcc/config/rs6000/rs6000-passes.def
@@ -28,6 +28,8 @@ along with GCC; see the file COPYING3.  If not see
      The power8 does not have instructions that automaticaly do the byte swaps
      for loads and stores.  */
   INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
+  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
+
 
   /* Pass to do the PCREL_OPT optimization that combines the load of an
      external symbol's address along with a single load or store using that
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 1a4fc1df668..f6cf2d673d4 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -340,6 +340,7 @@ namespace gcc { class context; }
 class rtl_opt_pass;
 
 extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
+extern rtl_opt_pass *make_pass_analyze_zext (gcc::context *);
 extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
 extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
 extern bool rs6000_quadword_masked_address_p (const_rtx exp);
diff --git a/gcc/config/rs6000/rs6000-zext-elim.cc b/gcc/config/rs6000/rs6000-zext-elim.cc
new file mode 100644
index 00000000000..777c7a5a387
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-zext-elim.cc
@@ -0,0 +1,361 @@
+/* Subroutine to eliminate redundant zero extend for power architecture.
+   Copyright (C) 1991-2023 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This pass removes an unnecessary zero-extension instruction from
+   the code generated for Power.  It is registered after the first
+   cse pass.
+   It identifies the following sequence of instructions after the cse
+   rtl pass:
+
+   set compare (subreg)
+   set if_then_else
+   set SImode -> QImode
+   set zero_extend to DImode from QImode
+   set return value 0 in one path of cfg.
+   set return value 1 in other path of cfg.
+
+   The cfgexpand pass gives a bool value a QImode register; this
+   pass treats that QImode register as a full 64-bit register
+   instead of extending it.
+
+   The pass replaces the QImode-to-DImode zero extension with a copy
+   operation and returns the appropriate return values.  */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "tree.h"
+#include "memmodel.h"
+#include "df.h"
+#include "tm_p.h"
+#include "ira.h"
+#include "print-tree.h"
+#include "varasm.h"
+#include "explow.h"
+#include "expr.h"
+#include "output.h"
+#include "tree-pass.h"
+
+/* This is based on the union-find logic in web.cc.  web_entry_base is
+   defined in df.h.  */
+class zext_web_entry : public web_entry_base
+{
+ public:
+  /* Pointer to the insn; filled in for every non-debug insn.  */
+  rtx_insn *insn;
+  unsigned int is_relevant : 1;	/* Insn takes part in the analysis.  */
+  /* Set if insn is a load.  Never written by the code in this file.  */
+  unsigned int is_load : 1;
+  /* Set if insn satisfies insn_is_store_p (QImode subreg of SImode).  */
+  unsigned int is_store : 1;
+  unsigned int is_zext :1 ;	/* Insn satisfies insn_is_zext_p.  */
+  unsigned int is_move :1;	/* Zext selected for replacement by a copy.  */
+  unsigned int is_delete_move :1;	/* Marked zext with a reg-copy use.  */
+  /* Set if this insn should be replaced (and the original deleted).  */
+  unsigned int will_delete : 1;
+  unsigned int will_delete_chances : 1;	/* Tentative mark: partial match.  */
+};
+
+/* Return 1 when INSN is a SET whose destination has DImode and whose
+   source is a ZERO_EXTEND of a QImode register; return 0 otherwise.  */
+static unsigned int
+insn_is_zext_p(rtx insn)
+{
+  rtx pat = PATTERN (insn);
+
+  /* Only a plain SET into a DImode destination qualifies.  */
+  if (GET_CODE (pat) != SET || GET_MODE (SET_DEST (pat)) != DImode)
+    return 0;
+
+  rtx src = SET_SRC (pat);
+  if (GET_CODE (src) != ZERO_EXTEND)
+    return 0;
+
+  /* The value being extended has to be a QImode register.  */
+  rtx inner = XEXP (src, 0);
+  if (!REG_P (inner))
+    return 0;
+
+  return GET_MODE (inner) == QImode ? 1 : 0;
+}
+
+/* Return 1 when INSN is a SET whose source is a QImode SUBREG of an
+   SImode expression, 0 otherwise.  Despite the name, no MEM is required.
+   A SUBREG can never be a CONST_INT, so no separate check is needed.  */
+static unsigned int
+insn_is_store_p (rtx insn)
+{
+  rtx body = PATTERN (insn);
+  if (GET_CODE (body) != SET || !SUBREG_P (SET_SRC (body)))
+    return 0;
+
+  rtx src = SET_SRC (body);
+  return (GET_MODE (XEXP (src, 0)) == SImode
+	  && GET_MODE (src) == QImode) ? 1 : 0;
+}
+
+/* Scan uses of DEF (set by INSN) and flag removal candidates in INSN_ENTRY.  */
+static void
+find_zero_ext_elimination_candidate (zext_web_entry *insn_entry,
+				     rtx insn, df_ref def)
+{
+  struct df_link *link = DF_REF_CHAIN (def);
+  /* MOVE_INSN remembers the first register-copy use seen for a zext.  */
+  rtx move_insn = NULL_RTX;
+  rtx compare_insn = NULL_RTX;
+  /* Visit every use on the chain hanging off DEF.  */
+  while (link)
+  {
+    if (!DF_REF_INSN_INFO (link->ref))
+      insn_entry[INSN_UID(insn)].will_delete_chances = 0;
+    /* A use outside any insn clears the mark above; real uses go on.  */
+    if (DF_REF_INSN_INFO (link->ref))
+      {
+	rtx use_insn = DF_REF_INSN (link->ref);
+	/* Case 1: INSN is a COMPARE of a SUBREG, USE_INSN an IF_THEN_ELSE.  */
+	if (GET_CODE (PATTERN (use_insn)) == SET
+	    && (GET_CODE (SET_SRC (PATTERN (use_insn))) == IF_THEN_ELSE))
+	  {
+	    if (GET_CODE (PATTERN (insn)) == SET
+		&& GET_CODE (SET_SRC (PATTERN (insn))) == COMPARE)
+	      {
+		rtx body = XEXP (SET_SRC (PATTERN (insn)), 0);
+
+		if (SUBREG_P (body))
+		  {
+		    compare_insn = use_insn;
+		    rtx compare_body = XEXP (SET_SRC (PATTERN (compare_insn)), 0);
+		    /* Mark USE_INSN if its condition tests the reg INSN sets.  */
+		    if (compare_insn
+			&& ((REGNO (XEXP (compare_body, 0)))
+				== REGNO (SET_DEST (PATTERN (insn)))))
+		      insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
+		  }
+	       }
+	    }
+	/* Case 2: a marked SImode IF_THEN_ELSE feeding a QImode subreg copy.  */
+	if (insn_is_store_p(use_insn)
+	    && GET_CODE (PATTERN (insn)) == SET
+	    && (GET_CODE (SET_SRC (PATTERN(insn))) == IF_THEN_ELSE))
+	  {
+	    if (GET_MODE (SET_DEST (PATTERN (insn))) == SImode)
+	      {
+		if (insn_entry[INSN_UID(insn)].will_delete_chances)
+		  insn_entry[INSN_UID(use_insn)].will_delete_chances = 1;
+	      }
+	  }
+	/* Case 3: INSN is the zext; look at register-copy uses of its result.  */
+	if (insn_is_zext_p (insn))
+	  {
+	    if (GET_CODE (PATTERN (use_insn)) == SET
+		&& REG_P (SET_SRC (PATTERN (use_insn))))
+	      {
+		if (move_insn
+		    && REGNO (SET_SRC (PATTERN (use_insn)))
+		       == REGNO (SET_SRC (PATTERN (move_insn)))
+		    && insn_entry[INSN_UID(insn)].is_delete_move)
+		  {
+		    insn_entry[INSN_UID (insn)].is_move = 1;
+		    break;
+		  }
+		  else if (insn_entry[INSN_UID (insn)].will_delete)
+		    {
+		      move_insn = use_insn;
+		      insn_entry[INSN_UID(insn)].is_delete_move= 1;
+		    }
+	      }
+	  }
+	/* Case 4: USE_INSN is a zext fed by INSN.  */
+	if (insn_is_zext_p (use_insn))
+	  {
+	    insn_entry[INSN_UID (use_insn)].is_zext = 1;
+	    insn_entry[INSN_UID(use_insn)].is_relevant = 1;
+
+	    if (insn_is_store_p (insn)
+		&& insn_entry[INSN_UID (insn)].will_delete_chances)
+	    {
+	      insn_entry[INSN_UID (use_insn)].will_delete = 1;
+	      insn_entry[INSN_UID (insn)].will_delete = 1;
+	      insn_entry[INSN_UID( insn)].is_store = 1;
+	    }
+	   /* Join INSN and USE_INSN into one web.  */
+	   if (NONDEBUG_INSN_P (use_insn))
+	     unionfind_union (insn_entry + INSN_UID (insn),
+			      insn_entry + INSN_UID (use_insn));
+	}
+      }
+
+    link = link->next;
+  }
+}
+
+/* Replace entry I's insn (a SET) with a DImode copy of its source operand.  */
+static void
+replace_marked_insns (zext_web_entry *insn_entry, unsigned i)
+{
+  rtx_insn *insn = insn_entry[i].insn;
+  rtx body = PATTERN (insn);
+  rtx src_reg;
+  src_reg = XEXP (SET_SRC (body), 0);	/* Operand 0 of the source.  */
+  set_mode_and_regno (src_reg, DImode, REGNO(src_reg));	/* Same regno.  */
+  /* Give the destination DImode as well, unless it already has it.  */
+  if (GET_MODE(SET_DEST(body)) != DImode)
+    set_mode_and_regno (SET_DEST(body), DImode, REGNO (SET_DEST (body)));
+  /* Emit SET (dest, src) ahead of INSN and rescan it for dataflow.  */
+  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
+  rtx_insn *new_insn = emit_insn_before (copy, insn);
+  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
+  df_insn_rescan (new_insn);
+  /* Drop the original insn from dataflow and from the insn chain.  */
+  df_insn_delete (insn);
+  remove_insn (insn);
+  insn->set_deleted ();
+}
+
+/* Main entry point for this pass.  Build DU/UD chains for FUN, classify
+   each non-debug insn, then rewrite the marked QImode subreg copy and
+   zero-extend insns as DImode copies.  Always returns 0.  */
+unsigned int
+rs6000_analyze_zext (function *fun)
+{
+  zext_web_entry *insn_entry;
+  basic_block bb;
+  rtx_insn *insn, *curr_insn = 0;
+
+  /* Dataflow analysis for use-def chains.  */
+  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
+  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
+  df_analyze ();
+  df_set_flags (DF_DEFER_INSN_RESCAN);
+
+  /* Rebuild ud- and du-chains.  NOTE(review): nothing has been modified
+     since the analysis above, so this rebuild looks redundant.  */
+  df_remove_problem (df_chain);
+  df_process_deferred_rescans ();
+  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
+  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
+  df_analyze ();
+  df_set_flags (DF_DEFER_INSN_RESCAN);
+
+  /* Allocate structure to represent webs of insns.  XCNEWVEC zero-fills,
+     so every flag starts out clear.  */
+  insn_entry = XCNEWVEC (zext_web_entry, get_max_uid ());
+
+  /* Walk the insns to gather basic data.  */
+  FOR_ALL_BB_FN (bb, fun)
+    FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
+    {
+      unsigned int uid = INSN_UID (insn);
+      if (NONDEBUG_INSN_P (insn))
+	{
+	  insn_entry[uid].insn = insn;
+
+	  /* Test the predicate result directly; comparing it with
+	     GET_CODE (insn) mixes an rtx code with a boolean.  */
+	  if (insn_is_store_p (insn))
+	    {
+	      insn_entry[uid].is_store = 1;
+	      insn_entry[uid].is_relevant = 1;
+	    }
+
+	  /* Walk the defs to identify the optimization candidates.  */
+	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+	  df_ref mention;
+
+	  FOR_EACH_INSN_INFO_DEF (mention, insn_info)
+	    {
+	      insn_entry[uid].is_relevant = 1;
+	      find_zero_ext_elimination_candidate (insn_entry, insn, mention);
+	    }
+	}
+    }
+
+  unsigned e = get_max_uid (), i;
+
+  int store_index = -1;
+
+  /* Replace with copy operation.  Entries are visited in UID order.
+     NOTE(review): STORE_INDEX is never reset, so one store can reach
+     replace_marked_insns again for every later marked zero-extend.  */
+  for (i = 0; i < e; ++i)
+    {
+      if (insn_entry[i].is_store && insn_entry[i].will_delete)
+	store_index = i;
+
+      if ((store_index != -1)
+	  && insn_entry[i].is_move && insn_entry[i].will_delete)
+	{
+	  replace_marked_insns (insn_entry, store_index);
+	  replace_marked_insns (insn_entry, i);
+	}
+    }
+  /* Clean up.  */
+  free (insn_entry);
+
+  return 0;
+}
+/* Static description of the zext pass, handed to the rtl_opt_pass ctor.  */
+const pass_data pass_data_analyze_zext =
+{
+  RTL_PASS, /* type */
+  "zext", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE, /* tv_id: no dedicated timevar */
+  0, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_df_finish, /* todo_flags_finish */
+};
+/* RTL pass object for the zero-extension elimination pass.  It is built
+   from pass_data_analyze_zext and forwards its work to
+   rs6000_analyze_zext.  */
+class pass_analyze_zext : public rtl_opt_pass
+{
+public:
+  pass_analyze_zext(gcc::context *ctxt)
+    : rtl_opt_pass(pass_data_analyze_zext, ctxt)
+  {}
+
+  /* opt_pass methods.  */
+
+  /* The pass only runs when optimizing.  */
+  virtual bool gate (function *) { return optimize > 0; }
+
+  /* Run the analysis and rewrite on FUN.  */
+  virtual unsigned int execute (function *fun)
+  {
+    return rs6000_analyze_zext (fun);
+  }
+
+  /* Create another instance bound to the same context.  */
+  opt_pass *clone () { return new pass_analyze_zext (m_ctxt); }
+
+}; // class pass_analyze_zext
+/* Factory for the pass named in rs6000-passes.def.  */
+rtl_opt_pass *
+make_pass_analyze_zext (gcc::context *ctxt)
+{
+  rtl_opt_pass *zext_pass = new pass_analyze_zext (ctxt);
+  return zext_pass;
+}
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 8e0b0d022db..6541334bf2d 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1178,6 +1178,8 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
 					  bool);
 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
 
+rtl_opt_pass *make_pass_analyze_zext (gcc::context*);
+
 /* Hash table stuff for keeping track of TOC entries.  */
 
 struct GTY((for_user)) toc_hash_struct
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index f183b42ce1d..c1f61591d2f 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -35,6 +35,11 @@ rs6000-p8swap.o: $(srcdir)/config/rs6000/rs6000-p8swap.cc
 	$(COMPILE) $<
 	$(POSTCOMPILE)
 
+rs6000-zext-elim.o: $(srcdir)/config/rs6000/rs6000-zext-elim.cc
+	$(COMPILE) $<
+	$(POSTCOMPILE)
+
+
 rs6000-d.o: $(srcdir)/config/rs6000/rs6000-d.cc
 	$(COMPILE) $<
 	$(POSTCOMPILE)
diff --git a/gcc/explow.cc b/gcc/explow.cc
index 32e9498ee07..316aa975e40 100644
--- a/gcc/explow.cc
+++ b/gcc/explow.cc
@@ -654,7 +654,8 @@ copy_to_mode_reg (machine_mode mode, rtx x)
   if (! general_operand (x, VOIDmode))
     x = force_operand (x, temp);
 
-  gcc_assert (GET_MODE (x) == mode || GET_MODE (x) == VOIDmode);
+  gcc_assert (mode == DImode || GET_MODE (x) == mode
+	       || GET_MODE (x) == VOIDmode);
   if (x != temp)
     emit_move_insn (temp, x);
   return temp;
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 15be1c8db99..6162ef92b88 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -4223,9 +4223,9 @@ emit_move_insn (rtx x, rtx y)
   rtx y_cst = NULL_RTX;
   rtx_insn *last_insn;
   rtx set;
-
   gcc_assert (mode != BLKmode
-	      && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
+	      && (mode == DImode || GET_MODE (y) == mode
+	      || GET_MODE (y) == VOIDmode));
 
   /* If we have a copy that looks like one of the following patterns:
        (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 4c641cab192..9d22fadc7ef 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -7902,7 +7902,8 @@ maybe_legitimize_operand (enum insn_code icode, unsigned int opno,
     input:
       gcc_assert (mode != VOIDmode);
       gcc_assert (GET_MODE (op->value) == VOIDmode
-		  || GET_MODE (op->value) == mode);
+		  || GET_MODE (op->value) == mode
+		  || mode == DImode);
       if (maybe_legitimize_operand_same_code (icode, opno, op))
 	return true;
 
-- 
2.31.1


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2023-03-18  3:53 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-16  5:20 [PATCH] rs6000: suboptimal code for returning bool value on target ppc Ajit Agarwal
2023-03-16  7:40 ` Richard Biener
2023-03-16  8:11   ` Ajit Agarwal
2023-03-16  8:14     ` Richard Biener
2023-03-16  8:19       ` Ajit Agarwal
2023-03-16  9:52         ` Richard Biener
2023-03-16 10:11           ` Ajit Agarwal
2023-03-16 10:30             ` Richard Biener
2023-03-16 10:43               ` Ajit Agarwal
2023-03-16 10:56                 ` Richard Biener
2023-03-16 11:43                   ` Ajit Agarwal
2023-03-16 14:48             ` Jeff Law
2023-03-17 11:49               ` Ajit Agarwal
2023-03-17  3:37 ` Surya Kumari Jangala
2023-03-17 21:20   ` Peter Bergner
2023-03-18  3:53     ` Peter Bergner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).