* [PATCH] rs6000: Add new pass for replacement of contiguous adresses vector load lxv with lxvp
@ 2023-09-29 13:22 Ajit Agarwal
0 siblings, 0 replies; only message in thread
From: Ajit Agarwal @ 2023-09-29 13:22 UTC (permalink / raw)
To: gcc-patches; +Cc: Peter Bergner, Segher Boessenkool, Kewen.Lin
Hello All:
This patch adds a new pass that replaces vector loads (lxv) from contiguous
addresses with the mma instruction lxvp.
Bootstrapped and regtested on powerpc64-linux-gnu.
Thanks & Regards
Ajit
rs6000: Add new pass for replacement of contiguous lxv with lxvp
New pass to replace vector loads (lxv) from contiguous addresses with the
mma instruction lxvp. This pass is registered before the cse rtl pass.
2023-09-29 Ajit Kumar Agarwal <aagarwa1@linux.ibm.com>
gcc/ChangeLog:
* config/rs6000/rs6000-passes.def: Registered vecload pass.
* config/rs6000/rs6000-vecload-opt.cc: Add new pass.
* config.gcc: Add new object file.
* config/rs6000/rs6000-protos.h: Add new prototype for vecload
pass.
* config/rs6000/rs6000.cc: Add new prototype for vecload pass.
* config/rs6000/t-rs6000: Add new rule.
gcc/testsuite/ChangeLog:
* g++.target/powerpc/vecload.C: New test.
---
gcc/config.gcc | 4 +-
gcc/config/rs6000/rs6000-passes.def | 1 +
gcc/config/rs6000/rs6000-protos.h | 2 +
gcc/config/rs6000/rs6000-vecload-opt.cc | 207 +++++++++++++++++++++
gcc/config/rs6000/rs6000.cc | 3 +-
gcc/config/rs6000/t-rs6000 | 4 +
gcc/testsuite/g++.target/powerpc/vecload.C | 15 ++
7 files changed, 233 insertions(+), 3 deletions(-)
create mode 100644 gcc/config/rs6000/rs6000-vecload-opt.cc
create mode 100644 gcc/testsuite/g++.target/powerpc/vecload.C
diff --git a/gcc/config.gcc b/gcc/config.gcc
index ee46d96bf62..482ab094b89 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -515,7 +515,7 @@ or1k*-*-*)
;;
powerpc*-*-*)
cpu_type=rs6000
- extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
+ extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o rs6000-vecload-opt.o"
extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
@@ -552,7 +552,7 @@ riscv*)
;;
rs6000*-*-*)
extra_options="${extra_options} g.opt fused-madd.opt rs6000/rs6000-tables.opt"
- extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
+ extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o rs6000-vecload-opt.o"
extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-logue.cc \$(srcdir)/config/rs6000/rs6000-call.cc"
target_gtfiles="$target_gtfiles \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def
index ca899d5f7af..58a74058c6a 100644
--- a/gcc/config/rs6000/rs6000-passes.def
+++ b/gcc/config/rs6000/rs6000-passes.def
@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see
The power8 does not have instructions that automaticaly do the byte swaps
for loads and stores. */
INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
+ INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_vecload);
/* Pass to do the PCREL_OPT optimization that combines the load of an
external symbol's address along with a single load or store using that
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index f70118ea40f..9c44bae33d3 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -91,6 +91,7 @@ extern int mems_ok_for_quad_peep (rtx, rtx);
extern bool gpr_or_gpr_p (rtx, rtx);
extern bool direct_move_p (rtx, rtx);
extern bool quad_address_p (rtx, machine_mode, bool);
+extern bool mode_supports_dq_form (machine_mode);
extern bool quad_load_store_p (rtx, rtx);
extern bool fusion_gpr_load_p (rtx, rtx, rtx, rtx);
extern void expand_fusion_gpr_load (rtx *);
@@ -344,6 +345,7 @@ class rtl_opt_pass;
extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
+extern rtl_opt_pass *make_pass_analyze_vecload (gcc::context *);
extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
extern bool rs6000_quadword_masked_address_p (const_rtx exp);
extern rtx rs6000_gen_lvx (enum machine_mode, rtx, rtx);
diff --git a/gcc/config/rs6000/rs6000-vecload-opt.cc b/gcc/config/rs6000/rs6000-vecload-opt.cc
new file mode 100644
index 00000000000..955e5d6361b
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-vecload-opt.cc
@@ -0,0 +1,207 @@
+/* Subroutines used to replace lxv with lxvp
+ for p10 little-endian VSX code.
+ Copyright (C) 1991-2023 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "tree.h"
+#include "memmodel.h"
+#include "df.h"
+#include "tm_p.h"
+#include "ira.h"
+#include "print-tree.h"
+#include "varasm.h"
+#include "explow.h"
+#include "expr.h"
+#include "output.h"
+#include "tree-pass.h"
+#include "regs.h"
+#include "rtx-vector-builder.h"
+#include "rs6000-protos.h"
+
+/* Return true if OFFSET lies in the signed 16-bit range [-32768, 32767]
+   and is a multiple of 16 (its low four bits are clear).  */
+static inline bool
+quad_address_offset_p (HOST_WIDE_INT offset)
+{
+  if (!IN_RANGE (offset, -32768, 32767))
+    return false;
+
+  return (offset & 0xf) == 0;
+}
+
+/* Replace the two vector loads INSN1 and INSN2 with a single OOmode move
+   (gen_movoo), i.e. the lxvp form.  The new insn is emitted before INSN1;
+   it reuses INSN1's memory source, retyped to OOmode, and targets the
+   OOmode register whose number is INSN1's destination register number.
+   Both original insns are then removed from the insn chain and from the
+   dataflow information.
+
+   NOTE(review): INSN2 is only deleted here.  Nothing in this function
+   checks that its destination is covered by the new OOmode register or
+   that its address follows INSN1's; the caller has to guarantee that.  */
+static void
+replace_lxv_with_lxvp (rtx_insn *insn1, rtx_insn *insn2)
+{
+  rtx body = PATTERN (insn1);
+  rtx src_exp = SET_SRC (body);
+  rtx dest_exp = SET_DEST (body);
+
+  /* INSN1 is deleted below, so its MEM can be retyped in place.  */
+  PUT_MODE (src_exp, OOmode);
+  rtx opnd = gen_rtx_REG (OOmode, REGNO (dest_exp));
+  rtx lxvp = gen_movoo (opnd, src_exp);
+
+  rtx_insn *new_insn = emit_insn_before (lxvp, insn1);
+  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn1));
+  df_insn_rescan (new_insn);
+
+  /* Report the UID of the load being replaced, not a constant 0.  */
+  if (dump_file)
+    fprintf (dump_file, "Replacing lxv %d with lxvp %d\n",
+             INSN_UID (insn1), INSN_UID (new_insn));
+
+  df_insn_delete (insn1);
+  remove_insn (insn1);
+  df_insn_delete (insn2);
+  remove_insn (insn2);
+  insn1->set_deleted ();
+  insn2->set_deleted ();
+}
+
+/* Return true if SRC, the MEM source of a vector load, can be the second
+   half of a pair whose first half was loaded from base register REGNO at
+   offset OFFSET.  SRC's address must be (plus (reg REGNO) (const_int D))
+   where D is a valid quad displacement and D - OFFSET == 16, and SRC's
+   recorded MEM offset must be known and non-negative.  */
+static bool
+vecload_adjacent_mem_p (rtx src, unsigned int regno, HOST_WIDE_INT offset)
+{
+  rtx addr = XEXP (src, 0);
+  if (GET_CODE (addr) != PLUS)
+    return false;
+
+  rtx base = XEXP (addr, 0);
+  if (!REG_P (base) || !INT_REG_OK_FOR_BASE_P (base, true))
+    return false;
+
+  rtx disp = XEXP (addr, 1);
+  if (!CONST_INT_P (disp))
+    return false;
+
+  const mem_attrs *attrs = get_mem_attrs (src);
+  return (attrs->offset_known_p
+          && known_ge (attrs->offset, 0)
+          && quad_address_offset_p (INTVAL (disp))
+          && REGNO (base) == regno
+          && INTVAL (disp) - offset == 16);
+}
+
+/* Identify lxv instructions that load from contiguous addresses and
+   replace each such pair with the mma instruction lxvp.
+
+   A load whose address is a bare register is remembered as the first
+   half of a potential pair; a later load in the same basic block that
+   vecload_adjacent_mem_p accepts is merged with it.  A load that does not
+   match is simply not paired; it no longer ends the scan of the whole
+   function.  Pairing state is reset at every basic block boundary so a
+   pair is never formed across blocks.
+
+   The value is returned by the pass's execute hook, so it is always 0
+   and never a boolean.
+
+   NOTE(review): the first load's offset is taken from its MEM attributes
+   while the second load's offset is the displacement in its address;
+   confirm that comparing the two is intended.  Instructions between the
+   two loads are not examined, and the second load's destination register
+   is not compared with the first's.  */
+unsigned int
+rs6000_analyze_vecload (function *fun)
+{
+  /* These target checks do not depend on the insn being examined.  */
+  if (!TARGET_VSX || !TARGET_MMA)
+    return 0;
+
+  basic_block bb;
+  rtx_insn *insn, *curr_insn = 0;
+
+  FOR_ALL_BB_FN (bb, fun)
+    {
+      /* Candidate first half of a pair, valid only within BB.  */
+      rtx_insn *first_load = NULL;
+      unsigned int base_regno = 0;
+      HOST_WIDE_INT first_offset = 0;
+
+      FOR_BB_INSNS_SAFE (bb, insn, curr_insn)
+        {
+          if (!NONDEBUG_INSN_P (insn) || GET_CODE (PATTERN (insn)) != SET)
+            continue;
+
+          rtx set = PATTERN (insn);
+          rtx src = SET_SRC (set);
+          rtx dest = SET_DEST (set);
+          machine_mode mode = GET_MODE (dest);
+
+          /* Only loads into FP or Altivec register numbers qualify.  */
+          if (!REG_P (dest))
+            continue;
+          int dest_regno = REGNO (dest);
+          if (!FP_REGNO_P (dest_regno) && !ALTIVEC_REGNO_P (dest_regno))
+            continue;
+
+          if (!mode_supports_dq_form (mode)
+              || !MEM_P (src)
+              || !quad_address_p (XEXP (src, 0), mode, true))
+            continue;
+
+          if (first_load
+              && vecload_adjacent_mem_p (src, base_regno, first_offset))
+            {
+              /* INSN is deleted by the replacement; the _SAFE iterator
+                 already holds its successor.  */
+              replace_lxv_with_lxvp (first_load, insn);
+              first_load = NULL;
+              continue;
+            }
+
+          rtx addr = XEXP (src, 0);
+          if (REG_P (addr))
+            {
+              /* Start from 0 so an unknown MEM offset never inherits the
+                 value recorded for an earlier candidate.  */
+              const mem_attrs *attrs = get_mem_attrs (src);
+              first_offset = 0;
+              if (attrs->offset_known_p)
+                first_offset = attrs->offset;
+              base_regno = REGNO (addr);
+              first_load = insn;
+            }
+        }
+    }
+
+  return 0;
+}
+
+/* Descriptor for the "vecload" RTL pass: no optinfo group, no timevar,
+   no required/provided/destroyed properties, and TODO_df_finish as the
+   only todo flag run after the pass.  */
+const pass_data pass_data_analyze_vecload =
+{
+ RTL_PASS, /* type */
+ "vecload", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_df_finish, /* todo_flags_finish */
+};
+
+/* RTL pass object that runs rs6000_analyze_vecload on each function.  */
+class pass_analyze_vecload : public rtl_opt_pass
+{
+public:
+  pass_analyze_vecload (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_analyze_vecload, ctxt)
+  {
+  }
+
+  /* opt_pass methods: */
+
+  /* Create another instance of this pass in the same context.  */
+  opt_pass *clone ()
+  {
+    return new pass_analyze_vecload (m_ctxt);
+  }
+
+  /* The pass is enabled only when optimizing and VSX is available.  */
+  virtual bool gate (function *)
+  {
+    const bool optimizing = optimize > 0;
+    return optimizing && TARGET_VSX;
+  }
+
+  /* Hand FUN to the analysis routine and return its result.  */
+  virtual unsigned int execute (function *fun)
+  {
+    return rs6000_analyze_vecload (fun);
+  }
+
+}; // class pass_analyze_vecload
+
+/* Factory for the vecload pass: allocate a new pass_analyze_vecload
+   bound to CTXT and return it.  */
+rtl_opt_pass *
+make_pass_analyze_vecload (gcc::context *ctxt)
+{
+  rtl_opt_pass *pass = new pass_analyze_vecload (ctxt);
+  return pass;
+}
+
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index cc9253bb040..dba545271e0 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -387,7 +387,7 @@ mode_supports_vmx_dform (machine_mode mode)
/* Return true if we have D-form addressing in VSX registers. This addressing
is more limited than normal d-form addressing in that the offset must be
aligned on a 16-byte boundary. */
-static inline bool
+bool
mode_supports_dq_form (machine_mode mode)
{
return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
@@ -1178,6 +1178,7 @@ static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
secondary_reload_info *,
bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
+rtl_opt_pass *make_pass_analyze_vecload (gcc::context*);
/* Hash table stuff for keeping track of TOC entries. */
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index f183b42ce1d..da7ae26e88b 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -47,6 +47,10 @@ rs6000-builtin.o: $(srcdir)/config/rs6000/rs6000-builtin.cc
$(COMPILE) $<
$(POSTCOMPILE)
+rs6000-vecload-opt.o: $(srcdir)/config/rs6000/rs6000-vecload-opt.cc
+ $(COMPILE) $<
+ $(POSTCOMPILE)
+
build/rs6000-gen-builtins.o: $(srcdir)/config/rs6000/rs6000-gen-builtins.cc
build/rbtree.o: $(srcdir)/config/rs6000/rbtree.cc
diff --git a/gcc/testsuite/g++.target/powerpc/vecload.C b/gcc/testsuite/g++.target/powerpc/vecload.C
new file mode 100644
index 00000000000..83eea412c04
--- /dev/null
+++ b/gcc/testsuite/g++.target/powerpc/vecload.C
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -mmma" } */
+
+#include <altivec.h>
+
+/* Feed two MMA builtins with SRC and with ptr[0] and ptr[1], two adjacent
+   elements of PTR, accumulating into ACC, then store ACC to *DST.
+   NOTE(review): presumably the loads of ptr[0] and ptr[1] are the
+   contiguous pair the vecload pass targets -- confirm against the
+   dg-final directive of this test.  */
+void
+foo (__vector_quad *dst, vector unsigned char *ptr, vector unsigned char src)
+{
+ __vector_quad acc;
+ __builtin_mma_xvf32ger(&acc, src, ptr[0]);
+ __builtin_mma_xvf32gerpp(&acc, src, ptr[1]);
+ *dst = acc;
+}
+/* { dg-final { scan-assembler-not {\mlxvp\M} } } */
--
2.39.3
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-09-29 13:22 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-29 13:22 [PATCH] rs6000: Add new pass for replacement of contiguous adresses vector load lxv with lxvp Ajit Agarwal
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).