From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by sourceware.org (Postfix) with ESMTP id 61B7F3858C50 for ; Thu, 30 May 2024 11:14:24 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 61B7F3858C50 Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=arm.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=arm.com ARC-Filter: OpenARC Filter v1.0.0 sourceware.org 61B7F3858C50 Authentication-Results: server2.sourceware.org; arc=none smtp.remote-ip=217.140.110.172 ARC-Seal: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1717067666; cv=none; b=EdWmawdXi9O8EVTmZAE9a49d3jOX67zmEUHEsLvTVwH3Lm05z+MstQyRdxlNueWcR7eBaLYtGZ1F77+6dt7TowGjOkQiluYSi9+wl6IFrSf0ep6ajElUYf5fOa3gRipEVI1xazm47ON0xW7ed47B8UZW4ZT838WN8UGuEUqLowc= ARC-Message-Signature: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1717067666; c=relaxed/simple; bh=HwYuTftGm5WMSAfrIJA9SYm1a8vA6+xkm0KffjgtXLo=; h=From:To:Subject:Date:Message-ID:MIME-Version; b=g/uAfVGzv45kvY7CprYKqkR6q/3mgdPiM19uY4Fj8b4/iN51KFdC0MjKccoG3V/nKHTrCtvYuol5xQd5Yt4WV3SNzcfPnbqzwn0FkH09XO28nsxfHh58KXhHf5swYEZoCjrMT4BJNBmqKO3gvD1rtIqlsK8TB4T0cZPrvCygZr8= ARC-Authentication-Results: i=1; server2.sourceware.org Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 57185339; Thu, 30 May 2024 04:14:48 -0700 (PDT) Received: from localhost (e121540-lin.manchester.arm.com [10.32.110.72]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id F3D133F641; Thu, 30 May 2024 04:14:22 -0700 (PDT) From: Richard Sandiford To: Ajit Agarwal Mail-Followup-To: Ajit Agarwal ,Alex Coplan , "Kewen.Lin" , Segher Boessenkool , Michael Meissner , Peter Bergner , David Edelsohn , gcc-patches , richard.sandiford@arm.com Cc: Alex Coplan , "Kewen.Lin" , Segher Boessenkool , Michael Meissner , Peter Bergner , David Edelsohn , gcc-patches Subject: Re: [Patch, aarch64, middle-end\ v4: Move pair_fusion pass from aarch64 to middle-end References: <76142f4e-993e-4aae-b9c6-793dca2643a5@linux.ibm.com> Date: Thu, 30 May 2024 12:14:21 +0100 In-Reply-To: <76142f4e-993e-4aae-b9c6-793dca2643a5@linux.ibm.com> (Ajit Agarwal's message of "Fri, 24 May 2024 14:48:09 +0530") Message-ID: User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/26.3 (gnu/linux) MIME-Version: 1.0 Content-Type: text/plain X-Spam-Status: No, score=-20.3 required=5.0 tests=BAYES_00,GIT_PATCH_0,KAM_DMARC_NONE,KAM_DMARC_STATUS,KAM_LAZY_DOMAIN_SECURITY,KAM_SHORT,SPF_HELO_NONE,SPF_NONE,TXREP,T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: Thanks for the update. Some comments below, but looks very close to ready. Ajit Agarwal writes: > diff --git a/gcc/pair-fusion.cc b/gcc/pair-fusion.cc > new file mode 100644 > index 00000000000..060fdcccc95 > --- /dev/null > +++ b/gcc/pair-fusion.cc > @@ -0,0 +1,3012 @@ > +// Pass to fuse adjacent loads/stores into paired memory accesses. > +// Copyright (C) 2024 Free Software Foundation, Inc. This should probably be 2023-2024, since it's based on code contributed in 2023. > +// > +// This file is part of GCC. > +// > +// GCC is free software; you can redistribute it and/or modify it > +// under the terms of the GNU General Public License as published by > +// the Free Software Foundation; either version 3, or (at your option) > +// any later version. > +// > +// GCC is distributed in the hope that it will be useful, but > +// WITHOUT ANY WARRANTY; without even the implied warranty of > +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +// General Public License for more details. > +// > +// You should have received a copy of the GNU General Public License > +// along with GCC; see the file COPYING3. If not see > +// . > + > +#define INCLUDE_ALGORITHM > +#define INCLUDE_FUNCTIONAL > +#define INCLUDE_LIST > +#define INCLUDE_TYPE_TRAITS > +#include "config.h" > +#include "system.h" > +#include "coretypes.h" > +#include "backend.h" > +#include "rtl.h" > +#include "df.h" > +#include "rtl-iter.h" > +#include "rtl-ssa.h" > +#include "cfgcleanup.h" > +#include "tree-pass.h" > +#include "ordered-hash-map.h" > +#include "tree-dfa.h" > +#include "fold-const.h" > +#include "tree-hash-traits.h" > +#include "print-tree.h" > +#include "pair-fusion.h" > + > +using namespace rtl_ssa; > + > +// We pack these fields (load_p, fpsimd_p, and size) into an integer > +// (LFS) which we use as part of the key into the main hash tables. > +// > +// The idea is that we group candidates together only if they agree on > +// the fields below. Candidates that disagree on any of these > +// properties shouldn't be merged together. > +struct lfs_fields > +{ > + bool load_p; > + bool fpsimd_p; > + unsigned size; > +}; > + > +using insn_list_t = std::list; > + > +// Information about the accesses at a given offset from a particular > +// base. Stored in an access_group, see below. > +struct access_record > +{ > + poly_int64 offset; > + std::list cand_insns; > + std::list::iterator place; > + > + access_record (poly_int64 off) : offset (off) {} > +}; > + > +// A group of accesses where adjacent accesses could be ldp/stp > +// candidates. The splay tree supports efficient insertion, > +// while the list supports efficient iteration. > +struct access_group > +{ > + splay_tree tree; > + std::list list; > + > + template > + inline void track (Alloc node_alloc, poly_int64 offset, insn_info *insn); > +}; > + > +// Test if this base candidate is viable according to HAZARDS. > +bool base_cand::viable () const Formating nit, should be: bool base_cand::viable () const > +{ > + return !hazards[0] || !hazards[1] || (*hazards[0] > *hazards[1]); > +} > [...] > +void > +pair_fusion_bb_info::transform () > +{ > + traverse_base_map (expr_map); > + traverse_base_map (def_map); > +} > + > +// the base register which we can fold in to make this pair use > +// a writeback addressing mode. The first line of this comment is missing. It should be: // Given an existing pair insn INSN, look for a trailing update of > [...] > diff --git a/gcc/pair-fusion.h b/gcc/pair-fusion.h > new file mode 100644 > index 00000000000..f295fdbdb8f > --- /dev/null > +++ b/gcc/pair-fusion.h > @@ -0,0 +1,195 @@ > +// Pass to fuse adjacent loads/stores into paired memory accesses. > +// > +// This file contains the definition of the virtual base class which is > +// overriden by targets that make use of the pass. > +// > +// Copyright (C) 2024 Free Software Foundation, Inc. 2023-2024 here too > +// > +// This file is part of GCC. > +// > +// GCC is free software; you can redistribute it and/or modify it > +// under the terms of the GNU General Public License as published by > +// the Free Software Foundation; either version 3, or (at your option) > +// any later version. > +// > +// GCC is distributed in the hope that it will be useful, but > +// WITHOUT ANY WARRANTY; without even the implied warranty of > +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +// General Public License for more details. > +// > +// You should have received a copy of the GNU General Public License > +// along with GCC; see the file COPYING3. If not see > +// . > + > +namespace rtl_ssa { > + class def_info; > + class insn_info; > + class insn_range_info; > + class bb_info; > +} > + > +// Information about a potential base candidate, used in try_fuse_pair. > +// There may be zero, one, or two viable RTL bases for a given pair. > +struct base_cand > +{ > + // DEF is the def of the base register to be used by the pair. > + rtl_ssa::def_info *def; > + > + // FROM_INSN is -1 if the base candidate is already shared by both > + // candidate insns. Otherwise it holds the index of the insn from > + // which the base originated. > + // > + // In the case that the base is shared, either DEF is already used > + // by both candidate accesses, or both accesses see different versions > + // of the same regno, in which case DEF is the def consumed by the > + // first candidate access. > + int from_insn; > + > + // To form a pair, we do so by moving the first access down and the second > + // access up. To determine where to form the pair, and whether or not > + // it is safe to form the pair, we track instructions which cannot be > + // re-ordered past due to either dataflow or alias hazards. > + // > + // Since we allow changing the base used by an access, the choice of > + // base can change which instructions act as re-ordering hazards for > + // this pair (due to different dataflow). We store the initial > + // dataflow hazards for this choice of base candidate in HAZARDS. > + // > + // These hazards act as re-ordering barriers to each candidate insn > + // respectively, in program order. > + // > + // Later on, when we take alias analysis into account, we narrow > + // HAZARDS accordingly. > + rtl_ssa::insn_info *hazards[2]; > + > + base_cand (rtl_ssa::def_info *def, int insn) > + : def (def), from_insn (insn), hazards {nullptr, nullptr} {} > + > + base_cand (rtl_ssa::def_info *def) : base_cand (def, -1) {} > + > + // Test if this base candidate is viable according to HAZARDS. > + inline bool viable () const; This shouldn't be inline, now that it's defined in the .cc file. Same for all other uses of "inline" in the file. Richard > +}; > + > +struct alias_walker; > + > +// When querying should_handle_writeback, this enum is used to > +// qualify which opportunities we are asking about. > +enum class writeback { > + // Only those writeback opportunities that arise from existing > + // auto-increment accesses. > + EXISTING, > + > + // All writeback opportunities, including those that involve folding > + // base register updates into a non-writeback pair. > + ALL > +}; > + > +// This class can be overriden by targets to give a pass that fuses > +// adjacent loads and stores into load/store pair instructions. > +// > +// The target can override the various virtual functions to customize > +// the behaviour of the pass as appropriate for the target. > +struct pair_fusion { > + pair_fusion (); > + > + // Given: > + // - an rtx REG_OP, the non-memory operand in a load/store insn, > + // - a machine_mode MEM_MODE, the mode of the MEM in that insn, and > + // - a boolean LOAD_P (true iff the insn is a load), then: > + // return true if the access should be considered an FP/SIMD access. > + // Such accesses are segregated from GPR accesses, since we only want > + // to form pairs for accesses that use the same register file. > + virtual bool fpsimd_op_p (rtx, machine_mode, bool) > + { > + return false; > + } > + > + // Return true if we should consider forming pairs from memory > + // accesses with operand mode MODE at this stage in compilation. > + virtual bool pair_operand_mode_ok_p (machine_mode mode) = 0; > + > + // Return true iff REG_OP is a suitable register operand for a paired > + // memory access, where LOAD_P is true if we're asking about loads and > + // false for stores. MODE gives the mode of the operand. > + virtual bool pair_reg_operand_ok_p (bool load_p, rtx reg_op, > + machine_mode mode) = 0; > + > + // Return alias check limit. > + // This is needed to avoid unbounded quadratic behaviour when > + // performing alias analysis. > + virtual int pair_mem_alias_check_limit () = 0; > + > + // Return true if we should try to handle writeback opportunities. > + // WHICH determines the kinds of writeback opportunities the caller > + // is asking about. > + virtual bool should_handle_writeback (enum writeback which) = 0; > + > + // Given BASE_MEM, the mem from the lower candidate access for a pair, > + // and LOAD_P (true if the access is a load), check if we should proceed > + // to form the pair given the target's code generation policy on > + // paired accesses. > + virtual bool pair_mem_ok_with_policy (rtx base_mem, bool load_p) = 0; > + > + // Generate the pattern for a paired access. PATS gives the patterns > + // for the individual memory accesses (which by this point must share a > + // common base register). If WRITEBACK is non-NULL, then this rtx > + // describes the update to the base register that should be performed by > + // the resulting insn. LOAD_P is true iff the accesses are loads. > + virtual rtx gen_pair (rtx *pats, rtx writeback, bool load_p) = 0; > + > + // Return true if INSN is a paired memory access. If so, set LOAD_P to > + // true iff INSN is a load pair. > + virtual bool pair_mem_insn_p (rtx_insn *insn, bool &load_p) = 0; > + > + // Return true if we should track loads. > + virtual bool track_loads_p () > + { > + return true; > + } > + > + // Return true if we should track stores. > + virtual bool track_stores_p () > + { > + return true; > + } > + > + // Return true if OFFSET is in range for a paired memory access. > + virtual bool pair_mem_in_range_p (HOST_WIDE_INT offset) = 0; > + > + // Given a load/store pair insn in PATTERN, unpack the insn, storing > + // the register operands in REGS, and returning the mem. LOAD_P is > + // true for loads and false for stores. > + virtual rtx destructure_pair (rtx regs[2], rtx pattern, bool load_p) = 0; > + > + // Given a pair mem in MEM, register operands in REGS, and an rtx > + // representing the effect of writeback on the base register in WB_EFFECT, > + // return an insn representing a writeback variant of this pair. > + // LOAD_P is true iff the pair is a load. > + // This is used when promoting existing non-writeback pairs to writeback > + // variants. > + virtual rtx gen_promote_writeback_pair (rtx wb_effect, rtx mem, > + rtx regs[2], bool load_p) = 0; > + > + inline void process_block (rtl_ssa::bb_info *bb); > + inline rtl_ssa::insn_info *find_trailing_add (rtl_ssa::insn_info *insns[2], > + const rtl_ssa::insn_range_info > + &pair_range, > + int initial_writeback, > + rtx *writeback_effect, > + rtl_ssa::def_info **add_def, > + rtl_ssa::def_info *base_def, > + poly_int64 initial_offset, > + unsigned access_size); > + inline int get_viable_bases (rtl_ssa::insn_info *insns[2], > + vec &base_cands, > + rtx cand_mems[2], > + unsigned access_size, > + bool reversed); > + inline void do_alias_analysis (rtl_ssa::insn_info *alias_hazards[4], > + alias_walker *walkers[4], > + bool load_p); > + inline void try_promote_writeback (rtl_ssa::insn_info *insn, bool load_p); > + void run (); > + ~pair_fusion (); > +};