From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1464) id 0E76C3858C39; Wed, 15 Sep 2021 17:18:50 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 0E76C3858C39 MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Peter Bergner To: gcc-cvs@gcc.gnu.org Subject: [gcc r11-8994] rs6000: Move rs6000_split_multireg_move to later in file X-Act-Checkin: gcc X-Git-Author: Peter Bergner X-Git-Refname: refs/heads/releases/gcc-11 X-Git-Oldrev: e9a6a100046e847272bd65178567f315bdd14f99 X-Git-Newrev: 819e7784a0695b6fc9c2563540e593b3a9360de9 Message-Id: <20210915171850.0E76C3858C39@sourceware.org> Date: Wed, 15 Sep 2021 17:18:50 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 15 Sep 2021 17:18:50 -0000 https://gcc.gnu.org/g:819e7784a0695b6fc9c2563540e593b3a9360de9 commit r11-8994-g819e7784a0695b6fc9c2563540e593b3a9360de9 Author: Peter Bergner Date: Wed Jul 14 18:23:31 2021 -0500 rs6000: Move rs6000_split_multireg_move to later in file An upcoming change to rs6000_split_multireg_move requires it to be moved later in the file to fix a declaration issue. 2021-07-14 Peter Bergner gcc/ * config/rs6000/rs6000.c (rs6000_split_multireg_move): Move to later in the file. (cherry picked from commit 7d914777fc6c6151f430d798fc97bae927a430f7) Diff: --- gcc/config/rs6000/rs6000.c | 1845 ++++++++++++++++++++++---------------------- 1 file changed, 922 insertions(+), 923 deletions(-) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 528a6f552bf..8ad2ec4ec61 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -16716,533 +16716,157 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, emit_move_insn (orig_after, after); } -/* Emit instructions to move SRC to DST. Called by splitters for - multi-register moves. It will emit at most one instruction for - each register that is accessed; that is, it won't emit li/lis pairs - (or equivalent for 64-bit code). One of SRC or DST must be a hard - register. */ +static GTY(()) alias_set_type TOC_alias_set = -1; -void -rs6000_split_multireg_move (rtx dst, rtx src) +alias_set_type +get_TOC_alias_set (void) { - /* The register number of the first register being moved. */ - int reg; - /* The mode that is to be moved. */ - machine_mode mode; - /* The mode that the move is being done in, and its size. */ - machine_mode reg_mode; - int reg_mode_size; - /* The number of registers that will be moved. */ - int nregs; + if (TOC_alias_set == -1) + TOC_alias_set = new_alias_set (); + return TOC_alias_set; +} - reg = REG_P (dst) ? REGNO (dst) : REGNO (src); - mode = GET_MODE (dst); - nregs = hard_regno_nregs (reg, mode); +/* The mode the ABI uses for a word. This is not the same as word_mode + for -m32 -mpowerpc64. This is used to implement various target hooks. */ - /* If we have a vector quad register for MMA, and this is a load or store, - see if we can use vector paired load/stores. */ - if (mode == XOmode && TARGET_MMA - && (MEM_P (dst) || MEM_P (src))) - { - reg_mode = OOmode; - nregs /= 2; - } - /* If we have a vector pair/quad mode, split it into two/four separate - vectors. */ - else if (mode == OOmode || mode == XOmode) - reg_mode = V1TImode; - else if (FP_REGNO_P (reg)) - reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : - (TARGET_HARD_FLOAT ? DFmode : SFmode); - else if (ALTIVEC_REGNO_P (reg)) - reg_mode = V16QImode; +static scalar_int_mode +rs6000_abi_word_mode (void) +{ + return TARGET_32BIT ? SImode : DImode; +} + +/* Implement the TARGET_OFFLOAD_OPTIONS hook. */ +static char * +rs6000_offload_options (void) +{ + if (TARGET_64BIT) + return xstrdup ("-foffload-abi=lp64"); else - reg_mode = word_mode; - reg_mode_size = GET_MODE_SIZE (reg_mode); + return xstrdup ("-foffload-abi=ilp32"); +} - gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode)); + +/* A quick summary of the various types of 'constant-pool tables' + under PowerPC: - /* TDmode residing in FP registers is special, since the ISA requires that - the lower-numbered word of a register pair is always the most significant - word, even in little-endian mode. This does not match the usual subreg - semantics, so we cannnot use simplify_gen_subreg in those cases. Access - the appropriate constituent registers "by hand" in little-endian mode. + Target Flags Name One table per + AIX (none) AIX TOC object file + AIX -mfull-toc AIX TOC object file + AIX -mminimal-toc AIX minimal TOC translation unit + SVR4/EABI (none) SVR4 SDATA object file + SVR4/EABI -fpic SVR4 pic object file + SVR4/EABI -fPIC SVR4 PIC translation unit + SVR4/EABI -mrelocatable EABI TOC function + SVR4/EABI -maix AIX TOC object file + SVR4/EABI -maix -mminimal-toc + AIX minimal TOC translation unit - Note we do not need to check for destructive overlap here since TDmode - can only reside in even/odd register pairs. */ - if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN) - { - rtx p_src, p_dst; - int i; + Name Reg. Set by entries contains: + made by addrs? fp? sum? - for (i = 0; i < nregs; i++) - { - if (REG_P (src) && FP_REGNO_P (REGNO (src))) - p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i); - else - p_src = simplify_gen_subreg (reg_mode, src, mode, - i * reg_mode_size); + AIX TOC 2 crt0 as Y option option + AIX minimal TOC 30 prolog gcc Y Y option + SVR4 SDATA 13 crt0 gcc N Y N + SVR4 pic 30 prolog ld Y not yet N + SVR4 PIC 30 prolog gcc Y option option + EABI TOC 30 prolog gcc Y option option - if (REG_P (dst) && FP_REGNO_P (REGNO (dst))) - p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i); - else - p_dst = simplify_gen_subreg (reg_mode, dst, mode, - i * reg_mode_size); +*/ - emit_insn (gen_rtx_SET (p_dst, p_src)); - } +/* Hash functions for the hash table. */ - return; - } +static unsigned +rs6000_hash_constant (rtx k) +{ + enum rtx_code code = GET_CODE (k); + machine_mode mode = GET_MODE (k); + unsigned result = (code << 3) ^ mode; + const char *format; + int flen, fidx; - /* The __vector_pair and __vector_quad modes are multi-register - modes, so if we have to load or store the registers, we have to be - careful to properly swap them if we're in little endian mode - below. This means the last register gets the first memory - location. We also need to be careful of using the right register - numbers if we are splitting XO to OO. */ - if (mode == OOmode || mode == XOmode) + format = GET_RTX_FORMAT (code); + flen = strlen (format); + fidx = 0; + + switch (code) { - nregs = hard_regno_nregs (reg, mode); - int reg_mode_nregs = hard_regno_nregs (reg, reg_mode); - if (MEM_P (dst)) - { - unsigned offset = 0; - unsigned size = GET_MODE_SIZE (reg_mode); + case LABEL_REF: + return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0)); - /* If we are reading an accumulator register, we have to - deprime it before we can access it. */ - if (TARGET_MMA - && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) - emit_insn (gen_mma_xxmfacc (src, src)); + case CONST_WIDE_INT: + { + int i; + flen = CONST_WIDE_INT_NUNITS (k); + for (i = 0; i < flen; i++) + result = result * 613 + CONST_WIDE_INT_ELT (k, i); + return result; + } - for (int i = 0; i < nregs; i += reg_mode_nregs) - { - unsigned subreg = - (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i); - rtx dst2 = adjust_address (dst, reg_mode, offset); - rtx src2 = gen_rtx_REG (reg_mode, reg + subreg); - offset += size; - emit_insn (gen_rtx_SET (dst2, src2)); - } + case CONST_DOUBLE: + return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result; - return; - } + case CODE_LABEL: + fidx = 3; + break; - if (MEM_P (src)) + default: + break; + } + + for (; fidx < flen; fidx++) + switch (format[fidx]) + { + case 's': { - unsigned offset = 0; - unsigned size = GET_MODE_SIZE (reg_mode); + unsigned i, len; + const char *str = XSTR (k, fidx); + len = strlen (str); + result = result * 613 + len; + for (i = 0; i < len; i++) + result = result * 613 + (unsigned) str[i]; + break; + } + case 'u': + case 'e': + result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx)); + break; + case 'i': + case 'n': + result = result * 613 + (unsigned) XINT (k, fidx); + break; + case 'w': + if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT)) + result = result * 613 + (unsigned) XWINT (k, fidx); + else + { + size_t i; + for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++) + result = result * 613 + (unsigned) (XWINT (k, fidx) + >> CHAR_BIT * i); + } + break; + case '0': + break; + default: + gcc_unreachable (); + } - for (int i = 0; i < nregs; i += reg_mode_nregs) - { - unsigned subreg = - (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i); - rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg); - rtx src2 = adjust_address (src, reg_mode, offset); - offset += size; - emit_insn (gen_rtx_SET (dst2, src2)); - } + return result; +} - /* If we are writing an accumulator register, we have to - prime it after we've written it. */ - if (TARGET_MMA - && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) - emit_insn (gen_mma_xxmtacc (dst, dst)); +hashval_t +toc_hasher::hash (toc_hash_struct *thc) +{ + return rs6000_hash_constant (thc->key) ^ thc->key_mode; +} - return; - } +/* Compare H1 and H2 for equivalence. */ - if (GET_CODE (src) == UNSPEC) - { - gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE); - gcc_assert (REG_P (dst)); - if (GET_MODE (src) == XOmode) - gcc_assert (FP_REGNO_P (REGNO (dst))); - if (GET_MODE (src) == OOmode) - gcc_assert (VSX_REGNO_P (REGNO (dst))); - - reg_mode = GET_MODE (XVECEXP (src, 0, 0)); - int nvecs = XVECLEN (src, 0); - for (int i = 0; i < nvecs; i++) - { - int index = WORDS_BIG_ENDIAN ? i : nvecs - 1 - i; - rtx dst_i = gen_rtx_REG (reg_mode, reg + index); - emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i))); - } - - /* We are writing an accumulator register, so we have to - prime it after we've written it. */ - if (GET_MODE (src) == XOmode) - emit_insn (gen_mma_xxmtacc (dst, dst)); - - return; - } - - /* Register -> register moves can use common code. */ - } - - if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst))) - { - /* If we are reading an accumulator register, we have to - deprime it before we can access it. */ - if (TARGET_MMA - && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) - emit_insn (gen_mma_xxmfacc (src, src)); - - /* Move register range backwards, if we might have destructive - overlap. */ - int i; - /* XO/OO are opaque so cannot use subregs. */ - if (mode == OOmode || mode == XOmode ) - { - for (i = nregs - 1; i >= 0; i--) - { - rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i); - rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i); - emit_insn (gen_rtx_SET (dst_i, src_i)); - } - } - else - { - for (i = nregs - 1; i >= 0; i--) - emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, - i * reg_mode_size), - simplify_gen_subreg (reg_mode, src, mode, - i * reg_mode_size))); - } - - /* If we are writing an accumulator register, we have to - prime it after we've written it. */ - if (TARGET_MMA - && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) - emit_insn (gen_mma_xxmtacc (dst, dst)); - } - else - { - int i; - int j = -1; - bool used_update = false; - rtx restore_basereg = NULL_RTX; - - if (MEM_P (src) && INT_REGNO_P (reg)) - { - rtx breg; - - if (GET_CODE (XEXP (src, 0)) == PRE_INC - || GET_CODE (XEXP (src, 0)) == PRE_DEC) - { - rtx delta_rtx; - breg = XEXP (XEXP (src, 0), 0); - delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC - ? GEN_INT (GET_MODE_SIZE (GET_MODE (src))) - : GEN_INT (-GET_MODE_SIZE (GET_MODE (src)))); - emit_insn (gen_add3_insn (breg, breg, delta_rtx)); - src = replace_equiv_address (src, breg); - } - else if (! rs6000_offsettable_memref_p (src, reg_mode, true)) - { - if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY) - { - rtx basereg = XEXP (XEXP (src, 0), 0); - if (TARGET_UPDATE) - { - rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0); - emit_insn (gen_rtx_SET (ndst, - gen_rtx_MEM (reg_mode, - XEXP (src, 0)))); - used_update = true; - } - else - emit_insn (gen_rtx_SET (basereg, - XEXP (XEXP (src, 0), 1))); - src = replace_equiv_address (src, basereg); - } - else - { - rtx basereg = gen_rtx_REG (Pmode, reg); - emit_insn (gen_rtx_SET (basereg, XEXP (src, 0))); - src = replace_equiv_address (src, basereg); - } - } - - breg = XEXP (src, 0); - if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM) - breg = XEXP (breg, 0); - - /* If the base register we are using to address memory is - also a destination reg, then change that register last. */ - if (REG_P (breg) - && REGNO (breg) >= REGNO (dst) - && REGNO (breg) < REGNO (dst) + nregs) - j = REGNO (breg) - REGNO (dst); - } - else if (MEM_P (dst) && INT_REGNO_P (reg)) - { - rtx breg; - - if (GET_CODE (XEXP (dst, 0)) == PRE_INC - || GET_CODE (XEXP (dst, 0)) == PRE_DEC) - { - rtx delta_rtx; - breg = XEXP (XEXP (dst, 0), 0); - delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC - ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst))) - : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst)))); - - /* We have to update the breg before doing the store. - Use store with update, if available. */ - - if (TARGET_UPDATE) - { - rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); - emit_insn (TARGET_32BIT - ? (TARGET_POWERPC64 - ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc) - : gen_movsi_si_update (breg, breg, delta_rtx, nsrc)) - : gen_movdi_di_update (breg, breg, delta_rtx, nsrc)); - used_update = true; - } - else - emit_insn (gen_add3_insn (breg, breg, delta_rtx)); - dst = replace_equiv_address (dst, breg); - } - else if (!rs6000_offsettable_memref_p (dst, reg_mode, true) - && GET_CODE (XEXP (dst, 0)) != LO_SUM) - { - if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY) - { - rtx basereg = XEXP (XEXP (dst, 0), 0); - if (TARGET_UPDATE) - { - rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); - emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode, - XEXP (dst, 0)), - nsrc)); - used_update = true; - } - else - emit_insn (gen_rtx_SET (basereg, - XEXP (XEXP (dst, 0), 1))); - dst = replace_equiv_address (dst, basereg); - } - else - { - rtx basereg = XEXP (XEXP (dst, 0), 0); - rtx offsetreg = XEXP (XEXP (dst, 0), 1); - gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS - && REG_P (basereg) - && REG_P (offsetreg) - && REGNO (basereg) != REGNO (offsetreg)); - if (REGNO (basereg) == 0) - { - rtx tmp = offsetreg; - offsetreg = basereg; - basereg = tmp; - } - emit_insn (gen_add3_insn (basereg, basereg, offsetreg)); - restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg); - dst = replace_equiv_address (dst, basereg); - } - } - else if (GET_CODE (XEXP (dst, 0)) != LO_SUM) - gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true)); - } - - /* If we are reading an accumulator register, we have to - deprime it before we can access it. */ - if (TARGET_MMA && REG_P (src) - && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) - emit_insn (gen_mma_xxmfacc (src, src)); - - for (i = 0; i < nregs; i++) - { - /* Calculate index to next subword. */ - ++j; - if (j == nregs) - j = 0; - - /* If compiler already emitted move of first word by - store with update, no need to do anything. */ - if (j == 0 && used_update) - continue; - - /* XO/OO are opaque so cannot use subregs. */ - if (mode == OOmode || mode == XOmode ) - { - rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j); - rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j); - emit_insn (gen_rtx_SET (dst_i, src_i)); - } - else - emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, - j * reg_mode_size), - simplify_gen_subreg (reg_mode, src, mode, - j * reg_mode_size))); - } - - /* If we are writing an accumulator register, we have to - prime it after we've written it. */ - if (TARGET_MMA && REG_P (dst) - && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) - emit_insn (gen_mma_xxmtacc (dst, dst)); - - if (restore_basereg != NULL_RTX) - emit_insn (restore_basereg); - } -} - -static GTY(()) alias_set_type TOC_alias_set = -1; - -alias_set_type -get_TOC_alias_set (void) -{ - if (TOC_alias_set == -1) - TOC_alias_set = new_alias_set (); - return TOC_alias_set; -} - -/* The mode the ABI uses for a word. This is not the same as word_mode - for -m32 -mpowerpc64. This is used to implement various target hooks. */ - -static scalar_int_mode -rs6000_abi_word_mode (void) -{ - return TARGET_32BIT ? SImode : DImode; -} - -/* Implement the TARGET_OFFLOAD_OPTIONS hook. */ -static char * -rs6000_offload_options (void) -{ - if (TARGET_64BIT) - return xstrdup ("-foffload-abi=lp64"); - else - return xstrdup ("-foffload-abi=ilp32"); -} - - -/* A quick summary of the various types of 'constant-pool tables' - under PowerPC: - - Target Flags Name One table per - AIX (none) AIX TOC object file - AIX -mfull-toc AIX TOC object file - AIX -mminimal-toc AIX minimal TOC translation unit - SVR4/EABI (none) SVR4 SDATA object file - SVR4/EABI -fpic SVR4 pic object file - SVR4/EABI -fPIC SVR4 PIC translation unit - SVR4/EABI -mrelocatable EABI TOC function - SVR4/EABI -maix AIX TOC object file - SVR4/EABI -maix -mminimal-toc - AIX minimal TOC translation unit - - Name Reg. Set by entries contains: - made by addrs? fp? sum? - - AIX TOC 2 crt0 as Y option option - AIX minimal TOC 30 prolog gcc Y Y option - SVR4 SDATA 13 crt0 gcc N Y N - SVR4 pic 30 prolog ld Y not yet N - SVR4 PIC 30 prolog gcc Y option option - EABI TOC 30 prolog gcc Y option option - -*/ - -/* Hash functions for the hash table. */ - -static unsigned -rs6000_hash_constant (rtx k) -{ - enum rtx_code code = GET_CODE (k); - machine_mode mode = GET_MODE (k); - unsigned result = (code << 3) ^ mode; - const char *format; - int flen, fidx; - - format = GET_RTX_FORMAT (code); - flen = strlen (format); - fidx = 0; - - switch (code) - { - case LABEL_REF: - return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0)); - - case CONST_WIDE_INT: - { - int i; - flen = CONST_WIDE_INT_NUNITS (k); - for (i = 0; i < flen; i++) - result = result * 613 + CONST_WIDE_INT_ELT (k, i); - return result; - } - - case CONST_DOUBLE: - return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result; - - case CODE_LABEL: - fidx = 3; - break; - - default: - break; - } - - for (; fidx < flen; fidx++) - switch (format[fidx]) - { - case 's': - { - unsigned i, len; - const char *str = XSTR (k, fidx); - len = strlen (str); - result = result * 613 + len; - for (i = 0; i < len; i++) - result = result * 613 + (unsigned) str[i]; - break; - } - case 'u': - case 'e': - result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx)); - break; - case 'i': - case 'n': - result = result * 613 + (unsigned) XINT (k, fidx); - break; - case 'w': - if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT)) - result = result * 613 + (unsigned) XWINT (k, fidx); - else - { - size_t i; - for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++) - result = result * 613 + (unsigned) (XWINT (k, fidx) - >> CHAR_BIT * i); - } - break; - case '0': - break; - default: - gcc_unreachable (); - } - - return result; -} - -hashval_t -toc_hasher::hash (toc_hash_struct *thc) -{ - return rs6000_hash_constant (thc->key) ^ thc->key_mode; -} - -/* Compare H1 and H2 for equivalence. */ - -bool -toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2) -{ - rtx r1 = h1->key; - rtx r2 = h2->key; +bool +toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2) +{ + rtx r1 = h1->key; + rtx r2 = h2->key; if (h1->key_mode != h2->key_mode) return 0; @@ -26397,538 +26021,913 @@ prefixed_load_p (rtx_insn *insn) if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn)) return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT); else - return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed); + return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed); +} + +/* Whether a store instruction is a prefixed instruction. This is called from + the prefixed attribute processing. */ + +bool +prefixed_store_p (rtx_insn *insn) +{ + /* Validate the insn to make sure it is a normal store insn. */ + extract_insn_cached (insn); + if (recog_data.n_operands < 2) + return false; + + rtx mem = recog_data.operand[0]; + rtx reg = recog_data.operand[1]; + + if (!REG_P (reg) && !SUBREG_P (reg)) + return false; + + if (!MEM_P (mem)) + return false; + + /* Prefixed store instructions do not support update or indexed forms. */ + if (get_attr_indexed (insn) == INDEXED_YES + || get_attr_update (insn) == UPDATE_YES) + return false; + + machine_mode mem_mode = GET_MODE (mem); + rtx addr = XEXP (mem, 0); + enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode); + + /* Need to make sure we aren't looking at a stfs which doesn't look + like the other things reg_to_non_prefixed/address_is_prefixed + looks for. */ + if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn)) + return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT); + else + return address_is_prefixed (addr, mem_mode, non_prefixed); +} + +/* Whether a load immediate or add instruction is a prefixed instruction. This + is called from the prefixed attribute processing. */ + +bool +prefixed_paddi_p (rtx_insn *insn) +{ + rtx set = single_set (insn); + if (!set) + return false; + + rtx dest = SET_DEST (set); + rtx src = SET_SRC (set); + + if (!REG_P (dest) && !SUBREG_P (dest)) + return false; + + /* Is this a load immediate that can't be done with a simple ADDI or + ADDIS? */ + if (CONST_INT_P (src)) + return (satisfies_constraint_eI (src) + && !satisfies_constraint_I (src) + && !satisfies_constraint_L (src)); + + /* Is this a PADDI instruction that can't be done with a simple ADDI or + ADDIS? */ + if (GET_CODE (src) == PLUS) + { + rtx op1 = XEXP (src, 1); + + return (CONST_INT_P (op1) + && satisfies_constraint_eI (op1) + && !satisfies_constraint_I (op1) + && !satisfies_constraint_L (op1)); + } + + /* If not, is it a load of a PC-relative address? */ + if (!TARGET_PCREL || GET_MODE (dest) != Pmode) + return false; + + if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST) + return false; + + enum insn_form iform = address_to_insn_form (src, Pmode, + NON_PREFIXED_DEFAULT); + + return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL); +} + +/* Whether the next instruction needs a 'p' prefix issued before the + instruction is printed out. */ +static bool prepend_p_to_next_insn; + +/* Define FINAL_PRESCAN_INSN if some processing needs to be done before + outputting the assembler code. On the PowerPC, we remember if the current + insn is a prefixed insn where we need to emit a 'p' before the insn. + + In addition, if the insn is part of a PC-relative reference to an external + label optimization, this is recorded also. */ +void +rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int) +{ + prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn) + == MAYBE_PREFIXED_YES + && get_attr_prefixed (insn) == PREFIXED_YES); + return; +} + +/* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode. + We use it to emit a 'p' for prefixed insns that is set in + FINAL_PRESCAN_INSN. */ +void +rs6000_asm_output_opcode (FILE *stream) +{ + if (prepend_p_to_next_insn) + { + fprintf (stream, "p"); + + /* Reset the flag in the case where there are separate insn lines in the + sequence, so the 'p' is only emitted for the first line. This shows up + when we are doing the PCREL_OPT optimization, in that the label created + with %r would have a leading 'p' printed. */ + prepend_p_to_next_insn = false; + } + + return; +} + +/* Emit the relocation to tie the next instruction to a previous instruction + that loads up an external address. This is used to do the PCREL_OPT + optimization. Note, the label is generated after the PLD of the got + pc-relative address to allow for the assembler to insert NOPs before the PLD + instruction. The operand is a constant integer that is the label + number. */ + +void +output_pcrel_opt_reloc (rtx label_num) +{ + rtx operands[1] = { label_num }; + output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)", + operands); +} + +/* Adjust the length of an INSN. LENGTH is the currently-computed length and + should be adjusted to reflect any required changes. This macro is used when + there is some systematic length adjustment required that would be difficult + to express in the length attribute. + + In the PowerPC, we use this to adjust the length of an instruction if one or + more prefixed instructions are generated, using the attribute + num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the + hardware requires that a prefied instruciton does not cross a 64-byte + boundary. This means the compiler has to assume the length of the first + prefixed instruction is 12 bytes instead of 8 bytes. Since the length is + already set for the non-prefixed instruction, we just need to udpate for the + difference. */ + +int +rs6000_adjust_insn_length (rtx_insn *insn, int length) +{ + if (TARGET_PREFIXED && NONJUMP_INSN_P (insn)) + { + rtx pattern = PATTERN (insn); + if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER + && get_attr_prefixed (insn) == PREFIXED_YES) + { + int num_prefixed = get_attr_max_prefixed_insns (insn); + length += 4 * (num_prefixed + 1); + } + } + + return length; +} + + +#ifdef HAVE_GAS_HIDDEN +# define USE_HIDDEN_LINKONCE 1 +#else +# define USE_HIDDEN_LINKONCE 0 +#endif + +/* Fills in the label name that should be used for a 476 link stack thunk. */ + +void +get_ppc476_thunk_name (char name[32]) +{ + gcc_assert (TARGET_LINK_STACK); + + if (USE_HIDDEN_LINKONCE) + sprintf (name, "__ppc476.get_thunk"); + else + ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0); } -/* Whether a store instruction is a prefixed instruction. This is called from - the prefixed attribute processing. */ +/* This function emits the simple thunk routine that is used to preserve + the link stack on the 476 cpu. */ -bool -prefixed_store_p (rtx_insn *insn) +static void rs6000_code_end (void) ATTRIBUTE_UNUSED; +static void +rs6000_code_end (void) { - /* Validate the insn to make sure it is a normal store insn. */ - extract_insn_cached (insn); - if (recog_data.n_operands < 2) - return false; + char name[32]; + tree decl; - rtx mem = recog_data.operand[0]; - rtx reg = recog_data.operand[1]; + if (!TARGET_LINK_STACK) + return; - if (!REG_P (reg) && !SUBREG_P (reg)) - return false; + get_ppc476_thunk_name (name); - if (!MEM_P (mem)) - return false; + decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name), + build_function_type_list (void_type_node, NULL_TREE)); + DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, + NULL_TREE, void_type_node); + TREE_PUBLIC (decl) = 1; + TREE_STATIC (decl) = 1; - /* Prefixed store instructions do not support update or indexed forms. */ - if (get_attr_indexed (insn) == INDEXED_YES - || get_attr_update (insn) == UPDATE_YES) - return false; +#if RS6000_WEAK + if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF) + { + cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); + targetm.asm_out.unique_section (decl, 0); + switch_to_section (get_named_section (decl, NULL, 0)); + DECL_WEAK (decl) = 1; + ASM_WEAKEN_DECL (asm_out_file, decl, name, 0); + targetm.asm_out.globalize_label (asm_out_file, name); + targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN); + ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); + } + else +#endif + { + switch_to_section (text_section); + ASM_OUTPUT_LABEL (asm_out_file, name); + } - machine_mode mem_mode = GET_MODE (mem); - rtx addr = XEXP (mem, 0); - enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode); + DECL_INITIAL (decl) = make_node (BLOCK); + current_function_decl = decl; + allocate_struct_function (decl, false); + init_function_start (decl); + first_function_block_is_cold = false; + /* Make sure unwind info is emitted for the thunk if needed. */ + final_start_function (emit_barrier (), asm_out_file, 1); - /* Need to make sure we aren't looking at a stfs which doesn't look - like the other things reg_to_non_prefixed/address_is_prefixed - looks for. */ - if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn)) - return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT); - else - return address_is_prefixed (addr, mem_mode, non_prefixed); + fputs ("\tblr\n", asm_out_file); + + final_end_function (); + init_insn_lengths (); + free_after_compilation (cfun); + set_cfun (NULL); + current_function_decl = NULL; } -/* Whether a load immediate or add instruction is a prefixed instruction. This - is called from the prefixed attribute processing. */ +/* Add r30 to hard reg set if the prologue sets it up and it is not + pic_offset_table_rtx. */ -bool -prefixed_paddi_p (rtx_insn *insn) +static void +rs6000_set_up_by_prologue (struct hard_reg_set_container *set) { - rtx set = single_set (insn); - if (!set) - return false; + if (!TARGET_SINGLE_PIC_BASE + && TARGET_TOC + && TARGET_MINIMAL_TOC + && !constant_pool_empty_p ()) + add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); + if (cfun->machine->split_stack_argp_used) + add_to_hard_reg_set (&set->set, Pmode, 12); - rtx dest = SET_DEST (set); - rtx src = SET_SRC (set); + /* Make sure the hard reg set doesn't include r2, which was possibly added + via PIC_OFFSET_TABLE_REGNUM. */ + if (TARGET_TOC) + remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM); +} - if (!REG_P (dest) && !SUBREG_P (dest)) - return false; + +/* Helper function for rs6000_split_logical to emit a logical instruction after + spliting the operation to single GPR registers. - /* Is this a load immediate that can't be done with a simple ADDI or - ADDIS? */ - if (CONST_INT_P (src)) - return (satisfies_constraint_eI (src) - && !satisfies_constraint_I (src) - && !satisfies_constraint_L (src)); + DEST is the destination register. + OP1 and OP2 are the input source registers. + CODE is the base operation (AND, IOR, XOR, NOT). + MODE is the machine mode. + If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. + If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. + If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */ - /* Is this a PADDI instruction that can't be done with a simple ADDI or - ADDIS? */ - if (GET_CODE (src) == PLUS) +static void +rs6000_split_logical_inner (rtx dest, + rtx op1, + rtx op2, + enum rtx_code code, + machine_mode mode, + bool complement_final_p, + bool complement_op1_p, + bool complement_op2_p) +{ + rtx bool_rtx; + + /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */ + if (op2 && CONST_INT_P (op2) + && (mode == SImode || (mode == DImode && TARGET_POWERPC64)) + && !complement_final_p && !complement_op1_p && !complement_op2_p) { - rtx op1 = XEXP (src, 1); + HOST_WIDE_INT mask = GET_MODE_MASK (mode); + HOST_WIDE_INT value = INTVAL (op2) & mask; - return (CONST_INT_P (op1) - && satisfies_constraint_eI (op1) - && !satisfies_constraint_I (op1) - && !satisfies_constraint_L (op1)); + /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */ + if (code == AND) + { + if (value == 0) + { + emit_insn (gen_rtx_SET (dest, const0_rtx)); + return; + } + + else if (value == mask) + { + if (!rtx_equal_p (dest, op1)) + emit_insn (gen_rtx_SET (dest, op1)); + return; + } + } + + /* Optimize IOR/XOR of 0 to be a simple move. Split large operations + into separate ORI/ORIS or XORI/XORIS instrucitons. */ + else if (code == IOR || code == XOR) + { + if (value == 0) + { + if (!rtx_equal_p (dest, op1)) + emit_insn (gen_rtx_SET (dest, op1)); + return; + } + } } - /* If not, is it a load of a PC-relative address? */ - if (!TARGET_PCREL || GET_MODE (dest) != Pmode) - return false; + if (code == AND && mode == SImode + && !complement_final_p && !complement_op1_p && !complement_op2_p) + { + emit_insn (gen_andsi3 (dest, op1, op2)); + return; + } - if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST) - return false; + if (complement_op1_p) + op1 = gen_rtx_NOT (mode, op1); - enum insn_form iform = address_to_insn_form (src, Pmode, - NON_PREFIXED_DEFAULT); + if (complement_op2_p) + op2 = gen_rtx_NOT (mode, op2); - return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL); -} + /* For canonical RTL, if only one arm is inverted it is the first. */ + if (!complement_op1_p && complement_op2_p) + std::swap (op1, op2); -/* Whether the next instruction needs a 'p' prefix issued before the - instruction is printed out. */ -static bool prepend_p_to_next_insn; + bool_rtx = ((code == NOT) + ? gen_rtx_NOT (mode, op1) + : gen_rtx_fmt_ee (code, mode, op1, op2)); -/* Define FINAL_PRESCAN_INSN if some processing needs to be done before - outputting the assembler code. On the PowerPC, we remember if the current - insn is a prefixed insn where we need to emit a 'p' before the insn. + if (complement_final_p) + bool_rtx = gen_rtx_NOT (mode, bool_rtx); - In addition, if the insn is part of a PC-relative reference to an external - label optimization, this is recorded also. */ -void -rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int) -{ - prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn) - == MAYBE_PREFIXED_YES - && get_attr_prefixed (insn) == PREFIXED_YES); - return; + emit_insn (gen_rtx_SET (dest, bool_rtx)); } -/* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode. - We use it to emit a 'p' for prefixed insns that is set in - FINAL_PRESCAN_INSN. */ -void -rs6000_asm_output_opcode (FILE *stream) +/* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These + operations are split immediately during RTL generation to allow for more + optimizations of the AND/IOR/XOR. + + OPERANDS is an array containing the destination and two input operands. + CODE is the base operation (AND, IOR, XOR, NOT). + MODE is the machine mode. + If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. + If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. + If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. + CLOBBER_REG is either NULL or a scratch register of type CC to allow + formation of the AND instructions. */ + +static void +rs6000_split_logical_di (rtx operands[3], + enum rtx_code code, + bool complement_final_p, + bool complement_op1_p, + bool complement_op2_p) { - if (prepend_p_to_next_insn) + const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff); + const HOST_WIDE_INT upper_32bits = ~ lower_32bits; + const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000); + enum hi_lo { hi = 0, lo = 1 }; + rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2]; + size_t i; + + op0_hi_lo[hi] = gen_highpart (SImode, operands[0]); + op1_hi_lo[hi] = gen_highpart (SImode, operands[1]); + op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]); + op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]); + + if (code == NOT) + op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX; + else { - fprintf (stream, "p"); + if (!CONST_INT_P (operands[2])) + { + op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]); + op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]); + } + else + { + HOST_WIDE_INT value = INTVAL (operands[2]); + HOST_WIDE_INT value_hi_lo[2]; - /* Reset the flag in the case where there are separate insn lines in the - sequence, so the 'p' is only emitted for the first line. This shows up - when we are doing the PCREL_OPT optimization, in that the label created - with %r would have a leading 'p' printed. */ - prepend_p_to_next_insn = false; - } + gcc_assert (!complement_final_p); + gcc_assert (!complement_op1_p); + gcc_assert (!complement_op2_p); - return; -} + value_hi_lo[hi] = value >> 32; + value_hi_lo[lo] = value & lower_32bits; -/* Emit the relocation to tie the next instruction to a previous instruction - that loads up an external address. This is used to do the PCREL_OPT - optimization. Note, the label is generated after the PLD of the got - pc-relative address to allow for the assembler to insert NOPs before the PLD - instruction. The operand is a constant integer that is the label - number. */ + for (i = 0; i < 2; i++) + { + HOST_WIDE_INT sub_value = value_hi_lo[i]; -void -output_pcrel_opt_reloc (rtx label_num) -{ - rtx operands[1] = { label_num }; - output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)", - operands); -} + if (sub_value & sign_bit) + sub_value |= upper_32bits; -/* Adjust the length of an INSN. LENGTH is the currently-computed length and - should be adjusted to reflect any required changes. This macro is used when - there is some systematic length adjustment required that would be difficult - to express in the length attribute. + op2_hi_lo[i] = GEN_INT (sub_value); - In the PowerPC, we use this to adjust the length of an instruction if one or - more prefixed instructions are generated, using the attribute - num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the - hardware requires that a prefied instruciton does not cross a 64-byte - boundary. This means the compiler has to assume the length of the first - prefixed instruction is 12 bytes instead of 8 bytes. Since the length is - already set for the non-prefixed instruction, we just need to udpate for the - difference. */ + /* If this is an AND instruction, check to see if we need to load + the value in a register. */ + if (code == AND && sub_value != -1 && sub_value != 0 + && !and_operand (op2_hi_lo[i], SImode)) + op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]); + } + } + } -int -rs6000_adjust_insn_length (rtx_insn *insn, int length) -{ - if (TARGET_PREFIXED && NONJUMP_INSN_P (insn)) + for (i = 0; i < 2; i++) { - rtx pattern = PATTERN (insn); - if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER - && get_attr_prefixed (insn) == PREFIXED_YES) + /* Split large IOR/XOR operations. */ + if ((code == IOR || code == XOR) + && CONST_INT_P (op2_hi_lo[i]) + && !complement_final_p + && !complement_op1_p + && !complement_op2_p + && !logical_const_operand (op2_hi_lo[i], SImode)) { - int num_prefixed = get_attr_max_prefixed_insns (insn); - length += 4 * (num_prefixed + 1); + HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]); + HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000); + HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff); + rtx tmp = gen_reg_rtx (SImode); + + /* Make sure the constant is sign extended. */ + if ((hi_16bits & sign_bit) != 0) + hi_16bits |= upper_32bits; + + rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits), + code, SImode, false, false, false); + + rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits), + code, SImode, false, false, false); } + else + rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i], + code, SImode, complement_final_p, + complement_op1_p, complement_op2_p); } - return length; + return; } - -#ifdef HAVE_GAS_HIDDEN -# define USE_HIDDEN_LINKONCE 1 -#else -# define USE_HIDDEN_LINKONCE 0 -#endif +/* Split the insns that make up boolean operations operating on multiple GPR + registers. The boolean MD patterns ensure that the inputs either are + exactly the same as the output registers, or there is no overlap. -/* Fills in the label name that should be used for a 476 link stack thunk. */ + OPERANDS is an array containing the destination and two input operands. + CODE is the base operation (AND, IOR, XOR, NOT). + If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. + If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. + If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */ void -get_ppc476_thunk_name (char name[32]) +rs6000_split_logical (rtx operands[3], + enum rtx_code code, + bool complement_final_p, + bool complement_op1_p, + bool complement_op2_p) { - gcc_assert (TARGET_LINK_STACK); - - if (USE_HIDDEN_LINKONCE) - sprintf (name, "__ppc476.get_thunk"); - else - ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0); -} + machine_mode mode = GET_MODE (operands[0]); + machine_mode sub_mode; + rtx op0, op1, op2; + int sub_size, regno0, regno1, nregs, i; -/* This function emits the simple thunk routine that is used to preserve - the link stack on the 476 cpu. */ + /* If this is DImode, use the specialized version that can run before + register allocation. */ + if (mode == DImode && !TARGET_POWERPC64) + { + rs6000_split_logical_di (operands, code, complement_final_p, + complement_op1_p, complement_op2_p); + return; + } -static void rs6000_code_end (void) ATTRIBUTE_UNUSED; -static void -rs6000_code_end (void) -{ - char name[32]; - tree decl; + op0 = operands[0]; + op1 = operands[1]; + op2 = (code == NOT) ? NULL_RTX : operands[2]; + sub_mode = (TARGET_POWERPC64) ? DImode : SImode; + sub_size = GET_MODE_SIZE (sub_mode); + regno0 = REGNO (op0); + regno1 = REGNO (op1); - if (!TARGET_LINK_STACK) - return; + gcc_assert (reload_completed); + gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO)); + gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO)); - get_ppc476_thunk_name (name); + nregs = rs6000_hard_regno_nregs[(int)mode][regno0]; + gcc_assert (nregs > 1); - decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name), - build_function_type_list (void_type_node, NULL_TREE)); - DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, - NULL_TREE, void_type_node); - TREE_PUBLIC (decl) = 1; - TREE_STATIC (decl) = 1; + if (op2 && REG_P (op2)) + gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO)); -#if RS6000_WEAK - if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF) - { - cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); - targetm.asm_out.unique_section (decl, 0); - switch_to_section (get_named_section (decl, NULL, 0)); - DECL_WEAK (decl) = 1; - ASM_WEAKEN_DECL (asm_out_file, decl, name, 0); - targetm.asm_out.globalize_label (asm_out_file, name); - targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN); - ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); - } - else -#endif + for (i = 0; i < nregs; i++) { - switch_to_section (text_section); - ASM_OUTPUT_LABEL (asm_out_file, name); - } - - DECL_INITIAL (decl) = make_node (BLOCK); - current_function_decl = decl; - allocate_struct_function (decl, false); - init_function_start (decl); - first_function_block_is_cold = false; - /* Make sure unwind info is emitted for the thunk if needed. */ - final_start_function (emit_barrier (), asm_out_file, 1); + int offset = i * sub_size; + rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset); + rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset); + rtx sub_op2 = ((code == NOT) + ? NULL_RTX + : simplify_subreg (sub_mode, op2, mode, offset)); - fputs ("\tblr\n", asm_out_file); + rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode, + complement_final_p, complement_op1_p, + complement_op2_p); + } - final_end_function (); - init_insn_lengths (); - free_after_compilation (cfun); - set_cfun (NULL); - current_function_decl = NULL; + return; } -/* Add r30 to hard reg set if the prologue sets it up and it is not - pic_offset_table_rtx. */ +/* Emit instructions to move SRC to DST. Called by splitters for + multi-register moves. It will emit at most one instruction for + each register that is accessed; that is, it won't emit li/lis pairs + (or equivalent for 64-bit code). One of SRC or DST must be a hard + register. */ -static void -rs6000_set_up_by_prologue (struct hard_reg_set_container *set) +void +rs6000_split_multireg_move (rtx dst, rtx src) { - if (!TARGET_SINGLE_PIC_BASE - && TARGET_TOC - && TARGET_MINIMAL_TOC - && !constant_pool_empty_p ()) - add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); - if (cfun->machine->split_stack_argp_used) - add_to_hard_reg_set (&set->set, Pmode, 12); + /* The register number of the first register being moved. */ + int reg; + /* The mode that is to be moved. */ + machine_mode mode; + /* The mode that the move is being done in, and its size. */ + machine_mode reg_mode; + int reg_mode_size; + /* The number of registers that will be moved. */ + int nregs; - /* Make sure the hard reg set doesn't include r2, which was possibly added - via PIC_OFFSET_TABLE_REGNUM. */ - if (TARGET_TOC) - remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM); -} + reg = REG_P (dst) ? REGNO (dst) : REGNO (src); + mode = GET_MODE (dst); + nregs = hard_regno_nregs (reg, mode); - -/* Helper function for rs6000_split_logical to emit a logical instruction after - spliting the operation to single GPR registers. + /* If we have a vector quad register for MMA, and this is a load or store, + see if we can use vector paired load/stores. */ + if (mode == XOmode && TARGET_MMA + && (MEM_P (dst) || MEM_P (src))) + { + reg_mode = OOmode; + nregs /= 2; + } + /* If we have a vector pair/quad mode, split it into two/four separate + vectors. */ + else if (mode == OOmode || mode == XOmode) + reg_mode = V1TImode; + else if (FP_REGNO_P (reg)) + reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : + (TARGET_HARD_FLOAT ? DFmode : SFmode); + else if (ALTIVEC_REGNO_P (reg)) + reg_mode = V16QImode; + else + reg_mode = word_mode; + reg_mode_size = GET_MODE_SIZE (reg_mode); - DEST is the destination register. - OP1 and OP2 are the input source registers. - CODE is the base operation (AND, IOR, XOR, NOT). - MODE is the machine mode. - If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. - If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. - If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */ + gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode)); -static void -rs6000_split_logical_inner (rtx dest, - rtx op1, - rtx op2, - enum rtx_code code, - machine_mode mode, - bool complement_final_p, - bool complement_op1_p, - bool complement_op2_p) -{ - rtx bool_rtx; + /* TDmode residing in FP registers is special, since the ISA requires that + the lower-numbered word of a register pair is always the most significant + word, even in little-endian mode. This does not match the usual subreg + semantics, so we cannnot use simplify_gen_subreg in those cases. Access + the appropriate constituent registers "by hand" in little-endian mode. - /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */ - if (op2 && CONST_INT_P (op2) - && (mode == SImode || (mode == DImode && TARGET_POWERPC64)) - && !complement_final_p && !complement_op1_p && !complement_op2_p) + Note we do not need to check for destructive overlap here since TDmode + can only reside in even/odd register pairs. */ + if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN) { - HOST_WIDE_INT mask = GET_MODE_MASK (mode); - HOST_WIDE_INT value = INTVAL (op2) & mask; + rtx p_src, p_dst; + int i; - /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */ - if (code == AND) + for (i = 0; i < nregs; i++) { - if (value == 0) - { - emit_insn (gen_rtx_SET (dest, const0_rtx)); - return; - } + if (REG_P (src) && FP_REGNO_P (REGNO (src))) + p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i); + else + p_src = simplify_gen_subreg (reg_mode, src, mode, + i * reg_mode_size); - else if (value == mask) - { - if (!rtx_equal_p (dest, op1)) - emit_insn (gen_rtx_SET (dest, op1)); - return; - } - } + if (REG_P (dst) && FP_REGNO_P (REGNO (dst))) + p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i); + else + p_dst = simplify_gen_subreg (reg_mode, dst, mode, + i * reg_mode_size); - /* Optimize IOR/XOR of 0 to be a simple move. Split large operations - into separate ORI/ORIS or XORI/XORIS instrucitons. */ - else if (code == IOR || code == XOR) - { - if (value == 0) - { - if (!rtx_equal_p (dest, op1)) - emit_insn (gen_rtx_SET (dest, op1)); - return; - } + emit_insn (gen_rtx_SET (p_dst, p_src)); } - } - if (code == AND && mode == SImode - && !complement_final_p && !complement_op1_p && !complement_op2_p) - { - emit_insn (gen_andsi3 (dest, op1, op2)); return; } - if (complement_op1_p) - op1 = gen_rtx_NOT (mode, op1); - - if (complement_op2_p) - op2 = gen_rtx_NOT (mode, op2); - - /* For canonical RTL, if only one arm is inverted it is the first. */ - if (!complement_op1_p && complement_op2_p) - std::swap (op1, op2); - - bool_rtx = ((code == NOT) - ? gen_rtx_NOT (mode, op1) - : gen_rtx_fmt_ee (code, mode, op1, op2)); + /* The __vector_pair and __vector_quad modes are multi-register + modes, so if we have to load or store the registers, we have to be + careful to properly swap them if we're in little endian mode + below. This means the last register gets the first memory + location. We also need to be careful of using the right register + numbers if we are splitting XO to OO. */ + if (mode == OOmode || mode == XOmode) + { + nregs = hard_regno_nregs (reg, mode); + int reg_mode_nregs = hard_regno_nregs (reg, reg_mode); + if (MEM_P (dst)) + { + unsigned offset = 0; + unsigned size = GET_MODE_SIZE (reg_mode); - if (complement_final_p) - bool_rtx = gen_rtx_NOT (mode, bool_rtx); + /* If we are reading an accumulator register, we have to + deprime it before we can access it. */ + if (TARGET_MMA + && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) + emit_insn (gen_mma_xxmfacc (src, src)); - emit_insn (gen_rtx_SET (dest, bool_rtx)); -} + for (int i = 0; i < nregs; i += reg_mode_nregs) + { + unsigned subreg = + (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i); + rtx dst2 = adjust_address (dst, reg_mode, offset); + rtx src2 = gen_rtx_REG (reg_mode, reg + subreg); + offset += size; + emit_insn (gen_rtx_SET (dst2, src2)); + } -/* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These - operations are split immediately during RTL generation to allow for more - optimizations of the AND/IOR/XOR. + return; + } - OPERANDS is an array containing the destination and two input operands. - CODE is the base operation (AND, IOR, XOR, NOT). - MODE is the machine mode. - If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. - If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. - If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. - CLOBBER_REG is either NULL or a scratch register of type CC to allow - formation of the AND instructions. */ + if (MEM_P (src)) + { + unsigned offset = 0; + unsigned size = GET_MODE_SIZE (reg_mode); -static void -rs6000_split_logical_di (rtx operands[3], - enum rtx_code code, - bool complement_final_p, - bool complement_op1_p, - bool complement_op2_p) -{ - const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff); - const HOST_WIDE_INT upper_32bits = ~ lower_32bits; - const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000); - enum hi_lo { hi = 0, lo = 1 }; - rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2]; - size_t i; + for (int i = 0; i < nregs; i += reg_mode_nregs) + { + unsigned subreg = + (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i); + rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg); + rtx src2 = adjust_address (src, reg_mode, offset); + offset += size; + emit_insn (gen_rtx_SET (dst2, src2)); + } - op0_hi_lo[hi] = gen_highpart (SImode, operands[0]); - op1_hi_lo[hi] = gen_highpart (SImode, operands[1]); - op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]); - op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]); + /* If we are writing an accumulator register, we have to + prime it after we've written it. */ + if (TARGET_MMA + && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) + emit_insn (gen_mma_xxmtacc (dst, dst)); - if (code == NOT) - op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX; - else - { - if (!CONST_INT_P (operands[2])) - { - op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]); - op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]); + return; } - else + + if (GET_CODE (src) == UNSPEC) { - HOST_WIDE_INT value = INTVAL (operands[2]); - HOST_WIDE_INT value_hi_lo[2]; + gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE); + gcc_assert (REG_P (dst)); + if (GET_MODE (src) == XOmode) + gcc_assert (FP_REGNO_P (REGNO (dst))); + if (GET_MODE (src) == OOmode) + gcc_assert (VSX_REGNO_P (REGNO (dst))); - gcc_assert (!complement_final_p); - gcc_assert (!complement_op1_p); - gcc_assert (!complement_op2_p); + reg_mode = GET_MODE (XVECEXP (src, 0, 0)); + int nvecs = XVECLEN (src, 0); + for (int i = 0; i < nvecs; i++) + { + int index = WORDS_BIG_ENDIAN ? i : nvecs - 1 - i; + rtx dst_i = gen_rtx_REG (reg_mode, reg + index); + emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i))); + } - value_hi_lo[hi] = value >> 32; - value_hi_lo[lo] = value & lower_32bits; + /* We are writing an accumulator register, so we have to + prime it after we've written it. */ + if (GET_MODE (src) == XOmode) + emit_insn (gen_mma_xxmtacc (dst, dst)); - for (i = 0; i < 2; i++) - { - HOST_WIDE_INT sub_value = value_hi_lo[i]; + return; + } - if (sub_value & sign_bit) - sub_value |= upper_32bits; + /* Register -> register moves can use common code. */ + } - op2_hi_lo[i] = GEN_INT (sub_value); + if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst))) + { + /* If we are reading an accumulator register, we have to + deprime it before we can access it. */ + if (TARGET_MMA + && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) + emit_insn (gen_mma_xxmfacc (src, src)); - /* If this is an AND instruction, check to see if we need to load - the value in a register. */ - if (code == AND && sub_value != -1 && sub_value != 0 - && !and_operand (op2_hi_lo[i], SImode)) - op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]); + /* Move register range backwards, if we might have destructive + overlap. */ + int i; + /* XO/OO are opaque so cannot use subregs. */ + if (mode == OOmode || mode == XOmode ) + { + for (i = nregs - 1; i >= 0; i--) + { + rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i); + rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i); + emit_insn (gen_rtx_SET (dst_i, src_i)); } } - } + else + { + for (i = nregs - 1; i >= 0; i--) + emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, + i * reg_mode_size), + simplify_gen_subreg (reg_mode, src, mode, + i * reg_mode_size))); + } - for (i = 0; i < 2; i++) + /* If we are writing an accumulator register, we have to + prime it after we've written it. */ + if (TARGET_MMA + && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) + emit_insn (gen_mma_xxmtacc (dst, dst)); + } + else { - /* Split large IOR/XOR operations. */ - if ((code == IOR || code == XOR) - && CONST_INT_P (op2_hi_lo[i]) - && !complement_final_p - && !complement_op1_p - && !complement_op2_p - && !logical_const_operand (op2_hi_lo[i], SImode)) + int i; + int j = -1; + bool used_update = false; + rtx restore_basereg = NULL_RTX; + + if (MEM_P (src) && INT_REGNO_P (reg)) { - HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]); - HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000); - HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff); - rtx tmp = gen_reg_rtx (SImode); + rtx breg; - /* Make sure the constant is sign extended. */ - if ((hi_16bits & sign_bit) != 0) - hi_16bits |= upper_32bits; + if (GET_CODE (XEXP (src, 0)) == PRE_INC + || GET_CODE (XEXP (src, 0)) == PRE_DEC) + { + rtx delta_rtx; + breg = XEXP (XEXP (src, 0), 0); + delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC + ? GEN_INT (GET_MODE_SIZE (GET_MODE (src))) + : GEN_INT (-GET_MODE_SIZE (GET_MODE (src)))); + emit_insn (gen_add3_insn (breg, breg, delta_rtx)); + src = replace_equiv_address (src, breg); + } + else if (! rs6000_offsettable_memref_p (src, reg_mode, true)) + { + if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY) + { + rtx basereg = XEXP (XEXP (src, 0), 0); + if (TARGET_UPDATE) + { + rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0); + emit_insn (gen_rtx_SET (ndst, + gen_rtx_MEM (reg_mode, + XEXP (src, 0)))); + used_update = true; + } + else + emit_insn (gen_rtx_SET (basereg, + XEXP (XEXP (src, 0), 1))); + src = replace_equiv_address (src, basereg); + } + else + { + rtx basereg = gen_rtx_REG (Pmode, reg); + emit_insn (gen_rtx_SET (basereg, XEXP (src, 0))); + src = replace_equiv_address (src, basereg); + } + } - rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits), - code, SImode, false, false, false); + breg = XEXP (src, 0); + if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM) + breg = XEXP (breg, 0); - rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits), - code, SImode, false, false, false); + /* If the base register we are using to address memory is + also a destination reg, then change that register last. */ + if (REG_P (breg) + && REGNO (breg) >= REGNO (dst) + && REGNO (breg) < REGNO (dst) + nregs) + j = REGNO (breg) - REGNO (dst); } - else - rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i], - code, SImode, complement_final_p, - complement_op1_p, complement_op2_p); - } - - return; -} - -/* Split the insns that make up boolean operations operating on multiple GPR - registers. The boolean MD patterns ensure that the inputs either are - exactly the same as the output registers, or there is no overlap. + else if (MEM_P (dst) && INT_REGNO_P (reg)) + { + rtx breg; - OPERANDS is an array containing the destination and two input operands. - CODE is the base operation (AND, IOR, XOR, NOT). - If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. - If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. - If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */ + if (GET_CODE (XEXP (dst, 0)) == PRE_INC + || GET_CODE (XEXP (dst, 0)) == PRE_DEC) + { + rtx delta_rtx; + breg = XEXP (XEXP (dst, 0), 0); + delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC + ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst))) + : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst)))); -void -rs6000_split_logical (rtx operands[3], - enum rtx_code code, - bool complement_final_p, - bool complement_op1_p, - bool complement_op2_p) -{ - machine_mode mode = GET_MODE (operands[0]); - machine_mode sub_mode; - rtx op0, op1, op2; - int sub_size, regno0, regno1, nregs, i; + /* We have to update the breg before doing the store. + Use store with update, if available. */ - /* If this is DImode, use the specialized version that can run before - register allocation. */ - if (mode == DImode && !TARGET_POWERPC64) - { - rs6000_split_logical_di (operands, code, complement_final_p, - complement_op1_p, complement_op2_p); - return; - } + if (TARGET_UPDATE) + { + rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); + emit_insn (TARGET_32BIT + ? (TARGET_POWERPC64 + ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc) + : gen_movsi_si_update (breg, breg, delta_rtx, nsrc)) + : gen_movdi_di_update (breg, breg, delta_rtx, nsrc)); + used_update = true; + } + else + emit_insn (gen_add3_insn (breg, breg, delta_rtx)); + dst = replace_equiv_address (dst, breg); + } + else if (!rs6000_offsettable_memref_p (dst, reg_mode, true) + && GET_CODE (XEXP (dst, 0)) != LO_SUM) + { + if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY) + { + rtx basereg = XEXP (XEXP (dst, 0), 0); + if (TARGET_UPDATE) + { + rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); + emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode, + XEXP (dst, 0)), + nsrc)); + used_update = true; + } + else + emit_insn (gen_rtx_SET (basereg, + XEXP (XEXP (dst, 0), 1))); + dst = replace_equiv_address (dst, basereg); + } + else + { + rtx basereg = XEXP (XEXP (dst, 0), 0); + rtx offsetreg = XEXP (XEXP (dst, 0), 1); + gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS + && REG_P (basereg) + && REG_P (offsetreg) + && REGNO (basereg) != REGNO (offsetreg)); + if (REGNO (basereg) == 0) + { + rtx tmp = offsetreg; + offsetreg = basereg; + basereg = tmp; + } + emit_insn (gen_add3_insn (basereg, basereg, offsetreg)); + restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg); + dst = replace_equiv_address (dst, basereg); + } + } + else if (GET_CODE (XEXP (dst, 0)) != LO_SUM) + gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true)); + } - op0 = operands[0]; - op1 = operands[1]; - op2 = (code == NOT) ? NULL_RTX : operands[2]; - sub_mode = (TARGET_POWERPC64) ? DImode : SImode; - sub_size = GET_MODE_SIZE (sub_mode); - regno0 = REGNO (op0); - regno1 = REGNO (op1); + /* If we are reading an accumulator register, we have to + deprime it before we can access it. */ + if (TARGET_MMA && REG_P (src) + && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) + emit_insn (gen_mma_xxmfacc (src, src)); - gcc_assert (reload_completed); - gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO)); - gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO)); + for (i = 0; i < nregs; i++) + { + /* Calculate index to next subword. */ + ++j; + if (j == nregs) + j = 0; - nregs = rs6000_hard_regno_nregs[(int)mode][regno0]; - gcc_assert (nregs > 1); + /* If compiler already emitted move of first word by + store with update, no need to do anything. */ + if (j == 0 && used_update) + continue; - if (op2 && REG_P (op2)) - gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO)); + /* XO/OO are opaque so cannot use subregs. */ + if (mode == OOmode || mode == XOmode ) + { + rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j); + rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j); + emit_insn (gen_rtx_SET (dst_i, src_i)); + } + else + emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, + j * reg_mode_size), + simplify_gen_subreg (reg_mode, src, mode, + j * reg_mode_size))); + } - for (i = 0; i < nregs; i++) - { - int offset = i * sub_size; - rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset); - rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset); - rtx sub_op2 = ((code == NOT) - ? NULL_RTX - : simplify_subreg (sub_mode, op2, mode, offset)); + /* If we are writing an accumulator register, we have to + prime it after we've written it. */ + if (TARGET_MMA && REG_P (dst) + && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) + emit_insn (gen_mma_xxmtacc (dst, dst)); - rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode, - complement_final_p, complement_op1_p, - complement_op2_p); + if (restore_basereg != NULL_RTX) + emit_insn (restore_basereg); } - - return; } - /* Return true if the peephole2 can combine a load involving a combination of an addis instruction and a load with an offset that can be fused together on