From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id A199D3858C39; Thu, 29 Feb 2024 19:21:50 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org A199D3858C39 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1709234510; bh=znTIxdHklOBJqn9eF3/c+eAzUZ4qDVC+dy9PCpa4NUE=; h=From:To:Subject:Date:From; b=opEHRzcf+JnXuXW6yyl2OpZxBMi6+7utMHK1TqK2VwQ3UxgLHirqkvT1yUmkliLvP urwxN3irOURjikyF+9kur9VXyFc/p/xJ6OnNR5yp1Sw5J4XIYc/NslwojahCbHibHL kVpaBAue2YsSvsZGfjDutMYV6K5DgKGsLnz3jXy8= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work161-dmf)] Revert changes X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/work161-dmf X-Git-Oldrev: 277c483c11404c57e2fecb22993ceeb8e590658a X-Git-Newrev: 80553f00623c0549c880e9e1a2b5e0eec737ddc8 Message-Id: <20240229192150.A199D3858C39@sourceware.org> Date: Thu, 29 Feb 2024 19:21:50 +0000 (GMT) List-Id: https://gcc.gnu.org/g:80553f00623c0549c880e9e1a2b5e0eec737ddc8 commit 80553f00623c0549c880e9e1a2b5e0eec737ddc8 Author: Michael Meissner Date: Thu Feb 29 14:21:46 2024 -0500 Revert changes Diff: --- gcc/ChangeLog.dmf | 319 ++++++++++ gcc/config/rs6000/constraints.md | 3 - gcc/config/rs6000/mma.md | 688 ++++++---------------- gcc/config/rs6000/predicates.md | 32 - gcc/config/rs6000/rs6000-builtin.cc | 17 - gcc/config/rs6000/rs6000-c.cc | 3 - gcc/config/rs6000/rs6000-call.cc | 10 +- gcc/config/rs6000/rs6000-cpus.def | 2 - gcc/config/rs6000/rs6000-modes.def | 4 - gcc/config/rs6000/rs6000.cc | 323 ++-------- gcc/config/rs6000/rs6000.h | 51 +- gcc/config/rs6000/rs6000.md | 12 +- gcc/doc/md.texi | 7 - gcc/testsuite/gcc.target/powerpc/dm-1024bit.c | 63 -- gcc/testsuite/gcc.target/powerpc/dm-double-test.c | 194 ------ gcc/testsuite/lib/target-supports.exp | 19 - 16 files changed, 578 
insertions(+), 1169 deletions(-) diff --git a/gcc/ChangeLog.dmf b/gcc/ChangeLog.dmf index 6f673db33f1..741b9ed82ba 100644 --- a/gcc/ChangeLog.dmf +++ b/gcc/ChangeLog.dmf @@ -1,5 +1,324 @@ +==================== Branch work161-dmf, patch #104 ==================== + +PowerPC: Add support for 1,024 bit DMR registers. + +This patch is a preliminary patch to add the full 1,024 bit dense math registers +(DMRs) for -mcpu=future. The MMA 512-bit accumulators map onto the top of the +DMR register. + +This patch only adds the new 1,024 bit register support. It does not add +support for any instructions that need 1,024 bit registers instead of 512 bit +registers. + +I used the new mode 'TDOmode' to be the opaque mode used for 1,024 bit +registers. The 'wD' constraint added in previous patches is used for these +registers. I added support to do load and store of DMRs via the VSX registers, +since there are no load/store dense math instructions. I added the new keyword +'__dmr' to create 1,024 bit types that can be loaded into DMRs. At present, I +don't have aliases for __dmr512 and __dmr1024 that we've discussed internally. + +The patches have been tested on both little and big endian systems. Can I check +it into the master branch? + +2024-02-29 Michael Meissner + +gcc/ + + * config/rs6000/mma.md (UNSPEC_DM_INSERT512_UPPER): New unspec. + (UNSPEC_DM_INSERT512_LOWER): Likewise. + (UNSPEC_DM_EXTRACT512): Likewise. + (UNSPEC_DMR_RELOAD_FROM_MEMORY): Likewise. + (UNSPEC_DMR_RELOAD_TO_MEMORY): Likewise. + (movtdo): New define_expand and define_insn_and_split to implement 1,024 + bit DMR registers. + (movtdo_insert512_upper): New insn. + (movtdo_insert512_lower): Likewise. + (movtdo_extract512): Likewise. + (reload_dmr_from_memory): Likewise. + (reload_dmr_to_memory): Likewise. + * config/rs6000/rs6000-builtin.cc (rs6000_type_string): Add DMR + support. + (rs6000_init_builtins): Add support for __dmr keyword.
+ * config/rs6000/rs6000-call.cc (rs6000_return_in_memory): Add support + for TDOmode. + (rs6000_function_arg): Likewise. + * config/rs6000/rs6000-modes.def (TDOmode): New mode. + * config/rs6000/rs6000.cc (rs6000_hard_regno_nregs_internal): Add + support for TDOmode. + (rs6000_hard_regno_mode_ok_uncached): Likewise. + (rs6000_hard_regno_mode_ok): Likewise. + (rs6000_modes_tieable_p): Likewise. + (rs6000_debug_reg_global): Likewise. + (rs6000_setup_reg_addr_masks): Likewise. + (rs6000_init_hard_regno_mode_ok): Add support for TDOmode. Setup reload + hooks for DMR mode. + (reg_offset_addressing_ok_p): Add support for TDOmode. + (rs6000_emit_move): Likewise. + (rs6000_secondary_reload_simple_move): Likewise. + (rs6000_preferred_reload_class): Likewise. + (rs6000_secondary_reload_class): Likewise. + (rs6000_mangle_type): Add mangling for __dmr type. + (rs6000_dmr_register_move_cost): Add support for TDOmode. + (rs6000_split_multireg_move): Likewise. + (rs6000_invalid_conversion): Likewise. + * config/rs6000/rs6000.h (VECTOR_ALIGNMENT_P): Add TDOmode. + (enum rs6000_builtin_type_index): Add DMR type nodes. + (dmr_type_node): Likewise. + (ptr_dmr_type_node): Likewise. + +gcc/testsuite/ + + * gcc.target/powerpc/dm-1024bit.c: New test. + +==================== Branch work161-dmf, patch #103 ==================== + +PowerPC: Switch to dense math names for all MMA operations. + +This patch changes the assembler instruction names for MMA instructions from +the original name used in power10 to the new name when used with the dense math +system. I.e. xvf64gerpp becomes dmxvf64gerpp. The assembler will emit the +same bits for either spelling. + +The patches have been tested on both little and big endian systems. Can I check +it into the master branch? + +For the non-prefixed MMA instructions, we add a 'dm' prefix in front of the +instruction. However, the prefixed instructions have a 'pm' prefix, and we add +the 'dm' prefix afterwards. 
To prevent having two sets of parallel int +attributes, we remove the "pm" prefix from the instruction string in the +attributes, and add it later, both in the insn name and in the output template. + +For example, previously we had + + (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")]) + + ;; ... + + (define_insn "mma_" + [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")] + MMA_VVI4I4I8))] + "TARGET_MMA" + " %A0,%x1,%x2,%3,%4,%5" + [(set_attr "type" "mma") + (set_attr "prefixed" "yes") + (set_attr "isa" "dm,not_dm,not_dm")]) + +And now we have: + + (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")]) + + ;; ... + + (define_insn "mma_pm" + [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") + (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")] + MMA_VVI4I4I8))] + "TARGET_MMA" + "@ + pmdm %A0,%x1,%x2,%3,%4,%5 + pm %A0,%x1,%x2,%3,%4,%5 + pm %A0,%x1,%x2,%3,%4,%5" + [(set_attr "type" "mma") + (set_attr "prefixed" "yes") + (set_attr "isa" "dm,not_dm,not_dm")]) + + +2024-02-29 Michael Meissner + +gcc/ + + * config/rs6000/mma.md (vvi4i4i8): Change the instruction to not have a + "pm" prefix. + (avvi4i4i8): Likewise. + (vvi4i4i2): Likewise. + (avvi4i4i2): Likewise. + (vvi4i4): Likewise. + (avvi4i4): Likewise. + (pvi4i2): Likewise. + (apvi4i2): Likewise. + (vvi4i4i4): Likewise. + (avvi4i4i4): Likewise. 
+ (mma_): Add support for running on DMF systems, generating the dense + math instruction and using the dense math accumulators. + (mma_): Likewise. + (mma_): Likewise. + (mma_): Likewise. + (mma_pm): Add support for running on DMF systems, generating + the dense math instruction and using the dense math accumulators. + Rename the insn with a 'pm' prefix and add either 'pm' or 'pmdm' + prefixes based on whether we have the original MMA specification or if + we have dense math support. + (mma_pm): Likewise. + (mma_pm): Likewise. + (mma_pm): Likewise. + (mma_pm): Likewise. + (mma_pm): Likewise. + (mma_pm): Likewise. + (mma_pm): Likewise. + +gcc/testsuite/ + + * gcc.target/powerpc/dm-double-test.c: New test. + * lib/target-supports.exp (check_effective_target_ppc_dmr_ok): New + target test. + +==================== Branch work161-dmf, patch #102 ==================== + +PowerPC: Add support for accumulators in DMR registers. + +The MMA subsystem added the notion of accumulator registers as an optional +feature of ISA 3.1 (power10). In ISA 3.1, these accumulators overlapped with +the VSX registers 0..31, but logically the accumulator registers were separate +from the FPR registers. In ISA 3.1, it was anticipated that in future systems, +the accumulator registers may not overlap with the FPR registers. This patch +adds the support for dense math registers as separate registers. + +This particular patch does not change the MMA support to use the accumulators +within the dense math registers. This patch just adds the basic support for +having separate DMRs. The next patch will switch the MMA support to use the +accumulators if -mcpu=future is used. + +For testing purposes, I added an undocumented option '-mdense-math' to enable +or disable the dense math support. + +This patch adds a new constraint (wD). If MMA is selected but dense math is +not selected (i.e. -mcpu=power10), the wD constraint will allow access to +accumulators that overlap with VSX registers 0..31.
If both MMA and dense math +are selected (i.e. -mcpu=future), the wD constraint will only allow dense math +registers. + +This patch modifies the existing %A output modifier. If MMA is selected but +dense math is not selected, then the %A output modifier converts the VSX register +number to the accumulator number, by dividing it by 4. If both MMA and dense +math are selected, then %A will map the separate DMR registers into 0..7. + +The intention is that user code using extended asm can be modified to run on +both MMA without dense math and MMA with dense math: + + 1) If possible, don't use extended asm, but instead use the MMA built-in + functions; + + 2) If you do need to write extended asm, the d constraints + targeting accumulators should now use wD; + + 3) Only use the built-in zero, assemble and disassemble functions to + move data between vector quad types and dense math accumulators. + I.e. do not use the xxmfacc, xxmtacc, and xxsetaccz directly in the + extended asm code. The reason is these instructions assume there is a + 1-to-1 correspondence between 4 adjacent FPR registers and an + accumulator that overlaps with those registers. With accumulators + now being separate registers, there no longer is a 1-to-1 + correspondence. + +It is possible that the mangling for DMRs and the GDB register numbers may +produce other changes in the future. + +2024-02-29 Michael Meissner + +gcc/ + + * config/rs6000/constraints.md (wD constraint): New constraint. + * config/rs6000/mma.md (UNSPEC_DM_ASSEMBLE): New unspec. + (movxo): Convert into define_expand. + (movxo_nodm): Version of movxo where accumulators overlap with VSX vector + registers 0..31. + (movxo_dm): Version of movxo that supports separate dense math + accumulators. + (mma_assemble_acc): Add dense math support to define_expand. + (mma_assemble_acc_nodm): Rename from mma_assemble_acc, and restrict it + to non dense math systems. + (mma_assemble_acc_dm): Dense math version of mma_assemble_acc.
+ (mma_disassemble_acc): Add dense math support to define_expand. + (mma_disassemble_acc_nodm): Rename from mma_disassemble_acc, and + restrict it to non dense math systems. + (mma_disassemble_acc_dm): Dense math version of mma_disassemble_acc. + * config/rs6000/predicates.md (dmr_operand): New predicate. + (accumulator_operand): Likewise. + * config/rs6000/rs6000.cc (enum rs6000_reg_type): Add DMR_REG_TYPE. + (enum rs6000_reload_reg_type): Add RELOAD_REG_DMR. + (LAST_RELOAD_REG_CLASS): Add support for DMR registers and the wD + constraint. + (reload_reg_map): Likewise. + (rs6000_reg_names): Likewise. + (alt_reg_names): Likewise. + (rs6000_hard_regno_nregs_internal): Likewise. + (rs6000_hard_regno_mode_ok_uncached): Likewise. + (rs6000_debug_reg_global): Likewise. + (rs6000_setup_reg_addr_masks): Likewise. + (rs6000_init_hard_regno_mode_ok): Likewise. + (rs6000_secondary_reload_memory): Add support for DMR registers. + (rs6000_secondary_reload_simple_move): Likewise. + (rs6000_preferred_reload_class): Likewise. + (rs6000_secondary_reload_class): Likewise. + (print_operand): Make %A handle both FPRs and DMRs. + (rs6000_dmr_register_move_cost): New helper function. + (rs6000_register_move_cost): Add support for DMR registers. + (rs6000_memory_move_cost): Likewise. + (rs6000_compute_pressure_classes): Likewise. + (rs6000_debugger_regno): Likewise. + (rs6000_split_multireg_move): Add support for DMRs. + * config/rs6000/rs6000.h (TARGET_DENSE_MATH): New macro. + (TARGET_MMA_NO_DENSE_MATH): Likewise + (UNITS_PER_DMR_WORD): New macro. + (FIRST_PSEUDO_REGISTER): Update for DMRs. + (FIXED_REGISTERS): Add DMRs. + (CALL_REALLY_USED_REGISTERS): Likewise. + (REG_ALLOC_ORDER): Likewise. + (DMR_REGNO_P): New macro. + (enum reg_class): Add DM_REGS. + (REG_CLASS_NAMES): Likewise. + (REG_CLASS_CONTENTS): Likewise. + (enum r6000_reg_class_enum): Add RS6000_CONSTRAINT_wD. + (REGISTER_NAMES): Add DMR registers. + (ADDITIONAL_REGISTER_NAMES): Likewise. 
+ * config/rs6000/rs6000.md (FIRST_DMR_REGNO): New constant. + (LAST_DMR_REGNO): Likewise. + (isa attribute): Add 'dm' and 'not_dm' attributes. + (enabled attribute): Support 'dm' and 'not_dm' attributes. + * doc/md.texi (PowerPC constraints): Document wD constraint. + +==================== Branch work161-dmf, patch #101 ==================== + +Use vector pair load/store for memcpy with -mcpu=future + +In the development for the power10 processor, GCC did not enable using the load +vector pair and store vector pair instructions when optimizing things like +memory copy. This patch enables using those instructions if -mcpu=future is +used. + +2024-02-29 Michael Meissner + +gcc/ + + * config/rs6000/rs6000-cpus.def (ISA_FUTURE_MASKS_SERVER): Enable using + load vector pair and store vector pair instructions for memory copy + operations. + (POWERPC_MASKS): Make the bit for enabling using load vector pair and + store vector pair operations set and reset when the PowerPC processor is + changed. + ==================== Branch work161-dmf, baseline ==================== +Add ChangeLog.dmf and update REVISION. + +2024-02-28 Michael Meissner + +gcc/ + + * ChangeLog.dmf: New file for branch. + * REVISION: Update. + 2024-02-28 Michael Meissner Clone branch diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index 277a30a8245..369a7b75042 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -107,9 +107,6 @@ (match_test "TARGET_P8_VECTOR") (match_operand 0 "s5bit_cint_operand"))) -(define_register_constraint "wD" "rs6000_constraints[RS6000_CONSTRAINT_wD]" - "Accumulator register.") - (define_constraint "wE" "@internal Vector constant that can be loaded with the XXSPLTIB instruction." 
(match_test "xxspltib_constant_nosplit (op, mode)")) diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index af16385b226..04e2d0066df 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -91,12 +91,6 @@ UNSPEC_MMA_XVI8GER4SPP UNSPEC_MMA_XXMFACC UNSPEC_MMA_XXMTACC - UNSPEC_DM_ASSEMBLE - UNSPEC_DM_INSERT512_UPPER - UNSPEC_DM_INSERT512_LOWER - UNSPEC_DM_EXTRACT512 - UNSPEC_DMR_RELOAD_FROM_MEMORY - UNSPEC_DMR_RELOAD_TO_MEMORY ]) (define_c_enum "unspecv" @@ -230,48 +224,44 @@ (UNSPEC_MMA_XVF64GERNP "xvf64gernp") (UNSPEC_MMA_XVF64GERNN "xvf64gernn")]) -;; Do not include the "pm" prefix in these instructions. If we have MMA but we -;; don't have dense math register support we want to issue the instruction with -;; a "pm" prefix, but if we have dense math registers, we want to issue it with -;; a "pmdm" prefix. I.e. pmxvi4ger8 vs. pmdmxvi4ger8 -(define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")]) +(define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")]) -(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP "xvi4ger8pp")]) +(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP "pmxvi4ger8pp")]) -(define_int_attr vvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2 "xvi16ger2") - (UNSPEC_MMA_PMXVI16GER2S "xvi16ger2s") - (UNSPEC_MMA_PMXVF16GER2 "xvf16ger2") - (UNSPEC_MMA_PMXVBF16GER2 "xvbf16ger2")]) +(define_int_attr vvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2 "pmxvi16ger2") + (UNSPEC_MMA_PMXVI16GER2S "pmxvi16ger2s") + (UNSPEC_MMA_PMXVF16GER2 "pmxvf16ger2") + (UNSPEC_MMA_PMXVBF16GER2 "pmxvbf16ger2")]) -(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP "xvi16ger2pp") - (UNSPEC_MMA_PMXVI16GER2SPP "xvi16ger2spp") - (UNSPEC_MMA_PMXVF16GER2PP "xvf16ger2pp") - (UNSPEC_MMA_PMXVF16GER2PN "xvf16ger2pn") - (UNSPEC_MMA_PMXVF16GER2NP "xvf16ger2np") - (UNSPEC_MMA_PMXVF16GER2NN "xvf16ger2nn") - (UNSPEC_MMA_PMXVBF16GER2PP "xvbf16ger2pp") - (UNSPEC_MMA_PMXVBF16GER2PN "xvbf16ger2pn") - (UNSPEC_MMA_PMXVBF16GER2NP "xvbf16ger2np") - (UNSPEC_MMA_PMXVBF16GER2NN 
"xvbf16ger2nn")]) +(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP "pmxvi16ger2pp") + (UNSPEC_MMA_PMXVI16GER2SPP "pmxvi16ger2spp") + (UNSPEC_MMA_PMXVF16GER2PP "pmxvf16ger2pp") + (UNSPEC_MMA_PMXVF16GER2PN "pmxvf16ger2pn") + (UNSPEC_MMA_PMXVF16GER2NP "pmxvf16ger2np") + (UNSPEC_MMA_PMXVF16GER2NN "pmxvf16ger2nn") + (UNSPEC_MMA_PMXVBF16GER2PP "pmxvbf16ger2pp") + (UNSPEC_MMA_PMXVBF16GER2PN "pmxvbf16ger2pn") + (UNSPEC_MMA_PMXVBF16GER2NP "pmxvbf16ger2np") + (UNSPEC_MMA_PMXVBF16GER2NN "pmxvbf16ger2nn")]) -(define_int_attr vvi4i4 [(UNSPEC_MMA_PMXVF32GER "xvf32ger")]) +(define_int_attr vvi4i4 [(UNSPEC_MMA_PMXVF32GER "pmxvf32ger")]) -(define_int_attr avvi4i4 [(UNSPEC_MMA_PMXVF32GERPP "xvf32gerpp") - (UNSPEC_MMA_PMXVF32GERPN "xvf32gerpn") - (UNSPEC_MMA_PMXVF32GERNP "xvf32gernp") - (UNSPEC_MMA_PMXVF32GERNN "xvf32gernn")]) +(define_int_attr avvi4i4 [(UNSPEC_MMA_PMXVF32GERPP "pmxvf32gerpp") + (UNSPEC_MMA_PMXVF32GERPN "pmxvf32gerpn") + (UNSPEC_MMA_PMXVF32GERNP "pmxvf32gernp") + (UNSPEC_MMA_PMXVF32GERNN "pmxvf32gernn")]) -(define_int_attr pvi4i2 [(UNSPEC_MMA_PMXVF64GER "xvf64ger")]) +(define_int_attr pvi4i2 [(UNSPEC_MMA_PMXVF64GER "pmxvf64ger")]) -(define_int_attr apvi4i2 [(UNSPEC_MMA_PMXVF64GERPP "xvf64gerpp") - (UNSPEC_MMA_PMXVF64GERPN "xvf64gerpn") - (UNSPEC_MMA_PMXVF64GERNP "xvf64gernp") - (UNSPEC_MMA_PMXVF64GERNN "xvf64gernn")]) +(define_int_attr apvi4i2 [(UNSPEC_MMA_PMXVF64GERPP "pmxvf64gerpp") + (UNSPEC_MMA_PMXVF64GERPN "pmxvf64gerpn") + (UNSPEC_MMA_PMXVF64GERNP "pmxvf64gernp") + (UNSPEC_MMA_PMXVF64GERNN "pmxvf64gernn")]) -(define_int_attr vvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4 "xvi8ger4")]) +(define_int_attr vvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4 "pmxvi8ger4")]) -(define_int_attr avvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4PP "xvi8ger4pp") - (UNSPEC_MMA_PMXVI8GER4SPP "xvi8ger4spp")]) +(define_int_attr avvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4PP "pmxvi8ger4pp") + (UNSPEC_MMA_PMXVI8GER4SPP "pmxvi8ger4spp")]) ;; Vector pair support. OOmode can only live in VSRs. 
@@ -324,9 +314,7 @@ (set_attr "length" "*,*,8")]) -;; Vector quad support. Under the original MMA, XOmode can only live in VSX -;; registers 0..31. With dense math, XOmode can live in either VSX registers -;; (0..63) or DMR registers. +;; Vector quad support. XOmode can only live in FPRs. (define_expand "movxo" [(set (match_operand:XO 0 "nonimmediate_operand") (match_operand:XO 1 "input_operand"))] @@ -351,10 +339,10 @@ gcc_assert (false); }) -(define_insn_and_split "*movxo_nodm" +(define_insn_and_split "*movxo" [(set (match_operand:XO 0 "nonimmediate_operand" "=d,ZwO,d") (match_operand:XO 1 "input_operand" "ZwO,d,d"))] - "TARGET_MMA_NO_DENSE_MATH + "TARGET_MMA && (gpc_reg_operand (operands[0], XOmode) || gpc_reg_operand (operands[1], XOmode))" "@ @@ -371,31 +359,6 @@ (set_attr "length" "*,*,16") (set_attr "max_prefixed_insns" "2,2,*")]) -(define_insn_and_split "*movxo_dm" - [(set (match_operand:XO 0 "nonimmediate_operand" "=wa,QwO,wa,wD,wD,wa") - (match_operand:XO 1 "input_operand" "QwO,wa, wa,wa,wD,wD"))] - "TARGET_DENSE_MATH - && (gpc_reg_operand (operands[0], XOmode) - || gpc_reg_operand (operands[1], XOmode))" - "@ - # - # - # - dmxxinstdmr512 %0,%1,%Y1,0 - dmmr %0,%1 - dmxxextfdmr512 %0,%Y0,%1,0" - "&& reload_completed - && !dmr_operand (operands[0], XOmode) - && !dmr_operand (operands[1], XOmode)" - [(const_int 0)] -{ - rs6000_split_multireg_move (operands[0], operands[1]); - DONE; -} - [(set_attr "type" "vecload,vecstore,veclogical,mma,mma,mma") - (set_attr "length" "*,*,16,*,*,*") - (set_attr "max_prefixed_insns" "2,2,*,*,*,*")]) - (define_expand "vsx_assemble_pair" [(match_operand:OO 0 "vsx_register_operand") (match_operand:V16QI 1 "mma_assemble_input_operand") @@ -463,38 +426,25 @@ }) (define_expand "mma_assemble_acc" - [(match_operand:XO 0 "accumulator_operand") + [(match_operand:XO 0 "fpr_reg_operand") (match_operand:V16QI 1 "mma_assemble_input_operand") (match_operand:V16QI 2 "mma_assemble_input_operand") (match_operand:V16QI 3 
"mma_assemble_input_operand") (match_operand:V16QI 4 "mma_assemble_input_operand")] "TARGET_MMA" { - rtx op0 = operands[0]; - rtx op1 = operands[1]; - rtx op2 = operands[2]; - rtx op3 = operands[3]; - rtx op4 = operands[4]; - - if (TARGET_DENSE_MATH) - { - rtx vpair1 = gen_reg_rtx (OOmode); - rtx vpair2 = gen_reg_rtx (OOmode); - emit_insn (gen_vsx_assemble_pair (vpair1, op1, op2)); - emit_insn (gen_vsx_assemble_pair (vpair2, op3, op4)); - emit_insn (gen_mma_assemble_acc_dm (op0, vpair1, vpair2)); - } - - else - emit_insn (gen_mma_assemble_acc_nodm (op0, op1, op2, op3, op4)); - + rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode, + gen_rtvec (4, operands[1], operands[2], + operands[3], operands[4]), + UNSPECV_MMA_ASSEMBLE); + emit_move_insn (operands[0], src); DONE; }) ;; We cannot update the four output registers atomically, so mark the output -;; as an early clobber so we don't accidentally clobber the input operands. +;; as an early clobber so we don't accidentally clobber the input operands. */ -(define_insn_and_split "mma_assemble_acc_nodm" +(define_insn_and_split "*mma_assemble_acc" [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") (unspec_volatile:XO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa") @@ -502,7 +452,7 @@ (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa") (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")] UNSPECV_MMA_ASSEMBLE))] - "TARGET_MMA_NO_DENSE_MATH + "TARGET_MMA && fpr_reg_operand (operands[0], XOmode)" "#" "&& reload_completed" @@ -516,31 +466,28 @@ DONE; }) -;; On a system with dense math, we build the accumulators from two vector -;; pairs. 
- -(define_insn "mma_assemble_acc_dm" - [(set (match_operand:XO 0 "dmr_operand" "=wD") - (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa") - (match_operand:OO 2 "vsx_register_operand" "wa")] - UNSPEC_DM_ASSEMBLE))] - "TARGET_DENSE_MATH" - "dmxxinstdmr512 %0,%1,%2,0" - [(set_attr "type" "mma")]) - (define_expand "mma_disassemble_acc" - [(set (match_operand:V16QI 0 "register_operand") - (unspec:V16QI [(match_operand:XO 1 "register_operand") - (match_operand 2 "const_0_to_3_operand")] - UNSPEC_MMA_EXTRACT))] - "TARGET_MMA") + [(match_operand:V16QI 0 "mma_disassemble_output_operand") + (match_operand:XO 1 "fpr_reg_operand") + (match_operand 2 "const_0_to_3_operand")] + "TARGET_MMA" +{ + rtx src; + int regoff = INTVAL (operands[2]); + src = gen_rtx_UNSPEC (V16QImode, + gen_rtvec (2, operands[1], GEN_INT (regoff)), + UNSPEC_MMA_EXTRACT); + emit_move_insn (operands[0], src); + DONE; +}) -(define_insn_and_split "*mma_disassemble_acc_nodm" +(define_insn_and_split "*mma_disassemble_acc" [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa") - (unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d") - (match_operand 2 "const_0_to_3_operand")] + (unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d") + (match_operand 2 "const_0_to_3_operand")] UNSPEC_MMA_EXTRACT))] - "TARGET_MMA_NO_DENSE_MATH" + "TARGET_MMA + && fpr_reg_operand (operands[1], XOmode)" "#" "&& reload_completed" [(const_int 0)] @@ -552,447 +499,194 @@ DONE; }) -(define_insn "*mma_disassemble_acc_dm" - [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") - (unspec:V16QI [(match_operand:XO 1 "dmr_operand" "wD") - (match_operand 2 "const_0_to_3_operand")] - UNSPEC_MMA_EXTRACT))] - "TARGET_DENSE_MATH" - "dmxxextfdmr256 %0,%1,2" - [(set_attr "type" "mma")]) - -;; MMA instructions that do not use their accumulators as an input, still must -;; not allow their vector operands to overlap the registers used by the -;; accumulator. We enforce this by marking the output as early clobber. 
If we -;; have dense math, we don't need the whole prime/de-prime action, so just make -;; these instructions be NOPs. - -(define_expand "mma_" - [(set (match_operand:XO 0 "accumulator_operand") - (unspec:XO [(match_operand:XO 1 "accumulator_operand")] - MMA_ACC))] - "TARGET_MMA" -{ - if (TARGET_DENSE_MATH) - { - if (!rtx_equal_p (operands[0], operands[1])) - emit_move_insn (operands[0], operands[1]); - DONE; - } +;; MMA instructions that do not use their accumulators as an input, still +;; must not allow their vector operands to overlap the registers used by +;; the accumulator. We enforce this by marking the output as early clobber. - /* Generate the prime/de-prime code. */ -}) - -(define_insn "*mma__nodm" - [(set (match_operand:XO 0 "accumulator_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0")] +(define_insn "mma_" + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")] MMA_ACC))] - "TARGET_MMA_NO_DENSE_MATH" + "TARGET_MMA" " %A0" [(set_attr "type" "mma")]) ;; We can't have integer constants in XOmode so we wrap this in an -;; UNSPEC_VOLATILE for the non-dense math case. For dense math, we don't need -;; to disable optimization and we can do a normal UNSPEC. - -(define_expand "mma_xxsetaccz" - [(set (match_operand:XO 0 "register_operand") - (unspec_volatile:XO [(const_int 0)] - UNSPECV_MMA_XXSETACCZ))] - "TARGET_MMA" -{ - if (TARGET_DENSE_MATH) - { - emit_insn (gen_mma_xxsetaccz_dm (operands[0])); - DONE; - } -}) +;; UNSPEC_VOLATILE. 
-(define_insn "*mma_xxsetaccz_nodm" +(define_insn "mma_xxsetaccz" [(set (match_operand:XO 0 "fpr_reg_operand" "=d") (unspec_volatile:XO [(const_int 0)] UNSPECV_MMA_XXSETACCZ))] - "TARGET_MMA_NO_DENSE_MATH" + "TARGET_MMA" "xxsetaccz %A0" [(set_attr "type" "mma")]) - -(define_insn "mma_xxsetaccz_dm" - [(set (match_operand:XO 0 "dmr_operand" "=wD") - (unspec:XO [(const_int 0)] - UNSPECV_MMA_XXSETACCZ))] - "TARGET_DENSE_MATH" - "dmsetdmrz %0" - [(set_attr "type" "mma")]) - (define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_VV))] "TARGET_MMA" - "@ - dm %A0,%x1,%x2 - %A0,%x1,%x2 - %A0,%x1,%x2" - [(set_attr "type" "mma") - (set_attr "isa" "dm,not_dm,not_dm")]) + " %A0,%x1,%x2" + [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0") - (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_AVV))] "TARGET_MMA" " %A0,%x2,%x3" - [(set_attr "type" "mma") - (set_attr "isa" "dm,not_dm,not_dm")]) + [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d") - (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:OO 1 
"vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_PV))] "TARGET_MMA" - "@ - dm %A0,%x1,%x2 - %A0,%x1,%x2 - %A0,%x1,%x2" - [(set_attr "type" "mma") - (set_attr "isa" "dm,not_dm,not_dm")]) + " %A0,%x1,%x2" + [(set_attr "type" "mma")]) (define_insn "mma_" - [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0") - (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:OO 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_APV))] "TARGET_MMA" - "@ - dm %A0,%x2,%x3 - %A0,%x2,%x3 - %A0,%x2,%x3" - [(set_attr "type" "mma") - (set_attr "isa" "dm,not_dm,not_dm")]) - -(define_insn "mma_pm" - [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") - (match_operand:SI 3 "const_0_to_15_operand" "n,n,n") - (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") - (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")] + " %A0,%x2,%x3" + [(set_attr "type" "mma")]) + +(define_insn "mma_" + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "u8bit_cint_operand" "n,n")] MMA_VVI4I4I8))] "TARGET_MMA" - "@ - pmdm %A0,%x1,%x2,%3,%4,%5 - pm %A0,%x1,%x2,%3,%4,%5 - pm %A0,%x1,%x2,%3,%4,%5" + " %A0,%x1,%x2,%3,%4,%5" [(set_attr "type" "mma") - (set_attr "prefixed" "yes") - (set_attr "isa" "dm,not_dm,not_dm")]) - -(define_insn "mma_pm" - [(set (match_operand:XO 0 
"accumulator_operand" "=wD,&d,&d") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0") - (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa") - (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") - (match_operand:SI 5 "const_0_to_15_operand" "n,n,n") - (match_operand:SI 6 "u8bit_cint_operand" "n,n,n")] + (set_attr "prefixed" "yes")]) + +(define_insn "mma_" + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n") + (match_operand:SI 6 "u8bit_cint_operand" "n,n")] MMA_AVVI4I4I8))] "TARGET_MMA" - "@ - pmdm %A0,%x2,%x3,%4,%5,%6 - pm %A0,%x2,%x3,%4,%5,%6 - pm %A0,%x2,%x3,%4,%5,%6" + " %A0,%x2,%x3,%4,%5,%6" [(set_attr "type" "mma") - (set_attr "prefixed" "yes") - (set_attr "isa" "dm,not_dm,not_dm")]) - -(define_insn "mma_pm" - [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") - (match_operand:SI 3 "const_0_to_15_operand" "n,n,n") - (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") - (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")] + (set_attr "prefixed" "yes")]) + +(define_insn "mma_" + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_3_operand" "n,n")] MMA_VVI4I4I2))] "TARGET_MMA" - "@ - pmdm %A0,%x1,%x2,%3,%4,%5 - pm %A0,%x1,%x2,%3,%4,%5 - pm %A0,%x1,%x2,%3,%4,%5" + " %A0,%x1,%x2,%3,%4,%5" [(set_attr 
"type" "mma") - (set_attr "prefixed" "yes") - (set_attr "isa" "dm,not_dm,not_dm")]) - -(define_insn "mma_pm" - [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0") - (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa") - (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") - (match_operand:SI 5 "const_0_to_15_operand" "n,n,n") - (match_operand:SI 6 "const_0_to_3_operand" "n,n,n")] + (set_attr "prefixed" "yes")]) + +(define_insn "mma_" + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n") + (match_operand:SI 6 "const_0_to_3_operand" "n,n")] MMA_AVVI4I4I2))] "TARGET_MMA" - "@ - pm %A0,%x2,%x3,%4,%5,%6 - pmdm %A0,%x2,%x3,%4,%5,%6 - pm %A0,%x2,%x3,%4,%5,%6" + " %A0,%x2,%x3,%4,%5,%6" [(set_attr "type" "mma") - (set_attr "prefixed" "yes") - (set_attr "isa" "dm,not_dm,not_dm")]) - -(define_insn "mma_pm" - [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") - (match_operand:SI 3 "const_0_to_15_operand" "n,n,n") - (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")] + (set_attr "prefixed" "yes")]) + +(define_insn "mma_" + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n") + (match_operand:SI 4 "const_0_to_15_operand" "n,n")] MMA_VVI4I4))] "TARGET_MMA" - "@ - pmdm %A0,%x1,%x2,%3,%4 - pm %A0,%x1,%x2,%3,%4 - pm %A0,%x1,%x2,%3,%4" + " 
%A0,%x1,%x2,%3,%4" [(set_attr "type" "mma") - (set_attr "prefixed" "yes") - (set_attr "isa" "dm,not_dm,not_dm")]) - -(define_insn "mma_pm" - [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0") - (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa") - (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") - (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")] + (set_attr "prefixed" "yes")]) + +(define_insn "mma_" + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n")] MMA_AVVI4I4))] "TARGET_MMA" - "@ - pmdm %A0,%x2,%x3,%4,%5 - pm %A0,%x2,%x3,%4,%5 - pm %A0,%x2,%x3,%4,%5" + " %A0,%x2,%x3,%4,%5" [(set_attr "type" "mma") - (set_attr "prefixed" "yes") - (set_attr "isa" "dm,not_dm,not_dm")]) - -(define_insn "mma_pm" - [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d") - (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa,v,?wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") - (match_operand:SI 3 "const_0_to_15_operand" "n,n,n") - (match_operand:SI 4 "const_0_to_3_operand" "n,n,n")] + (set_attr "prefixed" "yes")]) + +(define_insn "mma_" + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n") + (match_operand:SI 4 "const_0_to_3_operand" "n,n")] MMA_PVI4I2))] "TARGET_MMA" - "@ - dmpm %A0,%x1,%x2,%3,%4 - dm %A0,%x1,%x2,%3,%4 - dm %A0,%x1,%x2,%3,%4" + " %A0,%x1,%x2,%3,%4" [(set_attr "type" "mma") - (set_attr "prefixed" "yes") - (set_attr "isa" 
"dm,not_dm,not_dm")]) - -(define_insn "mma_pm" - [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0") - (match_operand:OO 2 "vsx_register_operand" "wa,v,?wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa") - (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") - (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")] + (set_attr "prefixed" "yes")]) + +(define_insn "mma_" + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:OO 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_3_operand" "n,n")] MMA_APVI4I2))] "TARGET_MMA" - "@ - pmdm %A0,%x2,%x3,%4,%5 - pm %A0,%x2,%x3,%4,%5 - pm %A0,%x2,%x3,%4,%5" + " %A0,%x2,%x3,%4,%5" [(set_attr "type" "mma") - (set_attr "prefixed" "yes") - (set_attr "isa" "dm,not_dm,not_dm")]) - -(define_insn "mma_pm" - [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa,v,?wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") - (match_operand:SI 3 "const_0_to_15_operand" "n,n,n") - (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") - (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")] + (set_attr "prefixed" "yes")]) + +(define_insn "mma_" + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n")] MMA_VVI4I4I4))] "TARGET_MMA" - "@ - pmdm %A0,%x1,%x2,%3,%4,%5 - pm %A0,%x1,%x2,%3,%4,%5 - pm %A0,%x1,%x2,%3,%4,%5" + " %A0,%x1,%x2,%3,%4,%5" [(set_attr "type" "mma") - (set_attr "prefixed" "yes") - 
(set_attr "isa" "dm,not_dm,not_dm")]) - -(define_insn "mma_pm" - [(set (match_operand:XO 0 "accumulator_operand" "=wD,&d,&d") - (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0,0") - (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa") - (match_operand:SI 4 "const_0_to_15_operand" "n,n,n") - (match_operand:SI 5 "const_0_to_15_operand" "n,n,n") - (match_operand:SI 6 "const_0_to_15_operand" "n,n,n")] + (set_attr "prefixed" "yes")]) + +(define_insn "mma_" + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n") + (match_operand:SI 6 "const_0_to_15_operand" "n,n")] MMA_AVVI4I4I4))] "TARGET_MMA" - "@ - pmdm %A0,%x2,%x3,%4,%5,%6 - pm %A0,%x2,%x3,%4,%5,%6 - pm %A0,%x2,%x3,%4,%5,%6" + " %A0,%x2,%x3,%4,%5,%6" [(set_attr "type" "mma") - (set_attr "prefixed" "yes") - (set_attr "isa" "dm,not_dm,not_dm")]) - - -;; TDOmode (i.e. __dmr). 
-(define_expand "movtdo" - [(set (match_operand:TDO 0 "nonimmediate_operand") - (match_operand:TDO 1 "input_operand"))] - "TARGET_DENSE_MATH" -{ - rs6000_emit_move (operands[0], operands[1], TDOmode); - DONE; -}) - -(define_insn_and_split "*movtdo" - [(set (match_operand:TDO 0 "nonimmediate_operand" "=wa,m,wa,wD,wD,wa") - (match_operand:TDO 1 "input_operand" "m,wa,wa,wa,wD,wD"))] - "TARGET_DENSE_MATH - && (gpc_reg_operand (operands[0], TDOmode) - || gpc_reg_operand (operands[1], TDOmode))" - "@ - # - # - # - # - dmmr %0,%1 - #" - "&& reload_completed - && (!dmr_operand (operands[0], TDOmode) || !dmr_operand (operands[1], TDOmode))" - [(const_int 0)] -{ - rtx op0 = operands[0]; - rtx op1 = operands[1]; - - if (REG_P (op0) && REG_P (op1)) - { - int regno0 = REGNO (op0); - int regno1 = REGNO (op1); - - if (DMR_REGNO_P (regno0) && VSX_REGNO_P (regno1)) - { - rtx op1_upper = gen_rtx_REG (XOmode, regno1); - rtx op1_lower = gen_rtx_REG (XOmode, regno1 + 4); - emit_insn (gen_movtdo_insert512_upper (op0, op1_upper)); - emit_insn (gen_movtdo_insert512_lower (op0, op0, op1_lower)); - DONE; - } - - else if (VSX_REGNO_P (regno0) && DMR_REGNO_P (regno1)) - { - rtx op0_upper = gen_rtx_REG (XOmode, regno0); - rtx op0_lower = gen_rtx_REG (XOmode, regno0 + 4); - emit_insn (gen_movtdo_extract512 (op0_upper, op1, const0_rtx)); - emit_insn (gen_movtdo_extract512 (op0_lower, op1, const1_rtx)); - DONE; - } - - else - gcc_assert (VSX_REGNO_P (regno0) && VSX_REGNO_P (regno1)); - } - - rs6000_split_multireg_move (operands[0], operands[1]); - DONE; -} - [(set_attr "type" "vecload,vecstore,vecmove,vecmove,vecmove,vecmove") - (set_attr "length" "*,*,32,8,*,8") - (set_attr "max_prefixed_insns" "4,4,*,*,*,*")]) - -;; Move from VSX registers to DMR registers via two insert 512 bit -;; instructions. 
-(define_insn "movtdo_insert512_upper" - [(set (match_operand:TDO 0 "dmr_operand" "=wD") - (unspec:TDO [(match_operand:XO 1 "vsx_register_operand" "wa")] - UNSPEC_DM_INSERT512_UPPER))] - "TARGET_DENSE_MATH" - "dmxxinstdmr512 %0,%1,%Y1,0" - [(set_attr "type" "mma")]) - -(define_insn "movtdo_insert512_lower" - [(set (match_operand:TDO 0 "dmr_operand" "=wD") - (unspec:TDO [(match_operand:TDO 1 "dmr_operand" "0") - (match_operand:XO 2 "vsx_register_operand" "wa")] - UNSPEC_DM_INSERT512_LOWER))] - "TARGET_DENSE_MATH" - "dmxxinstdmr512 %0,%2,%Y2,1" - [(set_attr "type" "mma")]) - -;; Move from DMR registers to VSX registers via two extract 512 bit -;; instructions. -(define_insn "movtdo_extract512" - [(set (match_operand:XO 0 "vsx_register_operand" "=wa") - (unspec:XO [(match_operand:TDO 1 "dmr_operand" "wD") - (match_operand 2 "const_0_to_1_operand" "n")] - UNSPEC_DM_EXTRACT512))] - "TARGET_DENSE_MATH" - "dmxxextfdmr512 %0,%Y0,%1,%2" - [(set_attr "type" "mma")]) - -;; Reload DMR registers from memory -(define_insn_and_split "reload_dmr_from_memory" - [(set (match_operand:TDO 0 "dmr_operand" "=wD") - (unspec:TDO [(match_operand:TDO 1 "memory_operand" "m")] - UNSPEC_DMR_RELOAD_FROM_MEMORY)) - (clobber (match_operand:XO 2 "vsx_register_operand" "=wa"))] - "TARGET_DENSE_MATH" - "#" - "&& reload_completed" - [(const_int 0)] -{ - rtx dest = operands[0]; - rtx src = operands[1]; - rtx tmp = operands[2]; - rtx mem_upper = adjust_address (src, XOmode, BYTES_BIG_ENDIAN ? 0 : 64); - rtx mem_lower = adjust_address (src, XOmode, BYTES_BIG_ENDIAN ? 
64 : 0); - - emit_move_insn (tmp, mem_upper); - emit_insn (gen_movtdo_insert512_upper (dest, tmp)); - - emit_move_insn (tmp, mem_lower); - emit_insn (gen_movtdo_insert512_lower (dest, dest, tmp)); - DONE; -} - [(set_attr "length" "16") - (set_attr "max_prefixed_insns" "2") - (set_attr "type" "vecload")]) - -;; Reload dense math registers to memory -(define_insn_and_split "reload_dmr_to_memory" - [(set (match_operand:TDO 0 "memory_operand" "=m") - (unspec:TDO [(match_operand:TDO 1 "dmr_operand" "wD")] - UNSPEC_DMR_RELOAD_TO_MEMORY)) - (clobber (match_operand:XO 2 "vsx_register_operand" "=wa"))] - "TARGET_DENSE_MATH" - "#" - "&& reload_completed" - [(const_int 0)] -{ - rtx dest = operands[0]; - rtx src = operands[1]; - rtx tmp = operands[2]; - rtx mem_upper = adjust_address (dest, XOmode, BYTES_BIG_ENDIAN ? 0 : 64); - rtx mem_lower = adjust_address (dest, XOmode, BYTES_BIG_ENDIAN ? 64 : 0); - - emit_insn (gen_movtdo_extract512 (tmp, src, const0_rtx)); - emit_move_insn (mem_upper, tmp); - - emit_insn (gen_movtdo_extract512 (tmp, src, const1_rtx)); - emit_move_insn (mem_lower, tmp); - DONE; -} - [(set_attr "length" "16") - (set_attr "max_prefixed_insns" "2")]) + (set_attr "prefixed" "yes")]) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 3040dcd50a3..d23ce9a77a3 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -186,38 +186,6 @@ return VLOGICAL_REGNO_P (REGNO (op)); }) -;; Return 1 if op is a DMR register -(define_predicate "dmr_operand" - (match_operand 0 "register_operand") -{ - if (!REG_P (op)) - return 0; - - if (!HARD_REGISTER_P (op)) - return 1; - - return DMR_REGNO_P (REGNO (op)); -}) - -;; Return 1 if op is an accumulator. On power10 systems, the accumulators -;; overlap with the FPRs, while on systems with dense math, the accumulators -;; are separate dense math registers and do not overlap with the FPR -;; registers.. 
-(define_predicate "accumulator_operand" - (match_operand 0 "register_operand") -{ - if (!REG_P (op)) - return 0; - - if (!HARD_REGISTER_P (op)) - return 1; - - int r = REGNO (op); - return (TARGET_DENSE_MATH - ? DMR_REGNO_P (r) - : FP_REGNO_P (r) && (r & 3) == 0); -}) - ;; Return 1 if op is the carry register. (define_predicate "ca_operand" (match_operand 0 "register_operand") diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index 6f102ecc503..f3ba1eccdbd 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -495,8 +495,6 @@ const char *rs6000_type_string (tree type_node) return "__vector_pair"; else if (type_node == vector_quad_type_node) return "__vector_quad"; - else if (type_node == dmr_type_node) - return "__dmr"; return "unknown"; } @@ -783,21 +781,6 @@ rs6000_init_builtins (void) t = build_qualified_type (vector_quad_type_node, TYPE_QUAL_CONST); ptr_vector_quad_type_node = build_pointer_type (t); - /* For TDOmode (1,024 bit dense math accumulators), don't use an alignment of - 1,024, use 512. TDOmode loads and stores are always broken up into 2 - vector pair loads or stores. In addition, we don't have support for - aligning the stack to 1,024 bits. 
*/ - dmr_type_node = make_node (OPAQUE_TYPE); - SET_TYPE_MODE (dmr_type_node, TDOmode); - TYPE_SIZE (dmr_type_node) = bitsize_int (GET_MODE_BITSIZE (TDOmode)); - TYPE_PRECISION (dmr_type_node) = GET_MODE_BITSIZE (TDOmode); - TYPE_SIZE_UNIT (dmr_type_node) = size_int (GET_MODE_SIZE (TDOmode)); - SET_TYPE_ALIGN (dmr_type_node, 512); - TYPE_USER_ALIGN (dmr_type_node) = 0; - lang_hooks.types.register_builtin_type (dmr_type_node, "__dmr"); - t = build_qualified_type (dmr_type_node, TYPE_QUAL_CONST); - ptr_dmr_type_node = build_pointer_type (t); - tdecl = add_builtin_type ("__bool char", bool_char_type_node); TYPE_NAME (bool_char_type_node) = tdecl; diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc index ef65ed8c259..d15bb85743c 100644 --- a/gcc/config/rs6000/rs6000-c.cc +++ b/gcc/config/rs6000/rs6000-c.cc @@ -602,9 +602,6 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags) /* Tell the user if we support the MMA instructions. */ if ((flags & OPTION_MASK_MMA) != 0) rs6000_define_or_undefine_macro (define_p, "__MMA__"); - /* Tell the user if we support the dense math instructions. */ - if ((flags & DENSE_MATH_FLAGS) == DENSE_MATH_FLAGS) - rs6000_define_or_undefine_macro (define_p, "__PPC_DMR__"); /* Whether pc-relative code is being generated. */ if ((flags & OPTION_MASK_PCREL) != 0) rs6000_define_or_undefine_macro (define_p, "__PCREL__"); diff --git a/gcc/config/rs6000/rs6000-call.cc b/gcc/config/rs6000/rs6000-call.cc index 5cda8375902..8c590903c86 100644 --- a/gcc/config/rs6000/rs6000-call.cc +++ b/gcc/config/rs6000/rs6000-call.cc @@ -437,15 +437,14 @@ rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) if (cfun && !cfun->machine->mma_return_type_error && TREE_TYPE (cfun->decl) == fntype - && OPAQUE_MODE_P (TYPE_MODE (type))) + && (TYPE_MODE (type) == OOmode || TYPE_MODE (type) == XOmode)) { /* Record we have now handled function CFUN, so the next time we are called, we do not re-report the same error. 
*/ cfun->machine->mma_return_type_error = true; if (TYPE_CANONICAL (type) != NULL_TREE) type = TYPE_CANONICAL (type); - error ("invalid use of %s type %qs as a function return value", - (TYPE_MODE (type) == TDOmode) ? "dense math" : "MMA", + error ("invalid use of MMA type %qs as a function return value", IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)))); } @@ -1633,12 +1632,11 @@ rs6000_function_arg (cumulative_args_t cum_v, const function_arg_info &arg) int n_elts; /* We do not allow MMA types being used as function arguments. */ - if (OPAQUE_MODE_P (mode)) + if (mode == OOmode || mode == XOmode) { if (TYPE_CANONICAL (type) != NULL_TREE) type = TYPE_CANONICAL (type); - error ("invalid use of %s operand of type %qs as a function parameter", - (mode == TDOmode) ? "dense math" : "MMA", + error ("invalid use of MMA operand of type %qs as a function parameter", IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)))); return NULL_RTX; } diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index 8da1d560e49..77170915615 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -92,7 +92,6 @@ /* Flags for a potential future processor that may or may not be made. */ #define ISA_FUTURE_MASKS_SERVER (ISA_POWER11_MASKS_SERVER \ - | OPTION_MASK_BLOCK_OPS_VECTOR_PAIR \ | OPTION_MASK_FUTURE) /* Flags that need to be turned off if -mno-vsx. */ @@ -124,7 +123,6 @@ /* Mask of all options to set the default isa flags based on -mcpu=. */ #define POWERPC_MASKS (OPTION_MASK_ALTIVEC \ - | OPTION_MASK_BLOCK_OPS_VECTOR_PAIR \ | OPTION_MASK_CMPB \ | OPTION_MASK_CRYPTO \ | OPTION_MASK_DFP \ diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def index 43d839bf30c..094b246c834 100644 --- a/gcc/config/rs6000/rs6000-modes.def +++ b/gcc/config/rs6000/rs6000-modes.def @@ -86,7 +86,3 @@ PARTIAL_INT_MODE (TI, 128, PTI); /* Modes used by __vector_pair and __vector_quad. 
*/ OPAQUE_MODE (OO, 32); OPAQUE_MODE (XO, 64); - -/* Mode used by __dmr. */ -OPAQUE_MODE (TDO, 128); - diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index aba2a6d7ba7..b1ad49d3734 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -292,8 +292,7 @@ enum rs6000_reg_type { ALTIVEC_REG_TYPE, FPR_REG_TYPE, SPR_REG_TYPE, - CR_REG_TYPE, - DMR_REG_TYPE + CR_REG_TYPE }; /* Map register class to register type. */ @@ -307,23 +306,22 @@ static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES]; /* Register classes we care about in secondary reload or go if legitimate - address. We only need to worry about GPR, FPR, Altivec, and DMR registers - here, along an ANY field that is the OR of the 4 register classes. */ + address. We only need to worry about GPR, FPR, and Altivec registers here, + along an ANY field that is the OR of the 3 register classes. */ enum rs6000_reload_reg_type { RELOAD_REG_GPR, /* General purpose registers. */ RELOAD_REG_FPR, /* Traditional floating point regs. */ RELOAD_REG_VMX, /* Altivec (VMX) registers. */ - RELOAD_REG_DMR, /* DMR registers. */ - RELOAD_REG_ANY, /* OR of GPR/FPR/VMX/DMR masks. */ + RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */ N_RELOAD_REG }; -/* For setting up register classes, loop through the 4 register classes mapping +/* For setting up register classes, loop through the 3 register classes mapping into real registers, and skip the ANY class, which is just an OR of the bits. */ #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR -#define LAST_RELOAD_REG_CLASS RELOAD_REG_DMR +#define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX /* Map reload register type to a register in the register class. */ struct reload_reg_map_type { @@ -335,7 +333,6 @@ static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = { { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */ { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */ { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. 
*/ - { "DMR", FIRST_DMR_REGNO }, /* RELOAD_REG_DMR. */ { "Any", -1 }, /* RELOAD_REG_ANY. */ }; @@ -1229,8 +1226,6 @@ char rs6000_reg_names[][8] = "0", "1", "2", "3", "4", "5", "6", "7", /* vrsave vscr sfp */ "vrsave", "vscr", "sfp", - /* DMRs */ - "0", "1", "2", "3", "4", "5", "6", "7", }; #ifdef TARGET_REGNAMES @@ -1257,8 +1252,6 @@ static const char alt_reg_names[][8] = "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7", /* vrsave vscr sfp */ "vrsave", "vscr", "sfp", - /* DMRs */ - "%dmr0", "%dmr1", "%dmr2", "%dmr3", "%dmr4", "%dmr5", "%dmr6", "%dmr7", }; #endif @@ -1836,17 +1829,13 @@ rs6000_hard_regno_nregs_internal (int regno, machine_mode mode) 128-bit floating point that can go in vector registers, which has VSX memory addressing. */ if (FP_REGNO_P (regno)) - reg_size = (VECTOR_MEM_VSX_P (mode) - || VECTOR_ALIGNMENT_P (mode) + reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode) ? UNITS_PER_VSX_WORD : UNITS_PER_FP_WORD); else if (ALTIVEC_REGNO_P (regno)) reg_size = UNITS_PER_ALTIVEC_WORD; - else if (DMR_REGNO_P (regno)) - reg_size = UNITS_PER_DMR_WORD; - else reg_size = UNITS_PER_WORD; @@ -1868,56 +1857,9 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode) if (mode == OOmode) return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0); - /* On ISA 3.1 (power10), MMA accumulator modes need FPR registers divisible - by 4. - - If dense math is enabled, allow all VSX registers plus the dense math - registers. We need to make sure we don't cross between the boundary of - FPRs and traditional Altiviec registers. */ + /* MMA accumulator modes need FPR registers divisible by 4. 
*/ if (mode == XOmode) - { - if (TARGET_MMA && !TARGET_DENSE_MATH) - return (FP_REGNO_P (regno) && (regno & 3) == 0); - - else if (TARGET_DENSE_MATH) - { - if (DMR_REGNO_P (regno)) - return 1; - - if (FP_REGNO_P (regno)) - return ((regno & 1) == 0 && regno <= LAST_FPR_REGNO - 3); - - if (ALTIVEC_REGNO_P (regno)) - return ((regno & 1) == 0 && regno <= LAST_ALTIVEC_REGNO - 3); - } - - else - return 0; - } - - /* Dense math register modes need DMR registers or VSX registers divisible by - 2. We need to make sure we don't cross between the boundary of FPRs and - traditional Altiviec registers. */ - if (mode == TDOmode) - { - if (!TARGET_DENSE_MATH) - return 0; - - if (DMR_REGNO_P (regno)) - return 1; - - if (FP_REGNO_P (regno)) - return ((regno & 1) == 0 && regno <= LAST_FPR_REGNO - 7); - - if (ALTIVEC_REGNO_P (regno)) - return ((regno & 1) == 0 && regno <= LAST_ALTIVEC_REGNO - 7); - - return 0; - } - - /* No other types other than XOmode or TDOmode can go in DMRs. */ - if (DMR_REGNO_P (regno)) - return 0; + return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0); /* PTImode can only go in GPRs. Quad word memory operations require even/odd register combinations, and use PTImode where we need to deal with quad @@ -2023,11 +1965,9 @@ rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode) GPR registers, and TImode can go in any GPR as well as VSX registers (PR 57744). - Similarly, don't allow OOmode (vector pair), XOmode (vector quad), or - TDOmode (dmr register) to pair with anything else. Vector pairs are - restricted to even/odd VSX registers. Without dense math, vector quads are - limited to FPR registers divisible by 4. With dense math, vector quads are - limited to even VSX registers or DMR registers. + Similarly, don't allow OOmode (vector pair, restricted to even VSX + registers) or XOmode (vector quad, restricted to FPR registers divisible + by 4) to tie with other modes. 
Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE 128-bit floating point on VSX systems ties with other vectors. */ @@ -2036,8 +1976,7 @@ static bool rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2) { if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode - || mode1 == TDOmode || mode2 == PTImode || mode2 == OOmode - || mode2 == XOmode || mode2 == TDOmode) + || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode) return mode1 == mode2; if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1)) @@ -2328,7 +2267,6 @@ rs6000_debug_reg_global (void) V4DFmode, OOmode, XOmode, - TDOmode, CCmode, CCUNSmode, CCEQmode, @@ -2364,7 +2302,6 @@ rs6000_debug_reg_global (void) rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO, "vs"); - rs6000_debug_reg_print (FIRST_DMR_REGNO, LAST_DMR_REGNO, "dmr"); rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr"); rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr"); rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr"); @@ -2385,7 +2322,6 @@ rs6000_debug_reg_global (void) "wr reg_class = %s\n" "wx reg_class = %s\n" "wA reg_class = %s\n" - "wD reg_class = %s\n" "\n", reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]], @@ -2393,8 +2329,7 @@ rs6000_debug_reg_global (void) reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]], reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]], - reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]], - reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wD]]); + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]); nl = "\n"; for (m = 0; m < NUM_MACHINE_MODES; ++m) @@ -2691,21 +2626,6 @@ rs6000_setup_reg_addr_masks (void) addr_mask = 0; reg = reload_reg_map[rc].reg; - /* Special case DMR registers. 
*/ - if (rc == RELOAD_REG_DMR) - { - if (TARGET_DENSE_MATH && (m2 == XOmode || m2 == TDOmode)) - { - addr_mask = RELOAD_REG_VALID; - reg_addr[m].addr_mask[rc] = addr_mask; - any_addr_mask |= addr_mask; - } - else - reg_addr[m].addr_mask[rc] = 0; - - continue; - } - /* Can mode values go in the GPR/FPR/Altivec registers? */ if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg]) { @@ -2801,10 +2721,10 @@ rs6000_setup_reg_addr_masks (void) /* Vector pairs can do both indexed and offset loads if the instructions are enabled, otherwise they can only do offset loads - since it will be broken into two vector moves. Vector quads and - DMR registers can only do offset loads. */ + since it will be broken into two vector moves. Vector quads can + only do offset loads. */ else if ((addr_mask != 0) && TARGET_MMA - && (m2 == OOmode || m2 == XOmode || m2 == TDOmode)) + && (m2 == OOmode || m2 == XOmode)) { addr_mask |= RELOAD_REG_OFFSET; if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX) @@ -2856,9 +2776,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) for (r = CR1_REGNO; r <= CR7_REGNO; ++r) rs6000_regno_regclass[r] = CR_REGS; - for (r = FIRST_DMR_REGNO; r <= LAST_DMR_REGNO; ++r) - rs6000_regno_regclass[r] = DM_REGS; - rs6000_regno_regclass[LR_REGNO] = LINK_REGS; rs6000_regno_regclass[CTR_REGNO] = CTR_REGS; rs6000_regno_regclass[CA_REGNO] = NO_REGS; @@ -2883,7 +2800,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE; reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE; reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE; - reg_class_to_reg_type[(int)DM_REGS] = DMR_REG_TYPE; if (TARGET_VSX) { @@ -3032,14 +2948,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_align[XOmode] = 512; } - /* Add support for 1,024 bit DMR registers. 
*/ - if (TARGET_DENSE_MATH) - { - rs6000_vector_unit[TDOmode] = VECTOR_NONE; - rs6000_vector_mem[TDOmode] = VECTOR_VSX; - rs6000_vector_align[TDOmode] = 512; - } - /* Register class constraints for the constraints that depend on compile switches. When the VSX code was added, different constraints were added based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all @@ -3078,13 +2986,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) if (TARGET_DIRECT_MOVE_128) rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS; - /* Support for the accumulator registers, either FPR registers (aka original - mma) or DMR registers (dense math). */ - if (TARGET_DENSE_MATH) - rs6000_constraints[RS6000_CONSTRAINT_wD] = DM_REGS; - else if (TARGET_MMA) - rs6000_constraints[RS6000_CONSTRAINT_wD] = FLOAT_REGS; - /* Set up the reload helper and direct move functions. */ if (TARGET_VSX || TARGET_ALTIVEC) { @@ -3253,12 +3154,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) } } - if (TARGET_DENSE_MATH) - { - reg_addr[TDOmode].reload_load = CODE_FOR_reload_dmr_from_memory; - reg_addr[TDOmode].reload_store = CODE_FOR_reload_dmr_to_memory; - } - /* Precalculate HARD_REGNO_NREGS. */ for (r = 0; HARD_REGISTER_NUM_P (r); ++r) for (m = 0; m < NUM_MACHINE_MODES; ++m) @@ -8734,15 +8629,12 @@ reg_offset_addressing_ok_p (machine_mode mode) return mode_supports_dq_form (mode); break; - /* The vector pair/quad types and the dense math types support offset - addressing if the underlying vectors support offset addressing. */ + /* The vector pair/quad types support offset addressing if the + underlying vectors support offset addressing. */ case E_OOmode: case E_XOmode: return TARGET_MMA; - case E_TDOmode: - return TARGET_DENSE_MATH; - case E_SDmode: /* If we can do direct load/stores of SDmode, restrict it to reg+reg addressing for the LFIWZX and STFIWX instructions. */ @@ -11291,12 +11183,6 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode) (mode == OOmode) ? 
"__vector_pair" : "__vector_quad"); break; - case E_TDOmode: - if (CONST_INT_P (operands[1])) - error ("%qs is an opaque type, and you cannot set it to constants", - "__dmr"); - break; - case E_SImode: case E_DImode: /* Use default pattern for address of ELF small data */ @@ -12425,11 +12311,6 @@ rs6000_secondary_reload_memory (rtx addr, addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & ~RELOAD_REG_AND_M16); - /* DMR registers use VSX registers, and need to generate some extra - instructions. */ - else if (rclass == DM_REGS) - return 2; - /* If the register allocator hasn't made up its mind yet on the register class to use, settle on defaults to use. */ else if (rclass == NO_REGS) @@ -12758,13 +12639,6 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE))) return true; - /* We can transfer between VSX registers and DMR registers without needing - extra registers. */ - if (TARGET_DENSE_MATH && (mode == XOmode || mode == TDOmode) - && ((to_type == DMR_REG_TYPE && from_type == VSX_REG_TYPE) - || (to_type == VSX_REG_TYPE && from_type == DMR_REG_TYPE))) - return true; - return false; } @@ -13459,10 +13333,6 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass) machine_mode mode = GET_MODE (x); bool is_constant = CONSTANT_P (x); - /* DMR registers can't be loaded or stored. */ - if (rclass == DM_REGS) - return NO_REGS; - /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred reload class for it. */ if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS) @@ -13559,10 +13429,7 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass) return VSX_REGS; if (mode == XOmode) - return TARGET_DENSE_MATH ? 
VSX_REGS : FLOAT_REGS; - - if (mode == TDOmode) - return VSX_REGS; + return FLOAT_REGS; if (GET_MODE_CLASS (mode) == MODE_INT) return GENERAL_REGS; @@ -13687,12 +13554,6 @@ rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode, else regno = -1; - /* Dense math registers don't have loads or stores. We have to go through - the VSX registers to load XOmode (vector quad) and TDOmode (dmr 1024 - bit). */ - if (TARGET_DENSE_MATH && rclass == DM_REGS) - return VSX_REGS; - /* If we have VSX register moves, prefer moving scalar values between Altivec registers and GPR by going via an FPR (and then via memory) instead of reloading the secondary memory address for Altivec moves. */ @@ -14206,19 +14067,8 @@ print_operand (FILE *file, rtx x, int code) output_operand. */ case 'A': - /* Write the MMA accumulator number associated with VSX register X. On - dense math systems, only allow DMR accumulators, not accumulators - overlapping with the FPR registers. */ - if (!REG_P (x)) - output_operand_lossage ("invalid %%A value"); - else if (TARGET_DENSE_MATH) - { - if (DMR_REGNO_P (REGNO (x))) - fprintf (file, "%d", REGNO (x) - FIRST_DMR_REGNO); - else - output_operand_lossage ("%%A operand is not a DMR"); - } - else if (!FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0) + /* Write the MMA accumulator number associated with VSX register X. */ + if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0) output_operand_lossage ("invalid %%A value"); else fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4); @@ -20781,8 +20631,6 @@ rs6000_mangle_type (const_tree type) return "u13__vector_pair"; if (type == vector_quad_type_node) return "u13__vector_quad"; - if (type == dmr_type_node) - return "u5__dmr"; /* For all other types, use the default mangling. */ return NULL; @@ -22890,35 +22738,6 @@ rs6000_debug_address_cost (rtx x, machine_mode mode, } -/* Subroutine to determine the move cost of dense math registers. 
If we are - moving to/from VSX_REGISTER registers, the cost is either 1 move (for - 512-bit accumulators) or 2 moves (for 1,024 dmr registers). If we are - moving to anything else like GPR registers, make the cost very high. */ - -static int -rs6000_dmr_register_move_cost (machine_mode mode, reg_class_t rclass) -{ - const int reg_move_base = 2; - HARD_REG_SET vsx_set = (reg_class_contents[rclass] - & reg_class_contents[VSX_REGS]); - - if (TARGET_DENSE_MATH && !hard_reg_set_empty_p (vsx_set)) - { - /* __vector_quad (i.e. XOmode) is tranfered in 1 instruction. */ - if (mode == XOmode) - return reg_move_base; - - /* __dmr (i.e. TDOmode) is transferred in 2 instructions. */ - else if (mode == TDOmode) - return reg_move_base * 2; - - else - return reg_move_base * 2 * hard_regno_nregs (FIRST_DMR_REGNO, mode); - } - - return 1000 * 2 * hard_regno_nregs (FIRST_DMR_REGNO, mode); -} - /* A C expression returning the cost of moving data from a register of class CLASS1 to one of CLASS2. */ @@ -22932,28 +22751,17 @@ rs6000_register_move_cost (machine_mode mode, if (TARGET_DEBUG_COST) dbg_cost_ctrl++; - HARD_REG_SET to_vsx, from_vsx; - to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS]; - from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS]; - - /* Special case DMR registers, that can only move to/from VSX registers. */ - if (from == DM_REGS && to == DM_REGS) - ret = 2 * hard_regno_nregs (FIRST_DMR_REGNO, mode); - - else if (from == DM_REGS) - ret = rs6000_dmr_register_move_cost (mode, to); - - else if (to == DM_REGS) - ret = rs6000_dmr_register_move_cost (mode, from); - /* If we have VSX, we can easily move between FPR or Altivec registers, otherwise we can only easily move within classes. Do this first so we give best-case answers for union classes containing both gprs and vsx regs. 
*/ - else if (!hard_reg_set_empty_p (to_vsx) - && !hard_reg_set_empty_p (from_vsx) - && (TARGET_VSX - || hard_reg_set_intersect_p (to_vsx, from_vsx))) + HARD_REG_SET to_vsx, from_vsx; + to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS]; + from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS]; + if (!hard_reg_set_empty_p (to_vsx) + && !hard_reg_set_empty_p (from_vsx) + && (TARGET_VSX + || hard_reg_set_intersect_p (to_vsx, from_vsx))) { int reg = FIRST_FPR_REGNO; if (TARGET_VSX @@ -23048,9 +22856,6 @@ rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass, ret = 4 * hard_regno_nregs (32, mode); else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS)) ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode); - else if (reg_classes_intersect_p (rclass, DM_REGS)) - ret = (rs6000_dmr_register_move_cost (mode, VSX_REGS) - + rs6000_memory_move_cost (mode, VSX_REGS, false)); else ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS); @@ -24259,8 +24064,6 @@ rs6000_compute_pressure_classes (enum reg_class *pressure_classes) if (TARGET_HARD_FLOAT) pressure_classes[n++] = FLOAT_REGS; } - if (TARGET_DENSE_MATH) - pressure_classes[n++] = DM_REGS; pressure_classes[n++] = CR_REGS; pressure_classes[n++] = SPECIAL_REGS; @@ -24425,10 +24228,6 @@ rs6000_debugger_regno (unsigned int regno, unsigned int format) return 67; if (regno == 64) return 64; - /* XXX: This is a guess. The GCC register number for FIRST_DMR_REGNO is 111, - but the frame pointer regnum uses that. */ - if (DMR_REGNO_P (regno)) - return regno - FIRST_DMR_REGNO + 112; gcc_unreachable (); } @@ -27599,10 +27398,9 @@ rs6000_split_multireg_move (rtx dst, rtx src) mode = GET_MODE (dst); nregs = hard_regno_nregs (reg, mode); - /* If we have a vector quad register for MMA or DMR register for dense math, - and this is a load or store, see if we can use vector paired - load/stores. 
*/ - if ((mode == XOmode || mode == TDOmode) && TARGET_MMA + /* If we have a vector quad register for MMA, and this is a load or store, + see if we can use vector paired load/stores. */ + if (mode == XOmode && TARGET_MMA && (MEM_P (dst) || MEM_P (src))) { reg_mode = OOmode; @@ -27610,7 +27408,7 @@ rs6000_split_multireg_move (rtx dst, rtx src) } /* If we have a vector pair/quad mode, split it into two/four separate vectors. */ - else if (mode == OOmode || mode == XOmode || mode == TDOmode) + else if (mode == OOmode || mode == XOmode) reg_mode = V1TImode; else if (FP_REGNO_P (reg)) reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : @@ -27656,13 +27454,13 @@ rs6000_split_multireg_move (rtx dst, rtx src) return; } - /* The __vector_pair, __vector_quad, and __dmr modes are multi-register - modes, so if we have to load or store the registers, we have to be careful - to properly swap them if we're in little endian mode below. This means - the last register gets the first memory location. We also need to be - careful of using the right register numbers if we are splitting XO to - OO. */ - if (mode == OOmode || mode == XOmode || mode == TDOmode) + /* The __vector_pair and __vector_quad modes are multi-register + modes, so if we have to load or store the registers, we have to be + careful to properly swap them if we're in little endian mode + below. This means the last register gets the first memory + location. We also need to be careful of using the right register + numbers if we are splitting XO to OO. */ + if (mode == OOmode || mode == XOmode) { nregs = hard_regno_nregs (reg, mode); int reg_mode_nregs = hard_regno_nregs (reg, reg_mode); @@ -27673,7 +27471,7 @@ rs6000_split_multireg_move (rtx dst, rtx src) /* If we are reading an accumulator register, we have to deprime it before we can access it. 
*/ - if (TARGET_MMA && !TARGET_DENSE_MATH + if (TARGET_MMA && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) emit_insn (gen_mma_xxmfacc (src, src)); @@ -27705,9 +27503,9 @@ rs6000_split_multireg_move (rtx dst, rtx src) emit_insn (gen_rtx_SET (dst2, src2)); } - /* If we are writing an accumulator register that overlaps with the - FPR registers, we have to prime it after we've written it. */ - if (TARGET_MMA && !TARGET_DENSE_MATH + /* If we are writing an accumulator register, we have to + prime it after we've written it. */ + if (TARGET_MMA && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) emit_insn (gen_mma_xxmtacc (dst, dst)); @@ -27721,9 +27519,7 @@ rs6000_split_multireg_move (rtx dst, rtx src) || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE); gcc_assert (REG_P (dst)); if (GET_MODE (src) == XOmode) - gcc_assert ((TARGET_DENSE_MATH - ? VSX_REGNO_P (REGNO (dst)) - : FP_REGNO_P (REGNO (dst)))); + gcc_assert (FP_REGNO_P (REGNO (dst))); if (GET_MODE (src) == OOmode) gcc_assert (VSX_REGNO_P (REGNO (dst))); @@ -27776,9 +27572,9 @@ rs6000_split_multireg_move (rtx dst, rtx src) emit_insn (gen_rtx_SET (dst_i, op)); } - /* On systems without dense math where accumulators overlap with the - vector registers, we have to prime it after we've written it. */ - if (GET_MODE (src) == XOmode && !TARGET_DENSE_MATH) + /* We are writing an accumulator register, so we have to + prime it after we've written it. */ + if (GET_MODE (src) == XOmode) emit_insn (gen_mma_xxmtacc (dst, dst)); return; @@ -27789,9 +27585,9 @@ rs6000_split_multireg_move (rtx dst, rtx src) if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst))) { - /* If we are reading an accumulator register and we don't have dense - math, we have to deprime it before we can access it. */ - if (TARGET_MMA && !TARGET_DENSE_MATH + /* If we are reading an accumulator register, we have to + deprime it before we can access it. 
*/ + if (TARGET_MMA && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) emit_insn (gen_mma_xxmfacc (src, src)); @@ -27799,7 +27595,7 @@ rs6000_split_multireg_move (rtx dst, rtx src) overlap. */ int i; /* XO/OO are opaque so cannot use subregs. */ - if (mode == OOmode || mode == XOmode || mode == TDOmode) + if (mode == OOmode || mode == XOmode ) { for (i = nregs - 1; i >= 0; i--) { @@ -27819,7 +27615,7 @@ rs6000_split_multireg_move (rtx dst, rtx src) /* If we are writing an accumulator register, we have to prime it after we've written it. */ - if (TARGET_MMA && !TARGET_DENSE_MATH + if (TARGET_MMA && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) emit_insn (gen_mma_xxmtacc (dst, dst)); } @@ -27956,7 +27752,7 @@ rs6000_split_multireg_move (rtx dst, rtx src) /* If we are reading an accumulator register, we have to deprime it before we can access it. */ - if (TARGET_MMA && !TARGET_DENSE_MATH && REG_P (src) + if (TARGET_MMA && REG_P (src) && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) emit_insn (gen_mma_xxmfacc (src, src)); @@ -27973,7 +27769,7 @@ rs6000_split_multireg_move (rtx dst, rtx src) continue; /* XO/OO are opaque so cannot use subregs. */ - if (mode == OOmode || mode == XOmode || mode == TDOmode) + if (mode == OOmode || mode == XOmode ) { rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j); rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j); @@ -27988,7 +27784,7 @@ rs6000_split_multireg_move (rtx dst, rtx src) /* If we are writing an accumulator register, we have to prime it after we've written it. */ - if (TARGET_MMA && !TARGET_DENSE_MATH && REG_P (dst) + if (TARGET_MMA && REG_P (dst) && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) emit_insn (gen_mma_xxmtacc (dst, dst)); @@ -28955,8 +28751,7 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype) if (frommode != tomode) { - /* Do not allow conversions to/from XOmode, OOmode, and TDOmode - types. */ + /* Do not allow conversions to/from XOmode and OOmode types. 
*/ if (frommode == XOmode) return N_("invalid conversion from type %<__vector_quad%>"); if (tomode == XOmode) @@ -28965,10 +28760,6 @@ rs6000_invalid_conversion (const_tree fromtype, const_tree totype) return N_("invalid conversion from type %<__vector_pair%>"); if (tomode == OOmode) return N_("invalid conversion to type %<__vector_pair%>"); - if (frommode == TDOmode) - return N_("invalid conversion from type %<__dmr%>"); - if (tomode == TDOmode) - return N_("invalid conversion to type %<__dmr%>"); } /* Conversion allowed. */ diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index c533a5d6fe4..79ce1a8cbf1 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -562,13 +562,6 @@ extern int rs6000_vector_align[]; && TARGET_P8_VECTOR \ && TARGET_POWERPC64) -/* Whether we have dense math support. */ -#define TARGET_DENSE_MATH (TARGET_MMA && TARGET_FUTURE) -#define TARGET_MMA_NO_DENSE_MATH (TARGET_MMA && !TARGET_FUTURE) - -/* Dense math flags. */ -#define DENSE_MATH_FLAGS (OPTION_MASK_FUTURE & OPTION_MASK_MMA) - /* Inlining allows targets to define the meanings of bits in target_info field of ipa_fn_summary by itself, the used bits for rs6000 are listed below. */ @@ -666,7 +659,6 @@ extern unsigned char rs6000_recip_bits[]; #define UNITS_PER_FP_WORD 8 #define UNITS_PER_ALTIVEC_WORD 16 #define UNITS_PER_VSX_WORD 16 -#define UNITS_PER_DMR_WORD 128 /* Type used for ptrdiff_t, as a string used in a declaration. */ #define PTRDIFF_TYPE "int" @@ -794,7 +786,7 @@ enum data_align { align_abi, align_opt, align_both }; Another pseudo (not included in DWARF_FRAME_REGISTERS) is soft frame pointer, which is eventually eliminated in favor of SP or FP. */ -#define FIRST_PSEUDO_REGISTER 119 +#define FIRST_PSEUDO_REGISTER 111 /* Use standard DWARF numbering for DWARF debugging information. 
*/ #define DEBUGGER_REGNO(REGNO) rs6000_debugger_regno ((REGNO), 0) @@ -831,9 +823,7 @@ enum data_align { align_abi, align_opt, align_both }; /* cr0..cr7 */ \ 0, 0, 0, 0, 0, 0, 0, 0, \ /* vrsave vscr sfp */ \ - 1, 1, 1, \ - /* DMR registers. */ \ - 0, 0, 0, 0, 0, 0, 0, 0 \ + 1, 1, 1 \ } /* Like `CALL_USED_REGISTERS' except this macro doesn't require that @@ -857,9 +847,7 @@ enum data_align { align_abi, align_opt, align_both }; /* cr0..cr7 */ \ 1, 1, 0, 0, 0, 1, 1, 1, \ /* vrsave vscr sfp */ \ - 0, 0, 0, \ - /* DMR registers. */ \ - 0, 0, 0, 0, 0, 0, 0, 0 \ + 0, 0, 0 \ } #define TOTAL_ALTIVEC_REGS (LAST_ALTIVEC_REGNO - FIRST_ALTIVEC_REGNO + 1) @@ -896,7 +884,6 @@ enum data_align { align_abi, align_opt, align_both }; v2 (not saved; incoming vector arg reg; return value) v19 - v14 (not saved or used for anything) v31 - v20 (saved; order given to save least number) - dmr0 - dmr7 (not saved) vrsave, vscr (fixed) sfp (fixed) */ @@ -939,9 +926,6 @@ enum data_align { align_abi, align_opt, align_both }; 66, \ 83, 82, 81, 80, 79, 78, \ 95, 94, 93, 92, 91, 90, 89, 88, 87, 86, 85, 84, \ - /* DMR registers. */ \ - 111, 112, 113, 114, 115, 116, 117, 118, \ - /* Vrsave, vscr, sfp. */ \ 108, 109, \ 110 \ } @@ -968,9 +952,6 @@ enum data_align { align_abi, align_opt, align_both }; /* True if register is a VSX register. */ #define VSX_REGNO_P(N) (FP_REGNO_P (N) || ALTIVEC_REGNO_P (N)) -/* True if register is a DMR register. */ -#define DMR_REGNO_P(N) ((N) >= FIRST_DMR_REGNO && (N) <= LAST_DMR_REGNO) - /* Alternate name for any vector register supporting floating point, no matter which instruction set(s) are available. */ #define VFLOAT_REGNO_P(N) \ @@ -1010,7 +991,7 @@ enum data_align { align_abi, align_opt, align_both }; /* Modes that are not vectors, but require vector alignment. Treat these like vectors in terms of loads and stores. 
*/ #define VECTOR_ALIGNMENT_P(MODE) \ - (FLOAT128_VECTOR_P (MODE) || OPAQUE_MODE_P (MODE)) + (FLOAT128_VECTOR_P (MODE) || (MODE) == OOmode || (MODE) == XOmode) #define ALTIVEC_VECTOR_MODE(MODE) \ ((MODE) == V16QImode \ @@ -1106,7 +1087,6 @@ enum reg_class FLOAT_REGS, ALTIVEC_REGS, VSX_REGS, - DM_REGS, VRSAVE_REGS, VSCR_REGS, GEN_OR_FLOAT_REGS, @@ -1136,7 +1116,6 @@ enum reg_class "FLOAT_REGS", \ "ALTIVEC_REGS", \ "VSX_REGS", \ - "DM_REGS", \ "VRSAVE_REGS", \ "VSCR_REGS", \ "GEN_OR_FLOAT_REGS", \ @@ -1171,8 +1150,6 @@ enum reg_class { 0x00000000, 0x00000000, 0xffffffff, 0x00000000 }, \ /* VSX_REGS. */ \ { 0x00000000, 0xffffffff, 0xffffffff, 0x00000000 }, \ - /* DM_REGS. */ \ - { 0x00000000, 0x00000000, 0x00000000, 0x007f8000 }, \ /* VRSAVE_REGS. */ \ { 0x00000000, 0x00000000, 0x00000000, 0x00001000 }, \ /* VSCR_REGS. */ \ @@ -1200,7 +1177,7 @@ enum reg_class /* CA_REGS. */ \ { 0x00000000, 0x00000000, 0x00000000, 0x00000004 }, \ /* ALL_REGS. */ \ - { 0xffffffff, 0xffffffff, 0xffffffff, 0x007fffff } \ + { 0xffffffff, 0xffffffff, 0xffffffff, 0x00007fff } \ } /* The same information, inverted: @@ -1224,7 +1201,6 @@ enum r6000_reg_class_enum { RS6000_CONSTRAINT_wr, /* GPR register if 64-bit */ RS6000_CONSTRAINT_wx, /* FPR register for STFIWX */ RS6000_CONSTRAINT_wA, /* BASE_REGS if 64-bit. */ - RS6000_CONSTRAINT_wD, /* Accumulator regs if MMA/Dense Math. */ RS6000_CONSTRAINT_MAX }; @@ -2101,16 +2077,7 @@ extern char rs6000_reg_names[][8]; /* register names (0 vs. %r0). 
*/ &rs6000_reg_names[108][0], /* vrsave */ \ &rs6000_reg_names[109][0], /* vscr */ \ \ - &rs6000_reg_names[110][0], /* sfp */ \ - \ - &rs6000_reg_names[111][0], /* dmr0 */ \ - &rs6000_reg_names[112][0], /* dmr1 */ \ - &rs6000_reg_names[113][0], /* dmr2 */ \ - &rs6000_reg_names[114][0], /* dmr3 */ \ - &rs6000_reg_names[115][0], /* dmr4 */ \ - &rs6000_reg_names[116][0], /* dmr5 */ \ - &rs6000_reg_names[117][0], /* dmr6 */ \ - &rs6000_reg_names[118][0], /* dmr7 */ \ + &rs6000_reg_names[110][0] /* sfp */ \ } /* Table of additional register names to use in user input. */ @@ -2164,8 +2131,6 @@ extern char rs6000_reg_names[][8]; /* register names (0 vs. %r0). */ {"vs52", 84}, {"vs53", 85}, {"vs54", 86}, {"vs55", 87}, \ {"vs56", 88}, {"vs57", 89}, {"vs58", 90}, {"vs59", 91}, \ {"vs60", 92}, {"vs61", 93}, {"vs62", 94}, {"vs63", 95}, \ - {"dmr0", 111}, {"dmr1", 112}, {"dmr2", 113}, {"dmr3", 114}, \ - {"dmr4", 115}, {"dmr5", 116}, {"dmr6", 117}, {"dmr7", 118}, \ } /* This is how to output an element of a case-vector that is relative. */ @@ -2299,7 +2264,6 @@ enum rs6000_builtin_type_index RS6000_BTI_const_str, /* pointer to const char * */ RS6000_BTI_vector_pair, /* unsigned 256-bit types (vector pair). */ RS6000_BTI_vector_quad, /* unsigned 512-bit types (vector quad). */ - RS6000_BTI_dmr, /* unsigned 1,024-bit types (dmr). 
*/ RS6000_BTI_const_ptr_void, /* const pointer to void */ RS6000_BTI_ptr_V16QI, RS6000_BTI_ptr_V1TI, @@ -2338,7 +2302,6 @@ enum rs6000_builtin_type_index RS6000_BTI_ptr_dfloat128, RS6000_BTI_ptr_vector_pair, RS6000_BTI_ptr_vector_quad, - RS6000_BTI_ptr_dmr, RS6000_BTI_ptr_long_long, RS6000_BTI_ptr_long_long_unsigned, RS6000_BTI_MAX @@ -2396,7 +2359,6 @@ enum rs6000_builtin_type_index #define const_str_type_node (rs6000_builtin_types[RS6000_BTI_const_str]) #define vector_pair_type_node (rs6000_builtin_types[RS6000_BTI_vector_pair]) #define vector_quad_type_node (rs6000_builtin_types[RS6000_BTI_vector_quad]) -#define dmr_type_node (rs6000_builtin_types[RS6000_BTI_dmr]) #define pcvoid_type_node (rs6000_builtin_types[RS6000_BTI_const_ptr_void]) #define ptr_V16QI_type_node (rs6000_builtin_types[RS6000_BTI_ptr_V16QI]) #define ptr_V1TI_type_node (rs6000_builtin_types[RS6000_BTI_ptr_V1TI]) @@ -2435,7 +2397,6 @@ enum rs6000_builtin_type_index #define ptr_dfloat128_type_node (rs6000_builtin_types[RS6000_BTI_ptr_dfloat128]) #define ptr_vector_pair_type_node (rs6000_builtin_types[RS6000_BTI_ptr_vector_pair]) #define ptr_vector_quad_type_node (rs6000_builtin_types[RS6000_BTI_ptr_vector_quad]) -#define ptr_dmr_type_node (rs6000_builtin_types[RS6000_BTI_ptr_dmr]) #define ptr_long_long_integer_type_node (rs6000_builtin_types[RS6000_BTI_ptr_long_long]) #define ptr_long_long_unsigned_type_node (rs6000_builtin_types[RS6000_BTI_ptr_long_long_unsigned]) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 99e6515ba1d..bc8bc6ab060 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -51,8 +51,6 @@ (VRSAVE_REGNO 108) (VSCR_REGNO 109) (FRAME_POINTER_REGNUM 110) - (FIRST_DMR_REGNO 111) - (LAST_DMR_REGNO 118) ]) ;; @@ -357,7 +355,7 @@ (const (symbol_ref "(enum attr_cpu) rs6000_tune"))) ;; The ISA we implement. 
-(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10,dm,not_dm" +(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10" (const_string "any")) ;; Is this alternative enabled for the current CPU/ISA/etc.? @@ -405,14 +403,6 @@ (and (eq_attr "isa" "p10") (match_test "TARGET_POWER10")) (const_int 1) - - (and (eq_attr "isa" "dm") - (match_test "TARGET_DENSE_MATH")) - (const_int 1) - - (and (eq_attr "isa" "not_dm") - (match_test "!TARGET_DENSE_MATH")) - (const_int 1) ] (const_int 0))) ;; If this instruction is microcoded on the CELL processor diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index facb281d8ef..7b7e6507754 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -3441,13 +3441,6 @@ Like @code{d}, if @option{-mpowerpc-gfxopt} is used; otherwise, @code{NO_REGS}. @item wA Like @code{b}, if @option{-mpowerpc64} is used; otherwise, @code{NO_REGS}. -@item wD -Accumulator register if @option{-mma} is used; otherwise, -@code{NO_REGS}. If @option{-mdense-math} is used, the accumulator -register will be in the dense match register set. If -@option{-mno-dense-math} is used, the accumulator register will -overlap with the VSX vector registers 0..31. - @item wB Signed 5-bit constant integer that can be loaded into an Altivec register. diff --git a/gcc/testsuite/gcc.target/powerpc/dm-1024bit.c b/gcc/testsuite/gcc.target/powerpc/dm-1024bit.c deleted file mode 100644 index 0a9884ddf63..00000000000 --- a/gcc/testsuite/gcc.target/powerpc/dm-1024bit.c +++ /dev/null @@ -1,63 +0,0 @@ -/* { dg-do compile } */ -/* { dg-require-effective-target powerpc_dense_math_ok } */ -/* { dg-options "-mdejagnu-cpu=future -O2" } */ - -/* Test basic load/store for __dmr type. 
*/ - -#ifndef CONSTRAINT -#if defined(USE_D) -#define CONSTRAINT "d" - -#elif defined(USE_V) -#define CONSTRAINT "v" - -#elif defined(USE_WA) -#define CONSTRAINT "wa" - -#else -#define CONSTRAINT "wD" -#endif -#endif -const char constraint[] = CONSTRAINT; - -void foo_mem_asm (__dmr *p, __dmr *q) -{ - /* 2 LXVP instructions. */ - __dmr vq = *p; - - /* 2 DMXXINSTDMR512 instructions to transfer VSX to DMR. */ - __asm__ ("# foo (" CONSTRAINT ") %A0" : "+" CONSTRAINT (vq)); - /* 2 DMXXEXTFDMR512 instructions to transfer DMR to VSX. */ - - /* 2 STXVP instructions. */ - *q = vq; -} - -void foo_mem_asm2 (__dmr *p, __dmr *q) -{ - /* 2 LXVP instructions. */ - __dmr vq = *p; - __dmr vq2; - __dmr vq3; - - /* 2 DMXXINSTDMR512 instructions to transfer VSX to DMR. */ - __asm__ ("# foo1 (" CONSTRAINT ") %A0" : "+" CONSTRAINT (vq)); - /* 2 DMXXEXTFDMR512 instructions to transfer DMR to VSX. */ - - vq2 = vq; - __asm__ ("# foo2 (wa) %0" : "+wa" (vq2)); - - /* 2 STXVP instructions. */ - *q = vq2; -} - -void foo_mem (__dmr *p, __dmr *q) -{ - /* 2 LXVP, 2 STXVP instructions, no DMR transfer. */ - *q = *p; -} - -/* { dg-final { scan-assembler-times {\mdmxxextfdmr512\M} 4 } } */ -/* { dg-final { scan-assembler-times {\mdmxxinstdmr512\M} 4 } } */ -/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */ -/* { dg-final { scan-assembler-times {\mstxvp\M} 12 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c deleted file mode 100644 index 66c19779585..00000000000 --- a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c +++ /dev/null @@ -1,194 +0,0 @@ -/* Test derived from mma-double-1.c, modified for dense math. 
*/ -/* { dg-do compile } */ -/* { dg-require-effective-target powerpc_dense_math_ok } */ -/* { dg-options "-mdejagnu-cpu=future -O2" } */ - -#include -#include -#include - -typedef unsigned char vec_t __attribute__ ((vector_size (16))); -typedef double v4sf_t __attribute__ ((vector_size (16))); -#define SAVE_ACC(ACC, ldc, J) \ - __builtin_mma_disassemble_acc (result, ACC); \ - rowC = (v4sf_t *) &CO[0*ldc+J]; \ - rowC[0] += result[0]; \ - rowC = (v4sf_t *) &CO[1*ldc+J]; \ - rowC[0] += result[1]; \ - rowC = (v4sf_t *) &CO[2*ldc+J]; \ - rowC[0] += result[2]; \ - rowC = (v4sf_t *) &CO[3*ldc+J]; \ - rowC[0] += result[3]; - -void -DM (int m, int n, int k, double *A, double *B, double *C) -{ - __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7; - v4sf_t result[4]; - v4sf_t *rowC; - for (int l = 0; l < n; l += 4) - { - double *CO; - double *AO; - AO = A; - CO = C; - C += m * 4; - for (int j = 0; j < m; j += 16) - { - double *BO = B; - __builtin_mma_xxsetaccz (&acc0); - __builtin_mma_xxsetaccz (&acc1); - __builtin_mma_xxsetaccz (&acc2); - __builtin_mma_xxsetaccz (&acc3); - __builtin_mma_xxsetaccz (&acc4); - __builtin_mma_xxsetaccz (&acc5); - __builtin_mma_xxsetaccz (&acc6); - __builtin_mma_xxsetaccz (&acc7); - unsigned long i; - - for (i = 0; i < k; i++) - { - vec_t *rowA = (vec_t *) & AO[i * 16]; - __vector_pair rowB; - vec_t *rb = (vec_t *) & BO[i * 4]; - __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]); - __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]); - __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]); - __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]); - __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]); - __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]); - __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]); - __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]); - __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]); - } - SAVE_ACC (&acc0, m, 0); - SAVE_ACC (&acc2, m, 4); - SAVE_ACC (&acc1, m, 2); - SAVE_ACC (&acc3, m, 6); - SAVE_ACC (&acc4, m, 8); - SAVE_ACC (&acc6, m, 
12); - SAVE_ACC (&acc5, m, 10); - SAVE_ACC (&acc7, m, 14); - AO += k * 16; - BO += k * 4; - CO += 16; - } - B += k * 4; - } -} - -void -init (double *matrix, int row, int column) -{ - for (int j = 0; j < column; j++) - { - for (int i = 0; i < row; i++) - { - matrix[j * row + i] = (i * 16 + 2 + j) / 0.123; - } - } -} - -void -init0 (double *matrix, double *matrix1, int row, int column) -{ - for (int j = 0; j < column; j++) - for (int i = 0; i < row; i++) - matrix[j * row + i] = matrix1[j * row + i] = 0; -} - - -void -print (const char *name, const double *matrix, int row, int column) -{ - printf ("Matrix %s has %d rows and %d columns:\n", name, row, column); - for (int i = 0; i < row; i++) - { - for (int j = 0; j < column; j++) - { - printf ("%f ", matrix[j * row + i]); - } - printf ("\n"); - } - printf ("\n"); -} - -int -main (int argc, char *argv[]) -{ - int rowsA, colsB, common; - int i, j, k; - int ret = 0; - - for (int t = 16; t <= 128; t += 16) - { - for (int t1 = 4; t1 <= 16; t1 += 4) - { - rowsA = t; - colsB = t1; - common = 1; - /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */ - double A[rowsA * common]; - double B[common * colsB]; - double C[rowsA * colsB]; - double D[rowsA * colsB]; - - - init (A, rowsA, common); - init (B, common, colsB); - init0 (C, D, rowsA, colsB); - DM (rowsA, colsB, common, A, B, C); - - for (i = 0; i < colsB; i++) - { - for (j = 0; j < rowsA; j++) - { - D[i * rowsA + j] = 0; - for (k = 0; k < common; k++) - { - D[i * rowsA + j] += - A[k * rowsA + j] * B[k + common * i]; - } - } - } - for (i = 0; i < colsB; i++) - { - for (j = 0; j < rowsA; j++) - { - for (k = 0; k < common; k++) - { - if (D[i * rowsA + j] != C[i * rowsA + j]) - { - printf ("Error %d,%d,%d\n",i,j,k); - ret++; - } - } - } - } - if (ret) - { - print ("A", A, rowsA, common); - print ("B", B, common, colsB); - print ("C", C, rowsA, colsB); - print ("D", D, rowsA, colsB); - } - } - } - -#ifdef VERBOSE - if (ret) - printf ("DM double test fail: %d 
errors\n",ret); - else - printf ("DM double test success: 0 DM errors\n"); -#else - if (ret) - abort(); -#endif - - return ret; -} - -/* { dg-final { scan-assembler {\mdmsetdmrz\M} } } */ -/* { dg-final { scan-assembler {\mdmxvf64gerpp\M} } } */ -/* { dg-final { scan-assembler {\mdmxxextfdmr512\M} } } */ - diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 96033b950eb..e23d3ec8b3c 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -7121,25 +7121,6 @@ proc check_effective_target_power11_ok { } { } } -# Return 1 if this is a PowerPC target supporting -mcpu=future which enables -# the dense math operations. -proc check_effective_target_powerpc_dense_math_ok { } { - return [check_no_compiler_messages_nocache powerpc_dense_math_ok assembly { - __vector_quad vq; - void test (void) - { - #ifndef __PPC_DMR__ - #error "target does not have dense math support." - #else - /* Make sure we have dense math support. */ - __vector_quad dmr; - __asm__ ("dmsetaccz %A0" : "=wD" (dmr)); - vq = dmr; - #endif - } - } "-mcpu=future"] -} - # Return 1 if this is a PowerPC target supporting -mfloat128 via either # software emulation on power7/power8 systems or hardware support on power9.