From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id 72E8E38518BC; Tue, 15 Nov 2022 00:58:46 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 72E8E38518BC DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1668473926; bh=I3+HMJw3inymkiESk6nXowO74/nyG4nlBM4bK1EWASM=; h=From:To:Subject:Date:From; b=bt+GeZOPZKqa0y+x8tge4AtKIB8xFINpvbJ7flvmUX4/SzVyLgswB3lzAw2ttdzPV eeYzO/QtHDd4WEvw+em8WYUrWgSbyOPk66Ls5x9J3t7yLagifDkl+mtenhi4+d9TVt A3wjuYvD6r3xbnJuLfHagnbfdsYGN+3qfr4ZlxBE= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/dmf004)] Use lxvl and stxvl for small variable memcpy moves. X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/dmf004 X-Git-Oldrev: 385915af875395fa76112368d20a7b2906f5af62 X-Git-Newrev: a2e1c0fa8dc467907ef551d8c6e03baf2c5f99d5 Message-Id: <20221115005846.72E8E38518BC@sourceware.org> Date: Tue, 15 Nov 2022 00:58:46 +0000 (GMT) List-Id: https://gcc.gnu.org/g:a2e1c0fa8dc467907ef551d8c6e03baf2c5f99d5 commit a2e1c0fa8dc467907ef551d8c6e03baf2c5f99d5 Author: Michael Meissner Date: Mon Nov 14 19:56:25 2022 -0500 Use lxvl and stxvl for small variable memcpy moves. This patch adds support to generate inline code for block copy with a variable size if the size is 16 bytes or less. If the size is more than 16 bytes, just call memcpy. To handle variable sizes, I found we need DImode versions of the two insns for copying memory (cpymem<mode> and movmem<mode>). 2022-11-14 Michael Meissner gcc/ * config/rs6000/rs6000-string.cc (expand_block_move): Add support for using lxvl and stxvl to move up to 16 bytes inline without calling memcpy. * config/rs6000/rs6000.md (cpymem<mode>): Expand cpymemsi to also provide cpymemdi to handle DImode sizes as well as SImode sizes.
(movmem<mode>): Expand movmemsi to also provide movmemdi to handle DImode sizes as well as SImode sizes. Diff: --- gcc/config/rs6000/rs6000-string.cc | 49 ++++++++++++++++++++++++++++++++++++-- gcc/config/rs6000/rs6000.md | 12 +++++----- 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/gcc/config/rs6000/rs6000-string.cc b/gcc/config/rs6000/rs6000-string.cc index cd8ee8c2f7e..596fbc634f4 100644 --- a/gcc/config/rs6000/rs6000-string.cc +++ b/gcc/config/rs6000/rs6000-string.cc @@ -2760,9 +2760,54 @@ expand_block_move (rtx operands[], bool might_overlap) rtx stores[MAX_MOVE_REG]; int num_reg = 0; - /* If this is not a fixed size move, just call memcpy */ + /* If this is not a fixed size move, see if we can use load/store vector with + length to handle multiple bytes. Don't do the optimization if -Os. + Otherwise, just call memcpy. */ if (! constp) - return 0; + { + if (TARGET_BLOCK_OPS_UNALIGNED_VSX && TARGET_P9_VECTOR && TARGET_64BIT + && !optimize_size) + { + rtx join_label = gen_label_rtx (); + rtx inline_label = gen_label_rtx (); + rtx dest_addr = copy_addr_to_reg (XEXP (orig_dest, 0)); + rtx src_addr = copy_addr_to_reg (XEXP (orig_src, 0)); + + /* Call memcpy if the size is too large. */ + bytes_rtx = force_reg (Pmode, bytes_rtx); + rtx cr = gen_reg_rtx (CCUNSmode); + rtx max_size = GEN_INT (16); + emit_insn (gen_rtx_SET (cr, + gen_rtx_COMPARE (CCUNSmode, bytes_rtx, + max_size))); + + do_ifelse (CCUNSmode, LEU, NULL_RTX, NULL_RTX, cr, + inline_label, profile_probability::likely ()); + + tree fun = builtin_decl_explicit (BUILT_IN_MEMCPY); + emit_library_call_value (XEXP (DECL_RTL (fun), 0), + NULL_RTX, LCT_NORMAL, Pmode, + dest_addr, Pmode, + src_addr, Pmode, + bytes_rtx, Pmode); + + rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label); + emit_jump_insn (gen_rtx_SET (pc_rtx, join_ref)); + emit_barrier (); + + emit_label (inline_label); + + /* Move the final 0..16 bytes.
*/ + rtx vreg = gen_reg_rtx (V16QImode); + emit_insn (gen_lxvl (vreg, src_addr, bytes_rtx)); + emit_insn (gen_stxvl (vreg, dest_addr, bytes_rtx)); + + emit_label (join_label); + return 1; + } + + return 0; + } /* This must be a fixed size alignment */ gcc_assert (CONST_INT_P (align_rtx)); diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index e9dfb138603..12bae0d32a7 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -9880,11 +9880,11 @@ ;; Argument 2 is the length ;; Argument 3 is the alignment -(define_expand "cpymemsi" +(define_expand "cpymem<mode>" [(parallel [(set (match_operand:BLK 0 "") (match_operand:BLK 1 "")) - (use (match_operand:SI 2 "")) - (use (match_operand:SI 3 ""))])] + (use (match_operand:GPR 2 "")) + (use (match_operand:GPR 3 ""))])] "" { if (expand_block_move (operands, false)) @@ -9899,11 +9899,11 @@ ;; Argument 2 is the length ;; Argument 3 is the alignment -(define_expand "movmemsi" +(define_expand "movmem<mode>" [(parallel [(set (match_operand:BLK 0 "") (match_operand:BLK 1 "")) - (use (match_operand:SI 2 "")) - (use (match_operand:SI 3 ""))])] + (use (match_operand:GPR 2 "")) + (use (match_operand:GPR 3 ""))])] "" { if (expand_block_move (operands, true))