From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id 8E4143835789; Tue, 15 Nov 2022 01:56:01 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 8E4143835789 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1668477361; bh=nXCziqSgpMNDMCpq393BrKkcnZpYpmbp6I4TUJoosQo=; h=From:To:Subject:Date:From; b=nMYrwP83tlHrTi+E9xzeGpiD/J4kgy+bs25uyFDs9rioSzDIwDzqlfT7NrZHjYsO1 2bGWhxbzyLn+fvDHBC1r0DUJMFdPfAh0HLm2N6R2kyzPLv2SPPoqAvF96XlKy9pnGV dqCc2S/KCId8+cq3/EYuslX6tLYEB2wdF2xBUr9Y= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/dmf004)] Use lxvl and stxvl for small variable memcpy moves. X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/dmf004 X-Git-Oldrev: 8610efaa3f237b45314dfeb7393b8ea8f5029523 X-Git-Newrev: 8398557374d749782e51ecd4ecae776da82d209f Message-Id: <20221115015601.8E4143835789@sourceware.org> Date: Tue, 15 Nov 2022 01:56:01 +0000 (GMT) List-Id: https://gcc.gnu.org/g:8398557374d749782e51ecd4ecae776da82d209f commit 8398557374d749782e51ecd4ecae776da82d209f Author: Michael Meissner Date: Mon Nov 14 20:55:46 2022 -0500 Use lxvl and stxvl for small variable memcpy moves. This patch adds support to generate inline code for block copy with a variable size if the size is 16 bytes or less. If the size is more than 16 bytes, just call memcpy. To handle variable sizes, I found we need DImode versions of the two insns for copying memory (cpymem<mode> and movmem<mode>). 2022-11-14 Michael Meissner gcc/ * config/rs6000/rs6000-string.cc (expand_block_move): Add support for using lxvl and stxvl to move up to 16 bytes inline without calling memcpy. * config/rs6000/rs6000.md (cpymem<mode>): Expand cpymemsi to also provide cpymemdi to handle DImode sizes as well as SImode sizes. 
(movmem<mode>): Expand movmemsi to also provide movmemdi to handle DImode sizes as well as SImode sizes. Diff: --- gcc/config/rs6000/rs6000-string.cc | 53 ++++++++++++++++++++++++++++++++++++-- gcc/config/rs6000/rs6000.md | 12 ++++----- 2 files changed, 57 insertions(+), 8 deletions(-) diff --git a/gcc/config/rs6000/rs6000-string.cc b/gcc/config/rs6000/rs6000-string.cc index cd8ee8c2f7e..2468e375781 100644 --- a/gcc/config/rs6000/rs6000-string.cc +++ b/gcc/config/rs6000/rs6000-string.cc @@ -2760,9 +2760,58 @@ expand_block_move (rtx operands[], bool might_overlap) rtx stores[MAX_MOVE_REG]; int num_reg = 0; - /* If this is not a fixed size move, just call memcpy */ + /* If this is not a fixed size move, see if we can use load/store vector with + length to handle multiple bytes. Don't do the optimization if -Os. + Otherwise, just call memcpy. */ if (! constp) - return 0; + { + if (TARGET_BLOCK_OPS_UNALIGNED_VSX && TARGET_P9_VECTOR && TARGET_64BIT + && !optimize_size) + { + rtx join_label = gen_label_rtx (); + rtx inline_label = gen_label_rtx (); + rtx dest_addr = copy_addr_to_reg (XEXP (orig_dest, 0)); + rtx src_addr = copy_addr_to_reg (XEXP (orig_src, 0)); + + /* Check if we want to handle this with inline code. */ + bytes_rtx = (GET_MODE (bytes_rtx) == Pmode + ? copy_to_reg (bytes_rtx) + : convert_to_mode (Pmode, bytes_rtx, true)); + + rtx cr = gen_reg_rtx (CCUNSmode); + rtx max_size = GEN_INT (16); + emit_insn (gen_rtx_SET (cr, + gen_rtx_COMPARE (CCUNSmode, bytes_rtx, + max_size))); + + do_ifelse (CCUNSmode, LEU, NULL_RTX, NULL_RTX, cr, + inline_label, profile_probability::likely ()); + + /* Call memcpy if the size is too large. 
*/ + tree fun = builtin_decl_explicit (BUILT_IN_MEMCPY); + emit_library_call_value (XEXP (DECL_RTL (fun), 0), + NULL_RTX, LCT_NORMAL, Pmode, + dest_addr, Pmode, + src_addr, Pmode, + bytes_rtx, Pmode); + + rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label); + emit_jump_insn (gen_rtx_SET (pc_rtx, join_ref)); + emit_barrier (); + + emit_label (inline_label); + + /* We want to move bytes inline. Move 0..16 bytes now. */ + rtx vreg = gen_reg_rtx (V16QImode); + emit_insn (gen_lxvl (vreg, src_addr, bytes_rtx)); + emit_insn (gen_stxvl (vreg, dest_addr, bytes_rtx)); + + emit_label (join_label); + return 1; + } + + return 0; + } /* This must be a fixed size alignment */ gcc_assert (CONST_INT_P (align_rtx)); diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index e9dfb138603..12bae0d32a7 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -9880,11 +9880,11 @@ ;; Argument 2 is the length ;; Argument 3 is the alignment -(define_expand "cpymemsi" +(define_expand "cpymem" [(parallel [(set (match_operand:BLK 0 "") (match_operand:BLK 1 "")) - (use (match_operand:SI 2 "")) - (use (match_operand:SI 3 ""))])] + (use (match_operand:GPR 2 "")) + (use (match_operand:GPR 3 ""))])] "" { if (expand_block_move (operands, false)) @@ -9899,11 +9899,11 @@ ;; Argument 2 is the length ;; Argument 3 is the alignment -(define_expand "movmemsi" +(define_expand "movmem" [(parallel [(set (match_operand:BLK 0 "") (match_operand:BLK 1 "")) - (use (match_operand:SI 2 "")) - (use (match_operand:SI 3 ""))])] + (use (match_operand:GPR 2 "")) + (use (match_operand:GPR 3 ""))])] "" { if (expand_block_move (operands, true))