From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id 3668A384F6D3; Thu, 17 Nov 2022 21:55:41 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 3668A384F6D3 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1668722141; bh=ZA+PJbHHcKGSHvDk7X4UbWEjlbaHsE0WQqOjyWBoOnM=; h=From:To:Subject:Date:From; b=rGlsk917sfNEMFgQjgt5/lMQxoSMiYVqSzWs1goQ5zjbE8qR/epUfUk77YtCsswN2 TOG3km8QUWuTbQKav6n1E4Bvzya8ry03KwYtY1Y7x1twBztJbhnxGH4Y4pJT9Qynj7 LiMqccEdIQsD0S+PY7lE1MONtTu5qvZSi7YfqRU0= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/dmf004)] Limit memcpy inline copy to do just 2 variable moves. X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/dmf004 X-Git-Oldrev: 0b7b794d002581e463edd9c0f5fbe5cd9cf65968 X-Git-Newrev: b3cecdfb43c2134a65833eaa415fef65d4e24b98 Message-Id: <20221117215541.3668A384F6D3@sourceware.org> Date: Thu, 17 Nov 2022 21:55:41 +0000 (GMT) List-Id: https://gcc.gnu.org/g:b3cecdfb43c2134a65833eaa415fef65d4e24b98 commit b3cecdfb43c2134a65833eaa415fef65d4e24b98 Author: Michael Meissner Date: Wed Nov 16 15:55:46 2022 -0500 Limit memcpy inline copy to do just 2 variable moves. 2022-11-16 Michael Meissner gcc/ * config/rs6000/rs6000-string.cc (expand_lxvl_stxvl): New helper function. (expand_block_move_variable): Only do 2 variable moves per memcpy call. * config/rs6000/rs6000.cc (rs6000_option_override_internal): Remove bumping up rs6000_memcpy_inline_bytes if -mcpu=future. * config/rs6000/rs6000.opt (-param=rs6000-memcpy-inline-bytes=): Set default to 32, not 16. Diff: --- gcc/config/rs6000/rs6000-string.cc | 69 +++++++++++++++++++++++++++++++------- gcc/config/rs6000/rs6000.cc | 6 ---- gcc/config/rs6000/rs6000.opt | 4 +-- 3 files changed, 58 insertions(+), 21 deletions(-) diff --git a/gcc/config/rs6000/rs6000-string.cc b/gcc/config/rs6000/rs6000-string.cc index bc44d27773b..216c0c1cc93 100644 --- a/gcc/config/rs6000/rs6000-string.cc +++ b/gcc/config/rs6000/rs6000-string.cc @@ -2735,6 +2735,32 @@ gen_lxvl_stxvl_move (rtx dest, rtx src, int length) return gen_lxvl (dest, addr, len); } +/* Generate a lxvl/stxvl or lxvprl/stxvprl pair of instructions to move up to + 16 or 32 bytes at a time. */ + +static void +expand_lxvl_stxvl (rtx dest_addr, + rtx src_addr, + rtx bytes_rtx, + int max_bytes) +{ + if (max_bytes > GET_MODE_SIZE (V16QImode)) + { + rtx vreg = gen_reg_rtx (XOmode); + emit_insn (gen_lxvprl (vreg, src_addr, bytes_rtx)); + emit_insn (gen_stxvprl (vreg, dest_addr, bytes_rtx)); + } + + else + { + rtx vreg = gen_reg_rtx (V16QImode); + emit_insn (gen_lxvl (vreg, src_addr, bytes_rtx)); + emit_insn (gen_stxvl (vreg, dest_addr, bytes_rtx)); + } + + return; +} + /* Expand a variable block move operation, and return 1 if successful. Return 0 if we should let the compiler generate normal code. @@ -2766,11 +2792,16 @@ expand_block_move_variable (rtx operands[], bool might_overlap) ? copy_to_reg (operands[2]) : convert_to_mode (Pmode, operands[2], true)); - int var_size_int + /* Maximum size to move at one time. */ + int vect_size_int = (TARGET_FUTURE ? GET_MODE_SIZE (XOmode) : GET_MODE_SIZE (V16QImode)); - if (var_size_int > rs6000_memcpy_inline_bytes) - var_size_int = rs6000_memcpy_inline_bytes; + /* Total size to move. Limit the number of bytes that we do in this + optimization to just 2 variable moves. Anything larger let the memcpy + glibc function handle it and do the extra optimizations it provides. */ + int var_size_int = (rs6000_memcpy_inline_bytes > (2 * vect_size_int) + ? 2 * vect_size_int + : rs6000_memcpy_inline_bytes); rtx var_size = GEN_INT (var_size_int); rtx var_cr = gen_reg_rtx (CCUNSmode); @@ -2797,17 +2828,29 @@ expand_block_move_variable (rtx operands[], bool might_overlap) emit_label (var_label); /* We want to move bytes inline. Move 0..16 or 0..32 bytes now. */ - if (var_size_int > GET_MODE_SIZE (V16QImode)) - { - rtx vreg = gen_reg_rtx (XOmode); - emit_insn (gen_lxvprl (vreg, src_addr, bytes_rtx)); - emit_insn (gen_stxvprl (vreg, dest_addr, bytes_rtx)); - } - else + if (vect_size_int > var_size_int) + vect_size_int = var_size_int; + + expand_lxvl_stxvl (dest_addr, src_addr, bytes_rtx, vect_size_int); + + /* If we have more than 16/32 bytes, adjust the pointers/length and generate + a second move. */ + if (var_size_int > vect_size_int) { - rtx vreg = gen_reg_rtx (V16QImode); - emit_insn (gen_lxvl (vreg, src_addr, bytes_rtx)); - emit_insn (gen_stxvl (vreg, dest_addr, bytes_rtx)); + emit_insn (gen_add2_insn (bytes_rtx, GEN_INT (- vect_size_int))); + + rtx vect_cr = gen_reg_rtx (CCmode); + emit_insn (gen_rtx_SET (vect_cr, + gen_rtx_COMPARE (CCmode, bytes_rtx, + const0_rtx))); + + do_ifelse (CCmode, GT, NULL_RTX, NULL_RTX, vect_cr, join_label, + profile_probability::likely ()); + + rtx ptr_adjust = GEN_INT (vect_size_int); + emit_insn (gen_add2_insn (dest_addr, ptr_adjust)); + emit_insn (gen_add2_insn (src_addr, ptr_adjust)); + expand_lxvl_stxvl (dest_addr, src_addr, bytes_rtx, vect_size_int); } emit_label (join_label); diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index e401aa4c02b..dba37df3c61 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -4878,12 +4878,6 @@ rs6000_option_override_internal (bool global_init_p) SET_OPTION_IF_UNSET (&global_options, &global_options_set, param_vect_partial_vector_usage, 0); - /* If we have the lxvprl/stxvprl instructions, bump up the default size - for doing inlining memcpy moves. */ - if (TARGET_FUTURE) - SET_OPTION_IF_UNSET (&global_options, &global_options_set, - rs6000_memcpy_inline_bytes, 32); - /* Use the 'model' -fsched-pressure algorithm by default. */ SET_OPTION_IF_UNSET (&global_options, &global_options_set, param_sched_pressure_algorithm, diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index c594877ebc6..602930063cd 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -689,6 +689,6 @@ When reduction factor computed for a loop exceeds the threshold specified by this parameter, prefer to unroll this loop. The default value is 1. -param=rs6000-memcpy-inline-bytes= -Target Undocumented Joined UInteger Var(rs6000_memcpy_inline_bytes) Init(16) Param +Target Undocumented Joined UInteger Var(rs6000_memcpy_inline_bytes) Init(32) Param Maximum number of bytes to move with inline code before calling the memcpy -library function. The default value is 16. +library function. The default value is 32.