public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: Michael Meissner <meissner@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/dmf004)] Limit memcpy inline copy to do just 2 variable moves. Date: Thu, 17 Nov 2022 21:55:41 +0000 (GMT) [thread overview] Message-ID: <20221117215541.3668A384F6D3@sourceware.org> (raw) https://gcc.gnu.org/g:b3cecdfb43c2134a65833eaa415fef65d4e24b98 commit b3cecdfb43c2134a65833eaa415fef65d4e24b98 Author: Michael Meissner <meissner@linux.ibm.com> Date: Wed Nov 16 15:55:46 2022 -0500 Limit memcpy inline copy to do just 2 variable moves. 2022-11-16 Michael Meissner <meissner@linux.ibm.com> gcc/ * config/rs6000/rs6000-string.cc (expand_lxvl_stxvl): New helper function. (expand_block_move_variable): Only do 2 variable moves per memcpy call. * config/rs6000/rs6000.cc (rs6000_option_override_internal): Remove bumping up rs6000_memcpy_inline_bytes if -mcpu=future. * config/rs6000/rs6000.opt (-param=rs6000-memcpy-inline-bytes=): Set default to 32, not 16. Diff: --- gcc/config/rs6000/rs6000-string.cc | 69 +++++++++++++++++++++++++++++++------- gcc/config/rs6000/rs6000.cc | 6 ---- gcc/config/rs6000/rs6000.opt | 4 +-- 3 files changed, 58 insertions(+), 21 deletions(-) diff --git a/gcc/config/rs6000/rs6000-string.cc b/gcc/config/rs6000/rs6000-string.cc index bc44d27773b..216c0c1cc93 100644 --- a/gcc/config/rs6000/rs6000-string.cc +++ b/gcc/config/rs6000/rs6000-string.cc @@ -2735,6 +2735,32 @@ gen_lxvl_stxvl_move (rtx dest, rtx src, int length) return gen_lxvl (dest, addr, len); } +/* Generate a lxvl/stxvl or lxvprl/stxvprl pair of instructions to move up to + 16 or 32 bytes at a time. 
*/ + +static void +expand_lxvl_stxvl (rtx dest_addr, + rtx src_addr, + rtx bytes_rtx, + int max_bytes) +{ + if (max_bytes > GET_MODE_SIZE (V16QImode)) + { + rtx vreg = gen_reg_rtx (XOmode); + emit_insn (gen_lxvprl (vreg, src_addr, bytes_rtx)); + emit_insn (gen_stxvprl (vreg, dest_addr, bytes_rtx)); + } + + else + { + rtx vreg = gen_reg_rtx (V16QImode); + emit_insn (gen_lxvl (vreg, src_addr, bytes_rtx)); + emit_insn (gen_stxvl (vreg, dest_addr, bytes_rtx)); + } + + return; +} + /* Expand a variable block move operation, and return 1 if successful. Return 0 if we should let the compiler generate normal code. @@ -2766,11 +2792,16 @@ expand_block_move_variable (rtx operands[], bool might_overlap) ? copy_to_reg (operands[2]) : convert_to_mode (Pmode, operands[2], true)); - int var_size_int + /* Maximum size to move at one time. */ + int vect_size_int = (TARGET_FUTURE ? GET_MODE_SIZE (XOmode) : GET_MODE_SIZE (V16QImode)); - if (var_size_int > rs6000_memcpy_inline_bytes) - var_size_int = rs6000_memcpy_inline_bytes; + /* Total size to move. Limit the number of bytes that we do in this + optimization to just 2 variable moves. Anything larger let the memcpy + glibc function handle it and do the extra optimizations it provides. */ + int var_size_int = (rs6000_memcpy_inline_bytes > (2 * vect_size_int) + ? 2 * vect_size_int + : rs6000_memcpy_inline_bytes); rtx var_size = GEN_INT (var_size_int); rtx var_cr = gen_reg_rtx (CCUNSmode); @@ -2797,17 +2828,29 @@ expand_block_move_variable (rtx operands[], bool might_overlap) emit_label (var_label); /* We want to move bytes inline. Move 0..16 or 0..32 bytes now. 
*/ - if (var_size_int > GET_MODE_SIZE (V16QImode)) - { - rtx vreg = gen_reg_rtx (XOmode); - emit_insn (gen_lxvprl (vreg, src_addr, bytes_rtx)); - emit_insn (gen_stxvprl (vreg, dest_addr, bytes_rtx)); - } - else + if (vect_size_int > var_size_int) + vect_size_int = var_size_int; + + expand_lxvl_stxvl (dest_addr, src_addr, bytes_rtx, vect_size_int); + + /* If we have more than 16/32 bytes, adjust the pointers/length and generate + a second move. */ + if (var_size_int > vect_size_int) { - rtx vreg = gen_reg_rtx (V16QImode); - emit_insn (gen_lxvl (vreg, src_addr, bytes_rtx)); - emit_insn (gen_stxvl (vreg, dest_addr, bytes_rtx)); + emit_insn (gen_add2_insn (bytes_rtx, GEN_INT (- vect_size_int))); + + rtx vect_cr = gen_reg_rtx (CCmode); + emit_insn (gen_rtx_SET (vect_cr, + gen_rtx_COMPARE (CCmode, bytes_rtx, + const0_rtx))); + + do_ifelse (CCmode, GT, NULL_RTX, NULL_RTX, vect_cr, join_label, + profile_probability::likely ()); + + rtx ptr_adjust = GEN_INT (vect_size_int); + emit_insn (gen_add2_insn (dest_addr, ptr_adjust)); + emit_insn (gen_add2_insn (src_addr, ptr_adjust)); + expand_lxvl_stxvl (dest_addr, src_addr, bytes_rtx, vect_size_int); } emit_label (join_label); diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index e401aa4c02b..dba37df3c61 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -4878,12 +4878,6 @@ rs6000_option_override_internal (bool global_init_p) SET_OPTION_IF_UNSET (&global_options, &global_options_set, param_vect_partial_vector_usage, 0); - /* If we have the lxvprl/stxvprl instructions, bump up the default size - for doing inlining memcpy moves. */ - if (TARGET_FUTURE) - SET_OPTION_IF_UNSET (&global_options, &global_options_set, - rs6000_memcpy_inline_bytes, 32); - /* Use the 'model' -fsched-pressure algorithm by default. 
*/ SET_OPTION_IF_UNSET (&global_options, &global_options_set, param_sched_pressure_algorithm, diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index c594877ebc6..602930063cd 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -689,6 +689,6 @@ When reduction factor computed for a loop exceeds the threshold specified by this parameter, prefer to unroll this loop. The default value is 1. -param=rs6000-memcpy-inline-bytes= -Target Undocumented Joined UInteger Var(rs6000_memcpy_inline_bytes) Init(16) Param +Target Undocumented Joined UInteger Var(rs6000_memcpy_inline_bytes) Init(32) Param Maximum number of bytes to move with inline code before calling the memcpy -library function. The default value is 16. +library function. The default value is 32.
next reply other threads:[~2022-11-17 21:55 UTC|newest] Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top 2022-11-17 21:55 Michael Meissner [this message] -- strict thread matches above, loose matches on Subject: below -- 2022-11-16 20:56 Michael Meissner
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20221117215541.3668A384F6D3@sourceware.org \ --to=meissner@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).