From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2119) id 209F73858C2A; Mon, 16 Oct 2023 20:04:13 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 209F73858C2A DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1697486653; bh=VKG1XZJIeegl1EEZfN/qhHscVrHlAaFrtkQzUwT5pD8=; h=From:To:Subject:Date:From; b=rSYJNjVN5+6+Qi70NThjpsV+RRrphBFcosp+INB45Gsn/JSMjMP/xqX41VVFRjwVr 3MUU5mSN1mYnf+LkVnRS98VDWA0qXex1G7yNyZqquLVe8iIU5fif01B5uIF/C/gJa0 GouT9iy5jsFEtC9waC2RYbLRVPG6s1HvfhvEJ1EU= MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="utf-8" From: Jeff Law To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-4666] RISC-V: NFC: Move scalar block move expansion code into riscv-string.cc X-Act-Checkin: gcc X-Git-Author: =?utf-8?q?Christoph_M=C3=BCllner?= X-Git-Refname: refs/heads/master X-Git-Oldrev: c92737722ffff85c759247f96f281438bbf1418d X-Git-Newrev: 328745607c5d403a1c7b6bc2ecaa1574ee42122f Message-Id: <20231016200413.209F73858C2A@sourceware.org> Date: Mon, 16 Oct 2023 20:04:13 +0000 (GMT) List-Id: https://gcc.gnu.org/g:328745607c5d403a1c7b6bc2ecaa1574ee42122f commit r14-4666-g328745607c5d403a1c7b6bc2ecaa1574ee42122f Author: Christoph Müllner Date: Mon Oct 16 13:57:43 2023 -0600 RISC-V: NFC: Move scalar block move expansion code into riscv-string.cc This just moves a few functions out of riscv.cc into riscv-string.cc in an attempt to keep riscv.cc manageable. This was originally Christoph's code and I'm just pushing it on his behalf. Full disclosure: I built rv64gc after changing to verify everything still builds. Given it was just lifting code from one place to another, I didn't run the testsuite. gcc/ * config/riscv/riscv-protos.h (emit_block_move): Remove redundant prototype. Improve comment. * config/riscv/riscv.cc (riscv_block_move_straight): Move from riscv.cc into riscv-string.cc. (riscv_adjust_block_mem, riscv_block_move_loop): Likewise. (riscv_expand_block_move): Likewise. * config/riscv/riscv-string.cc (riscv_block_move_straight): Add moved function. (riscv_adjust_block_mem, riscv_block_move_loop): Likewise. (riscv_expand_block_move): Likewise. Diff: --- gcc/config/riscv/riscv-protos.h | 5 +- gcc/config/riscv/riscv-string.cc | 155 +++++++++++++++++++++++++++++++++++++++ gcc/config/riscv/riscv.cc | 155 --------------------------------------- 3 files changed, 158 insertions(+), 157 deletions(-) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 49bdcdf2f93c..6190faab5011 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -117,7 +117,6 @@ extern rtx riscv_emit_binary (enum rtx_code code, rtx dest, rtx x, rtx y); extern bool riscv_expand_conditional_move (rtx, rtx, rtx, rtx); extern rtx riscv_legitimize_call_address (rtx); extern void riscv_set_return_address (rtx, rtx); -extern bool riscv_expand_block_move (rtx, rtx, rtx); extern rtx riscv_return_addr (int, rtx); extern poly_int64 riscv_initial_elimination_offset (int, int); extern void riscv_expand_prologue (void); @@ -125,7 +124,6 @@ extern void riscv_expand_epilogue (int); extern bool riscv_epilogue_uses (unsigned int); extern bool riscv_can_use_return_insn (void); extern rtx riscv_function_value (const_tree, const_tree, enum machine_mode); -extern bool riscv_expand_block_move (rtx, rtx, rtx); extern bool riscv_store_data_bypass_p (rtx_insn *, rtx_insn *); extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *); extern bool riscv_gpr_save_operation_p (rtx); @@ -160,6 +158,9 @@ extern bool riscv_hard_regno_rename_ok (unsigned, unsigned); rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt); rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt); +/* Routines implemented in riscv-string.c. */ +extern bool riscv_expand_block_move (rtx, rtx, rtx); + /* Information about one CPU we know about. */ struct riscv_cpu_info { /* This CPU's canonical name. */ diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc index 2bdff0374e8b..0b4606aa7b2d 100644 --- a/gcc/config/riscv/riscv-string.cc +++ b/gcc/config/riscv/riscv-string.cc @@ -592,3 +592,158 @@ riscv_expand_strlen (rtx result, rtx src, rtx search_char, rtx align) return false; } + +/* Emit straight-line code to move LENGTH bytes from SRC to DEST. + Assume that the areas do not overlap. */ + +static void +riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length) +{ + unsigned HOST_WIDE_INT offset, delta; + unsigned HOST_WIDE_INT bits; + int i; + enum machine_mode mode; + rtx *regs; + + bits = MAX (BITS_PER_UNIT, + MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest)))); + + mode = mode_for_size (bits, MODE_INT, 0).require (); + delta = bits / BITS_PER_UNIT; + + /* Allocate a buffer for the temporary registers. */ + regs = XALLOCAVEC (rtx, length / delta); + + /* Load as many BITS-sized chunks as possible. Use a normal load if + the source has enough alignment, otherwise use left/right pairs. */ + for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++) + { + regs[i] = gen_reg_rtx (mode); + riscv_emit_move (regs[i], adjust_address (src, mode, offset)); + } + + /* Copy the chunks to the destination. */ + for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++) + riscv_emit_move (adjust_address (dest, mode, offset), regs[i]); + + /* Mop up any left-over bytes. */ + if (offset < length) + { + src = adjust_address (src, BLKmode, offset); + dest = adjust_address (dest, BLKmode, offset); + move_by_pieces (dest, src, length - offset, + MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), RETURN_BEGIN); + } +} + +/* Helper function for doing a loop-based block operation on memory + reference MEM. Each iteration of the loop will operate on LENGTH + bytes of MEM. + + Create a new base register for use within the loop and point it to + the start of MEM. Create a new memory reference that uses this + register. Store them in *LOOP_REG and *LOOP_MEM respectively. */ + +static void +riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT length, + rtx *loop_reg, rtx *loop_mem) +{ + *loop_reg = copy_addr_to_reg (XEXP (mem, 0)); + + /* Although the new mem does not refer to a known location, + it does keep up to LENGTH bytes of alignment. */ + *loop_mem = change_address (mem, BLKmode, *loop_reg); + set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT)); +} + +/* Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER + bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that + the memory regions do not overlap. */ + +static void +riscv_block_move_loop (rtx dest, rtx src, unsigned HOST_WIDE_INT length, + unsigned HOST_WIDE_INT bytes_per_iter) +{ + rtx label, src_reg, dest_reg, final_src, test; + unsigned HOST_WIDE_INT leftover; + + leftover = length % bytes_per_iter; + length -= leftover; + + /* Create registers and memory references for use within the loop. */ + riscv_adjust_block_mem (src, bytes_per_iter, &src_reg, &src); + riscv_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest); + + /* Calculate the value that SRC_REG should have after the last iteration + of the loop. */ + final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), + 0, 0, OPTAB_WIDEN); + + /* Emit the start of the loop. */ + label = gen_label_rtx (); + emit_label (label); + + /* Emit the loop body. */ + riscv_block_move_straight (dest, src, bytes_per_iter); + + /* Move on to the next block. */ + riscv_emit_move (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter)); + riscv_emit_move (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter)); + + /* Emit the loop condition. */ + test = gen_rtx_NE (VOIDmode, src_reg, final_src); + emit_jump_insn (gen_cbranch4 (Pmode, test, src_reg, final_src, label)); + + /* Mop up any left-over bytes. */ + if (leftover) + riscv_block_move_straight (dest, src, leftover); + else + emit_insn(gen_nop ()); +} + +/* Expand a cpymemsi instruction, which copies LENGTH bytes from + memory reference SRC to memory reference DEST. */ + +bool +riscv_expand_block_move (rtx dest, rtx src, rtx length) +{ + if (CONST_INT_P (length)) + { + unsigned HOST_WIDE_INT hwi_length = UINTVAL (length); + unsigned HOST_WIDE_INT factor, align; + + align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), BITS_PER_WORD); + factor = BITS_PER_WORD / align; + + if (optimize_function_for_size_p (cfun) + && hwi_length * factor * UNITS_PER_WORD > MOVE_RATIO (false)) + return false; + + if (hwi_length <= (RISCV_MAX_MOVE_BYTES_STRAIGHT / factor)) + { + riscv_block_move_straight (dest, src, INTVAL (length)); + return true; + } + else if (optimize && align >= BITS_PER_WORD) + { + unsigned min_iter_words + = RISCV_MAX_MOVE_BYTES_PER_LOOP_ITER / UNITS_PER_WORD; + unsigned iter_words = min_iter_words; + unsigned HOST_WIDE_INT bytes = hwi_length; + unsigned HOST_WIDE_INT words = bytes / UNITS_PER_WORD; + + /* Lengthen the loop body if it shortens the tail. */ + for (unsigned i = min_iter_words; i < min_iter_words * 2 - 1; i++) + { + unsigned cur_cost = iter_words + words % iter_words; + unsigned new_cost = i + words % i; + if (new_cost <= cur_cost) + iter_words = i; + } + + riscv_block_move_loop (dest, src, bytes, iter_words * UNITS_PER_WORD); + return true; + } + } + return false; +} diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 44746256f611..f2dcb0db6fbd 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -5147,161 +5147,6 @@ riscv_legitimize_call_address (rtx addr) return addr; } -/* Emit straight-line code to move LENGTH bytes from SRC to DEST. - Assume that the areas do not overlap. */ - -static void -riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length) -{ - unsigned HOST_WIDE_INT offset, delta; - unsigned HOST_WIDE_INT bits; - int i; - enum machine_mode mode; - rtx *regs; - - bits = MAX (BITS_PER_UNIT, - MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest)))); - - mode = mode_for_size (bits, MODE_INT, 0).require (); - delta = bits / BITS_PER_UNIT; - - /* Allocate a buffer for the temporary registers. */ - regs = XALLOCAVEC (rtx, length / delta); - - /* Load as many BITS-sized chunks as possible. Use a normal load if - the source has enough alignment, otherwise use left/right pairs. */ - for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++) - { - regs[i] = gen_reg_rtx (mode); - riscv_emit_move (regs[i], adjust_address (src, mode, offset)); - } - - /* Copy the chunks to the destination. */ - for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++) - riscv_emit_move (adjust_address (dest, mode, offset), regs[i]); - - /* Mop up any left-over bytes. */ - if (offset < length) - { - src = adjust_address (src, BLKmode, offset); - dest = adjust_address (dest, BLKmode, offset); - move_by_pieces (dest, src, length - offset, - MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), RETURN_BEGIN); - } -} - -/* Helper function for doing a loop-based block operation on memory - reference MEM. Each iteration of the loop will operate on LENGTH - bytes of MEM. - - Create a new base register for use within the loop and point it to - the start of MEM. Create a new memory reference that uses this - register. Store them in *LOOP_REG and *LOOP_MEM respectively. */ - -static void -riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT length, - rtx *loop_reg, rtx *loop_mem) -{ - *loop_reg = copy_addr_to_reg (XEXP (mem, 0)); - - /* Although the new mem does not refer to a known location, - it does keep up to LENGTH bytes of alignment. */ - *loop_mem = change_address (mem, BLKmode, *loop_reg); - set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT)); -} - -/* Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER - bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that - the memory regions do not overlap. */ - -static void -riscv_block_move_loop (rtx dest, rtx src, unsigned HOST_WIDE_INT length, - unsigned HOST_WIDE_INT bytes_per_iter) -{ - rtx label, src_reg, dest_reg, final_src, test; - unsigned HOST_WIDE_INT leftover; - - leftover = length % bytes_per_iter; - length -= leftover; - - /* Create registers and memory references for use within the loop. */ - riscv_adjust_block_mem (src, bytes_per_iter, &src_reg, &src); - riscv_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest); - - /* Calculate the value that SRC_REG should have after the last iteration - of the loop. */ - final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), - 0, 0, OPTAB_WIDEN); - - /* Emit the start of the loop. */ - label = gen_label_rtx (); - emit_label (label); - - /* Emit the loop body. */ - riscv_block_move_straight (dest, src, bytes_per_iter); - - /* Move on to the next block. */ - riscv_emit_move (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter)); - riscv_emit_move (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter)); - - /* Emit the loop condition. */ - test = gen_rtx_NE (VOIDmode, src_reg, final_src); - emit_jump_insn (gen_cbranch4 (Pmode, test, src_reg, final_src, label)); - - /* Mop up any left-over bytes. */ - if (leftover) - riscv_block_move_straight (dest, src, leftover); - else - emit_insn(gen_nop ()); -} - -/* Expand a cpymemsi instruction, which copies LENGTH bytes from - memory reference SRC to memory reference DEST. */ - -bool -riscv_expand_block_move (rtx dest, rtx src, rtx length) -{ - if (CONST_INT_P (length)) - { - unsigned HOST_WIDE_INT hwi_length = UINTVAL (length); - unsigned HOST_WIDE_INT factor, align; - - align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), BITS_PER_WORD); - factor = BITS_PER_WORD / align; - - if (optimize_function_for_size_p (cfun) - && hwi_length * factor * UNITS_PER_WORD > MOVE_RATIO (false)) - return false; - - if (hwi_length <= (RISCV_MAX_MOVE_BYTES_STRAIGHT / factor)) - { - riscv_block_move_straight (dest, src, INTVAL (length)); - return true; - } - else if (optimize && align >= BITS_PER_WORD) - { - unsigned min_iter_words - = RISCV_MAX_MOVE_BYTES_PER_LOOP_ITER / UNITS_PER_WORD; - unsigned iter_words = min_iter_words; - unsigned HOST_WIDE_INT bytes = hwi_length; - unsigned HOST_WIDE_INT words = bytes / UNITS_PER_WORD; - - /* Lengthen the loop body if it shortens the tail. */ - for (unsigned i = min_iter_words; i < min_iter_words * 2 - 1; i++) - { - unsigned cur_cost = iter_words + words % iter_words; - unsigned new_cost = i + words % i; - if (new_cost <= cur_cost) - iter_words = i; - } - - riscv_block_move_loop (dest, src, bytes, iter_words * UNITS_PER_WORD); - return true; - } - } - return false; -} - /* Print symbolic operand OP, which is part of a HIGH or LO_SUM in context CONTEXT. HI_RELOC indicates a high-part reloc. */