From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2140) id DDEB93858C36; Fri, 20 Jan 2023 23:01:53 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org DDEB93858C36 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1674255713; bh=jHCmpSvYEQlJBzkulGnQpFrtVMLisbwBq8t4CXWoxIo=; h=From:To:Subject:Date:From; b=IYKprB2ivgnH7m9z7UGOh4K6Cy0Zmnrhiut2+aLf0AL950hpEOC+MztGfehLbmFW0 WR2hylUkMHDZJvU+ofjiN8srsMBKv6hLeSTKd+zTWqtOdfQbYbROaxanrIO03/4p5i dMnKvxs6kQBxl3BbuyasMzmtCSjQrvMPENwvB50Y= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Alexandre Oliva To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/aoliva/heads/testme)] add memcpy and memmove loop expanders X-Act-Checkin: gcc X-Git-Author: Alexandre Oliva X-Git-Refname: refs/users/aoliva/heads/testme X-Git-Oldrev: e0040ea65f4bc31d7ee62689353604dcd85d94f8 X-Git-Newrev: c0ddf0d3449743f447b825e68de70d6142301a52 Message-Id: <20230120230153.DDEB93858C36@sourceware.org> Date: Fri, 20 Jan 2023 23:01:53 +0000 (GMT) List-Id: https://gcc.gnu.org/g:c0ddf0d3449743f447b825e68de70d6142301a52 commit c0ddf0d3449743f447b825e68de70d6142301a52 Author: Alexandre Oliva Date: Fri Jan 20 19:23:02 2023 -0300 add memcpy and memmove loop expanders Diff: --- gcc/builtins.cc | 2 +- gcc/expr.cc | 192 +++++++++++++++++++++++++++++++++++++++++++++++++++----- gcc/expr.h | 6 +- 3 files changed, 182 insertions(+), 18 deletions(-) diff --git a/gcc/builtins.cc b/gcc/builtins.cc index b7737678a7d..ca4d4721cdc 100644 --- a/gcc/builtins.cc +++ b/gcc/builtins.cc @@ -3756,7 +3756,7 @@ expand_builtin_memory_copy_args (tree dest, tree src, tree len, expected_align, expected_size, min_size, max_size, probable_max_size, use_mempcpy_call, &is_move_done, - might_overlap); + might_overlap, tree_ctz (len)); /* Bail out when a mempcpy call would be expanded as libcall and when we have a target that provides a fast implementation diff --git a/gcc/expr.cc b/gcc/expr.cc index 15be1c8db99..0bba36ee131 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -80,7 +80,9 @@ static bool emit_block_move_via_pattern (rtx, rtx, rtx, unsigned, unsigned, HOST_WIDE_INT, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, bool); -static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned); +static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned, int); +static void emit_block_move_via_sized_loop (rtx, rtx, rtx, unsigned, unsigned); +static void emit_block_move_via_oriented_loop (rtx, rtx, rtx, unsigned, unsigned); static void clear_by_pieces (rtx, unsigned HOST_WIDE_INT, unsigned int); static rtx_insn *compress_float_constant (rtx, rtx); static rtx get_subtarget (rtx); @@ -1966,7 +1968,7 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method, unsigned HOST_WIDE_INT max_size, unsigned HOST_WIDE_INT probable_max_size, bool bail_out_libcall, bool *is_move_done, - bool might_overlap) + bool might_overlap, unsigned ctz_size) { int may_use_call; rtx retval = 0; @@ -2052,6 +2054,14 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method, } } + bool dynamic_direction = false; + if (!pattern_ok && !pieces_ok && may_use_call + && (flag_inline_stringops & (might_overlap ? ILSOP_MEMMOVE : ILSOP_MEMCPY))) + { + may_use_call = 0; + dynamic_direction = might_overlap; + } + if (pattern_ok) ; else if (pieces_ok) @@ -2073,10 +2083,12 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method, retval = emit_block_copy_via_libcall (x, y, size, method == BLOCK_OP_TAILCALL); } + else if (dynamic_direction) + emit_block_move_via_oriented_loop (x, y, size, align, ctz_size); else if (might_overlap) *is_move_done = false; else - emit_block_move_via_loop (x, y, size, align); + emit_block_move_via_sized_loop (x, y, size, align, ctz_size); if (method == BLOCK_OP_CALL_PARM) OK_DEFER_POP; @@ -2085,7 +2097,8 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method, } rtx -emit_block_move (rtx x, rtx y, rtx size, enum block_op_methods method) +emit_block_move (rtx x, rtx y, rtx size, enum block_op_methods method, + unsigned int ctz_size) { unsigned HOST_WIDE_INT max, min = 0; if (GET_CODE (size) == CONST_INT) @@ -2093,7 +2106,8 @@ emit_block_move (rtx x, rtx y, rtx size, enum block_op_methods method) else max = GET_MODE_MASK (GET_MODE (size)); return emit_block_move_hints (x, y, size, method, 0, -1, - min, max, max); + min, max, max, + false, NULL, false, ctz_size); } /* A subroutine of emit_block_move. Returns true if calling the @@ -2255,13 +2269,117 @@ emit_block_move_via_pattern (rtx x, rtx y, rtx size, unsigned int align, return false; } +/* Like emit_block_move_via_loop, but choose a suitable INCR based on + ALIGN and CTZ_SIZE. */ + +static void +emit_block_move_via_sized_loop (rtx x, rtx y, rtx size, + unsigned int align, + unsigned int ctz_size) +{ + int incr = align / BITS_PER_UNIT; + + if (CONST_INT_P (size)) + ctz_size = MAX (ctz_size, (unsigned) wi::ctz (UINTVAL (size))); + + if (HOST_WIDE_INT_1U << ctz_size < (unsigned HOST_WIDE_INT) incr) + incr = HOST_WIDE_INT_1U << ctz_size; + + while (incr > 1 && !can_move_by_pieces (incr, align)) + incr >>= 1; + + gcc_checking_assert (incr); + + return emit_block_move_via_loop (x, y, size, align, incr); +} + +/* Like emit_block_move_via_sized_loop, but besides choosing INCR so + as to ensure safe moves even in case of overlap, output dynamic + tests to choose between two loops, one moving downwards, another + moving upwards. */ + +static void +emit_block_move_via_oriented_loop (rtx x, rtx y, rtx size, + unsigned int align, + unsigned int ctz_size) +{ + int incr = align / BITS_PER_UNIT; + + if (CONST_INT_P (size)) + ctz_size = MAX (ctz_size, (unsigned) wi::ctz (UINTVAL (size))); + + if (HOST_WIDE_INT_1U << ctz_size < (unsigned HOST_WIDE_INT) incr) + incr = HOST_WIDE_INT_1U << ctz_size; + + while (incr > 1 && !int_mode_for_size (incr, 0).exists ()) + incr >>= 1; + + gcc_checking_assert (incr); + + rtx_code_label *upw_label, *end_label; + upw_label = gen_label_rtx (); + end_label = gen_label_rtx (); + + rtx x_addr = force_operand (XEXP (x, 0), NULL_RTX); + rtx y_addr = force_operand (XEXP (y, 0), NULL_RTX); + do_pending_stack_adjust (); + + machine_mode mode = GET_MODE (x_addr); + if (mode != GET_MODE (y_addr)) + { + scalar_int_mode xmode + = smallest_int_mode_for_size (GET_MODE_BITSIZE (mode)); + scalar_int_mode ymode + = smallest_int_mode_for_size (GET_MODE_BITSIZE + (GET_MODE (y_addr))); + if (GET_MODE_BITSIZE (xmode) < GET_MODE_BITSIZE (ymode)) + mode = ymode; + else + mode = xmode; + +#ifndef POINTERS_EXTEND_UNSIGNED + const int POINTERS_EXTEND_UNSIGNED = 1; +#endif + x_addr = convert_modes (mode, GET_MODE (x_addr), x_addr, + POINTERS_EXTEND_UNSIGNED); + y_addr = convert_modes (mode, GET_MODE (y_addr), y_addr, + POINTERS_EXTEND_UNSIGNED); + } + + /* Test for overlap: if (x >= y || x + size <= y) goto upw_label. */ + emit_cmp_and_jump_insns (x_addr, y_addr, GEU, NULL_RTX, mode, + true, upw_label, + profile_probability::guessed_always () + .apply_scale (5, 10)); + rtx tmp = convert_modes (GET_MODE (x_addr), GET_MODE (size), size, true); + tmp = simplify_gen_binary (PLUS, GET_MODE (x_addr), x_addr, tmp); + + emit_cmp_and_jump_insns (tmp, y_addr, LEU, NULL_RTX, mode, + true, upw_label, + profile_probability::guessed_always () + .apply_scale (8, 10)); + + emit_block_move_via_loop (x, y, size, align, -incr); + + emit_jump (end_label); + emit_label (upw_label); + + emit_block_move_via_loop (x, y, size, align, incr); + + emit_label (end_label); +} + /* A subroutine of emit_block_move. Copy the data via an explicit - loop. This is used only when libcalls are forbidden. */ -/* ??? It'd be nice to copy in hunks larger than QImode. */ + loop. This is used only when libcalls are forbidden, or when + inlining is required. INCR is the block size to be copied in each + loop iteration. If it is negative, the absolute value is used, and + the block is copied backwards. INCR must be a power of two, an + exact divisor for SIZE and ALIGN, and imply a mode that can be + safely copied per iteration assuming no overlap. */ static void emit_block_move_via_loop (rtx x, rtx y, rtx size, - unsigned int align ATTRIBUTE_UNUSED) + unsigned int align, int incr) { rtx_code_label *cmp_label, *top_label; rtx iter, x_addr, y_addr, tmp; @@ -2277,7 +2395,38 @@ emit_block_move_via_loop (rtx x, rtx y, rtx size, cmp_label = gen_label_rtx (); iter = gen_reg_rtx (iter_mode); - emit_move_insn (iter, const0_rtx); + bool downwards = incr < 0; + rtx iter_init; + rtx_code iter_cond; + rtx iter_limit; + rtx iter_incr; + machine_mode move_mode; + if (downwards) + { + incr = -incr; + iter_init = size; + iter_cond = GE; + iter_limit = const0_rtx; + iter_incr = GEN_INT (incr); + } + else + { + iter_init = const0_rtx; + iter_cond = LT; + iter_limit = size; + iter_incr = GEN_INT (incr); + } + emit_move_insn (iter, iter_init); + + scalar_int_mode int_move_mode + = smallest_int_mode_for_size (incr * BITS_PER_UNIT); + if (GET_MODE_BITSIZE (int_move_mode) != incr * BITS_PER_UNIT) + { + move_mode = BLKmode; + gcc_checking_assert (can_move_by_pieces (incr, align)); + } + else + move_mode = int_move_mode; x_addr = force_operand (XEXP (x, 0), NULL_RTX); y_addr = force_operand (XEXP (y, 0), NULL_RTX); @@ -2293,19 +2442,32 @@ emit_block_move_via_loop (rtx x, rtx y, rtx size, tmp = convert_modes (y_addr_mode, iter_mode, iter, true); y_addr = simplify_gen_binary (PLUS, y_addr_mode, y_addr, tmp); - x = change_address (x, QImode, x_addr); - y = change_address (y, QImode, y_addr); + x = change_address (x, move_mode, x_addr); + y = change_address (y, move_mode, y_addr); - emit_move_insn (x, y); + if (move_mode == BLKmode) + { + bool done; + emit_block_move_hints (x, y, iter_incr, BLOCK_OP_NO_LIBCALL, + align, incr, incr, incr, incr, + false, &done, false); + gcc_checking_assert (done); + } + else + emit_move_insn (x, y); - tmp = expand_simple_binop (iter_mode, PLUS, iter, const1_rtx, iter, + if (downwards) + emit_label (cmp_label); + + tmp = expand_simple_binop (iter_mode, PLUS, iter, iter_incr, iter, true, OPTAB_LIB_WIDEN); if (tmp != iter) emit_move_insn (iter, tmp); - emit_label (cmp_label); + if (!downwards) + emit_label (cmp_label); - emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode, + emit_cmp_and_jump_insns (iter, iter_limit, iter_cond, NULL_RTX, iter_mode, true, top_label, profile_probability::guessed_always () .apply_scale (9, 10)); diff --git a/gcc/expr.h b/gcc/expr.h index e3ba9eb5370..d9fc47c9114 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -126,7 +126,8 @@ struct by_pieces_prev fixed_size_mode mode; }; -extern rtx emit_block_move (rtx, rtx, rtx, enum block_op_methods); +extern rtx emit_block_move (rtx, rtx, rtx, enum block_op_methods, + unsigned ctz_size = 0); extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods, unsigned int, HOST_WIDE_INT, unsigned HOST_WIDE_INT, @@ -134,7 +135,8 @@ extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods, unsigned HOST_WIDE_INT, bool bail_out_libcall = false, bool *is_move_done = NULL, - bool might_overlap = false); + bool might_overlap = false, + unsigned ctz_size = 0); extern rtx emit_block_cmp_hints (rtx, rtx, rtx, tree, rtx, bool, by_pieces_constfn, void *); extern bool emit_storent_insn (rtx to, rtx from);