From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2140) id 0E1503858C39; Sat, 21 Jan 2023 01:06:33 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 0E1503858C39 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1674263193; bh=kuCMuYAahA1FWzXWQM3yTxBTO9uaZ8gi7v+OQBU2plw=; h=From:To:Subject:Date:From; b=VdA3bLl9g8e7S+MHVs3RS0e3xQmLiENZ+PFNcZN/PX55qjSS7Yk2xn6v2l4qLIj+B HBJjd2p5Eevz5Gc4VNdOONCQTHiIB4FuLQmVsmX2OuclcVC+MPiCubb8v6BMxyGjHv 896lzcpCy0y/u49deVfyfVJptpcZ193jmyy91Sfw= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Alexandre Oliva To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/aoliva/heads/testme)] add memcmp loop expander X-Act-Checkin: gcc X-Git-Author: Alexandre Oliva X-Git-Refname: refs/users/aoliva/heads/testme X-Git-Oldrev: 284bc638524a3055a189e84fdc58e4858af24feb X-Git-Newrev: ed9040c427c2092cb02ce21c6e228c65f68f13e2 Message-Id: <20230121010633.0E1503858C39@sourceware.org> Date: Sat, 21 Jan 2023 01:06:33 +0000 (GMT) List-Id: https://gcc.gnu.org/g:ed9040c427c2092cb02ce21c6e228c65f68f13e2 commit ed9040c427c2092cb02ce21c6e228c65f68f13e2 Author: Alexandre Oliva Date: Fri Jan 20 22:01:15 2023 -0300 add memcmp loop expander Diff: --- gcc/expr.cc | 133 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- gcc/expr.h | 3 +- 2 files changed, 134 insertions(+), 2 deletions(-) diff --git a/gcc/expr.cc b/gcc/expr.cc index a02b0b6ed52..aabb6ed963d 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -83,6 +83,8 @@ static bool emit_block_move_via_pattern (rtx, rtx, rtx, unsigned, unsigned, static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned, int); static void emit_block_move_via_sized_loop (rtx, rtx, rtx, unsigned, unsigned); static void emit_block_move_via_oriented_loop (rtx, rtx, rtx, unsigned, unsigned); +static rtx emit_block_cmp_via_loop (rtx, rtx, rtx, tree, rtx, bool, + unsigned, unsigned); static void clear_by_pieces (rtx, unsigned HOST_WIDE_INT, unsigned int); static rtx_insn *compress_float_constant (rtx, rtx); static rtx get_subtarget (rtx); @@ -2583,7 +2585,7 @@ emit_block_cmp_via_cmpmem (rtx x, rtx y, rtx len, tree len_type, rtx target, rtx emit_block_cmp_hints (rtx x, rtx y, rtx len, tree len_type, rtx target, bool equality_only, by_pieces_constfn y_cfn, - void *y_cfndata) + void *y_cfndata, unsigned ctz_size) { rtx result = 0; @@ -2605,8 +2607,137 @@ emit_block_cmp_hints (rtx x, rtx y, rtx len, tree len_type, rtx target, else result = emit_block_cmp_via_cmpmem (x, y, len, len_type, target, align); + if (!result && (flag_inline_stringops & ILSOP_MEMCMP)) + result = emit_block_cmp_via_loop (x, y, len, len_type, + target, equality_only, + align, ctz_size); + return result; } + +rtx +emit_block_cmp_via_loop (rtx x, rtx y, rtx len, tree size_type, rtx target, + bool equality_only, unsigned align, unsigned ctz_size) +{ + unsigned incr = align / BITS_PER_UNIT; + + if (CONST_INT_P (len)) + ctz_size = MAX (ctz_size, (unsigned) wi::ctz (UINTVAL (len))); + + if (HOST_WIDE_INT_1U << ctz_size < (unsigned HOST_WIDE_INT) incr) + incr = HOST_WIDE_INT_1U << ctz_size; + + while (incr > 1 + && !(equality_only + ? can_do_by_pieces (incr, align, COMPARE_BY_PIECES) + : int_mode_for_size (incr, 0).exists ())) + incr >>= 1; + + rtx_code_label *cmp_label, *top_label, *ne_label, *res_label; + rtx iter, x_addr, y_addr, tmp; + machine_mode x_addr_mode = get_address_mode (x); + machine_mode y_addr_mode = get_address_mode (y); + machine_mode iter_mode; + + iter_mode = GET_MODE (len); + if (iter_mode == VOIDmode) + iter_mode = word_mode; + + top_label = gen_label_rtx (); + cmp_label = gen_label_rtx (); + ne_label = gen_label_rtx (); + res_label = gen_label_rtx (); + iter = gen_reg_rtx (iter_mode); + + rtx iter_init = const0_rtx; + rtx_code iter_cond = LT; + rtx iter_limit = len; + rtx iter_incr = GEN_INT (incr); + machine_mode cmp_mode; + + emit_move_insn (iter, iter_init); + + scalar_int_mode int_cmp_mode + = smallest_int_mode_for_size (incr * BITS_PER_UNIT); + if (GET_MODE_BITSIZE (int_cmp_mode) != incr * BITS_PER_UNIT) + { + cmp_mode = BLKmode; + gcc_checking_assert (can_do_by_pieces (incr, align, COMPARE_BY_PIECES)); + if (!equality_only) + return NULL_RTX; + } + else + cmp_mode = int_cmp_mode; + + x_addr = force_operand (XEXP (x, 0), NULL_RTX); + y_addr = force_operand (XEXP (y, 0), NULL_RTX); + do_pending_stack_adjust (); + + emit_jump (cmp_label); + emit_label (top_label); + + tmp = convert_modes (x_addr_mode, iter_mode, iter, true); + x_addr = simplify_gen_binary (PLUS, x_addr_mode, x_addr, tmp); + + if (x_addr_mode != y_addr_mode) + tmp = convert_modes (y_addr_mode, iter_mode, iter, true); + y_addr = simplify_gen_binary (PLUS, y_addr_mode, y_addr, tmp); + + x = change_address (x, cmp_mode, x_addr); + y = change_address (y, cmp_mode, y_addr); + + rtx part_res; + if (cmp_mode == BLKmode) + part_res = compare_by_pieces (x, y, incr, target, align, 0, 0); + else + part_res = expand_binop (cmp_mode, sub_optab, x, y, NULL_RTX, + true, OPTAB_LIB_WIDEN); + + emit_cmp_and_jump_insns (part_res, GEN_INT (0), NE, NULL_RTX, + GET_MODE (part_res), true, ne_label, + profile_probability::guessed_always () + .apply_scale (9, 10)); + + tmp = expand_simple_binop (iter_mode, PLUS, iter, iter_incr, iter, + true, OPTAB_LIB_WIDEN); + if (tmp != iter) + emit_move_insn (iter, tmp); + + emit_label (cmp_label); + emit_cmp_and_jump_insns (iter, iter_limit, iter_cond, NULL_RTX, iter_mode, + true, top_label, + profile_probability::guessed_always () + .apply_scale (9, 10)); + + if (target == NULL_RTX + || !REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER) + target = gen_reg_rtx (TYPE_MODE (integer_type_node)); + + emit_move_insn (target, const0_rtx); + emit_jump (res_label); + emit_barrier (); + + emit_label (ne_label); + + if (equality_only) + emit_move_insn (target, const1_rtx); + else if (incr == 1) + convert_move (target, part_res, false); + else + { + /* ??? Re-compare the block found to be different one byte at a + time. We could do better using part_res, and being careful + about endianness. */ + part_res = emit_block_cmp_via_loop (x, y, GEN_INT (incr), size_type, + target, equality_only, 1, 0); + convert_move (target, part_res, false); + } + + emit_label (res_label); + + return target; +} + /* Copy all or part of a value X into registers starting at REGNO. The number of registers to be filled is NREGS. */ diff --git a/gcc/expr.h b/gcc/expr.h index d9fc47c9114..6366675f231 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -138,7 +138,8 @@ extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods, bool might_overlap = false, unsigned ctz_size = 0); extern rtx emit_block_cmp_hints (rtx, rtx, rtx, tree, rtx, bool, - by_pieces_constfn, void *); + by_pieces_constfn, void *, + unsigned ctz_size = 0); extern bool emit_storent_insn (rtx to, rtx from); /* Copy all or part of a value X into registers starting at REGNO.