From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2140) id DA4743858C00; Thu, 26 Jan 2023 08:21:39 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org DA4743858C00 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1674721299; bh=sSY1J7iIl8F9QnyMgRQfPmH+OmgUgydllYVZzOYJUic=; h=From:To:Subject:Date:From; b=f53A730YzjlVTxjWeqz4Tyt5FRAUPy/DsEtWn+lGasStSEbbTjuMX/B9wINZ5rr+n eQKgUzkzsbmgapsKPxxPBNnHNOSARRCApeNwSnpy2gNMWHrr+tVExz+VDQwoar5QPZ RNwyTZ6Yre3E9Y9Qq4p1ncd9J6BiWCGVoAjIW+HA= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Alexandre Oliva To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/aoliva/heads/testme)] add memcmp loop expander X-Act-Checkin: gcc X-Git-Author: Alexandre Oliva X-Git-Refname: refs/users/aoliva/heads/testme X-Git-Oldrev: cd610334640ffd43dfd9313f8405e139b6a973fa X-Git-Newrev: c234c022348c1ada7594294ade986951521a6a5b Message-Id: <20230126082139.DA4743858C00@sourceware.org> Date: Thu, 26 Jan 2023 08:21:39 +0000 (GMT) List-Id: https://gcc.gnu.org/g:c234c022348c1ada7594294ade986951521a6a5b commit c234c022348c1ada7594294ade986951521a6a5b Author: Alexandre Oliva Date: Fri Jan 20 22:01:15 2023 -0300 add memcmp loop expander Diff: --- gcc/builtins.cc | 3 +- gcc/expr.cc | 165 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- gcc/expr.h | 3 +- 3 files changed, 167 insertions(+), 4 deletions(-) diff --git a/gcc/builtins.cc b/gcc/builtins.cc index ca4d4721cdc..e55b53485e2 100644 --- a/gcc/builtins.cc +++ b/gcc/builtins.cc @@ -4809,7 +4809,8 @@ expand_builtin_memcmp (tree exp, rtx target, bool result_eq) result = emit_block_cmp_hints (arg1_rtx, arg2_rtx, len_rtx, TREE_TYPE (len), target, result_eq, constfn, - CONST_CAST (char *, rep)); + CONST_CAST (char *, rep), + tree_ctz (len)); if (result) { diff --git a/gcc/expr.cc b/gcc/expr.cc index aceb3f514fc..7cb693525ba 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -83,6 +83,8 @@ static bool emit_block_move_via_pattern (rtx, rtx, rtx, unsigned, unsigned, static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned, int); static void emit_block_move_via_sized_loop (rtx, rtx, rtx, unsigned, unsigned); static void emit_block_move_via_oriented_loop (rtx, rtx, rtx, unsigned, unsigned); +static rtx emit_block_cmp_via_loop (rtx, rtx, rtx, tree, rtx, bool, + unsigned, unsigned); static void clear_by_pieces (rtx, unsigned HOST_WIDE_INT, unsigned int); static rtx_insn *compress_float_constant (rtx, rtx); static rtx get_subtarget (rtx); @@ -2569,7 +2571,8 @@ emit_block_cmp_via_cmpmem (rtx x, rtx y, rtx len, tree len_type, rtx target, Both X and Y must be MEM rtx's. LEN is an rtx that says how long they are. LEN_TYPE is the type of the expression that was used to - calculate it. + calculate it, and CTZ_LEN is the known trailing-zeros count of LEN, + so LEN must be a multiple of 1< 1 + && !can_do_by_pieces (incr, align, COMPARE_BY_PIECES)) + incr >>= 1; + + rtx_code_label *cmp_label, *top_label, *ne_label, *res_label; + rtx iter, x_addr, y_addr, tmp; + machine_mode x_addr_mode = get_address_mode (x); + machine_mode y_addr_mode = get_address_mode (y); + machine_mode iter_mode; + + iter_mode = GET_MODE (len); + if (iter_mode == VOIDmode) + iter_mode = word_mode; + + top_label = gen_label_rtx (); + cmp_label = gen_label_rtx (); + ne_label = gen_label_rtx (); + res_label = gen_label_rtx (); + iter = gen_reg_rtx (iter_mode); + + rtx iter_init = const0_rtx; + rtx_code iter_cond = LT; + rtx iter_limit = len; + rtx iter_incr = GEN_INT (incr); + machine_mode cmp_mode; + + emit_move_insn (iter, iter_init); + + scalar_int_mode int_cmp_mode + = smallest_int_mode_for_size (incr * BITS_PER_UNIT); + if (GET_MODE_BITSIZE (int_cmp_mode) != incr * BITS_PER_UNIT + || !can_compare_p (NE, int_cmp_mode, ccp_jump)) + { + cmp_mode = BLKmode; + gcc_checking_assert (incr != 1); + } + else + cmp_mode = int_cmp_mode; + + x_addr = force_operand (XEXP (x, 0), NULL_RTX); + y_addr = force_operand (XEXP (y, 0), NULL_RTX); + do_pending_stack_adjust (); + + emit_jump (cmp_label); + emit_label (top_label); + + tmp = convert_modes (x_addr_mode, iter_mode, iter, true); + x_addr = simplify_gen_binary (PLUS, x_addr_mode, x_addr, tmp); + + if (x_addr_mode != y_addr_mode) + tmp = convert_modes (y_addr_mode, iter_mode, iter, true); + y_addr = simplify_gen_binary (PLUS, y_addr_mode, y_addr, tmp); + + x = change_address (x, cmp_mode, x_addr); + y = change_address (y, cmp_mode, y_addr); + + rtx part_res; + if (cmp_mode == BLKmode) + part_res = compare_by_pieces (x, y, incr, target, align, 0, 0); + else + part_res = expand_binop (cmp_mode, sub_optab, x, y, NULL_RTX, + true, OPTAB_LIB_WIDEN); + + emit_cmp_and_jump_insns (part_res, GEN_INT (0), NE, NULL_RTX, + GET_MODE (part_res), true, ne_label, + profile_probability::guessed_always () + .apply_scale (1, 10)); + + tmp = expand_simple_binop (iter_mode, PLUS, iter, iter_incr, iter, + true, OPTAB_LIB_WIDEN); + if (tmp != iter) + emit_move_insn (iter, tmp); + + emit_label (cmp_label); + emit_cmp_and_jump_insns (iter, iter_limit, iter_cond, NULL_RTX, iter_mode, + true, top_label, + profile_probability::guessed_always () + .apply_scale (9, 10)); + + if (target == NULL_RTX + || !REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER) + target = gen_reg_rtx (TYPE_MODE (integer_type_node)); + + emit_move_insn (target, const0_rtx); + emit_jump (res_label); + emit_barrier (); + + emit_label (ne_label); + + if (equality_only) + emit_move_insn (target, const1_rtx); + else + { + if (incr > UNITS_PER_WORD) + /* ??? Re-compare the block found to be different one word at a + time. */ + part_res = emit_block_cmp_via_loop (x, y, GEN_INT (incr), size_type, + target, equality_only, + BITS_PER_WORD, 0); + else if (incr > 1) + /* ??? Re-compare the block found to be different one byte at a + time. We could do better using part_res, and being careful + about endianness. */ + part_res = emit_block_cmp_via_loop (x, y, GEN_INT (incr), size_type, + target, equality_only, + BITS_PER_UNIT, 0); + else if (GET_MODE_BITSIZE (GET_MODE (target)) + > GET_MODE_BITSIZE (cmp_mode)) + part_res = expand_binop (GET_MODE (target), sub_optab, x, y, target, + true, OPTAB_LIB_WIDEN); + else + { + /* In the odd chance target is QImode, we can't count on + widening subtract to capture the result of the unsigned + compares. */ + rtx_code_label *ltu_label; + ltu_label = gen_label_rtx (); + emit_cmp_and_jump_insns (x, y, LTU, NULL_RTX, + cmp_mode, true, ltu_label, + profile_probability::guessed_always () + .apply_scale (5, 10)); + + emit_move_insn (target, const1_rtx); + emit_jump (res_label); + emit_barrier (); + + emit_label (ltu_label); + emit_move_insn (target, constm1_rtx); + part_res = target; + } + + if (target != part_res) + convert_move (target, part_res, false); + } + + emit_label (res_label); + + return target; +} + /* Copy all or part of a value X into registers starting at REGNO. The number of registers to be filled is NREGS. */ diff --git a/gcc/expr.h b/gcc/expr.h index d9fc47c9114..976c8b69fc1 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -138,7 +138,8 @@ extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods, bool might_overlap = false, unsigned ctz_size = 0); extern rtx emit_block_cmp_hints (rtx, rtx, rtx, tree, rtx, bool, - by_pieces_constfn, void *); + by_pieces_constfn, void *, + unsigned ctz_len = 0); extern bool emit_storent_insn (rtx to, rtx from); /* Copy all or part of a value X into registers starting at REGNO.