public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: Alexandre Oliva <aoliva@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/aoliva/heads/testme)] add memcmp loop expander Date: Thu, 26 Jan 2023 08:21:39 +0000 (GMT) [thread overview] Message-ID: <20230126082139.DA4743858C00@sourceware.org> (raw) https://gcc.gnu.org/g:c234c022348c1ada7594294ade986951521a6a5b commit c234c022348c1ada7594294ade986951521a6a5b Author: Alexandre Oliva <oliva@gnu.org> Date: Fri Jan 20 22:01:15 2023 -0300 add memcmp loop expander Diff: --- gcc/builtins.cc | 3 +- gcc/expr.cc | 165 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- gcc/expr.h | 3 +- 3 files changed, 167 insertions(+), 4 deletions(-) diff --git a/gcc/builtins.cc b/gcc/builtins.cc index ca4d4721cdc..e55b53485e2 100644 --- a/gcc/builtins.cc +++ b/gcc/builtins.cc @@ -4809,7 +4809,8 @@ expand_builtin_memcmp (tree exp, rtx target, bool result_eq) result = emit_block_cmp_hints (arg1_rtx, arg2_rtx, len_rtx, TREE_TYPE (len), target, result_eq, constfn, - CONST_CAST (char *, rep)); + CONST_CAST (char *, rep), + tree_ctz (len)); if (result) { diff --git a/gcc/expr.cc b/gcc/expr.cc index aceb3f514fc..7cb693525ba 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -83,6 +83,8 @@ static bool emit_block_move_via_pattern (rtx, rtx, rtx, unsigned, unsigned, static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned, int); static void emit_block_move_via_sized_loop (rtx, rtx, rtx, unsigned, unsigned); static void emit_block_move_via_oriented_loop (rtx, rtx, rtx, unsigned, unsigned); +static rtx emit_block_cmp_via_loop (rtx, rtx, rtx, tree, rtx, bool, + unsigned, unsigned); static void clear_by_pieces (rtx, unsigned HOST_WIDE_INT, unsigned int); static rtx_insn *compress_float_constant (rtx, rtx); static rtx get_subtarget (rtx); @@ -2569,7 +2571,8 @@ emit_block_cmp_via_cmpmem (rtx x, rtx y, rtx len, tree len_type, rtx target, Both X and Y must be MEM rtx's. LEN is an rtx that says how long they are. LEN_TYPE is the type of the expression that was used to - calculate it. 
+ calculate it, and CTZ_LEN is the known trailing-zeros count of LEN, + so LEN must be a multiple of 1<<CTZ_LEN even if it's not constant. If EQUALITY_ONLY is true, it means we don't have to return the tri-state value of a normal memcmp call, instead we can just compare for equality. @@ -2585,7 +2588,7 @@ emit_block_cmp_via_cmpmem (rtx x, rtx y, rtx len, tree len_type, rtx target, rtx emit_block_cmp_hints (rtx x, rtx y, rtx len, tree len_type, rtx target, bool equality_only, by_pieces_constfn y_cfn, - void *y_cfndata) + void *y_cfndata, unsigned ctz_len) { rtx result = 0; @@ -2607,8 +2610,166 @@ emit_block_cmp_hints (rtx x, rtx y, rtx len, tree len_type, rtx target, else result = emit_block_cmp_via_cmpmem (x, y, len, len_type, target, align); + if (!result && (flag_inline_stringops & ILSOP_MEMCMP)) + result = emit_block_cmp_via_loop (x, y, len, len_type, + target, equality_only, + align, ctz_len); + return result; } + +rtx +emit_block_cmp_via_loop (rtx x, rtx y, rtx len, tree size_type, rtx target, + bool equality_only, unsigned align, unsigned ctz_len) +{ + unsigned incr = align / BITS_PER_UNIT; + + if (CONST_INT_P (len)) + ctz_len = MAX (ctz_len, (unsigned) wi::ctz (UINTVAL (len))); + + if (HOST_WIDE_INT_1U << ctz_len < (unsigned HOST_WIDE_INT) incr) + incr = HOST_WIDE_INT_1U << ctz_len; + + while (incr > 1 + && !can_do_by_pieces (incr, align, COMPARE_BY_PIECES)) + incr >>= 1; + + rtx_code_label *cmp_label, *top_label, *ne_label, *res_label; + rtx iter, x_addr, y_addr, tmp; + machine_mode x_addr_mode = get_address_mode (x); + machine_mode y_addr_mode = get_address_mode (y); + machine_mode iter_mode; + + iter_mode = GET_MODE (len); + if (iter_mode == VOIDmode) + iter_mode = word_mode; + + top_label = gen_label_rtx (); + cmp_label = gen_label_rtx (); + ne_label = gen_label_rtx (); + res_label = gen_label_rtx (); + iter = gen_reg_rtx (iter_mode); + + rtx iter_init = const0_rtx; + rtx_code iter_cond = LT; + rtx iter_limit = len; + rtx iter_incr = GEN_INT (incr); + 
machine_mode cmp_mode; + + emit_move_insn (iter, iter_init); + + scalar_int_mode int_cmp_mode + = smallest_int_mode_for_size (incr * BITS_PER_UNIT); + if (GET_MODE_BITSIZE (int_cmp_mode) != incr * BITS_PER_UNIT + || !can_compare_p (NE, int_cmp_mode, ccp_jump)) + { + cmp_mode = BLKmode; + gcc_checking_assert (incr != 1); + } + else + cmp_mode = int_cmp_mode; + + x_addr = force_operand (XEXP (x, 0), NULL_RTX); + y_addr = force_operand (XEXP (y, 0), NULL_RTX); + do_pending_stack_adjust (); + + emit_jump (cmp_label); + emit_label (top_label); + + tmp = convert_modes (x_addr_mode, iter_mode, iter, true); + x_addr = simplify_gen_binary (PLUS, x_addr_mode, x_addr, tmp); + + if (x_addr_mode != y_addr_mode) + tmp = convert_modes (y_addr_mode, iter_mode, iter, true); + y_addr = simplify_gen_binary (PLUS, y_addr_mode, y_addr, tmp); + + x = change_address (x, cmp_mode, x_addr); + y = change_address (y, cmp_mode, y_addr); + + rtx part_res; + if (cmp_mode == BLKmode) + part_res = compare_by_pieces (x, y, incr, target, align, 0, 0); + else + part_res = expand_binop (cmp_mode, sub_optab, x, y, NULL_RTX, + true, OPTAB_LIB_WIDEN); + + emit_cmp_and_jump_insns (part_res, GEN_INT (0), NE, NULL_RTX, + GET_MODE (part_res), true, ne_label, + profile_probability::guessed_always () + .apply_scale (1, 10)); + + tmp = expand_simple_binop (iter_mode, PLUS, iter, iter_incr, iter, + true, OPTAB_LIB_WIDEN); + if (tmp != iter) + emit_move_insn (iter, tmp); + + emit_label (cmp_label); + emit_cmp_and_jump_insns (iter, iter_limit, iter_cond, NULL_RTX, iter_mode, + true, top_label, + profile_probability::guessed_always () + .apply_scale (9, 10)); + + if (target == NULL_RTX + || !REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER) + target = gen_reg_rtx (TYPE_MODE (integer_type_node)); + + emit_move_insn (target, const0_rtx); + emit_jump (res_label); + emit_barrier (); + + emit_label (ne_label); + + if (equality_only) + emit_move_insn (target, const1_rtx); + else + { + if (incr > UNITS_PER_WORD) 
+ /* ??? Re-compare the block found to be different one word at a + time. */ + part_res = emit_block_cmp_via_loop (x, y, GEN_INT (incr), size_type, + target, equality_only, + BITS_PER_WORD, 0); + else if (incr > 1) + /* ??? Re-compare the block found to be different one byte at a + time. We could do better using part_res, and being careful + about endianness. */ + part_res = emit_block_cmp_via_loop (x, y, GEN_INT (incr), size_type, + target, equality_only, + BITS_PER_UNIT, 0); + else if (GET_MODE_BITSIZE (GET_MODE (target)) + > GET_MODE_BITSIZE (cmp_mode)) + part_res = expand_binop (GET_MODE (target), sub_optab, x, y, target, + true, OPTAB_LIB_WIDEN); + else + { + /* In the odd chance target is QImode, we can't count on + widening subtract to capture the result of the unsigned + compares. */ + rtx_code_label *ltu_label; + ltu_label = gen_label_rtx (); + emit_cmp_and_jump_insns (x, y, LTU, NULL_RTX, + cmp_mode, true, ltu_label, + profile_probability::guessed_always () + .apply_scale (5, 10)); + + emit_move_insn (target, const1_rtx); + emit_jump (res_label); + emit_barrier (); + + emit_label (ltu_label); + emit_move_insn (target, constm1_rtx); + part_res = target; + } + + if (target != part_res) + convert_move (target, part_res, false); + } + + emit_label (res_label); + + return target; +} + \f /* Copy all or part of a value X into registers starting at REGNO. The number of registers to be filled is NREGS. */ diff --git a/gcc/expr.h b/gcc/expr.h index d9fc47c9114..976c8b69fc1 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -138,7 +138,8 @@ extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods, bool might_overlap = false, unsigned ctz_size = 0); extern rtx emit_block_cmp_hints (rtx, rtx, rtx, tree, rtx, bool, - by_pieces_constfn, void *); + by_pieces_constfn, void *, + unsigned ctz_len = 0); extern bool emit_storent_insn (rtx to, rtx from); /* Copy all or part of a value X into registers starting at REGNO.
next reply other threads:[~2023-01-26 8:21 UTC|newest] Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top 2023-01-26 8:21 Alexandre Oliva [this message] -- strict thread matches above, loose matches on Subject: below -- 2023-01-27 5:58 Alexandre Oliva 2023-01-27 2:25 Alexandre Oliva 2023-01-27 1:57 Alexandre Oliva 2023-01-26 8:44 Alexandre Oliva 2023-01-26 8:01 Alexandre Oliva 2023-01-26 7:02 Alexandre Oliva 2023-01-26 6:40 Alexandre Oliva 2023-01-26 6:38 Alexandre Oliva 2023-01-26 6:34 Alexandre Oliva 2023-01-26 6:14 Alexandre Oliva 2023-01-26 6:09 Alexandre Oliva 2023-01-26 5:17 Alexandre Oliva 2023-01-26 4:59 Alexandre Oliva 2023-01-26 3:37 Alexandre Oliva 2023-01-21 1:06 Alexandre Oliva
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20230126082139.DA4743858C00@sourceware.org \ --to=aoliva@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).