From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1880) id 3D852382D444; Fri, 27 May 2022 04:55:49 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 3D852382D444 MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Max Filippov To: gcc-cvs@gcc.gnu.org Subject: [gcc r13-789] xtensa: Add setmemsi insn pattern X-Act-Checkin: gcc X-Git-Author: Takayuki 'January June' Suwa X-Git-Refname: refs/heads/master X-Git-Oldrev: b2cb88fb45a68fc91bda1c69d9486e824c4a559e X-Git-Newrev: 6454b4a8f5d90dd355c3c7e31a592a439223b645 Message-Id: <20220527045549.3D852382D444@sourceware.org> Date: Fri, 27 May 2022 04:55:49 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 27 May 2022 04:55:49 -0000 https://gcc.gnu.org/g:6454b4a8f5d90dd355c3c7e31a592a439223b645 commit r13-789-g6454b4a8f5d90dd355c3c7e31a592a439223b645 Author: Takayuki 'January June' Suwa Date: Tue May 24 00:52:44 2022 +0900 xtensa: Add setmemsi insn pattern This patch introduces setmemsi insn pattern of two kinds, unrolled loop and small loop, for fixed small length and constant initialization value. gcc/ChangeLog: * config/xtensa/xtensa-protos.h (xtensa_expand_block_set_unrolled_loop, xtensa_expand_block_set_small_loop): New prototypes. * config/xtensa/xtensa.cc (xtensa_sizeof_MOVI, xtensa_expand_block_set_unrolled_loop, xtensa_expand_block_set_small_loop): New functions. * config/xtensa/xtensa.md (setmemsi): New expansion pattern. * config/xtensa/xtensa.opt (mlongcalls): Add target mask. 
Diff: --- gcc/config/xtensa/xtensa-protos.h | 2 + gcc/config/xtensa/xtensa.cc | 211 ++++++++++++++++++++++++++++++++++++++ gcc/config/xtensa/xtensa.md | 16 +++ gcc/config/xtensa/xtensa.opt | 2 +- 4 files changed, 230 insertions(+), 1 deletion(-) diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h index 4bc42da2320..30e4b54394a 100644 --- a/gcc/config/xtensa/xtensa-protos.h +++ b/gcc/config/xtensa/xtensa-protos.h @@ -41,6 +41,8 @@ extern void xtensa_expand_conditional_branch (rtx *, machine_mode); extern int xtensa_expand_conditional_move (rtx *, int); extern int xtensa_expand_scc (rtx *, machine_mode); extern int xtensa_expand_block_move (rtx *); +extern int xtensa_expand_block_set_unrolled_loop (rtx *); +extern int xtensa_expand_block_set_small_loop (rtx *); extern void xtensa_split_operand_pair (rtx *, machine_mode); extern int xtensa_emit_move_sequence (rtx *, machine_mode); extern rtx xtensa_copy_incoming_a7 (rtx); diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc index d2aabf38339..c7b54babc37 100644 --- a/gcc/config/xtensa/xtensa.cc +++ b/gcc/config/xtensa/xtensa.cc @@ -1373,6 +1373,217 @@ xtensa_expand_block_move (rtx *operands) } +/* Try to expand a block set operation to a sequence of RTL move + instructions. If not optimizing, or if the block size is not a + constant, or if the block is too large, or if the value to + initialize the block with is not a constant, the expansion + fails and GCC falls back to calling memset(). + + operands[0] is the destination + operands[1] is the length + operands[2] is the initialization value + operands[3] is the alignment */ + +static int +xtensa_sizeof_MOVI (HOST_WIDE_INT imm) +{ + return (TARGET_DENSITY && IN_RANGE (imm, -32, 95)) ? 
2 : 3; +} + +int +xtensa_expand_block_set_unrolled_loop (rtx *operands) +{ + rtx dst_mem = operands[0]; + HOST_WIDE_INT bytes, value, align; + int expand_len, funccall_len; + rtx x, reg; + int offset; + + if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) + return 0; + + bytes = INTVAL (operands[1]); + if (bytes <= 0) + return 0; + value = (int8_t)INTVAL (operands[2]); + align = INTVAL (operands[3]); + if (align > MOVE_MAX) + align = MOVE_MAX; + + /* Insn expansion: holding the init value. + Either MOVI(.N) or L32R w/litpool. */ + if (align == 1) + expand_len = xtensa_sizeof_MOVI (value); + else if (value == 0 || value == -1) + expand_len = TARGET_DENSITY ? 2 : 3; + else + expand_len = 3 + 4; + /* Insn expansion: a series of aligned memory stores. + Consists of S8I, S16I or S32I(.N). */ + expand_len += (bytes / align) * (TARGET_DENSITY + && align == 4 ? 2 : 3); + /* Insn expansion: the remainder, sub-aligned memory stores. + A combination of S8I and S16I as needed. */ + expand_len += ((bytes % align + 1) / 2) * 3; + + /* Function call: preparing two arguments. */ + funccall_len = xtensa_sizeof_MOVI (value); + funccall_len += xtensa_sizeof_MOVI (bytes); + /* Function call: calling memset(). */ + funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; + + /* Apply expansion bonus (2x) if optimizing for speed. */ + if (optimize > 1 && !optimize_size) + funccall_len *= 2; + + /* Decide whether to expand or not, based on the sum of the length + of instructions. 
*/ + if (expand_len > funccall_len) + return 0; + + x = XEXP (dst_mem, 0); + if (!REG_P (x)) + dst_mem = replace_equiv_address (dst_mem, force_reg (Pmode, x)); + switch (align) + { + case 1: + break; + case 2: + value = (int16_t)((uint8_t)value * 0x0101U); + break; + case 4: + value = (int32_t)((uint8_t)value * 0x01010101U); + break; + default: + gcc_unreachable (); + } + reg = force_reg (SImode, GEN_INT (value)); + + offset = 0; + do + { + int unit_size = MIN (bytes, align); + machine_mode unit_mode = (unit_size >= 4 ? SImode : + (unit_size >= 2 ? HImode : + QImode)); + unit_size = GET_MODE_SIZE (unit_mode); + + emit_move_insn (adjust_address (dst_mem, unit_mode, offset), + unit_mode == SImode ? reg + : convert_to_mode (unit_mode, reg, true)); + + offset += unit_size; + bytes -= unit_size; + } + while (bytes > 0); + + return 1; +} + +int +xtensa_expand_block_set_small_loop (rtx *operands) +{ + HOST_WIDE_INT bytes, value, align; + int expand_len, funccall_len; + rtx x, dst, end, reg; + machine_mode unit_mode; + rtx_code_label *label; + + if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2])) + return 0; + + bytes = INTVAL (operands[1]); + if (bytes <= 0) + return 0; + value = (int8_t)INTVAL (operands[2]); + align = INTVAL (operands[3]); + if (align > MOVE_MAX) + align = MOVE_MAX; + + /* Totally-aligned block only. */ + if (bytes % align != 0) + return 0; + + /* If 4-byte aligned, small loop substitution is almost optimal, so it is + limited only by the offset to the end address fitting ADDI/ADDMI. */ + if (align == 4 + && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0))) + return 0; + + /* If not 4-byte aligned, loop count should be treated as the constraint. */ + if (align != 4 + && bytes / align > ((optimize > 1 && !optimize_size) ? 8 : 15)) + return 0; + + /* Insn expansion: holding the init value. + Either MOVI(.N) or L32R w/litpool. 
*/ + if (align == 1) + expand_len = xtensa_sizeof_MOVI (value); + else if (value == 0 || value == -1) + expand_len = TARGET_DENSITY ? 2 : 3; + else + expand_len = 3 + 4; + /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */ + expand_len += bytes > 127 ? 3 + : (TARGET_DENSITY && bytes <= 15) ? 2 : 3; + + /* Insn expansion: the loop body and branch instruction. + For store, one of S8I, S16I or S32I(.N). + For advance, ADDI(.N). + For branch, BNE. */ + expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3) + + (TARGET_DENSITY ? 2 : 3) + 3; + + /* Function call: preparing two arguments. */ + funccall_len = xtensa_sizeof_MOVI (value); + funccall_len += xtensa_sizeof_MOVI (bytes); + /* Function call: calling memset(). */ + funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3; + + /* Apply expansion bonus (2x) if optimizing for speed. */ + if (optimize > 1 && !optimize_size) + funccall_len *= 2; + + /* Decide whether to expand or not, based on the sum of the length + of instructions. 
*/ + if (expand_len > funccall_len) + return 0; + + x = XEXP (operands[0], 0); + if (!REG_P (x)) + x = XEXP (replace_equiv_address (operands[0], force_reg (Pmode, x)), 0); + dst = gen_reg_rtx (SImode); + emit_move_insn (dst, x); + end = gen_reg_rtx (SImode); + emit_insn (gen_addsi3 (end, dst, operands[1] /* the length */)); + switch (align) + { + case 1: + unit_mode = QImode; + break; + case 2: + value = (int16_t)((uint8_t)value * 0x0101U); + unit_mode = HImode; + break; + case 4: + value = (int32_t)((uint8_t)value * 0x01010101U); + unit_mode = SImode; + break; + default: + gcc_unreachable (); + } + reg = force_reg (unit_mode, GEN_INT (value)); + + label = gen_label_rtx (); + emit_label (label); + emit_move_insn (gen_rtx_MEM (unit_mode, dst), reg); + emit_insn (gen_addsi3 (dst, dst, GEN_INT (align))); + emit_cmp_and_jump_insns (dst, end, NE, const0_rtx, SImode, true, label); + + return 1; +} + + void xtensa_expand_nonlocal_goto (rtx *operands) { diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md index 96e043b26b5..2d146b7995c 100644 --- a/gcc/config/xtensa/xtensa.md +++ b/gcc/config/xtensa/xtensa.md @@ -1080,6 +1080,22 @@ DONE; }) +;; Block sets + +(define_expand "setmemsi" + [(match_operand:BLK 0 "memory_operand") + (match_operand:SI 1 "") + (match_operand:SI 2 "") + (match_operand:SI 3 "const_int_operand")] + "!optimize_debug && optimize" +{ + if (xtensa_expand_block_set_unrolled_loop (operands)) + DONE; + if (xtensa_expand_block_set_small_loop (operands)) + DONE; + FAIL; +}) + ;; Shift instructions. diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt index c406297af0d..1fc68a3d994 100644 --- a/gcc/config/xtensa/xtensa.opt +++ b/gcc/config/xtensa/xtensa.opt @@ -27,7 +27,7 @@ Target Mask(FORCE_NO_PIC) Disable position-independent code (PIC) for use in OS kernel code. mlongcalls -Target +Target Mask(LONGCALLS) Use indirect CALLXn instructions for large programs. mtarget-align