* [PATCH v3 4/5] xtensa: Add setmemsi insn pattern
@ 2022-05-23 15:52 Takayuki 'January June' Suwa
2022-05-26 16:57 ` Max Filippov
2022-05-27 4:58 ` Max Filippov
0 siblings, 2 replies; 6+ messages in thread
From: Takayuki 'January June' Suwa @ 2022-05-23 15:52 UTC (permalink / raw)
To: gcc-patches
This patch introduces setmemsi insn pattern of two kinds, unrolled loop and
small loop, for fixed small length and constant initialization value.
gcc/ChangeLog:
* gcc/config/xtensa/xtensa-protos.h
(xtensa_expand_block_set_unrolled_loop,
xtensa_expand_block_set_small_loop): New prototypes.
* gcc/config/xtensa/xtensa.cc (xtensa_sizeof_MOVI,
xtensa_expand_block_set_unrolled_loop,
xtensa_expand_block_set_small_loop): New functions.
* gcc/config/xtensa/xtensa.md (setmemsi): New expansion pattern.
* gcc/config/xtensa/xtensa.opt (mlongcalls): Add target mask.
---
gcc/config/xtensa/xtensa-protos.h | 2 +
gcc/config/xtensa/xtensa.cc | 211 ++++++++++++++++++++++++++++++
gcc/config/xtensa/xtensa.md | 16 +++
gcc/config/xtensa/xtensa.opt | 2 +-
4 files changed, 230 insertions(+), 1 deletion(-)
diff --git a/gcc/config/xtensa/xtensa-protos.h
b/gcc/config/xtensa/xtensa-protos.h
index 4bc42da2320..30e4b54394a 100644
--- a/gcc/config/xtensa/xtensa-protos.h
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -41,6 +41,8 @@ extern void xtensa_expand_conditional_branch (rtx *,
machine_mode);
extern int xtensa_expand_conditional_move (rtx *, int);
extern int xtensa_expand_scc (rtx *, machine_mode);
extern int xtensa_expand_block_move (rtx *);
+extern int xtensa_expand_block_set_unrolled_loop (rtx *);
+extern int xtensa_expand_block_set_small_loop (rtx *);
extern void xtensa_split_operand_pair (rtx *, machine_mode);
extern int xtensa_emit_move_sequence (rtx *, machine_mode);
extern rtx xtensa_copy_incoming_a7 (rtx);
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index d2aabf38339..c7b54babc37 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -1373,6 +1373,217 @@ xtensa_expand_block_move (rtx *operands)
}
+/* Try to expand a block set operation to a sequence of RTL move
+ instructions. If not optimizing, or if the block size is not a
+ constant, or if the block is too large, or if the value to
+ initialize the block with is not a constant, the expansion
+ fails and GCC falls back to calling memset().
+
+ operands[0] is the destination
+ operands[1] is the length
+ operands[2] is the initialization value
+ operands[3] is the alignment
+
+ The helper directly below feeds the size estimates of both block set
+ expanders: it returns 2 when TARGET_DENSITY is set and IMM lies in
+ [-32, 95], and 3 otherwise (presumably the byte length of MOVI.N
+ versus MOVI -- confirm against the Xtensa ISA reference). */
+
+static int
+xtensa_sizeof_MOVI (HOST_WIDE_INT imm)
+{
+ return (TARGET_DENSITY && IN_RANGE (imm, -32, 95)) ? 2 : 3;
+}
+/* Expand a block set as a straight-line series of stores (one of the
+ two strategies tried for setmemsi). Returns 1 after emitting the
+ stores, or 0 without emitting anything when the length or the value
+ is not a CONST_INT, the length is not positive, or the estimated
+ size of the inline sequence exceeds the (possibly doubled) estimate
+ for calling memset(). */
+int
+xtensa_expand_block_set_unrolled_loop (rtx *operands)
+{
+ rtx dst_mem = operands[0];
+ HOST_WIDE_INT bytes, value, align;
+ int expand_len, funccall_len;
+ rtx x, reg;
+ int offset;
+
+ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2]))
+ return 0;
+
+ bytes = INTVAL (operands[1]);
+ if (bytes <= 0)
+ return 0;
+ value = (int8_t)INTVAL (operands[2]); /* Keep the low byte, sign-extended. */
+ align = INTVAL (operands[3]);
+ if (align > MOVE_MAX)
+ align = MOVE_MAX;
+
+ /* Insn expansion: holding the init value.
+ Either MOVI(.N) or L32R w/litpool. The 3 + 4 case presumably
+ counts a 3-byte L32R plus its 4-byte literal -- confirm. */
+ if (align == 1)
+ expand_len = xtensa_sizeof_MOVI (value);
+ else if (value == 0 || value == -1)
+ expand_len = TARGET_DENSITY ? 2 : 3;
+ else
+ expand_len = 3 + 4;
+ /* Insn expansion: a series of aligned memory stores.
+ Consists of S8I, S16I or S32I(.N). */
+ expand_len += (bytes / align) * (TARGET_DENSITY
+ && align == 4 ? 2 : 3);
+ /* Insn expansion: the remainder, sub-aligned memory stores.
+ A combination of S8I and S16I as needed: (bytes % align + 1) / 2
+ gives one store for a remainder of 1 or 2 and two for 3. */
+ expand_len += ((bytes % align + 1) / 2) * 3;
+
+ /* Function call: preparing two arguments. */
+ funccall_len = xtensa_sizeof_MOVI (value);
+ funccall_len += xtensa_sizeof_MOVI (bytes);
+ /* Function call: calling memset(). */
+ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3;
+
+ /* Apply expansion bonus (2x) if optimizing for speed. Doubling the
+ call-side estimate makes the comparison below accept longer
+ inline sequences. */
+ if (optimize > 1 && !optimize_size)
+ funccall_len *= 2;
+
+ /* Decide whether to expand or not, based on the sum of the length
+ of instructions. */
+ if (expand_len > funccall_len)
+ return 0;
+
+ x = XEXP (dst_mem, 0);
+ if (!REG_P (x))
+ dst_mem = replace_equiv_address (dst_mem, force_reg (Pmode, x));
+ switch (align) /* Replicate the byte across the store unit. */
+ {
+ case 1:
+ break;
+ case 2:
+ value = (int16_t)((uint8_t)value * 0x0101U);
+ break;
+ case 4:
+ value = (int32_t)((uint8_t)value * 0x01010101U);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ reg = force_reg (SImode, GEN_INT (value));
+
+ offset = 0;
+ do
+ {
+ int unit_size = MIN (bytes, align); /* The tail may be narrower than ALIGN. */
+ machine_mode unit_mode = (unit_size >= 4 ? SImode :
+ (unit_size >= 2 ? HImode :
+ QImode));
+ unit_size = GET_MODE_SIZE (unit_mode); /* Bytes this store really writes. */
+
+ emit_move_insn (adjust_address (dst_mem, unit_mode, offset),
+ unit_mode == SImode ? reg
+ : convert_to_mode (unit_mode, reg, true));
+
+ offset += unit_size;
+ bytes -= unit_size;
+ }
+ while (bytes > 0);
+
+ return 1;
+}
+/* Expand a block set as a small store / advance / compare-and-branch
+ loop (the other strategy tried for setmemsi). Returns 1 after
+ emitting the loop, or 0 without emitting anything when the length or
+ the value is not a CONST_INT, the length is not a positive multiple
+ of the (capped) alignment, the length or iteration-count limits
+ checked below are exceeded, or the estimated size of the inline
+ sequence exceeds the (possibly doubled) estimate for calling
+ memset(). */
+int
+xtensa_expand_block_set_small_loop (rtx *operands)
+{
+ HOST_WIDE_INT bytes, value, align;
+ int expand_len, funccall_len;
+ rtx x, dst, end, reg;
+ machine_mode unit_mode;
+ rtx_code_label *label;
+
+ if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2]))
+ return 0;
+
+ bytes = INTVAL (operands[1]);
+ if (bytes <= 0)
+ return 0;
+ value = (int8_t)INTVAL (operands[2]); /* Keep the low byte, sign-extended. */
+ align = INTVAL (operands[3]);
+ if (align > MOVE_MAX)
+ align = MOVE_MAX;
+
+ /* Totally-aligned block only: the length must be a multiple of the
+ (capped) alignment so that the loop needs no tail stores. */
+ if (bytes % align != 0)
+ return 0;
+
+ /* If 4-byte aligned, small loop substitution is almost optimal, thus
+ limited to only offset to the end address for ADDI/ADDMI
+ instruction: lengths up to 127, or multiples of 256 up to 32512. */
+ if (align == 4
+ && ! (bytes <= 127 || (bytes <= 32512 && bytes % 256 == 0)))
+ return 0;
+
+ /* If not 4-byte aligned, loop count should be treated as the
+ constraint: at most 8 iterations when optimizing for speed,
+ otherwise at most 15. */
+ if (align != 4
+ && bytes / align > ((optimize > 1 && !optimize_size) ? 8 : 15))
+ return 0;
+
+ /* Insn expansion: holding the init value.
+ Either MOVI(.N) or L32R w/litpool. The 3 + 4 case presumably
+ counts a 3-byte L32R plus its 4-byte literal -- confirm. */
+ if (align == 1)
+ expand_len = xtensa_sizeof_MOVI (value);
+ else if (value == 0 || value == -1)
+ expand_len = TARGET_DENSITY ? 2 : 3;
+ else
+ expand_len = 3 + 4;
+ /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */
+ expand_len += bytes > 127 ? 3
+ : (TARGET_DENSITY && bytes <= 15) ? 2 : 3;
+
+ /* Insn expansion: the loop body and branch instruction.
+ For store, one of S8I, S16I or S32I(.N).
+ For advance, ADDI(.N).
+ For branch, BNE. */
+ expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3)
+ + (TARGET_DENSITY ? 2 : 3) + 3;
+
+ /* Function call: preparing two arguments. */
+ funccall_len = xtensa_sizeof_MOVI (value);
+ funccall_len += xtensa_sizeof_MOVI (bytes);
+ /* Function call: calling memset(). */
+ funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3;
+
+ /* Apply expansion bonus (2x) if optimizing for speed. Doubling the
+ call-side estimate makes the comparison below accept longer
+ inline sequences. */
+ if (optimize > 1 && !optimize_size)
+ funccall_len *= 2;
+
+ /* Decide whether to expand or not, based on the sum of the length
+ of instructions. */
+ if (expand_len > funccall_len)
+ return 0;
+
+ x = XEXP (operands[0], 0);
+ if (!REG_P (x))
+ x = XEXP (replace_equiv_address (operands[0], force_reg (Pmode,
x)), 0);
+ dst = gen_reg_rtx (SImode); /* Running store address. */
+ emit_move_insn (dst, x);
+ end = gen_reg_rtx (SImode); /* Address at which the loop stops. */
+ emit_insn (gen_addsi3 (end, dst, operands[1] /* the length */));
+ switch (align) /* Replicate the byte across the store unit. */
+ {
+ case 1:
+ unit_mode = QImode;
+ break;
+ case 2:
+ value = (int16_t)((uint8_t)value * 0x0101U);
+ unit_mode = HImode;
+ break;
+ case 4:
+ value = (int32_t)((uint8_t)value * 0x01010101U);
+ unit_mode = SImode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ reg = force_reg (unit_mode, GEN_INT (value));
+
+ label = gen_label_rtx ();
+ emit_label (label); /* Loop head. */
+ emit_move_insn (gen_rtx_MEM (unit_mode, dst), reg);
+ emit_insn (gen_addsi3 (dst, dst, GEN_INT (align)));
+ emit_cmp_and_jump_insns (dst, end, NE, const0_rtx, SImode, true, label);
+
+ return 1;
+}
+
+
void
xtensa_expand_nonlocal_goto (rtx *operands)
{
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 96e043b26b5..2d146b7995c 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -1080,6 +1080,22 @@
DONE;
})
+;; Block sets: enabled when "optimize" is nonzero and "optimize_debug" is not set; tries the unrolled-store expander first, then the small-loop expander, and FAILs if both decline.
+
+(define_expand "setmemsi"
+ [(match_operand:BLK 0 "memory_operand")
+ (match_operand:SI 1 "")
+ (match_operand:SI 2 "")
+ (match_operand:SI 3 "const_int_operand")]
+ "!optimize_debug && optimize"
+{
+ if (xtensa_expand_block_set_unrolled_loop (operands))
+ DONE;
+ if (xtensa_expand_block_set_small_loop (operands))
+ DONE;
+ FAIL;
+})
+
\f
;; Shift instructions.
diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt
index c406297af0d..1fc68a3d994 100644
--- a/gcc/config/xtensa/xtensa.opt
+++ b/gcc/config/xtensa/xtensa.opt
@@ -27,7 +27,7 @@ Target Mask(FORCE_NO_PIC)
Disable position-independent code (PIC) for use in OS kernel code.
mlongcalls
-Target
+Target Mask(LONGCALLS)
Use indirect CALLXn instructions for large programs.
mtarget-align
--
2.20.1
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v3 4/5] xtensa: Add setmemsi insn pattern
2022-05-23 15:52 [PATCH v3 4/5] xtensa: Add setmemsi insn pattern Takayuki 'January June' Suwa
@ 2022-05-26 16:57 ` Max Filippov
2022-05-27 3:00 ` Takayuki 'January June' Suwa
2022-05-27 4:58 ` Max Filippov
1 sibling, 1 reply; 6+ messages in thread
From: Max Filippov @ 2022-05-26 16:57 UTC (permalink / raw)
To: Takayuki 'January June' Suwa; +Cc: GCC Patches
On Mon, May 23, 2022 at 8:52 AM Takayuki 'January June' Suwa
<jjsuwa_sys3175@yahoo.co.jp> wrote:
>
> This patch introduces setmemsi insn pattern of two kinds, unrolled loop and
> small loop, for fixed small length and constant initialization value.
>
> gcc/ChangeLog:
>
> * gcc/config/xtensa/xtensa-protos.h
> (xtensa_expand_block_set_unrolled_loop,
> xtensa_expand_block_set_small_loop): New prototypes.
> * gcc/config/xtensa/xtensa.cc (xtensa_sizeof_MOVI,
> xtensa_expand_block_set_unrolled_loop,
> xtensa_expand_block_set_small_loop): New functions.
> * gcc/config/xtensa/xtensa.md (setmemsi): New expansion pattern.
> * gcc/config/xtensa/xtensa.opt (mlongcalls): Add target mask.
> ---
> gcc/config/xtensa/xtensa-protos.h | 2 +
> gcc/config/xtensa/xtensa.cc | 211 ++++++++++++++++++++++++++++++
> gcc/config/xtensa/xtensa.md | 16 +++
> gcc/config/xtensa/xtensa.opt | 2 +-
> 4 files changed, 230 insertions(+), 1 deletion(-)
With this patch applied for the following test program
void f(char *p);
void g(void)
{
char c[72] = {0};
f(c);
}
the following code is generated with -O2:
.text
.literal_position
.literal .LC0, f@PLT
.align 4
.global g
.type g, @function
g:
entry sp, 112
movi.n a10, 0
s32i.n a10, sp, 0
addi.n a9, sp, 4
movi.n a8, 0x11
loop a8, .L2_LEND
.L2:
s32i.n a10, a9, 0
addi.n a9, a9, 4
.L2_LEND:
l32r a8, .LC0
mov.n a10, sp
callx8 a8
retw.n
The part
s32i.n a10, sp, 0
addi.n a9, sp, 4
movi.n a8, 0x11
looks redundant and could be just
mov a9, sp
movi a8, 0x12
is that something that can be addressed in this patch?
--
Thanks.
-- Max
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v3 4/5] xtensa: Add setmemsi insn pattern
2022-05-26 16:57 ` Max Filippov
@ 2022-05-27 3:00 ` Takayuki 'January June' Suwa
2022-05-27 4:57 ` Max Filippov
2022-05-27 9:23 ` Takayuki 'January June' Suwa
0 siblings, 2 replies; 6+ messages in thread
From: Takayuki 'January June' Suwa @ 2022-05-27 3:00 UTC (permalink / raw)
To: Max Filippov; +Cc: GCC Patches
On 2022/05/27 1:57, Max Filippov wrote:
> is that something that can be addressed in this patch?
seems hard to resolve, because the RTL-generation pass passes only 68
bytes in that case:
> void f(char *p);
>
> void g(void)
> {
> char c[72] = {0};
> f(c);
> }
without this patch, we would get as:
g:
entry sp, 112
movi.n a8, 0
movi.n a12, 0x44 ; 68, not 72
mov.n a11, a8
addi.n a10, sp, 4 ; skipped first 4 bytes
s32i.n a8, sp, 0 ; cleared without using memset()
call8 memset
mov.n a10, sp
call8 f
retw.n
perhaps it can be solved by using a peephole2 pattern... (depends on
whether peephole2 can capture a code_label)
this behavior does not occur in configuration without zero-overhead
loop, eg. in xtensa-lx106 (ESP8266 SoC):
g:
addi sp, sp, -96
movi.n a3, 0
s32i a0, sp, 92
s32i.n a3, sp, 0
addi.n a2, sp, 4
addi a4, sp, 72
.L2:
s32i.n a3, a2, 0
addi.n a2, a2, 4
bne a2, a4, .L2
mov.n a2, sp
call0 f
l32i a0, sp, 92
addi sp, sp, 96
ret.n
in x86_64-linux:
g:
.LFB0:
.cfi_startproc
subq $88, %rsp
.cfi_def_cfa_offset 96
pxor %xmm0, %xmm0
movq %rsp, %rdi
movaps %xmm0, (%rsp)
movaps %xmm0, 16(%rsp)
movaps %xmm0, 32(%rsp)
movaps %xmm0, 48(%rsp)
movq $0, 64(%rsp)
call f@PLT
addq $88, %rsp
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE0:
or, dword-aligned element:
void f(int *p);
void g(void)
{
int c[18] = { 0 };
f(c);
}
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v3 4/5] xtensa: Add setmemsi insn pattern
2022-05-27 3:00 ` Takayuki 'January June' Suwa
@ 2022-05-27 4:57 ` Max Filippov
2022-05-27 9:23 ` Takayuki 'January June' Suwa
1 sibling, 0 replies; 6+ messages in thread
From: Max Filippov @ 2022-05-27 4:57 UTC (permalink / raw)
To: Takayuki 'January June' Suwa; +Cc: GCC Patches
On Thu, May 26, 2022 at 8:00 PM Takayuki 'January June' Suwa
<jjsuwa_sys3175@yahoo.co.jp> wrote:
>
> On 2022/05/27 1:57, Max Filippov wrote:
> > is that something that can be addressed in this patch?
>
> seems hard to resolve, because the RTL-generation pass passes only 68
> bytes in that case:
...
> this behavior does not occur in configuration without zero-overhead
> loop, eg. in xtensa-lx106 (ESP8266 SoC):
Ok, I'll commit it as is then.
--
Thanks.
-- Max
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v3 4/5] xtensa: Add setmemsi insn pattern
2022-05-23 15:52 [PATCH v3 4/5] xtensa: Add setmemsi insn pattern Takayuki 'January June' Suwa
2022-05-26 16:57 ` Max Filippov
@ 2022-05-27 4:58 ` Max Filippov
1 sibling, 0 replies; 6+ messages in thread
From: Max Filippov @ 2022-05-27 4:58 UTC (permalink / raw)
To: Takayuki 'January June' Suwa; +Cc: GCC Patches
On Mon, May 23, 2022 at 8:52 AM Takayuki 'January June' Suwa
<jjsuwa_sys3175@yahoo.co.jp> wrote:
>
> This patch introduces setmemsi insn pattern of two kinds, unrolled loop and
> small loop, for fixed small length and constant initialization value.
>
> gcc/ChangeLog:
>
> * gcc/config/xtensa/xtensa-protos.h
> (xtensa_expand_block_set_unrolled_loop,
> xtensa_expand_block_set_small_loop): New prototypes.
> * gcc/config/xtensa/xtensa.cc (xtensa_sizeof_MOVI,
> xtensa_expand_block_set_unrolled_loop,
> xtensa_expand_block_set_small_loop): New functions.
> * gcc/config/xtensa/xtensa.md (setmemsi): New expansion pattern.
> * gcc/config/xtensa/xtensa.opt (mlongcalls): Add target mask.
> ---
> gcc/config/xtensa/xtensa-protos.h | 2 +
> gcc/config/xtensa/xtensa.cc | 211 ++++++++++++++++++++++++++++++
> gcc/config/xtensa/xtensa.md | 16 +++
> gcc/config/xtensa/xtensa.opt | 2 +-
> 4 files changed, 230 insertions(+), 1 deletion(-)
Regtested for target=xtensa-linux-uclibc, no new regressions.
Changelog has extra 'gcc/' in paths, so I've dropped this part.
Committed to master.
--
Thanks.
-- Max
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v3 4/5] xtensa: Add setmemsi insn pattern
2022-05-27 3:00 ` Takayuki 'January June' Suwa
2022-05-27 4:57 ` Max Filippov
@ 2022-05-27 9:23 ` Takayuki 'January June' Suwa
1 sibling, 0 replies; 6+ messages in thread
From: Takayuki 'January June' Suwa @ 2022-05-27 9:23 UTC (permalink / raw)
To: Max Filippov; +Cc: GCC Patches
On 2022/05/27 12:00, Takayuki 'January June' Suwa via Gcc-patches wrote:
> On 2022/05/27 1:57, Max Filippov wrote:
>> is that something that can be addressed in this patch?
>
> seems hard to resolve, because the RTL-generation pass passes only 68
> bytes in that case:
The culprit is here, but I don't know whether it is a known regression or not.
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 7197996cec7..be100dd9946 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -6043,13 +6043,19 @@ store_expr (tree exp, rtx target, int call_param_p,
if (!can_store_by_pieces (str_copy_len, string_cst_read_str,
(void *) str, MEM_ALIGN (target), false))
goto normal_expr;
-
- dest_mem = store_by_pieces (target, str_copy_len,
string_cst_read_str,
- (void *) str, MEM_ALIGN (target), false,
- RETURN_END);
- clear_storage (adjust_address_1 (dest_mem, BLKmode, 0, 1, 1, 0,
- exp_len - str_copy_len),
- GEN_INT (exp_len - str_copy_len), BLOCK_OP_NORMAL);
+ if (TREE_STRING_LENGTH (str) == 1 && *TREE_STRING_POINTER (str) == 0)
+ clear_storage (adjust_address_1 (target, BLKmode, 0, 1, 1, 0,
+ exp_len),
+ GEN_INT (exp_len), BLOCK_OP_NORMAL);
+ else
+ {
+ dest_mem = store_by_pieces (target, str_copy_len, string_cst_read_str,
+ (void *) str, MEM_ALIGN (target), false,
+ RETURN_END);
+ clear_storage (adjust_address_1 (dest_mem, BLKmode, 0, 1, 1, 0,
+ exp_len - str_copy_len),
+ GEN_INT (exp_len - str_copy_len), BLOCK_OP_NORMAL);
+ }
return NULL_RTX;
}
else
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2022-05-27 9:23 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-23 15:52 [PATCH v3 4/5] xtensa: Add setmemsi insn pattern Takayuki 'January June' Suwa
2022-05-26 16:57 ` Max Filippov
2022-05-27 3:00 ` Takayuki 'January June' Suwa
2022-05-27 4:57 ` Max Filippov
2022-05-27 9:23 ` Takayuki 'January June' Suwa
2022-05-27 4:58 ` Max Filippov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).