From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id 4EA283858C78; Mon, 29 May 2023 14:54:12 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 4EA283858C78 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1685372052; bh=poUDmRNUnhVDKHWmxVDKqGDWNvQU4Z0QidT38CJXV5c=; h=From:To:Subject:Date:In-Reply-To:References:From; b=xIkkGCcE9Vg6JIYAVLG0ksPJxJti8p5pr4+bC3R31btJ8IJLFkT19nB3P02DkxyAu tqId7U6WbXRc8lZMTWVZVHyQN3Gbdc+sVRcVF7eGHHwKPmzS0Ej/ZiYM+4TbdGlyIK K7ON/FGsZjB9Vajh1z7AiEYQlU+d/DxJfwOAI4aQ= From: "klepikov.alex+bugs at gmail dot com" To: gcc-bugs@gcc.gnu.org Subject: [Bug target/49263] SH Target: underutilized "TST #imm, R0" instruction Date: Mon, 29 May 2023 14:54:11 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: changed X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: target X-Bugzilla-Version: 4.6.1 X-Bugzilla-Keywords: X-Bugzilla-Severity: enhancement X-Bugzilla-Who: klepikov.alex+bugs at gmail dot com X-Bugzilla-Status: REOPENED X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: olegendo at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: Message-ID: In-Reply-To: References: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 List-Id: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D49263 --- Comment #45 from Alexander Klepikov --- >I have an idea. If it's impossible to defer initial optimization, > maybe it's possible to emit some intermediate insn and catch it and optim= ize > later? Good news. I've made a proof of concept. It works at least sometimes - on simple tests. 
$ cat f.c #define A 0xFFFF0000 #define P ((unsigned char *)A) #define F 64 #define S 8 unsigned char f_non_zero(unsigned char v){ return (v & F) !=3D 0; } unsigned f_sym_non_zero(void){ return (*P & F) !=3D 0; } unsigned f_sym_mask(void){ return (*P & F) =3D=3D F; } int f_rshift(char v){ return v >> S; } $ /usr/local/sh-toolchain/bin/sh-elf-gcc -O2 -mb -m2e -da -S f.c $ cat f.s .file "f.c" .text .text .align 1 .align 2 .global _f_non_zero .type _f_non_zero, @function _f_non_zero: mov r4,r0 sts.l pr,@-r15 tst #64,r0 mov #-1,r0 negc r0,r0 lds.l @r15+,pr rts nop .size _f_non_zero, .-_f_non_zero .align 1 .align 2 .global _f_sym_non_zero .type _f_sym_non_zero, @function _f_sym_non_zero: mov.l .L6,r1 sts.l pr,@-r15 mov.b @r1,r0 tst #64,r0 mov #-1,r0 negc r0,r0 lds.l @r15+,pr rts nop .L7: .align 2 .L6: .long -65536 .size _f_sym_non_zero, .-_f_sym_non_zero .align 1 .align 2 .global _f_sym_mask .type _f_sym_mask, @function _f_sym_mask: mov.l .L10,r1 sts.l pr,@-r15 mov.b @r1,r0 tst #64,r0 mov #-1,r0 negc r0,r0 lds.l @r15+,pr rts nop .L11: .align 2 .L10: .long -65536 .size _f_sym_mask, .-_f_sym_mask .align 1 .align 2 .global _f_rshift .type _f_rshift, @function _f_rshift: mov.l .L14,r1 sts.l pr,@-r15 jsr @r1 exts.b r4,r4 mov r4,r0 lds.l @r15+,pr rts nop .L15: .align 2 .L14: .long ___ashiftrt_r4_8 .size _f_rshift, .-_f_rshift .ident "GCC: (GNU) 13.1.0" $ /usr/local/sh-toolchain/bin/sh-elf-gcc -O2 -ml -m2e -da -S f.c $ cat f.s .file "f.c" .text .little .text .align 1 .align 2 .global _f_non_zero .type _f_non_zero, @function _f_non_zero: mov r4,r0 sts.l pr,@-r15 tst #64,r0 mov #-1,r0 negc r0,r0 lds.l @r15+,pr rts nop .size _f_non_zero, .-_f_non_zero .align 1 .align 2 .global _f_sym_non_zero .type _f_sym_non_zero, @function _f_sym_non_zero: mov.l .L6,r1 sts.l pr,@-r15 mov.b @r1,r0 tst #64,r0 mov #-1,r0 negc r0,r0 lds.l @r15+,pr rts nop .L7: .align 2 .L6: .long -65536 .size _f_sym_non_zero, .-_f_sym_non_zero .align 1 .align 2 .global _f_sym_mask .type _f_sym_mask, @function 
_f_sym_mask: mov.l .L10,r1 sts.l pr,@-r15 mov.b @r1,r0 tst #64,r0 mov #-1,r0 negc r0,r0 lds.l @r15+,pr rts nop .L11: .align 2 .L10: .long -65536 .size _f_sym_mask, .-_f_sym_mask .align 1 .align 2 .global _f_rshift .type _f_rshift, @function _f_rshift: mov.l .L14,r1 sts.l pr,@-r15 jsr @r1 exts.b r4,r4 mov r4,r0 lds.l @r15+,pr rts nop .L15: .align 2 .L14: .long ___ashiftrt_r4_8 .size _f_rshift, .-_f_rshift .ident "GCC: (GNU) 13.1.0" Splitting takes place at split1 pass as expected. Here is the patch itself. $ cat gcc-13.1.0-ashrsi3_libcall.patch diff -ur gcc-13.1.0.orig/gcc/config/sh/sh-protos.h gcc-13.1.0/gcc/config/sh/sh-protos.h --- gcc-13.1.0.orig/gcc/config/sh/sh-protos.h 2023-04-26 10:09:39.0000000= 00 +0300 +++ gcc-13.1.0/gcc/config/sh/sh-protos.h 2023-05-29 11:45:05.1347234= 35 +0300 @@ -78,6 +78,7 @@ extern void gen_shifty_op (int, rtx *); extern void gen_shifty_hi_op (int, rtx *); extern bool expand_ashiftrt (rtx *); +extern bool expand_ashrsi3_libcall (rtx *);//delete extern bool sh_dynamicalize_shift_p (rtx); extern int shl_and_kind (rtx, rtx, int *); extern int shl_and_length (rtx); diff -ur gcc-13.1.0.orig/gcc/config/sh/sh.cc gcc-13.1.0/gcc/config/sh/sh.cc --- gcc-13.1.0.orig/gcc/config/sh/sh.cc 2023-04-26 10:09:39.000000000 +0300 +++ gcc-13.1.0/gcc/config/sh/sh.cc 2023-05-29 17:09:54.602787537 +0300 @@ -3875,11 +3877,37 @@ wrk =3D gen_reg_rtx (Pmode); /* Load the value into an arg reg and call a helper. 
*/ - emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]); + /*emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]); sprintf (func, "__ashiftrt_r4_%d", value); rtx lab =3D function_symbol (wrk, func, SFUNC_STATIC).lab; emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab)); emit_move_insn (operands[0], gen_rtx_REG (SImode, 4)); + return true;*/ + + if (dump_file) + fprintf(dump_file, "ashrsi3: Emitting collapsed libcall\n"); + emit_insn (gen_ashrsi3_libcall_collapsed (operands[0], operands[1], GEN_INT(value)));//delete + return true;//delete +} + +//delete +bool +expand_ashrsi3_libcall (rtx *operands) { + char func[18]; + + if (dump_file) + fprintf(dump_file, "ashrsi3_libcall_collapsed: Expanding ashrsi3 libcall\n"); + + rtx wrk =3D gen_reg_rtx (Pmode); + emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]); + + sprintf (func, "__ashiftrt_r4_%d", INTVAL (operands[2])); + + rtx lab =3D function_symbol (wrk, func, SFUNC_STATIC).lab; + + emit_insn (gen_ashrsi3_n (operands[2], wrk, lab)); + emit_move_insn (operands[0], gen_rtx_REG (SImode, 4)); + return true; } diff -ur gcc-13.1.0.orig/gcc/config/sh/sh.md gcc-13.1.0/gcc/config/sh/sh.md --- gcc-13.1.0.orig/gcc/config/sh/sh.md 2023-04-26 10:09:39.000000000 +0300 +++ gcc-13.1.0/gcc/config/sh/sh.md 2023-05-29 17:10:42.752779922 +0300 @@ -3867,6 +3867,35 @@ [(set_attr "type" "sfunc") (set_attr "needs_delay_slot" "yes")]) +(define_insn "ashrsi3_libcall_collapsed" + [(set (match_operand:SI 0 "arith_reg_dest" "=3Dr") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "const_int_operand"))) + (clobber (reg:SI T_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SH1" + "OOPS" + [(set_attr "type" "dyn_shift") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn_and_split "ashrsi3_libcall_expand" + [(parallel [(set (match_operand:SI 0 "arith_reg_dest") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "const_int_operand")) + )(clobber (reg:SI T_REG)) + (clobber (reg:SI 
PR_REG)) + ])] + "TARGET_SH1" + "OOPS_1" + "&& 1" + [(const_int 0)] +{ + if (expand_ashrsi3_libcall(operands)) + DONE; + else + FAIL; +}) + ;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .= . ;; DImode arithmetic shift right I did it by feel, actually picking up the parameters until it worked. So please check it and improve it because I'm sure it will break something. Thank you.