From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id 247B23858D37; Tue, 23 May 2023 12:34:49 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 247B23858D37 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1684845289; bh=b2L3mChWNSzNT4t9Q3xki+Jd1FWW4qsx/xigUubQgSc=; h=From:To:Subject:Date:In-Reply-To:References:From; b=ZfCbGevMDAXh3tW29H1214cuWfSod1xpBIlZgzTdXZrAUwLNDIOYMk2L32T9hgKFw DiCYEVsdKM+T/PjpdTMhcNQZ+apZ0JTSu9k5zr2tYty3/phLvQzgy+kAt5BRhpcmXx TpoFOlAHw3tcbaVsen3PTUHgdzSFmAXwsmgFk8EM= From: "klepikov.alex+bugs at gmail dot com" To: gcc-bugs@gcc.gnu.org Subject: [Bug target/49263] SH Target: underutilized "TST #imm, R0" instruction Date: Tue, 23 May 2023 12:34:45 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: changed X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: target X-Bugzilla-Version: 4.6.1 X-Bugzilla-Keywords: X-Bugzilla-Severity: enhancement X-Bugzilla-Who: klepikov.alex+bugs at gmail dot com X-Bugzilla-Status: REOPENED X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: olegendo at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: Message-ID: In-Reply-To: References: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 List-Id: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D49263 --- Comment #32 from Alexander Klepikov --- I'm not sure whether I should write here or open a new discussion, but these topics are very closely related. I've been writing a patch to eliminate the generation of the dynamic shift instructions 'shad' and 'shld' completely, at least for the SH4 CPU. And then I got a surprising result - in all the examples I gave earlier, the library calls were converted to 'tst' instructions! 
Here is the patch itself (I also will attach a file): --- ../gcc-12.3.0.orig/gcc/config/sh/sh.cc 2023-05-08 15:14:39.6811616= 95 +0300 +++ ./gcc/config/sh/sh.cc 2023-05-23 12:23:25.964375731 +0300 @@ -3061,7 +3061,7 @@ else insn_count =3D ashl_lshr_seq[shift_amount_i].insn_count; - return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST); + return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST) && ! disable_dynshift; } /* Assuming we have a value that has been sign-extended by at least one bi= t, @@ -3812,8 +3812,10 @@ rtx wrk; char func[18]; int value; + int long_shift =3D disable_dynshift ? 30 : 19; + int short_shift =3D disable_dynshift ? 15 : 5; - if (TARGET_DYNSHIFT) + if (TARGET_DYNSHIFT && ! disable_dynshift) { if (!CONST_INT_P (operands[2])) { @@ -3851,7 +3853,7 @@ emit_insn (gen_ashrsi2_31 (operands[0], operands[1])); return true; } - else if (value >=3D 16 && value <=3D 19) + else if (value >=3D 16 && value <=3D long_shift) { wrk =3D gen_reg_rtx (SImode); emit_insn (gen_ashrsi2_16 (wrk, operands[1])); @@ -3862,7 +3864,7 @@ return true; } /* Expand a short sequence inline, longer call a magic routine. */ - else if (value <=3D 5) + else if (value <=3D short_shift) { wrk =3D gen_reg_rtx (SImode); emit_move_insn (wrk, operands[1]); diff -ur ../gcc-12.3.0.orig/gcc/config/sh/sh.opt ./gcc/config/sh/sh.opt --- ../gcc-12.3.0.orig/gcc/config/sh/sh.opt 2023-05-08 15:14:39.6891618= 10 +0300 +++ ./gcc/config/sh/sh.opt 2023-05-23 10:45:36.814371159 +0300 @@ -301,3 +301,7 @@ mlra Target Var(sh_lra_flag) Init(0) Save Use LRA instead of reload (transitional). 
+ +mdisable-dynshift +Target Var(disable_dynshift) Init(0) +Disable dynamic shift 'shad' and 'shld' instructions And here are my tests: $ cat f.c #define ADDR 0xFFFF0000 #define P ((unsigned char *)ADDR) #define FLAG 0x40 #define S 7 unsigned char f(char v){ return (v & FLAG) =3D=3D FLAG; } unsigned char f_(unsigned char v){ return (v & FLAG) =3D=3D FLAG; } unsigned char f1(void){ return (*P & FLAG) =3D=3D FLAG; } int f_signed_rshift(int v){ return v >> S; } int f_signed_lshift(int v){ return v << S; } unsigned int f_unsigned_rshift(unsigned int v){ return v >> S; } unsigned int f_unsigned_lshift(unsigned int v){ return v << S; } $ /usr/local/sh-toolchain/bin/sh-elf-gcc -c -mrenesas -m2e -mb -O -fno-toplevel-reorder -mdisable-dynshift -S f.c $ cat f.s .file "f.c" .text .text .align 1 .global _f .type _f, @function _f: mov r4,r0 tst #64,r0 mov #-1,r0 rts negc r0,r0 .size _f, .-_f .align 1 .global _f_ .type _f_, @function _f_: mov r4,r0 tst #64,r0 mov #-1,r0 rts negc r0,r0 .size _f_, .-_f_ .align 1 .global _f1 .type _f1, @function _f1: mov.l .L4,r1 mov.b @r1,r0 tst #64,r0 mov #-1,r0 rts negc r0,r0 .L5: .align 2 .L4: .long -65536 .size _f1, .-_f1 .align 1 .global _f_signed_rshift .type _f_signed_rshift, @function _f_signed_rshift: mov r4,r0 shar r0 shar r0 shar r0 shar r0 shar r0 shar r0 rts shar r0 .size _f_signed_rshift, .-_f_signed_rshift .align 1 .global _f_signed_lshift .type _f_signed_lshift, @function _f_signed_lshift: mov r4,r0 shll2 r0 shll2 r0 add r0,r0 rts shll2 r0 .size _f_signed_lshift, .-_f_signed_lshift .align 1 .global _f_unsigned_rshift .type _f_unsigned_rshift, @function _f_unsigned_rshift: mov r4,r0 shlr2 r0 shlr2 r0 shlr r0 rts shlr2 r0 .size _f_unsigned_rshift, .-_f_unsigned_rshift .align 1 .global _f_unsigned_lshift .type _f_unsigned_lshift, @function _f_unsigned_lshift: mov r4,r0 shll2 r0 shll2 r0 add r0,r0 rts shll2 r0 .size _f_unsigned_lshift, .-_f_unsigned_lshift .ident "GCC: (GNU) 12.3.0" I also compiled my project with '-m2e' and new 
'-mdisable-dynshift' options and tested it in SH-2E mode on Renesas's emulator that comes with High-performance Embedded Workshop, and all unit tests run as expected. If this patch is useful, let's include it in GCC.