public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
From: "klepikov.alex+bugs at gmail dot com" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug target/49263] SH Target: underutilized "TST #imm, R0" instruction
Date: Mon, 29 May 2023 14:54:11 +0000	[thread overview]
Message-ID: <bug-49263-4-jEXnm1aPyX@http.gcc.gnu.org/bugzilla/> (raw)
In-Reply-To: <bug-49263-4@http.gcc.gnu.org/bugzilla/>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49263

--- Comment #45 from Alexander Klepikov <klepikov.alex+bugs at gmail dot com> ---
> I have an idea. If it's impossible to defer initial optimization,
> maybe it's possible to emit some intermediate insn and catch it and optimize
> later?


Good news. I've made a proof of concept. It works in at least some cases; so
far I have only tried it on simple tests.

$ cat f.c
#define A 0xFFFF0000
#define P ((unsigned char *)A)
#define F 64
#define S 8

unsigned char f_non_zero(unsigned char v){
    return (v & F) != 0;
}

unsigned f_sym_non_zero(void){
    return (*P & F) != 0;
}

unsigned f_sym_mask(void){
    return (*P & F) == F;
}

int f_rshift(char v){
    return v >> S;
}

$ /usr/local/sh-toolchain/bin/sh-elf-gcc -O2 -mb -m2e -da -S f.c

$ cat f.s
        .file   "f.c"
        .text
        .text
        .align 1
        .align 2
        .global _f_non_zero
        .type   _f_non_zero, @function
_f_non_zero:
        mov     r4,r0
        sts.l   pr,@-r15
        tst     #64,r0
        mov     #-1,r0
        negc    r0,r0
        lds.l   @r15+,pr
        rts
        nop
        .size   _f_non_zero, .-_f_non_zero
        .align 1
        .align 2
        .global _f_sym_non_zero
        .type   _f_sym_non_zero, @function
_f_sym_non_zero:
        mov.l   .L6,r1
        sts.l   pr,@-r15
        mov.b   @r1,r0
        tst     #64,r0
        mov     #-1,r0
        negc    r0,r0
        lds.l   @r15+,pr
        rts
        nop
.L7:
        .align 2
.L6:
        .long   -65536
        .size   _f_sym_non_zero, .-_f_sym_non_zero
        .align 1
        .align 2
        .global _f_sym_mask
        .type   _f_sym_mask, @function
_f_sym_mask:
        mov.l   .L10,r1
        sts.l   pr,@-r15
        mov.b   @r1,r0
        tst     #64,r0
        mov     #-1,r0
        negc    r0,r0
        lds.l   @r15+,pr
        rts
        nop
.L11:
        .align 2
.L10:
        .long   -65536
        .size   _f_sym_mask, .-_f_sym_mask
        .align 1
        .align 2
        .global _f_rshift
        .type   _f_rshift, @function
_f_rshift:
        mov.l   .L14,r1
        sts.l   pr,@-r15
        jsr     @r1
        exts.b  r4,r4
        mov     r4,r0
        lds.l   @r15+,pr
        rts
        nop
.L15:
        .align 2
.L14:
        .long   ___ashiftrt_r4_8
        .size   _f_rshift, .-_f_rshift
        .ident  "GCC: (GNU) 13.1.0"

$ /usr/local/sh-toolchain/bin/sh-elf-gcc -O2 -ml -m2e -da -S f.c

$ cat f.s
        .file   "f.c"
        .text
        .little
        .text
        .align 1
        .align 2
        .global _f_non_zero
        .type   _f_non_zero, @function
_f_non_zero:
        mov     r4,r0
        sts.l   pr,@-r15
        tst     #64,r0
        mov     #-1,r0
        negc    r0,r0
        lds.l   @r15+,pr
        rts
        nop
        .size   _f_non_zero, .-_f_non_zero
        .align 1
        .align 2
        .global _f_sym_non_zero
        .type   _f_sym_non_zero, @function
_f_sym_non_zero:
        mov.l   .L6,r1
        sts.l   pr,@-r15
        mov.b   @r1,r0
        tst     #64,r0
        mov     #-1,r0
        negc    r0,r0
        lds.l   @r15+,pr
        rts
        nop
.L7:
        .align 2
.L6:
        .long   -65536
        .size   _f_sym_non_zero, .-_f_sym_non_zero
        .align 1
        .align 2
        .global _f_sym_mask
        .type   _f_sym_mask, @function
_f_sym_mask:
        mov.l   .L10,r1
        sts.l   pr,@-r15
        mov.b   @r1,r0
        tst     #64,r0
        mov     #-1,r0
        negc    r0,r0
        lds.l   @r15+,pr
        rts
        nop
.L11:
        .align 2
.L10:
        .long   -65536
        .size   _f_sym_mask, .-_f_sym_mask
        .align 1
        .align 2
        .global _f_rshift
        .type   _f_rshift, @function
_f_rshift:
        mov.l   .L14,r1
        sts.l   pr,@-r15
        jsr     @r1
        exts.b  r4,r4
        mov     r4,r0
        lds.l   @r15+,pr
        rts
        nop
.L15:
        .align 2
.L14:
        .long   ___ashiftrt_r4_8
        .size   _f_rshift, .-_f_rshift
        .ident  "GCC: (GNU) 13.1.0"

Splitting takes place in the split1 pass, as expected. Here is the patch itself.

$ cat gcc-13.1.0-ashrsi3_libcall.patch
diff -ur gcc-13.1.0.orig/gcc/config/sh/sh-protos.h
gcc-13.1.0/gcc/config/sh/sh-protos.h
--- gcc-13.1.0.orig/gcc/config/sh/sh-protos.h   2023-04-26 10:09:39.000000000
+0300
+++ gcc-13.1.0/gcc/config/sh/sh-protos.h        2023-05-29 11:45:05.134723435
+0300
@@ -78,6 +78,7 @@
 extern void gen_shifty_op (int, rtx *);
 extern void gen_shifty_hi_op (int, rtx *);
 extern bool expand_ashiftrt (rtx *);
+extern bool expand_ashrsi3_libcall (rtx *);//delete
 extern bool sh_dynamicalize_shift_p (rtx);
 extern int shl_and_kind (rtx, rtx, int *);
 extern int shl_and_length (rtx);
diff -ur gcc-13.1.0.orig/gcc/config/sh/sh.cc gcc-13.1.0/gcc/config/sh/sh.cc
--- gcc-13.1.0.orig/gcc/config/sh/sh.cc 2023-04-26 10:09:39.000000000 +0300
+++ gcc-13.1.0/gcc/config/sh/sh.cc      2023-05-29 17:09:54.602787537 +0300
@@ -3875,11 +3877,37 @@
   wrk = gen_reg_rtx (Pmode);

   /* Load the value into an arg reg and call a helper.  */
-  emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
+  /*emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
   sprintf (func, "__ashiftrt_r4_%d", value);
   rtx lab = function_symbol (wrk, func, SFUNC_STATIC).lab;
   emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab));
   emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
+  return true;*/
+
+  if (dump_file)
+    fprintf(dump_file, "ashrsi3: Emitting collapsed libcall\n");
+  emit_insn (gen_ashrsi3_libcall_collapsed (operands[0], operands[1],
GEN_INT(value)));//delete
+  return true;//delete
+}
+
+//delete
+bool
+expand_ashrsi3_libcall (rtx *operands) {
+  char func[18];
+
+  if (dump_file)
+    fprintf(dump_file, "ashrsi3_libcall_collapsed: Expanding ashrsi3
libcall\n");
+
+  rtx wrk = gen_reg_rtx (Pmode);
+  emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
+
+  sprintf (func, "__ashiftrt_r4_%d", INTVAL (operands[2]));
+
+  rtx lab = function_symbol (wrk, func, SFUNC_STATIC).lab;
+
+  emit_insn (gen_ashrsi3_n (operands[2], wrk, lab));
+  emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
+
   return true;
 }

diff -ur gcc-13.1.0.orig/gcc/config/sh/sh.md gcc-13.1.0/gcc/config/sh/sh.md
--- gcc-13.1.0.orig/gcc/config/sh/sh.md 2023-04-26 10:09:39.000000000 +0300
+++ gcc-13.1.0/gcc/config/sh/sh.md      2023-05-29 17:10:42.752779922 +0300
@@ -3867,6 +3867,35 @@
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])

+(define_insn "ashrsi3_libcall_collapsed"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+       (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+                    (match_operand:SI 2 "const_int_operand")))
+   (clobber (reg:SI T_REG))
+   (clobber (reg:SI PR_REG))]
+  "TARGET_SH1"
+  "OOPS"
+  [(set_attr "type" "dyn_shift")
+   (set_attr "needs_delay_slot" "yes")])
+
+(define_insn_and_split "ashrsi3_libcall_expand"
+  [(parallel [(set (match_operand:SI 0 "arith_reg_dest")
+       (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand")
+           (match_operand:SI 2 "const_int_operand"))
+       )(clobber (reg:SI T_REG))
+       (clobber (reg:SI PR_REG))
+  ])]
+  "TARGET_SH1"
+  "OOPS_1"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (expand_ashrsi3_libcall(operands))
+    DONE;
+  else
+    FAIL;
+})
+
 ;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
 ;; DImode arithmetic shift right

I did it by feel, really just adjusting the parameters until it worked. So
please check it and improve it, because I'm sure it will break something.
Thank you.

  parent reply	other threads:[~2023-05-29 14:54 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-06-01 20:17 [Bug target/49263] New: " oleg.endo@t-online.de
2011-06-01 20:42 ` [Bug target/49263] " oleg.endo@t-online.de
2011-06-12 23:12 ` kkojima at gcc dot gnu.org
2011-06-19 16:42 ` oleg.endo@t-online.de
2011-06-22 22:34 ` kkojima at gcc dot gnu.org
2011-06-26 22:31 ` oleg.endo@t-online.de
2011-06-27  5:15 ` kkojima at gcc dot gnu.org
2011-10-09 23:35 ` oleg.endo@t-online.de
2011-10-10  1:32 ` kkojima at gcc dot gnu.org
2011-10-10 23:48 ` oleg.endo@t-online.de
2011-10-11  1:47 ` kkojima at gcc dot gnu.org
2011-10-13 22:55 ` oleg.endo@t-online.de
2011-10-14 23:06 ` kkojima at gcc dot gnu.org
2011-10-15  2:33 ` kkojima at gcc dot gnu.org
2011-11-20 14:20 ` oleg.endo@t-online.de
2011-12-29  1:09 ` oleg.endo@t-online.de
2012-02-26 16:28 ` olegendo at gcc dot gnu.org
2012-02-26 23:29 ` olegendo at gcc dot gnu.org
2012-08-27 19:52 ` olegendo at gcc dot gnu.org
2012-10-28 22:02 ` olegendo at gcc dot gnu.org
2012-10-31 13:47 ` olegendo at gcc dot gnu.org
2013-12-08 13:47 ` olegendo at gcc dot gnu.org
2013-12-17 12:37 ` olegendo at gcc dot gnu.org
2014-12-30 18:45 ` olegendo at gcc dot gnu.org
2015-01-24 13:05 ` olegendo at gcc dot gnu.org
2015-01-26 23:57 ` olegendo at gcc dot gnu.org
2023-05-12 11:46 ` klepikov.alex+bugs at gmail dot com
2023-05-23 12:34 ` klepikov.alex+bugs at gmail dot com
2023-05-23 12:35 ` klepikov.alex+bugs at gmail dot com
2023-05-23 19:05 ` olegendo at gcc dot gnu.org
2023-05-24 11:40 ` klepikov.alex+bugs at gmail dot com
2023-05-24 11:57 ` olegendo at gcc dot gnu.org
2023-05-24 13:34 ` klepikov.alex+bugs at gmail dot com
2023-05-24 15:00 ` olegendo at gcc dot gnu.org
2023-05-25 17:53 ` klepikov.alex+bugs at gmail dot com
2023-05-25 21:32 ` olegendo at gcc dot gnu.org
2023-05-26 12:03 ` klepikov.alex+bugs at gmail dot com
2023-05-26 17:44 ` olegendo at gcc dot gnu.org
2023-05-28 10:24 ` klepikov.alex+bugs at gmail dot com
2023-05-28 10:48 ` olegendo at gcc dot gnu.org
2023-05-29 14:54 ` klepikov.alex+bugs at gmail dot com [this message]
2023-05-30  1:48 ` egallager at gcc dot gnu.org
2023-05-30  1:56 ` olegendo at gcc dot gnu.org
2023-05-30 12:42 ` klepikov.alex+bugs at gmail dot com
2023-05-30 19:57 ` olegendo at gcc dot gnu.org
2023-05-30 20:00 ` olegendo at gcc dot gnu.org

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bug-49263-4-jEXnm1aPyX@http.gcc.gnu.org/bugzilla/ \
    --to=gcc-bugzilla@gcc.gnu.org \
    --cc=gcc-bugs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).