public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
From: "rguenth at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug target/114252] Introducing bswapsi reduces code performance
Date: Thu, 07 Mar 2024 09:42:07 +0000	[thread overview]
Message-ID: <bug-114252-4-B2tMkR1AHB@http.gcc.gnu.org/bugzilla/> (raw)
In-Reply-To: <bug-114252-4@http.gcc.gnu.org/bugzilla/>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114252

--- Comment #11 from Richard Biener <rguenth at gcc dot gnu.org> ---
diff --git a/gcc/gimple-ssa-store-merging.cc b/gcc/gimple-ssa-store-merging.cc
index 42b68abf61b..c9d4662656f 100644
--- a/gcc/gimple-ssa-store-merging.cc
+++ b/gcc/gimple-ssa-store-merging.cc
@@ -170,6 +170,7 @@
 #include "optabs-tree.h"
 #include "dbgcnt.h"
 #include "selftest.h"
+#include "regs.h"

 /* The maximum size (in bits) of the stores this pass should generate.  */
 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
@@ -1484,7 +1485,8 @@ maybe_optimize_vector_constructor (gimple *cur_stmt)
       break;
     case 32:
       if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
-         && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
+         && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing
+         && have_regs_of_mode[SImode])
        {
          load_type = uint32_type_node;
          fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
@@ -1545,7 +1547,8 @@ pass_optimize_bswap::execute (function *fun)
   tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;

   bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
-              && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
+              && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing
+              && have_regs_of_mode[SImode]);
   bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
               && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
                   || (bswap32_p && word_mode == SImode)));


doesn't work.  AVR has regs of SImode.  There doesn't seem to be a way to
query the (maximum?) number of hardregs used for a mode.  Using

  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
               && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing
               && have_regs_of_mode[SImode]
               && hard_regno_nregs (0, SImode) == 1);

"works" but is surely wrong (whatever hardreg zero corresponds to).
Looking only at word_mode, requiring SImode size >= word_mode size like with

  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
               && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing
               && known_ge (GET_MODE_SIZE (word_mode), GET_MODE_SIZE
(SImode)));

"works" but would affect many more targets.  Maybe && word_mode != QImode
is better.

Note that this will cut off _all_ bswap detection.  Thus my question on
profitability of detecting cases like those in libgcc2.c which then produces

__bswapsi2:
        push r12
        push r13
        push r14
        push r15
        push r16
        push r17
/* prologue: function */
/* frame size = 0 */
/* stack size = 6 */
.L__stack_usage = 6
        mov r16,r22
        mov r17,r23
        mov r18,r24
        mov r19,r25
        mov r22,r19
        clr r23
        clr r24
        clr r25
        mov r15,r16
        clr r14
        clr r13
        clr r12
        or r22,r12
        or r23,r13
        or r24,r14
        or r25,r15
        mov r12,r17
        mov r13,r18
        mov r14,r19
        clr r15
        clr r12
        clr r14
        clr r15
        or r22,r12
        or r23,r13
        or r24,r14
        or r25,r15
        mov r19,r18
        mov r18,r17
        mov r17,r16
        clr r16
        clr r16
        clr r17
        clr r19
        or r22,r16
        or r23,r17
        or r24,r18
        or r25,r19
/* epilogue start */
        pop r17
        pop r16
        pop r15
        pop r14
        pop r13
        pop r12
        ret

then.

bswap detection does not try to do any sophisticated evaluation of costs.

  parent reply	other threads:[~2024-03-07  9:42 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-06 10:01 [Bug tree-optimization/114252] New: " gjl at gcc dot gnu.org
2024-03-06 11:55 ` [Bug target/114252] " rguenth at gcc dot gnu.org
2024-03-06 11:59 ` rguenth at gcc dot gnu.org
2024-03-06 12:15 ` gjl at gcc dot gnu.org
2024-03-06 12:37 ` rguenth at gcc dot gnu.org
2024-03-06 16:12 ` gjl at gcc dot gnu.org
2024-03-06 17:18 ` rguenther at suse dot de
2024-03-07  7:45 ` rguenth at gcc dot gnu.org
2024-03-07  8:45 ` gjl at gcc dot gnu.org
2024-03-07  9:05 ` gjl at gcc dot gnu.org
2024-03-07  9:14 ` rguenth at gcc dot gnu.org
2024-03-07  9:42 ` rguenth at gcc dot gnu.org [this message]
2024-03-07 10:47 ` gjl at gcc dot gnu.org
2024-03-07 10:56 ` rguenth at gcc dot gnu.org
2024-03-07 14:12 ` gjl at gcc dot gnu.org
2024-03-07 15:02 ` pinskia at gcc dot gnu.org
2024-03-07 17:52 ` rguenther at suse dot de

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bug-114252-4-B2tMkR1AHB@http.gcc.gnu.org/bugzilla/ \
    --to=gcc-bugzilla@gcc.gnu.org \
    --cc=gcc-bugs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).