public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug target/102117] New: s390: Inefficient code for 64x64=128 signed multiply for <= z13
@ 2021-08-29 12:30 jens.seifert at de dot ibm.com
  2021-08-29 12:49 ` [Bug target/102117] " jens.seifert at de dot ibm.com
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: jens.seifert at de dot ibm.com @ 2021-08-29 12:30 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102117

            Bug ID: 102117
           Summary: s390: Inefficient code for 64x64=128 signed multiply
                    for <= z13
           Product: gcc
           Version: 8.3.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: jens.seifert at de dot ibm.com
  Target Milestone: ---

/* Signed 64x64 -> 128-bit multiply: sign-extend both operands to 128 bits
   first so the product is computed at full width. */
__int128 imul128(long long a, long long b)
{
   const __int128 wide_a = a;
   const __int128 wide_b = b;

   return wide_a * wide_b;
}

generates a sequence with 3 multiplies:

# Compiler output for imul128.  The 128-bit result is stored through the
# address in r2; r3 and r4 are read as the two 64-bit operands (a and b in
# the C source).  Three multiply instructions are issued: mlgr, msgr, msgr.
_Z7imul128xx:
.LFB0:
        .cfi_startproc
        # Park r12 and r13 in floating-point registers so the r12:r13 pair
        # can receive the 128-bit product; they are restored before return.
        ldgr    %f2,%r12
        .cfi_register 12, 17
        ldgr    %f0,%r13
        .cfi_register 13, 16
        # Multiply 1: unsigned 64x64->128, r12:r13 = r3 * r4
        # (r12 = high half, r13 = low half).
        lgr     %r13,%r3
        mlgr    %r12,%r4
        # Multiply 2: r1 = (r3 >> 63) * r4, i.e. -r4 when r3 is negative,
        # otherwise 0.
        srag    %r1,%r3,63
        msgr    %r1,%r4
        # Multiply 3: r4 = (r4 >> 63) * r3, i.e. -r3 when r4 is negative,
        # otherwise 0.
        srag    %r4,%r4,63
        msgr    %r4,%r3
        # Sum the two corrections and add them to the high half, turning the
        # unsigned product into the signed one.
        agr     %r4,%r1
        agr     %r12,%r4
        # Store high half (r12) at 0(r2) and low half (r13) at 8(r2).
        stmg    %r12,%r13,0(%r2)
        # Restore r13 and r12 from the floating-point registers and return.
        lgdr    %r13,%f0
        .cfi_restore 13
        lgdr    %r12,%f2
        .cfi_restore 12
        br      %r14
        .cfi_endproc


The following sequence only requires 1 multiply:

/* Signed 64x64 -> 128-bit multiply built from a single unsigned multiply.

   Reading a negative 64-bit value as unsigned adds 2^64 to it, so the
   unsigned product exceeds the signed product by 2^64 * b when a < 0 and
   by 2^64 * a when b < 0 (modulo 2^128).  The low 64 bits are therefore
   already correct; only the high 64 bits need the OTHER operand subtracted
   for each negative operand.

   Relies on `>>` of a negative signed value being an arithmetic shift
   (sign-extending), which GCC guarantees.

   Returns the same value as (__int128)a * (__int128)b.  */
__int128 imul128_opt(long long a, long long b)
{
   unsigned __int128 x = (unsigned __int128)(unsigned long long)a;
   unsigned __int128 y = (unsigned __int128)(unsigned long long)b;
   /* (a >> 63) is all-ones when a < 0, else 0: selects b as the correction
      for a negative a, and likewise a for a negative b.  */
   unsigned long long t1 = (a >> 63) & b;
   unsigned long long t2 = (b >> 63) & a;
   unsigned __int128 u128 = x * y;
   /* Wrap-around in the 64-bit subtraction is intended (arithmetic mod 2^64). */
   unsigned long long hi = (u128 >> 64) - (t1 + t2);
   unsigned long long lo = (unsigned long long)u128;
   unsigned __int128 res = hi;
   res <<= 64;
   res |= lo;
   return (__int128)res;
}

# Compiler output for imul128_opt.  The 128-bit result is stored through the
# address in r2; r3 and r4 are read as the two 64-bit operands (a and b in
# the C source).  Only one multiply instruction (mlgr) is issued.
# NOTE(review): this listing ANDs each operand with its OWN sign mask; the
# unsigned-to-signed correction needs a's sign mask applied to b and b's
# sign mask applied to a.  Confirm against the corrected source.
_Z11imul128_optxx:
.LFB1:
        .cfi_startproc
        # Park r12 and r13 in floating-point registers so the r12:r13 pair
        # can receive the 128-bit product; they are restored before return.
        ldgr    %f2,%r12
        .cfi_register 12, 17
        ldgr    %f0,%r13
        .cfi_register 13, 16
        # The single multiply: unsigned 64x64->128, r12:r13 = r3 * r4
        # (r12 = high half, r13 = low half).
        lgr     %r13,%r3
        mlgr    %r12,%r4
        # r3 = (r3 >> 63) & r3  -- first operand masked by its own sign.
        lgr     %r1,%r3
        srag    %r3,%r3,63
        ngr     %r3,%r1
        # r4 = r4 & (r4 >> 63)  -- second operand masked by its own sign.
        srag    %r1,%r4,63
        ngr     %r4,%r1
        # r3 = high half (r12) minus the sum of the two masked values.
        agr     %r3,%r4
        sgrk    %r3,%r12,%r3
        # Store the low half at 8(r2).
        stg     %r13,8(%r2)
        # Restore r12 and r13 from the floating-point registers.
        lgdr    %r12,%f2
        .cfi_restore 12
        lgdr    %r13,%f0
        .cfi_restore 13
        # Store the corrected high half at 0(r2) and return.
        stg     %r3,0(%r2)
        br      %r14
        .cfi_endproc

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-11-25 19:17 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-29 12:30 [Bug target/102117] New: s390: Inefficient code for 64x64=128 signed multiply for <= z13 jens.seifert at de dot ibm.com
2021-08-29 12:49 ` [Bug target/102117] " jens.seifert at de dot ibm.com
2021-11-20 13:16 ` roger at nextmovesoftware dot com
2021-11-21 11:41 ` cvs-commit at gcc dot gnu.org
2021-11-25 19:17 ` roger at nextmovesoftware dot com

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).