[Bug target/54236] [SH] Improve addc and subc insn utilization

public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed

From: "olegendo at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug target/54236] [SH] Improve addc and subc insn utilization
Date: Sun, 03 Nov 2013 17:50:00 -0000	[thread overview]
Message-ID: <bug-54236-4-tNKFocnPY2@http.gcc.gnu.org/bugzilla/> (raw)
In-Reply-To: <bug-54236-4@http.gcc.gnu.org/bugzilla/>

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54236

--- Comment #6 from Oleg Endo <olegendo at gcc dot gnu.org> ---
Created attachment 31144
  --> http://gcc.gnu.org/bugzilla/attachment.cgi?id=31144&action=edit
stitching addc insns

The attached patch is an example that shows how widening additions can be
stitched together.  One application would be arithmetic on user defined integer
types with an arbitrary number of bits.
For example (requires C++11):

// Unsigned multi-word integer holding at least `Bits` bits.  The value is
// stored as an array of words; operator + propagates the carry from index 0
// upwards, so word[0] is the least significant word.
template <unsigned int Bits> class integer
{
public:
  // Storage unit for one word of the number.
  typedef unsigned int word_type;
  // Wider type that holds the sum of two words plus a carry.
  // NOTE(review): assumes unsigned long long is wider than word_type, so the
  // carry-out of a word addition lands above bit word_bit_count - 1.
  typedef unsigned long long ext_word_type;

  // Requested width in bits, as given by the template argument.
  static constexpr unsigned int bit_count = Bits;
  // Width of one storage word in bits (assumes 8-bit bytes).
  static constexpr unsigned int word_bit_count = sizeof (word_type) * 8;
  // Number of words needed for bit_count bits, rounded up to whole words.
  static constexpr unsigned int word_count = (bit_count + word_bit_count - 1)
                                              / word_bit_count;

private:
  // The words of the value; no constructor initializes them.
  word_type word[word_count];

public:
  // Word-wise addition with carry propagation.  Returns a + b truncated to
  // word_count words; the carry out of the last word is discarded.
  friend integer
  operator + (const integer& a, const integer& b)
  {
    // Every element of result.word is written by the loop below.
    integer result;

    word_type carry = 0;
    for (unsigned int i = 0; i < word_count; ++i)
    {
      // Widen both operands before adding so the carry-out of this word is
      // kept in the upper part of sum instead of being lost.
      auto sum = (ext_word_type)a.word[i] + (ext_word_type)b.word[i] + carry;
      // The low word of the sum is the result for this position.
      result.word[i] = (word_type)sum;
      // Any bits above the low word mean this position overflowed; normalize
      // that to a 0/1 carry for the next iteration.
      carry = (sum >> word_bit_count) == 0 ? 0 : 1;
    }

    return result;
  }
};

With this patch, the following examples, compiled with -funroll-all-loops -O2,
demonstrate the effect:

--------------------------

// Test case: adds two integer<64> values using the template's operator +.
integer<64> test_02 (const integer<64>& a, const integer<64>& b)
{
  return a + b;
}

        mov.l   @r5,r1
        mov.l   @r4,r0
        clrt
        mov.l   @(4,r5),r2
        addc    r1,r0
        mov.l   @(4,r4),r1
        rts
        addc    r2,r1

This is the same as a 'native' 64-bit addition.

--------------------------

// Test case: adds two integer<80> values; the bit count is not a multiple of
// the word size, so the template rounds the storage up to whole words.
integer<80> test_03 (const integer<80>& a, const integer<80>& b)
{
  return a + b;
}

        mov.l   @r5,r3
        mov.l   @r4,r1
        clrt
        mov.l   @(4,r5),r0
        mov.l   @(4,r4),r6
        addc    r3,r1
        mov.l   @(8,r5),r5
        mov.l   @(8,r4),r4
        addc    r0,r6
        mov.l   r1,@r2
        mov     r2,r0
        addc    r5,r4
        mov.l   r6,@(4,r2)
        rts
        mov.l   r4,@(8,r2)

80 bits are rounded up to 96 in the template 'integer', thus 3 addc insns are
required to do the 96 bit addition.

However, when compiling without loop unrolling, it doesn't work because there
is no mechanism to feed back the carry variable inside a loop.  The test_03
function becomes:

        mov.l   r8,@-r15
        mov.l   r9,@-r15
        mov.l   r10,@-r15
        mov.l   r11,@-r15
        mov     #0,r0
        mov     #0,r10
.L4:
        mov.l   @(r0,r4),r3
        clrt
        mov.l   @(r0,r5),r1
        mov     r10,r7     // r7 = carry from previous iteration
        mov     #0,r6
        mov     r1,r9
        addc    r3,r9      // r9 = a[i] + b[i] (lsw of 64 bit result)
        movt    r10        // r10 = carry (msw of 64 bit result)
        clrt
        mov     r9,r11
        addc    r7,r11     // r11 = r9 + previous carry (lsw of 64 bit result)
        addc    r6,r10     // r10 = carry for next iteration
        mov.l   r11,@(r0,r2)
        add     #4,r0
        cmp/eq  #12,r0
        bf      .L4

        mov.l   @r15+,r11
        mov     r2,r0
        mov.l   @r15+,r10
        mov.l   @r15+,r9
        rts
        mov.l   @r15+,r8


Ideally, the loop could be something like this:

        mov     #0,r0
        mov     #0,r6
.L4:
        mov.l   @(r0,r4),r3
        mov.l   @(r0,r5),r1
        cmp/pl  r6         // T = r6 > 0 (get carry into T bit)
        addc    r3,r1      // r1 = a[i] + b[i] + T (lsw of 64 bit result)
        movt    r6         // r6 = new carry (msw of 64 bit result)
        mov.l   r1,@(r0,r2)
        add     #4,r0
        cmp/eq  #12,r0
        bf      .L4

        rts
        mov     r2,r0

However, that would require some loop analysis in order to discover the T bit
feedback opportunity.

next prev parent reply	other threads:[~2013-11-03 17:50 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-08-12 22:25 [Bug target/54236] New: " olegendo at gcc dot gnu.org
2012-08-12 22:35 ` [Bug target/54236] " olegendo at gcc dot gnu.org
2012-08-16 23:20 ` olegendo at gcc dot gnu.org
2012-09-19 17:46 ` olegendo at gcc dot gnu.org
2013-09-22 22:22 ` olegendo at gcc dot gnu.org
2013-10-29 20:46 ` olegendo at gcc dot gnu.org
2013-11-02 10:20 ` olegendo at gcc dot gnu.org
2013-11-03 17:50 ` olegendo at gcc dot gnu.org [this message]
2013-11-03 20:44 ` olegendo at gcc dot gnu.org
2014-05-21  8:06 ` olegendo at gcc dot gnu.org
2014-09-27 17:42 ` olegendo at gcc dot gnu.org
2015-01-24 13:05 ` olegendo at gcc dot gnu.org
2015-01-25 16:42 ` olegendo at gcc dot gnu.org
2015-05-17 21:02 ` olegendo at gcc dot gnu.org
2015-05-19  8:01 ` olegendo at gcc dot gnu.org
2015-05-21 12:37 ` olegendo at gcc dot gnu.org
2015-07-19  5:32 ` olegendo at gcc dot gnu.org
2015-07-19  6:47 ` olegendo at gcc dot gnu.org
2015-09-28 14:01 ` olegendo at gcc dot gnu.org

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bug-54236-4-tNKFocnPY2@http.gcc.gnu.org/bugzilla/ \
    --to=gcc-bugzilla@gcc.gnu.org \
    --cc=gcc-bugs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).