public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug target/62178] New: [AArch64] Performance regression on matrix matrix multiply due to r211211
@ 2014-08-18 22:47 spop at gcc dot gnu.org
  2014-08-18 22:58 ` [Bug target/62178] " pinskia at gcc dot gnu.org
                   ` (9 more replies)
  0 siblings, 10 replies; 11+ messages in thread
From: spop at gcc dot gnu.org @ 2014-08-18 22:47 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62178

            Bug ID: 62178
           Summary: [AArch64] Performance regression on matrix matrix
                    multiply due to r211211
           Product: gcc
           Version: 5.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: spop at gcc dot gnu.org

/*
 * Reduced test case: 30x30 integer matrix multiply, r = a * b.
 * Each array is declared 31x31 and only indices 1..30 are used below, so
 * row 0 and column 0 are never accessed by Intmm.
 */
int a[30 +1][30 +1], b[30 +1][30 +1], r[30 +1][30 +1];

/*
 * Compute r[i][j] = sum over k of a[i][k] * b[k][j] for i, j, k in 1..30.
 * The `run` parameter is not referenced in the body.
 */
void Intmm (int run) {
  int i, j, k;

  for ( i = 1; i <= 30; i++ )
    for ( j = 1; j <= 30; j++ ) {
      r[i][j] = 0;
      /* Innermost loop over k: reads a[i][k] along row i and b[k][j] down
         column j, accumulating directly into r[i][j]. */
      for(k = 1; k <= 30; k++ )
        r[i][j] += a[i][k]*b[k][j];
    }
}

Compile this at -O3 with the last good compiler (r211210) and with the first
bad compiler (r211211), then diff the assembly:

--- good.s    2014-08-18 17:44:26.179506000 -0500
+++ bad.s    2014-08-18 17:44:26.213807000 -0500
@@ -6,45 +6,44 @@
     .type    Intmm, %function
 Intmm:
     movi    v3.2s, 0
-    adrp    x6, a+128
-    adrp    x8, r+128
-    adrp    x10, r+3848
-    adrp    x9, b+128
-    adrp    x7, b+248
-    add    x6, x6, :lo12:a+128
-    add    x8, x8, :lo12:r+128
-    add    x10, x10, :lo12:r+3848
-    add    x9, x9, :lo12:b+128
-    add    x7, x7, :lo12:b+248
+    adrp    x6, r+128
+    adrp    x4, a+124
+    adrp    x8, r+3848
+    adrp    x7, b
+    add    x6, x6, :lo12:r+128
+    add    x4, x4, :lo12:a+124
+    add    x8, x8, :lo12:r+3848
+    add    x7, x7, :lo12:b
 .L2:
-    mov    x5, x8
-    mov    x4, x8
-    mov    x3, x9
+    mov    x5, 0
 .L4:
-    str    d3, [x4]
-    add    x2, x3, 3720
-    movi    v0.2s, 0
-    mov    x1, x6
-    mov    x0, x3
+    str    d3, [x6, x5]
+    add    x3, x5, 128
+    movi    v1.2s, 0
+    add    x3, x3, x7
+    mov    x0, 0
 .L3:
-    ldr    d1, [x0]
-    add    x0, x0, 124
-    ld1r    {v2.2s}, [x1], 4
-    cmp    x0, x2
-    mla    v0.2s, v2.2s, v1.2s
+    add    x1, x4, x0
+    lsl    x2, x0, 5
+    sub    x2, x2, x0
+    add    x0, x0, 4
+    cmp    x0, 120
+    ldr    w1, [x1, 4]
+    ldr    d2, [x3, x2]
+    dup    v0.2s, w1
+    mla    v1.2s, v0.2s, v2.2s
     bne    .L3
-    str    d0, [x5], 8
-    add    x3, x3, 8
-    cmp    x3, x7
-    add    x4, x4, 8
+    str    d1, [x6, x5]
+    add    x5, x5, 8
+    cmp    x5, 120
     bne    .L4
-    add    x8, x8, 124
     add    x6, x6, 124
-    cmp    x8, x10
+    add    x4, x4, 124
+    cmp    x6, x8
     bne    .L2
     ret
     .size    Intmm, .-Intmm
     .comm    r,3844,8
     .comm    b,3844,8
     .comm    a,3844,8

Note that the innermost loop .L3 contains 4 more instructions with the bad
compiler (10 instead of 6), due to extra scalar computations for the addressing
modes:

 .L3:
-    ldr    d1, [x0]
-    add    x0, x0, 124
-    ld1r    {v2.2s}, [x1], 4
-    cmp    x0, x2
-    mla    v0.2s, v2.2s, v1.2s
+    add    x1, x4, x0
+    lsl    x2, x0, 5
+    sub    x2, x2, x0
+    add    x0, x0, 4
+    cmp    x0, 120
+    ldr    w1, [x1, 4]
+    ldr    d2, [x3, x2]
+    dup    v0.2s, w1
+    mla    v1.2s, v0.2s, v2.2s
     bne    .L3


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2015-04-02  6:46 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-08-18 22:47 [Bug target/62178] New: [AArch64] Performance regression on matrix matrix multiply due to r211211 spop at gcc dot gnu.org
2014-08-18 22:58 ` [Bug target/62178] " pinskia at gcc dot gnu.org
2014-08-19 10:52 ` amker.cheng at gmail dot com
2014-10-28 11:27 ` [Bug middle-end/62178] [5.0 regression] " ramana at gcc dot gnu.org
2014-11-24 13:06 ` rguenth at gcc dot gnu.org
2014-11-25  1:35 ` amker.cheng at gmail dot com
2014-11-27 14:35 ` ramana at gcc dot gnu.org
2014-12-18  2:54 ` amker at gcc dot gnu.org
2014-12-18  2:57 ` amker at gcc dot gnu.org
2014-12-22 10:30 ` amker at gcc dot gnu.org
2015-04-02  6:46 ` yroux at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).