[Bug target/110647] [14 Regression] 66% TSVC/s2712 regressoin on N1-neoverse between g:620a35b24a2b6edb (2023-07-01 07:24) and g:80ae426a195a0d03 (2023-07-02 01:37)

public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed

From: "hubicka at gcc dot gnu.org" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug target/110647] [14 Regression] 66% TSVC/s2712 regressoin on N1-neoverse between g:620a35b24a2b6edb (2023-07-01 07:24) and g:80ae426a195a0d03 (2023-07-02 01:37)
Date: Fri, 14 Jul 2023 18:10:18 +0000	[thread overview]
Message-ID: <bug-110647-4-mx5whi1JXC@http.gcc.gnu.org/bugzilla/> (raw)
In-Reply-To: <bug-110647-4@http.gcc.gnu.org/bugzilla/>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110647

--- Comment #2 from Jan Hubicka <hubicka at gcc dot gnu.org> ---
This is a testcase based on our testsuite version, so it can be copied to
Compiler Explorer:

#define iterations 10000
#define LEN_1D 32000
#define LEN_2D 256
#define ARRAY_ALIGNMENT 64

typedef float real_t;
#define ABS fabsf
__attribute__((aligned(ARRAY_ALIGNMENT)))
real_t flat_2d_array[LEN_2D * LEN_2D];
__attribute__((aligned(ARRAY_ALIGNMENT))) real_t x[LEN_1D];
__attribute__((aligned(ARRAY_ALIGNMENT))) real_t a[LEN_1D], b[LEN_1D],
    c[LEN_1D], d[LEN_1D], e[LEN_1D], aa[LEN_2D][LEN_2D], bb[LEN_2D][LEN_2D],
    cc[LEN_2D][LEN_2D], tt[LEN_2D][LEN_2D];
__attribute__((aligned(ARRAY_ALIGNMENT))) int indx[LEN_1D];

int dummy(real_t[LEN_1D], real_t[LEN_1D], real_t[LEN_1D], real_t[LEN_1D],
          real_t[LEN_1D], real_t[LEN_2D][LEN_2D], real_t[LEN_2D][LEN_2D],
          real_t[LEN_2D][LEN_2D], real_t);
real_t s2712(struct args_t * func_args)
{
//    control flow
//    if to elemental min


    for (int nl = 0; nl < 4*iterations; nl++) {
        for (int i = 0; i < LEN_1D; i++) {
            if (a[i] >= b[i]) {
                a[i] += b[i] * c[i];
            }
        }
        dummy(a, b, c, d, e, aa, bb, cc, 0.);
    }
return 0;
}

So with GCC 13 I get:
s2712(args_t*):
        stp     x29, x30, [sp, -96]!
        mov     x29, sp
        stp     x19, x20, [sp, 16]
        adrp    x19, a
        adrp    x20, b
        add     x19, x19, :lo12:a
        add     x20, x20, :lo12:b
        stp     x21, x22, [sp, 32]
        adrp    x22, c
        mov     x21, 62464
        add     x22, x22, :lo12:c
        stp     x23, x24, [sp, 48]
        adrp    x24, e
        adrp    x23, d
        add     x24, x24, :lo12:e
        add     x23, x23, :lo12:d
        stp     x25, x26, [sp, 64]
        adrp    x26, bb
        adrp    x25, aa
        add     x26, x26, :lo12:bb
        add     x25, x25, :lo12:aa
        stp     x27, x28, [sp, 80]
        adrp    x27, cc
        add     x27, x27, :lo12:cc
        mov     w28, 40000
        movk    x21, 0x1, lsl 16
.L2:
        mov     x0, 0
.L5:
        ldr     s0, [x19, x0]
        ldr     s1, [x20, x0]
        fcmpe   s0, s1
        bge     .L7
.L3:
        add     x0, x0, 4
        cmp     x0, x21
        bne     .L5
        movi    v0.2s, #0
        mov     x7, x27
        mov     x6, x26
        mov     x5, x25
        mov     x4, x24
        mov     x3, x23
        mov     x2, x22
        mov     x1, x20
        mov     x0, x19
        bl      dummy(float*, float*, float*, float*, float*, float (*) [256],
float (*) [256], float (*) [256], float)
        subs    w28, w28, #1
        bne     .L2
        ldp     x19, x20, [sp, 16]
        movi    v0.2s, #0
        ldp     x21, x22, [sp, 32]
        ldp     x23, x24, [sp, 48]
        ldp     x25, x26, [sp, 64]
        ldp     x27, x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ret
.L7:
        ldr     s2, [x22, x0]
        fmadd   s0, s1, s2, s0
        str     s0, [x19, x0]
        b       .L3

and trunk:
s2712(args_t*):
        stp     x29, x30, [sp, -96]!
        mov     x29, sp
        stp     x19, x20, [sp, 16]
        adrp    x19, a
        adrp    x20, b
        add     x19, x19, :lo12:a
        add     x20, x20, :lo12:b
        stp     x21, x22, [sp, 32]
        adrp    x22, c
        mov     x21, 62464
        add     x22, x22, :lo12:c
        stp     x23, x24, [sp, 48]
        adrp    x24, e
        adrp    x23, d
        add     x24, x24, :lo12:e
        add     x23, x23, :lo12:d
        stp     x25, x26, [sp, 64]
        adrp    x26, bb
        adrp    x25, aa
        add     x26, x26, :lo12:bb
        add     x25, x25, :lo12:aa
        stp     x27, x28, [sp, 80]
        adrp    x27, cc
        add     x27, x27, :lo12:cc
        mov     w28, 40000
        movk    x21, 0x1, lsl 16
.L2:
        mov     x0, 0
.L5:
        ldr     s31, [x19, x0]
        ldr     s30, [x20, x0]
        fcmpe   s31, s30
        bge     .L7
.L3:
        add     x0, x0, 4
        cmp     x0, x21
        bne     .L5
        movi    v0.2s, #0
        mov     x7, x27
        mov     x6, x26
        mov     x5, x25
        mov     x4, x24
        mov     x3, x23
        mov     x2, x22
        mov     x1, x20
        mov     x0, x19
        bl      dummy(float*, float*, float*, float*, float*, float (*) [256],
float (*) [256], float (*) [256], float)
        subs    w28, w28, #1
        bne     .L2
        ldp     x19, x20, [sp, 16]
        movi    v0.2s, #0
        ldp     x21, x22, [sp, 32]
        ldp     x23, x24, [sp, 48]
        ldp     x25, x26, [sp, 64]
        ldp     x27, x28, [sp, 80]
        ldp     x29, x30, [sp], 96
        ret
.L7:
        ldr     s29, [x22, x0]
        fmadd   s31, s30, s29, s31
        str     s31, [x19, x0]
        b       .L3

The only difference seems to be:
 .L2:
         mov     x0, 0
 .L5:
-        ldr     s31, [x19, x0]
-        ldr     s30, [x20, x0]
-        fcmpe   s31, s30
+        ldr     s0, [x19, x0]
+        ldr     s1, [x20, x0]
+        fcmpe   s0, s1
         bge     .L7
 .L3:
         add     x0, x0, 4
@@ -57,7 +57,7 @@
         ldp     x29, x30, [sp], 96
         ret
 .L7:
-        ldr     s29, [x22, x0]
-        fmadd   s31, s30, s29, s31
-        str     s31, [x19, x0]
+        ldr     s2, [x22, x0]
+        fmadd   s0, s1, s2, s0
+        str     s0, [x19, x0]
         b       .L3

which suggests that it is just noise (caused by a code layout change in the whole
benchmark) after all?

next prev parent reply	other threads:[~2023-07-14 18:10 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-12 20:53 [Bug middle-end/110647] New: " hubicka at gcc dot gnu.org
2023-07-12 20:58 ` [Bug target/110647] [14 Regression] " pinskia at gcc dot gnu.org
2023-07-12 21:01 ` pinskia at gcc dot gnu.org
2023-07-14 18:10 ` hubicka at gcc dot gnu.org [this message]
2024-03-07 23:29 ` law at gcc dot gnu.org
2024-05-07  7:41 ` [Bug target/110647] [14/15 " rguenth at gcc dot gnu.org

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bug-110647-4-mx5whi1JXC@http.gcc.gnu.org/bugzilla/ \
    --to=gcc-bugzilla@gcc.gnu.org \
    --cc=gcc-bugs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).