[Bug tree-optimization/110023] New: [10.3 Regression] 10% performance drop on important benchmark after r247544.

public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed

From: "d_vampile at 163 dot com" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug tree-optimization/110023] New: [10.3 Regression] 10% performance drop on important benchmark after r247544.
Date: Mon, 29 May 2023 14:20:36 +0000	[thread overview]
Message-ID: <bug-110023-4@http.gcc.gnu.org/bugzilla/> (raw)

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110023

            Bug ID: 110023
           Summary: [10.3 Regression] 10% performance drop on important
                    benchmark after r247544.
           Product: gcc
           Version: 10.3.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: d_vampile at 163 dot com
  Target Milestone: ---

Created attachment 55183
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=55183&action=edit
Open-source stream benchmark

The STREAM benchmark performance has regressed. The change to the loop peeling
policy in the vect_enhance_data_refs_alignment function degrades the benchmark
performance, which can be demonstrated with the example from the attachment.

Alternatively, you can obtain it from
https://github.com/jeffhammond/stream/archive/master.zip.

Compiling & Running:
gcc -fopenmp -O -DSTREAM_ARRAY_SIZE=100000000 stream.c  -o stream
./stream

The patch that modified the loop peeling policy of the
vect_enhance_data_refs_alignment function is available from:
https://gcc.gnu.org/git/?p=gcc.git&a=commit;h=49ab46214e9288ee1268f87ddcd64dacfd21c31d

With OpenMP enabled (-fopenmp), the generated code is as follows.

Before the modification (Add kernel):
ldr d0, [x5, x1, lsl #3]
fadd d0, d0, d1
str d0, [x4, x1, lsl #3]
mov w4, w2
sub w7, w7, w2
add x4, x4, x1
ldr x1, [x10, #888]
lsl x4, x4, #3
lsr w8, w7, #1
add x6, x4, x6
add x5, x4, x5
mov w2, #0x0 // #0
add x4, x4, x1
mov x1, #0x0 // #0
ldr q0, [x5, x1]
add w2, w2, #0x1
ldr q1, [x6, x1]
cmp w2, w8
fadd v0.2d, v0.2d, v1.2d
str q0, [x4, x1]
add x1, x1, #0x10
b.cc 4012d8 <main._omp_fn.4+0xd8>
and w1, w7, #0xfffffffe
add w0, w0, w1
cmp w7, w1
b.eq 401348 <main._omp_fn.4+0x148>
ldr x5, [x9, #880]
sxtw x1, w0
ldr x4, [x11, #896]
add w0, w0, #0x1
ldr d1, [x5, x1, lsl #3]
cmp w3, w0
ldr x2, [x10, #888]
ldr d0, [x4, x1, lsl #3]
fadd d0, d0, d1
str d0, [x2, x1, lsl #3]
b.le 401348 <main._omp_fn.4+0x148>
sxtw x0, w0
ldr d0, [x5, x0, lsl #3]
ldr d1, [x4, x0, lsl #3]
fadd d0, d0, d1
str d0, [x2, x0, lsl #3]
ldr x19, [sp, #16]
ldp x29, x30, [sp], #32

After the modification:
mov x29, sp
str x19, [sp, #16]
bl 4006e0 <omp_get_num_threads@plt>
mov w19, w0
bl 4006b0 <omp_get_thread_num@plt>
mov w2, #0x8000 // #32768
movk w2, #0x61a, lsl #16
sdiv w1, w2, w19
msub w2, w1, w19, w2
cmp w0, w2
b.ge 401238 <main._omp_fn.4+0x38>
add w1, w1, #0x1
mov w2, #0x0 // #0
madd w0, w1, w0, w2
add w1, w1, w0
cmp w0, w1
b.ge 4012d8 <main._omp_fn.4+0xd8>
sub w2, w1, w0
adrp x8, 401000 <main._omp_fn.3+0x100>
adrp x9, 401000 <main._omp_fn.3+0x100>
adrp x7, 401000 <main._omp_fn.3+0x100>
cmp w2, #0x1
b.eq 4012b8 <main._omp_fn.4+0xb8>
ldr x1, [x7, #760]
sbfiz x4, x0, #3, #32
ldr x6, [x8, #744]
lsr w10, w2, #1
ldr x5, [x9, #752]
mov w3, #0x0 // #0
add x6, x4, x6
add x5, x4, x5
add x4, x4, x1
mov x1, #0x0 // #0
ldr q0, [x6, x1]
add w3, w3, #0x1
ldr q1, [x5, x1]
cmp w3, w10
fadd v0.2d, v0.2d, v1.2d
str q0, [x4, x1]
add x1, x1, #0x10
b.cc 401288 <main._omp_fn.4+0x88>
and w1, w2, #0xfffffffe
add w0, w0, w1
cmp w2, w1
b.eq 4012d8 <main._omp_fn.4+0xd8>
ldr x3, [x9, #752]
sxtw x0, w0
ldr x2, [x8, #744]
ldr x1, [x7, #760]
ldr d0, [x3, x0, lsl #3]
ldr d1, [x2, x0, lsl #3]
fadd d0, d0, d1
str d0, [x1, x0, lsl #3]
ldr x19, [sp, #16]
ldp x29, x30, [sp], #32
ret

After the peeling policy was modified, the vectorizer no longer attempts to
peel the for loop of the Add kernel, yet the resulting performance is worse
than before.

next             reply	other threads:[~2023-05-29 14:20 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-29 14:20 d_vampile at 163 dot com [this message]
2023-05-29 14:31 ` [Bug target/110023] " pinskia at gcc dot gnu.org
2023-05-30 14:46 ` d_vampile at 163 dot com

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bug-110023-4@http.gcc.gnu.org/bugzilla/ \
    --to=gcc-bugzilla@gcc.gnu.org \
    --cc=gcc-bugs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).