public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
From: "felix.yang at huawei dot com" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug tree-optimization/94269] New: widening_mul should consider block frequency
Date: Mon, 23 Mar 2020 09:47:07 +0000	[thread overview]
Message-ID: <bug-94269-4@http.gcc.gnu.org/bugzilla/> (raw)

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94269

            Bug ID: 94269
           Summary: widening_mul should consider block frequency
           Product: gcc
           Version: 10.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: felix.yang at huawei dot com
  Target Milestone: ---

Test case:

/*
 * Reduced test case: strided single-precision dot product.
 *
 * Accumulates y[iy] * x[ix] over n iterations, advancing ix by inc_x and
 * iy by inc_y after each product.
 *
 *   n      number of products to accumulate
 *   x, y   input arrays
 *   inc_x  stride, in elements, applied to the x index each iteration
 *   inc_y  stride, in elements, applied to the y index each iteration
 *
 * Returns the accumulated sum; 0.0 when n is negative (explicit early
 * return) or zero (the loop body never runs).
 */
float
calc(long n, float *x, int inc_x,
             float *y, int inc_y)
{
  float dot = 0.0;
  /* Both running indices are int, like the strides added to them. */
  int ix = 0, iy = 0;

  if (n < 0) {
    return dot;
  }

  /* NOTE(review): i is int while n is long, so the comparison below is a
     mixed-width one; presumably n is expected to fit in int -- confirm.  */
  int i = 0;
  while (i < n) {
    dot += y[iy] * x[ix];
    /* Step each index by its own stride. */
    ix  += inc_x;
    iy  += inc_y;
    i++;
  }

  return dot;
}

Command line: aarch64-linux-gnu-gcc -S -O2 -fopt-info -ftree-loop-vectorize
-funsafe-math-optimizations -march=armv8.2-a+sve -msve-vector-bits=256 calc.c

calc:
.LFB0:
        .cfi_startproc
        cmp     x0, 0
        ble     .L4
        mov     w7, w0
        mov     x5, x3
        mov     w6, 32
        mov     x3, x1
        mov     x1, 0
        index   z4.s, #0, w4
        index   z3.s, #0, w2
        whilelo p0.s, wzr, w0
        mov     z0.s, #0
        .p2align 3,,7
.L3:
        ld1w    z1.s, p0/z, [x5, z4.s, sxtw 2]
        ld1w    z2.s, p0/z, [x3, z3.s, sxtw 2]
        add     x1, x1, 8
        fmla    z0.s, p0/m, z1.s, z2.s
        smaddl  x5, w4, w6, x5           <==============
        whilelo p0.s, w1, w7
        smaddl  x3, w2, w6, x3           <==============
        b.any   .L3
        ptrue   p0.b, vl32
        faddv   s0, p0, z0.s
        ret

Command line: aarch64-linux-gnu-gcc -S -O2 -fopt-info -ftree-loop-vectorize
-funsafe-math-optimizations -march=armv8.2-a+sve -msve-vector-bits=256 calc.c
-fdisable-tree-widening_mul

calc:
.LFB0:
        .cfi_startproc
        cmp     x0, 0
        ble     .L4
        sbfiz   x8, x4, 5, 32
        sbfiz   x7, x2, 5, 32
        mov     w6, w0
        mov     x5, x3
        mov     x3, x1
        mov     x1, 0
        index   z4.s, #0, w4
        index   z3.s, #0, w2
        whilelo p0.s, wzr, w0
        mov     z0.s, #0
        ptrue   p1.b, vl32
        .p2align 3,,7
.L3:
        ld1w    z1.s, p0/z, [x5, z4.s, sxtw 2]
        ld1w    z2.s, p0/z, [x3, z3.s, sxtw 2]
        add     x1, x1, 8
        fmul    z1.s, z1.s, z2.s
        add     x5, x5, x8             <=============
        fadd    z0.s, p0/m, z0.s, z1.s
        add     x3, x3, x7             <=============
        whilelo p0.s, w1, w6
        b.any   .L3
        faddv   s0, p1, z0.s
        ret

The widening_mul pass moves the two multiply instructions from outside the loop
to inside the loop, merging each of them with one of the two add instructions
(producing the smaddl instructions marked in the first listing).  This
increases the cost of the loop.

I think widening_mul should take block frequency into account when performing
such a combination, for example with something like:
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 54ba035..4439452 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -2721,7 +2721,10 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
     {
       if (!has_single_use (rhs1)
          || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
-                                 &type2, &mult_rhs2))
+                                 &type2, &mult_rhs2)
+         || (gimple_bb (rhs1_stmt) != gimple_bb (stmt)
+             && gimple_bb (rhs1_stmt)->count.to_frequency(cfun)
+                < gimple_bb (stmt)->count.to_frequency(cfun)))
        return false;
       add_rhs = rhs2;
       conv_stmt = conv1_stmt;
@@ -2730,7 +2733,10 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
     {
       if (!has_single_use (rhs2)
          || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
-                                 &type2, &mult_rhs2))
+                                 &type2, &mult_rhs2)
+         || (gimple_bb (rhs2_stmt) != gimple_bb (stmt)
+             && gimple_bb (rhs2_stmt)->count.to_frequency(cfun)
+                < gimple_bb (stmt)->count.to_frequency(cfun)))
        return false;
       add_rhs = rhs1;
       conv_stmt = conv2_stmt;

             reply	other threads:[~2020-03-23  9:47 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-23  9:47 felix.yang at huawei dot com [this message]
2020-03-23 14:14 ` [Bug tree-optimization/94269] " rguenth at gcc dot gnu.org
2020-03-26  7:36 ` cvs-commit at gcc dot gnu.org

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bug-94269-4@http.gcc.gnu.org/bugzilla/ \
    --to=gcc-bugzilla@gcc.gnu.org \
    --cc=gcc-bugs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).