public inbox for gcc-bugs@sourceware.org help / color / mirror / Atom feed
From: "felix.yang at huawei dot com" <gcc-bugzilla@gcc.gnu.org> To: gcc-bugs@gcc.gnu.org Subject: [Bug tree-optimization/94269] New: widening_mul should consider block frequency Date: Mon, 23 Mar 2020 09:47:07 +0000 [thread overview] Message-ID: <bug-94269-4@http.gcc.gnu.org/bugzilla/> (raw) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94269 Bug ID: 94269 Summary: widening_mul should consider block frequency Product: gcc Version: 10.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: tree-optimization Assignee: unassigned at gcc dot gnu.org Reporter: felix.yang at huawei dot com Target Milestone: --- Test case: float calc(long n, float *x, int inc_x, float *y, int inc_y) { float dot = 0.0; int ix = 0, iy = 0; if (n < 0) { return dot; } int i = 0; while (i < n) { dot += y[iy] * x[ix]; ix += inc_x; iy += inc_y; i++; } return dot; } Command line: aarch64-linux-gnu-gcc -S -O2 -fopt-info -ftree-loop-vectorize -funsafe-math-optimizations -march=armv8.2-a+sve -msve-vector-bits=256 calc.c calc: .LFB0: .cfi_startproc cmp x0, 0 ble .L4 mov w7, w0 mov x5, x3 mov w6, 32 mov x3, x1 mov x1, 0 index z4.s, #0, w4 index z3.s, #0, w2 whilelo p0.s, wzr, w0 mov z0.s, #0 .p2align 3,,7 .L3: ld1w z1.s, p0/z, [x5, z4.s, sxtw 2] ld1w z2.s, p0/z, [x3, z3.s, sxtw 2] add x1, x1, 8 fmla z0.s, p0/m, z1.s, z2.s smaddl x5, w4, w6, x5 <============== whilelo p0.s, w1, w7 smaddl x3, w2, w6, x3 <============== b.any .L3 ptrue p0.b, vl32 faddv s0, p0, z0.s ret Command line: aarch64-linux-gnu-gcc -S -O2 -fopt-info -ftree-loop-vectorize -funsafe-math-optimizations -march=armv8.2-a+sve -msve-vector-bits=256 calc.c -fdisable-tree-widening_mul calc: .LFB0: .cfi_startproc cmp x0, 0 ble .L4 sbfiz x8, x4, 5, 32 sbfiz x7, x2, 5, 32 mov w6, w0 mov x5, x3 mov x3, x1 mov x1, 0 index z4.s, #0, w4 index z3.s, #0, w2 whilelo p0.s, wzr, w0 mov z0.s, #0 ptrue p1.b, vl32 .p2align 3,,7 .L3: ld1w z1.s, p0/z, [x5, z4.s, sxtw 2] ld1w z2.s, p0/z, [x3, z3.s, sxtw 2] add x1, x1, 8 fmul z1.s, z1.s, z2.s add x5, x5, 
x8 <============= fadd z0.s, p0/m, z0.s, z1.s add x3, x3, x7 <============= whilelo p0.s, w1, w6 b.any .L3 faddv s0, p1, z0.s ret The widening_mul pass moves the two multiply instructions from outside the loop to inside the loop, merging each of them with one of the two add instructions. This increases the cost of the loop. I think widening_mul should consider block frequency when doing such a combination. I mean something like: diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index 54ba035..4439452 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -2721,7 +2721,10 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt, { if (!has_single_use (rhs1) || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1, - &type2, &mult_rhs2)) + &type2, &mult_rhs2) + || (gimple_bb (rhs1_stmt) != gimple_bb (stmt) + && gimple_bb (rhs1_stmt)->count.to_frequency(cfun) + < gimple_bb (stmt)->count.to_frequency(cfun))) return false; add_rhs = rhs2; conv_stmt = conv1_stmt; @@ -2730,7 +2733,10 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt, { if (!has_single_use (rhs2) || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1, - &type2, &mult_rhs2)) + &type2, &mult_rhs2) + || (gimple_bb (rhs2_stmt) != gimple_bb (stmt) + && gimple_bb (rhs2_stmt)->count.to_frequency(cfun) + < gimple_bb (stmt)->count.to_frequency(cfun))) return false; add_rhs = rhs1; conv_stmt = conv2_stmt;
next reply other threads:[~2020-03-23 9:47 UTC|newest] Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top 2020-03-23 9:47 felix.yang at huawei dot com [this message] 2020-03-23 14:14 ` [Bug tree-optimization/94269] " rguenth at gcc dot gnu.org 2020-03-26 7:36 ` cvs-commit at gcc dot gnu.org
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=bug-94269-4@http.gcc.gnu.org/bugzilla/ \ --to=gcc-bugzilla@gcc.gnu.org \ --cc=gcc-bugs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).