public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/94269] New: widening_mul should consider block frequency
@ 2020-03-23 9:47 felix.yang at huawei dot com
2020-03-23 14:14 ` [Bug tree-optimization/94269] " rguenth at gcc dot gnu.org
2020-03-26 7:36 ` cvs-commit at gcc dot gnu.org
0 siblings, 2 replies; 3+ messages in thread
From: felix.yang at huawei dot com @ 2020-03-23 9:47 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94269
Bug ID: 94269
Summary: widening_mul should consider block frequency
Product: gcc
Version: 10.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: felix.yang at huawei dot com
Target Milestone: ---
Test case:
float
calc(long n, float *x, int inc_x,
float *y, int inc_y)
{
float dot = 0.0;
int ix = 0, iy = 0;
if (n < 0) {
return dot;
}
int i = 0;
while (i < n) {
dot += y[iy] * x[ix];
ix += inc_x;
iy += inc_y;
i++;
}
return dot;
}
Command line: aarch64-linux-gnu-gcc -S -O2 -fopt-info -ftree-loop-vectorize
-funsafe-math-optimizations -march=armv8.2-a+sve -msve-vector-bits=256 calc.c
calc:
.LFB0:
.cfi_startproc
cmp x0, 0
ble .L4
mov w7, w0
mov x5, x3
mov w6, 32
mov x3, x1
mov x1, 0
index z4.s, #0, w4
index z3.s, #0, w2
whilelo p0.s, wzr, w0
mov z0.s, #0
.p2align 3,,7
.L3:
ld1w z1.s, p0/z, [x5, z4.s, sxtw 2]
ld1w z2.s, p0/z, [x3, z3.s, sxtw 2]
add x1, x1, 8
fmla z0.s, p0/m, z1.s, z2.s
smaddl x5, w4, w6, x5 <==============
whilelo p0.s, w1, w7
smaddl x3, w2, w6, x3 <==============
b.any .L3
ptrue p0.b, vl32
faddv s0, p0, z0.s
ret
Command line: aarch64-linux-gnu-gcc -S -O2 -fopt-info -ftree-loop-vectorize
-funsafe-math-optimizations -march=armv8.2-a+sve -msve-vector-bits=256 calc.c
-fdisable-tree-widening_mul
calc:
.LFB0:
.cfi_startproc
cmp x0, 0
ble .L4
sbfiz x8, x4, 5, 32
sbfiz x7, x2, 5, 32
mov w6, w0
mov x5, x3
mov x3, x1
mov x1, 0
index z4.s, #0, w4
index z3.s, #0, w2
whilelo p0.s, wzr, w0
mov z0.s, #0
ptrue p1.b, vl32
.p2align 3,,7
.L3:
ld1w z1.s, p0/z, [x5, z4.s, sxtw 2]
ld1w z2.s, p0/z, [x3, z3.s, sxtw 2]
add x1, x1, 8
fmul z1.s, z1.s, z2.s
add x5, x5, x8 <=============
fadd z0.s, p0/m, z0.s, z1.s
add x3, x3, x7 <=============
whilelo p0.s, w1, w6
b.any .L3
faddv s0, p1, z0.s
ret
The widening_mul pass moves the two multiply instructions from outside the loop
to inside the loop, fusing each of them with one of the two add instructions
(the smaddl instructions marked above). This increases the cost of the loop.
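I think widening_mul should take basic block frequency into account and skip
such a combination when the multiply is in a block that executes less
frequently than the block containing the add.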
I mean something like:
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 54ba035..4439452 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -2721,7 +2721,10 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi,
gimple *stmt,
{
if (!has_single_use (rhs1)
|| !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
- &type2, &mult_rhs2))
+ &type2, &mult_rhs2)
+ || (gimple_bb (rhs1_stmt) != gimple_bb (stmt)
+ && gimple_bb (rhs1_stmt)->count.to_frequency(cfun)
+ < gimple_bb (stmt)->count.to_frequency(cfun)))
return false;
add_rhs = rhs2;
conv_stmt = conv1_stmt;
@@ -2730,7 +2733,10 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi,
gimple *stmt,
{
if (!has_single_use (rhs2)
|| !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
- &type2, &mult_rhs2))
+ &type2, &mult_rhs2)
+ || (gimple_bb (rhs2_stmt) != gimple_bb (stmt)
+ && gimple_bb (rhs2_stmt)->count.to_frequency(cfun)
+ < gimple_bb (stmt)->count.to_frequency(cfun)))
return false;
add_rhs = rhs1;
conv_stmt = conv2_stmt;
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2020-03-26 7:36 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-23 9:47 [Bug tree-optimization/94269] New: widening_mul should consider block frequency felix.yang at huawei dot com
2020-03-23 14:14 ` [Bug tree-optimization/94269] " rguenth at gcc dot gnu.org
2020-03-26 7:36 ` cvs-commit at gcc dot gnu.org
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).