public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug target/17950] New: Over Aggressive Use of Data Cache Touch Instructions During FDO
@ 2004-10-12 17:48 steinmtz at us dot ibm dot com
2004-10-12 17:51 ` [Bug rtl-optimization/17950] " pinskia at gcc dot gnu dot org
` (6 more replies)
0 siblings, 7 replies; 8+ messages in thread
From: steinmtz at us dot ibm dot com @ 2004-10-12 17:48 UTC (permalink / raw)
To: gcc-bugs
The following test case illustrates an example where dcbtst instructions are
being inserted too aggressively during FDO.
Test Case:
typedef struct {
union {
short arr[64];
short arr2[8][8];
}un;
} str, *strPtr;
str blah;
int main (int argc, char *argv[])
{
int i, x, y;
strPtr ptr;
ptr = &blah;
for( y=0; y<5; y++){
for(x=0; x<=5; x++){
for (i = 0; i < (sizeof(ptr->un.arr)/8); i++) {
*(((double *) ptr->un.arr)+i) = +0.0;
}
ptr++;
}
}
return 0;
}
Commands (using gcc 4.0)
gcc -O2 -fprofile-generate -funroll-loops -o bug bug.c
./bug
gcc -O2 -fprofile-use -o bug bug.c
Generated Code - notice that a touch is done every eight bytes.
main:
stwu 1,-64(1)
li 0,5
lis 9,blah@ha
li 11,0
li 12,0
mtctr 0
la 8,blah@l(9)
stw 21,20(1)
stw 22,24(1)
stw 23,28(1)
stw 24,32(1)
stw 25,36(1)
stw 26,40(1)
stw 27,44(1)
stw 28,48(1)
stw 29,52(1)
stw 30,56(1)
stw 31,60(1)
.p2align 4,,15
.L2:
addi 22,8,128
addi 23,8,136
addi 7,8,144
addi 6,8,152
addi 5,8,160
addi 4,8,168
stw 11,0(8)
stw 12,4(8)
stw 11,8(8)
stw 12,12(8)
dcbtst 0,23
dcbtst 0,7
dcbtst 0,6
dcbtst 0,22
addi 3,8,176
addi 31,8,184
dcbtst 0,5
dcbtst 0,4
addi 30,8,192
addi 29,8,200
addi 28,8,208
addi 27,8,216
dcbtst 0,3
dcbtst 0,31
addi 26,8,224
addi 25,8,232
dcbtst 0,30
dcbtst 0,29
addi 24,8,240
addi 21,8,248
dcbtst 0,28
dcbtst 0,27
mr 9,22
addi 23,8,264
dcbtst 0,26
dcbtst 0,25
addi 22,22,128
addi 7,8,272
dcbtst 0,24
dcbtst 0,21
addi 6,8,280
addi 5,8,288
dcbtst 0,23
addi 4,8,296
addi 3,8,304
dcbtst 0,22
dcbtst 0,7
addi 31,8,312
addi 30,8,320
dcbtst 0,6
dcbtst 0,5
addi 29,8,328
addi 28,8,336
dcbtst 0,4
dcbtst 0,3
addi 27,8,344
addi 26,8,352
dcbtst 0,31
dcbtst 0,30
addi 25,8,360
addi 24,8,368
dcbtst 0,29
dcbtst 0,28
addi 21,8,376
mr 10,22
dcbtst 0,27
dcbtst 0,26
addi 23,9,264
addi 7,9,272
dcbtst 0,25
dcbtst 0,24
addi 6,9,280
addi 5,9,288
dcbtst 0,21
addi 4,9,296
addi 21,9,376
stw 11,16(8)
stw 12,20(8)
dcbtst 0,23
addi 3,9,304
stw 11,24(8)
stw 12,28(8)
dcbtst 0,7
dcbtst 0,6
addi 31,9,312
dcbtst 0,5
dcbtst 0,4
addi 30,9,320
addi 29,9,328
dcbtst 0,3
dcbtst 0,21
addi 28,9,336
addi 27,9,344
dcbtst 0,31
addi 26,9,352
addi 25,9,360
stw 11,32(8)
stw 12,36(8)
dcbtst 0,30
addi 24,9,368
stw 11,40(8)
stw 12,44(8)
dcbtst 0,29
dcbtst 0,28
addi 22,22,128
dcbtst 0,27
dcbtst 0,26
addi 23,8,520
addi 7,8,528
dcbtst 0,25
dcbtst 0,24
addi 6,8,536
addi 5,8,544
dcbtst 0,22
stw 11,48(8)
stw 12,52(8)
addi 22,8,512
addi 4,8,552
dcbtst 0,23
stw 11,56(8)
stw 12,60(8)
addi 3,8,560
dcbtst 0,7
stw 11,64(8)
stw 12,68(8)
addi 31,8,568
dcbtst 0,22
stw 11,72(8)
stw 12,76(8)
addi 30,8,576
dcbtst 0,6
stw 11,80(8)
stw 12,84(8)
addi 29,8,584
dcbtst 0,5
stw 11,88(8)
stw 12,92(8)
addi 28,8,592
dcbtst 0,4
stw 11,96(8)
stw 12,100(8)
addi 27,8,600
dcbtst 0,3
stw 11,104(8)
stw 12,108(8)
addi 26,8,608
dcbtst 0,31
stw 11,112(8)
stw 12,116(8)
addi 25,8,616
dcbtst 0,30
stw 11,120(8)
stw 12,124(8)
addi 24,8,624
dcbtst 0,29
stw 11,128(8)
stw 12,132(8)
addi 21,8,632
dcbtst 0,28
stw 11,8(9)
stw 12,12(9)
addi 22,8,640
dcbtst 0,27
stw 11,16(9)
stw 12,20(9)
addi 23,8,648
dcbtst 0,26
stw 11,24(9)
stw 12,28(9)
addi 7,8,656
dcbtst 0,25
stw 11,32(9)
stw 12,36(9)
addi 6,8,664
dcbtst 0,24
stw 11,40(9)
stw 12,44(9)
addi 5,8,672
dcbtst 0,21
stw 11,48(9)
stw 12,52(9)
addi 4,8,680
dcbtst 0,22
stw 11,56(9)
stw 12,60(9)
addi 3,8,688
dcbtst 0,23
stw 11,64(9)
stw 12,68(9)
addi 31,8,696
dcbtst 0,7
stw 11,72(9)
stw 12,76(9)
addi 30,8,704
dcbtst 0,6
stw 11,80(9)
stw 12,84(9)
addi 29,8,712
dcbtst 0,5
stw 11,88(9)
stw 12,92(9)
addi 28,8,720
dcbtst 0,4
stw 11,96(9)
stw 12,100(9)
addi 27,8,728
dcbtst 0,3
stw 11,104(9)
stw 12,108(9)
addi 26,8,736
dcbtst 0,31
stw 11,112(9)
stw 12,116(9)
addi 25,8,744
dcbtst 0,30
stw 11,120(9)
stw 12,124(9)
addi 24,8,752
dcbtst 0,29
stw 11,128(9)
stw 12,132(9)
addi 21,8,760
dcbtst 0,28
stw 11,8(10)
stw 12,12(10)
addi 22,8,768
dcbtst 0,27
stw 11,16(10)
stw 12,20(10)
addi 23,8,776
dcbtst 0,26
stw 11,120(10)
stw 12,124(10)
addi 7,8,784
dcbtst 0,25
stw 11,24(10)
stw 12,28(10)
addi 6,8,792
dcbtst 0,24
stw 11,32(10)
stw 12,36(10)
addi 5,8,800
dcbtst 0,21
stw 11,40(10)
stw 12,44(10)
addi 21,8,888
dcbtst 0,23
stw 11,48(10)
stw 12,52(10)
addi 4,8,808
dcbtst 0,7
stw 11,56(10)
stw 12,60(10)
addi 3,8,816
dcbtst 0,6
stw 11,64(10)
stw 12,68(10)
addi 31,8,824
dcbtst 0,5
stw 11,72(10)
stw 12,76(10)
addi 30,8,832
dcbtst 0,4
stw 11,80(10)
stw 12,84(10)
addi 29,8,840
dcbtst 0,3
stw 11,88(10)
stw 12,92(10)
addi 28,8,848
dcbtst 0,31
stw 11,96(10)
stw 12,100(10)
addi 27,8,856
dcbtst 0,30
stw 11,104(10)
stw 12,108(10)
addi 26,8,864
dcbtst 0,29
stw 11,112(10)
stw 12,116(10)
addi 10,8,384
dcbtst 0,28
stw 11,384(8)
stw 12,388(8)
addi 25,8,872
dcbtst 0,27
stw 11,8(10)
stw 12,12(10)
addi 24,8,880
dcbtst 0,26
stw 11,16(10)
stw 12,20(10)
dcbtst 0,25
dcbtst 0,21
dcbtst 0,24
dcbtst 0,22
stw 11,120(10)
stw 12,124(10)
stw 11,24(10)
stw 12,28(10)
stw 11,32(10)
stw 12,36(10)
stw 11,40(10)
stw 12,44(10)
stw 11,48(10)
stw 12,52(10)
stw 11,56(10)
stw 12,60(10)
stw 11,64(10)
stw 12,68(10)
stw 11,72(10)
stw 12,76(10)
stw 11,80(10)
stw 12,84(10)
stw 11,88(10)
stw 12,92(10)
stw 11,96(10)
stw 12,100(10)
stw 11,104(10)
stw 12,108(10)
stw 11,112(10)
stw 12,116(10)
addi 10,8,512
stw 11,512(8)
stw 12,516(8)
stw 11,8(10)
stw 12,12(10)
stw 11,16(10)
stw 12,20(10)
stw 11,24(10)
stw 12,28(10)
stw 11,120(10)
stw 12,124(10)
stw 11,32(10)
stw 12,36(10)
stw 11,40(10)
stw 12,44(10)
stw 11,48(10)
stw 12,52(10)
stw 11,56(10)
stw 12,60(10)
stw 11,64(10)
stw 12,68(10)
stw 11,72(10)
stw 12,76(10)
stw 11,80(10)
stw 12,84(10)
stw 11,88(10)
stw 12,92(10)
stw 11,96(10)
stw 12,100(10)
stw 11,104(10)
stw 12,108(10)
stw 11,112(10)
stw 12,116(10)
addi 10,8,640
stw 11,640(8)
stw 12,644(8)
stw 11,120(10)
stw 12,124(10)
stw 11,8(10)
stw 12,12(10)
stw 11,16(10)
stw 12,20(10)
stw 11,24(10)
stw 12,28(10)
stw 11,32(10)
stw 12,36(10)
stw 11,40(10)
stw 12,44(10)
stw 11,48(10)
stw 12,52(10)
stw 11,56(10)
stw 12,60(10)
stw 11,64(10)
stw 12,68(10)
stw 11,72(10)
stw 12,76(10)
stw 11,80(10)
stw 12,84(10)
stw 11,88(10)
stw 12,92(10)
stw 11,96(10)
stw 12,100(10)
stw 11,104(10)
stw 12,108(10)
stw 11,112(10)
stw 12,116(10)
mr 8,22
bdnz .L2
li 3,0
lwz 21,20(1)
lwz 22,24(1)
lwz 23,28(1)
lwz 24,32(1)
lwz 25,36(1)
lwz 26,40(1)
lwz 27,44(1)
lwz 28,48(1)
lwz 29,52(1)
lwz 30,56(1)
lwz 31,60(1)
addi 1,1,64
blr
--
Summary: Over Aggressive Use of Data Cache Touch Instructions
During FDO
Product: gcc
Version: 4.0.0
Status: UNCONFIRMED
Severity: normal
Priority: P2
Component: target
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: steinmtz at us dot ibm dot com
CC: gcc-bugs at gcc dot gnu dot org,steinmtz at us dot ibm
dot com
GCC build triplet: powerpc64-linux
GCC host triplet: powerpc64-linux
GCC target triplet: powerpc64-linux
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17950
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug rtl-optimization/17950] Over Aggressive Use of Data Cache Touch Instructions During FDO
2004-10-12 17:48 [Bug target/17950] New: Over Aggressive Use of Data Cache Touch Instructions During FDO steinmtz at us dot ibm dot com
@ 2004-10-12 17:51 ` pinskia at gcc dot gnu dot org
2004-10-12 19:45 ` pinskia at gcc dot gnu dot org
` (5 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: pinskia at gcc dot gnu dot org @ 2004-10-12 17:51 UTC (permalink / raw)
To: gcc-bugs
--
What |Removed |Added
----------------------------------------------------------------------------
CC| |rakdver at gcc dot gnu dot
| |org
Component|target |rtl-optimization
Keywords| |missed-optimization
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17950
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug rtl-optimization/17950] Over Aggressive Use of Data Cache Touch Instructions During FDO
2004-10-12 17:48 [Bug target/17950] New: Over Aggressive Use of Data Cache Touch Instructions During FDO steinmtz at us dot ibm dot com
2004-10-12 17:51 ` [Bug rtl-optimization/17950] " pinskia at gcc dot gnu dot org
@ 2004-10-12 19:45 ` pinskia at gcc dot gnu dot org
2004-11-17 14:34 ` nathan at gcc dot gnu dot org
` (4 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: pinskia at gcc dot gnu dot org @ 2004-10-12 19:45 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From pinskia at gcc dot gnu dot org 2004-10-12 19:45 -------
Confirmed.
--
What |Removed |Added
----------------------------------------------------------------------------
Status|UNCONFIRMED |NEW
Ever Confirmed| |1
Last reconfirmed|0000-00-00 00:00:00 |2004-10-12 19:45:45
date| |
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17950
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug rtl-optimization/17950] Over Aggressive Use of Data Cache Touch Instructions During FDO
2004-10-12 17:48 [Bug target/17950] New: Over Aggressive Use of Data Cache Touch Instructions During FDO steinmtz at us dot ibm dot com
2004-10-12 17:51 ` [Bug rtl-optimization/17950] " pinskia at gcc dot gnu dot org
2004-10-12 19:45 ` pinskia at gcc dot gnu dot org
@ 2004-11-17 14:34 ` nathan at gcc dot gnu dot org
2004-11-17 14:41 ` rakdver at atrey dot karlin dot mff dot cuni dot cz
` (3 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: nathan at gcc dot gnu dot org @ 2004-11-17 14:34 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From nathan at gcc dot gnu dot org 2004-11-17 14:34 -------
I cannot reproduce this with the 20041118 CVS head on powerpc64-unknown-linux-gnu.
Can you confirm the command line options and provide the bug.gcno and bug.gcda files?
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17950
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug rtl-optimization/17950] Over Aggressive Use of Data Cache Touch Instructions During FDO
2004-10-12 17:48 [Bug target/17950] New: Over Aggressive Use of Data Cache Touch Instructions During FDO steinmtz at us dot ibm dot com
` (2 preceding siblings ...)
2004-11-17 14:34 ` nathan at gcc dot gnu dot org
@ 2004-11-17 14:41 ` rakdver at atrey dot karlin dot mff dot cuni dot cz
2004-11-18 1:32 ` jgrimm2 at us dot ibm dot com
` (2 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: rakdver at atrey dot karlin dot mff dot cuni dot cz @ 2004-11-17 14:41 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From rakdver at atrey dot karlin dot mff dot cuni dot cz 2004-11-17 14:41 -------
Subject: Re: Over Aggressive Use of Data Cache Touch Instructions During FDO
Hello,
> I cannot reproduce this with the 20041118 CVS head on powerpc64-unknown-linux-gnu.
I think this is because speculative prefetching is now disabled by
default. You should have no problems with reproducing the bug if
you switch it on (-fspeculative-prefetching).
Zdenek
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17950
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug rtl-optimization/17950] Over Aggressive Use of Data Cache Touch Instructions During FDO
2004-10-12 17:48 [Bug target/17950] New: Over Aggressive Use of Data Cache Touch Instructions During FDO steinmtz at us dot ibm dot com
` (3 preceding siblings ...)
2004-11-17 14:41 ` rakdver at atrey dot karlin dot mff dot cuni dot cz
@ 2004-11-18 1:32 ` jgrimm2 at us dot ibm dot com
2004-11-23 1:43 ` jgrimm2 at us dot ibm dot com
2005-07-29 11:46 ` pinskia at gcc dot gnu dot org
6 siblings, 0 replies; 8+ messages in thread
From: jgrimm2 at us dot ibm dot com @ 2004-11-18 1:32 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From jgrimm2 at us dot ibm dot com 2004-11-18 01:32 -------
Hmmm... looks like a couple of things (maybe more) come into play here. First,
value-profile code doesn't seem to merge prefetches (among other things). That
is, it should be pretty easy (crude?) to track the last prefetch and not
prefetch addresses that look close. Doing so seems to help this test case quite
a bit (96 prefetches down to 24), but probably not enough..... as it seems that
the loop unroller comes through afterwards and does its damage too. Might need
to teach the unroller about prefetch a bit too??
--
What |Removed |Added
----------------------------------------------------------------------------
CC| |jgrimm2 at us dot ibm dot
| |com
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17950
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug rtl-optimization/17950] Over Aggressive Use of Data Cache Touch Instructions During FDO
2004-10-12 17:48 [Bug target/17950] New: Over Aggressive Use of Data Cache Touch Instructions During FDO steinmtz at us dot ibm dot com
` (4 preceding siblings ...)
2004-11-18 1:32 ` jgrimm2 at us dot ibm dot com
@ 2004-11-23 1:43 ` jgrimm2 at us dot ibm dot com
2005-07-29 11:46 ` pinskia at gcc dot gnu dot org
6 siblings, 0 replies; 8+ messages in thread
From: jgrimm2 at us dot ibm dot com @ 2004-11-23 1:43 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From jgrimm2 at us dot ibm dot com 2004-11-23 01:43 -------
Finally got SPEC set up w/profiling to do a little comparison. It's not done yet
(and only running c,c++ tests) but at first glance through the initial results, it
appears that -fspeculative-prefetching is damaging to spec results at least on
ppc64 (-m32). I wanted to do this to gauge whether this bug entry was some
pathological testcase or this would also show up in regularly looked at tests.
I'm currently comparing -O2 -funroll-loops -fno-speculative-prefetching vs -O2
-funroll-loops -fspeculative-prefetching and using profiling for both runs.
I'll run again to see if I can get those results. Additionally, I'd like to
see what the behavior is when I limit the prefetches (don't emit prefetch if
just prefetched from near address + crudely limit maximum number of prefetches).
I just uploaded a patch for that, but haven't tested how it affects spec yet.
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17950
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug rtl-optimization/17950] Over Aggressive Use of Data Cache Touch Instructions During FDO
2004-10-12 17:48 [Bug target/17950] New: Over Aggressive Use of Data Cache Touch Instructions During FDO steinmtz at us dot ibm dot com
` (5 preceding siblings ...)
2004-11-23 1:43 ` jgrimm2 at us dot ibm dot com
@ 2005-07-29 11:46 ` pinskia at gcc dot gnu dot org
6 siblings, 0 replies; 8+ messages in thread
From: pinskia at gcc dot gnu dot org @ 2005-07-29 11:46 UTC (permalink / raw)
To: gcc-bugs
------- Additional Comments From pinskia at gcc dot gnu dot org 2005-07-29 11:42 -------
-fspeculative-prefetching has now been removed from the mainline.
--
What |Removed |Added
----------------------------------------------------------------------------
Status|NEW |RESOLVED
Resolution| |WONTFIX
Target Milestone|--- |4.1.0
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17950
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2005-07-29 11:42 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-10-12 17:48 [Bug target/17950] New: Over Aggressive Use of Data Cache Touch Instructions During FDO steinmtz at us dot ibm dot com
2004-10-12 17:51 ` [Bug rtl-optimization/17950] " pinskia at gcc dot gnu dot org
2004-10-12 19:45 ` pinskia at gcc dot gnu dot org
2004-11-17 14:34 ` nathan at gcc dot gnu dot org
2004-11-17 14:41 ` rakdver at atrey dot karlin dot mff dot cuni dot cz
2004-11-18 1:32 ` jgrimm2 at us dot ibm dot com
2004-11-23 1:43 ` jgrimm2 at us dot ibm dot com
2005-07-29 11:46 ` pinskia at gcc dot gnu dot org
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).