public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug target/17950] New: Over Aggressive Use of Data Cache Touch Instructions During FDO
@ 2004-10-12 17:48 steinmtz at us dot ibm dot com
  2004-10-12 17:51 ` [Bug rtl-optimization/17950] " pinskia at gcc dot gnu dot org
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: steinmtz at us dot ibm dot com @ 2004-10-12 17:48 UTC (permalink / raw)
  To: gcc-bugs

The following test case illustrates an example where dcbtst instructions are 
being inserted too aggressively during FDO.

Test Case:

typedef struct {
  union {
    short arr[64];
    short arr2[8][8];
  }un;
} str, *strPtr;

str blah;

int main (int argc, char *argv[])
{
  
  int i, x, y;
  strPtr ptr;

  ptr = &blah;

  for( y=0; y<5; y++){
    for(x=0; x<=5; x++){
      for (i = 0; i < (sizeof(ptr->un.arr)/8); i++) {
        *(((double *) ptr->un.arr)+i) = +0.0;
      }
      ptr++;
    }
  }

  return 0;
}

Commands (using gcc 4.0)

gcc -O2 -fprofile-generate -funroll-loops -o bug bug.c
./bug
gcc -O2 -fprofile-use -o bug bug.c

Generated Code - notice that a touch is done every eight bytes.

main:
	stwu 1,-64(1)
	li 0,5
	lis 9,blah@ha
	li 11,0
	li 12,0
	mtctr 0
	la 8,blah@l(9)
	stw 21,20(1)
	stw 22,24(1)
	stw 23,28(1)
	stw 24,32(1)
	stw 25,36(1)
	stw 26,40(1)
	stw 27,44(1)
	stw 28,48(1)
	stw 29,52(1)
	stw 30,56(1)
	stw 31,60(1)
	.p2align 4,,15
.L2:
	addi 22,8,128
	addi 23,8,136
	addi 7,8,144
	addi 6,8,152
	addi 5,8,160
	addi 4,8,168
	stw 11,0(8)
	stw 12,4(8)
	stw 11,8(8)
	stw 12,12(8)
	dcbtst 0,23
	dcbtst 0,7
	dcbtst 0,6
	dcbtst 0,22
	addi 3,8,176
	addi 31,8,184
	dcbtst 0,5
	dcbtst 0,4
	addi 30,8,192
	addi 29,8,200
	addi 28,8,208
	addi 27,8,216
	dcbtst 0,3
	dcbtst 0,31
	addi 26,8,224
	addi 25,8,232
	dcbtst 0,30
	dcbtst 0,29
	addi 24,8,240
	addi 21,8,248
	dcbtst 0,28
	dcbtst 0,27
	mr 9,22
	addi 23,8,264
	dcbtst 0,26
	dcbtst 0,25
	addi 22,22,128
	addi 7,8,272
	dcbtst 0,24
	dcbtst 0,21
	addi 6,8,280
	addi 5,8,288
	dcbtst 0,23
	addi 4,8,296
	addi 3,8,304
	dcbtst 0,22
	dcbtst 0,7
	addi 31,8,312
	addi 30,8,320
	dcbtst 0,6
	dcbtst 0,5
	addi 29,8,328
	addi 28,8,336
	dcbtst 0,4
	dcbtst 0,3
	addi 27,8,344
	addi 26,8,352
	dcbtst 0,31
	dcbtst 0,30
	addi 25,8,360
	addi 24,8,368
	dcbtst 0,29
	dcbtst 0,28
	addi 21,8,376
	mr 10,22
	dcbtst 0,27
	dcbtst 0,26
	addi 23,9,264
	addi 7,9,272
	dcbtst 0,25
	dcbtst 0,24
	addi 6,9,280
	addi 5,9,288
	dcbtst 0,21
	addi 4,9,296
	addi 21,9,376
	stw 11,16(8)
	stw 12,20(8)
	dcbtst 0,23
	addi 3,9,304
	stw 11,24(8)
	stw 12,28(8)
	dcbtst 0,7
	dcbtst 0,6
	addi 31,9,312
	dcbtst 0,5
	dcbtst 0,4
	addi 30,9,320
	addi 29,9,328
	dcbtst 0,3
	dcbtst 0,21
	addi 28,9,336
	addi 27,9,344
	dcbtst 0,31
	addi 26,9,352
	addi 25,9,360
	stw 11,32(8)
	stw 12,36(8)
	dcbtst 0,30
	addi 24,9,368
	stw 11,40(8)
	stw 12,44(8)
	dcbtst 0,29
	dcbtst 0,28
	addi 22,22,128
	dcbtst 0,27
	dcbtst 0,26
	addi 23,8,520
	addi 7,8,528
	dcbtst 0,25
	dcbtst 0,24
	addi 6,8,536
	addi 5,8,544
	dcbtst 0,22
	stw 11,48(8)
	stw 12,52(8)
	addi 22,8,512
	addi 4,8,552
	dcbtst 0,23
	stw 11,56(8)
	stw 12,60(8)
	addi 3,8,560
	dcbtst 0,7
	stw 11,64(8)
	stw 12,68(8)
	addi 31,8,568
	dcbtst 0,22
	stw 11,72(8)
	stw 12,76(8)
	addi 30,8,576
	dcbtst 0,6
	stw 11,80(8)
	stw 12,84(8)
	addi 29,8,584
	dcbtst 0,5
	stw 11,88(8)
	stw 12,92(8)
	addi 28,8,592
	dcbtst 0,4
	stw 11,96(8)
	stw 12,100(8)
	addi 27,8,600
	dcbtst 0,3
	stw 11,104(8)
	stw 12,108(8)
	addi 26,8,608
	dcbtst 0,31
	stw 11,112(8)
	stw 12,116(8)
	addi 25,8,616
	dcbtst 0,30
	stw 11,120(8)
	stw 12,124(8)
	addi 24,8,624
	dcbtst 0,29
	stw 11,128(8)
	stw 12,132(8)
	addi 21,8,632
	dcbtst 0,28
	stw 11,8(9)
	stw 12,12(9)
	addi 22,8,640
	dcbtst 0,27
	stw 11,16(9)
	stw 12,20(9)
	addi 23,8,648
	dcbtst 0,26
	stw 11,24(9)
	stw 12,28(9)
	addi 7,8,656
	dcbtst 0,25
	stw 11,32(9)
	stw 12,36(9)
	addi 6,8,664
	dcbtst 0,24
	stw 11,40(9)
	stw 12,44(9)
	addi 5,8,672
	dcbtst 0,21
	stw 11,48(9)
	stw 12,52(9)
	addi 4,8,680
	dcbtst 0,22
	stw 11,56(9)
	stw 12,60(9)
	addi 3,8,688
	dcbtst 0,23
	stw 11,64(9)
	stw 12,68(9)
	addi 31,8,696
	dcbtst 0,7
	stw 11,72(9)
	stw 12,76(9)
	addi 30,8,704
	dcbtst 0,6
	stw 11,80(9)
	stw 12,84(9)
	addi 29,8,712
	dcbtst 0,5
	stw 11,88(9)
	stw 12,92(9)
	addi 28,8,720
	dcbtst 0,4
	stw 11,96(9)
	stw 12,100(9)
	addi 27,8,728
	dcbtst 0,3
	stw 11,104(9)
	stw 12,108(9)
	addi 26,8,736
	dcbtst 0,31
	stw 11,112(9)
	stw 12,116(9)
	addi 25,8,744
	dcbtst 0,30
	stw 11,120(9)
	stw 12,124(9)
	addi 24,8,752
	dcbtst 0,29
	stw 11,128(9)
	stw 12,132(9)
	addi 21,8,760
	dcbtst 0,28
	stw 11,8(10)
	stw 12,12(10)
	addi 22,8,768
	dcbtst 0,27
	stw 11,16(10)
	stw 12,20(10)
	addi 23,8,776
	dcbtst 0,26
	stw 11,120(10)
	stw 12,124(10)
	addi 7,8,784
	dcbtst 0,25
	stw 11,24(10)
	stw 12,28(10)
	addi 6,8,792
	dcbtst 0,24
	stw 11,32(10)
	stw 12,36(10)
	addi 5,8,800
	dcbtst 0,21
	stw 11,40(10)
	stw 12,44(10)
	addi 21,8,888
	dcbtst 0,23
	stw 11,48(10)
	stw 12,52(10)
	addi 4,8,808
	dcbtst 0,7
	stw 11,56(10)
	stw 12,60(10)
	addi 3,8,816
	dcbtst 0,6
	stw 11,64(10)
	stw 12,68(10)
	addi 31,8,824
	dcbtst 0,5
	stw 11,72(10)
	stw 12,76(10)
	addi 30,8,832
	dcbtst 0,4
	stw 11,80(10)
	stw 12,84(10)
	addi 29,8,840
	dcbtst 0,3
	stw 11,88(10)
	stw 12,92(10)
	addi 28,8,848
	dcbtst 0,31
	stw 11,96(10)
	stw 12,100(10)
	addi 27,8,856
	dcbtst 0,30
	stw 11,104(10)
	stw 12,108(10)
	addi 26,8,864
	dcbtst 0,29
	stw 11,112(10)
	stw 12,116(10)
	addi 10,8,384
	dcbtst 0,28
	stw 11,384(8)
	stw 12,388(8)
	addi 25,8,872
	dcbtst 0,27
	stw 11,8(10)
	stw 12,12(10)
	addi 24,8,880
	dcbtst 0,26
	stw 11,16(10)
	stw 12,20(10)
	dcbtst 0,25
	dcbtst 0,21
	dcbtst 0,24
	dcbtst 0,22
	stw 11,120(10)
	stw 12,124(10)
	stw 11,24(10)
	stw 12,28(10)
	stw 11,32(10)
	stw 12,36(10)
	stw 11,40(10)
	stw 12,44(10)
	stw 11,48(10)
	stw 12,52(10)
	stw 11,56(10)
	stw 12,60(10)
	stw 11,64(10)
	stw 12,68(10)
	stw 11,72(10)
	stw 12,76(10)
	stw 11,80(10)
	stw 12,84(10)
	stw 11,88(10)
	stw 12,92(10)
	stw 11,96(10)
	stw 12,100(10)
	stw 11,104(10)
	stw 12,108(10)
	stw 11,112(10)
	stw 12,116(10)
	addi 10,8,512
	stw 11,512(8)
	stw 12,516(8)
	stw 11,8(10)
	stw 12,12(10)
	stw 11,16(10)
	stw 12,20(10)
	stw 11,24(10)
	stw 12,28(10)
	stw 11,120(10)
	stw 12,124(10)
	stw 11,32(10)
	stw 12,36(10)
	stw 11,40(10)
	stw 12,44(10)
	stw 11,48(10)
	stw 12,52(10)
	stw 11,56(10)
	stw 12,60(10)
	stw 11,64(10)
	stw 12,68(10)
	stw 11,72(10)
	stw 12,76(10)
	stw 11,80(10)
	stw 12,84(10)
	stw 11,88(10)
	stw 12,92(10)
	stw 11,96(10)
	stw 12,100(10)
	stw 11,104(10)
	stw 12,108(10)
	stw 11,112(10)
	stw 12,116(10)
	addi 10,8,640
	stw 11,640(8)
	stw 12,644(8)
	stw 11,120(10)
	stw 12,124(10)
	stw 11,8(10)
	stw 12,12(10)
	stw 11,16(10)
	stw 12,20(10)
	stw 11,24(10)
	stw 12,28(10)
	stw 11,32(10)
	stw 12,36(10)
	stw 11,40(10)
	stw 12,44(10)
	stw 11,48(10)
	stw 12,52(10)
	stw 11,56(10)
	stw 12,60(10)
	stw 11,64(10)
	stw 12,68(10)
	stw 11,72(10)
	stw 12,76(10)
	stw 11,80(10)
	stw 12,84(10)
	stw 11,88(10)
	stw 12,92(10)
	stw 11,96(10)
	stw 12,100(10)
	stw 11,104(10)
	stw 12,108(10)
	stw 11,112(10)
	stw 12,116(10)
	mr 8,22
	bdnz .L2
	li 3,0
	lwz 21,20(1)
	lwz 22,24(1)
	lwz 23,28(1)
	lwz 24,32(1)
	lwz 25,36(1)
	lwz 26,40(1)
	lwz 27,44(1)
	lwz 28,48(1)
	lwz 29,52(1)
	lwz 30,56(1)
	lwz 31,60(1)
	addi 1,1,64
	blr

-- 
           Summary: Over Aggressive Use of Data Cache Touch Instructions
                    During FDO
           Product: gcc
           Version: 4.0.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P2
         Component: target
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: steinmtz at us dot ibm dot com
                CC: gcc-bugs at gcc dot gnu dot org,steinmtz at us dot ibm
                    dot com
 GCC build triplet: powerpc64-linux
  GCC host triplet: powerpc64-linux
GCC target triplet: powerpc64-linux


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17950


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug rtl-optimization/17950] Over Aggressive Use of Data Cache Touch Instructions During FDO
  2004-10-12 17:48 [Bug target/17950] New: Over Aggressive Use of Data Cache Touch Instructions During FDO steinmtz at us dot ibm dot com
@ 2004-10-12 17:51 ` pinskia at gcc dot gnu dot org
  2004-10-12 19:45 ` pinskia at gcc dot gnu dot org
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: pinskia at gcc dot gnu dot org @ 2004-10-12 17:51 UTC (permalink / raw)
  To: gcc-bugs



-- 
           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |rakdver at gcc dot gnu dot
                   |                            |org
          Component|target                      |rtl-optimization
           Keywords|                            |missed-optimization


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17950


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug rtl-optimization/17950] Over Aggressive Use of Data Cache Touch Instructions During FDO
  2004-10-12 17:48 [Bug target/17950] New: Over Aggressive Use of Data Cache Touch Instructions During FDO steinmtz at us dot ibm dot com
  2004-10-12 17:51 ` [Bug rtl-optimization/17950] " pinskia at gcc dot gnu dot org
@ 2004-10-12 19:45 ` pinskia at gcc dot gnu dot org
  2004-11-17 14:34 ` nathan at gcc dot gnu dot org
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: pinskia at gcc dot gnu dot org @ 2004-10-12 19:45 UTC (permalink / raw)
  To: gcc-bugs


------- Additional Comments From pinskia at gcc dot gnu dot org  2004-10-12 19:45 -------
Confirmed.

-- 
           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |NEW
     Ever Confirmed|                            |1
   Last reconfirmed|0000-00-00 00:00:00         |2004-10-12 19:45:45
               date|                            |


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17950


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug rtl-optimization/17950] Over Aggressive Use of Data Cache Touch Instructions During FDO
  2004-10-12 17:48 [Bug target/17950] New: Over Aggressive Use of Data Cache Touch Instructions During FDO steinmtz at us dot ibm dot com
  2004-10-12 17:51 ` [Bug rtl-optimization/17950] " pinskia at gcc dot gnu dot org
  2004-10-12 19:45 ` pinskia at gcc dot gnu dot org
@ 2004-11-17 14:34 ` nathan at gcc dot gnu dot org
  2004-11-17 14:41 ` rakdver at atrey dot karlin dot mff dot cuni dot cz
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: nathan at gcc dot gnu dot org @ 2004-11-17 14:34 UTC (permalink / raw)
  To: gcc-bugs


------- Additional Comments From nathan at gcc dot gnu dot org  2004-11-17 14:34 -------
I cannot reproduce this with the 20041118 CVS head on powerpc64-unknown-linux-gnu.
Can you confirm the command line options and provide the bug.gcno and bug.gcda files?

-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17950


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug rtl-optimization/17950] Over Aggressive Use of Data Cache Touch Instructions During FDO
  2004-10-12 17:48 [Bug target/17950] New: Over Aggressive Use of Data Cache Touch Instructions During FDO steinmtz at us dot ibm dot com
                   ` (2 preceding siblings ...)
  2004-11-17 14:34 ` nathan at gcc dot gnu dot org
@ 2004-11-17 14:41 ` rakdver at atrey dot karlin dot mff dot cuni dot cz
  2004-11-18  1:32 ` jgrimm2 at us dot ibm dot com
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: rakdver at atrey dot karlin dot mff dot cuni dot cz @ 2004-11-17 14:41 UTC (permalink / raw)
  To: gcc-bugs


------- Additional Comments From rakdver at atrey dot karlin dot mff dot cuni dot cz  2004-11-17 14:41 -------
Subject: Re:  Over Aggressive Use of Data Cache Touch Instructions During FDO

Hello,

> I cannot reproduce this with the 20041118 CVS head on powerpc64-unknown-linux-gnu.

I think this is because speculative prefetching is now disabled by
default.  You should have no problems with reproducing the bug if
you switch it on (-fspeculative-prefetching).

Zdenek


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17950


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug rtl-optimization/17950] Over Aggressive Use of Data Cache Touch Instructions During FDO
  2004-10-12 17:48 [Bug target/17950] New: Over Aggressive Use of Data Cache Touch Instructions During FDO steinmtz at us dot ibm dot com
                   ` (3 preceding siblings ...)
  2004-11-17 14:41 ` rakdver at atrey dot karlin dot mff dot cuni dot cz
@ 2004-11-18  1:32 ` jgrimm2 at us dot ibm dot com
  2004-11-23  1:43 ` jgrimm2 at us dot ibm dot com
  2005-07-29 11:46 ` pinskia at gcc dot gnu dot org
  6 siblings, 0 replies; 8+ messages in thread
From: jgrimm2 at us dot ibm dot com @ 2004-11-18  1:32 UTC (permalink / raw)
  To: gcc-bugs


------- Additional Comments From jgrimm2 at us dot ibm dot com  2004-11-18 01:32 -------
Hmmm... looks like a couple things (maybe more) come into play here.  First,
value-profile code doesn't seem to merge prefetches (among other things).  That
is, it should be pretty easy (crude?) to track the last prefetch and not
prefetch addresses that look close.  Doing so seems to help this test case quite
a bit (96 prefetches down to 24), but probably not enough..... as it seems that
the loop unroller comes through afterwards and does its damage too.   Might need
to teach the unroller about prefetch a bit too??


-- 
           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |jgrimm2 at us dot ibm dot
                   |                            |com


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17950


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug rtl-optimization/17950] Over Aggressive Use of Data Cache Touch Instructions During FDO
  2004-10-12 17:48 [Bug target/17950] New: Over Aggressive Use of Data Cache Touch Instructions During FDO steinmtz at us dot ibm dot com
                   ` (4 preceding siblings ...)
  2004-11-18  1:32 ` jgrimm2 at us dot ibm dot com
@ 2004-11-23  1:43 ` jgrimm2 at us dot ibm dot com
  2005-07-29 11:46 ` pinskia at gcc dot gnu dot org
  6 siblings, 0 replies; 8+ messages in thread
From: jgrimm2 at us dot ibm dot com @ 2004-11-23  1:43 UTC (permalink / raw)
  To: gcc-bugs


------- Additional Comments From jgrimm2 at us dot ibm dot com  2004-11-23 01:43 -------
Finally got SPEC set up w/profiling to do a little comparison.  It's not done yet
(and only running c,c++ tests) but at first glance through the initial results, it
appears that -fspeculative-prefetching is damaging to spec results at least on
ppc64 (-m32).  I wanted to do this to gauge whether this bug entry was some
pathological testcase or whether this would also show up in regularly looked at tests.

I'm currently comparing -O2 -funroll-loops -fno-speculative-prefetching vs -O2
-funroll-loops -fspeculative-prefetching and using profiling for both runs.   

I'll run again to see if I can get those results.   Additionally, I'd like to
see what the behavior is when I limit the prefetches (don't emit prefetch if
just prefetched from near address + crudely limit maximum number of prefetches),
I just uploaded a patch for that, but haven't tested how it affects spec yet.




-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17950


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug rtl-optimization/17950] Over Aggressive Use of Data Cache Touch Instructions During FDO
  2004-10-12 17:48 [Bug target/17950] New: Over Aggressive Use of Data Cache Touch Instructions During FDO steinmtz at us dot ibm dot com
                   ` (5 preceding siblings ...)
  2004-11-23  1:43 ` jgrimm2 at us dot ibm dot com
@ 2005-07-29 11:46 ` pinskia at gcc dot gnu dot org
  6 siblings, 0 replies; 8+ messages in thread
From: pinskia at gcc dot gnu dot org @ 2005-07-29 11:46 UTC (permalink / raw)
  To: gcc-bugs


------- Additional Comments From pinskia at gcc dot gnu dot org  2005-07-29 11:42 -------
-fspeculative-prefetching has now been removed from the mainline.

-- 
           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
         Resolution|                            |WONTFIX
   Target Milestone|---                         |4.1.0


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17950


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2005-07-29 11:42 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-10-12 17:48 [Bug target/17950] New: Over Aggressive Use of Data Cache Touch Instructions During FDO steinmtz at us dot ibm dot com
2004-10-12 17:51 ` [Bug rtl-optimization/17950] " pinskia at gcc dot gnu dot org
2004-10-12 19:45 ` pinskia at gcc dot gnu dot org
2004-11-17 14:34 ` nathan at gcc dot gnu dot org
2004-11-17 14:41 ` rakdver at atrey dot karlin dot mff dot cuni dot cz
2004-11-18  1:32 ` jgrimm2 at us dot ibm dot com
2004-11-23  1:43 ` jgrimm2 at us dot ibm dot com
2005-07-29 11:46 ` pinskia at gcc dot gnu dot org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).