public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug rtl-optimization/15633] New: Failure to propagate FDO counters by vpt results in performance regression
@ 2004-05-25 13:08 steinmtz at us dot ibm dot com
  2004-05-25 14:09 ` [Bug rtl-optimization/15633] " steinmtz at us dot ibm dot com
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: steinmtz at us dot ibm dot com @ 2004-05-25 13:08 UTC (permalink / raw)
  To: gcc-bugs

Using: gcc version 3.5.0 20040518 (experimental)

Note: A patch for this bug is currently being tested by Pat Haugen.  Let me 
know if you need his contact information.


Here is an example of a performance regression when using FDO.  The
"VolInt" component of Skidmarks runs about 120% slower using FDO.  I traced
it down to a loop that keeps floating-point data in GPRs instead of FPRs.
This forces the data to be moved between GPRs and FPRs through memory in
order to complete the floating-point operations.  The non-FDO version keeps
the data in FPRs, thus avoiding the spills to memory.

To reproduce, using the files I will attach:

gcc  -O2 -m32 -c volInt.i

Looking at the code for procedure "compProjectionIntegrals", you'll see that the
loop in question uses FPRs:

	fmr 16,15
	fmr 17,15
	fmr 18,15
	fmr 19,15
	fmr 20,15
	fmr 21,15
	fmr 22,15
	fmr 23,15
	fmr 24,15
.L4:
	addi 6,6,1
	lwz 10,0(7)
	divw 9,6,5
	addi 7,7,4
	mullw 9,9,5
	subf 9,9,6
	mulli 10,10,3
	slwi 9,9,2
	add 9,9,3
	add 8,10,12
	lwz 11,40(9)
	slwi 8,8,3
	add 8,8,0
	add 10,10,4
	mulli 11,11,3
	lfd 7,8(8)
	slwi 10,10,3
	add 9,11,4
	add 11,11,12
	slwi 9,9,3
	slwi 11,11,3
	add 9,9,0
	add 11,11,0
	lfd 0,8(9)
	fmul 30,7,7
	lfd 13,8(11)
	add 10,10,0
	fmul 12,0,0
	lfd 10,8(10)
	fadd 1,13,13
	fmul 8,13,13
	fmul 31,12,0
	fmul 1,1,7
	fadd 11,12,12
	fmul 12,12,25
	fmul 5,10,10
	fmadd 11,11,10,31
	fmul 6,30,7
	fmul 9,0,25
	fadd 2,1,8
	fmul 12,12,10
	fmul 4,5,10
	fmadd 9,9,5,11
	fmadd 2,30,25,2
	fmul 11,8,13
	fmul 3,6,26
	fmadd 31,31,26,12
	fadd 27,13,7
	fadd 29,0,10
	fmadd 8,8,25,1
	fadd 12,0,0
	fmadd 29,29,0,5
	fmadd 3,13,2,3
	fmadd 28,13,27,30
	fmadd 9,4,26,9
	fmul 11,11,26
	fmadd 12,12,5,31
	fadd 8,8,30
	fmadd 1,13,28,6
	fmadd 31,0,29,4
	fmadd 11,7,8,11
	fmul 9,7,9
	fmul 3,10,3
	fmul 6,6,7
	fmul 5,4,10
	fmul 2,10,2
	fadd 12,12,4
	fmadd 6,13,1,6
	fmadd 12,13,12,9
	fmadd 5,0,31,5
	fmadd 11,0,11,3
	fmadd 8,0,8,2
	fsub 13,13,7
	fsub 0,0,10
	fmadd 24,12,13,24
	fmadd 23,11,0,23
	fmadd 15,0,27,15
	fmadd 16,0,28,16
	fmadd 18,0,6,18
	fmadd 19,13,29,19
	fmadd 21,13,5,21
	fmadd 22,8,0,22
	fmadd 17,0,1,17
	fmadd 20,13,31,20
	bdnz .L4
	stfd 24,Pabb@l(22)
	stfd 23,Paab@l(23)
	stfd 22,Pab@l(27)
	stfd 21,Pbbb@l(24)
	stfd 20,Pbb@l(26)
	stfd 19,Pb@l(29)
	stfd 18,Paaa@l(28)
	stfd 17,Paa@l(30)
	stfd 16,Pa@l(31)
	stfd 15,P1@l(25)

If the same code is compiled using FDO, the subject loop contains numerous 
spills to memory.

gcc -fprofile-use -O2 -m32 -c volInt.i

Generates: 

	mr 18,14
	mr 19,15
	mr 20,14
	mr 21,15
	mr 22,14
	mr 23,15
	mr 24,14
	mr 25,15
	mr 26,14
	mr 27,15
	mr 28,14
	mr 29,15
	mr 30,14
	mr 31,15
	addi 10,3,44
	li 5,1
	beq 7,.L8
.L6:
	divw 9,5,6
.L7:
	mullw 9,9,6
	fmul 2,27,27
	cmpw 7,6,5
	fmul 1,28,28
	subf 9,9,5
	fmul 7,2,27
	slwi 9,9,2
	add 9,9,3
	fmul 31,1,28
	lwz 11,40(9)
	mulli 11,11,3
	add 9,11,8
	add 11,11,7
	slwi 9,9,3
	slwi 11,11,3
	add 9,9,0
	add 11,11,0
	lfd 13,8(9)
	lis 9,.LC1@ha
	lfd 12,8(11)
	la 9,.LC1@l(9)
	fmul 0,13,13
	lfd 3,0(9)
	lis 9,.LC2@ha
	lis 11,.LC1@ha
	fadd 8,12,12
	la 9,.LC2@l(9)
	fmul 9,12,12
	la 11,.LC1@l(11)
	stw 28,16(1)
	stw 29,20(1)
	fmul 8,8,27
	fmul 6,0,13
	fadd 11,0,0
	fmul 0,0,3
	fadd 5,8,9
	fmadd 11,11,28,6
	fmul 0,0,28
	fmul 10,13,3
	fmadd 5,2,3,5
	lfd 3,0(9)
	lis 9,.LC2@ha
	fmadd 6,6,3,0
	lfd 0,0(11)
	la 9,.LC2@l(9)
	fmul 4,7,3
	fmadd 10,10,1,11
	fmul 11,9,12
	fmadd 9,9,0,8
	lfd 8,0(9)
	fadd 3,13,28
	fmadd 4,12,5,4
	fmul 11,11,8
	fmadd 3,3,13,1
	fadd 9,9,2
	fadd 30,12,27
	fmul 4,28,4
	fmadd 11,27,9,11
	fmadd 29,12,30,2
	fmadd 10,31,8,10
	fmadd 2,13,3,31
	fmul 5,28,5
	fmul 8,31,28
	fadd 0,13,13
	fmadd 11,13,11,4
	fmadd 9,13,9,5
	lfd 5,16(1)
	fmadd 0,0,1,6
	fmadd 8,13,2,8
	fsub 13,13,28
	fmul 10,27,10
	fmadd 5,11,13,5
	lfd 11,8(1)
	fmadd 6,12,29,7
	fadd 0,0,31
	stfd 5,16(1)
	fmul 7,7,27
	lwz 28,16(1)
	lwz 29,20(1)
	stw 30,16(1)
	stw 31,20(1)
	fmadd 0,12,0,10
	lfd 10,16(1)
	fmadd 7,12,6,7
	fsub 12,12,27
	fmadd 11,13,30,11
	fmadd 10,0,12,10
	stfd 11,8(1)
	stfd 10,16(1)
	lwz 30,16(1)
	lwz 31,20(1)
	stw 14,16(1)
	stw 15,20(1)
	lfd 0,16(1)
	fmadd 0,13,29,0
	stfd 0,16(1)
	lwz 14,16(1)
	lwz 15,20(1)
	stw 18,16(1)
	stw 19,20(1)
	lfd 5,16(1)
	fmadd 5,13,7,5
	stfd 5,16(1)
	lwz 18,16(1)
	lwz 19,20(1)
	stw 20,16(1)
	stw 21,20(1)
	lfd 10,16(1)
	fmadd 10,12,3,10
	stfd 10,16(1)
	lwz 20,16(1)
	lwz 21,20(1)
	stw 24,16(1)
	stw 25,20(1)
	lfd 11,16(1)
	fmadd 11,12,8,11
	stfd 11,16(1)
	lwz 24,16(1)
	lwz 25,20(1)
	stw 26,16(1)
	stw 27,20(1)
	lfd 0,16(1)
	fmadd 0,9,13,0
	stfd 0,16(1)
	lwz 26,16(1)
	lwz 27,20(1)
	stw 16,16(1)
	stw 17,20(1)
	lfd 3,16(1)
	fmadd 3,13,6,3
	stfd 3,16(1)
	lwz 16,16(1)
	lwz 17,20(1)
	stw 22,16(1)
	stw 23,20(1)
	lfd 5,16(1)
	fmadd 5,12,2,5
	stfd 5,16(1)
	lwz 22,16(1)
	lwz 23,20(1)
	ble- 7,.L10
	lwz 9,0(10)
	cmpwi 7,6,4
	addi 10,10,4
	addi 5,5,1
	mulli 9,9,3
	add 11,9,8
	add 9,9,7
	slwi 9,9,3
	slwi 11,11,3
	add 9,9,0
	add 11,11,0
	lfd 27,8(9)
	lfd 28,8(11)
	bne 7,.L6
.L8:
	srawi 9,5,2
	addze 9,9
	b .L7
.L10:
	lis 9,Pabb@ha
	lis 11,Pab@ha
	stw 30,Pabb@l(9)
	stw 31,Pabb+4@l(9)
	lis 9,Pbbb@ha
	stw 26,Pab@l(11)
	stw 27,Pab+4@l(11)
	mfctr 11
	lis 10,Paab@ha
	stw 24,Pbbb@l(9)
	stw 25,Pbbb+4@l(9)
	mflr 9
	stw 28,Paab@l(10)
	stw 29,Paab+4@l(10)
	lis 10,Pbb@ha
	stw 20,Pb@l(11)
	stw 21,Pb+4@l(11)
	stw 22,Pbb@l(10)
	stw 23,Pbb+4@l(10)
	lwz 10,8(1)
	lwz 11,12(1)
	stw 18,Paaa@l(9)
	stw 19,Paaa+4@l(9)
	lis 9,P1@ha
	stw 16,Paa@l(12)
	stw 17,Paa+4@l(12)
	stw 14,Pa@l(4)
	stw 15,Pa+4@l(4)
	stw 10,P1@l(9)
	stw 11,P1+4@l(9)

Adding the switch -fno-vpt to the command line causes the problem to go away.

-- 
           Summary: Failure to propagate FDO counters by vpt results in
                    performance regression
           Product: gcc
           Version: 3.5.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P2
         Component: rtl-optimization
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: steinmtz at us dot ibm dot com
                CC: gcc-bugs at gcc dot gnu dot org
 GCC build triplet: PowerPC-Linux
  GCC host triplet: PowerPC-Linux
GCC target triplet: PowerPC-Linux


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=15633


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Bug rtl-optimization/15633] Failure to propagate FDO counters by vpt results in performance regression
  2004-05-25 13:08 [Bug rtl-optimization/15633] New: Failure to propagate FDO counters by vpt results in performance regression steinmtz at us dot ibm dot com
@ 2004-05-25 14:09 ` steinmtz at us dot ibm dot com
  2004-05-25 14:16 ` steinmtz at us dot ibm dot com
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: steinmtz at us dot ibm dot com @ 2004-05-25 14:09 UTC (permalink / raw)
  To: gcc-bugs


------- Additional Comments From steinmtz at us dot ibm dot com  2004-05-24 15:39 -------
Created an attachment (id=6371)
 --> (http://gcc.gnu.org/bugzilla/attachment.cgi?id=6371&action=view)
Preprocessed source code, compressed with gzip


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=15633


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Bug rtl-optimization/15633] Failure to propagate FDO counters by vpt results in performance regression
  2004-05-25 13:08 [Bug rtl-optimization/15633] New: Failure to propagate FDO counters by vpt results in performance regression steinmtz at us dot ibm dot com
  2004-05-25 14:09 ` [Bug rtl-optimization/15633] " steinmtz at us dot ibm dot com
@ 2004-05-25 14:16 ` steinmtz at us dot ibm dot com
  2004-05-25 14:21 ` steinmtz at us dot ibm dot com
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: steinmtz at us dot ibm dot com @ 2004-05-25 14:16 UTC (permalink / raw)
  To: gcc-bugs


------- Additional Comments From steinmtz at us dot ibm dot com  2004-05-24 15:40 -------
Created an attachment (id=6372)
 --> (http://gcc.gnu.org/bugzilla/attachment.cgi?id=6372&action=view)
FDO data, compressed with gzip


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=15633


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Bug rtl-optimization/15633] Failure to propagate FDO counters by vpt results in performance regression
  2004-05-25 13:08 [Bug rtl-optimization/15633] New: Failure to propagate FDO counters by vpt results in performance regression steinmtz at us dot ibm dot com
  2004-05-25 14:09 ` [Bug rtl-optimization/15633] " steinmtz at us dot ibm dot com
  2004-05-25 14:16 ` steinmtz at us dot ibm dot com
@ 2004-05-25 14:21 ` steinmtz at us dot ibm dot com
  2004-05-25 16:53 ` pinskia at gcc dot gnu dot org
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: steinmtz at us dot ibm dot com @ 2004-05-25 14:21 UTC (permalink / raw)
  To: gcc-bugs


------- Additional Comments From steinmtz at us dot ibm dot com  2004-05-24 15:40 -------
Created an attachment (id=6373)
 --> (http://gcc.gnu.org/bugzilla/attachment.cgi?id=6373&action=view)
FDO data, compressed with gzip


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=15633


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Bug rtl-optimization/15633] Failure to propagate FDO counters by vpt results in performance regression
  2004-05-25 13:08 [Bug rtl-optimization/15633] New: Failure to propagate FDO counters by vpt results in performance regression steinmtz at us dot ibm dot com
                   ` (2 preceding siblings ...)
  2004-05-25 14:21 ` steinmtz at us dot ibm dot com
@ 2004-05-25 16:53 ` pinskia at gcc dot gnu dot org
  2004-06-23 16:09 ` cvs-commit at gcc dot gnu dot org
  2004-06-23 17:31 ` pinskia at gcc dot gnu dot org
  5 siblings, 0 replies; 7+ messages in thread
From: pinskia at gcc dot gnu dot org @ 2004-05-25 16:53 UTC (permalink / raw)
  To: gcc-bugs


------- Additional Comments From pinskia at gcc dot gnu dot org  2004-05-24 21:19 -------
Confirmed, patch here: <http://gcc.gnu.org/ml/gcc-patches/2004-05/msg01558.html>.

-- 
           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |NEW
     Ever Confirmed|                            |1
           Keywords|                            |missed-optimization, patch
   Last reconfirmed|0000-00-00 00:00:00         |2004-05-24 21:19:33
               date|                            |


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=15633


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Bug rtl-optimization/15633] Failure to propagate FDO counters by vpt results in performance regression
  2004-05-25 13:08 [Bug rtl-optimization/15633] New: Failure to propagate FDO counters by vpt results in performance regression steinmtz at us dot ibm dot com
                   ` (3 preceding siblings ...)
  2004-05-25 16:53 ` pinskia at gcc dot gnu dot org
@ 2004-06-23 16:09 ` cvs-commit at gcc dot gnu dot org
  2004-06-23 17:31 ` pinskia at gcc dot gnu dot org
  5 siblings, 0 replies; 7+ messages in thread
From: cvs-commit at gcc dot gnu dot org @ 2004-06-23 16:09 UTC (permalink / raw)
  To: gcc-bugs


------- Additional Comments From cvs-commit at gcc dot gnu dot org  2004-06-23 16:08 -------
Subject: Bug 15633

CVSROOT:	/cvs/gcc
Module name:	gcc
Changes by:	dje@gcc.gnu.org	2004-06-23 16:07:59

Modified files:
	gcc            : ChangeLog value-prof.c 

Log message:
	2004-06-23  Pat Haugen  <pthaugen@us.ibm.com>
	
	PR optimization/15633
	* value-prof.c (divmod_fixed_value_transform): Compute probability
	of taking optimal path and pass along to gen_ routine.
	(mod_pow2_value_transform): Same.
	(mod_subtract_transform): Same.
	(gen_divmod_fixed_value): Add new probability parameter.
	Add probability to newly created jump.
	(gen_mod_pow2): Same.
	(gen_mod_subtract): Same.

Patches:
http://gcc.gnu.org/cgi-bin/cvsweb.cgi/gcc/gcc/ChangeLog.diff?cvsroot=gcc&r1=2.4095&r2=2.4096
http://gcc.gnu.org/cgi-bin/cvsweb.cgi/gcc/gcc/value-prof.c.diff?cvsroot=gcc&r1=1.11&r2=1.12



-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=15633


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [Bug rtl-optimization/15633] Failure to propagate FDO counters by vpt results in performance regression
  2004-05-25 13:08 [Bug rtl-optimization/15633] New: Failure to propagate FDO counters by vpt results in performance regression steinmtz at us dot ibm dot com
                   ` (4 preceding siblings ...)
  2004-06-23 16:09 ` cvs-commit at gcc dot gnu dot org
@ 2004-06-23 17:31 ` pinskia at gcc dot gnu dot org
  5 siblings, 0 replies; 7+ messages in thread
From: pinskia at gcc dot gnu dot org @ 2004-06-23 17:31 UTC (permalink / raw)
  To: gcc-bugs


------- Additional Comments From pinskia at gcc dot gnu dot org  2004-06-23 17:19 -------
Fixed.

-- 
           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
         Resolution|                            |FIXED
   Target Milestone|---                         |3.5.0


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=15633


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2004-06-23 17:19 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-05-25 13:08 [Bug rtl-optimization/15633] New: Failure to propagate FDO counters by vpt results in performance regression steinmtz at us dot ibm dot com
2004-05-25 14:09 ` [Bug rtl-optimization/15633] " steinmtz at us dot ibm dot com
2004-05-25 14:16 ` steinmtz at us dot ibm dot com
2004-05-25 14:21 ` steinmtz at us dot ibm dot com
2004-05-25 16:53 ` pinskia at gcc dot gnu dot org
2004-06-23 16:09 ` cvs-commit at gcc dot gnu dot org
2004-06-23 17:31 ` pinskia at gcc dot gnu dot org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).