public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug regression/39914]  New: 96% performance regression in floating point code; part of the problem started 2009/03/12-13
@ 2009-04-26 18:24 lucier at math dot purdue dot edu
  2009-04-26 18:43 ` [Bug regression/39914] " ubizjak at gmail dot com
                   ` (15 more replies)
  0 siblings, 16 replies; 17+ messages in thread
From: lucier at math dot purdue dot edu @ 2009-04-26 18:24 UTC (permalink / raw)
  To: gcc-bugs

With this compiler:

gcc version 4.4.0 20090312 (experimental) [trunk revision 144801] (GCC) 

running the test in

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33928

(same .i file, same instructions for reproducing, same compiler options, same
everything)

gives a time of

    132 ms cpu time (132 user, 0 system)

with assembly code in the main loop of

.L2958:
        movq    %rdx, %rcx
        addq    (%r11), %rcx
        leaq    4(%rdx), %r14
        movq    %rcx, (%rdi)
        addq    $4, %rcx
        movq    %rcx, (%r10)
        movq    (%r11), %rcx
        addq    (%rdi), %rcx
        movq    %rcx, (%rsi)
        addq    $4, %rcx
        movq    %rcx, (%r9)
        movq    (%r11), %r12
        addq    (%rsi), %r12
        movq    %r12, (%rbp)
        addq    $4, %r12
        movq    %r12, (%r15)
        movq    (%rax), %rcx
        addq    $7, %rcx
        movsd   (%rcx,%r12,2), %xmm7
        movq    (%rbp), %r12
        leaq    (%rcx,%rdx,2), %r13
        addq    $8, %rdx
        movsd   (%r13), %xmm4
        movsd   (%rcx,%r12,2), %xmm10
        movq    (%r9), %r12
        movsd   (%rcx,%r12,2), %xmm5
        movq    (%rsi), %r12
        movsd   (%rcx,%r12,2), %xmm6
        movq    (%r10), %r12
        movsd   (%rcx,%r12,2), %xmm13
        movq    (%rdi), %r12
        movsd   (%rcx,%r12,2), %xmm11
        leaq    (%r14,%r14), %r12
        movsd   (%rcx,%r12), %xmm9
        movq    24(%r8), %rcx
        movapd  %xmm11, %xmm14
        movsd   15(%rcx), %xmm1
        movsd   7(%rcx), %xmm2
        movapd  %xmm1, %xmm8
        movsd   31(%rcx), %xmm3
        movapd  %xmm2, %xmm12
        mulsd   %xmm10, %xmm8
        mulsd   %xmm7, %xmm12
        mulsd   %xmm2, %xmm10
        mulsd   %xmm1, %xmm7
        movsd   23(%rcx), %xmm0
        addsd   %xmm8, %xmm12
        movapd  %xmm2, %xmm8
        mulsd   %xmm6, %xmm2
        subsd   %xmm7, %xmm10
        movapd  %xmm1, %xmm7
        mulsd   %xmm5, %xmm1
        mulsd   %xmm6, %xmm7
        movapd  %xmm4, %xmm6
        mulsd   %xmm5, %xmm8
        movapd  %xmm9, %xmm5
        subsd   %xmm10, %xmm14
        subsd   %xmm1, %xmm2
        movapd  %xmm3, %xmm1
        addsd   %xmm11, %xmm10
        xorpd   .LC5(%rip), %xmm1
        addsd   %xmm7, %xmm8
        movapd  %xmm13, %xmm7
        subsd   %xmm2, %xmm6
        subsd   %xmm12, %xmm7
        subsd   %xmm8, %xmm5
        addsd   %xmm4, %xmm2
        movapd  %xmm0, %xmm4
        addsd   %xmm9, %xmm8
        movapd  %xmm1, %xmm9
        mulsd   %xmm14, %xmm4
        addsd   %xmm13, %xmm12
        mulsd   %xmm7, %xmm9
        mulsd   %xmm1, %xmm14
        movapd  %xmm3, %xmm1
        mulsd   %xmm0, %xmm7
        mulsd   %xmm10, %xmm1
        mulsd   %xmm0, %xmm10
        addsd   %xmm9, %xmm4
        subsd   %xmm7, %xmm14
        movapd  %xmm0, %xmm7
        movapd  %xmm2, %xmm0
        mulsd   %xmm12, %xmm7
        mulsd   %xmm3, %xmm12
        addsd   %xmm1, %xmm7
        subsd   %xmm12, %xmm10
        addsd   %xmm10, %xmm0
        subsd   %xmm10, %xmm2
        movsd   %xmm0, (%r13)
        movapd  %xmm8, %xmm0
        movq    (%rax), %rcx
        subsd   %xmm7, %xmm8
        addsd   %xmm7, %xmm0
        movsd   %xmm0, 7(%r12,%rcx)
        movq    (%rdi), %r12
        movq    (%rax), %rcx
        movapd  %xmm6, %xmm0
        subsd   %xmm14, %xmm6
        movsd   %xmm2, 7(%rcx,%r12,2)
        movq    (%r10), %r12
        movq    (%rax), %rcx
        addsd   %xmm14, %xmm0
        movsd   %xmm8, 7(%rcx,%r12,2)
        movq    (%rsi), %r12
        movq    (%rax), %rcx
        movsd   %xmm0, 7(%rcx,%r12,2)
        movapd  %xmm5, %xmm0
        movq    (%r9), %r12
        movq    (%rax), %rcx
        subsd   %xmm4, %xmm5
        addsd   %xmm4, %xmm0
        movsd   %xmm0, 7(%rcx,%r12,2)
        movq    (%rbp), %r12
        movq    (%rax), %rcx
        movsd   %xmm6, 7(%rcx,%r12,2)
        movq    (%r15), %r12
        movq    (%rax), %rcx
        movsd   %xmm5, 7(%rcx,%r12,2)
        cmpq    %rdx, -104(%rsp)
        jg      .L2958
        movq    %r14, -104(%rsp)

With this compiler

/pkgs/gcc-mainline/bin/gcc -v
Using built-in specs.
Target: x86_64-unknown-linux-gnu
Configured with: /tmp/lucier/gcc/mainline/configure --enable-checking=release
--prefix=/pkgs/gcc-mainline --enable-languages=c
--enable-gather-detailed-mem-stats
Thread model: posix
gcc version 4.4.0 20090313 (experimental) [trunk revision 144829] (GCC) 

one gets a time of

    212 ms cpu time (212 user, 0 system)

and the assembly language for the main loop is

.L2946:
        movq    %rbx, %rdx
        addq    (%r11), %rdx
        leaq    4(%rbx), %rbp
        movq    %rdx, (%rsi)
        addq    $4, %rdx
        movq    %rdx, (%r10)
        movq    (%r11), %rdx
        addq    (%rsi), %rdx
        movq    %rdx, (%rcx)
        addq    $4, %rdx
        movq    %rdx, (%r9)
        movq    (%r11), %r13
        addq    (%rcx), %r13
        movq    %r13, (%r8)
        addq    $4, %r13
        movq    %r13, (%r15)
        movq    (%rax), %rdx
        addq    $7, %rdx
        movsd   (%rdx,%r13,2), %xmm0
        leaq    (%rdx,%rbx,2), %r14
        addq    $8, %rbx
        movsd   %xmm0, -48(%rsp)
        movq    (%r8), %r13
        movsd   (%rdx,%r13,2), %xmm0
        movsd   %xmm0, -56(%rsp)
        movq    (%r9), %r13
        movsd   (%rdx,%r13,2), %xmm0
        movsd   %xmm0, -64(%rsp)
        movq    (%rcx), %r13
        movsd   (%rdx,%r13,2), %xmm0
        movsd   %xmm0, -72(%rsp)
        movq    (%r10), %r13
        movsd   (%rdx,%r13,2), %xmm0
        movsd   %xmm0, -80(%rsp)
        movq    (%rsi), %r13
        movsd   (%rdx,%r13,2), %xmm0
        leaq    (%rbp,%rbp), %r13
        movsd   %xmm0, -104(%rsp)
        movsd   (%rdx,%r13), %xmm0
        movsd   %xmm0, -88(%rsp)
        movq    24(%rdi), %rdx
        movsd   31(%rdx), %xmm0
        movsd   %xmm0, -32(%rsp)
        movsd   23(%rdx), %xmm0
        movsd   %xmm0, -40(%rsp)
        movsd   15(%rdx), %xmm0
        movsd   %xmm0, -112(%rsp)
        movsd   7(%rdx), %xmm0
        movsd   %xmm0, -120(%rsp)
        movapd  %xmm0, %xmm1
        movsd   -112(%rsp), %xmm0
        mulsd   -48(%rsp), %xmm1
        mulsd   -56(%rsp), %xmm0
        addsd   %xmm0, %xmm1
        movsd   -112(%rsp), %xmm0
        mulsd   -48(%rsp), %xmm0
        movsd   %xmm1, -8(%rsp)
        movsd   -120(%rsp), %xmm1
        mulsd   -56(%rsp), %xmm1
        subsd   %xmm0, %xmm1
        movsd   -112(%rsp), %xmm0
        mulsd   -72(%rsp), %xmm0
        movsd   %xmm1, -16(%rsp)
        movsd   -120(%rsp), %xmm1
        mulsd   -64(%rsp), %xmm1
        addsd   %xmm0, %xmm1
        movsd   -112(%rsp), %xmm0
        mulsd   -64(%rsp), %xmm0
        movsd   %xmm1, -24(%rsp)
        movsd   -120(%rsp), %xmm1
        mulsd   -72(%rsp), %xmm1
        subsd   %xmm0, %xmm1
        movsd   -80(%rsp), %xmm0
        subsd   -8(%rsp), %xmm0
        movsd   %xmm1, -120(%rsp)
        movsd   %xmm0, -48(%rsp)
        movsd   -104(%rsp), %xmm0
        subsd   -16(%rsp), %xmm0
        movsd   %xmm0, -112(%rsp)
        movsd   -88(%rsp), %xmm0
        subsd   -24(%rsp), %xmm0
        movsd   %xmm0, -56(%rsp)
        movsd   (%r14), %xmm0
        subsd   %xmm1, %xmm0
        movsd   %xmm0, -64(%rsp)
        movsd   -80(%rsp), %xmm0
        addsd   -8(%rsp), %xmm0
        movsd   %xmm0, -80(%rsp)
        movsd   -104(%rsp), %xmm0
        addsd   -16(%rsp), %xmm0
        movsd   %xmm0, -104(%rsp)
        movsd   -88(%rsp), %xmm0
        addsd   -24(%rsp), %xmm0
        movsd   %xmm0, -88(%rsp)
        movsd   (%r14), %xmm0
        addsd   %xmm1, %xmm0
        movsd   %xmm0, -96(%rsp)
        movsd   -32(%rsp), %xmm0
        xorpd   .LC5(%rip), %xmm0
        movsd   %xmm0, -120(%rsp)
        movapd  %xmm0, %xmm1
        movsd   -40(%rsp), %xmm0
        mulsd   -48(%rsp), %xmm1
        mulsd   -112(%rsp), %xmm0
        addsd   %xmm0, %xmm1
        movsd   -40(%rsp), %xmm0
        mulsd   -48(%rsp), %xmm0
        movsd   %xmm1, -72(%rsp)
        movsd   -120(%rsp), %xmm1
        mulsd   -112(%rsp), %xmm1
        subsd   %xmm0, %xmm1
        movsd   -32(%rsp), %xmm0
        mulsd   -104(%rsp), %xmm0
        movsd   %xmm1, -112(%rsp)
        movsd   -40(%rsp), %xmm1
        mulsd   -80(%rsp), %xmm1
        addsd   %xmm0, %xmm1
        movsd   -32(%rsp), %xmm0
        mulsd   -80(%rsp), %xmm0
        movsd   %xmm1, -120(%rsp)
        movsd   -40(%rsp), %xmm1
        mulsd   -104(%rsp), %xmm1
        subsd   %xmm0, %xmm1
        movsd   %xmm1, -104(%rsp)
        movsd   -96(%rsp), %xmm0
        addsd   %xmm1, %xmm0
        movsd   %xmm0, (%r14)
        movq    (%rax), %rdx
        movsd   -88(%rsp), %xmm0
        addsd   -120(%rsp), %xmm0
        movsd   %xmm0, 7(%r13,%rdx)
        movq    (%rsi), %r13
        movq    (%rax), %rdx
        movsd   -96(%rsp), %xmm0
        subsd   -104(%rsp), %xmm0
        movsd   %xmm0, 7(%rdx,%r13,2)
        movq    (%r10), %r13
        movq    (%rax), %rdx
        movsd   -88(%rsp), %xmm0
        subsd   -120(%rsp), %xmm0
        movsd   %xmm0, 7(%rdx,%r13,2)
        movq    (%rcx), %r13
        movq    (%rax), %rdx
        movsd   -64(%rsp), %xmm0
        addsd   -112(%rsp), %xmm0
        movsd   %xmm0, 7(%rdx,%r13,2)
        movq    (%r9), %r13
        movq    (%rax), %rdx
        movsd   -56(%rsp), %xmm0
        addsd   -72(%rsp), %xmm0
        movsd   %xmm0, 7(%rdx,%r13,2)
        movq    (%r8), %r13
        movq    (%rax), %rdx
        movsd   -64(%rsp), %xmm0
        subsd   -112(%rsp), %xmm0
        movsd   %xmm0, 7(%rdx,%r13,2)
        movq    (%r15), %r13
        movq    (%rax), %rdx
        movsd   -56(%rsp), %xmm0
        subsd   -72(%rsp), %xmm0
        movsd   %xmm0, 7(%rdx,%r13,2)
        cmpq    %rbx, (%rsp)
        jg      .L2946
        movq    %rbp, (%rsp)

I'm reporting this separately because it doesn't have the same cause as the
previous PR 33928.

BTW, with 4.2.4 this test runs in 108 ms on this machine, hence the total
regression amount noted in the subject line.  This part itself causes about 60%
performance regression, the rest is accounted for by

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33928

Brad


-- 
           Summary: 96% performance regression in floating point code; part
                    of the problem started 2009/03/12-13
           Product: gcc
           Version: 4.4.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: regression
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: lucier at math dot purdue dot edu
 GCC build triplet: x86_64-unknown-linux-gnu
  GCC host triplet: x86_64-unknown-linux-gnu
GCC target triplet: x86_64-unknown-linux-gnu


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39914


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug regression/39914] 96% performance regression in floating point code; part of the problem started 2009/03/12-13
  2009-04-26 18:24 [Bug regression/39914] New: 96% performance regression in floating point code; part of the problem started 2009/03/12-13 lucier at math dot purdue dot edu
@ 2009-04-26 18:43 ` ubizjak at gmail dot com
  2009-04-27  8:16 ` ubizjak at gmail dot com
                   ` (14 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: ubizjak at gmail dot com @ 2009-04-26 18:43 UTC (permalink / raw)
  To: gcc-bugs



------- Comment #1 from ubizjak at gmail dot com  2009-04-26 18:43 -------
There are a couple of possible candidates in this range:

URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=144812
Log:
2009-03-12  Vladimir Makarov  <vmakarov@redhat.com>

        PR debug/39432
        * ira-int.h (struct allocno): Fix comment for calls_crossed_num.
        * ira-conflicts.c (ira_build_conflicts): Prohibit call used
        registers for allocnos created from user-defined variables.

URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=144817
Log:
2009-03-12  H.J. Lu  <hongjiu.lu@intel.com>

        PR target/38824
        * config/i386/i386.md: Compare REGNO on the new peephole2
        patterns.

URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=144823
Log:
gcc/

2009-03-12  H.J. Lu  <hongjiu.lu@intel.com>

        PR target/39445
        * config/i386/i386.c (ix86_expand_push): Don't set memory
        alignment.


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39914


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug regression/39914] 96% performance regression in floating point code; part of the problem started 2009/03/12-13
  2009-04-26 18:24 [Bug regression/39914] New: 96% performance regression in floating point code; part of the problem started 2009/03/12-13 lucier at math dot purdue dot edu
  2009-04-26 18:43 ` [Bug regression/39914] " ubizjak at gmail dot com
@ 2009-04-27  8:16 ` ubizjak at gmail dot com
  2009-04-27 15:07 ` lucier at math dot purdue dot edu
                   ` (13 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: ubizjak at gmail dot com @ 2009-04-27  8:16 UTC (permalink / raw)
  To: gcc-bugs



------- Comment #2 from ubizjak at gmail dot com  2009-04-27 08:16 -------
(In reply to comment #0)

> (same .i file, same instructions for reproducing, same compiler options, same
> everything)

I guess that this is direct.i compiled with -O1?

Trunk, revision: 146825 -O1 on x86_64 linux gives:

.L27:
        leaq    4(%rbx), %rbp
        movq    %rbx, %rdx
        addq    (%r11), %rdx
        movq    %rdx, (%rsi)
        addq    $4, %rdx
        movq    %rdx, (%r10)
        movq    (%r11), %rdx
        addq    (%rsi), %rdx
        movq    %rdx, (%rcx)
        addq    $4, %rdx
        movq    %rdx, (%r9)
        movq    (%r11), %r12
        addq    (%rcx), %r12
        movq    %r12, (%r8)
        addq    $4, %r12
        movq    %r12, (%r15)
        movq    (%rax), %rdx
        addq    $7, %rdx
        movsd   (%rdx,%r12,2), %xmm2
        movsd   %xmm2, -96(%rsp)
        movq    (%r8), %r12
        movsd   (%rdx,%r12,2), %xmm2
        movsd   %xmm2, -64(%rsp)
        movq    (%r9), %r12
        movsd   (%rdx,%r12,2), %xmm2
        movsd   %xmm2, -56(%rsp)
        movq    (%rcx), %r12
        movsd   (%rdx,%r12,2), %xmm2
        movsd   %xmm2, -48(%rsp)
        movq    (%r10), %r12
        movsd   (%rdx,%r12,2), %xmm2
        movsd   %xmm2, -104(%rsp)
        movq    (%rsi), %r12
        movsd   (%rdx,%r12,2), %xmm2
        movsd   %xmm2, -88(%rsp)
        leaq    (%rbp,%rbp), %r12
        movsd   (%r12,%rdx), %xmm2
        movsd   %xmm2, -80(%rsp)
        leaq    (%rdx,%rbx,2), %r14
        movq    24(%rdi), %rdx
        movsd   31(%rdx), %xmm2
        movsd   %xmm2, -32(%rsp)
        movsd   23(%rdx), %xmm2
        movsd   %xmm2, -40(%rsp)
        movsd   15(%rdx), %xmm2
        movsd   %xmm2, -120(%rsp)
        movsd   7(%rdx), %xmm2
        movsd   %xmm2, -112(%rsp)
        movapd  %xmm2, %xmm3
        mulsd   -96(%rsp), %xmm3
        movsd   -120(%rsp), %xmm2
        mulsd   -64(%rsp), %xmm2
        addsd   %xmm2, %xmm3
        movsd   %xmm3, -24(%rsp)
        movsd   -112(%rsp), %xmm3
        mulsd   -64(%rsp), %xmm3
        movsd   -120(%rsp), %xmm2
        mulsd   -96(%rsp), %xmm2
        subsd   %xmm2, %xmm3
        movsd   %xmm3, -96(%rsp)
        movsd   -112(%rsp), %xmm3
        mulsd   -56(%rsp), %xmm3
        movsd   -120(%rsp), %xmm2
        mulsd   -48(%rsp), %xmm2
        addsd   %xmm2, %xmm3
        movsd   %xmm3, -64(%rsp)
        movsd   -112(%rsp), %xmm3
        mulsd   -48(%rsp), %xmm3
        movsd   -120(%rsp), %xmm2
        mulsd   -56(%rsp), %xmm2
        subsd   %xmm2, %xmm3
        movsd   %xmm3, -120(%rsp)
        movsd   -104(%rsp), %xmm2
        subsd   -24(%rsp), %xmm2
        movsd   %xmm2, -112(%rsp)
        movsd   -88(%rsp), %xmm2
        subsd   -96(%rsp), %xmm2
        movsd   %xmm2, -56(%rsp)
        movsd   -80(%rsp), %xmm2
        subsd   -64(%rsp), %xmm2
        movsd   %xmm2, -48(%rsp)
        movsd   (%r14), %xmm2
        subsd   %xmm3, %xmm2
        movsd   %xmm2, -16(%rsp)
        movsd   -104(%rsp), %xmm2
        addsd   -24(%rsp), %xmm2
        movsd   %xmm2, -104(%rsp)
        movsd   -88(%rsp), %xmm2
        addsd   -96(%rsp), %xmm2
        movsd   %xmm2, -88(%rsp)
        movsd   -80(%rsp), %xmm2
        addsd   -64(%rsp), %xmm2
        movsd   %xmm2, -80(%rsp)
        movsd   (%r14), %xmm2
        addsd   %xmm3, %xmm2
        movsd   %xmm2, -72(%rsp)
        movsd   -32(%rsp), %xmm2
        xorpd   %xmm0, %xmm2
        movsd   %xmm2, -120(%rsp)
        movapd  %xmm2, %xmm3
        mulsd   -112(%rsp), %xmm3
        movsd   -40(%rsp), %xmm2
        mulsd   -56(%rsp), %xmm2
        addsd   %xmm2, %xmm3
        movsd   %xmm3, -96(%rsp)
        movsd   -120(%rsp), %xmm3
        mulsd   -56(%rsp), %xmm3
        movsd   -40(%rsp), %xmm2
        mulsd   -112(%rsp), %xmm2
        subsd   %xmm2, %xmm3
        movsd   %xmm3, -120(%rsp)
        movsd   -40(%rsp), %xmm3
        mulsd   -104(%rsp), %xmm3
        movsd   -32(%rsp), %xmm2
        mulsd   -88(%rsp), %xmm2
        addsd   %xmm2, %xmm3
        movsd   %xmm3, -112(%rsp)
        movsd   -40(%rsp), %xmm3
        mulsd   -88(%rsp), %xmm3
        movsd   -32(%rsp), %xmm2
        mulsd   -104(%rsp), %xmm2
        subsd   %xmm2, %xmm3
        movsd   %xmm3, -104(%rsp)
        movsd   -72(%rsp), %xmm2
        addsd   %xmm3, %xmm2
        movsd   %xmm2, (%r14)
        movq    (%rax), %rdx
        movsd   -80(%rsp), %xmm2
        addsd   -112(%rsp), %xmm2
        movsd   %xmm2, 7(%r12,%rdx)
        movq    (%rsi), %r12
        movq    (%rax), %rdx
        movsd   -72(%rsp), %xmm2
        subsd   -104(%rsp), %xmm2
        movsd   %xmm2, 7(%rdx,%r12,2)
        movq    (%r10), %r12
        movq    (%rax), %rdx
        movsd   -80(%rsp), %xmm2
        subsd   -112(%rsp), %xmm2
        movsd   %xmm2, 7(%rdx,%r12,2)
        movq    (%rcx), %r12
        movq    (%rax), %rdx
        movsd   -16(%rsp), %xmm2
        addsd   -120(%rsp), %xmm2
        movsd   %xmm2, 7(%rdx,%r12,2)
        movq    (%r9), %r12
        movq    (%rax), %rdx
        movsd   -48(%rsp), %xmm2
        addsd   -96(%rsp), %xmm2
        movsd   %xmm2, 7(%rdx,%r12,2)
        movq    (%r8), %r12
        movq    (%rax), %rdx
        movsd   -16(%rsp), %xmm2
        subsd   -120(%rsp), %xmm2
        movsd   %xmm2, 7(%rdx,%r12,2)
        movq    (%r15), %r12
        movq    (%rax), %rdx
        movsd   -48(%rsp), %xmm2
        subsd   -96(%rsp), %xmm2
        movsd   %xmm2, 7(%rdx,%r12,2)
        addq    $8, %rbx
        cmpq    %rbx, -8(%rsp)
        jg      .L27

The code above looks similar to your gcc version 4.4.0 20090313 code.

Using -O2, I get:

.L27:
        movq    -96(%rsp), %r14
        leaq    (%rax,%rcx,2), %rdi
        leaq    -8(%rax,%rcx,2), %rbp
        leaq    (%rax,%rsi,2), %r8
        leaq    -8(%rax,%rsi,2), %r9
        leaq    8(%rax,%rdx,2), %r12
        movsd   (%rdi), %xmm2
        leaq    8(%rax,%rbx,2), %r10
        movsd   (%r14), %xmm4
        movq    -88(%rsp), %r14
        movsd   (%rbp), %xmm6
        leaq    (%rax,%rbx,2), %r11
        movsd   (%r8), %xmm9
        leaq    (%rax,%rdx,2), %r13
        movsd   (%r14), %xmm1
        movq    -120(%rsp), %r14
        movsd   (%r9), %xmm10
        movq    %rcx, -80(%rsp)
        movapd  %xmm1, %xmm14
        addq    $8, %rdx
        movsd   (%r14), %xmm5
        addq    $8, %rcx
        mulsd   %xmm6, %xmm14
        addq    $8, %rsi
        addq    $8, %rbx
        movapd  %xmm5, %xmm7
        mulsd   %xmm5, %xmm6
        movsd   (%r12), %xmm11
        cmpq    %rdx, -112(%rsp)
        mulsd   %xmm2, %xmm7
        mulsd   %xmm1, %xmm2
        movsd   (%r15), %xmm8
        movsd   (%r11), %xmm3
        addsd   %xmm14, %xmm7
        movapd  %xmm1, %xmm14
        subsd   %xmm2, %xmm6
        movapd  %xmm5, %xmm2
        mulsd   %xmm10, %xmm14
        mulsd   %xmm9, %xmm2
        mulsd   %xmm9, %xmm1
        movapd  %xmm11, %xmm9
        mulsd   %xmm10, %xmm5
        movsd   (%r10), %xmm15
        addsd   %xmm14, %xmm2
        movsd   (%r13), %xmm0
        movapd  %xmm15, %xmm14
        subsd   %xmm1, %xmm5
        movapd  %xmm3, %xmm1
        subsd   %xmm7, %xmm14
        movapd  %xmm0, %xmm10
        subsd   %xmm2, %xmm9
        addsd   %xmm2, %xmm11
        movapd  %xmm8, %xmm2
        subsd   %xmm6, %xmm1
        xorpd   %xmm12, %xmm2
        subsd   %xmm5, %xmm10
        addsd   %xmm3, %xmm6
        movapd  %xmm4, %xmm3
        addsd   %xmm0, %xmm5
        movapd  %xmm2, %xmm0
        mulsd   %xmm1, %xmm3
        addsd   %xmm15, %xmm7
        mulsd   %xmm2, %xmm1
        mulsd   %xmm14, %xmm0
        movapd  %xmm4, %xmm2
        mulsd   %xmm4, %xmm14
        mulsd   %xmm7, %xmm2
        addsd   %xmm3, %xmm0
        movapd  %xmm8, %xmm3
        mulsd   %xmm8, %xmm7
        subsd   %xmm14, %xmm1
        mulsd   %xmm6, %xmm3
        addsd   %xmm3, %xmm2
        movapd  %xmm4, %xmm3
        movapd  %xmm5, %xmm4
        mulsd   %xmm6, %xmm3
        subsd   %xmm7, %xmm3
        addsd   %xmm3, %xmm4
        subsd   %xmm3, %xmm5
        movsd   %xmm4, (%r13)
        movapd  %xmm11, %xmm4
        subsd   %xmm2, %xmm11
        addsd   %xmm2, %xmm4
        movapd  %xmm10, %xmm2
        subsd   %xmm1, %xmm10
        addsd   %xmm1, %xmm2
        movsd   %xmm4, (%r12)
        movsd   %xmm5, (%r11)
        movsd   %xmm11, (%r10)
        movsd   %xmm2, (%r9)
        movapd  %xmm9, %xmm2
        subsd   %xmm0, %xmm9
        addsd   %xmm0, %xmm2
        movsd   %xmm2, (%r8)
        movsd   %xmm10, (%rbp)
        movsd   %xmm9, (%rdi)
        jg      .L27

It is not clear from your report, if -O1 flag is problematic, -O2 code looks
good to me.


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39914


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug regression/39914] 96% performance regression in floating point code; part of the problem started 2009/03/12-13
  2009-04-26 18:24 [Bug regression/39914] New: 96% performance regression in floating point code; part of the problem started 2009/03/12-13 lucier at math dot purdue dot edu
  2009-04-26 18:43 ` [Bug regression/39914] " ubizjak at gmail dot com
  2009-04-27  8:16 ` ubizjak at gmail dot com
@ 2009-04-27 15:07 ` lucier at math dot purdue dot edu
  2009-04-27 15:11 ` lucier at math dot purdue dot edu
                   ` (12 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: lucier at math dot purdue dot edu @ 2009-04-27 15:07 UTC (permalink / raw)
  To: gcc-bugs



------- Comment #3 from lucier at math dot purdue dot edu  2009-04-27 15:07 -------
Subject: Re:  96% performance regression in floating
 point code; part of the problem started 2009/03/12-13

On Sun, 2009-04-26 at 18:43 +0000, ubizjak at gmail dot com wrote:
> 
> 
> ------- Comment #1 from ubizjak at gmail dot com  2009-04-26 18:43 -------
> There are a couple of possible candidates in this range:
> 
> URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=144812
> Log:
> 2009-03-12  Vladimir Makarov  <vmakarov@redhat.com>
> 
>         PR debug/39432
>         * ira-int.h (struct allocno): Fix comment for calls_crossed_num.
>         * ira-conflicts.c (ira_build_conflicts): Prohibit call used
>         registers for allocnos created from user-defined variables.

The problem exists in 

gcc version 4.4.0 20090312 (experimental) [trunk revision 144812] (GCC) 

So perhaps it's this checkin.

Brad


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39914


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug regression/39914] 96% performance regression in floating point code; part of the problem started 2009/03/12-13
  2009-04-26 18:24 [Bug regression/39914] New: 96% performance regression in floating point code; part of the problem started 2009/03/12-13 lucier at math dot purdue dot edu
                   ` (2 preceding siblings ...)
  2009-04-27 15:07 ` lucier at math dot purdue dot edu
@ 2009-04-27 15:11 ` lucier at math dot purdue dot edu
  2009-04-27 15:26 ` pinskia at gcc dot gnu dot org
                   ` (11 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: lucier at math dot purdue dot edu @ 2009-04-27 15:11 UTC (permalink / raw)
  To: gcc-bugs



------- Comment #4 from lucier at math dot purdue dot edu  2009-04-27 15:11 -------
Subject: Re:  96% performance regression in floating
 point code; part of the problem started 2009/03/12-13

On Mon, 2009-04-27 at 08:16 +0000, ubizjak at gmail dot com wrote:
> 
> 
> ------- Comment #2 from ubizjak at gmail dot com  2009-04-27 08:16 -------
> (In reply to comment #0)
> 
> > (same .i file, same instructions for reproducing, same compiler options, same
> > everything)
> 
> I guess that this is direct.i compiled with -O1?
> 

Yes, the compile flags are

-Wall -W -Wno-unused -O1 -fno-math-errno -fschedule-insns2 -fno-trapping-math
-fno-strict-aliasing -fwrapv -fomit-frame-pointer -fPIC -fno-common -mieee-fp

> It is not clear from your report, if -O1 flag is problematic, -O2 code looks
> good to me.

Yes, the -O2 code looks good to me, too.

I've used the above list of options (starting with -O1) on this code
instead of -O2 because the above list (a) has generally given faster
performance, and (b) has required much less compile time and memory to
compile the C code generated by the Gambit Scheme->C compiler.  I have
not yet seen any evidence that -O2 generates better code (overall) than
the set of options above.

Brad


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39914


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug regression/39914] 96% performance regression in floating point code; part of the problem started 2009/03/12-13
  2009-04-26 18:24 [Bug regression/39914] New: 96% performance regression in floating point code; part of the problem started 2009/03/12-13 lucier at math dot purdue dot edu
                   ` (3 preceding siblings ...)
  2009-04-27 15:11 ` lucier at math dot purdue dot edu
@ 2009-04-27 15:26 ` pinskia at gcc dot gnu dot org
  2009-04-27 15:32 ` lucier at math dot purdue dot edu
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: pinskia at gcc dot gnu dot org @ 2009-04-27 15:26 UTC (permalink / raw)
  To: gcc-bugs



------- Comment #5 from pinskia at gcc dot gnu dot org  2009-04-27 15:26 -------
This is by design -O1 is way slower than -O2 now.


-- 

pinskia at gcc dot gnu dot org changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |RESOLVED
         Resolution|                            |INVALID


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39914


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug regression/39914] 96% performance regression in floating point code; part of the problem started 2009/03/12-13
  2009-04-26 18:24 [Bug regression/39914] New: 96% performance regression in floating point code; part of the problem started 2009/03/12-13 lucier at math dot purdue dot edu
                   ` (4 preceding siblings ...)
  2009-04-27 15:26 ` pinskia at gcc dot gnu dot org
@ 2009-04-27 15:32 ` lucier at math dot purdue dot edu
  2009-04-27 15:35 ` lucier at math dot purdue dot edu
                   ` (9 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: lucier at math dot purdue dot edu @ 2009-04-27 15:32 UTC (permalink / raw)
  To: gcc-bugs



------- Comment #6 from lucier at math dot purdue dot edu  2009-04-27 15:32 -------
Subject: Re:  96% performance regression in floating
 point code; part of the problem started 2009/03/12-13

On Mon, 2009-04-27 at 15:26 +0000, pinskia at gcc dot gnu dot org wrote:

> This is by design -O1 is way slower than -O2 now.

I have seen no general discussion that -O1 should be destroyed as a
useful compilation option.


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39914


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug regression/39914] 96% performance regression in floating point code; part of the problem started 2009/03/12-13
  2009-04-26 18:24 [Bug regression/39914] New: 96% performance regression in floating point code; part of the problem started 2009/03/12-13 lucier at math dot purdue dot edu
                   ` (5 preceding siblings ...)
  2009-04-27 15:32 ` lucier at math dot purdue dot edu
@ 2009-04-27 15:35 ` lucier at math dot purdue dot edu
  2009-04-27 16:29 ` lucier at math dot purdue dot edu
                   ` (8 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: lucier at math dot purdue dot edu @ 2009-04-27 15:35 UTC (permalink / raw)
  To: gcc-bugs



------- Comment #7 from lucier at math dot purdue dot edu  2009-04-27 15:35 -------
Subject: Re:  96% performance regression in floating
 point code; part of the problem started 2009/03/12-13

On Mon, 2009-04-27 at 15:32 +0000, lucier at math dot purdue dot edu
wrote:


> On Mon, 2009-04-27 at 15:26 +0000, pinskia at gcc dot gnu dot org wrote:
> 
> > This is by design -O1 is way slower than -O2 now.
> 
> I have seen no general discussion that -O1 should be destroyed as a
> useful compilation option.

Perhaps I should also point out that code generated by -O2 is not
generally much faster than before, so if you believe that -O1 is much
slower than -O2 now by design, it is only by making code generated by
-O1 much slower.

BTW, this code runs in 108 ms when compiled with gcc-4.2.4 with the
given options (including -O1).

Brad


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39914


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug regression/39914] 96% performance regression in floating point code; part of the problem started 2009/03/12-13
  2009-04-26 18:24 [Bug regression/39914] New: 96% performance regression in floating point code; part of the problem started 2009/03/12-13 lucier at math dot purdue dot edu
                   ` (6 preceding siblings ...)
  2009-04-27 15:35 ` lucier at math dot purdue dot edu
@ 2009-04-27 16:29 ` lucier at math dot purdue dot edu
  2009-04-27 18:21 ` ubizjak at gmail dot com
                   ` (7 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: lucier at math dot purdue dot edu @ 2009-04-27 16:29 UTC (permalink / raw)
  To: gcc-bugs



------- Comment #8 from lucier at math dot purdue dot edu  2009-04-27 16:29 -------
I hadn't noticed before that Andrew had marked it as "RESOLVED INVALID".

I'm reopening it, as I believe that resolving it as INVALID should require a
more general discussion than a one-line dismissal of the bug.

Brad


-- 

lucier at math dot purdue dot edu changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|RESOLVED                    |UNCONFIRMED
         Resolution|INVALID                     |


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39914


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug regression/39914] 96% performance regression in floating point code; part of the problem started 2009/03/12-13
  2009-04-26 18:24 [Bug regression/39914] New: 96% performance regression in floating point code; part of the problem started 2009/03/12-13 lucier at math dot purdue dot edu
                   ` (7 preceding siblings ...)
  2009-04-27 16:29 ` lucier at math dot purdue dot edu
@ 2009-04-27 18:21 ` ubizjak at gmail dot com
  2009-04-27 19:04 ` [Bug regression/39914] [4.4/4.5 Regression] " bonzini at gnu dot org
                   ` (6 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: ubizjak at gmail dot com @ 2009-04-27 18:21 UTC (permalink / raw)
  To: gcc-bugs



------- Comment #9 from ubizjak at gmail dot com  2009-04-27 18:21 -------
Following patch should fix the performance hit with -O1:

--cut here--
Index: ira-conflicts.c
===================================================================
--- ira-conflicts.c     (revision 146825)
+++ ira-conflicts.c     (working copy)
@@ -806,7 +806,7 @@ ira_build_conflicts (void)
       if ((! flag_caller_saves && ALLOCNO_CALLS_CROSSED_NUM (a) != 0)
          /* For debugging purposes don't put user defined variables in
             callee-clobbered registers.  */
-         || (optimize <= 1
+         || (optimize == 0
              && (attrs = REG_ATTRS (regno_reg_rtx [ALLOCNO_REGNO (a)])) != NULL
              && (decl = attrs->decl) != NULL
              && VAR_OR_FUNCTION_DECL_P (decl)
--cut here--

IMO, such a performance hit is not acceptable with -O1: we want to _optimize_
the code, and we have -O0 to achieve full debug functionality.


-- 

ubizjak at gmail dot com changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
  BugsThisDependsOn|                            |39432
             Status|UNCONFIRMED                 |NEW
     Ever Confirmed|0                           |1
   Last reconfirmed|0000-00-00 00:00:00         |2009-04-27 18:21:04
               date|                            |


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39914


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug regression/39914] [4.4/4.5 Regression] 96% performance regression in floating point code; part of the problem started 2009/03/12-13
  2009-04-26 18:24 [Bug regression/39914] New: 96% performance regression in floating point code; part of the problem started 2009/03/12-13 lucier at math dot purdue dot edu
                   ` (8 preceding siblings ...)
  2009-04-27 18:21 ` ubizjak at gmail dot com
@ 2009-04-27 19:04 ` bonzini at gnu dot org
  2009-04-27 20:38 ` lucier at math dot purdue dot edu
                   ` (5 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: bonzini at gnu dot org @ 2009-04-27 19:04 UTC (permalink / raw)
  To: gcc-bugs



------- Comment #10 from bonzini at gnu dot org  2009-04-27 19:04 -------
Yeah, it's basically destroying caller-save optimization.


-- 

bonzini at gnu dot org changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
      Known to fail|                            |4.4.0 4.5.0
      Known to work|                            |4.3.3
            Summary|96% performance regression  |[4.4/4.5 Regression] 96%
                   |in floating point code; part|performance regression in
                   |of the problem started      |floating point code; part of
                   |2009/03/12-13               |the problem started
                   |                            |2009/03/12-13
   Target Milestone|---                         |4.4.1


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39914


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug regression/39914] [4.4/4.5 Regression] 96% performance regression in floating point code; part of the problem started 2009/03/12-13
  2009-04-26 18:24 [Bug regression/39914] New: 96% performance regression in floating point code; part of the problem started 2009/03/12-13 lucier at math dot purdue dot edu
                   ` (9 preceding siblings ...)
  2009-04-27 19:04 ` [Bug regression/39914] [4.4/4.5 Regression] " bonzini at gnu dot org
@ 2009-04-27 20:38 ` lucier at math dot purdue dot edu
  2009-04-28  1:40 ` lucier at math dot purdue dot edu
                   ` (4 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: lucier at math dot purdue dot edu @ 2009-04-27 20:38 UTC (permalink / raw)
  To: gcc-bugs



------- Comment #11 from lucier at math dot purdue dot edu  2009-04-27 20:37 -------
As far as I can tell, the patch proposed by Uros restores the performance of
code generated by

gcc version 4.4.0 20090312 (experimental) [trunk revision 144812] (GCC) 

In particular, the assembly code for the main loop is identical for code
generated by

gcc version 4.4.0 20090312 (experimental) [trunk revision 144801] (GCC) 

and by

gcc version 4.4.0 20090312 (experimental) [trunk revision 144812] (GCC) 

after his patch.

Thanks for getting to this so quickly.

Brad


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39914


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug regression/39914] [4.4/4.5 Regression] 96% performance regression in floating point code; part of the problem started 2009/03/12-13
  2009-04-26 18:24 [Bug regression/39914] New: 96% performance regression in floating point code; part of the problem started 2009/03/12-13 lucier at math dot purdue dot edu
                   ` (10 preceding siblings ...)
  2009-04-27 20:38 ` lucier at math dot purdue dot edu
@ 2009-04-28  1:40 ` lucier at math dot purdue dot edu
  2009-04-28 16:18 ` uros at gcc dot gnu dot org
                   ` (3 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: lucier at math dot purdue dot edu @ 2009-04-28  1:40 UTC (permalink / raw)
  To: gcc-bugs

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 1459 bytes --]



------- Comment #12 from lucier at math dot purdue dot edu  2009-04-28 01:39 -------
I tried to build and check with this patch, but I got stopped with:

/tmp/lucier/gcc/objdirs/mainline/./prev-gcc/xgcc
-B/tmp/lucier/gcc/objdirs/mainline/./prev-gcc/
-B/pkgs/gcc-mainline/x86_64-unknown-linux-gnu/bin/ -c  -g -O2 -DIN_GCC   -W
-Wall -Wwrite-strings -Wstrict-prototypes -Wmissing-prototypes -Wcast-qual
-Wold-style-definition -Wc++-compat -Wmissing-format-attribute -pedantic
-Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -Werror -fno-common
 -DHAVE_CONFIG_H -DGENERATOR_FILE -I. -Ibuild -I../../../mainline/gcc
-I../../../mainline/gcc/build -I../../../mainline/gcc/../include
-I../../../mainline/gcc/../libcpp/include
-I/tmp/lucier/gcc/objdirs/mainline/./gmp -I/tmp/lucier/gcc/mainline/gmp
-I/tmp/lucier/gcc/objdirs/mainline/./mpfr -I/tmp/lucier/gcc/mainline/mpfr 
-I../../../mainline/gcc/../libdecnumber
-I../../../mainline/gcc/../libdecnumber/bid -I../libdecnumber    -o build/vec.o
../../../mainline/gcc/vec.c
cc1: warnings being treated as errors
../../../mainline/gcc/vec.c: In function ‘vec_descriptor’:
../../../mainline/gcc/vec.c:116: error: enum conversion when passing argument 3
of ‘htab_find_slot’ is invalid in C++
../../../mainline/gcc/../include/hashtab.h:172: note: expected ‘enum
insert_option’ but argument is of type ‘int’
make[3]: *** [build/vec.o] Error 1


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39914


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug regression/39914] [4.4/4.5 Regression] 96% performance regression in floating point code; part of the problem started 2009/03/12-13
  2009-04-26 18:24 [Bug regression/39914] New: 96% performance regression in floating point code; part of the problem started 2009/03/12-13 lucier at math dot purdue dot edu
                   ` (11 preceding siblings ...)
  2009-04-28  1:40 ` lucier at math dot purdue dot edu
@ 2009-04-28 16:18 ` uros at gcc dot gnu dot org
  2009-04-28 16:19 ` [Bug rtl-optimization/39914] [4.4 " ubizjak at gmail dot com
                   ` (2 subsequent siblings)
  15 siblings, 0 replies; 17+ messages in thread
From: uros at gcc dot gnu dot org @ 2009-04-28 16:18 UTC (permalink / raw)
  To: gcc-bugs



------- Comment #13 from uros at gcc dot gnu dot org  2009-04-28 16:18 -------
Subject: Bug 39914

Author: uros
Date: Tue Apr 28 16:18:17 2009
New Revision: 146904

URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=146904
Log:
        PR rtl-optimization/39914
        * ira-conflicts.c (ira_build_conflicts): Prohibit call used
        registers for allocnos created from user-defined variables only
        when not optimizing.


Modified:
    trunk/gcc/ChangeLog
    trunk/gcc/ira-conflicts.c


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39914


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug rtl-optimization/39914] [4.4 Regression] 96% performance regression in floating point code; part of the problem started 2009/03/12-13
  2009-04-26 18:24 [Bug regression/39914] New: 96% performance regression in floating point code; part of the problem started 2009/03/12-13 lucier at math dot purdue dot edu
                   ` (12 preceding siblings ...)
  2009-04-28 16:18 ` uros at gcc dot gnu dot org
@ 2009-04-28 16:19 ` ubizjak at gmail dot com
  2009-05-03 19:40 ` uros at gcc dot gnu dot org
  2009-05-03 19:41 ` ubizjak at gmail dot com
  15 siblings, 0 replies; 17+ messages in thread
From: ubizjak at gmail dot com @ 2009-04-28 16:19 UTC (permalink / raw)
  To: gcc-bugs



------- Comment #14 from ubizjak at gmail dot com  2009-04-28 16:19 -------
Fixed on the trunk so far.


-- 

ubizjak at gmail dot com changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
         AssignedTo|unassigned at gcc dot gnu   |ubizjak at gmail dot com
                   |dot org                     |
             Status|NEW                         |ASSIGNED
          Component|regression                  |rtl-optimization
   Last reconfirmed|2009-04-27 18:21:04         |2009-04-28 16:19:32
               date|                            |
            Summary|[4.4/4.5 Regression] 96%    |[4.4 Regression] 96%
                   |performance regression in   |performance regression in
                   |floating point code; part of|floating point code; part of
                   |the problem started         |the problem started
                   |2009/03/12-13               |2009/03/12-13


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39914


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug rtl-optimization/39914] [4.4 Regression] 96% performance regression in floating point code; part of the problem started 2009/03/12-13
  2009-04-26 18:24 [Bug regression/39914] New: 96% performance regression in floating point code; part of the problem started 2009/03/12-13 lucier at math dot purdue dot edu
                   ` (13 preceding siblings ...)
  2009-04-28 16:19 ` [Bug rtl-optimization/39914] [4.4 " ubizjak at gmail dot com
@ 2009-05-03 19:40 ` uros at gcc dot gnu dot org
  2009-05-03 19:41 ` ubizjak at gmail dot com
  15 siblings, 0 replies; 17+ messages in thread
From: uros at gcc dot gnu dot org @ 2009-05-03 19:40 UTC (permalink / raw)
  To: gcc-bugs



------- Comment #15 from uros at gcc dot gnu dot org  2009-05-03 19:40 -------
Subject: Bug 39914

Author: uros
Date: Sun May  3 19:40:35 2009
New Revision: 147081

URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=147081
Log:
        Backport from mainline:
        2009-04-28  Uros Bizjak  <ubizjak@gmail.com>

        PR rtl-optimization/39914
        * ira-conflicts.c (ira_build_conflicts): Prohibit call used
        registers for allocnos created from user-defined variables only
        when not optimizing.


Modified:
    branches/gcc-4_4-branch/gcc/ChangeLog
    branches/gcc-4_4-branch/gcc/ira-conflicts.c


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39914


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Bug rtl-optimization/39914] [4.4 Regression] 96% performance regression in floating point code; part of the problem started 2009/03/12-13
  2009-04-26 18:24 [Bug regression/39914] New: 96% performance regression in floating point code; part of the problem started 2009/03/12-13 lucier at math dot purdue dot edu
                   ` (14 preceding siblings ...)
  2009-05-03 19:40 ` uros at gcc dot gnu dot org
@ 2009-05-03 19:41 ` ubizjak at gmail dot com
  15 siblings, 0 replies; 17+ messages in thread
From: ubizjak at gmail dot com @ 2009-05-03 19:41 UTC (permalink / raw)
  To: gcc-bugs



------- Comment #16 from ubizjak at gmail dot com  2009-05-03 19:41 -------
Fixed.


-- 

ubizjak at gmail dot com changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|ASSIGNED                    |RESOLVED
         Resolution|                            |FIXED


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39914


^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2009-05-03 19:41 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-04-26 18:24 [Bug regression/39914] New: 96% performance regression in floating point code; part of the problem started 2009/03/12-13 lucier at math dot purdue dot edu
2009-04-26 18:43 ` [Bug regression/39914] " ubizjak at gmail dot com
2009-04-27  8:16 ` ubizjak at gmail dot com
2009-04-27 15:07 ` lucier at math dot purdue dot edu
2009-04-27 15:11 ` lucier at math dot purdue dot edu
2009-04-27 15:26 ` pinskia at gcc dot gnu dot org
2009-04-27 15:32 ` lucier at math dot purdue dot edu
2009-04-27 15:35 ` lucier at math dot purdue dot edu
2009-04-27 16:29 ` lucier at math dot purdue dot edu
2009-04-27 18:21 ` ubizjak at gmail dot com
2009-04-27 19:04 ` [Bug regression/39914] [4.4/4.5 Regression] " bonzini at gnu dot org
2009-04-27 20:38 ` lucier at math dot purdue dot edu
2009-04-28  1:40 ` lucier at math dot purdue dot edu
2009-04-28 16:18 ` uros at gcc dot gnu dot org
2009-04-28 16:19 ` [Bug rtl-optimization/39914] [4.4 " ubizjak at gmail dot com
2009-05-03 19:40 ` uros at gcc dot gnu dot org
2009-05-03 19:41 ` ubizjak at gmail dot com

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).