public inbox for gcc-bugs@sourceware.org help / color / mirror / Atom feed
From: "tim at klingt dot org" <gcc-bugzilla@gcc.gnu.org> To: gcc-bugs@gcc.gnu.org Subject: [Bug c/38134] New: gcc-4.4 speed regression with sse code Date: Sat, 15 Nov 2008 15:56:00 -0000 [thread overview] Message-ID: <bug-38134-12873@http.gcc.gnu.org/bugzilla/> (raw) the attached program, a simdfied version of the tanf function, shows a 20% performance regression from gcc-4.3 to gcc-4.4: the compared compilers are g++-4.3 Using built-in specs. Target: x86_64-linux-gnu Configured with: ../src/configure -v --with-pkgversion='Ubuntu 4.3.2-1ubuntu11' --with-bugurl=file:///usr/share/doc/gcc-4.3/README.Bugs --enable-languages=c,c++,fortran,objc,obj-c++ --prefix=/usr --enable-shared --with-system-zlib --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --enable-nls --with-gxx-include-dir=/usr/include/c++/4.3 --program-suffix=-4.3 --enable-clocale=gnu --enable-libstdcxx-debug --enable-objc-gc --enable-mpfr --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu Thread model: posix gcc version 4.3.2 (Ubuntu 4.3.2-1ubuntu11) and Using built-in specs. 
Target: x86_64-linux-gnu Configured with: ../src/configure -v --with-pkgversion='Ubuntu 20081024-0ubuntu1' --with-bugurl=file:///usr/share/doc/gcc-snapshot/README.Bugs --enable-languages=c,c++,java,fortran,objc,obj-c++,ada --prefix=/usr/lib/gcc-snapshot --enable-shared --with-system-zlib --disable-nls --enable-clocale=gnu --enable-libstdcxx-debug --enable-java-awt=gtk --enable-gtk-cairo --disable-plugin --with-java-home=/usr/lib/gcc-snapshot --enable-java-home --with-jvm-root-dir=/usr/lib/gcc-snapshot/jvm --with-jvm-jar-dir=/usr/lib/gcc-snapshot/jvm-exports --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --enable-objc-gc --enable-mpfr --disable-werror --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu Thread model: posix gcc version 4.4.0 20081024 (experimental) [trunk revision 141342] (Ubuntu 20081024-0ubuntu1) the interesting part is the inner loop of the bench_1_simd function. gcc-4.4 generates: .L54: movaps in(%rax), %xmm0 movdqa %xmm14, %xmm3 addl $4, %edx pand %xmm0, %xmm3 #APP # 325 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/sincosf4.h" 1 xorps %xmm3, %xmm0 # 0 "" 2 #NO_APP movaps %xmm0, %xmm4 movaps %xmm0, %xmm15 mulps %xmm13, %xmm4 #APP # 328 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/sincosf4.h" 1 cvttps2dq %xmm4, %xmm4 # 0 "" 2 #NO_APP movdqa %xmm4, %xmm1 pand %xmm12, %xmm1 paddd %xmm1, %xmm4 #APP # 331 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/sincosf4.h" 1 cvtdq2ps %xmm4, %xmm1 # 0 "" 2 #NO_APP pand .LC15(%rip), %xmm4 movaps %xmm1, %xmm2 psrld $1, %xmm4 mulps %xmm11, %xmm2 subps %xmm2, %xmm15 movaps %xmm15, %xmm2 movaps %xmm1, %xmm15 mulps %xmm9, %xmm1 mulps %xmm10, %xmm15 subps %xmm15, %xmm2 movaps %xmm8, %xmm15 subps %xmm1, %xmm2 cmpltps %xmm0, %xmm15 movaps %xmm2, %xmm1 mulps %xmm2, %xmm1 movaps %xmm1, %xmm0 mulps %xmm7, %xmm0 addps .LC10(%rip), %xmm0 mulps %xmm1, %xmm0 addps .LC11(%rip), %xmm0 mulps %xmm1, %xmm0 addps .LC12(%rip), %xmm0 mulps %xmm1, %xmm0 addps .LC13(%rip), %xmm0 mulps %xmm1, 
%xmm0 addps .LC14(%rip), %xmm0 mulps %xmm1, %xmm0 movdqa %xmm5, %xmm1 mulps %xmm2, %xmm0 psubd %xmm4, %xmm1 addps %xmm2, %xmm0 movdqa %xmm1, %xmm4 #APP # 342 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/sincosf4.h" 1 andps %xmm15, %xmm0 # 0 "" 2 #NO_APP movaps .LC16(%rip), %xmm1 #APP # 343 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/sincosf4.h" 1 andnps %xmm2, %xmm15 # 0 "" 2 #NO_APP movaps %xmm6, %xmm2 #APP # 344 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/sincosf4.h" 1 orps %xmm15, %xmm0 # 0 "" 2 #NO_APP addps %xmm0, %xmm1 divps %xmm1, %xmm2 movaps %xmm2, %xmm1 #APP # 145 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/simdconst.h" 1 andps %xmm4, %xmm1 # 0 "" 2 # 146 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/simdconst.h" 1 andnps %xmm0, %xmm4 # 0 "" 2 #NO_APP movaps %xmm1, %xmm0 #APP # 147 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/simdconst.h" 1 orps %xmm4, %xmm0 # 0 "" 2 # 349 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/sincosf4.h" 1 xorps %xmm3, %xmm0 # 0 "" 2 #NO_APP movaps %xmm0, out(%rax) addq $16, %rax cmpl %edi, %edx jne .L54 while gcc-4.3 generates: .L48: movaps in(%rax), %xmm2 movdqa .LC2(%rip), %xmm5 movaps .LC3(%rip), %xmm0 pand %xmm2, %xmm5 movdqa .LC4(%rip), %xmm4 movaps .LC5(%rip), %xmm1 addl $4, %edx #APP # 325 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/sincosf4.h" 1 xorps %xmm5, %xmm2 # 0 "" 2 #NO_APP mulps %xmm2, %xmm0 movaps %xmm2, %xmm3 #APP # 328 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/sincosf4.h" 1 cvttps2dq %xmm0, %xmm0 # 0 "" 2 #NO_APP pand %xmm0, %xmm4 paddd %xmm0, %xmm4 #APP # 331 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/sincosf4.h" 1 cvtdq2ps %xmm4, %xmm0 # 0 "" 2 #NO_APP pand %xmm9, %xmm4 mulps %xmm0, %xmm1 psrld $1, %xmm4 subps %xmm1, %xmm3 movaps .LC6(%rip), %xmm1 mulps %xmm0, %xmm1 mulps .LC7(%rip), %xmm0 subps %xmm1, %xmm3 subps %xmm0, %xmm3 movaps .LC8(%rip), %xmm0 movaps %xmm3, %xmm1 cmpltps %xmm2, %xmm0 mulps %xmm3, %xmm1 movaps 
%xmm0, %xmm2 movaps %xmm1, %xmm0 mulps %xmm15, %xmm0 addps %xmm14, %xmm0 mulps %xmm1, %xmm0 addps %xmm13, %xmm0 mulps %xmm1, %xmm0 addps %xmm12, %xmm0 mulps %xmm1, %xmm0 addps %xmm11, %xmm0 mulps %xmm1, %xmm0 addps %xmm10, %xmm0 mulps %xmm1, %xmm0 mulps %xmm3, %xmm0 addps %xmm3, %xmm0 #APP # 342 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/sincosf4.h" 1 andps %xmm2, %xmm0 # 0 "" 2 # 343 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/sincosf4.h" 1 andnps %xmm3, %xmm2 # 0 "" 2 #NO_APP movaps %xmm7, %xmm3 #APP # 344 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/sincosf4.h" 1 orps %xmm2, %xmm0 # 0 "" 2 #NO_APP movdqa %xmm6, %xmm2 movaps %xmm0, %xmm1 psubd %xmm4, %xmm2 addps %xmm8, %xmm1 divps %xmm1, %xmm3 movaps %xmm3, %xmm1 #APP # 145 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/simdconst.h" 1 andps %xmm2, %xmm1 # 0 "" 2 # 146 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/simdconst.h" 1 andnps %xmm0, %xmm2 # 0 "" 2 # 147 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/simdconst.h" 1 orps %xmm2, %xmm1 # 0 "" 2 # 349 "benchmarks/../source/dsp/../../libs/libsimdmath/lib/sincosf4.h" 1 xorps %xmm5, %xmm1 # 0 "" 2 #NO_APP movaps %xmm1, out(%rax) addq $16, %rax cmpl %edi, %edx jne .L48 the code generated by gcc-4.4 requires more memory access. the code was generated with the flags -O3 -march=core2. while the assembly code is generated for the x86_64 architecture, similar results can be seen with x86 code (4.4 is about 14% slower than 4.3) -- Summary: gcc-4.4 speed regression with sse code Product: gcc Version: unknown Status: UNCONFIRMED Severity: normal Priority: P3 Component: c AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: tim at klingt dot org http://gcc.gnu.org/bugzilla/show_bug.cgi?id=38134
next reply other threads:[~2008-11-15 15:56 UTC|newest] Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top 2008-11-15 15:56 tim at klingt dot org [this message] 2008-11-15 15:57 ` [Bug c/38134] " tim at klingt dot org 2008-11-15 16:47 ` ubizjak at gmail dot com 2008-11-15 17:05 ` tim at klingt dot org 2008-11-15 20:32 ` [Bug target/38134] [4.4 Regression] speed regression with inline-asm " rguenth at gcc dot gnu dot org 2008-11-16 0:07 ` hjl dot tools at gmail dot com 2008-11-16 0:09 ` hjl dot tools at gmail dot com 2008-11-17 9:36 ` jakub at gcc dot gnu dot org 2008-11-17 18:13 ` ubizjak at gmail dot com 2008-11-17 18:20 ` tim at klingt dot org 2008-11-17 18:31 ` tim at klingt dot org 2008-11-17 18:50 ` tim at klingt dot org 2009-02-03 9:47 ` bonzini at gnu dot org 2009-02-03 10:36 ` ubizjak at gmail dot com 2009-02-03 11:17 ` bonzini at gnu dot org 2009-02-03 11:34 ` ubizjak at gmail dot com 2009-02-13 9:57 ` [Bug target/38134] [4.4 Regression] speed regression with many loop invariants bonzini at gnu dot org 2009-02-13 10:03 ` steven at gcc dot gnu dot org 2009-04-21 16:02 ` [Bug target/38134] [4.4/4.5 " jakub at gcc dot gnu dot org 2009-07-22 10:35 ` jakub at gcc dot gnu dot org 2009-10-15 12:56 ` jakub at gcc dot gnu dot org 2010-01-21 13:16 ` jakub at gcc dot gnu dot org 2010-04-30 9:01 ` [Bug target/38134] [4.4/4.5/4.6 " jakub at gcc dot gnu dot org
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=bug-38134-12873@http.gcc.gnu.org/bugzilla/ \ --to=gcc-bugzilla@gcc.gnu.org \ --cc=gcc-bugs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).