public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/55760] New: scalar code not using rsqrtss and rcpss
@ 2012-12-20 15:49 vincenzo.innocente at cern dot ch
  2012-12-20 15:52 ` [Bug tree-optimization/55760] " rguenth at gcc dot gnu.org
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: vincenzo.innocente at cern dot ch @ 2012-12-20 15:49 UTC (permalink / raw)
  To: gcc-bugs


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55760

             Bug #: 55760
           Summary: scalar code not using rsqrtss and rcpss
    Classification: Unclassified
           Product: gcc
           Version: 4.8.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassigned@gcc.gnu.org
        ReportedBy: vincenzo.innocente@cern.ch


Is there any reason why rsqrtss and rcpss are not used for scalar code, while
rsqrtps and rcpps are used for vectorized loops?

cat scalar.cc
#include<cmath>
void scalar(float& a, float& b) {
  a = std::sqrt(a);
  b = 1.f/b;
}

float v[1024];
float w[1024];

void vector() {
  for(int i=0;i!=1024;++i) {
    v[i] = std::sqrt(v[i]);
    w[i] = 1.f/w[i];
  }
}

c++ -std=c++11 -Ofast -march=corei7 -S scalar.cc -ftree-vectorizer-verbose=1 \
    -ftree-loop-if-convert-stores; cat scalar.s | c++filt


scalar(float&, float&):
LFB221:
    sqrtss    (%rdi), %xmm0
    movss    %xmm0, (%rdi)
    movss    LC0(%rip), %xmm0
    divss    (%rsi), %xmm0
    movss    %xmm0, (%rsi)
    ret
LFE221:
    .align 4,0x90
    .globl vector()
vector():
LFB222:
    movaps    LC1(%rip), %xmm5
    leaq    void(%rip), %rax
    xorps    %xmm3, %xmm3
    movaps    LC2(%rip), %xmm4
    leaq    wchar_t(%rip), %rdx
    leaq    4096+void(%rip), %rcx
    .align 4,0x90
L4:
    movaps    (%rax), %xmm1
    movaps    %xmm3, %xmm2
    addq    $16, %rax
    addq    $16, %rdx
    rsqrtps    %xmm1, %xmm0
    cmpneqps    %xmm1, %xmm2
    andps    %xmm2, %xmm0
    mulps    %xmm0, %xmm1
    mulps    %xmm1, %xmm0
    mulps    %xmm4, %xmm1
    addps    %xmm5, %xmm0
    mulps    %xmm1, %xmm0
    movaps    %xmm0, -16(%rax)
    movaps    -16(%rdx), %xmm1
    rcpps    %xmm1, %xmm0
    mulps    %xmm0, %xmm1
    mulps    %xmm0, %xmm1
    addps    %xmm0, %xmm0
    subps    %xmm1, %xmm0
    movaps    %xmm0, -16(%rdx)
    cmpq    %rcx, %rax
    jne    L4
    rep; ret


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-08-07 22:59 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-12-20 15:49 [Bug tree-optimization/55760] New: scalar code not using rsqrtss and rcpss vincenzo.innocente at cern dot ch
2012-12-20 15:52 ` [Bug tree-optimization/55760] " rguenth at gcc dot gnu.org
2012-12-20 15:55 ` vincenzo.innocente at cern dot ch
2012-12-20 15:59 ` rguenth at gcc dot gnu.org
2012-12-20 16:07 ` dominiq at lps dot ens.fr
2013-01-08 15:29 ` vincenzo.innocente at cern dot ch
2013-01-08 23:55 ` glisse at gcc dot gnu.org
2021-08-07 22:59 ` pinskia at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).