public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/57858] New: AVX2: ymm used for div, not for sqrt
@ 2013-07-09  7:55 vincenzo.innocente at cern dot ch
  2013-07-09  9:44 ` [Bug tree-optimization/57858] " jakub at gcc dot gnu.org
                   ` (8 more replies)
  0 siblings, 9 replies; 10+ messages in thread
From: vincenzo.innocente at cern dot ch @ 2013-07-09  7:55 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57858

            Bug ID: 57858
           Summary: AVX2: ymm used for div, not for sqrt
           Product: gcc
           Version: 4.9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: vincenzo.innocente at cern dot ch

In the following example, div uses ymm registers while sqr uses only xmm ones.
gcc version 4.9.0 20130630 (experimental) [trunk revision 200570] (GCC) 

cat avx2sqrt.cc
#include<math.h>
// Test case: returns the sum of 1/(i+1) for i = 0 .. 1023.
// Per the report, the loop in this function is compiled using ymm registers.
double div() {
   double s=0;
   // 1. is a double literal, so each division is carried out in double.
   for (int i=0; i!=1024; ++i) s+=1./(i+1);
   return s;
}


// Test case: returns the sum of sqrt(i+1) for i = 0 .. 1023.
// Per the report, the loop in this function is compiled using only xmm
// registers, unlike the otherwise similar loop in div().
double sqr() {
   double s=0;
   // sqrt is called with the int expression i+1.
   for (int i=0; i!=1024; ++i) s+=sqrt(i+1);
   return s;
}

c++ -std=c++11 -Ofast -S avx2sqrt.cc -march=corei7-avx -mavx2
-ftree-vectorizer-verbose=1 -Wall ; cat avx2sqrt.s

# _Z3divv: compiler output for div(), which sums 1./(i+1) for i = 0 .. 1023.
# The loop below runs 128 times; each pass takes one ymm of eight 32-bit
# indices and processes it as two halves of 4 doubles (128 * 8 = 1024).
#
# Register roles:
#   %eax  - pass counter, 0 .. 128
#   %ymm0 - index vector for the current pass (then reused for doubles)
#   %ymm4 - index vector for the next pass
#   %ymm1 - running partial sums, 4 x double
#   %ymm2 - dividend vector loaded from .LC3
#           (presumably 1.0 in every lane; constant not shown here)
#   %ymm5 - addend loaded from .LC2
#           (presumably the "+1" of i+1; constant not shown here)
#   %ymm6 - index step loaded from .LC1
#           (presumably 8 per lane; constant not shown here)
# The double result is left in the low element of %xmm0.
_Z3divv:
.LFB3:
    .cfi_startproc
    vmovdqa    .LC1(%rip), %ymm6    # per-pass index step
    xorl    %eax, %eax    # pass counter = 0
    vxorpd    %xmm1, %xmm1, %xmm1    # partial sums = 0
    vmovdqa    .LC0(%rip), %ymm0    # initial index vector
    vmovdqa    .LC2(%rip), %ymm5    # addend applied to the indices
    vmovapd    .LC3(%rip), %ymm2    # dividend vector
    jmp    .L2    # first pass skips the copy at .L3
    .p2align 4,,10
    .p2align 3
.L3:
    vmovdqa    %ymm4, %ymm0    # advance to the next index vector
.L2:
    vpaddd    %ymm6, %ymm0, %ymm4    # ymm4 = indices for the next pass
    vpaddd    %ymm5, %ymm0, %ymm0    # ymm0 = indices + addend (eight int32)
    addl    $1, %eax
    vextracti128    $0x1, %ymm0, %xmm3    # xmm3 = upper four int32
    vcvtdq2pd    %xmm0, %ymm0    # lower four int32 -> 4 x double
    vcvtdq2pd    %xmm3, %ymm3    # upper four int32 -> 4 x double
    vdivpd    %ymm0, %ymm2, %ymm0    # ymm0 = ymm2 / ymm0
    vdivpd    %ymm3, %ymm2, %ymm3    # ymm3 = ymm2 / ymm3
    vaddpd    %ymm0, %ymm3, %ymm0    # combine the two halves of this pass
    cmpl    $128, %eax    # flags consumed by the jne below
    vaddpd    %ymm0, %ymm1, %ymm1    # accumulate into the partial sums
    jne    .L3
    # Horizontal reduction of the four partial sums in ymm1.
    vhaddpd    %ymm1, %ymm1, %ymm1    # pairwise add within each 128-bit half
    vperm2f128    $1, %ymm1, %ymm1, %ymm0    # ymm0 = ymm1 with halves swapped
    vaddpd    %ymm0, %ymm1, %ymm0    # every element now holds the total
    vzeroupper    # clear the upper ymm halves before returning
    ret
    .cfi_endproc
.LFE3:
    .size    _Z3divv, .-_Z3divv
    .p2align 4,,15
    .globl    _Z3sqrv
    .type    _Z3sqrv, @function
# _Z3sqrv: compiler output for sqr(), which sums sqrt(i+1) for i = 0 .. 1023.
# Unlike _Z3divv this is a purely scalar loop: one vcvtsi2sd / vsqrtsd /
# vaddsd per element, using xmm registers only.
#
# Register roles:
#   %eax  - integer whose square root is taken, counting up to 1025
#   %xmm1 - term to be added on the current pass
#   %xmm0 - running sum; holds the double result on return
_Z3sqrv:
.LFB4:
    .cfi_startproc
    movl    $1, %eax
    vmovsd    .LC4(%rip), %xmm1    # first term, loaded rather than computed
                                   # (presumably sqrt(1); constant not shown here)
    vxorpd    %xmm0, %xmm0, %xmm0    # sum = 0
    jmp    .L6    # enter mid-loop: the first term is already in xmm1
    .p2align 4,,10
    .p2align 3
.L7:
    vcvtsi2sd    %eax, %xmm1, %xmm1    # xmm1 = (double)eax
    vsqrtsd    %xmm1, %xmm1, %xmm1    # xmm1 = sqrt(xmm1)
.L6:
    addl    $1, %eax
    vaddsd    %xmm1, %xmm0, %xmm0    # sum += current term
    cmpl    $1025, %eax    # stop once the term for 1024 has been added
    jne    .L7
    rep; ret    # return form exactly as emitted by the compiler
    .cfi_endproc
.LFE4:
    .size    _Z3sqrv, .-_Z3sqrv


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-09-11  6:37 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-09  7:55 [Bug tree-optimization/57858] New: AVX2: ymm used for div, not for sqrt vincenzo.innocente at cern dot ch
2013-07-09  9:44 ` [Bug tree-optimization/57858] " jakub at gcc dot gnu.org
2013-07-09 13:49 ` vincenzo.innocente at cern dot ch
2013-07-09 15:33 ` glisse at gcc dot gnu.org
2013-07-10  6:37 ` jakub at gcc dot gnu.org
2013-07-10  9:51 ` vincenzo.innocente at cern dot ch
2021-07-30  6:06 ` pinskia at gcc dot gnu.org
2021-07-30  9:16 ` rguenth at gcc dot gnu.org
2021-07-30 22:56 ` pinskia at gcc dot gnu.org
2021-09-11  6:37 ` pinskia at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).