public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/61747] New: min,max pattern not always properly optimized (for sse4 targets)
@ 2014-07-08 13:34 vincenzo.innocente at cern dot ch
  2014-07-08 14:57 ` [Bug tree-optimization/61747] " glisse at gcc dot gnu.org
                   ` (10 more replies)
  0 siblings, 11 replies; 12+ messages in thread
From: vincenzo.innocente at cern dot ch @ 2014-07-08 13:34 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61747

            Bug ID: 61747
           Summary: min,max pattern not always properly optimized (for
                    sse4 targets)
           Product: gcc
           Version: 4.9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: vincenzo.innocente at cern dot ch

I was expecting gcc to substitute a min/max instruction for both
(a>b) ? a : b and (a<b) ? a : b, even at -O2.
This is not always the case: only -Ofast provides consistently optimized code
(even if sometimes with a redundant move). -ffinite-math-only makes the code
worse for vector arguments...

cat vmin.cc 
// 16-byte vector of float, declared with the GCC vector_size attribute.
typedef float __attribute__( ( vector_size( 16 ) ) ) float32x4_t;

  // Returns a where a > b, otherwise b. Instantiated below with both float
  // and float32x4_t; for vector operands the GCC vector extension evaluates
  // the comparison and the ?: selection element by element.
  template<typename V1>
  V1 vmax(V1 a, V1 b) {
    return (a>b) ? a : b;
  }
  // Returns a where a < b, otherwise b. Instantiated below with both float
  // and float32x4_t; for vector operands the GCC vector extension evaluates
  // the comparison and the ?: selection element by element.
  template<typename V1>
  V1 vmin(V1 a, V1 b) {
    return (a<b) ? a : b;
  }


// Scalar test case: computes min(max(a, b), c) with three runtime operands.
float foo(float a, float b, float c) {
  return vmin(vmax(a,b),c);
}

// Vector test case: the same min(max(a, b), c) expression on float32x4_t operands.
float32x4_t foo(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vmin(vmax(a,b),c);
}

// Clamps a between two compile-time constants: the result is
// vmin(vmax(a, zt), it) with zt = zero - 3 and it = zero + 4.
// The bounds are derived from `zero` so that the same arithmetic produces a
// Float-typed constant for both the scalar and the vector instantiation.
template<typename Float>
Float bart(Float a) { 
  constexpr Float zero{0.f};
  constexpr Float it = zero+4.f;  // upper bound
  constexpr Float zt = zero-3.f;  // lower bound
  return vmin(vmax(a,zt),it);
}


// Scalar test case: instantiates bart<float>, i.e. min/max against constant bounds.
float bar(float a) {
   return bart(a);
}
// Vector test case: instantiates bart<float32x4_t>, i.e. min/max against constant bounds.
float32x4_t bar(float32x4_t a) {
   return bart(a);
}

I see
c++ -std=c++11 -O2  -msse4.2 -s vmin.cc -S; cat vmin.s

__Z3foofff:
LFB2:
    maxss    %xmm1, %xmm0
    minss    %xmm2, %xmm0
    ret

__Z3fooDv4_fS_S_:
LFB3:
    maxps    %xmm1, %xmm0
    minps    %xmm2, %xmm0
    ret

__Z3barf:
LFB5:
    ucomiss    LC3(%rip), %xmm0
    jbe    L12
    minss    LC2(%rip), %xmm0
    ret
    .align 4,0x90
L12:
    movss    LC3(%rip), %xmm0
    ret

__Z3barDv4_f:
LFB6:
    movaps    LC5(%rip), %xmm1
    movaps    %xmm0, %xmm2
    movaps    %xmm1, %xmm0
    cmpltps    %xmm2, %xmm0
    blendvps    %xmm0, %xmm2, %xmm1
    movaps    LC6(%rip), %xmm2
    movaps    %xmm1, %xmm0
    cmpltps    %xmm2, %xmm0
    blendvps    %xmm0, %xmm1, %xmm2
    movaps    %xmm2, %xmm0
    ret

-----------------
c++ -std=c++11 -O2  -msse4.2 -s vmin.cc -S -ffinite-math-only; cat vmin.s
__Z3foofff:
LFB2:
    maxss    %xmm0, %xmm1
    minss    %xmm2, %xmm1
    movaps    %xmm1, %xmm0
    ret
__Z3fooDv4_fS_S_:
LFB3:
    maxps    %xmm1, %xmm0
    movaps    %xmm0, %xmm1
    movaps    %xmm2, %xmm0
    cmpleps    %xmm1, %xmm0
    blendvps    %xmm0, %xmm2, %xmm1
    movaps    %xmm1, %xmm0
    ret

__Z3barf:
LFB5:
    maxss    LC2(%rip), %xmm0
    minss    LC3(%rip), %xmm0
    ret

__Z3barDv4_f:
LFB6:
    movaps    LC5(%rip), %xmm1
    movaps    %xmm0, %xmm2
    movaps    %xmm1, %xmm0
    cmpltps    %xmm2, %xmm0
    blendvps    %xmm0, %xmm2, %xmm1
    movaps    LC6(%rip), %xmm2
    movaps    %xmm1, %xmm0
    cmpltps    %xmm2, %xmm0
    blendvps    %xmm0, %xmm1, %xmm2
    movaps    %xmm2, %xmm0
    ret
LFE6:

--------------
finally, with -Ofast:
c++ -std=c++11 -Ofast  -msse4.2 -s vmin.cc -S; cat vmin.s

__Z3foofff:
LFB2:
    maxss    %xmm0, %xmm1
    minss    %xmm2, %xmm1
    movaps    %xmm1, %xmm0
    ret

__Z3fooDv4_fS_S_:
LFB3:
    maxps    %xmm0, %xmm1
    minps    %xmm2, %xmm1
    movaps    %xmm1, %xmm0
    ret

__Z3barf:
LFB5:
    maxss    LC2(%rip), %xmm0
    minss    LC3(%rip), %xmm0
    ret
__Z3barDv4_f:
LFB6:
    maxps    LC5(%rip), %xmm0
    minps    LC6(%rip), %xmm0
    ret


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2023-07-21  6:21 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-07-08 13:34 [Bug tree-optimization/61747] New: min,max pattern not always properly optimized (for sse4 targets) vincenzo.innocente at cern dot ch
2014-07-08 14:57 ` [Bug tree-optimization/61747] " glisse at gcc dot gnu.org
2014-07-08 15:21 ` vincenzo.innocente at cern dot ch
2014-07-08 15:27 ` glisse at gcc dot gnu.org
2014-07-08 15:35 ` vincenzo.innocente at cern dot ch
2014-07-08 15:46 ` rguenth at gcc dot gnu.org
2021-12-13  0:13 ` pinskia at gcc dot gnu.org
2023-07-18 11:15 ` rguenth at gcc dot gnu.org
2023-07-20  7:51 ` cvs-commit at gcc dot gnu.org
2023-07-20  7:52 ` rguenth at gcc dot gnu.org
2023-07-21  5:37 ` pinskia at gcc dot gnu.org
2023-07-21  6:21 ` rguenther at suse dot de

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).