public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
From: "vincenzo.innocente at cern dot ch" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug tree-optimization/61747] New: min,max pattern not always properly optimized (for sse4 targets)
Date: Tue, 08 Jul 2014 13:34:00 -0000	[thread overview]
Message-ID: <bug-61747-4@http.gcc.gnu.org/bugzilla/> (raw)

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61747

            Bug ID: 61747
           Summary: min,max pattern not always properly optimized (for
                    sse4 targets)
           Product: gcc
           Version: 4.9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: vincenzo.innocente at cern dot ch

I was expecting gcc to substitute a min/max instruction for
(a>b) ? a : b and (a<b) ? a : b, even at -O2.
This is not always the case: only -Ofast provides consistently optimized code
(even if sometimes with a redundant move). -ffinite-math-only makes the code
worse for vector arguments...

cat vmin.cc 
// GCC vector-extension type: a 16-byte vector of float
// (four lanes, assuming a 4-byte float as the name suggests).
typedef float __attribute__( ( vector_size( 16 ) ) ) float32x4_t;

  // Maximum written as a bare conditional expression: yields a when a > b,
  // otherwise b. This file instantiates it with float and float32x4_t.
  // NOTE(review): for the vector type the ?: presumably selects per element
  // (GCC vector extension) -- confirm against the GCC documentation.
  template<typename V1>
  V1 vmax(V1 a, V1 b) {
    return (a>b) ? a : b;
  }
  // Minimum written as a bare conditional expression: yields a when a < b,
  // otherwise b. This file instantiates it with float and float32x4_t.
  // NOTE(review): for the vector type the ?: presumably selects per element
  // (GCC vector extension) -- confirm against the GCC documentation.
  template<typename V1>
  V1 vmin(V1 a, V1 b) {
    return (a<b) ? a : b;
  }


// Scalar test case with all-variable operands: takes the larger of a and b
// via vmax, then the smaller of that result and c via vmin.
float foo(float a, float b, float c) {
  return vmin(vmax(a,b),c);
}

// Vector test case with all-variable operands: same vmin(vmax(a,b),c)
// expression as the scalar overload, instantiated with float32x4_t.
float32x4_t foo(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vmin(vmax(a,b),c);
}

// Test case with compile-time constant bounds: applies vmax against zt
// (zero - 3) and then vmin against it (zero + 4) to the argument a.
// The bounds are derived from `zero` by arithmetic, presumably so the same
// text works for both the scalar and the vector instantiation -- TODO confirm.
template<typename Float>
Float bart(Float a) { 
  constexpr Float zero{0.f};
  // Upper bound used by vmin.
  constexpr Float it = zero+4.f;
  // Lower bound used by vmax.
  constexpr Float zt = zero-3.f;
  return vmin(vmax(a,zt),it);
}


// Non-template entry point that forwards to bart with a float argument.
float bar(float a) {
   return bart(a);
}
// Non-template entry point that forwards to bart with a float32x4_t argument.
float32x4_t bar(float32x4_t a) {
   return bart(a);
}

I see
c++ -std=c++11 -O2  -msse4.2 -s vmin.cc -S; cat vmin.s

__Z3foofff:
LFB2:
    maxss    %xmm1, %xmm0
    minss    %xmm2, %xmm0
    ret

__Z3fooDv4_fS_S_:
LFB3:
    maxps    %xmm1, %xmm0
    minps    %xmm2, %xmm0
    ret

__Z3barf:
LFB5:
    ucomiss    LC3(%rip), %xmm0
    jbe    L12
    minss    LC2(%rip), %xmm0
    ret
    .align 4,0x90
L12:
    movss    LC3(%rip), %xmm0
    ret

__Z3barDv4_f:
LFB6:
    movaps    LC5(%rip), %xmm1
    movaps    %xmm0, %xmm2
    movaps    %xmm1, %xmm0
    cmpltps    %xmm2, %xmm0
    blendvps    %xmm0, %xmm2, %xmm1
    movaps    LC6(%rip), %xmm2
    movaps    %xmm1, %xmm0
    cmpltps    %xmm2, %xmm0
    blendvps    %xmm0, %xmm1, %xmm2
    movaps    %xmm2, %xmm0
    ret

-----------------
c++ -std=c++11 -O2  -msse4.2 -s vmin.cc -S -ffinite-math-only; cat vmin.s
__Z3foofff:
LFB2:
    maxss    %xmm0, %xmm1
    minss    %xmm2, %xmm1
    movaps    %xmm1, %xmm0
    ret
__Z3fooDv4_fS_S_:
LFB3:
    maxps    %xmm1, %xmm0
    movaps    %xmm0, %xmm1
    movaps    %xmm2, %xmm0
    cmpleps    %xmm1, %xmm0
    blendvps    %xmm0, %xmm2, %xmm1
    movaps    %xmm1, %xmm0
    ret

__Z3barf:
LFB5:
    maxss    LC2(%rip), %xmm0
    minss    LC3(%rip), %xmm0
    ret

__Z3barDv4_f:
LFB6:
    movaps    LC5(%rip), %xmm1
    movaps    %xmm0, %xmm2
    movaps    %xmm1, %xmm0
    cmpltps    %xmm2, %xmm0
    blendvps    %xmm0, %xmm2, %xmm1
    movaps    LC6(%rip), %xmm2
    movaps    %xmm1, %xmm0
    cmpltps    %xmm2, %xmm0
    blendvps    %xmm0, %xmm1, %xmm2
    movaps    %xmm2, %xmm0
    ret
LFE6:

--------------
finally, with -Ofast:
c++ -std=c++11 -Ofast  -msse4.2 -s vmin.cc -S; cat vmin.s

__Z3foofff:
LFB2:
    maxss    %xmm0, %xmm1
    minss    %xmm2, %xmm1
    movaps    %xmm1, %xmm0
    ret

__Z3fooDv4_fS_S_:
LFB3:
    maxps    %xmm0, %xmm1
    minps    %xmm2, %xmm1
    movaps    %xmm1, %xmm0
    ret

__Z3barf:
LFB5:
    maxss    LC2(%rip), %xmm0
    minss    LC3(%rip), %xmm0
    ret
__Z3barDv4_f:
LFB6:
    maxps    LC5(%rip), %xmm0
    minps    LC6(%rip), %xmm0
    ret


             reply	other threads:[~2014-07-08 13:34 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-08 13:34 vincenzo.innocente at cern dot ch [this message]
2014-07-08 14:57 ` [Bug tree-optimization/61747] " glisse at gcc dot gnu.org
2014-07-08 15:21 ` vincenzo.innocente at cern dot ch
2014-07-08 15:27 ` glisse at gcc dot gnu.org
2014-07-08 15:35 ` vincenzo.innocente at cern dot ch
2014-07-08 15:46 ` rguenth at gcc dot gnu.org
2021-12-13  0:13 ` pinskia at gcc dot gnu.org
2023-07-18 11:15 ` rguenth at gcc dot gnu.org
2023-07-20  7:51 ` cvs-commit at gcc dot gnu.org
2023-07-20  7:52 ` rguenth at gcc dot gnu.org
2023-07-21  5:37 ` pinskia at gcc dot gnu.org
2023-07-21  6:21 ` rguenther at suse dot de

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bug-61747-4@http.gcc.gnu.org/bugzilla/ \
    --to=gcc-bugzilla@gcc.gnu.org \
    --cc=gcc-bugs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).