public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/61747] New: min,max pattern not always properly optimized (for sse4 targets)
@ 2014-07-08 13:34 vincenzo.innocente at cern dot ch
  2014-07-08 14:57 ` [Bug tree-optimization/61747] " glisse at gcc dot gnu.org
                   ` (10 more replies)
  0 siblings, 11 replies; 12+ messages in thread
From: vincenzo.innocente at cern dot ch @ 2014-07-08 13:34 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61747

            Bug ID: 61747
           Summary: min,max pattern not always properly optimized (for
                    sse4 targets)
           Product: gcc
           Version: 4.9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: vincenzo.innocente at cern dot ch

I was expecting gcc to substitute a min/max instruction for (a>b) ? a : b and
(a<b) ? a : b, even at -O2.
This is not always the case: only -Ofast consistently produces optimized code
(even if sometimes with a redundant move). -ffinite-math-only makes the code
worse for vector arguments...

cat vmin.cc 
typedef float __attribute__( ( vector_size( 16 ) ) ) float32x4_t;

  // Returns the larger of a and b using a plain compare-and-select.
  // Whenever a>b is false, b is returned; for floating-point V1 that
  // includes the case where either operand is NaN, so the two operands
  // are not interchangeable.
  template<typename V1>
  V1 vmax(V1 a, V1 b) {
    return (a>b) ? a : b;
  }
  // Returns the smaller of a and b using a plain compare-and-select.
  // Whenever a<b is false, b is returned; for floating-point V1 that
  // includes the case where either operand is NaN, so the two operands
  // are not interchangeable.
  template<typename V1>
  V1 vmin(V1 a, V1 b) {
    return (a<b) ? a : b;
  }


// Scalar test case: computes vmin(vmax(a,b),c) with all three operands
// passed in as function arguments.
float foo(float a, float b, float c) {
  return vmin(vmax(a,b),c);
}

// Vector test case: the same vmin(vmax(a,b),c) pattern applied to 16-byte
// float vectors, with all three operands passed in as function arguments.
float32x4_t foo(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vmin(vmax(a,b),c);
}

// Applies vmax with a lower bound of -3 and then vmin with an upper bound
// of 4, i.e. clamps a to [-3, 4] using compile-time constant bounds.
// The bounds are built from a zero-initialized Float plus/minus a scalar,
// presumably so the same code works for both scalar and vector Float.
template<typename Float>
Float bart(Float a) { 
  constexpr Float zero{0.f};
  constexpr Float it = zero+4.f;  // upper bound
  constexpr Float zt = zero-3.f;  // lower bound
  return vmin(vmax(a,zt),it);
}


// Scalar test case with constant bounds: instantiates bart for float.
float bar(float a) {
   return bart(a);
}
// Vector test case with constant bounds: instantiates bart for float32x4_t.
float32x4_t bar(float32x4_t a) {
   return bart(a);
}

I see
c++ -std=c++11 -O2  -msse4.2 -s vmin.cc -S; cat vmin.s

__Z3foofff:
LFB2:
    maxss    %xmm1, %xmm0
    minss    %xmm2, %xmm0
    ret

__Z3fooDv4_fS_S_:
LFB3:
    maxps    %xmm1, %xmm0
    minps    %xmm2, %xmm0
    ret

__Z3barf:
LFB5:
    ucomiss    LC3(%rip), %xmm0
    jbe    L12
    minss    LC2(%rip), %xmm0
    ret
    .align 4,0x90
L12:
    movss    LC3(%rip), %xmm0
    ret

__Z3barDv4_f:
LFB6:
    movaps    LC5(%rip), %xmm1
    movaps    %xmm0, %xmm2
    movaps    %xmm1, %xmm0
    cmpltps    %xmm2, %xmm0
    blendvps    %xmm0, %xmm2, %xmm1
    movaps    LC6(%rip), %xmm2
    movaps    %xmm1, %xmm0
    cmpltps    %xmm2, %xmm0
    blendvps    %xmm0, %xmm1, %xmm2
    movaps    %xmm2, %xmm0
    ret

-----------------
c++ -std=c++11 -O2  -msse4.2 -s vmin.cc -S -ffinite-math-only; cat vmin.s
__Z3foofff:
LFB2:
    maxss    %xmm0, %xmm1
    minss    %xmm2, %xmm1
    movaps    %xmm1, %xmm0
    ret
__Z3fooDv4_fS_S_:
LFB3:
    maxps    %xmm1, %xmm0
    movaps    %xmm0, %xmm1
    movaps    %xmm2, %xmm0
    cmpleps    %xmm1, %xmm0
    blendvps    %xmm0, %xmm2, %xmm1
    movaps    %xmm1, %xmm0
    ret

__Z3barf:
LFB5:
    maxss    LC2(%rip), %xmm0
    minss    LC3(%rip), %xmm0
    ret

__Z3barDv4_f:
LFB6:
    movaps    LC5(%rip), %xmm1
    movaps    %xmm0, %xmm2
    movaps    %xmm1, %xmm0
    cmpltps    %xmm2, %xmm0
    blendvps    %xmm0, %xmm2, %xmm1
    movaps    LC6(%rip), %xmm2
    movaps    %xmm1, %xmm0
    cmpltps    %xmm2, %xmm0
    blendvps    %xmm0, %xmm1, %xmm2
    movaps    %xmm2, %xmm0
    ret
LFE6:

--------------
eventually
c++ -std=c++11 -Ofast  -msse4.2 -s vmin.cc -S; cat vmin.s

__Z3foofff:
LFB2:
    maxss    %xmm0, %xmm1
    minss    %xmm2, %xmm1
    movaps    %xmm1, %xmm0
    ret

__Z3fooDv4_fS_S_:
LFB3:
    maxps    %xmm0, %xmm1
    minps    %xmm2, %xmm1
    movaps    %xmm1, %xmm0
    ret

__Z3barf:
LFB5:
    maxss    LC2(%rip), %xmm0
    minss    LC3(%rip), %xmm0
    ret
__Z3barDv4_f:
LFB6:
    maxps    LC5(%rip), %xmm0
    minps    LC6(%rip), %xmm0
    ret


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Bug tree-optimization/61747] min,max pattern not always properly optimized (for sse4 targets)
  2014-07-08 13:34 [Bug tree-optimization/61747] New: min,max pattern not always properly optimized (for sse4 targets) vincenzo.innocente at cern dot ch
@ 2014-07-08 14:57 ` glisse at gcc dot gnu.org
  2014-07-08 15:21 ` vincenzo.innocente at cern dot ch
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: glisse at gcc dot gnu.org @ 2014-07-08 14:57 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61747

--- Comment #1 from Marc Glisse <glisse at gcc dot gnu.org> ---
I think you need -fno-signed-zeros for the transformation to be valid.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Bug tree-optimization/61747] min,max pattern not always properly optimized (for sse4 targets)
  2014-07-08 13:34 [Bug tree-optimization/61747] New: min,max pattern not always properly optimized (for sse4 targets) vincenzo.innocente at cern dot ch
  2014-07-08 14:57 ` [Bug tree-optimization/61747] " glisse at gcc dot gnu.org
@ 2014-07-08 15:21 ` vincenzo.innocente at cern dot ch
  2014-07-08 15:27 ` glisse at gcc dot gnu.org
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: vincenzo.innocente at cern dot ch @ 2014-07-08 15:21 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61747

--- Comment #2 from vincenzo Innocente <vincenzo.innocente at cern dot ch> ---
> I think you need -fno-signed-zeros for the transformation to be valid.
possible.
but then is the O2 code that is wrong?
in any case adding -fno-signed-zeros makes no difference w/r/t O2 alone


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Bug tree-optimization/61747] min,max pattern not always properly optimized (for sse4 targets)
  2014-07-08 13:34 [Bug tree-optimization/61747] New: min,max pattern not always properly optimized (for sse4 targets) vincenzo.innocente at cern dot ch
  2014-07-08 14:57 ` [Bug tree-optimization/61747] " glisse at gcc dot gnu.org
  2014-07-08 15:21 ` vincenzo.innocente at cern dot ch
@ 2014-07-08 15:27 ` glisse at gcc dot gnu.org
  2014-07-08 15:35 ` vincenzo.innocente at cern dot ch
                   ` (7 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: glisse at gcc dot gnu.org @ 2014-07-08 15:27 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61747

--- Comment #3 from Marc Glisse <glisse at gcc dot gnu.org> ---
(In reply to vincenzo Innocente from comment #2)
> > I think you need -fno-signed-zeros for the transformation to be valid.
> possible.
> but then is the O2 code that is wrong?
> in any case adding -fno-signed-zeros makes no difference w/r/t O2 alone

-fno-signed-zeros comes in addition to some flag saying there are no NaNs
(-ffinite-math-only for instance).


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Bug tree-optimization/61747] min,max pattern not always properly optimized (for sse4 targets)
  2014-07-08 13:34 [Bug tree-optimization/61747] New: min,max pattern not always properly optimized (for sse4 targets) vincenzo.innocente at cern dot ch
                   ` (2 preceding siblings ...)
  2014-07-08 15:27 ` glisse at gcc dot gnu.org
@ 2014-07-08 15:35 ` vincenzo.innocente at cern dot ch
  2014-07-08 15:46 ` rguenth at gcc dot gnu.org
                   ` (6 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: vincenzo.innocente at cern dot ch @ 2014-07-08 15:35 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61747

--- Comment #4 from vincenzo Innocente <vincenzo.innocente at cern dot ch> ---
I confirm that
-ffinite-math-only -fno-signed-zeros
is equivalent to -Ofast in this case.
So do we conclude that the code generated at -O2 is wrong, and that
-ffinite-math-only -fno-signed-zeros
is required to trigger min/max?


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Bug tree-optimization/61747] min,max pattern not always properly optimized (for sse4 targets)
  2014-07-08 13:34 [Bug tree-optimization/61747] New: min,max pattern not always properly optimized (for sse4 targets) vincenzo.innocente at cern dot ch
                   ` (3 preceding siblings ...)
  2014-07-08 15:35 ` vincenzo.innocente at cern dot ch
@ 2014-07-08 15:46 ` rguenth at gcc dot gnu.org
  2021-12-13  0:13 ` pinskia at gcc dot gnu.org
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: rguenth at gcc dot gnu.org @ 2014-07-08 15:46 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61747

--- Comment #5 from Richard Biener <rguenth at gcc dot gnu.org> ---
;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
;; isn't really correct, as those rtl operators aren't defined when
;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.

(define_expand "<code><mode>3<mask_name><round_saeonly_name>"
  [(set (match_operand:VF 0 "register_operand")
        (smaxmin:VF
          (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
          (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
  "TARGET_SSE && <mask_mode512bit_condition> &&
<round_saeonly_mode512bit_condition>"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (<MODE>mode, operands[1]);
  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})

and

;; These versions of the min/max patterns implement exactly the operations
;;   min = (op1 < op2 ? op1 : op2)
;;   max = (!(op1 < op2) ? op1 : op2)
;; Their operands are not commutative, and thus they may be used in the
;; presence of -0.0 and NaN.

(define_insn "*ieee_smin<mode>3"
  [(set (match_operand:VF 0 "register_operand" "=v,v")
        (unspec:VF
          [(match_operand:VF 1 "register_operand" "0,v")
           (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
         UNSPEC_IEEE_MIN))]
  "TARGET_SSE"
...

may explain the -O2 code.  Note that the middle-end uses min/max
regardless of flags and makes it the target's responsibility to disable
instructions that don't conform to IEEE.

The above suggests that a>b ? a : b isn't IEEE-conformant on x86.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Bug tree-optimization/61747] min,max pattern not always properly optimized (for sse4 targets)
  2014-07-08 13:34 [Bug tree-optimization/61747] New: min,max pattern not always properly optimized (for sse4 targets) vincenzo.innocente at cern dot ch
                   ` (4 preceding siblings ...)
  2014-07-08 15:46 ` rguenth at gcc dot gnu.org
@ 2021-12-13  0:13 ` pinskia at gcc dot gnu.org
  2023-07-18 11:15 ` rguenth at gcc dot gnu.org
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: pinskia at gcc dot gnu.org @ 2021-12-13  0:13 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61747

Andrew Pinski <pinskia at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
     Ever confirmed|0                           |1
           Severity|normal                      |enhancement
   Last reconfirmed|                            |2021-12-13
             Status|UNCONFIRMED                 |NEW

--- Comment #6 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
Confirmed.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Bug tree-optimization/61747] min,max pattern not always properly optimized (for sse4 targets)
  2014-07-08 13:34 [Bug tree-optimization/61747] New: min,max pattern not always properly optimized (for sse4 targets) vincenzo.innocente at cern dot ch
                   ` (5 preceding siblings ...)
  2021-12-13  0:13 ` pinskia at gcc dot gnu.org
@ 2023-07-18 11:15 ` rguenth at gcc dot gnu.org
  2023-07-20  7:51 ` cvs-commit at gcc dot gnu.org
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-07-18 11:15 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61747

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Assignee|unassigned at gcc dot gnu.org      |rguenth at gcc dot gnu.org
             Status|NEW                         |ASSIGNED

--- Comment #7 from Richard Biener <rguenth at gcc dot gnu.org> ---
The cases with constant arguments fail to be recognized by the x86 conditional
move expansion because RTL expansion makes it too difficult to see that the
comparison operands and the value operands are equal where required.
Specifically, emit_conditional_move forces the constant into two different
registers via prepare_cmp_insn.

I'm testing a patch for this.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Bug tree-optimization/61747] min,max pattern not always properly optimized (for sse4 targets)
  2014-07-08 13:34 [Bug tree-optimization/61747] New: min,max pattern not always properly optimized (for sse4 targets) vincenzo.innocente at cern dot ch
                   ` (6 preceding siblings ...)
  2023-07-18 11:15 ` rguenth at gcc dot gnu.org
@ 2023-07-20  7:51 ` cvs-commit at gcc dot gnu.org
  2023-07-20  7:52 ` rguenth at gcc dot gnu.org
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: cvs-commit at gcc dot gnu.org @ 2023-07-20  7:51 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61747

--- Comment #8 from CVS Commits <cvs-commit at gcc dot gnu.org> ---
The master branch has been updated by Richard Biener <rguenth@gcc.gnu.org>:

https://gcc.gnu.org/g:ceae1400cf24f329393e96dd9720b0391afe858d

commit r14-2667-gceae1400cf24f329393e96dd9720b0391afe858d
Author: Richard Biener <rguenther@suse.de>
Date:   Tue Jul 18 13:19:11 2023 +0200

    middle-end/61747 - conditional move expansion and constants

    When expanding a COND_EXPR or a VEC_COND_EXPR the x86 backend for
    example tries to match FP min/max instructions.  But this only
    works when it can see the equality of the comparison and selected
    operands.  This breaks in both prepare_cmp_insn and vector_compare_rtx
    where the former forces expensive constants to a register and the
    latter performs legitimization.  The patch below fixes this in
    the caller preserving former equalities.

            PR middle-end/61747
            * internal-fn.cc (expand_vec_cond_optab_fn): When the
            value operands are equal to the original comparison operands
            preserve that equality by re-using the comparison expansion.
            * optabs.cc (emit_conditional_move): When the value operands
            are equal to the comparison operands and would be forced to
            a register by prepare_cmp_insn do so earlier, preserving the
            equality.

            * g++.target/i386/pr61747.C: New testcase.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Bug tree-optimization/61747] min,max pattern not always properly optimized (for sse4 targets)
  2014-07-08 13:34 [Bug tree-optimization/61747] New: min,max pattern not always properly optimized (for sse4 targets) vincenzo.innocente at cern dot ch
                   ` (7 preceding siblings ...)
  2023-07-20  7:51 ` cvs-commit at gcc dot gnu.org
@ 2023-07-20  7:52 ` rguenth at gcc dot gnu.org
  2023-07-21  5:37 ` pinskia at gcc dot gnu.org
  2023-07-21  6:21 ` rguenther at suse dot de
  10 siblings, 0 replies; 12+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-07-20  7:52 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61747

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
         Resolution|---                         |FIXED
   Target Milestone|---                         |14.0
             Status|ASSIGNED                    |RESOLVED

--- Comment #9 from Richard Biener <rguenth at gcc dot gnu.org> ---
Fixed for GCC 14.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Bug tree-optimization/61747] min,max pattern not always properly optimized (for sse4 targets)
  2014-07-08 13:34 [Bug tree-optimization/61747] New: min,max pattern not always properly optimized (for sse4 targets) vincenzo.innocente at cern dot ch
                   ` (8 preceding siblings ...)
  2023-07-20  7:52 ` rguenth at gcc dot gnu.org
@ 2023-07-21  5:37 ` pinskia at gcc dot gnu.org
  2023-07-21  6:21 ` rguenther at suse dot de
  10 siblings, 0 replies; 12+ messages in thread
From: pinskia at gcc dot gnu.org @ 2023-07-21  5:37 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61747

--- Comment #10 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
(In reply to CVS Commits from comment #8)
>             * g++.target/i386/pr61747.C: New testcase.

The testcase fails now, I don't know what caused it to fail though:
FAIL: g++.target/i386/pr61747.C  -std=gnu++14  scan-assembler-times max 4

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [Bug tree-optimization/61747] min,max pattern not always properly optimized (for sse4 targets)
  2014-07-08 13:34 [Bug tree-optimization/61747] New: min,max pattern not always properly optimized (for sse4 targets) vincenzo.innocente at cern dot ch
                   ` (9 preceding siblings ...)
  2023-07-21  5:37 ` pinskia at gcc dot gnu.org
@ 2023-07-21  6:21 ` rguenther at suse dot de
  10 siblings, 0 replies; 12+ messages in thread
From: rguenther at suse dot de @ 2023-07-21  6:21 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61747

--- Comment #11 from rguenther at suse dot de <rguenther at suse dot de> ---
On Fri, 21 Jul 2023, pinskia at gcc dot gnu.org wrote:

> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61747
> 
> --- Comment #10 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
> (In reply to CVS Commits from comment #8)
> >             * g++.target/i386/pr61747.C: New testcase.
> 
> The testcase fails now, I don't know what caused it to fail though:
> FAIL: g++.target/i386/pr61747.C  -std=gnu++14  scan-assembler-times max 4

I failed to update it before pushing; it will be fixed with the next
push I do (currently re-testing).

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2023-07-21  6:21 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-07-08 13:34 [Bug tree-optimization/61747] New: min,max pattern not always properly optimized (for sse4 targets) vincenzo.innocente at cern dot ch
2014-07-08 14:57 ` [Bug tree-optimization/61747] " glisse at gcc dot gnu.org
2014-07-08 15:21 ` vincenzo.innocente at cern dot ch
2014-07-08 15:27 ` glisse at gcc dot gnu.org
2014-07-08 15:35 ` vincenzo.innocente at cern dot ch
2014-07-08 15:46 ` rguenth at gcc dot gnu.org
2021-12-13  0:13 ` pinskia at gcc dot gnu.org
2023-07-18 11:15 ` rguenth at gcc dot gnu.org
2023-07-20  7:51 ` cvs-commit at gcc dot gnu.org
2023-07-20  7:52 ` rguenth at gcc dot gnu.org
2023-07-21  5:37 ` pinskia at gcc dot gnu.org
2023-07-21  6:21 ` rguenther at suse dot de

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).