[gcc r12-5119] aarch64: Tweak FMAX/FMIN iterators

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc r12-5119] aarch64: Tweak FMAX/FMIN iterators
@ 2021-11-10 12:38 Richard Sandiford
  0 siblings, 0 replies; only message in thread
From: Richard Sandiford @ 2021-11-10 12:38 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:6d331688fcb69e9aae84bb94cb7cc54641a90ab6

commit r12-5119-g6d331688fcb69e9aae84bb94cb7cc54641a90ab6
Author: Richard Sandiford <richard.sandiford@arm.com>
Date:   Wed Nov 10 12:38:43 2021 +0000

    aarch64: Tweak FMAX/FMIN iterators
    
    There was some duplication between the maxmin_uns (uns for unspec
    rather than unsigned) int attribute and the optab int attribute.
    The difficulty for FMAXNM and FMINNM is that the instructions
    really correspond to two things: the smax/smin optabs for floats
    (used only for fast-math-like flags) and the fmax/fmin optabs
    (used for built-in functions).  The optab attribute was
    consistently for the former but maxmin_uns had a mixture of both.
    
    This patch renames maxmin_uns to fmaxmin and only uses it
    for the fmax and fmin optabs.  The reductions that previously
    used the maxmin_uns attribute now use the optab attribute instead.
    
    FMAX and FMIN are awkward in that they don't correspond to any
    optab.  It's nevertheless useful to define them alongside the
    “real” optabs.  Previously they were known as “smax_nan” and
    “smin_nan”, but the problem with those names it that smax and
    smin are only used for floats if NaNs don't matter.  This patch
    therefore uses fmax_nan and fmin_nan instead.
    
    There is still some inconsistency, in that the optab attribute
    handles UNSPEC_COND_FMAX but the fmaxmin attribute handles
    UNSPEC_FMAX.  This is because the SVE FP instructions, being
    predicated, have to use unspecs in cases where the Advanced
    SIMD ones could use rtl codes.
    
    At least there are no duplicate entries though, so this seemed
    like the best compromise for now.
    
    gcc/
            * config/aarch64/iterators.md (optab): Use fmax_nan instead of
            smax_nan and fmin_nan instead of smin_nan.
            (maxmin_uns): Rename to...
            (fmaxmin): ...this and make the same changes.  Remove entries
            unrelated to fmax* and fmin*.
            * config/aarch64/aarch64.md (<maxmin_uns><mode>3): Rename to...
            (<fmaxmin><mode>3): ...this.
            * config/aarch64/aarch64-simd.md (aarch64_<maxmin_uns>p<mode>):
            Rename to...
            (aarch64_<optab>p<mode>): ...this.
            (<maxmin_uns><mode>3): Rename to...
            (<fmaxmin><mode>3): ...this.
            (reduc_<maxmin_uns>_scal_<mode>): Rename to...
            (reduc_<optab>_scal_<mode>): ...this and update gen* call.
            (aarch64_reduc_<maxmin_uns>_internal<mode>): Rename to...
            (aarch64_reduc_<optab>_internal<mode>): ...this.
            (aarch64_reduc_<maxmin_uns>_internalv2si): Rename to...
            (aarch64_reduc_<optab>_internalv2si): ...this.
            * config/aarch64/aarch64-sve.md (<maxmin_uns><mode>3): Rename to...
            (<fmaxmin><mode>3): ...this.
            * config/aarch64/aarch64-simd-builtins.def (smax_nan, smin_nan)
            Rename to...
            (fmax_nan, fmin_nan): ...this.
            * config/aarch64/arm_neon.h (vmax_f32, vmax_f64, vmaxq_f32, vmaxq_f64)
            (vmin_f32, vmin_f64, vminq_f32, vminq_f64, vmax_f16, vmaxq_f16)
            (vmin_f16, vminq_f16): Update accordingly.

Diff:
---
 gcc/config/aarch64/aarch64-simd-builtins.def | 12 +++++-------
 gcc/config/aarch64/aarch64-simd.md           | 24 ++++++++++++------------
 gcc/config/aarch64/aarch64-sve.md            |  2 +-
 gcc/config/aarch64/aarch64.md                |  2 +-
 gcc/config/aarch64/arm_neon.h                | 24 ++++++++++++------------
 gcc/config/aarch64/iterators.md              | 26 ++++++++------------------
 6 files changed, 39 insertions(+), 51 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 4a7e2cf4125..9b0a6eceafe 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -502,21 +502,19 @@
   BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10, NONE)
   BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10, NONE)
 
-  /* Implemented by <maxmin_uns><mode>3.
-     smax variants map to fmaxnm,
-     smax_nan variants map to fmax.  */
+  /* Implemented by <optab><mode>3.  */
   BUILTIN_VDQ_BHSI (BINOP, smax, 3, NONE)
   BUILTIN_VDQ_BHSI (BINOP, smin, 3, NONE)
   BUILTIN_VDQ_BHSI (BINOP, umax, 3, NONE)
   BUILTIN_VDQ_BHSI (BINOP, umin, 3, NONE)
-  BUILTIN_VHSDF_DF (BINOP, smax_nan, 3, NONE)
-  BUILTIN_VHSDF_DF (BINOP, smin_nan, 3, NONE)
 
-  /* Implemented by <maxmin_uns><mode>3.  */
+  /* Implemented by <fmaxmin><mode>3.  */
   BUILTIN_VHSDF_HSDF (BINOP, fmax, 3, FP)
   BUILTIN_VHSDF_HSDF (BINOP, fmin, 3, FP)
+  BUILTIN_VHSDF_DF (BINOP, fmax_nan, 3, FP)
+  BUILTIN_VHSDF_DF (BINOP, fmin_nan, 3, FP)
 
-  /* Implemented by aarch64_<maxmin_uns>p<mode>.  */
+  /* Implemented by aarch64_<optab>p<mode>.  */
   BUILTIN_VDQ_BHSI (BINOP, smaxp, 0, NONE)
   BUILTIN_VDQ_BHSI (BINOP, sminp, 0, NONE)
   BUILTIN_VDQ_BHSI (BINOP, umaxp, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index bff76e4b6e9..35d55a3e51e 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1553,7 +1553,7 @@
 })
 
 ;; Pairwise Integer Max/Min operations.
-(define_insn "aarch64_<maxmin_uns>p<mode>"
+(define_insn "aarch64_<optab>p<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
 			 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
@@ -1564,7 +1564,7 @@
 )
 
 ;; Pairwise FP Max/Min operations.
-(define_insn "aarch64_<maxmin_uns>p<mode>"
+(define_insn "aarch64_<optab>p<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
 		      (match_operand:VHSDF 2 "register_operand" "w")]
@@ -3488,7 +3488,7 @@
 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
 ;; which implement the IEEE fmax ()/fmin () functions.
-(define_insn "<maxmin_uns><mode>3"
+(define_insn "<fmaxmin><mode>3"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
 		      (match_operand:VHSDF 2 "register_operand" "w")]
@@ -3622,7 +3622,7 @@
 
 ;; Template for outputting a scalar, so we can create __builtins which can be
 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
-(define_expand "reduc_<maxmin_uns>_scal_<mode>"
+(define_expand "reduc_<optab>_scal_<mode>"
   [(match_operand:<VEL> 0 "register_operand")
    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
 		  FMAXMINV)]
@@ -3630,15 +3630,15 @@
   {
     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
     rtx scratch = gen_reg_rtx (<MODE>mode);
-    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
-							      operands[1]));
+    emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
+							 operands[1]));
     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
     DONE;
   }
 )
 
 ;; Likewise for integer cases, signed and unsigned.
-(define_expand "reduc_<maxmin_uns>_scal_<mode>"
+(define_expand "reduc_<optab>_scal_<mode>"
   [(match_operand:<VEL> 0 "register_operand")
    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
 		    MAXMINV)]
@@ -3646,14 +3646,14 @@
   {
     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
     rtx scratch = gen_reg_rtx (<MODE>mode);
-    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
-							      operands[1]));
+    emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
+							 operands[1]));
     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
     DONE;
   }
 )
 
-(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
+(define_insn "aarch64_reduc_<optab>_internal<mode>"
  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
 		    MAXMINV))]
@@ -3662,7 +3662,7 @@
   [(set_attr "type" "neon_reduc_minmax<q>")]
 )
 
-(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
+(define_insn "aarch64_reduc_<optab>_internalv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=w")
        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
 		    MAXMINV))]
@@ -3671,7 +3671,7 @@
   [(set_attr "type" "neon_reduc_minmax")]
 )
 
-(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
+(define_insn "aarch64_reduc_<optab>_internal<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
 		      FMAXMINV))]
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 8fe4c721313..5de479e141a 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -6288,7 +6288,7 @@
 
 ;; Unpredicated fmax/fmin (the libm functions).  The optabs for the
 ;; smin/smax rtx codes are handled in the generic section above.
-(define_expand "<maxmin_uns><mode>3"
+(define_expand "<fmaxmin><mode>3"
   [(set (match_operand:SVE_FULL_F 0 "register_operand")
 	(unspec:SVE_FULL_F
 	  [(match_dup 3)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 4035e061706..5297b2d3f95 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6475,7 +6475,7 @@
 ;; Scalar forms for fmax, fmin, fmaxnm, fminnm.
 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
 ;; which implement the IEEE fmax ()/fmin () functions.
-(define_insn "<maxmin_uns><mode>3"
+(define_insn "<fmaxmin><mode>3"
   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
 	(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")
 		     (match_operand:GPF_F16 2 "register_operand" "w")]
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 398a2e3a021..2e64f079833 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -18264,7 +18264,7 @@ __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmax_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return __builtin_aarch64_smax_nanv2sf (__a, __b);
+  return __builtin_aarch64_fmax_nanv2sf (__a, __b);
 }
 
 __extension__ extern __inline float64x1_t
@@ -18272,7 +18272,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmax_f64 (float64x1_t __a, float64x1_t __b)
 {
     return (float64x1_t)
-      { __builtin_aarch64_smax_nandf (vget_lane_f64 (__a, 0),
+      { __builtin_aarch64_fmax_nandf (vget_lane_f64 (__a, 0),
 				      vget_lane_f64 (__b, 0)) };
 }
 
@@ -18325,14 +18325,14 @@ __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_aarch64_smax_nanv4sf (__a, __b);
+  return __builtin_aarch64_fmax_nanv4sf (__a, __b);
 }
 
 __extension__ extern __inline float64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmaxq_f64 (float64x2_t __a, float64x2_t __b)
 {
-  return __builtin_aarch64_smax_nanv2df (__a, __b);
+  return __builtin_aarch64_fmax_nanv2df (__a, __b);
 }
 
 __extension__ extern __inline int8x16_t
@@ -19003,7 +19003,7 @@ __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmin_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return __builtin_aarch64_smin_nanv2sf (__a, __b);
+  return __builtin_aarch64_fmin_nanv2sf (__a, __b);
 }
 
 __extension__ extern __inline float64x1_t
@@ -19011,7 +19011,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmin_f64 (float64x1_t __a, float64x1_t __b)
 {
     return (float64x1_t)
-	  { __builtin_aarch64_smin_nandf (vget_lane_f64 (__a, 0),
+	  { __builtin_aarch64_fmin_nandf (vget_lane_f64 (__a, 0),
 					  vget_lane_f64 (__b, 0)) };
 }
 
@@ -19064,14 +19064,14 @@ __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vminq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_aarch64_smin_nanv4sf (__a, __b);
+  return __builtin_aarch64_fmin_nanv4sf (__a, __b);
 }
 
 __extension__ extern __inline float64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vminq_f64 (float64x2_t __a, float64x2_t __b)
 {
-  return __builtin_aarch64_smin_nanv2df (__a, __b);
+  return __builtin_aarch64_fmin_nanv2df (__a, __b);
 }
 
 __extension__ extern __inline int8x16_t
@@ -29131,14 +29131,14 @@ __extension__ extern __inline float16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmax_f16 (float16x4_t __a, float16x4_t __b)
 {
-  return __builtin_aarch64_smax_nanv4hf (__a, __b);
+  return __builtin_aarch64_fmax_nanv4hf (__a, __b);
 }
 
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmaxq_f16 (float16x8_t __a, float16x8_t __b)
 {
-  return __builtin_aarch64_smax_nanv8hf (__a, __b);
+  return __builtin_aarch64_fmax_nanv8hf (__a, __b);
 }
 
 __extension__ extern __inline float16x4_t
@@ -29159,14 +29159,14 @@ __extension__ extern __inline float16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vmin_f16 (float16x4_t __a, float16x4_t __b)
 {
-  return __builtin_aarch64_smin_nanv4hf (__a, __b);
+  return __builtin_aarch64_fmin_nanv4hf (__a, __b);
 }
 
 __extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vminq_f16 (float16x8_t __a, float16x8_t __b)
 {
-  return __builtin_aarch64_smin_nanv8hf (__a, __b);
+  return __builtin_aarch64_fmin_nanv8hf (__a, __b);
 }
 
 __extension__ extern __inline float16x4_t
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index bdc8ba3576c..e8eebd863a6 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -3189,9 +3189,9 @@
 			(UNSPEC_COND_FCVTZS "fix_trunc")
 			(UNSPEC_COND_FCVTZU "fixuns_trunc")
 			(UNSPEC_COND_FDIV "div")
-			(UNSPEC_COND_FMAX "smax_nan")
+			(UNSPEC_COND_FMAX "fmax_nan")
 			(UNSPEC_COND_FMAXNM "smax")
-			(UNSPEC_COND_FMIN "smin_nan")
+			(UNSPEC_COND_FMIN "fmin_nan")
 			(UNSPEC_COND_FMINNM "smin")
 			(UNSPEC_COND_FMLA "fma")
 			(UNSPEC_COND_FMLS "fnma")
@@ -3214,22 +3214,12 @@
 			(UNSPEC_COND_SCVTF "float")
 			(UNSPEC_COND_UCVTF "floatuns")])
 
-(define_int_attr  maxmin_uns [(UNSPEC_UMAXV "umax")
-			      (UNSPEC_UMINV "umin")
-			      (UNSPEC_SMAXV "smax")
-			      (UNSPEC_SMINV "smin")
-			      (UNSPEC_FMAX  "smax_nan")
-			      (UNSPEC_FMAXNMV "smax")
-			      (UNSPEC_FMAXV "smax_nan")
-			      (UNSPEC_FMIN "smin_nan")
-			      (UNSPEC_FMINNMV "smin")
-			      (UNSPEC_FMINV "smin_nan")
-			      (UNSPEC_FMAXNM "fmax")
-			      (UNSPEC_FMINNM "fmin")
-			      (UNSPEC_COND_FMAX "fmax_nan")
-			      (UNSPEC_COND_FMAXNM "fmax")
-			      (UNSPEC_COND_FMIN "fmin_nan")
-			      (UNSPEC_COND_FMINNM "fmin")])
+(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax_nan")
+			  (UNSPEC_FMAXNM "fmax")
+			  (UNSPEC_FMIN "fmin_nan")
+			  (UNSPEC_FMINNM "fmin")
+			  (UNSPEC_COND_FMAXNM "fmax")
+			  (UNSPEC_COND_FMINNM "fmin")])
 
 (define_int_attr  maxmin_uns_op [(UNSPEC_UMAXV "umax")
 				 (UNSPEC_UMINV "umin")


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-11-10 12:38 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-10 12:38 [gcc r12-5119] aarch64: Tweak FMAX/FMIN iterators Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).