[gcc r12-5602] vect: Add support for fmax and fmin reductions

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc r12-5602] vect: Add support for fmax and fmin reductions
@ 2021-11-30  9:53 Richard Sandiford
  0 siblings, 0 replies; only message in thread
From: Richard Sandiford @ 2021-11-30  9:53 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:e32b9eb32d7cd2d39bf9c70497890ac61b9ee14c

commit r12-5602-ge32b9eb32d7cd2d39bf9c70497890ac61b9ee14c
Author: Richard Sandiford <richard.sandiford@arm.com>
Date:   Tue Nov 30 09:52:25 2021 +0000

    vect: Add support for fmax and fmin reductions
    
    This patch adds support for reductions involving calls to fmax*()
    and fmin*(), without the -ffast-math flags that allow them to be
    converted to MAX_EXPR and MIN_EXPR.
    
    gcc/
            * doc/md.texi (reduc_fmin_scal_@var{m}): Document.
            (reduc_fmax_scal_@var{m}): Likewise.
            * optabs.def (reduc_fmax_scal_optab): New optab.
            (reduc_fmin_scal_optab): Likewise.
            * internal-fn.def (REDUC_FMAX, REDUC_FMIN): New functions.
            * tree-vect-loop.c (reduction_fn_for_scalar_code): Handle
            CASE_CFN_FMAX and CASE_CFN_FMIN.
            (neutral_op_for_reduction): Likewise.
            (needs_fold_left_reduction_p): Likewise.
            * config/aarch64/iterators.md (FMAXMINNMV): New iterator.
            (fmaxmin): Handle UNSPEC_FMAXNMV and UNSPEC_FMINNMV.
            * config/aarch64/aarch64-simd.md (reduc_<optab>_scal_<mode>): Fix
            unspec mode.
            (reduc_<fmaxmin>_scal_<mode>): New pattern.
            * config/aarch64/aarch64-sve.md (reduc_<fmaxmin>_scal_<mode>):
            Likewise.
    
    gcc/testsuite/
            * gcc.dg/vect/vect-fmax-1.c: New test.
            * gcc.dg/vect/vect-fmax-2.c: Likewise.
            * gcc.dg/vect/vect-fmax-3.c: Likewise.
            * gcc.dg/vect/vect-fmin-1.c: New test.
            * gcc.dg/vect/vect-fmin-2.c: Likewise.
            * gcc.dg/vect/vect-fmin-3.c: Likewise.
            * gcc.target/aarch64/fmaxnm_1.c: Likewise.
            * gcc.target/aarch64/fmaxnm_2.c: Likewise.
            * gcc.target/aarch64/fminnm_1.c: Likewise.
            * gcc.target/aarch64/fminnm_2.c: Likewise.
            * gcc.target/aarch64/sve/fmaxnm_2.c: Likewise.
            * gcc.target/aarch64/sve/fmaxnm_3.c: Likewise.
            * gcc.target/aarch64/sve/fminnm_2.c: Likewise.
            * gcc.target/aarch64/sve/fminnm_3.c: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md              | 15 ++++-
 gcc/config/aarch64/aarch64-sve.md               | 11 ++++
 gcc/config/aarch64/iterators.md                 |  4 ++
 gcc/doc/md.texi                                 |  8 +++
 gcc/internal-fn.def                             |  4 ++
 gcc/optabs.def                                  |  2 +
 gcc/testsuite/gcc.dg/vect/vect-fmax-1.c         | 83 ++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/vect/vect-fmax-2.c         |  7 ++
 gcc/testsuite/gcc.dg/vect/vect-fmax-3.c         | 83 ++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/vect/vect-fmin-1.c         | 86 +++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/vect/vect-fmin-2.c         |  9 +++
 gcc/testsuite/gcc.dg/vect/vect-fmin-3.c         | 83 ++++++++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/fmaxnm_1.c     | 24 +++++++
 gcc/testsuite/gcc.target/aarch64/fmaxnm_2.c     | 20 ++++++
 gcc/testsuite/gcc.target/aarch64/fminnm_1.c     | 24 +++++++
 gcc/testsuite/gcc.target/aarch64/fminnm_2.c     | 20 ++++++
 gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_2.c | 22 +++++++
 gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_3.c | 18 ++++++
 gcc/testsuite/gcc.target/aarch64/sve/fminnm_2.c | 22 +++++++
 gcc/testsuite/gcc.target/aarch64/sve/fminnm_3.c | 18 ++++++
 gcc/tree-vect-loop.c                            | 45 +++++++++++--
 21 files changed, 599 insertions(+), 9 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 1020cd9ee64..8e61dd9fcbb 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3592,8 +3592,8 @@
 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
 (define_expand "reduc_<optab>_scal_<mode>"
   [(match_operand:<VEL> 0 "register_operand")
-   (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
-		  FMAXMINV)]
+   (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
+		 FMAXMINV)]
   "TARGET_SIMD"
   {
     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
@@ -3605,6 +3605,17 @@
   }
 )
 
+(define_expand "reduc_<fmaxmin>_scal_<mode>"
+  [(match_operand:<VEL> 0 "register_operand")
+   (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
+		 FMAXMINNMV)]
+  "TARGET_SIMD"
+  {
+    emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
+    DONE;
+  }
+)
+
 ;; Likewise for integer cases, signed and unsigned.
 (define_expand "reduc_<optab>_scal_<mode>"
   [(match_operand:<VEL> 0 "register_operand")
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 0f5bf5ea8cb..9ef968840c2 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -8566,6 +8566,17 @@
   }
 )
 
+(define_expand "reduc_<fmaxmin>_scal_<mode>"
+  [(match_operand:<VEL> 0 "register_operand")
+   (unspec:<VEL> [(match_operand:SVE_FULL_F 1 "register_operand")]
+		 FMAXMINNMV)]
+  "TARGET_SVE"
+  {
+    emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
+    DONE;
+  }
+)
+
 ;; Predicated floating-point tree reductions.
 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
   [(set (match_operand:<VEL> 0 "register_operand" "=w")
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 2c58d5570ae..0b34e7f7d16 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2519,6 +2519,8 @@
 (define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV
 			       UNSPEC_FMAXNMV UNSPEC_FMINNMV])
 
+(define_int_iterator FMAXMINNMV [UNSPEC_FMAXNMV UNSPEC_FMINNMV])
+
 (define_int_iterator SVE_INT_ADDV [UNSPEC_SADDV UNSPEC_UADDV])
 
 (define_int_iterator USADDLP [UNSPEC_SADDLP UNSPEC_UADDLP])
@@ -3225,8 +3227,10 @@
 
 (define_int_attr fmaxmin [(UNSPEC_FMAX "fmax_nan")
 			  (UNSPEC_FMAXNM "fmax")
+			  (UNSPEC_FMAXNMV "fmax")
 			  (UNSPEC_FMIN "fmin_nan")
 			  (UNSPEC_FMINNM "fmin")
+			  (UNSPEC_FMINNMV "fmin")
 			  (UNSPEC_COND_FMAXNM "fmax")
 			  (UNSPEC_COND_FMINNM "fmin")])
 
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 589f841ea74..8fd0f8d2fe1 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5400,6 +5400,14 @@ Find the unsigned minimum/maximum of the elements of a vector. The vector is
 operand 1, and operand 0 is the scalar result, with mode equal to the mode of
 the elements of the input vector.
 
+@cindex @code{reduc_fmin_scal_@var{m}} instruction pattern
+@cindex @code{reduc_fmax_scal_@var{m}} instruction pattern
+@item @samp{reduc_fmin_scal_@var{m}}, @samp{reduc_fmax_scal_@var{m}}
+Find the floating-point minimum/maximum of the elements of a vector,
+using the same rules as @code{fmin@var{m}3} and @code{fmax@var{m}3}.
+Operand 1 is a vector of mode @var{m} and operand 0 is the scalar
+result, which has mode @code{GET_MODE_INNER (@var{m})}.
+
 @cindex @code{reduc_plus_scal_@var{m}} instruction pattern
 @item @samp{reduc_plus_scal_@var{m}}
 Compute the sum of the elements of a vector. The vector is operand 1, and
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index bb4d8ab8096..acb0dbda556 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -216,6 +216,10 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (REDUC_MAX, ECF_CONST | ECF_NOTHROW, first,
 			      reduc_smax_scal, reduc_umax_scal, unary)
 DEF_INTERNAL_SIGNED_OPTAB_FN (REDUC_MIN, ECF_CONST | ECF_NOTHROW, first,
 			      reduc_smin_scal, reduc_umin_scal, unary)
+DEF_INTERNAL_OPTAB_FN (REDUC_FMAX, ECF_CONST | ECF_NOTHROW,
+		       reduc_fmax_scal, unary)
+DEF_INTERNAL_OPTAB_FN (REDUC_FMIN, ECF_CONST | ECF_NOTHROW,
+		       reduc_fmin_scal, unary)
 DEF_INTERNAL_OPTAB_FN (REDUC_AND, ECF_CONST | ECF_NOTHROW,
 		       reduc_and_scal, unary)
 DEF_INTERNAL_OPTAB_FN (REDUC_IOR, ECF_CONST | ECF_NOTHROW,
diff --git a/gcc/optabs.def b/gcc/optabs.def
index e25f4c9a346..cef6054b378 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -335,6 +335,8 @@ OPTAB_D (fmax_optab, "fmax$a3")
 OPTAB_D (fmin_optab, "fmin$a3")
 
 /* Vector reduction to a scalar.  */
+OPTAB_D (reduc_fmax_scal_optab, "reduc_fmax_scal_$a")
+OPTAB_D (reduc_fmin_scal_optab, "reduc_fmin_scal_$a")
 OPTAB_D (reduc_smax_scal_optab, "reduc_smax_scal_$a")
 OPTAB_D (reduc_smin_scal_optab, "reduc_smin_scal_$a")
 OPTAB_D (reduc_plus_scal_optab, "reduc_plus_scal_$a")
diff --git a/gcc/testsuite/gcc.dg/vect/vect-fmax-1.c b/gcc/testsuite/gcc.dg/vect/vect-fmax-1.c
new file mode 100644
index 00000000000..841ffab5666
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-fmax-1.c
@@ -0,0 +1,83 @@
+#include "tree-vect.h"
+
+#ifndef TYPE
+#define TYPE float
+#define FN __builtin_fmaxf
+#endif
+
+TYPE __attribute__((noipa))
+test (TYPE x, TYPE *ptr, int n)
+{
+  for (int i = 0; i < n; ++i)
+    x = FN (x, ptr[i]);
+  return x;
+}
+
+#define N 128
+#define HALF (N / 2)
+
+int
+main (void)
+{
+  check_vect ();
+
+  TYPE a[N];
+
+  for (int i = 0; i < N; ++i)
+    a[i] = i;
+
+  if (test (-1, a, 1) != 0)
+    __builtin_abort ();
+  if (test (-1, a, 64) != 63)
+    __builtin_abort ();
+  if (test (-1, a, 65) != 64)
+    __builtin_abort ();
+  if (test (-1, a, 66) != 65)
+    __builtin_abort ();
+  if (test (-1, a, 67) != 66)
+    __builtin_abort ();
+  if (test (-1, a, 128) != 127)
+    __builtin_abort ();
+  if (test (127, a, 128) != 127)
+    __builtin_abort ();
+  if (test (128, a, 128) != 128)
+    __builtin_abort ();
+
+  for (int i = 0; i < N; ++i)
+    a[i] = -i;
+
+  if (test (-60, a, 4) != 0)
+    __builtin_abort ();
+  if (test (0, a, 4) != 0)
+    __builtin_abort ();
+  if (test (1, a, 4) != 1)
+    __builtin_abort ();
+
+  for (int i = 0; i < HALF; ++i)
+    {
+      a[i] = i;
+      a[HALF + i] = HALF - i;
+    }
+
+  if (test (0, a, HALF - 16) != HALF - 17)
+    __builtin_abort ();
+  if (test (0, a, HALF - 2) != HALF - 3)
+    __builtin_abort ();
+  if (test (0, a, HALF - 1) != HALF - 2)
+    __builtin_abort ();
+  if (test (0, a, HALF) != HALF - 1)
+    __builtin_abort ();
+  if (test (0, a, HALF + 1) != HALF)
+    __builtin_abort ();
+  if (test (0, a, HALF + 2) != HALF)
+    __builtin_abort ();
+  if (test (0, a, HALF + 3) != HALF)
+    __builtin_abort ();
+  if (test (0, a, HALF + 16) != HALF)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-fmax-2.c b/gcc/testsuite/gcc.dg/vect/vect-fmax-2.c
new file mode 100644
index 00000000000..3d1f64416d5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-fmax-2.c
@@ -0,0 +1,7 @@
+#define TYPE double
+#define FN __builtin_fmax
+
+#include "vect-fmax-1.c"
+
+/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-fmax-3.c b/gcc/testsuite/gcc.dg/vect/vect-fmax-3.c
new file mode 100644
index 00000000000..f711ed0563e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-fmax-3.c
@@ -0,0 +1,83 @@
+#include "tree-vect.h"
+
+void __attribute__((noipa))
+test (double x0, double x1, double *restrict res, double *restrict ptr, int n)
+{
+  for (int i = 0; i < n; i += 2)
+    {
+      x0 = __builtin_fmax (x0, ptr[i + 0]);
+      x1 = __builtin_fmax (x1, ptr[i + 1]);
+    }
+  res[0] = x0;
+  res[1] = x1;
+}
+
+#define N 128
+#define HALF (N / 2)
+
+int
+main (void)
+{
+  check_vect ();
+
+  double res[2], a[N];
+
+  for (int i = 0; i < N; i += 2)
+    {
+      a[i] = i < HALF ? i : HALF;
+      a[i + 1] = i / 8;
+    }
+
+  test (-1, -1, res, a, 2);
+  if (res[0] != 0 || res[1] != 0)
+    __builtin_abort ();
+
+  test (-1, -1, res, a, 6);
+  if (res[0] != 4 || res[1] != 0)
+    __builtin_abort ();
+
+  test (-1, -1, res, a, 8);
+  if (res[0] != 6 || res[1] != 0)
+    __builtin_abort ();
+
+  test (-1, -1, res, a, 10);
+  if (res[0] != 8 || res[1] != 1)
+    __builtin_abort ();
+
+  test (-1, -1, res, a, HALF - 2);
+  if (res[0] != HALF - 4 || res[1] != HALF / 8 - 1)
+    __builtin_abort ();
+
+  test (-1, -1, res, a, HALF);
+  if (res[0] != HALF - 2 || res[1] != HALF / 8 - 1)
+    __builtin_abort ();
+
+  test (-1, -1, res, a, HALF + 2);
+  if (res[0] != HALF || res[1] != HALF / 8)
+    __builtin_abort ();
+
+  test (-1, -1, res, a, HALF + 8);
+  if (res[0] != HALF || res[1] != HALF / 8)
+    __builtin_abort ();
+
+  test (-1, -1, res, a, HALF + 10);
+  if (res[0] != HALF || res[1] != HALF / 8 + 1)
+    __builtin_abort ();
+
+  test (-1, -1, res, a, N);
+  if (res[0] != HALF || res[1] != N / 8 - 1)
+    __builtin_abort ();
+
+  test (HALF + 1, -1, res, a, N);
+  if (res[0] != HALF + 1 || res[1] != N / 8 - 1)
+    __builtin_abort ();
+
+  test (HALF + 1, N, res, a, N);
+  if (res[0] != HALF + 1 || res[1] != N)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-fmin-1.c b/gcc/testsuite/gcc.dg/vect/vect-fmin-1.c
new file mode 100644
index 00000000000..3d5f843a9db
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-fmin-1.c
@@ -0,0 +1,86 @@
+#include "tree-vect.h"
+
+#ifndef TYPE
+#define TYPE float
+#define FN __builtin_fminf
+#endif
+
+TYPE __attribute__((noipa))
+test (TYPE x, TYPE *ptr, int n)
+{
+  for (int i = 0; i < n; ++i)
+    x = FN (x, ptr[i]);
+  return x;
+}
+
+#define N 128
+#define HALF (N / 2)
+
+int
+main (void)
+{
+  check_vect ();
+
+  TYPE a[N];
+
+  for (int i = 0; i < N; ++i)
+    a[i] = -i;
+
+  if (test (1, a, 1) != 0)
+    __builtin_abort ();
+  if (test (1, a, 64) != -63)
+    __builtin_abort ();
+  if (test (1, a, 65) != -64)
+    __builtin_abort ();
+  if (test (1, a, 66) != -65)
+    __builtin_abort ();
+  if (test (1, a, 67) != -66)
+    __builtin_abort ();
+  if (test (1, a, 128) != -127)
+    __builtin_abort ();
+  if (test (-127, a, 128) != -127)
+    __builtin_abort ();
+  if (test (-128, a, 128) != -128)
+    __builtin_abort ();
+
+  for (int i = 0; i < N; ++i)
+    a[i] = i;
+
+  if (test (1, a, 4) != 0)
+    __builtin_abort ();
+  if (test (0, a, 4) != 0)
+    __builtin_abort ();
+  if (test (-1, a, 4) != -1)
+    __builtin_abort ();
+
+  for (int i = 0; i < HALF; ++i)
+    {
+      a[i] = HALF - i;
+      a[HALF + i] = i;
+    }
+
+  if (test (N, a, HALF - 16) != 17)
+    __builtin_abort ();
+  if (test (N, a, HALF - 2) != 3)
+    __builtin_abort ();
+  if (test (N, a, HALF - 1) != 2)
+    __builtin_abort ();
+  if (test (N, a, HALF) != 1)
+    __builtin_abort ();
+  if (test (N, a, HALF + 1) != 0)
+    __builtin_abort ();
+  if (test (N, a, HALF + 2) != 0)
+    __builtin_abort ();
+  if (test (N, a, HALF + 3) != 0)
+    __builtin_abort ();
+  if (test (N, a, HALF + 16) != 0)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
+
+/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-fmin-2.c b/gcc/testsuite/gcc.dg/vect/vect-fmin-2.c
new file mode 100644
index 00000000000..21e45cca55a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-fmin-2.c
@@ -0,0 +1,9 @@
+#ifndef TYPE
+#define TYPE double
+#define FN __builtin_fmin
+#endif
+
+#include "vect-fmin-1.c"
+
+/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-fmin-3.c b/gcc/testsuite/gcc.dg/vect/vect-fmin-3.c
new file mode 100644
index 00000000000..cc38bf43909
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-fmin-3.c
@@ -0,0 +1,83 @@
+#include "tree-vect.h"
+
+void __attribute__((noipa))
+test (double x0, double x1, double *restrict res, double *restrict ptr, int n)
+{
+  for (int i = 0; i < n; i += 2)
+    {
+      x0 = __builtin_fmin (x0, ptr[i + 0]);
+      x1 = __builtin_fmin (x1, ptr[i + 1]);
+    }
+  res[0] = x0;
+  res[1] = x1;
+}
+
+#define N 128
+#define HALF (N / 2)
+
+int
+main (void)
+{
+  check_vect ();
+
+  double res[2], a[N];
+
+  for (int i = 0; i < N; i += 2)
+    {
+      a[i] = i < HALF ? HALF - i : 0;
+      a[i + 1] = -i / 8;
+    }
+
+  test (N, N, res, a, 2);
+  if (res[0] != HALF || res[1] != 0)
+    __builtin_abort ();
+
+  test (N, N, res, a, 6);
+  if (res[0] != HALF - 4 || res[1] != 0)
+    __builtin_abort ();
+
+  test (N, N, res, a, 8);
+  if (res[0] != HALF - 6 || res[1] != 0)
+    __builtin_abort ();
+
+  test (N, N, res, a, 10);
+  if (res[0] != HALF - 8 || res[1] != -1)
+    __builtin_abort ();
+
+  test (N, N, res, a, HALF - 2);
+  if (res[0] != 4 || res[1] != -HALF / 8 + 1)
+    __builtin_abort ();
+
+  test (N, N, res, a, HALF);
+  if (res[0] != 2 || res[1] != -HALF / 8 + 1)
+    __builtin_abort ();
+
+  test (N, N, res, a, HALF + 2);
+  if (res[0] != 0 || res[1] != -HALF / 8)
+    __builtin_abort ();
+
+  test (N, N, res, a, HALF + 8);
+  if (res[0] != 0 || res[1] != -HALF / 8)
+    __builtin_abort ();
+
+  test (N, N, res, a, HALF + 10);
+  if (res[0] != 0 || res[1] != -HALF / 8 - 1)
+    __builtin_abort ();
+
+  test (N, N, res, a, N);
+  if (res[0] != 0 || res[1] != -N / 8 + 1)
+    __builtin_abort ();
+
+  test (-1, N, res, a, N);
+  if (res[0] != -1 || res[1] != -N / 8 + 1)
+    __builtin_abort ();
+
+  test (-1, -N / 8, res, a, N);
+  if (res[0] != -1 || res[1] != -N / 8)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fmaxnm_1.c b/gcc/testsuite/gcc.target/aarch64/fmaxnm_1.c
new file mode 100644
index 00000000000..40c36c7a3dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fmaxnm_1.c
@@ -0,0 +1,24 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+#pragma GCC target "+nosve"
+
+float
+f1 (float x, float *ptr)
+{
+  for (int i = 0; i < 128; ++i)
+    x = __builtin_fmaxf (x, ptr[i]);
+  return x;
+}
+
+double
+f2 (double x, double *ptr)
+{
+  for (int i = 0; i < 128; ++i)
+    x = __builtin_fmax (x, ptr[i]);
+  return x;
+}
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, v[0-9]+\.4s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnmp\td[0-9]+, v[0-9]+\.2d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fmaxnm_2.c b/gcc/testsuite/gcc.target/aarch64/fmaxnm_2.c
new file mode 100644
index 00000000000..6e48ac8eeee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fmaxnm_2.c
@@ -0,0 +1,20 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+#pragma GCC target "+nosve"
+
+void
+f (double *restrict res, double *restrict ptr)
+{
+  double x0 = res[0];
+  double x1 = res[1];
+  for (int i = 0; i < 128; i += 2)
+    {
+      x0 = __builtin_fmax (x0, ptr[i + 0]);
+      x1 = __builtin_fmax (x1, ptr[i + 1]);
+    }
+  res[0] = x0;
+  res[1] = x1;
+}
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d\n} 1 } } */
+/* { dg-final { scan-assembler {\tstr\tq[0-9]+, \[x0\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fminnm_1.c b/gcc/testsuite/gcc.target/aarch64/fminnm_1.c
new file mode 100644
index 00000000000..1cf372b2a6b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fminnm_1.c
@@ -0,0 +1,24 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+#pragma GCC target "+nosve"
+
+float
+f1 (float x, float *ptr)
+{
+  for (int i = 0; i < 128; ++i)
+    x = __builtin_fminf (x, ptr[i]);
+  return x;
+}
+
+double
+f2 (double x, double *ptr)
+{
+  for (int i = 0; i < 128; ++i)
+    x = __builtin_fmin (x, ptr[i]);
+  return x;
+}
+
+/* { dg-final { scan-assembler-times {\tfminnm\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, v[0-9]+\.4s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnmp\td[0-9]+, v[0-9]+\.2d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fminnm_2.c b/gcc/testsuite/gcc.target/aarch64/fminnm_2.c
new file mode 100644
index 00000000000..543e1884051
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fminnm_2.c
@@ -0,0 +1,20 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+#pragma GCC target "+nosve"
+
+void
+f (double *restrict res, double *restrict ptr)
+{
+  double x0 = res[0];
+  double x1 = res[1];
+  for (int i = 0; i < 128; i += 2)
+    {
+      x0 = __builtin_fmin (x0, ptr[i + 0]);
+      x1 = __builtin_fmin (x1, ptr[i + 1]);
+    }
+  res[0] = x0;
+  res[1] = x1;
+}
+
+/* { dg-final { scan-assembler-times {\tfminnm\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d\n} 1 } } */
+/* { dg-final { scan-assembler {\tstr\tq[0-9]+, \[x0\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_2.c
new file mode 100644
index 00000000000..ee3cdc20f96
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_2.c
@@ -0,0 +1,22 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+float
+f1 (float x, float *ptr)
+{
+  for (int i = 0; i < 128; ++i)
+    x = __builtin_fmaxf (x, ptr[i]);
+  return x;
+}
+
+double
+f2 (double x, double *ptr)
+{
+  for (int i = 0; i < 128; ++i)
+    x = __builtin_fmax (x, ptr[i]);
+  return x;
+}
+
+/* { dg-final { scan-assembler {\twhilelo\t(p[0-7])\.s,.*\tfmaxnm\tz[0-9]+\.s, \1/m, z[0-9]+\.s, z[0-9]+\.s\n} } } */
+/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler {\twhilelo\t(p[0-7])\.d,.*\tfmaxnm\tz[0-9]+\.d, \1/m, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_3.c
new file mode 100644
index 00000000000..a8eee0f4b26
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_3.c
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+void
+f (double *restrict res, double *restrict ptr)
+{
+  double x0 = res[0];
+  double x1 = res[1];
+  for (int i = 0; i < 128; i += 2)
+    {
+      x0 = __builtin_fmax (x0, ptr[i + 0]);
+      x1 = __builtin_fmax (x1, ptr[i + 1]);
+    }
+  res[0] = x0;
+  res[1] = x1;
+}
+
+/* { dg-final { scan-assembler {\twhilelo\t(p[0-7])\.d,.*\tfmaxnm\tz[0-9]+\.d, \1/m, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fminnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fminnm_2.c
new file mode 100644
index 00000000000..10aced05f1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fminnm_2.c
@@ -0,0 +1,22 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+float
+f1 (float x, float *ptr)
+{
+  for (int i = 0; i < 128; ++i)
+    x = __builtin_fminf (x, ptr[i]);
+  return x;
+}
+
+double
+f2 (double x, double *ptr)
+{
+  for (int i = 0; i < 128; ++i)
+    x = __builtin_fmin (x, ptr[i]);
+  return x;
+}
+
+/* { dg-final { scan-assembler {\twhilelo\t(p[0-7])\.s,.*\tfminnm\tz[0-9]+\.s, \1/m, z[0-9]+\.s, z[0-9]+\.s\n} } } */
+/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler {\twhilelo\t(p[0-7])\.d,.*\tfminnm\tz[0-9]+\.d, \1/m, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fminnm_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fminnm_3.c
new file mode 100644
index 00000000000..80ad0160249
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fminnm_3.c
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+void
+f (double *restrict res, double *restrict ptr)
+{
+  double x0 = res[0];
+  double x1 = res[1];
+  for (int i = 0; i < 128; i += 2)
+    {
+      x0 = __builtin_fmin (x0, ptr[i + 0]);
+      x1 = __builtin_fmin (x1, ptr[i + 1]);
+    }
+  res[0] = x0;
+  res[1] = x1;
+}
+
+/* { dg-final { scan-assembler {\twhilelo\t(p[0-7])\.d,.*\tfminnm\tz[0-9]+\.d, \1/m, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index b1198e1a9ef..841da78f1fd 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -3185,9 +3185,22 @@ reduction_fn_for_scalar_code (code_helper code, internal_fn *reduc_fn)
 	return true;
 
       default:
-	break;
-    }
-  return false;
+	return false;
+      }
+  else
+    switch (combined_fn (code))
+      {
+      CASE_CFN_FMAX:
+	*reduc_fn = IFN_REDUC_FMAX;
+	return true;
+
+      CASE_CFN_FMIN:
+	*reduc_fn = IFN_REDUC_FMIN;
+	return true;
+
+      default:
+	return false;
+      }
 }
 
 /* If there is a neutral value X such that a reduction would not be affected
@@ -3223,9 +3236,18 @@ neutral_op_for_reduction (tree scalar_type, code_helper code,
 	return initial_value;
 
       default:
-	break;
+	return NULL_TREE;
+      }
+  else
+    switch (combined_fn (code))
+      {
+      CASE_CFN_FMIN:
+      CASE_CFN_FMAX:
+	return initial_value;
+
+      default:
+	return NULL_TREE;
       }
-  return NULL_TREE;
 }
 
 /* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
@@ -3255,9 +3277,18 @@ needs_fold_left_reduction_p (tree type, code_helper code)
 	    return false;
 
 	  default:
-	    break;
+	    return !flag_associative_math;
+	  }
+      else
+	switch (combined_fn (code))
+	  {
+	  CASE_CFN_FMIN:
+	  CASE_CFN_FMAX:
+	    return false;
+
+	  default:
+	    return !flag_associative_math;
 	  }
-      return !flag_associative_math;
     }
 
   if (INTEGRAL_TYPE_P (type))


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-11-30  9:53 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-30  9:53 [gcc r12-5602] vect: Add support for fmax and fmin reductions Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).