public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v1] RISCV-V: Suport FP floor auto-vectorization
@ 2023-09-22  6:23 pan2.li
  2023-09-23  0:40 ` [PATCH v2] RISC-V: " pan2.li
  2023-09-23  1:19 ` [PATCH v3] " pan2.li
  0 siblings, 2 replies; 7+ messages in thread
From: pan2.li @ 2023-09-22  6:23 UTC (permalink / raw)
  To: gcc-patches; +Cc: juzhe.zhong, pan2.li, yanzhang.wang, kito.cheng

From: Pan Li <pan2.li@intel.com>

This patch would like to support auto-vectorization for the
floor API in math.h. It depends on the -ffast-math option.

When floor/floorf is called, e.g. v2 = floor (v1), we lower it to the
instructions below (following the LLVM implementation).

* vfcvt.x.f v3, v1, RDN
* vfcvt.f.x v2, v3

However, a floating-point value does not need the conversions above when it
has no fractional bits, i.e. when it is already an integer. For example, see
the single-precision values below.

  +-----------+---------------+-------------+
  | raw float | binary layout | after floor |
  +-----------+---------------+-------------+
  | 8388607.5 | 0x4affffff    | 8388607.0   |
  | 8388608.0 | 0x4b000000    | 8388608.0   |
  | 8388609.0 | 0x4b000001    | 8388609.0   |
  +-----------+---------------+-------------+

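Every single-precision value whose magnitude is greater than or equal to
8388608.0 (2^23) has no fractional bits left in its mantissa, so it is
already an integer. We leverage vmflt on the absolute value to build a mask
that filters those elements out, and only do the cvt on the masked elements.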

Before this patch:
math-floor-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    floorf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3

After this patch:
  ...
  fsrmi       2   // Rounding Down
.L4:
  vfabs.v     v0,v1
  vmv1r.v     v2,v1
  vmflt.vv    v0,v0,v4
  sub         a3,a3,a4
  vfcvt.x.f.v v3,v1,v0.t
  vfcvt.f.x.v v2,v3,v0.t
  vfsgnj.vv   v2,v2,v1
  bne         .L4
.L14:
  fsrm        a6
  ret

Please note VLS mode is also involved in this patch and covered by the
test cases.

gcc/ChangeLog:

	* config/riscv/autovec.md (floor<mode>2): New pattern.
	* config/riscv/riscv-protos.h (enum insn_flags): Add FRM_RDN_P.
	(enum insn_type): Add UNARY_OP_TAMU_FRM_RDN.
	(expand_vec_floor): New function decl.
	* config/riscv/riscv-v.cc (gen_floor_const_fp): New function impl.
	(expand_vec_floor): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/math-floor-0.c: New test.
	* gcc.target/riscv/rvv/autovec/math-floor-1.c: New test.
	* gcc.target/riscv/rvv/autovec/math-floor-2.c: New test.
	* gcc.target/riscv/rvv/autovec/math-floor-3.c: New test.
	* gcc.target/riscv/rvv/autovec/math-floor-run-0.c: New test.
	* gcc.target/riscv/rvv/autovec/math-floor-run-1.c: New test.
	* gcc.target/riscv/rvv/autovec/math-floor-run-2.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/math-floor-1.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/autovec.md                   | 11 ++++
 gcc/config/riscv/riscv-protos.h               |  5 ++
 gcc/config/riscv/riscv-v.cc                   | 36 +++++++++++-
 .../riscv/rvv/autovec/math-floor-0.c          | 26 +++++++++
 .../riscv/rvv/autovec/math-floor-1.c          | 26 +++++++++
 .../riscv/rvv/autovec/math-floor-2.c          | 26 +++++++++
 .../riscv/rvv/autovec/math-floor-3.c          | 28 ++++++++++
 .../riscv/rvv/autovec/math-floor-run-0.c      | 39 +++++++++++++
 .../riscv/rvv/autovec/math-floor-run-1.c      | 39 +++++++++++++
 .../riscv/rvv/autovec/math-floor-run-2.c      | 39 +++++++++++++
 .../riscv/rvv/autovec/vls/math-floor-1.c      | 56 +++++++++++++++++++
 11 files changed, 329 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-0.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-run-0.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index b92cb7a5d0f..9ba20e27cf1 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2245,6 +2245,7 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
 ;; -------------------------------------------------------------------------
 ;; Includes:
 ;; - ceil/ceilf
+;; - floor/floorf
 ;; -------------------------------------------------------------------------
 (define_expand "ceil<mode>2"
   [(match_operand:V_VLSF 0 "register_operand")
@@ -2255,3 +2256,13 @@ (define_expand "ceil<mode>2"
     DONE;
   }
 )
+
+(define_expand "floor<mode>2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   (match_operand:V_VLSF 1 "register_operand")]
+  "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
+  {
+    riscv_vector::expand_vec_floor (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 07b4ffe3edf..04e26c957d7 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -253,6 +253,9 @@ enum insn_flags : unsigned int
 
   /* Means INSN has FRM operand and the value is FRM_RUP.  */
   FRM_RUP_P = 1 << 16,
+
+  /* Means INSN has FRM operand and the value is FRM_RDN.  */
+  FRM_RDN_P = 1 << 17,
 };
 
 enum insn_type : unsigned int
@@ -294,6 +297,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
   UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
+  UNARY_OP_TAMU_FRM_RDN = UNARY_OP_TAMU | FRM_RDN_P,
 
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
@@ -437,6 +441,7 @@ void expand_cond_len_unop (unsigned, rtx *);
 void expand_cond_len_binop (unsigned, rtx *);
 void expand_reduction (unsigned, unsigned, rtx *, rtx);
 void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
+void expand_vec_floor (rtx, rtx, machine_mode, machine_mode);
 #endif
 bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
 			  bool, void (*)(rtx *, rtx));
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index f63dec573ef..8eb05b32ef2 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -323,8 +323,10 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
-    if (m_insn_flags & FRM_RUP_P)
+    else if (m_insn_flags & FRM_RUP_P)
       add_rounding_mode_operand (FRM_RUP);
+    else if (m_insn_flags & FRM_RDN_P)
+      add_rounding_mode_operand (FRM_RDN);
 
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
@@ -3508,6 +3510,13 @@ gen_ceil_const_fp (machine_mode inner_mode)
   return const_double_from_real_value (real, inner_mode);
 }
 
+static rtx
+gen_floor_const_fp (machine_mode inner_mode)
+{
+  /* The floor needs the same floating point const as ceil.  */
+  return gen_ceil_const_fp (inner_mode);
+}
+
 static rtx
 expand_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
 			   machine_mode vec_fp_mode)
@@ -3568,7 +3577,30 @@ expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   icode = code_for_pred (FLOAT, vec_fp_mode);
   emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RUP, cvt_fp_ops);
 
-  /* Step-4: Retrieve the sign bit.  */
+  /* Step-4: Retrieve the sign bit for -0.0.  */
+  expand_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
+}
+
+void
+expand_vec_floor (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
+		  machine_mode vec_int_mode)
+{
+  /* Step-1: Generate the mask on const fp.  */
+  rtx const_fp = gen_floor_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx mask = expand_vec_float_cmp_mask (op_1, LT, const_fp, vec_fp_mode);
+
+  /* Step-2: Convert to integer on mask, with rounding down (aka floor).  */
+  rtx tmp = gen_reg_rtx (vec_int_mode);
+  rtx cvt_x_ops[] = {tmp, mask, tmp, op_1};
+  insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RDN, cvt_x_ops);
+
+  /* Step-3: Convert to floating-point on mask for the floor result.  */
+  rtx cvt_fp_ops[] = {op_0, mask, op_1, tmp};
+  icode = code_for_pred (FLOAT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RDN, cvt_fp_ops);
+
+  /* Step-4: Retrieve the sign bit for -0.0.  */
   expand_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
 }
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-0.c
new file mode 100644
index 00000000000..a9095e0222f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-0.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test__Float16___builtin_floorf16:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e16,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (_Float16, __builtin_floorf16)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-1.c
new file mode 100644
index 00000000000..3cab1597f02
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_floorf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (float, __builtin_floorf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-2.c
new file mode 100644
index 00000000000..9b0a30fd217
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double___builtin_floor:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (double, __builtin_floor)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-3.c
new file mode 100644
index 00000000000..b1bd8df0bbc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-3.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_floorf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_UNARY_CALL (float, __builtin_floorf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-run-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-run-0.c
new file mode 100644
index 00000000000..6f017887603
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-run-0.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -std=c2x -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+_Float16 in[ARRAY_SIZE];
+_Float16 out[ARRAY_SIZE];
+_Float16 ref[ARRAY_SIZE];
+
+TEST_UNARY_CALL (_Float16, __builtin_floorf16)
+TEST_ASSERT (_Float16)
+
+TEST_INIT (_Float16, 1.2, 1.0, 1)
+TEST_INIT (_Float16, -1.2, -2.0, 2)
+TEST_INIT (_Float16, 3.0, 3.0, 3)
+TEST_INIT (_Float16, 1023.5, 1023.0, 4)
+TEST_INIT (_Float16, 1025.0, 1025.0, 5)
+TEST_INIT (_Float16, 0.0, 0.0, 6)
+TEST_INIT (_Float16, -0.0, -0.0, 7)
+TEST_INIT (_Float16, -1023.5, -1024.0, 8)
+TEST_INIT (_Float16, -1024.0, -1024.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (_Float16, 1, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 2, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 3, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 4, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 5, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 6, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 7, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 8, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 9, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-run-1.c
new file mode 100644
index 00000000000..25df3f89fa7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-run-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-march=rv64gcv -std=c99 -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+TEST_UNARY_CALL (float, __builtin_floorf)
+TEST_ASSERT (float)
+
+TEST_INIT (float, 1.2, 1.0, 1)
+TEST_INIT (float, -1.2, -2.0, 2)
+TEST_INIT (float, 3.0, 3.0, 3)
+TEST_INIT (float, 8388607.5, 8388607.0, 4)
+TEST_INIT (float, 8388609.0, 8388609.0, 5)
+TEST_INIT (float, 0.0, 0.0, 6)
+TEST_INIT (float, -0.0, -0.0, 7)
+TEST_INIT (float, -8388607.5, -8388608.0, 8)
+TEST_INIT (float, -8388608.0, -8388608.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (float, 1, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 2, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 3, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 4, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 5, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 6, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 7, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 8, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 9, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-run-2.c
new file mode 100644
index 00000000000..7090b95cd2c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-floor-run-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-march=rv64gcv -std=c99 -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+TEST_UNARY_CALL (double, __builtin_floor)
+TEST_ASSERT (double)
+
+TEST_INIT (double, 1.2, 1.0, 1)
+TEST_INIT (double, -1.2, -2.0, 2)
+TEST_INIT (double, 3.0, 3.0, 3)
+TEST_INIT (double, 4503599627370495.5, 4503599627370495.0, 4)
+TEST_INIT (double, 4503599627370497.0, 4503599627370497.0, 5)
+TEST_INIT (double, 0.0, 0.0, 6)
+TEST_INIT (double, -0.0, -0.0, 7)
+TEST_INIT (double, -4503599627370495.5, -4503599627370496.0, 8)
+TEST_INIT (double, -4503599627370496.0, -4503599627370496.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (double, 1, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 2, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 3, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 4, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 5, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 6, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 7, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 8, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 9, __builtin_floor, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c
new file mode 100644
index 00000000000..076580e6a58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_OP_V (floorf16, 1, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 2, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 4, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 8, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 16, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 32, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 64, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 128, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 256, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 512, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 1024, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 2048, _Float16, __builtin_floorf16)
+
+DEF_OP_V (floorf, 1, float, __builtin_floorf)
+DEF_OP_V (floorf, 2, float, __builtin_floorf)
+DEF_OP_V (floorf, 4, float, __builtin_floorf)
+DEF_OP_V (floorf, 8, float, __builtin_floorf)
+DEF_OP_V (floorf, 16, float, __builtin_floorf)
+DEF_OP_V (floorf, 32, float, __builtin_floorf)
+DEF_OP_V (floorf, 64, float, __builtin_floorf)
+DEF_OP_V (floorf, 128, float, __builtin_floorf)
+DEF_OP_V (floorf, 256, float, __builtin_floorf)
+DEF_OP_V (floorf, 512, float, __builtin_floorf)
+DEF_OP_V (floorf, 1024, float, __builtin_floorf)
+
+DEF_OP_V (floor, 1, double, __builtin_floor)
+DEF_OP_V (floor, 2, double, __builtin_floor)
+DEF_OP_V (floor, 4, double, __builtin_floor)
+DEF_OP_V (floor, 8, double, __builtin_floor)
+DEF_OP_V (floor, 16, double, __builtin_floor)
+DEF_OP_V (floor, 32, double, __builtin_floor)
+DEF_OP_V (floor, 64, double, __builtin_floor)
+DEF_OP_V (floor, 128, double, __builtin_floor)
+DEF_OP_V (floor, 256, double, __builtin_floor)
+DEF_OP_V (floor, 512, double, __builtin_floor)
+
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
+/* { dg-final { scan-assembler-times {vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
+/* { dg-final { scan-assembler-times {vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v2] RISC-V: Suport FP floor auto-vectorization
  2023-09-22  6:23 [PATCH v1] RISCV-V: Suport FP floor auto-vectorization pan2.li
@ 2023-09-23  0:40 ` pan2.li
  2023-09-23  0:42   ` 钟居哲
  2023-09-23  1:19 ` [PATCH v3] " pan2.li
  1 sibling, 1 reply; 7+ messages in thread
From: pan2.li @ 2023-09-23  0:40 UTC (permalink / raw)
  To: gcc-patches; +Cc: juzhe.zhong, pan2.li, yanzhang.wang, kito.cheng

From: Pan Li <pan2.li@intel.com>

This patch would like to support auto-vectorization for the
floor API in math.h. It depends on the -ffast-math option.

When floor/floorf is called, e.g. v2 = floor (v1), we lower it to the
instructions below (following the LLVM implementation).

* vfcvt.x.f v3, v1, RDN
* vfcvt.f.x v2, v3

However, a floating-point value does not need the conversions above when it
has no fractional bits, i.e. when it is already an integer. For example, see
the single-precision values below.

  +-----------+---------------+-------------+
  | raw float | binary layout | after floor |
  +-----------+---------------+-------------+
  | 8388607.5 | 0x4affffff    | 8388607.0   |
  | 8388608.0 | 0x4b000000    | 8388608.0   |
  | 8388609.0 | 0x4b000001    | 8388609.0   |
  +-----------+---------------+-------------+

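Every single-precision value whose magnitude is greater than or equal to
8388608.0 (2^23) has no fractional bits left in its mantissa, so it is
already an integer. We leverage vmflt on the absolute value to build a mask
that filters those elements out, and only do the cvt on the masked elements.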

Before this patch:
math-floor-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    floorf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3

After this patch:
  ...
  fsrmi       2   // Rounding Down
.L4:
  vfabs.v     v1,v2
  vmflt.vf    v0,v1,fa5
  vfcvt.x.f.v v3,v2,v0.t
  vfcvt.f.x.v v1,v3,v0.t
  vfsgnj.vv   v1,v1,v2
  bne         .L4
.L14:
  fsrm        a6
  ret

Please note VLS mode is also involved in this patch and covered by the
test cases.

gcc/ChangeLog:

	* config/riscv/autovec.md (floor<mode>2): New pattern.
	* config/riscv/riscv-protos.h (enum insn_flags): Add FRM_RDN_P.
	(enum insn_type): Add UNARY_OP_TAMU_FRM_RDN.
	(expand_vec_floor): New function decl.
	* config/riscv/riscv-v.cc (gen_floor_const_fp): New function impl.
	(expand_vec_floor): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/unop/math-floor-0.c: New test.
	* gcc.target/riscv/rvv/autovec/unop/math-floor-1.c: New test.
	* gcc.target/riscv/rvv/autovec/unop/math-floor-2.c: New test.
	* gcc.target/riscv/rvv/autovec/unop/math-floor-3.c: New test.
	* gcc.target/riscv/rvv/autovec/unop/math-floor-run-0.c: New test.
	* gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c: New test.
	* gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/math-floor-1.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/autovec.md                   | 11 ++++
 gcc/config/riscv/riscv-protos.h               |  5 ++
 gcc/config/riscv/riscv-v.cc                   | 35 +++++++++++-
 .../riscv/rvv/autovec/unop/math-floor-0.c     | 23 ++++++++
 .../riscv/rvv/autovec/unop/math-floor-1.c     | 23 ++++++++
 .../riscv/rvv/autovec/unop/math-floor-2.c     | 23 ++++++++
 .../riscv/rvv/autovec/unop/math-floor-3.c     | 25 +++++++++
 .../riscv/rvv/autovec/unop/math-floor-run-0.c | 39 +++++++++++++
 .../riscv/rvv/autovec/unop/math-floor-run-1.c | 39 +++++++++++++
 .../riscv/rvv/autovec/unop/math-floor-run-2.c | 39 +++++++++++++
 .../riscv/rvv/autovec/vls/math-floor-1.c      | 56 +++++++++++++++++++
 11 files changed, 316 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-0.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 6f35fb1bd9e..a005e17457e 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2209,6 +2209,7 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
 ;; -------------------------------------------------------------------------
 ;; Includes:
 ;; - ceil/ceilf
+;; - floor/floorf
 ;; -------------------------------------------------------------------------
 (define_expand "ceil<mode>2"
   [(match_operand:V_VLSF 0 "register_operand")
@@ -2219,3 +2220,13 @@ (define_expand "ceil<mode>2"
     DONE;
   }
 )
+
+(define_expand "floor<mode>2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   (match_operand:V_VLSF 1 "register_operand")]
+  "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
+  {
+    riscv_vector::expand_vec_floor (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 34becfbaba8..63eb2475705 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -253,6 +253,9 @@ enum insn_flags : unsigned int
 
   /* Means INSN has FRM operand and the value is FRM_RUP.  */
   FRM_RUP_P = 1 << 16,
+
+  /* Means INSN has FRM operand and the value is FRM_RDN.  */
+  FRM_RDN_P = 1 << 17,
 };
 
 enum insn_type : unsigned int
@@ -294,6 +297,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
   UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
+  UNARY_OP_TAMU_FRM_RDN = UNARY_OP_TAMU | FRM_RDN_P,
 
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
@@ -455,6 +459,7 @@ void expand_cond_len_unop (unsigned, rtx *);
 void expand_cond_len_binop (unsigned, rtx *);
 void expand_reduction (unsigned, unsigned, rtx *, rtx);
 void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
+void expand_vec_floor (rtx, rtx, machine_mode, machine_mode);
 #endif
 bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
 			  bool, void (*)(rtx *, rtx));
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 251d827d973..c2466b1354f 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -326,8 +326,10 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
-    if (m_insn_flags & FRM_RUP_P)
+    else if (m_insn_flags & FRM_RUP_P)
       add_rounding_mode_operand (FRM_RUP);
+    else if (m_insn_flags & FRM_RDN_P)
+      add_rounding_mode_operand (FRM_RDN);
 
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
@@ -3556,6 +3558,13 @@ gen_ceil_const_fp (machine_mode inner_mode)
   return const_double_from_real_value (real, inner_mode);
 }
 
+static rtx
+gen_floor_const_fp (machine_mode inner_mode)
+{
+  /* The floor needs the same floating point const as ceil.  */
+  return gen_ceil_const_fp (inner_mode);
+}
+
 static rtx
 emit_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
 			 machine_mode vec_fp_mode)
@@ -3635,7 +3644,29 @@ expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
      to int conversion.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RUP, vec_fp_mode);
 
-  /* Step-5: Retrieve the sign bit.  */
+  /* Step-5: Retrieve the sign bit for -0.0.  */
+  emit_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
+}
+
+void
+expand_vec_floor (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
+		  machine_mode vec_int_mode)
+{
+  /* Step-1: Get the abs float value for mask generation.  */
+  emit_vec_abs (op_0, op_1, vec_fp_mode);
+
+  /* Step-2: Generate the mask on const fp.  */
+  rtx const_fp = gen_floor_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx mask = emit_vec_float_cmp_mask (op_0, LT, const_fp, vec_fp_mode);
+
+  /* Step-3: Convert to integer on mask, with rounding down (aka floor).  */
+  rtx tmp = gen_reg_rtx (vec_int_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
+
+  /* Step-4: Convert to floating-point on mask for the floor result.  */
+  emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
+
+  /* Step-5: Retrieve the sign bit for -0.0.  */
   emit_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
 }
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c
new file mode 100644
index 00000000000..33b169395bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test__Float16___builtin_floorf16:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e16,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (_Float16, __builtin_floorf16)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c
new file mode 100644
index 00000000000..5c462c424df
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_floorf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (float, __builtin_floorf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c
new file mode 100644
index 00000000000..6f07add1004
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double___builtin_floor:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (double, __builtin_floor)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c
new file mode 100644
index 00000000000..a091ffdab50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_floorf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_UNARY_CALL (float, __builtin_floorf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-0.c
new file mode 100644
index 00000000000..abd45d30b06
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-0.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c2x -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+_Float16 in[ARRAY_SIZE];
+_Float16 out[ARRAY_SIZE];
+_Float16 ref[ARRAY_SIZE];
+
+TEST_UNARY_CALL (_Float16, __builtin_floorf16)
+TEST_ASSERT (_Float16)
+
+TEST_INIT (_Float16, 1.2, 1.0, 1)
+TEST_INIT (_Float16, -1.2, -2.0, 2)
+TEST_INIT (_Float16, 3.0, 3.0, 3)
+TEST_INIT (_Float16, 1023.5, 1023.0, 4)
+TEST_INIT (_Float16, 1025.0, 1025.0, 5)
+TEST_INIT (_Float16, 0.0, 0.0, 6)
+TEST_INIT (_Float16, -0.0, -0.0, 7)
+TEST_INIT (_Float16, -1023.5, -1024.0, 8)
+TEST_INIT (_Float16, -1024.0, -1024.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (_Float16, 1, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 2, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 3, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 4, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 5, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 6, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 7, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 8, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 9, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c
new file mode 100644
index 00000000000..d66905d4230
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+TEST_UNARY_CALL (float, __builtin_floorf)
+TEST_ASSERT (float)
+
+TEST_INIT (float, 1.2, 1.0, 1)
+TEST_INIT (float, -1.2, -2.0, 2)
+TEST_INIT (float, 3.0, 3.0, 3)
+TEST_INIT (float, 8388607.5, 8388607.0, 4)
+TEST_INIT (float, 8388609.0, 8388609.0, 5)
+TEST_INIT (float, 0.0, 0.0, 6)
+TEST_INIT (float, -0.0, -0.0, 7)
+TEST_INIT (float, -8388607.5, -8388608.0, 8)
+TEST_INIT (float, -8388608.0, -8388608.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (float, 1, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 2, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 3, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 4, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 5, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 6, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 7, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 8, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 9, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c
new file mode 100644
index 00000000000..367a16dd714
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+TEST_UNARY_CALL (double, __builtin_floor)
+TEST_ASSERT (double)
+
+TEST_INIT (double, 1.2, 1.0, 1)
+TEST_INIT (double, -1.2, -2.0, 2)
+TEST_INIT (double, 3.0, 3.0, 3)
+TEST_INIT (double, 4503599627370495.5, 4503599627370495.0, 4)
+TEST_INIT (double, 4503599627370497.0, 4503599627370497.0, 5)
+TEST_INIT (double, 0.0, 0.0, 6)
+TEST_INIT (double, -0.0, -0.0, 7)
+TEST_INIT (double, -4503599627370495.5, -4503599627370496.0, 8)
+TEST_INIT (double, -4503599627370496.0, -4503599627370496.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (double, 1, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 2, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 3, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 4, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 5, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 6, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 7, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 8, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 9, __builtin_floor, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c
new file mode 100644
index 00000000000..076580e6a58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_OP_V (floorf16, 1, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 2, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 4, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 8, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 16, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 32, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 64, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 128, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 256, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 512, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 1024, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 2048, _Float16, __builtin_floorf16)
+
+DEF_OP_V (floorf, 1, float, __builtin_floorf)
+DEF_OP_V (floorf, 2, float, __builtin_floorf)
+DEF_OP_V (floorf, 4, float, __builtin_floorf)
+DEF_OP_V (floorf, 8, float, __builtin_floorf)
+DEF_OP_V (floorf, 16, float, __builtin_floorf)
+DEF_OP_V (floorf, 32, float, __builtin_floorf)
+DEF_OP_V (floorf, 64, float, __builtin_floorf)
+DEF_OP_V (floorf, 128, float, __builtin_floorf)
+DEF_OP_V (floorf, 256, float, __builtin_floorf)
+DEF_OP_V (floorf, 512, float, __builtin_floorf)
+DEF_OP_V (floorf, 1024, float, __builtin_floorf)
+
+DEF_OP_V (floor, 1, double, __builtin_floor)
+DEF_OP_V (floor, 2, double, __builtin_floor)
+DEF_OP_V (floor, 4, double, __builtin_floor)
+DEF_OP_V (floor, 8, double, __builtin_floor)
+DEF_OP_V (floor, 16, double, __builtin_floor)
+DEF_OP_V (floor, 32, double, __builtin_floor)
+DEF_OP_V (floor, 64, double, __builtin_floor)
+DEF_OP_V (floor, 128, double, __builtin_floor)
+DEF_OP_V (floor, 256, double, __builtin_floor)
+DEF_OP_V (floor, 512, double, __builtin_floor)
+
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
+/* { dg-final { scan-assembler-times {vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
+/* { dg-final { scan-assembler-times {vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] RISC-V: Suport FP floor auto-vectorization
  2023-09-23  0:40 ` [PATCH v2] RISC-V: " pan2.li
@ 2023-09-23  0:42   ` 钟居哲
  2023-09-23  1:02     ` Li, Pan2
  0 siblings, 1 reply; 7+ messages in thread
From: 钟居哲 @ 2023-09-23  0:42 UTC (permalink / raw)
  To: pan2.li, gcc-patches; +Cc: pan2.li, yanzhang.wang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 21308 bytes --]

LGTM. But I think you should remove FP16 run tests.

So please first send a patch that removes the FP16 run test of CEIL.



juzhe.zhong@rivai.ai
 
From: pan2.li
Date: 2023-09-23 08:40
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v2] RISC-V: Suport FP floor auto-vectorization
From: Pan Li <pan2.li@intel.com>
 
This patch would like to support auto-vectorization for the
floor API in math.h. It depends on the -ffast-math option.
 
When we would like to call floor/floorf like v2 = floor (v1), we will
convert it into below insns (reference the implementation of llvm).
 
* vfcvt.x.f v3, v1, RDN
* vfcvt.f.x v2, v3
 
However, the floating point value may not need the cvt as above if
its mantissa is zero. For example single precision floating point below.
 
  +-----------+---------------+-------------+
  | raw float | binary layout | after floor |
  +-----------+---------------+-------------+
  | 8388607.5 | 0x4affffff    | 8388607.0   |
  | 8388608.0 | 0x4b000000    | 8388608.0   |
  | 8388609.0 | 0x4b000001    | 8388609.0   |
  +-----------+---------------+-------------+
 
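All single-precision floating-point values whose magnitude is greater than or
equal to 8388608.0 have no fractional bits in the mantissa, i.e. they are
already integral. We leverage vmflt and a mask to filter them out in the
vector and only do the cvt on the masked elements.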
 
Before this patch:
math-floor-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    floorf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3
 
After this patch:
  ...
  fsrmi       2   // Rounding Down
.L4:
  vfabs.v     v1,v2
  vmflt.vf    v0,v1,fa5
  vfcvt.x.f.v v3,v2,v0.t
  vfcvt.f.x.v v1,v3,v0.t
  vfsgnj.vv   v1,v1,v2
  bne         .L4
.L14:
  fsrm        a6
  ret
 
Please note VLS mode is also involved in this patch and covered by the
test cases.
 
gcc/ChangeLog:
 
* config/riscv/autovec.md (floor<mode>2): New pattern.
* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
(enum insn_type): Ditto.
(expand_vec_floor): New function decl.
* config/riscv/riscv-v.cc (gen_floor_const_fp): New function impl.
(expand_vec_floor): Ditto.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/unop/math-floor-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-2.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-3.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-run-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls/math-floor-1.c: New test.
 
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/autovec.md                   | 11 ++++
gcc/config/riscv/riscv-protos.h               |  5 ++
gcc/config/riscv/riscv-v.cc                   | 35 +++++++++++-
.../riscv/rvv/autovec/unop/math-floor-0.c     | 23 ++++++++
.../riscv/rvv/autovec/unop/math-floor-1.c     | 23 ++++++++
.../riscv/rvv/autovec/unop/math-floor-2.c     | 23 ++++++++
.../riscv/rvv/autovec/unop/math-floor-3.c     | 25 +++++++++
.../riscv/rvv/autovec/unop/math-floor-run-0.c | 39 +++++++++++++
.../riscv/rvv/autovec/unop/math-floor-run-1.c | 39 +++++++++++++
.../riscv/rvv/autovec/unop/math-floor-run-2.c | 39 +++++++++++++
.../riscv/rvv/autovec/vls/math-floor-1.c      | 56 +++++++++++++++++++
11 files changed, 316 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-0.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c
 
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 6f35fb1bd9e..a005e17457e 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2209,6 +2209,7 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
;; -------------------------------------------------------------------------
;; Includes:
;; - ceil/ceilf
+;; - floor/floorf
;; -------------------------------------------------------------------------
(define_expand "ceil<mode>2"
   [(match_operand:V_VLSF 0 "register_operand")
@@ -2219,3 +2220,13 @@ (define_expand "ceil<mode>2"
     DONE;
   }
)
+
+(define_expand "floor<mode>2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   (match_operand:V_VLSF 1 "register_operand")]
+  "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
+  {
+    riscv_vector::expand_vec_floor (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 34becfbaba8..63eb2475705 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -253,6 +253,9 @@ enum insn_flags : unsigned int
   /* Means INSN has FRM operand and the value is FRM_RUP.  */
   FRM_RUP_P = 1 << 16,
+
+  /* Means INSN has FRM operand and the value is FRM_RDN.  */
+  FRM_RDN_P = 1 << 17,
};
enum insn_type : unsigned int
@@ -294,6 +297,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
   UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
+  UNARY_OP_TAMU_FRM_RDN = UNARY_OP_TAMU | FRM_RDN_P,
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
@@ -455,6 +459,7 @@ void expand_cond_len_unop (unsigned, rtx *);
void expand_cond_len_binop (unsigned, rtx *);
void expand_reduction (unsigned, unsigned, rtx *, rtx);
void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
+void expand_vec_floor (rtx, rtx, machine_mode, machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
  bool, void (*)(rtx *, rtx));
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 251d827d973..c2466b1354f 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -326,8 +326,10 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
-    if (m_insn_flags & FRM_RUP_P)
+    else if (m_insn_flags & FRM_RUP_P)
       add_rounding_mode_operand (FRM_RUP);
+    else if (m_insn_flags & FRM_RDN_P)
+      add_rounding_mode_operand (FRM_RDN);
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
@@ -3556,6 +3558,13 @@ gen_ceil_const_fp (machine_mode inner_mode)
   return const_double_from_real_value (real, inner_mode);
}
+static rtx
+gen_floor_const_fp (machine_mode inner_mode)
+{
+  /* The floor needs the same floating point const as ceil.  */
+  return gen_ceil_const_fp (inner_mode);
+}
+
static rtx
emit_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
machine_mode vec_fp_mode)
@@ -3635,7 +3644,29 @@ expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
      to int conversion.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RUP, vec_fp_mode);
-  /* Step-5: Retrieve the sign bit.  */
+  /* Step-5: Retrieve the sign bit for -0.0.  */
+  emit_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
+}
+
+void
+expand_vec_floor (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
+   machine_mode vec_int_mode)
+{
+  /* Step-1: Get the abs float value for mask generation.  */
+  emit_vec_abs (op_0, op_1, vec_fp_mode);
+
+  /* Step-2: Generate the mask on const fp.  */
+  rtx const_fp = gen_floor_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx mask = emit_vec_float_cmp_mask (op_0, LT, const_fp, vec_fp_mode);
+
+  /* Step-3: Convert to integer on mask, with rounding down (aka floor).  */
+  rtx tmp = gen_reg_rtx (vec_int_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
+
+  /* Step-4: Convert to floating-point on mask for the floor result.  */
+  emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
+
+  /* Step-5: Retrieve the sign bit for -0.0.  */
   emit_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c
new file mode 100644
index 00000000000..33b169395bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test__Float16___builtin_floorf16:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e16,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (_Float16, __builtin_floorf16)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c
new file mode 100644
index 00000000000..5c462c424df
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_floorf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (float, __builtin_floorf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c
new file mode 100644
index 00000000000..6f07add1004
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double___builtin_floor:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (double, __builtin_floor)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c
new file mode 100644
index 00000000000..a091ffdab50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_floorf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_UNARY_CALL (float, __builtin_floorf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-0.c
new file mode 100644
index 00000000000..abd45d30b06
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-0.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c2x -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+_Float16 in[ARRAY_SIZE];
+_Float16 out[ARRAY_SIZE];
+_Float16 ref[ARRAY_SIZE];
+
+TEST_UNARY_CALL (_Float16, __builtin_floorf16)
+TEST_ASSERT (_Float16)
+
+TEST_INIT (_Float16, 1.2, 1.0, 1)
+TEST_INIT (_Float16, -1.2, -2.0, 2)
+TEST_INIT (_Float16, 3.0, 3.0, 3)
+TEST_INIT (_Float16, 1023.5, 1023.0, 4)
+TEST_INIT (_Float16, 1025.0, 1025.0, 5)
+TEST_INIT (_Float16, 0.0, 0.0, 6)
+TEST_INIT (_Float16, -0.0, -0.0, 7)
+TEST_INIT (_Float16, -1023.5, -1024.0, 8)
+TEST_INIT (_Float16, -1024.0, -1024.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (_Float16, 1, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 2, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 3, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 4, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 5, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 6, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 7, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 8, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 9, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c
new file mode 100644
index 00000000000..d66905d4230
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+TEST_UNARY_CALL (float, __builtin_floorf)
+TEST_ASSERT (float)
+
+TEST_INIT (float, 1.2, 1.0, 1)
+TEST_INIT (float, -1.2, -2.0, 2)
+TEST_INIT (float, 3.0, 3.0, 3)
+TEST_INIT (float, 8388607.5, 8388607.0, 4)
+TEST_INIT (float, 8388609.0, 8388609.0, 5)
+TEST_INIT (float, 0.0, 0.0, 6)
+TEST_INIT (float, -0.0, -0.0, 7)
+TEST_INIT (float, -8388607.5, -8388608.0, 8)
+TEST_INIT (float, -8388608.0, -8388608.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (float, 1, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 2, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 3, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 4, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 5, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 6, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 7, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 8, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 9, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c
new file mode 100644
index 00000000000..367a16dd714
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+TEST_UNARY_CALL (double, __builtin_floor)
+TEST_ASSERT (double)
+
+TEST_INIT (double, 1.2, 1.0, 1)
+TEST_INIT (double, -1.2, -2.0, 2)
+TEST_INIT (double, 3.0, 3.0, 3)
+TEST_INIT (double, 4503599627370495.5, 4503599627370495.0, 4)
+TEST_INIT (double, 4503599627370497.0, 4503599627370497.0, 5)
+TEST_INIT (double, 0.0, 0.0, 6)
+TEST_INIT (double, -0.0, -0.0, 7)
+TEST_INIT (double, -4503599627370495.5, -4503599627370496.0, 8)
+TEST_INIT (double, -4503599627370496.0, -4503599627370496.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (double, 1, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 2, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 3, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 4, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 5, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 6, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 7, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 8, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 9, __builtin_floor, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c
new file mode 100644
index 00000000000..076580e6a58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_OP_V (floorf16, 1, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 2, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 4, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 8, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 16, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 32, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 64, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 128, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 256, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 512, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 1024, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 2048, _Float16, __builtin_floorf16)
+
+DEF_OP_V (floorf, 1, float, __builtin_floorf)
+DEF_OP_V (floorf, 2, float, __builtin_floorf)
+DEF_OP_V (floorf, 4, float, __builtin_floorf)
+DEF_OP_V (floorf, 8, float, __builtin_floorf)
+DEF_OP_V (floorf, 16, float, __builtin_floorf)
+DEF_OP_V (floorf, 32, float, __builtin_floorf)
+DEF_OP_V (floorf, 64, float, __builtin_floorf)
+DEF_OP_V (floorf, 128, float, __builtin_floorf)
+DEF_OP_V (floorf, 256, float, __builtin_floorf)
+DEF_OP_V (floorf, 512, float, __builtin_floorf)
+DEF_OP_V (floorf, 1024, float, __builtin_floorf)
+
+DEF_OP_V (floor, 1, double, __builtin_floor)
+DEF_OP_V (floor, 2, double, __builtin_floor)
+DEF_OP_V (floor, 4, double, __builtin_floor)
+DEF_OP_V (floor, 8, double, __builtin_floor)
+DEF_OP_V (floor, 16, double, __builtin_floor)
+DEF_OP_V (floor, 32, double, __builtin_floor)
+DEF_OP_V (floor, 64, double, __builtin_floor)
+DEF_OP_V (floor, 128, double, __builtin_floor)
+DEF_OP_V (floor, 256, double, __builtin_floor)
+DEF_OP_V (floor, 512, double, __builtin_floor)
+
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
+/* { dg-final { scan-assembler-times {vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
+/* { dg-final { scan-assembler-times {vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
-- 
2.34.1
 
 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH v2] RISC-V: Suport FP floor auto-vectorization
  2023-09-23  0:42   ` 钟居哲
@ 2023-09-23  1:02     ` Li, Pan2
  0 siblings, 0 replies; 7+ messages in thread
From: Li, Pan2 @ 2023-09-23  1:02 UTC (permalink / raw)
  To: 钟居哲, gcc-patches; +Cc: Wang, Yanzhang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 21928 bytes --]

Sure.

Pan

From: 钟居哲 <juzhe.zhong@rivai.ai>
Sent: Saturday, September 23, 2023 8:42 AM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Li, Pan2 <pan2.li@intel.com>; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng <kito.cheng@gmail.com>
Subject: Re: [PATCH v2] RISC-V: Suport FP floor auto-vectorization

LGTM. But I think you should remove FP16 run tests.

So plz send a patch first remove FP16 run test of CEIL first.

________________________________
juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>

From: pan2.li<mailto:pan2.li@intel.com>
Date: 2023-09-23 08:40
To: gcc-patches<mailto:gcc-patches@gcc.gnu.org>
CC: juzhe.zhong<mailto:juzhe.zhong@rivai.ai>; pan2.li<mailto:pan2.li@intel.com>; yanzhang.wang<mailto:yanzhang.wang@intel.com>; kito.cheng<mailto:kito.cheng@gmail.com>
Subject: [PATCH v2] RISC-V: Suport FP floor auto-vectorization
From: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>

This patch would like to support auto-vectorization for the
floor API in math.h. It depends on the -ffast-math option.

When we would like to call floor/floorf like v2 = floor (v1), we will
convert it into below insns (reference the implementation of llvm).

* vfcvt.x.f v3, v1, RDN
* vfcvt.f.x v2, v3

However, a floating-point value does not need the conversions above if it
has no fractional part. For example, take the single-precision values below.

  +-----------+---------------+-------------+
  | raw float | binary layout | after floor |
  +-----------+---------------+-------------+
  | 8388607.5 | 0x4affffff    | 8388607.0   |
  | 8388608.0 | 0x4b000000    | 8388608.0   |
  | 8388609.0 | 0x4b000001    | 8388609.0   |
  +-----------+---------------+-------------+

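All single-precision floating-point values whose absolute value is greater
than or equal to 8388608.0 (2^23) have no fractional bits left in the mantissa.
We leverage vmflt to build a mask that filters them out of the vector, and only
do the cvt on the elements selected by the mask.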

Before this patch:
math-floor-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    floorf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3

After this patch:
  ...
  fsrmi       2   // Rounding Down
.L4:
  vfabs.v     v1,v2
  vmflt.vf    v0,v1,fa5
  vfcvt.x.f.v v3,v2,v0.t
  vfcvt.f.x.v v1,v3,v0.t
  vfsgnj.vv   v1,v1,v2
  bne         .L4
.L14:
  fsrm        a6
  ret

Please note VLS mode is also involved in this patch and covered by the
test cases.

gcc/ChangeLog:

* config/riscv/autovec.md (floor<mode>2): New pattern.
* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
(enum insn_type): Ditto.
(expand_vec_floor): New function decl.
* config/riscv/riscv-v.cc (gen_floor_const_fp): New function impl.
(expand_vec_floor): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/unop/math-floor-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-2.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-3.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-run-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls/math-floor-1.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
---
gcc/config/riscv/autovec.md                   | 11 ++++
gcc/config/riscv/riscv-protos.h               |  5 ++
gcc/config/riscv/riscv-v.cc                   | 35 +++++++++++-
.../riscv/rvv/autovec/unop/math-floor-0.c     | 23 ++++++++
.../riscv/rvv/autovec/unop/math-floor-1.c     | 23 ++++++++
.../riscv/rvv/autovec/unop/math-floor-2.c     | 23 ++++++++
.../riscv/rvv/autovec/unop/math-floor-3.c     | 25 +++++++++
.../riscv/rvv/autovec/unop/math-floor-run-0.c | 39 +++++++++++++
.../riscv/rvv/autovec/unop/math-floor-run-1.c | 39 +++++++++++++
.../riscv/rvv/autovec/unop/math-floor-run-2.c | 39 +++++++++++++
.../riscv/rvv/autovec/vls/math-floor-1.c      | 56 +++++++++++++++++++
11 files changed, 316 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-0.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 6f35fb1bd9e..a005e17457e 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2209,6 +2209,7 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
;; -------------------------------------------------------------------------
;; Includes:
;; - ceil/ceilf
+;; - floor/floorf
;; -------------------------------------------------------------------------
(define_expand "ceil<mode>2"
   [(match_operand:V_VLSF 0 "register_operand")
@@ -2219,3 +2220,13 @@ (define_expand "ceil<mode>2"
     DONE;
   }
)
+
+(define_expand "floor<mode>2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   (match_operand:V_VLSF 1 "register_operand")]
+  "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
+  {
+    riscv_vector::expand_vec_floor (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 34becfbaba8..63eb2475705 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -253,6 +253,9 @@ enum insn_flags : unsigned int
   /* Means INSN has FRM operand and the value is FRM_RUP.  */
   FRM_RUP_P = 1 << 16,
+
+  /* Means INSN has FRM operand and the value is FRM_RDN.  */
+  FRM_RDN_P = 1 << 17,
};
enum insn_type : unsigned int
@@ -294,6 +297,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
   UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
+  UNARY_OP_TAMU_FRM_RDN = UNARY_OP_TAMU | FRM_RDN_P,
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
@@ -455,6 +459,7 @@ void expand_cond_len_unop (unsigned, rtx *);
void expand_cond_len_binop (unsigned, rtx *);
void expand_reduction (unsigned, unsigned, rtx *, rtx);
void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
+void expand_vec_floor (rtx, rtx, machine_mode, machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
  bool, void (*)(rtx *, rtx));
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 251d827d973..c2466b1354f 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -326,8 +326,10 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
-    if (m_insn_flags & FRM_RUP_P)
+    else if (m_insn_flags & FRM_RUP_P)
       add_rounding_mode_operand (FRM_RUP);
+    else if (m_insn_flags & FRM_RDN_P)
+      add_rounding_mode_operand (FRM_RDN);
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
@@ -3556,6 +3558,13 @@ gen_ceil_const_fp (machine_mode inner_mode)
   return const_double_from_real_value (real, inner_mode);
}
+static rtx
+gen_floor_const_fp (machine_mode inner_mode)
+{
+  /* The floor needs the same floating point const as ceil.  */
+  return gen_ceil_const_fp (inner_mode);
+}
+
static rtx
emit_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
machine_mode vec_fp_mode)
@@ -3635,7 +3644,29 @@ expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
      to int conversion.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RUP, vec_fp_mode);
-  /* Step-5: Retrieve the sign bit.  */
+  /* Step-5: Retrieve the sign bit for -0.0.  */
+  emit_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
+}
+
+void
+expand_vec_floor (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
+   machine_mode vec_int_mode)
+{
+  /* Step-1: Get the abs float value for mask generation.  */
+  emit_vec_abs (op_0, op_1, vec_fp_mode);
+
+  /* Step-2: Generate the mask on const fp.  */
+  rtx const_fp = gen_floor_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx mask = emit_vec_float_cmp_mask (op_0, LT, const_fp, vec_fp_mode);
+
+  /* Step-3: Convert to integer on mask, with rounding down (aka floor).  */
+  rtx tmp = gen_reg_rtx (vec_int_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
+
+  /* Step-4: Convert to floating-point on mask for the floor result.  */
+  emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
+
+  /* Step-5: Retrieve the sign bit for -0.0.  */
   emit_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c
new file mode 100644
index 00000000000..33b169395bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test__Float16___builtin_floorf16:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e16,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (_Float16, __builtin_floorf16)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c
new file mode 100644
index 00000000000..5c462c424df
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_floorf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (float, __builtin_floorf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c
new file mode 100644
index 00000000000..6f07add1004
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double___builtin_floor:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (double, __builtin_floor)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c
new file mode 100644
index 00000000000..a091ffdab50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_floorf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_UNARY_CALL (float, __builtin_floorf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-0.c
new file mode 100644
index 00000000000..abd45d30b06
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-0.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c2x -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+_Float16 in[ARRAY_SIZE];
+_Float16 out[ARRAY_SIZE];
+_Float16 ref[ARRAY_SIZE];
+
+TEST_UNARY_CALL (_Float16, __builtin_floorf16)
+TEST_ASSERT (_Float16)
+
+TEST_INIT (_Float16, 1.2, 1.0, 1) /* floor rounds down, not up.  */
+TEST_INIT (_Float16, -1.2, -2.0, 2) /* floor rounds toward -inf.  */
+TEST_INIT (_Float16, 3.0, 3.0, 3)
+TEST_INIT (_Float16, 1023.5, 1023.0, 4) /* largest FP16 with a fraction.  */
+TEST_INIT (_Float16, 1025.0, 1025.0, 5)
+TEST_INIT (_Float16, 0.0, 0.0, 6)
+TEST_INIT (_Float16, -0.0, -0.0, 7)
+TEST_INIT (_Float16, -1023.5, -1024.0, 8)
+TEST_INIT (_Float16, -1024.0, -1024.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (_Float16, 1, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 2, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 3, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 4, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 5, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 6, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 7, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 8, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 9, __builtin_floorf16, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c
new file mode 100644
index 00000000000..d66905d4230
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+TEST_UNARY_CALL (float, __builtin_floorf)
+TEST_ASSERT (float)
+
+TEST_INIT (float, 1.2, 1.0, 1)
+TEST_INIT (float, -1.2, -2.0, 2)
+TEST_INIT (float, 3.0, 3.0, 3)
+TEST_INIT (float, 8388607.5, 8388607.0, 4)
+TEST_INIT (float, 8388609.0, 8388609.0, 5)
+TEST_INIT (float, 0.0, 0.0, 6)
+TEST_INIT (float, -0.0, -0.0, 7)
+TEST_INIT (float, -8388607.5, -8388608.0, 8)
+TEST_INIT (float, -8388608.0, -8388608.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (float, 1, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 2, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 3, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 4, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 5, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 6, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 7, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 8, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 9, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c
new file mode 100644
index 00000000000..367a16dd714
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+TEST_UNARY_CALL (double, __builtin_floor)
+TEST_ASSERT (double)
+
+TEST_INIT (double, 1.2, 1.0, 1)
+TEST_INIT (double, -1.2, -2.0, 2)
+TEST_INIT (double, 3.0, 3.0, 3)
+TEST_INIT (double, 4503599627370495.5, 4503599627370495.0, 4)
+TEST_INIT (double, 4503599627370497.0, 4503599627370497.0, 5)
+TEST_INIT (double, 0.0, 0.0, 6)
+TEST_INIT (double, -0.0, -0.0, 7)
+TEST_INIT (double, -4503599627370495.5, -4503599627370496.0, 8)
+TEST_INIT (double, -4503599627370496.0, -4503599627370496.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (double, 1, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 2, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 3, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 4, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 5, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 6, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 7, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 8, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 9, __builtin_floor, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c
new file mode 100644
index 00000000000..076580e6a58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_OP_V (floorf16, 1, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 2, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 4, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 8, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 16, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 32, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 64, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 128, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 256, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 512, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 1024, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 2048, _Float16, __builtin_floorf16)
+
+DEF_OP_V (floorf, 1, float, __builtin_floorf)
+DEF_OP_V (floorf, 2, float, __builtin_floorf)
+DEF_OP_V (floorf, 4, float, __builtin_floorf)
+DEF_OP_V (floorf, 8, float, __builtin_floorf)
+DEF_OP_V (floorf, 16, float, __builtin_floorf)
+DEF_OP_V (floorf, 32, float, __builtin_floorf)
+DEF_OP_V (floorf, 64, float, __builtin_floorf)
+DEF_OP_V (floorf, 128, float, __builtin_floorf)
+DEF_OP_V (floorf, 256, float, __builtin_floorf)
+DEF_OP_V (floorf, 512, float, __builtin_floorf)
+DEF_OP_V (floorf, 1024, float, __builtin_floorf)
+
+DEF_OP_V (floor, 1, double, __builtin_floor)
+DEF_OP_V (floor, 2, double, __builtin_floor)
+DEF_OP_V (floor, 4, double, __builtin_floor)
+DEF_OP_V (floor, 8, double, __builtin_floor)
+DEF_OP_V (floor, 16, double, __builtin_floor)
+DEF_OP_V (floor, 32, double, __builtin_floor)
+DEF_OP_V (floor, 64, double, __builtin_floor)
+DEF_OP_V (floor, 128, double, __builtin_floor)
+DEF_OP_V (floor, 256, double, __builtin_floor)
+DEF_OP_V (floor, 512, double, __builtin_floor)
+
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
+/* { dg-final { scan-assembler-times {vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
+/* { dg-final { scan-assembler-times {vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
--
2.34.1



^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v3] RISC-V: Suport FP floor auto-vectorization
  2023-09-22  6:23 [PATCH v1] RISCV-V: Suport FP floor auto-vectorization pan2.li
  2023-09-23  0:40 ` [PATCH v2] RISC-V: " pan2.li
@ 2023-09-23  1:19 ` pan2.li
  2023-09-23  1:39   ` 钟居哲
  1 sibling, 1 reply; 7+ messages in thread
From: pan2.li @ 2023-09-23  1:19 UTC (permalink / raw)
  To: gcc-patches; +Cc: juzhe.zhong, pan2.li, yanzhang.wang, kito.cheng

From: Pan Li <pan2.li@intel.com>

This patch would like to support auto-vectorization for the
floor API in math.h. It depends on the -ffast-math option.

When we would like to call floor/floorf like v2 = floor (v1), we will
convert it into below insns (reference the implementation of llvm).

* vfcvt.x.f v3, v1, RDN
* vfcvt.f.x v2, v3

However, a floating-point value does not need the conversions above if it
has no fractional part. For example, take the single-precision values below.

  +-----------+---------------+-------------+
  | raw float | binary layout | after floor |
  +-----------+---------------+-------------+
  | 8388607.5 | 0x4affffff    | 8388607.0   |
  | 8388608.0 | 0x4b000000    | 8388608.0   |
  | 8388609.0 | 0x4b000001    | 8388609.0   |
  +-----------+---------------+-------------+

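All single-precision floating-point values whose absolute value is greater
than or equal to 8388608.0 (2^23) have no fractional bits left in the mantissa.
We leverage vmflt to build a mask that filters them out of the vector, and only
do the cvt on the elements selected by the mask.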

Before this patch:
math-floor-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    floorf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3

After this patch:
  ...
  fsrmi       2   // Rounding Down
.L4:
  vfabs.v     v1,v2
  vmflt.vf    v0,v1,fa5
  vfcvt.x.f.v v3,v2,v0.t
  vfcvt.f.x.v v1,v3,v0.t
  vfsgnj.vv   v1,v1,v2
  bne         .L4
.L14:
  fsrm        a6
  ret

Please note VLS mode is also involved in this patch and covered by the
test cases.

gcc/ChangeLog:

	* config/riscv/autovec.md (floor<mode>2): New pattern.
	* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
	(enum insn_type): Ditto.
	(expand_vec_floor): New function decl.
	* config/riscv/riscv-v.cc (gen_floor_const_fp): New function impl.
	(expand_vec_floor): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/unop/math-floor-0.c: New test.
	* gcc.target/riscv/rvv/autovec/unop/math-floor-1.c: New test.
	* gcc.target/riscv/rvv/autovec/unop/math-floor-2.c: New test.
	* gcc.target/riscv/rvv/autovec/unop/math-floor-3.c: New test.
	* gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c: New test.
	* gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/math-floor-1.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/autovec.md                   | 11 ++++
 gcc/config/riscv/riscv-protos.h               |  5 ++
 gcc/config/riscv/riscv-v.cc                   | 35 +++++++++++-
 .../riscv/rvv/autovec/unop/math-floor-0.c     | 23 ++++++++
 .../riscv/rvv/autovec/unop/math-floor-1.c     | 23 ++++++++
 .../riscv/rvv/autovec/unop/math-floor-2.c     | 23 ++++++++
 .../riscv/rvv/autovec/unop/math-floor-3.c     | 25 +++++++++
 .../riscv/rvv/autovec/unop/math-floor-run-1.c | 39 +++++++++++++
 .../riscv/rvv/autovec/unop/math-floor-run-2.c | 39 +++++++++++++
 .../riscv/rvv/autovec/vls/math-floor-1.c      | 56 +++++++++++++++++++
 10 files changed, 277 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 6f35fb1bd9e..a005e17457e 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2209,6 +2209,7 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
 ;; -------------------------------------------------------------------------
 ;; Includes:
 ;; - ceil/ceilf
+;; - floor/floorf
 ;; -------------------------------------------------------------------------
 (define_expand "ceil<mode>2"
   [(match_operand:V_VLSF 0 "register_operand")
@@ -2219,3 +2220,13 @@ (define_expand "ceil<mode>2"
     DONE;
   }
 )
+
+(define_expand "floor<mode>2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   (match_operand:V_VLSF 1 "register_operand")]
+  "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
+  {
+    riscv_vector::expand_vec_floor (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 34becfbaba8..63eb2475705 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -253,6 +253,9 @@ enum insn_flags : unsigned int
 
   /* Means INSN has FRM operand and the value is FRM_RUP.  */
   FRM_RUP_P = 1 << 16,
+
+  /* Means INSN has FRM operand and the value is FRM_RDN.  */
+  FRM_RDN_P = 1 << 17,
 };
 
 enum insn_type : unsigned int
@@ -294,6 +297,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
   UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
+  UNARY_OP_TAMU_FRM_RDN = UNARY_OP_TAMU | FRM_RDN_P,
 
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
@@ -455,6 +459,7 @@ void expand_cond_len_unop (unsigned, rtx *);
 void expand_cond_len_binop (unsigned, rtx *);
 void expand_reduction (unsigned, unsigned, rtx *, rtx);
 void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
+void expand_vec_floor (rtx, rtx, machine_mode, machine_mode);
 #endif
 bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
 			  bool, void (*)(rtx *, rtx));
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 251d827d973..c2466b1354f 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -326,8 +326,10 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
-    if (m_insn_flags & FRM_RUP_P)
+    else if (m_insn_flags & FRM_RUP_P)
       add_rounding_mode_operand (FRM_RUP);
+    else if (m_insn_flags & FRM_RDN_P)
+      add_rounding_mode_operand (FRM_RDN);
 
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
@@ -3556,6 +3558,13 @@ gen_ceil_const_fp (machine_mode inner_mode)
   return const_double_from_real_value (real, inner_mode);
 }
 
+static rtx
+gen_floor_const_fp (machine_mode inner_mode)
+{
+  /* The floor needs the same floating point const as ceil.  */
+  return gen_ceil_const_fp (inner_mode);
+}
+
 static rtx
 emit_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
 			 machine_mode vec_fp_mode)
@@ -3635,7 +3644,29 @@ expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
      to int conversion.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RUP, vec_fp_mode);
 
-  /* Step-5: Retrieve the sign bit.  */
+  /* Step-5: Retrieve the sign bit for -0.0.  */
+  emit_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
+}
+
+void
+expand_vec_floor (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
+		  machine_mode vec_int_mode)
+{
+  /* Step-1: Get the abs float value for mask generation.  */
+  emit_vec_abs (op_0, op_1, vec_fp_mode);
+
+  /* Step-2: Generate the mask on const fp.  */
+  rtx const_fp = gen_floor_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx mask = emit_vec_float_cmp_mask (op_0, LT, const_fp, vec_fp_mode);
+
+  /* Step-3: Convert to integer on mask, with rounding down (aka floor).  */
+  rtx tmp = gen_reg_rtx (vec_int_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
+
+  /* Step-4: Convert to floating-point on mask for the floor result.  */
+  emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
+
+  /* Step-5: Retrieve the sign bit for -0.0.  */
   emit_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
 }
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c
new file mode 100644
index 00000000000..33b169395bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test__Float16___builtin_floorf16:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e16,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (_Float16, __builtin_floorf16)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c
new file mode 100644
index 00000000000..5c462c424df
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_floorf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (float, __builtin_floorf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c
new file mode 100644
index 00000000000..6f07add1004
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double___builtin_floor:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (double, __builtin_floor)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c
new file mode 100644
index 00000000000..a091ffdab50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_floorf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_UNARY_CALL (float, __builtin_floorf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c
new file mode 100644
index 00000000000..d66905d4230
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+TEST_UNARY_CALL (float, __builtin_floorf)
+TEST_ASSERT (float)
+
+TEST_INIT (float, 1.2, 1.0, 1)
+TEST_INIT (float, -1.2, -2.0, 2)
+TEST_INIT (float, 3.0, 3.0, 3)
+TEST_INIT (float, 8388607.5, 8388607.0, 4)
+TEST_INIT (float, 8388609.0, 8388609.0, 5)
+TEST_INIT (float, 0.0, 0.0, 6)
+TEST_INIT (float, -0.0, -0.0, 7)
+TEST_INIT (float, -8388607.5, -8388608.0, 8)
+TEST_INIT (float, -8388608.0, -8388608.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (float, 1, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 2, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 3, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 4, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 5, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 6, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 7, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 8, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 9, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c
new file mode 100644
index 00000000000..367a16dd714
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+TEST_UNARY_CALL (double, __builtin_floor)
+TEST_ASSERT (double)
+
+TEST_INIT (double, 1.2, 1.0, 1)
+TEST_INIT (double, -1.2, -2.0, 2)
+TEST_INIT (double, 3.0, 3.0, 3)
+TEST_INIT (double, 4503599627370495.5, 4503599627370495.0, 4)
+TEST_INIT (double, 4503599627370497.0, 4503599627370497.0, 5)
+TEST_INIT (double, 0.0, 0.0, 6)
+TEST_INIT (double, -0.0, -0.0, 7)
+TEST_INIT (double, -4503599627370495.5, -4503599627370496.0, 8)
+TEST_INIT (double, -4503599627370496.0, -4503599627370496.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (double, 1, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 2, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 3, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 4, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 5, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 6, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 7, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 8, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 9, __builtin_floor, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c
new file mode 100644
index 00000000000..076580e6a58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_OP_V (floorf16, 1, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 2, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 4, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 8, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 16, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 32, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 64, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 128, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 256, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 512, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 1024, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 2048, _Float16, __builtin_floorf16)
+
+DEF_OP_V (floorf, 1, float, __builtin_floorf)
+DEF_OP_V (floorf, 2, float, __builtin_floorf)
+DEF_OP_V (floorf, 4, float, __builtin_floorf)
+DEF_OP_V (floorf, 8, float, __builtin_floorf)
+DEF_OP_V (floorf, 16, float, __builtin_floorf)
+DEF_OP_V (floorf, 32, float, __builtin_floorf)
+DEF_OP_V (floorf, 64, float, __builtin_floorf)
+DEF_OP_V (floorf, 128, float, __builtin_floorf)
+DEF_OP_V (floorf, 256, float, __builtin_floorf)
+DEF_OP_V (floorf, 512, float, __builtin_floorf)
+DEF_OP_V (floorf, 1024, float, __builtin_floorf)
+
+DEF_OP_V (floor, 1, double, __builtin_floor)
+DEF_OP_V (floor, 2, double, __builtin_floor)
+DEF_OP_V (floor, 4, double, __builtin_floor)
+DEF_OP_V (floor, 8, double, __builtin_floor)
+DEF_OP_V (floor, 16, double, __builtin_floor)
+DEF_OP_V (floor, 32, double, __builtin_floor)
+DEF_OP_V (floor, 64, double, __builtin_floor)
+DEF_OP_V (floor, 128, double, __builtin_floor)
+DEF_OP_V (floor, 256, double, __builtin_floor)
+DEF_OP_V (floor, 512, double, __builtin_floor)
+
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
+/* { dg-final { scan-assembler-times {vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
+/* { dg-final { scan-assembler-times {vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3] RISC-V: Suport FP floor auto-vectorization
  2023-09-23  1:19 ` [PATCH v3] " pan2.li
@ 2023-09-23  1:39   ` 钟居哲
  2023-09-23  1:45     ` Li, Pan2
  0 siblings, 1 reply; 7+ messages in thread
From: 钟居哲 @ 2023-09-23  1:39 UTC (permalink / raw)
  To: pan2.li, gcc-patches; +Cc: pan2.li, yanzhang.wang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 19229 bytes --]

LGTM.



juzhe.zhong@rivai.ai
 
From: pan2.li
Date: 2023-09-23 09:19
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v3] RISC-V: Suport FP floor auto-vectorization
From: Pan Li <pan2.li@intel.com>
 
This patch would like to support auto-vectorization for the
floor API in math.h. It depends on the -ffast-math option.
 
When we would like to call floor/floorf like v2 = floor (v1), we will
convert it into below insns (reference the implementation of llvm).
 
* vfcvt.x.f v3, v1, RDN
* vfcvt.f.x v2, v3
 
However, a floating-point value does not need the conversion above if
it has no fractional bits. For example, consider the single-precision
values below.
 
  +-----------+---------------+-------------+
  | raw float | binary layout | after floor |
  +-----------+---------------+-------------+
  | 8388607.5 | 0x4affffff    | 8388607.0   |
  | 8388608.0 | 0x4b000000    | 8388608.0   |
  | 8388609.0 | 0x4b000001    | 8388609.0   |
  +-----------+---------------+-------------+
 
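Every single-precision value whose magnitude is greater than or equal to
8388608.0 (2^23) has no fractional bits in its mantissa, so it is already
an integer. We use vfabs and vmflt to build a mask that filters those
elements out, and only do the conversions on the masked elements.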
 
Before this patch:
math-floor-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    floorf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3
 
After this patch:
  ...
  fsrmi       2   // Rounding Down
.L4:
  vfabs.v     v1,v2
  vmflt.vf    v0,v1,fa5
  vfcvt.x.f.v v3,v2,v0.t
  vfcvt.f.x.v v1,v3,v0.t
  vfsgnj.vv   v1,v1,v2
  bne         .L4
.L14:
  fsrm        a6
  ret
 
Please note VLS mode is also involved in this patch and covered by the
test cases.
 
gcc/ChangeLog:
 
* config/riscv/autovec.md (floor<mode>2): New pattern.
* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
(enum insn_type): Ditto.
(expand_vec_floor): New function decl.
* config/riscv/riscv-v.cc (gen_floor_const_fp): New function impl.
(expand_vec_floor): Ditto.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/unop/math-floor-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-2.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-3.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls/math-floor-1.c: New test.
 
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/autovec.md                   | 11 ++++
gcc/config/riscv/riscv-protos.h               |  5 ++
gcc/config/riscv/riscv-v.cc                   | 35 +++++++++++-
.../riscv/rvv/autovec/unop/math-floor-0.c     | 23 ++++++++
.../riscv/rvv/autovec/unop/math-floor-1.c     | 23 ++++++++
.../riscv/rvv/autovec/unop/math-floor-2.c     | 23 ++++++++
.../riscv/rvv/autovec/unop/math-floor-3.c     | 25 +++++++++
.../riscv/rvv/autovec/unop/math-floor-run-1.c | 39 +++++++++++++
.../riscv/rvv/autovec/unop/math-floor-run-2.c | 39 +++++++++++++
.../riscv/rvv/autovec/vls/math-floor-1.c      | 56 +++++++++++++++++++
10 files changed, 277 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c
 
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 6f35fb1bd9e..a005e17457e 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2209,6 +2209,7 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
;; -------------------------------------------------------------------------
;; Includes:
;; - ceil/ceilf
+;; - floor/floorf
;; -------------------------------------------------------------------------
(define_expand "ceil<mode>2"
   [(match_operand:V_VLSF 0 "register_operand")
@@ -2219,3 +2220,13 @@ (define_expand "ceil<mode>2"
     DONE;
   }
)
+
+(define_expand "floor<mode>2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   (match_operand:V_VLSF 1 "register_operand")]
+  "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
+  {
+    riscv_vector::expand_vec_floor (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 34becfbaba8..63eb2475705 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -253,6 +253,9 @@ enum insn_flags : unsigned int
   /* Means INSN has FRM operand and the value is FRM_RUP.  */
   FRM_RUP_P = 1 << 16,
+
+  /* Means INSN has FRM operand and the value is FRM_RDN.  */
+  FRM_RDN_P = 1 << 17,
};
enum insn_type : unsigned int
@@ -294,6 +297,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
   UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
+  UNARY_OP_TAMU_FRM_RDN = UNARY_OP_TAMU | FRM_RDN_P,
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
@@ -455,6 +459,7 @@ void expand_cond_len_unop (unsigned, rtx *);
void expand_cond_len_binop (unsigned, rtx *);
void expand_reduction (unsigned, unsigned, rtx *, rtx);
void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
+void expand_vec_floor (rtx, rtx, machine_mode, machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
  bool, void (*)(rtx *, rtx));
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 251d827d973..c2466b1354f 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -326,8 +326,10 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
-    if (m_insn_flags & FRM_RUP_P)
+    else if (m_insn_flags & FRM_RUP_P)
       add_rounding_mode_operand (FRM_RUP);
+    else if (m_insn_flags & FRM_RDN_P)
+      add_rounding_mode_operand (FRM_RDN);
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
@@ -3556,6 +3558,13 @@ gen_ceil_const_fp (machine_mode inner_mode)
   return const_double_from_real_value (real, inner_mode);
}
+static rtx
+gen_floor_const_fp (machine_mode inner_mode)
+{
+  /* The floor needs the same floating point const as ceil.  */
+  return gen_ceil_const_fp (inner_mode);
+}
+
static rtx
emit_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
machine_mode vec_fp_mode)
@@ -3635,7 +3644,29 @@ expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
      to int conversion.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RUP, vec_fp_mode);
-  /* Step-5: Retrieve the sign bit.  */
+  /* Step-5: Retrieve the sign bit for -0.0.  */
+  emit_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
+}
+
+void
+expand_vec_floor (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
+   machine_mode vec_int_mode)
+{
+  /* Step-1: Get the abs float value for mask generation.  */
+  emit_vec_abs (op_0, op_1, vec_fp_mode);
+
+  /* Step-2: Generate the mask on const fp.  */
+  rtx const_fp = gen_floor_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx mask = emit_vec_float_cmp_mask (op_0, LT, const_fp, vec_fp_mode);
+
+  /* Step-3: Convert to integer on mask, with rounding down (aka floor).  */
+  rtx tmp = gen_reg_rtx (vec_int_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
+
+  /* Step-4: Convert to floating-point on mask for the floor result.  */
+  emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
+
+  /* Step-5: Retrieve the sign bit for -0.0.  */
   emit_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c
new file mode 100644
index 00000000000..33b169395bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test__Float16___builtin_floorf16:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e16,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (_Float16, __builtin_floorf16)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c
new file mode 100644
index 00000000000..5c462c424df
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_floorf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (float, __builtin_floorf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c
new file mode 100644
index 00000000000..6f07add1004
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double___builtin_floor:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (double, __builtin_floor)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c
new file mode 100644
index 00000000000..a091ffdab50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_floorf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_UNARY_CALL (float, __builtin_floorf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c
new file mode 100644
index 00000000000..d66905d4230
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+TEST_UNARY_CALL (float, __builtin_floorf)
+TEST_ASSERT (float)
+
+TEST_INIT (float, 1.2, 1.0, 1)
+TEST_INIT (float, -1.2, -2.0, 2)
+TEST_INIT (float, 3.0, 3.0, 3)
+TEST_INIT (float, 8388607.5, 8388607.0, 4)
+TEST_INIT (float, 8388609.0, 8388609.0, 5)
+TEST_INIT (float, 0.0, 0.0, 6)
+TEST_INIT (float, -0.0, -0.0, 7)
+TEST_INIT (float, -8388607.5, -8388608.0, 8)
+TEST_INIT (float, -8388608.0, -8388608.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (float, 1, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 2, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 3, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 4, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 5, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 6, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 7, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 8, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 9, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c
new file mode 100644
index 00000000000..367a16dd714
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+TEST_UNARY_CALL (double, __builtin_floor)
+TEST_ASSERT (double)
+
+TEST_INIT (double, 1.2, 1.0, 1)
+TEST_INIT (double, -1.2, -2.0, 2)
+TEST_INIT (double, 3.0, 3.0, 3)
+TEST_INIT (double, 4503599627370495.5, 4503599627370495.0, 4)
+TEST_INIT (double, 4503599627370497.0, 4503599627370497.0, 5)
+TEST_INIT (double, 0.0, 0.0, 6)
+TEST_INIT (double, -0.0, -0.0, 7)
+TEST_INIT (double, -4503599627370495.5, -4503599627370496.0, 8)
+TEST_INIT (double, -4503599627370496.0, -4503599627370496.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (double, 1, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 2, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 3, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 4, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 5, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 6, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 7, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 8, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 9, __builtin_floor, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c
new file mode 100644
index 00000000000..076580e6a58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_OP_V (floorf16, 1, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 2, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 4, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 8, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 16, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 32, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 64, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 128, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 256, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 512, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 1024, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 2048, _Float16, __builtin_floorf16)
+
+DEF_OP_V (floorf, 1, float, __builtin_floorf)
+DEF_OP_V (floorf, 2, float, __builtin_floorf)
+DEF_OP_V (floorf, 4, float, __builtin_floorf)
+DEF_OP_V (floorf, 8, float, __builtin_floorf)
+DEF_OP_V (floorf, 16, float, __builtin_floorf)
+DEF_OP_V (floorf, 32, float, __builtin_floorf)
+DEF_OP_V (floorf, 64, float, __builtin_floorf)
+DEF_OP_V (floorf, 128, float, __builtin_floorf)
+DEF_OP_V (floorf, 256, float, __builtin_floorf)
+DEF_OP_V (floorf, 512, float, __builtin_floorf)
+DEF_OP_V (floorf, 1024, float, __builtin_floorf)
+
+DEF_OP_V (floor, 1, double, __builtin_floor)
+DEF_OP_V (floor, 2, double, __builtin_floor)
+DEF_OP_V (floor, 4, double, __builtin_floor)
+DEF_OP_V (floor, 8, double, __builtin_floor)
+DEF_OP_V (floor, 16, double, __builtin_floor)
+DEF_OP_V (floor, 32, double, __builtin_floor)
+DEF_OP_V (floor, 64, double, __builtin_floor)
+DEF_OP_V (floor, 128, double, __builtin_floor)
+DEF_OP_V (floor, 256, double, __builtin_floor)
+DEF_OP_V (floor, 512, double, __builtin_floor)
+
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
+/* { dg-final { scan-assembler-times {vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
+/* { dg-final { scan-assembler-times {vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
-- 
2.34.1
 
 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH v3] RISC-V: Suport FP floor auto-vectorization
  2023-09-23  1:39   ` 钟居哲
@ 2023-09-23  1:45     ` Li, Pan2
  0 siblings, 0 replies; 7+ messages in thread
From: Li, Pan2 @ 2023-09-23  1:45 UTC (permalink / raw)
  To: 钟居哲, gcc-patches; +Cc: Wang, Yanzhang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 19868 bytes --]

Committed, thanks Juzhe.

Pan

From: 钟居哲 <juzhe.zhong@rivai.ai>
Sent: Saturday, September 23, 2023 9:40 AM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Li, Pan2 <pan2.li@intel.com>; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng <kito.cheng@gmail.com>
Subject: Re: [PATCH v3] RISC-V: Suport FP floor auto-vectorization

LGTM.

________________________________
juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>

From: pan2.li<mailto:pan2.li@intel.com>
Date: 2023-09-23 09:19
To: gcc-patches<mailto:gcc-patches@gcc.gnu.org>
CC: juzhe.zhong<mailto:juzhe.zhong@rivai.ai>; pan2.li<mailto:pan2.li@intel.com>; yanzhang.wang<mailto:yanzhang.wang@intel.com>; kito.cheng<mailto:kito.cheng@gmail.com>
Subject: [PATCH v3] RISC-V: Suport FP floor auto-vectorization
From: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>

This patch would like to support auto-vectorization for the
floor API in math.h. It depends on the -ffast-math option.

When we would like to call floor/floorf like v2 = floor (v1), we will
convert it into below insns (reference the implementation of llvm).

* vfcvt.x.f v3, v1, RDN
* vfcvt.f.x v2, v3

However, a floating-point value does not need the conversion above if
it has no fractional bits. For example, consider the single-precision
values below.

  +-----------+---------------+-------------+
  | raw float | binary layout | after floor |
  +-----------+---------------+-------------+
  | 8388607.5 | 0x4affffff    | 8388607.0   |
  | 8388608.0 | 0x4b000000    | 8388608.0   |
  | 8388609.0 | 0x4b000001    | 8388609.0   |
  +-----------+---------------+-------------+

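Every single-precision value whose magnitude is greater than or equal to
8388608.0 (2^23) has no fractional bits in its mantissa, so it is already
an integer. We use vfabs and vmflt to build a mask that filters those
elements out, and only do the conversions on the masked elements.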

Before this patch:
math-floor-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    floorf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3

After this patch:
  ...
  fsrmi       2   // Rounding Down
.L4:
  vfabs.v     v1,v2
  vmflt.vf    v0,v1,fa5
  vfcvt.x.f.v v3,v2,v0.t
  vfcvt.f.x.v v1,v3,v0.t
  vfsgnj.vv   v1,v1,v2
  bne         .L4
.L14:
  fsrm        a6
  ret

Please note VLS mode is also involved in this patch and covered by the
test cases.

gcc/ChangeLog:

* config/riscv/autovec.md (floor<mode>2): New pattern.
* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
(enum insn_type): Ditto.
(expand_vec_floor): New function decl.
* config/riscv/riscv-v.cc (gen_floor_const_fp): New function impl.
(expand_vec_floor): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/unop/math-floor-0.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-2.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-3.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls/math-floor-1.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
---
gcc/config/riscv/autovec.md                   | 11 ++++
gcc/config/riscv/riscv-protos.h               |  5 ++
gcc/config/riscv/riscv-v.cc                   | 35 +++++++++++-
.../riscv/rvv/autovec/unop/math-floor-0.c     | 23 ++++++++
.../riscv/rvv/autovec/unop/math-floor-1.c     | 23 ++++++++
.../riscv/rvv/autovec/unop/math-floor-2.c     | 23 ++++++++
.../riscv/rvv/autovec/unop/math-floor-3.c     | 25 +++++++++
.../riscv/rvv/autovec/unop/math-floor-run-1.c | 39 +++++++++++++
.../riscv/rvv/autovec/unop/math-floor-run-2.c | 39 +++++++++++++
.../riscv/rvv/autovec/vls/math-floor-1.c      | 56 +++++++++++++++++++
10 files changed, 277 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 6f35fb1bd9e..a005e17457e 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2209,6 +2209,7 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
;; -------------------------------------------------------------------------
;; Includes:
;; - ceil/ceilf
+;; - floor/floorf
;; -------------------------------------------------------------------------
(define_expand "ceil<mode>2"
   [(match_operand:V_VLSF 0 "register_operand")
@@ -2219,3 +2220,13 @@ (define_expand "ceil<mode>2"
     DONE;
   }
)
+
+(define_expand "floor<mode>2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   (match_operand:V_VLSF 1 "register_operand")]
+  "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
+  {
+    riscv_vector::expand_vec_floor (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 34becfbaba8..63eb2475705 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -253,6 +253,9 @@ enum insn_flags : unsigned int
   /* Means INSN has FRM operand and the value is FRM_RUP.  */
   FRM_RUP_P = 1 << 16,
+
+  /* Means INSN has FRM operand and the value is FRM_RDN.  */
+  FRM_RDN_P = 1 << 17,
};
enum insn_type : unsigned int
@@ -294,6 +297,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
   UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
+  UNARY_OP_TAMU_FRM_RDN = UNARY_OP_TAMU | FRM_RDN_P,
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
@@ -455,6 +459,7 @@ void expand_cond_len_unop (unsigned, rtx *);
void expand_cond_len_binop (unsigned, rtx *);
void expand_reduction (unsigned, unsigned, rtx *, rtx);
void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
+void expand_vec_floor (rtx, rtx, machine_mode, machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
  bool, void (*)(rtx *, rtx));
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 251d827d973..c2466b1354f 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -326,8 +326,10 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
-    if (m_insn_flags & FRM_RUP_P)
+    else if (m_insn_flags & FRM_RUP_P)
       add_rounding_mode_operand (FRM_RUP);
+    else if (m_insn_flags & FRM_RDN_P)
+      add_rounding_mode_operand (FRM_RDN);
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
@@ -3556,6 +3558,13 @@ gen_ceil_const_fp (machine_mode inner_mode)
   return const_double_from_real_value (real, inner_mode);
}
+static rtx
+gen_floor_const_fp (machine_mode inner_mode)
+{
+  /* The floor needs the same floating point const as ceil.  */
+  return gen_ceil_const_fp (inner_mode);
+}
+
static rtx
emit_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
machine_mode vec_fp_mode)
@@ -3635,7 +3644,29 @@ expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
      to int conversion.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RUP, vec_fp_mode);
-  /* Step-5: Retrieve the sign bit.  */
+  /* Step-5: Retrieve the sign bit for -0.0.  */
+  emit_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
+}
+
+void
+expand_vec_floor (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
+   machine_mode vec_int_mode)
+{
+  /* Step-1: Get the abs float value for mask generation.  */
+  emit_vec_abs (op_0, op_1, vec_fp_mode);
+
+  /* Step-2: Generate the mask on const fp.  */
+  rtx const_fp = gen_floor_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx mask = emit_vec_float_cmp_mask (op_0, LT, const_fp, vec_fp_mode);
+
+  /* Step-3: Convert to integer on mask, with rounding down (aka floor).  */
+  rtx tmp = gen_reg_rtx (vec_int_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
+
+  /* Step-4: Convert to floating-point on mask for the floor result.  */
+  emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
+
+  /* Step-5: Retrieve the sign bit for -0.0.  */
   emit_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c
new file mode 100644
index 00000000000..33b169395bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-0.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test__Float16___builtin_floorf16:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e16,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (_Float16, __builtin_floorf16)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c
new file mode 100644
index 00000000000..5c462c424df
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_floorf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (float, __builtin_floorf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c
new file mode 100644
index 00000000000..6f07add1004
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double___builtin_floor:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_UNARY_CALL (double, __builtin_floor)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c
new file mode 100644
index 00000000000..a091ffdab50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-3.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_floorf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+2
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   vmflt\.vf\s+v0,\s*v[0-9]+,\s*[fa]+[0-9]+
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_UNARY_CALL (float, __builtin_floorf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c
new file mode 100644
index 00000000000..d66905d4230
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+TEST_UNARY_CALL (float, __builtin_floorf)
+TEST_ASSERT (float)
+
+TEST_INIT (float, 1.2, 1.0, 1)
+TEST_INIT (float, -1.2, -2.0, 2)
+TEST_INIT (float, 3.0, 3.0, 3)
+TEST_INIT (float, 8388607.5, 8388607.0, 4)
+TEST_INIT (float, 8388609.0, 8388609.0, 5)
+TEST_INIT (float, 0.0, 0.0, 6)
+TEST_INIT (float, -0.0, -0.0, 7)
+TEST_INIT (float, -8388607.5, -8388608.0, 8)
+TEST_INIT (float, -8388608.0, -8388608.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (float, 1, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 2, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 3, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 4, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 5, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 6, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 7, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 8, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 9, __builtin_floorf, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c
new file mode 100644
index 00000000000..367a16dd714
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-floor-run-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+TEST_UNARY_CALL (double, __builtin_floor)
+TEST_ASSERT (double)
+
+TEST_INIT (double, 1.2, 1.0, 1)
+TEST_INIT (double, -1.2, -2.0, 2)
+TEST_INIT (double, 3.0, 3.0, 3)
+TEST_INIT (double, 4503599627370495.5, 4503599627370495.0, 4)
+TEST_INIT (double, 4503599627370497.0, 4503599627370497.0, 5)
+TEST_INIT (double, 0.0, 0.0, 6)
+TEST_INIT (double, -0.0, -0.0, 7)
+TEST_INIT (double, -4503599627370495.5, -4503599627370496.0, 8)
+TEST_INIT (double, -4503599627370496.0, -4503599627370496.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (double, 1, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 2, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 3, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 4, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 5, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 6, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 7, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 8, __builtin_floor, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 9, __builtin_floor, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c
new file mode 100644
index 00000000000..076580e6a58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-floor-1.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_OP_V (floorf16, 1, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 2, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 4, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 8, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 16, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 32, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 64, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 128, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 256, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 512, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 1024, _Float16, __builtin_floorf16)
+DEF_OP_V (floorf16, 2048, _Float16, __builtin_floorf16)
+
+DEF_OP_V (floorf, 1, float, __builtin_floorf)
+DEF_OP_V (floorf, 2, float, __builtin_floorf)
+DEF_OP_V (floorf, 4, float, __builtin_floorf)
+DEF_OP_V (floorf, 8, float, __builtin_floorf)
+DEF_OP_V (floorf, 16, float, __builtin_floorf)
+DEF_OP_V (floorf, 32, float, __builtin_floorf)
+DEF_OP_V (floorf, 64, float, __builtin_floorf)
+DEF_OP_V (floorf, 128, float, __builtin_floorf)
+DEF_OP_V (floorf, 256, float, __builtin_floorf)
+DEF_OP_V (floorf, 512, float, __builtin_floorf)
+DEF_OP_V (floorf, 1024, float, __builtin_floorf)
+
+DEF_OP_V (floor, 1, double, __builtin_floor)
+DEF_OP_V (floor, 2, double, __builtin_floor)
+DEF_OP_V (floor, 4, double, __builtin_floor)
+DEF_OP_V (floor, 8, double, __builtin_floor)
+DEF_OP_V (floor, 16, double, __builtin_floor)
+DEF_OP_V (floor, 32, double, __builtin_floor)
+DEF_OP_V (floor, 64, double, __builtin_floor)
+DEF_OP_V (floor, 128, double, __builtin_floor)
+DEF_OP_V (floor, 256, double, __builtin_floor)
+DEF_OP_V (floor, 512, double, __builtin_floor)
+
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
+/* { dg-final { scan-assembler-times {vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
+/* { dg-final { scan-assembler-times {vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
--
2.34.1



^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-09-23  1:45 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-22  6:23 [PATCH v1] RISCV-V: Suport FP floor auto-vectorization pan2.li
2023-09-23  0:40 ` [PATCH v2] RISC-V: " pan2.li
2023-09-23  0:42   ` 钟居哲
2023-09-23  1:02     ` Li, Pan2
2023-09-23  1:19 ` [PATCH v3] " pan2.li
2023-09-23  1:39   ` 钟居哲
2023-09-23  1:45     ` Li, Pan2

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).