[PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization
@ 2023-09-20  2:30 pan2.li
  2023-09-20  2:35 ` juzhe.zhong
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: pan2.li @ 2023-09-20  2:30 UTC (permalink / raw)
  To: gcc-patches; +Cc: juzhe.zhong, pan2.li, yanzhang.wang, kito.cheng

From: Pan Li <pan2.li@intel.com>

This patch would like to support auto-vectorization for both the
ceil and ceilf of math.h. It depends on the -ffast-math option.

When we would like to call ceil/ceilf like v2 = ceil (v1), we will
onvert it into below insn (reference the implementation of llvm).

* vfcvt.x.f v3, v1, RUP
* vfcvt.f.x v2, v3

The conditional auto-vectorization for ceil/ceilf is also supported
and covered by test cases.

Befor this patch:
math-ceil-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    ceilf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3

After this patch:
  ...
  fsrmi   3
.L4:
  vsetvli a5,a2,e32,m1,ta,ma
  vle32.v v1,0(a1)
  vsetvli a3,zero,e32,m1,ta,ma
  slli    a4,a5,2
  vfcvt.x.f.v     v1,v1
  sub     a2,a2,a5
  vfcvt.f.x.v     v1,v1
  vsetvli zero,a5,e32,m1,ta,ma
  vse32.v v1,0(a0)
  add     a1,a1,a4
  add     a0,a0,a4
  bne     a2,zero,.L4
.L14:
  fsrm    a6
  ret

Please not VLS mode is not involved in this patch and will be token
care of in the underlying patches soon.

gcc/ChangeLog:

	* config/riscv/autovec.md (ceil<mode>2): New pattern.
	* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
	(enum insn_type): Ditto.
	* config/riscv/riscv-v.cc: Handle rounding up.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/math-ceil-1.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-2.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-3.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-4.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-run-1.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-run-2.c: New test.
	* gcc.target/riscv/rvv/autovec/test-math.h: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/autovec.md                   | 30 +++++++++++++
 gcc/config/riscv/riscv-protos.h               |  4 ++
 gcc/config/riscv/riscv-v.cc                   |  2 +
 .../riscv/rvv/autovec/math-ceil-1.c           | 21 +++++++++
 .../riscv/rvv/autovec/math-ceil-2.c           | 21 +++++++++
 .../riscv/rvv/autovec/math-ceil-3.c           | 24 ++++++++++
 .../riscv/rvv/autovec/math-ceil-4.c           | 24 ++++++++++
 .../riscv/rvv/autovec/math-ceil-run-1.c       | 24 ++++++++++
 .../riscv/rvv/autovec/math-ceil-run-2.c       | 24 ++++++++++
 .../gcc.target/riscv/rvv/autovec/test-math.h  | 45 +++++++++++++++++++
 10 files changed, 219 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 493d5745485..ea508d81047 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2374,3 +2374,33 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
   riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
   DONE;
 })
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:VF 0 "register_operand")
+   (match_operand:VF 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    rtx tmp = gen_reg_rtx (<VCONVERT>mode);
+    rtx ops_1[] = {tmp, operands[1]};
+    insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, <MODE>mode);
+
+    /* vfcvt.x.f with rounding up (aka ceil).  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_1);
+
+    rtx ops_2[] = {operands[0], tmp};
+    icode = code_for_pred (FLOAT, <MODE>mode);
+
+    /* vfcvt.f.x for the final result.  To avoid unnecessary frm register
+       access, we use RUP here and it will never do the rounding up because
+       the tmp rtx comes from the float to int conversion.  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_2);
+
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5a2d218d67b..833f1efbaf4 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -250,6 +250,9 @@ enum insn_flags : unsigned int
   /* flags for the floating-point rounding mode.  */
   /* Means INSN has FRM operand and the value is FRM_DYN.  */
   FRM_DYN_P = 1 << 15,
+
+  /* Means INSN has FRM operand and the value is FRM_RUP.  */
+  FRM_RUP_P = 1 << 16,
 };
 
 enum insn_type : unsigned int
@@ -290,6 +293,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMA = __MASK_OP_TAMA | UNARY_OP_P,
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
+  UNARY_OP_FRM_RUP = UNARY_OP | FRM_RUP_P,
 
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a9287e5d671..4192f988648 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -323,6 +323,8 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
+    if (m_insn_flags & FRM_RUP_P)
+      add_rounding_mode_operand (FRM_RUP);
 
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
new file mode 100644
index 00000000000..8f0f09609eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*ma
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
new file mode 100644
index 00000000000..73395d30d7a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*ma
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
new file mode 100644
index 00000000000..eb0f3a3db78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*ma
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
new file mode 100644
index 00000000000..b9a3c8ebf84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*ma
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
new file mode 100644
index 00000000000..014c4c3ac0a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
@@ -0,0 +1,24 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+// Test function declaration
+TEST_CEIL(float, ceilf)
+TEST_INIT(float)
+TEST_ASSERT(float)
+
+int
+main ()
+{
+  test_float_init (in, ref, ARRAY_SIZE);
+  test_float_ceilf (out, in, ARRAY_SIZE);
+  test_float_assert (out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
new file mode 100644
index 00000000000..ae361e11144
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
@@ -0,0 +1,24 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+// Test function declaration
+TEST_CEIL(double, ceil)
+TEST_INIT(double)
+TEST_ASSERT(double)
+
+int
+main ()
+{
+  test_double_init (in, ref, ARRAY_SIZE);
+  test_double_ceil (out, in, ARRAY_SIZE);
+  test_double_assert (out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
new file mode 100644
index 00000000000..57dd5e0e460
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
@@ -0,0 +1,45 @@
+#include <math.h>
+
+#define TEST_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, TYPE *in, unsigned count) \
+  {                                                               \
+    for (unsigned i = 0; i < count; i++)                          \
+      out[i] = CALL (in[i]);                                      \
+  }
+
+#define TEST_COND_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, int *cond, TYPE *in, unsigned count) \
+  {                                                                          \
+    for (unsigned i = 0; i < count; i++)                                     \
+      out[i] = cond[i] ? CALL (in[i]) : in[i];                               \
+  }
+
+#define TEST_INIT(TYPE)                                        \
+  void test_##TYPE##_init (TYPE *in, TYPE *ref, unsigned size) \
+  {                                                            \
+    for (unsigned i = 0; i < size; i++)                        \
+      {                                                        \
+	TYPE tmp = (TYPE)i;                                    \
+                                                               \
+	if (i % 2 == 0)                                        \
+	  {                                                    \
+	    in[i] = 1.5f + (TYPE)i;                            \
+	    ref[i] = (TYPE)(i + 2);                            \
+	  }                                                    \
+	else                                                   \
+	  {                                                    \
+	    in[i] = (TYPE)i;                                   \
+	    ref[i] = (TYPE)i;                                  \
+	  }                                                    \
+      }                                                        \
+  }
+
+#define TEST_ASSERT(TYPE)                                         \
+  void test_##TYPE##_assert (TYPE *out, TYPE *ref, unsigned size) \
+  {                                                               \
+    for (unsigned i = 0; i < size; i++)                           \
+      {                                                           \
+	if (out[i] != ref[i])                                     \
+	  __builtin_abort ();                                     \
+      }                                                           \
+  }
-- 
2.34.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization
  2023-09-20  2:30 [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization pan2.li
@ 2023-09-20  2:35 ` juzhe.zhong
  2023-09-20  2:44   ` Li, Pan2
  2023-09-21 10:32 ` [PATCH v2] " pan2.li
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 15+ messages in thread
From: juzhe.zhong @ 2023-09-20  2:35 UTC (permalink / raw)
  To: pan2.li, gcc-patches; +Cc: pan2.li, yanzhang.wang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 15653 bytes --]

+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:VF 0 "register_operand")
+   (match_operand:VF 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    rtx tmp = gen_reg_rtx (<VCONVERT>mode);
+    rtx ops_1[] = {tmp, operands[1]};
+    insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, <MODE>mode);
+
+    /* vfcvt.x.f with rounding up (aka ceil).  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_1);
+
+    rtx ops_2[] = {operands[0], tmp};
+    icode = code_for_pred (FLOAT, <MODE>mode);
+
+    /* vfcvt.f.x for the final result.  To avoid unnecessary frm register
+       access, we use RUP here and it will never do the rounding up because
+       the tmp rtx comes from the float to int conversion.  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_2);
+
+    DONE;
+  }
+)

It should be "V_VLSF" instead of "VF" so that you could also support VLS CEIL.

Besides, I want to see this following case:

a[i] = cond[i] ? CEIL (b[i]): c[i];

Ideally, we should be able to combine vfcvt + vmerge into vfcvt with mask.




juzhe.zhong@rivai.ai
 
From: pan2.li
Date: 2023-09-20 10:30
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization
From: Pan Li <pan2.li@intel.com>
 
This patch would like to support auto-vectorization for both the
ceil and ceilf of math.h. It depends on the -ffast-math option.
 
When we would like to call ceil/ceilf like v2 = ceil (v1), we will
onvert it into below insn (reference the implementation of llvm).
 
* vfcvt.x.f v3, v1, RUP
* vfcvt.f.x v2, v3
 
The conditional auto-vectorization for ceil/ceilf is also supported
and covered by test cases.
 
Befor this patch:
math-ceil-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    ceilf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3
 
After this patch:
  ...
  fsrmi   3
.L4:
  vsetvli a5,a2,e32,m1,ta,ma
  vle32.v v1,0(a1)
  vsetvli a3,zero,e32,m1,ta,ma
  slli    a4,a5,2
  vfcvt.x.f.v     v1,v1
  sub     a2,a2,a5
  vfcvt.f.x.v     v1,v1
  vsetvli zero,a5,e32,m1,ta,ma
  vse32.v v1,0(a0)
  add     a1,a1,a4
  add     a0,a0,a4
  bne     a2,zero,.L4
.L14:
  fsrm    a6
  ret
 
Please not VLS mode is not involved in this patch and will be token
care of in the underlying patches soon.
 
gcc/ChangeLog:
 
* config/riscv/autovec.md (ceil<mode>2): New pattern.
* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
(enum insn_type): Ditto.
* config/riscv/riscv-v.cc: Handle rounding up.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/math-ceil-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-2.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-3.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-4.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/test-math.h: New test.
 
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/autovec.md                   | 30 +++++++++++++
gcc/config/riscv/riscv-protos.h               |  4 ++
gcc/config/riscv/riscv-v.cc                   |  2 +
.../riscv/rvv/autovec/math-ceil-1.c           | 21 +++++++++
.../riscv/rvv/autovec/math-ceil-2.c           | 21 +++++++++
.../riscv/rvv/autovec/math-ceil-3.c           | 24 ++++++++++
.../riscv/rvv/autovec/math-ceil-4.c           | 24 ++++++++++
.../riscv/rvv/autovec/math-ceil-run-1.c       | 24 ++++++++++
.../riscv/rvv/autovec/math-ceil-run-2.c       | 24 ++++++++++
.../gcc.target/riscv/rvv/autovec/test-math.h  | 45 +++++++++++++++++++
10 files changed, 219 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
 
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 493d5745485..ea508d81047 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2374,3 +2374,33 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
   riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
   DONE;
})
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:VF 0 "register_operand")
+   (match_operand:VF 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    rtx tmp = gen_reg_rtx (<VCONVERT>mode);
+    rtx ops_1[] = {tmp, operands[1]};
+    insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, <MODE>mode);
+
+    /* vfcvt.x.f with rounding up (aka ceil).  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_1);
+
+    rtx ops_2[] = {operands[0], tmp};
+    icode = code_for_pred (FLOAT, <MODE>mode);
+
+    /* vfcvt.f.x for the final result.  To avoid unnecessary frm register
+       access, we use RUP here and it will never do the rounding up because
+       the tmp rtx comes from the float to int conversion.  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_2);
+
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5a2d218d67b..833f1efbaf4 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -250,6 +250,9 @@ enum insn_flags : unsigned int
   /* flags for the floating-point rounding mode.  */
   /* Means INSN has FRM operand and the value is FRM_DYN.  */
   FRM_DYN_P = 1 << 15,
+
+  /* Means INSN has FRM operand and the value is FRM_RUP.  */
+  FRM_RUP_P = 1 << 16,
};
enum insn_type : unsigned int
@@ -290,6 +293,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMA = __MASK_OP_TAMA | UNARY_OP_P,
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
+  UNARY_OP_FRM_RUP = UNARY_OP | FRM_RUP_P,
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a9287e5d671..4192f988648 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -323,6 +323,8 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
+    if (m_insn_flags & FRM_RUP_P)
+      add_rounding_mode_operand (FRM_RUP);
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
new file mode 100644
index 00000000000..8f0f09609eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*ma
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
new file mode 100644
index 00000000000..73395d30d7a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*ma
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
new file mode 100644
index 00000000000..eb0f3a3db78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*ma
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
new file mode 100644
index 00000000000..b9a3c8ebf84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*ma
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
new file mode 100644
index 00000000000..014c4c3ac0a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
@@ -0,0 +1,24 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+// Test function declaration
+TEST_CEIL(float, ceilf)
+TEST_INIT(float)
+TEST_ASSERT(float)
+
+int
+main ()
+{
+  test_float_init (in, ref, ARRAY_SIZE);
+  test_float_ceilf (out, in, ARRAY_SIZE);
+  test_float_assert (out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
new file mode 100644
index 00000000000..ae361e11144
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
@@ -0,0 +1,24 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+// Test function declaration
+TEST_CEIL(double, ceil)
+TEST_INIT(double)
+TEST_ASSERT(double)
+
+int
+main ()
+{
+  test_double_init (in, ref, ARRAY_SIZE);
+  test_double_ceil (out, in, ARRAY_SIZE);
+  test_double_assert (out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
new file mode 100644
index 00000000000..57dd5e0e460
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
@@ -0,0 +1,45 @@
+#include <math.h>
+
+#define TEST_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, TYPE *in, unsigned count) \
+  {                                                               \
+    for (unsigned i = 0; i < count; i++)                          \
+      out[i] = CALL (in[i]);                                      \
+  }
+
+#define TEST_COND_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, int *cond, TYPE *in, unsigned count) \
+  {                                                                          \
+    for (unsigned i = 0; i < count; i++)                                     \
+      out[i] = cond[i] ? CALL (in[i]) : in[i];                               \
+  }
+
+#define TEST_INIT(TYPE)                                        \
+  void test_##TYPE##_init (TYPE *in, TYPE *ref, unsigned size) \
+  {                                                            \
+    for (unsigned i = 0; i < size; i++)                        \
+      {                                                        \
+ TYPE tmp = (TYPE)i;                                    \
+                                                               \
+ if (i % 2 == 0)                                        \
+   {                                                    \
+     in[i] = 1.5f + (TYPE)i;                            \
+     ref[i] = (TYPE)(i + 2);                            \
+   }                                                    \
+ else                                                   \
+   {                                                    \
+     in[i] = (TYPE)i;                                   \
+     ref[i] = (TYPE)i;                                  \
+   }                                                    \
+      }                                                        \
+  }
+
+#define TEST_ASSERT(TYPE)                                         \
+  void test_##TYPE##_assert (TYPE *out, TYPE *ref, unsigned size) \
+  {                                                               \
+    for (unsigned i = 0; i < size; i++)                           \
+      {                                                           \
+ if (out[i] != ref[i])                                     \
+   __builtin_abort ();                                     \
+      }                                                           \
+  }
-- 
2.34.1
 
 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization
  2023-09-20  2:35 ` juzhe.zhong
@ 2023-09-20  2:44   ` Li, Pan2
  2023-09-20  3:01     ` juzhe.zhong
  2023-09-20  3:37     ` juzhe.zhong
  0 siblings, 2 replies; 15+ messages in thread
From: Li, Pan2 @ 2023-09-20  2:44 UTC (permalink / raw)
  To: juzhe.zhong, gcc-patches; +Cc: Wang, Yanzhang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 16490 bytes --]

> It should be "V_VLSF" instead of "VF" so that you could also support VLS CEIL.
Under preparing, and will append to this V2 instead of another patch.

> a[i] = cond[i] ? CEIL (b[i]): c[i];
Sure

Pan


From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai>
Sent: Wednesday, September 20, 2023 10:35 AM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Li, Pan2 <pan2.li@intel.com>; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng <kito.cheng@gmail.com>
Subject: Re: [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization

+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:VF 0 "register_operand")
+   (match_operand:VF 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    rtx tmp = gen_reg_rtx (<VCONVERT>mode);
+    rtx ops_1[] = {tmp, operands[1]};
+    insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, <MODE>mode);
+
+    /* vfcvt.x.f with rounding up (aka ceil).  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_1);
+
+    rtx ops_2[] = {operands[0], tmp};
+    icode = code_for_pred (FLOAT, <MODE>mode);
+
+    /* vfcvt.f.x for the final result.  To avoid unnecessary frm register
+       access, we use RUP here and it will never do the rounding up because
+       the tmp rtx comes from the float to int conversion.  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_2);
+
+    DONE;
+  }
+)

It should be "V_VLSF" instead of "VF" so that you could also support VLS CEIL.

Besides, I want to see this following case:

a[i] = cond[i] ? CEIL (b[i]): c[i];

Ideally, we should be able to combine vfcvt + vmerge into vfcvt with mask.


________________________________
juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>

From: pan2.li<mailto:pan2.li@intel.com>
Date: 2023-09-20 10:30
To: gcc-patches<mailto:gcc-patches@gcc.gnu.org>
CC: juzhe.zhong<mailto:juzhe.zhong@rivai.ai>; pan2.li<mailto:pan2.li@intel.com>; yanzhang.wang<mailto:yanzhang.wang@intel.com>; kito.cheng<mailto:kito.cheng@gmail.com>
Subject: [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization
From: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>

This patch would like to support auto-vectorization for both the
ceil and ceilf of math.h. It depends on the -ffast-math option.

When we would like to call ceil/ceilf like v2 = ceil (v1), we will
onvert it into below insn (reference the implementation of llvm).

* vfcvt.x.f v3, v1, RUP
* vfcvt.f.x v2, v3

The conditional auto-vectorization for ceil/ceilf is also supported
and covered by test cases.

Befor this patch:
math-ceil-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    ceilf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3

After this patch:
  ...
  fsrmi   3
.L4:
  vsetvli a5,a2,e32,m1,ta,ma
  vle32.v v1,0(a1)
  vsetvli a3,zero,e32,m1,ta,ma
  slli    a4,a5,2
  vfcvt.x.f.v     v1,v1
  sub     a2,a2,a5
  vfcvt.f.x.v     v1,v1
  vsetvli zero,a5,e32,m1,ta,ma
  vse32.v v1,0(a0)
  add     a1,a1,a4
  add     a0,a0,a4
  bne     a2,zero,.L4
.L14:
  fsrm    a6
  ret

Please not VLS mode is not involved in this patch and will be token
care of in the underlying patches soon.

gcc/ChangeLog:

* config/riscv/autovec.md (ceil<mode>2): New pattern.
* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
(enum insn_type): Ditto.
* config/riscv/riscv-v.cc: Handle rounding up.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/math-ceil-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-2.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-3.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-4.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/test-math.h: New test.

Signed-off-by: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
---
gcc/config/riscv/autovec.md                   | 30 +++++++++++++
gcc/config/riscv/riscv-protos.h               |  4 ++
gcc/config/riscv/riscv-v.cc                   |  2 +
.../riscv/rvv/autovec/math-ceil-1.c           | 21 +++++++++
.../riscv/rvv/autovec/math-ceil-2.c           | 21 +++++++++
.../riscv/rvv/autovec/math-ceil-3.c           | 24 ++++++++++
.../riscv/rvv/autovec/math-ceil-4.c           | 24 ++++++++++
.../riscv/rvv/autovec/math-ceil-run-1.c       | 24 ++++++++++
.../riscv/rvv/autovec/math-ceil-run-2.c       | 24 ++++++++++
.../gcc.target/riscv/rvv/autovec/test-math.h  | 45 +++++++++++++++++++
10 files changed, 219 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 493d5745485..ea508d81047 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2374,3 +2374,33 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
   riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
   DONE;
})
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:VF 0 "register_operand")
+   (match_operand:VF 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    rtx tmp = gen_reg_rtx (<VCONVERT>mode);
+    rtx ops_1[] = {tmp, operands[1]};
+    insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, <MODE>mode);
+
+    /* vfcvt.x.f with rounding up (aka ceil).  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_1);
+
+    rtx ops_2[] = {operands[0], tmp};
+    icode = code_for_pred (FLOAT, <MODE>mode);
+
+    /* vfcvt.f.x for the final result.  To avoid unnecessary frm register
+       access, we use RUP here and it will never do the rounding up because
+       the tmp rtx comes from the float to int conversion.  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_2);
+
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5a2d218d67b..833f1efbaf4 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -250,6 +250,9 @@ enum insn_flags : unsigned int
   /* flags for the floating-point rounding mode.  */
   /* Means INSN has FRM operand and the value is FRM_DYN.  */
   FRM_DYN_P = 1 << 15,
+
+  /* Means INSN has FRM operand and the value is FRM_RUP.  */
+  FRM_RUP_P = 1 << 16,
};
enum insn_type : unsigned int
@@ -290,6 +293,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMA = __MASK_OP_TAMA | UNARY_OP_P,
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
+  UNARY_OP_FRM_RUP = UNARY_OP | FRM_RUP_P,
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a9287e5d671..4192f988648 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -323,6 +323,8 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
+    if (m_insn_flags & FRM_RUP_P)
+      add_rounding_mode_operand (FRM_RUP);
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
new file mode 100644
index 00000000000..8f0f09609eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*ma
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
new file mode 100644
index 00000000000..73395d30d7a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*ma
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
new file mode 100644
index 00000000000..eb0f3a3db78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*ma
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
new file mode 100644
index 00000000000..b9a3c8ebf84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*ma
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
new file mode 100644
index 00000000000..014c4c3ac0a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
@@ -0,0 +1,24 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+// Test function declaration
+TEST_CEIL(float, ceilf)
+TEST_INIT(float)
+TEST_ASSERT(float)
+
+int
+main ()
+{
+  test_float_init (in, ref, ARRAY_SIZE);
+  test_float_ceilf (out, in, ARRAY_SIZE);
+  test_float_assert (out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
new file mode 100644
index 00000000000..ae361e11144
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
@@ -0,0 +1,24 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+// Test function declaration
+TEST_CEIL(double, ceil)
+TEST_INIT(double)
+TEST_ASSERT(double)
+
+int
+main ()
+{
+  test_double_init (in, ref, ARRAY_SIZE);
+  test_double_ceil (out, in, ARRAY_SIZE);
+  test_double_assert (out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
new file mode 100644
index 00000000000..57dd5e0e460
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
@@ -0,0 +1,45 @@
+#include <math.h>
+
+#define TEST_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, TYPE *in, unsigned count) \
+  {                                                               \
+    for (unsigned i = 0; i < count; i++)                          \
+      out[i] = CALL (in[i]);                                      \
+  }
+
+#define TEST_COND_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, int *cond, TYPE *in, unsigned count) \
+  {                                                                          \
+    for (unsigned i = 0; i < count; i++)                                     \
+      out[i] = cond[i] ? CALL (in[i]) : in[i];                               \
+  }
+
+#define TEST_INIT(TYPE)                                        \
+  void test_##TYPE##_init (TYPE *in, TYPE *ref, unsigned size) \
+  {                                                            \
+    for (unsigned i = 0; i < size; i++)                        \
+      {                                                        \
+ TYPE tmp = (TYPE)i;                                    \
+                                                               \
+ if (i % 2 == 0)                                        \
+   {                                                    \
+     in[i] = 1.5f + (TYPE)i;                            \
+     ref[i] = (TYPE)(i + 2);                            \
+   }                                                    \
+ else                                                   \
+   {                                                    \
+     in[i] = (TYPE)i;                                   \
+     ref[i] = (TYPE)i;                                  \
+   }                                                    \
+      }                                                        \
+  }
+
+#define TEST_ASSERT(TYPE)                                         \
+  void test_##TYPE##_assert (TYPE *out, TYPE *ref, unsigned size) \
+  {                                                               \
+    for (unsigned i = 0; i < size; i++)                           \
+      {                                                           \
+ if (out[i] != ref[i])                                     \
+   __builtin_abort ();                                     \
+      }                                                           \
+  }
--
2.34.1



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: RE: [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization
  2023-09-20  2:44   ` Li, Pan2
@ 2023-09-20  3:01     ` juzhe.zhong
  2023-09-20  3:37     ` juzhe.zhong
  1 sibling, 0 replies; 15+ messages in thread
From: juzhe.zhong @ 2023-09-20  3:01 UTC (permalink / raw)
  To: pan2.li, gcc-patches; +Cc: yanzhang.wang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 16722 bytes --]

I have checked LLVM:
https://godbolt.org/z/4jWG5vjMT 

It seems their code sequence as follows:

vfabs.v
vmflt.vf
vfcvt.x.f.v -> static rounding mode
vfcvt.f.x.v -> dynamic rounding mode
vfsgnj.vv

How come you just only need 2 static vfcvt insns is enough ?



juzhe.zhong@rivai.ai
 
From: Li, Pan2
Date: 2023-09-20 10:44
To: juzhe.zhong@rivai.ai; gcc-patches
CC: Wang, Yanzhang; kito.cheng
Subject: RE: [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization
> It should be "V_VLSF" instead of "VF" so that you could also support VLS CEIL.
Under preparing, and will append to this V2 instead of another patch.
 
> a[i] = cond[i] ? CEIL (b[i]): c[i];
Sure
 
Pan
 
 
From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai> 
Sent: Wednesday, September 20, 2023 10:35 AM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Li, Pan2 <pan2.li@intel.com>; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng <kito.cheng@gmail.com>
Subject: Re: [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization
 
+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:VF 0 "register_operand")
+   (match_operand:VF 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    rtx tmp = gen_reg_rtx (<VCONVERT>mode);
+    rtx ops_1[] = {tmp, operands[1]};
+    insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, <MODE>mode);
+
+    /* vfcvt.x.f with rounding up (aka ceil).  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_1);
+
+    rtx ops_2[] = {operands[0], tmp};
+    icode = code_for_pred (FLOAT, <MODE>mode);
+
+    /* vfcvt.f.x for the final result.  To avoid unnecessary frm register
+       access, we use RUP here and it will never do the rounding up because
+       the tmp rtx comes from the float to int conversion.  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_2);
+
+    DONE;
+  }
+)
 
It should be "V_VLSF" instead of "VF" so that you could also support VLS CEIL.
 
Besides, I want to see this following case:
 
a[i] = cond[i] ? CEIL (b[i]): c[i];
 
Ideally, we should be able to combine vfcvt + vmerge into vfcvt with mask.
 
 


juzhe.zhong@rivai.ai
 
From: pan2.li
Date: 2023-09-20 10:30
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization
From: Pan Li <pan2.li@intel.com>
 
This patch would like to support auto-vectorization for both the
ceil and ceilf of math.h. It depends on the -ffast-math option.
 
When we would like to call ceil/ceilf like v2 = ceil (v1), we will
onvert it into below insn (reference the implementation of llvm).
 
* vfcvt.x.f v3, v1, RUP
* vfcvt.f.x v2, v3
 
The conditional auto-vectorization for ceil/ceilf is also supported
and covered by test cases.
 
Befor this patch:
math-ceil-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    ceilf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3
 
After this patch:
  ...
  fsrmi   3
.L4:
  vsetvli a5,a2,e32,m1,ta,ma
  vle32.v v1,0(a1)
  vsetvli a3,zero,e32,m1,ta,ma
  slli    a4,a5,2
  vfcvt.x.f.v     v1,v1
  sub     a2,a2,a5
  vfcvt.f.x.v     v1,v1
  vsetvli zero,a5,e32,m1,ta,ma
  vse32.v v1,0(a0)
  add     a1,a1,a4
  add     a0,a0,a4
  bne     a2,zero,.L4
.L14:
  fsrm    a6
  ret
 
Please not VLS mode is not involved in this patch and will be token
care of in the underlying patches soon.
 
gcc/ChangeLog:
 
* config/riscv/autovec.md (ceil<mode>2): New pattern.
* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
(enum insn_type): Ditto.
* config/riscv/riscv-v.cc: Handle rounding up.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/math-ceil-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-2.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-3.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-4.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/test-math.h: New test.
 
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/autovec.md                   | 30 +++++++++++++
gcc/config/riscv/riscv-protos.h               |  4 ++
gcc/config/riscv/riscv-v.cc                   |  2 +
.../riscv/rvv/autovec/math-ceil-1.c           | 21 +++++++++
.../riscv/rvv/autovec/math-ceil-2.c           | 21 +++++++++
.../riscv/rvv/autovec/math-ceil-3.c           | 24 ++++++++++
.../riscv/rvv/autovec/math-ceil-4.c           | 24 ++++++++++
.../riscv/rvv/autovec/math-ceil-run-1.c       | 24 ++++++++++
.../riscv/rvv/autovec/math-ceil-run-2.c       | 24 ++++++++++
.../gcc.target/riscv/rvv/autovec/test-math.h  | 45 +++++++++++++++++++
10 files changed, 219 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
 
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 493d5745485..ea508d81047 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2374,3 +2374,33 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
   riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
   DONE;
})
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:VF 0 "register_operand")
+   (match_operand:VF 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    rtx tmp = gen_reg_rtx (<VCONVERT>mode);
+    rtx ops_1[] = {tmp, operands[1]};
+    insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, <MODE>mode);
+
+    /* vfcvt.x.f with rounding up (aka ceil).  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_1);
+
+    rtx ops_2[] = {operands[0], tmp};
+    icode = code_for_pred (FLOAT, <MODE>mode);
+
+    /* vfcvt.f.x for the final result.  To avoid unnecessary frm register
+       access, we use RUP here and it will never do the rounding up because
+       the tmp rtx comes from the float to int conversion.  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_2);
+
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5a2d218d67b..833f1efbaf4 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -250,6 +250,9 @@ enum insn_flags : unsigned int
   /* flags for the floating-point rounding mode.  */
   /* Means INSN has FRM operand and the value is FRM_DYN.  */
   FRM_DYN_P = 1 << 15,
+
+  /* Means INSN has FRM operand and the value is FRM_RUP.  */
+  FRM_RUP_P = 1 << 16,
};
enum insn_type : unsigned int
@@ -290,6 +293,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMA = __MASK_OP_TAMA | UNARY_OP_P,
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
+  UNARY_OP_FRM_RUP = UNARY_OP | FRM_RUP_P,
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a9287e5d671..4192f988648 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -323,6 +323,8 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
+    if (m_insn_flags & FRM_RUP_P)
+      add_rounding_mode_operand (FRM_RUP);
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
new file mode 100644
index 00000000000..8f0f09609eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*ma
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
new file mode 100644
index 00000000000..73395d30d7a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*ma
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
new file mode 100644
index 00000000000..eb0f3a3db78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*ma
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
new file mode 100644
index 00000000000..b9a3c8ebf84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*ma
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
new file mode 100644
index 00000000000..014c4c3ac0a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
@@ -0,0 +1,24 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+// Test function declaration
+TEST_CEIL(float, ceilf)
+TEST_INIT(float)
+TEST_ASSERT(float)
+
+int
+main ()
+{
+  test_float_init (in, ref, ARRAY_SIZE);
+  test_float_ceilf (out, in, ARRAY_SIZE);
+  test_float_assert (out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
new file mode 100644
index 00000000000..ae361e11144
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
@@ -0,0 +1,24 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+// Test function declaration
+TEST_CEIL(double, ceil)
+TEST_INIT(double)
+TEST_ASSERT(double)
+
+int
+main ()
+{
+  test_double_init (in, ref, ARRAY_SIZE);
+  test_double_ceil (out, in, ARRAY_SIZE);
+  test_double_assert (out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
new file mode 100644
index 00000000000..57dd5e0e460
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
@@ -0,0 +1,45 @@
+#include <math.h>
+
+#define TEST_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, TYPE *in, unsigned count) \
+  {                                                               \
+    for (unsigned i = 0; i < count; i++)                          \
+      out[i] = CALL (in[i]);                                      \
+  }
+
+#define TEST_COND_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, int *cond, TYPE *in, unsigned count) \
+  {                                                                          \
+    for (unsigned i = 0; i < count; i++)                                     \
+      out[i] = cond[i] ? CALL (in[i]) : in[i];                               \
+  }
+
+#define TEST_INIT(TYPE)                                        \
+  void test_##TYPE##_init (TYPE *in, TYPE *ref, unsigned size) \
+  {                                                            \
+    for (unsigned i = 0; i < size; i++)                        \
+      {                                                        \
+ TYPE tmp = (TYPE)i;                                    \
+                                                               \
+ if (i % 2 == 0)                                        \
+   {                                                    \
+     in[i] = 1.5f + (TYPE)i;                            \
+     ref[i] = (TYPE)(i + 2);                            \
+   }                                                    \
+ else                                                   \
+   {                                                    \
+     in[i] = (TYPE)i;                                   \
+     ref[i] = (TYPE)i;                                  \
+   }                                                    \
+      }                                                        \
+  }
+
+#define TEST_ASSERT(TYPE)                                         \
+  void test_##TYPE##_assert (TYPE *out, TYPE *ref, unsigned size) \
+  {                                                               \
+    for (unsigned i = 0; i < size; i++)                           \
+      {                                                           \
+ if (out[i] != ref[i])                                     \
+   __builtin_abort ();                                     \
+      }                                                           \
+  }
-- 
2.34.1
 
 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: RE: [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization
  2023-09-20  2:44   ` Li, Pan2
  2023-09-20  3:01     ` juzhe.zhong
@ 2023-09-20  3:37     ` juzhe.zhong
  2023-09-20  3:42       ` Li, Pan2
  1 sibling, 1 reply; 15+ messages in thread
From: juzhe.zhong @ 2023-09-20  3:37 UTC (permalink / raw)
  To: pan2.li, gcc-patches; +Cc: yanzhang.wang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 18296 bytes --]

I just checked the LLVM implementation.

This is their codes of rounding autovectorizaton:

They handle CEIL/FLOOR/FROUND/FROUNDEVEN/FROUND  TO ZERO with the same handling

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::FCEIL:
  case ISD::VP_FCEIL:
  case ISD::FFLOOR:
  case ISD::VP_FFLOOR:
  case ISD::FROUND:
  case ISD::FROUNDEVEN:
  case ISD::VP_FROUND:
  case ISD::VP_FROUNDEVEN:
  case ISD::VP_FROUNDTOZERO: {
    RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
    assert(FRM != RISCVFPRndMode::Invalid);
    Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
                            DAG.getTargetConstant(FRM, DL, XLenVT), VL);
    break;
  }
  case ISD::FTRUNC:
    Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
                            Mask, VL);
    break;
  case ISD::VP_FRINT:
    Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
    break;
  case ISD::VP_FNEARBYINT:
    Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
                            Mask, VL);
    break;
  }

  // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
  if (Op.getOpcode() != ISD::VP_FNEARBYINT)
    Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
                            Mask, VL);

  // Restore the original sign so that -0.0 is preserved.
  Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
                          Src, Src, Mask, VL);

I think you could just copy LLVM implementation and translate them into GCC codes.
It's so simple.

Create a function call 'expand_rounding".

LLVM code is very easy to read. I believe you could leverage LLVM implementation quickly.



juzhe.zhong@rivai.ai
 
From: Li, Pan2
Date: 2023-09-20 10:44
To: juzhe.zhong@rivai.ai; gcc-patches
CC: Wang, Yanzhang; kito.cheng
Subject: RE: [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization
> It should be "V_VLSF" instead of "VF" so that you could also support VLS CEIL.
Under preparing, and will append to this V2 instead of another patch.
 
> a[i] = cond[i] ? CEIL (b[i]): c[i];
Sure
 
Pan
 
 
From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai> 
Sent: Wednesday, September 20, 2023 10:35 AM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Li, Pan2 <pan2.li@intel.com>; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng <kito.cheng@gmail.com>
Subject: Re: [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization
 
+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:VF 0 "register_operand")
+   (match_operand:VF 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    rtx tmp = gen_reg_rtx (<VCONVERT>mode);
+    rtx ops_1[] = {tmp, operands[1]};
+    insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, <MODE>mode);
+
+    /* vfcvt.x.f with rounding up (aka ceil).  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_1);
+
+    rtx ops_2[] = {operands[0], tmp};
+    icode = code_for_pred (FLOAT, <MODE>mode);
+
+    /* vfcvt.f.x for the final result.  To avoid unnecessary frm register
+       access, we use RUP here and it will never do the rounding up because
+       the tmp rtx comes from the float to int conversion.  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_2);
+
+    DONE;
+  }
+)
 
It should be "V_VLSF" instead of "VF" so that you could also support VLS CEIL.
 
Besides, I want to see this following case:
 
a[i] = cond[i] ? CEIL (b[i]): c[i];
 
Ideally, we should be able to combine vfcvt + vmerge into vfcvt with mask.
 
 


juzhe.zhong@rivai.ai
 
From: pan2.li
Date: 2023-09-20 10:30
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization
From: Pan Li <pan2.li@intel.com>
 
This patch would like to support auto-vectorization for both the
ceil and ceilf of math.h. It depends on the -ffast-math option.
 
When we would like to call ceil/ceilf like v2 = ceil (v1), we will
onvert it into below insn (reference the implementation of llvm).
 
* vfcvt.x.f v3, v1, RUP
* vfcvt.f.x v2, v3
 
The conditional auto-vectorization for ceil/ceilf is also supported
and covered by test cases.
 
Befor this patch:
math-ceil-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    ceilf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3
 
After this patch:
  ...
  fsrmi   3
.L4:
  vsetvli a5,a2,e32,m1,ta,ma
  vle32.v v1,0(a1)
  vsetvli a3,zero,e32,m1,ta,ma
  slli    a4,a5,2
  vfcvt.x.f.v     v1,v1
  sub     a2,a2,a5
  vfcvt.f.x.v     v1,v1
  vsetvli zero,a5,e32,m1,ta,ma
  vse32.v v1,0(a0)
  add     a1,a1,a4
  add     a0,a0,a4
  bne     a2,zero,.L4
.L14:
  fsrm    a6
  ret
 
Please not VLS mode is not involved in this patch and will be token
care of in the underlying patches soon.
 
gcc/ChangeLog:
 
* config/riscv/autovec.md (ceil<mode>2): New pattern.
* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
(enum insn_type): Ditto.
* config/riscv/riscv-v.cc: Handle rounding up.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/math-ceil-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-2.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-3.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-4.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/test-math.h: New test.
 
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/autovec.md                   | 30 +++++++++++++
gcc/config/riscv/riscv-protos.h               |  4 ++
gcc/config/riscv/riscv-v.cc                   |  2 +
.../riscv/rvv/autovec/math-ceil-1.c           | 21 +++++++++
.../riscv/rvv/autovec/math-ceil-2.c           | 21 +++++++++
.../riscv/rvv/autovec/math-ceil-3.c           | 24 ++++++++++
.../riscv/rvv/autovec/math-ceil-4.c           | 24 ++++++++++
.../riscv/rvv/autovec/math-ceil-run-1.c       | 24 ++++++++++
.../riscv/rvv/autovec/math-ceil-run-2.c       | 24 ++++++++++
.../gcc.target/riscv/rvv/autovec/test-math.h  | 45 +++++++++++++++++++
10 files changed, 219 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
 
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 493d5745485..ea508d81047 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2374,3 +2374,33 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
   riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
   DONE;
})
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:VF 0 "register_operand")
+   (match_operand:VF 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    rtx tmp = gen_reg_rtx (<VCONVERT>mode);
+    rtx ops_1[] = {tmp, operands[1]};
+    insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, <MODE>mode);
+
+    /* vfcvt.x.f with rounding up (aka ceil).  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_1);
+
+    rtx ops_2[] = {operands[0], tmp};
+    icode = code_for_pred (FLOAT, <MODE>mode);
+
+    /* vfcvt.f.x for the final result.  To avoid unnecessary frm register
+       access, we use RUP here and it will never do the rounding up because
+       the tmp rtx comes from the float to int conversion.  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_2);
+
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5a2d218d67b..833f1efbaf4 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -250,6 +250,9 @@ enum insn_flags : unsigned int
   /* flags for the floating-point rounding mode.  */
   /* Means INSN has FRM operand and the value is FRM_DYN.  */
   FRM_DYN_P = 1 << 15,
+
+  /* Means INSN has FRM operand and the value is FRM_RUP.  */
+  FRM_RUP_P = 1 << 16,
};
enum insn_type : unsigned int
@@ -290,6 +293,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMA = __MASK_OP_TAMA | UNARY_OP_P,
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
+  UNARY_OP_FRM_RUP = UNARY_OP | FRM_RUP_P,
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a9287e5d671..4192f988648 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -323,6 +323,8 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
+    if (m_insn_flags & FRM_RUP_P)
+      add_rounding_mode_operand (FRM_RUP);
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
new file mode 100644
index 00000000000..8f0f09609eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*ma
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
new file mode 100644
index 00000000000..73395d30d7a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*ma
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
new file mode 100644
index 00000000000..eb0f3a3db78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*ma
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
new file mode 100644
index 00000000000..b9a3c8ebf84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*ma
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
new file mode 100644
index 00000000000..014c4c3ac0a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
@@ -0,0 +1,24 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+// Test function declaration
+TEST_CEIL(float, ceilf)
+TEST_INIT(float)
+TEST_ASSERT(float)
+
+int
+main ()
+{
+  test_float_init (in, ref, ARRAY_SIZE);
+  test_float_ceilf (out, in, ARRAY_SIZE);
+  test_float_assert (out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
new file mode 100644
index 00000000000..ae361e11144
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
@@ -0,0 +1,24 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+// Test function declaration
+TEST_CEIL(double, ceil)
+TEST_INIT(double)
+TEST_ASSERT(double)
+
+int
+main ()
+{
+  test_double_init (in, ref, ARRAY_SIZE);
+  test_double_ceil (out, in, ARRAY_SIZE);
+  test_double_assert (out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
new file mode 100644
index 00000000000..57dd5e0e460
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
@@ -0,0 +1,45 @@
+#include <math.h>
+
+#define TEST_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, TYPE *in, unsigned count) \
+  {                                                               \
+    for (unsigned i = 0; i < count; i++)                          \
+      out[i] = CALL (in[i]);                                      \
+  }
+
+#define TEST_COND_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, int *cond, TYPE *in, unsigned count) \
+  {                                                                          \
+    for (unsigned i = 0; i < count; i++)                                     \
+      out[i] = cond[i] ? CALL (in[i]) : in[i];                               \
+  }
+
+#define TEST_INIT(TYPE)                                        \
+  void test_##TYPE##_init (TYPE *in, TYPE *ref, unsigned size) \
+  {                                                            \
+    for (unsigned i = 0; i < size; i++)                        \
+      {                                                        \
+ TYPE tmp = (TYPE)i;                                    \
+                                                               \
+ if (i % 2 == 0)                                        \
+   {                                                    \
+     in[i] = 1.5f + (TYPE)i;                            \
+     ref[i] = (TYPE)(i + 2);                            \
+   }                                                    \
+ else                                                   \
+   {                                                    \
+     in[i] = (TYPE)i;                                   \
+     ref[i] = (TYPE)i;                                  \
+   }                                                    \
+      }                                                        \
+  }
+
+#define TEST_ASSERT(TYPE)                                         \
+  void test_##TYPE##_assert (TYPE *out, TYPE *ref, unsigned size) \
+  {                                                               \
+    for (unsigned i = 0; i < size; i++)                           \
+      {                                                           \
+ if (out[i] != ref[i])                                     \
+   __builtin_abort ();                                     \
+      }                                                           \
+  }
-- 
2.34.1
 
 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: RE: [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization
  2023-09-20  3:37     ` juzhe.zhong
@ 2023-09-20  3:42       ` Li, Pan2
  0 siblings, 0 replies; 15+ messages in thread
From: Li, Pan2 @ 2023-09-20  3:42 UTC (permalink / raw)
  To: juzhe.zhong, gcc-patches; +Cc: Wang, Yanzhang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 19351 bytes --]

Thanks Juzhe, let me check and keep you posted.

Pan

From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai>
Sent: Wednesday, September 20, 2023 11:37 AM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng <kito.cheng@gmail.com>
Subject: Re: RE: [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization

I just checked the LLVM implementation.

This is their codes of rounding autovectorizaton:

They handle CEIL/FLOOR/FROUND/FROUNDEVEN/FROUND  TO ZERO with the same handling

  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::FCEIL:
  case ISD::VP_FCEIL:
  case ISD::FFLOOR:
  case ISD::VP_FFLOOR:
  case ISD::FROUND:
  case ISD::FROUNDEVEN:
  case ISD::VP_FROUND:
  case ISD::VP_FROUNDEVEN:
  case ISD::VP_FROUNDTOZERO: {
    RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
    assert(FRM != RISCVFPRndMode::Invalid);
    Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
                            DAG.getTargetConstant(FRM, DL, XLenVT), VL);
    break;
  }
  case ISD::FTRUNC:
    Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
                            Mask, VL);
    break;
  case ISD::VP_FRINT:
    Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
    break;
  case ISD::VP_FNEARBYINT:
    Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
                            Mask, VL);
    break;
  }

  // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
  if (Op.getOpcode() != ISD::VP_FNEARBYINT)
    Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
                            Mask, VL);

  // Restore the original sign so that -0.0 is preserved.
  Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
                          Src, Src, Mask, VL);

I think you could just copy LLVM implementation and translate them into GCC codes.
It's so simple.

Create a function call 'expand_rounding".

LLVM code is very easy to read. I believe you could leverage LLVM implementation quickly.

________________________________
juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>

From: Li, Pan2<mailto:pan2.li@intel.com>
Date: 2023-09-20 10:44
To: juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>; gcc-patches<mailto:gcc-patches@gcc.gnu.org>
CC: Wang, Yanzhang<mailto:yanzhang.wang@intel.com>; kito.cheng<mailto:kito.cheng@gmail.com>
Subject: RE: [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization
> It should be "V_VLSF" instead of "VF" so that you could also support VLS CEIL.
Under preparing, and will append to this V2 instead of another patch.

> a[i] = cond[i] ? CEIL (b[i]): c[i];
Sure

Pan


From: juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai> <juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>>
Sent: Wednesday, September 20, 2023 10:35 AM
To: Li, Pan2 <pan2.li@intel.com<mailto:pan2.li@intel.com>>; gcc-patches <gcc-patches@gcc.gnu.org<mailto:gcc-patches@gcc.gnu.org>>
Cc: Li, Pan2 <pan2.li@intel.com<mailto:pan2.li@intel.com>>; Wang, Yanzhang <yanzhang.wang@intel.com<mailto:yanzhang.wang@intel.com>>; kito.cheng <kito.cheng@gmail.com<mailto:kito.cheng@gmail.com>>
Subject: Re: [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization

+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:VF 0 "register_operand")
+   (match_operand:VF 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    rtx tmp = gen_reg_rtx (<VCONVERT>mode);
+    rtx ops_1[] = {tmp, operands[1]};
+    insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, <MODE>mode);
+
+    /* vfcvt.x.f with rounding up (aka ceil).  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_1);
+
+    rtx ops_2[] = {operands[0], tmp};
+    icode = code_for_pred (FLOAT, <MODE>mode);
+
+    /* vfcvt.f.x for the final result.  To avoid unnecessary frm register
+       access, we use RUP here and it will never do the rounding up because
+       the tmp rtx comes from the float to int conversion.  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_2);
+
+    DONE;
+  }
+)

It should be "V_VLSF" instead of "VF" so that you could also support VLS CEIL.

Besides, I want to see this following case:

a[i] = cond[i] ? CEIL (b[i]): c[i];

Ideally, we should be able to combine vfcvt + vmerge into vfcvt with mask.


________________________________
juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>

From: pan2.li<mailto:pan2.li@intel.com>
Date: 2023-09-20 10:30
To: gcc-patches<mailto:gcc-patches@gcc.gnu.org>
CC: juzhe.zhong<mailto:juzhe.zhong@rivai.ai>; pan2.li<mailto:pan2.li@intel.com>; yanzhang.wang<mailto:yanzhang.wang@intel.com>; kito.cheng<mailto:kito.cheng@gmail.com>
Subject: [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization
From: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>

This patch would like to support auto-vectorization for both the
ceil and ceilf of math.h. It depends on the -ffast-math option.

When we would like to call ceil/ceilf like v2 = ceil (v1), we will
onvert it into below insn (reference the implementation of llvm).

* vfcvt.x.f v3, v1, RUP
* vfcvt.f.x v2, v3

The conditional auto-vectorization for ceil/ceilf is also supported
and covered by test cases.

Befor this patch:
math-ceil-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    ceilf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3

After this patch:
  ...
  fsrmi   3
.L4:
  vsetvli a5,a2,e32,m1,ta,ma
  vle32.v v1,0(a1)
  vsetvli a3,zero,e32,m1,ta,ma
  slli    a4,a5,2
  vfcvt.x.f.v     v1,v1
  sub     a2,a2,a5
  vfcvt.f.x.v     v1,v1
  vsetvli zero,a5,e32,m1,ta,ma
  vse32.v v1,0(a0)
  add     a1,a1,a4
  add     a0,a0,a4
  bne     a2,zero,.L4
.L14:
  fsrm    a6
  ret

Please not VLS mode is not involved in this patch and will be token
care of in the underlying patches soon.

gcc/ChangeLog:

* config/riscv/autovec.md (ceil<mode>2): New pattern.
* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
(enum insn_type): Ditto.
* config/riscv/riscv-v.cc: Handle rounding up.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/math-ceil-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-2.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-3.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-4.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/test-math.h: New test.

Signed-off-by: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
---
gcc/config/riscv/autovec.md                   | 30 +++++++++++++
gcc/config/riscv/riscv-protos.h               |  4 ++
gcc/config/riscv/riscv-v.cc                   |  2 +
.../riscv/rvv/autovec/math-ceil-1.c           | 21 +++++++++
.../riscv/rvv/autovec/math-ceil-2.c           | 21 +++++++++
.../riscv/rvv/autovec/math-ceil-3.c           | 24 ++++++++++
.../riscv/rvv/autovec/math-ceil-4.c           | 24 ++++++++++
.../riscv/rvv/autovec/math-ceil-run-1.c       | 24 ++++++++++
.../riscv/rvv/autovec/math-ceil-run-2.c       | 24 ++++++++++
.../gcc.target/riscv/rvv/autovec/test-math.h  | 45 +++++++++++++++++++
10 files changed, 219 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 493d5745485..ea508d81047 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2374,3 +2374,33 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
   riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
   DONE;
})
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:VF 0 "register_operand")
+   (match_operand:VF 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    rtx tmp = gen_reg_rtx (<VCONVERT>mode);
+    rtx ops_1[] = {tmp, operands[1]};
+    insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, <MODE>mode);
+
+    /* vfcvt.x.f with rounding up (aka ceil).  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_1);
+
+    rtx ops_2[] = {operands[0], tmp};
+    icode = code_for_pred (FLOAT, <MODE>mode);
+
+    /* vfcvt.f.x for the final result.  To avoid unnecessary frm register
+       access, we use RUP here and it will never do the rounding up because
+       the tmp rtx comes from the float to int conversion.  */
+    riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP_FRM_RUP, ops_2);
+
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5a2d218d67b..833f1efbaf4 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -250,6 +250,9 @@ enum insn_flags : unsigned int
   /* flags for the floating-point rounding mode.  */
   /* Means INSN has FRM operand and the value is FRM_DYN.  */
   FRM_DYN_P = 1 << 15,
+
+  /* Means INSN has FRM operand and the value is FRM_RUP.  */
+  FRM_RUP_P = 1 << 16,
};
enum insn_type : unsigned int
@@ -290,6 +293,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMA = __MASK_OP_TAMA | UNARY_OP_P,
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
+  UNARY_OP_FRM_RUP = UNARY_OP | FRM_RUP_P,
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a9287e5d671..4192f988648 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -323,6 +323,8 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
+    if (m_insn_flags & FRM_RUP_P)
+      add_rounding_mode_operand (FRM_RUP);
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
new file mode 100644
index 00000000000..8f0f09609eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*ma
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
new file mode 100644
index 00000000000..73395d30d7a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*ma
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
new file mode 100644
index 00000000000..eb0f3a3db78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*ma
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
new file mode 100644
index 00000000000..b9a3c8ebf84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*ma
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
new file mode 100644
index 00000000000..014c4c3ac0a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
@@ -0,0 +1,24 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+// Test function declaration
+TEST_CEIL(float, ceilf)
+TEST_INIT(float)
+TEST_ASSERT(float)
+
+int
+main ()
+{
+  test_float_init (in, ref, ARRAY_SIZE);
+  test_float_ceilf (out, in, ARRAY_SIZE);
+  test_float_assert (out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
new file mode 100644
index 00000000000..ae361e11144
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
@@ -0,0 +1,24 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+// Test function declaration
+TEST_CEIL(double, ceil)
+TEST_INIT(double)
+TEST_ASSERT(double)
+
+int
+main ()
+{
+  test_double_init (in, ref, ARRAY_SIZE);
+  test_double_ceil (out, in, ARRAY_SIZE);
+  test_double_assert (out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
new file mode 100644
index 00000000000..57dd5e0e460
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
@@ -0,0 +1,45 @@
+#include <math.h>
+
+#define TEST_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, TYPE *in, unsigned count) \
+  {                                                               \
+    for (unsigned i = 0; i < count; i++)                          \
+      out[i] = CALL (in[i]);                                      \
+  }
+
+#define TEST_COND_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, int *cond, TYPE *in, unsigned count) \
+  {                                                                          \
+    for (unsigned i = 0; i < count; i++)                                     \
+      out[i] = cond[i] ? CALL (in[i]) : in[i];                               \
+  }
+
+#define TEST_INIT(TYPE)                                        \
+  void test_##TYPE##_init (TYPE *in, TYPE *ref, unsigned size) \
+  {                                                            \
+    for (unsigned i = 0; i < size; i++)                        \
+      {                                                        \
+ TYPE tmp = (TYPE)i;                                    \
+                                                               \
+ if (i % 2 == 0)                                        \
+   {                                                    \
+     in[i] = 1.5f + (TYPE)i;                            \
+     ref[i] = (TYPE)(i + 2);                            \
+   }                                                    \
+ else                                                   \
+   {                                                    \
+     in[i] = (TYPE)i;                                   \
+     ref[i] = (TYPE)i;                                  \
+   }                                                    \
+      }                                                        \
+  }
+
+#define TEST_ASSERT(TYPE)                                         \
+  void test_##TYPE##_assert (TYPE *out, TYPE *ref, unsigned size) \
+  {                                                               \
+    for (unsigned i = 0; i < size; i++)                           \
+      {                                                           \
+ if (out[i] != ref[i])                                     \
+   __builtin_abort ();                                     \
+      }                                                           \
+  }
--
2.34.1



^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2] RISC-V: Support ceil and ceilf auto-vectorization
  2023-09-20  2:30 [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization pan2.li
  2023-09-20  2:35 ` juzhe.zhong
@ 2023-09-21 10:32 ` pan2.li
  2023-09-21 11:05   ` juzhe.zhong
  2023-09-21 11:30   ` juzhe.zhong
  2023-09-21 15:18 ` [PATCH v3] " pan2.li
  2023-09-22  0:12 ` [PATCH v4] " pan2.li
  3 siblings, 2 replies; 15+ messages in thread
From: pan2.li @ 2023-09-21 10:32 UTC (permalink / raw)
  To: gcc-patches; +Cc: juzhe.zhong, pan2.li, yanzhang.wang, kito.cheng

From: Pan Li <pan2.li@intel.com>

This patch would like to support auto-vectorization for both the
ceil and ceilf of math.h. It depends on the -ffast-math option.

When we would like to call ceil/ceilf like v2 = ceil (v1), we will
convert it into below insn (reference the implementation of llvm).

* vfcvt.x.f v3, v1, RUP
* vfcvt.f.x v2, v3

However, the floating point value may not need the cvt as above if
its mantissa is zero. For example single precision floating point below.

  +-----------+---------------+
  | float     | binary layout |
  +-----------+---------------+
  | 8388607.5 | 0x4affffff    |
  | 8388608.0 | 0x4b000000    |
  | 8388609.0 | 0x4b000001    |
  +-----------+---------------+

All single floating point great than 8388608.0 will have all zero mantisaa.
We leverage vmflt and mask to filter them out in vector and only do the
cvt on mask.

Befor this patch:
math-ceil-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    ceilf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3

After this patch:
  ...
  fsrmi   3
.L4:
  vfabs.v     v0,v1
  vmv1r.v     v2,v1
  vmflt.vv    v0,v0,v4
  sub         a3,a3,a4
  vfcvt.x.f.v v3,v1,v0.t
  vfcvt.f.x.v v2,v3,v0.t
  vfsgnj.vv   v2,v2,v1
  bne         .L4
.L14:
  fsrm    a6
  ret

Please note VLS mode is also involved in this patch and covered by the
test cases.

gcc/ChangeLog:

	* config/riscv/autovec.md (ceil<mode>2): New pattern.
	* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
	(enum insn_type): Ditto.
	(expand_vec_ceil): New function decl.
	* config/riscv/riscv-v.cc (gen_ceil_const_fp): New function impl.
	(expand_vec_float_cmp_mask): Ditto.
	(expand_vec_copysign): Ditto.
	(expand_vec_ceil): Ditto.
	* config/riscv/vector-iterators.md: Add VLS mode to VCONVERT.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/math-ceil-1.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-2.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-3.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-4.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-run-1.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-run-2.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-run-3.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-run-4.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-run-double.h: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-run-single.h: New test.
	* gcc.target/riscv/rvv/autovec/test-math.h: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/autovec.md                   |  16 +++
 gcc/config/riscv/riscv-protos.h               |   5 +
 gcc/config/riscv/riscv-v.cc                   | 116 ++++++++++++++++++
 gcc/config/riscv/vector-iterators.md          |  12 ++
 .../riscv/rvv/autovec/math-ceil-1.c           |  26 ++++
 .../riscv/rvv/autovec/math-ceil-2.c           |  26 ++++
 .../riscv/rvv/autovec/math-ceil-3.c           |  28 +++++
 .../riscv/rvv/autovec/math-ceil-4.c           |  28 +++++
 .../riscv/rvv/autovec/math-ceil-run-1.c       |   4 +
 .../riscv/rvv/autovec/math-ceil-run-2.c       |   4 +
 .../riscv/rvv/autovec/math-ceil-run-3.c       |   4 +
 .../riscv/rvv/autovec/math-ceil-run-4.c       |   4 +
 .../riscv/rvv/autovec/math-ceil-run-double.h  |  36 ++++++
 .../riscv/rvv/autovec/math-ceil-run-single.h  |  36 ++++++
 .../gcc.target/riscv/rvv/autovec/test-math.h  |  40 ++++++
 15 files changed, 385 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-double.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-single.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 493d5745485..36ed839aa5b 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2374,3 +2374,19 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
   riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
   DONE;
 })
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   (match_operand:V_VLSF 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_vec_ceil (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5a2d218d67b..7488a9392de 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -250,6 +250,9 @@ enum insn_flags : unsigned int
   /* flags for the floating-point rounding mode.  */
   /* Means INSN has FRM operand and the value is FRM_DYN.  */
   FRM_DYN_P = 1 << 15,
+
+  /* Means INSN has FRM operand and the value is FRM_RUP.  */
+  FRM_RUP_P = 1 << 16,
 };
 
 enum insn_type : unsigned int
@@ -290,6 +293,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMA = __MASK_OP_TAMA | UNARY_OP_P,
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
+  UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
 
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
@@ -428,6 +432,7 @@ bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
 void expand_cond_len_unop (unsigned, rtx *);
 void expand_cond_len_binop (unsigned, rtx *);
 void expand_reduction (unsigned, unsigned, rtx *, rtx);
+void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
 #endif
 bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
 			  bool, void (*)(rtx *, rtx));
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a9287e5d671..9a2cd62c878 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -323,6 +323,8 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
+    if (m_insn_flags & FRM_RUP_P)
+      add_rounding_mode_operand (FRM_RUP);
 
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
@@ -3405,4 +3407,118 @@ cmp_lmul_gt_one (machine_mode mode)
   return false;
 }
 
+/* We don't have to convert the floating point to integer when the
+   mantissa is zero.  Thus, ther will be a limitation for both the
+   single and double precision floating point.  There will be no
+   mantissa if the floating point is greater than the limit.
+
+   1. Single floating point.
+      +-----------+---------------+
+      | float     | binary layout |
+      +-----------+---------------+
+      | 8388607.5 | 0x4affffff    |
+      +-----------+---------------+
+      | 8388608.0 | 0x4b000000    |
+      +-----------+---------------+
+      | 8388609.0 | 0x4b000001    |
+      +-----------+---------------+
+      | ...       | ...           |
+
+      All single floating point will be unchanged for ceil if it is
+      greater than and equal to 8388608.
+
+   2. Double floating point.
+      +--------------------+--------------------+
+      | float              | binary layout      |
+      +--------------------+--------------------+
+      | 4503599627370495.5 | 0X432fffffffffffff |
+      +--------------------+--------------------+
+      | 4503599627370496.0 | 0X4330000000000000 |
+      +--------------------+--------------------+
+      | 4503599627370497.0 | 0X4340000000000000 |
+      +--------------------+--------------------+
+      | ...                | ...                |
+
+      All double floating point will be unchanged for ceil if it is
+      greater than and equal to 4503599627370496.
+ */
+static rtx
+gen_ceil_const_fp (machine_mode inner_mode)
+{
+  REAL_VALUE_TYPE real;
+
+  if (inner_mode == E_SFmode)
+    real_from_integer (&real, inner_mode, 8388608, SIGNED);
+  else if (inner_mode == E_DFmode)
+    real_from_integer (&real, inner_mode, 4503599627370496, SIGNED);
+  else
+    gcc_unreachable ();
+
+  return const_double_from_real_value (real, inner_mode);
+}
+
+static rtx
+expand_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
+			   machine_mode vec_fp_mode)
+{
+  /* Step-1: Get the abs float value for mask generation.  */
+  rtx tmp = gen_reg_rtx (vec_fp_mode);
+  rtx abs_ops[] = {tmp, fp_vector};
+  insn_code icode = code_for_pred (ABS, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP, abs_ops);
+
+  /* Step-2: Prepare the scalar float compare register.  */
+  rtx fp_reg = gen_reg_rtx (GET_MODE_INNER (vec_fp_mode));
+  emit_insn (gen_move_insn (fp_reg, fp_scalar));
+
+  /* Step-3: Prepare the vector float compare register.  */
+  rtx vec_dup = gen_reg_rtx (vec_fp_mode);
+  icode = code_for_pred_broadcast (vec_fp_mode);
+  rtx vfmv_ops[] = {vec_dup, fp_reg};
+  emit_vlmax_insn (icode, UNARY_OP, vfmv_ops);
+
+  /* Step-4: Generate the mask.  */
+  machine_mode mask_mode = get_mask_mode (vec_fp_mode);
+  rtx mask = gen_reg_rtx (mask_mode);
+  expand_vec_cmp (mask, code, tmp, vec_dup);
+
+  return mask;
+}
+
+static void
+expand_vec_copysign (rtx op_dest, rtx op_src_0, rtx op_src_1,
+		     machine_mode vec_mode)
+{
+  rtx sgnj_ops[] = {op_dest, op_src_0, op_src_1};
+  insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, vec_mode);
+
+  emit_vlmax_insn (icode, BINARY_OP, sgnj_ops);
+}
+
+void
+expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
+		 machine_mode vec_int_mode)
+{
+  /* Step-1: Generate the mask on const fp.  */
+  rtx const_fp = gen_ceil_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx mask = expand_vec_float_cmp_mask (op_1, LT, const_fp, vec_fp_mode);
+
+  /* Step-2: Convert to integer on mask, with rounding up (aka ceil).  */
+  rtx tmp = gen_reg_rtx (vec_int_mode);
+  rtx cvt_x_ops[] = {tmp, mask, tmp, op_1};
+  insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RUP, cvt_x_ops);
+
+  /* Step-3: Convert to floating-point on mask for the final result.
+     To avoid unnecessary frm register access, we use RUP here and it will
+     never do the rounding up because the tmp rtx comes from the float
+     to int conversion.  */
+  rtx cvt_fp_ops[] = {op_0, mask, op_1, tmp};
+  icode = code_for_pred (FLOAT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RUP, cvt_fp_ops);
+
+  /* Step-4: Retrieve the sign bit.  */
+  expand_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
+}
+
 } // namespace riscv_vector
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 0b395e65228..78f08cb4c40 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -2339,6 +2339,18 @@ (define_mode_attr VCONVERT [
   (RVVM8HF "RVVM8HI") (RVVM4HF "RVVM4HI") (RVVM2HF "RVVM2HI") (RVVM1HF "RVVM1HI") (RVVMF2HF "RVVMF2HI") (RVVMF4HF "RVVMF4HI")
   (RVVM8SF "RVVM8SI") (RVVM4SF "RVVM4SI") (RVVM2SF "RVVM2SI") (RVVM1SF "RVVM1SI") (RVVMF2SF "RVVMF2SI")
   (RVVM8DF "RVVM8DI") (RVVM4DF "RVVM4DI") (RVVM2DF "RVVM2DI") (RVVM1DF "RVVM1DI")
+
+  (V1HF "V1HI") (V2HF "V2HI") (V4HF "V4HI") (V8HF "V8HI") (V16HF "V16HI")
+  (V32HF "V32HI") (V64HF "V64HI") (V128HF "V128HI") (V256HF "V256HI")
+  (V512HF "V512HI") (V1024HF "V1024HI") (V2048HF "V2048HI")
+
+  (V1SF "V1SI") (V2SF "V2SI") (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")
+  (V32SF "V32SI") (V64SF "V64SI") (V128SF "V128SI") (V256SF "V256SI")
+  (V512SF "V512SI") (V1024SF "V1024SI")
+
+  (V1DF "V1DI") (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI") (V16DF "V16DI")
+  (V32DF "V32DI") (V64DF "V64DI") (V128DF "V128DI") (V256DF "V256DI")
+  (V512DF "V512DI")
 ])
 
 (define_mode_attr vconvert [
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
new file mode 100644
index 00000000000..10d6ec7458e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
new file mode 100644
index 00000000000..f4b6d2d6a9d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
new file mode 100644
index 00000000000..bdbcd97f8c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
new file mode 100644
index 00000000000..5ce313d567b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
new file mode 100644
index 00000000000..7de1dfe8fe5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+
+#include "math-ceil-run-single.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
new file mode 100644
index 00000000000..a98aa6ed2fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+
+#include "math-ceil-run-double.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-3.c
new file mode 100644
index 00000000000..025d251dd08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-3.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-preference=scalable -fno-vect-cost-model -ffast-math -lm" } */
+
+#include "math-ceil-run-single.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-4.c
new file mode 100644
index 00000000000..0dacfe763d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-4.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-preference=scalable -fno-vect-cost-model -ffast-math -lm" } */
+
+#include "math-ceil-run-double.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-double.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-double.h
new file mode 100644
index 00000000000..5b85c007046
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-double.h
@@ -0,0 +1,36 @@
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+TEST_CEIL (double, ceil)
+TEST_ASSERT (double)
+
+TEST_INIT (double, 1.2, 2.0, 1)
+TEST_INIT (double, -1.2, -1.0, 2)
+TEST_INIT (double, 3.0, 3.0, 3)
+TEST_INIT (double, 4503599627370495.5, 4503599627370496.0, 4)
+TEST_INIT (double, 4503599627370497.0, 4503599627370497.0, 5)
+TEST_INIT (double, 0.0, 0.0, 6)
+TEST_INIT (double, -0.0, -0.0, 7)
+TEST_INIT (double, -4503599627370495.5, -4503599627370495.0, 8)
+TEST_INIT (double, -4503599627370496.0, -4503599627370496.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (double, 1, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 2, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 3, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 4, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 5, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 6, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 7, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 8, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 9, ceil, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-single.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-single.h
new file mode 100644
index 00000000000..7b710715d53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-single.h
@@ -0,0 +1,36 @@
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+TEST_CEIL (float, ceilf)
+TEST_ASSERT (float)
+
+TEST_INIT (float, 1.2, 2.0, 1)
+TEST_INIT (float, -1.2, -1.0, 2)
+TEST_INIT (float, 3.0, 3.0, 3)
+TEST_INIT (float, 8388607.5, 8388608.0, 4)
+TEST_INIT (float, 8388609.0, 8388609.0, 5)
+TEST_INIT (float, 0.0, 0.0, 6)
+TEST_INIT (float, -0.0, -0.0, 7)
+TEST_INIT (float, -8388607.5, -8388607.0, 8)
+TEST_INIT (float, -8388608.0, -8388608.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (float, 1, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 2, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 3, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 4, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 5, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 6, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 7, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 8, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 9, ceilf, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
new file mode 100644
index 00000000000..c3e3f1b24db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
@@ -0,0 +1,40 @@
+#include <math.h>
+
+#define TEST_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, TYPE *in, unsigned count) \
+  {                                                               \
+    for (unsigned i = 0; i < count; i++)                          \
+      out[i] = CALL (in[i]);                                      \
+  }
+
+#define TEST_COND_CEIL(TYPE, CALL)                                           \
+  void test_##TYPE##_##CALL (TYPE *out, int *cond, TYPE *in, unsigned count) \
+  {                                                                          \
+    for (unsigned i = 0; i < count; i++)                                     \
+      out[i] = cond[i] ? CALL (in[i]) : in[i];                               \
+  }
+
+#define TEST_INIT(TYPE, VAL_IN, VAL_REF, NUM)                        \
+  void test_##TYPE##_init_##NUM (TYPE *in, TYPE *ref, unsigned size) \
+  {                                                                  \
+    for (unsigned i = 0; i < size; i++)                              \
+      {                                                              \
+	in[i] = VAL_IN;                                              \
+	ref[i] = VAL_REF;                                            \
+      }                                                              \
+  }
+
+#define TEST_ASSERT(TYPE)                                         \
+  void test_##TYPE##_assert (TYPE *out, TYPE *ref, unsigned size) \
+  {                                                               \
+    for (unsigned i = 0; i < size; i++)                           \
+      {                                                           \
+	if (out[i] != ref[i])                                     \
+	  __builtin_abort ();                                     \
+      }                                                           \
+  }
+
+#define RUN_TEST(TYPE, NUM, CALL, IN, OUT, REF, SIZE) \
+  test_##TYPE##_init_##NUM (IN, REF, SIZE);           \
+  test_##TYPE##_##CALL (OUT, IN, SIZE);               \
+  test_##TYPE##_assert (OUT, REF, SIZE);
-- 
2.34.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] RISC-V: Support ceil and ceilf auto-vectorization
  2023-09-21 10:32 ` [PATCH v2] " pan2.li
@ 2023-09-21 11:05   ` juzhe.zhong
  2023-09-21 11:30   ` juzhe.zhong
  1 sibling, 0 replies; 15+ messages in thread
From: juzhe.zhong @ 2023-09-21 11:05 UTC (permalink / raw)
  To: pan2.li, gcc-patches; +Cc: pan2.li, yanzhang.wang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 25575 bytes --]

+(define_expand "ceil<mode>2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   (match_operand:V_VLSF 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_vec_ceil (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+    DONE;
+  }

I think you should add !flag_trapping_math && !flag_rounding_math

You can try -ftrapping-math or frounding-mode, LLVM failed to vectorize.

Like  X86:

(define_expand "round<mode>2"
  [(match_operand:X87MODEF 0 "register_operand")
   (match_operand:X87MODEF 1 "nonimmediate_operand")]
  "(TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
  || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations
    && (flag_fp_int_builtin_inexact || !flag_trapping_math))
   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
       && !flag_trapping_math && !flag_rounding_math)"

Otherwise LGTM.


juzhe.zhong@rivai.ai
 
From: pan2.li
Date: 2023-09-21 18:32
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v2] RISC-V: Support ceil and ceilf auto-vectorization
From: Pan Li <pan2.li@intel.com>
 
This patch would like to support auto-vectorization for both the
ceil and ceilf of math.h. It depends on the -ffast-math option.
 
When we would like to call ceil/ceilf like v2 = ceil (v1), we will
convert it into below insn (reference the implementation of llvm).
 
* vfcvt.x.f v3, v1, RUP
* vfcvt.f.x v2, v3
 
However, the floating point value may not need the cvt as above if
its mantissa is zero. For example single precision floating point below.
 
  +-----------+---------------+
  | float     | binary layout |
  +-----------+---------------+
  | 8388607.5 | 0x4affffff    |
  | 8388608.0 | 0x4b000000    |
  | 8388609.0 | 0x4b000001    |
  +-----------+---------------+
 
All single floating point great than 8388608.0 will have all zero mantisaa.
We leverage vmflt and mask to filter them out in vector and only do the
cvt on mask.
 
Befor this patch:
math-ceil-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    ceilf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3
 
After this patch:
  ...
  fsrmi   3
.L4:
  vfabs.v     v0,v1
  vmv1r.v     v2,v1
  vmflt.vv    v0,v0,v4
  sub         a3,a3,a4
  vfcvt.x.f.v v3,v1,v0.t
  vfcvt.f.x.v v2,v3,v0.t
  vfsgnj.vv   v2,v2,v1
  bne         .L4
.L14:
  fsrm    a6
  ret
 
Please note VLS mode is also involved in this patch and covered by the
test cases.
 
gcc/ChangeLog:
 
* config/riscv/autovec.md (ceil<mode>2): New pattern.
* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
(enum insn_type): Ditto.
(expand_vec_ceil): New function decl.
* config/riscv/riscv-v.cc (gen_ceil_const_fp): New function impl.
(expand_vec_float_cmp_mask): Ditto.
(expand_vec_copysign): Ditto.
(expand_vec_ceil): Ditto.
* config/riscv/vector-iterators.md: Add VLS mode to VCONVERT.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/math-ceil-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-2.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-3.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-4.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-3.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-4.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-double.h: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-single.h: New test.
* gcc.target/riscv/rvv/autovec/test-math.h: New test.
 
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/autovec.md                   |  16 +++
gcc/config/riscv/riscv-protos.h               |   5 +
gcc/config/riscv/riscv-v.cc                   | 116 ++++++++++++++++++
gcc/config/riscv/vector-iterators.md          |  12 ++
.../riscv/rvv/autovec/math-ceil-1.c           |  26 ++++
.../riscv/rvv/autovec/math-ceil-2.c           |  26 ++++
.../riscv/rvv/autovec/math-ceil-3.c           |  28 +++++
.../riscv/rvv/autovec/math-ceil-4.c           |  28 +++++
.../riscv/rvv/autovec/math-ceil-run-1.c       |   4 +
.../riscv/rvv/autovec/math-ceil-run-2.c       |   4 +
.../riscv/rvv/autovec/math-ceil-run-3.c       |   4 +
.../riscv/rvv/autovec/math-ceil-run-4.c       |   4 +
.../riscv/rvv/autovec/math-ceil-run-double.h  |  36 ++++++
.../riscv/rvv/autovec/math-ceil-run-single.h  |  36 ++++++
.../gcc.target/riscv/rvv/autovec/test-math.h  |  40 ++++++
15 files changed, 385 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-double.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-single.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
 
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 493d5745485..36ed839aa5b 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2374,3 +2374,19 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
   riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
   DONE;
})
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   (match_operand:V_VLSF 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_vec_ceil (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5a2d218d67b..7488a9392de 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -250,6 +250,9 @@ enum insn_flags : unsigned int
   /* flags for the floating-point rounding mode.  */
   /* Means INSN has FRM operand and the value is FRM_DYN.  */
   FRM_DYN_P = 1 << 15,
+
+  /* Means INSN has FRM operand and the value is FRM_RUP.  */
+  FRM_RUP_P = 1 << 16,
};
enum insn_type : unsigned int
@@ -290,6 +293,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMA = __MASK_OP_TAMA | UNARY_OP_P,
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
+  UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
@@ -428,6 +432,7 @@ bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
void expand_cond_len_unop (unsigned, rtx *);
void expand_cond_len_binop (unsigned, rtx *);
void expand_reduction (unsigned, unsigned, rtx *, rtx);
+void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
  bool, void (*)(rtx *, rtx));
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a9287e5d671..9a2cd62c878 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -323,6 +323,8 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
+    if (m_insn_flags & FRM_RUP_P)
+      add_rounding_mode_operand (FRM_RUP);
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
@@ -3405,4 +3407,118 @@ cmp_lmul_gt_one (machine_mode mode)
   return false;
}
+/* We don't have to convert the floating point to integer when the
+   mantissa is zero.  Thus, ther will be a limitation for both the
+   single and double precision floating point.  There will be no
+   mantissa if the floating point is greater than the limit.
+
+   1. Single floating point.
+      +-----------+---------------+
+      | float     | binary layout |
+      +-----------+---------------+
+      | 8388607.5 | 0x4affffff    |
+      +-----------+---------------+
+      | 8388608.0 | 0x4b000000    |
+      +-----------+---------------+
+      | 8388609.0 | 0x4b000001    |
+      +-----------+---------------+
+      | ...       | ...           |
+
+      All single floating point will be unchanged for ceil if it is
+      greater than and equal to 8388608.
+
+   2. Double floating point.
+      +--------------------+--------------------+
+      | float              | binary layout      |
+      +--------------------+--------------------+
+      | 4503599627370495.5 | 0X432fffffffffffff |
+      +--------------------+--------------------+
+      | 4503599627370496.0 | 0X4330000000000000 |
+      +--------------------+--------------------+
+      | 4503599627370497.0 | 0X4340000000000000 |
+      +--------------------+--------------------+
+      | ...                | ...                |
+
+      All double floating point will be unchanged for ceil if it is
+      greater than and equal to 4503599627370496.
+ */
+static rtx
+gen_ceil_const_fp (machine_mode inner_mode)
+{
+  REAL_VALUE_TYPE real;
+
+  if (inner_mode == E_SFmode)
+    real_from_integer (&real, inner_mode, 8388608, SIGNED);
+  else if (inner_mode == E_DFmode)
+    real_from_integer (&real, inner_mode, 4503599627370496, SIGNED);
+  else
+    gcc_unreachable ();
+
+  return const_double_from_real_value (real, inner_mode);
+}
+
+static rtx
+expand_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
+    machine_mode vec_fp_mode)
+{
+  /* Step-1: Get the abs float value for mask generation.  */
+  rtx tmp = gen_reg_rtx (vec_fp_mode);
+  rtx abs_ops[] = {tmp, fp_vector};
+  insn_code icode = code_for_pred (ABS, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP, abs_ops);
+
+  /* Step-2: Prepare the scalar float compare register.  */
+  rtx fp_reg = gen_reg_rtx (GET_MODE_INNER (vec_fp_mode));
+  emit_insn (gen_move_insn (fp_reg, fp_scalar));
+
+  /* Step-3: Prepare the vector float compare register.  */
+  rtx vec_dup = gen_reg_rtx (vec_fp_mode);
+  icode = code_for_pred_broadcast (vec_fp_mode);
+  rtx vfmv_ops[] = {vec_dup, fp_reg};
+  emit_vlmax_insn (icode, UNARY_OP, vfmv_ops);
+
+  /* Step-4: Generate the mask.  */
+  machine_mode mask_mode = get_mask_mode (vec_fp_mode);
+  rtx mask = gen_reg_rtx (mask_mode);
+  expand_vec_cmp (mask, code, tmp, vec_dup);
+
+  return mask;
+}
+
+static void
+expand_vec_copysign (rtx op_dest, rtx op_src_0, rtx op_src_1,
+      machine_mode vec_mode)
+{
+  rtx sgnj_ops[] = {op_dest, op_src_0, op_src_1};
+  insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, vec_mode);
+
+  emit_vlmax_insn (icode, BINARY_OP, sgnj_ops);
+}
+
+void
+expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
+ machine_mode vec_int_mode)
+{
+  /* Step-1: Generate the mask on const fp.  */
+  rtx const_fp = gen_ceil_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx mask = expand_vec_float_cmp_mask (op_1, LT, const_fp, vec_fp_mode);
+
+  /* Step-2: Convert to integer on mask, with rounding up (aka ceil).  */
+  rtx tmp = gen_reg_rtx (vec_int_mode);
+  rtx cvt_x_ops[] = {tmp, mask, tmp, op_1};
+  insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RUP, cvt_x_ops);
+
+  /* Step-3: Convert to floating-point on mask for the final result.
+     To avoid unnecessary frm register access, we use RUP here and it will
+     never do the rounding up because the tmp rtx comes from the float
+     to int conversion.  */
+  rtx cvt_fp_ops[] = {op_0, mask, op_1, tmp};
+  icode = code_for_pred (FLOAT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RUP, cvt_fp_ops);
+
+  /* Step-4: Retrieve the sign bit.  */
+  expand_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
+}
+
} // namespace riscv_vector
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 0b395e65228..78f08cb4c40 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -2339,6 +2339,18 @@ (define_mode_attr VCONVERT [
   (RVVM8HF "RVVM8HI") (RVVM4HF "RVVM4HI") (RVVM2HF "RVVM2HI") (RVVM1HF "RVVM1HI") (RVVMF2HF "RVVMF2HI") (RVVMF4HF "RVVMF4HI")
   (RVVM8SF "RVVM8SI") (RVVM4SF "RVVM4SI") (RVVM2SF "RVVM2SI") (RVVM1SF "RVVM1SI") (RVVMF2SF "RVVMF2SI")
   (RVVM8DF "RVVM8DI") (RVVM4DF "RVVM4DI") (RVVM2DF "RVVM2DI") (RVVM1DF "RVVM1DI")
+
+  (V1HF "V1HI") (V2HF "V2HI") (V4HF "V4HI") (V8HF "V8HI") (V16HF "V16HI")
+  (V32HF "V32HI") (V64HF "V64HI") (V128HF "V128HI") (V256HF "V256HI")
+  (V512HF "V512HI") (V1024HF "V1024HI") (V2048HF "V2048HI")
+
+  (V1SF "V1SI") (V2SF "V2SI") (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")
+  (V32SF "V32SI") (V64SF "V64SI") (V128SF "V128SI") (V256SF "V256SI")
+  (V512SF "V512SI") (V1024SF "V1024SI")
+
+  (V1DF "V1DI") (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI") (V16DF "V16DI")
+  (V32DF "V32DI") (V64DF "V64DI") (V128DF "V128DI") (V256DF "V256DI")
+  (V512DF "V512DI")
])
(define_mode_attr vconvert [
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
new file mode 100644
index 00000000000..10d6ec7458e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
new file mode 100644
index 00000000000..f4b6d2d6a9d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
new file mode 100644
index 00000000000..bdbcd97f8c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
new file mode 100644
index 00000000000..5ce313d567b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
new file mode 100644
index 00000000000..7de1dfe8fe5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+
+#include "math-ceil-run-single.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
new file mode 100644
index 00000000000..a98aa6ed2fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+
+#include "math-ceil-run-double.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-3.c
new file mode 100644
index 00000000000..025d251dd08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-3.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-preference=scalable -fno-vect-cost-model -ffast-math -lm" } */
+
+#include "math-ceil-run-single.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-4.c
new file mode 100644
index 00000000000..0dacfe763d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-4.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-preference=scalable -fno-vect-cost-model -ffast-math -lm" } */
+
+#include "math-ceil-run-double.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-double.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-double.h
new file mode 100644
index 00000000000..5b85c007046
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-double.h
@@ -0,0 +1,36 @@
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+TEST_CEIL (double, ceil)
+TEST_ASSERT (double)
+
+TEST_INIT (double, 1.2, 2.0, 1)
+TEST_INIT (double, -1.2, -1.0, 2)
+TEST_INIT (double, 3.0, 3.0, 3)
+TEST_INIT (double, 4503599627370495.5, 4503599627370496.0, 4)
+TEST_INIT (double, 4503599627370497.0, 4503599627370497.0, 5)
+TEST_INIT (double, 0.0, 0.0, 6)
+TEST_INIT (double, -0.0, -0.0, 7)
+TEST_INIT (double, -4503599627370495.5, -4503599627370495.0, 8)
+TEST_INIT (double, -4503599627370496.0, -4503599627370496.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (double, 1, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 2, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 3, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 4, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 5, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 6, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 7, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 8, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 9, ceil, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-single.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-single.h
new file mode 100644
index 00000000000..7b710715d53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-single.h
@@ -0,0 +1,36 @@
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+TEST_CEIL (float, ceilf)
+TEST_ASSERT (float)
+
+TEST_INIT (float, 1.2, 2.0, 1)
+TEST_INIT (float, -1.2, -1.0, 2)
+TEST_INIT (float, 3.0, 3.0, 3)
+TEST_INIT (float, 8388607.5, 8388608.0, 4)
+TEST_INIT (float, 8388609.0, 8388609.0, 5)
+TEST_INIT (float, 0.0, 0.0, 6)
+TEST_INIT (float, -0.0, -0.0, 7)
+TEST_INIT (float, -8388607.5, -8388607.0, 8)
+TEST_INIT (float, -8388608.0, -8388608.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (float, 1, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 2, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 3, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 4, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 5, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 6, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 7, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 8, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 9, ceilf, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
new file mode 100644
index 00000000000..c3e3f1b24db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
@@ -0,0 +1,40 @@
+#include <math.h>
+
+#define TEST_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, TYPE *in, unsigned count) \
+  {                                                               \
+    for (unsigned i = 0; i < count; i++)                          \
+      out[i] = CALL (in[i]);                                      \
+  }
+
+#define TEST_COND_CEIL(TYPE, CALL)                                           \
+  void test_##TYPE##_##CALL (TYPE *out, int *cond, TYPE *in, unsigned count) \
+  {                                                                          \
+    for (unsigned i = 0; i < count; i++)                                     \
+      out[i] = cond[i] ? CALL (in[i]) : in[i];                               \
+  }
+
+#define TEST_INIT(TYPE, VAL_IN, VAL_REF, NUM)                        \
+  void test_##TYPE##_init_##NUM (TYPE *in, TYPE *ref, unsigned size) \
+  {                                                                  \
+    for (unsigned i = 0; i < size; i++)                              \
+      {                                                              \
+ in[i] = VAL_IN;                                              \
+ ref[i] = VAL_REF;                                            \
+      }                                                              \
+  }
+
+#define TEST_ASSERT(TYPE)                                         \
+  void test_##TYPE##_assert (TYPE *out, TYPE *ref, unsigned size) \
+  {                                                               \
+    for (unsigned i = 0; i < size; i++)                           \
+      {                                                           \
+ if (out[i] != ref[i])                                     \
+   __builtin_abort ();                                     \
+      }                                                           \
+  }
+
+#define RUN_TEST(TYPE, NUM, CALL, IN, OUT, REF, SIZE) \
+  test_##TYPE##_init_##NUM (IN, REF, SIZE);           \
+  test_##TYPE##_##CALL (OUT, IN, SIZE);               \
+  test_##TYPE##_assert (OUT, REF, SIZE);
-- 
2.34.1
 
 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2] RISC-V: Support ceil and ceilf auto-vectorization
  2023-09-21 10:32 ` [PATCH v2] " pan2.li
  2023-09-21 11:05   ` juzhe.zhong
@ 2023-09-21 11:30   ` juzhe.zhong
  2023-09-21 11:56     ` Li, Pan2
  1 sibling, 1 reply; 15+ messages in thread
From: juzhe.zhong @ 2023-09-21 11:30 UTC (permalink / raw)
  To: pan2.li, gcc-patches; +Cc: pan2.li, yanzhang.wang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 24735 bytes --]

Also。 Remove math.h include。
Instead, plz use __builtin_ceil.



juzhe.zhong@rivai.ai
 
From: pan2.li
Date: 2023-09-21 18:32
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v2] RISC-V: Support ceil and ceilf auto-vectorization
From: Pan Li <pan2.li@intel.com>
 
This patch would like to support auto-vectorization for both the
ceil and ceilf of math.h. It depends on the -ffast-math option.
 
When we would like to call ceil/ceilf like v2 = ceil (v1), we will
convert it into below insn (reference the implementation of llvm).
 
* vfcvt.x.f v3, v1, RUP
* vfcvt.f.x v2, v3
 
However, the floating point value may not need the cvt as above if
its mantissa is zero. For example single precision floating point below.
 
  +-----------+---------------+
  | float     | binary layout |
  +-----------+---------------+
  | 8388607.5 | 0x4affffff    |
  | 8388608.0 | 0x4b000000    |
  | 8388609.0 | 0x4b000001    |
  +-----------+---------------+
 
All single floating point great than 8388608.0 will have all zero mantisaa.
We leverage vmflt and mask to filter them out in vector and only do the
cvt on mask.
 
Befor this patch:
math-ceil-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    ceilf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3
 
After this patch:
  ...
  fsrmi   3
.L4:
  vfabs.v     v0,v1
  vmv1r.v     v2,v1
  vmflt.vv    v0,v0,v4
  sub         a3,a3,a4
  vfcvt.x.f.v v3,v1,v0.t
  vfcvt.f.x.v v2,v3,v0.t
  vfsgnj.vv   v2,v2,v1
  bne         .L4
.L14:
  fsrm    a6
  ret
 
Please note VLS mode is also involved in this patch and covered by the
test cases.
 
gcc/ChangeLog:
 
* config/riscv/autovec.md (ceil<mode>2): New pattern.
* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
(enum insn_type): Ditto.
(expand_vec_ceil): New function decl.
* config/riscv/riscv-v.cc (gen_ceil_const_fp): New function impl.
(expand_vec_float_cmp_mask): Ditto.
(expand_vec_copysign): Ditto.
(expand_vec_ceil): Ditto.
* config/riscv/vector-iterators.md: Add VLS mode to VCONVERT.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/math-ceil-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-2.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-3.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-4.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-3.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-4.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-double.h: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-single.h: New test.
* gcc.target/riscv/rvv/autovec/test-math.h: New test.
 
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/autovec.md                   |  16 +++
gcc/config/riscv/riscv-protos.h               |   5 +
gcc/config/riscv/riscv-v.cc                   | 116 ++++++++++++++++++
gcc/config/riscv/vector-iterators.md          |  12 ++
.../riscv/rvv/autovec/math-ceil-1.c           |  26 ++++
.../riscv/rvv/autovec/math-ceil-2.c           |  26 ++++
.../riscv/rvv/autovec/math-ceil-3.c           |  28 +++++
.../riscv/rvv/autovec/math-ceil-4.c           |  28 +++++
.../riscv/rvv/autovec/math-ceil-run-1.c       |   4 +
.../riscv/rvv/autovec/math-ceil-run-2.c       |   4 +
.../riscv/rvv/autovec/math-ceil-run-3.c       |   4 +
.../riscv/rvv/autovec/math-ceil-run-4.c       |   4 +
.../riscv/rvv/autovec/math-ceil-run-double.h  |  36 ++++++
.../riscv/rvv/autovec/math-ceil-run-single.h  |  36 ++++++
.../gcc.target/riscv/rvv/autovec/test-math.h  |  40 ++++++
15 files changed, 385 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-double.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-single.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
 
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 493d5745485..36ed839aa5b 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2374,3 +2374,19 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
   riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
   DONE;
})
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   (match_operand:V_VLSF 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_vec_ceil (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5a2d218d67b..7488a9392de 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -250,6 +250,9 @@ enum insn_flags : unsigned int
   /* flags for the floating-point rounding mode.  */
   /* Means INSN has FRM operand and the value is FRM_DYN.  */
   FRM_DYN_P = 1 << 15,
+
+  /* Means INSN has FRM operand and the value is FRM_RUP.  */
+  FRM_RUP_P = 1 << 16,
};
enum insn_type : unsigned int
@@ -290,6 +293,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMA = __MASK_OP_TAMA | UNARY_OP_P,
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
+  UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
@@ -428,6 +432,7 @@ bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
void expand_cond_len_unop (unsigned, rtx *);
void expand_cond_len_binop (unsigned, rtx *);
void expand_reduction (unsigned, unsigned, rtx *, rtx);
+void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
  bool, void (*)(rtx *, rtx));
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a9287e5d671..9a2cd62c878 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -323,6 +323,8 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
+    if (m_insn_flags & FRM_RUP_P)
+      add_rounding_mode_operand (FRM_RUP);
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
@@ -3405,4 +3407,118 @@ cmp_lmul_gt_one (machine_mode mode)
   return false;
}
+/* We don't have to convert the floating point to integer when the
+   mantissa is zero.  Thus, ther will be a limitation for both the
+   single and double precision floating point.  There will be no
+   mantissa if the floating point is greater than the limit.
+
+   1. Single floating point.
+      +-----------+---------------+
+      | float     | binary layout |
+      +-----------+---------------+
+      | 8388607.5 | 0x4affffff    |
+      +-----------+---------------+
+      | 8388608.0 | 0x4b000000    |
+      +-----------+---------------+
+      | 8388609.0 | 0x4b000001    |
+      +-----------+---------------+
+      | ...       | ...           |
+
+      All single floating point will be unchanged for ceil if it is
+      greater than and equal to 8388608.
+
+   2. Double floating point.
+      +--------------------+--------------------+
+      | float              | binary layout      |
+      +--------------------+--------------------+
+      | 4503599627370495.5 | 0X432fffffffffffff |
+      +--------------------+--------------------+
+      | 4503599627370496.0 | 0X4330000000000000 |
+      +--------------------+--------------------+
+      | 4503599627370497.0 | 0X4340000000000000 |
+      +--------------------+--------------------+
+      | ...                | ...                |
+
+      All double floating point will be unchanged for ceil if it is
+      greater than and equal to 4503599627370496.
+ */
+static rtx
+gen_ceil_const_fp (machine_mode inner_mode)
+{
+  REAL_VALUE_TYPE real;
+
+  if (inner_mode == E_SFmode)
+    real_from_integer (&real, inner_mode, 8388608, SIGNED);
+  else if (inner_mode == E_DFmode)
+    real_from_integer (&real, inner_mode, 4503599627370496, SIGNED);
+  else
+    gcc_unreachable ();
+
+  return const_double_from_real_value (real, inner_mode);
+}
+
+static rtx
+expand_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
+    machine_mode vec_fp_mode)
+{
+  /* Step-1: Get the abs float value for mask generation.  */
+  rtx tmp = gen_reg_rtx (vec_fp_mode);
+  rtx abs_ops[] = {tmp, fp_vector};
+  insn_code icode = code_for_pred (ABS, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP, abs_ops);
+
+  /* Step-2: Prepare the scalar float compare register.  */
+  rtx fp_reg = gen_reg_rtx (GET_MODE_INNER (vec_fp_mode));
+  emit_insn (gen_move_insn (fp_reg, fp_scalar));
+
+  /* Step-3: Prepare the vector float compare register.  */
+  rtx vec_dup = gen_reg_rtx (vec_fp_mode);
+  icode = code_for_pred_broadcast (vec_fp_mode);
+  rtx vfmv_ops[] = {vec_dup, fp_reg};
+  emit_vlmax_insn (icode, UNARY_OP, vfmv_ops);
+
+  /* Step-4: Generate the mask.  */
+  machine_mode mask_mode = get_mask_mode (vec_fp_mode);
+  rtx mask = gen_reg_rtx (mask_mode);
+  expand_vec_cmp (mask, code, tmp, vec_dup);
+
+  return mask;
+}
+
+static void
+expand_vec_copysign (rtx op_dest, rtx op_src_0, rtx op_src_1,
+      machine_mode vec_mode)
+{
+  rtx sgnj_ops[] = {op_dest, op_src_0, op_src_1};
+  insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, vec_mode);
+
+  emit_vlmax_insn (icode, BINARY_OP, sgnj_ops);
+}
+
+void
+expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
+ machine_mode vec_int_mode)
+{
+  /* Step-1: Generate the mask on const fp.  */
+  rtx const_fp = gen_ceil_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx mask = expand_vec_float_cmp_mask (op_1, LT, const_fp, vec_fp_mode);
+
+  /* Step-2: Convert to integer on mask, with rounding up (aka ceil).  */
+  rtx tmp = gen_reg_rtx (vec_int_mode);
+  rtx cvt_x_ops[] = {tmp, mask, tmp, op_1};
+  insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RUP, cvt_x_ops);
+
+  /* Step-3: Convert to floating-point on mask for the final result.
+     To avoid unnecessary frm register access, we use RUP here and it will
+     never do the rounding up because the tmp rtx comes from the float
+     to int conversion.  */
+  rtx cvt_fp_ops[] = {op_0, mask, op_1, tmp};
+  icode = code_for_pred (FLOAT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RUP, cvt_fp_ops);
+
+  /* Step-4: Retrieve the sign bit.  */
+  expand_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
+}
+
} // namespace riscv_vector
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 0b395e65228..78f08cb4c40 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -2339,6 +2339,18 @@ (define_mode_attr VCONVERT [
   (RVVM8HF "RVVM8HI") (RVVM4HF "RVVM4HI") (RVVM2HF "RVVM2HI") (RVVM1HF "RVVM1HI") (RVVMF2HF "RVVMF2HI") (RVVMF4HF "RVVMF4HI")
   (RVVM8SF "RVVM8SI") (RVVM4SF "RVVM4SI") (RVVM2SF "RVVM2SI") (RVVM1SF "RVVM1SI") (RVVMF2SF "RVVMF2SI")
   (RVVM8DF "RVVM8DI") (RVVM4DF "RVVM4DI") (RVVM2DF "RVVM2DI") (RVVM1DF "RVVM1DI")
+
+  (V1HF "V1HI") (V2HF "V2HI") (V4HF "V4HI") (V8HF "V8HI") (V16HF "V16HI")
+  (V32HF "V32HI") (V64HF "V64HI") (V128HF "V128HI") (V256HF "V256HI")
+  (V512HF "V512HI") (V1024HF "V1024HI") (V2048HF "V2048HI")
+
+  (V1SF "V1SI") (V2SF "V2SI") (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")
+  (V32SF "V32SI") (V64SF "V64SI") (V128SF "V128SI") (V256SF "V256SI")
+  (V512SF "V512SI") (V1024SF "V1024SI")
+
+  (V1DF "V1DI") (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI") (V16DF "V16DI")
+  (V32DF "V32DI") (V64DF "V64DI") (V128DF "V128DI") (V256DF "V256DI")
+  (V512DF "V512DI")
])
(define_mode_attr vconvert [
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
new file mode 100644
index 00000000000..10d6ec7458e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
new file mode 100644
index 00000000000..f4b6d2d6a9d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
new file mode 100644
index 00000000000..bdbcd97f8c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
new file mode 100644
index 00000000000..5ce313d567b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
new file mode 100644
index 00000000000..7de1dfe8fe5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+
+#include "math-ceil-run-single.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
new file mode 100644
index 00000000000..a98aa6ed2fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+
+#include "math-ceil-run-double.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-3.c
new file mode 100644
index 00000000000..025d251dd08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-3.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-preference=scalable -fno-vect-cost-model -ffast-math -lm" } */
+
+#include "math-ceil-run-single.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-4.c
new file mode 100644
index 00000000000..0dacfe763d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-4.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-preference=scalable -fno-vect-cost-model -ffast-math -lm" } */
+
+#include "math-ceil-run-double.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-double.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-double.h
new file mode 100644
index 00000000000..5b85c007046
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-double.h
@@ -0,0 +1,36 @@
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+TEST_CEIL (double, ceil)
+TEST_ASSERT (double)
+
+TEST_INIT (double, 1.2, 2.0, 1)
+TEST_INIT (double, -1.2, -1.0, 2)
+TEST_INIT (double, 3.0, 3.0, 3)
+TEST_INIT (double, 4503599627370495.5, 4503599627370496.0, 4)
+TEST_INIT (double, 4503599627370497.0, 4503599627370497.0, 5)
+TEST_INIT (double, 0.0, 0.0, 6)
+TEST_INIT (double, -0.0, -0.0, 7)
+TEST_INIT (double, -4503599627370495.5, -4503599627370495.0, 8)
+TEST_INIT (double, -4503599627370496.0, -4503599627370496.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (double, 1, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 2, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 3, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 4, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 5, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 6, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 7, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 8, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 9, ceil, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-single.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-single.h
new file mode 100644
index 00000000000..7b710715d53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-single.h
@@ -0,0 +1,36 @@
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+TEST_CEIL (float, ceilf)
+TEST_ASSERT (float)
+
+TEST_INIT (float, 1.2, 2.0, 1)
+TEST_INIT (float, -1.2, -1.0, 2)
+TEST_INIT (float, 3.0, 3.0, 3)
+TEST_INIT (float, 8388607.5, 8388608.0, 4)
+TEST_INIT (float, 8388609.0, 8388609.0, 5)
+TEST_INIT (float, 0.0, 0.0, 6)
+TEST_INIT (float, -0.0, -0.0, 7)
+TEST_INIT (float, -8388607.5, -8388607.0, 8)
+TEST_INIT (float, -8388608.0, -8388608.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (float, 1, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 2, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 3, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 4, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 5, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 6, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 7, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 8, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 9, ceilf, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
new file mode 100644
index 00000000000..c3e3f1b24db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
@@ -0,0 +1,40 @@
+#include <math.h>
+
+#define TEST_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, TYPE *in, unsigned count) \
+  {                                                               \
+    for (unsigned i = 0; i < count; i++)                          \
+      out[i] = CALL (in[i]);                                      \
+  }
+
+#define TEST_COND_CEIL(TYPE, CALL)                                           \
+  void test_##TYPE##_##CALL (TYPE *out, int *cond, TYPE *in, unsigned count) \
+  {                                                                          \
+    for (unsigned i = 0; i < count; i++)                                     \
+      out[i] = cond[i] ? CALL (in[i]) : in[i];                               \
+  }
+
+#define TEST_INIT(TYPE, VAL_IN, VAL_REF, NUM)                        \
+  void test_##TYPE##_init_##NUM (TYPE *in, TYPE *ref, unsigned size) \
+  {                                                                  \
+    for (unsigned i = 0; i < size; i++)                              \
+      {                                                              \
+ in[i] = VAL_IN;                                              \
+ ref[i] = VAL_REF;                                            \
+      }                                                              \
+  }
+
+#define TEST_ASSERT(TYPE)                                         \
+  void test_##TYPE##_assert (TYPE *out, TYPE *ref, unsigned size) \
+  {                                                               \
+    for (unsigned i = 0; i < size; i++)                           \
+      {                                                           \
+ if (out[i] != ref[i])                                     \
+   __builtin_abort ();                                     \
+      }                                                           \
+  }
+
+#define RUN_TEST(TYPE, NUM, CALL, IN, OUT, REF, SIZE) \
+  test_##TYPE##_init_##NUM (IN, REF, SIZE);           \
+  test_##TYPE##_##CALL (OUT, IN, SIZE);               \
+  test_##TYPE##_assert (OUT, REF, SIZE);
-- 
2.34.1
 
 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2] RISC-V: Support ceil and ceilf auto-vectorization
  2023-09-21 11:30   ` juzhe.zhong
@ 2023-09-21 11:56     ` Li, Pan2
  0 siblings, 0 replies; 15+ messages in thread
From: Li, Pan2 @ 2023-09-21 11:56 UTC (permalink / raw)
  To: juzhe.zhong, gcc-patches; +Cc: Wang, Yanzhang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 25423 bytes --]

Thanks for comments, will send V3 for this.

Pan

From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai>
Sent: Thursday, September 21, 2023 7:31 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Li, Pan2 <pan2.li@intel.com>; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng <kito.cheng@gmail.com>
Subject: Re: [PATCH v2] RISC-V: Support ceil and ceilf auto-vectorization

Also。 Remove math.h include。
Instead, plz use __builtin_ceil.

________________________________
juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>

From: pan2.li<mailto:pan2.li@intel.com>
Date: 2023-09-21 18:32
To: gcc-patches<mailto:gcc-patches@gcc.gnu.org>
CC: juzhe.zhong<mailto:juzhe.zhong@rivai.ai>; pan2.li<mailto:pan2.li@intel.com>; yanzhang.wang<mailto:yanzhang.wang@intel.com>; kito.cheng<mailto:kito.cheng@gmail.com>
Subject: [PATCH v2] RISC-V: Support ceil and ceilf auto-vectorization
From: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>

This patch would like to support auto-vectorization for both the
ceil and ceilf of math.h. It depends on the -ffast-math option.

When we would like to call ceil/ceilf like v2 = ceil (v1), we will
convert it into below insn (reference the implementation of llvm).

* vfcvt.x.f v3, v1, RUP
* vfcvt.f.x v2, v3

However, the floating point value may not need the cvt as above if
its mantissa is zero. For example single precision floating point below.

  +-----------+---------------+
  | float     | binary layout |
  +-----------+---------------+
  | 8388607.5 | 0x4affffff    |
  | 8388608.0 | 0x4b000000    |
  | 8388609.0 | 0x4b000001    |
  +-----------+---------------+

All single floating point great than 8388608.0 will have all zero mantisaa.
We leverage vmflt and mask to filter them out in vector and only do the
cvt on mask.

Befor this patch:
math-ceil-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    ceilf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3

After this patch:
  ...
  fsrmi   3
.L4:
  vfabs.v     v0,v1
  vmv1r.v     v2,v1
  vmflt.vv    v0,v0,v4
  sub         a3,a3,a4
  vfcvt.x.f.v v3,v1,v0.t
  vfcvt.f.x.v v2,v3,v0.t
  vfsgnj.vv   v2,v2,v1
  bne         .L4
.L14:
  fsrm    a6
  ret

Please note VLS mode is also involved in this patch and covered by the
test cases.

gcc/ChangeLog:

* config/riscv/autovec.md (ceil<mode>2): New pattern.
* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
(enum insn_type): Ditto.
(expand_vec_ceil): New function decl.
* config/riscv/riscv-v.cc (gen_ceil_const_fp): New function impl.
(expand_vec_float_cmp_mask): Ditto.
(expand_vec_copysign): Ditto.
(expand_vec_ceil): Ditto.
* config/riscv/vector-iterators.md: Add VLS mode to VCONVERT.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/math-ceil-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-2.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-3.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-4.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-3.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-4.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-double.h: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-single.h: New test.
* gcc.target/riscv/rvv/autovec/test-math.h: New test.

Signed-off-by: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
---
gcc/config/riscv/autovec.md                   |  16 +++
gcc/config/riscv/riscv-protos.h               |   5 +
gcc/config/riscv/riscv-v.cc                   | 116 ++++++++++++++++++
gcc/config/riscv/vector-iterators.md          |  12 ++
.../riscv/rvv/autovec/math-ceil-1.c           |  26 ++++
.../riscv/rvv/autovec/math-ceil-2.c           |  26 ++++
.../riscv/rvv/autovec/math-ceil-3.c           |  28 +++++
.../riscv/rvv/autovec/math-ceil-4.c           |  28 +++++
.../riscv/rvv/autovec/math-ceil-run-1.c       |   4 +
.../riscv/rvv/autovec/math-ceil-run-2.c       |   4 +
.../riscv/rvv/autovec/math-ceil-run-3.c       |   4 +
.../riscv/rvv/autovec/math-ceil-run-4.c       |   4 +
.../riscv/rvv/autovec/math-ceil-run-double.h  |  36 ++++++
.../riscv/rvv/autovec/math-ceil-run-single.h  |  36 ++++++
.../gcc.target/riscv/rvv/autovec/test-math.h  |  40 ++++++
15 files changed, 385 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-4.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-double.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-single.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 493d5745485..36ed839aa5b 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2374,3 +2374,19 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
   riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
   DONE;
})
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   (match_operand:V_VLSF 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_vec_ceil (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5a2d218d67b..7488a9392de 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -250,6 +250,9 @@ enum insn_flags : unsigned int
   /* flags for the floating-point rounding mode.  */
   /* Means INSN has FRM operand and the value is FRM_DYN.  */
   FRM_DYN_P = 1 << 15,
+
+  /* Means INSN has FRM operand and the value is FRM_RUP.  */
+  FRM_RUP_P = 1 << 16,
};
enum insn_type : unsigned int
@@ -290,6 +293,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMA = __MASK_OP_TAMA | UNARY_OP_P,
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
+  UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
@@ -428,6 +432,7 @@ bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
void expand_cond_len_unop (unsigned, rtx *);
void expand_cond_len_binop (unsigned, rtx *);
void expand_reduction (unsigned, unsigned, rtx *, rtx);
+void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
  bool, void (*)(rtx *, rtx));
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index a9287e5d671..9a2cd62c878 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -323,6 +323,8 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
+    if (m_insn_flags & FRM_RUP_P)
+      add_rounding_mode_operand (FRM_RUP);
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
@@ -3405,4 +3407,118 @@ cmp_lmul_gt_one (machine_mode mode)
   return false;
}
+/* We don't have to convert the floating point to integer when the
+   mantissa is zero.  Thus, ther will be a limitation for both the
+   single and double precision floating point.  There will be no
+   mantissa if the floating point is greater than the limit.
+
+   1. Single floating point.
+      +-----------+---------------+
+      | float     | binary layout |
+      +-----------+---------------+
+      | 8388607.5 | 0x4affffff    |
+      +-----------+---------------+
+      | 8388608.0 | 0x4b000000    |
+      +-----------+---------------+
+      | 8388609.0 | 0x4b000001    |
+      +-----------+---------------+
+      | ...       | ...           |
+
+      All single floating point will be unchanged for ceil if it is
+      greater than and equal to 8388608.
+
+   2. Double floating point.
+      +--------------------+--------------------+
+      | float              | binary layout      |
+      +--------------------+--------------------+
+      | 4503599627370495.5 | 0X432fffffffffffff |
+      +--------------------+--------------------+
+      | 4503599627370496.0 | 0X4330000000000000 |
+      +--------------------+--------------------+
+      | 4503599627370497.0 | 0X4340000000000000 |
+      +--------------------+--------------------+
+      | ...                | ...                |
+
+      All double floating point will be unchanged for ceil if it is
+      greater than and equal to 4503599627370496.
+ */
+static rtx
+gen_ceil_const_fp (machine_mode inner_mode)
+{
+  REAL_VALUE_TYPE real;
+
+  if (inner_mode == E_SFmode)
+    real_from_integer (&real, inner_mode, 8388608, SIGNED);
+  else if (inner_mode == E_DFmode)
+    real_from_integer (&real, inner_mode, 4503599627370496, SIGNED);
+  else
+    gcc_unreachable ();
+
+  return const_double_from_real_value (real, inner_mode);
+}
+
+static rtx
+expand_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
+    machine_mode vec_fp_mode)
+{
+  /* Step-1: Get the abs float value for mask generation.  */
+  rtx tmp = gen_reg_rtx (vec_fp_mode);
+  rtx abs_ops[] = {tmp, fp_vector};
+  insn_code icode = code_for_pred (ABS, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP, abs_ops);
+
+  /* Step-2: Prepare the scalar float compare register.  */
+  rtx fp_reg = gen_reg_rtx (GET_MODE_INNER (vec_fp_mode));
+  emit_insn (gen_move_insn (fp_reg, fp_scalar));
+
+  /* Step-3: Prepare the vector float compare register.  */
+  rtx vec_dup = gen_reg_rtx (vec_fp_mode);
+  icode = code_for_pred_broadcast (vec_fp_mode);
+  rtx vfmv_ops[] = {vec_dup, fp_reg};
+  emit_vlmax_insn (icode, UNARY_OP, vfmv_ops);
+
+  /* Step-4: Generate the mask.  */
+  machine_mode mask_mode = get_mask_mode (vec_fp_mode);
+  rtx mask = gen_reg_rtx (mask_mode);
+  expand_vec_cmp (mask, code, tmp, vec_dup);
+
+  return mask;
+}
+
+static void
+expand_vec_copysign (rtx op_dest, rtx op_src_0, rtx op_src_1,
+      machine_mode vec_mode)
+{
+  rtx sgnj_ops[] = {op_dest, op_src_0, op_src_1};
+  insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, vec_mode);
+
+  emit_vlmax_insn (icode, BINARY_OP, sgnj_ops);
+}
+
+void
+expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
+ machine_mode vec_int_mode)
+{
+  /* Step-1: Generate the mask on const fp.  */
+  rtx const_fp = gen_ceil_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx mask = expand_vec_float_cmp_mask (op_1, LT, const_fp, vec_fp_mode);
+
+  /* Step-2: Convert to integer on mask, with rounding up (aka ceil).  */
+  rtx tmp = gen_reg_rtx (vec_int_mode);
+  rtx cvt_x_ops[] = {tmp, mask, tmp, op_1};
+  insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RUP, cvt_x_ops);
+
+  /* Step-3: Convert to floating-point on mask for the final result.
+     To avoid unnecessary frm register access, we use RUP here and it will
+     never do the rounding up because the tmp rtx comes from the float
+     to int conversion.  */
+  rtx cvt_fp_ops[] = {op_0, mask, op_1, tmp};
+  icode = code_for_pred (FLOAT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RUP, cvt_fp_ops);
+
+  /* Step-4: Retrieve the sign bit.  */
+  expand_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
+}
+
} // namespace riscv_vector
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 0b395e65228..78f08cb4c40 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -2339,6 +2339,18 @@ (define_mode_attr VCONVERT [
   (RVVM8HF "RVVM8HI") (RVVM4HF "RVVM4HI") (RVVM2HF "RVVM2HI") (RVVM1HF "RVVM1HI") (RVVMF2HF "RVVMF2HI") (RVVMF4HF "RVVMF4HI")
   (RVVM8SF "RVVM8SI") (RVVM4SF "RVVM4SI") (RVVM2SF "RVVM2SI") (RVVM1SF "RVVM1SI") (RVVMF2SF "RVVMF2SI")
   (RVVM8DF "RVVM8DI") (RVVM4DF "RVVM4DI") (RVVM2DF "RVVM2DI") (RVVM1DF "RVVM1DI")
+
+  (V1HF "V1HI") (V2HF "V2HI") (V4HF "V4HI") (V8HF "V8HI") (V16HF "V16HI")
+  (V32HF "V32HI") (V64HF "V64HI") (V128HF "V128HI") (V256HF "V256HI")
+  (V512HF "V512HI") (V1024HF "V1024HI") (V2048HF "V2048HI")
+
+  (V1SF "V1SI") (V2SF "V2SI") (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")
+  (V32SF "V32SI") (V64SF "V64SI") (V128SF "V128SI") (V256SF "V256SI")
+  (V512SF "V512SI") (V1024SF "V1024SI")
+
+  (V1DF "V1DI") (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI") (V16DF "V16DI")
+  (V32DF "V32DI") (V64DF "V64DI") (V128DF "V128DI") (V256DF "V256DI")
+  (V512DF "V512DI")
])
(define_mode_attr vconvert [
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
new file mode 100644
index 00000000000..10d6ec7458e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
new file mode 100644
index 00000000000..f4b6d2d6a9d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
new file mode 100644
index 00000000000..bdbcd97f8c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(float, ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
new file mode 100644
index 00000000000..5ce313d567b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-4.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(double, ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
new file mode 100644
index 00000000000..7de1dfe8fe5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+
+#include "math-ceil-run-single.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
new file mode 100644
index 00000000000..a98aa6ed2fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -lm" } */
+
+#include "math-ceil-run-double.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-3.c
new file mode 100644
index 00000000000..025d251dd08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-3.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-preference=scalable -fno-vect-cost-model -ffast-math -lm" } */
+
+#include "math-ceil-run-single.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-4.c
new file mode 100644
index 00000000000..0dacfe763d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-4.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-preference=scalable -fno-vect-cost-model -ffast-math -lm" } */
+
+#include "math-ceil-run-double.h"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-double.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-double.h
new file mode 100644
index 00000000000..5b85c007046
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-double.h
@@ -0,0 +1,36 @@
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+TEST_CEIL (double, ceil)
+TEST_ASSERT (double)
+
+TEST_INIT (double, 1.2, 2.0, 1)
+TEST_INIT (double, -1.2, -1.0, 2)
+TEST_INIT (double, 3.0, 3.0, 3)
+TEST_INIT (double, 4503599627370495.5, 4503599627370496.0, 4)
+TEST_INIT (double, 4503599627370497.0, 4503599627370497.0, 5)
+TEST_INIT (double, 0.0, 0.0, 6)
+TEST_INIT (double, -0.0, -0.0, 7)
+TEST_INIT (double, -4503599627370495.5, -4503599627370495.0, 8)
+TEST_INIT (double, -4503599627370496.0, -4503599627370496.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (double, 1, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 2, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 3, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 4, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 5, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 6, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 7, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 8, ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 9, ceil, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-single.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-single.h
new file mode 100644
index 00000000000..7b710715d53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-single.h
@@ -0,0 +1,36 @@
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+TEST_CEIL (float, ceilf)
+TEST_ASSERT (float)
+
+TEST_INIT (float, 1.2, 2.0, 1)
+TEST_INIT (float, -1.2, -1.0, 2)
+TEST_INIT (float, 3.0, 3.0, 3)
+TEST_INIT (float, 8388607.5, 8388608.0, 4)
+TEST_INIT (float, 8388609.0, 8388609.0, 5)
+TEST_INIT (float, 0.0, 0.0, 6)
+TEST_INIT (float, -0.0, -0.0, 7)
+TEST_INIT (float, -8388607.5, -8388607.0, 8)
+TEST_INIT (float, -8388608.0, -8388608.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (float, 1, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 2, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 3, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 4, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 5, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 6, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 7, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 8, ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 9, ceilf, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
new file mode 100644
index 00000000000..c3e3f1b24db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
@@ -0,0 +1,40 @@
+#include <math.h>
+
+#define TEST_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, TYPE *in, unsigned count) \
+  {                                                               \
+    for (unsigned i = 0; i < count; i++)                          \
+      out[i] = CALL (in[i]);                                      \
+  }
+
+#define TEST_COND_CEIL(TYPE, CALL)                                           \
+  void test_##TYPE##_##CALL (TYPE *out, int *cond, TYPE *in, unsigned count) \
+  {                                                                          \
+    for (unsigned i = 0; i < count; i++)                                     \
+      out[i] = cond[i] ? CALL (in[i]) : in[i];                               \
+  }
+
+#define TEST_INIT(TYPE, VAL_IN, VAL_REF, NUM)                        \
+  void test_##TYPE##_init_##NUM (TYPE *in, TYPE *ref, unsigned size) \
+  {                                                                  \
+    for (unsigned i = 0; i < size; i++)                              \
+      {                                                              \
+ in[i] = VAL_IN;                                              \
+ ref[i] = VAL_REF;                                            \
+      }                                                              \
+  }
+
+#define TEST_ASSERT(TYPE)                                         \
+  void test_##TYPE##_assert (TYPE *out, TYPE *ref, unsigned size) \
+  {                                                               \
+    for (unsigned i = 0; i < size; i++)                           \
+      {                                                           \
+ if (out[i] != ref[i])                                     \
+   __builtin_abort ();                                     \
+      }                                                           \
+  }
+
+#define RUN_TEST(TYPE, NUM, CALL, IN, OUT, REF, SIZE) \
+  test_##TYPE##_init_##NUM (IN, REF, SIZE);           \
+  test_##TYPE##_##CALL (OUT, IN, SIZE);               \
+  test_##TYPE##_assert (OUT, REF, SIZE);
--
2.34.1



^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v3] RISC-V: Support ceil and ceilf auto-vectorization
  2023-09-20  2:30 [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization pan2.li
  2023-09-20  2:35 ` juzhe.zhong
  2023-09-21 10:32 ` [PATCH v2] " pan2.li
@ 2023-09-21 15:18 ` pan2.li
  2023-09-21 22:04   ` 钟居哲
  2023-09-22  0:12 ` [PATCH v4] " pan2.li
  3 siblings, 1 reply; 15+ messages in thread
From: pan2.li @ 2023-09-21 15:18 UTC (permalink / raw)
  To: gcc-patches; +Cc: juzhe.zhong, pan2.li, yanzhang.wang, kito.cheng

From: Pan Li <pan2.li@intel.com>

This patch would like to support auto-vectorization for both the
ceil and ceilf of math.h. It depends on the -ffast-math option.

When we would like to call ceil/ceilf like v2 = ceil (v1), we will
convert it into below insn (reference the implementation of llvm).

* vfcvt.x.f v3, v1, RUP
* vfcvt.f.x v2, v3

However, the floating point value may not need the cvt as above if
its mantissa is zero. For example single precision floating point below.

  +-----------+---------------+
  | float     | binary layout |
  +-----------+---------------+
  | 8388607.5 | 0x4affffff    |
  | 8388608.0 | 0x4b000000    |
  | 8388609.0 | 0x4b000001    |
  +-----------+---------------+

All single floating point great than 8388608.0 will have all zero mantisaa.
We leverage vmflt and mask to filter them out in vector and only do the
cvt on mask.

Befor this patch:
math-ceil-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    ceilf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3

After this patch:
  ...
  fsrmi   3
.L4:
  vfabs.v     v0,v1
  vmv1r.v     v2,v1
  vmflt.vv    v0,v0,v4
  sub         a3,a3,a4
  vfcvt.x.f.v v3,v1,v0.t
  vfcvt.f.x.v v2,v3,v0.t
  vfsgnj.vv   v2,v2,v1
  bne         .L4
.L14:
  fsrm    a6
  ret

Please note VLS mode is also involved in this patch and covered by the
test cases.

gcc/ChangeLog:

	* config/riscv/autovec.md (ceil<mode>2): New pattern.
	* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
	(enum insn_type): Ditto.
	(expand_vec_ceil): New function decl.
	* config/riscv/riscv-v.cc (gen_ceil_const_fp): New function impl.
	(expand_vec_float_cmp_mask): Ditto.
	(expand_vec_copysign): Ditto.
	(expand_vec_ceil): Ditto.
	* config/riscv/vector.md: Add VLS mode support.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/vls/def.h: New macro.
	* gcc.target/riscv/rvv/autovec/math-ceil-1.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-2.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-3.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-run-1.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-run-2.c: New test.
	* gcc.target/riscv/rvv/autovec/test-math.h: New test.
	* gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/autovec.md                   |  16 +++
 gcc/config/riscv/riscv-protos.h               |   5 +
 gcc/config/riscv/riscv-v.cc                   | 116 ++++++++++++++++++
 gcc/config/riscv/vector.md                    |   2 +-
 .../riscv/rvv/autovec/math-ceil-1.c           |  26 ++++
 .../riscv/rvv/autovec/math-ceil-2.c           |  26 ++++
 .../riscv/rvv/autovec/math-ceil-3.c           |  28 +++++
 .../riscv/rvv/autovec/math-ceil-run-1.c       |  39 ++++++
 .../riscv/rvv/autovec/math-ceil-run-2.c       |  39 ++++++
 .../gcc.target/riscv/rvv/autovec/test-math.h  |  38 ++++++
 .../gcc.target/riscv/rvv/autovec/vls/def.h    |   8 ++
 .../riscv/rvv/autovec/vls/math-ceil-1.c       |  43 +++++++
 12 files changed, 385 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index f0f1abc4e82..1b4bd82f9ec 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2239,3 +2239,19 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
   riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
   DONE;
 })
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   (match_operand:V_VLSF 1 "register_operand")]
+  "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
+  {
+    riscv_vector::expand_vec_ceil (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 9ea0bcf15d3..07b4ffe3edf 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -250,6 +250,9 @@ enum insn_flags : unsigned int
   /* flags for the floating-point rounding mode.  */
   /* Means INSN has FRM operand and the value is FRM_DYN.  */
   FRM_DYN_P = 1 << 15,
+
+  /* Means INSN has FRM operand and the value is FRM_RUP.  */
+  FRM_RUP_P = 1 << 16,
 };
 
 enum insn_type : unsigned int
@@ -290,6 +293,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMA = __MASK_OP_TAMA | UNARY_OP_P,
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
+  UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
 
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
@@ -432,6 +436,7 @@ bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
 void expand_cond_len_unop (unsigned, rtx *);
 void expand_cond_len_binop (unsigned, rtx *);
 void expand_reduction (unsigned, unsigned, rtx *, rtx);
+void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
 #endif
 bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
 			  bool, void (*)(rtx *, rtx));
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 4b9a494f8eb..35143bc6421 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -323,6 +323,8 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
+    if (m_insn_flags & FRM_RUP_P)
+      add_rounding_mode_operand (FRM_RUP);
 
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
@@ -3439,4 +3441,118 @@ cmp_lmul_gt_one (machine_mode mode)
   return false;
 }
 
+/* We don't have to convert the floating point to integer when the
+   mantissa is zero.  Thus, ther will be a limitation for both the
+   single and double precision floating point.  There will be no
+   mantissa if the floating point is greater than the limit.
+
+   1. Single floating point.
+      +-----------+---------------+
+      | float     | binary layout |
+      +-----------+---------------+
+      | 8388607.5 | 0x4affffff    |
+      +-----------+---------------+
+      | 8388608.0 | 0x4b000000    |
+      +-----------+---------------+
+      | 8388609.0 | 0x4b000001    |
+      +-----------+---------------+
+      | ...       | ...           |
+
+      All single floating point will be unchanged for ceil if it is
+      greater than and equal to 8388608.
+
+   2. Double floating point.
+      +--------------------+--------------------+
+      | float              | binary layout      |
+      +--------------------+--------------------+
+      | 4503599627370495.5 | 0X432fffffffffffff |
+      +--------------------+--------------------+
+      | 4503599627370496.0 | 0X4330000000000000 |
+      +--------------------+--------------------+
+      | 4503599627370497.0 | 0X4340000000000000 |
+      +--------------------+--------------------+
+      | ...                | ...                |
+
+      All double floating point will be unchanged for ceil if it is
+      greater than and equal to 4503599627370496.
+ */
+static rtx
+gen_ceil_const_fp (machine_mode inner_mode)
+{
+  REAL_VALUE_TYPE real;
+
+  if (inner_mode == E_SFmode)
+    real_from_integer (&real, inner_mode, 8388608, SIGNED);
+  else if (inner_mode == E_DFmode)
+    real_from_integer (&real, inner_mode, 4503599627370496, SIGNED);
+  else
+    gcc_unreachable ();
+
+  return const_double_from_real_value (real, inner_mode);
+}
+
+static rtx
+expand_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
+			   machine_mode vec_fp_mode)
+{
+  /* Step-1: Get the abs float value for mask generation.  */
+  rtx tmp = gen_reg_rtx (vec_fp_mode);
+  rtx abs_ops[] = {tmp, fp_vector};
+  insn_code icode = code_for_pred (ABS, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP, abs_ops);
+
+  /* Step-2: Prepare the scalar float compare register.  */
+  rtx fp_reg = gen_reg_rtx (GET_MODE_INNER (vec_fp_mode));
+  emit_insn (gen_move_insn (fp_reg, fp_scalar));
+
+  /* Step-3: Prepare the vector float compare register.  */
+  rtx vec_dup = gen_reg_rtx (vec_fp_mode);
+  icode = code_for_pred_broadcast (vec_fp_mode);
+  rtx vfmv_ops[] = {vec_dup, fp_reg};
+  emit_vlmax_insn (icode, UNARY_OP, vfmv_ops);
+
+  /* Step-4: Generate the mask.  */
+  machine_mode mask_mode = get_mask_mode (vec_fp_mode);
+  rtx mask = gen_reg_rtx (mask_mode);
+  expand_vec_cmp (mask, code, tmp, vec_dup);
+
+  return mask;
+}
+
+static void
+expand_vec_copysign (rtx op_dest, rtx op_src_0, rtx op_src_1,
+		     machine_mode vec_mode)
+{
+  rtx sgnj_ops[] = {op_dest, op_src_0, op_src_1};
+  insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, vec_mode);
+
+  emit_vlmax_insn (icode, BINARY_OP, sgnj_ops);
+}
+
+void
+expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
+		 machine_mode vec_int_mode)
+{
+  /* Step-1: Generate the mask on const fp.  */
+  rtx const_fp = gen_ceil_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx mask = expand_vec_float_cmp_mask (op_1, LT, const_fp, vec_fp_mode);
+
+  /* Step-2: Convert to integer on mask, with rounding up (aka ceil).  */
+  rtx tmp = gen_reg_rtx (vec_int_mode);
+  rtx cvt_x_ops[] = {tmp, mask, tmp, op_1};
+  insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RUP, cvt_x_ops);
+
+  /* Step-3: Convert to floating-point on mask for the final result.
+     To avoid unnecessary frm register access, we use RUP here and it will
+     never do the rounding up because the tmp rtx comes from the float
+     to int conversion.  */
+  rtx cvt_fp_ops[] = {op_0, mask, op_1, tmp};
+  icode = code_for_pred (FLOAT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RUP, cvt_fp_ops);
+
+  /* Step-4: Retrieve the sign bit.  */
+  expand_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
+}
+
 } // namespace riscv_vector
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 36f0256c747..73f90dea36b 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -7438,7 +7438,7 @@ (define_insn "@pred_fcvt_x<v_su>_f<mode>"
 	     (reg:SI VTYPE_REGNUM)
 	     (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
 	  (unspec:<VCONVERT>
-	     [(match_operand:VF 3 "register_operand"         " vr, vr, vr, vr")] VFCVTS)
+	     [(match_operand:V_VLSF 3 "register_operand"     " vr, vr, vr, vr")] VFCVTS)
 	  (match_operand:<VCONVERT> 2 "vector_merge_operand" " vu,  0, vu,  0")))]
   "TARGET_VECTOR"
   "vfcvt.x<v_su>.f.v\t%0,%3%p1"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
new file mode 100644
index 00000000000..0908ef269bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(float, __builtin_ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
new file mode 100644
index 00000000000..65d4807edef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double___builtin_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(double, __builtin_ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
new file mode 100644
index 00000000000..416698a753e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(float, __builtin_ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
new file mode 100644
index 00000000000..1bd5829eeca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+TEST_CEIL (float, __builtin_ceilf)
+TEST_ASSERT (float)
+
+TEST_INIT (float, 1.2, 2.0, 1)
+TEST_INIT (float, -1.2, -1.0, 2)
+TEST_INIT (float, 3.0, 3.0, 3)
+TEST_INIT (float, 8388607.5, 8388608.0, 4)
+TEST_INIT (float, 8388609.0, 8388609.0, 5)
+TEST_INIT (float, 0.0, 0.0, 6)
+TEST_INIT (float, -0.0, -0.0, 7)
+TEST_INIT (float, -8388607.5, -8388607.0, 8)
+TEST_INIT (float, -8388608.0, -8388608.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (float, 1, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 2, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 3, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 4, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 5, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 6, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 7, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 8, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 9, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
new file mode 100644
index 00000000000..d9386215efb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+TEST_CEIL (double, __builtin_ceil)
+TEST_ASSERT (double)
+
+TEST_INIT (double, 1.2, 2.0, 1)
+TEST_INIT (double, -1.2, -1.0, 2)
+TEST_INIT (double, 3.0, 3.0, 3)
+TEST_INIT (double, 4503599627370495.5, 4503599627370496.0, 4)
+TEST_INIT (double, 4503599627370497.0, 4503599627370497.0, 5)
+TEST_INIT (double, 0.0, 0.0, 6)
+TEST_INIT (double, -0.0, -0.0, 7)
+TEST_INIT (double, -4503599627370495.5, -4503599627370495.0, 8)
+TEST_INIT (double, -4503599627370496.0, -4503599627370496.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (double, 1, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 2, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 3, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 4, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 5, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 6, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 7, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 8, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 9, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
new file mode 100644
index 00000000000..6e913da37f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
@@ -0,0 +1,38 @@
+#define TEST_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, TYPE *in, unsigned count) \
+  {                                                               \
+    for (unsigned i = 0; i < count; i++)                          \
+      out[i] = CALL (in[i]);                                      \
+  }
+
+#define TEST_COND_CEIL(TYPE, CALL)                                           \
+  void test_##TYPE##_##CALL (TYPE *out, int *cond, TYPE *in, unsigned count) \
+  {                                                                          \
+    for (unsigned i = 0; i < count; i++)                                     \
+      out[i] = cond[i] ? CALL (in[i]) : in[i];                               \
+  }
+
+#define TEST_INIT(TYPE, VAL_IN, VAL_REF, NUM)                        \
+  void test_##TYPE##_init_##NUM (TYPE *in, TYPE *ref, unsigned size) \
+  {                                                                  \
+    for (unsigned i = 0; i < size; i++)                              \
+      {                                                              \
+	in[i] = VAL_IN;                                              \
+	ref[i] = VAL_REF;                                            \
+      }                                                              \
+  }
+
+#define TEST_ASSERT(TYPE)                                         \
+  void test_##TYPE##_assert (TYPE *out, TYPE *ref, unsigned size) \
+  {                                                               \
+    for (unsigned i = 0; i < size; i++)                           \
+      {                                                           \
+	if (out[i] != ref[i])                                     \
+	  __builtin_abort ();                                     \
+      }                                                           \
+  }
+
+#define RUN_TEST(TYPE, NUM, CALL, IN, OUT, REF, SIZE) \
+  test_##TYPE##_init_##NUM (IN, REF, SIZE);           \
+  test_##TYPE##_##CALL (OUT, IN, SIZE);               \
+  test_##TYPE##_assert (OUT, REF, SIZE);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/def.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/def.h
index 74685f8d05e..ccc1d1d70ab 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/def.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/def.h
@@ -518,3 +518,11 @@ typedef double v512df __attribute__ ((vector_size (4096)));
     for (int i = 0; i < NUM; i++)                                              \
       dst[i] = ((TYPE2) a[i] + b[i] + 1) >> 1;                                 \
   }
+
+#define DEF_CALL_V(PREFIX, NUM, TYPE, CALL)                                    \
+  void __attribute__ ((noinline, noclone))                                     \
+  PREFIX##_##TYPE##NUM (TYPE *restrict a, TYPE *restrict b)                    \
+  {                                                                            \
+    for (int i = 0; i < NUM; ++i)                                              \
+      a[i] = CALL (b[i]);                                                      \
+  }
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c
new file mode 100644
index 00000000000..cd19a686c86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_CALL_V (ceilf, 1, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 2, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 4, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 8, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 16, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 32, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 64, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 128, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 256, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 512, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 1024, float, __builtin_ceilf)
+
+DEF_CALL_V (ceil, 1, double, __builtin_ceil)
+DEF_CALL_V (ceil, 2, double, __builtin_ceil)
+DEF_CALL_V (ceil, 4, double, __builtin_ceil)
+DEF_CALL_V (ceil, 8, double, __builtin_ceil)
+DEF_CALL_V (ceil, 16, double, __builtin_ceil)
+DEF_CALL_V (ceil, 32, double, __builtin_ceil)
+DEF_CALL_V (ceil, 64, double, __builtin_ceil)
+DEF_CALL_V (ceil, 128, double, __builtin_ceil)
+DEF_CALL_V (ceil, 256, double, __builtin_ceil)
+DEF_CALL_V (ceil, 512, double, __builtin_ceil)
+
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
+/* { dg-final { scan-assembler-times {vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 19 } } */
+/* { dg-final { scan-assembler-times {vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 19 } } */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3] RISC-V: Support ceil and ceilf auto-vectorization
  2023-09-21 15:18 ` [PATCH v3] " pan2.li
@ 2023-09-21 22:04   ` 钟居哲
  0 siblings, 0 replies; 15+ messages in thread
From: 钟居哲 @ 2023-09-21 22:04 UTC (permalink / raw)
  To: pan2.li, gcc-patches; +Cc: pan2.li, yanzhang.wang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 26021 bytes --]

Add FP16 tests:
https://godbolt.org/z/e9vrzKTvn

Like LLVM.


diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/def.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/def.h
index 74685f8d05e..ccc1d1d70ab 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/def.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/def.h
@@ -518,3 +518,11 @@ typedef double v512df __attribute__ ((vector_size (4096)));
     for (int i = 0; i < NUM; i++)                                              \
       dst[i] = ((TYPE2) a[i] + b[i] + 1) >> 1;                                 \
   }
+
+#define DEF_CALL_V(PREFIX, NUM, TYPE, CALL)                                    \
+  void __attribute__ ((noinline, noclone))                                     \
+  PREFIX##_##TYPE##NUM (TYPE *restrict a, TYPE *restrict b)                    \
+  {                                                                            \
+    for (int i = 0; i < NUM; ++i)                                              \
+      a[i] = CALL (b[i]);                                                      \
+  }

You don't need to add this.

Just directly use this in def.h:
#define DEF_OP_V(PREFIX, NUM, TYPE, OP)                                        \
  void __attribute__ ((noinline, noclone))                                     \
  PREFIX##_##TYPE##NUM (TYPE *restrict a, TYPE *restrict b)                    \
  {                                                                            \
    for (int i = 0; i < NUM; ++i)                                              \
      a[i] = OP (b[i]);                                                        \
  }





juzhe.zhong@rivai.ai
 
From: pan2.li
Date: 2023-09-21 23:18
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v3] RISC-V: Support ceil and ceilf auto-vectorization
From: Pan Li <pan2.li@intel.com>
 
This patch would like to support auto-vectorization for both the
ceil and ceilf of math.h. It depends on the -ffast-math option.
 
When we would like to call ceil/ceilf like v2 = ceil (v1), we will
convert it into below insn (reference the implementation of llvm).
 
* vfcvt.x.f v3, v1, RUP
* vfcvt.f.x v2, v3
 
However, the floating point value may not need the cvt as above if
its mantissa is zero. For example single precision floating point below.
 
  +-----------+---------------+
  | float     | binary layout |
  +-----------+---------------+
  | 8388607.5 | 0x4affffff    |
  | 8388608.0 | 0x4b000000    |
  | 8388609.0 | 0x4b000001    |
  +-----------+---------------+
 
All single floating point great than 8388608.0 will have all zero mantisaa.
We leverage vmflt and mask to filter them out in vector and only do the
cvt on mask.
 
Befor this patch:
math-ceil-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    ceilf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3
 
After this patch:
  ...
  fsrmi   3
.L4:
  vfabs.v     v0,v1
  vmv1r.v     v2,v1
  vmflt.vv    v0,v0,v4
  sub         a3,a3,a4
  vfcvt.x.f.v v3,v1,v0.t
  vfcvt.f.x.v v2,v3,v0.t
  vfsgnj.vv   v2,v2,v1
  bne         .L4
.L14:
  fsrm    a6
  ret
 
Please note VLS mode is also involved in this patch and covered by the
test cases.
 
gcc/ChangeLog:
 
* config/riscv/autovec.md (ceil<mode>2): New pattern.
* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
(enum insn_type): Ditto.
(expand_vec_ceil): New function decl.
* config/riscv/riscv-v.cc (gen_ceil_const_fp): New function impl.
(expand_vec_float_cmp_mask): Ditto.
(expand_vec_copysign): Ditto.
(expand_vec_ceil): Ditto.
* config/riscv/vector.md: Add VLS mode support.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/vls/def.h: New macro.
* gcc.target/riscv/rvv/autovec/math-ceil-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-2.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-3.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/test-math.h: New test.
* gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c: New test.
 
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/autovec.md                   |  16 +++
gcc/config/riscv/riscv-protos.h               |   5 +
gcc/config/riscv/riscv-v.cc                   | 116 ++++++++++++++++++
gcc/config/riscv/vector.md                    |   2 +-
.../riscv/rvv/autovec/math-ceil-1.c           |  26 ++++
.../riscv/rvv/autovec/math-ceil-2.c           |  26 ++++
.../riscv/rvv/autovec/math-ceil-3.c           |  28 +++++
.../riscv/rvv/autovec/math-ceil-run-1.c       |  39 ++++++
.../riscv/rvv/autovec/math-ceil-run-2.c       |  39 ++++++
.../gcc.target/riscv/rvv/autovec/test-math.h  |  38 ++++++
.../gcc.target/riscv/rvv/autovec/vls/def.h    |   8 ++
.../riscv/rvv/autovec/vls/math-ceil-1.c       |  43 +++++++
12 files changed, 385 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c
 
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index f0f1abc4e82..1b4bd82f9ec 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2239,3 +2239,19 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
   riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
   DONE;
})
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   (match_operand:V_VLSF 1 "register_operand")]
+  "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
+  {
+    riscv_vector::expand_vec_ceil (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 9ea0bcf15d3..07b4ffe3edf 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -250,6 +250,9 @@ enum insn_flags : unsigned int
   /* flags for the floating-point rounding mode.  */
   /* Means INSN has FRM operand and the value is FRM_DYN.  */
   FRM_DYN_P = 1 << 15,
+
+  /* Means INSN has FRM operand and the value is FRM_RUP.  */
+  FRM_RUP_P = 1 << 16,
};
enum insn_type : unsigned int
@@ -290,6 +293,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMA = __MASK_OP_TAMA | UNARY_OP_P,
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
+  UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
@@ -432,6 +436,7 @@ bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
void expand_cond_len_unop (unsigned, rtx *);
void expand_cond_len_binop (unsigned, rtx *);
void expand_reduction (unsigned, unsigned, rtx *, rtx);
+void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
  bool, void (*)(rtx *, rtx));
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 4b9a494f8eb..35143bc6421 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -323,6 +323,8 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
+    if (m_insn_flags & FRM_RUP_P)
+      add_rounding_mode_operand (FRM_RUP);
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
@@ -3439,4 +3441,118 @@ cmp_lmul_gt_one (machine_mode mode)
   return false;
}
+/* We don't have to convert the floating point to integer when the
+   mantissa is zero.  Thus, ther will be a limitation for both the
+   single and double precision floating point.  There will be no
+   mantissa if the floating point is greater than the limit.
+
+   1. Single floating point.
+      +-----------+---------------+
+      | float     | binary layout |
+      +-----------+---------------+
+      | 8388607.5 | 0x4affffff    |
+      +-----------+---------------+
+      | 8388608.0 | 0x4b000000    |
+      +-----------+---------------+
+      | 8388609.0 | 0x4b000001    |
+      +-----------+---------------+
+      | ...       | ...           |
+
+      All single floating point will be unchanged for ceil if it is
+      greater than and equal to 8388608.
+
+   2. Double floating point.
+      +--------------------+--------------------+
+      | float              | binary layout      |
+      +--------------------+--------------------+
+      | 4503599627370495.5 | 0X432fffffffffffff |
+      +--------------------+--------------------+
+      | 4503599627370496.0 | 0X4330000000000000 |
+      +--------------------+--------------------+
+      | 4503599627370497.0 | 0X4340000000000000 |
+      +--------------------+--------------------+
+      | ...                | ...                |
+
+      All double floating point will be unchanged for ceil if it is
+      greater than and equal to 4503599627370496.
+ */
+static rtx
+gen_ceil_const_fp (machine_mode inner_mode)
+{
+  REAL_VALUE_TYPE real;
+
+  if (inner_mode == E_SFmode)
+    real_from_integer (&real, inner_mode, 8388608, SIGNED);
+  else if (inner_mode == E_DFmode)
+    real_from_integer (&real, inner_mode, 4503599627370496, SIGNED);
+  else
+    gcc_unreachable ();
+
+  return const_double_from_real_value (real, inner_mode);
+}
+
+static rtx
+expand_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
+    machine_mode vec_fp_mode)
+{
+  /* Step-1: Get the abs float value for mask generation.  */
+  rtx tmp = gen_reg_rtx (vec_fp_mode);
+  rtx abs_ops[] = {tmp, fp_vector};
+  insn_code icode = code_for_pred (ABS, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP, abs_ops);
+
+  /* Step-2: Prepare the scalar float compare register.  */
+  rtx fp_reg = gen_reg_rtx (GET_MODE_INNER (vec_fp_mode));
+  emit_insn (gen_move_insn (fp_reg, fp_scalar));
+
+  /* Step-3: Prepare the vector float compare register.  */
+  rtx vec_dup = gen_reg_rtx (vec_fp_mode);
+  icode = code_for_pred_broadcast (vec_fp_mode);
+  rtx vfmv_ops[] = {vec_dup, fp_reg};
+  emit_vlmax_insn (icode, UNARY_OP, vfmv_ops);
+
+  /* Step-4: Generate the mask.  */
+  machine_mode mask_mode = get_mask_mode (vec_fp_mode);
+  rtx mask = gen_reg_rtx (mask_mode);
+  expand_vec_cmp (mask, code, tmp, vec_dup);
+
+  return mask;
+}
+
+static void
+expand_vec_copysign (rtx op_dest, rtx op_src_0, rtx op_src_1,
+      machine_mode vec_mode)
+{
+  rtx sgnj_ops[] = {op_dest, op_src_0, op_src_1};
+  insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, vec_mode);
+
+  emit_vlmax_insn (icode, BINARY_OP, sgnj_ops);
+}
+
+void
+expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
+ machine_mode vec_int_mode)
+{
+  /* Step-1: Generate the mask on const fp.  */
+  rtx const_fp = gen_ceil_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx mask = expand_vec_float_cmp_mask (op_1, LT, const_fp, vec_fp_mode);
+
+  /* Step-2: Convert to integer on mask, with rounding up (aka ceil).  */
+  rtx tmp = gen_reg_rtx (vec_int_mode);
+  rtx cvt_x_ops[] = {tmp, mask, tmp, op_1};
+  insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RUP, cvt_x_ops);
+
+  /* Step-3: Convert to floating-point on mask for the final result.
+     To avoid unnecessary frm register access, we use RUP here and it will
+     never do the rounding up because the tmp rtx comes from the float
+     to int conversion.  */
+  rtx cvt_fp_ops[] = {op_0, mask, op_1, tmp};
+  icode = code_for_pred (FLOAT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RUP, cvt_fp_ops);
+
+  /* Step-4: Retrieve the sign bit.  */
+  expand_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
+}
+
} // namespace riscv_vector
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 36f0256c747..73f90dea36b 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -7438,7 +7438,7 @@ (define_insn "@pred_fcvt_x<v_su>_f<mode>"
     (reg:SI VTYPE_REGNUM)
     (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:<VCONVERT>
-      [(match_operand:VF 3 "register_operand"         " vr, vr, vr, vr")] VFCVTS)
+      [(match_operand:V_VLSF 3 "register_operand"     " vr, vr, vr, vr")] VFCVTS)
  (match_operand:<VCONVERT> 2 "vector_merge_operand" " vu,  0, vu,  0")))]
   "TARGET_VECTOR"
   "vfcvt.x<v_su>.f.v\t%0,%3%p1"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
new file mode 100644
index 00000000000..0908ef269bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(float, __builtin_ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
new file mode 100644
index 00000000000..65d4807edef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double___builtin_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(double, __builtin_ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
new file mode 100644
index 00000000000..416698a753e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(float, __builtin_ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
new file mode 100644
index 00000000000..1bd5829eeca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+TEST_CEIL (float, __builtin_ceilf)
+TEST_ASSERT (float)
+
+TEST_INIT (float, 1.2, 2.0, 1)
+TEST_INIT (float, -1.2, -1.0, 2)
+TEST_INIT (float, 3.0, 3.0, 3)
+TEST_INIT (float, 8388607.5, 8388608.0, 4)
+TEST_INIT (float, 8388609.0, 8388609.0, 5)
+TEST_INIT (float, 0.0, 0.0, 6)
+TEST_INIT (float, -0.0, -0.0, 7)
+TEST_INIT (float, -8388607.5, -8388607.0, 8)
+TEST_INIT (float, -8388608.0, -8388608.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (float, 1, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 2, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 3, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 4, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 5, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 6, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 7, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 8, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 9, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
new file mode 100644
index 00000000000..d9386215efb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+TEST_CEIL (double, __builtin_ceil)
+TEST_ASSERT (double)
+
+TEST_INIT (double, 1.2, 2.0, 1)
+TEST_INIT (double, -1.2, -1.0, 2)
+TEST_INIT (double, 3.0, 3.0, 3)
+TEST_INIT (double, 4503599627370495.5, 4503599627370496.0, 4)
+TEST_INIT (double, 4503599627370497.0, 4503599627370497.0, 5)
+TEST_INIT (double, 0.0, 0.0, 6)
+TEST_INIT (double, -0.0, -0.0, 7)
+TEST_INIT (double, -4503599627370495.5, -4503599627370495.0, 8)
+TEST_INIT (double, -4503599627370496.0, -4503599627370496.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (double, 1, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 2, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 3, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 4, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 5, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 6, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 7, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 8, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 9, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
new file mode 100644
index 00000000000..6e913da37f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
@@ -0,0 +1,38 @@
+#define TEST_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, TYPE *in, unsigned count) \
+  {                                                               \
+    for (unsigned i = 0; i < count; i++)                          \
+      out[i] = CALL (in[i]);                                      \
+  }
+
+#define TEST_COND_CEIL(TYPE, CALL)                                           \
+  void test_##TYPE##_##CALL (TYPE *out, int *cond, TYPE *in, unsigned count) \
+  {                                                                          \
+    for (unsigned i = 0; i < count; i++)                                     \
+      out[i] = cond[i] ? CALL (in[i]) : in[i];                               \
+  }
+
+#define TEST_INIT(TYPE, VAL_IN, VAL_REF, NUM)                        \
+  void test_##TYPE##_init_##NUM (TYPE *in, TYPE *ref, unsigned size) \
+  {                                                                  \
+    for (unsigned i = 0; i < size; i++)                              \
+      {                                                              \
+ in[i] = VAL_IN;                                              \
+ ref[i] = VAL_REF;                                            \
+      }                                                              \
+  }
+
+#define TEST_ASSERT(TYPE)                                         \
+  void test_##TYPE##_assert (TYPE *out, TYPE *ref, unsigned size) \
+  {                                                               \
+    for (unsigned i = 0; i < size; i++)                           \
+      {                                                           \
+ if (out[i] != ref[i])                                     \
+   __builtin_abort ();                                     \
+      }                                                           \
+  }
+
+#define RUN_TEST(TYPE, NUM, CALL, IN, OUT, REF, SIZE) \
+  test_##TYPE##_init_##NUM (IN, REF, SIZE);           \
+  test_##TYPE##_##CALL (OUT, IN, SIZE);               \
+  test_##TYPE##_assert (OUT, REF, SIZE);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/def.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/def.h
index 74685f8d05e..ccc1d1d70ab 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/def.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/def.h
@@ -518,3 +518,11 @@ typedef double v512df __attribute__ ((vector_size (4096)));
     for (int i = 0; i < NUM; i++)                                              \
       dst[i] = ((TYPE2) a[i] + b[i] + 1) >> 1;                                 \
   }
+
+#define DEF_CALL_V(PREFIX, NUM, TYPE, CALL)                                    \
+  void __attribute__ ((noinline, noclone))                                     \
+  PREFIX##_##TYPE##NUM (TYPE *restrict a, TYPE *restrict b)                    \
+  {                                                                            \
+    for (int i = 0; i < NUM; ++i)                                              \
+      a[i] = CALL (b[i]);                                                      \
+  }
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c
new file mode 100644
index 00000000000..cd19a686c86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_CALL_V (ceilf, 1, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 2, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 4, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 8, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 16, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 32, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 64, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 128, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 256, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 512, float, __builtin_ceilf)
+DEF_CALL_V (ceilf, 1024, float, __builtin_ceilf)
+
+DEF_CALL_V (ceil, 1, double, __builtin_ceil)
+DEF_CALL_V (ceil, 2, double, __builtin_ceil)
+DEF_CALL_V (ceil, 4, double, __builtin_ceil)
+DEF_CALL_V (ceil, 8, double, __builtin_ceil)
+DEF_CALL_V (ceil, 16, double, __builtin_ceil)
+DEF_CALL_V (ceil, 32, double, __builtin_ceil)
+DEF_CALL_V (ceil, 64, double, __builtin_ceil)
+DEF_CALL_V (ceil, 128, double, __builtin_ceil)
+DEF_CALL_V (ceil, 256, double, __builtin_ceil)
+DEF_CALL_V (ceil, 512, double, __builtin_ceil)
+
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
+/* { dg-final { scan-assembler-times {vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 19 } } */
+/* { dg-final { scan-assembler-times {vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 19 } } */
-- 
2.34.1
 
 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v4] RISC-V: Support ceil and ceilf auto-vectorization
  2023-09-20  2:30 [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization pan2.li
                   ` (2 preceding siblings ...)
  2023-09-21 15:18 ` [PATCH v3] " pan2.li
@ 2023-09-22  0:12 ` pan2.li
  2023-09-22  0:46   ` juzhe.zhong
  3 siblings, 1 reply; 15+ messages in thread
From: pan2.li @ 2023-09-22  0:12 UTC (permalink / raw)
  To: gcc-patches; +Cc: juzhe.zhong, pan2.li, yanzhang.wang, kito.cheng

From: Pan Li <pan2.li@intel.com>

Update in v4:

* Add test for _Float16.
* Remove unnecessary macro in def.h for test.

Original log:

This patch would like to support auto-vectorization for both the
ceil and ceilf of math.h. It depends on the -ffast-math option.

When we would like to call ceil/ceilf like v2 = ceil (v1), we will
convert it into below insn (reference the implementation of llvm).

* vfcvt.x.f v3, v1, RUP
* vfcvt.f.x v2, v3

However, the floating point value may not need the cvt as above if
its mantissa is zero. For example single precision floating point below.

  +-----------+---------------+
  | float     | binary layout |
  +-----------+---------------+
  | 8388607.5 | 0x4affffff    |
  | 8388608.0 | 0x4b000000    |
  | 8388609.0 | 0x4b000001    |
  +-----------+---------------+

All single floating point great than 8388608.0 will have all zero mantisaa.
We leverage vmflt and mask to filter them out in vector and only do the
cvt on mask.

Befor this patch:
math-ceil-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    ceilf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3

After this patch:
  ...
  fsrmi   3
.L4:
  vfabs.v     v0,v1
  vmv1r.v     v2,v1
  vmflt.vv    v0,v0,v4
  sub         a3,a3,a4
  vfcvt.x.f.v v3,v1,v0.t
  vfcvt.f.x.v v2,v3,v0.t
  vfsgnj.vv   v2,v2,v1
  bne         .L4
.L14:
  fsrm    a6
  ret

Please note VLS mode is also involved in this patch and covered by the
test cases.

gcc/ChangeLog:

	* config/riscv/autovec.md (ceil<mode>2): New pattern.
	* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
	(enum insn_type): Ditto.
	(expand_vec_ceil): New function decl.
	* config/riscv/riscv-v.cc (gen_ceil_const_fp): New function impl.
	(expand_vec_float_cmp_mask): Ditto.
	(expand_vec_copysign): Ditto.
	(expand_vec_ceil): Ditto.
	* config/riscv/vector.md: Add VLS mode support.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/math-ceil-0.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-1.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-2.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-3.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-run-0.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-run-1.c: New test.
	* gcc.target/riscv/rvv/autovec/math-ceil-run-2.c: New test.
	* gcc.target/riscv/rvv/autovec/test-math.h: New test.
	* gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/autovec.md                   |  16 +++
 gcc/config/riscv/riscv-protos.h               |   5 +
 gcc/config/riscv/riscv-v.cc                   | 133 ++++++++++++++++++
 gcc/config/riscv/vector.md                    |   2 +-
 .../riscv/rvv/autovec/math-ceil-0.c           |  26 ++++
 .../riscv/rvv/autovec/math-ceil-1.c           |  26 ++++
 .../riscv/rvv/autovec/math-ceil-2.c           |  26 ++++
 .../riscv/rvv/autovec/math-ceil-3.c           |  28 ++++
 .../riscv/rvv/autovec/math-ceil-run-0.c       |  39 +++++
 .../riscv/rvv/autovec/math-ceil-run-1.c       |  39 +++++
 .../riscv/rvv/autovec/math-ceil-run-2.c       |  39 +++++
 .../gcc.target/riscv/rvv/autovec/test-math.h  |  38 +++++
 .../riscv/rvv/autovec/vls/math-ceil-1.c       |  56 ++++++++
 13 files changed, 472 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-0.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-0.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index f0f1abc4e82..1b4bd82f9ec 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2239,3 +2239,19 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
   riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
   DONE;
 })
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   (match_operand:V_VLSF 1 "register_operand")]
+  "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
+  {
+    riscv_vector::expand_vec_ceil (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 9ea0bcf15d3..07b4ffe3edf 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -250,6 +250,9 @@ enum insn_flags : unsigned int
   /* flags for the floating-point rounding mode.  */
   /* Means INSN has FRM operand and the value is FRM_DYN.  */
   FRM_DYN_P = 1 << 15,
+
+  /* Means INSN has FRM operand and the value is FRM_RUP.  */
+  FRM_RUP_P = 1 << 16,
 };
 
 enum insn_type : unsigned int
@@ -290,6 +293,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMA = __MASK_OP_TAMA | UNARY_OP_P,
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
+  UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
 
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
@@ -432,6 +436,7 @@ bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
 void expand_cond_len_unop (unsigned, rtx *);
 void expand_cond_len_binop (unsigned, rtx *);
 void expand_reduction (unsigned, unsigned, rtx *, rtx);
+void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
 #endif
 bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
 			  bool, void (*)(rtx *, rtx));
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 4b9a494f8eb..f63dec573ef 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -323,6 +323,8 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
+    if (m_insn_flags & FRM_RUP_P)
+      add_rounding_mode_operand (FRM_RUP);
 
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
@@ -3439,4 +3441,135 @@ cmp_lmul_gt_one (machine_mode mode)
   return false;
 }
 
+/* We don't have to convert the floating point to integer when the
+   mantissa is zero.  Thus, ther will be a limitation for both the
+   single and double precision floating point.  There will be no
+   mantissa if the floating point is greater than the limit.
+
+   1. Half floating point.
+      +-----------+---------------+
+      | float     | binary layout |
+      +-----------+---------------+
+      | 1023.5    | 0x63ff        |
+      +-----------+---------------+
+      | 1024.0    | 0x6400        |
+      +-----------+---------------+
+      | 1025.0    | 0x6401        |
+      +-----------+---------------+
+      | ...       | ...           |
+
+      All half floating point will be unchanged for ceil if it is
+      greater than and equal to 1024.
+
+   2. Single floating point.
+      +-----------+---------------+
+      | float     | binary layout |
+      +-----------+---------------+
+      | 8388607.5 | 0x4affffff    |
+      +-----------+---------------+
+      | 8388608.0 | 0x4b000000    |
+      +-----------+---------------+
+      | 8388609.0 | 0x4b000001    |
+      +-----------+---------------+
+      | ...       | ...           |
+
+      All single floating point will be unchanged for ceil if it is
+      greater than and equal to 8388608.
+
+   3. Double floating point.
+      +--------------------+--------------------+
+      | float              | binary layout      |
+      +--------------------+--------------------+
+      | 4503599627370495.5 | 0X432fffffffffffff |
+      +--------------------+--------------------+
+      | 4503599627370496.0 | 0X4330000000000000 |
+      +--------------------+--------------------+
+      | 4503599627370497.0 | 0X4340000000000000 |
+      +--------------------+--------------------+
+      | ...                | ...                |
+
+      All double floating point will be unchanged for ceil if it is
+      greater than and equal to 4503599627370496.
+ */
+static rtx
+gen_ceil_const_fp (machine_mode inner_mode)
+{
+  REAL_VALUE_TYPE real;
+
+  if (inner_mode == E_HFmode)
+    real_from_integer (&real, inner_mode, 1024, SIGNED);
+  else if (inner_mode == E_SFmode)
+    real_from_integer (&real, inner_mode, 8388608, SIGNED);
+  else if (inner_mode == E_DFmode)
+    real_from_integer (&real, inner_mode, 4503599627370496, SIGNED);
+  else
+    gcc_unreachable ();
+
+  return const_double_from_real_value (real, inner_mode);
+}
+
+static rtx
+expand_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
+			   machine_mode vec_fp_mode)
+{
+  /* Step-1: Get the abs float value for mask generation.  */
+  rtx tmp = gen_reg_rtx (vec_fp_mode);
+  rtx abs_ops[] = {tmp, fp_vector};
+  insn_code icode = code_for_pred (ABS, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP, abs_ops);
+
+  /* Step-2: Prepare the scalar float compare register.  */
+  rtx fp_reg = gen_reg_rtx (GET_MODE_INNER (vec_fp_mode));
+  emit_insn (gen_move_insn (fp_reg, fp_scalar));
+
+  /* Step-3: Prepare the vector float compare register.  */
+  rtx vec_dup = gen_reg_rtx (vec_fp_mode);
+  icode = code_for_pred_broadcast (vec_fp_mode);
+  rtx vfmv_ops[] = {vec_dup, fp_reg};
+  emit_vlmax_insn (icode, UNARY_OP, vfmv_ops);
+
+  /* Step-4: Generate the mask.  */
+  machine_mode mask_mode = get_mask_mode (vec_fp_mode);
+  rtx mask = gen_reg_rtx (mask_mode);
+  expand_vec_cmp (mask, code, tmp, vec_dup);
+
+  return mask;
+}
+
+static void
+expand_vec_copysign (rtx op_dest, rtx op_src_0, rtx op_src_1,
+		     machine_mode vec_mode)
+{
+  rtx sgnj_ops[] = {op_dest, op_src_0, op_src_1};
+  insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, vec_mode);
+
+  emit_vlmax_insn (icode, BINARY_OP, sgnj_ops);
+}
+
+void
+expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
+		 machine_mode vec_int_mode)
+{
+  /* Step-1: Generate the mask on const fp.  */
+  rtx const_fp = gen_ceil_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx mask = expand_vec_float_cmp_mask (op_1, LT, const_fp, vec_fp_mode);
+
+  /* Step-2: Convert to integer on mask, with rounding up (aka ceil).  */
+  rtx tmp = gen_reg_rtx (vec_int_mode);
+  rtx cvt_x_ops[] = {tmp, mask, tmp, op_1};
+  insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RUP, cvt_x_ops);
+
+  /* Step-3: Convert to floating-point on mask for the final result.
+     To avoid unnecessary frm register access, we use RUP here and it will
+     never do the rounding up because the tmp rtx comes from the float
+     to int conversion.  */
+  rtx cvt_fp_ops[] = {op_0, mask, op_1, tmp};
+  icode = code_for_pred (FLOAT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RUP, cvt_fp_ops);
+
+  /* Step-4: Retrieve the sign bit.  */
+  expand_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
+}
+
 } // namespace riscv_vector
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 36f0256c747..73f90dea36b 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -7438,7 +7438,7 @@ (define_insn "@pred_fcvt_x<v_su>_f<mode>"
 	     (reg:SI VTYPE_REGNUM)
 	     (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
 	  (unspec:<VCONVERT>
-	     [(match_operand:VF 3 "register_operand"         " vr, vr, vr, vr")] VFCVTS)
+	     [(match_operand:V_VLSF 3 "register_operand"     " vr, vr, vr, vr")] VFCVTS)
 	  (match_operand:<VCONVERT> 2 "vector_merge_operand" " vu,  0, vu,  0")))]
   "TARGET_VECTOR"
   "vfcvt.x<v_su>.f.v\t%0,%3%p1"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-0.c
new file mode 100644
index 00000000000..88a2ac4b338
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-0.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test__Float16___builtin_ceilf16:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e16,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(_Float16, __builtin_ceilf16)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
new file mode 100644
index 00000000000..0908ef269bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(float, __builtin_ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
new file mode 100644
index 00000000000..65d4807edef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double___builtin_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(double, __builtin_ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
new file mode 100644
index 00000000000..416698a753e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(float, __builtin_ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-0.c
new file mode 100644
index 00000000000..f1946e197cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-0.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -std=c2x -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+_Float16 in[ARRAY_SIZE];
+_Float16 out[ARRAY_SIZE];
+_Float16 ref[ARRAY_SIZE];
+
+TEST_CEIL (_Float16, __builtin_ceilf16)
+TEST_ASSERT (_Float16)
+
+TEST_INIT (_Float16, 1.2, 2.0, 1)
+TEST_INIT (_Float16, -1.2, -1.0, 2)
+TEST_INIT (_Float16, 3.0, 3.0, 3)
+TEST_INIT (_Float16, 1023.5, 1024.0, 4)
+TEST_INIT (_Float16, 1025.0, 1025.0, 5)
+TEST_INIT (_Float16, 0.0, 0.0, 6)
+TEST_INIT (_Float16, -0.0, -0.0, 7)
+TEST_INIT (_Float16, -1023.5, -1023.0, 8)
+TEST_INIT (_Float16, -1024.0, -1024.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (_Float16, 1, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 2, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 3, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 4, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 5, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 6, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 7, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 8, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 9, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
new file mode 100644
index 00000000000..202944ddd92
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-march=rv64gcv -std=c99 -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+TEST_CEIL (float, __builtin_ceilf)
+TEST_ASSERT (float)
+
+TEST_INIT (float, 1.2, 2.0, 1)
+TEST_INIT (float, -1.2, -1.0, 2)
+TEST_INIT (float, 3.0, 3.0, 3)
+TEST_INIT (float, 8388607.5, 8388608.0, 4)
+TEST_INIT (float, 8388609.0, 8388609.0, 5)
+TEST_INIT (float, 0.0, 0.0, 6)
+TEST_INIT (float, -0.0, -0.0, 7)
+TEST_INIT (float, -8388607.5, -8388607.0, 8)
+TEST_INIT (float, -8388608.0, -8388608.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (float, 1, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 2, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 3, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 4, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 5, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 6, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 7, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 8, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 9, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
new file mode 100644
index 00000000000..f0ff9bca0af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-march=rv64gcv -std=c99 -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+TEST_CEIL (double, __builtin_ceil)
+TEST_ASSERT (double)
+
+TEST_INIT (double, 1.2, 2.0, 1)
+TEST_INIT (double, -1.2, -1.0, 2)
+TEST_INIT (double, 3.0, 3.0, 3)
+TEST_INIT (double, 4503599627370495.5, 4503599627370496.0, 4)
+TEST_INIT (double, 4503599627370497.0, 4503599627370497.0, 5)
+TEST_INIT (double, 0.0, 0.0, 6)
+TEST_INIT (double, -0.0, -0.0, 7)
+TEST_INIT (double, -4503599627370495.5, -4503599627370495.0, 8)
+TEST_INIT (double, -4503599627370496.0, -4503599627370496.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (double, 1, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 2, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 3, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 4, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 5, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 6, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 7, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 8, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 9, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
new file mode 100644
index 00000000000..6e913da37f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
@@ -0,0 +1,38 @@
+#define TEST_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, TYPE *in, unsigned count) \
+  {                                                               \
+    for (unsigned i = 0; i < count; i++)                          \
+      out[i] = CALL (in[i]);                                      \
+  }
+
+#define TEST_COND_CEIL(TYPE, CALL)                                           \
+  void test_##TYPE##_##CALL (TYPE *out, int *cond, TYPE *in, unsigned count) \
+  {                                                                          \
+    for (unsigned i = 0; i < count; i++)                                     \
+      out[i] = cond[i] ? CALL (in[i]) : in[i];                               \
+  }
+
+#define TEST_INIT(TYPE, VAL_IN, VAL_REF, NUM)                        \
+  void test_##TYPE##_init_##NUM (TYPE *in, TYPE *ref, unsigned size) \
+  {                                                                  \
+    for (unsigned i = 0; i < size; i++)                              \
+      {                                                              \
+	in[i] = VAL_IN;                                              \
+	ref[i] = VAL_REF;                                            \
+      }                                                              \
+  }
+
+#define TEST_ASSERT(TYPE)                                         \
+  void test_##TYPE##_assert (TYPE *out, TYPE *ref, unsigned size) \
+  {                                                               \
+    for (unsigned i = 0; i < size; i++)                           \
+      {                                                           \
+	if (out[i] != ref[i])                                     \
+	  __builtin_abort ();                                     \
+      }                                                           \
+  }
+
+#define RUN_TEST(TYPE, NUM, CALL, IN, OUT, REF, SIZE) \
+  test_##TYPE##_init_##NUM (IN, REF, SIZE);           \
+  test_##TYPE##_##CALL (OUT, IN, SIZE);               \
+  test_##TYPE##_assert (OUT, REF, SIZE);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c
new file mode 100644
index 00000000000..b113df80c5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_OP_V (ceilf16, 1, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 2, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 4, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 8, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 16, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 32, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 64, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 128, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 256, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 512, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 1024, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 2048, _Float16, __builtin_ceilf16)
+
+DEF_OP_V (ceilf, 1, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 2, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 4, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 8, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 16, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 32, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 64, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 128, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 256, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 512, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 1024, float, __builtin_ceilf)
+
+DEF_OP_V (ceil, 1, double, __builtin_ceil)
+DEF_OP_V (ceil, 2, double, __builtin_ceil)
+DEF_OP_V (ceil, 4, double, __builtin_ceil)
+DEF_OP_V (ceil, 8, double, __builtin_ceil)
+DEF_OP_V (ceil, 16, double, __builtin_ceil)
+DEF_OP_V (ceil, 32, double, __builtin_ceil)
+DEF_OP_V (ceil, 64, double, __builtin_ceil)
+DEF_OP_V (ceil, 128, double, __builtin_ceil)
+DEF_OP_V (ceil, 256, double, __builtin_ceil)
+DEF_OP_V (ceil, 512, double, __builtin_ceil)
+
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
+/* { dg-final { scan-assembler-times {vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
+/* { dg-final { scan-assembler-times {vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v4] RISC-V: Support ceil and ceilf auto-vectorization
  2023-09-22  0:12 ` [PATCH v4] " pan2.li
@ 2023-09-22  0:46   ` juzhe.zhong
  2023-09-22  0:50     ` Li, Pan2
  0 siblings, 1 reply; 15+ messages in thread
From: juzhe.zhong @ 2023-09-22  0:46 UTC (permalink / raw)
  To: pan2.li, gcc-patches; +Cc: pan2.li, yanzhang.wang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 27731 bytes --]

LGTM



juzhe.zhong@rivai.ai
 
From: pan2.li
Date: 2023-09-22 08:12
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v4] RISC-V: Support ceil and ceilf auto-vectorization
From: Pan Li <pan2.li@intel.com>
 
Update in v4:
 
* Add test for _Float16.
* Remove unnecessary macro in def.h for test.
 
Original log:
 
This patch would like to support auto-vectorization for both the
ceil and ceilf of math.h. It depends on the -ffast-math option.
 
When we would like to call ceil/ceilf like v2 = ceil (v1), we will
convert it into below insn (reference the implementation of llvm).
 
* vfcvt.x.f v3, v1, RUP
* vfcvt.f.x v2, v3
 
However, the floating point value may not need the cvt as above if
its mantissa is zero. For example single precision floating point below.
 
  +-----------+---------------+
  | float     | binary layout |
  +-----------+---------------+
  | 8388607.5 | 0x4affffff    |
  | 8388608.0 | 0x4b000000    |
  | 8388609.0 | 0x4b000001    |
  +-----------+---------------+
 
All single floating point great than 8388608.0 will have all zero mantisaa.
We leverage vmflt and mask to filter them out in vector and only do the
cvt on mask.
 
Befor this patch:
math-ceil-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    ceilf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3
 
After this patch:
  ...
  fsrmi   3
.L4:
  vfabs.v     v0,v1
  vmv1r.v     v2,v1
  vmflt.vv    v0,v0,v4
  sub         a3,a3,a4
  vfcvt.x.f.v v3,v1,v0.t
  vfcvt.f.x.v v2,v3,v0.t
  vfsgnj.vv   v2,v2,v1
  bne         .L4
.L14:
  fsrm    a6
  ret
 
Please note VLS mode is also involved in this patch and covered by the
test cases.
 
gcc/ChangeLog:
 
* config/riscv/autovec.md (ceil<mode>2): New pattern.
* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
(enum insn_type): Ditto.
(expand_vec_ceil): New function decl.
* config/riscv/riscv-v.cc (gen_ceil_const_fp): New function impl.
(expand_vec_float_cmp_mask): Ditto.
(expand_vec_copysign): Ditto.
(expand_vec_ceil): Ditto.
* config/riscv/vector.md: Add VLS mode support.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/math-ceil-0.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-2.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-3.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-0.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/test-math.h: New test.
* gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c: New test.
 
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/autovec.md                   |  16 +++
gcc/config/riscv/riscv-protos.h               |   5 +
gcc/config/riscv/riscv-v.cc                   | 133 ++++++++++++++++++
gcc/config/riscv/vector.md                    |   2 +-
.../riscv/rvv/autovec/math-ceil-0.c           |  26 ++++
.../riscv/rvv/autovec/math-ceil-1.c           |  26 ++++
.../riscv/rvv/autovec/math-ceil-2.c           |  26 ++++
.../riscv/rvv/autovec/math-ceil-3.c           |  28 ++++
.../riscv/rvv/autovec/math-ceil-run-0.c       |  39 +++++
.../riscv/rvv/autovec/math-ceil-run-1.c       |  39 +++++
.../riscv/rvv/autovec/math-ceil-run-2.c       |  39 +++++
.../gcc.target/riscv/rvv/autovec/test-math.h  |  38 +++++
.../riscv/rvv/autovec/vls/math-ceil-1.c       |  56 ++++++++
13 files changed, 472 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-0.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-0.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c
 
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index f0f1abc4e82..1b4bd82f9ec 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2239,3 +2239,19 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
   riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
   DONE;
})
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   (match_operand:V_VLSF 1 "register_operand")]
+  "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
+  {
+    riscv_vector::expand_vec_ceil (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 9ea0bcf15d3..07b4ffe3edf 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -250,6 +250,9 @@ enum insn_flags : unsigned int
   /* flags for the floating-point rounding mode.  */
   /* Means INSN has FRM operand and the value is FRM_DYN.  */
   FRM_DYN_P = 1 << 15,
+
+  /* Means INSN has FRM operand and the value is FRM_RUP.  */
+  FRM_RUP_P = 1 << 16,
};
enum insn_type : unsigned int
@@ -290,6 +293,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMA = __MASK_OP_TAMA | UNARY_OP_P,
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
+  UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
@@ -432,6 +436,7 @@ bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
void expand_cond_len_unop (unsigned, rtx *);
void expand_cond_len_binop (unsigned, rtx *);
void expand_reduction (unsigned, unsigned, rtx *, rtx);
+void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
  bool, void (*)(rtx *, rtx));
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 4b9a494f8eb..f63dec573ef 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -323,6 +323,8 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
+    if (m_insn_flags & FRM_RUP_P)
+      add_rounding_mode_operand (FRM_RUP);
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
@@ -3439,4 +3441,135 @@ cmp_lmul_gt_one (machine_mode mode)
   return false;
}
+/* We don't have to convert the floating point to integer when the
+   mantissa is zero.  Thus, ther will be a limitation for both the
+   single and double precision floating point.  There will be no
+   mantissa if the floating point is greater than the limit.
+
+   1. Half floating point.
+      +-----------+---------------+
+      | float     | binary layout |
+      +-----------+---------------+
+      | 1023.5    | 0x63ff        |
+      +-----------+---------------+
+      | 1024.0    | 0x6400        |
+      +-----------+---------------+
+      | 1025.0    | 0x6401        |
+      +-----------+---------------+
+      | ...       | ...           |
+
+      All half floating point will be unchanged for ceil if it is
+      greater than and equal to 1024.
+
+   2. Single floating point.
+      +-----------+---------------+
+      | float     | binary layout |
+      +-----------+---------------+
+      | 8388607.5 | 0x4affffff    |
+      +-----------+---------------+
+      | 8388608.0 | 0x4b000000    |
+      +-----------+---------------+
+      | 8388609.0 | 0x4b000001    |
+      +-----------+---------------+
+      | ...       | ...           |
+
+      All single floating point will be unchanged for ceil if it is
+      greater than and equal to 8388608.
+
+   3. Double floating point.
+      +--------------------+--------------------+
+      | float              | binary layout      |
+      +--------------------+--------------------+
+      | 4503599627370495.5 | 0X432fffffffffffff |
+      +--------------------+--------------------+
+      | 4503599627370496.0 | 0X4330000000000000 |
+      +--------------------+--------------------+
+      | 4503599627370497.0 | 0X4340000000000000 |
+      +--------------------+--------------------+
+      | ...                | ...                |
+
+      All double floating point will be unchanged for ceil if it is
+      greater than and equal to 4503599627370496.
+ */
+static rtx
+gen_ceil_const_fp (machine_mode inner_mode)
+{
+  REAL_VALUE_TYPE real;
+
+  if (inner_mode == E_HFmode)
+    real_from_integer (&real, inner_mode, 1024, SIGNED);
+  else if (inner_mode == E_SFmode)
+    real_from_integer (&real, inner_mode, 8388608, SIGNED);
+  else if (inner_mode == E_DFmode)
+    real_from_integer (&real, inner_mode, 4503599627370496, SIGNED);
+  else
+    gcc_unreachable ();
+
+  return const_double_from_real_value (real, inner_mode);
+}
+
+static rtx
+expand_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
+    machine_mode vec_fp_mode)
+{
+  /* Step-1: Get the abs float value for mask generation.  */
+  rtx tmp = gen_reg_rtx (vec_fp_mode);
+  rtx abs_ops[] = {tmp, fp_vector};
+  insn_code icode = code_for_pred (ABS, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP, abs_ops);
+
+  /* Step-2: Prepare the scalar float compare register.  */
+  rtx fp_reg = gen_reg_rtx (GET_MODE_INNER (vec_fp_mode));
+  emit_insn (gen_move_insn (fp_reg, fp_scalar));
+
+  /* Step-3: Prepare the vector float compare register.  */
+  rtx vec_dup = gen_reg_rtx (vec_fp_mode);
+  icode = code_for_pred_broadcast (vec_fp_mode);
+  rtx vfmv_ops[] = {vec_dup, fp_reg};
+  emit_vlmax_insn (icode, UNARY_OP, vfmv_ops);
+
+  /* Step-4: Generate the mask.  */
+  machine_mode mask_mode = get_mask_mode (vec_fp_mode);
+  rtx mask = gen_reg_rtx (mask_mode);
+  expand_vec_cmp (mask, code, tmp, vec_dup);
+
+  return mask;
+}
+
+static void
+expand_vec_copysign (rtx op_dest, rtx op_src_0, rtx op_src_1,
+      machine_mode vec_mode)
+{
+  rtx sgnj_ops[] = {op_dest, op_src_0, op_src_1};
+  insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, vec_mode);
+
+  emit_vlmax_insn (icode, BINARY_OP, sgnj_ops);
+}
+
+void
+expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
+ machine_mode vec_int_mode)
+{
+  /* Step-1: Generate the mask on const fp.  */
+  rtx const_fp = gen_ceil_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx mask = expand_vec_float_cmp_mask (op_1, LT, const_fp, vec_fp_mode);
+
+  /* Step-2: Convert to integer on mask, with rounding up (aka ceil).  */
+  rtx tmp = gen_reg_rtx (vec_int_mode);
+  rtx cvt_x_ops[] = {tmp, mask, tmp, op_1};
+  insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RUP, cvt_x_ops);
+
+  /* Step-3: Convert to floating-point on mask for the final result.
+     To avoid unnecessary frm register access, we use RUP here and it will
+     never do the rounding up because the tmp rtx comes from the float
+     to int conversion.  */
+  rtx cvt_fp_ops[] = {op_0, mask, op_1, tmp};
+  icode = code_for_pred (FLOAT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RUP, cvt_fp_ops);
+
+  /* Step-4: Retrieve the sign bit.  */
+  expand_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
+}
+
} // namespace riscv_vector
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 36f0256c747..73f90dea36b 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -7438,7 +7438,7 @@ (define_insn "@pred_fcvt_x<v_su>_f<mode>"
     (reg:SI VTYPE_REGNUM)
     (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:<VCONVERT>
-      [(match_operand:VF 3 "register_operand"         " vr, vr, vr, vr")] VFCVTS)
+      [(match_operand:V_VLSF 3 "register_operand"     " vr, vr, vr, vr")] VFCVTS)
  (match_operand:<VCONVERT> 2 "vector_merge_operand" " vu,  0, vu,  0")))]
   "TARGET_VECTOR"
   "vfcvt.x<v_su>.f.v\t%0,%3%p1"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-0.c
new file mode 100644
index 00000000000..88a2ac4b338
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-0.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test__Float16___builtin_ceilf16:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e16,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(_Float16, __builtin_ceilf16)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
new file mode 100644
index 00000000000..0908ef269bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(float, __builtin_ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
new file mode 100644
index 00000000000..65d4807edef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double___builtin_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(double, __builtin_ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
new file mode 100644
index 00000000000..416698a753e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(float, __builtin_ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-0.c
new file mode 100644
index 00000000000..f1946e197cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-0.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -std=c2x -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+_Float16 in[ARRAY_SIZE];
+_Float16 out[ARRAY_SIZE];
+_Float16 ref[ARRAY_SIZE];
+
+TEST_CEIL (_Float16, __builtin_ceilf16)
+TEST_ASSERT (_Float16)
+
+TEST_INIT (_Float16, 1.2, 2.0, 1)
+TEST_INIT (_Float16, -1.2, -1.0, 2)
+TEST_INIT (_Float16, 3.0, 3.0, 3)
+TEST_INIT (_Float16, 1023.5, 1024.0, 4)
+TEST_INIT (_Float16, 1025.0, 1025.0, 5)
+TEST_INIT (_Float16, 0.0, 0.0, 6)
+TEST_INIT (_Float16, -0.0, -0.0, 7)
+TEST_INIT (_Float16, -1023.5, -1023.0, 8)
+TEST_INIT (_Float16, -1024.0, -1024.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (_Float16, 1, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 2, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 3, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 4, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 5, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 6, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 7, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 8, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 9, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
new file mode 100644
index 00000000000..202944ddd92
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-march=rv64gcv -std=c99 -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+TEST_CEIL (float, __builtin_ceilf)
+TEST_ASSERT (float)
+
+TEST_INIT (float, 1.2, 2.0, 1)
+TEST_INIT (float, -1.2, -1.0, 2)
+TEST_INIT (float, 3.0, 3.0, 3)
+TEST_INIT (float, 8388607.5, 8388608.0, 4)
+TEST_INIT (float, 8388609.0, 8388609.0, 5)
+TEST_INIT (float, 0.0, 0.0, 6)
+TEST_INIT (float, -0.0, -0.0, 7)
+TEST_INIT (float, -8388607.5, -8388607.0, 8)
+TEST_INIT (float, -8388608.0, -8388608.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (float, 1, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 2, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 3, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 4, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 5, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 6, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 7, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 8, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 9, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
new file mode 100644
index 00000000000..f0ff9bca0af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-march=rv64gcv -std=c99 -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+TEST_CEIL (double, __builtin_ceil)
+TEST_ASSERT (double)
+
+TEST_INIT (double, 1.2, 2.0, 1)
+TEST_INIT (double, -1.2, -1.0, 2)
+TEST_INIT (double, 3.0, 3.0, 3)
+TEST_INIT (double, 4503599627370495.5, 4503599627370496.0, 4)
+TEST_INIT (double, 4503599627370497.0, 4503599627370497.0, 5)
+TEST_INIT (double, 0.0, 0.0, 6)
+TEST_INIT (double, -0.0, -0.0, 7)
+TEST_INIT (double, -4503599627370495.5, -4503599627370495.0, 8)
+TEST_INIT (double, -4503599627370496.0, -4503599627370496.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (double, 1, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 2, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 3, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 4, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 5, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 6, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 7, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 8, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 9, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
new file mode 100644
index 00000000000..6e913da37f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
@@ -0,0 +1,38 @@
+#define TEST_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, TYPE *in, unsigned count) \
+  {                                                               \
+    for (unsigned i = 0; i < count; i++)                          \
+      out[i] = CALL (in[i]);                                      \
+  }
+
+#define TEST_COND_CEIL(TYPE, CALL)                                           \
+  void test_##TYPE##_##CALL (TYPE *out, int *cond, TYPE *in, unsigned count) \
+  {                                                                          \
+    for (unsigned i = 0; i < count; i++)                                     \
+      out[i] = cond[i] ? CALL (in[i]) : in[i];                               \
+  }
+
+#define TEST_INIT(TYPE, VAL_IN, VAL_REF, NUM)                        \
+  void test_##TYPE##_init_##NUM (TYPE *in, TYPE *ref, unsigned size) \
+  {                                                                  \
+    for (unsigned i = 0; i < size; i++)                              \
+      {                                                              \
+ in[i] = VAL_IN;                                              \
+ ref[i] = VAL_REF;                                            \
+      }                                                              \
+  }
+
+#define TEST_ASSERT(TYPE)                                         \
+  void test_##TYPE##_assert (TYPE *out, TYPE *ref, unsigned size) \
+  {                                                               \
+    for (unsigned i = 0; i < size; i++)                           \
+      {                                                           \
+ if (out[i] != ref[i])                                     \
+   __builtin_abort ();                                     \
+      }                                                           \
+  }
+
+#define RUN_TEST(TYPE, NUM, CALL, IN, OUT, REF, SIZE) \
+  test_##TYPE##_init_##NUM (IN, REF, SIZE);           \
+  test_##TYPE##_##CALL (OUT, IN, SIZE);               \
+  test_##TYPE##_assert (OUT, REF, SIZE);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c
new file mode 100644
index 00000000000..b113df80c5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_OP_V (ceilf16, 1, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 2, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 4, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 8, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 16, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 32, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 64, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 128, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 256, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 512, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 1024, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 2048, _Float16, __builtin_ceilf16)
+
+DEF_OP_V (ceilf, 1, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 2, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 4, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 8, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 16, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 32, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 64, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 128, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 256, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 512, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 1024, float, __builtin_ceilf)
+
+DEF_OP_V (ceil, 1, double, __builtin_ceil)
+DEF_OP_V (ceil, 2, double, __builtin_ceil)
+DEF_OP_V (ceil, 4, double, __builtin_ceil)
+DEF_OP_V (ceil, 8, double, __builtin_ceil)
+DEF_OP_V (ceil, 16, double, __builtin_ceil)
+DEF_OP_V (ceil, 32, double, __builtin_ceil)
+DEF_OP_V (ceil, 64, double, __builtin_ceil)
+DEF_OP_V (ceil, 128, double, __builtin_ceil)
+DEF_OP_V (ceil, 256, double, __builtin_ceil)
+DEF_OP_V (ceil, 512, double, __builtin_ceil)
+
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
+/* { dg-final { scan-assembler-times {vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
+/* { dg-final { scan-assembler-times {vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
-- 
2.34.1
 
 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v4] RISC-V: Support ceil and ceilf auto-vectorization
  2023-09-22  0:46   ` juzhe.zhong
@ 2023-09-22  0:50     ` Li, Pan2
  0 siblings, 0 replies; 15+ messages in thread
From: Li, Pan2 @ 2023-09-22  0:50 UTC (permalink / raw)
  To: juzhe.zhong, gcc-patches; +Cc: Wang, Yanzhang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 28383 bytes --]

Committed, thanks Juzhe.

Pan

From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai>
Sent: Friday, September 22, 2023 8:47 AM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Li, Pan2 <pan2.li@intel.com>; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng <kito.cheng@gmail.com>
Subject: Re: [PATCH v4] RISC-V: Support ceil and ceilf auto-vectorization

LGTM

________________________________
juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>

From: pan2.li<mailto:pan2.li@intel.com>
Date: 2023-09-22 08:12
To: gcc-patches<mailto:gcc-patches@gcc.gnu.org>
CC: juzhe.zhong<mailto:juzhe.zhong@rivai.ai>; pan2.li<mailto:pan2.li@intel.com>; yanzhang.wang<mailto:yanzhang.wang@intel.com>; kito.cheng<mailto:kito.cheng@gmail.com>
Subject: [PATCH v4] RISC-V: Support ceil and ceilf auto-vectorization
From: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>

Update in v4:

* Add test for _Float16.
* Remove unnecessary macro in def.h for test.

Original log:

This patch would like to support auto-vectorization for both the
ceil and ceilf of math.h. It depends on the -ffast-math option.

When we would like to call ceil/ceilf like v2 = ceil (v1), we will
convert it into below insn (reference the implementation of llvm).

* vfcvt.x.f v3, v1, RUP
* vfcvt.f.x v2, v3

However, the floating point value may not need the cvt as above if
its mantissa is zero. For example single precision floating point below.

  +-----------+---------------+
  | float     | binary layout |
  +-----------+---------------+
  | 8388607.5 | 0x4affffff    |
  | 8388608.0 | 0x4b000000    |
  | 8388609.0 | 0x4b000001    |
  +-----------+---------------+

All single floating point great than 8388608.0 will have all zero mantisaa.
We leverage vmflt and mask to filter them out in vector and only do the
cvt on mask.

Befor this patch:
math-ceil-1.c:21:1: missed: couldn't vectorize loop
  ...
.L3:
  flw     fa0,0(s0)
  addi    s0,s0,4
  addi    s1,s1,4
  call    ceilf
  fsw     fa0,-4(s1)
  bne     s0,s2,.L3

After this patch:
  ...
  fsrmi   3
.L4:
  vfabs.v     v0,v1
  vmv1r.v     v2,v1
  vmflt.vv    v0,v0,v4
  sub         a3,a3,a4
  vfcvt.x.f.v v3,v1,v0.t
  vfcvt.f.x.v v2,v3,v0.t
  vfsgnj.vv   v2,v2,v1
  bne         .L4
.L14:
  fsrm    a6
  ret

Please note VLS mode is also involved in this patch and covered by the
test cases.

gcc/ChangeLog:

* config/riscv/autovec.md (ceil<mode>2): New pattern.
* config/riscv/riscv-protos.h (enum insn_flags): New enum type.
(enum insn_type): Ditto.
(expand_vec_ceil): New function decl.
* config/riscv/riscv-v.cc (gen_ceil_const_fp): New function impl.
(expand_vec_float_cmp_mask): Ditto.
(expand_vec_copysign): Ditto.
(expand_vec_ceil): Ditto.
* config/riscv/vector.md: Add VLS mode support.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/math-ceil-0.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-2.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-3.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-0.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-1.c: New test.
* gcc.target/riscv/rvv/autovec/math-ceil-run-2.c: New test.
* gcc.target/riscv/rvv/autovec/test-math.h: New test.
* gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
---
gcc/config/riscv/autovec.md                   |  16 +++
gcc/config/riscv/riscv-protos.h               |   5 +
gcc/config/riscv/riscv-v.cc                   | 133 ++++++++++++++++++
gcc/config/riscv/vector.md                    |   2 +-
.../riscv/rvv/autovec/math-ceil-0.c           |  26 ++++
.../riscv/rvv/autovec/math-ceil-1.c           |  26 ++++
.../riscv/rvv/autovec/math-ceil-2.c           |  26 ++++
.../riscv/rvv/autovec/math-ceil-3.c           |  28 ++++
.../riscv/rvv/autovec/math-ceil-run-0.c       |  39 +++++
.../riscv/rvv/autovec/math-ceil-run-1.c       |  39 +++++
.../riscv/rvv/autovec/math-ceil-run-2.c       |  39 +++++
.../gcc.target/riscv/rvv/autovec/test-math.h  |  38 +++++
.../riscv/rvv/autovec/vls/math-ceil-1.c       |  56 ++++++++
13 files changed, 472 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-0.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-0.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index f0f1abc4e82..1b4bd82f9ec 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2239,3 +2239,19 @@ (define_expand "<u>avg<v_double_trunc>3_ceil"
   riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
   DONE;
})
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Math.h.
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ceil/ceilf
+;; -------------------------------------------------------------------------
+(define_expand "ceil<mode>2"
+  [(match_operand:V_VLSF 0 "register_operand")
+   (match_operand:V_VLSF 1 "register_operand")]
+  "TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
+  {
+    riscv_vector::expand_vec_ceil (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 9ea0bcf15d3..07b4ffe3edf 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -250,6 +250,9 @@ enum insn_flags : unsigned int
   /* flags for the floating-point rounding mode.  */
   /* Means INSN has FRM operand and the value is FRM_DYN.  */
   FRM_DYN_P = 1 << 15,
+
+  /* Means INSN has FRM operand and the value is FRM_RUP.  */
+  FRM_RUP_P = 1 << 16,
};
enum insn_type : unsigned int
@@ -290,6 +293,7 @@ enum insn_type : unsigned int
   UNARY_OP_TAMA = __MASK_OP_TAMA | UNARY_OP_P,
   UNARY_OP_TAMU = __MASK_OP_TAMU | UNARY_OP_P,
   UNARY_OP_FRM_DYN = UNARY_OP | FRM_DYN_P,
+  UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
   /* Binary operator.  */
   BINARY_OP = __NORMAL_OP | BINARY_OP_P,
@@ -432,6 +436,7 @@ bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
void expand_cond_len_unop (unsigned, rtx *);
void expand_cond_len_binop (unsigned, rtx *);
void expand_reduction (unsigned, unsigned, rtx *, rtx);
+void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
  bool, void (*)(rtx *, rtx));
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 4b9a494f8eb..f63dec573ef 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -323,6 +323,8 @@ public:
     /* Add rounding mode operand.  */
     if (m_insn_flags & FRM_DYN_P)
       add_rounding_mode_operand (FRM_DYN);
+    if (m_insn_flags & FRM_RUP_P)
+      add_rounding_mode_operand (FRM_RUP);
     gcc_assert (insn_data[(int) icode].n_operands == m_opno);
     expand (icode, any_mem_p);
@@ -3439,4 +3441,135 @@ cmp_lmul_gt_one (machine_mode mode)
   return false;
}
+/* We don't have to convert the floating point to integer when the
+   mantissa is zero.  Thus, ther will be a limitation for both the
+   single and double precision floating point.  There will be no
+   mantissa if the floating point is greater than the limit.
+
+   1. Half floating point.
+      +-----------+---------------+
+      | float     | binary layout |
+      +-----------+---------------+
+      | 1023.5    | 0x63ff        |
+      +-----------+---------------+
+      | 1024.0    | 0x6400        |
+      +-----------+---------------+
+      | 1025.0    | 0x6401        |
+      +-----------+---------------+
+      | ...       | ...           |
+
+      All half floating point will be unchanged for ceil if it is
+      greater than and equal to 1024.
+
+   2. Single floating point.
+      +-----------+---------------+
+      | float     | binary layout |
+      +-----------+---------------+
+      | 8388607.5 | 0x4affffff    |
+      +-----------+---------------+
+      | 8388608.0 | 0x4b000000    |
+      +-----------+---------------+
+      | 8388609.0 | 0x4b000001    |
+      +-----------+---------------+
+      | ...       | ...           |
+
+      All single floating point will be unchanged for ceil if it is
+      greater than and equal to 8388608.
+
+   3. Double floating point.
+      +--------------------+--------------------+
+      | float              | binary layout      |
+      +--------------------+--------------------+
+      | 4503599627370495.5 | 0X432fffffffffffff |
+      +--------------------+--------------------+
+      | 4503599627370496.0 | 0X4330000000000000 |
+      +--------------------+--------------------+
+      | 4503599627370497.0 | 0X4340000000000000 |
+      +--------------------+--------------------+
+      | ...                | ...                |
+
+      All double floating point will be unchanged for ceil if it is
+      greater than and equal to 4503599627370496.
+ */
+static rtx
+gen_ceil_const_fp (machine_mode inner_mode)
+{
+  REAL_VALUE_TYPE real;
+
+  if (inner_mode == E_HFmode)
+    real_from_integer (&real, inner_mode, 1024, SIGNED);
+  else if (inner_mode == E_SFmode)
+    real_from_integer (&real, inner_mode, 8388608, SIGNED);
+  else if (inner_mode == E_DFmode)
+    real_from_integer (&real, inner_mode, 4503599627370496, SIGNED);
+  else
+    gcc_unreachable ();
+
+  return const_double_from_real_value (real, inner_mode);
+}
+
+static rtx
+expand_vec_float_cmp_mask (rtx fp_vector, rtx_code code, rtx fp_scalar,
+    machine_mode vec_fp_mode)
+{
+  /* Step-1: Get the abs float value for mask generation.  */
+  rtx tmp = gen_reg_rtx (vec_fp_mode);
+  rtx abs_ops[] = {tmp, fp_vector};
+  insn_code icode = code_for_pred (ABS, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP, abs_ops);
+
+  /* Step-2: Prepare the scalar float compare register.  */
+  rtx fp_reg = gen_reg_rtx (GET_MODE_INNER (vec_fp_mode));
+  emit_insn (gen_move_insn (fp_reg, fp_scalar));
+
+  /* Step-3: Prepare the vector float compare register.  */
+  rtx vec_dup = gen_reg_rtx (vec_fp_mode);
+  icode = code_for_pred_broadcast (vec_fp_mode);
+  rtx vfmv_ops[] = {vec_dup, fp_reg};
+  emit_vlmax_insn (icode, UNARY_OP, vfmv_ops);
+
+  /* Step-4: Generate the mask.  */
+  machine_mode mask_mode = get_mask_mode (vec_fp_mode);
+  rtx mask = gen_reg_rtx (mask_mode);
+  expand_vec_cmp (mask, code, tmp, vec_dup);
+
+  return mask;
+}
+
+static void
+expand_vec_copysign (rtx op_dest, rtx op_src_0, rtx op_src_1,
+      machine_mode vec_mode)
+{
+  rtx sgnj_ops[] = {op_dest, op_src_0, op_src_1};
+  insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, vec_mode);
+
+  emit_vlmax_insn (icode, BINARY_OP, sgnj_ops);
+}
+
+void
+expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
+ machine_mode vec_int_mode)
+{
+  /* Step-1: Generate the mask on const fp.  */
+  rtx const_fp = gen_ceil_const_fp (GET_MODE_INNER (vec_fp_mode));
+  rtx mask = expand_vec_float_cmp_mask (op_1, LT, const_fp, vec_fp_mode);
+
+  /* Step-2: Convert to integer on mask, with rounding up (aka ceil).  */
+  rtx tmp = gen_reg_rtx (vec_int_mode);
+  rtx cvt_x_ops[] = {tmp, mask, tmp, op_1};
+  insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RUP, cvt_x_ops);
+
+  /* Step-3: Convert to floating-point on mask for the final result.
+     To avoid unnecessary frm register access, we use RUP here and it will
+     never do the rounding up because the tmp rtx comes from the float
+     to int conversion.  */
+  rtx cvt_fp_ops[] = {op_0, mask, op_1, tmp};
+  icode = code_for_pred (FLOAT, vec_fp_mode);
+  emit_vlmax_insn (icode, UNARY_OP_TAMU_FRM_RUP, cvt_fp_ops);
+
+  /* Step-4: Retrieve the sign bit.  */
+  expand_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
+}
+
} // namespace riscv_vector
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 36f0256c747..73f90dea36b 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -7438,7 +7438,7 @@ (define_insn "@pred_fcvt_x<v_su>_f<mode>"
     (reg:SI VTYPE_REGNUM)
     (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
  (unspec:<VCONVERT>
-      [(match_operand:VF 3 "register_operand"         " vr, vr, vr, vr")] VFCVTS)
+      [(match_operand:V_VLSF 3 "register_operand"     " vr, vr, vr, vr")] VFCVTS)
  (match_operand:<VCONVERT> 2 "vector_merge_operand" " vu,  0, vu,  0")))]
   "TARGET_VECTOR"
   "vfcvt.x<v_su>.f.v\t%0,%3%p1"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-0.c
new file mode 100644
index 00000000000..88a2ac4b338
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-0.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test__Float16___builtin_ceilf16:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e16,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(_Float16, __builtin_ceilf16)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
new file mode 100644
index 00000000000..0908ef269bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(float, __builtin_ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
new file mode 100644
index 00000000000..65d4807edef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_double___builtin_ceil:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_CEIL(double, __builtin_ceil)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
new file mode 100644
index 00000000000..416698a753e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-3.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "test-math.h"
+
+/*
+** test_float___builtin_ceilf:
+**   frrm\s+[atx][0-9]+
+**   ...
+**   fsrmi\s+3
+**   ...
+**   vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*mu
+**   vfabs\.v\s+v[0-9]+,\s*v[0-9]+
+**   ...
+**   vmflt\.vv\s+v0,\s*v[0-9]+,\s*v[0-9]+
+**   ...
+**   vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   ...
+**   vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t
+**   vfsgnj\.vv\s+v[0-9]+,v[0-9]+,v[0-9]+
+**   ...
+**   vmerge\.vvm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0
+**   ...
+**   fsrm\s+[atx][0-9]+
+**   ...
+*/
+TEST_COND_CEIL(float, __builtin_ceilf)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-0.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-0.c
new file mode 100644
index 00000000000..f1946e197cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-0.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-march=rv64gcv_zvfh -std=c2x -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+_Float16 in[ARRAY_SIZE];
+_Float16 out[ARRAY_SIZE];
+_Float16 ref[ARRAY_SIZE];
+
+TEST_CEIL (_Float16, __builtin_ceilf16)
+TEST_ASSERT (_Float16)
+
+TEST_INIT (_Float16, 1.2, 2.0, 1)
+TEST_INIT (_Float16, -1.2, -1.0, 2)
+TEST_INIT (_Float16, 3.0, 3.0, 3)
+TEST_INIT (_Float16, 1023.5, 1024.0, 4)
+TEST_INIT (_Float16, 1025.0, 1025.0, 5)
+TEST_INIT (_Float16, 0.0, 0.0, 6)
+TEST_INIT (_Float16, -0.0, -0.0, 7)
+TEST_INIT (_Float16, -1023.5, -1023.0, 8)
+TEST_INIT (_Float16, -1024.0, -1024.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (_Float16, 1, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 2, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 3, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 4, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 5, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 6, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 7, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 8, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (_Float16, 9, __builtin_ceilf16, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
new file mode 100644
index 00000000000..202944ddd92
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-march=rv64gcv -std=c99 -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+float out[ARRAY_SIZE];
+float ref[ARRAY_SIZE];
+
+TEST_CEIL (float, __builtin_ceilf)
+TEST_ASSERT (float)
+
+TEST_INIT (float, 1.2, 2.0, 1)
+TEST_INIT (float, -1.2, -1.0, 2)
+TEST_INIT (float, 3.0, 3.0, 3)
+TEST_INIT (float, 8388607.5, 8388608.0, 4)
+TEST_INIT (float, 8388609.0, 8388609.0, 5)
+TEST_INIT (float, 0.0, 0.0, 6)
+TEST_INIT (float, -0.0, -0.0, 7)
+TEST_INIT (float, -8388607.5, -8388607.0, 8)
+TEST_INIT (float, -8388608.0, -8388608.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (float, 1, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 2, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 3, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 4, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 5, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 6, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 7, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 8, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (float, 9, __builtin_ceilf, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
new file mode 100644
index 00000000000..f0ff9bca0af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/math-ceil-run-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-march=rv64gcv -std=c99 -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+double in[ARRAY_SIZE];
+double out[ARRAY_SIZE];
+double ref[ARRAY_SIZE];
+
+TEST_CEIL (double, __builtin_ceil)
+TEST_ASSERT (double)
+
+TEST_INIT (double, 1.2, 2.0, 1)
+TEST_INIT (double, -1.2, -1.0, 2)
+TEST_INIT (double, 3.0, 3.0, 3)
+TEST_INIT (double, 4503599627370495.5, 4503599627370496.0, 4)
+TEST_INIT (double, 4503599627370497.0, 4503599627370497.0, 5)
+TEST_INIT (double, 0.0, 0.0, 6)
+TEST_INIT (double, -0.0, -0.0, 7)
+TEST_INIT (double, -4503599627370495.5, -4503599627370495.0, 8)
+TEST_INIT (double, -4503599627370496.0, -4503599627370496.0, 9)
+
+int
+main ()
+{
+  RUN_TEST (double, 1, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 2, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 3, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 4, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 5, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 6, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 7, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 8, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+  RUN_TEST (double, 9, __builtin_ceil, in, out, ref, ARRAY_SIZE);
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
new file mode 100644
index 00000000000..6e913da37f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/test-math.h
@@ -0,0 +1,38 @@
+#define TEST_CEIL(TYPE, CALL) \
+  void test_##TYPE##_##CALL (TYPE *out, TYPE *in, unsigned count) \
+  {                                                               \
+    for (unsigned i = 0; i < count; i++)                          \
+      out[i] = CALL (in[i]);                                      \
+  }
+
+#define TEST_COND_CEIL(TYPE, CALL)                                           \
+  void test_##TYPE##_##CALL (TYPE *out, int *cond, TYPE *in, unsigned count) \
+  {                                                                          \
+    for (unsigned i = 0; i < count; i++)                                     \
+      out[i] = cond[i] ? CALL (in[i]) : in[i];                               \
+  }
+
+#define TEST_INIT(TYPE, VAL_IN, VAL_REF, NUM)                        \
+  void test_##TYPE##_init_##NUM (TYPE *in, TYPE *ref, unsigned size) \
+  {                                                                  \
+    for (unsigned i = 0; i < size; i++)                              \
+      {                                                              \
+ in[i] = VAL_IN;                                              \
+ ref[i] = VAL_REF;                                            \
+      }                                                              \
+  }
+
+#define TEST_ASSERT(TYPE)                                         \
+  void test_##TYPE##_assert (TYPE *out, TYPE *ref, unsigned size) \
+  {                                                               \
+    for (unsigned i = 0; i < size; i++)                           \
+      {                                                           \
+ if (out[i] != ref[i])                                     \
+   __builtin_abort ();                                     \
+      }                                                           \
+  }
+
+#define RUN_TEST(TYPE, NUM, CALL, IN, OUT, REF, SIZE) \
+  test_##TYPE##_init_##NUM (IN, REF, SIZE);           \
+  test_##TYPE##_##CALL (OUT, IN, SIZE);               \
+  test_##TYPE##_assert (OUT, REF, SIZE);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c
new file mode 100644
index 00000000000..b113df80c5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-ceil-1.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_OP_V (ceilf16, 1, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 2, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 4, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 8, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 16, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 32, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 64, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 128, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 256, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 512, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 1024, _Float16, __builtin_ceilf16)
+DEF_OP_V (ceilf16, 2048, _Float16, __builtin_ceilf16)
+
+DEF_OP_V (ceilf, 1, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 2, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 4, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 8, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 16, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 32, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 64, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 128, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 256, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 512, float, __builtin_ceilf)
+DEF_OP_V (ceilf, 1024, float, __builtin_ceilf)
+
+DEF_OP_V (ceil, 1, double, __builtin_ceil)
+DEF_OP_V (ceil, 2, double, __builtin_ceil)
+DEF_OP_V (ceil, 4, double, __builtin_ceil)
+DEF_OP_V (ceil, 8, double, __builtin_ceil)
+DEF_OP_V (ceil, 16, double, __builtin_ceil)
+DEF_OP_V (ceil, 32, double, __builtin_ceil)
+DEF_OP_V (ceil, 64, double, __builtin_ceil)
+DEF_OP_V (ceil, 128, double, __builtin_ceil)
+DEF_OP_V (ceil, 256, double, __builtin_ceil)
+DEF_OP_V (ceil, 512, double, __builtin_ceil)
+
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
+/* { dg-final { scan-assembler-times {vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
+/* { dg-final { scan-assembler-times {vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
--
2.34.1



^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2023-09-22  0:50 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-20  2:30 [PATCH v1] RISC-V: Support ceil and ceilf auto-vectorization pan2.li
2023-09-20  2:35 ` juzhe.zhong
2023-09-20  2:44   ` Li, Pan2
2023-09-20  3:01     ` juzhe.zhong
2023-09-20  3:37     ` juzhe.zhong
2023-09-20  3:42       ` Li, Pan2
2023-09-21 10:32 ` [PATCH v2] " pan2.li
2023-09-21 11:05   ` juzhe.zhong
2023-09-21 11:30   ` juzhe.zhong
2023-09-21 11:56     ` Li, Pan2
2023-09-21 15:18 ` [PATCH v3] " pan2.li
2023-09-21 22:04   ` 钟居哲
2023-09-22  0:12 ` [PATCH v4] " pan2.li
2023-09-22  0:46   ` juzhe.zhong
2023-09-22  0:50     ` Li, Pan2

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).