public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] RISC-V: Support cond vfsgnj.vv autovec pattern
@ 2023-09-12 16:25 Lehua Ding
  2023-09-13  8:49 ` Kito Cheng
  0 siblings, 1 reply; 3+ messages in thread
From: Lehua Ding @ 2023-09-12 16:25 UTC (permalink / raw)
  To: gcc-patches
  Cc: juzhe.zhong, kito.cheng, rdapp.gcc, palmer, jeffreyalaw, lehua.ding

This patch adds combine patterns that merge vfsgnj.vv + vcond_mask
into a masked vfsgnj.vv. vfsgnjx.vv cannot currently be produced by the
midend; we will send a separate patch to address that issue.

gcc/ChangeLog:

	* config/riscv/autovec-opt.md (*copysign<mode>_neg): Move.
	(*cond_copysign<mode>): New combine pattern.
	* config/riscv/riscv-v.cc (needs_fp_rounding): Extend.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c: New test.
	* gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c: New test.
	* gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c: New test.
	* gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h: New test.
	* gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c: New test.

---
 gcc/config/riscv/autovec-opt.md               | 68 +++++++++----
 gcc/config/riscv/riscv-v.cc                   |  4 +-
 .../rvv/autovec/cond/cond_copysign-run.c      | 99 +++++++++++++++++++
 .../rvv/autovec/cond/cond_copysign-rv32gcv.c  | 12 +++
 .../rvv/autovec/cond/cond_copysign-rv64gcv.c  | 12 +++
 .../rvv/autovec/cond/cond_copysign-template.h | 81 +++++++++++++++
 .../rvv/autovec/cond/cond_copysign-zvfh-run.c | 93 +++++++++++++++++
 7 files changed, 349 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 58e80044f1e..f759525f96b 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -609,6 +609,10 @@
    (set_attr "mode" "<V_DOUBLE_TRUNC>")
    (set (attr "frm_mode") (symbol_ref "riscv_vector::FRM_DYN"))])
 
+;; =============================================================================
+;; Combine op + vmerge to cond_op
+;; =============================================================================
+
 ;; Combine <op> and vcond_mask generated by midend into cond_len_<op>
 ;; Currently supported operations:
 ;;   abs(FP)
@@ -651,25 +655,6 @@
   DONE;
 })
 
-;; Combine vlmax neg and UNSPEC_VCOPYSIGN
-(define_insn_and_split "*copysign<mode>_neg"
-  [(set (match_operand:VF 0 "register_operand")
-        (neg:VF
-          (unspec:VF [
-            (match_operand:VF 1 "register_operand")
-            (match_operand:VF 2 "register_operand")
-          ] UNSPEC_VCOPYSIGN)))]
-  "TARGET_VECTOR && can_create_pseudo_p ()"
-  "#"
-  "&& 1"
-  [(const_int 0)]
-{
-  riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (<MODE>mode),
-                                  riscv_vector::BINARY_OP, operands);
-  DONE;
-}
-[(set_attr "type" "vector")])
-
 ;; Combine sign_extend/zero_extend(vf2) and vcond_mask
 (define_insn_and_split "*cond_<optab><v_double_trunc><mode>"
   [(set (match_operand:VWEXTI 0 "register_operand")
@@ -918,6 +903,27 @@
 }
 [(set_attr "type" "vector")])
 
+;; Combine vfsgnj.vv + vcond_mask
+(define_insn_and_split "*cond_copysign<mode>"
+   [(set (match_operand:VF 0 "register_operand")
+    (if_then_else:VF
+      (match_operand:<VM> 1 "register_operand")
+      (unspec:VF
+       [(match_operand:VF 2 "register_operand")
+        (match_operand:VF 3 "register_operand")] UNSPEC_VCOPYSIGN)
+      (match_operand:VF 4 "register_operand")))]
+   "TARGET_VECTOR && can_create_pseudo_p ()"
+   "#"
+   "&& 1"
+   [(const_int 0)]
+{
+  insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, <MODE>mode);
+  rtx ops[] = {operands[0], operands[1], operands[2], operands[3], operands[4],
+               gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)};
+  riscv_vector::expand_cond_len_binop (icode, ops);
+   DONE;
+})
+
 ;; =============================================================================
 ;; Combine extend + binop to widen_binop
 ;; =============================================================================
@@ -1119,3 +1125,27 @@
   DONE;
 }
 [(set_attr "type" "vfwmul")])
+
+
+;; =============================================================================
+;; Misc combine patterns
+;; =============================================================================
+
+;; Combine vlmax neg and UNSPEC_VCOPYSIGN
+(define_insn_and_split "*copysign<mode>_neg"
+  [(set (match_operand:VF 0 "register_operand")
+        (neg:VF
+          (unspec:VF [
+            (match_operand:VF 1 "register_operand")
+            (match_operand:VF 2 "register_operand")
+          ] UNSPEC_VCOPYSIGN)))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (<MODE>mode),
+                                  riscv_vector::BINARY_OP, operands);
+  DONE;
+}
+[(set_attr "type" "vector")])
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 4d95bd773a2..76e6094f45b 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2970,7 +2970,9 @@ needs_fp_rounding (unsigned icode, machine_mode mode)
 	 && icode != maybe_code_for_pred_extend (mode)
 	 /* narrower-INT -> FP */
 	 && icode != maybe_code_for_pred_widen (FLOAT, mode)
-	 && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode);
+	 && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode)
+	 /* vfsgnj */
+	 && icode != maybe_code_for_pred (UNSPEC_VCOPYSIGN, mode);
 }
 
 /* Subroutine to expand COND_LEN_* patterns.  */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
new file mode 100644
index 00000000000..be37854c135
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
@@ -0,0 +1,99 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
+
+#include "cond_copysign-template.h"
+
+#include <assert.h>
+
+#define SZ 512
+
+#define EPS 1e-6
+
+#define INIT_PRED()                                                            \
+  int pred[SZ];                                                                \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      pred[i] = i % 3;                                                         \
+    }
+
+#define RUN(TYPE, VAL)                                                         \
+  TYPE a##TYPE[SZ];                                                            \
+  TYPE b##TYPE[SZ];                                                            \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      a##TYPE[i] = i;                                                          \
+      b##TYPE[i] = (i & 1) ? VAL : -VAL;                                       \
+    }                                                                          \
+  copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ);                       \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
+
+#define RUN2(TYPE, VAL)                                                        \
+  TYPE a2##TYPE[SZ];                                                           \
+  for (int i = 0; i < SZ; i++)                                                 \
+    a2##TYPE[i] = i;                                                           \
+  copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ);                       \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS);
+
+#define RUN3(TYPE, VAL)                                                        \
+  TYPE a3##TYPE[SZ];                                                           \
+  TYPE b3##TYPE[SZ];                                                           \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      a3##TYPE[i] = (i & 1) ? -i : i;                                          \
+      b3##TYPE[i] = (i & 1) ? VAL : -VAL;                                      \
+    }                                                                          \
+  xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ);                     \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS);
+
+#define RUN4(TYPE, VAL)                                                        \
+  TYPE a4##TYPE[SZ];                                                           \
+  for (int i = 0; i < SZ; i++)                                                 \
+    a4##TYPE[i] = -i;                                                          \
+  xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ);                        \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS);
+
+#define RUN5(TYPE, VAL)                                                        \
+  TYPE a5##TYPE[SZ];                                                           \
+  TYPE b5##TYPE[SZ];                                                           \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      a5##TYPE[i] = i;                                                         \
+      b5##TYPE[i] = (i & 1) ? VAL : -VAL;                                      \
+    }                                                                          \
+  ncopysign_##TYPE (a5##TYPE, a5##TYPE, b##TYPE, pred, SZ);                    \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (!pred[i]                                                           \
+	    || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
+
+#define RUN6(TYPE, VAL)                                                        \
+  TYPE a6##TYPE[SZ];                                                           \
+  for (int i = 0; i < SZ; i++)                                                 \
+    a6##TYPE[i] = i;                                                           \
+  ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ);                      \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS);
+
+#define RUN_ALL()                                                              \
+  RUN (float, 5)                                                               \
+  RUN (double, 6)                                                              \
+  RUN2 (float, 11)                                                             \
+  RUN2 (double, 12)                                                            \
+  RUN3 (float, 16)                                                             \
+  RUN3 (double, 18)                                                            \
+  RUN4 (float, 17)                                                             \
+  RUN4 (double, 19)                                                            \
+  RUN5 (float, 123)                                                            \
+  RUN5 (double, 523)                                                           \
+  RUN6 (float, 777)                                                            \
+  RUN6 (double, 877)
+
+int
+main ()
+{
+  INIT_PRED ()
+  RUN_ALL ()
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
new file mode 100644
index 00000000000..cef531b9700
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
+
+#include "cond_copysign-template.h"
+
+/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */
+/* 1. The vectorizer wraps scalar variants of copysign into vector constants,
+      which expand cannot currently handle.
+   2. match.pd currently converts .COPYSIGN (1, b) + COND_MUL to AND + XOR.  */
+/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */
+/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
new file mode 100644
index 00000000000..cc2aa4de757
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
+
+#include "cond_copysign-template.h"
+
+/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */
+/* 1. The vectorizer wraps scalar variants of copysign into vector constants,
+      which expand cannot currently handle.
+   2. match.pd currently converts .COPYSIGN (1, b) + COND_MUL to AND + XOR.  */
+/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } } } */
+/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */
+/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
new file mode 100644
index 00000000000..4191500fd83
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
@@ -0,0 +1,81 @@
+#include <stdint-gcc.h>
+
+#define TEST_TYPE(TYPE, SUFFIX)                                                \
+  __attribute__ ((noipa)) void copysign_##TYPE (TYPE *restrict dst,            \
+						TYPE *restrict a,              \
+						TYPE *restrict b,              \
+						int *restrict pred, int n)     \
+  {                                                                            \
+    for (int i = 0; i < n; i++)                                                \
+      dst[i] = pred[i] ? __builtin_copysign##SUFFIX (a[i], b[i]) : dst[i];     \
+  }
+
+#define TEST_TYPE2(TYPE, SUFFIX)                                               \
+  __attribute__ ((noipa)) void copysigns_##TYPE (TYPE *restrict dst,           \
+						 TYPE *restrict a, TYPE b,     \
+						 int *restrict pred, int n)    \
+  {                                                                            \
+    for (int i = 0; i < n; i++)                                                \
+      dst[i] = pred[i] ? __builtin_copysign##SUFFIX (a[i], b) : dst[i];        \
+  }
+
+#define TEST_TYPE3(TYPE, SUFFIX)                                               \
+  __attribute__ ((noipa)) void xorsign_##TYPE (TYPE *restrict dst,             \
+					       TYPE *restrict a,               \
+					       TYPE *restrict b,               \
+					       int *restrict pred, int n)      \
+  {                                                                            \
+    for (int i = 0; i < n; i++)                                                \
+      dst[i]                                                                   \
+	= pred[i] ? a[i] * __builtin_copysign##SUFFIX (1.0, b[i]) : dst[i];    \
+  }
+
+#define TEST_TYPE4(TYPE, SUFFIX)                                               \
+  __attribute__ ((noipa)) void xorsigns_##TYPE (TYPE *restrict dst,            \
+						TYPE *restrict a, TYPE b,      \
+						int *restrict pred, int n)     \
+  {                                                                            \
+    for (int i = 0; i < n; i++)                                                \
+      dst[i] = pred[i] ? a[i] * __builtin_copysign##SUFFIX (1.0, b) : dst[i];  \
+  }
+
+#define TEST_TYPE5(TYPE, SUFFIX)                                               \
+  __attribute__ ((noipa)) void ncopysign_##TYPE (TYPE *restrict dst,           \
+						 TYPE *restrict a,             \
+						 TYPE *restrict b,             \
+						 int *restrict pred, int n)    \
+  {                                                                            \
+    for (int i = 0; i < n; i++)                                                \
+      dst[i] = pred[i] ? -__builtin_copysign##SUFFIX (a[i], b[i]) : dst[i];    \
+  }
+
+#define TEST_TYPE6(TYPE, SUFFIX)                                               \
+  __attribute__ ((noipa)) void ncopysigns_##TYPE (TYPE *restrict dst,          \
+						  TYPE *restrict a, TYPE b,    \
+						  int *restrict pred, int n)   \
+  {                                                                            \
+    for (int i = 0; i < n; i++)                                                \
+      dst[i] = pred[i] ? -__builtin_copysign##SUFFIX (a[i], b) : dst[i];       \
+  }
+
+#define TEST_ALL()                                                             \
+  TEST_TYPE (_Float16, f16)                                                    \
+  TEST_TYPE (float, f)                                                         \
+  TEST_TYPE (double, )                                                         \
+  TEST_TYPE2 (_Float16, f16)                                                   \
+  TEST_TYPE2 (float, f)                                                        \
+  TEST_TYPE2 (double, )                                                        \
+  TEST_TYPE3 (_Float16, f16)                                                   \
+  TEST_TYPE3 (float, f)                                                        \
+  TEST_TYPE3 (double, )                                                        \
+  TEST_TYPE4 (_Float16, f16)                                                   \
+  TEST_TYPE4 (float, f)                                                        \
+  TEST_TYPE4 (double, )                                                        \
+  TEST_TYPE5 (_Float16, f16)                                                   \
+  TEST_TYPE5 (float, f)                                                        \
+  TEST_TYPE5 (double, )                                                        \
+  TEST_TYPE6 (_Float16, f16)                                                   \
+  TEST_TYPE6 (float, f)                                                        \
+  TEST_TYPE6 (double, )
+
+TEST_ALL ()
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c
new file mode 100644
index 00000000000..6e337f9e74c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c
@@ -0,0 +1,93 @@
+/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
+
+#include "cond_copysign-template.h"
+
+#include <assert.h>
+
+#define SZ 512
+
+#define EPS 1e-6
+
+#define INIT_PRED()                                                            \
+  int pred[SZ];                                                                \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      pred[i] = i % 3;                                                         \
+    }
+
+#define RUN(TYPE, VAL)                                                         \
+  TYPE a##TYPE[SZ];                                                            \
+  TYPE b##TYPE[SZ];                                                            \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      a##TYPE[i] = i;                                                          \
+      b##TYPE[i] = (i & 1) ? VAL : -VAL;                                       \
+    }                                                                          \
+  copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ);                       \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
+
+#define RUN2(TYPE, VAL)                                                        \
+  TYPE a2##TYPE[SZ];                                                           \
+  for (int i = 0; i < SZ; i++)                                                 \
+    a2##TYPE[i] = i;                                                           \
+  copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ);                       \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS);
+
+#define RUN3(TYPE, VAL)                                                        \
+  TYPE a3##TYPE[SZ];                                                           \
+  TYPE b3##TYPE[SZ];                                                           \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      a3##TYPE[i] = (i & 1) ? -i : i;                                          \
+      b3##TYPE[i] = (i & 1) ? VAL : -VAL;                                      \
+    }                                                                          \
+  xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ);                     \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS);
+
+#define RUN4(TYPE, VAL)                                                        \
+  TYPE a4##TYPE[SZ];                                                           \
+  for (int i = 0; i < SZ; i++)                                                 \
+    a4##TYPE[i] = -i;                                                          \
+  xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ);                        \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS);
+
+#define RUN5(TYPE, VAL)                                                        \
+  TYPE a5##TYPE[SZ];                                                           \
+  TYPE b5##TYPE[SZ];                                                           \
+  for (int i = 0; i < SZ; i++)                                                 \
+    {                                                                          \
+      a5##TYPE[i] = i;                                                         \
+      b5##TYPE[i] = (i & 1) ? VAL : -VAL;                                      \
+    }                                                                          \
+  ncopysign_##TYPE (a5##TYPE, a5##TYPE, b##TYPE, pred, SZ);                    \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (!pred[i]                                                           \
+	    || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
+
+#define RUN6(TYPE, VAL)                                                        \
+  TYPE a6##TYPE[SZ];                                                           \
+  for (int i = 0; i < SZ; i++)                                                 \
+    a6##TYPE[i] = i;                                                           \
+  ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ);                      \
+  for (int i = 0; i < SZ; i++)                                                 \
+    assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS);
+
+#define RUN_ALL()                                                              \
+  RUN (_Float16, 5)                                                            \
+  RUN2 (_Float16, 11)                                                          \
+  RUN3 (_Float16, 16)                                                          \
+  RUN4 (_Float16, 17)                                                          \
+  RUN5 (_Float16, 123)                                                         \
+  RUN6 (_Float16, 777)
+
+int
+main ()
+{
+  INIT_PRED ()
+  RUN_ALL ()
+}
-- 
2.36.3


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] RISC-V: Support cond vfsgnj.vv autovec pattern
  2023-09-12 16:25 [PATCH] RISC-V: Support cond vfsgnj.vv autovec pattern Lehua Ding
@ 2023-09-13  8:49 ` Kito Cheng
  2023-09-13 10:35   ` Lehua Ding
  0 siblings, 1 reply; 3+ messages in thread
From: Kito Cheng @ 2023-09-13  8:49 UTC (permalink / raw)
  To: Lehua Ding; +Cc: gcc-patches, juzhe.zhong, rdapp.gcc, palmer, jeffreyalaw

LGTM

On Wed, Sep 13, 2023 at 12:25 AM Lehua Ding <lehua.ding@rivai.ai> wrote:
>
> This patch adds combine patterns that merge vfsgnj.vv + vcond_mask
> into a masked vfsgnj.vv. vfsgnjx.vv cannot currently be produced by the
> midend; we will send a separate patch to address that issue.
>
> gcc/ChangeLog:
>
>         * config/riscv/autovec-opt.md (*copysign<mode>_neg): Move.
>         (*cond_copysign<mode>): New combine pattern.
>         * config/riscv/riscv-v.cc (needs_fp_rounding): Extend.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c: New test.
>         * gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c: New test.
>         * gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c: New test.
>         * gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h: New test.
>         * gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c: New test.
>
> ---
>  gcc/config/riscv/autovec-opt.md               | 68 +++++++++----
>  gcc/config/riscv/riscv-v.cc                   |  4 +-
>  .../rvv/autovec/cond/cond_copysign-run.c      | 99 +++++++++++++++++++
>  .../rvv/autovec/cond/cond_copysign-rv32gcv.c  | 12 +++
>  .../rvv/autovec/cond/cond_copysign-rv64gcv.c  | 12 +++
>  .../rvv/autovec/cond/cond_copysign-template.h | 81 +++++++++++++++
>  .../rvv/autovec/cond/cond_copysign-zvfh-run.c | 93 +++++++++++++++++
>  7 files changed, 349 insertions(+), 20 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c
>
> diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
> index 58e80044f1e..f759525f96b 100644
> --- a/gcc/config/riscv/autovec-opt.md
> +++ b/gcc/config/riscv/autovec-opt.md
> @@ -609,6 +609,10 @@
>     (set_attr "mode" "<V_DOUBLE_TRUNC>")
>     (set (attr "frm_mode") (symbol_ref "riscv_vector::FRM_DYN"))])
>
> +;; =============================================================================
> +;; Combine op + vmerge to cond_op
> +;; =============================================================================
> +
>  ;; Combine <op> and vcond_mask generated by midend into cond_len_<op>
>  ;; Currently supported operations:
>  ;;   abs(FP)
> @@ -651,25 +655,6 @@
>    DONE;
>  })
>
> -;; Combine vlmax neg and UNSPEC_VCOPYSIGN
> -(define_insn_and_split "*copysign<mode>_neg"
> -  [(set (match_operand:VF 0 "register_operand")
> -        (neg:VF
> -          (unspec:VF [
> -            (match_operand:VF 1 "register_operand")
> -            (match_operand:VF 2 "register_operand")
> -          ] UNSPEC_VCOPYSIGN)))]
> -  "TARGET_VECTOR && can_create_pseudo_p ()"
> -  "#"
> -  "&& 1"
> -  [(const_int 0)]
> -{
> -  riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (<MODE>mode),
> -                                  riscv_vector::BINARY_OP, operands);
> -  DONE;
> -}
> -[(set_attr "type" "vector")])
> -
>  ;; Combine sign_extend/zero_extend(vf2) and vcond_mask
>  (define_insn_and_split "*cond_<optab><v_double_trunc><mode>"
>    [(set (match_operand:VWEXTI 0 "register_operand")
> @@ -918,6 +903,27 @@
>  }
>  [(set_attr "type" "vector")])
>
> +;; Combine vfsgnj.vv + vcond_mask
> +(define_insn_and_split "*cond_copysign<mode>"
> +   [(set (match_operand:VF 0 "register_operand")
> +    (if_then_else:VF
> +      (match_operand:<VM> 1 "register_operand")
> +      (unspec:VF
> +       [(match_operand:VF 2 "register_operand")
> +        (match_operand:VF 3 "register_operand")] UNSPEC_VCOPYSIGN)
> +      (match_operand:VF 4 "register_operand")))]
> +   "TARGET_VECTOR && can_create_pseudo_p ()"
> +   "#"
> +   "&& 1"
> +   [(const_int 0)]
> +{
> +  insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, <MODE>mode);
> +  rtx ops[] = {operands[0], operands[1], operands[2], operands[3], operands[4],
> +               gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)};
> +  riscv_vector::expand_cond_len_binop (icode, ops);
> +   DONE;
> +})
> +
>  ;; =============================================================================
>  ;; Combine extend + binop to widen_binop
>  ;; =============================================================================
> @@ -1119,3 +1125,27 @@
>    DONE;
>  }
>  [(set_attr "type" "vfwmul")])
> +
> +
> +;; =============================================================================
> +;; Misc combine patterns
> +;; =============================================================================
> +
> +;; Combine vlmax neg and UNSPEC_VCOPYSIGN
> +(define_insn_and_split "*copysign<mode>_neg"
> +  [(set (match_operand:VF 0 "register_operand")
> +        (neg:VF
> +          (unspec:VF [
> +            (match_operand:VF 1 "register_operand")
> +            (match_operand:VF 2 "register_operand")
> +          ] UNSPEC_VCOPYSIGN)))]
> +  "TARGET_VECTOR && can_create_pseudo_p ()"
> +  "#"
> +  "&& 1"
> +  [(const_int 0)]
> +{
> +  riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (<MODE>mode),
> +                                  riscv_vector::BINARY_OP, operands);
> +  DONE;
> +}
> +[(set_attr "type" "vector")])
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 4d95bd773a2..76e6094f45b 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -2970,7 +2970,9 @@ needs_fp_rounding (unsigned icode, machine_mode mode)
>          && icode != maybe_code_for_pred_extend (mode)
>          /* narrower-INT -> FP */
>          && icode != maybe_code_for_pred_widen (FLOAT, mode)
> -        && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode);
> +        && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode)
> +        /* vfsgnj */
> +        && icode != maybe_code_for_pred (UNSPEC_VCOPYSIGN, mode);
>  }
>
>  /* Subroutine to expand COND_LEN_* patterns.  */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
> new file mode 100644
> index 00000000000..be37854c135
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
> @@ -0,0 +1,99 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
> +
> +#include "cond_copysign-template.h"
> +
> +#include <assert.h>
> +
> +#define SZ 512
> +
> +#define EPS 1e-6
> +
> +#define INIT_PRED()                                                            \
> +  int pred[SZ];                                                                \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    {                                                                          \
> +      pred[i] = i % 3;                                                         \
> +    }
> +
> +#define RUN(TYPE, VAL)                                                         \
> +  TYPE a##TYPE[SZ];                                                            \
> +  TYPE b##TYPE[SZ];                                                            \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    {                                                                          \
> +      a##TYPE[i] = i;                                                          \
> +      b##TYPE[i] = (i & 1) ? VAL : -VAL;                                       \
> +    }                                                                          \
> +  copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ);                       \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
> +
> +#define RUN2(TYPE, VAL)                                                        \
> +  TYPE a2##TYPE[SZ];                                                           \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    a2##TYPE[i] = i;                                                           \
> +  copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ);                       \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS);
> +
> +#define RUN3(TYPE, VAL)                                                        \
> +  TYPE a3##TYPE[SZ];                                                           \
> +  TYPE b3##TYPE[SZ];                                                           \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    {                                                                          \
> +      a3##TYPE[i] = (i & 1) ? -i : i;                                          \
> +      b3##TYPE[i] = (i & 1) ? VAL : -VAL;                                      \
> +    }                                                                          \
> +  xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ);                     \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS);
> +
> +#define RUN4(TYPE, VAL)                                                        \
> +  TYPE a4##TYPE[SZ];                                                           \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    a4##TYPE[i] = -i;                                                          \
> +  xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ);                        \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS);
> +
> +#define RUN5(TYPE, VAL)                                                        \
> +  TYPE a5##TYPE[SZ];                                                           \
> +  TYPE b5##TYPE[SZ];                                                           \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    {                                                                          \
> +      a5##TYPE[i] = i;                                                         \
> +      b5##TYPE[i] = (i & 1) ? VAL : -VAL;                                      \
> +    }                                                                          \
> +  ncopysign_##TYPE (a5##TYPE, a5##TYPE, b##TYPE, pred, SZ);                    \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    assert (!pred[i]                                                           \
> +           || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
> +
> +#define RUN6(TYPE, VAL)                                                        \
> +  TYPE a6##TYPE[SZ];                                                           \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    a6##TYPE[i] = i;                                                           \
> +  ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ);                      \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS);
> +
> +#define RUN_ALL()                                                              \
> +  RUN (float, 5)                                                               \
> +  RUN (double, 6)                                                              \
> +  RUN2 (float, 11)                                                             \
> +  RUN2 (double, 12)                                                            \
> +  RUN3 (float, 16)                                                             \
> +  RUN3 (double, 18)                                                            \
> +  RUN4 (float, 17)                                                             \
> +  RUN4 (double, 19)                                                            \
> +  RUN5 (float, 123)                                                            \
> +  RUN5 (double, 523)                                                           \
> +  RUN6 (float, 777)                                                            \
> +  RUN6 (double, 877)
> +
> +int
> +main ()
> +{
> +  INIT_PRED ()
> +  RUN_ALL ()
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
> new file mode 100644
> index 00000000000..cef531b9700
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
> +
> +#include "cond_copysign-template.h"
> +
> +/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */
> +/* 1. The vectorizer wraps scalar variants of copysign into vector constants which
> +      expand cannot handle currently.
> +   2. match.pd convert .COPYSIGN (1, b) + COND_MUL to AND + XOR currently.  */
> +/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } } } */
> +/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */
> +/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
> new file mode 100644
> index 00000000000..cc2aa4de757
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
> +
> +#include "cond_copysign-template.h"
> +
> +/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */
> +/* 1. The vectorizer wraps scalar variants of copysign into vector constants which
> +      expand cannot handle currently.
> +   2. match.pd convert .COPYSIGN (1, b) + COND_MUL to AND + XOR currently.  */
> +/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } } } */
> +/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */
> +/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
> new file mode 100644
> index 00000000000..4191500fd83
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
> @@ -0,0 +1,81 @@
> +#include <stdint-gcc.h>
> +
> +#define TEST_TYPE(TYPE, SUFFIX)                                                \
> +  __attribute__ ((noipa)) void copysign_##TYPE (TYPE *restrict dst,            \
> +                                               TYPE *restrict a,              \
> +                                               TYPE *restrict b,              \
> +                                               int *restrict pred, int n)     \
> +  {                                                                            \
> +    for (int i = 0; i < n; i++)                                                \
> +      dst[i] = pred[i] ? __builtin_copysign##SUFFIX (a[i], b[i]) : dst[i];     \
> +  }
> +
> +#define TEST_TYPE2(TYPE, SUFFIX)                                               \
> +  __attribute__ ((noipa)) void copysigns_##TYPE (TYPE *restrict dst,           \
> +                                                TYPE *restrict a, TYPE b,     \
> +                                                int *restrict pred, int n)    \
> +  {                                                                            \
> +    for (int i = 0; i < n; i++)                                                \
> +      dst[i] = pred[i] ? __builtin_copysign##SUFFIX (a[i], b) : dst[i];        \
> +  }
> +
> +#define TEST_TYPE3(TYPE, SUFFIX)                                               \
> +  __attribute__ ((noipa)) void xorsign_##TYPE (TYPE *restrict dst,             \
> +                                              TYPE *restrict a,               \
> +                                              TYPE *restrict b,               \
> +                                              int *restrict pred, int n)      \
> +  {                                                                            \
> +    for (int i = 0; i < n; i++)                                                \
> +      dst[i]                                                                   \
> +       = pred[i] ? a[i] * __builtin_copysign##SUFFIX (1.0, b[i]) : dst[i];    \
> +  }
> +
> +#define TEST_TYPE4(TYPE, SUFFIX)                                               \
> +  __attribute__ ((noipa)) void xorsigns_##TYPE (TYPE *restrict dst,            \
> +                                               TYPE *restrict a, TYPE b,      \
> +                                               int *restrict pred, int n)     \
> +  {                                                                            \
> +    for (int i = 0; i < n; i++)                                                \
> +      dst[i] = pred[i] ? a[i] * __builtin_copysign##SUFFIX (1.0, b) : dst[i];  \
> +  }
> +
> +#define TEST_TYPE5(TYPE, SUFFIX)                                               \
> +  __attribute__ ((noipa)) void ncopysign_##TYPE (TYPE *restrict dst,           \
> +                                                TYPE *restrict a,             \
> +                                                TYPE *restrict b,             \
> +                                                int *restrict pred, int n)    \
> +  {                                                                            \
> +    for (int i = 0; i < n; i++)                                                \
> +      dst[i] = pred[i] ? -__builtin_copysign##SUFFIX (a[i], b[i]) : dst[i];    \
> +  }
> +
> +#define TEST_TYPE6(TYPE, SUFFIX)                                               \
> +  __attribute__ ((noipa)) void ncopysigns_##TYPE (TYPE *restrict dst,          \
> +                                                 TYPE *restrict a, TYPE b,    \
> +                                                 int *restrict pred, int n)   \
> +  {                                                                            \
> +    for (int i = 0; i < n; i++)                                                \
> +      dst[i] = pred[i] ? -__builtin_copysign##SUFFIX (a[i], b) : dst[i];       \
> +  }
> +
> +#define TEST_ALL()                                                             \
> +  TEST_TYPE (_Float16, f16)                                                    \
> +  TEST_TYPE (float, f)                                                         \
> +  TEST_TYPE (double, )                                                         \
> +  TEST_TYPE2 (_Float16, f16)                                                   \
> +  TEST_TYPE2 (float, f)                                                        \
> +  TEST_TYPE2 (double, )                                                        \
> +  TEST_TYPE3 (_Float16, f16)                                                   \
> +  TEST_TYPE3 (float, f)                                                        \
> +  TEST_TYPE3 (double, )                                                        \
> +  TEST_TYPE4 (_Float16, f16)                                                   \
> +  TEST_TYPE4 (float, f)                                                        \
> +  TEST_TYPE4 (double, )                                                        \
> +  TEST_TYPE5 (_Float16, f16)                                                   \
> +  TEST_TYPE5 (float, f)                                                        \
> +  TEST_TYPE5 (double, )                                                        \
> +  TEST_TYPE6 (_Float16, f16)                                                   \
> +  TEST_TYPE6 (float, f)                                                        \
> +  TEST_TYPE6 (double, )
> +
> +TEST_ALL ()
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c
> new file mode 100644
> index 00000000000..6e337f9e74c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c
> @@ -0,0 +1,93 @@
> +/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
> +/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
> +
> +#include "cond_copysign-template.h"
> +
> +#include <assert.h>
> +
> +#define SZ 512
> +
> +#define EPS 1e-6
> +
> +#define INIT_PRED()                                                            \
> +  int pred[SZ];                                                                \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    {                                                                          \
> +      pred[i] = i % 3;                                                         \
> +    }
> +
> +#define RUN(TYPE, VAL)                                                         \
> +  TYPE a##TYPE[SZ];                                                            \
> +  TYPE b##TYPE[SZ];                                                            \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    {                                                                          \
> +      a##TYPE[i] = i;                                                          \
> +      b##TYPE[i] = (i & 1) ? VAL : -VAL;                                       \
> +    }                                                                          \
> +  copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ);                       \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
> +
> +#define RUN2(TYPE, VAL)                                                        \
> +  TYPE a2##TYPE[SZ];                                                           \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    a2##TYPE[i] = i;                                                           \
> +  copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ);                       \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS);
> +
> +#define RUN3(TYPE, VAL)                                                        \
> +  TYPE a3##TYPE[SZ];                                                           \
> +  TYPE b3##TYPE[SZ];                                                           \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    {                                                                          \
> +      a3##TYPE[i] = (i & 1) ? -i : i;                                          \
> +      b3##TYPE[i] = (i & 1) ? VAL : -VAL;                                      \
> +    }                                                                          \
> +  xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ);                     \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS);
> +
> +#define RUN4(TYPE, VAL)                                                        \
> +  TYPE a4##TYPE[SZ];                                                           \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    a4##TYPE[i] = -i;                                                          \
> +  xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ);                        \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS);
> +
> +#define RUN5(TYPE, VAL)                                                        \
> +  TYPE a5##TYPE[SZ];                                                           \
> +  TYPE b5##TYPE[SZ];                                                           \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    {                                                                          \
> +      a5##TYPE[i] = i;                                                         \
> +      b5##TYPE[i] = (i & 1) ? VAL : -VAL;                                      \
> +    }                                                                          \
> +  ncopysign_##TYPE (a5##TYPE, a5##TYPE, b##TYPE, pred, SZ);                    \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    assert (!pred[i]                                                           \
> +           || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
> +
> +#define RUN6(TYPE, VAL)                                                        \
> +  TYPE a6##TYPE[SZ];                                                           \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    a6##TYPE[i] = i;                                                           \
> +  ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ);                      \
> +  for (int i = 0; i < SZ; i++)                                                 \
> +    assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS);
> +
> +#define RUN_ALL()                                                              \
> +  RUN (_Float16, 5)                                                            \
> +  RUN2 (_Float16, 11)                                                          \
> +  RUN3 (_Float16, 16)                                                          \
> +  RUN4 (_Float16, 17)                                                          \
> +  RUN5 (_Float16, 123)                                                         \
> +  RUN6 (_Float16, 777)
> +
> +int
> +main ()
> +{
> +  INIT_PRED ()
> +  RUN_ALL ()
> +}
> --
> 2.36.3
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] RISC-V: Support cond vfsgnj.vv autovec pattern
  2023-09-13  8:49 ` Kito Cheng
@ 2023-09-13 10:35   ` Lehua Ding
  0 siblings, 0 replies; 3+ messages in thread
From: Lehua Ding @ 2023-09-13 10:35 UTC (permalink / raw)
  To: Kito Cheng; +Cc: gcc-patches, juzhe.zhong, rdapp.gcc, palmer, jeffreyalaw

Committed, thanks Kito.

On 2023/9/13 16:49, Kito Cheng wrote:
> LGTM
> 
> On Wed, Sep 13, 2023 at 12:25 AM Lehua Ding <lehua.ding@rivai.ai> wrote:
>>
>> This patch adds combine patterns to combine vfsgnj.vv + vcond_mask
>> into a masked vfsgnj.vv. vfsgnjx.vv cannot currently be produced in
>> the midend. We will send another patch to address this issue.
>>
>> gcc/ChangeLog:
>>
>>          * config/riscv/autovec-opt.md (*copysign<mode>_neg): Move.
>>          (*cond_copysign<mode>): New combine pattern.
>>          * config/riscv/riscv-v.cc (needs_fp_rounding): Extend.
>>
>> gcc/testsuite/ChangeLog:
>>
>>          * gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c: New test.
>>          * gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c: New test.
>>          * gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c: New test.
>>          * gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h: New test.
>>          * gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c: New test.
>>
>> ---
>>   gcc/config/riscv/autovec-opt.md               | 68 +++++++++----
>>   gcc/config/riscv/riscv-v.cc                   |  4 +-
>>   .../rvv/autovec/cond/cond_copysign-run.c      | 99 +++++++++++++++++++
>>   .../rvv/autovec/cond/cond_copysign-rv32gcv.c  | 12 +++
>>   .../rvv/autovec/cond/cond_copysign-rv64gcv.c  | 12 +++
>>   .../rvv/autovec/cond/cond_copysign-template.h | 81 +++++++++++++++
>>   .../rvv/autovec/cond/cond_copysign-zvfh-run.c | 93 +++++++++++++++++
>>   7 files changed, 349 insertions(+), 20 deletions(-)
>>   create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
>>   create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
>>   create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
>>   create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
>>   create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c
>>
>> diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
>> index 58e80044f1e..f759525f96b 100644
>> --- a/gcc/config/riscv/autovec-opt.md
>> +++ b/gcc/config/riscv/autovec-opt.md
>> @@ -609,6 +609,10 @@
>>      (set_attr "mode" "<V_DOUBLE_TRUNC>")
>>      (set (attr "frm_mode") (symbol_ref "riscv_vector::FRM_DYN"))])
>>
>> +;; =============================================================================
>> +;; Combine op + vmerge to cond_op
>> +;; =============================================================================
>> +
>>   ;; Combine <op> and vcond_mask generated by midend into cond_len_<op>
>>   ;; Currently supported operations:
>>   ;;   abs(FP)
>> @@ -651,25 +655,6 @@
>>     DONE;
>>   })
>>
>> -;; Combine vlmax neg and UNSPEC_VCOPYSIGN
>> -(define_insn_and_split "*copysign<mode>_neg"
>> -  [(set (match_operand:VF 0 "register_operand")
>> -        (neg:VF
>> -          (unspec:VF [
>> -            (match_operand:VF 1 "register_operand")
>> -            (match_operand:VF 2 "register_operand")
>> -          ] UNSPEC_VCOPYSIGN)))]
>> -  "TARGET_VECTOR && can_create_pseudo_p ()"
>> -  "#"
>> -  "&& 1"
>> -  [(const_int 0)]
>> -{
>> -  riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (<MODE>mode),
>> -                                  riscv_vector::BINARY_OP, operands);
>> -  DONE;
>> -}
>> -[(set_attr "type" "vector")])
>> -
>>   ;; Combine sign_extend/zero_extend(vf2) and vcond_mask
>>   (define_insn_and_split "*cond_<optab><v_double_trunc><mode>"
>>     [(set (match_operand:VWEXTI 0 "register_operand")
>> @@ -918,6 +903,27 @@
>>   }
>>   [(set_attr "type" "vector")])
>>
>> +;; Combine vfsgnj.vv + vcond_mask
>> +(define_insn_and_split "*cond_copysign<mode>"
>> +   [(set (match_operand:VF 0 "register_operand")
>> +    (if_then_else:VF
>> +      (match_operand:<VM> 1 "register_operand")
>> +      (unspec:VF
>> +       [(match_operand:VF 2 "register_operand")
>> +        (match_operand:VF 3 "register_operand")] UNSPEC_VCOPYSIGN)
>> +      (match_operand:VF 4 "register_operand")))]
>> +   "TARGET_VECTOR && can_create_pseudo_p ()"
>> +   "#"
>> +   "&& 1"
>> +   [(const_int 0)]
>> +{
>> +  insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, <MODE>mode);
>> +  rtx ops[] = {operands[0], operands[1], operands[2], operands[3], operands[4],
>> +               gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)};
>> +  riscv_vector::expand_cond_len_binop (icode, ops);
>> +   DONE;
>> +})
>> +
>>   ;; =============================================================================
>>   ;; Combine extend + binop to widen_binop
>>   ;; =============================================================================
>> @@ -1119,3 +1125,27 @@
>>     DONE;
>>   }
>>   [(set_attr "type" "vfwmul")])
>> +
>> +
>> +;; =============================================================================
>> +;; Misc combine patterns
>> +;; =============================================================================
>> +
>> +;; Combine vlmax neg and UNSPEC_VCOPYSIGN
>> +(define_insn_and_split "*copysign<mode>_neg"
>> +  [(set (match_operand:VF 0 "register_operand")
>> +        (neg:VF
>> +          (unspec:VF [
>> +            (match_operand:VF 1 "register_operand")
>> +            (match_operand:VF 2 "register_operand")
>> +          ] UNSPEC_VCOPYSIGN)))]
>> +  "TARGET_VECTOR && can_create_pseudo_p ()"
>> +  "#"
>> +  "&& 1"
>> +  [(const_int 0)]
>> +{
>> +  riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (<MODE>mode),
>> +                                  riscv_vector::BINARY_OP, operands);
>> +  DONE;
>> +}
>> +[(set_attr "type" "vector")])
>> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
>> index 4d95bd773a2..76e6094f45b 100644
>> --- a/gcc/config/riscv/riscv-v.cc
>> +++ b/gcc/config/riscv/riscv-v.cc
>> @@ -2970,7 +2970,9 @@ needs_fp_rounding (unsigned icode, machine_mode mode)
>>           && icode != maybe_code_for_pred_extend (mode)
>>           /* narrower-INT -> FP */
>>           && icode != maybe_code_for_pred_widen (FLOAT, mode)
>> -        && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode);
>> +        && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode)
>> +        /* vfsgnj */
>> +        && icode != maybe_code_for_pred (UNSPEC_VCOPYSIGN, mode);
>>   }
>>
>>   /* Subroutine to expand COND_LEN_* patterns.  */
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
>> new file mode 100644
>> index 00000000000..be37854c135
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c
>> @@ -0,0 +1,99 @@
>> +/* { dg-do run { target { riscv_vector } } } */
>> +/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
>> +
>> +#include "cond_copysign-template.h"
>> +
>> +#include <assert.h>
>> +
>> +#define SZ 512
>> +
>> +#define EPS 1e-6
>> +
>> +#define INIT_PRED()                                                            \
>> +  int pred[SZ];                                                                \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    {                                                                          \
>> +      pred[i] = i % 3;                                                         \
>> +    }
>> +
>> +#define RUN(TYPE, VAL)                                                         \
>> +  TYPE a##TYPE[SZ];                                                            \
>> +  TYPE b##TYPE[SZ];                                                            \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    {                                                                          \
>> +      a##TYPE[i] = i;                                                          \
>> +      b##TYPE[i] = (i & 1) ? VAL : -VAL;                                       \
>> +    }                                                                          \
>> +  copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ);                       \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
>> +
>> +#define RUN2(TYPE, VAL)                                                        \
>> +  TYPE a2##TYPE[SZ];                                                           \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    a2##TYPE[i] = i;                                                           \
>> +  copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ);                       \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS);
>> +
>> +#define RUN3(TYPE, VAL)                                                        \
>> +  TYPE a3##TYPE[SZ];                                                           \
>> +  TYPE b3##TYPE[SZ];                                                           \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    {                                                                          \
>> +      a3##TYPE[i] = (i & 1) ? -i : i;                                          \
>> +      b3##TYPE[i] = (i & 1) ? VAL : -VAL;                                      \
>> +    }                                                                          \
>> +  xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ);                     \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS);
>> +
>> +#define RUN4(TYPE, VAL)                                                        \
>> +  TYPE a4##TYPE[SZ];                                                           \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    a4##TYPE[i] = -i;                                                          \
>> +  xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ);                        \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS);
>> +
>> +#define RUN5(TYPE, VAL)                                                        \
>> +  TYPE a5##TYPE[SZ];                                                           \
>> +  TYPE b5##TYPE[SZ];                                                           \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    {                                                                          \
>> +      a5##TYPE[i] = i;                                                         \
>> +      b5##TYPE[i] = (i & 1) ? VAL : -VAL;                                      \
>> +    }                                                                          \
>> +  ncopysign_##TYPE (a5##TYPE, a5##TYPE, b##TYPE, pred, SZ);                    \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    assert (!pred[i]                                                           \
>> +           || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
>> +
>> +#define RUN6(TYPE, VAL)                                                        \
>> +  TYPE a6##TYPE[SZ];                                                           \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    a6##TYPE[i] = i;                                                           \
>> +  ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ);                      \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS);
>> +
>> +#define RUN_ALL()                                                              \
>> +  RUN (float, 5)                                                               \
>> +  RUN (double, 6)                                                              \
>> +  RUN2 (float, 11)                                                             \
>> +  RUN2 (double, 12)                                                            \
>> +  RUN3 (float, 16)                                                             \
>> +  RUN3 (double, 18)                                                            \
>> +  RUN4 (float, 17)                                                             \
>> +  RUN4 (double, 19)                                                            \
>> +  RUN5 (float, 123)                                                            \
>> +  RUN5 (double, 523)                                                           \
>> +  RUN6 (float, 777)                                                            \
>> +  RUN6 (double, 877)
>> +
>> +int
>> +main ()
>> +{
>> +  INIT_PRED ()
>> +  RUN_ALL ()
>> +}
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
>> new file mode 100644
>> index 00000000000..cef531b9700
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c
>> @@ -0,0 +1,12 @@
>> +/* { dg-do compile } */
>> +/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
>> +
>> +#include "cond_copysign-template.h"
>> +
>> +/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */
>> +/* 1. The vectorizer wraps scalar variants of copysign into vector constants which
>> +      expand cannot handle currently.
>> +   2. match.pd currently converts .COPYSIGN (1, b) + COND_MUL to AND + XOR.  */
>> +/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } } } */
>> +/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */
>> +/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
>> new file mode 100644
>> index 00000000000..cc2aa4de757
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c
>> @@ -0,0 +1,12 @@
>> +/* { dg-do compile } */
>> +/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
>> +
>> +#include "cond_copysign-template.h"
>> +
>> +/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */
>> +/* 1. The vectorizer wraps scalar variants of copysign into vector constants which
>> +      expand cannot handle currently.
>> +   2. match.pd currently converts .COPYSIGN (1, b) + COND_MUL to AND + XOR.  */
>> +/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } } } */
>> +/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */
>> +/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
>> new file mode 100644
>> index 00000000000..4191500fd83
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h
>> @@ -0,0 +1,81 @@
>> +#include <stdint-gcc.h>
>> +
>> +#define TEST_TYPE(TYPE, SUFFIX)                                                \
>> +  __attribute__ ((noipa)) void copysign_##TYPE (TYPE *restrict dst,            \
>> +                                               TYPE *restrict a,              \
>> +                                               TYPE *restrict b,              \
>> +                                               int *restrict pred, int n)     \
>> +  {                                                                            \
>> +    for (int i = 0; i < n; i++)                                                \
>> +      dst[i] = pred[i] ? __builtin_copysign##SUFFIX (a[i], b[i]) : dst[i];     \
>> +  }
>> +
>> +#define TEST_TYPE2(TYPE, SUFFIX)                                               \
>> +  __attribute__ ((noipa)) void copysigns_##TYPE (TYPE *restrict dst,           \
>> +                                                TYPE *restrict a, TYPE b,     \
>> +                                                int *restrict pred, int n)    \
>> +  {                                                                            \
>> +    for (int i = 0; i < n; i++)                                                \
>> +      dst[i] = pred[i] ? __builtin_copysign##SUFFIX (a[i], b) : dst[i];        \
>> +  }
>> +
>> +#define TEST_TYPE3(TYPE, SUFFIX)                                               \
>> +  __attribute__ ((noipa)) void xorsign_##TYPE (TYPE *restrict dst,             \
>> +                                              TYPE *restrict a,               \
>> +                                              TYPE *restrict b,               \
>> +                                              int *restrict pred, int n)      \
>> +  {                                                                            \
>> +    for (int i = 0; i < n; i++)                                                \
>> +      dst[i]                                                                   \
>> +       = pred[i] ? a[i] * __builtin_copysign##SUFFIX (1.0, b[i]) : dst[i];    \
>> +  }
>> +
>> +#define TEST_TYPE4(TYPE, SUFFIX)                                               \
>> +  __attribute__ ((noipa)) void xorsigns_##TYPE (TYPE *restrict dst,            \
>> +                                               TYPE *restrict a, TYPE b,      \
>> +                                               int *restrict pred, int n)     \
>> +  {                                                                            \
>> +    for (int i = 0; i < n; i++)                                                \
>> +      dst[i] = pred[i] ? a[i] * __builtin_copysign##SUFFIX (1.0, b) : dst[i];  \
>> +  }
>> +
>> +#define TEST_TYPE5(TYPE, SUFFIX)                                               \
>> +  __attribute__ ((noipa)) void ncopysign_##TYPE (TYPE *restrict dst,           \
>> +                                                TYPE *restrict a,             \
>> +                                                TYPE *restrict b,             \
>> +                                                int *restrict pred, int n)    \
>> +  {                                                                            \
>> +    for (int i = 0; i < n; i++)                                                \
>> +      dst[i] = pred[i] ? -__builtin_copysign##SUFFIX (a[i], b[i]) : dst[i];    \
>> +  }
>> +
>> +#define TEST_TYPE6(TYPE, SUFFIX)                                               \
>> +  __attribute__ ((noipa)) void ncopysigns_##TYPE (TYPE *restrict dst,          \
>> +                                                 TYPE *restrict a, TYPE b,    \
>> +                                                 int *restrict pred, int n)   \
>> +  {                                                                            \
>> +    for (int i = 0; i < n; i++)                                                \
>> +      dst[i] = pred[i] ? -__builtin_copysign##SUFFIX (a[i], b) : dst[i];       \
>> +  }
>> +
>> +#define TEST_ALL()                                                             \
>> +  TEST_TYPE (_Float16, f16)                                                    \
>> +  TEST_TYPE (float, f)                                                         \
>> +  TEST_TYPE (double, )                                                         \
>> +  TEST_TYPE2 (_Float16, f16)                                                   \
>> +  TEST_TYPE2 (float, f)                                                        \
>> +  TEST_TYPE2 (double, )                                                        \
>> +  TEST_TYPE3 (_Float16, f16)                                                   \
>> +  TEST_TYPE3 (float, f)                                                        \
>> +  TEST_TYPE3 (double, )                                                        \
>> +  TEST_TYPE4 (_Float16, f16)                                                   \
>> +  TEST_TYPE4 (float, f)                                                        \
>> +  TEST_TYPE4 (double, )                                                        \
>> +  TEST_TYPE5 (_Float16, f16)                                                   \
>> +  TEST_TYPE5 (float, f)                                                        \
>> +  TEST_TYPE5 (double, )                                                        \
>> +  TEST_TYPE6 (_Float16, f16)                                                   \
>> +  TEST_TYPE6 (float, f)                                                        \
>> +  TEST_TYPE6 (double, )
>> +
>> +TEST_ALL ()
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c
>> new file mode 100644
>> index 00000000000..6e337f9e74c
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c
>> @@ -0,0 +1,93 @@
>> +/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */
>> +/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
>> +
>> +#include "cond_copysign-template.h"
>> +
>> +#include <assert.h>
>> +
>> +#define SZ 512
>> +
>> +#define EPS 1e-6
>> +
>> +#define INIT_PRED()                                                            \
>> +  int pred[SZ];                                                                \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    {                                                                          \
>> +      pred[i] = i % 3;                                                         \
>> +    }
>> +
>> +#define RUN(TYPE, VAL)                                                         \
>> +  TYPE a##TYPE[SZ];                                                            \
>> +  TYPE b##TYPE[SZ];                                                            \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    {                                                                          \
>> +      a##TYPE[i] = i;                                                          \
>> +      b##TYPE[i] = (i & 1) ? VAL : -VAL;                                       \
>> +    }                                                                          \
>> +  copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ);                       \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
>> +
>> +#define RUN2(TYPE, VAL)                                                        \
>> +  TYPE a2##TYPE[SZ];                                                           \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    a2##TYPE[i] = i;                                                           \
>> +  copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ);                       \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS);
>> +
>> +#define RUN3(TYPE, VAL)                                                        \
>> +  TYPE a3##TYPE[SZ];                                                           \
>> +  TYPE b3##TYPE[SZ];                                                           \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    {                                                                          \
>> +      a3##TYPE[i] = (i & 1) ? -i : i;                                          \
>> +      b3##TYPE[i] = (i & 1) ? VAL : -VAL;                                      \
>> +    }                                                                          \
>> +  xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ);                     \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS);
>> +
>> +#define RUN4(TYPE, VAL)                                                        \
>> +  TYPE a4##TYPE[SZ];                                                           \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    a4##TYPE[i] = -i;                                                          \
>> +  xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ);                        \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS);
>> +
>> +#define RUN5(TYPE, VAL)                                                        \
>> +  TYPE a5##TYPE[SZ];                                                           \
>> +  TYPE b5##TYPE[SZ];                                                           \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    {                                                                          \
>> +      a5##TYPE[i] = i;                                                         \
>> +      b5##TYPE[i] = (i & 1) ? VAL : -VAL;                                      \
>> +    }                                                                          \
>> +  ncopysign_##TYPE (a5##TYPE, a5##TYPE, b##TYPE, pred, SZ);                    \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    assert (!pred[i]                                                           \
>> +           || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? i : -i)) < EPS);
>> +
>> +#define RUN6(TYPE, VAL)                                                        \
>> +  TYPE a6##TYPE[SZ];                                                           \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    a6##TYPE[i] = i;                                                           \
>> +  ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ);                      \
>> +  for (int i = 0; i < SZ; i++)                                                 \
>> +    assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS);
>> +
>> +#define RUN_ALL()                                                              \
>> +  RUN (_Float16, 5)                                                            \
>> +  RUN2 (_Float16, 11)                                                          \
>> +  RUN3 (_Float16, 16)                                                          \
>> +  RUN4 (_Float16, 17)                                                          \
>> +  RUN5 (_Float16, 123)                                                         \
>> +  RUN6 (_Float16, 777)
>> +
>> +int
>> +main ()
>> +{
>> +  INIT_PRED ()
>> +  RUN_ALL ()
>> +}
>> --
>> 2.36.3
>>
> 

-- 
Best,
Lehua

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-09-13 10:36 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-12 16:25 [PATCH] RISC-V: Support cond vfsgnj.vv autovec pattern Lehua Ding
2023-09-13  8:49 ` Kito Cheng
2023-09-13 10:35   ` Lehua Ding

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).