From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 7928) id EC8943858CDA; Wed, 13 Sep 2023 10:35:17 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org EC8943858CDA DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1694601317; bh=TXAcVJqhNlMmevBjMgLkwJSX++sVO+Rs4tp755ZNCrQ=; h=From:To:Subject:Date:From; b=o+KAW8UtQ18HLcEeQY0qMTDnhxlDxsmNENtrzPgVgFNB21jxpYJh9ybOo5M/0kkmp O+1OaO00OFiPucDtxyyuzdHTgzzCIfDehBhEC7zfNG3yFRt1unCdUveglzkJf8HpnA sQssU5/8p6omGPYa84/o1RieJCsjZp699TuQIJg8= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Lehua Ding To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-3917] RISC-V: Support cond vfsgnj.vv autovec patterns X-Act-Checkin: gcc X-Git-Author: Lehua Ding X-Git-Refname: refs/heads/trunk X-Git-Oldrev: 92ea12ea99fce546772a40b7bbc2ea850db9b1be X-Git-Newrev: 6737a51728881790b54e490494b468267f04a608 Message-Id: <20230913103517.EC8943858CDA@sourceware.org> Date: Wed, 13 Sep 2023 10:35:17 +0000 (GMT) List-Id: https://gcc.gnu.org/g:6737a51728881790b54e490494b468267f04a608 commit r14-3917-g6737a51728881790b54e490494b468267f04a608 Author: Lehua Ding Date: Wed Sep 13 17:34:43 2023 +0800 RISC-V: Support cond vfsgnj.vv autovec patterns This patch adds combine patterns that combine vfsgnj.vv + vcond_mask into a masked vfsgnj.vv. vfsgnjx.vv cannot currently be produced by the midend; we will send another patch to address this issue. gcc/ChangeLog: * config/riscv/autovec-opt.md (*copysign_neg): Move. (*cond_copysign): New combine pattern. * config/riscv/riscv-v.cc (needs_fp_rounding): Extend. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c: New test. * gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c: New test. * gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c: New test. * gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h: New test. 
* gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c: New test. Diff: --- gcc/config/riscv/autovec-opt.md | 69 ++++++++++----- gcc/config/riscv/riscv-v.cc | 4 +- .../riscv/rvv/autovec/cond/cond_copysign-run.c | 99 ++++++++++++++++++++++ .../riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c | 12 +++ .../riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c | 12 +++ .../rvv/autovec/cond/cond_copysign-template.h | 81 ++++++++++++++++++ .../rvv/autovec/cond/cond_copysign-zvfh-run.c | 93 ++++++++++++++++++++ 7 files changed, 350 insertions(+), 20 deletions(-) diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index f1d058ce911b..94416244553c 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -609,6 +609,10 @@ (set_attr "mode" "") (set (attr "frm_mode") (symbol_ref "riscv_vector::FRM_DYN"))]) +;; ============================================================================= +;; Combine op + vmerge to cond_op +;; ============================================================================= + ;; Combine and vcond_mask generated by midend into cond_len_ ;; Currently supported operations: ;; abs(FP) @@ -652,25 +656,6 @@ } [(set_attr "type" "vector")]) -;; Combine vlmax neg and UNSPEC_VCOPYSIGN -(define_insn_and_split "*copysign_neg" - [(set (match_operand:VF 0 "register_operand") - (neg:VF - (unspec:VF [ - (match_operand:VF 1 "register_operand") - (match_operand:VF 2 "register_operand") - ] UNSPEC_VCOPYSIGN)))] - "TARGET_VECTOR && can_create_pseudo_p ()" - "#" - "&& 1" - [(const_int 0)] -{ - riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (mode), - riscv_vector::BINARY_OP, operands); - DONE; -} -[(set_attr "type" "vector")]) - ;; Combine sign_extend/zero_extend(vf2) and vcond_mask (define_insn_and_split "*cond_" [(set (match_operand:VWEXTI 0 "register_operand") @@ -919,6 +904,28 @@ } [(set_attr "type" "vector")]) +;; Combine vfsgnj.vv + vcond_mask +(define_insn_and_split "*cond_copysign" + [(set 
(match_operand:VF 0 "register_operand") + (if_then_else:VF + (match_operand: 1 "register_operand") + (unspec:VF + [(match_operand:VF 2 "register_operand") + (match_operand:VF 3 "register_operand")] UNSPEC_VCOPYSIGN) + (match_operand:VF 4 "register_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + insn_code icode = code_for_pred (UNSPEC_VCOPYSIGN, mode); + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], operands[4], + gen_int_mode (GET_MODE_NUNITS (mode), Pmode)}; + riscv_vector::expand_cond_len_binop (icode, ops); + DONE; +} +[(set_attr "type" "vector")]) + ;; ============================================================================= ;; Combine extend + binop to widen_binop ;; ============================================================================= @@ -1120,3 +1127,27 @@ DONE; } [(set_attr "type" "vfwmul")]) + + +;; ============================================================================= +;; Misc combine patterns +;; ============================================================================= + +;; Combine vlmax neg and UNSPEC_VCOPYSIGN +(define_insn_and_split "*copysign_neg" + [(set (match_operand:VF 0 "register_operand") + (neg:VF + (unspec:VF [ + (match_operand:VF 1 "register_operand") + (match_operand:VF 2 "register_operand") + ] UNSPEC_VCOPYSIGN)))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + riscv_vector::emit_vlmax_insn (code_for_pred_ncopysign (mode), + riscv_vector::BINARY_OP, operands); + DONE; +} +[(set_attr "type" "vector")]) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 4d95bd773a2c..76e6094f45b8 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -2970,7 +2970,9 @@ needs_fp_rounding (unsigned icode, machine_mode mode) && icode != maybe_code_for_pred_extend (mode) /* narrower-INT -> FP */ && icode != maybe_code_for_pred_widen (FLOAT, mode) - && icode != maybe_code_for_pred_widen 
(UNSIGNED_FLOAT, mode); + && icode != maybe_code_for_pred_widen (UNSIGNED_FLOAT, mode) + /* vfsgnj */ + && icode != maybe_code_for_pred (UNSPEC_VCOPYSIGN, mode); } /* Subroutine to expand COND_LEN_* patterns. */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c new file mode 100644 index 000000000000..be37854c1352 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-run.c @@ -0,0 +1,99 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */ + +#include "cond_copysign-template.h" + +#include + +#define SZ 512 + +#define EPS 1e-6 + +#define INIT_PRED() \ + int pred[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + pred[i] = i % 3; \ + } + +#define RUN(TYPE, VAL) \ + TYPE a##TYPE[SZ]; \ + TYPE b##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + a##TYPE[i] = i; \ + b##TYPE[i] = (i & 1) ? VAL : -VAL; \ + } \ + copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? i : -i)) < EPS); + +#define RUN2(TYPE, VAL) \ + TYPE a2##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + a2##TYPE[i] = i; \ + copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS); + +#define RUN3(TYPE, VAL) \ + TYPE a3##TYPE[SZ]; \ + TYPE b3##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + a3##TYPE[i] = (i & 1) ? -i : i; \ + b3##TYPE[i] = (i & 1) ? 
VAL : -VAL; \ + } \ + xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS); + +#define RUN4(TYPE, VAL) \ + TYPE a4##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + a4##TYPE[i] = -i; \ + xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS); + +#define RUN5(TYPE, VAL) \ + TYPE a5##TYPE[SZ]; \ + TYPE b5##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + a5##TYPE[i] = i; \ + b5##TYPE[i] = (i & 1) ? VAL : -VAL; \ + } \ + ncopysign_##TYPE (a5##TYPE, a5##TYPE, b##TYPE, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] \ + || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? i : -i)) < EPS); + +#define RUN6(TYPE, VAL) \ + TYPE a6##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + a6##TYPE[i] = i; \ + ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS); + +#define RUN_ALL() \ + RUN (float, 5) \ + RUN (double, 6) \ + RUN2 (float, 11) \ + RUN2 (double, 12) \ + RUN3 (float, 16) \ + RUN3 (double, 18) \ + RUN4 (float, 17) \ + RUN4 (double, 19) \ + RUN5 (float, 123) \ + RUN5 (double, 523) \ + RUN6 (float, 777) \ + RUN6 (double, 877) + +int +main () +{ + INIT_PRED () + RUN_ALL () +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c new file mode 100644 index 000000000000..cef531b97008 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv32gcv.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */ + +#include "cond_copysign-template.h" + +/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */ 
+/* 1. The vectorizer wraps scalar variants of copysign into vector constants which + expand cannot handle currently. + 2. match.pd convert .COPYSIGN (1, b) + COND_MUL to AND + XOR currently. */ +/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } } } */ +/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */ +/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c new file mode 100644 index 000000000000..cc2aa4de7578 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-rv64gcv.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */ + +#include "cond_copysign-template.h" + +/* { dg-final { scan-assembler-times {\tvfsgnj\.vv} 6 } } */ +/* 1. The vectorizer wraps scalar variants of copysign into vector constants which + expand cannot handle currently. + 2. match.pd convert .COPYSIGN (1, b) + COND_MUL to AND + XOR currently. */ +/* { dg-final { scan-assembler-times {\tvfsgnjx\.vv} 6 { xfail riscv*-*-* } } } */ +/* { dg-final { scan-assembler-times {\tvfsgnjn\.vv} 6 } } */ +/* { dg-final { scan-assembler-not {\tvmerge\.vvm} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h new file mode 100644 index 000000000000..4191500fd833 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-template.h @@ -0,0 +1,81 @@ +#include + +#define TEST_TYPE(TYPE, SUFFIX) \ + __attribute__ ((noipa)) void copysign_##TYPE (TYPE *restrict dst, \ + TYPE *restrict a, \ + TYPE *restrict b, \ + int *restrict pred, int n) \ + { \ + for (int i = 0; i < n; i++) \ + dst[i] = pred[i] ? 
__builtin_copysign##SUFFIX (a[i], b[i]) : dst[i]; \ + } + +#define TEST_TYPE2(TYPE, SUFFIX) \ + __attribute__ ((noipa)) void copysigns_##TYPE (TYPE *restrict dst, \ + TYPE *restrict a, TYPE b, \ + int *restrict pred, int n) \ + { \ + for (int i = 0; i < n; i++) \ + dst[i] = pred[i] ? __builtin_copysign##SUFFIX (a[i], b) : dst[i]; \ + } + +#define TEST_TYPE3(TYPE, SUFFIX) \ + __attribute__ ((noipa)) void xorsign_##TYPE (TYPE *restrict dst, \ + TYPE *restrict a, \ + TYPE *restrict b, \ + int *restrict pred, int n) \ + { \ + for (int i = 0; i < n; i++) \ + dst[i] \ + = pred[i] ? a[i] * __builtin_copysign##SUFFIX (1.0, b[i]) : dst[i]; \ + } + +#define TEST_TYPE4(TYPE, SUFFIX) \ + __attribute__ ((noipa)) void xorsigns_##TYPE (TYPE *restrict dst, \ + TYPE *restrict a, TYPE b, \ + int *restrict pred, int n) \ + { \ + for (int i = 0; i < n; i++) \ + dst[i] = pred[i] ? a[i] * __builtin_copysign##SUFFIX (1.0, b) : dst[i]; \ + } + +#define TEST_TYPE5(TYPE, SUFFIX) \ + __attribute__ ((noipa)) void ncopysign_##TYPE (TYPE *restrict dst, \ + TYPE *restrict a, \ + TYPE *restrict b, \ + int *restrict pred, int n) \ + { \ + for (int i = 0; i < n; i++) \ + dst[i] = pred[i] ? -__builtin_copysign##SUFFIX (a[i], b[i]) : dst[i]; \ + } + +#define TEST_TYPE6(TYPE, SUFFIX) \ + __attribute__ ((noipa)) void ncopysigns_##TYPE (TYPE *restrict dst, \ + TYPE *restrict a, TYPE b, \ + int *restrict pred, int n) \ + { \ + for (int i = 0; i < n; i++) \ + dst[i] = pred[i] ? 
-__builtin_copysign##SUFFIX (a[i], b) : dst[i]; \ + } + +#define TEST_ALL() \ + TEST_TYPE (_Float16, f16) \ + TEST_TYPE (float, f) \ + TEST_TYPE (double, ) \ + TEST_TYPE2 (_Float16, f16) \ + TEST_TYPE2 (float, f) \ + TEST_TYPE2 (double, ) \ + TEST_TYPE3 (_Float16, f16) \ + TEST_TYPE3 (float, f) \ + TEST_TYPE3 (double, ) \ + TEST_TYPE4 (_Float16, f16) \ + TEST_TYPE4 (float, f) \ + TEST_TYPE4 (double, ) \ + TEST_TYPE5 (_Float16, f16) \ + TEST_TYPE5 (float, f) \ + TEST_TYPE5 (double, ) \ + TEST_TYPE6 (_Float16, f16) \ + TEST_TYPE6 (float, f) \ + TEST_TYPE6 (double, ) + +TEST_ALL () diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c new file mode 100644 index 000000000000..6e337f9e74cf --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_copysign-zvfh-run.c @@ -0,0 +1,93 @@ +/* { dg-do run { target { riscv_vector && riscv_zvfh_hw } } } */ +/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */ + +#include "cond_copysign-template.h" + +#include + +#define SZ 512 + +#define EPS 1e-6 + +#define INIT_PRED() \ + int pred[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + pred[i] = i % 3; \ + } + +#define RUN(TYPE, VAL) \ + TYPE a##TYPE[SZ]; \ + TYPE b##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + a##TYPE[i] = i; \ + b##TYPE[i] = (i & 1) ? VAL : -VAL; \ + } \ + copysign_##TYPE (a##TYPE, a##TYPE, b##TYPE, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (a##TYPE[i] - ((i & 1) ? 
i : -i)) < EPS); + +#define RUN2(TYPE, VAL) \ + TYPE a2##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + a2##TYPE[i] = i; \ + copysigns_##TYPE (a2##TYPE, a2##TYPE, -VAL, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (a2##TYPE[i] + i) < EPS); + +#define RUN3(TYPE, VAL) \ + TYPE a3##TYPE[SZ]; \ + TYPE b3##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + a3##TYPE[i] = (i & 1) ? -i : i; \ + b3##TYPE[i] = (i & 1) ? VAL : -VAL; \ + } \ + xorsign_##TYPE (a3##TYPE, a3##TYPE, b3##TYPE, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (a3##TYPE[i] + i) < EPS); + +#define RUN4(TYPE, VAL) \ + TYPE a4##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + a4##TYPE[i] = -i; \ + xorsigns_##TYPE (a4##TYPE, a4##TYPE, -VAL, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (a4##TYPE[i] - i) < EPS); + +#define RUN5(TYPE, VAL) \ + TYPE a5##TYPE[SZ]; \ + TYPE b5##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + { \ + a5##TYPE[i] = i; \ + b5##TYPE[i] = (i & 1) ? VAL : -VAL; \ + } \ + ncopysign_##TYPE (a5##TYPE, a5##TYPE, b##TYPE, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] \ + || __builtin_fabs (-a5##TYPE[i] - ((i & 1) ? i : -i)) < EPS); + +#define RUN6(TYPE, VAL) \ + TYPE a6##TYPE[SZ]; \ + for (int i = 0; i < SZ; i++) \ + a6##TYPE[i] = i; \ + ncopysigns_##TYPE (a6##TYPE, a6##TYPE, -VAL, pred, SZ); \ + for (int i = 0; i < SZ; i++) \ + assert (!pred[i] || __builtin_fabs (-a6##TYPE[i] + i) < EPS); + +#define RUN_ALL() \ + RUN (_Float16, 5) \ + RUN2 (_Float16, 11) \ + RUN3 (_Float16, 16) \ + RUN4 (_Float16, 17) \ + RUN5 (_Float16, 123) \ + RUN6 (_Float16, 777) + +int +main () +{ + INIT_PRED () + RUN_ALL () +}