From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2119) id 20BE538582BC; Fri, 14 Jul 2023 02:47:25 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 20BE538582BC DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1689302845; bh=/tMjw8YLRB7EvhJxfCEe4aR61WbTGSYAZSy8dksPAj4=; h=From:To:Subject:Date:From; b=AVs5CXHjf4QLaDidsgy3POWB+XQuUZf9hc+Rnyg3MzqO/VtdQaQWDvrsnV4V7v43Y 2WipbptAOMyF5tjDAjroaBkFEGSoAHLoVZJ1GBfy9hGyuPM4q/72eo/x/afURdRhfp RYv8/yT07TqQwuQvM4r13HhTP9KAGZ4brixnB3uc= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Jeff Law To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/vendors/riscv/heads/gcc-13-with-riscv-opts)] RISC-V: Bugfix for RVV float reduction in ZVE32/64 X-Act-Checkin: gcc X-Git-Author: Pan Li X-Git-Refname: refs/vendors/riscv/heads/gcc-13-with-riscv-opts X-Git-Oldrev: 81e1062e35b9bcc79561b3645a1f97b2e801c1a7 X-Git-Newrev: 1cc12147a93e6689dbe4ae8aa575ff4a3f4ca6b7 Message-Id: <20230714024725.20BE538582BC@sourceware.org> Date: Fri, 14 Jul 2023 02:47:25 +0000 (GMT) List-Id: https://gcc.gnu.org/g:1cc12147a93e6689dbe4ae8aa575ff4a3f4ca6b7 commit 1cc12147a93e6689dbe4ae8aa575ff4a3f4ca6b7 Author: Pan Li Date: Sat Jun 17 22:11:02 2023 +0800 RISC-V: Bugfix for RVV float reduction in ZVE32/64 The rvv integer reduction has 3 different patterns for zve128+, zve64 and zve32. They take the same iterator with different attributions. However, we need the generated function code_for_reduc (code, mode1, mode2). The implementation of code_for_reduc may look like below. code_for_reduc (code, mode1, mode2) { if (code == max && mode1 == VNx1HF && mode2 == VNx1HF) return CODE_FOR_pred_reduc_maxvnx1hfvnx16hf; // ZVE128+ if (code == max && mode1 == VNx1HF && mode2 == VNx1HF) return CODE_FOR_pred_reduc_maxvnx1hfvnx8hf; // ZVE64 if (code == max && mode1 == VNx1HF && mode2 == VNx1HF) return CODE_FOR_pred_reduc_maxvnx1hfvnx4hf; // ZVE32 } Thus there will be a problem here. For example zve32, we will have code_for_reduc (max, VNx1HF, VNx1HF) which will return the code of the ZVE128+ instead of the ZVE32 logically. This patch will merge the 3 patterns into pattern, and pass both the input_vector and the ret_vector of code_for_reduc. For example, ZVE32 will be code_for_reduc (max, VNx1HF, VNx2HF), then the correct code of ZVE32 will be returned as expectation. Please note both GCC 13 and 14 are impacted by this issue. Signed-off-by: Pan Li Co-Authored by: Juzhe-Zhong gcc/ChangeLog: PR target/110277 * config/riscv/riscv-vector-builtins-bases.cc: Adjust expand for ret_mode. * config/riscv/vector-iterators.md: Add VHF, VSF, VDF, VHF_LMUL1, VSF_LMUL1, VDF_LMUL1, and remove unused attr. * config/riscv/vector.md (@pred_reduc_): Removed. (@pred_reduc_): Ditto. (@pred_reduc_): Ditto. (@pred_reduc_plus): Ditto. (@pred_reduc_plus): Ditto. (@pred_reduc_plus): Ditto. (@pred_reduc_): New pattern. (@pred_reduc_): Ditto. (@pred_reduc_): Ditto. (@pred_reduc_plus): Ditto. (@pred_reduc_plus): Ditto. (@pred_reduc_plus): Ditto. gcc/testsuite/ChangeLog: PR target/110277 * gcc.target/riscv/rvv/base/pr110277-1.c: New test. * gcc.target/riscv/rvv/base/pr110277-1.h: New test. * gcc.target/riscv/rvv/base/pr110277-2.c: New test. * gcc.target/riscv/rvv/base/pr110277-2.h: New test. Diff: --- gcc/config/riscv/riscv-vector-builtins-bases.cc | 5 +- gcc/config/riscv/vector-iterators.md | 128 +++----- gcc/config/riscv/vector.md | 363 +++++++++++++-------- .../gcc.target/riscv/rvv/base/pr110277-1.c | 9 + .../gcc.target/riscv/rvv/base/pr110277-1.h | 33 ++ .../gcc.target/riscv/rvv/base/pr110277-2.c | 11 + .../gcc.target/riscv/rvv/base/pr110277-2.h | 33 ++ 7 files changed, 366 insertions(+), 216 deletions(-) diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc index b11b544291a..27545113996 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc @@ -1400,8 +1400,7 @@ public: machine_mode ret_mode = e.ret_mode (); /* TODO: we will use ret_mode after all types of PR110265 are addressed. */ - if ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) - || GET_MODE_INNER (mode) != GET_MODE_INNER (ret_mode)) + if (GET_MODE_INNER (mode) != GET_MODE_INNER (ret_mode)) return e.use_exact_insn ( code_for_pred_reduc (CODE, e.vector_mode (), e.vector_mode ())); else @@ -1435,7 +1434,7 @@ public: rtx expand (function_expander &e) const override { return e.use_exact_insn ( - code_for_pred_reduc_plus (UNSPEC, e.vector_mode (), e.vector_mode ())); + code_for_pred_reduc_plus (UNSPEC, e.vector_mode (), e.ret_mode ())); } }; diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index 2ea01a87be5..264ca33c232 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -1032,6 +1032,33 @@ (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128") ]) +(define_mode_iterator VHF [ + (VNx1HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128") + (VNx2HF "TARGET_VECTOR_ELEN_FP_16") + (VNx4HF "TARGET_VECTOR_ELEN_FP_16") + (VNx8HF "TARGET_VECTOR_ELEN_FP_16") + (VNx16HF "TARGET_VECTOR_ELEN_FP_16") + (VNx32HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32") + (VNx64HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128") +]) + +(define_mode_iterator VSF [ + (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128") + (VNx2SF "TARGET_VECTOR_ELEN_FP_32") + (VNx4SF "TARGET_VECTOR_ELEN_FP_32") + (VNx8SF "TARGET_VECTOR_ELEN_FP_32") + (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") +]) + +(define_mode_iterator VDF [ + (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128") + (VNx2DF "TARGET_VECTOR_ELEN_FP_64") + (VNx4DF "TARGET_VECTOR_ELEN_FP_64") + (VNx8DF "TARGET_VECTOR_ELEN_FP_64") + (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128") +]) + (define_mode_iterator VQI_LMUL1 [ (VNx16QI "TARGET_MIN_VLEN >= 128") (VNx8QI "TARGET_MIN_VLEN == 64") @@ -1055,6 +1082,23 @@ (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN == 64") ]) +(define_mode_iterator VHF_LMUL1 [ + (VNx8HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128") + (VNx4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN == 64") + (VNx2HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN == 32") +]) + +(define_mode_iterator VSF_LMUL1 [ + (VNx4SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") + (VNx2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN == 64") + (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN == 32") +]) + +(define_mode_iterator VDF_LMUL1 [ + (VNx2DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128") + (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN == 64") +]) + (define_mode_attr VLMULX2 [ (VNx1QI "VNx2QI") (VNx2QI "VNx4QI") (VNx4QI "VNx8QI") (VNx8QI "VNx16QI") (VNx16QI "VNx32QI") (VNx32QI "VNx64QI") (VNx64QI "VNx128QI") (VNx1HI "VNx2HI") (VNx2HI "VNx4HI") (VNx4HI "VNx8HI") (VNx8HI "VNx16HI") (VNx16HI "VNx32HI") (VNx32HI "VNx64HI") @@ -1418,48 +1462,6 @@ (VNx1DF "VNx1SI") (VNx2DF "VNx2SI") (VNx4DF "VNx4SI") (VNx8DF "VNx8SI") (VNx16DF "VNx16SI") ]) -(define_mode_attr VLMUL1 [ - (VNx1QI "VNx16QI") (VNx2QI "VNx16QI") (VNx4QI "VNx16QI") - (VNx8QI "VNx16QI") (VNx16QI "VNx16QI") (VNx32QI "VNx16QI") (VNx64QI "VNx16QI") (VNx128QI "VNx16QI") - (VNx1HI "VNx8HI") (VNx2HI "VNx8HI") (VNx4HI "VNx8HI") - (VNx8HI "VNx8HI") (VNx16HI "VNx8HI") (VNx32HI "VNx8HI") (VNx64HI "VNx8HI") - (VNx1SI "VNx4SI") (VNx2SI "VNx4SI") (VNx4SI "VNx4SI") - (VNx8SI "VNx4SI") (VNx16SI "VNx4SI") (VNx32SI "VNx4SI") - (VNx1DI "VNx2DI") (VNx2DI "VNx2DI") - (VNx4DI "VNx2DI") (VNx8DI "VNx2DI") (VNx16DI "VNx2DI") - (VNx1HF "VNx8HF") (VNx2HF "VNx8HF") (VNx4HF "VNx8HF") (VNx8HF "VNx8HF") (VNx16HF "VNx8HF") (VNx32HF "VNx8HF") (VNx64HF "VNx8HF") - (VNx1SF "VNx4SF") (VNx2SF "VNx4SF") - (VNx4SF "VNx4SF") (VNx8SF "VNx4SF") (VNx16SF "VNx4SF") (VNx32SF "VNx4SF") - (VNx1DF "VNx2DF") (VNx2DF "VNx2DF") - (VNx4DF "VNx2DF") (VNx8DF "VNx2DF") (VNx16DF "VNx2DF") -]) - -(define_mode_attr VLMUL1_ZVE64 [ - (VNx1QI "VNx8QI") (VNx2QI "VNx8QI") (VNx4QI "VNx8QI") - (VNx8QI "VNx8QI") (VNx16QI "VNx8QI") (VNx32QI "VNx8QI") (VNx64QI "VNx8QI") - (VNx1HI "VNx4HI") (VNx2HI "VNx4HI") (VNx4HI "VNx4HI") - (VNx8HI "VNx4HI") (VNx16HI "VNx4HI") (VNx32HI "VNx4HI") - (VNx1SI "VNx2SI") (VNx2SI "VNx2SI") (VNx4SI "VNx2SI") - (VNx8SI "VNx2SI") (VNx16SI "VNx2SI") - (VNx1DI "VNx1DI") (VNx2DI "VNx1DI") - (VNx4DI "VNx1DI") (VNx8DI "VNx1DI") - (VNx1SF "VNx2SF") (VNx2SF "VNx2SF") - (VNx4SF "VNx2SF") (VNx8SF "VNx2SF") (VNx16SF "VNx2SF") - (VNx1DF "VNx1DF") (VNx2DF "VNx1DF") - (VNx4DF "VNx1DF") (VNx8DF "VNx1DF") -]) - -(define_mode_attr VLMUL1_ZVE32 [ - (VNx1QI "VNx4QI") (VNx2QI "VNx4QI") (VNx4QI "VNx4QI") - (VNx8QI "VNx4QI") (VNx16QI "VNx4QI") (VNx32QI "VNx4QI") - (VNx1HI "VNx2HI") (VNx2HI "VNx2HI") (VNx4HI "VNx2HI") - (VNx8HI "VNx2HI") (VNx16HI "VNx2HI") - (VNx1SI "VNx1SI") (VNx2SI "VNx1SI") (VNx4SI "VNx1SI") - (VNx8SI "VNx1SI") - (VNx1SF "VNx2SF") (VNx2SF "VNx2SF") - (VNx4SF "VNx2SF") (VNx8SF "VNx2SF") -]) - (define_mode_attr VWLMUL1 [ (VNx1QI "VNx8HI") (VNx2QI "VNx8HI") (VNx4QI "VNx8HI") (VNx8QI "VNx8HI") (VNx16QI "VNx8HI") (VNx32QI "VNx8HI") (VNx64QI "VNx8HI") (VNx128QI "VNx8HI") @@ -1491,48 +1493,6 @@ (VNx8HI "VNx1SI") (VNx16HI "VNx1SI") ]) -(define_mode_attr vlmul1 [ - (VNx1QI "vnx16qi") (VNx2QI "vnx16qi") (VNx4QI "vnx16qi") - (VNx8QI "vnx16qi") (VNx16QI "vnx16qi") (VNx32QI "vnx16qi") (VNx64QI "vnx16qi") (VNx128QI "vnx16qi") - (VNx1HI "vnx8hi") (VNx2HI "vnx8hi") (VNx4HI "vnx8hi") - (VNx8HI "vnx8hi") (VNx16HI "vnx8hi") (VNx32HI "vnx8hi") (VNx64HI "vnx8hi") - (VNx1SI "vnx4si") (VNx2SI "vnx4si") (VNx4SI "vnx4si") - (VNx8SI "vnx4si") (VNx16SI "vnx4si") (VNx32SI "vnx4si") - (VNx1DI "vnx2di") (VNx2DI "vnx2di") - (VNx4DI "vnx2di") (VNx8DI "vnx2di") (VNx16DI "vnx2di") - (VNx1HF "vnx8hf") (VNx2HF "vnx8hf") (VNx4HF "vnx8hf") (VNx8HF "vnx8hf") (VNx16HF "vnx8hf") (VNx32HF "vnx8hf") (VNx64HF "vnx8hf") - (VNx1SF "vnx4sf") (VNx2SF "vnx4sf") - (VNx4SF "vnx4sf") (VNx8SF "vnx4sf") (VNx16SF "vnx4sf") (VNx32SF "vnx4sf") - (VNx1DF "vnx2df") (VNx2DF "vnx2df") - (VNx4DF "vnx2df") (VNx8DF "vnx2df") (VNx16DF "vnx2df") -]) - -(define_mode_attr vlmul1_zve64 [ - (VNx1QI "vnx8qi") (VNx2QI "vnx8qi") (VNx4QI "vnx8qi") - (VNx8QI "vnx8qi") (VNx16QI "vnx8qi") (VNx32QI "vnx8qi") (VNx64QI "vnx8qi") - (VNx1HI "vnx4hi") (VNx2HI "vnx4hi") (VNx4HI "vnx4hi") - (VNx8HI "vnx4hi") (VNx16HI "vnx4hi") (VNx32HI "vnx4hi") - (VNx1SI "vnx2si") (VNx2SI "vnx2si") (VNx4SI "vnx2si") - (VNx8SI "vnx2si") (VNx16SI "vnx2si") - (VNx1DI "vnx1di") (VNx2DI "vnx1di") - (VNx4DI "vnx1di") (VNx8DI "vnx1di") - (VNx1SF "vnx2sf") (VNx2SF "vnx2sf") - (VNx4SF "vnx2sf") (VNx8SF "vnx2sf") (VNx16SF "vnx2sf") - (VNx1DF "vnx1df") (VNx2DF "vnx1df") - (VNx4DF "vnx1df") (VNx8DF "vnx1df") -]) - -(define_mode_attr vlmul1_zve32 [ - (VNx1QI "vnx4qi") (VNx2QI "vnx4qi") (VNx4QI "vnx4qi") - (VNx8QI "vnx4qi") (VNx16QI "vnx4qi") (VNx32QI "vnx4qi") - (VNx1HI "vnx2hi") (VNx2HI "vnx2hi") (VNx4HI "vnx2hi") - (VNx8HI "vnx2hi") (VNx16HI "vnx2hi") - (VNx1SI "vnx1si") (VNx2SI "vnx1si") (VNx4SI "vnx1si") - (VNx8SI "vnx1si") - (VNx1SF "vnx1sf") (VNx2SF "vnx1sf") - (VNx4SF "vnx1sf") (VNx8SF "vnx1sf") -]) - (define_mode_attr vwlmul1 [ (VNx1QI "vnx8hi") (VNx2QI "vnx8hi") (VNx4QI "vnx8hi") (VNx8QI "vnx8hi") (VNx16QI "vnx8hi") (VNx32QI "vnx8hi") (VNx64QI "vnx8hi") (VNx128QI "vnx8hi") diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index d396e278503..efce992a012 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -7462,152 +7462,257 @@ [(set_attr "type" "viwred") (set_attr "mode" "")]) -(define_insn "@pred_reduc_" - [(set (match_operand: 0 "register_operand" "=vr, vr") - (unspec: - [(unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") - (match_operand 5 "vector_length_operand" " rK, rK") - (match_operand 6 "const_int_operand" " i, i") - (match_operand 7 "const_int_operand" " i, i") - (match_operand 8 "const_int_operand" " i, i") +;; Float Reduction for HF +(define_insn "@pred_reduc_" + [ + (set + (match_operand:VHF_LMUL1 0 "register_operand" "=vr, vr") + (unspec:VHF_LMUL1 + [ + (unspec: + [ + (match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK") + (match_operand 6 "const_int_operand" " i, i") + (match_operand 7 "const_int_operand" " i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM) - (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) - (any_freduc:VF - (vec_duplicate:VF - (vec_select: - (match_operand: 4 "register_operand" " vr, vr") - (parallel [(const_int 0)]))) - (match_operand:VF 3 "register_operand" " vr, vr")) - (match_operand: 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))] - "TARGET_VECTOR && TARGET_MIN_VLEN >= 128" + ] UNSPEC_VPREDICATE + ) + (any_reduc:VHF + (vec_duplicate:VHF + (vec_select: + (match_operand:VHF_LMUL1 4 "register_operand" " vr, vr") + (parallel [(const_int 0)]) + ) + ) + (match_operand:VHF 3 "register_operand" " vr, vr") + ) + (match_operand:VHF_LMUL1 2 "vector_merge_operand" " vu, 0") + ] UNSPEC_REDUC + ) + ) + ] + "TARGET_VECTOR" "vfred.vs\t%0,%3,%4%p1" - [(set_attr "type" "vfredu") - (set_attr "mode" "")]) + [ + (set_attr "type" "vfredu") + (set_attr "mode" "") + ] +) -(define_insn "@pred_reduc_" - [(set (match_operand: 0 "register_operand" "=vr, vr") - (unspec: - [(unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") - (match_operand 5 "vector_length_operand" " rK, rK") - (match_operand 6 "const_int_operand" " i, i") - (match_operand 7 "const_int_operand" " i, i") - (match_operand 8 "const_int_operand" " i, i") +;; Float Reduction for SF +(define_insn "@pred_reduc_" + [ + (set + (match_operand:VSF_LMUL1 0 "register_operand" "=vr, vr") + (unspec:VSF_LMUL1 + [ + (unspec: + [ + (match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK") + (match_operand 6 "const_int_operand" " i, i") + (match_operand 7 "const_int_operand" " i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM) - (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) - (any_freduc:VF_ZVE64 - (vec_duplicate:VF_ZVE64 - (vec_select: - (match_operand: 4 "register_operand" " vr, vr") - (parallel [(const_int 0)]))) - (match_operand:VF_ZVE64 3 "register_operand" " vr, vr")) - (match_operand: 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))] - "TARGET_VECTOR && TARGET_MIN_VLEN == 64" + ] UNSPEC_VPREDICATE + ) + (any_reduc:VSF + (vec_duplicate:VSF + (vec_select: + (match_operand:VSF_LMUL1 4 "register_operand" " vr, vr") + (parallel [(const_int 0)]) + ) + ) + (match_operand:VSF 3 "register_operand" " vr, vr") + ) + (match_operand:VSF_LMUL1 2 "vector_merge_operand" " vu, 0") + ] UNSPEC_REDUC + ) + ) + ] + "TARGET_VECTOR" "vfred.vs\t%0,%3,%4%p1" - [(set_attr "type" "vfredu") - (set_attr "mode" "")]) + [ + (set_attr "type" "vfredu") + (set_attr "mode" "") + ] +) -(define_insn "@pred_reduc_" - [(set (match_operand: 0 "register_operand" "=vd, vd, vr, vr") - (unspec: - [(unspec: - [(match_operand: 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") - (match_operand 5 "vector_length_operand" " rK, rK, rK, rK") - (match_operand 6 "const_int_operand" " i, i, i, i") - (match_operand 7 "const_int_operand" " i, i, i, i") - (match_operand 8 "const_int_operand" " i, i, i, i") +;; Float Reduction for DF +(define_insn "@pred_reduc_" + [ + (set + (match_operand:VDF_LMUL1 0 "register_operand" "=vr, vr") + (unspec:VDF_LMUL1 + [ + (unspec: + [ + (match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK") + (match_operand 6 "const_int_operand" " i, i") + (match_operand 7 "const_int_operand" " i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM) - (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) - (any_freduc:VF_ZVE32 - (vec_duplicate:VF_ZVE32 - (vec_select: - (match_operand: 4 "register_operand" " vr, vr, vr, vr") - (parallel [(const_int 0)]))) - (match_operand:VF_ZVE32 3 "register_operand" " vr, vr, vr, vr")) - (match_operand: 2 "vector_merge_operand" " vu, 0, vu, 0")] UNSPEC_REDUC))] - "TARGET_VECTOR && TARGET_MIN_VLEN == 32" + ] UNSPEC_VPREDICATE + ) + (any_reduc:VDF + (vec_duplicate:VDF + (vec_select: + (match_operand:VDF_LMUL1 4 "register_operand" " vr, vr") + (parallel [(const_int 0)]) + ) + ) + (match_operand:VDF 3 "register_operand" " vr, vr") + ) + (match_operand:VDF_LMUL1 2 "vector_merge_operand" " vu, 0") + ] UNSPEC_REDUC + ) + ) + ] + "TARGET_VECTOR" "vfred.vs\t%0,%3,%4%p1" - [(set_attr "type" "vfredu") - (set_attr "mode" "")]) + [ + (set_attr "type" "vfredu") + (set_attr "mode" "") + ] +) -(define_insn "@pred_reduc_plus" - [(set (match_operand: 0 "register_operand" "=vr, vr") - (unspec: - [(unspec: - [(unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") - (match_operand 5 "vector_length_operand" " rK, rK") - (match_operand 6 "const_int_operand" " i, i") - (match_operand 7 "const_int_operand" " i, i") - (match_operand 8 "const_int_operand" " i, i") - (reg:SI VL_REGNUM) - (reg:SI VTYPE_REGNUM) - (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) - (plus:VF - (vec_duplicate:VF - (vec_select: - (match_operand: 4 "register_operand" " vr, vr") - (parallel [(const_int 0)]))) - (match_operand:VF 3 "register_operand" " vr, vr")) - (match_operand: 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC)] ORDER))] - "TARGET_VECTOR && TARGET_MIN_VLEN >= 128" +;; Float Ordered Reduction Sum for HF +(define_insn "@pred_reduc_plus" + [ + (set + (match_operand:VHF_LMUL1 0 "register_operand" "=vr,vr") + (unspec:VHF_LMUL1 + [ + (unspec:VHF_LMUL1 + [ + (unspec: + [ + (match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK") + (match_operand 6 "const_int_operand" " i, i") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM) + (reg:SI FRM_REGNUM) + ] UNSPEC_VPREDICATE + ) + (plus:VHF + (vec_duplicate:VHF + (vec_select: + (match_operand:VHF_LMUL1 4 "register_operand" " vr, vr") + (parallel [(const_int 0)]) + ) + ) + (match_operand:VHF 3 "register_operand" " vr, vr") + ) + (match_operand:VHF_LMUL1 2 "vector_merge_operand" " vu, 0") + ] UNSPEC_REDUC + ) + ] ORDER + ) + ) + ] + "TARGET_VECTOR" "vfredsum.vs\t%0,%3,%4%p1" - [(set_attr "type" "vfred") - (set_attr "mode" "")]) + [ + (set_attr "type" "vfred") + (set_attr "mode" "") + ] +) -(define_insn "@pred_reduc_plus" - [(set (match_operand: 0 "register_operand" "=vr, vr") - (unspec: - [(unspec: - [(unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") - (match_operand 5 "vector_length_operand" " rK, rK") - (match_operand 6 "const_int_operand" " i, i") - (match_operand 7 "const_int_operand" " i, i") - (match_operand 8 "const_int_operand" " i, i") - (reg:SI VL_REGNUM) - (reg:SI VTYPE_REGNUM) - (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) - (plus:VF_ZVE64 - (vec_duplicate:VF_ZVE64 - (vec_select: - (match_operand: 4 "register_operand" " vr, vr") - (parallel [(const_int 0)]))) - (match_operand:VF_ZVE64 3 "register_operand" " vr, vr")) - (match_operand: 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC)] ORDER))] - "TARGET_VECTOR && TARGET_MIN_VLEN == 64" +;; Float Ordered Reduction Sum for SF +(define_insn "@pred_reduc_plus" + [ + (set + (match_operand:VSF_LMUL1 0 "register_operand" "=vr,vr") + (unspec:VSF_LMUL1 + [ + (unspec:VSF_LMUL1 + [ + (unspec: + [ + (match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK") + (match_operand 6 "const_int_operand" " i, i") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM) + (reg:SI FRM_REGNUM) + ] UNSPEC_VPREDICATE + ) + (plus:VSF + (vec_duplicate:VSF + (vec_select: + (match_operand:VSF_LMUL1 4 "register_operand" " vr, vr") + (parallel [(const_int 0)]) + ) + ) + (match_operand:VSF 3 "register_operand" " vr, vr") + ) + (match_operand:VSF_LMUL1 2 "vector_merge_operand" " vu, 0") + ] UNSPEC_REDUC + ) + ] ORDER + ) + ) + ] + "TARGET_VECTOR" "vfredsum.vs\t%0,%3,%4%p1" - [(set_attr "type" "vfred") - (set_attr "mode" "")]) + [ + (set_attr "type" "vfred") + (set_attr "mode" "") + ] +) -(define_insn "@pred_reduc_plus" - [(set (match_operand: 0 "register_operand" "=vd, vd, vr, vr") - (unspec: - [(unspec: - [(unspec: - [(match_operand: 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") - (match_operand 5 "vector_length_operand" " rK, rK, rK, rK") - (match_operand 6 "const_int_operand" " i, i, i, i") - (match_operand 7 "const_int_operand" " i, i, i, i") - (match_operand 8 "const_int_operand" " i, i, i, i") - (reg:SI VL_REGNUM) - (reg:SI VTYPE_REGNUM) - (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) - (plus:VF_ZVE32 - (vec_duplicate:VF_ZVE32 - (vec_select: - (match_operand: 4 "register_operand" " vr, vr, vr, vr") - (parallel [(const_int 0)]))) - (match_operand:VF_ZVE32 3 "register_operand" " vr, vr, vr, vr")) - (match_operand: 2 "vector_merge_operand" " vu, 0, vu, 0")] UNSPEC_REDUC)] ORDER))] - "TARGET_VECTOR && TARGET_MIN_VLEN == 32" +;; Float Ordered Reduction Sum for DF +(define_insn "@pred_reduc_plus" + [ + (set + (match_operand:VDF_LMUL1 0 "register_operand" "=vr,vr") + (unspec:VDF_LMUL1 + [ + (unspec:VDF_LMUL1 + [ + (unspec: + [ + (match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK") + (match_operand 6 "const_int_operand" " i, i") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM) + (reg:SI FRM_REGNUM) + ] UNSPEC_VPREDICATE + ) + (plus:VDF + (vec_duplicate:VDF + (vec_select: + (match_operand:VDF_LMUL1 4 "register_operand" " vr, vr") + (parallel [(const_int 0)]) + ) + ) + (match_operand:VDF 3 "register_operand" " vr, vr") + ) + (match_operand:VDF_LMUL1 2 "vector_merge_operand" " vu, 0") + ] UNSPEC_REDUC + ) + ] ORDER + ) + ) + ] + "TARGET_VECTOR" "vfredsum.vs\t%0,%3,%4%p1" - [(set_attr "type" "vfred") - (set_attr "mode" "")]) + [ + (set_attr "type" "vfred") + (set_attr "mode" "") + ] +) (define_insn "@pred_widen_reduc_plus" [(set (match_operand: 0 "register_operand" "=&vr, &vr") diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.c new file mode 100644 index 00000000000..24a4ba3b45f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zve32f_zvfh -mabi=ilp32f -O3 -Wno-psabi" } */ + +#include "pr110277-1.h" + +/* { dg-final { scan-assembler-times {vfredmax\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vfredmin\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vfredusum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.h b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.h new file mode 100644 index 00000000000..67c296c2213 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-1.h @@ -0,0 +1,33 @@ +#include "riscv_vector.h" + +vfloat16m1_t test_vfredmax_vs_f16mf2_f16m1(vfloat16mf2_t vector, vfloat16m1_t scalar, size_t vl) { + return __riscv_vfredmax_vs_f16mf2_f16m1(vector, scalar, vl); +} + +vfloat32m1_t test_vfredmax_vs_f32m8_f32m1(vfloat32m8_t vector, vfloat32m1_t scalar, size_t vl) { + return __riscv_vfredmax_vs_f32m8_f32m1(vector, scalar, vl); +} + +vfloat16m1_t test_vfredmin_vs_f16mf2_f16m1(vfloat16mf2_t vector, vfloat16m1_t scalar, size_t vl) { + return __riscv_vfredmin_vs_f16mf2_f16m1(vector, scalar, vl); +} + +vfloat32m1_t test_vfredmin_vs_f32m8_f32m1(vfloat32m8_t vector, vfloat32m1_t scalar, size_t vl) { + return __riscv_vfredmin_vs_f32m8_f32m1(vector, scalar, vl); +} + +vfloat16m1_t test_vfredosum_vs_f16mf2_f16m1(vfloat16mf2_t vector, vfloat16m1_t scalar, size_t vl) { + return __riscv_vfredosum_vs_f16mf2_f16m1(vector, scalar, vl); +} + +vfloat32m1_t test_vfredosum_vs_f32m8_f32m1(vfloat32m8_t vector, vfloat32m1_t scalar, size_t vl) { + return __riscv_vfredosum_vs_f32m8_f32m1(vector, scalar, vl); +} + +vfloat16m1_t test_vfredusum_vs_f16mf2_f16m1(vfloat16mf2_t vector, vfloat16m1_t scalar, size_t vl) { + return __riscv_vfredusum_vs_f16mf2_f16m1(vector, scalar, vl); +} + +vfloat32m1_t test_vfredusum_vs_f32m8_f32m1(vfloat32m8_t vector, vfloat32m1_t scalar, size_t vl) { + return __riscv_vfredusum_vs_f32m8_f32m1(vector, scalar, vl); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.c new file mode 100644 index 00000000000..23d7361488a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zve64d_zvfh -mabi=ilp32d -O3 -Wno-psabi" } */ + +#include "pr110277-1.h" +#include "pr110277-2.h" + +/* { dg-final { scan-assembler-times {vfredmax\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 4 } } */ +/* { dg-final { scan-assembler-times {vfredmin\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 4 } } */ +/* { dg-final { scan-assembler-times {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 4 } } */ +/* { dg-final { scan-assembler-times {vfredusum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 4 } } */ + diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.h b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.h new file mode 100644 index 00000000000..7e5c81aa213 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110277-2.h @@ -0,0 +1,33 @@ +#include "riscv_vector.h" + +vfloat16m1_t test_vfredmax_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) { + return __riscv_vfredmax_vs_f16mf4_f16m1(vector, scalar, vl); +} + +vfloat16m1_t test_vfredmin_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) { + return __riscv_vfredmin_vs_f16mf4_f16m1(vector, scalar, vl); +} + +vfloat16m1_t test_vfredosum_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) { + return __riscv_vfredosum_vs_f16mf4_f16m1(vector, scalar, vl); +} + +vfloat16m1_t test_vfredusum_vs_f16mf4_f16m1(vfloat16mf4_t vector, vfloat16m1_t scalar, size_t vl) { + return __riscv_vfredusum_vs_f16mf4_f16m1(vector, scalar, vl); +} + +vfloat64m1_t test_vfredmax_vs_f64m8_f64m1(vfloat64m8_t vector, vfloat64m1_t scalar, size_t vl) { + return __riscv_vfredmax_vs_f64m8_f64m1(vector, scalar, vl); +} + +vfloat64m1_t test_vfredmin_vs_f64m8_f64m1(vfloat64m8_t vector, vfloat64m1_t scalar, size_t vl) { + return __riscv_vfredmin_vs_f64m8_f64m1(vector, scalar, vl); +} + +vfloat64m1_t test_vfredosum_vs_f64m8_f64m1(vfloat64m8_t vector, vfloat64m1_t scalar, size_t vl) { + return __riscv_vfredosum_vs_f64m8_f64m1(vector, scalar, vl); +} + +vfloat64m1_t test_vfredusum_vs_f64m8_f64m1(vfloat64m8_t vector, vfloat64m1_t scalar, size_t vl) { + return __riscv_vfredusum_vs_f64m8_f64m1(vector, scalar, vl); +}