From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2093) id 8651D385B527; Fri, 28 Apr 2023 12:37:37 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 8651D385B527 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1682685457; bh=/l7HTOUhIdbc9/Gwzq/XqJ7HQEDiZU1/ovWtqN3SALY=; h=From:To:Subject:Date:From; b=oirRTK8Zzgz++gE3B9jiaeBzeKRmapDBnJt/Nmpyu/8UclANKYdMIcnAH1TXJOGch 7CbyJc2mIaamUEtshOI7yjxNJqFItV4fs7+1zeAVrHVaoBnUNDd4YphPqLqtG65ufL QFuMq2zSrvBv8PuCVAFgmk7TAheTYii94t8K/2TQ= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Kito Cheng To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-331] RISC-V: Allow RVV VMS{Compare}(V1, V1) simplify to VMCLR X-Act-Checkin: gcc X-Git-Author: Pan Li X-Git-Refname: refs/heads/master X-Git-Oldrev: d711f8f81fc5f3a5a0420337f414bce93e1cad1e X-Git-Newrev: 8b84d87969ef2443516a79a80c22d2b6dba04630 Message-Id: <20230428123737.8651D385B527@sourceware.org> Date: Fri, 28 Apr 2023 12:37:37 +0000 (GMT) List-Id: https://gcc.gnu.org/g:8b84d87969ef2443516a79a80c22d2b6dba04630 commit r14-331-g8b84d87969ef2443516a79a80c22d2b6dba04630 Author: Pan Li Date: Fri Apr 28 10:46:41 2023 +0800 RISC-V: Allow RVV VMS{Compare}(V1, V1) simplify to VMCLR When some RVV integer compare operators act on the same vector registers without mask. They can be simplified to VMCLR. This PATCH allow the ne, lt, ltu, gt, gtu to perform such kind of the simplification by adding one new define_split. Given we have: vbool1_t test_shortcut_for_riscv_vmslt_case_0(vint8m8_t v1, size_t vl) { return __riscv_vmslt_vv_i8m8_b1(v1, v1, vl); } Before this patch: vsetvli zero,a2,e8,m8,ta,ma vl8re8.v v24,0(a1) vmslt.vv v8,v24,v24 vsetvli a5,zero,e8,m8,ta,ma vsm.v v8,0(a0) ret After this patch: vsetvli zero,a2,e8,mf8,ta,ma vmclr.m v24 <- optimized to vmclr.m vsetvli zero,a5,e8,mf8,ta,ma vsm.v v24,0(a0) ret As above, we may have one instruction eliminated and require less vector registers. gcc/ChangeLog: * config/riscv/vector.md: Add new define split to perform the simplification. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/integer_compare_insn_shortcut.c: New test. Signed-off-by: Pan Li Co-authored-by: kito-cheng Diff: --- gcc/config/riscv/vector.md | 32 +++ .../riscv/rvv/base/integer_compare_insn_shortcut.c | 291 +++++++++++++++++++++ 2 files changed, 323 insertions(+) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index b3d23441679..1642822d098 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -7689,3 +7689,35 @@ "vleff.v\t%0,%3%p1" [(set_attr "type" "vldff") (set_attr "mode" "")]) + +;; ----------------------------------------------------------------------------- +;; ---- Integer Compare Instructions Simplification +;; ----------------------------------------------------------------------------- +;; Simplify to VMCLR.m Includes: +;; - 1. VMSNE +;; - 2. VMSLT +;; - 3. VMSLTU +;; - 4. VMSGT +;; - 5. VMSGTU +;; ----------------------------------------------------------------------------- +(define_split + [(set (match_operand:VB 0 "register_operand") + (if_then_else:VB + (unspec:VB + [(match_operand:VB 1 "vector_all_trues_mask_operand") + (match_operand 4 "vector_length_operand") + (match_operand 5 "const_int_operand") + (match_operand 6 "const_int_operand") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (match_operand:VB 3 "vector_move_operand") + (match_operand:VB 2 "vector_undef_operand")))] + "TARGET_VECTOR" + [(const_int 0)] + { + emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX (mode), + RVV_VUNDEF (mode), operands[3], + operands[4], operands[5])); + DONE; + } +) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/integer_compare_insn_shortcut.c b/gcc/testsuite/gcc.target/riscv/rvv/base/integer_compare_insn_shortcut.c new file mode 100644 index 00000000000..8954adad09d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/integer_compare_insn_shortcut.c @@ -0,0 +1,291 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */ + +#include "riscv_vector.h" + +vbool1_t test_shortcut_for_riscv_vmseq_case_0(vint8m8_t v1, size_t vl) { + return __riscv_vmseq_vv_i8m8_b1(v1, v1, vl); +} + +vbool2_t test_shortcut_for_riscv_vmseq_case_1(vint8m4_t v1, size_t vl) { + return __riscv_vmseq_vv_i8m4_b2(v1, v1, vl); +} + +vbool4_t test_shortcut_for_riscv_vmseq_case_2(vint8m2_t v1, size_t vl) { + return __riscv_vmseq_vv_i8m2_b4(v1, v1, vl); +} + +vbool8_t test_shortcut_for_riscv_vmseq_case_3(vint8m1_t v1, size_t vl) { + return __riscv_vmseq_vv_i8m1_b8(v1, v1, vl); +} + +vbool16_t test_shortcut_for_riscv_vmseq_case_4(vint8mf2_t v1, size_t vl) { + return __riscv_vmseq_vv_i8mf2_b16(v1, v1, vl); +} + +vbool32_t test_shortcut_for_riscv_vmseq_case_5(vint8mf4_t v1, size_t vl) { + return __riscv_vmseq_vv_i8mf4_b32(v1, v1, vl); +} + +vbool64_t test_shortcut_for_riscv_vmseq_case_6(vint8mf8_t v1, size_t vl) { + return __riscv_vmseq_vv_i8mf8_b64(v1, v1, vl); +} + +vbool1_t test_shortcut_for_riscv_vmsne_case_0(vint8m8_t v1, size_t vl) { + return __riscv_vmsne_vv_i8m8_b1(v1, v1, vl); +} + +vbool2_t test_shortcut_for_riscv_vmsne_case_1(vint8m4_t v1, size_t vl) { + return __riscv_vmsne_vv_i8m4_b2(v1, v1, vl); +} + +vbool4_t test_shortcut_for_riscv_vmsne_case_2(vint8m2_t v1, size_t vl) { + return __riscv_vmsne_vv_i8m2_b4(v1, v1, vl); +} + +vbool8_t test_shortcut_for_riscv_vmsne_case_3(vint8m1_t v1, size_t vl) { + return __riscv_vmsne_vv_i8m1_b8(v1, v1, vl); +} + +vbool16_t test_shortcut_for_riscv_vmsne_case_4(vint8mf2_t v1, size_t vl) { + return __riscv_vmsne_vv_i8mf2_b16(v1, v1, vl); +} + +vbool32_t test_shortcut_for_riscv_vmsne_case_5(vint8mf4_t v1, size_t vl) { + return __riscv_vmsne_vv_i8mf4_b32(v1, v1, vl); +} + +vbool64_t test_shortcut_for_riscv_vmsne_case_6(vint8mf8_t v1, size_t vl) { + return __riscv_vmsne_vv_i8mf8_b64(v1, v1, vl); +} + +vbool1_t test_shortcut_for_riscv_vmslt_case_0(vint8m8_t v1, size_t vl) { + return __riscv_vmslt_vv_i8m8_b1(v1, v1, vl); +} + +vbool2_t test_shortcut_for_riscv_vmslt_case_1(vint8m4_t v1, size_t vl) { + return __riscv_vmslt_vv_i8m4_b2(v1, v1, vl); +} + +vbool4_t test_shortcut_for_riscv_vmslt_case_2(vint8m2_t v1, size_t vl) { + return __riscv_vmslt_vv_i8m2_b4(v1, v1, vl); +} + +vbool8_t test_shortcut_for_riscv_vmslt_case_3(vint8m1_t v1, size_t vl) { + return __riscv_vmslt_vv_i8m1_b8(v1, v1, vl); +} + +vbool16_t test_shortcut_for_riscv_vmslt_case_4(vint8mf2_t v1, size_t vl) { + return __riscv_vmslt_vv_i8mf2_b16(v1, v1, vl); +} + +vbool32_t test_shortcut_for_riscv_vmslt_case_5(vint8mf4_t v1, size_t vl) { + return __riscv_vmslt_vv_i8mf4_b32(v1, v1, vl); +} + +vbool64_t test_shortcut_for_riscv_vmslt_case_6(vint8mf8_t v1, size_t vl) { + return __riscv_vmslt_vv_i8mf8_b64(v1, v1, vl); +} + +vbool1_t test_shortcut_for_riscv_vmsltu_case_0(vuint8m8_t v1, size_t vl) { + return __riscv_vmsltu_vv_u8m8_b1(v1, v1, vl); +} + +vbool2_t test_shortcut_for_riscv_vmsltu_case_1(vuint8m4_t v1, size_t vl) { + return __riscv_vmsltu_vv_u8m4_b2(v1, v1, vl); +} + +vbool4_t test_shortcut_for_riscv_vmsltu_case_2(vuint8m2_t v1, size_t vl) { + return __riscv_vmsltu_vv_u8m2_b4(v1, v1, vl); +} + +vbool8_t test_shortcut_for_riscv_vmsltu_case_3(vuint8m1_t v1, size_t vl) { + return __riscv_vmsltu_vv_u8m1_b8(v1, v1, vl); +} + +vbool16_t test_shortcut_for_riscv_vmsltu_case_4(vuint8mf2_t v1, size_t vl) { + return __riscv_vmsltu_vv_u8mf2_b16(v1, v1, vl); +} + +vbool32_t test_shortcut_for_riscv_vmsltu_case_5(vuint8mf4_t v1, size_t vl) { + return __riscv_vmsltu_vv_u8mf4_b32(v1, v1, vl); +} + +vbool64_t test_shortcut_for_riscv_vmsltu_case_6(vuint8mf8_t v1, size_t vl) { + return __riscv_vmsltu_vv_u8mf8_b64(v1, v1, vl); +} + +vbool1_t test_shortcut_for_riscv_vmsle_case_0(vint8m8_t v1, size_t vl) { + return __riscv_vmsle_vv_i8m8_b1(v1, v1, vl); +} + +vbool2_t test_shortcut_for_riscv_vmsle_case_1(vint8m4_t v1, size_t vl) { + return __riscv_vmsle_vv_i8m4_b2(v1, v1, vl); +} + +vbool4_t test_shortcut_for_riscv_vmsle_case_2(vint8m2_t v1, size_t vl) { + return __riscv_vmsle_vv_i8m2_b4(v1, v1, vl); +} + +vbool8_t test_shortcut_for_riscv_vmsle_case_3(vint8m1_t v1, size_t vl) { + return __riscv_vmsle_vv_i8m1_b8(v1, v1, vl); +} + +vbool16_t test_shortcut_for_riscv_vmsle_case_4(vint8mf2_t v1, size_t vl) { + return __riscv_vmsle_vv_i8mf2_b16(v1, v1, vl); +} + +vbool32_t test_shortcut_for_riscv_vmsle_case_5(vint8mf4_t v1, size_t vl) { + return __riscv_vmsle_vv_i8mf4_b32(v1, v1, vl); +} + +vbool64_t test_shortcut_for_riscv_vmsle_case_6(vint8mf8_t v1, size_t vl) { + return __riscv_vmsle_vv_i8mf8_b64(v1, v1, vl); +} + +vbool1_t test_shortcut_for_riscv_vmsleu_case_0(vuint8m8_t v1, size_t vl) { + return __riscv_vmsleu_vv_u8m8_b1(v1, v1, vl); +} + +vbool2_t test_shortcut_for_riscv_vmsleu_case_1(vuint8m4_t v1, size_t vl) { + return __riscv_vmsleu_vv_u8m4_b2(v1, v1, vl); +} + +vbool4_t test_shortcut_for_riscv_vmsleu_case_2(vuint8m2_t v1, size_t vl) { + return __riscv_vmsleu_vv_u8m2_b4(v1, v1, vl); +} + +vbool8_t test_shortcut_for_riscv_vmsleu_case_3(vuint8m1_t v1, size_t vl) { + return __riscv_vmsleu_vv_u8m1_b8(v1, v1, vl); +} + +vbool16_t test_shortcut_for_riscv_vmsleu_case_4(vuint8mf2_t v1, size_t vl) { + return __riscv_vmsleu_vv_u8mf2_b16(v1, v1, vl); +} + +vbool32_t test_shortcut_for_riscv_vmsleu_case_5(vuint8mf4_t v1, size_t vl) { + return __riscv_vmsleu_vv_u8mf4_b32(v1, v1, vl); +} + +vbool64_t test_shortcut_for_riscv_vmsleu_case_6(vuint8mf8_t v1, size_t vl) { + return __riscv_vmsleu_vv_u8mf8_b64(v1, v1, vl); +} + +vbool1_t test_shortcut_for_riscv_vmsgt_case_0(vint8m8_t v1, size_t vl) { + return __riscv_vmsgt_vv_i8m8_b1(v1, v1, vl); +} + +vbool2_t test_shortcut_for_riscv_vmsgt_case_1(vint8m4_t v1, size_t vl) { + return __riscv_vmsgt_vv_i8m4_b2(v1, v1, vl); +} + +vbool4_t test_shortcut_for_riscv_vmsgt_case_2(vint8m2_t v1, size_t vl) { + return __riscv_vmsgt_vv_i8m2_b4(v1, v1, vl); +} + +vbool8_t test_shortcut_for_riscv_vmsgt_case_3(vint8m1_t v1, size_t vl) { + return __riscv_vmsgt_vv_i8m1_b8(v1, v1, vl); +} + +vbool16_t test_shortcut_for_riscv_vmsgt_case_4(vint8mf2_t v1, size_t vl) { + return __riscv_vmsgt_vv_i8mf2_b16(v1, v1, vl); +} + +vbool32_t test_shortcut_for_riscv_vmsgt_case_5(vint8mf4_t v1, size_t vl) { + return __riscv_vmsgt_vv_i8mf4_b32(v1, v1, vl); +} + +vbool64_t test_shortcut_for_riscv_vmsgt_case_6(vint8mf8_t v1, size_t vl) { + return __riscv_vmsgt_vv_i8mf8_b64(v1, v1, vl); +} + +vbool1_t test_shortcut_for_riscv_vmsgtu_case_0(vuint8m8_t v1, size_t vl) { + return __riscv_vmsgtu_vv_u8m8_b1(v1, v1, vl); +} + +vbool2_t test_shortcut_for_riscv_vmsgtu_case_1(vuint8m4_t v1, size_t vl) { + return __riscv_vmsgtu_vv_u8m4_b2(v1, v1, vl); +} + +vbool4_t test_shortcut_for_riscv_vmsgtu_case_2(vuint8m2_t v1, size_t vl) { + return __riscv_vmsgtu_vv_u8m2_b4(v1, v1, vl); +} + +vbool8_t test_shortcut_for_riscv_vmsgtu_case_3(vuint8m1_t v1, size_t vl) { + return __riscv_vmsgtu_vv_u8m1_b8(v1, v1, vl); +} + +vbool16_t test_shortcut_for_riscv_vmsgtu_case_4(vuint8mf2_t v1, size_t vl) { + return __riscv_vmsgtu_vv_u8mf2_b16(v1, v1, vl); +} + +vbool32_t test_shortcut_for_riscv_vmsgtu_case_5(vuint8mf4_t v1, size_t vl) { + return __riscv_vmsgtu_vv_u8mf4_b32(v1, v1, vl); +} + +vbool64_t test_shortcut_for_riscv_vmsgtu_case_6(vuint8mf8_t v1, size_t vl) { + return __riscv_vmsgtu_vv_u8mf8_b64(v1, v1, vl); +} + +vbool1_t test_shortcut_for_riscv_vmsge_case_0(vint8m8_t v1, size_t vl) { + return __riscv_vmsge_vv_i8m8_b1(v1, v1, vl); +} + +vbool2_t test_shortcut_for_riscv_vmsge_case_1(vint8m4_t v1, size_t vl) { + return __riscv_vmsge_vv_i8m4_b2(v1, v1, vl); +} + +vbool4_t test_shortcut_for_riscv_vmsge_case_2(vint8m2_t v1, size_t vl) { + return __riscv_vmsge_vv_i8m2_b4(v1, v1, vl); +} + +vbool8_t test_shortcut_for_riscv_vmsge_case_3(vint8m1_t v1, size_t vl) { + return __riscv_vmsge_vv_i8m1_b8(v1, v1, vl); +} + +vbool16_t test_shortcut_for_riscv_vmsge_case_4(vint8mf2_t v1, size_t vl) { + return __riscv_vmsge_vv_i8mf2_b16(v1, v1, vl); +} + +vbool32_t test_shortcut_for_riscv_vmsge_case_5(vint8mf4_t v1, size_t vl) { + return __riscv_vmsge_vv_i8mf4_b32(v1, v1, vl); +} + +vbool64_t test_shortcut_for_riscv_vmsge_case_6(vint8mf8_t v1, size_t vl) { + return __riscv_vmsge_vv_i8mf8_b64(v1, v1, vl); +} + +vbool1_t test_shortcut_for_riscv_vmsgeu_case_0(vuint8m8_t v1, size_t vl) { + return __riscv_vmsgeu_vv_u8m8_b1(v1, v1, vl); +} + +vbool2_t test_shortcut_for_riscv_vmsgeu_case_1(vuint8m4_t v1, size_t vl) { + return __riscv_vmsgeu_vv_u8m4_b2(v1, v1, vl); +} + +vbool4_t test_shortcut_for_riscv_vmsgeu_case_2(vuint8m2_t v1, size_t vl) { + return __riscv_vmsgeu_vv_u8m2_b4(v1, v1, vl); +} + +vbool8_t test_shortcut_for_riscv_vmsgeu_case_3(vuint8m1_t v1, size_t vl) { + return __riscv_vmsgeu_vv_u8m1_b8(v1, v1, vl); +} + +vbool16_t test_shortcut_for_riscv_vmsgeu_case_4(vuint8mf2_t v1, size_t vl) { + return __riscv_vmsgeu_vv_u8mf2_b16(v1, v1, vl); +} + +vbool32_t test_shortcut_for_riscv_vmsgeu_case_5(vuint8mf4_t v1, size_t vl) { + return __riscv_vmsgeu_vv_u8mf4_b32(v1, v1, vl); +} + +vbool64_t test_shortcut_for_riscv_vmsgeu_case_6(vuint8mf8_t v1, size_t vl) { + return __riscv_vmsgeu_vv_u8mf8_b64(v1, v1, vl); +} + +/* { dg-final { scan-assembler-times {vmseq\.vv\sv[0-9],\s*v[0-9],\s*v[0-9]} 7 } } */ +/* { dg-final { scan-assembler-times {vmsle\.vv\sv[0-9],\s*v[0-9],\s*v[0-9]} 7 } } */ +/* { dg-final { scan-assembler-times {vmsleu\.vv\sv[0-9],\s*v[0-9],\s*v[0-9]} 7 } } */ +/* { dg-final { scan-assembler-times {vmsge\.vv\sv[0-9],\s*v[0-9],\s*v[0-9]} 7 } } */ +/* { dg-final { scan-assembler-times {vmsgeu\.vv\sv[0-9],\s*v[0-9],\s*v[0-9]} 7 } } */ +/* { dg-final { scan-assembler-times {vmclr\.m\sv[0-9]} 35 } } */