UNARY_OP_TAMU_FRM_DYN = UNARY_OP_TAMU | FRM_DYN_P, UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P, UNARY_OP_TAMU_FRM_RDN = UNARY_OP_TAMU | FRM_RDN_P, Are they still necessary ? juzhe.zhong@rivai.ai From: pan2.li Date: 2023-10-23 09:26 To: gcc-patches CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng Subject: [PATCH v1] RISC-V: Bugfix for merging undefined tmp register in math From: Pan Li For math function autovec, there will be one step like rtx tmp = gen_reg_rtx (vec_int_mode); emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode); The MU will leave the tmp (aka dest register) register unmasked elements unchanged and it is undefined here. This patch would like to adjust the MU to MA. gcc/ChangeLog: * config/riscv/riscv-protos.h (enum insn_type): Add new type values. * config/riscv/riscv-v.cc (emit_vec_cvt_x_f): Add undef merge operand handling. (expand_vec_ceil): Take MA instead of MU for tmp register. (expand_vec_floor): Ditto. (expand_vec_nearbyint): Ditto. (expand_vec_rint): Ditto. (expand_vec_round): Ditto. (expand_vec_roundeven): Ditto. Signed-off-by: Pan Li --- gcc/config/riscv/riscv-protos.h | 5 +++++ gcc/config/riscv/riscv-v.cc | 24 ++++++++++++++++-------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index f7a9a02f1f9..5dc97c2adc0 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -306,6 +306,11 @@ enum insn_type : unsigned int UNARY_OP_FRM_RMM = UNARY_OP | FRM_RMM_P, UNARY_OP_FRM_RUP = UNARY_OP | FRM_RUP_P, UNARY_OP_FRM_RDN = UNARY_OP | FRM_RDN_P, + UNARY_OP_TAMA_FRM_DYN = UNARY_OP_TAMA | FRM_DYN_P, + UNARY_OP_TAMA_FRM_RUP = UNARY_OP_TAMA | FRM_RUP_P, + UNARY_OP_TAMA_FRM_RDN = UNARY_OP_TAMA | FRM_RDN_P, + UNARY_OP_TAMA_FRM_RMM = UNARY_OP_TAMA | FRM_RMM_P, + UNARY_OP_TAMA_FRM_RNE = UNARY_OP_TAMA | FRM_RNE_P, UNARY_OP_TAMU_FRM_DYN = UNARY_OP_TAMU | FRM_DYN_P, UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P, UNARY_OP_TAMU_FRM_RDN = UNARY_OP_TAMU | FRM_RDN_P, diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 383af55fe3a..91ad6a61fa8 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -4108,10 +4108,18 @@ static void emit_vec_cvt_x_f (rtx op_dest, rtx op_src, rtx mask, insn_type type, machine_mode vec_mode) { - rtx cvt_x_ops[] = {op_dest, mask, op_dest, op_src}; insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_mode); - emit_vlmax_insn (icode, type, cvt_x_ops); + if (type & USE_VUNDEF_MERGE_P) + { + rtx cvt_x_ops[] = {op_dest, mask, op_src}; + emit_vlmax_insn (icode, type, cvt_x_ops); + } + else + { + rtx cvt_x_ops[] = {op_dest, mask, op_dest, op_src}; + emit_vlmax_insn (icode, type, cvt_x_ops); + } } static void @@ -4157,7 +4165,7 @@ expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode, /* Step-3: Convert to integer on mask, with rounding up (aka ceil). */ rtx tmp = gen_reg_rtx (vec_int_mode); - emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RUP, vec_fp_mode); + emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RUP, vec_fp_mode); /* Step-4: Convert to floating-point on mask for the final result. To avoid unnecessary frm register access, we use RUP here and it will @@ -4182,7 +4190,7 @@ expand_vec_floor (rtx op_0, rtx op_1, machine_mode vec_fp_mode, /* Step-3: Convert to integer on mask, with rounding down (aka floor). */ rtx tmp = gen_reg_rtx (vec_int_mode); - emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode); + emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RDN, vec_fp_mode); /* Step-4: Convert to floating-point on mask for the floor result. */ emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode); @@ -4208,7 +4216,7 @@ expand_vec_nearbyint (rtx op_0, rtx op_1, machine_mode vec_fp_mode, /* Step-4: Convert to integer on mask, with rounding down (aka nearbyint). */ rtx tmp = gen_reg_rtx (vec_int_mode); - emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode); + emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_DYN, vec_fp_mode); /* Step-5: Convert to floating-point on mask for the nearbyint result. */ emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode); @@ -4233,7 +4241,7 @@ expand_vec_rint (rtx op_0, rtx op_1, machine_mode vec_fp_mode, /* Step-3: Convert to integer on mask, with dyn rounding (aka rint). */ rtx tmp = gen_reg_rtx (vec_int_mode); - emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode); + emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_DYN, vec_fp_mode); /* Step-4: Convert to floating-point on mask for the rint result. */ emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode); @@ -4255,7 +4263,7 @@ expand_vec_round (rtx op_0, rtx op_1, machine_mode vec_fp_mode, /* Step-3: Convert to integer on mask, rounding to nearest (aka round). */ rtx tmp = gen_reg_rtx (vec_int_mode); - emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RMM, vec_fp_mode); + emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RMM, vec_fp_mode); /* Step-4: Convert to floating-point on mask for the round result. */ emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RMM, vec_fp_mode); @@ -4299,7 +4307,7 @@ expand_vec_roundeven (rtx op_0, rtx op_1, machine_mode vec_fp_mode, /* Step-3: Convert to integer on mask, rounding to nearest, ties to even. */ rtx tmp = gen_reg_rtx (vec_int_mode); - emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RNE, vec_fp_mode); + emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RNE, vec_fp_mode); /* Step-4: Convert to floating-point on mask for the rint result. */ emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RNE, vec_fp_mode); -- 2.34.1