public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v1] RISC-V: Bugfix for merging undefined tmp register in math
@ 2023-10-23  1:26 pan2.li
  2023-10-23  1:35 ` juzhe.zhong
  0 siblings, 1 reply; 5+ messages in thread
From: pan2.li @ 2023-10-23  1:26 UTC (permalink / raw)
  To: gcc-patches; +Cc: juzhe.zhong, pan2.li, yanzhang.wang, kito.cheng

From: Pan Li <pan2.li@intel.com>

For math function autovec, there will be one step like

rtx tmp = gen_reg_rtx (vec_int_mode);
emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);

The MU will leave the tmp (aka dest register) register unmasked elements
unchanged and it is undefined here. This patch would like to adjust the
MU to MA.

gcc/ChangeLog:

	* config/riscv/riscv-protos.h (enum insn_type): Add new type
	values.
	* config/riscv/riscv-v.cc (emit_vec_cvt_x_f): Add undef merge
	operand handling.
	(expand_vec_ceil): Take MA instead of MU for tmp register.
	(expand_vec_floor): Ditto.
	(expand_vec_nearbyint): Ditto.
	(expand_vec_rint): Ditto.
	(expand_vec_round): Ditto.
	(expand_vec_roundeven): Ditto.

Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/config/riscv/riscv-protos.h |  5 +++++
 gcc/config/riscv/riscv-v.cc     | 24 ++++++++++++++++--------
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index f7a9a02f1f9..5dc97c2adc0 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -306,6 +306,11 @@ enum insn_type : unsigned int
   UNARY_OP_FRM_RMM = UNARY_OP | FRM_RMM_P,
   UNARY_OP_FRM_RUP = UNARY_OP | FRM_RUP_P,
   UNARY_OP_FRM_RDN = UNARY_OP | FRM_RDN_P,
+  UNARY_OP_TAMA_FRM_DYN = UNARY_OP_TAMA | FRM_DYN_P,
+  UNARY_OP_TAMA_FRM_RUP = UNARY_OP_TAMA | FRM_RUP_P,
+  UNARY_OP_TAMA_FRM_RDN = UNARY_OP_TAMA | FRM_RDN_P,
+  UNARY_OP_TAMA_FRM_RMM = UNARY_OP_TAMA | FRM_RMM_P,
+  UNARY_OP_TAMA_FRM_RNE = UNARY_OP_TAMA | FRM_RNE_P,
   UNARY_OP_TAMU_FRM_DYN = UNARY_OP_TAMU | FRM_DYN_P,
   UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
   UNARY_OP_TAMU_FRM_RDN = UNARY_OP_TAMU | FRM_RDN_P,
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 383af55fe3a..91ad6a61fa8 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4108,10 +4108,18 @@ static void
 emit_vec_cvt_x_f (rtx op_dest, rtx op_src, rtx mask,
 		  insn_type type, machine_mode vec_mode)
 {
-  rtx cvt_x_ops[] = {op_dest, mask, op_dest, op_src};
   insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_mode);
 
-  emit_vlmax_insn (icode, type, cvt_x_ops);
+  if (type & USE_VUNDEF_MERGE_P)
+    {
+      rtx cvt_x_ops[] = {op_dest, mask, op_src};
+      emit_vlmax_insn (icode, type, cvt_x_ops);
+    }
+  else
+    {
+      rtx cvt_x_ops[] = {op_dest, mask, op_dest, op_src};
+      emit_vlmax_insn (icode, type, cvt_x_ops);
+    }
 }
 
 static void
@@ -4157,7 +4165,7 @@ expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
 
   /* Step-3: Convert to integer on mask, with rounding up (aka ceil).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RUP, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RUP, vec_fp_mode);
 
   /* Step-4: Convert to floating-point on mask for the final result.
      To avoid unnecessary frm register access, we use RUP here and it will
@@ -4182,7 +4190,7 @@ expand_vec_floor (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
 
   /* Step-3: Convert to integer on mask, with rounding down (aka floor).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RDN, vec_fp_mode);
 
   /* Step-4: Convert to floating-point on mask for the floor result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
@@ -4208,7 +4216,7 @@ expand_vec_nearbyint (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
 
   /* Step-4: Convert to integer on mask, with rounding down (aka nearbyint).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_DYN, vec_fp_mode);
 
   /* Step-5: Convert to floating-point on mask for the nearbyint result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
@@ -4233,7 +4241,7 @@ expand_vec_rint (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
 
   /* Step-3: Convert to integer on mask, with dyn rounding (aka rint).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_DYN, vec_fp_mode);
 
   /* Step-4: Convert to floating-point on mask for the rint result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
@@ -4255,7 +4263,7 @@ expand_vec_round (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
 
   /* Step-3: Convert to integer on mask, rounding to nearest (aka round).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RMM, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RMM, vec_fp_mode);
 
   /* Step-4: Convert to floating-point on mask for the round result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RMM, vec_fp_mode);
@@ -4299,7 +4307,7 @@ expand_vec_roundeven (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
 
   /* Step-3: Convert to integer on mask, rounding to nearest, ties to even.  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RNE, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RNE, vec_fp_mode);
 
   /* Step-4: Convert to floating-point on mask for the rint result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RNE, vec_fp_mode);
-- 
2.34.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v1] RISC-V: Bugfix for merging undefined tmp register in math
  2023-10-23  1:26 [PATCH v1] RISC-V: Bugfix for merging undefined tmp register in math pan2.li
@ 2023-10-23  1:35 ` juzhe.zhong
  2023-10-23  1:42   ` Li, Pan2
  0 siblings, 1 reply; 5+ messages in thread
From: juzhe.zhong @ 2023-10-23  1:35 UTC (permalink / raw)
  To: pan2.li, gcc-patches; +Cc: pan2.li, yanzhang.wang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 6082 bytes --]

UNARY_OP_TAMU_FRM_DYN = UNARY_OP_TAMU | FRM_DYN_P,
   UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
   UNARY_OP_TAMU_FRM_RDN = UNARY_OP_TAMU | FRM_RDN_P,

Are they still necessary ?


juzhe.zhong@rivai.ai
 
From: pan2.li
Date: 2023-10-23 09:26
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Bugfix for merging undefined tmp register in math
From: Pan Li <pan2.li@intel.com>
 
For math function autovec, there will be one step like
 
rtx tmp = gen_reg_rtx (vec_int_mode);
emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
 
The MU will leave the tmp (aka dest register) register unmasked elements
unchanged and it is undefined here. This patch would like to adjust the
MU to MA.
 
gcc/ChangeLog:
 
* config/riscv/riscv-protos.h (enum insn_type): Add new type
values.
* config/riscv/riscv-v.cc (emit_vec_cvt_x_f): Add undef merge
operand handling.
(expand_vec_ceil): Take MA instead of MU for tmp register.
(expand_vec_floor): Ditto.
(expand_vec_nearbyint): Ditto.
(expand_vec_rint): Ditto.
(expand_vec_round): Ditto.
(expand_vec_roundeven): Ditto.
 
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/riscv-protos.h |  5 +++++
gcc/config/riscv/riscv-v.cc     | 24 ++++++++++++++++--------
2 files changed, 21 insertions(+), 8 deletions(-)
 
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index f7a9a02f1f9..5dc97c2adc0 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -306,6 +306,11 @@ enum insn_type : unsigned int
   UNARY_OP_FRM_RMM = UNARY_OP | FRM_RMM_P,
   UNARY_OP_FRM_RUP = UNARY_OP | FRM_RUP_P,
   UNARY_OP_FRM_RDN = UNARY_OP | FRM_RDN_P,
+  UNARY_OP_TAMA_FRM_DYN = UNARY_OP_TAMA | FRM_DYN_P,
+  UNARY_OP_TAMA_FRM_RUP = UNARY_OP_TAMA | FRM_RUP_P,
+  UNARY_OP_TAMA_FRM_RDN = UNARY_OP_TAMA | FRM_RDN_P,
+  UNARY_OP_TAMA_FRM_RMM = UNARY_OP_TAMA | FRM_RMM_P,
+  UNARY_OP_TAMA_FRM_RNE = UNARY_OP_TAMA | FRM_RNE_P,
   UNARY_OP_TAMU_FRM_DYN = UNARY_OP_TAMU | FRM_DYN_P,
   UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
   UNARY_OP_TAMU_FRM_RDN = UNARY_OP_TAMU | FRM_RDN_P,
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 383af55fe3a..91ad6a61fa8 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4108,10 +4108,18 @@ static void
emit_vec_cvt_x_f (rtx op_dest, rtx op_src, rtx mask,
  insn_type type, machine_mode vec_mode)
{
-  rtx cvt_x_ops[] = {op_dest, mask, op_dest, op_src};
   insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_mode);
-  emit_vlmax_insn (icode, type, cvt_x_ops);
+  if (type & USE_VUNDEF_MERGE_P)
+    {
+      rtx cvt_x_ops[] = {op_dest, mask, op_src};
+      emit_vlmax_insn (icode, type, cvt_x_ops);
+    }
+  else
+    {
+      rtx cvt_x_ops[] = {op_dest, mask, op_dest, op_src};
+      emit_vlmax_insn (icode, type, cvt_x_ops);
+    }
}
static void
@@ -4157,7 +4165,7 @@ expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, with rounding up (aka ceil).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RUP, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RUP, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the final result.
      To avoid unnecessary frm register access, we use RUP here and it will
@@ -4182,7 +4190,7 @@ expand_vec_floor (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, with rounding down (aka floor).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RDN, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the floor result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
@@ -4208,7 +4216,7 @@ expand_vec_nearbyint (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-4: Convert to integer on mask, with rounding down (aka nearbyint).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_DYN, vec_fp_mode);
   /* Step-5: Convert to floating-point on mask for the nearbyint result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
@@ -4233,7 +4241,7 @@ expand_vec_rint (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, with dyn rounding (aka rint).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_DYN, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the rint result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
@@ -4255,7 +4263,7 @@ expand_vec_round (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, rounding to nearest (aka round).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RMM, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RMM, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the round result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RMM, vec_fp_mode);
@@ -4299,7 +4307,7 @@ expand_vec_roundeven (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, rounding to nearest, ties to even.  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RNE, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RNE, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the rint result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RNE, vec_fp_mode);
-- 
2.34.1
 
 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH v1] RISC-V: Bugfix for merging undefined tmp register in math
  2023-10-23  1:35 ` juzhe.zhong
@ 2023-10-23  1:42   ` Li, Pan2
  2023-10-23  1:44     ` juzhe.zhong
  0 siblings, 1 reply; 5+ messages in thread
From: Li, Pan2 @ 2023-10-23  1:42 UTC (permalink / raw)
  To: juzhe.zhong, gcc-patches; +Cc: Wang, Yanzhang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 6813 bytes --]

Yes, it is required by the second cvt. The unmasked elements keep the original values.

Pan

From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai>
Sent: Monday, October 23, 2023 9:35 AM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Li, Pan2 <pan2.li@intel.com>; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng <kito.cheng@gmail.com>
Subject: Re: [PATCH v1] RISC-V: Bugfix for merging undefined tmp register in math

UNARY_OP_TAMU_FRM_DYN = UNARY_OP_TAMU | FRM_DYN_P,
   UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
   UNARY_OP_TAMU_FRM_RDN = UNARY_OP_TAMU | FRM_RDN_P,

Are they still necessary ?
________________________________
juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>

From: pan2.li<mailto:pan2.li@intel.com>
Date: 2023-10-23 09:26
To: gcc-patches<mailto:gcc-patches@gcc.gnu.org>
CC: juzhe.zhong<mailto:juzhe.zhong@rivai.ai>; pan2.li<mailto:pan2.li@intel.com>; yanzhang.wang<mailto:yanzhang.wang@intel.com>; kito.cheng<mailto:kito.cheng@gmail.com>
Subject: [PATCH v1] RISC-V: Bugfix for merging undefined tmp register in math
From: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>

For math function autovec, there will be one step like

rtx tmp = gen_reg_rtx (vec_int_mode);
emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);

The MU will leave the tmp (aka dest register) register unmasked elements
unchanged and it is undefined here. This patch would like to adjust the
MU to MA.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (enum insn_type): Add new type
values.
* config/riscv/riscv-v.cc (emit_vec_cvt_x_f): Add undef merge
operand handling.
(expand_vec_ceil): Take MA instead of MU for tmp register.
(expand_vec_floor): Ditto.
(expand_vec_nearbyint): Ditto.
(expand_vec_rint): Ditto.
(expand_vec_round): Ditto.
(expand_vec_roundeven): Ditto.

Signed-off-by: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
---
gcc/config/riscv/riscv-protos.h |  5 +++++
gcc/config/riscv/riscv-v.cc     | 24 ++++++++++++++++--------
2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index f7a9a02f1f9..5dc97c2adc0 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -306,6 +306,11 @@ enum insn_type : unsigned int
   UNARY_OP_FRM_RMM = UNARY_OP | FRM_RMM_P,
   UNARY_OP_FRM_RUP = UNARY_OP | FRM_RUP_P,
   UNARY_OP_FRM_RDN = UNARY_OP | FRM_RDN_P,
+  UNARY_OP_TAMA_FRM_DYN = UNARY_OP_TAMA | FRM_DYN_P,
+  UNARY_OP_TAMA_FRM_RUP = UNARY_OP_TAMA | FRM_RUP_P,
+  UNARY_OP_TAMA_FRM_RDN = UNARY_OP_TAMA | FRM_RDN_P,
+  UNARY_OP_TAMA_FRM_RMM = UNARY_OP_TAMA | FRM_RMM_P,
+  UNARY_OP_TAMA_FRM_RNE = UNARY_OP_TAMA | FRM_RNE_P,
   UNARY_OP_TAMU_FRM_DYN = UNARY_OP_TAMU | FRM_DYN_P,
   UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
   UNARY_OP_TAMU_FRM_RDN = UNARY_OP_TAMU | FRM_RDN_P,
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 383af55fe3a..91ad6a61fa8 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4108,10 +4108,18 @@ static void
emit_vec_cvt_x_f (rtx op_dest, rtx op_src, rtx mask,
  insn_type type, machine_mode vec_mode)
{
-  rtx cvt_x_ops[] = {op_dest, mask, op_dest, op_src};
   insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_mode);
-  emit_vlmax_insn (icode, type, cvt_x_ops);
+  if (type & USE_VUNDEF_MERGE_P)
+    {
+      rtx cvt_x_ops[] = {op_dest, mask, op_src};
+      emit_vlmax_insn (icode, type, cvt_x_ops);
+    }
+  else
+    {
+      rtx cvt_x_ops[] = {op_dest, mask, op_dest, op_src};
+      emit_vlmax_insn (icode, type, cvt_x_ops);
+    }
}
static void
@@ -4157,7 +4165,7 @@ expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, with rounding up (aka ceil).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RUP, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RUP, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the final result.
      To avoid unnecessary frm register access, we use RUP here and it will
@@ -4182,7 +4190,7 @@ expand_vec_floor (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, with rounding down (aka floor).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RDN, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the floor result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
@@ -4208,7 +4216,7 @@ expand_vec_nearbyint (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-4: Convert to integer on mask, with rounding down (aka nearbyint).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_DYN, vec_fp_mode);
   /* Step-5: Convert to floating-point on mask for the nearbyint result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
@@ -4233,7 +4241,7 @@ expand_vec_rint (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, with dyn rounding (aka rint).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_DYN, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the rint result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
@@ -4255,7 +4263,7 @@ expand_vec_round (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, rounding to nearest (aka round).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RMM, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RMM, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the round result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RMM, vec_fp_mode);
@@ -4299,7 +4307,7 @@ expand_vec_roundeven (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, rounding to nearest, ties to even.  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RNE, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RNE, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the rint result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RNE, vec_fp_mode);
--
2.34.1



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: RE: [PATCH v1] RISC-V: Bugfix for merging undefined tmp register in math
  2023-10-23  1:42   ` Li, Pan2
@ 2023-10-23  1:44     ` juzhe.zhong
  2023-10-23  1:57       ` Li, Pan2
  0 siblings, 1 reply; 5+ messages in thread
From: juzhe.zhong @ 2023-10-23  1:44 UTC (permalink / raw)
  To: pan2.li, gcc-patches; +Cc: yanzhang.wang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 6782 bytes --]

OK。 LGTM。



juzhe.zhong@rivai.ai
 
From: Li, Pan2
Date: 2023-10-23 09:42
To: juzhe.zhong@rivai.ai; gcc-patches
CC: Wang, Yanzhang; kito.cheng
Subject: RE: [PATCH v1] RISC-V: Bugfix for merging undefined tmp register in math
Yes, it is required by the second cvt. The unmasked elements keep the original values.
 
Pan
 
From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai> 
Sent: Monday, October 23, 2023 9:35 AM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Li, Pan2 <pan2.li@intel.com>; Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng <kito.cheng@gmail.com>
Subject: Re: [PATCH v1] RISC-V: Bugfix for merging undefined tmp register in math
 
UNARY_OP_TAMU_FRM_DYN = UNARY_OP_TAMU | FRM_DYN_P,
   UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
   UNARY_OP_TAMU_FRM_RDN = UNARY_OP_TAMU | FRM_RDN_P,
 
Are they still necessary ?


juzhe.zhong@rivai.ai
 
From: pan2.li
Date: 2023-10-23 09:26
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Bugfix for merging undefined tmp register in math
From: Pan Li <pan2.li@intel.com>
 
For math function autovec, there will be one step like
 
rtx tmp = gen_reg_rtx (vec_int_mode);
emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
 
The MU will leave the tmp (aka dest register) register unmasked elements
unchanged and it is undefined here. This patch would like to adjust the
MU to MA.
 
gcc/ChangeLog:
 
* config/riscv/riscv-protos.h (enum insn_type): Add new type
values.
* config/riscv/riscv-v.cc (emit_vec_cvt_x_f): Add undef merge
operand handling.
(expand_vec_ceil): Take MA instead of MU for tmp register.
(expand_vec_floor): Ditto.
(expand_vec_nearbyint): Ditto.
(expand_vec_rint): Ditto.
(expand_vec_round): Ditto.
(expand_vec_roundeven): Ditto.
 
Signed-off-by: Pan Li <pan2.li@intel.com>
---
gcc/config/riscv/riscv-protos.h |  5 +++++
gcc/config/riscv/riscv-v.cc     | 24 ++++++++++++++++--------
2 files changed, 21 insertions(+), 8 deletions(-)
 
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index f7a9a02f1f9..5dc97c2adc0 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -306,6 +306,11 @@ enum insn_type : unsigned int
   UNARY_OP_FRM_RMM = UNARY_OP | FRM_RMM_P,
   UNARY_OP_FRM_RUP = UNARY_OP | FRM_RUP_P,
   UNARY_OP_FRM_RDN = UNARY_OP | FRM_RDN_P,
+  UNARY_OP_TAMA_FRM_DYN = UNARY_OP_TAMA | FRM_DYN_P,
+  UNARY_OP_TAMA_FRM_RUP = UNARY_OP_TAMA | FRM_RUP_P,
+  UNARY_OP_TAMA_FRM_RDN = UNARY_OP_TAMA | FRM_RDN_P,
+  UNARY_OP_TAMA_FRM_RMM = UNARY_OP_TAMA | FRM_RMM_P,
+  UNARY_OP_TAMA_FRM_RNE = UNARY_OP_TAMA | FRM_RNE_P,
   UNARY_OP_TAMU_FRM_DYN = UNARY_OP_TAMU | FRM_DYN_P,
   UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
   UNARY_OP_TAMU_FRM_RDN = UNARY_OP_TAMU | FRM_RDN_P,
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 383af55fe3a..91ad6a61fa8 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4108,10 +4108,18 @@ static void
emit_vec_cvt_x_f (rtx op_dest, rtx op_src, rtx mask,
  insn_type type, machine_mode vec_mode)
{
-  rtx cvt_x_ops[] = {op_dest, mask, op_dest, op_src};
   insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_mode);
-  emit_vlmax_insn (icode, type, cvt_x_ops);
+  if (type & USE_VUNDEF_MERGE_P)
+    {
+      rtx cvt_x_ops[] = {op_dest, mask, op_src};
+      emit_vlmax_insn (icode, type, cvt_x_ops);
+    }
+  else
+    {
+      rtx cvt_x_ops[] = {op_dest, mask, op_dest, op_src};
+      emit_vlmax_insn (icode, type, cvt_x_ops);
+    }
}
static void
@@ -4157,7 +4165,7 @@ expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, with rounding up (aka ceil).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RUP, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RUP, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the final result.
      To avoid unnecessary frm register access, we use RUP here and it will
@@ -4182,7 +4190,7 @@ expand_vec_floor (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, with rounding down (aka floor).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RDN, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the floor result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
@@ -4208,7 +4216,7 @@ expand_vec_nearbyint (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-4: Convert to integer on mask, with rounding down (aka nearbyint).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_DYN, vec_fp_mode);
   /* Step-5: Convert to floating-point on mask for the nearbyint result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
@@ -4233,7 +4241,7 @@ expand_vec_rint (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, with dyn rounding (aka rint).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_DYN, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the rint result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
@@ -4255,7 +4263,7 @@ expand_vec_round (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, rounding to nearest (aka round).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RMM, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RMM, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the round result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RMM, vec_fp_mode);
@@ -4299,7 +4307,7 @@ expand_vec_roundeven (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, rounding to nearest, ties to even.  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RNE, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RNE, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the rint result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RNE, vec_fp_mode);
-- 
2.34.1
 
 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: RE: [PATCH v1] RISC-V: Bugfix for merging undefined tmp register in math
  2023-10-23  1:44     ` juzhe.zhong
@ 2023-10-23  1:57       ` Li, Pan2
  0 siblings, 0 replies; 5+ messages in thread
From: Li, Pan2 @ 2023-10-23  1:57 UTC (permalink / raw)
  To: juzhe.zhong, gcc-patches; +Cc: Wang, Yanzhang, kito.cheng

[-- Attachment #1: Type: text/plain, Size: 7841 bytes --]

Committed, thanks Juzhe.

Pan

From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai>
Sent: Monday, October 23, 2023 9:44 AM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Wang, Yanzhang <yanzhang.wang@intel.com>; kito.cheng <kito.cheng@gmail.com>
Subject: Re: RE: [PATCH v1] RISC-V: Bugfix for merging undefined tmp register in math

OK。 LGTM。

________________________________
juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>

From: Li, Pan2<mailto:pan2.li@intel.com>
Date: 2023-10-23 09:42
To: juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>; gcc-patches<mailto:gcc-patches@gcc.gnu.org>
CC: Wang, Yanzhang<mailto:yanzhang.wang@intel.com>; kito.cheng<mailto:kito.cheng@gmail.com>
Subject: RE: [PATCH v1] RISC-V: Bugfix for merging undefined tmp register in math
Yes, it is required by the second cvt. The unmasked elements keep the original values.

Pan

From: juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai> <juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>>
Sent: Monday, October 23, 2023 9:35 AM
To: Li, Pan2 <pan2.li@intel.com<mailto:pan2.li@intel.com>>; gcc-patches <gcc-patches@gcc.gnu.org<mailto:gcc-patches@gcc.gnu.org>>
Cc: Li, Pan2 <pan2.li@intel.com<mailto:pan2.li@intel.com>>; Wang, Yanzhang <yanzhang.wang@intel.com<mailto:yanzhang.wang@intel.com>>; kito.cheng <kito.cheng@gmail.com<mailto:kito.cheng@gmail.com>>
Subject: Re: [PATCH v1] RISC-V: Bugfix for merging undefined tmp register in math

UNARY_OP_TAMU_FRM_DYN = UNARY_OP_TAMU | FRM_DYN_P,
   UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
   UNARY_OP_TAMU_FRM_RDN = UNARY_OP_TAMU | FRM_RDN_P,

Are they still necessary ?
________________________________
juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>

From: pan2.li<mailto:pan2.li@intel.com>
Date: 2023-10-23 09:26
To: gcc-patches<mailto:gcc-patches@gcc.gnu.org>
CC: juzhe.zhong<mailto:juzhe.zhong@rivai.ai>; pan2.li<mailto:pan2.li@intel.com>; yanzhang.wang<mailto:yanzhang.wang@intel.com>; kito.cheng<mailto:kito.cheng@gmail.com>
Subject: [PATCH v1] RISC-V: Bugfix for merging undefined tmp register in math
From: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>

For math function autovec, there will be one step like

rtx tmp = gen_reg_rtx (vec_int_mode);
emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);

The MU will leave the tmp (aka dest register) register unmasked elements
unchanged and it is undefined here. This patch would like to adjust the
MU to MA.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (enum insn_type): Add new type
values.
* config/riscv/riscv-v.cc (emit_vec_cvt_x_f): Add undef merge
operand handling.
(expand_vec_ceil): Take MA instead of MU for tmp register.
(expand_vec_floor): Ditto.
(expand_vec_nearbyint): Ditto.
(expand_vec_rint): Ditto.
(expand_vec_round): Ditto.
(expand_vec_roundeven): Ditto.

Signed-off-by: Pan Li <pan2.li@intel.com<mailto:pan2.li@intel.com>>
---
gcc/config/riscv/riscv-protos.h |  5 +++++
gcc/config/riscv/riscv-v.cc     | 24 ++++++++++++++++--------
2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index f7a9a02f1f9..5dc97c2adc0 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -306,6 +306,11 @@ enum insn_type : unsigned int
   UNARY_OP_FRM_RMM = UNARY_OP | FRM_RMM_P,
   UNARY_OP_FRM_RUP = UNARY_OP | FRM_RUP_P,
   UNARY_OP_FRM_RDN = UNARY_OP | FRM_RDN_P,
+  UNARY_OP_TAMA_FRM_DYN = UNARY_OP_TAMA | FRM_DYN_P,
+  UNARY_OP_TAMA_FRM_RUP = UNARY_OP_TAMA | FRM_RUP_P,
+  UNARY_OP_TAMA_FRM_RDN = UNARY_OP_TAMA | FRM_RDN_P,
+  UNARY_OP_TAMA_FRM_RMM = UNARY_OP_TAMA | FRM_RMM_P,
+  UNARY_OP_TAMA_FRM_RNE = UNARY_OP_TAMA | FRM_RNE_P,
   UNARY_OP_TAMU_FRM_DYN = UNARY_OP_TAMU | FRM_DYN_P,
   UNARY_OP_TAMU_FRM_RUP = UNARY_OP_TAMU | FRM_RUP_P,
   UNARY_OP_TAMU_FRM_RDN = UNARY_OP_TAMU | FRM_RDN_P,
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 383af55fe3a..91ad6a61fa8 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4108,10 +4108,18 @@ static void
emit_vec_cvt_x_f (rtx op_dest, rtx op_src, rtx mask,
  insn_type type, machine_mode vec_mode)
{
-  rtx cvt_x_ops[] = {op_dest, mask, op_dest, op_src};
   insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_mode);
-  emit_vlmax_insn (icode, type, cvt_x_ops);
+  if (type & USE_VUNDEF_MERGE_P)
+    {
+      rtx cvt_x_ops[] = {op_dest, mask, op_src};
+      emit_vlmax_insn (icode, type, cvt_x_ops);
+    }
+  else
+    {
+      rtx cvt_x_ops[] = {op_dest, mask, op_dest, op_src};
+      emit_vlmax_insn (icode, type, cvt_x_ops);
+    }
}
static void
@@ -4157,7 +4165,7 @@ expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, with rounding up (aka ceil).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RUP, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RUP, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the final result.
      To avoid unnecessary frm register access, we use RUP here and it will
@@ -4182,7 +4190,7 @@ expand_vec_floor (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, with rounding down (aka floor).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RDN, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the floor result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RDN, vec_fp_mode);
@@ -4208,7 +4216,7 @@ expand_vec_nearbyint (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-4: Convert to integer on mask, with rounding down (aka nearbyint).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_DYN, vec_fp_mode);
   /* Step-5: Convert to floating-point on mask for the nearbyint result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
@@ -4233,7 +4241,7 @@ expand_vec_rint (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, with dyn rounding (aka rint).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_DYN, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the rint result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_DYN, vec_fp_mode);
@@ -4255,7 +4263,7 @@ expand_vec_round (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, rounding to nearest (aka round).  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RMM, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RMM, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the round result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RMM, vec_fp_mode);
@@ -4299,7 +4307,7 @@ expand_vec_roundeven (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
   /* Step-3: Convert to integer on mask, rounding to nearest, ties to even.  */
   rtx tmp = gen_reg_rtx (vec_int_mode);
-  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMU_FRM_RNE, vec_fp_mode);
+  emit_vec_cvt_x_f (tmp, op_1, mask, UNARY_OP_TAMA_FRM_RNE, vec_fp_mode);
   /* Step-4: Convert to floating-point on mask for the rint result.  */
   emit_vec_cvt_f_x (op_0, tmp, mask, UNARY_OP_TAMU_FRM_RNE, vec_fp_mode);
--
2.34.1



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-10-23  1:57 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-23  1:26 [PATCH v1] RISC-V: Bugfix for merging undefined tmp register in math pan2.li
2023-10-23  1:35 ` juzhe.zhong
2023-10-23  1:42   ` Li, Pan2
2023-10-23  1:44     ` juzhe.zhong
2023-10-23  1:57       ` Li, Pan2

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).