public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [RFC 1/2] RISC-V: Add support for _Bfloat16.
@ 2023-09-19  8:44 Jin Ma
  2023-09-19  8:46 ` [RFC 2/2] RISC-V: Add 'Zfbfmin' extension Jin Ma
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Jin Ma @ 2023-09-19  8:44 UTC (permalink / raw)
  To: gcc-patches
  Cc: jeffreyalaw, palmer, richard.sandiford, kito.cheng,
	philipp.tomsich, christoph.muellner, rdapp.gcc, juzhe.zhong,
	jinma.contrib, Jin Ma

gcc/ChangeLog:

	* config/riscv/iterators.md (HFBF): New.
	* config/riscv/riscv-builtins.cc (riscv_init_builtin_types):
	Initialize data type _Bfloat16.
	* config/riscv/riscv-modes.def (FLOAT_MODE): New.
	(ADJUST_FLOAT_FORMAT): New.
	* config/riscv/riscv.cc (riscv_mangle_type): Support for BFmode.
	(riscv_scalar_mode_supported_p): Ditto.
	(riscv_libgcc_floating_mode_supported_p): Ditto.
	(riscv_block_arith_comp_libfuncs_for_mode): New.
	(riscv_init_libfuncs): Enable and disable some libfuncs for BFmode.
	* config/riscv/riscv.md (mode): Add BF.
	(truncdfbf2): New.
	(movhf): Support for BFmode.
	(mov<mode>): Ditto.
	(*mov<mode>_softfloat):  Ditto.
	(fix_truncbf<GPR:mode>2): New.
	(fixuns_truncbf<GPR:mode>2): New.
	(float<mode>bf2): New.
	(floatuns<mode>bf2): New.

libgcc/ChangeLog:

	* config/riscv/sfp-machine.h (_FP_NANFRAC_B): New.
	(_FP_NANSIGN_B): New.
	* config/riscv/t-softfp32: Add support for BF libfuncs.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/bf16_arithmetic.c: New test.
	* gcc.target/riscv/bf16_call.c: New test.
	* gcc.target/riscv/bf16_comparisons.c: New test.
	* gcc.target/riscv/bf16_convert-1.c: New test.
	* gcc.target/riscv/bf16_convert-2.c: New test.
	* gcc.target/riscv/bf16_convert_run.c: New test.
---
 gcc/config/riscv/iterators.md                 |   2 +
 gcc/config/riscv/riscv-builtins.cc            |  16 ++
 gcc/config/riscv/riscv-modes.def              |   4 +
 gcc/config/riscv/riscv.cc                     |  93 ++++++++--
 gcc/config/riscv/riscv.md                     |  94 ++++++++--
 .../gcc.target/riscv/bf16_arithmetic.c        |  36 ++++
 gcc/testsuite/gcc.target/riscv/bf16_call.c    |  17 ++
 .../gcc.target/riscv/bf16_comparisons.c       |  25 +++
 .../gcc.target/riscv/bf16_convert-1.c         |  39 +++++
 .../gcc.target/riscv/bf16_convert-2.c         |  38 ++++
 .../gcc.target/riscv/bf16_convert_run.c       | 163 ++++++++++++++++++
 libgcc/config/riscv/sfp-machine.h             |   3 +
 libgcc/config/riscv/t-softfp32                |   7 +-
 13 files changed, 503 insertions(+), 34 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/bf16_arithmetic.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/bf16_call.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/bf16_comparisons.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/bf16_convert-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/bf16_convert-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/bf16_convert_run.c

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index ecf033f2fa7..73523b73fdd 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -84,6 +84,8 @@ (define_mode_iterator SOFTF [SF (DF "TARGET_64BIT") (HF "TARGET_ZFHMIN")])
 ;; instruction.
 (define_mode_attr size [(QI "b") (HI "h")])
 
+(define_mode_iterator HFBF [HF BF])
+
 ;; Mode attributes for loads.
 (define_mode_attr load [(QI "lb") (HI "lh") (SI "lw") (DI "ld") (HF "flh") (SF "flw") (DF "fld")])
 
diff --git a/gcc/config/riscv/riscv-builtins.cc b/gcc/config/riscv/riscv-builtins.cc
index 3fe3a89dcc2..b7bb89794f7 100644
--- a/gcc/config/riscv/riscv-builtins.cc
+++ b/gcc/config/riscv/riscv-builtins.cc
@@ -192,6 +192,7 @@ static GTY(()) int riscv_builtin_decl_index[NUM_INSN_CODES];
   riscv_builtin_decls[riscv_builtin_decl_index[(CODE)]]
 
 tree riscv_float16_type_node = NULL_TREE;
+tree riscv_bfloat16_type_node = NULL_TREE;
 
 /* Return the function type associated with function prototype TYPE.  */
 
@@ -235,6 +236,21 @@ riscv_init_builtin_types (void)
   if (!maybe_get_identifier ("_Float16"))
     lang_hooks.types.register_builtin_type (riscv_float16_type_node,
 					    "_Float16");
+
+  /* Provide the _Bfloat16 type and bfloat16_type_node if needed.  */
+  if (!bfloat16_type_node)
+    {
+      riscv_bfloat16_type_node = make_node (REAL_TYPE);
+      TYPE_PRECISION (riscv_bfloat16_type_node) = 16;
+      SET_TYPE_MODE (riscv_bfloat16_type_node, BFmode);
+      layout_type (riscv_bfloat16_type_node);
+    }
+  else
+    riscv_bfloat16_type_node = bfloat16_type_node;
+
+  if (!maybe_get_identifier ("_Bfloat16"))
+    lang_hooks.types.register_builtin_type (riscv_bfloat16_type_node,
+					    "_Bfloat16");
 }
 
 /* Implement TARGET_INIT_BUILTINS.  */
diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
index e3c6ccb2809..723bfaee42d 100644
--- a/gcc/config/riscv/riscv-modes.def
+++ b/gcc/config/riscv/riscv-modes.def
@@ -22,6 +22,10 @@ along with GCC; see the file COPYING3.  If not see
 FLOAT_MODE (HF, 2, ieee_half_format);
 FLOAT_MODE (TF, 16, ieee_quad_format);
 
+FLOAT_MODE (BF, 2, 0);
+/* Reuse definition from arm.  */
+ADJUST_FLOAT_FORMAT (BF, &arm_bfloat_half_format);
+
 /* Vector modes.  */
 
 /* Encode the ratio of SEW/LMUL into the mask types. There are the following
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 8c766e2e2be..910523ee2b9 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -8727,9 +8727,17 @@ riscv_asan_shadow_offset (void)
 static const char *
 riscv_mangle_type (const_tree type)
 {
-  /* Half-precision float, _Float16 is "DF16_".  */
+  /* Half-precision float, _Float16 is "DF16_" and _Bfloat16 is "DF16b".  */
   if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
-    return "DF16_";
+    {
+      if (TYPE_MODE (type) == HFmode)
+	return "DF16_";
+
+      if (TYPE_MODE (type) == BFmode)
+	return "DF16b";
+
+      gcc_unreachable ();
+    }
 
   /* Mangle all vector type for vector extension.  */
   /* The mangle name follows the rule of RVV LLVM
@@ -8750,19 +8758,19 @@ riscv_mangle_type (const_tree type)
 static bool
 riscv_scalar_mode_supported_p (scalar_mode mode)
 {
-  if (mode == HFmode)
+  if (mode == HFmode || mode == BFmode)
     return true;
   else
     return default_scalar_mode_supported_p (mode);
 }
 
 /* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P - return TRUE
-   if MODE is HFmode, and punt to the generic implementation otherwise.  */
+   if MODE is HFmode or BFmode, and punt to the generic implementation otherwise.  */
 
 static bool
 riscv_libgcc_floating_mode_supported_p (scalar_float_mode mode)
 {
-  if (mode == HFmode)
+  if (mode == HFmode || mode == BFmode)
     return true;
   else
     return default_libgcc_floating_mode_supported_p (mode);
@@ -8813,27 +8821,74 @@ riscv_floatn_mode (int n, bool extended)
   return default_floatn_mode (n, extended);
 }
 
+/* Record that we have no arithmetic or comparison libfuncs for
+   machine mode MODE.  */
+
+static void
+riscv_block_arith_comp_libfuncs_for_mode (machine_mode mode)
+{
+  /* Arithmetic.  */
+  set_optab_libfunc (add_optab, mode, NULL);
+  set_optab_libfunc (sdiv_optab, mode, NULL);
+  set_optab_libfunc (smul_optab, mode, NULL);
+  set_optab_libfunc (neg_optab, mode, NULL);
+  set_optab_libfunc (sub_optab, mode, NULL);
+
+  /* Comparisons.  */
+  set_optab_libfunc (eq_optab, mode, NULL);
+  set_optab_libfunc (ne_optab, mode, NULL);
+  set_optab_libfunc (lt_optab, mode, NULL);
+  set_optab_libfunc (le_optab, mode, NULL);
+  set_optab_libfunc (ge_optab, mode, NULL);
+  set_optab_libfunc (gt_optab, mode, NULL);
+  set_optab_libfunc (unord_optab, mode, NULL);
+}
+
 static void
 riscv_init_libfuncs (void)
 {
+  machine_mode mode_iter;
   /* Half-precision float operations.  The compiler handles all operations
      with NULL libfuncs by converting to SFmode.  */
 
-  /* Arithmetic.  */
-  set_optab_libfunc (add_optab, HFmode, NULL);
-  set_optab_libfunc (sdiv_optab, HFmode, NULL);
-  set_optab_libfunc (smul_optab, HFmode, NULL);
-  set_optab_libfunc (neg_optab, HFmode, NULL);
-  set_optab_libfunc (sub_optab, HFmode, NULL);
+  riscv_block_arith_comp_libfuncs_for_mode (HFmode);
 
-  /* Comparisons.  */
-  set_optab_libfunc (eq_optab, HFmode, NULL);
-  set_optab_libfunc (ne_optab, HFmode, NULL);
-  set_optab_libfunc (lt_optab, HFmode, NULL);
-  set_optab_libfunc (le_optab, HFmode, NULL);
-  set_optab_libfunc (ge_optab, HFmode, NULL);
-  set_optab_libfunc (gt_optab, HFmode, NULL);
-  set_optab_libfunc (unord_optab, HFmode, NULL);
+  /* For all possible libcalls in BFmode, record NULL.  */
+  riscv_block_arith_comp_libfuncs_for_mode (BFmode);
+  FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
+    {
+      set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
+      set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
+      set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
+      set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
+    }
+
+  FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
+    {
+      set_conv_libfunc (sfix_optab, BFmode, mode_iter, NULL);
+      set_conv_libfunc (sfix_optab, mode_iter, BFmode, NULL);
+      set_conv_libfunc (ufix_optab, BFmode, mode_iter, NULL);
+      set_conv_libfunc (ufix_optab, mode_iter, BFmode, NULL);
+
+      set_conv_libfunc (sfloat_optab, mode_iter, BFmode, NULL);
+      set_conv_libfunc (sfloat_optab, BFmode, mode_iter, NULL);
+      set_conv_libfunc (ufloat_optab, mode_iter, BFmode, NULL);
+      set_conv_libfunc (ufloat_optab, BFmode, mode_iter, NULL);
+    }
+
+  /* Enable libfuncs conversion for BFmode.  */
+  set_conv_libfunc (sext_optab, SFmode, BFmode, "__extendbfsf2");
+  set_conv_libfunc (trunc_optab, BFmode, SFmode, "__truncsfbf2");
+  set_conv_libfunc (trunc_optab, BFmode, DFmode, "__truncdfbf2");
+
+  set_conv_libfunc (sfloat_optab, BFmode, DImode, "__floatdibf");
+  set_conv_libfunc (ufloat_optab, BFmode, DImode, "__floatundibf");
+
+  /* Convert between BFmode and HFmode using only trunc libfunc if needed.  */
+  set_conv_libfunc (sext_optab, BFmode, HFmode, "__trunchfbf2");
+  set_conv_libfunc (sext_optab, HFmode, BFmode, "__truncbfhf2");
+  set_conv_libfunc (trunc_optab, BFmode, HFmode, "__trunchfbf2");
+  set_conv_libfunc (trunc_optab, HFmode, BFmode, "__truncbfhf2");
 }
 
 #if CHECKING_P
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index e00b8ee3579..5048628c784 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -189,7 +189,7 @@ (define_attr "move_type"
   (const_string "unknown"))
 
 ;; Main data type used by the insn
-(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,HF,SF,DF,TF,
+(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,BF,HF,SF,DF,TF,
   RVVMF64BI,RVVMF32BI,RVVMF16BI,RVVMF8BI,RVVMF4BI,RVVMF2BI,RVVM1BI,
   RVVM8QI,RVVM4QI,RVVM2QI,RVVM1QI,RVVMF2QI,RVVMF4QI,RVVMF8QI,
   RVVM8HI,RVVM4HI,RVVM2HI,RVVM1HI,RVVMF2HI,RVVMF4HI,
@@ -1631,6 +1631,20 @@ (define_insn "truncdfhf2"
   [(set_attr "type" "fcvt")
    (set_attr "mode" "HF")])
 
+;; The conversion of DF to BF needs to be done with SF if there is a
+;; chance to generate at least one instruction, otherwise just using
+;; libfunc __truncdfbf2.
+(define_expand "truncdfbf2"
+  [(set (match_operand:BF     0 "register_operand" "=f")
+       (float_truncate:BF
+           (match_operand:DF 1 "register_operand" " f")))]
+  "TARGET_DOUBLE_FLOAT || TARGET_ZDINX"
+  {
+    convert_move (operands[0],
+		  convert_modes (SFmode, DFmode, operands[1], 0), 0);
+    DONE;
+  })
+
 ;;
 ;;  ....................
 ;;
@@ -1784,12 +1798,12 @@ (define_insn "extendhfdf2"
    (set_attr "mode" "DF")])
 
 ;; 16-bit floating point moves
-(define_expand "movhf"
-  [(set (match_operand:HF 0 "")
-	(match_operand:HF 1 ""))]
+(define_expand "mov<mode>"
+  [(set (match_operand:HFBF 0 "")
+	(match_operand:HFBF 1 ""))]
   ""
 {
-  if (riscv_legitimize_move (HFmode, operands[0], operands[1]))
+  if (riscv_legitimize_move (<MODE>mode, operands[0], operands[1]))
     DONE;
 })
 
@@ -1804,16 +1818,16 @@ (define_insn "*movhf_hardfloat"
    (set_attr "type" "fmove")
    (set_attr "mode" "HF")])
 
-(define_insn "*movhf_softfloat"
-  [(set (match_operand:HF 0 "nonimmediate_operand" "=f, r,r,m,*f,*r")
-	(match_operand:HF 1 "move_operand"         " f,Gr,m,r,*r,*f"))]
-  "!TARGET_ZFHMIN
-   && (register_operand (operands[0], HFmode)
-       || reg_or_0_operand (operands[1], HFmode))"
+(define_insn "*mov<mode>_softfloat"
+  [(set (match_operand:HFBF 0 "nonimmediate_operand" "=f, r,r,m,*f,*r")
+	(match_operand:HFBF 1 "move_operand"         " f,Gr,m,r,*r,*f"))]
+  "(!(TARGET_ZFHMIN && <MODE>mode == HFmode) || (<MODE>mode == BFmode))
+   && (register_operand (operands[0], <MODE>mode)
+       || reg_or_0_operand (operands[1], <MODE>mode))"
   { return riscv_output_move (operands[0], operands[1]); }
   [(set_attr "move_type" "fmove,move,load,store,mtc,mfc")
    (set_attr "type" "fmove")
-   (set_attr "mode" "HF")])
+   (set_attr "mode" "<MODE>")])
 
 ;;
 ;;  ....................
@@ -1858,6 +1872,62 @@ (define_insn "floatuns<GPR:mode><ANYF:mode>2"
   [(set_attr "type" "fcvt")
    (set_attr "mode" "<ANYF:MODE>")])
 
+;; The conversion of BF to SI/DI needs to be done with SF.
+(define_expand "fix_truncbf<GPR:mode>2"
+  [(set (match_operand:GPR      0 "register_operand" "=r")
+	(fix:GPR
+	    (match_operand:BF 1 "register_operand" " f")))]
+  ""
+  {
+    rtx op1 = gen_reg_rtx (SFmode);
+    convert_move (op1, operands[1], 0);
+    expand_fix (operands[0], op1, 0);
+    DONE;
+  })
+
+(define_expand "fixuns_truncbf<GPR:mode>2"
+  [(set (match_operand:GPR      0 "register_operand" "=r")
+	(unsigned_fix:GPR
+	    (match_operand:BF 1 "register_operand" " f")))]
+  ""
+  {
+    rtx op1 = gen_reg_rtx (SFmode);
+    convert_move (op1, operands[1], 1);
+    expand_fix (operands[0], op1, 1);
+    DONE;
+  })
+
+;; The conversion of SI to BF needs to be done with SF.
+;; The conversion of DI to BF needs to be done with libfuncs
+;; __floatdibf and __floatundibf directly if there is no F
+;; extension, because we have not yet enabled __floatdisf
+;; and __floatundisf.
+(define_expand "float<mode>bf2"
+  [(set (match_operand:BF    0 "register_operand" "= f")
+	(float:BF
+	    (match_operand:GPR 1 "reg_or_0_operand" " rJ")))]
+  "(<MODE>mode == SImode)
+   || (<MODE>mode == DImode && (TARGET_HARD_FLOAT || TARGET_ZFINX))"
+  {
+    rtx op1 = gen_reg_rtx (SFmode);
+    expand_float (op1, operands[1], 0);
+    convert_move (operands[0], op1, 0);
+    DONE;
+  })
+
+(define_expand "floatuns<mode>bf2"
+  [(set (match_operand:BF    0 "register_operand" "= f")
+	(unsigned_float:BF
+	    (match_operand:GPR 1 "reg_or_0_operand" " rJ")))]
+  "(<MODE>mode == SImode)
+   || (<MODE>mode == DImode && (TARGET_HARD_FLOAT || TARGET_ZFINX))"
+  {
+    rtx op1 = gen_reg_rtx (SFmode);
+    expand_float (op1, operands[1], 1);
+    convert_move (operands[0], op1, 1);
+    DONE;
+  })
+
 (define_insn "l<rint_pattern><ANYF:mode><GPR:mode>2"
   [(set (match_operand:GPR       0 "register_operand" "=r")
 	(unspec:GPR
diff --git a/gcc/testsuite/gcc.target/riscv/bf16_arithmetic.c b/gcc/testsuite/gcc.target/riscv/bf16_arithmetic.c
new file mode 100644
index 00000000000..9e67b2babc0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/bf16_arithmetic.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32imac -mabi=ilp32 -O" { target { rv32 } } } */
+/* { dg-options "-march=rv64imac -mabi=lp64 -O" { target { rv64 } } } */
+
+extern _Bfloat16 bf;
+extern _Bfloat16 bf1;
+extern _Bfloat16 bf2;
+
+/* Arithmetic.  */
+void bf_add_bf () { bf = bf1 + bf2; }
+
+void bf_sub_bf () { bf = bf1 - bf2; }
+
+void bf_mul_bf () { bf = bf1 * bf2; }
+
+void bf_div_bf () { bf = bf1 / bf2; }
+
+void bf_add_const () { bf = bf1 + 3.14; }
+
+void const_sub_bf () { bf = 3.14 - bf2; }
+
+void bf_mul_const () { bf = bf1 *3.14; }
+
+void const_div_bf () { bf = 3.14 / bf2; }
+
+void bf_inc () { ++bf; }
+
+void bf_dec () { --bf; }
+
+/* { dg-final { scan-assembler-times "call\t__extendbfsf2" 16 } } */
+/* { dg-final { scan-assembler-times "call\t__truncsfbf2" 6 } } */
+/* { dg-final { scan-assembler-times "call\t__truncdfbf2" 4 } } */
+/* { dg-final { scan-assembler-not "call\t__addbf3" } } */
+/* { dg-final { scan-assembler-not "call\t__subbf3" } } */
+/* { dg-final { scan-assembler-not "call\t__mulbf3" } } */
+/* { dg-final { scan-assembler-not "call\t__divbf3" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/bf16_call.c b/gcc/testsuite/gcc.target/riscv/bf16_call.c
new file mode 100644
index 00000000000..01576e38ac5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/bf16_call.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32imac -mabi=ilp32 -O" { target { rv32 } } } */
+/* { dg-options "-march=rv64imac -mabi=lp64 -O" { target { rv64 } } } */
+
+_Bfloat16 add (_Bfloat16 a, _Bfloat16 b) __attribute__ ((noinline));
+_Bfloat16 add (_Bfloat16 a, _Bfloat16 b)
+{
+  return a + b;
+}
+
+_Bfloat16 test(_Bfloat16 a, _Bfloat16 b)
+{
+  return add (a, b);
+}
+
+/* { dg-final { scan-assembler-times "call\t__extendbfsf2" 2 } } */
+/* { dg-final { scan-assembler-times "call\t__truncsfbf2" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/bf16_comparisons.c b/gcc/testsuite/gcc.target/riscv/bf16_comparisons.c
new file mode 100644
index 00000000000..ff692378c00
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/bf16_comparisons.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32imac -mabi=ilp32 -O" { target { rv32 } } } */
+/* { dg-options "-march=rv64imac -mabi=lp64 -O" { target { rv64 } } } */
+
+extern _Bfloat16 bf;
+extern _Bfloat16 bf1;
+extern _Bfloat16 bf2;
+
+/* Comparisons.  */
+void bf_lt_bf () { bf = (bf1 < bf2) ? bf1 : bf2; }
+
+void bf_gt_bf () { bf = (bf1 > bf2) ? bf1 : bf2; }
+
+void bf_eq_bf () { bf = (bf1 == bf2) ? bf1 : bf2; }
+
+void bf_lt_const () { bf = (bf1 < 3.14) ? bf1 : bf2; }
+
+void const_gt_bf () { bf = (3.14 > bf2) ? bf1 : bf2; }
+
+void bf_eq_const () { bf = (bf1 == 3.14) ? bf1 : bf2; }
+
+/* { dg-final { scan-assembler-times "call\t__extendbfsf2" 9 } } */
+/* { dg-final { scan-assembler-not "call\t__ltbf2" } } */
+/* { dg-final { scan-assembler-not "call\t__gtbf2" } } */
+/* { dg-final { scan-assembler-not "call\t__eqbf2" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/bf16_convert-1.c b/gcc/testsuite/gcc.target/riscv/bf16_convert-1.c
new file mode 100644
index 00000000000..3b9a7434373
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/bf16_convert-1.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc -mabi=ilp32d -O" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O" { target { rv64 } } } */
+
+extern _Bfloat16 bf;
+extern _Bfloat16 bf1;
+extern _Bfloat16 bf2;
+extern _Float16 hf;
+extern float sf;
+extern double df;
+
+extern int si;
+extern long long  di;
+
+extern unsigned int usi;
+extern unsigned long long udi;
+
+/* Fp or gp Converts to bf.  */
+void hf_to_bf () { bf = hf; } /* { dg-final { scan-assembler-times "call\t__trunchfbf2" 1 } } */
+void sf_to_bf () { bf = sf; }
+void df_to_bf () { bf = df; }
+void si_to_bf () { bf = si; }
+void di_to_bf () { bf = di; } /* { dg-final { scan-assembler-times "call\t__floatdibf" 1 { target { rv32 } } } } */ 
+void usi_to_bf () { bf = usi; }
+void udi_to_bf () { bf = udi; } /* { dg-final { scan-assembler-times "call\t__floatundibf" 1 { target { rv32 } } } } */ 
+void const_to_bf () { __volatile__ const float temp = 3.14; bf = temp; }
+/* { dg-final { scan-assembler-times "call\t__truncsfbf2" 5 { target { rv32 } } } } */
+/* { dg-final { scan-assembler-times "call\t__truncsfbf2" 7 { target { rv64 } } } } */
+
+/* Bf converts to fp or gp.  */
+void bf_to_hf () { hf = bf; } /* { dg-final { scan-assembler-times "call\t__truncsfhf2" 1 } } */
+void bf_to_sf () { sf = bf; }
+void bf_to_df () { df = bf; }
+void bf_to_si () { si = bf; }
+void bf_to_di () { di = bf; } /* { dg-final { scan-assembler-times "call\t__fixsfdi" 1 { target { rv32 } } } } */ 
+void bf_to_usi () { usi = bf; }
+void bf_to_udi () { udi = bf; } /* { dg-final { scan-assembler-times "call\t__fixunssfdi" 1 { target { rv32 } } } } */ 
+/* { dg-final { scan-assembler-times "call\t__extendbfsf2" 4 { target { rv32 } } } } */
+/* { dg-final { scan-assembler-times "call\t__extendbfsf2" 6 { target { rv64 } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/bf16_convert-2.c b/gcc/testsuite/gcc.target/riscv/bf16_convert-2.c
new file mode 100644
index 00000000000..912b875bdd4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/bf16_convert-2.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32imac -mabi=ilp32 -O" { target { rv32 } } } */
+/* { dg-options "-march=rv64imac -mabi=lp64 -O" { target { rv64 } } } */
+
+extern _Bfloat16 bf;
+extern _Bfloat16 bf1;
+extern _Bfloat16 bf2;
+extern _Float16 hf;
+extern float sf;
+extern double df;
+
+extern int si;
+extern long long  di;
+
+extern unsigned int usi;
+extern unsigned long long udi;
+
+/* Fp or gp Converts to bf.  */
+void hf_to_bf () { bf = hf; } /* { dg-final { scan-assembler-times "call\t__trunchfbf2" 1 } } */
+void sf_to_bf () { bf = sf; }
+void df_to_bf () { bf = df; } /* { dg-final { scan-assembler-times "call\t__truncdfbf2" 1 } } */
+void si_to_bf () { bf = si; } /* { dg-final { scan-assembler-times "call\t__floatsisf" 1 } } */
+void di_to_bf () { bf = di; } /* { dg-final { scan-assembler-times "call\t__floatdibf" 1 } } */
+void usi_to_bf () { bf = usi; }  /* { dg-final { scan-assembler-times "call\t__floatunsisf" 1 } } */
+void udi_to_bf () { bf = udi; }  /* { dg-final { scan-assembler-times "call\t__floatundibf" 1 } } */
+void const_to_bf () { __volatile__ const float temp = 3.14; bf = temp; }
+/* { dg-final { scan-assembler-times "call\t__truncsfbf2" 4 } } */
+
+/* Bf converts to fp or gp.  */
+void bf_to_hf () { hf = bf; } /* { dg-final { scan-assembler-times "call\t__truncsfhf2" 1 } } */
+void bf_to_sf () { sf = bf; }
+void bf_to_df () { df = bf; } /* { dg-final { scan-assembler-times "call\t__extendsfdf2" 1 } } */
+void bf_to_si () { si = bf; } /* { dg-final { scan-assembler-times "call\t__fixsfsi" 1 } } */
+void bf_to_di () { di = bf; } /* { dg-final { scan-assembler-times "call\t__fixsfdi" 1 } } */
+void bf_to_usi () { usi = bf; }  /* { dg-final { scan-assembler-times "call\t__fixunssfsi" 1 } } */
+void bf_to_udi () { udi = bf; }  /* { dg-final { scan-assembler-times "call\t__fixunssfdi" 1 } } */
+/* { dg-final { scan-assembler-times "call\t__extendbfsf2" 4 { target { rv32 } } } } */
+/* { dg-final { scan-assembler-times "call\t__extendbfsf2" 6 { target { rv64 } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/bf16_convert_run.c b/gcc/testsuite/gcc.target/riscv/bf16_convert_run.c
new file mode 100644
index 00000000000..d9b1f0f6298
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/bf16_convert_run.c
@@ -0,0 +1,163 @@
+/* { dg-do run } */
+/* { dg-options "-march=rv32gc -mabi=ilp32d -O" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O" { target { rv64 } } } */
+
+#include <stdio.h>
+
+#define NO_INLINE __attribute__((noinline))
+
+int NO_INLINE
+bf16_to_int ()
+{
+  int ret[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  _Bfloat16 a_bf16 = 1.2;
+  _Bfloat16 b_bf16 = 7.1;
+  signed char a_char = 1;
+  short int a_short_int = 2;
+  int a_int = 3;
+  long a_long = 4;
+  long long a_long_long = 5;
+
+  a_bf16 = (_Bfloat16)a_char;
+  if (a_bf16 != (_Bfloat16)1)
+    ret[0] = 1;
+
+  a_bf16 = (_Bfloat16)a_short_int;
+  if (a_bf16 != (_Bfloat16)2)
+    ret[1] = 1;
+
+  a_bf16 = (_Bfloat16)a_int;
+  if (a_bf16 != (_Bfloat16)3)
+    ret[2] = 1;
+
+  a_bf16 = (_Bfloat16)a_long;
+  if (a_bf16 != (_Bfloat16)4)
+    ret[3] = 1;
+
+  a_bf16 = (_Bfloat16)a_long_long;
+  if (a_bf16 != (_Bfloat16)5)
+    ret[4] = 1;
+
+  a_char = (signed char)b_bf16;
+  if (a_char != (signed char)7.1)
+    ret[5] = 1;
+
+  a_short_int = (short int)b_bf16;
+  if (a_short_int != (short int)7.1)
+    ret[6] = 1;
+
+  a_int = (int)b_bf16;
+  if (a_int != (int)7.1)
+    ret[7] = 1;
+
+  a_long = (long)b_bf16;
+  if (a_long != (long)7.1)
+    ret[8] = 1;
+
+  a_long_long = (long long)b_bf16;
+  if (a_long_long != (long long)7.1)
+    ret[9] = 1;
+
+  if ((ret[0] == 1) || (ret[1] == 1) || (ret[2] == 1) || (ret[3] == 1) || (ret[4] == 1) ||
+      (ret[5] == 1) || (ret[6] == 1) || (ret[7] == 1) || (ret[8] == 1) || (ret[9] == 1))
+    return 1;
+  else
+    return 0;
+}
+
+int NO_INLINE
+bf16_to_uint ()
+{
+  int ret[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  _Bfloat16 a_bf16 = 1.2;
+  _Bfloat16 b_bf16 = 7.1;
+  unsigned char a_uchar = 1;
+  unsigned short int a_short_uint = 2;
+  unsigned int a_uint = 3;
+  unsigned long a_ulong = 4;
+  unsigned long long a_ulong_ulong = 5;
+
+  a_bf16 = (_Bfloat16)a_uchar;
+  if (a_bf16 != (_Bfloat16)1)
+    ret[0] = 1;
+
+  a_bf16 = (_Bfloat16)a_short_uint;
+  if (a_bf16 != (_Bfloat16)2)
+    ret[1] = 1;
+
+  a_bf16 = (_Bfloat16)a_uint;
+  if (a_bf16 != (_Bfloat16)3)
+    ret[2] = 1;
+
+  a_bf16 = (_Bfloat16)a_ulong;
+  if (a_bf16 != (_Bfloat16)4)
+    ret[3] = 1;
+
+  a_bf16 = (_Bfloat16)a_ulong_ulong;
+  if (a_bf16 != (_Bfloat16)5)
+    ret[4] = 1;
+
+  a_uchar = (unsigned char)b_bf16;
+  if (a_uchar != (unsigned char)7.1)
+    ret[5] = 1;
+
+  a_short_uint = (unsigned short int)b_bf16;
+  if (a_short_uint != (unsigned short int)7.1)
+    ret[6] = 1;
+
+  a_uint = (unsigned int)b_bf16;
+  if (a_uint != (unsigned int)7.1)
+    ret[7] = 1;
+
+  a_ulong = (unsigned long)b_bf16;
+  if (a_ulong != (unsigned long)7.1)
+    ret[8] = 1;
+
+  a_ulong_ulong = (unsigned long long)b_bf16;
+  if (a_ulong_ulong != (unsigned long long)7.1)
+    ret[9] = 1;
+
+  if ((ret[0] == 1) || (ret[1] == 1) || (ret[2] == 1) || (ret[3] == 1) || (ret[4] == 1) ||
+      (ret[5] == 1) || (ret[6] == 1) || (ret[7] == 1) || (ret[8] == 1) || (ret[9] == 1))
+    return 1;
+  else
+    return 0;
+}
+
+int NO_INLINE
+bf16_to_float ()
+{
+  int ret[4] = {0, 0, 0, 0};
+  _Bfloat16 a_bf16 = 1.2;
+  _Bfloat16 b_bf16 = 7.5;
+  float a_float = 3.7;
+  double a_double = 5.8;
+  a_bf16 = (_Bfloat16)a_float;
+  if (a_bf16 != ((_Bfloat16)3.7))
+    ret[0] = 1;
+
+  a_bf16 = (_Bfloat16)a_double;
+  if (a_bf16 != ((_Bfloat16)5.8))
+    ret[1] = 1;
+
+  a_float = (float)b_bf16;
+  if (a_float != (float)7.5)
+    ret[2] = 1;
+
+  a_double = (double)b_bf16;
+  if (a_double != (double)7.5)
+    ret[3] = 1;
+
+  if ((ret[0] == 1) || (ret[1] == 1) || (ret[2] == 1) || (ret[3] == 1))
+    return 1;
+  else
+    return 0;
+}
+
+int main()
+{
+  if (bf16_to_int () || bf16_to_uint () || bf16_to_float ())
+    return 1;
+  else
+    return 0;
+}
diff --git a/libgcc/config/riscv/sfp-machine.h b/libgcc/config/riscv/sfp-machine.h
index 38e2817bffa..6e294b38783 100644
--- a/libgcc/config/riscv/sfp-machine.h
+++ b/libgcc/config/riscv/sfp-machine.h
@@ -41,6 +41,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define _FP_DIV_MEAT_D(R,X,Y)	_FP_DIV_MEAT_2_udiv(D,R,X,Y)
 #define _FP_DIV_MEAT_Q(R,X,Y)	_FP_DIV_MEAT_4_udiv(Q,R,X,Y)
 
+#define _FP_NANFRAC_B		_FP_QNANBIT_B
 #define _FP_NANFRAC_H		_FP_QNANBIT_H
 #define _FP_NANFRAC_S		_FP_QNANBIT_S
 #define _FP_NANFRAC_D		_FP_QNANBIT_D, 0
@@ -64,6 +65,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define _FP_DIV_MEAT_D(R,X,Y)	_FP_DIV_MEAT_1_udiv_norm(D,R,X,Y)
 #define _FP_DIV_MEAT_Q(R,X,Y)	_FP_DIV_MEAT_2_udiv(Q,R,X,Y)
 
+#define _FP_NANFRAC_B		_FP_QNANBIT_B
 #define _FP_NANFRAC_H		_FP_QNANBIT_H
 #define _FP_NANFRAC_S		_FP_QNANBIT_S
 #define _FP_NANFRAC_D		_FP_QNANBIT_D
@@ -82,6 +84,7 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
 typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
 #define CMPtype __gcc_CMPtype
 
+#define _FP_NANSIGN_B		0
 #define _FP_NANSIGN_H		0
 #define _FP_NANSIGN_S		0
 #define _FP_NANSIGN_D		0
diff --git a/libgcc/config/riscv/t-softfp32 b/libgcc/config/riscv/t-softfp32
index 1a3b1caa6b0..0c61f77714b 100644
--- a/libgcc/config/riscv/t-softfp32
+++ b/libgcc/config/riscv/t-softfp32
@@ -42,7 +42,8 @@ softfp_extras += divsf3 divdf3 divtf3
 
 endif
 
-softfp_extensions += hfsf hfdf hftf
-softfp_truncations += tfhf dfhf sfhf
+softfp_extensions += hfsf hfdf hftf bfsf
+softfp_truncations += tfhf dfhf sfhf tfbf dfbf sfbf bfhf hfbf
 softfp_extras += fixhfsi fixhfdi fixunshfsi fixunshfdi \
-                 floatsihf floatdihf floatunsihf floatundihf
+		 floatsihf floatdihf floatunsihf floatundihf \
+		 floatdibf floatundibf
-- 
2.17.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [RFC 2/2] RISC-V: Add 'Zfbfmin' extension.
  2023-09-19  8:44 [RFC 1/2] RISC-V: Add support for _Bfloat16 Jin Ma
@ 2023-09-19  8:46 ` Jin Ma
  2023-09-29 17:52   ` Jeff Law
  2023-09-29 17:46 ` [RFC 1/2] RISC-V: Add support for _Bfloat16 Jeff Law
  2023-12-20 11:17 ` [PATCH] Support libcall __float{,un}sibf by SF when it is not supported for _bf16 Jin Ma
  2 siblings, 1 reply; 12+ messages in thread
From: Jin Ma @ 2023-09-19  8:46 UTC (permalink / raw)
  To: gcc-patches
  Cc: jeffreyalaw, palmer, richard.sandiford, kito.cheng,
	philipp.tomsich, christoph.muellner, rdapp.gcc, juzhe.zhong,
	jinma.contrib, Jin Ma

This patch adds the 'Zfbfmin' extension for RISC-V, which is based on the bfloat16 specification:
https://github.com/riscv/riscv-bfloat16/commit/5578e34e15a44e9ad13246072a29f51274b4d999

The 'Zfbfmin' extension of binutils-gdb (REVIEW ONLY):
https://sourceware.org/pipermail/binutils/2023-August/128773.html

The 'Zfbfmin' extension of qemu:
https://github.com/qemu/qemu/commit/5d1270caac2ef7b8c887d4cb5a2444ba6d237516

Because binutils does not yet support the 'Zfbfmin' extension, the test case
zfbfmin_convert_run.c is disabled with '#if 0' and '#endif'.

gcc/ChangeLog:

	* common/config/riscv/riscv-common.cc: Add 'Zfbfmin' extension.
	* config/riscv/riscv-opts.h (MASK_ZFBFMIN): New.
	(TARGET_ZFBFMIN): New.
	* config/riscv/riscv.cc (riscv_output_move): Enable FMV.X.H and FMV.H.X
	for the 'Zfbfmin' extension.
	(riscv_excess_precision): Likewise.
	* config/riscv/riscv.md (truncsfbf2): New.
	(extendbfsf2):  New.
	(*mov<mode>_hardfloat): Support for BFmode.
	(*mov<mode>_softfloat): Disable for BFmode when the 'Zfbfmin' extension is
	enabled.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/zfbfmin_arithmetic.c: New test.
	* gcc.target/riscv/zfbfmin_call.c: New test.
	* gcc.target/riscv/zfbfmin_comparisons.c: New test.
	* gcc.target/riscv/zfbfmin_convert.c: New test.
	* gcc.target/riscv/zfbfmin_convert_run.c: New test.
	* gcc.target/riscv/zfbfmin_fsh_and_flh.c: New test.
---
 gcc/common/config/riscv/riscv-common.cc       |   3 +
 gcc/config/riscv/riscv-opts.h                 |   2 +
 gcc/config/riscv/riscv.cc                     |   4 +-
 gcc/config/riscv/riscv.md                     |  40 ++--
 .../gcc.target/riscv/zfbfmin_arithmetic.c     |  31 ++++
 gcc/testsuite/gcc.target/riscv/zfbfmin_call.c |  17 ++
 .../gcc.target/riscv/zfbfmin_comparisons.c    |  22 +++
 .../gcc.target/riscv/zfbfmin_convert.c        |  38 ++++
 .../gcc.target/riscv/zfbfmin_convert_run.c    | 173 ++++++++++++++++++
 .../gcc.target/riscv/zfbfmin_fsh_and_flh.c    |  12 ++
 10 files changed, 329 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfbfmin_arithmetic.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfbfmin_call.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfbfmin_comparisons.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfbfmin_convert.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfbfmin_convert_run.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfbfmin_fsh_and_flh.c

diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc
index 9a0a68fe5db..1fcbb862aa4 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -123,6 +123,7 @@ static const riscv_implied_info_t riscv_implied_info[] =
 
   {"zfh", "zfhmin"},
   {"zfhmin", "f"},
+  {"zfbfmin", "f"},
 
   {"zfa", "f"},
 
@@ -284,6 +285,7 @@ static const struct riscv_ext_version riscv_ext_version_table[] =
   {"zfhmin",    ISA_SPEC_CLASS_NONE, 1, 0},
   {"zvfhmin",   ISA_SPEC_CLASS_NONE, 1, 0},
   {"zvfh",      ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zfbfmin",     ISA_SPEC_CLASS_NONE, 0, 8},
 
   {"zfa",     ISA_SPEC_CLASS_NONE, 0, 1},
 
@@ -1461,6 +1463,7 @@ static const riscv_ext_flag_table_t riscv_ext_flag_table[] =
   {"zfh",       &gcc_options::x_riscv_zf_subext, MASK_ZFH},
   {"zvfhmin",   &gcc_options::x_riscv_zf_subext, MASK_ZVFHMIN},
   {"zvfh",      &gcc_options::x_riscv_zf_subext, MASK_ZVFH},
+  {"zfbfmin",      &gcc_options::x_riscv_zf_subext, MASK_ZFBFMIN},
 
   {"zfa",       &gcc_options::x_riscv_zfa_subext, MASK_ZFA},
 
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index a525f679683..900a46fcae0 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -256,11 +256,13 @@ enum riscv_entity
 #define MASK_ZFH      (1 << 1)
 #define MASK_ZVFHMIN  (1 << 2)
 #define MASK_ZVFH     (1 << 3)
+#define MASK_ZFBFMIN  (1 << 4)
 
 #define TARGET_ZFHMIN  ((riscv_zf_subext & MASK_ZFHMIN) != 0)
 #define TARGET_ZFH     ((riscv_zf_subext & MASK_ZFH) != 0)
 #define TARGET_ZVFHMIN ((riscv_zf_subext & MASK_ZVFHMIN) != 0)
 #define TARGET_ZVFH    ((riscv_zf_subext & MASK_ZVFH) != 0)
+#define TARGET_ZFBFMIN    ((riscv_zf_subext & MASK_ZFBFMIN) != 0)
 
 #define MASK_ZMMUL      (1 << 0)
 #define TARGET_ZMMUL    ((riscv_zm_subext & MASK_ZMMUL) != 0)
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 910523ee2b9..6362c3f83c8 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3372,7 +3372,7 @@ riscv_output_move (rtx dest, rtx src)
 	switch (width)
 	  {
 	  case 2:
-	    if (TARGET_ZFHMIN)
+	    if (TARGET_ZFHMIN || TARGET_ZFBFMIN)
 	      return "fmv.x.h\t%0,%1";
 	    /* Using fmv.x.s + sign-extend to emulate fmv.x.h.  */
 	    return "fmv.x.s\t%0,%1;slli\t%0,%0,16;srai\t%0,%0,16";
@@ -3428,7 +3428,7 @@ riscv_output_move (rtx dest, rtx src)
 	    switch (width)
 	      {
 	      case 2:
-		if (TARGET_ZFHMIN)
+		if (TARGET_ZFHMIN || TARGET_ZFBFMIN)
 		  return "fmv.h.x\t%0,%z1";
 		/* High 16 bits should be all-1, otherwise HW will treated
 		   as a n-bit canonical NaN, but isn't matter for softfloat.  */
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 5048628c784..ef0c38cb633 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -1631,14 +1631,23 @@ (define_insn "truncdfhf2"
   [(set_attr "type" "fcvt")
    (set_attr "mode" "HF")])
 
+(define_insn "truncsfbf2"
+  [(set (match_operand:BF     0 "register_operand" "=f")
+       (float_truncate:BF
+           (match_operand:SF 1 "register_operand" " f")))]
+  "TARGET_ZFBFMIN"
+  "fcvt.bf16.s\t%0,%1"
+  [(set_attr "type" "fcvt")
+   (set_attr "mode" "BF")])
+
 ;; The conversion of DF to BF needs to be done with SF if there is a
-;; chance to generate at least one instruction, otherwise just using
-;; libfunc __truncdfbf2.
+;; chance to generate at least one instruction, whether it is 'fcvt.s.d'
+;; or 'fcvt.bf16.s'. Otherwise just using libfunc __truncdfbf2.
 (define_expand "truncdfbf2"
   [(set (match_operand:BF     0 "register_operand" "=f")
        (float_truncate:BF
            (match_operand:DF 1 "register_operand" " f")))]
-  "TARGET_DOUBLE_FLOAT || TARGET_ZDINX"
+  "TARGET_DOUBLE_FLOAT || TARGET_ZDINX || TARGET_ZFBFMIN"
   {
     convert_move (operands[0],
 		  convert_modes (SFmode, DFmode, operands[1], 0), 0);
@@ -1797,6 +1806,15 @@ (define_insn "extendhfdf2"
   [(set_attr "type" "fcvt")
    (set_attr "mode" "DF")])
 
+(define_insn "extendbfsf2"
+  [(set (match_operand:SF     0 "register_operand" "=f")
+       (float_extend:SF
+           (match_operand:BF 1 "register_operand" " f")))]
+  "TARGET_ZFBFMIN"
+  "fcvt.s.bf16\t%0,%1"
+  [(set_attr "type" "fcvt")
+   (set_attr "mode" "SF")])
+
 ;; 16-bit floating point moves
 (define_expand "mov<mode>"
   [(set (match_operand:HFBF 0 "")
@@ -1807,21 +1825,21 @@ (define_expand "mov<mode>"
     DONE;
 })
 
-(define_insn "*movhf_hardfloat"
-  [(set (match_operand:HF 0 "nonimmediate_operand" "=f,   f,f,f,m,m,*f,*r,  *r,*r,*m")
-	(match_operand:HF 1 "move_operand"         " f,zfli,G,m,f,G,*r,*f,*G*r,*m,*r"))]
-  "TARGET_ZFHMIN
-   && (register_operand (operands[0], HFmode)
-       || reg_or_0_operand (operands[1], HFmode))"
+(define_insn "*mov<mode>_hardfloat"
+  [(set (match_operand:HFBF 0 "nonimmediate_operand" "=f,   f,f,f,m,m,*f,*r,  *r,*r,*m")
+	(match_operand:HFBF 1 "move_operand"         " f,zfli,G,m,f,G,*r,*f,*G*r,*m,*r"))]
+  "((TARGET_ZFHMIN && <MODE>mode == HFmode) || (TARGET_ZFBFMIN && <MODE>mode == BFmode))
+   && (register_operand (operands[0], <MODE>mode)
+       || reg_or_0_operand (operands[1], <MODE>mode))"
   { return riscv_output_move (operands[0], operands[1]); }
   [(set_attr "move_type" "fmove,fmove,mtc,fpload,fpstore,store,mtc,mfc,move,load,store")
    (set_attr "type" "fmove")
-   (set_attr "mode" "HF")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*mov<mode>_softfloat"
   [(set (match_operand:HFBF 0 "nonimmediate_operand" "=f, r,r,m,*f,*r")
 	(match_operand:HFBF 1 "move_operand"         " f,Gr,m,r,*r,*f"))]
-  "(!(TARGET_ZFHMIN && <MODE>mode == HFmode) || (<MODE>mode == BFmode))
+  "!((TARGET_ZFHMIN && <MODE>mode == HFmode) || (TARGET_ZFBFMIN && <MODE>mode == BFmode))
    && (register_operand (operands[0], <MODE>mode)
        || reg_or_0_operand (operands[1], <MODE>mode))"
   { return riscv_output_move (operands[0], operands[1]); }
diff --git a/gcc/testsuite/gcc.target/riscv/zfbfmin_arithmetic.c b/gcc/testsuite/gcc.target/riscv/zfbfmin_arithmetic.c
new file mode 100644
index 00000000000..529e9b40daa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zfbfmin_arithmetic.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zfbfmin -mabi=ilp32d -O" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc_zfbfmin -mabi=lp64d -O" { target { rv64 } } } */
+
+extern _Bfloat16 bf;
+extern _Bfloat16 bf1;
+extern _Bfloat16 bf2;
+
+/* Arithmetic.  */
+void bf_add_bf () { bf = bf1 + bf2; }
+
+void bf_sub_bf () { bf = bf1 - bf2; }
+
+void bf_mul_bf () { bf = bf1 * bf2; }
+
+void bf_div_bf () { bf = bf1 / bf2; }
+
+void bf_add_const () { bf = bf1 + 3.14; }
+
+void const_sub_bf () { bf = 3.14 - bf2; }
+
+void bf_mul_const () { bf = bf1 *3.14; }
+
+void const_div_bf () { bf = 3.14 / bf2; }
+
+void bf_inc () { ++bf; }
+
+void bf_dec () { --bf; }
+
+/* { dg-final { scan-assembler-times "fcvt.s.bf16" 14 } } */
+/* { dg-final { scan-assembler-times "fcvt.bf16.s" 10 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zfbfmin_call.c b/gcc/testsuite/gcc.target/riscv/zfbfmin_call.c
new file mode 100644
index 00000000000..6cda430020e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zfbfmin_call.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zfbfmin -mabi=ilp32d -O" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc_zfbfmin -mabi=lp64d -O" { target { rv64 } } } */
+
+_Bfloat16 add (_Bfloat16 a, _Bfloat16 b) __attribute__ ((noinline));
+_Bfloat16 add (_Bfloat16 a, _Bfloat16 b)
+{
+  return a + b;
+}
+
+_Bfloat16 foo(_Bfloat16 a, _Bfloat16 b)
+{
+  return add (a, b);
+}
+
+/* { dg-final { scan-assembler-times "fcvt.s.bf16" 2 } } */
+/* { dg-final { scan-assembler-times "fcvt.bf16.s" 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zfbfmin_comparisons.c b/gcc/testsuite/gcc.target/riscv/zfbfmin_comparisons.c
new file mode 100644
index 00000000000..3dd2874a8bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zfbfmin_comparisons.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zfbfmin -mabi=ilp32d -O" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc_zfbfmin -mabi=lp64d -O" { target { rv64 } } } */
+
+extern _Bfloat16 bf;
+extern _Bfloat16 bf1;
+extern _Bfloat16 bf2;
+
+/* Comparisons.  */
+void bf_lt_bf () { bf = (bf1 < bf2) ? bf1 : bf2; }
+
+void bf_gt_bf () { bf = (bf1 > bf2) ? bf1 : bf2; }
+
+void bf_eq_bf () { bf = (bf1 == bf2) ? bf1 : bf2; }
+
+void bf_lt_const () { bf = (bf1 < 3.14) ? bf1 : bf2; }
+
+void const_gt_bf () { bf = (3.14 > bf2) ? bf1 : bf2; }
+
+void bf_eq_const () { bf = (bf1 == 3.14) ? bf1 : bf2; }
+
+/* { dg-final { scan-assembler-times "fcvt.s.bf16" 9 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zfbfmin_convert.c b/gcc/testsuite/gcc.target/riscv/zfbfmin_convert.c
new file mode 100644
index 00000000000..b9b2a1ca6b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zfbfmin_convert.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zfbfmin -mabi=ilp32d -O" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc_zfbfmin -mabi=lp64d -O" { target { rv64 } } } */
+
+extern _Bfloat16 bf;
+extern _Bfloat16 bf1;
+extern _Bfloat16 bf2;
+extern _Float16 hf;
+extern float sf;
+extern double df;
+
+extern int si;
+extern long long  di;
+
+extern unsigned int usi;
+extern unsigned long long udi;
+
+/* Fp or gp Converts to bf.  */
+void hf_to_bf () { bf = hf; } /* { dg-final { scan-assembler-times "call\t__trunchfbf2" 1 } } */
+void sf_to_bf () { bf = sf; }
+void df_to_bf () { bf = df; }
+void si_to_bf () { bf = si; }
+void di_to_bf () { bf = di; } /* { dg-final { scan-assembler-times "call\t__floatdibf" 1 { target { rv32 } } } } */ 
+void usi_to_bf () { bf = usi; }
+void udi_to_bf () { bf = udi; } /* { dg-final { scan-assembler-times "call\t__floatundibf" 1 { target { rv32 } } } } */ 
+void const_to_bf () { __volatile__ const float temp = 3.14; bf = temp; }
+/* { dg-final { scan-assembler-times "fcvt.bf16.s" 5 { target { rv32 } } } } */
+/* { dg-final { scan-assembler-times "fcvt.bf16.s" 7 { target { rv64 } } } } */
+
+/* Bf converts to fp or gp.  */
+void bf_to_hf () { hf = bf; } /* { dg-final { scan-assembler-times "call\t__truncsfhf2" 1 } } */
+void bf_to_sf () { sf = bf; }
+void bf_to_df () { df = bf; }
+void bf_to_si () { si = bf; }
+void bf_to_di () { di = bf; }
+void bf_to_usi () { usi = bf; }
+void bf_to_udi () { udi = bf; }
+/* { dg-final { scan-assembler-times "fcvt.s.bf16" 7 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zfbfmin_convert_run.c b/gcc/testsuite/gcc.target/riscv/zfbfmin_convert_run.c
new file mode 100644
index 00000000000..e97c71b8595
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zfbfmin_convert_run.c
@@ -0,0 +1,173 @@
+/* { dg-do run } */
+/* { dg-options "-march=rv32gc_zfbfmin -mabi=ilp32d -O" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc_zfbfmin -mabi=lp64d -O" { target { rv64 } } } */
+
+/* Need to wait for binutils and qemu or other emulators or hardware to support
+   zfbfmin extensions.  */
+#if 0
+#include <stdio.h>
+
+#define NO_INLINE __attribute__((noinline))
+
+int NO_INLINE
+bf16_to_int ()
+{
+  int ret[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  _Bfloat16 a_bf16 = 1.2;
+  _Bfloat16 b_bf16 = 7.1;
+  signed char a_char = 1;
+  short int a_short_int = 2;
+  int a_int = 3;
+  long a_long = 4;
+  long long a_long_long = 5;
+
+  a_bf16 = (_Bfloat16)a_char;
+  if (a_bf16 != (_Bfloat16)1)
+    ret[0] = 1;
+
+  a_bf16 = (_Bfloat16)a_short_int;
+  if (a_bf16 != (_Bfloat16)2)
+    ret[1] = 1;
+
+  a_bf16 = (_Bfloat16)a_int;
+  if (a_bf16 != (_Bfloat16)3)
+    ret[2] = 1;
+
+  a_bf16 = (_Bfloat16)a_long;
+  if (a_bf16 != (_Bfloat16)4)
+    ret[3] = 1;
+
+  a_bf16 = (_Bfloat16)a_long_long;
+  if (a_bf16 != (_Bfloat16)5)
+    ret[4] = 1;
+
+  a_char = (signed char)b_bf16;
+  if (a_char != (signed char)7.1)
+    ret[5] = 1;
+
+  a_short_int = (short int)b_bf16;
+  if (a_short_int != (short int)7.1)
+    ret[6] = 1;
+
+  a_int = (int)b_bf16;
+  if (a_int != (int)7.1)
+    ret[7] = 1;
+
+  a_long = (long)b_bf16;
+  if (a_long != (long)7.1)
+    ret[8] = 1;
+
+  a_long_long = (long long)b_bf16;
+  if (a_long_long != (long long)7.1)
+    ret[9] = 1;
+
+  if ((ret[0] == 1) || (ret[1] == 1) || (ret[2] == 1) || (ret[3] == 1) || (ret[4] == 1) ||
+      (ret[5] == 1) || (ret[6] == 1) || (ret[7] == 1) || (ret[8] == 1) || (ret[9] == 1))
+    return 1;
+  else
+    return 0;
+}
+
+int NO_INLINE
+bf16_to_uint ()
+{
+  int ret[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  _Bfloat16 a_bf16 = 1.2;
+  _Bfloat16 b_bf16 = 7.1;
+  unsigned char a_uchar = 1;
+  unsigned short int a_short_uint = 2;
+  unsigned int a_uint = 3;
+  unsigned long a_ulong = 4;
+  unsigned long long a_ulong_ulong = 5;
+
+  a_bf16 = (_Bfloat16)a_uchar;
+  if (a_bf16 != (_Bfloat16)1)
+    ret[0] = 1;
+
+  a_bf16 = (_Bfloat16)a_short_uint;
+  if (a_bf16 != (_Bfloat16)2)
+    ret[1] = 1;
+
+  a_bf16 = (_Bfloat16)a_uint;
+  if (a_bf16 != (_Bfloat16)3)
+    ret[2] = 1;
+
+  a_bf16 = (_Bfloat16)a_ulong;
+  if (a_bf16 != (_Bfloat16)4)
+    ret[3] = 1;
+
+  a_bf16 = (_Bfloat16)a_ulong_ulong;
+  if (a_bf16 != (_Bfloat16)5)
+    ret[4] = 1;
+
+  a_uchar = (unsigned char)b_bf16;
+  if (a_uchar != (unsigned char)7.1)
+    ret[5] = 1;
+
+  a_short_uint = (unsigned short int)b_bf16;
+  if (a_short_uint != (unsigned short int)7.1)
+    ret[6] = 1;
+
+  a_uint = (unsigned int)b_bf16;
+  if (a_uint != (unsigned int)7.1)
+    ret[7] = 1;
+
+  a_ulong = (unsigned long)b_bf16;
+  if (a_ulong != (unsigned long)7.1)
+    ret[8] = 1;
+
+  a_ulong_ulong = (unsigned long long)b_bf16;
+  if (a_ulong_ulong != (unsigned long long)7.1)
+    ret[9] = 1;
+
+  if ((ret[0] == 1) || (ret[1] == 1) || (ret[2] == 1) || (ret[3] == 1) || (ret[4] == 1) ||
+      (ret[5] == 1) || (ret[6] == 1) || (ret[7] == 1) || (ret[8] == 1) || (ret[9] == 1))
+    return 1;
+  else
+    return 0;
+}
+
+int NO_INLINE
+bf16_to_float ()
+{
+  int ret[4] = {0, 0, 0, 0};
+  _Bfloat16 a_bf16 = 1.2;
+  _Bfloat16 b_bf16 = 7.5;
+  float a_float = 3.7;
+  double a_double = 5.8;
+  a_bf16 = (_Bfloat16)a_float;
+  if (a_bf16 != ((_Bfloat16)3.7))
+    ret[0] = 1;
+
+  a_bf16 = (_Bfloat16)a_double;
+  if (a_bf16 != ((_Bfloat16)5.8))
+    ret[1] = 1;
+
+  a_float = (float)b_bf16;
+  if (a_float != (float)7.5)
+    ret[2] = 1;
+
+  a_double = (double)b_bf16;
+  if (a_double != (double)7.5)
+    ret[3] = 1;
+
+  if ((ret[0] == 1) || (ret[1] == 1) || (ret[2] == 1) || (ret[3] == 1))
+    return 1;
+  else
+    return 0;
+}
+
+int main()
+{
+  if (bf16_to_int () || bf16_to_uint () || bf16_to_float ())
+    return 1;
+  else
+    return 0;
+}
+#else
+int main()
+{
+  return 0;
+}
+#endif
+
diff --git a/gcc/testsuite/gcc.target/riscv/zfbfmin_fsh_and_flh.c b/gcc/testsuite/gcc.target/riscv/zfbfmin_fsh_and_flh.c
new file mode 100644
index 00000000000..0255f27f3ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zfbfmin_fsh_and_flh.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zfbfmin -mabi=ilp32d -O2" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc_zfbfmin -mabi=lp64d -O2" { target { rv64 } } } */
+
+void
+foo (int a, _Bfloat16 *b)
+{
+  *b += a;
+}
+
+/* { dg-final { scan-assembler-times "fsh\t" 1 } } */
+/* { dg-final { scan-assembler-times "flh\t" 1 } } */
-- 
2.17.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC 1/2] RISC-V: Add support for _Bfloat16.
  2023-09-19  8:44 [RFC 1/2] RISC-V: Add support for _Bfloat16 Jin Ma
  2023-09-19  8:46 ` [RFC 2/2] RISC-V: Add 'Zfbfmin' extension Jin Ma
@ 2023-09-29 17:46 ` Jeff Law
  2023-10-09  6:18   ` Jin Ma
  2023-12-20 11:17 ` [PATCH] Support libcall __float{,un}sibf by SF when it is not supported for _bf16 Jin Ma
  2 siblings, 1 reply; 12+ messages in thread
From: Jeff Law @ 2023-09-29 17:46 UTC (permalink / raw)
  To: Jin Ma, gcc-patches
  Cc: palmer, richard.sandiford, kito.cheng, philipp.tomsich,
	christoph.muellner, rdapp.gcc, juzhe.zhong, jinma.contrib



On 9/19/23 02:44, Jin Ma wrote:
> gcc/ChangeLog:
> 
> 	* config/riscv/iterators.md (HFBF): New.
> 	* config/riscv/riscv-builtins.cc (riscv_init_builtin_types):
> 	Initialize data type_Bfloat16.
> 	* config/riscv/riscv-modes.def (FLOAT_MODE): New.
> 	(ADJUST_FLOAT_FORMAT): New.
> 	* config/riscv/riscv.cc (riscv_mangle_type): Support for BFmode.
> 	(riscv_scalar_mode_supported_p): Ditto.
> 	(riscv_libgcc_floating_mode_supported_p): Ditto.
> 	(riscv_block_arith_comp_libfuncs_for_mode): New.
> 	(riscv_init_libfuncs): Opening and closing some libfuncs for BFmode.
> 	* config/riscv/riscv.md (mode" ): Add BF.
> 	(truncdfbf2): New.
> 	(movhf): Support for BFmode.
> 	(mov<mode>): Ditto.
> 	(*mov<mode>_softfloat):  Ditto.
> 	(fix_truncbf<GPR:mode>2): New.
> 	(fixuns_truncbf<GPR:mode>2): New.
> 	(float<mode>bf2): New.
> 	(floatuns<mode>bf2): New.
> 
> libgcc/ChangeLog:
> 
> 	* config/riscv/sfp-machine.h (_FP_NANFRAC_B): New.
> 	(_FP_NANSIGN_B): New.
> 	* config/riscv/t-softfp32: Add support for BF libfuncs.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/bf16_arithmetic.c: New test.
> 	* gcc.target/riscv/bf16_call.c: New test.
> 	* gcc.target/riscv/bf16_comparisons.c: New test.
> 	* gcc.target/riscv/bf16_convert-1.c: New test.
> 	* gcc.target/riscv/bf16_convert-2.c: New test.
> 	* gcc.target/riscv/bf16_convert_run.c: New test.
So this can't go in the tree until the extension has moved into a frozen 
state.  Hopefully that'll happen before we close stage1 development in Nov.



> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index e00b8ee3579..5048628c784 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -1631,6 +1631,20 @@ (define_insn "truncdfhf2"
>     [(set_attr "type" "fcvt")
>      (set_attr "mode" "HF")])
>   
> +;; The conversion of DF to BF needs to be done with SF if there is a
> +;; chance to generate at least one instruction, otherwise just using
> +;; libfunc __truncdfbf2.
> +(define_expand "truncdfbf2"
> +  [(set (match_operand:BF     0 "register_operand" "=f")
> +       (float_truncate:BF
> +           (match_operand:DF 1 "register_operand" " f")))]
> +  "TARGET_DOUBLE_FLOAT || TARGET_ZDINX"
> +  {
> +    convert_move (operands[0],
> +		  convert_modes (SFmode, DFmode, operands[1], 0), 0);
> +    DONE;
> +  })
So for conversions to/from BFmode, doesn't generic code take care of 
this for us?  Search for convert_mode_scalar in expr.cc. That code will 
utilize SFmode as an intermediate step just like your expander.   Is 
there some reason that generic code is insufficient?

Similarly for the other conversions.

Otherwise it looks pretty good.

Jeff

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC 2/2] RISC-V: Add 'Zfbfmin' extension.
  2023-09-19  8:46 ` [RFC 2/2] RISC-V: Add 'Zfbfmin' extension Jin Ma
@ 2023-09-29 17:52   ` Jeff Law
  0 siblings, 0 replies; 12+ messages in thread
From: Jeff Law @ 2023-09-29 17:52 UTC (permalink / raw)
  To: Jin Ma, gcc-patches
  Cc: palmer, richard.sandiford, kito.cheng, philipp.tomsich,
	christoph.muellner, rdapp.gcc, juzhe.zhong, jinma.contrib



On 9/19/23 02:46, Jin Ma wrote:
> This patch adds the 'Zfbfmin' extension for riscv, which is based on spec of bfloat16:
> https://github.com/riscv/riscv-bfloat16/commit/5578e34e15a44e9ad13246072a29f51274b4d999
> 
> The 'Zfbfmin' extension of binutils-gdb (REVIEW ONLY):
> https://sourceware.org/pipermail/binutils/2023-August/128773.html
> 
> The 'Zfbfmin' extension of qemu:
> https://github.com/qemu/qemu/commit/5d1270caac2ef7b8c887d4cb5a2444ba6d237516
> 
> Because the binutils does not yet support the 'Zfbfmin' extension, test case
> zfbfmin_convert_run.c is invalidated with '#if 0' and '#endif'.
> 
> gcc/ChangeLog:
> 
> 	* common/config/riscv/riscv-common.cc: Add 'Zfbfmin' extension.
> 	* config/riscv/riscv-opts.h (MASK_ZFBFMIN): New.
> 	(TARGET_ZFBFMIN): New.
> 	* config/riscv/riscv.cc (riscv_output_move): Enable FMV.X.H, and FMV.H.X
> 	for 'Zfbfmin' extension.
> 	(riscv_excess_precision): Likewise.
> 	* config/riscv/riscv.md (truncsfbf2): New.
> 	(extendbfsf2):  New.
> 	(*mov<mode>_hardfloat): Support for BFmode.
> 	(*mov<mode>_softfloat): Disable for BFmode  when 'Zfbfmin' extension is
> 	enabled.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/zfbfmin_arithmetic.c: New test.
> 	* gcc.target/riscv/zfbfmin_call.c: New test.
> 	* gcc.target/riscv/zfbfmin_comparisons.c: New test.
> 	* gcc.target/riscv/zfbfmin_convert.c: New test.
> 	* gcc.target/riscv/zfbfmin_convert_run.c: New test.
> 	* gcc.target/riscv/zfbfmin_fsh_and_flh.c: New test.
So as with 1/2 in this series, it can't go into the trunk until the 
relevant spec reaches a frozen state.

> 
> +/* { dg-final { scan-assembler-times "fcvt.s.bf16" 14 } } */
> +/* { dg-final { scan-assembler-times "fcvt.bf16.s" 10 } } */
So I think these have the potential to run afoul of unexpected matching 
of LTO bits.  Joern has an approach to tackle this problem that was 
recently pushed into the tree:

> https://gcc.gnu.org/pipermail/gcc-patches/2023-September/631485.html

The gist is to wrap the assembly instruction inside a {\m \M} construct.
So concretely:

 > +/* { dg-final { scan-assembler-times {\mfcvt.s.bf16\M} 14 } } */
 > +/* { dg-final { scan-assembler-times {\mfcvt.bf16.s\M} 10 } } */

Similarly for the other new tests where you actually match an instruction.


Overall it looks pretty good.

Jeff

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC 1/2] RISC-V: Add support for _Bfloat16.
  2023-09-29 17:46 ` [RFC 1/2] RISC-V: Add support for _Bfloat16 Jeff Law
@ 2023-10-09  6:18   ` Jin Ma
  2023-10-09 19:16     ` Jeff Law
  0 siblings, 1 reply; 12+ messages in thread
From: Jin Ma @ 2023-10-09  6:18 UTC (permalink / raw)
  To: gcc-patches, Jeff Law
  Cc: palmer, richard.sandiford, kito.cheng, philipp.tomsich,
	christoph.muellner, rdapp.gcc, juzhe.zhong, jinma.contrib

> On 9/19/23 02:44, Jin Ma wrote:
> > gcc/ChangeLog:
> > 
> > 	* config/riscv/iterators.md (HFBF): New.
> > 	* config/riscv/riscv-builtins.cc (riscv_init_builtin_types):
> > 	Initialize data type_Bfloat16.
> > 	* config/riscv/riscv-modes.def (FLOAT_MODE): New.
> > 	(ADJUST_FLOAT_FORMAT): New.
> > 	* config/riscv/riscv.cc (riscv_mangle_type): Support for BFmode.
> > 	(riscv_scalar_mode_supported_p): Ditto.
> > 	(riscv_libgcc_floating_mode_supported_p): Ditto.
> > 	(riscv_block_arith_comp_libfuncs_for_mode): New.
> > 	(riscv_init_libfuncs): Opening and closing some libfuncs for BFmode.
> > 	* config/riscv/riscv.md (mode" ): Add BF.
> > 	(truncdfbf2): New.
> > 	(movhf): Support for BFmode.
> > 	(mov<mode>): Ditto.
> > 	(*mov<mode>_softfloat):  Ditto.
> > 	(fix_truncbf<GPR:mode>2): New.
> > 	(fixuns_truncbf<GPR:mode>2): New.
> > 	(float<mode>bf2): New.
> > 	(floatuns<mode>bf2): New.
> > 
> > libgcc/ChangeLog:
> > 
> > 	* config/riscv/sfp-machine.h (_FP_NANFRAC_B): New.
> > 	(_FP_NANSIGN_B): New.
> > 	* config/riscv/t-softfp32: Add support for BF libfuncs.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > 	* gcc.target/riscv/bf16_arithmetic.c: New test.
> > 	* gcc.target/riscv/bf16_call.c: New test.
> > 	* gcc.target/riscv/bf16_comparisons.c: New test.
> > 	* gcc.target/riscv/bf16_convert-1.c: New test.
> > 	* gcc.target/riscv/bf16_convert-2.c: New test.
> > 	* gcc.target/riscv/bf16_convert_run.c: New test.
> So this can't go in the tree until the extension has moved into a frozen 
> state.  Hopefully that'll happen before we close stage1 development in Nov.

Ok, this is very reasonable.

> 
> 
> > diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> > index e00b8ee3579..5048628c784 100644
> > --- a/gcc/config/riscv/riscv.md
> > +++ b/gcc/config/riscv/riscv.md
> > @@ -1631,6 +1631,20 @@ (define_insn "truncdfhf2"
> >     [(set_attr "type" "fcvt")
> >      (set_attr "mode" "HF")])
> >   
> > +;; The conversion of DF to BF needs to be done with SF if there is a
> > +;; chance to generate at least one instruction, otherwise just using
> > +;; libfunc __truncdfbf2.
> > +(define_expand "truncdfbf2"
> > +  [(set (match_operand:BF     0 "register_operand" "=f")
> > +       (float_truncate:BF
> > +           (match_operand:DF 1 "register_operand" " f")))]
> > +  "TARGET_DOUBLE_FLOAT || TARGET_ZDINX"
> > +  {
> > +    convert_move (operands[0],
> > +		  convert_modes (SFmode, DFmode, operands[1], 0), 0);
> > +    DONE;
> > +  })
> So for conversions to/from BFmode, doesn't generic code take care of 
> this for us?  Search for convert_mode_scalar in expr.cc. That code will 
> utilize SFmode as an intermediate step just like your expander.   Is 
> there some reason that generic code is insufficient?
>
> Similarly for the the other conversions.

As far as I can see, the function 'convert_mode_scalar' is not sufficient for
handling all conversions to/from BFmode. It handles BF to HF, SF, DF and
SF to BF well, but the remaining conversions get no special processing and
fall back directly to a libcall.

Maybe I should choose to enhance its functionality? This seems to be a
good choice, I'm not sure.

Jin

> 
> Otherwise it looks pretty good.
> 
> Jeff

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC 1/2] RISC-V: Add support for _Bfloat16.
  2023-10-09  6:18   ` Jin Ma
@ 2023-10-09 19:16     ` Jeff Law
  2023-10-25 10:15       ` Jin Ma
  0 siblings, 1 reply; 12+ messages in thread
From: Jeff Law @ 2023-10-09 19:16 UTC (permalink / raw)
  To: Jin Ma, gcc-patches
  Cc: palmer, richard.sandiford, kito.cheng, philipp.tomsich,
	christoph.muellner, rdapp.gcc, juzhe.zhong, jinma.contrib



On 10/9/23 00:18, Jin Ma wrote:

>>> +;; The conversion of DF to BF needs to be done with SF if there is a
>>> +;; chance to generate at least one instruction, otherwise just using
>>> +;; libfunc __truncdfbf2.
>>> +(define_expand "truncdfbf2"
>>> +  [(set (match_operand:BF     0 "register_operand" "=f")
>>> +       (float_truncate:BF
>>> +           (match_operand:DF 1 "register_operand" " f")))]
>>> +  "TARGET_DOUBLE_FLOAT || TARGET_ZDINX"
>>> +  {
>>> +    convert_move (operands[0],
>>> +		  convert_modes (SFmode, DFmode, operands[1], 0), 0);
>>> +    DONE;
>>> +  })
>> So for conversions to/from BFmode, doesn't generic code take care of
>> this for us?  Search for convert_mode_scalar in expr.cc. That code will
>> utilize SFmode as an intermediate step just like your expander.   Is
>> there some reason that generic code is insufficient?
>>
>> Similarly for the the other conversions.
> 
> As far as I can see, the function 'convert_mode_scalar' doesn't seem to be perfect for
> dealing with the conversions to/from BFmode. It can only handle BF to HF, SF, DF and
> SF to BF well, but the rest of the conversion without any processing, directly using
> the libcall.
> 
> Maybe I should choose to enhance its functionality? This seems to be a
> good choice, I'm not sure.

My recollection was that BF could be converted to/from SF trivially and
if we wanted BF->DF we'd first convert to SF, then to DF.

Direct BF<->DF conversions aren't actually important from a performance 
standpoint.  So it's OK if they have an extra step IMHO.

jeff

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC 1/2] RISC-V: Add support for _Bfloat16.
  2023-10-09 19:16     ` Jeff Law
@ 2023-10-25 10:15       ` Jin Ma
  2023-11-10 21:21         ` Jeff Law
  0 siblings, 1 reply; 12+ messages in thread
From: Jin Ma @ 2023-10-25 10:15 UTC (permalink / raw)
  To: gcc-patches, Jeff Law
  Cc: palmer, richard.sandiford, kito.cheng, philipp.tomsich,
	christoph.muellner, rdapp.gcc, juzhe.zhong, jinma.contrib

> >>> +;; The conversion of DF to BF needs to be done with SF if there is a
> >>> +;; chance to generate at least one instruction, otherwise just using
> >>> +;; libfunc __truncdfbf2.
> >>> +(define_expand "truncdfbf2"
> >>> +  [(set (match_operand:BF     0 "register_operand" "=f")
> >>> +       (float_truncate:BF
> >>> +           (match_operand:DF 1 "register_operand" " f")))]
> >>> +  "TARGET_DOUBLE_FLOAT || TARGET_ZDINX"
> >>> +  {
> >>> +    convert_move (operands[0],
> >>> +		  convert_modes (SFmode, DFmode, operands[1], 0), 0);
> >>> +    DONE;
> >>> +  })
> >> So for conversions to/from BFmode, doesn't generic code take care of
> >> this for us?  Search for convert_mode_scalar in expr.cc. That code will
> >> utilize SFmode as an intermediate step just like your expander.   Is
> >> there some reason that generic code is insufficient?
> >>
> >> Similarly for the the other conversions.
> > 
> > As far as I can see, the function 'convert_mode_scalar' doesn't seem to be perfect for
> > dealing with the conversions to/from BFmode. It can only handle BF to HF, SF, DF and
> > SF to BF well, but the rest of the conversion without any processing, directly using
> > the libcall.
> > 
> > Maybe I should choose to enhance its functionality? This seems to be a
> > > good choice, I'm not sure.
> 
> My recollection was that BF could be converted to/from SF trivially and
> if we wanted BF->DF we'd first convert to SF, then to DF.
> 
> Direct BF<->DF conversions aren't actually important from a performance 
> standpoint.  So it's OK if they have an extra step IMHO.

Thank you very much for your review and detailed reply. Maybe I did not express myself clearly,
and I am a little confused about your guidance. My understanding is that you also think it is reasonable to
convert through SF, right? In fact, this is what I did.

In this patch, my thoughts are as follows:

The general principle is to use real instructions instead of libcalls for conversions as much as possible,
while minimizing the number of newly defined libcalls (only reusing those already defined by other architectures
such as aarch64). If SF can be used as an intermediate step, converting via SF is preferred; otherwise the libcall
is used directly.

1. For the conversions between floating points

For BF->DF, as you said, the function 'convert_mode_scalar' in the general code has been well implemented,
which will be expressed as BF->SF->DF. And the generated instruction list may be as follows:
  'call __extendbfsf2' + 'call __extendsfdf2' (when only soft floating point support);
  'call __extendbfsf2' + 'fcvt.d.s'           (when (TARGET_DOUBLE_FLOAT || TARGET_ZDINX) is true);
  'fcvt.s.bf16'        + 'fcvt.d.s'           (when ((TARGET_DOUBLE_FLOAT || TARGET_ZDINX) && TARGET_ZFBFMIN) is true)

For DF->BF, if neither 'fcvt.s.d' nor 'fcvt.bf16.s' can be generated, 'call __truncdfbf2' is generated directly
by the function 'convert_mode_scalar'. Otherwise the new pattern (define_expand "truncdfbf2") is used. This
makes it possible to implement DF->BF as 'fcvt.s.d' + 'fcvt.bf16.s', which the function
'convert_mode_scalar' cannot generate.

2. For the conversions between integer and BF, it seems that GCC only uses libcalls to implement them, but this is
obviously wrong. For example, the conversion BF->SI directly calls the unimplemented libcall __fixunsbfsi.
So I added some new patterns to handle these conversions via SF.

Thanks,

Jin

> 
> jeff

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC 1/2] RISC-V: Add support for _Bfloat16.
  2023-10-25 10:15       ` Jin Ma
@ 2023-11-10 21:21         ` Jeff Law
  0 siblings, 0 replies; 12+ messages in thread
From: Jeff Law @ 2023-11-10 21:21 UTC (permalink / raw)
  To: Jin Ma, gcc-patches
  Cc: palmer, richard.sandiford, kito.cheng, philipp.tomsich,
	christoph.muellner, rdapp.gcc, juzhe.zhong, jinma.contrib



On 10/25/23 04:15, Jin Ma wrote:
>>>>> +;; The conversion of DF to BF needs to be done with SF if there is a
>>>>> +;; chance to generate at least one instruction, otherwise just using
>>>>> +;; libfunc __truncdfbf2.
>>>>> +(define_expand "truncdfbf2"
>>>>> +  [(set (match_operand:BF     0 "register_operand" "=f")
>>>>> +       (float_truncate:BF
>>>>> +           (match_operand:DF 1 "register_operand" " f")))]
>>>>> +  "TARGET_DOUBLE_FLOAT || TARGET_ZDINX"
>>>>> +  {
>>>>> +    convert_move (operands[0],
>>>>> +		  convert_modes (SFmode, DFmode, operands[1], 0), 0);
>>>>> +    DONE;
>>>>> +  })
>>>> So for conversions to/from BFmode, doesn't generic code take care of
>>>> this for us?  Search for convert_mode_scalar in expr.cc. That code will
>>>> utilize SFmode as an intermediate step just like your expander.   Is
>>>> there some reason that generic code is insufficient?
>>>>
>>>> Similarly for the other conversions.
>>>
>>> As far as I can see, the function 'convert_mode_scalar' doesn't seem to be perfect for
>>> dealing with the conversions to/from BFmode. It can only handle BF to HF, SF, DF and
>>> SF to BF well, but the rest of the conversion without any processing, directly using
>>> the libcall.
>>>
>>> Maybe I should choose to enhance its functionality? This seems to be a
>>> good choice, I'm not sure.
>> My recollection was that BF could be converted to/from SF trivially and
>> if we wanted BF->DF we'd first convert to SF, then to DF.
>>
>> Direct BF<->DF conversions aren't actually important from a performance
>> standpoint.  So it's OK if they have an extra step IMHO.
> 
> Thank you very much for your review and detailed reply. Maybe there are some problems with my expression
> and I am a little confused about your guidance. My understanding is that you also think that it is reasonable to
> convert through SF, right? In fact, this is what I did.
My point was that I would expect the generic code to handle the 
conversion and that we didn't need to handle it explicitly in the RISC-V 
backend.

Meaning that I don't think we need a define_expand for truncdfbf2, 
fix_truncbf<GPR:mode>2, fixuns_truncbf<GPR:mode>2, float<mode>bf2, or 
floatuns<mode>bf2.


> 
> In this patch, my thoughts are as follows:
> 
> The general principle is to use the real instructions instead of libcall as much as possible for conversions,
> while minimizing the definition of libcall(only reusing which has been defined by other architectures such
> as aarch64). If SF can be used as a transit, it is preferred to convert to SF, otherwise libcall is directly used.
> 
> 1. For the conversions between floating points
> 
> For BF->DF, as you said, the function 'convert_mode_scalar' in the general code has been well implemented,
> which will be expressed as BF->SF->DF. And the generated instruction list may be as follows:
>    'call __extendbfsf2' + 'call __extendsfdf2' (when only soft floating point support);
>    'call __extendbfsf2' + 'fcvt.d.s'           (when (TARGET_DOUBLE_FLOAT || TARGET_ZDINX) is true);
>    'fcvt.s.bf16'        + 'fcvt.d.s'           (when ((TARGET_DOUBLE_FLOAT || TARGET_ZDINX) && TARGET_ZFBFMIN) is true)
> 
> For DF->BF, if any of fcvt.s.d and fcvt.bf16.s cannot be generated, the 'call __truncdfbf2' is directly generated
> by the function 'convert_mode_scalar'. Otherwise the new pattern(define_expand "truncdfbf2") is used. This
> makes it possible to implement DF->BF by 'fcvt.s.d' + 'fcvt.bf16.s', which cannot be generated by the function
> 'convert_mode_scalar'.
But I would have expected convert_mode_scalar to generate DF->BF by 
first truncating to SF, then to BF.   If that is missing for truncation, 
then we should add it to convert_mode_scalar rather than expressing it 
as a backend expander.





> 
> 2. For the conversions between integer and BF, it seems that gcc only uses libcall to implement it, but this is
> obviously wrong. For example, the conversion BF->SI directly calls the unimplemented libcall __fixunsbfsi.
> So I added some new pattern to handle these transformations with SF.
I would suggest these move into target independent code as well. 
There's no reason I'm aware of that they should be implemented entirely 
in a target machine description.  We're not really doing anything target 
specific in here.

jeff

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH] Support libcall __float{,un}sibf by SF when it is not supported for _bf16
  2023-09-19  8:44 [RFC 1/2] RISC-V: Add support for _Bfloat16 Jin Ma
  2023-09-19  8:46 ` [RFC 2/2] RISC-V: Add 'Zfbfmin' extension Jin Ma
  2023-09-29 17:46 ` [RFC 1/2] RISC-V: Add support for _Bfloat16 Jeff Law
@ 2023-12-20 11:17 ` Jin Ma
  2024-01-09  9:32   ` Jin Ma
  2024-05-26 14:53   ` [PATCH] " Jeff Law
  2 siblings, 2 replies; 12+ messages in thread
From: Jin Ma @ 2023-12-20 11:17 UTC (permalink / raw)
  To: gcc-patches, jeffreyalaw
  Cc: richard.sandiford, kito.cheng, christoph.muellner, jinma.contrib, Jin Ma

We don't have SI -> BF library functions, use SI -> SF -> BF
instead. Although this can also be implemented in a target
machine description, it is more appropriate to move
into target independent code.

gcc/ChangeLog:

	* optabs.cc (expand_float): Split SI -> BF into SI -> SF -> BF.
---
 gcc/optabs.cc | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 6a34276c239..c58a0321bbd 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -5727,6 +5727,19 @@ expand_float (rtx to, rtx from, int unsignedp)
       if (is_narrower_int_mode (GET_MODE (from), SImode))
 	from = convert_to_mode (SImode, from, unsignedp);
 
+#ifdef HAVE_SFmode
+      if (REAL_MODE_FORMAT (GET_MODE (to)) == &arm_bfloat_half_format
+	  && REAL_MODE_FORMAT (SFmode) == &ieee_single_format
+	  && GET_MODE (from) == SImode)
+	/* We don't have SI -> BF library functions, use SI -> SF -> BF
+	   instead.  */
+	{
+	  target = gen_reg_rtx (SFmode);
+	  expand_float (target, from, unsignedp);
+	  goto done;
+	}
+#endif
+
       libfunc = convert_optab_libfunc (tab, GET_MODE (to), GET_MODE (from));
       gcc_assert (libfunc);
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re:[PATCH] Support libcall __float{,un}sibf by SF when it is not supported for _bf16
  2023-12-20 11:17 ` [PATCH] Support libcall __float{,un}sibf by SF when it is not supported for _bf16 Jin Ma
@ 2024-01-09  9:32   ` Jin Ma
  2024-01-09  9:43     ` Jin Ma
  2024-05-26 14:53   ` [PATCH] " Jeff Law
  1 sibling, 1 reply; 12+ messages in thread
From: Jin Ma @ 2024-01-09  9:32 UTC (permalink / raw)
  To: Jin Ma, gcc-patches, jeffreyalaw
  Cc: richard.sandiford, kito.cheng, christoph.muellner, jinma.contrib

ping

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re:[PATCH] Support libcall __float{,un}sibf by SF when it is not supported for _bf16
  2024-01-09  9:32   ` Jin Ma
@ 2024-01-09  9:43     ` Jin Ma
  0 siblings, 0 replies; 12+ messages in thread
From: Jin Ma @ 2024-01-09  9:43 UTC (permalink / raw)
  To: Jin Ma, gcc-patches, jeffreyalaw
  Cc: richard.sandiford, kito.cheng, christoph.muellner, jinma.contrib

I apologize for not attaching a reference link.

Ref:
https://patchwork.ozlabs.org/project/gcc/patch/20230919084444.2089-1-jinma@linux.alibaba.com/
https://gcc.gnu.org/pipermail/gcc-patches/2023-December/641119.html

BR
Jin

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] Support libcall __float{,un}sibf by SF when it is not supported for _bf16
  2023-12-20 11:17 ` [PATCH] Support libcall __float{,un}sibf by SF when it is not supported for _bf16 Jin Ma
  2024-01-09  9:32   ` Jin Ma
@ 2024-05-26 14:53   ` Jeff Law
  1 sibling, 0 replies; 12+ messages in thread
From: Jeff Law @ 2024-05-26 14:53 UTC (permalink / raw)
  To: Jin Ma, gcc-patches
  Cc: richard.sandiford, kito.cheng, christoph.muellner, jinma.contrib



On 12/20/23 4:17 AM, Jin Ma wrote:
> We don't have SI -> BF library functions, use SI -> SF -> BF
> instead. Although this can also be implemented in a target
> machine description, it is more appropriate to move
> into target independent code.
> 
> gcc/ChangeLog:
> 
> 	* optabs.cc (expand_float): Split SI -> BF into SI -> SF -> BF.
> ---
>   gcc/optabs.cc | 13 +++++++++++++
>   1 file changed, 13 insertions(+)
> 
> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> index 6a34276c239..c58a0321bbd 100644
> --- a/gcc/optabs.cc
> +++ b/gcc/optabs.cc
> @@ -5727,6 +5727,19 @@ expand_float (rtx to, rtx from, int unsignedp)
>         if (is_narrower_int_mode (GET_MODE (from), SImode))
>   	from = convert_to_mode (SImode, from, unsignedp);
>   
> +#ifdef HAVE_SFmode
> +      if (REAL_MODE_FORMAT (GET_MODE (to)) == &arm_bfloat_half_format
> +	  && REAL_MODE_FORMAT (SFmode) == &ieee_single_format
> +	  && GET_MODE (from) == SImode)
> +	/* We don't have SI -> BF library functions, use SI -> SF -> BF
> +	   instead.  */
> +	{
> +	  target = gen_reg_rtx (SFmode);
> +	  expand_float (target, from, unsignedp);
> +	  goto done;
> +	}
> +#endif
Why do you have the #ifdef HAVE_SFmode?  That seems odd, I think the 
only place we do anything like that is in targhooks.  Why did you add 
those cpp conditionals?

Bring the comment "We don't have SI -> BF ..." inside the open curly and 
indent it two more spaces.  That should be more consistent with GCC style.

So generally OK.  I suspect this can move forward once we figure out why 
you added those cpp conditionals and fix the formatting nit.

jeff

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2024-05-26 14:53 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-19  8:44 [RFC 1/2] RISC-V: Add support for _Bfloat16 Jin Ma
2023-09-19  8:46 ` [RFC 2/2] RISC-V: Add 'Zfbfmin' extension Jin Ma
2023-09-29 17:52   ` Jeff Law
2023-09-29 17:46 ` [RFC 1/2] RISC-V: Add support for _Bfloat16 Jeff Law
2023-10-09  6:18   ` Jin Ma
2023-10-09 19:16     ` Jeff Law
2023-10-25 10:15       ` Jin Ma
2023-11-10 21:21         ` Jeff Law
2023-12-20 11:17 ` [PATCH] Support libcall __float{,un}sibf by SF when it is not supported for _bf16 Jin Ma
2024-01-09  9:32   ` Jin Ma
2024-01-09  9:43     ` Jin Ma
2024-05-26 14:53   ` [PATCH] " Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).