[PATCH] rs6000: Add builtins for IEEE 128-bit floating point values

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH] rs6000: Add builtins for IEEE 128-bit floating point values
@ 2023-05-02 15:52 Carl Love
  2023-06-05  8:45 ` Kewen.Lin
  0 siblings, 1 reply; 9+ messages in thread
From: Carl Love @ 2023-05-02 15:52 UTC (permalink / raw)
  To: Segher Boessenkool, gcc-patches; +Cc: cel, Peter Bergner

GCC maintainers:

The following patch adds three buitins for inserting and extracting the
exponent and significand for an IEEE 128-bit floating point values. 
The builtins are valid for Power 9 and Power 10.  

The patch has been tested on both Power 9 and Power 10.

Please let me know if this patch is acceptable for mainline.  Thanks.

            Carl 


----------------------
From a20cc81f98cce1140fc95775a7c25b55d1ca7cba Mon Sep 17 00:00:00 2001
From: Carl Love <cel@us.ibm.com>
Date: Wed, 12 Apr 2023 17:46:37 -0400
Subject: [PATCH] rs6000: Add builtins for IEEE 128-bit floating point values

Add support for the following builtins:

 __vector unsigned long long int __builtin_extractf128_exp (__ieee128);
 __vector unsigned __int128 __builtin_extractf128_sig (__ieee128);
 __ieee128 __builtin_insertf128_exp (__vector unsigned __int128,
			             __vector unsigned long long);

gcc/
	* config/rs6000/rs6000-buildin.def (__builtin_extractf128_exp,
	 __builtin_extractf128_sig, __builtin_insertf128_exp): Add new
	builtin definitions.
	* config/rs6000.md (extractf128_exp_<mode>, insertf128_exp_<mode>,
	extractf128_sig_<mode>): Add define_expand for new builtins.
	(xsxexpqp_f128_<mode>, xsxsigqp_f128_<mode>, siexpqpf_f128_<mode>):
	Add define_insn for new builtins.
	* doc/extend.texi (__builtin_extractf128_exp, __builtin_extractf128_sig,
	__builtin_insertf128_exp): Add documentation for new builtins.

gcc/testsuite/
	* gcc.target/powerpc/bfp/extract-exp-ieee128.c: New test case.
	* gcc.target/powerpc/bfp/extract-sig-ieee128.c: New test case.
	* gcc.target/powerpc/bfp/insert-exp-ieee128.c: New test case.
---
 gcc/config/rs6000/rs6000-builtins.def         |  9 +++
 gcc/config/rs6000/vsx.md                      | 66 ++++++++++++++++++-
 gcc/doc/extend.texi                           | 28 ++++++++
 .../powerpc/bfp/extract-exp-ieee128.c         | 49 ++++++++++++++
 .../powerpc/bfp/extract-sig-ieee128.c         | 56 ++++++++++++++++
 .../powerpc/bfp/insert-exp-ieee128.c          | 58 ++++++++++++++++
 6 files changed, 265 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c

diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index 638d0bc72ca..3247a7f7673 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2876,6 +2876,15 @@
   pure vsc __builtin_vsx_xl_len_r (void *, signed long);
     XL_LEN_R xl_len_r {}
 
+  vull __builtin_extractf128_exp (_Float128);
+    EEXPKF extractf128_exp_kf {}
+
+  vuq __builtin_extractf128_sig (_Float128);
+    ESIGKF extractf128_sig_kf {}
+
+  _Float128 __builtin_insertf128_exp (vuq, vull);
+    IEXPKF_VULL insertf128_exp_kf {}
+
 
 ; Builtins requiring hardware support for IEEE-128 floating-point.
 [ieee128-hw]
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 7d845df5c2d..2a9f875ba57 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -369,7 +369,10 @@
    UNSPEC_XXSPLTI32DX
    UNSPEC_XXBLEND
    UNSPEC_XXPERMX
-  ])
+   UNSPEC_EXTRACTEXPIEEE
+   UNSPEC_EXTRACTSIGIEEE
+   UNSPEC_INSERTEXPIEEE
+])
 
 (define_int_iterator XVCVBF16	[UNSPEC_VSX_XVCVSPBF16
 				 UNSPEC_VSX_XVCVBF16SPN])
@@ -4155,6 +4158,38 @@
  "vins<wd>rx %0,%1,%2"
  [(set_attr "type" "vecsimple")])
 
+(define_expand "extractf128_exp_<mode>"
+  [(set (match_operand:V2DI 0 "altivec_register_operand")
+  (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand")]
+		  UNSPEC_EXTRACTEXPIEEE))]
+"TARGET_P9_VECTOR"
+{
+  emit_insn (gen_xsxexpqp_f128_<mode> (operands[0], operands[1]));
+  DONE;
+})
+
+(define_expand "insertf128_exp_<mode>"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand")
+  (unspec:IEEE128 [(match_operand:V1TI 1 "altivec_register_operand")
+		   (match_operand:V2DI 2 "altivec_register_operand")]
+		  UNSPEC_INSERTEXPIEEE))]
+"TARGET_P9_VECTOR"
+{
+  emit_insn (gen_xsiexpqpf_f128_<mode> (operands[0], operands[1],
+				        operands[2]));
+  DONE;
+})
+
+(define_expand "extractf128_sig_<mode>"
+  [(set (match_operand:V2DI 0 "altivec_register_operand")
+  (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand")]
+		  UNSPEC_EXTRACTSIGIEEE))]
+"TARGET_P9_VECTOR"
+{
+  emit_insn (gen_xsxsigqp_f128_<mode> (operands[0], operands[1]));
+  DONE;
+})
+
 (define_expand "vreplace_elt_<mode>"
   [(set (match_operand:REPLACE_ELT 0 "register_operand")
   (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand")
@@ -5016,6 +5051,15 @@
   "xsxexpqp %0,%1"
   [(set_attr "type" "vecmove")])
 
+;; VSX Scalar to Vector Extract Exponent IEEE 128-bit floating point format
+(define_insn "xsxexpqp_f128_<mode>"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
+	(unspec:V2DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
+	 UNSPEC_VSX_SXEXPDP))]
+  "TARGET_P9_VECTOR"
+  "xsxexpqp %0,%1"
+  [(set_attr "type" "vecmove")])
+
 ;; VSX Scalar Extract Exponent Double-Precision
 (define_insn "xsxexpdp"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -5034,6 +5078,15 @@
   "xsxsigqp %0,%1"
   [(set_attr "type" "vecmove")])
 
+;; VSX Scalar to Vector Extract Significand IEEE 128-bit floating point format
+(define_insn "xsxsigqp_f128_<mode>"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
+	(unspec:V2DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
+	 UNSPEC_VSX_SXSIG))]
+  "TARGET_P9_VECTOR"
+  "xsxsigqp %0,%1"
+  [(set_attr "type" "vecmove")])
+
 ;; VSX Scalar Extract Significand Double-Precision
 (define_insn "xsxsigdp"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -5054,6 +5107,17 @@
   "xsiexpqp %0,%1,%2"
   [(set_attr "type" "vecmove")])
 
+;; VSX Insert Exponent IEEE 128-bit Floating point format
+(define_insn "xsiexpqpf_f128_<mode>"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+	(unspec:IEEE128
+	 [(match_operand:V1TI 1 "altivec_register_operand" "v")
+	  (match_operand:V2DI 2 "altivec_register_operand" "v")]
+	 UNSPEC_VSX_SIEXPQP))]
+  "TARGET_P9_VECTOR"
+  "xsiexpqp %0,%1,%2"
+  [(set_attr "type" "vecmove")])
+
 ;; VSX Scalar Insert Exponent Quad-Precision
 (define_insn "xsiexpqp_<mode>"
   [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index e426a2eb7d8..456e5a03d69 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -18511,6 +18511,34 @@ the FPSCR.  The instruction is a lower latency version of the @code{mffs}
 instruction.  If the @code{mffsl} instruction is not available, then the
 builtin uses the older @code{mffs} instruction to read the FPSCR.
 
+@smallexample
+@exdent vector unsigned long long int
+@exdent __builtin_extractf128_exp (__ieee128);
+@end smallexample
+
+The exponent from the IEEE 128-bit floating point value is returned in the
+lower bits of vector element 1 on Little Endian systems.
+
+@smallexample
+@exdent vector unsigned __int128
+@exdent __builtin_extractf128_sig (__ieee128);
+@end smallexample
+
+The significand from the IEEE 128-bit floating point value is returned in the
+vector element 0 bits [111:0]. Bit 112 is set to 0 if the input value was zero
+or Denormal, 1 otherwise.  Bits [127:113] are set to zero.
+
+@smallexample
+@exdent _ieee128
+@exdent __builtin_insertf128_exp (vector unsigned __int128,
+vector unsigned long long int);
+@end smallexample
+
+The first argument contains the significand in bits [111:0] for the IEEE
+128-bi floating point tresult and the sign bit [127] for the result.  The
+second argument contains the exponenet bits for the result in bits [14:0] of
+vector element 1.
+
 @node Basic PowerPC Built-in Functions Available on ISA 3.1
 @subsubsection Basic PowerPC Built-in Functions Available on ISA 3.1
 
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
new file mode 100644
index 00000000000..47e2c43962f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
@@ -0,0 +1,49 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power9" } */
+
+#include <altivec.h>
+#include <stdlib.h>
+
+#if DEBUG
+#include <stdio.h>
+#endif
+
+vector unsigned long long int
+get_exponents (__ieee128 *p)
+{
+  __ieee128 source = *p;
+
+  return __builtin_extractf128_exp (source);
+}
+
+int
+main ()
+{
+  vector unsigned long long int result, exp_result;
+  union conv128_t
+   {
+     __ieee128 val_ieee128;
+     __int128  val_int128;
+  } source;
+  
+  exp_result[0] = 0x0ULL;
+  exp_result[1] = 0x1234ULL;
+  source.val_int128 = 0x923456789ABCDEF0ULL;
+  source.val_int128 = (source.val_int128 << 64) | 0x123456789ABCDEFULL;
+
+  result = get_exponents (&source.val_ieee128);
+
+  if ((result[0] != exp_result[0]) || (result[1] != exp_result[1]))
+#if DEBUG
+    {
+      printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n",
+	     result[0], exp_result[0]);
+      printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n",
+	     result[1], exp_result[1]);
+    }
+#else
+    abort();
+#endif
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
new file mode 100644
index 00000000000..4bd50ca2281
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
@@ -0,0 +1,56 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power9" } */
+
+#include <altivec.h>
+#include <stdlib.h>
+
+#if DEBUG
+#include <stdio.h>
+#endif
+
+vector unsigned __int128
+get_significand (__ieee128 *p)
+{
+  __ieee128 source = *p;
+
+  return __builtin_extractf128_sig (source);
+}
+
+int
+main ()
+{
+  #define NOT_ZERO_OR_DENORMAL  0x1000000000000
+
+  union conv128_t
+   {
+     __ieee128 val_ieee128;
+     unsigned long long int val_ull[2];
+     unsigned __int128  val_uint128;
+     __vector unsigned __int128  val_vuint128;
+  } source, result, exp_result;
+  
+  /* Result is not zero or denormal.  */
+  exp_result.val_ull[1] = 0x00056789ABCDEF0ULL | NOT_ZERO_OR_DENORMAL;
+  exp_result.val_ull[0] = 0x123456789ABCDEFULL;
+  source.val_uint128 = 0x923456789ABCDEF0ULL;
+  source.val_uint128 = (source.val_uint128 << 64) | 0x123456789ABCDEFULL;
+
+  /* Note, bits[0:14] are set to 0, bit[15] is 0 if the input was zero or
+     Denormal, 1 otherwise.  */
+  result.val_vuint128 = get_significand (&source.val_ieee128);
+
+  if ((result.val_ull[0] != exp_result.val_ull[0])
+      || (result.val_ull[1] != exp_result.val_ull[1]))
+#if DEBUG
+    {
+      printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n",
+	     result.val_ull[0], exp_result.val_ull[0]);
+      printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n",
+	     result.val_ull[1], exp_result.val_ull[1]);
+    }
+#else
+    abort();
+#endif
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
new file mode 100644
index 00000000000..5bd33ed5b7d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
@@ -0,0 +1,58 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power9" } */
+
+#include <altivec.h>
+#include <stdlib.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+__ieee128
+insert_exponent (__vector unsigned __int128 *significand_p,
+		 __vector unsigned long long int *exponent_p)
+{
+  __vector unsigned __int128 significand = *significand_p;
+  __vector unsigned long long int exponent = *exponent_p;
+
+  return __builtin_insertf128_exp (significand, exponent);
+}
+
+
+int
+main ()
+{
+  union conv128_t
+   {
+     __ieee128 val_ieee128;
+     __vector unsigned __int128 val_vint128;
+     __vector unsigned long long int  val_vull;
+  } result, exp_result, significand;
+
+  __vector unsigned long long int exponent;
+
+  significand.val_vull[0] = 0xFEDCBA9876543210ULL;
+  significand.val_vull[1] = 0x7FFF12345678ABCDULL;  /* positive value */
+
+  exponent[0] = 0x5678;
+  exponent[1] = 0x1234;
+
+  exp_result.val_vull[0] = 0xFEDCBA9876543210ULL;
+  exp_result.val_vull[1] = 0x123412345678ABCDULL;
+
+  result.val_ieee128 = insert_exponent(&significand.val_vint128, &exponent);
+  
+  if (result.val_ieee128 != exp_result.val_ieee128)
+#ifdef DEBUG
+    {
+      printf("result.val_vull[0] = 0x%llx, exp_result.val_vull[0] = 0x%llx\n",
+	     result.val_vull[0], exp_result.val_vull[0]);
+      printf("result.val_vull[1] = 0x%llx, exp_result.val_vull[1] = 0x%llx\n",
+	     result.val_vull[1], exp_result.val_vull[1]);
+    }
+#else
+    abort ();
+#endif
+
+}
-- 
2.37.2



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] rs6000: Add builtins for IEEE 128-bit floating point values
  2023-05-02 15:52 [PATCH] rs6000: Add builtins for IEEE 128-bit floating point values Carl Love
@ 2023-06-05  8:45 ` Kewen.Lin
  2023-06-06 19:54   ` [PATCH ver 2] " Carl Love
  2023-06-06 19:54   ` [PATCH] " Carl Love
  0 siblings, 2 replies; 9+ messages in thread
From: Kewen.Lin @ 2023-06-05  8:45 UTC (permalink / raw)
  To: Carl Love; +Cc: Peter Bergner, Segher Boessenkool, gcc-patches

Hi Carl,

on 2023/5/2 23:52, Carl Love via Gcc-patches wrote:
> GCC maintainers:
> 
> The following patch adds three buitins for inserting and extracting the
> exponent and significand for an IEEE 128-bit floating point values. 
> The builtins are valid for Power 9 and Power 10.  

We already have:

unsigned long long int scalar_extract_exp (__ieee128 source);
unsigned __int128 scalar_extract_sig (__ieee128 source);
ieee_128 scalar_insert_exp (unsigned __int128 significand,
                            unsigned long long int exponent);
ieee_128 scalar_insert_exp (ieee_128 significand, unsigned long long int exponent);

you need to say something about the requirements or the justification for
adding more, for this patch itself, some comments are inline below, thanks!

> 
> The patch has been tested on both Power 9 and Power 10.
> 
> Please let me know if this patch is acceptable for mainline.  Thanks.
> 
>             Carl 
> 
> 
> ----------------------
> From a20cc81f98cce1140fc95775a7c25b55d1ca7cba Mon Sep 17 00:00:00 2001
> From: Carl Love <cel@us.ibm.com>
> Date: Wed, 12 Apr 2023 17:46:37 -0400
> Subject: [PATCH] rs6000: Add builtins for IEEE 128-bit floating point values
> 
> Add support for the following builtins:
> 
>  __vector unsigned long long int __builtin_extractf128_exp (__ieee128);

Could you make the name similar to the existing one?  The existing one
  
  unsigned long long int scalar_extract_exp (__ieee128 source);

has nothing like f128 on its name, this variant is just to change the
return type to vector type, how about scalar_extract_exp_to_vec?

>  __vector unsigned __int128 __builtin_extractf128_sig (__ieee128);

Ditto.

>  __ieee128 __builtin_insertf128_exp (__vector unsigned __int128,
> 			             __vector unsigned long long);

This one can just overload the existing scalar_insert_exp?

> 
> gcc/
> 	* config/rs6000/rs6000-buildin.def (__builtin_extractf128_exp,
> 	 __builtin_extractf128_sig, __builtin_insertf128_exp): Add new
> 	builtin definitions.
> 	* config/rs6000.md (extractf128_exp_<mode>, insertf128_exp_<mode>,
> 	extractf128_sig_<mode>): Add define_expand for new builtins.
> 	(xsxexpqp_f128_<mode>, xsxsigqp_f128_<mode>, siexpqpf_f128_<mode>):
> 	Add define_insn for new builtins.
> 	* doc/extend.texi (__builtin_extractf128_exp, __builtin_extractf128_sig,
> 	__builtin_insertf128_exp): Add documentation for new builtins.
> 
> gcc/testsuite/
> 	* gcc.target/powerpc/bfp/extract-exp-ieee128.c: New test case.
> 	* gcc.target/powerpc/bfp/extract-sig-ieee128.c: New test case.
> 	* gcc.target/powerpc/bfp/insert-exp-ieee128.c: New test case.
> ---
>  gcc/config/rs6000/rs6000-builtins.def         |  9 +++
>  gcc/config/rs6000/vsx.md                      | 66 ++++++++++++++++++-
>  gcc/doc/extend.texi                           | 28 ++++++++
>  .../powerpc/bfp/extract-exp-ieee128.c         | 49 ++++++++++++++
>  .../powerpc/bfp/extract-sig-ieee128.c         | 56 ++++++++++++++++
>  .../powerpc/bfp/insert-exp-ieee128.c          | 58 ++++++++++++++++
>  6 files changed, 265 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
> 
> diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
> index 638d0bc72ca..3247a7f7673 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -2876,6 +2876,15 @@
>    pure vsc __builtin_vsx_xl_len_r (void *, signed long);
>      XL_LEN_R xl_len_r {}
>  
> +  vull __builtin_extractf128_exp (_Float128);
> +    EEXPKF extractf128_exp_kf {}
> +
> +  vuq __builtin_extractf128_sig (_Float128);
> +    ESIGKF extractf128_sig_kf {}
> +
> +  _Float128 __builtin_insertf128_exp (vuq, vull);
> +    IEXPKF_VULL insertf128_exp_kf {}
> +

Put them to be near its related ones like __builtin_vsx_scalar_extract_expq
etc.

>  
>  ; Builtins requiring hardware support for IEEE-128 floating-point.
>  [ieee128-hw]
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index 7d845df5c2d..2a9f875ba57 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -369,7 +369,10 @@
>     UNSPEC_XXSPLTI32DX
>     UNSPEC_XXBLEND
>     UNSPEC_XXPERMX
> -  ])
> +   UNSPEC_EXTRACTEXPIEEE
> +   UNSPEC_EXTRACTSIGIEEE
> +   UNSPEC_INSERTEXPIEEE

These are not necessary, just use the existing UNSPEC_VSX_SXEXPDP etc.

> +])
>  
>  (define_int_iterator XVCVBF16	[UNSPEC_VSX_XVCVSPBF16
>  				 UNSPEC_VSX_XVCVBF16SPN])
> @@ -4155,6 +4158,38 @@
>   "vins<wd>rx %0,%1,%2"
>   [(set_attr "type" "vecsimple")])
>  
> +(define_expand "extractf128_exp_<mode>"
> +  [(set (match_operand:V2DI 0 "altivec_register_operand")
> +  (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand")]
> +		  UNSPEC_EXTRACTEXPIEEE))]
> +"TARGET_P9_VECTOR"
> +{
> +  emit_insn (gen_xsxexpqp_f128_<mode> (operands[0], operands[1]));
> +  DONE;
> +})
> +
> +(define_expand "insertf128_exp_<mode>"
> +  [(set (match_operand:IEEE128 0 "altivec_register_operand")
> +  (unspec:IEEE128 [(match_operand:V1TI 1 "altivec_register_operand")
> +		   (match_operand:V2DI 2 "altivec_register_operand")]
> +		  UNSPEC_INSERTEXPIEEE))]
> +"TARGET_P9_VECTOR"
> +{
> +  emit_insn (gen_xsiexpqpf_f128_<mode> (operands[0], operands[1],
> +				        operands[2]));
> +  DONE;
> +})
> +
> +(define_expand "extractf128_sig_<mode>"
> +  [(set (match_operand:V2DI 0 "altivec_register_operand")
> +  (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand")]
> +		  UNSPEC_EXTRACTSIGIEEE))]
> +"TARGET_P9_VECTOR"
> +{
> +  emit_insn (gen_xsxsigqp_f128_<mode> (operands[0], operands[1]));
> +  DONE;
> +})

These define_expands are not necessary, the called gen functions from the
define_insns can be directly used as bif pattern in rs6000-builtins.def.

> +
>  (define_expand "vreplace_elt_<mode>"
>    [(set (match_operand:REPLACE_ELT 0 "register_operand")
>    (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand")
> @@ -5016,6 +5051,15 @@
>    "xsxexpqp %0,%1"
>    [(set_attr "type" "vecmove")])
>  
> +;; VSX Scalar to Vector Extract Exponent IEEE 128-bit floating point format
> +(define_insn "xsxexpqp_f128_<mode>"
> +  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
> +	(unspec:V2DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
> +	 UNSPEC_VSX_SXEXPDP))]
> +  "TARGET_P9_VECTOR"
> +  "xsxexpqp %0,%1"
> +  [(set_attr "type" "vecmove")])

I think you can just merge this into the existing xsxexpqp_<mode> by extending
with one mode like xsxexpqp_<IEEE128:mode>_<xxx:mode> for unspec:xxx.

xxx can be one mode iterator for DI and V2DI.

> +
>  ;; VSX Scalar Extract Exponent Double-Precision
>  (define_insn "xsxexpdp"
>    [(set (match_operand:DI 0 "register_operand" "=r")
> @@ -5034,6 +5078,15 @@
>    "xsxsigqp %0,%1"
>    [(set_attr "type" "vecmove")])
>  
> +;; VSX Scalar to Vector Extract Significand IEEE 128-bit floating point format
> +(define_insn "xsxsigqp_f128_<mode>"
> +  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
> +	(unspec:V2DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
> +	 UNSPEC_VSX_SXSIG))]
> +  "TARGET_P9_VECTOR"
> +  "xsxsigqp %0,%1"
> +  [(set_attr "type" "vecmove")])

Ditto, the mode V2DI looks unexpected, V1TI instead?

> +
>  ;; VSX Scalar Extract Significand Double-Precision
>  (define_insn "xsxsigdp"
>    [(set (match_operand:DI 0 "register_operand" "=r")
> @@ -5054,6 +5107,17 @@
>    "xsiexpqp %0,%1,%2"
>    [(set_attr "type" "vecmove")])
>  
> +;; VSX Insert Exponent IEEE 128-bit Floating point format
> +(define_insn "xsiexpqpf_f128_<mode>"
> +  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
> +	(unspec:IEEE128
> +	 [(match_operand:V1TI 1 "altivec_register_operand" "v")
> +	  (match_operand:V2DI 2 "altivec_register_operand" "v")]
> +	 UNSPEC_VSX_SIEXPQP))]
> +  "TARGET_P9_VECTOR"
> +  "xsiexpqp %0,%1,%2"
> +  [(set_attr "type" "vecmove")]> +
>  ;; VSX Scalar Insert Exponent Quad-Precision
>  (define_insn "xsiexpqp_<mode>"
>    [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index e426a2eb7d8..456e5a03d69 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -18511,6 +18511,34 @@ the FPSCR.  The instruction is a lower latency version of the @code{mffs}
>  instruction.  If the @code{mffsl} instruction is not available, then the
>  builtin uses the older @code{mffs} instruction to read the FPSCR.
>  
> +@smallexample
> +@exdent vector unsigned long long int
> +@exdent __builtin_extractf128_exp (__ieee128);
> +@end smallexample
> +
> +The exponent from the IEEE 128-bit floating point value is returned in the
> +lower bits of vector element 1 on Little Endian systems.
> +
> +@smallexample
> +@exdent vector unsigned __int128
> +@exdent __builtin_extractf128_sig (__ieee128);
> +@end smallexample
> +
> +The significand from the IEEE 128-bit floating point value is returned in the
> +vector element 0 bits [111:0]. Bit 112 is set to 0 if the input value was zero
> +or Denormal, 1 otherwise.  Bits [127:113] are set to zero.
> +
> +@smallexample
> +@exdent _ieee128
> +@exdent __builtin_insertf128_exp (vector unsigned __int128,
> +vector unsigned long long int);
> +@end smallexample
> +
> +The first argument contains the significand in bits [111:0] for the IEEE
> +128-bi floating point tresult and the sign bit [127] for the result.  The
> +second argument contains the exponenet bits for the result in bits [14:0] of
> +vector element 1.

Just put the new prototypes after the existing ones and extend the current related
documentation if needed.

> +
>  @node Basic PowerPC Built-in Functions Available on ISA 3.1
>  @subsubsection Basic PowerPC Built-in Functions Available on ISA 3.1
>  
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
> new file mode 100644
> index 00000000000..47e2c43962f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
> @@ -0,0 +1,49 @@
> +/* { dg-do run { target { powerpc*-*-* } } } */> +/* { dg-require-effective-target power10_ok } */

It needs _hw for run and it doesn't require power10, so p9vector_hw.

> +/* { dg-options "-mdejagnu-cpu=power9" } */

You can refer to the conditions for the existing test cases on ieee128
extract_{exp,sig}, insert_..., the underlying insns of these newly
introduced are the same, the condition should be the same.

BR,
Kewen

> +
> +#include <altivec.h>
> +#include <stdlib.h>
> +
> +#if DEBUG
> +#include <stdio.h>
> +#endif
> +
> +vector unsigned long long int
> +get_exponents (__ieee128 *p)
> +{
> +  __ieee128 source = *p;
> +
> +  return __builtin_extractf128_exp (source);
> +}
> +
> +int
> +main ()
> +{
> +  vector unsigned long long int result, exp_result;
> +  union conv128_t
> +   {
> +     __ieee128 val_ieee128;
> +     __int128  val_int128;
> +  } source;
> +  
> +  exp_result[0] = 0x0ULL;
> +  exp_result[1] = 0x1234ULL;
> +  source.val_int128 = 0x923456789ABCDEF0ULL;
> +  source.val_int128 = (source.val_int128 << 64) | 0x123456789ABCDEFULL;
> +
> +  result = get_exponents (&source.val_ieee128);
> +
> +  if ((result[0] != exp_result[0]) || (result[1] != exp_result[1]))
> +#if DEBUG
> +    {
> +      printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n",
> +	     result[0], exp_result[0]);
> +      printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n",
> +	     result[1], exp_result[1]);
> +    }
> +#else
> +    abort();
> +#endif
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
> new file mode 100644
> index 00000000000..4bd50ca2281
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
> @@ -0,0 +1,56 @@
> +/* { dg-do run { target { powerpc*-*-* } } } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power9" } */
> +
> +#include <altivec.h>
> +#include <stdlib.h>
> +
> +#if DEBUG
> +#include <stdio.h>
> +#endif
> +
> +vector unsigned __int128
> +get_significand (__ieee128 *p)
> +{
> +  __ieee128 source = *p;
> +
> +  return __builtin_extractf128_sig (source);
> +}
> +
> +int
> +main ()
> +{
> +  #define NOT_ZERO_OR_DENORMAL  0x1000000000000
> +
> +  union conv128_t
> +   {
> +     __ieee128 val_ieee128;
> +     unsigned long long int val_ull[2];
> +     unsigned __int128  val_uint128;
> +     __vector unsigned __int128  val_vuint128;
> +  } source, result, exp_result;
> +  
> +  /* Result is not zero or denormal.  */
> +  exp_result.val_ull[1] = 0x00056789ABCDEF0ULL | NOT_ZERO_OR_DENORMAL;
> +  exp_result.val_ull[0] = 0x123456789ABCDEFULL;
> +  source.val_uint128 = 0x923456789ABCDEF0ULL;
> +  source.val_uint128 = (source.val_uint128 << 64) | 0x123456789ABCDEFULL;
> +
> +  /* Note, bits[0:14] are set to 0, bit[15] is 0 if the input was zero or
> +     Denormal, 1 otherwise.  */
> +  result.val_vuint128 = get_significand (&source.val_ieee128);
> +
> +  if ((result.val_ull[0] != exp_result.val_ull[0])
> +      || (result.val_ull[1] != exp_result.val_ull[1]))
> +#if DEBUG
> +    {
> +      printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n",
> +	     result.val_ull[0], exp_result.val_ull[0]);
> +      printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n",
> +	     result.val_ull[1], exp_result.val_ull[1]);
> +    }
> +#else
> +    abort();
> +#endif
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
> new file mode 100644
> index 00000000000..5bd33ed5b7d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
> @@ -0,0 +1,58 @@
> +/* { dg-do run { target { powerpc*-*-* } } } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power9" } */
> +
> +#include <altivec.h>
> +#include <stdlib.h>
> +
> +#ifdef DEBUG
> +#include <stdio.h>
> +#endif
> +
> +__ieee128
> +insert_exponent (__vector unsigned __int128 *significand_p,
> +		 __vector unsigned long long int *exponent_p)
> +{
> +  __vector unsigned __int128 significand = *significand_p;
> +  __vector unsigned long long int exponent = *exponent_p;
> +
> +  return __builtin_insertf128_exp (significand, exponent);
> +}
> +
> +
> +int
> +main ()
> +{
> +  union conv128_t
> +   {
> +     __ieee128 val_ieee128;
> +     __vector unsigned __int128 val_vint128;
> +     __vector unsigned long long int  val_vull;
> +  } result, exp_result, significand;
> +
> +  __vector unsigned long long int exponent;
> +
> +  significand.val_vull[0] = 0xFEDCBA9876543210ULL;
> +  significand.val_vull[1] = 0x7FFF12345678ABCDULL;  /* positive value */
> +
> +  exponent[0] = 0x5678;
> +  exponent[1] = 0x1234;
> +
> +  exp_result.val_vull[0] = 0xFEDCBA9876543210ULL;
> +  exp_result.val_vull[1] = 0x123412345678ABCDULL;
> +
> +  result.val_ieee128 = insert_exponent(&significand.val_vint128, &exponent);
> +  
> +  if (result.val_ieee128 != exp_result.val_ieee128)
> +#ifdef DEBUG
> +    {
> +      printf("result.val_vull[0] = 0x%llx, exp_result.val_vull[0] = 0x%llx\n",
> +	     result.val_vull[0], exp_result.val_vull[0]);
> +      printf("result.val_vull[1] = 0x%llx, exp_result.val_vull[1] = 0x%llx\n",
> +	     result.val_vull[1], exp_result.val_vull[1]);
> +    }
> +#else
> +    abort ();
> +#endif
> +
> +}


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH ver 2] rs6000: Add builtins for IEEE 128-bit floating point values
  2023-06-05  8:45 ` Kewen.Lin
@ 2023-06-06 19:54   ` Carl Love
  2023-06-06 19:54   ` [PATCH] " Carl Love
  1 sibling, 0 replies; 9+ messages in thread
From: Carl Love @ 2023-06-06 19:54 UTC (permalink / raw)
  To: Kewen.Lin; +Cc: Peter Bergner, Segher Boessenkool, gcc-patches, cel

Kewen, GCC maintainers:

Version 2, I have addressed the various comments from Kewen.  I had
issues with adding an additional overloaded version of
scalar_insert_exp with vector arguments.  The overload infrastructure
didn't work with a mix of scalar and vector arguments.  I did rename
the __builtin_insertf128_exp to __builtin_vsx_scalar_insert_exp_qp make
it similar to the existing builtin.  I also wasn't able to get the
suggested merge of xsxexpqp_f128_<mode> with xsxexpqp_<mode> to work so
I left the two simpler definitiions.

The patch add three new builtins to extract the significand and
exponent of an IEEE float 128-bit value where the builtin argument is a
vector.  Additionally, a builtin to insert the exponent into an IEEE
float 128-bit vector argument is added.  These builtins were requested
since there is no clean and optimal way to transfer between a vector
and a scalar IEEE 128 bit value.

The patch has been tested on Power 10 with no regressions.  Please let
me know if the patch is acceptable or not.  Thanks.

               Carl

---------------------------------------
rs6000: Add builtins for IEEE 128-bit floating point values

Add support for the following builtins:

 __vector unsigned long long int
     __builtin_scalar_extract_exp_to_vec (__ieee128);
 __vector unsigned __int128
     __builtin_scalar_extract_sig_to_vec (__ieee128);
 __ieee128 __builtin_vsx_scalar_insert_exp_vqp (__vector unsigned __int128,
 			                     __vector unsigned long long);

These builtins were requesed since there is no clean and performant way to
transfer between a vector type and the ieee128 scalar, despite the fact
that both reside in vector registers. Also a union transfer does not work
correctly on most GCC versions.

gcc/
	* config/rs6000/rs6000-buildin.def (__builtin_extractf128_exp,
	 __builtin_extractf128_sig, __builtin_insertf128_exp): Add new
	builtin definitions.
	* config/rs6000.md (extractf128_exp_<mode>, insertf128_exp_<mode>,
	extractf128_sig_<mode>): Add define_expand for new builtins.
	(xsxexpqp_f128_<mode>, xsxsigqp_f128_<mode>, siexpqpf_f128_<mode>):
	Add define_insn for new builtins.
	* doc/extend.texi (__builtin_extractf128_exp, __builtin_extractf128_sig,
	__builtin_insertf128_exp): Add documentation for new builtins.

gcc/testsuite/
	* gcc.target/powerpc/bfp/extract-exp-ieee128.c: New test case.
	* gcc.target/powerpc/bfp/extract-sig-ieee128.c: New test case.
	* gcc.target/powerpc/bfp/insert-exp-ieee128.c: New test case.
---
 gcc/config/rs6000/rs6000-builtins.def         |  9 +++
 gcc/config/rs6000/rs6000-overload.def         |  2 +
 gcc/config/rs6000/vsx.md                      | 31 +++++++++-
 gcc/doc/extend.texi                           | 10 ++++
 .../powerpc/bfp/extract-exp-ieee128.c         | 50 ++++++++++++++++
 .../powerpc/bfp/extract-sig-ieee128.c         | 57 ++++++++++++++++++
 .../powerpc/bfp/insert-exp-ieee128.c          | 58 +++++++++++++++++++
 7 files changed, 216 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c

diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index 638d0bc72ca..92f22481687 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2901,6 +2901,12 @@
   fpmath double __builtin_truncf128_round_to_odd (_Float128);
     TRUNCF128_ODD trunckfdf2_odd {}
 
+  vull __builtin_scalar_extract_exp_to_vec (_Float128);
+    EEXPKF xsxexpqp_f128_kf {}
+
+  vuq __builtin_scalar_extract_sig_to_vec (_Float128);
+    ESIGKF xsxsigqp_f128_kf {}
+
   const signed long long __builtin_vsx_scalar_extract_expq (_Float128);
     VSEEQP xsxexpqp_kf {}
 
@@ -2915,6 +2921,9 @@
                                                       unsigned long long);
     VSIEQPF xsiexpqpf_kf {}
 
+  const _Float128 __builtin_vsx_scalar_insert_exp_vqp (vuq, vull);
+    VSIEDP_VULL xsiexpqpf_f128_kf {}
+
   const signed int __builtin_vsx_scalar_test_data_class_qp (_Float128, \
                                                             const int<7>);
     VSTDCQP xststdcqp_kf {}
diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def
index c582490c084..102ead9f80b 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -4515,6 +4515,8 @@
     VSIEQP
   _Float128 __builtin_vec_scalar_insert_exp (_Float128, unsigned long long);
     VSIEQPF
+  _Float128 __builtin_vsx_scalar_insert_exp_vqp (vuq, vull);
+    VSIEDP_VULL
 
 [VEC_VSTDC, scalar_test_data_class, __builtin_vec_scalar_test_data_class]
   unsigned int __builtin_vec_scalar_test_data_class (float, const int);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 7d845df5c2d..0f6df4bbcf5 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -369,7 +369,7 @@
    UNSPEC_XXSPLTI32DX
    UNSPEC_XXBLEND
    UNSPEC_XXPERMX
-  ])
+])
 
 (define_int_iterator XVCVBF16	[UNSPEC_VSX_XVCVSPBF16
 				 UNSPEC_VSX_XVCVBF16SPN])
@@ -5016,6 +5016,15 @@
   "xsxexpqp %0,%1"
   [(set_attr "type" "vecmove")])
 
+;; VSX Scalar to Vector Extract Exponent IEEE 128-bit floating point format
+(define_insn "xsxexpqp_f128_<mode>"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
+	(unspec:V2DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
+	 UNSPEC_VSX_SXEXPDP))]
+  "TARGET_P9_VECTOR"
+  "xsxexpqp %0,%1"
+  [(set_attr "type" "vecmove")])
+
 ;; VSX Scalar Extract Exponent Double-Precision
 (define_insn "xsxexpdp"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -5034,6 +5043,15 @@
   "xsxsigqp %0,%1"
   [(set_attr "type" "vecmove")])
 
+;; VSX Scalar to Vector Extract Significand IEEE 128-bit floating point format
+(define_insn "xsxsigqp_f128_<mode>"
+  [(set (match_operand:V1TI 0 "altivec_register_operand" "=v")
+	(unspec:V1TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
+	 UNSPEC_VSX_SXSIG))]
+  "TARGET_P9_VECTOR"
+  "xsxsigqp %0,%1"
+  [(set_attr "type" "vecmove")])
+
 ;; VSX Scalar Extract Significand Double-Precision
 (define_insn "xsxsigdp"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -5054,6 +5072,17 @@
   "xsiexpqp %0,%1,%2"
   [(set_attr "type" "vecmove")])
 
+;; VSX Insert Exponent IEEE 128-bit Floating point format
+(define_insn "xsiexpqpf_f128_<mode>"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+	(unspec:IEEE128
+	 [(match_operand:V1TI 1 "altivec_register_operand" "v")
+	  (match_operand:V2DI 2 "altivec_register_operand" "v")]
+	 UNSPEC_VSX_SIEXPQP))]
+  "TARGET_P9_VECTOR"
+  "xsiexpqp %0,%1,%2"
+  [(set_attr "type" "vecmove")])
+
 ;; VSX Scalar Insert Exponent Quad-Precision
 (define_insn "xsiexpqp_<mode>"
   [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index e426a2eb7d8..1d3372d322d 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -19724,6 +19724,10 @@ double scalar_insert_exp (double significand, unsigned long long int exponent);
 ieee_128 scalar_insert_exp (unsigned __int128 significand,
                             unsigned long long int exponent);
 ieee_128 scalar_insert_exp (ieee_128 significand, unsigned long long int exponent);
+vector unsigned long long __builtin_scalar_extract_exp_to_vec (ieee_128);
+vector unsigned __int128  __builtin_scalar_extract_sig_to_vec (ieee_128);
+vector ieee_128 __builtin_scalar_insert_exp_vqp (vector unsigned __int128,
+                                                 vector unsigned long long);
 
 int scalar_cmp_exp_gt (double arg1, double arg2);
 int scalar_cmp_exp_lt (double arg1, double arg2);
@@ -19777,6 +19781,12 @@ The significand and exponent components of the result are composed of
 the least significant 15 bits of the @code{exponent} argument and the
 least significant 112 bits of the @code{significand} argument respectively.
 
+The @code{__builtin_scalar_extract_exp_to_vec},
+@code{__builtin_scalar_extract_sig_to_vec} and
+@code{__builtin_scalar_insert_exp_vqp} are similar to
+@code{scalar_extract_exp}, @code{scalar_extract_sig} and
+@code{scalar_insert_exp} except they operate on vector arguments.
+
 The @code{scalar_cmp_exp_gt}, @code{scalar_cmp_exp_lt},
 @code{scalar_cmp_exp_eq}, and @code{scalar_cmp_exp_unordered} built-in
 functions return a non-zero value if @code{arg1} is greater than, less
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
new file mode 100644
index 00000000000..39981f0a274
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
@@ -0,0 +1,50 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p9vector_hw } */
+/* { dg-options "-mdejagnu-cpu=power9" } */
+
+#include <altivec.h>
+#include <stdlib.h>
+
+#if DEBUG
+#include <stdio.h>
+#endif
+
+vector unsigned long long int
+get_exponents (__ieee128 *p)
+{
+  __ieee128 source = *p;
+
+  return __builtin_scalar_extract_exp_to_vec (source);
+}
+
+int
+main ()
+{
+  vector unsigned long long int result, exp_result;
+  union conv128_t
+   {
+     __ieee128 val_ieee128;
+     __int128  val_int128;
+  } source;
+  
+  exp_result[0] = 0x0ULL;
+  exp_result[1] = 0x1234ULL;
+  source.val_int128 = 0x923456789ABCDEF0ULL;
+  source.val_int128 = (source.val_int128 << 64) | 0x123456789ABCDEFULL;
+
+  result = get_exponents (&source.val_ieee128);
+
+  if ((result[0] != exp_result[0]) || (result[1] != exp_result[1]))
+#if DEBUG
+    {
+      printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n",
+	     result[0], exp_result[0]);
+      printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n",
+	     result[1], exp_result[1]);
+    }
+#else
+    abort();
+#endif
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
new file mode 100644
index 00000000000..f7b3aedb832
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
@@ -0,0 +1,57 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p9vector_hw } */
+/* { dg-options "-mdejagnu-cpu=power9" } */
+
+#include <altivec.h>
+#include <stdlib.h>
+
+#if DEBUG
+#include <stdio.h>
+#endif
+
+vector unsigned __int128
+get_significand (__ieee128 *p)
+{
+  __ieee128 source = *p;
+
+  return __builtin_scalar_extract_sig_to_vec(source);
+}
+
+int
+main ()
+{
+  #define NOT_ZERO_OR_DENORMAL  0x1000000000000
+
+  union conv128_t
+   {
+     __ieee128 val_ieee128;
+     unsigned long long int val_ull[2];
+     unsigned __int128  val_uint128;
+     __vector unsigned __int128  val_vuint128;
+  } source, result, exp_result;
+  
+  /* Result is not zero or denormal.  */
+  exp_result.val_ull[1] = 0x00056789ABCDEF0ULL | NOT_ZERO_OR_DENORMAL;
+  exp_result.val_ull[0] = 0x123456789ABCDEFULL;
+  source.val_uint128 = 0x923456789ABCDEF0ULL;
+  source.val_uint128 = (source.val_uint128 << 64) | 0x123456789ABCDEFULL;
+
+  /* Note, bits[0:14] are set to 0, bit[15] is 0 if the input was zero or
+     Denormal, 1 otherwise.  */
+  result.val_vuint128 = get_significand (&source.val_ieee128);
+
+  if ((result.val_ull[0] != exp_result.val_ull[0])
+      || (result.val_ull[1] != exp_result.val_ull[1]))
+#if DEBUG
+    {
+      printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n",
+	     result.val_ull[0], exp_result.val_ull[0]);
+      printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n",
+	     result.val_ull[1], exp_result.val_ull[1]);
+    }
+#else
+    abort();
+#endif
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
new file mode 100644
index 00000000000..8ec1006f8b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
@@ -0,0 +1,58 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p9vector_hw } */
+/* { dg-options "-mdejagnu-cpu=power9" } */
+
+#include <altivec.h>
+#include <stdlib.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+__ieee128
+insert_exponent (__vector unsigned __int128 *significand_p,
+		 __vector unsigned long long int *exponent_p)
+{
+  __vector unsigned __int128 significand = *significand_p;
+  __vector unsigned long long int exponent = *exponent_p;
+
+  return __builtin_vsx_scalar_insert_exp_vqp (significand, exponent);
+}
+
+int
+main ()
+{
+  union conv128_t
+   {
+     __ieee128 val_ieee128;
+     __vector unsigned __int128 val_vint128;
+     __vector unsigned long long int  val_vull;
+  } result, exp_result, significand;
+
+  __vector unsigned long long int exponent;
+
+  significand.val_vull[0] = 0xFEDCBA9876543210ULL;
+  significand.val_vull[1] = 0x7FFF12345678ABCDULL;  /* positive value */
+
+  exponent[0] = 0x5678;
+  exponent[1] = 0x1234;
+
+  exp_result.val_vull[0] = 0xFEDCBA9876543210ULL;
+  exp_result.val_vull[1] = 0x123412345678ABCDULL;
+
+  result.val_ieee128 = insert_exponent(&significand.val_vint128, &exponent);
+  
+  if (result.val_ieee128 != exp_result.val_ieee128)
+#ifdef DEBUG
+    {
+      printf("result.val_vull[0] = 0x%llx, exp_result.val_vull[0] = 0x%llx\n",
+	     result.val_vull[0], exp_result.val_vull[0]);
+      printf("result.val_vull[1] = 0x%llx, exp_result.val_vull[1] = 0x%llx\n",
+	     result.val_vull[1], exp_result.val_vull[1]);
+    }
+#else
+    abort ();
+#endif
+
+}
-- 
2.37.2



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] rs6000: Add builtins for IEEE 128-bit floating point values
  2023-06-05  8:45 ` Kewen.Lin
  2023-06-06 19:54   ` [PATCH ver 2] " Carl Love
@ 2023-06-06 19:54   ` Carl Love
  2023-06-07  9:36     ` Kewen.Lin
  1 sibling, 1 reply; 9+ messages in thread
From: Carl Love @ 2023-06-06 19:54 UTC (permalink / raw)
  To: Kewen.Lin; +Cc: Peter Bergner, Segher Boessenkool, gcc-patches

On Mon, 2023-06-05 at 16:45 +0800, Kewen.Lin wrote:
> Hi Carl,
> 
> on 2023/5/2 23:52, Carl Love via Gcc-patches wrote:
> > GCC maintainers:
> > 
> > The following patch adds three buitins for inserting and extracting
> > the
> > exponent and significand for an IEEE 128-bit floating point
> > values. 
> > The builtins are valid for Power 9 and Power 10.  
> 
> We already have:
> 
> unsigned long long int scalar_extract_exp (__ieee128 source);
> unsigned __int128 scalar_extract_sig (__ieee128 source);
> ieee_128 scalar_insert_exp (unsigned __int128 significand,
>                             unsigned long long int exponent);
> ieee_128 scalar_insert_exp (ieee_128 significand, unsigned long long
> int exponent);
> 
> you need to say something about the requirements or the justification
> for
> adding more, for this patch itself, some comments are inline below,
> thanks!

I implemented the patch based on a request for the builtins.  It didn't
include any justification so I reached out to Steve Monroe who
requested the builtins to understand why he wanted them.  Here is his
reply:

   Basically there is no clean and performant way to transfer between a
   vector type and the ieee128 scalar, despite the fact that both
   reside in vector registers. Also a union transfer does not work
   correctly on most GCC versions (and will likely break again in the
   next release). I offer the long sad history of the IBM long double
   float runtime.

   Also there are __ieee128 operations that are provided by builtins
   for POWER9 but are not provided by libgcc (for POWER8).

   Finally I can prove that a softfloat __ieee128 implementation using
   VMX integer operations, out-performs the current libgcc
   implementation using DW GPRs.

   The details are in the PVECLIB documentation
   pveclib/vec__f128__ppc.h


> 
> > The patch has been tested on both Power 9 and Power 10.
> > 
> > Please let me know if this patch is acceptable for
> > mainline.  Thanks.
> > 
> >             Carl 
> > 
> > 
> > ----------------------
> > From a20cc81f98cce1140fc95775a7c25b55d1ca7cba Mon Sep 17 00:00:00
> > 2001
> > From: Carl Love <cel@us.ibm.com>
> > Date: Wed, 12 Apr 2023 17:46:37 -0400
> > Subject: [PATCH] rs6000: Add builtins for IEEE 128-bit floating
> > point values
> > 
> > Add support for the following builtins:
> > 
> >  __vector unsigned long long int __builtin_extractf128_exp
> > (__ieee128);
> 
> Could you make the name similar to the existing one?  The existing
> one
>   
>   unsigned long long int scalar_extract_exp (__ieee128 source);
> 
> has nothing like f128 on its name, this variant is just to change the
> return type to vector type, how about scalar_extract_exp_to_vec?

I changed the name  __builtin_extractf128_exp  to
__builtin_scalar_extract_exp_to_vec.

> 
> >  __vector unsigned __int128 __builtin_extractf128_sig (__ieee128);
> 
> Ditto.

I changed the name  __builtin_extractf128_sig to
__builtin_scalar_extract_sig_to_vec.

> 
> >  __ieee128 __builtin_insertf128_exp (__vector unsigned __int128,
> > 			             __vector unsigned long long);
> 
> This one can just overload the existing scalar_insert_exp?


I tried making this one an overloaded version of
scalar_insert_exp.  However, the overload with the vector arguments
isn't recognized when I put the overload definition at the end of the
list of overloads.  When I tried putting the vector version as the
first overloaded definition, I get an internal error
on  __builtin_vsx_scalar_insert_exp_q which is has the same arguments
types but as scalars not vectors.  Best I can tell, there is an issue
with mixing scalar and vector arguments in an overloaded builtin.  

I renamed __builtin_insertf128_exp as
__builtin_vsx_scalar_insert_exp_vqp which is just the vector version of
  the existing __builtin_vsx_scalar_insert_exp_qp builtin.
> 
> gcc/
> > 	* config/rs6000/rs6000-buildin.def (__builtin_extractf128_exp,
> > 	 __builtin_extractf128_sig, __builtin_insertf128_exp): Add new
> > 	builtin definitions.
> > 	* config/rs6000.md (extractf128_exp_<mode>,
> > insertf128_exp_<mode>,
> > 	extractf128_sig_<mode>): Add define_expand for new builtins.
> > 	(xsxexpqp_f128_<mode>, xsxsigqp_f128_<mode>,
> > siexpqpf_f128_<mode>):
> > 	Add define_insn for new builtins.
> > 	* doc/extend.texi (__builtin_extractf128_exp,
> > __builtin_extractf128_sig,
> > 	__builtin_insertf128_exp): Add documentation for new builtins.
> > 
> > gcc/testsuite/
> > 	* gcc.target/powerpc/bfp/extract-exp-ieee128.c: New test case.
> > 	* gcc.target/powerpc/bfp/extract-sig-ieee128.c: New test case.
> > 	* gcc.target/powerpc/bfp/insert-exp-ieee128.c: New test case.
> > ---
> >  gcc/config/rs6000/rs6000-builtins.def         |  9 +++
> >  gcc/config/rs6000/vsx.md                      | 66
> > ++++++++++++++++++-
> >  gcc/doc/extend.texi                           | 28 ++++++++
> >  .../powerpc/bfp/extract-exp-ieee128.c         | 49 ++++++++++++++
> >  .../powerpc/bfp/extract-sig-ieee128.c         | 56
> > ++++++++++++++++
> >  .../powerpc/bfp/insert-exp-ieee128.c          | 58
> > ++++++++++++++++
> >  6 files changed, 265 insertions(+), 1 deletion(-)
> >  create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-
> > exp-ieee128.c
> >  create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-
> > sig-ieee128.c
> >  create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/insert-
> > exp-ieee128.c
> > 
> > diff --git a/gcc/config/rs6000/rs6000-builtins.def
> > b/gcc/config/rs6000/rs6000-builtins.def
> > index 638d0bc72ca..3247a7f7673 100644
> > --- a/gcc/config/rs6000/rs6000-builtins.def
> > +++ b/gcc/config/rs6000/rs6000-builtins.def
> > @@ -2876,6 +2876,15 @@
> >    pure vsc __builtin_vsx_xl_len_r (void *, signed long);
> >      XL_LEN_R xl_len_r {}
> >  
> > +  vull __builtin_extractf128_exp (_Float128);
> > +    EEXPKF extractf128_exp_kf {}
> > +
> > +  vuq __builtin_extractf128_sig (_Float128);
> > +    ESIGKF extractf128_sig_kf {}
> > +
> > +  _Float128 __builtin_insertf128_exp (vuq, vull);
> > +    IEXPKF_VULL insertf128_exp_kf {}
> > +
> 
> Put them to be near its related ones like
> __builtin_vsx_scalar_extract_expq
> etc.
> 

I moved the definitions to be near the other definitions.

> >  
> >  ; Builtins requiring hardware support for IEEE-128 floating-point.
> >  [ieee128-hw]
> > diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> > index 7d845df5c2d..2a9f875ba57 100644
> > --- a/gcc/config/rs6000/vsx.md
> > +++ b/gcc/config/rs6000/vsx.md
> > @@ -369,7 +369,10 @@
> >     UNSPEC_XXSPLTI32DX
> >     UNSPEC_XXBLEND
> >     UNSPEC_XXPERMX
> > -  ])
> > +   UNSPEC_EXTRACTEXPIEEE
> > +   UNSPEC_EXTRACTSIGIEEE
> > +   UNSPEC_INSERTEXPIEEE
> 
> These are not necessary, just use the existing UNSPEC_VSX_SXEXPDP
> etc.

I removed these, as they are not needed.

> 
> > +])
> >  
> >  (define_int_iterator XVCVBF16	[UNSPEC_VSX_XVCVSPBF16
> >  				 UNSPEC_VSX_XVCVBF16SPN])
> > @@ -4155,6 +4158,38 @@
> >   "vins<wd>rx %0,%1,%2"
> >   [(set_attr "type" "vecsimple")])
> >  
> > +(define_expand "extractf128_exp_<mode>"
> > +  [(set (match_operand:V2DI 0 "altivec_register_operand")
> > +  (unspec:IEEE128 [(match_operand:IEEE128 1
> > "altivec_register_operand")]
> > +		  UNSPEC_EXTRACTEXPIEEE))]
> > +"TARGET_P9_VECTOR"
> > +{
> > +  emit_insn (gen_xsxexpqp_f128_<mode> (operands[0], operands[1]));
> > +  DONE;
> > +})
> > +
> > +(define_expand "insertf128_exp_<mode>"
> > +  [(set (match_operand:IEEE128 0 "altivec_register_operand")
> > +  (unspec:IEEE128 [(match_operand:V1TI 1
> > "altivec_register_operand")
> > +		   (match_operand:V2DI 2 "altivec_register_operand")]
> > +		  UNSPEC_INSERTEXPIEEE))]
> > +"TARGET_P9_VECTOR"
> > +{
> > +  emit_insn (gen_xsiexpqpf_f128_<mode> (operands[0], operands[1],
> > +				        operands[2]));
> > +  DONE;
> > +})
> > +
> > +(define_expand "extractf128_sig_<mode>"
> > +  [(set (match_operand:V2DI 0 "altivec_register_operand")
> > +  (unspec:IEEE128 [(match_operand:IEEE128 1
> > "altivec_register_operand")]
> > +		  UNSPEC_EXTRACTSIGIEEE))]
> > +"TARGET_P9_VECTOR"
> > +{
> > +  emit_insn (gen_xsxsigqp_f128_<mode> (operands[0], operands[1]));
> > +  DONE;
> > +})
> 
> These define_expands are not necessary, the called gen functions from
> the
> define_insns can be directly used as bif pattern in rs6000-
> builtins.def.

Right, I just mapped directly to the instruction the above were
calling.
> 
> > +
> >  (define_expand "vreplace_elt_<mode>"
> >    [(set (match_operand:REPLACE_ELT 0 "register_operand")
> >    (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1
> > "register_operand")
> > @@ -5016,6 +5051,15 @@
> >    "xsxexpqp %0,%1"
> >    [(set_attr "type" "vecmove")])
> >  
> > +;; VSX Scalar to Vector Extract Exponent IEEE 128-bit floating
> > point format
> > +(define_insn "xsxexpqp_f128_<mode>"
> > +  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
> > +	(unspec:V2DI [(match_operand:IEEE128 1
> > "altivec_register_operand" "v")]
> > +	 UNSPEC_VSX_SXEXPDP))]
> > +  "TARGET_P9_VECTOR"
> > +  "xsxexpqp %0,%1"
> > +  [(set_attr "type" "vecmove")])
> 
> I think you can just merge this into the existing xsxexpqp_<mode> by
> extending
> with one mode like xsxexpqp_<IEEE128:mode>_<xxx:mode> for unspec:xxx.
> 
> xxx can be one mode iterator for DI and V2DI.


OK, I played with this, it is rather sophisticated syntax.  I didn't
seem to be able to get the syntax correct and thus get all this to
work.  It also seemed to require updating some case statements for the
instructions in various files.  I opted to not change this and just
keep it simple.  Not sure if that is OK or not?

> 
> > +
> >  ;; VSX Scalar Extract Exponent Double-Precision
> >  (define_insn "xsxexpdp"
> >    [(set (match_operand:DI 0 "register_operand" "=r")
> > @@ -5034,6 +5078,15 @@
> >    "xsxsigqp %0,%1"
> >    [(set_attr "type" "vecmove")])
> >  
> > +;; VSX Scalar to Vector Extract Significand IEEE 128-bit floating
> > point format
> > +(define_insn "xsxsigqp_f128_<mode>"
> > +  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
> > +	(unspec:V2DI [(match_operand:IEEE128 1
> > "altivec_register_operand" "v")]
> > +	 UNSPEC_VSX_SXSIG))]
> > +  "TARGET_P9_VECTOR"
> > +  "xsxsigqp %0,%1"
> > +  [(set_attr "type" "vecmove")])
> 
> Ditto, the mode V2DI looks unexpected, V1TI instead?

Yes, that is wrong.  Fixed.

> 
> > +
> >  ;; VSX Scalar Extract Significand Double-Precision
> >  (define_insn "xsxsigdp"
> >    [(set (match_operand:DI 0 "register_operand" "=r")
> > @@ -5054,6 +5107,17 @@
> >    "xsiexpqp %0,%1,%2"
> >    [(set_attr "type" "vecmove")])
> >  
> > +;; VSX Insert Exponent IEEE 128-bit Floating point format
> > +(define_insn "xsiexpqpf_f128_<mode>"
> > +  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
> > +	(unspec:IEEE128
> > +	 [(match_operand:V1TI 1 "altivec_register_operand" "v")
> > +	  (match_operand:V2DI 2 "altivec_register_operand" "v")]
> > +	 UNSPEC_VSX_SIEXPQP))]
> > +  "TARGET_P9_VECTOR"
> > +  "xsiexpqp %0,%1,%2"
> > +  [(set_attr "type" "vecmove")]> +
> >  ;; VSX Scalar Insert Exponent Quad-Precision
> >  (define_insn "xsiexpqp_<mode>"
> >    [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
> > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> > index e426a2eb7d8..456e5a03d69 100644
> > --- a/gcc/doc/extend.texi
> > +++ b/gcc/doc/extend.texi
> > @@ -18511,6 +18511,34 @@ the FPSCR.  The instruction is a lower
> > latency version of the @code{mffs}
> >  instruction.  If the @code{mffsl} instruction is not available,
> > then the
> >  builtin uses the older @code{mffs} instruction to read the FPSCR.
> >  
> > +@smallexample
> > +@exdent vector unsigned long long int
> > +@exdent __builtin_extractf128_exp (__ieee128);
> > +@end smallexample
> > +
> > +The exponent from the IEEE 128-bit floating point value is
> > returned in the
> > +lower bits of vector element 1 on Little Endian systems.
> > +
> > +@smallexample
> > +@exdent vector unsigned __int128
> > +@exdent __builtin_extractf128_sig (__ieee128);
> > +@end smallexample
> > +
> > +The significand from the IEEE 128-bit floating point value is
> > returned in the
> > +vector element 0 bits [111:0]. Bit 112 is set to 0 if the input
> > value was zero
> > +or Denormal, 1 otherwise.  Bits [127:113] are set to zero.
> > +
> > +@smallexample
> > +@exdent _ieee128
> > +@exdent __builtin_insertf128_exp (vector unsigned __int128,
> > +vector unsigned long long int);
> > +@end smallexample
> > +
> > +The first argument contains the significand in bits [111:0] for
> > the IEEE
> > +128-bi floating point tresult and the sign bit [127] for the
> > result.  The
> > +second argument contains the exponenet bits for the result in bits
> > [14:0] of
> > +vector element 1.
> 
> Just put the new prototypes after the existing ones and extend the
> current related
> documentation if needed.


Updated the documentation with the new names putting it in the same
place as the related builtins.

> 
> > +
> >  @node Basic PowerPC Built-in Functions Available on ISA 3.1
> >  @subsubsection Basic PowerPC Built-in Functions Available on ISA
> > 3.1
> >  
> > diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-
> > ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-
> > ieee128.c
> > new file mode 100644
> > index 00000000000..47e2c43962f
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
> > @@ -0,0 +1,49 @@
> > +/* { dg-do run { target { powerpc*-*-* } } } */> +/* { dg-require-
> > effective-target power10_ok } */
> 
> It needs _hw for run and it doesn't require power10, so p9vector_hw.
> 
> > +/* { dg-options "-mdejagnu-cpu=power9" } */
> 
> You can refer to the conditions for the existing test cases on
> ieee128
> extract_{exp,sig}, insert_..., the underlying insns of these newly
> introduced are the same, the condition should be the same.
> 

Took a look at the other test cases and made the conditions the
same.  Did that for all three new test cases.


> BR,
> Kewen
> 
> > +
> > +#include <altivec.h>
> > +#include <stdlib.h>
> > +
> > +#if DEBUG
> > +#include <stdio.h>
> > +#endif
> > +
> > +vector unsigned long long int
> > +get_exponents (__ieee128 *p)
> > +{
> > +  __ieee128 source = *p;
> > +
> > +  return __builtin_extractf128_exp (source);
> > +}
> > +
> > +int
> > +main ()
> > +{
> > +  vector unsigned long long int result, exp_result;
> > +  union conv128_t
> > +   {
> > +     __ieee128 val_ieee128;
> > +     __int128  val_int128;
> > +  } source;
> > +  
> > +  exp_result[0] = 0x0ULL;
> > +  exp_result[1] = 0x1234ULL;
> > +  source.val_int128 = 0x923456789ABCDEF0ULL;
> > +  source.val_int128 = (source.val_int128 << 64) |
> > 0x123456789ABCDEFULL;
> > +
> > +  result = get_exponents (&source.val_ieee128);
> > +
> > +  if ((result[0] != exp_result[0]) || (result[1] !=
> > exp_result[1]))
> > +#if DEBUG
> > +    {
> > +      printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n",
> > +	     result[0], exp_result[0]);
> > +      printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n",
> > +	     result[1], exp_result[1]);
> > +    }
> > +#else
> > +    abort();
> > +#endif
> > +  return 0;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-
> > ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-
> > ieee128.c
> > new file mode 100644
> > index 00000000000..4bd50ca2281
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
> > @@ -0,0 +1,56 @@
> > +/* { dg-do run { target { powerpc*-*-* } } } */
> > +/* { dg-require-effective-target power10_ok } */
> > +/* { dg-options "-mdejagnu-cpu=power9" } */
> > +
> > +#include <altivec.h>
> > +#include <stdlib.h>
> > +
> > +#if DEBUG
> > +#include <stdio.h>
> > +#endif
> > +
> > +vector unsigned __int128
> > +get_significand (__ieee128 *p)
> > +{
> > +  __ieee128 source = *p;
> > +
> > +  return __builtin_extractf128_sig (source);
> > +}
> > +
> > +int
> > +main ()
> > +{
> > +  #define NOT_ZERO_OR_DENORMAL  0x1000000000000
> > +
> > +  union conv128_t
> > +   {
> > +     __ieee128 val_ieee128;
> > +     unsigned long long int val_ull[2];
> > +     unsigned __int128  val_uint128;
> > +     __vector unsigned __int128  val_vuint128;
> > +  } source, result, exp_result;
> > +  
> > +  /* Result is not zero or denormal.  */
> > +  exp_result.val_ull[1] = 0x00056789ABCDEF0ULL |
> > NOT_ZERO_OR_DENORMAL;
> > +  exp_result.val_ull[0] = 0x123456789ABCDEFULL;
> > +  source.val_uint128 = 0x923456789ABCDEF0ULL;
> > +  source.val_uint128 = (source.val_uint128 << 64) |
> > 0x123456789ABCDEFULL;
> > +
> > +  /* Note, bits[0:14] are set to 0, bit[15] is 0 if the input was
> > zero or
> > +     Denormal, 1 otherwise.  */
> > +  result.val_vuint128 = get_significand (&source.val_ieee128);
> > +
> > +  if ((result.val_ull[0] != exp_result.val_ull[0])
> > +      || (result.val_ull[1] != exp_result.val_ull[1]))
> > +#if DEBUG
> > +    {
> > +      printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n",
> > +	     result.val_ull[0], exp_result.val_ull[0]);
> > +      printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n",
> > +	     result.val_ull[1], exp_result.val_ull[1]);
> > +    }
> > +#else
> > +    abort();
> > +#endif
> > +  return 0;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-
> > ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-
> > ieee128.c
> > new file mode 100644
> > index 00000000000..5bd33ed5b7d
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
> > @@ -0,0 +1,58 @@
> > +/* { dg-do run { target { powerpc*-*-* } } } */
> > +/* { dg-require-effective-target power10_ok } */
> > +/* { dg-options "-mdejagnu-cpu=power9" } */
> > +
> > +#include <altivec.h>
> > +#include <stdlib.h>
> > +
> > +#ifdef DEBUG
> > +#include <stdio.h>
> > +#endif
> > +
> > +__ieee128
> > +insert_exponent (__vector unsigned __int128 *significand_p,
> > +		 __vector unsigned long long int *exponent_p)
> > +{
> > +  __vector unsigned __int128 significand = *significand_p;
> > +  __vector unsigned long long int exponent = *exponent_p;
> > +
> > +  return __builtin_insertf128_exp (significand, exponent);
> > +}
> > +
> > +
> > +int
> > +main ()
> > +{
> > +  union conv128_t
> > +   {
> > +     __ieee128 val_ieee128;
> > +     __vector unsigned __int128 val_vint128;
> > +     __vector unsigned long long int  val_vull;
> > +  } result, exp_result, significand;
> > +
> > +  __vector unsigned long long int exponent;
> > +
> > +  significand.val_vull[0] = 0xFEDCBA9876543210ULL;
> > +  significand.val_vull[1] = 0x7FFF12345678ABCDULL;  /* positive
> > value */
> > +
> > +  exponent[0] = 0x5678;
> > +  exponent[1] = 0x1234;
> > +
> > +  exp_result.val_vull[0] = 0xFEDCBA9876543210ULL;
> > +  exp_result.val_vull[1] = 0x123412345678ABCDULL;
> > +
> > +  result.val_ieee128 = insert_exponent(&significand.val_vint128,
> > &exponent);
> > +  
> > +  if (result.val_ieee128 != exp_result.val_ieee128)
> > +#ifdef DEBUG
> > +    {
> > +      printf("result.val_vull[0] = 0x%llx, exp_result.val_vull[0]
> > = 0x%llx\n",
> > +	     result.val_vull[0], exp_result.val_vull[0]);
> > +      printf("result.val_vull[1] = 0x%llx, exp_result.val_vull[1]
> > = 0x%llx\n",
> > +	     result.val_vull[1], exp_result.val_vull[1]);
> > +    }
> > +#else
> > +    abort ();
> > +#endif
> > +
> > +}


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] rs6000: Add builtins for IEEE 128-bit floating point values
  2023-06-06 19:54   ` [PATCH] " Carl Love
@ 2023-06-07  9:36     ` Kewen.Lin
  2023-06-08 15:21       ` Carl Love
  2023-06-08 15:21       ` [PATCH ver 3] " Carl Love
  0 siblings, 2 replies; 9+ messages in thread
From: Kewen.Lin @ 2023-06-07  9:36 UTC (permalink / raw)
  To: Carl Love; +Cc: Peter Bergner, Segher Boessenkool, gcc-patches

Hi,

on 2023/6/7 03:54, Carl Love wrote:
> On Mon, 2023-06-05 at 16:45 +0800, Kewen.Lin wrote:
>> Hi Carl,
>>
>> on 2023/5/2 23:52, Carl Love via Gcc-patches wrote:
>>> GCC maintainers:
>>>
>>> The following patch adds three buitins for inserting and extracting
>>> the
>>> exponent and significand for an IEEE 128-bit floating point
>>> values. 
>>> The builtins are valid for Power 9 and Power 10.  
>>
>> We already have:
>>
>> unsigned long long int scalar_extract_exp (__ieee128 source);
>> unsigned __int128 scalar_extract_sig (__ieee128 source);
>> ieee_128 scalar_insert_exp (unsigned __int128 significand,
>>                             unsigned long long int exponent);
>> ieee_128 scalar_insert_exp (ieee_128 significand, unsigned long long
>> int exponent);
>>
>> you need to say something about the requirements or the justification
>> for
>> adding more, for this patch itself, some comments are inline below,
>> thanks!
> 
> I implemented the patch based on a request for the builtins.  It didn't
> include any justification so I reached out to Steve Monroe who
> requested the builtins to understand why he wanted them.  Here is his
> reply:
> 
>    Basically there is no clean and performant way to transfer between a
>    vector type and the ieee128 scalar, despite the fact that both
>    reside in vector registers. Also a union transfer does not work
>    correctly on most GCC versions (and will likely break again in the
>    next release). I offer the long sad history of the IBM long double
>    float runtime.

Thanks for clarifying this.  As the proposed changes, I think he meant
to say "Basically there is no clean and performant way to transfer between
a vector type and the scalar **types**". :) Because the proposed changes
are:
  scalar_extract_exp: unsigned long long => vector unsigned long long
  scalar_extract_sig: unsigned __int128  => vector unsigned __int128
  scalar_insert_exp: unsigned __int128 => vector unsigned __int128
                     unsigned long long => vector unsigned long long.

> 
>    Also there are __ieee128 operations that are provided by builtins
>    for POWER9 but are not provided by libgcc (for POWER8).
> 
>    Finally I can prove that a softfloat __ieee128 implementation using
>    VMX integer operations, out-performs the current libgcc
>    implementation using DW GPRs.
> 
>    The details are in the PVECLIB documentation
>    pveclib/vec__f128__ppc.h
> 
> 
>>
>>> The patch has been tested on both Power 9 and Power 10.
>>>
>>> Please let me know if this patch is acceptable for
>>> mainline.  Thanks.
>>>
>>>             Carl 
>>>
>>>
>>> ----------------------
>>> From a20cc81f98cce1140fc95775a7c25b55d1ca7cba Mon Sep 17 00:00:00
>>> 2001
>>> From: Carl Love <cel@us.ibm.com>
>>> Date: Wed, 12 Apr 2023 17:46:37 -0400
>>> Subject: [PATCH] rs6000: Add builtins for IEEE 128-bit floating
>>> point values
>>>
>>> Add support for the following builtins:
>>>
>>>  __vector unsigned long long int __builtin_extractf128_exp
>>> (__ieee128);
>>
>> Could you make the name similar to the existing one?  The existing
>> one
>>   
>>   unsigned long long int scalar_extract_exp (__ieee128 source);
>>
>> has nothing like f128 on its name, this variant is just to change the
>> return type to vector type, how about scalar_extract_exp_to_vec?
> 
> I changed the name  __builtin_extractf128_exp  to
> __builtin_scalar_extract_exp_to_vec.
> 
>>
>>>  __vector unsigned __int128 __builtin_extractf128_sig (__ieee128);
>>
>> Ditto.
> 
> I changed the name  __builtin_extractf128_sig to
> __builtin_scalar_extract_sig_to_vec.
> 
>>
>>>  __ieee128 __builtin_insertf128_exp (__vector unsigned __int128,
>>> 			             __vector unsigned long long);
>>
>> This one can just overload the existing scalar_insert_exp?
> 
> 
> I tried making this one an overloaded version of
> scalar_insert_exp.  However, the overload with the vector arguments
> isn't recognized when I put the overload definition at the end of the
> list of overloads.  When I tried putting the vector version as the
> first overloaded definition, I get an internal error
> on  __builtin_vsx_scalar_insert_exp_q which is has the same arguments
> types but as scalars not vectors.  Best I can tell, there is an issue
> with mixing scalar and vector arguments in an overloaded builtin.

No, it's not due to mixing scalar and vector arguments, I looked into
this and found we have some special handling for this builtin in
altivec_resolve_overloaded_builtin, see the code:

    case RS6000_OVLD_VEC_VSIE:
      {
	machine_mode arg1_mode = TYPE_MODE (types[0]);

	/* If supplied first argument is wider than 64 bits, resolve to
	   128-bit variant of built-in function.  */
	if (GET_MODE_PRECISION (arg1_mode) > 64)
	  {
	    /* If first argument is of float variety, choose variant
	       that expects __ieee128 argument.  Otherwise, expect
	       __int128 argument.  */
	    if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
	      instance_code = RS6000_BIF_VSIEQPF;
	    else
	      instance_code = RS6000_BIF_VSIEQP;
	  }
          ....
 
When the 1st arg's precision is larger than 64, it just picks up
RS6000_BIF_VSIEQPF or RS6000_BIF_VSIEQP, you need to update this for
what you newly added.  Something like (against your v2):

   const _Float128 __builtin_vsx_scalar_insert_exp_vqp (vuq, vull);
-    VSIEDP_VULL xsiexpqpf_f128_kf {}
+    VSIEQPV xsiexpqpf_f128_kf {}


@@ -1934,6 +1934,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
                __int128 argument.  */
             if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
               instance_code = RS6000_BIF_VSIEQPF;
+            else if (VECTOR_MODE_P (arg1_mode))
+              instance_code = RS6000_BIF_VSIEQPV;
             else
               instance_code = RS6000_BIF_VSIEQP;
           }

diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def
index 102ead9f80b..84743114c8e 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -4515,8 +4515,8 @@
     VSIEQP
   _Float128 __builtin_vec_scalar_insert_exp (_Float128, unsigned long long);
     VSIEQPF
-  _Float128 __builtin_vsx_scalar_insert_exp_vqp (vuq, vull);
-    VSIEDP_VULL
+  _Float128 __builtin_vec_scalar_insert_exp (vuq, vull);
+    VSIEQPV

> 
> I renamed __builtin_insertf128_exp as
> __builtin_vsx_scalar_insert_exp_vqp which is just the vector version of
>   the existing __builtin_vsx_scalar_insert_exp_qp builtin.
>>
>> gcc/
>>> 	* config/rs6000/rs6000-buildin.def (__builtin_extractf128_exp,
>>> 	 __builtin_extractf128_sig, __builtin_insertf128_exp): Add new
>>> 	builtin definitions.
>>> 	* config/rs6000.md (extractf128_exp_<mode>,
>>> insertf128_exp_<mode>,
>>> 	extractf128_sig_<mode>): Add define_expand for new builtins.
>>> 	(xsxexpqp_f128_<mode>, xsxsigqp_f128_<mode>,
>>> siexpqpf_f128_<mode>):
>>> 	Add define_insn for new builtins.
>>> 	* doc/extend.texi (__builtin_extractf128_exp,
>>> __builtin_extractf128_sig,
>>> 	__builtin_insertf128_exp): Add documentation for new builtins.
>>>
>>> gcc/testsuite/
>>> 	* gcc.target/powerpc/bfp/extract-exp-ieee128.c: New test case.
>>> 	* gcc.target/powerpc/bfp/extract-sig-ieee128.c: New test case.
>>> 	* gcc.target/powerpc/bfp/insert-exp-ieee128.c: New test case.
>>> ---
>>>  gcc/config/rs6000/rs6000-builtins.def         |  9 +++
>>>  gcc/config/rs6000/vsx.md                      | 66
>>> ++++++++++++++++++-
>>>  gcc/doc/extend.texi                           | 28 ++++++++
>>>  .../powerpc/bfp/extract-exp-ieee128.c         | 49 ++++++++++++++
>>>  .../powerpc/bfp/extract-sig-ieee128.c         | 56
>>> ++++++++++++++++
>>>  .../powerpc/bfp/insert-exp-ieee128.c          | 58
>>> ++++++++++++++++
>>>  6 files changed, 265 insertions(+), 1 deletion(-)
>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-
>>> exp-ieee128.c
>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-
>>> sig-ieee128.c
>>>  create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/insert-
>>> exp-ieee128.c
>>>
>>> diff --git a/gcc/config/rs6000/rs6000-builtins.def
>>> b/gcc/config/rs6000/rs6000-builtins.def
>>> index 638d0bc72ca..3247a7f7673 100644
>>> --- a/gcc/config/rs6000/rs6000-builtins.def
>>> +++ b/gcc/config/rs6000/rs6000-builtins.def
>>> @@ -2876,6 +2876,15 @@
>>>    pure vsc __builtin_vsx_xl_len_r (void *, signed long);
>>>      XL_LEN_R xl_len_r {}
>>>  
>>> +  vull __builtin_extractf128_exp (_Float128);
>>> +    EEXPKF extractf128_exp_kf {}
>>> +
>>> +  vuq __builtin_extractf128_sig (_Float128);
>>> +    ESIGKF extractf128_sig_kf {}
>>> +
>>> +  _Float128 __builtin_insertf128_exp (vuq, vull);
>>> +    IEXPKF_VULL insertf128_exp_kf {}
>>> +
>>
>> Put them to be near its related ones like
>> __builtin_vsx_scalar_extract_expq
>> etc.
>>
> 
> I moved the definitions to be near the other definitions.
> 
>>>  
>>>  ; Builtins requiring hardware support for IEEE-128 floating-point.
>>>  [ieee128-hw]
>>> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
>>> index 7d845df5c2d..2a9f875ba57 100644
>>> --- a/gcc/config/rs6000/vsx.md
>>> +++ b/gcc/config/rs6000/vsx.md
>>> @@ -369,7 +369,10 @@
>>>     UNSPEC_XXSPLTI32DX
>>>     UNSPEC_XXBLEND
>>>     UNSPEC_XXPERMX
>>> -  ])
>>> +   UNSPEC_EXTRACTEXPIEEE
>>> +   UNSPEC_EXTRACTSIGIEEE
>>> +   UNSPEC_INSERTEXPIEEE
>>
>> These are not necessary, just use the existing UNSPEC_VSX_SXEXPDP
>> etc.
> 
> I removed these, as they are not needed.
> 
>>
>>> +])
>>>  
>>>  (define_int_iterator XVCVBF16	[UNSPEC_VSX_XVCVSPBF16
>>>  				 UNSPEC_VSX_XVCVBF16SPN])
>>> @@ -4155,6 +4158,38 @@
>>>   "vins<wd>rx %0,%1,%2"
>>>   [(set_attr "type" "vecsimple")])
>>>  
>>> +(define_expand "extractf128_exp_<mode>"
>>> +  [(set (match_operand:V2DI 0 "altivec_register_operand")
>>> +  (unspec:IEEE128 [(match_operand:IEEE128 1
>>> "altivec_register_operand")]
>>> +		  UNSPEC_EXTRACTEXPIEEE))]
>>> +"TARGET_P9_VECTOR"
>>> +{
>>> +  emit_insn (gen_xsxexpqp_f128_<mode> (operands[0], operands[1]));
>>> +  DONE;
>>> +})
>>> +
>>> +(define_expand "insertf128_exp_<mode>"
>>> +  [(set (match_operand:IEEE128 0 "altivec_register_operand")
>>> +  (unspec:IEEE128 [(match_operand:V1TI 1
>>> "altivec_register_operand")
>>> +		   (match_operand:V2DI 2 "altivec_register_operand")]
>>> +		  UNSPEC_INSERTEXPIEEE))]
>>> +"TARGET_P9_VECTOR"
>>> +{
>>> +  emit_insn (gen_xsiexpqpf_f128_<mode> (operands[0], operands[1],
>>> +				        operands[2]));
>>> +  DONE;
>>> +})
>>> +
>>> +(define_expand "extractf128_sig_<mode>"
>>> +  [(set (match_operand:V2DI 0 "altivec_register_operand")
>>> +  (unspec:IEEE128 [(match_operand:IEEE128 1
>>> "altivec_register_operand")]
>>> +		  UNSPEC_EXTRACTSIGIEEE))]
>>> +"TARGET_P9_VECTOR"
>>> +{
>>> +  emit_insn (gen_xsxsigqp_f128_<mode> (operands[0], operands[1]));
>>> +  DONE;
>>> +})
>>
>> These define_expands are not necessary, the called gen functions from
>> the
>> define_insns can be directly used as bif pattern in rs6000-
>> builtins.def.
> 
> Right, I just mapped directly to the instruction the above were
> calling.
>>
>>> +
>>>  (define_expand "vreplace_elt_<mode>"
>>>    [(set (match_operand:REPLACE_ELT 0 "register_operand")
>>>    (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1
>>> "register_operand")
>>> @@ -5016,6 +5051,15 @@
>>>    "xsxexpqp %0,%1"
>>>    [(set_attr "type" "vecmove")])
>>>  
>>> +;; VSX Scalar to Vector Extract Exponent IEEE 128-bit floating
>>> point format
>>> +(define_insn "xsxexpqp_f128_<mode>"
>>> +  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
>>> +	(unspec:V2DI [(match_operand:IEEE128 1
>>> "altivec_register_operand" "v")]
>>> +	 UNSPEC_VSX_SXEXPDP))]
>>> +  "TARGET_P9_VECTOR"
>>> +  "xsxexpqp %0,%1"
>>> +  [(set_attr "type" "vecmove")])
>>
>> I think you can just merge this into the existing xsxexpqp_<mode> by
>> extending
>> with one mode like xsxexpqp_<IEEE128:mode>_<xxx:mode> for unspec:xxx.
>>
>> xxx can be one mode iterator for DI and V2DI.
> 
> 
> OK, I played with this, it is rather sophisticated syntax.  I didn't
> seem to be able to get the syntax correct and thus get all this to
> work.  It also seemed to require updating some case statements for the
> instructions in various files.  I opted to not change this and just
> keep it simple.  Not sure if that is OK or not?
> 

I meant something like:

(define_mode_iterator VSEEQP_DI  [V2DI DI])

;; VSX Scalar Extract Exponent Quad-Precision
(define_insn "xsxexpqp_<IEEE128:mode>_<VSEEQP_DI:mode>"
  [(set (match_operand:VSEEQP_DI 0 "altivec_register_operand" "=v")
        (unspec:VSEEQP_DI
          [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
          UNSPEC_VSX_SXEXPDP))]
  "TARGET_P9_VECTOR"
  "xsxexpqp %0,%1"
  [(set_attr "type" "vecmove")])

since what you need just one different mode from DI for the operand 0,
using one mode iterator can make you not need to duplicate it with
a different mode.  With above, you can use xsxexpqp_kf_di for the
previous xsxexpqp_kf, and xsxexpqp_kf_v2di for your proposed
__builtin_scalar_extract_exp_to_vec.

BR,
Kewen

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] rs6000: Add builtins for IEEE 128-bit floating point values
  2023-06-07  9:36     ` Kewen.Lin
@ 2023-06-08 15:21       ` Carl Love
  2023-06-08 15:21       ` [PATCH ver 3] " Carl Love
  1 sibling, 0 replies; 9+ messages in thread
From: Carl Love @ 2023-06-08 15:21 UTC (permalink / raw)
  To: Kewen.Lin, cel; +Cc: Peter Bergner, Segher Boessenkool, gcc-patches


Kewen:
On Wed, 2023-06-07 at 17:36 +0800, Kewen.Lin wrote:
> Hi,
> 
> on 2023/6/7 03:54, Carl Love wrote:
> > On Mon, 2023-06-05 at 16:45 +0800, Kewen.Lin wrote:
> > > Hi Carl,
> > > 
> > > on 2023/5/2 23:52, Carl Love via Gcc-patches wrote:
> > > > GCC maintainers:
> > > > 
> > > > The following patch adds three buitins for inserting and
> > > > extracting
> > > > the
> > > > exponent and significand for an IEEE 128-bit floating point
> > > > values. 
> > > > The builtins are valid for Power 9 and Power 10.  
> > > 
> > > We already have:
> > > 
> > > unsigned long long int scalar_extract_exp (__ieee128 source);
> > > unsigned __int128 scalar_extract_sig (__ieee128 source);
> > > ieee_128 scalar_insert_exp (unsigned __int128 significand,
> > >                             unsigned long long int exponent);
> > > ieee_128 scalar_insert_exp (ieee_128 significand, unsigned long
> > > long
> > > int exponent);
> > > 
> > > you need to say something about the requirements or the
> > > justification
> > > for
> > > adding more, for this patch itself, some comments are inline
> > > below,
> > > thanks!
> > 
> > I implemented the patch based on a request for the builtins.  It
> > didn't
> > include any justification so I reached out to Steve Monroe who
> > requested the builtins to understand why he wanted them.  Here is
> > his
> > reply:
> > 
> >    Basically there is no clean and performant way to transfer
> > between a
> >    vector type and the ieee128 scalar, despite the fact that both
> >    reside in vector registers. Also a union transfer does not work
> >    correctly on most GCC versions (and will likely break again in
> > the
> >    next release). I offer the long sad history of the IBM long
> > double
> >    float runtime.
> 
> Thanks for clarifying this.  As the proposed changes, I think he
> meant
> to say "Basically there is no clean and performant way to transfer
> between
> a vector type and the scalar **types**". :) Because the proposed
> changes
> are:
>   scalar_extract_exp: unsigned long long => vector unsigned long long
>   scalar_extract_sig: unsigned __int128  => vector unsigned __int128
>   scalar_insert_exp: unsigned __int128 => vector unsigned __int128
>                      unsigned long long => vector unsigned long long.
> 
> >    Also there are __ieee128 operations that are provided by
> > builtins
> >    for POWER9 but are not provided by libgcc (for POWER8).
> > 
> >    Finally I can prove that a softfloat __ieee128 implementation
> > using
> >    VMX integer operations, out-performs the current libgcc
> >    implementation using DW GPRs.
> > 
> >    The details are in the PVECLIB documentation
> >    pveclib/vec__f128__ppc.h
> > 
> > 
> > > > The patch has been tested on both Power 9 and Power 10.
> > > > 
> > > > Please let me know if this patch is acceptable for
> > > > mainline.  Thanks.
> > > > 
> > > >             Carl 
> > > > 
> > > > 
> > > > ----------------------
> > > > From a20cc81f98cce1140fc95775a7c25b55d1ca7cba Mon Sep 17
> > > > 00:00:00
> > > > 2001
> > > > From: Carl Love <cel@us.ibm.com>
> > > > Date: Wed, 12 Apr 2023 17:46:37 -0400
> > > > Subject: [PATCH] rs6000: Add builtins for IEEE 128-bit floating
> > > > point values
> > > > 
> > > > Add support for the following builtins:
> > > > 
> > > >  __vector unsigned long long int __builtin_extractf128_exp
> > > > (__ieee128);
> > > 
> > > Could you make the name similar to the existing one?  The
> > > existing
> > > one
> > >   
> > >   unsigned long long int scalar_extract_exp (__ieee128 source);
> > > 
> > > has nothing like f128 on its name, this variant is just to change
> > > the
> > > return type to vector type, how about scalar_extract_exp_to_vec?
> > 
> > I changed the name  __builtin_extractf128_exp  to
> > __builtin_scalar_extract_exp_to_vec.
> > 
> > > >  __vector unsigned __int128 __builtin_extractf128_sig
> > > > (__ieee128);
> > > 
> > > Ditto.
> > 
> > I changed the name  __builtin_extractf128_sig to
> > __builtin_scalar_extract_sig_to_vec.
> > 
> > > >  __ieee128 __builtin_insertf128_exp (__vector unsigned
> > > > __int128,
> > > > 			             __vector unsigned long
> > > > long);
> > > 
> > > This one can just overload the existing scalar_insert_exp?
> > 
> > I tried making this one an overloaded version of
> > scalar_insert_exp.  However, the overload with the vector arguments
> > isn't recognized when I put the overload definition at the end of
> > the
> > list of overloads.  When I tried putting the vector version as the
> > first overloaded definition, I get an internal error
> > on  __builtin_vsx_scalar_insert_exp_q which is has the same
> > arguments
> > types but as scalars not vectors.  Best I can tell, there is an
> > issue
> > with mixing scalar and vector arguments in an overloaded builtin.
> 
> No, it's not due to mixing scalar and vector arguments, I looked into
> this and found we have some special handling for this builtin in
> altivec_resolve_overloaded_builtin, see the code:
> 
>     case RS6000_OVLD_VEC_VSIE:
>       {
> 	machine_mode arg1_mode = TYPE_MODE (types[0]);
> 
> 	/* If supplied first argument is wider than 64 bits, resolve to
> 	   128-bit variant of built-in function.  */
> 	if (GET_MODE_PRECISION (arg1_mode) > 64)
> 	  {
> 	    /* If first argument is of float variety, choose variant
> 	       that expects __ieee128 argument.  Otherwise, expect
> 	       __int128 argument.  */
> 	    if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
> 	      instance_code = RS6000_BIF_VSIEQPF;
> 	    else
> 	      instance_code = RS6000_BIF_VSIEQP;
> 	  }
>           ....
> 
> When the 1st arg's precision is larger than 64, it just picks up
> RS6000_BIF_VSIEQPF or RS6000_BIF_VSIEQP, you need to update this for
> what you newly added.  Something like (against your v2):
> 
>    const _Float128 __builtin_vsx_scalar_insert_exp_vqp (vuq, vull);
> -    VSIEDP_VULL xsiexpqpf_f128_kf {}
> +    VSIEQPV xsiexpqpf_f128_kf {}
> 
> 
> @@ -1934,6 +1934,8 @@ altivec_resolve_overloaded_builtin (location_t
> loc, tree fndecl,
>                 __int128 argument.  */
>              if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
>                instance_code = RS6000_BIF_VSIEQPF;
> +            else if (VECTOR_MODE_P (arg1_mode))
> +              instance_code = RS6000_BIF_VSIEQPV;
>              else
>                instance_code = RS6000_BIF_VSIEQP;
>            }
> 
> diff --git a/gcc/config/rs6000/rs6000-overload.def
> b/gcc/config/rs6000/rs6000-overload.def
> index 102ead9f80b..84743114c8e 100644
> --- a/gcc/config/rs6000/rs6000-overload.def
> +++ b/gcc/config/rs6000/rs6000-overload.def
> @@ -4515,8 +4515,8 @@
>      VSIEQP
>    _Float128 __builtin_vec_scalar_insert_exp (_Float128, unsigned
> long long);
>      VSIEQPF
> -  _Float128 __builtin_vsx_scalar_insert_exp_vqp (vuq, vull);
> -    VSIEDP_VULL
> +  _Float128 __builtin_vec_scalar_insert_exp (vuq, vull);

OK, I missed the special handling of the VSIE builtin in the function. 
With the suggested changes the overloading works.  Thanks for the help.

> 

<snip>

> > > > +
> > > >  (define_expand "vreplace_elt_<mode>"
> > > >    [(set (match_operand:REPLACE_ELT 0 "register_operand")
> > > >    (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1
> > > > "register_operand")
> > > > @@ -5016,6 +5051,15 @@
> > > >    "xsxexpqp %0,%1"
> > > >    [(set_attr "type" "vecmove")])
> > > >  
> > > > +;; VSX Scalar to Vector Extract Exponent IEEE 128-bit floating
> > > > point format
> > > > +(define_insn "xsxexpqp_f128_<mode>"
> > > > +  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
> > > > +	(unspec:V2DI [(match_operand:IEEE128 1
> > > > "altivec_register_operand" "v")]
> > > > +	 UNSPEC_VSX_SXEXPDP))]
> > > > +  "TARGET_P9_VECTOR"
> > > > +  "xsxexpqp %0,%1"
> > > > +  [(set_attr "type" "vecmove")])
> > > 
> > > I think you can just merge this into the existing xsxexpqp_<mode>
> > > by
> > > extending
> > > with one mode like xsxexpqp_<IEEE128:mode>_<xxx:mode> for
> > > unspec:xxx.
> > > 
> > > xxx can be one mode iterator for DI and V2DI.
> > 
> > OK, I played with this, it is rather sophisticated syntax.  I
> > didn't
> > seem to be able to get the syntax correct and thus get all this to
> > work.  It also seemed to require updating some case statements for
> > the
> > instructions in various files.  I opted to not change this and just
> > keep it simple.  Not sure if that is OK or not?
> > 
> 
> I meant something like:
> 
> (define_mode_iterator VSEEQP_DI  [V2DI DI])
> 
> ;; VSX Scalar Extract Exponent Quad-Precision
> (define_insn "xsxexpqp_<IEEE128:mode>_<VSEEQP_DI:mode>"
>   [(set (match_operand:VSEEQP_DI 0 "altivec_register_operand" "=v")
>         (unspec:VSEEQP_DI
>           [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
>           UNSPEC_VSX_SXEXPDP))]
>   "TARGET_P9_VECTOR"
>   "xsxexpqp %0,%1"
>   [(set_attr "type" "vecmove")])

OK, I the define_insn part right.  I was using an existing iterator
that included both DI and V2DI as well as two additional types.  That
results in two unused patterns.  Buit it looks like the issue I got
stuck on was not getting the tweaks in file rs6000-builtins.def all
correct.  I thought the errors I was getting was due to incorrect
syntax in the define_insn, but were actually errors in rs6000-
builtins.def.  Once I sorted that out, it works.  

I did go with the define_mode iterator you suggested so we don't
generate additional unused patterns.  Again, thanks for the help on
that.
> 
> since what you need just one different mode from DI for the operand
> 0,
> using one mode iterator can make you not need to duplicate it with
> a different mode.  With above, you can use xsxexpqp_kf_di for the
> previous xsxexpqp_kf, and xsxexpqp_kf_v2di for your proposed
> __builtin_scalar_extract_exp_to_vec.
> 
> BR,
> Kewen

              Carl 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH ver 3] rs6000: Add builtins for IEEE 128-bit floating point values
  2023-06-07  9:36     ` Kewen.Lin
  2023-06-08 15:21       ` Carl Love
@ 2023-06-08 15:21       ` Carl Love
  2023-06-13  3:10         ` Kewen.Lin
  1 sibling, 1 reply; 9+ messages in thread
From: Carl Love @ 2023-06-08 15:21 UTC (permalink / raw)
  To: Kewen.Lin, cel; +Cc: Peter Bergner, Segher Boessenkool, gcc-patches

Kewen, GCC maintainers:

Version 3, was able to get the overloaded version of scalar_insert_exp
to work and the change to xsxexpqp_f128_<mode> define instruction to
work with the suggestions from Kewen.  

Version 2, I have addressed the various comments from Kewen.  I had
issues with adding an additional overloaded version of
scalar_insert_exp with vector arguments.  The overload infrastructure
didn't work with a mix of scalar and vector arguments.  I did rename
the __builtin_insertf128_exp to __builtin_vsx_scalar_insert_exp_qp make
it similar to the existing builtin.  I also wasn't able to get the
suggested merge of xsxexpqp_f128_<mode> with xsxexpqp_<mode> to work so
I left the two simpler definitiions.

The patch add three new builtins to extract the significand and
exponent of an IEEE float 128-bit value where the builtin argument is a
vector.  Additionally, a builtin to insert the exponent into an IEEE
float 128-bit vector argument is added.  These builtins were requested
since there is no clean and optimal way to transfer between a vector
and a scalar IEEE 128 bit value.

The patch has been tested on Power 10 with no regressions.  Please let
me know if the patch is acceptable or not.  Thanks.

               Carl

---------------------------------------
rs6000: Add builtins for IEEE 128-bit floating point values

Add support for the following builtins:

 __vector unsigned long long int
     __builtin_scalar_extract_exp_to_vec (__ieee128);
 __vector unsigned __int128
     __builtin_scalar_extract_sig_to_vec (__ieee128);
 __ieee128 scalar_insert_exp (__vector unsigned __int128,
 			      __vector unsigned long long);

These builtins were requesed since there is no clean and performant way to
transfer a value from a vector type and scalar type, despite the fact
that they both reside in vector registers.

gcc/
	* config/rs6000/rs6000-builtin.cc (rs6000_expand_builtin):
	Rename CCDE_FOR_xsxexpqp_tf to CODE_FOR_xsxexpqp_tf_di.
	Rename CODE_FOR_xsxexpqp_kf to CODE_FOR_xsxexpqp_kf_di.
	* config/rs6000/rs6000-buildin.def (__builtin_extractf128_exp,
	 __builtin_extractf128_sig, __builtin_insertf128_exp): Add new
	builtin definitions.
	Rename xsxexpqp_kf to xsxexpqp_kf_di.
	* config/rs6000/rs6000-c.cc (altivec_resolve_overloaded_builtin):
	Add else if for MODE_VECTOR_INT. Update comments.
	* config/rs6000/rs6000-overload.def
	(__builtin_vec_scalar_insert_exp): Add new overload definition with
	vector arguments.
	* config/vsx.md (VSEEQP_DI): New mode iterator.
	Rename define_insn xsxexpqp_<mode> to
	sxexpqp_<IEEE128:mode>_<VSEEQP_DI:mode>.
	(xsxsigqp_f128_<mode>, xsiexpqpf_f128_<mode>): Add define_insn for
	new builtins.
	* doc/extend.texi (__builtin_extractf128_exp,
	__builtin_extractf128_sig): Add documentation for new builtins.
	(scalar_insert_exp): Add new overloaded builtin definition.

gcc/testsuite/
	* gcc.target/powerpc/bfp/extract-exp-ieee128.c: New test case.
	* gcc.target/powerpc/bfp/extract-sig-ieee128.c: New test case.
	* gcc.target/powerpc/bfp/insert-exp-ieee128.c: New test case.
---
 gcc/config/rs6000/rs6000-builtin.cc           |  4 +-
 gcc/config/rs6000/rs6000-builtins.def         | 11 ++-
 gcc/config/rs6000/rs6000-c.cc                 | 10 +-
 gcc/config/rs6000/rs6000-overload.def         |  2 +
 gcc/config/rs6000/vsx.md                      | 28 +++++-
 gcc/doc/extend.texi                           |  9 ++
 .../powerpc/bfp/extract-exp-ieee128.c         | 50 ++++++++++
 .../powerpc/bfp/extract-sig-ieee128.c         | 57 ++++++++++++
 .../powerpc/bfp/insert-exp-ieee128.c          | 91 +++++++++++++++++++
 9 files changed, 253 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c

diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index 534698e7d3e..d99f0ae5dda 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -3326,8 +3326,8 @@ rs6000_expand_builtin (tree exp, rtx target, rtx /* subtarget */,
       case CODE_FOR_fmakf4_odd:
 	icode = CODE_FOR_fmatf4_odd;
 	break;
-      case CODE_FOR_xsxexpqp_kf:
-	icode = CODE_FOR_xsxexpqp_tf;
+      case CODE_FOR_xsxexpqp_kf_di:
+	icode = CODE_FOR_xsxexpqp_tf_di;
 	break;
       case CODE_FOR_xsxsigqp_kf:
 	icode = CODE_FOR_xsxsigqp_tf;
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index 638d0bc72ca..dcd4a393906 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -2901,8 +2901,14 @@
   fpmath double __builtin_truncf128_round_to_odd (_Float128);
     TRUNCF128_ODD trunckfdf2_odd {}
 
+  vull __builtin_scalar_extract_exp_to_vec (_Float128);
+    EEXPKF xsxexpqp_kf_v2di {}
+
+  vuq __builtin_scalar_extract_sig_to_vec (_Float128);
+    ESIGKF xsxsigqp_f128_kf {}
+
   const signed long long __builtin_vsx_scalar_extract_expq (_Float128);
-    VSEEQP xsxexpqp_kf {}
+    VSEEQP xsxexpqp_kf_di {}
 
   const signed __int128 __builtin_vsx_scalar_extract_sigq (_Float128);
     VSESQP xsxsigqp_kf {}
@@ -2915,6 +2921,9 @@
                                                       unsigned long long);
     VSIEQPF xsiexpqpf_kf {}
 
+  const _Float128 __builtin_vsx_scalar_insert_exp_vqp (vuq, vull);
+    VSIEQPV xsiexpqpf_f128_kf {}
+
   const signed int __builtin_vsx_scalar_test_data_class_qp (_Float128, \
                                                             const int<7>);
     VSTDCQP xststdcqp_kf {}
diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index 8555174d36e..11060f697db 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -1929,11 +1929,15 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	   128-bit variant of built-in function.  */
 	if (GET_MODE_PRECISION (arg1_mode) > 64)
 	  {
-	    /* If first argument is of float variety, choose variant
-	       that expects __ieee128 argument.  Otherwise, expect
-	       __int128 argument.  */
+	    /* If first argument is of float variety, choose the variant that
+	       expects __ieee128 argument.  If the first argument is vector
+	       int, choose the variant that expects vector unsigned
+	       __int128 argument.  Otherwise, expect scalar __int128 argument.
+	    */
 	    if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
 	      instance_code = RS6000_BIF_VSIEQPF;
+	    else if (GET_MODE_CLASS (arg1_mode) == MODE_VECTOR_INT)
+	      instance_code = RS6000_BIF_VSIEQPV;
 	    else
 	      instance_code = RS6000_BIF_VSIEQP;
 	  }
diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def
index c582490c084..84743114c8e 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -4515,6 +4515,8 @@
     VSIEQP
   _Float128 __builtin_vec_scalar_insert_exp (_Float128, unsigned long long);
     VSIEQPF
+  _Float128 __builtin_vec_scalar_insert_exp (vuq, vull);
+    VSIEQPV
 
 [VEC_VSTDC, scalar_test_data_class, __builtin_vec_scalar_test_data_class]
   unsigned int __builtin_vec_scalar_test_data_class (float, const int);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 7d845df5c2d..92a34dec1be 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -396,6 +396,7 @@
 			   V4SF
 			   V2DF
 			   V2DI])
+(define_mode_iterator VSEEQP_DI  [V2DI DI])
 
 (define_mode_attr VM3_char [(V2DI "d")
 			   (V4SI "w")
@@ -5008,9 +5009,10 @@
 ;; ISA 3.0 Binary Floating-Point Support
 
 ;; VSX Scalar Extract Exponent Quad-Precision
-(define_insn "xsxexpqp_<mode>"
-  [(set (match_operand:DI 0 "altivec_register_operand" "=v")
-	(unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
+(define_insn "xsxexpqp_<IEEE128:mode>_<VSEEQP_DI:mode>"
+  [(set (match_operand:VSEEQP_DI 0 "altivec_register_operand" "=v")
+	(unspec:VSEEQP_DI
+	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
 	 UNSPEC_VSX_SXEXPDP))]
   "TARGET_P9_VECTOR"
   "xsxexpqp %0,%1"
@@ -5034,6 +5036,15 @@
   "xsxsigqp %0,%1"
   [(set_attr "type" "vecmove")])
 
+;; VSX Scalar to Vector Extract Significand IEEE 128-bit floating point format
+(define_insn "xsxsigqp_f128_<mode>"
+  [(set (match_operand:V1TI 0 "altivec_register_operand" "=v")
+	(unspec:V1TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
+	 UNSPEC_VSX_SXSIG))]
+  "TARGET_P9_VECTOR"
+  "xsxsigqp %0,%1"
+  [(set_attr "type" "vecmove")])
+
 ;; VSX Scalar Extract Significand Double-Precision
 (define_insn "xsxsigdp"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -5054,6 +5065,17 @@
   "xsiexpqp %0,%1,%2"
   [(set_attr "type" "vecmove")])
 
+;; VSX Insert Exponent IEEE 128-bit Floating point format
+(define_insn "xsiexpqpf_f128_<mode>"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+	(unspec:IEEE128
+	 [(match_operand:V1TI 1 "altivec_register_operand" "v")
+	  (match_operand:V2DI 2 "altivec_register_operand" "v")]
+	 UNSPEC_VSX_SIEXPQP))]
+  "TARGET_P9_VECTOR"
+  "xsiexpqp %0,%1,%2"
+  [(set_attr "type" "vecmove")])
+
 ;; VSX Scalar Insert Exponent Quad-Precision
 (define_insn "xsiexpqp_<mode>"
   [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index e426a2eb7d8..625df24b62f 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -19724,6 +19724,10 @@ double scalar_insert_exp (double significand, unsigned long long int exponent);
 ieee_128 scalar_insert_exp (unsigned __int128 significand,
                             unsigned long long int exponent);
 ieee_128 scalar_insert_exp (ieee_128 significand, unsigned long long int exponent);
+vector ieee_128 scalar_insert_exp (vector unsigned __int128,
+                                   vector unsigned long long);
+vector unsigned long long __builtin_scalar_extract_exp_to_vec (ieee_128);
+vector unsigned __int128  __builtin_scalar_extract_sig_to_vec (ieee_128);
 
 int scalar_cmp_exp_gt (double arg1, double arg2);
 int scalar_cmp_exp_lt (double arg1, double arg2);
@@ -19777,6 +19781,11 @@ The significand and exponent components of the result are composed of
 the least significant 15 bits of the @code{exponent} argument and the
 least significant 112 bits of the @code{significand} argument respectively.
 
+The @code{__builtin_scalar_extract_exp_to_vec},
+and @code{__builtin_scalar_extract_sig_to_vec} are similar to
+@code{scalar_extract_exp}, @code{scalar_extract_sig} except they operate
+on vector arguments.
+
 The @code{scalar_cmp_exp_gt}, @code{scalar_cmp_exp_lt},
 @code{scalar_cmp_exp_eq}, and @code{scalar_cmp_exp_unordered} built-in
 functions return a non-zero value if @code{arg1} is greater than, less
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
new file mode 100644
index 00000000000..39981f0a274
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
@@ -0,0 +1,50 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p9vector_hw } */
+/* { dg-options "-mdejagnu-cpu=power9" } */
+
+#include <altivec.h>
+#include <stdlib.h>
+
+#if DEBUG
+#include <stdio.h>
+#endif
+
+vector unsigned long long int
+get_exponents (__ieee128 *p)
+{
+  __ieee128 source = *p;
+
+  return __builtin_scalar_extract_exp_to_vec (source);
+}
+
+int
+main ()
+{
+  vector unsigned long long int result, exp_result;
+  union conv128_t
+   {
+     __ieee128 val_ieee128;
+     __int128  val_int128;
+  } source;
+  
+  exp_result[0] = 0x0ULL;
+  exp_result[1] = 0x1234ULL;
+  source.val_int128 = 0x923456789ABCDEF0ULL;
+  source.val_int128 = (source.val_int128 << 64) | 0x123456789ABCDEFULL;
+
+  result = get_exponents (&source.val_ieee128);
+
+  if ((result[0] != exp_result[0]) || (result[1] != exp_result[1]))
+#if DEBUG
+    {
+      printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n",
+	     result[0], exp_result[0]);
+      printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n",
+	     result[1], exp_result[1]);
+    }
+#else
+    abort();
+#endif
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
new file mode 100644
index 00000000000..f7b3aedb832
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
@@ -0,0 +1,57 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p9vector_hw } */
+/* { dg-options "-mdejagnu-cpu=power9" } */
+
+#include <altivec.h>
+#include <stdlib.h>
+
+#if DEBUG
+#include <stdio.h>
+#endif
+
+vector unsigned __int128
+get_significand (__ieee128 *p)
+{
+  __ieee128 source = *p;
+
+  return __builtin_scalar_extract_sig_to_vec(source);
+}
+
+int
+main ()
+{
+  #define NOT_ZERO_OR_DENORMAL  0x1000000000000
+
+  union conv128_t
+   {
+     __ieee128 val_ieee128;
+     unsigned long long int val_ull[2];
+     unsigned __int128  val_uint128;
+     __vector unsigned __int128  val_vuint128;
+  } source, result, exp_result;
+  
+  /* Result is not zero or denormal.  */
+  exp_result.val_ull[1] = 0x00056789ABCDEF0ULL | NOT_ZERO_OR_DENORMAL;
+  exp_result.val_ull[0] = 0x123456789ABCDEFULL;
+  source.val_uint128 = 0x923456789ABCDEF0ULL;
+  source.val_uint128 = (source.val_uint128 << 64) | 0x123456789ABCDEFULL;
+
+  /* Note, bits[0:14] are set to 0, bit[15] is 0 if the input was zero or
+     Denormal, 1 otherwise.  */
+  result.val_vuint128 = get_significand (&source.val_ieee128);
+
+  if ((result.val_ull[0] != exp_result.val_ull[0])
+      || (result.val_ull[1] != exp_result.val_ull[1]))
+#if DEBUG
+    {
+      printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n",
+	     result.val_ull[0], exp_result.val_ull[0]);
+      printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n",
+	     result.val_ull[1], exp_result.val_ull[1]);
+    }
+#else
+    abort();
+#endif
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
new file mode 100644
index 00000000000..402dc48ad9d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
@@ -0,0 +1,91 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p9vector_hw } */
+/* { dg-options "-mdejagnu-cpu=power9" } */
+
+#include <altivec.h>
+#include <stdlib.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+__ieee128
+insert_exponent (__vector unsigned __int128 *significand_p,
+		 __vector unsigned long long int *exponent_p)
+{
+  __vector unsigned __int128 significand = *significand_p;
+  __vector unsigned long long int exponent = *exponent_p;
+
+  return scalar_insert_exp (significand, exponent);
+}
+
+__ieee128
+insert_exponent2 (unsigned __int128 significand,
+		 unsigned long long int exponent)
+{
+   return scalar_insert_exp (significand, exponent);
+}
+
+int
+main ()
+{
+  __ieee128 val_ieee128, result_ieee128, exp_result_ieee128;
+  unsigned __int128 val_int128;
+  unsigned long long int  val_ull;
+  union conv128_t
+   {
+     __ieee128 val_ieee128;
+     __vector unsigned __int128 val_vint128;
+     __vector unsigned long long int  val_vull;
+  } result, exp_result, significand;
+
+  __vector unsigned long long int exponent;
+
+  /* Scalar argument test */
+  val_ieee128 = 0xFEDCBA9876543210ULL;
+  val_ull = 0x5678;
+  exp_result.val_vull[1] = 0x5678000000000000ULL;
+  exp_result.val_vull[0] = 0xfedcba9876543210;  
+  result_ieee128 = insert_exponent2 (val_ieee128, val_ull);
+
+  if (result_ieee128 != exp_result.val_ieee128)
+#ifdef DEBUG
+    {
+      result.val_ieee128 = result_ieee128;
+      printf("Scalar argument ERROR:\n");
+      printf(" val_ieee128 = 0x%llx %llx\n",
+	     result.val_vull[1], result.val_vull[0]);
+      printf(" val_ieee128 = 0x%llx %llx\n",
+	     exp_result.val_vull[1], exp_result.val_vull[0]);
+    }
+#else
+    abort ();
+#endif
+
+  /* Vector argument test */
+  significand.val_vull[0] = 0xFEDCBA9876543210ULL;
+  significand.val_vull[1] = 0x7FFF12345678ABCDULL;  /* positive value */
+
+  exponent[0] = 0x5678;
+  exponent[1] = 0x1234;
+
+  exp_result.val_vull[0] = 0xFEDCBA9876543210ULL;
+  exp_result.val_vull[1] = 0x123412345678ABCDULL;
+
+  result.val_ieee128 = insert_exponent(&significand.val_vint128, &exponent);
+  
+  if (result.val_ieee128 != exp_result.val_ieee128)
+#ifdef DEBUG
+    {
+      printf("Vector argument ERROR:\n");
+      printf(" result = 0x%llx %llx\n",
+	     result.val_vull[1], result.val_vull[0]);
+      printf(" exp_result = 0x%llx %llx\n",
+	     exp_result.val_vull[1], exp_result.val_vull[0]);
+    }
+#else
+    abort ();
+#endif
+
+}
-- 
2.37.2



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH ver 3] rs6000: Add builtins for IEEE 128-bit floating point values
  2023-06-08 15:21       ` [PATCH ver 3] " Carl Love
@ 2023-06-13  3:10         ` Kewen.Lin
  2023-06-14 16:17           ` Carl Love
  0 siblings, 1 reply; 9+ messages in thread
From: Kewen.Lin @ 2023-06-13  3:10 UTC (permalink / raw)
  To: Carl Love; +Cc: Peter Bergner, Segher Boessenkool, gcc-patches, David Edelsohn

Hi Carl,

on 2023/6/8 23:21, Carl Love wrote:
> Kewen, GCC maintainers:
> 
> Version 3, was able to get the overloaded version of scalar_insert_exp
> to work and the change to xsxexpqp_f128_<mode> define instruction to
> work with the suggestions from Kewen.  
> 
> Version 2, I have addressed the various comments from Kewen.  I had
> issues with adding an additional overloaded version of
> scalar_insert_exp with vector arguments.  The overload infrastructure
> didn't work with a mix of scalar and vector arguments.  I did rename
> the __builtin_insertf128_exp to __builtin_vsx_scalar_insert_exp_qp make
> it similar to the existing builtin.  I also wasn't able to get the
> suggested merge of xsxexpqp_f128_<mode> with xsxexpqp_<mode> to work so
> I left the two simpler definitiions.
> 
> The patch add three new builtins to extract the significand and
> exponent of an IEEE float 128-bit value where the builtin argument is a
> vector.  Additionally, a builtin to insert the exponent into an IEEE
> float 128-bit vector argument is added.  These builtins were requested
> since there is no clean and optimal way to transfer between a vector
> and a scalar IEEE 128 bit value.
> 
> The patch has been tested on Power 10 with no regressions.  Please let
> me know if the patch is acceptable or not.  Thanks.
> 
>                Carl
> 
> ---------------------------------------
> rs6000: Add builtins for IEEE 128-bit floating point values
> 
> Add support for the following builtins:
> 
>  __vector unsigned long long int
>      __builtin_scalar_extract_exp_to_vec (__ieee128);
>  __vector unsigned __int128
>      __builtin_scalar_extract_sig_to_vec (__ieee128);
>  __ieee128 scalar_insert_exp (__vector unsigned __int128,
>  			      __vector unsigned long long);
> 
> These builtins were requesed since there is no clean and performant way to
> transfer a value from a vector type and scalar type, despite the fact
> that they both reside in vector registers.
> 
> gcc/
> 	* config/rs6000/rs6000-builtin.cc (rs6000_expand_builtin):
> 	Rename CCDE_FOR_xsxexpqp_tf to CODE_FOR_xsxexpqp_tf_di.
> 	Rename CODE_FOR_xsxexpqp_kf to CODE_FOR_xsxexpqp_kf_di.
> 	* config/rs6000/rs6000-buildin.def (__builtin_extractf128_exp,
> 	 __builtin_extractf128_sig, __builtin_insertf128_exp): Add new
> 	builtin definitions.
> 	Rename xsxexpqp_kf to xsxexpqp_kf_di.
> 	* config/rs6000/rs6000-c.cc (altivec_resolve_overloaded_builtin):
> 	Add else if for MODE_VECTOR_INT. Update comments.
> 	* config/rs6000/rs6000-overload.def
> 	(__builtin_vec_scalar_insert_exp): Add new overload definition with
> 	vector arguments.
> 	* config/vsx.md (VSEEQP_DI): New mode iterator.
> 	Rename define_insn xsxexpqp_<mode> to
> 	sxexpqp_<IEEE128:mode>_<VSEEQP_DI:mode>.
> 	(xsxsigqp_f128_<mode>, xsiexpqpf_f128_<mode>): Add define_insn for
> 	new builtins.
> 	* doc/extend.texi (__builtin_extractf128_exp,
> 	__builtin_extractf128_sig): Add documentation for new builtins.
> 	(scalar_insert_exp): Add new overloaded builtin definition.
> 
> gcc/testsuite/
> 	* gcc.target/powerpc/bfp/extract-exp-ieee128.c: New test case.
> 	* gcc.target/powerpc/bfp/extract-sig-ieee128.c: New test case.
> 	* gcc.target/powerpc/bfp/insert-exp-ieee128.c: New test case.
> ---
>  gcc/config/rs6000/rs6000-builtin.cc           |  4 +-
>  gcc/config/rs6000/rs6000-builtins.def         | 11 ++-
>  gcc/config/rs6000/rs6000-c.cc                 | 10 +-
>  gcc/config/rs6000/rs6000-overload.def         |  2 +
>  gcc/config/rs6000/vsx.md                      | 28 +++++-
>  gcc/doc/extend.texi                           |  9 ++
>  .../powerpc/bfp/extract-exp-ieee128.c         | 50 ++++++++++
>  .../powerpc/bfp/extract-sig-ieee128.c         | 57 ++++++++++++
>  .../powerpc/bfp/insert-exp-ieee128.c          | 91 +++++++++++++++++++
>  9 files changed, 253 insertions(+), 9 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
> 
> diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
> index 534698e7d3e..d99f0ae5dda 100644
> --- a/gcc/config/rs6000/rs6000-builtin.cc
> +++ b/gcc/config/rs6000/rs6000-builtin.cc
> @@ -3326,8 +3326,8 @@ rs6000_expand_builtin (tree exp, rtx target, rtx /* subtarget */,
>        case CODE_FOR_fmakf4_odd:
>  	icode = CODE_FOR_fmatf4_odd;
>  	break;
> -      case CODE_FOR_xsxexpqp_kf:
> -	icode = CODE_FOR_xsxexpqp_tf;
> +      case CODE_FOR_xsxexpqp_kf_di:
> +	icode = CODE_FOR_xsxexpqp_tf_di;

The others newly added such as CODE_FOR_xsxexpqp_kf_v2di should be handled here as well.

>  	break;
>        case CODE_FOR_xsxsigqp_kf:
>  	icode = CODE_FOR_xsxsigqp_tf;
> diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
> index 638d0bc72ca..dcd4a393906 100644
> --- a/gcc/config/rs6000/rs6000-builtins.def
> +++ b/gcc/config/rs6000/rs6000-builtins.def
> @@ -2901,8 +2901,14 @@
>    fpmath double __builtin_truncf128_round_to_odd (_Float128);
>      TRUNCF128_ODD trunckfdf2_odd {}
>  
> +  vull __builtin_scalar_extract_exp_to_vec (_Float128);
> +    EEXPKF xsxexpqp_kf_v2di {}
> +
> +  vuq __builtin_scalar_extract_sig_to_vec (_Float128);
> +    ESIGKF xsxsigqp_f128_kf {}
> +
>    const signed long long __builtin_vsx_scalar_extract_expq (_Float128);
> -    VSEEQP xsxexpqp_kf {}
> +    VSEEQP xsxexpqp_kf_di {}
>  
>    const signed __int128 __builtin_vsx_scalar_extract_sigq (_Float128);
>      VSESQP xsxsigqp_kf {}
> @@ -2915,6 +2921,9 @@
>                                                        unsigned long long);
>      VSIEQPF xsiexpqpf_kf {}
>  
> +  const _Float128 __builtin_vsx_scalar_insert_exp_vqp (vuq, vull);
> +    VSIEQPV xsiexpqpf_f128_kf {}
> +
>    const signed int __builtin_vsx_scalar_test_data_class_qp (_Float128, \
>                                                              const int<7>);
>      VSTDCQP xststdcqp_kf {}
> diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
> index 8555174d36e..11060f697db 100644
> --- a/gcc/config/rs6000/rs6000-c.cc
> +++ b/gcc/config/rs6000/rs6000-c.cc
> @@ -1929,11 +1929,15 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
>  	   128-bit variant of built-in function.  */
>  	if (GET_MODE_PRECISION (arg1_mode) > 64)
>  	  {
> -	    /* If first argument is of float variety, choose variant
> -	       that expects __ieee128 argument.  Otherwise, expect
> -	       __int128 argument.  */
> +	    /* If first argument is of float variety, choose the variant that
> +	       expects __ieee128 argument.  If the first argument is vector
> +	       int, choose the variant that expects vector unsigned
> +	       __int128 argument.  Otherwise, expect scalar __int128 argument.
> +	    */
>  	    if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
>  	      instance_code = RS6000_BIF_VSIEQPF;
> +	    else if (GET_MODE_CLASS (arg1_mode) == MODE_VECTOR_INT)
> +	      instance_code = RS6000_BIF_VSIEQPV;
>  	    else
>  	      instance_code = RS6000_BIF_VSIEQP;
>  	  }
> diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def
> index c582490c084..84743114c8e 100644
> --- a/gcc/config/rs6000/rs6000-overload.def
> +++ b/gcc/config/rs6000/rs6000-overload.def
> @@ -4515,6 +4515,8 @@
>      VSIEQP
>    _Float128 __builtin_vec_scalar_insert_exp (_Float128, unsigned long long);
>      VSIEQPF
> +  _Float128 __builtin_vec_scalar_insert_exp (vuq, vull);
> +    VSIEQPV
>  
>  [VEC_VSTDC, scalar_test_data_class, __builtin_vec_scalar_test_data_class]
>    unsigned int __builtin_vec_scalar_test_data_class (float, const int);
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index 7d845df5c2d..92a34dec1be 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -396,6 +396,7 @@
>  			   V4SF
>  			   V2DF
>  			   V2DI])
> +(define_mode_iterator VSEEQP_DI  [V2DI DI])
>  
>  (define_mode_attr VM3_char [(V2DI "d")
>  			   (V4SI "w")
> @@ -5008,9 +5009,10 @@
>  ;; ISA 3.0 Binary Floating-Point Support
>  
>  ;; VSX Scalar Extract Exponent Quad-Precision
> -(define_insn "xsxexpqp_<mode>"
> -  [(set (match_operand:DI 0 "altivec_register_operand" "=v")
> -	(unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
> +(define_insn "xsxexpqp_<IEEE128:mode>_<VSEEQP_DI:mode>"
> +  [(set (match_operand:VSEEQP_DI 0 "altivec_register_operand" "=v")
> +	(unspec:VSEEQP_DI
> +	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
>  	 UNSPEC_VSX_SXEXPDP))]
>    "TARGET_P9_VECTOR"
>    "xsxexpqp %0,%1"
> @@ -5034,6 +5036,15 @@
>    "xsxsigqp %0,%1"
>    [(set_attr "type" "vecmove")])
>  
> +;; VSX Scalar to Vector Extract Significand IEEE 128-bit floating point format
> +(define_insn "xsxsigqp_f128_<mode>"
> +  [(set (match_operand:V1TI 0 "altivec_register_operand" "=v")
> +	(unspec:V1TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
> +	 UNSPEC_VSX_SXSIG))]
> +  "TARGET_P9_VECTOR"
> +  "xsxsigqp %0,%1"
> +  [(set_attr "type" "vecmove")])

Like above xsxexpqp_<mode>, you can also simplify this with existing xsxsigqp_<mode>,
the bif pattern should be updated accordingly.

> +
>  ;; VSX Scalar Extract Significand Double-Precision
>  (define_insn "xsxsigdp"
>    [(set (match_operand:DI 0 "register_operand" "=r")
> @@ -5054,6 +5065,17 @@
>    "xsiexpqp %0,%1,%2"
>    [(set_attr "type" "vecmove")])
>  
> +;; VSX Insert Exponent IEEE 128-bit Floating point format
> +(define_insn "xsiexpqpf_f128_<mode>"
> +  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
> +	(unspec:IEEE128
> +	 [(match_operand:V1TI 1 "altivec_register_operand" "v")
> +	  (match_operand:V2DI 2 "altivec_register_operand" "v")]
> +	 UNSPEC_VSX_SIEXPQP))]
> +  "TARGET_P9_VECTOR"
> +  "xsiexpqp %0,%1,%2"
> +  [(set_attr "type" "vecmove")])

Ditto, with existing xsiexpqp_<mode>.

> +
>  ;; VSX Scalar Insert Exponent Quad-Precision
>  (define_insn "xsiexpqp_<mode>"
>    [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index e426a2eb7d8..625df24b62f 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -19724,6 +19724,10 @@ double scalar_insert_exp (double significand, unsigned long long int exponent);
>  ieee_128 scalar_insert_exp (unsigned __int128 significand,
>                              unsigned long long int exponent);
>  ieee_128 scalar_insert_exp (ieee_128 significand, unsigned long long int exponent);
> +vector ieee_128 scalar_insert_exp (vector unsigned __int128,
> +                                   vector unsigned long long);
> +vector unsigned long long __builtin_scalar_extract_exp_to_vec (ieee_128);
> +vector unsigned __int128  __builtin_scalar_extract_sig_to_vec (ieee_128);

s/__builtin_// to align with the others.

>  
>  int scalar_cmp_exp_gt (double arg1, double arg2);
>  int scalar_cmp_exp_lt (double arg1, double arg2);
> @@ -19777,6 +19781,11 @@ The significand and exponent components of the result are composed of
>  the least significant 15 bits of the @code{exponent} argument and the
>  least significant 112 bits of the @code{significand} argument respectively.
>  
> +The @code{__builtin_scalar_extract_exp_to_vec},
> +and @code{__builtin_scalar_extract_sig_to_vec} are similar to
> +@code{scalar_extract_exp}, @code{scalar_extract_sig} except they operate
> +on vector arguments.

s/__builtin_// too, the description doesn't match the implementation, the given
arguments are actually the same, but "they return vector type of value"?

> +
>  The @code{scalar_cmp_exp_gt}, @code{scalar_cmp_exp_lt},
>  @code{scalar_cmp_exp_eq}, and @code{scalar_cmp_exp_unordered} built-in
>  functions return a non-zero value if @code{arg1} is greater than, less
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
> new file mode 100644
> index 00000000000..39981f0a274
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c

In this same testsuite, we have some other test cases also testing ieee128,
but they just were named with number, so could you also name this with number
instead of "-ieee128"?

> @@ -0,0 +1,50 @@
> +/* { dg-do run { target { powerpc*-*-* } } } */
> +/* { dg-require-effective-target lp64 } */
> +/* { dg-require-effective-target p9vector_hw } */
> +/* { dg-options "-mdejagnu-cpu=power9" } */

There are some other test cases checking the expected assembly insn,
you can add an extra option "-save-temps" and use dg-final
scan-assembler-times to check just with this test case.

> +
> +#include <altivec.h>
> +#include <stdlib.h>
> +
> +#if DEBUG
> +#include <stdio.h>
> +#endif
> +
> +vector unsigned long long int
> +get_exponents (__ieee128 *p)
> +{
> +  __ieee128 source = *p;
> +
> +  return __builtin_scalar_extract_exp_to_vec (source);

s/__builtin_//

> +}
> +
> +int
> +main ()
> +{
> +  vector unsigned long long int result, exp_result;
> +  union conv128_t
> +   {
> +     __ieee128 val_ieee128;
> +     __int128  val_int128;
> +  } source;
> +  
> +  exp_result[0] = 0x0ULL;
> +  exp_result[1] = 0x1234ULL;
> +  source.val_int128 = 0x923456789ABCDEF0ULL;
> +  source.val_int128 = (source.val_int128 << 64) | 0x123456789ABCDEFULL;
> +
> +  result = get_exponents (&source.val_ieee128);
> +
> +  if ((result[0] != exp_result[0]) || (result[1] != exp_result[1]))
> +#if DEBUG
> +    {
> +      printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n",
> +	     result[0], exp_result[0]);
> +      printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n",
> +	     result[1], exp_result[1]);
> +    }
> +#else
> +    abort();
> +#endif
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
> new file mode 100644
> index 00000000000..f7b3aedb832
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
> @@ -0,0 +1,57 @@
> +/* { dg-do run { target { powerpc*-*-* } } } */
> +/* { dg-require-effective-target lp64 } */
> +/* { dg-require-effective-target p9vector_hw } */
> +/* { dg-options "-mdejagnu-cpu=power9" } */
> +
> +#include <altivec.h>
> +#include <stdlib.h>
> +
> +#if DEBUG
> +#include <stdio.h>
> +#endif
> +
> +vector unsigned __int128
> +get_significand (__ieee128 *p)
> +{
> +  __ieee128 source = *p;
> +
> +  return __builtin_scalar_extract_sig_to_vec(source);
> +}

Ditto.

> +
> +int
> +main ()
> +{
> +  #define NOT_ZERO_OR_DENORMAL  0x1000000000000
> +
> +  union conv128_t
> +   {
> +     __ieee128 val_ieee128;
> +     unsigned long long int val_ull[2];
> +     unsigned __int128  val_uint128;
> +     __vector unsigned __int128  val_vuint128;


Using vector instead of __vector would better match what users normally adopt.

Same for some other places.

> +  } source, result, exp_result;
> +  
> +  /* Result is not zero or denormal.  */
> +  exp_result.val_ull[1] = 0x00056789ABCDEF0ULL | NOT_ZERO_OR_DENORMAL;
> +  exp_result.val_ull[0] = 0x123456789ABCDEFULL;
> +  source.val_uint128 = 0x923456789ABCDEF0ULL;
> +  source.val_uint128 = (source.val_uint128 << 64) | 0x123456789ABCDEFULL;
> +
> +  /* Note, bits[0:14] are set to 0, bit[15] is 0 if the input was zero or
> +     Denormal, 1 otherwise.  */
> +  result.val_vuint128 = get_significand (&source.val_ieee128);
> +
> +  if ((result.val_ull[0] != exp_result.val_ull[0])
> +      || (result.val_ull[1] != exp_result.val_ull[1]))
> +#if DEBUG
> +    {
> +      printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n",
> +	     result.val_ull[0], exp_result.val_ull[0]);
> +      printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n",
> +	     result.val_ull[1], exp_result.val_ull[1]);
> +    }
> +#else
> +    abort();
> +#endif
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
> new file mode 100644
> index 00000000000..402dc48ad9d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c

Ditto for this case.

BR,
Kewen

> @@ -0,0 +1,91 @@
> +/* { dg-do run { target { powerpc*-*-* } } } */
> +/* { dg-require-effective-target lp64 } */
> +/* { dg-require-effective-target p9vector_hw } */
> +/* { dg-options "-mdejagnu-cpu=power9" } */
> +
> +#include <altivec.h>
> +#include <stdlib.h>
> +
> +#ifdef DEBUG
> +#include <stdio.h>
> +#endif
> +
> +__ieee128
> +insert_exponent (__vector unsigned __int128 *significand_p,
> +		 __vector unsigned long long int *exponent_p)
> +{
> +  __vector unsigned __int128 significand = *significand_p;
> +  __vector unsigned long long int exponent = *exponent_p;
> +
> +  return scalar_insert_exp (significand, exponent);
> +}
> +
> +__ieee128
> +insert_exponent2 (unsigned __int128 significand,
> +		 unsigned long long int exponent)
> +{
> +   return scalar_insert_exp (significand, exponent);
> +}
> +
> +int
> +main ()
> +{
> +  __ieee128 val_ieee128, result_ieee128, exp_result_ieee128;
> +  unsigned __int128 val_int128;
> +  unsigned long long int  val_ull;
> +  union conv128_t
> +   {
> +     __ieee128 val_ieee128;
> +     __vector unsigned __int128 val_vint128;
> +     __vector unsigned long long int  val_vull;
> +  } result, exp_result, significand;
> +
> +  __vector unsigned long long int exponent;
> +
> +  /* Scalar argument test */
> +  val_ieee128 = 0xFEDCBA9876543210ULL;
> +  val_ull = 0x5678;
> +  exp_result.val_vull[1] = 0x5678000000000000ULL;
> +  exp_result.val_vull[0] = 0xfedcba9876543210;  
> +  result_ieee128 = insert_exponent2 (val_ieee128, val_ull);
> +
> +  if (result_ieee128 != exp_result.val_ieee128)
> +#ifdef DEBUG
> +    {
> +      result.val_ieee128 = result_ieee128;
> +      printf("Scalar argument ERROR:\n");
> +      printf(" val_ieee128 = 0x%llx %llx\n",
> +	     result.val_vull[1], result.val_vull[0]);
> +      printf(" val_ieee128 = 0x%llx %llx\n",
> +	     exp_result.val_vull[1], exp_result.val_vull[0]);
> +    }
> +#else
> +    abort ();
> +#endif
> +
> +  /* Vector argument test */
> +  significand.val_vull[0] = 0xFEDCBA9876543210ULL;
> +  significand.val_vull[1] = 0x7FFF12345678ABCDULL;  /* positive value */
> +
> +  exponent[0] = 0x5678;
> +  exponent[1] = 0x1234;
> +
> +  exp_result.val_vull[0] = 0xFEDCBA9876543210ULL;
> +  exp_result.val_vull[1] = 0x123412345678ABCDULL;
> +
> +  result.val_ieee128 = insert_exponent(&significand.val_vint128, &exponent);
> +  
> +  if (result.val_ieee128 != exp_result.val_ieee128)
> +#ifdef DEBUG
> +    {
> +      printf("Vector argument ERROR:\n");
> +      printf(" result = 0x%llx %llx\n",
> +	     result.val_vull[1], result.val_vull[0]);
> +      printf(" exp_result = 0x%llx %llx\n",
> +	     exp_result.val_vull[1], exp_result.val_vull[0]);
> +    }
> +#else
> +    abort ();
> +#endif
> +
> +}

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH ver 3] rs6000: Add builtins for IEEE 128-bit floating point values
  2023-06-13  3:10         ` Kewen.Lin
@ 2023-06-14 16:17           ` Carl Love
  0 siblings, 0 replies; 9+ messages in thread
From: Carl Love @ 2023-06-14 16:17 UTC (permalink / raw)
  To: Kewen.Lin, cel
  Cc: Peter Bergner, Segher Boessenkool, gcc-patches, David Edelsohn

On Tue, 2023-06-13 at 11:10 +0800, Kewen.Lin wrote:
> Hi Carl,
> 
> on 2023/6/8 23:21, Carl Love wrote:
> > Kewen, GCC maintainers:
> > 
> > Version 3, was able to get the overloaded version of
> > scalar_insert_exp
> > to work and the change to xsxexpqp_f128_<mode> define instruction
> > to
> > work with the suggestions from Kewen.  
> > 
> > Version 2, I have addressed the various comments from Kewen.  I had
> > issues with adding an additional overloaded version of
> > scalar_insert_exp with vector arguments.  The overload
> > infrastructure
> > didn't work with a mix of scalar and vector arguments.  I did
> > rename
> > the __builtin_insertf128_exp to __builtin_vsx_scalar_insert_exp_qp
> > make
> > it similar to the existing builtin.  I also wasn't able to get the
> > suggested merge of xsxexpqp_f128_<mode> with xsxexpqp_<mode> to
> > work so
> > I left the two simpler definitiions.
> > 
> > The patch add three new builtins to extract the significand and
> > exponent of an IEEE float 128-bit value where the builtin argument
> > is a
> > vector.  Additionally, a builtin to insert the exponent into an
> > IEEE
> > float 128-bit vector argument is added.  These builtins were
> > requested
> > since there is no clean and optimal way to transfer between a
> > vector
> > and a scalar IEEE 128 bit value.
> > 
> > The patch has been tested on Power 10 with no regressions.  Please
> > let
> > me know if the patch is acceptable or not.  Thanks.
> > 
> >                Carl
> > 
> > ---------------------------------------
> > rs6000: Add builtins for IEEE 128-bit floating point values
> > 
> > Add support for the following builtins:
> > 
> >  __vector unsigned long long int
> >      __builtin_scalar_extract_exp_to_vec (__ieee128);
> >  __vector unsigned __int128
> >      __builtin_scalar_extract_sig_to_vec (__ieee128);
> >  __ieee128 scalar_insert_exp (__vector unsigned __int128,
> >  			      __vector unsigned long long);

Fixed commit log, removed __builtin_ from the names per comments from
Kewen below.
> > 
> > These builtins were requesed since there is no clean and performant
> > way to
> > transfer a value from a vector type and scalar type, despite the
> > fact
> > that they both reside in vector registers.
> > 
> > gcc/
> > 	* config/rs6000/rs6000-builtin.cc (rs6000_expand_builtin):
> > 	Rename CCDE_FOR_xsxexpqp_tf to CODE_FOR_xsxexpqp_tf_di.
> > 	Rename CODE_FOR_xsxexpqp_kf to CODE_FOR_xsxexpqp_kf_di.
> > 	* config/rs6000/rs6000-buildin.def (__builtin_extractf128_exp,
> > 	 __builtin_extractf128_sig, __builtin_insertf128_exp): Add new
> > 	builtin definitions.
> > 	Rename xsxexpqp_kf to xsxexpqp_kf_di.
> > 	* config/rs6000/rs6000-c.cc
> > (altivec_resolve_overloaded_builtin):
> > 	Add else if for MODE_VECTOR_INT. Update comments.
> > 	* config/rs6000/rs6000-overload.def
> > 	(__builtin_vec_scalar_insert_exp): Add new overload definition
> > with
> > 	vector arguments.
> > 	* config/vsx.md (VSEEQP_DI): New mode iterator.
> > 	Rename define_insn xsxexpqp_<mode> to
> > 	sxexpqp_<IEEE128:mode>_<VSEEQP_DI:mode>.
> > 	(xsxsigqp_f128_<mode>, xsiexpqpf_f128_<mode>): Add define_insn
> > for
> > 	new builtins.
> > 	* doc/extend.texi (__builtin_extractf128_exp,
> > 	__builtin_extractf128_sig): Add documentation for new builtins.
> > 	(scalar_insert_exp): Add new overloaded builtin definition.
> > 
> > gcc/testsuite/
> > 	* gcc.target/powerpc/bfp/extract-exp-ieee128.c: New test case.
> > 	* gcc.target/powerpc/bfp/extract-sig-ieee128.c: New test case.
> > 	* gcc.target/powerpc/bfp/insert-exp-ieee128.c: New test case.
> > ---
> >  gcc/config/rs6000/rs6000-builtin.cc           |  4 +-
> >  gcc/config/rs6000/rs6000-builtins.def         | 11 ++-
> >  gcc/config/rs6000/rs6000-c.cc                 | 10 +-
> >  gcc/config/rs6000/rs6000-overload.def         |  2 +
> >  gcc/config/rs6000/vsx.md                      | 28 +++++-
> >  gcc/doc/extend.texi                           |  9 ++
> >  .../powerpc/bfp/extract-exp-ieee128.c         | 50 ++++++++++
> >  .../powerpc/bfp/extract-sig-ieee128.c         | 57 ++++++++++++
> >  .../powerpc/bfp/insert-exp-ieee128.c          | 91
> > +++++++++++++++++++
> >  9 files changed, 253 insertions(+), 9 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-
> > exp-ieee128.c
> >  create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/extract-
> > sig-ieee128.c
> >  create mode 100644 gcc/testsuite/gcc.target/powerpc/bfp/insert-
> > exp-ieee128.c
> > 
> > diff --git a/gcc/config/rs6000/rs6000-builtin.cc
> > b/gcc/config/rs6000/rs6000-builtin.cc
> > index 534698e7d3e..d99f0ae5dda 100644
> > --- a/gcc/config/rs6000/rs6000-builtin.cc
> > +++ b/gcc/config/rs6000/rs6000-builtin.cc
> > @@ -3326,8 +3326,8 @@ rs6000_expand_builtin (tree exp, rtx target,
> > rtx /* subtarget */,
> >        case CODE_FOR_fmakf4_odd:
> >  	icode = CODE_FOR_fmatf4_odd;
> >  	break;
> > -      case CODE_FOR_xsxexpqp_kf:
> > -	icode = CODE_FOR_xsxexpqp_tf;
> > +      case CODE_FOR_xsxexpqp_kf_di:
> > +	icode = CODE_FOR_xsxexpqp_tf_di;
> 
> The others newly added such as CODE_FOR_xsxexpqp_kf_v2di should be
> handled here as well.

OK, added all of the new cases.

> 
> >  	break;
> >        case CODE_FOR_xsxsigqp_kf:
> >  	icode = CODE_FOR_xsxsigqp_tf;
> > diff --git a/gcc/config/rs6000/rs6000-builtins.def
> > b/gcc/config/rs6000/rs6000-builtins.def
> > index 638d0bc72ca..dcd4a393906 100644
> > --- a/gcc/config/rs6000/rs6000-builtins.def
> > +++ b/gcc/config/rs6000/rs6000-builtins.def
> > @@ -2901,8 +2901,14 @@
> >    fpmath double __builtin_truncf128_round_to_odd (_Float128);
> >      TRUNCF128_ODD trunckfdf2_odd {}
> >  
> > +  vull __builtin_scalar_extract_exp_to_vec (_Float128);
> > +    EEXPKF xsxexpqp_kf_v2di {}
> > +
> > +  vuq __builtin_scalar_extract_sig_to_vec (_Float128);
> > +    ESIGKF xsxsigqp_f128_kf {}
> > +
> >    const signed long long __builtin_vsx_scalar_extract_expq
> > (_Float128);
> > -    VSEEQP xsxexpqp_kf {}
> > +    VSEEQP xsxexpqp_kf_di {}
> >  
> >    const signed __int128 __builtin_vsx_scalar_extract_sigq
> > (_Float128);
> >      VSESQP xsxsigqp_kf {}
> > @@ -2915,6 +2921,9 @@
> >                                                        unsigned
> > long long);
> >      VSIEQPF xsiexpqpf_kf {}
> >  
> > +  const _Float128 __builtin_vsx_scalar_insert_exp_vqp (vuq, vull);
> > +    VSIEQPV xsiexpqpf_f128_kf {}
> > +
> >    const signed int __builtin_vsx_scalar_test_data_class_qp
> > (_Float128, \
> >                                                              const
> > int<7>);
> >      VSTDCQP xststdcqp_kf {}
> > diff --git a/gcc/config/rs6000/rs6000-c.cc
> > b/gcc/config/rs6000/rs6000-c.cc
> > index 8555174d36e..11060f697db 100644
> > --- a/gcc/config/rs6000/rs6000-c.cc
> > +++ b/gcc/config/rs6000/rs6000-c.cc
> > @@ -1929,11 +1929,15 @@ altivec_resolve_overloaded_builtin
> > (location_t loc, tree fndecl,
> >  	   128-bit variant of built-in function.  */
> >  	if (GET_MODE_PRECISION (arg1_mode) > 64)
> >  	  {
> > -	    /* If first argument is of float variety, choose variant
> > -	       that expects __ieee128 argument.  Otherwise, expect
> > -	       __int128 argument.  */
> > +	    /* If first argument is of float variety, choose the
> > variant that
> > +	       expects __ieee128 argument.  If the first argument is
> > vector
> > +	       int, choose the variant that expects vector unsigned
> > +	       __int128 argument.  Otherwise, expect scalar __int128
> > argument.
> > +	    */
> >  	    if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
> >  	      instance_code = RS6000_BIF_VSIEQPF;
> > +	    else if (GET_MODE_CLASS (arg1_mode) == MODE_VECTOR_INT)
> > +	      instance_code = RS6000_BIF_VSIEQPV;
> >  	    else
> >  	      instance_code = RS6000_BIF_VSIEQP;
> >  	  }
> > diff --git a/gcc/config/rs6000/rs6000-overload.def
> > b/gcc/config/rs6000/rs6000-overload.def
> > index c582490c084..84743114c8e 100644
> > --- a/gcc/config/rs6000/rs6000-overload.def
> > +++ b/gcc/config/rs6000/rs6000-overload.def
> > @@ -4515,6 +4515,8 @@
> >      VSIEQP
> >    _Float128 __builtin_vec_scalar_insert_exp (_Float128, unsigned
> > long long);
> >      VSIEQPF
> > +  _Float128 __builtin_vec_scalar_insert_exp (vuq, vull);
> > +    VSIEQPV
> >  
> >  [VEC_VSTDC, scalar_test_data_class,
> > __builtin_vec_scalar_test_data_class]
> >    unsigned int __builtin_vec_scalar_test_data_class (float, const
> > int);
> > diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> > index 7d845df5c2d..92a34dec1be 100644
> > --- a/gcc/config/rs6000/vsx.md
> > +++ b/gcc/config/rs6000/vsx.md
> > @@ -396,6 +396,7 @@
> >  			   V4SF
> >  			   V2DF
> >  			   V2DI])
> > +(define_mode_iterator VSEEQP_DI  [V2DI DI])
> >  
> >  (define_mode_attr VM3_char [(V2DI "d")
> >  			   (V4SI "w")
> > @@ -5008,9 +5009,10 @@
> >  ;; ISA 3.0 Binary Floating-Point Support
> >  
> >  ;; VSX Scalar Extract Exponent Quad-Precision
> > -(define_insn "xsxexpqp_<mode>"
> > -  [(set (match_operand:DI 0 "altivec_register_operand" "=v")
> > -	(unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand"
> > "v")]
> > +(define_insn "xsxexpqp_<IEEE128:mode>_<VSEEQP_DI:mode>"
> > +  [(set (match_operand:VSEEQP_DI 0 "altivec_register_operand"
> > "=v")
> > +	(unspec:VSEEQP_DI
> > +	  [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
> >  	 UNSPEC_VSX_SXEXPDP))]
> >    "TARGET_P9_VECTOR"
> >    "xsxexpqp %0,%1"
> > @@ -5034,6 +5036,15 @@
> >    "xsxsigqp %0,%1"
> >    [(set_attr "type" "vecmove")])
> >  
> > +;; VSX Scalar to Vector Extract Significand IEEE 128-bit floating
> > point format
> > +(define_insn "xsxsigqp_f128_<mode>"
> > +  [(set (match_operand:V1TI 0 "altivec_register_operand" "=v")
> > +	(unspec:V1TI [(match_operand:IEEE128 1
> > "altivec_register_operand" "v")]
> > +	 UNSPEC_VSX_SXSIG))]
> > +  "TARGET_P9_VECTOR"
> > +  "xsxsigqp %0,%1"
> > +  [(set_attr "type" "vecmove")])
> 
> Like above xsxexpqp_<mode>, you can also simplify this with existing
> xsxsigqp_<mode>,
> the bif pattern should be updated accordingly.

Fixed

> 
> > +
> >  ;; VSX Scalar Extract Significand Double-Precision
> >  (define_insn "xsxsigdp"
> >    [(set (match_operand:DI 0 "register_operand" "=r")
> > @@ -5054,6 +5065,17 @@
> >    "xsiexpqp %0,%1,%2"
> >    [(set_attr "type" "vecmove")])
> >  
> > +;; VSX Insert Exponent IEEE 128-bit Floating point format
> > +(define_insn "xsiexpqpf_f128_<mode>"
> > +  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
> > +	(unspec:IEEE128
> > +	 [(match_operand:V1TI 1 "altivec_register_operand" "v")
> > +	  (match_operand:V2DI 2 "altivec_register_operand" "v")]
> > +	 UNSPEC_VSX_SIEXPQP))]
> > +  "TARGET_P9_VECTOR"
> > +  "xsiexpqp %0,%1,%2"
> > +  [(set_attr "type" "vecmove")])
> 
> Ditto, with existing xsiexpqp_<mode>.

Fixed.

> 
> > +
> >  ;; VSX Scalar Insert Exponent Quad-Precision
> >  (define_insn "xsiexpqp_<mode>"
> >    [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
> > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> > index e426a2eb7d8..625df24b62f 100644
> > --- a/gcc/doc/extend.texi
> > +++ b/gcc/doc/extend.texi
> > @@ -19724,6 +19724,10 @@ double scalar_insert_exp (double
> > significand, unsigned long long int exponent);
> >  ieee_128 scalar_insert_exp (unsigned __int128 significand,
> >                              unsigned long long int exponent);
> >  ieee_128 scalar_insert_exp (ieee_128 significand, unsigned long
> > long int exponent);
> > +vector ieee_128 scalar_insert_exp (vector unsigned __int128,
> > +                                   vector unsigned long long);
> > +vector unsigned long long __builtin_scalar_extract_exp_to_vec
> > (ieee_128);
> > +vector unsigned __int128  __builtin_scalar_extract_sig_to_vec
> > (ieee_128);
> 
> s/__builtin_// to align with the others.

Renamed the builtins as requested.  Fixed the names thru out the patch.

> 
> >  
> >  int scalar_cmp_exp_gt (double arg1, double arg2);
> >  int scalar_cmp_exp_lt (double arg1, double arg2);
> > @@ -19777,6 +19781,11 @@ The significand and exponent components of
> > the result are composed of
> >  the least significant 15 bits of the @code{exponent} argument and
> > the
> >  least significant 112 bits of the @code{significand} argument
> > respectively.
> >  
> > +The @code{__builtin_scalar_extract_exp_to_vec},
> > +and @code{__builtin_scalar_extract_sig_to_vec} are similar to
> > +@code{scalar_extract_exp}, @code{scalar_extract_sig} except they
> > operate
> > +on vector arguments.
> 
> s/__builtin_// too, the description doesn't match the implementation,
> the given
> arguments are actually the same, but "they return vector type of
> value"?

Fixed up the description.

> 
> > +
> >  The @code{scalar_cmp_exp_gt}, @code{scalar_cmp_exp_lt},
> >  @code{scalar_cmp_exp_eq}, and @code{scalar_cmp_exp_unordered}
> > built-in
> >  functions return a non-zero value if @code{arg1} is greater than,
> > less
> > diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-
> > ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-
> > ieee128.c
> > new file mode 100644
> > index 00000000000..39981f0a274
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-exp-ieee128.c
> 
> In this same testsuite, we have some other test cases also testing
> ieee128,
> but they just were named with number, so could you also name this
> with number
> instead of "-ieee128"?

Renamed the three tests replacing -ieee128 with -1.

> 
> > @@ -0,0 +1,50 @@
> > +/* { dg-do run { target { powerpc*-*-* } } } */
> > +/* { dg-require-effective-target lp64 } */
> > +/* { dg-require-effective-target p9vector_hw } */
> > +/* { dg-options "-mdejagnu-cpu=power9" } */
> 
> There are some other test cases checking the expected assembly insn,
> you can add an extra option "-save-temps" and use dg-final
> scan-assembler-times to check just with this test case.
> 

Added check for the generated instruction, did this in all three new
test files.

> > +
> > +#include <altivec.h>
> > +#include <stdlib.h>
> > +
> > +#if DEBUG
> > +#include <stdio.h>
> > +#endif
> > +
> > +vector unsigned long long int
> > +get_exponents (__ieee128 *p)
> > +{
> > +  __ieee128 source = *p;
> > +
> > +  return __builtin_scalar_extract_exp_to_vec (source);
> 
> s/__builtin_//

Fixed

> 
> > +}
> > +
> > +int
> > +main ()
> > +{
> > +  vector unsigned long long int result, exp_result;
> > +  union conv128_t
> > +   {
> > +     __ieee128 val_ieee128;
> > +     __int128  val_int128;
> > +  } source;
> > +  
> > +  exp_result[0] = 0x0ULL;
> > +  exp_result[1] = 0x1234ULL;
> > +  source.val_int128 = 0x923456789ABCDEF0ULL;
> > +  source.val_int128 = (source.val_int128 << 64) |
> > 0x123456789ABCDEFULL;
> > +
> > +  result = get_exponents (&source.val_ieee128);
> > +
> > +  if ((result[0] != exp_result[0]) || (result[1] !=
> > exp_result[1]))
> > +#if DEBUG
> > +    {
> > +      printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n",
> > +	     result[0], exp_result[0]);
> > +      printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n",
> > +	     result[1], exp_result[1]);
> > +    }
> > +#else
> > +    abort();
> > +#endif
> > +  return 0;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-
> > ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-
> > ieee128.c
> > new file mode 100644
> > index 00000000000..f7b3aedb832
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/powerpc/bfp/extract-sig-ieee128.c
> > @@ -0,0 +1,57 @@
> > +/* { dg-do run { target { powerpc*-*-* } } } */
> > +/* { dg-require-effective-target lp64 } */
> > +/* { dg-require-effective-target p9vector_hw } */
> > +/* { dg-options "-mdejagnu-cpu=power9" } */
> > +
> > +#include <altivec.h>
> > +#include <stdlib.h>
> > +
> > +#if DEBUG
> > +#include <stdio.h>
> > +#endif
> > +
> > +vector unsigned __int128
> > +get_significand (__ieee128 *p)
> > +{
> > +  __ieee128 source = *p;
> > +
> > +  return __builtin_scalar_extract_sig_to_vec(source);
> > +}
> 
> Ditto.

Fixed.

> 
> > +
> > +int
> > +main ()
> > +{
> > +  #define NOT_ZERO_OR_DENORMAL  0x1000000000000
> > +
> > +  union conv128_t
> > +   {
> > +     __ieee128 val_ieee128;
> > +     unsigned long long int val_ull[2];
> > +     unsigned __int128  val_uint128;
> > +     __vector unsigned __int128  val_vuint128;
> 
> Using vector instead of __vector would better match what users
> normally adopt.
> 

Changed __vector to vector in all new test files.

> Same for some other places.
> 
> > +  } source, result, exp_result;
> > +  
> > +  /* Result is not zero or denormal.  */
> > +  exp_result.val_ull[1] = 0x00056789ABCDEF0ULL |
> > NOT_ZERO_OR_DENORMAL;
> > +  exp_result.val_ull[0] = 0x123456789ABCDEFULL;
> > +  source.val_uint128 = 0x923456789ABCDEF0ULL;
> > +  source.val_uint128 = (source.val_uint128 << 64) |
> > 0x123456789ABCDEFULL;
> > +
> > +  /* Note, bits[0:14] are set to 0, bit[15] is 0 if the input was
> > zero or
> > +     Denormal, 1 otherwise.  */
> > +  result.val_vuint128 = get_significand (&source.val_ieee128);
> > +
> > +  if ((result.val_ull[0] != exp_result.val_ull[0])
> > +      || (result.val_ull[1] != exp_result.val_ull[1]))
> > +#if DEBUG
> > +    {
> > +      printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n",
> > +	     result.val_ull[0], exp_result.val_ull[0]);
> > +      printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n",
> > +	     result.val_ull[1], exp_result.val_ull[1]);
> > +    }
> > +#else
> > +    abort();
> > +#endif
> > +  return 0;
> > +}
> > diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-
> > ieee128.c b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-
> > ieee128.c
> > new file mode 100644
> > index 00000000000..402dc48ad9d
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/powerpc/bfp/insert-exp-ieee128.c
> 
> Ditto for this case.

Fixed.

> 
> BR,
> Kewen
> 
> > @@ -0,0 +1,91 @@
> > +/* { dg-do run { target { powerpc*-*-* } } } */
> > +/* { dg-require-effective-target lp64 } */
> > +/* { dg-require-effective-target p9vector_hw } */
> > +/* { dg-options "-mdejagnu-cpu=power9" } */
> > +
> > +#include <altivec.h>
> > +#include <stdlib.h>
> > +
> > +#ifdef DEBUG
> > +#include <stdio.h>
> > +#endif
> > +
> > +__ieee128
> > +insert_exponent (__vector unsigned __int128 *significand_p,
> > +		 __vector unsigned long long int *exponent_p)
> > +{
> > +  __vector unsigned __int128 significand = *significand_p;
> > +  __vector unsigned long long int exponent = *exponent_p;
> > +
> > +  return scalar_insert_exp (significand, exponent);
> > +}
> > +
> > +__ieee128
> > +insert_exponent2 (unsigned __int128 significand,
> > +		 unsigned long long int exponent)
> > +{
> > +   return scalar_insert_exp (significand, exponent);
> > +}
> > +
> > +int
> > +main ()
> > +{
> > +  __ieee128 val_ieee128, result_ieee128, exp_result_ieee128;
> > +  unsigned __int128 val_int128;
> > +  unsigned long long int  val_ull;
> > +  union conv128_t
> > +   {
> > +     __ieee128 val_ieee128;
> > +     __vector unsigned __int128 val_vint128;
> > +     __vector unsigned long long int  val_vull;
> > +  } result, exp_result, significand;
> > +
> > +  __vector unsigned long long int exponent;
> > +
> > +  /* Scalar argument test */
> > +  val_ieee128 = 0xFEDCBA9876543210ULL;
> > +  val_ull = 0x5678;
> > +  exp_result.val_vull[1] = 0x5678000000000000ULL;
> > +  exp_result.val_vull[0] = 0xfedcba9876543210;  
> > +  result_ieee128 = insert_exponent2 (val_ieee128, val_ull);
> > +
> > +  if (result_ieee128 != exp_result.val_ieee128)
> > +#ifdef DEBUG
> > +    {
> > +      result.val_ieee128 = result_ieee128;
> > +      printf("Scalar argument ERROR:\n");
> > +      printf(" val_ieee128 = 0x%llx %llx\n",
> > +	     result.val_vull[1], result.val_vull[0]);
> > +      printf(" val_ieee128 = 0x%llx %llx\n",
> > +	     exp_result.val_vull[1], exp_result.val_vull[0]);
> > +    }
> > +#else
> > +    abort ();
> > +#endif
> > +
> > +  /* Vector argument test */
> > +  significand.val_vull[0] = 0xFEDCBA9876543210ULL;
> > +  significand.val_vull[1] = 0x7FFF12345678ABCDULL;  /* positive
> > value */
> > +
> > +  exponent[0] = 0x5678;
> > +  exponent[1] = 0x1234;
> > +
> > +  exp_result.val_vull[0] = 0xFEDCBA9876543210ULL;
> > +  exp_result.val_vull[1] = 0x123412345678ABCDULL;
> > +
> > +  result.val_ieee128 = insert_exponent(&significand.val_vint128,
> > &exponent);
> > +  
> > +  if (result.val_ieee128 != exp_result.val_ieee128)
> > +#ifdef DEBUG
> > +    {
> > +      printf("Vector argument ERROR:\n");
> > +      printf(" result = 0x%llx %llx\n",
> > +	     result.val_vull[1], result.val_vull[0]);
> > +      printf(" exp_result = 0x%llx %llx\n",
> > +	     exp_result.val_vull[1], exp_result.val_vull[0]);
> > +    }
> > +#else
> > +    abort ();
> > +#endif
> > +
> > +}


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2023-06-14 16:17 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-02 15:52 [PATCH] rs6000: Add builtins for IEEE 128-bit floating point values Carl Love
2023-06-05  8:45 ` Kewen.Lin
2023-06-06 19:54   ` [PATCH ver 2] " Carl Love
2023-06-06 19:54   ` [PATCH] " Carl Love
2023-06-07  9:36     ` Kewen.Lin
2023-06-08 15:21       ` Carl Love
2023-06-08 15:21       ` [PATCH ver 3] " Carl Love
2023-06-13  3:10         ` Kewen.Lin
2023-06-14 16:17           ` Carl Love

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).