From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1983) id 47F083858D1E; Tue, 20 Jun 2023 15:42:51 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 47F083858D1E DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1687275771; bh=N7uh2VcVk3/2k8AE0/xRgBt+vRXoKZ6/M8F3Zw9iVbw=; h=From:To:Subject:Date:From; b=CAwNpbCESn4WeSQpZx9X+VuM+5wMQdd0RTuCyrFyQYPrQiOqrhg6A3UwS46d/C3Yx 3TkgPvaOW0YErB94QFj0zVbKMPzzX6oslixasfJ5AoMeWl20fv7akGSZR+yJdj1JMI qXaU6fhTPt8IlprSpueI3hZ0AjLEgj0saKwmEzM0= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Carl Love To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-1996] rs6000: Add builtins for IEEE 128-bit floating point values X-Act-Checkin: gcc X-Git-Author: Carl Love X-Git-Refname: refs/heads/master X-Git-Oldrev: b4f1e4a644380282f1e873f9ab2ebb2941ca34ce X-Git-Newrev: 86df278de15b4a51d6cdb0e8922c2d05adfb64a4 Message-Id: <20230620154251.47F083858D1E@sourceware.org> Date: Tue, 20 Jun 2023 15:42:51 +0000 (GMT) List-Id: https://gcc.gnu.org/g:86df278de15b4a51d6cdb0e8922c2d05adfb64a4 commit r14-1996-g86df278de15b4a51d6cdb0e8922c2d05adfb64a4 Author: Carl Love Date: Tue Jun 20 11:40:30 2023 -0400 rs6000: Add builtins for IEEE 128-bit floating point values Add support for the following builtins: __vector unsigned long long int scalar_extract_exp_to_vec (__ieee128); __vector unsigned __int128 scalar_extract_sig_to_vec (__ieee128); __ieee128 scalar_insert_exp (__vector unsigned __int128, __vector unsigned long long); The instructions used in the builtins operate on vector registers. Thus the result must be moved to a scalar type. There is no clean, performant way to do this. The user code typically needs the result as a vector anyway. gcc/ * config/rs6000/rs6000-builtin.cc (rs6000_expand_builtin): Rename CODE_FOR_xsxsigqp_tf to CODE_FOR_xsxsigqp_tf_ti. Rename CODE_FOR_xsxsigqp_kf to CODE_FOR_xsxsigqp_kf_ti. 
Rename CODE_FOR_xsxexpqp_tf to CODE_FOR_xsxexpqp_tf_di. Rename CODE_FOR_xsxexpqp_kf to CODE_FOR_xsxexpqp_kf_di. (CODE_FOR_xsxexpqp_kf_v2di, CODE_FOR_xsxsigqp_kf_v1ti, CODE_FOR_xsiexpqp_kf_v2di): Add case statements. * config/rs6000/rs6000-builtins.def (__builtin_vsx_scalar_extract_exp_to_vec, __builtin_vsx_scalar_extract_sig_to_vec, __builtin_vsx_scalar_insert_exp_vqp): Add new builtin definitions. Rename xsxexpqp_kf, xsxsigqp_kf, xsiexpqp_kf to xsxexpqp_kf_di, xsxsigqp_kf_ti, xsiexpqp_kf_di respectively. * config/rs6000/rs6000-c.cc (altivec_resolve_overloaded_builtin): Update case RS6000_OVLD_VEC_VSIE to handle MODE_VECTOR_INT for new overloaded instance. Update comments. * config/rs6000/rs6000-overload.def (__builtin_vec_scalar_insert_exp): Add new overload definition with vector arguments. (scalar_extract_exp_to_vec, scalar_extract_sig_to_vec): New overloaded definitions. * config/rs6000/vsx.md (V2DI_DI): New mode iterator. (DI_to_TI): New mode attribute. Rename xsxexpqp_<mode> to xsxexpqp_<IEEE128:mode>_<V2DI_DI:mode>. Rename xsxsigqp_<mode> to xsxsigqp_<IEEE128:mode>_<VEC_TI:mode>. Rename xsiexpqp_<mode> to xsiexpqp_<IEEE128:mode>_<V2DI_DI:mode>. * doc/extend.texi (scalar_extract_exp_to_vec, scalar_extract_sig_to_vec): Add documentation for new builtins. (scalar_insert_exp): Add new overloaded builtin definition. gcc/testsuite/ * gcc.target/powerpc/bfp/scalar-extract-exp-8.c: New test case. * gcc.target/powerpc/bfp/scalar-extract-sig-8.c: New test case. * gcc.target/powerpc/bfp/scalar-insert-exp-16.c: New test case. 
Diff: --- gcc/config/rs6000/rs6000-builtin.cc | 21 +++-- gcc/config/rs6000/rs6000-builtins.def | 15 ++- gcc/config/rs6000/rs6000-c.cc | 10 +- gcc/config/rs6000/rs6000-overload.def | 12 +++ gcc/config/rs6000/vsx.md | 25 +++-- gcc/doc/extend.texi | 24 ++++- .../gcc.target/powerpc/bfp/scalar-extract-exp-8.c | 58 ++++++++++++ .../gcc.target/powerpc/bfp/scalar-extract-sig-8.c | 65 +++++++++++++ .../gcc.target/powerpc/bfp/scalar-insert-exp-16.c | 103 +++++++++++++++++++++ 9 files changed, 307 insertions(+), 26 deletions(-) diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index 534698e7d3e..a8f291c6a72 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -3326,17 +3326,26 @@ rs6000_expand_builtin (tree exp, rtx target, rtx /* subtarget */, case CODE_FOR_fmakf4_odd: icode = CODE_FOR_fmatf4_odd; break; - case CODE_FOR_xsxexpqp_kf: - icode = CODE_FOR_xsxexpqp_tf; + case CODE_FOR_xsxexpqp_kf_di: + icode = CODE_FOR_xsxexpqp_tf_di; break; - case CODE_FOR_xsxsigqp_kf: - icode = CODE_FOR_xsxsigqp_tf; + case CODE_FOR_xsxexpqp_kf_v2di: + icode = CODE_FOR_xsxexpqp_tf_v2di; + break; + case CODE_FOR_xsxsigqp_kf_ti: + icode = CODE_FOR_xsxsigqp_tf_ti; + break; + case CODE_FOR_xsxsigqp_kf_v1ti: + icode = CODE_FOR_xsxsigqp_tf_v1ti; break; case CODE_FOR_xststdcnegqp_kf: icode = CODE_FOR_xststdcnegqp_tf; break; - case CODE_FOR_xsiexpqp_kf: - icode = CODE_FOR_xsiexpqp_tf; + case CODE_FOR_xsiexpqp_kf_di: + icode = CODE_FOR_xsiexpqp_tf_di; + break; + case CODE_FOR_xsiexpqp_kf_v2di: + icode = CODE_FOR_xsiexpqp_tf_v2di; break; case CODE_FOR_xsiexpqpf_kf: icode = CODE_FOR_xsiexpqpf_tf; diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 5ac6af4c6e3..1062cd414d4 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -2901,19 +2901,28 @@ TRUNCF128_ODD trunckfdf2_odd {} const signed long long __builtin_vsx_scalar_extract_expq 
(_Float128); - VSEEQP xsxexpqp_kf {} + VSEEQP xsxexpqp_kf_di {} + + vull __builtin_vsx_scalar_extract_exp_to_vec (_Float128); + VSEEQPV xsxexpqp_kf_v2di {} const signed __int128 __builtin_vsx_scalar_extract_sigq (_Float128); - VSESQP xsxsigqp_kf {} + VSESQP xsxsigqp_kf_ti {} + + vuq __builtin_vsx_scalar_extract_sig_to_vec (_Float128); + VSESQPV xsxsigqp_kf_v1ti {} const _Float128 __builtin_vsx_scalar_insert_exp_q (unsigned __int128, \ unsigned long long); - VSIEQP xsiexpqp_kf {} + VSIEQP xsiexpqp_kf_di {} const _Float128 __builtin_vsx_scalar_insert_exp_qp (_Float128, \ unsigned long long); VSIEQPF xsiexpqpf_kf {} + const _Float128 __builtin_vsx_scalar_insert_exp_vqp (vuq, vull); + VSIEQPV xsiexpqp_kf_v2di {} + const signed int __builtin_vsx_scalar_test_data_class_qp (_Float128, \ const int<7>); VSTDCQP xststdcqp_kf {} diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc index 8555174d36e..11060f697db 100644 --- a/gcc/config/rs6000/rs6000-c.cc +++ b/gcc/config/rs6000/rs6000-c.cc @@ -1929,11 +1929,15 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, 128-bit variant of built-in function. */ if (GET_MODE_PRECISION (arg1_mode) > 64) { - /* If first argument is of float variety, choose variant - that expects __ieee128 argument. Otherwise, expect - __int128 argument. */ + /* If first argument is of float variety, choose the variant that + expects __ieee128 argument. If the first argument is vector + int, choose the variant that expects vector unsigned + __int128 argument. Otherwise, expect scalar __int128 argument. 
+ */ if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT) instance_code = RS6000_BIF_VSIEQPF; + else if (GET_MODE_CLASS (arg1_mode) == MODE_VECTOR_INT) + instance_code = RS6000_BIF_VSIEQPV; else instance_code = RS6000_BIF_VSIEQP; } diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def index c582490c084..470d718efde 100644 --- a/gcc/config/rs6000/rs6000-overload.def +++ b/gcc/config/rs6000/rs6000-overload.def @@ -4515,6 +4515,18 @@ VSIEQP _Float128 __builtin_vec_scalar_insert_exp (_Float128, unsigned long long); VSIEQPF + _Float128 __builtin_vec_scalar_insert_exp (vuq, vull); + VSIEQPV + +[VEC_VSEEV, scalar_extract_exp_to_vec, \ + __builtin_vec_scalar_extract_exp_to_vector] + vull __builtin_vec_scalar_extract_exp_to_vector (_Float128); + VSEEQPV + +[VEC_VSESV, scalar_extract_sig_to_vec, \ + __builtin_vec_scalar_extract_sig_to_vector] + vuq __builtin_vec_scalar_extract_sig_to_vector (_Float128); + VSESQPV [VEC_VSTDC, scalar_test_data_class, __builtin_vec_scalar_test_data_class] unsigned int __builtin_vec_scalar_test_data_class (float, const int); diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 0a34ceebeb5..e01738127ed 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -396,6 +396,9 @@ V4SF V2DF V2DI]) +(define_mode_iterator V2DI_DI [V2DI DI]) +(define_mode_attr DI_to_TI [(V2DI "V1TI") + (DI "TI")]) (define_mode_attr VM3_char [(V2DI "d") (V4SI "w") @@ -5008,9 +5011,10 @@ ;; ISA 3.0 Binary Floating-Point Support ;; VSX Scalar Extract Exponent Quad-Precision -(define_insn "xsxexpqp_<mode>" - [(set (match_operand:DI 0 "altivec_register_operand" "=v") - (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")] +(define_insn "xsxexpqp_<IEEE128:mode>_<V2DI_DI:mode>" + [(set (match_operand:V2DI_DI 0 "altivec_register_operand" "=v") + (unspec:V2DI_DI + [(match_operand:IEEE128 1 "altivec_register_operand" "v")] UNSPEC_VSX_SXEXPDP))] "TARGET_P9_VECTOR" "xsxexpqp %0,%1" @@ -5026,9 +5030,10 @@ [(set_attr "type" "integer")]) ;; 
VSX Scalar Extract Significand Quad-Precision -(define_insn "xsxsigqp_<mode>" - [(set (match_operand:TI 0 "altivec_register_operand" "=v") - (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")] +(define_insn "xsxsigqp_<IEEE128:mode>_<VEC_TI:mode>" + [(set (match_operand:VEC_TI 0 "altivec_register_operand" "=v") + (unspec:VEC_TI [(match_operand:IEEE128 1 + "altivec_register_operand" "v")] UNSPEC_VSX_SXSIG))] "TARGET_P9_VECTOR" "xsxsigqp %0,%1" @@ -5055,10 +5060,12 @@ [(set_attr "type" "vecmove")]) ;; VSX Scalar Insert Exponent Quad-Precision -(define_insn "xsiexpqp_<mode>" +(define_insn "xsiexpqp_<IEEE128:mode>_<V2DI_DI:mode>" [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") - (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v") - (match_operand:DI 2 "altivec_register_operand" "v")] + (unspec:IEEE128 [(match_operand:<DI_to_TI> 1 + "altivec_register_operand" "v") + (match_operand:V2DI_DI 2 + "altivec_register_operand" "v")] UNSPEC_VSX_SIEXPQP))] "TARGET_P9_VECTOR" "xsiexpqp %0,%1,%2" diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index c01cd3fe90c..3040a9bdea6 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -19807,6 +19807,10 @@ double scalar_insert_exp (double significand, unsigned long long int exponent); ieee_128 scalar_insert_exp (unsigned __int128 significand, unsigned long long int exponent); ieee_128 scalar_insert_exp (ieee_128 significand, unsigned long long int exponent); +vector ieee_128 scalar_insert_exp (vector unsigned __int128 significand, + vector unsigned long long exponent); +vector unsigned long long scalar_extract_exp_to_vec (ieee_128); +vector unsigned __int128 scalar_extract_sig_to_vec (ieee_128); int scalar_cmp_exp_gt (double arg1, double arg2); int scalar_cmp_exp_lt (double arg1, double arg2); @@ -19857,11 +19861,21 @@ of the result are composed of the least significant 11 bits of the When supplied with a 128-bit first argument, the @code{scalar_insert_exp} built-in function returns a quad-precision -ieee floating point value. 
The sign bit of the result is copied from -the most significant bit of the @code{significand} argument. -The significand and exponent components of the result are composed of -the least significant 15 bits of the @code{exponent} argument and the -least significant 112 bits of the @code{significand} argument respectively. +IEEE floating point value if the two arguments were scalar. If the two +arguments are vectors, the return value is a vector IEEE floating point value. +The sign bit of the result is copied from the most significant bit of the +@code{significand} argument. The significand and exponent components of the +result are composed of the least significant 15 bits of the @code{exponent} +argument (element 0 on big-endian and element 1 on little-endian) and the +least significant 112 bits of the @code{significand} argument +respectively. Note, the @code{significand} is the scalar argument or in the +case of vector arguments, @code{significand} is element 0 for big-endian and +element 1 for little-endian. + +The @code{scalar_extract_exp_to_vec}, +and @code{scalar_extract_sig_to_vec} are similar to +@code{scalar_extract_exp}, @code{scalar_extract_sig} except they return +a vector result of type unsigned long long and unsigned __int128 respectively. 
The @code{scalar_cmp_exp_gt}, @code{scalar_cmp_exp_lt}, @code{scalar_cmp_exp_eq}, and @code{scalar_cmp_exp_unordered} built-in diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-8.c b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-8.c new file mode 100644 index 00000000000..eedcfcdac96 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-8.c @@ -0,0 +1,58 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target p9vector_hw } */ +/* { dg-options "-mdejagnu-cpu=power9 -save-temps" } */ + +#include +#include + +#if DEBUG +#include +#endif + +vector unsigned long long int +get_exponents (__ieee128 *p) +{ + __ieee128 source = *p; + + return scalar_extract_exp_to_vec (source); +} + +int +main () +{ + vector unsigned long long int result, exp_result; + union conv128_t + { + __ieee128 val_ieee128; + __int128 val_int128; + } source; + +#ifdef _BIG_ENDIAN + exp_result[1] = 0x0ULL; + exp_result[0] = 0x1234ULL; +#else + exp_result[0] = 0x0ULL; + exp_result[1] = 0x1234ULL; +#endif + source.val_int128 = 0x923456789ABCDEF0ULL; + source.val_int128 = (source.val_int128 << 64) | 0x123456789ABCDEFULL; + + result = get_exponents (&source.val_ieee128); + + if ((result[0] != exp_result[0]) || (result[1] != exp_result[1])) +#if DEBUG + { + printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n", + result[0], exp_result[0]); + printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n", + result[1], exp_result[1]); + } +#else + abort(); +#endif + return 0; +} + +/* Check that the expected extract exponent instruction is generated. 
*/ +/* { dg-final { scan-assembler-times {\mxsxexpqp\M} 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-8.c b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-8.c new file mode 100644 index 00000000000..69a908a53c2 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-8.c @@ -0,0 +1,65 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target p9vector_hw } */ +/* { dg-options "-mdejagnu-cpu=power9 -save-temps" } */ + +#include +#include + +#if DEBUG +#include +#endif + +vector unsigned __int128 +get_significand (__ieee128 *p) +{ + __ieee128 source = *p; + + return scalar_extract_sig_to_vec(source); +} + +int +main () +{ + #define NOT_ZERO_OR_DENORMAL 0x1000000000000 + + union conv128_t + { + __ieee128 val_ieee128; + unsigned long long int val_ull[2]; + unsigned __int128 val_uint128; + vector unsigned __int128 val_vuint128; + } source, result, exp_result; + + /* Result is not zero or denormal. */ +#ifdef _BIG_ENDIAN + exp_result.val_ull[0] = 0x00056789ABCDEF0ULL | NOT_ZERO_OR_DENORMAL; + exp_result.val_ull[1] = 0x123456789ABCDEFULL; +#else + exp_result.val_ull[1] = 0x00056789ABCDEF0ULL | NOT_ZERO_OR_DENORMAL; + exp_result.val_ull[0] = 0x123456789ABCDEFULL; +#endif + source.val_uint128 = 0x923456789ABCDEF0ULL; + source.val_uint128 = (source.val_uint128 << 64) | 0x123456789ABCDEFULL; + + /* Note, bits[0:14] are set to 0, bit[15] is 0 if the input was zero or + Denormal, 1 otherwise. 
*/ + result.val_vuint128 = get_significand (&source.val_ieee128); + + if ((result.val_ull[0] != exp_result.val_ull[0]) + || (result.val_ull[1] != exp_result.val_ull[1])) +#if DEBUG + { + printf("result[0] = 0x%llx; exp_result[0] = 0x%llx\n", + result.val_ull[0], exp_result.val_ull[0]); + printf("result[1] = 0x%llx; exp_result[1] = 0x%llx\n", + result.val_ull[1], exp_result.val_ull[1]); + } +#else + abort(); +#endif + return 0; +} + +/* Check that the expected extract significand instruction is generated. */ +/* { dg-final { scan-assembler-times {\mxsxsigqp\M} 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-16.c b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-16.c new file mode 100644 index 00000000000..f0e03c5173d --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-insert-exp-16.c @@ -0,0 +1,103 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target p9vector_hw } */ +/* { dg-options "-mdejagnu-cpu=power9 -save-temps" } */ + +#include +#include + +#ifdef DEBUG +#include +#endif + +__ieee128 +insert_exponent (vector unsigned __int128 *significand_p, + vector unsigned long long int *exponent_p) +{ + vector unsigned __int128 significand = *significand_p; + vector unsigned long long int exponent = *exponent_p; + + return scalar_insert_exp (significand, exponent); +} + +__ieee128 +insert_exponent2 (unsigned __int128 significand, + unsigned long long int exponent) +{ + return scalar_insert_exp (significand, exponent); +} + +int +main () +{ + __ieee128 val_ieee128, result_ieee128, exp_result_ieee128; + unsigned __int128 val_int128; + unsigned long long int val_ull; + union conv128_t + { + __ieee128 val_ieee128; + vector unsigned __int128 val_vint128; + vector unsigned long long int val_vull; + } result, exp_result, significand; + + vector unsigned long long int exponent; + + /* Scalar argument test */ + val_ieee128 = 0xFEDCBA9876543210ULL; + val_ull 
= 0x5678; +#ifdef _BIG_ENDIAN + exp_result.val_vull[1] = 0xfedcba9876543210; + exp_result.val_vull[0] = 0x5678000000000000ULL; +#else + exp_result.val_vull[0] = 0xfedcba9876543210; + exp_result.val_vull[1] = 0x5678000000000000ULL; +#endif + result_ieee128 = insert_exponent2 (val_ieee128, val_ull); + + if (result_ieee128 != exp_result.val_ieee128) +#ifdef DEBUG + { + result.val_ieee128 = result_ieee128; + printf("Scalar argument ERROR:\n"); + printf(" val_ieee128 = 0x%llx %llx\n", + result.val_vull[1], result.val_vull[0]); + printf(" exp_val_ieee128 = 0x%llx %llx\n", + exp_result.val_vull[1], exp_result.val_vull[0]); + } +#else + abort (); +#endif + + /* Vector argument test */ + significand.val_vull[0] = 0xFEDCBA9876543210ULL; + significand.val_vull[1] = 0x7FFF12345678ABCDULL; /* positive value */ + + exponent[0] = 0x5678; + exponent[1] = 0x1234; + +#ifdef _BIG_ENDIAN + exp_result.val_vull[0] = 0xD678BA9876543210ULL; + exp_result.val_vull[1] = 0x7FFF12345678ABCDULL; +#else + exp_result.val_vull[0] = 0xFEDCBA9876543210ULL; + exp_result.val_vull[1] = 0x123412345678ABCDULL; +#endif + result.val_ieee128 = insert_exponent(&significand.val_vint128, &exponent); + + if (result.val_ieee128 != exp_result.val_ieee128) +#ifdef DEBUG + { + printf("Vector argument ERROR:\n"); + printf(" result = 0x%llx %llx\n", + result.val_vull[1], result.val_vull[0]); + printf(" exp_result = 0x%llx %llx\n", + exp_result.val_vull[1], exp_result.val_vull[0]); + } +#else + abort (); +#endif + +} + +/* Check that the expected insert exponent instruction is generated. */ +/* { dg-final { scan-assembler-times {\mxsiexpqp\M} 2 } } */