[gcc(refs/users/meissner/heads/work129-vpair)] Add fp built-in overload support.
@ 2023-08-01 3:36 Michael Meissner
From: Michael Meissner @ 2023-08-01 3:36 UTC
To: gcc-cvs
https://gcc.gnu.org/g:ff9141eac886e540968dc6a000d5820481bfcf6f
commit ff9141eac886e540968dc6a000d5820481bfcf6f
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Mon Jul 31 23:35:47 2023 -0400
Add fp built-in overload support.
2023-07-31 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/rs6000-builtin.cc (fold_builtin_overload_fp): New helper
function for floating point overloaded built-in functions.
(rs6000_gimple_fold_builtin): Add support for floating point overloaded
built-in functions that map directly to gimple.
* config/rs6000/rs6000-builtins.def (__builtin_*_f32_scalar): New
built-in functions for overloaded floating point support.
(__builtin_*_f32_vector): Likewise.
(__builtin_*_f32_vpair): Likewise.
(__builtin_*_f64_scalar): Likewise.
(__builtin_*_f64_vector): Likewise.
(__builtin_*_f64_vpair): Likewise.
* config/rs6000/rs6000-overload.def (__builtin_*_f32): Likewise.
(__builtin_*_f64): Likewise.
* config/rs6000/rs6000.md (vector-pair.md): Include.
* config/rs6000/t-rs6000 (MD_INCLUDES): Add vector-pair.md.
* config/rs6000/vector-pair.md: New file.
* doc/extend.texi (PowerPC Built-in functions): Document the floating
point overloaded built-in functions.
gcc/testsuite/
* gcc.target/powerpc/fp-overload-f32-scalar.c: New test.
* gcc.target/powerpc/fp-overload-f32-vector.c: Likewise.
* gcc.target/powerpc/fp-overload-f32-vpair.c: Likewise.
* gcc.target/powerpc/fp-overload-f64-scalar.c: Likewise.
* gcc.target/powerpc/fp-overload-f64-vector.c: Likewise.
* gcc.target/powerpc/fp-overload-f64-vpair.c: Likewise.
* gcc.target/powerpc/fp-overload.h: New include file for fp overloaded
built-in functions.
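As an illustrative sketch (not part of the patch itself), the overloaded
built-ins are meant to be called uniformly on all three argument types;
the functions below are hypothetical, and only the __builtin_mult_f64
calls come from this patch:

	/* Multiply doubles two at a time with a 128-bit vector, or
	   four at a time with a vector pair.  */
	vector double
	vmul (vector double a, vector double b)
	{
	  return __builtin_mult_f64 (a, b);	/* one xvmuldp.  */
	}

	__vector_pair
	vpmul (__vector_pair a, __vector_pair b)
	{
	  return __builtin_mult_f64 (a, b);	/* split into two xvmuldp.  */
	}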
Diff:
---
gcc/config/rs6000/rs6000-builtin.cc | 105 ++++++
gcc/config/rs6000/rs6000-builtins.def | 173 +++++++++
gcc/config/rs6000/rs6000-overload.def | 178 +++++++++
gcc/config/rs6000/rs6000.md | 1 +
gcc/config/rs6000/t-rs6000 | 1 +
gcc/config/rs6000/vector-pair.md | 417 +++++++++++++++++++++
gcc/doc/extend.texi | 107 ++++++
.../gcc.target/powerpc/fp-overload-f32-scalar.c | 21 ++
.../gcc.target/powerpc/fp-overload-f32-vector.c | 23 ++
.../gcc.target/powerpc/fp-overload-f32-vpair.c | 23 ++
.../gcc.target/powerpc/fp-overload-f64-scalar.c | 22 ++
.../gcc.target/powerpc/fp-overload-f64-vector.c | 22 ++
.../gcc.target/powerpc/fp-overload-f64-vpair.c | 22 ++
gcc/testsuite/gcc.target/powerpc/fp-overload.h | 85 +++++
14 files changed, 1200 insertions(+)
diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index 82cc3a19447..e32d9175a0c 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -1261,6 +1261,49 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
return true;
}
+/* Helper function to fold the overloaded fp functions for the scalar and
+ vector types that support the operation directly. */
+
+static void
+fold_builtin_overload_fp (gimple_stmt_iterator *gsi,
+ gimple *stmt,
+ enum tree_code code,
+ int nargs)
+{
+ location_t loc = gimple_location (stmt);
+ tree lhs = gimple_call_lhs (stmt);
+ tree t;
+
+ if (nargs == 1)
+ {
+ tree arg0 = gimple_call_arg (stmt, 0);
+ t = build1 (code, TREE_TYPE (lhs), arg0);
+ }
+
+ else if (nargs == 2)
+ {
+ tree arg0 = gimple_call_arg (stmt, 0);
+ tree arg1 = gimple_call_arg (stmt, 1);
+ t = build2 (code, TREE_TYPE (lhs), arg0, arg1);
+ }
+
+ else if (nargs == 3)
+ {
+ tree arg0 = gimple_call_arg (stmt, 0);
+ tree arg1 = gimple_call_arg (stmt, 1);
+ tree arg2 = gimple_call_arg (stmt, 2);
+ t = build3 (code, TREE_TYPE (lhs), arg0, arg1, arg2);
+ }
+
+ else
+ gcc_unreachable ();
+
+ gimple *g = gimple_build_assign (lhs, t);
+ gimple_set_location (g, loc);
+ gsi_replace (gsi, g, true);
+ return;
+}
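+
+/* For example, a call such as
+	x = __builtin_add_f32_vector (a, b);
+   is folded into the plain assignment
+	x = a + b;
+   so later gimple passes see an ordinary PLUS_EXPR.  */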
+
/* Fold a machine-dependent built-in in GIMPLE. (For folding into
a constant, use rs6000_fold_builtin.) */
bool
@@ -2233,6 +2276,68 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
return true;
}
+ case RS6000_BIF_ABS_F32_SCALAR:
+ case RS6000_BIF_ABS_F32_VECTOR:
+ case RS6000_BIF_ABS_F64_SCALAR:
+ case RS6000_BIF_ABS_F64_VECTOR:
+ fold_builtin_overload_fp (gsi, stmt, ABS_EXPR, 1);
+ return true;
+
+ case RS6000_BIF_ADD_F32_SCALAR:
+ case RS6000_BIF_ADD_F32_VECTOR:
+ case RS6000_BIF_ADD_F64_SCALAR:
+ case RS6000_BIF_ADD_F64_VECTOR:
+ fold_builtin_overload_fp (gsi, stmt, PLUS_EXPR, 2);
+ return true;
+
+ case RS6000_BIF_MULT_F32_SCALAR:
+ case RS6000_BIF_MULT_F32_VECTOR:
+ case RS6000_BIF_MULT_F64_SCALAR:
+ case RS6000_BIF_MULT_F64_VECTOR:
+ fold_builtin_overload_fp (gsi, stmt, MULT_EXPR, 2);
+ return true;
+
+ case RS6000_BIF_NEG_F32_SCALAR:
+ case RS6000_BIF_NEG_F32_VECTOR:
+ case RS6000_BIF_NEG_F64_SCALAR:
+ case RS6000_BIF_NEG_F64_VECTOR:
+ fold_builtin_overload_fp (gsi, stmt, NEGATE_EXPR, 1);
+ return true;
+
+ case RS6000_BIF_REDUCE_F32_SCALAR:
+ case RS6000_BIF_REDUCE_F64_SCALAR:
+ {
+ location_t loc = gimple_location (stmt);
+ lhs = gimple_call_lhs (stmt);
+ arg0 = gimple_call_arg (stmt, 0);
+ g = gimple_build_assign (lhs, arg0);
+ gimple_set_location (g, loc);
+ gsi_replace (gsi, g, true);
+ return true;
+ }
+
+ case RS6000_BIF_SMAX_F32_SCALAR:
+ case RS6000_BIF_SMAX_F32_VECTOR:
+ case RS6000_BIF_SMAX_F64_SCALAR:
+ case RS6000_BIF_SMAX_F64_VECTOR:
+ fold_builtin_overload_fp (gsi, stmt, MAX_EXPR, 2);
+ return true;
+
+ case RS6000_BIF_SMIN_F32_SCALAR:
+ case RS6000_BIF_SMIN_F32_VECTOR:
+ case RS6000_BIF_SMIN_F64_SCALAR:
+ case RS6000_BIF_SMIN_F64_VECTOR:
+ fold_builtin_overload_fp (gsi, stmt, MIN_EXPR, 2);
+ return true;
+
+ case RS6000_BIF_SUB_F32_SCALAR:
+ case RS6000_BIF_SUB_F32_VECTOR:
+ case RS6000_BIF_SUB_F64_SCALAR:
+ case RS6000_BIF_SUB_F64_VECTOR:
+ fold_builtin_overload_fp (gsi, stmt, MINUS_EXPR, 2);
+ return true;
+
default:
if (TARGET_DEBUG_BUILTIN)
fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index 35c4cdf74c5..acc76adca12 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -4116,3 +4116,176 @@
void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
STXVP nothing {mma,pair}
+
+; Built-in functions for overloaded floating point operations, including
+; scalar and 128-bit vector variants that are converted into direct
+; operations.  The 256-bit variants are kept as vector pair insns that are
+; split into separate operations after register allocation.
+
+ float __builtin_abs_f32_scalar (float);
+ ABS_F32_SCALAR nothing {}
+ vf __builtin_abs_f32_vector (vf);
+ ABS_F32_VECTOR nothing {}
+ v256 __builtin_abs_f32_vpair (v256);
+ ABS_F32_VPAIR vpair_absv8sf2 {mma}
+
+ double __builtin_abs_f64_scalar (double);
+ ABS_F64_SCALAR nothing {}
+ vd __builtin_abs_f64_vector (vd);
+ ABS_F64_VECTOR nothing {}
+ v256 __builtin_abs_f64_vpair (v256);
+ ABS_F64_VPAIR vpair_absv4df2 {mma}
+
+ float __builtin_add_f32_scalar (float, float);
+ ADD_F32_SCALAR nothing {}
+ vf __builtin_add_f32_vector (vf, vf);
+ ADD_F32_VECTOR nothing {}
+ v256 __builtin_add_f32_vpair (v256, v256);
+ ADD_F32_VPAIR vpair_addv8sf3 {mma}
+
+ double __builtin_add_f64_scalar (double, double);
+ ADD_F64_SCALAR nothing {}
+ vd __builtin_add_f64_vector (vd, vd);
+ ADD_F64_VECTOR nothing {}
+ v256 __builtin_add_f64_vpair (v256, v256);
+ ADD_F64_VPAIR vpair_addv4df3 {mma}
+
+ float __builtin_copysign_f32_scalar (float, float);
+ COPYSIGN_F32_SCALAR copysignsf3_fcpsgn {}
+ vf __builtin_copysign_f32_vector (vf, vf);
+ COPYSIGN_F32_VECTOR vsx_copysignv4sf3 {}
+ v256 __builtin_copysign_f32_vpair (v256, v256);
+ COPYSIGN_F32_VPAIR vpair_copysignv8sf3 {mma}
+
+ double __builtin_copysign_f64_scalar (double, double);
+ COPYSIGN_F64_SCALAR copysigndf3_fcpsgn {}
+ vd __builtin_copysign_f64_vector (vd, vd);
+ COPYSIGN_F64_VECTOR vsx_copysignv2df3 {}
+ v256 __builtin_copysign_f64_vpair (v256, v256);
+ COPYSIGN_F64_VPAIR vpair_copysignv4df3 {mma}
+
+ float __builtin_div_f32_scalar (float, float);
+ DIV_F32_SCALAR divsf3 {}
+ vf __builtin_div_f32_vector (vf, vf);
+ DIV_F32_VECTOR divv4sf3 {}
+ v256 __builtin_div_f32_vpair (v256, v256);
+ DIV_F32_VPAIR vpair_divv8sf3 {mma}
+
+ double __builtin_div_f64_scalar (double, double);
+ DIV_F64_SCALAR divdf3 {}
+ vd __builtin_div_f64_vector (vd, vd);
+ DIV_F64_VECTOR divv2df3 {}
+ v256 __builtin_div_f64_vpair (v256, v256);
+ DIV_F64_VPAIR vpair_divv4df3 {mma}
+
+ float __builtin_fma_f32_scalar (float, float, float);
+ FMA_F32_SCALAR fmasf4 {}
+ vf __builtin_fma_f32_vector (vf, vf, vf);
+ FMA_F32_VECTOR fmav4sf4 {}
+ v256 __builtin_fma_f32_vpair (v256, v256, v256);
+ FMA_F32_VPAIR vpair_fmav8sf4 {mma}
+
+ double __builtin_fma_f64_scalar (double, double, double);
+ FMA_F64_SCALAR fmadf4 {}
+ vd __builtin_fma_f64_vector (vd, vd, vd);
+ FMA_F64_VECTOR fmav2df4 {}
+ v256 __builtin_fma_f64_vpair (v256, v256, v256);
+ FMA_F64_VPAIR vpair_fmav4df4 {mma}
+
+ float __builtin_mult_f32_scalar (float, float);
+ MULT_F32_SCALAR nothing {}
+ vf __builtin_mult_f32_vector (vf, vf);
+ MULT_F32_VECTOR nothing {}
+ v256 __builtin_mult_f32_vpair (v256, v256);
+ MULT_F32_VPAIR vpair_mulv8sf3 {mma}
+
+ double __builtin_mult_f64_scalar (double, double);
+ MULT_F64_SCALAR nothing {}
+ vd __builtin_mult_f64_vector (vd, vd);
+ MULT_F64_VECTOR nothing {}
+ v256 __builtin_mult_f64_vpair (v256, v256);
+ MULT_F64_VPAIR vpair_mulv4df3 {mma}
+
+ float __builtin_neg_f32_scalar (float);
+ NEG_F32_SCALAR nothing {}
+ vf __builtin_neg_f32_vector (vf);
+ NEG_F32_VECTOR nothing {}
+ v256 __builtin_neg_f32_vpair (v256);
+ NEG_F32_VPAIR vpair_negv8sf2 {mma}
+
+ double __builtin_neg_f64_scalar (double);
+ NEG_F64_SCALAR nothing {}
+ vd __builtin_neg_f64_vector (vd);
+ NEG_F64_VECTOR nothing {}
+ v256 __builtin_neg_f64_vpair (v256);
+ NEG_F64_VPAIR vpair_negv4df2 {mma}
+
+ float __builtin_reduce_f32_scalar (float);
+ REDUCE_F32_SCALAR nothing {}
+ float __builtin_reduce_f32_vector (vf);
+ REDUCE_F32_VECTOR reduce_v4sf {}
+ float __builtin_reduce_f32_vpair (v256);
+ REDUCE_F32_VPAIR reduce_v8sf {mma,pair}
+
+ double __builtin_reduce_f64_scalar (double);
+ REDUCE_F64_SCALAR nothing {}
+ double __builtin_reduce_f64_vector (vd);
+ REDUCE_F64_VECTOR reduce_v2df {}
+ double __builtin_reduce_f64_vpair (v256);
+ REDUCE_F64_VPAIR reduce_v4df {mma,pair}
+
+ float __builtin_smax_f32_scalar (float, float);
+ SMAX_F32_SCALAR nothing {}
+ vf __builtin_smax_f32_vector (vf, vf);
+ SMAX_F32_VECTOR nothing {}
+ v256 __builtin_smax_f32_vpair (v256, v256);
+ SMAX_F32_VPAIR vpair_smaxv8sf3 {mma}
+
+ double __builtin_smax_f64_scalar (double, double);
+ SMAX_F64_SCALAR nothing {}
+ vd __builtin_smax_f64_vector (vd, vd);
+ SMAX_F64_VECTOR nothing {}
+ v256 __builtin_smax_f64_vpair (v256, v256);
+ SMAX_F64_VPAIR vpair_smaxv4df3 {mma}
+
+ float __builtin_smin_f32_scalar (float, float);
+ SMIN_F32_SCALAR nothing {}
+ vf __builtin_smin_f32_vector (vf, vf);
+ SMIN_F32_VECTOR nothing {}
+ v256 __builtin_smin_f32_vpair (v256, v256);
+ SMIN_F32_VPAIR vpair_sminv8sf3 {mma}
+
+ double __builtin_smin_f64_scalar (double, double);
+ SMIN_F64_SCALAR nothing {}
+ vd __builtin_smin_f64_vector (vd, vd);
+ SMIN_F64_VECTOR nothing {}
+ v256 __builtin_smin_f64_vpair (v256, v256);
+ SMIN_F64_VPAIR vpair_sminv4df3 {mma}
+
+ float __builtin_sqrt_f32_scalar (float);
+ SQRT_F32_SCALAR nothing {}
+ vf __builtin_sqrt_f32_vector (vf);
+ SQRT_F32_VECTOR nothing {}
+ v256 __builtin_sqrt_f32_vpair (v256);
+ SQRT_F32_VPAIR vpair_sqrtv8sf2 {mma}
+
+ double __builtin_sqrt_f64_scalar (double);
+ SQRT_F64_SCALAR nothing {}
+ vd __builtin_sqrt_f64_vector (vd);
+ SQRT_F64_VECTOR nothing {}
+ v256 __builtin_sqrt_f64_vpair (v256);
+ SQRT_F64_VPAIR vpair_sqrtv4df2 {mma}
+
+ float __builtin_sub_f32_scalar (float, float);
+ SUB_F32_SCALAR nothing {}
+ vf __builtin_sub_f32_vector (vf, vf);
+ SUB_F32_VECTOR nothing {}
+ v256 __builtin_sub_f32_vpair (v256, v256);
+ SUB_F32_VPAIR vpair_subv8sf3 {mma}
+
+ double __builtin_sub_f64_scalar (double, double);
+ SUB_F64_SCALAR nothing {}
+ vd __builtin_sub_f64_vector (vd, vd);
+ SUB_F64_VECTOR nothing {}
+ v256 __builtin_sub_f64_vpair (v256, v256);
+ SUB_F64_VPAIR vpair_subv4df3 {mma}
diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def
index b83946f5ad8..bbc26de4568 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -6187,3 +6187,181 @@
VUPKLSW VUPKLSW_DEPR1
vbll __builtin_vec_vupklsw (vbi);
VUPKLSW VUPKLSW_DEPR2
+
+;; Overloaded floating point built-in functions
+
+[ABS_F32, SKIP, __builtin_abs_f32]
+ float __builtin_abs_f32 (float);
+ ABS_F32_SCALAR
+ vf __builtin_abs_f32 (vf);
+ ABS_F32_VECTOR
+ v256 __builtin_abs_f32 (v256);
+ ABS_F32_VPAIR
+
+[ABS_F64, SKIP, __builtin_abs_f64]
+ double __builtin_abs_f64 (double);
+ ABS_F64_SCALAR
+ vd __builtin_abs_f64 (vd);
+ ABS_F64_VECTOR
+ v256 __builtin_abs_f64 (v256);
+ ABS_F64_VPAIR
+
+[ADD_F32, SKIP, __builtin_add_f32]
+ float __builtin_add_f32 (float, float);
+ ADD_F32_SCALAR
+ vf __builtin_add_f32 (vf, vf);
+ ADD_F32_VECTOR
+ v256 __builtin_add_f32 (v256, v256);
+ ADD_F32_VPAIR
+
+[ADD_F64, SKIP, __builtin_add_f64]
+ double __builtin_add_f64 (double, double);
+ ADD_F64_SCALAR
+ vd __builtin_add_f64 (vd, vd);
+ ADD_F64_VECTOR
+ v256 __builtin_add_f64 (v256, v256);
+ ADD_F64_VPAIR
+
+[COPYSIGN_F32, SKIP, __builtin_copysign_f32]
+ float __builtin_copysign_f32 (float, float);
+ COPYSIGN_F32_SCALAR
+ vf __builtin_copysign_f32 (vf, vf);
+ COPYSIGN_F32_VECTOR
+ v256 __builtin_copysign_f32 (v256, v256);
+ COPYSIGN_F32_VPAIR
+
+[COPYSIGN_F64, SKIP, __builtin_copysign_f64]
+ double __builtin_copysign_f64 (double, double);
+ COPYSIGN_F64_SCALAR
+ vd __builtin_copysign_f64 (vd, vd);
+ COPYSIGN_F64_VECTOR
+ v256 __builtin_copysign_f64 (v256, v256);
+ COPYSIGN_F64_VPAIR
+
+[DIV_F32, SKIP, __builtin_div_f32]
+ float __builtin_div_f32 (float, float);
+ DIV_F32_SCALAR
+ vf __builtin_div_f32 (vf, vf);
+ DIV_F32_VECTOR
+ v256 __builtin_div_f32 (v256, v256);
+ DIV_F32_VPAIR
+
+[DIV_F64, SKIP, __builtin_div_f64]
+ double __builtin_div_f64 (double, double);
+ DIV_F64_SCALAR
+ vd __builtin_div_f64 (vd, vd);
+ DIV_F64_VECTOR
+ v256 __builtin_div_f64 (v256, v256);
+ DIV_F64_VPAIR
+
+[FMA_F32, SKIP, __builtin_fma_f32]
+ float __builtin_fma_f32 (float, float, float);
+ FMA_F32_SCALAR
+ vf __builtin_fma_f32 (vf, vf, vf);
+ FMA_F32_VECTOR
+ v256 __builtin_fma_f32 (v256, v256, v256);
+ FMA_F32_VPAIR
+
+[FMA_F64, SKIP, __builtin_fma_f64]
+ double __builtin_fma_f64 (double, double, double);
+ FMA_F64_SCALAR
+ vd __builtin_fma_f64 (vd, vd, vd);
+ FMA_F64_VECTOR
+ v256 __builtin_fma_f64 (v256, v256, v256);
+ FMA_F64_VPAIR
+
+[MULT_F32, SKIP, __builtin_mult_f32]
+ float __builtin_mult_f32 (float, float);
+ MULT_F32_SCALAR
+ vf __builtin_mult_f32 (vf, vf);
+ MULT_F32_VECTOR
+ v256 __builtin_mult_f32 (v256, v256);
+ MULT_F32_VPAIR
+
+[MULT_F64, SKIP, __builtin_mult_f64]
+ double __builtin_mult_f64 (double, double);
+ MULT_F64_SCALAR
+ vd __builtin_mult_f64 (vd, vd);
+ MULT_F64_VECTOR
+ v256 __builtin_mult_f64 (v256, v256);
+ MULT_F64_VPAIR
+
+[NEG_F32, SKIP, __builtin_neg_f32]
+ float __builtin_neg_f32 (float);
+ NEG_F32_SCALAR
+ vf __builtin_neg_f32 (vf);
+ NEG_F32_VECTOR
+ v256 __builtin_neg_f32 (v256);
+ NEG_F32_VPAIR
+
+[NEG_F64, SKIP, __builtin_neg_f64]
+ double __builtin_neg_f64 (double);
+ NEG_F64_SCALAR
+ vd __builtin_neg_f64 (vd);
+ NEG_F64_VECTOR
+ v256 __builtin_neg_f64 (v256);
+ NEG_F64_VPAIR
+
+[REDUCE_F32, SKIP, __builtin_reduce_f32]
+ float __builtin_reduce_f32 (float);
+ REDUCE_F32_SCALAR
+ float __builtin_reduce_f32 (vf);
+ REDUCE_F32_VECTOR
+ float __builtin_reduce_f32 (v256);
+ REDUCE_F32_VPAIR
+
+[REDUCE_F64, SKIP, __builtin_reduce_f64]
+ double __builtin_reduce_f64 (double);
+ REDUCE_F64_SCALAR
+ double __builtin_reduce_f64 (vd);
+ REDUCE_F64_VECTOR
+ double __builtin_reduce_f64 (v256);
+ REDUCE_F64_VPAIR
+
+[SMAX_F32, SKIP, __builtin_smax_f32]
+ float __builtin_smax_f32 (float, float);
+ SMAX_F32_SCALAR
+ vf __builtin_smax_f32 (vf, vf);
+ SMAX_F32_VECTOR
+ v256 __builtin_smax_f32 (v256, v256);
+ SMAX_F32_VPAIR
+
+[SMAX_F64, SKIP, __builtin_smax_f64]
+ double __builtin_smax_f64 (double, double);
+ SMAX_F64_SCALAR
+ vd __builtin_smax_f64 (vd, vd);
+ SMAX_F64_VECTOR
+ v256 __builtin_smax_f64 (v256, v256);
+ SMAX_F64_VPAIR
+
+[SMIN_F32, SKIP, __builtin_smin_f32]
+ float __builtin_smin_f32 (float, float);
+ SMIN_F32_SCALAR
+ vf __builtin_smin_f32 (vf, vf);
+ SMIN_F32_VECTOR
+ v256 __builtin_smin_f32 (v256, v256);
+ SMIN_F32_VPAIR
+
+[SMIN_F64, SKIP, __builtin_smin_f64]
+ double __builtin_smin_f64 (double, double);
+ SMIN_F64_SCALAR
+ vd __builtin_smin_f64 (vd, vd);
+ SMIN_F64_VECTOR
+ v256 __builtin_smin_f64 (v256, v256);
+ SMIN_F64_VPAIR
+
+[SUB_F32, SKIP, __builtin_sub_f32]
+ float __builtin_sub_f32 (float, float);
+ SUB_F32_SCALAR
+ vf __builtin_sub_f32 (vf, vf);
+ SUB_F32_VECTOR
+ v256 __builtin_sub_f32 (v256, v256);
+ SUB_F32_VPAIR
+
+[SUB_F64, SKIP, __builtin_sub_f64]
+ double __builtin_sub_f64 (double, double);
+ SUB_F64_SCALAR
+ vd __builtin_sub_f64 (vd, vd);
+ SUB_F64_VECTOR
+ v256 __builtin_sub_f64 (v256, v256);
+ SUB_F64_VPAIR
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index be615c3584e..1cf0ed31fb8 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -15778,6 +15778,7 @@
(include "vsx.md")
(include "altivec.md")
(include "mma.md")
+(include "vector-pair.md")
(include "dfp.md")
(include "crypto.md")
(include "htm.md")
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index f183b42ce1d..5fc89499795 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -128,6 +128,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \
$(srcdir)/config/rs6000/vsx.md \
$(srcdir)/config/rs6000/altivec.md \
$(srcdir)/config/rs6000/mma.md \
+ $(srcdir)/config/rs6000/vector-pair.md \
$(srcdir)/config/rs6000/crypto.md \
$(srcdir)/config/rs6000/htm.md \
$(srcdir)/config/rs6000/dfp.md \
diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md
new file mode 100644
index 00000000000..13f6e0464b5
--- /dev/null
+++ b/gcc/config/rs6000/vector-pair.md
@@ -0,0 +1,417 @@
+;; Vector pair arithmetic support.
+;; Copyright (C) 2020-2023 Free Software Foundation, Inc.
+;; Contributed by Peter Bergner <bergner@linux.ibm.com> and
+;; Michael Meissner <meissner@linux.ibm.com>
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; This file adds support for doing vector operations on pairs of vector
+;; registers.  Most of the patterns use vector pair instructions to load
+;; and possibly store registers, but the arithmetic is split after
+;; register allocation into two separate operations on the component
+;; vectors.  The second scheduler pass can then interleave other
+;; instructions between these pairs of instructions where possible.
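+;;
+;; As an illustrative example (assembler output, not RTL from this file),
+;; a vector pair add on V4DF starts as a single insn on an OOmode value
+;; and is split after reload into two independent V2DF additions on the
+;; two registers of the pair, e.g.:
+;;
+;;	xvadddp 34,38,42
+;;	xvadddp 35,39,43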
+
+(define_c_enum "unspec"
+ [UNSPEC_VPAIR_V4DF
+ UNSPEC_VPAIR_V8SF
+ UNSPEC_VPAIR_V32QI
+ UNSPEC_VPAIR_V16HI
+ UNSPEC_VPAIR_V8SI
+ UNSPEC_VPAIR_V4DI
+ UNSPEC_REDUCE_F32
+ UNSPEC_REDUCE_F64
+ ])
+
+;; Iterators for unary and binary arithmetic on vector pairs
+(define_code_iterator VPAIR_UNARY [neg abs sqrt])
+(define_code_iterator VPAIR_BINARY [plus minus mult div copysign smin smax])
+
+;; Give the insn name from the operation
+(define_code_attr vpair_op [(abs "abs")
+ (copysign "copysign")
+ (div "div")
+ (minus "sub")
+ (mult "mul")
+ (neg "neg")
+ (plus "add")
+ (smin "smin")
+ (smax "smax")
+ (sqrt "sqrt")])
+
+;; Iterator for creating the wrapper for vector pair built-ins
+(define_int_iterator VPAIR_WRAPPER [UNSPEC_VPAIR_V4DF UNSPEC_VPAIR_V8SF])
+
+;; Map VPAIR_WRAPPER to vector type (i.e. V2DF or V4SF)
+(define_int_attr VPAIR_VECTOR [(UNSPEC_VPAIR_V4DF "V2DF")
+ (UNSPEC_VPAIR_V8SF "V4SF")])
+
+(define_int_attr vpair_type [(UNSPEC_VPAIR_V4DF "v4df")
+ (UNSPEC_VPAIR_V8SF "v8sf")])
+
+\f
+;; Vector pair floating point unary operations
+(define_insn_and_split "vpair_<vpair_op><vpair_type>2"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+ (unspec:OO [(VPAIR_UNARY:OO
+ (match_operand:OO 1 "vsx_register_operand" "wa"))]
+ VPAIR_WRAPPER))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (VPAIR_UNARY:<VPAIR_VECTOR> (match_dup 3)))
+ (set (match_dup 4) (VPAIR_UNARY:<VPAIR_VECTOR> (match_dup 5)))]
+{
+ unsigned reg0 = reg_or_subregno (operands[0]);
+ unsigned reg1 = reg_or_subregno (operands[1]);
+
+ operands[2] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
+ operands[3] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
+ operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
+ operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
+}
+ [(set_attr "length" "8")])
+
+;; Optimize vector pair negate of absolute value
+(define_insn_and_split "vpair_nabs<vpair_type>2"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+ (unspec:OO
+ [(neg:OO
+ (unspec:OO
+ [(abs:OO (match_operand:OO 1 "vsx_register_operand" "wa"))]
+ VPAIR_WRAPPER))]
+ VPAIR_WRAPPER))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2)
+ (neg:<VPAIR_VECTOR>
+ (abs:<VPAIR_VECTOR> (match_dup 3))))
+ (set (match_dup 4)
+ (neg:<VPAIR_VECTOR>
+ (abs:<VPAIR_VECTOR> (match_dup 5))))]
+{
+ unsigned reg0 = reg_or_subregno (operands[0]);
+ unsigned reg1 = reg_or_subregno (operands[1]);
+
+ operands[2] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
+ operands[3] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
+ operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
+ operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
+}
+ [(set_attr "length" "8")])
+
+;; Vector pair floating binary operations
+(define_insn_and_split "vpair_<vpair_op><vpair_type>3"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+ (unspec:OO [(VPAIR_BINARY:OO
+ (match_operand:OO 1 "vsx_register_operand" "wa")
+ (match_operand:OO 2 "vsx_register_operand" "wa"))]
+ VPAIR_WRAPPER))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3)
+ (VPAIR_BINARY:<VPAIR_VECTOR> (match_dup 4)
+ (match_dup 5)))
+ (set (match_dup 6)
+ (VPAIR_BINARY:<VPAIR_VECTOR> (match_dup 7)
+ (match_dup 8)))]
+{
+ unsigned reg0 = reg_or_subregno (operands[0]);
+ unsigned reg1 = reg_or_subregno (operands[1]);
+ unsigned reg2 = reg_or_subregno (operands[2]);
+
+ operands[3] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
+ operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
+ operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2);
+
+ operands[6] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
+ operands[7] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
+ operands[8] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2 + 1);
+}
+ [(set_attr "length" "8")])
+
+;; Vector pair fused multiply-add floating point operations
+(define_insn_and_split "vpair_fma<vpair_type>4"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+ (unspec:OO
+ [(fma:OO
+ (match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+ (match_operand:OO 2 "vsx_register_operand" "wa,0")
+ (match_operand:OO 3 "vsx_register_operand" "0,wa"))]
+ VPAIR_WRAPPER))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 4)
+ (fma:<VPAIR_VECTOR> (match_dup 5)
+ (match_dup 6)
+ (match_dup 7)))
+ (set (match_dup 8)
+ (fma:<VPAIR_VECTOR> (match_dup 9)
+ (match_dup 10)
+ (match_dup 11)))]
+{
+ unsigned reg0 = reg_or_subregno (operands[0]);
+ unsigned reg1 = reg_or_subregno (operands[1]);
+ unsigned reg2 = reg_or_subregno (operands[2]);
+ unsigned reg3 = reg_or_subregno (operands[3]);
+
+ operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
+ operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
+ operands[6] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2);
+ operands[7] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3);
+
+ operands[8] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
+ operands[9] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
+ operands[10] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2 + 1);
+ operands[11] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3 + 1);
+}
+ [(set_attr "length" "8")])
+
+(define_insn_and_split "vpair_fms<vpair_type>4"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+ (unspec:OO
+ [(fma:OO
+ (match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+ (match_operand:OO 2 "vsx_register_operand" "wa,0")
+ (unspec:OO
+ [(neg:OO (match_operand:OO 3 "vsx_register_operand" "0,wa"))]
+ VPAIR_WRAPPER))]
+ VPAIR_WRAPPER))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 4)
+ (fma:<VPAIR_VECTOR> (match_dup 5)
+ (match_dup 6)
+ (neg:<VPAIR_VECTOR> (match_dup 7))))
+ (set (match_dup 8)
+ (fma:<VPAIR_VECTOR> (match_dup 9)
+ (match_dup 10)
+ (neg:<VPAIR_VECTOR> (match_dup 11))))]
+{
+ unsigned reg0 = reg_or_subregno (operands[0]);
+ unsigned reg1 = reg_or_subregno (operands[1]);
+ unsigned reg2 = reg_or_subregno (operands[2]);
+ unsigned reg3 = reg_or_subregno (operands[3]);
+
+ operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
+ operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
+ operands[6] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2);
+ operands[7] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3);
+
+ operands[8] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
+ operands[9] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
+ operands[10] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2 + 1);
+ operands[11] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3 + 1);
+}
+ [(set_attr "length" "8")])
+
+(define_insn_and_split "vpair_nfma<vpair_type>4"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+ (unspec:OO
+ [(neg:OO
+ (unspec:OO
+ [(fma:OO
+ (match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+ (match_operand:OO 2 "vsx_register_operand" "wa,0")
+ (match_operand:OO 3 "vsx_register_operand" "0,wa"))]
+ VPAIR_WRAPPER))]
+ VPAIR_WRAPPER))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 4)
+ (neg:<VPAIR_VECTOR>
+ (fma:<VPAIR_VECTOR> (match_dup 5)
+ (match_dup 6)
+ (match_dup 7))))
+ (set (match_dup 8)
+ (neg:<VPAIR_VECTOR>
+ (fma:<VPAIR_VECTOR> (match_dup 9)
+ (match_dup 10)
+ (match_dup 11))))]
+{
+ unsigned reg0 = reg_or_subregno (operands[0]);
+ unsigned reg1 = reg_or_subregno (operands[1]);
+ unsigned reg2 = reg_or_subregno (operands[2]);
+ unsigned reg3 = reg_or_subregno (operands[3]);
+
+ operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
+ operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
+ operands[6] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2);
+ operands[7] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3);
+
+ operands[8] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
+ operands[9] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
+ operands[10] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2 + 1);
+ operands[11] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3 + 1);
+}
+ [(set_attr "length" "8")])
+
+(define_insn_and_split "vpair_nfms<vpair_type>4"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+ (unspec:OO
+ [(neg:OO
+ (unspec:OO
+ [(fma:OO
+ (match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+ (match_operand:OO 2 "vsx_register_operand" "wa,0")
+ (unspec:OO
+ [(neg:OO (match_operand:OO 3 "vsx_register_operand" "0,wa"))]
+ VPAIR_WRAPPER))]
+ VPAIR_WRAPPER))]
+ VPAIR_WRAPPER))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 4)
+ (neg:<VPAIR_VECTOR>
+ (fma:<VPAIR_VECTOR> (match_dup 5)
+ (match_dup 6)
+ (neg:<VPAIR_VECTOR> (match_dup 7)))))
+ (set (match_dup 8)
+ (neg:<VPAIR_VECTOR>
+ (fma:<VPAIR_VECTOR> (match_dup 9)
+ (match_dup 10)
+ (neg:<VPAIR_VECTOR> (match_dup 11)))))]
+{
+ unsigned reg0 = reg_or_subregno (operands[0]);
+ unsigned reg1 = reg_or_subregno (operands[1]);
+ unsigned reg2 = reg_or_subregno (operands[2]);
+ unsigned reg3 = reg_or_subregno (operands[3]);
+
+ operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
+ operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
+ operands[6] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2);
+ operands[7] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3);
+
+ operands[8] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
+ operands[9] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
+ operands[10] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2 + 1);
+ operands[11] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3 + 1);
+}
+ [(set_attr "length" "8")])
+
+;; Reduction for a V4SF vector
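+;; The split below does a log2 reduction: rotate the vector by 8 bytes and
+;; add, then rotate by 4 bytes and add, leaving the full sum in every
+;; element, and finally convert one element to a scalar.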
+(define_insn_and_split "reduce_v4sf"
+ [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
+ (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "v")]
+ UNSPEC_REDUCE_F32))
+ (clobber (match_scratch:V4SF 2 "=&v"))
+ (clobber (match_scratch:V4SF 3 "=&v"))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(pc)]
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx tmp1 = operands[2];
+ rtx tmp2 = operands[3];
+
+ emit_insn (gen_altivec_vsldoi_v4sf (tmp1, op1, op1, GEN_INT (8)));
+ emit_insn (gen_addv4sf3 (tmp1, op1, tmp1));
+ emit_insn (gen_altivec_vsldoi_v4sf (tmp2, tmp1, tmp1, GEN_INT (4)));
+ emit_insn (gen_addv4sf3 (tmp2, tmp1, tmp2));
+ emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp2));
+ DONE;
+}
+ [(set_attr "length" "24")])
+
+;; Reduction for a pair of V4SF vectors
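+;; Same log2 reduction as reduce_v4sf, after first adding together the two
+;; V4SF halves of the vector pair.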
+(define_insn_and_split "reduce_v8sf"
+ [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
+ (unspec:SF [(match_operand:OO 1 "vsx_register_operand" "v")]
+ UNSPEC_REDUCE_F32))
+ (clobber (match_scratch:V4SF 2 "=&v"))
+ (clobber (match_scratch:V4SF 3 "=&v"))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(pc)]
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx tmp1 = operands[2];
+ rtx tmp2 = operands[3];
+ unsigned r = reg_or_subregno (op1);
+ rtx op1_hi = gen_rtx_REG (V4SFmode, r);
+ rtx op1_lo = gen_rtx_REG (V4SFmode, r + 1);
+
+ emit_insn (gen_addv4sf3 (tmp1, op1_hi, op1_lo));
+ emit_insn (gen_altivec_vsldoi_v4sf (tmp2, tmp1, tmp1, GEN_INT (8)));
+ emit_insn (gen_addv4sf3 (tmp2, tmp1, tmp2));
+ emit_insn (gen_altivec_vsldoi_v4sf (tmp1, tmp2, tmp2, GEN_INT (4)));
+ emit_insn (gen_addv4sf3 (tmp2, tmp1, tmp2));
+ emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp2));
+ DONE;
+}
+ [(set_attr "length" "24")])
+
+;; Reduction for a V2DF vector
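+;; Only a single DF add is needed: one element is extracted with
+;; vec_select, and the other is already addressable as the DFmode view of
+;; the vector register.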
+(define_insn_and_split "reduce_v2df"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=&wa")
+ (unspec:DF [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
+ UNSPEC_REDUCE_F64))
+ (clobber (match_scratch:DF 2 "=&wa"))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2)
+ (vec_select:DF (match_dup 1)
+ (parallel [(match_dup 3)])))
+ (set (match_dup 0)
+ (plus:DF (match_dup 4)
+ (match_dup 2)))]
+{
+ unsigned reg1 = reg_or_subregno (operands[1]);
+
+ operands[3] = GEN_INT (BYTES_BIG_ENDIAN ? 1 : 0);
+ operands[4] = gen_rtx_REG (DFmode, reg1);
+})
+
+;; Reduction for a pair of V2DF vectors
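+;; Add the two V2DF halves of the pair first, then finish as in
+;; reduce_v2df.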
+(define_insn_and_split "reduce_v4df"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=&wa")
+ (unspec:DF [(match_operand:OO 1 "vsx_register_operand" "wa")]
+ UNSPEC_REDUCE_F64))
+ (clobber (match_scratch:DF 2 "=&wa"))
+ (clobber (match_scratch:V2DF 3 "=&wa"))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3)
+ (plus:V2DF (match_dup 4)
+ (match_dup 5)))
+ (set (match_dup 2)
+ (vec_select:DF (match_dup 3)
+ (parallel [(match_dup 6)])))
+ (set (match_dup 0)
+ (plus:DF (match_dup 7)
+ (match_dup 2)))]
+{
+ unsigned reg1 = reg_or_subregno (operands[1]);
+ unsigned reg3 = reg_or_subregno (operands[3]);
+
+ operands[4] = gen_rtx_REG (V2DFmode, reg1);
+ operands[5] = gen_rtx_REG (V2DFmode, reg1 + 1);
+ operands[6] = GEN_INT (BYTES_BIG_ENDIAN ? 1 : 0);
+ operands[7] = gen_rtx_REG (DFmode, reg3);
+})
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 97eaacf8a7e..1e735cdc16f 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -19150,6 +19150,7 @@ The PVIPR documents the following overloaded functions:
* PowerPC AltiVec Built-in Functions Available on ISA 2.07::
* PowerPC AltiVec Built-in Functions Available on ISA 3.0::
* PowerPC AltiVec Built-in Functions Available on ISA 3.1::
+* PowerPC Floating Point Overloaded Built-in Functions::
@end menu
@node PowerPC AltiVec Built-in Functions on ISA 2.05
@@ -21102,6 +21103,112 @@ int vec_any_le (vector signed __int128, vector signed __int128);
int vec_any_le (vector unsigned __int128, vector unsigned __int128);
@end smallexample
+@node PowerPC Floating Point Overloaded Built-in Functions
+@subsubsection PowerPC Floating Point Overloaded Built-in Functions
+
+The following additional built-in functions are available for the
+PowerPC family of processors.  They allow programmers to use the same
+built-in function to handle scalars, 128-bit vectors, and, on Power10
+systems, vector pairs.
+
+The following built-in functions handle 32-bit floating point
+operations on all processors where the VSX registers are available:
+
+@smallexample
+float __builtin_abs_f32 (float);
+vector float __builtin_abs_f32 (vector float);
+float __builtin_add_f32 (float, float);
+vector float __builtin_add_f32 (vector float, vector float);
+float __builtin_copysign_f32 (float, float);
+vector float __builtin_copysign_f32 (vector float, vector float);
+float __builtin_div_f32 (float, float);
+vector float __builtin_div_f32 (vector float, vector float);
+float __builtin_fma_f32 (float, float, float);
+vector float __builtin_fma_f32 (vector float, vector float, vector float);
+float __builtin_mult_f32 (float, float);
+vector float __builtin_mult_f32 (vector float, vector float);
+float __builtin_neg_f32 (float);
+vector float __builtin_neg_f32 (vector float);
+float __builtin_smax_f32 (float, float);
+vector float __builtin_smax_f32 (vector float, vector float);
+float __builtin_smin_f32 (float, float);
+vector float __builtin_smin_f32 (vector float, vector float);
+float __builtin_sub_f32 (float, float);
+vector float __builtin_sub_f32 (vector float, vector float);
+@end smallexample
+
+The following built-in functions handle 32-bit floating point
+operations on Power10 systems that support paired vector load and
+store instructions:
+
+@smallexample
+__vector_pair __builtin_abs_f32 (__vector_pair);
+__vector_pair __builtin_add_f32 (__vector_pair, __vector_pair);
+__vector_pair __builtin_copysign_f32 (__vector_pair, __vector_pair);
+__vector_pair __builtin_div_f32 (__vector_pair, __vector_pair);
+__vector_pair __builtin_fma_f32 (__vector_pair, __vector_pair, __vector_pair);
+__vector_pair __builtin_mult_f32 (__vector_pair, __vector_pair);
+__vector_pair __builtin_neg_f32 (__vector_pair);
+__vector_pair __builtin_smax_f32 (__vector_pair, __vector_pair);
+__vector_pair __builtin_smin_f32 (__vector_pair, __vector_pair);
+__vector_pair __builtin_sub_f32 (__vector_pair, __vector_pair);
+@end smallexample
+
+The following built-in functions handle 64-bit floating point
+operations on all processors where the VSX registers are available:
+
+@smallexample
+double __builtin_abs_f64 (double);
+vector double __builtin_abs_f64 (vector double);
+double __builtin_add_f64 (double, double);
+vector double __builtin_add_f64 (vector double, vector double);
+double __builtin_copysign_f64 (double, double);
+vector double __builtin_copysign_f64 (vector double, vector double);
+double __builtin_div_f64 (double, double);
+vector double __builtin_div_f64 (vector double, vector double);
+double __builtin_fma_f64 (double, double, double);
+vector double __builtin_fma_f64 (vector double, vector double, vector double);
+double __builtin_mult_f64 (double, double);
+vector double __builtin_mult_f64 (vector double, vector double);
+double __builtin_neg_f64 (double);
+vector double __builtin_neg_f64 (vector double);
+double __builtin_smax_f64 (double, double);
+vector double __builtin_smax_f64 (vector double, vector double);
+double __builtin_smin_f64 (double, double);
+vector double __builtin_smin_f64 (vector double, vector double);
+double __builtin_sub_f64 (double, double);
+vector double __builtin_sub_f64 (vector double, vector double);
+@end smallexample
+
+The following built-in functions handle 64-bit floating point
+operations on Power10 systems that support paired vector load and
+store instructions:
+
+@smallexample
+__vector_pair __builtin_abs_f64 (__vector_pair);
+__vector_pair __builtin_add_f64 (__vector_pair, __vector_pair);
+__vector_pair __builtin_copysign_f64 (__vector_pair, __vector_pair);
+__vector_pair __builtin_div_f64 (__vector_pair, __vector_pair);
+__vector_pair __builtin_fma_f64 (__vector_pair, __vector_pair, __vector_pair);
+__vector_pair __builtin_mult_f64 (__vector_pair, __vector_pair);
+__vector_pair __builtin_neg_f64 (__vector_pair);
+__vector_pair __builtin_smax_f64 (__vector_pair, __vector_pair);
+__vector_pair __builtin_smin_f64 (__vector_pair, __vector_pair);
+__vector_pair __builtin_sub_f64 (__vector_pair, __vector_pair);
+@end smallexample
+
+The following built-in functions return the sum of all of the elements
+of a scalar, vector, or vector pair argument:
+
+@smallexample
+float __builtin_reduce_f32 (float);
+float __builtin_reduce_f32 (vector float);
+float __builtin_reduce_f32 (__vector_pair);
+double __builtin_reduce_f64 (double);
+double __builtin_reduce_f64 (vector double);
+double __builtin_reduce_f64 (__vector_pair);
+@end smallexample
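+
+As an illustrative sketch (this example is not itself part of GCC), the
+@code{f32} variants can process eight floats per loop iteration with
+vector pairs; the code assumes suitably aligned data and a length that
+is a multiple of eight:
+
+@smallexample
+void
+negate_products (float *r, const float *a, const float *b,
+		 unsigned long n)
+@{
+  for (unsigned long i = 0; i < n; i += 8)
+    @{
+      __vector_pair x = *(__vector_pair *) &a[i];
+      __vector_pair y = *(__vector_pair *) &b[i];
+      __vector_pair m = __builtin_mult_f32 (x, y);
+      *(__vector_pair *) &r[i] = __builtin_neg_f32 (m);
+    @}
+@}
+@end smallexample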
+
@node PowerPC Hardware Transactional Memory Built-in Functions
@subsection PowerPC Hardware Transactional Memory Built-in Functions
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-scalar.c b/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-scalar.c
new file mode 100644
index 00000000000..400a54f2fd0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-scalar.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
+
+/* Test code generation for __builtin_<op>_f32 using scalar float. */
+
+#include "fp-overload.h"
+
+TEST (float, float, flt, 32)
+
+/* { dg-final { scan-assembler-times {\mfabs\M|\mxsabsdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfadds\M|\mxsaddsp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfmadds\M|\mxsmadd[am]sp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfmsubs\M|\mxsmsub[am]sp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfmuls\M|\mxsmulsp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfnabs\M|\mxsnabsdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfneg\M|\mxsnegdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfnmadds\M|\mxsnmadd[am]sp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfnmsubs\M|\mxsnmsub[am]sp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfsubs\M|\mxssubsp\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mbl\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-vector.c b/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-vector.c
new file mode 100644
index 00000000000..14f76d8a8f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-vector.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
+
+/* Test code generation for __builtin_<op>_f32 using vector float. */
+
+#include "fp-overload.h"
+
+TEST (vector float, float, vect, 32)
+
+/* { dg-final { scan-assembler-times {\mvsldoi\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxscvspdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvabssp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvaddsp\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd[am]sp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub[am]sp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvmulsp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvnabssp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvnegsp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd[am]sp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub[am]sp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvsubsp\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mbl\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-vpair.c b/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-vpair.c
new file mode 100644
index 00000000000..466f056cf9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-vpair.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
+
+/* Test code generation for __builtin_<op>_f32 using __vector_pair. */
+
+#include "fp-overload.h"
+
+TEST (__vector_pair, float, vpair, 32)
+
+/* { dg-final { scan-assembler-times {\mvsldoi\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxscvspdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvabssp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvaddsp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd[am]sp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub[am]sp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvmulsp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnabssp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnegsp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd[am]sp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub[am]sp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvsubsp\M} 2 } } */
+/* { dg-final { scan-assembler-not {\mbl\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-scalar.c b/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-scalar.c
new file mode 100644
index 00000000000..28e7c91c77c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-scalar.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
+
+/* Test code generation for __builtin_<op>_f64 using scalar double. */
+
+#include "fp-overload.h"
+
+TEST (double, double, dbl, 64)
+
+/* { dg-final { scan-assembler-times {\mfabs\M|\mxsabsdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfadd\M|\mxsadddp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfmadd\M|\mxsmadd[am]dp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfmsub\M|\mxsmsub[am]dp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfmul\M|\mxsmuldp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfnabs\M|\mxsnabsdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfneg\M|\mxsnegdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfnmadd\M|\mxsnmadd[am]dp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfnmsub\M|\mxsnmsub[am]dp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfsub\M|\mxssubdp\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mbl\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-vector.c b/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-vector.c
new file mode 100644
index 00000000000..4289ba4edb9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-vector.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
+
+/* Test code generation for __builtin_<op>_f64 using vector double. */
+
+#include "fp-overload.h"
+
+TEST (vector double, double, vect, 64)
+
+/* { dg-final { scan-assembler-times {\mxvabsdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvadddp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd[am]dp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub[am]dp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvmuldp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvnabsdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvnegdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd[am]dp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub[am]dp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvsubdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mbl\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-vpair.c b/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-vpair.c
new file mode 100644
index 00000000000..7dd0613bf88
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-vpair.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
+
+/* Test code generation for __builtin_<op>_f64 using __vector_pair. */
+
+#include "fp-overload.h"
+
+TEST (__vector_pair, double, vpair, 64)
+
+/* { dg-final { scan-assembler-times {\mxvabsdp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvadddp\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd[am]dp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub[am]dp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvmuldp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnabsdp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnegdp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd[am]dp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub[am]dp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvsubdp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mbl\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload.h b/gcc/testsuite/gcc.target/powerpc/fp-overload.h
new file mode 100644
index 00000000000..a1ce5f83765
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload.h
@@ -0,0 +1,85 @@
+/* Common code to test the floating point overload functions. */
+
+#define TEST(TYPE, SCALAR, TYPE_STR, SIZE) \
+ \
+void \
+do_add_ ## TYPE_STR ## _f ## SIZE (TYPE *p, TYPE *q, TYPE *r) \
+{ \
+ *p = __builtin_add_f ## SIZE (*q, *r); \
+} \
+ \
+void \
+do_sub_ ## TYPE_STR ## _f ## SIZE (TYPE *p, TYPE *q, TYPE *r) \
+{ \
+ *p = __builtin_sub_f ## SIZE (*q, *r); \
+} \
+ \
+void \
+do_mult_ ## TYPE_STR ## _f ## SIZE (TYPE *p, TYPE *q, TYPE *r) \
+{ \
+ *p = __builtin_mult_f ## SIZE (*q, *r); \
+} \
+ \
+void \
+do_neg_ ## TYPE_STR ## _f ## SIZE (TYPE *p, TYPE *q) \
+{ \
+ *p = __builtin_neg_f ## SIZE (*q); \
+} \
+ \
+void \
+do_abs_ ## TYPE_STR ## _f ## SIZE (TYPE *p, TYPE *q) \
+{ \
+ *p = __builtin_abs_f ## SIZE (*q); \
+} \
+ \
+void \
+do_nabs_ ## TYPE_STR ## _f ## SIZE (TYPE *p, TYPE *q) \
+{ \
+ *p = __builtin_neg_f ## SIZE (__builtin_abs_f ## SIZE (*q)); \
+} \
+ \
+void \
+do_fma_ ## TYPE_STR ## _f ## SIZE (TYPE *p, \
+ TYPE *q, \
+ TYPE *r, \
+ TYPE *s) \
+{ \
+ *p = __builtin_fma_f ## SIZE (*q, *r, *s); \
+} \
+ \
+void \
+do_fms_ ## TYPE_STR ## _f ## SIZE (TYPE *p, \
+ TYPE *q, \
+ TYPE *r, \
+ TYPE *s) \
+{ \
+ TYPE neg_s = __builtin_neg_f ## SIZE (*s); \
+ *p = __builtin_fma_f ## SIZE (*q, *r, neg_s); \
+} \
+ \
+void \
+do_nfma_ ## TYPE_STR ## _f ## SIZE (TYPE *p, \
+ TYPE *q, \
+ TYPE *r, \
+ TYPE *s) \
+{ \
+ TYPE f = __builtin_fma_f ## SIZE (*q, *r, *s); \
+ *p = __builtin_neg_f ## SIZE (f); \
+} \
+ \
+void \
+do_nfms_ ## TYPE_STR ## _f ## SIZE (TYPE *p, \
+ TYPE *q, \
+ TYPE *r, \
+ TYPE *s) \
+{ \
+ TYPE neg_s = __builtin_neg_f ## SIZE (*s); \
+ TYPE f = __builtin_fma_f ## SIZE (*q, *r, neg_s); \
+ *p = __builtin_neg_f ## SIZE (f); \
+} \
+ \
+void \
+do_reduce_ ## TYPE_STR ## _f ## SIZE (SCALAR *p, TYPE *q) \
+{ \
+ *p = __builtin_reduce_f ## SIZE (*q); \
+}