public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work127-vpair)] Rename insns; Add reduction; Add overloads.
@ 2023-07-28 6:11 Michael Meissner
0 siblings, 0 replies; only message in thread
From: Michael Meissner @ 2023-07-28 6:11 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:30d103db121df0f0b6c6bfd955134f721a168765
commit 30d103db121df0f0b6c6bfd955134f721a168765
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Fri Jul 28 02:06:16 2023 -0400
Rename insns; Add reduction; Add overloads.
2023-07-28 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/mma.md (UNSPEC_REDUCE_F32): New unspec.
(UNSPEC_REDUCE_F64): New unspec.
(reduce_v4sf): New insn.
(reduce_v8sf): Likewise.
(reduce_v2df): Likewise.
(reduce_v4df): Likewise.
* config/rs6000/rs6000-builtin.cc (fold_builtin_overload_fp): New helper
function.
(rs6000_gimple_fold_builtin): Add support for the fp overloaded built-in
functions.
* config/rs6000/rs6000-builtins.def (__builtin*_fp_*): Rename fp
overloaded built-in functions.  Add support for reduction built-ins.
* config/rs6000/rs6000-overload.def (__builtin_*_f32): Add overloaded fp
built-in functions.
(__builtin_*_f64): Likewise.
gcc/testsuite/
* gcc.target/powerpc/fp-overload-f32-scalar.c: New test.
* gcc.target/powerpc/fp-overload-f32-vector.c: Likewise.
* gcc.target/powerpc/fp-overload-f32-vpair.c: Likewise.
* gcc.target/powerpc/fp-overload-f64-scalar.c: Likewise.
* gcc.target/powerpc/fp-overload-f64-vector.c: Likewise.
* gcc.target/powerpc/fp-overload-f64-vpair.c: Likewise.
* gcc.target/powerpc/fp-overload.h: New include file for fp overloaded
built-in functions.
Diff:
---
gcc/config/rs6000/mma.md | 109 +++++++++
gcc/config/rs6000/rs6000-builtin.cc | 105 +++++++++
gcc/config/rs6000/rs6000-builtins.def | 261 ++++++++++++++-------
gcc/config/rs6000/rs6000-overload.def | 178 ++++++++++++++
.../gcc.target/powerpc/fp-overload-f32-scalar.c | 21 ++
.../gcc.target/powerpc/fp-overload-f32-vector.c | 23 ++
.../gcc.target/powerpc/fp-overload-f32-vpair.c | 23 ++
.../gcc.target/powerpc/fp-overload-f64-scalar.c | 22 ++
.../gcc.target/powerpc/fp-overload-f64-vector.c | 22 ++
.../gcc.target/powerpc/fp-overload-f64-vpair.c | 22 ++
gcc/testsuite/gcc.target/powerpc/fp-overload.h | 85 +++++++
11 files changed, 782 insertions(+), 89 deletions(-)
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 752661ca5c9..a9621bad39a 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -93,6 +93,8 @@
UNSPEC_MMA_XXMTACC
UNSPEC_VPAIR_V4DF
UNSPEC_VPAIR_V8SF
+ UNSPEC_REDUCE_F32
+ UNSPEC_REDUCE_F64
])
(define_c_enum "unspecv"
@@ -1241,3 +1243,110 @@
operands[11] = gen_rtx_REG (<VPAIR_SUBTYPE>mode, reg3 + 1);
}
[(set_attr "length" "8")])
+
+;; Reduction for a V4SF vector
+(define_insn_and_split "reduce_v4sf"
+ [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
+ (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "v")]
+ UNSPEC_REDUCE_F32))
+ (clobber (match_scratch:V4SF 2 "=&v"))
+ (clobber (match_scratch:V4SF 3 "=&v"))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(pc)]
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx tmp1 = operands[2];
+ rtx tmp2 = operands[3];
+
+ emit_insn (gen_altivec_vsldoi_v4sf (tmp1, op1, op1, GEN_INT (8)));
+ emit_insn (gen_addv4sf3 (tmp1, op1, tmp1));
+ emit_insn (gen_altivec_vsldoi_v4sf (tmp2, tmp1, tmp1, GEN_INT (4)));
+ emit_insn (gen_addv4sf3 (tmp2, tmp1, tmp2));
+ emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp2));
+ DONE;
+}
+ [(set_attr "length" "24")])
+
+;; Reduction for a pair of V4SF vectors
+(define_insn_and_split "reduce_v8sf"
+ [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
+ (unspec:SF [(match_operand:OO 1 "vsx_register_operand" "v")]
+ UNSPEC_REDUCE_F32))
+ (clobber (match_scratch:V4SF 2 "=&v"))
+ (clobber (match_scratch:V4SF 3 "=&v"))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(pc)]
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx tmp1 = operands[2];
+ rtx tmp2 = operands[3];
+ unsigned r = reg_or_subregno (op1);
+ rtx op1_hi = gen_rtx_REG (V4SFmode, r);
+ rtx op1_lo = gen_rtx_REG (V4SFmode, r + 1);
+
+ emit_insn (gen_addv4sf3 (tmp1, op1_hi, op1_lo));
+ emit_insn (gen_altivec_vsldoi_v4sf (tmp2, tmp1, tmp1, GEN_INT (8)));
+ emit_insn (gen_addv4sf3 (tmp2, tmp1, tmp2));
+ emit_insn (gen_altivec_vsldoi_v4sf (tmp1, tmp2, tmp2, GEN_INT (4)));
+ emit_insn (gen_addv4sf3 (tmp2, tmp1, tmp2));
+ emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp2));
+ DONE;
+}
+ [(set_attr "length" "24")])
+
+;; Reduction for a V2DF vector
+(define_insn_and_split "reduce_v2df"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=&wa")
+ (unspec:DF [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
+ UNSPEC_REDUCE_F64))
+ (clobber (match_scratch:DF 2 "=&wa"))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2)
+ (vec_select:DF (match_dup 1)
+ (parallel [(match_dup 3)])))
+ (set (match_dup 0)
+ (plus:DF (match_dup 4)
+ (match_dup 2)))]
+{
+ unsigned reg1 = reg_or_subregno (operands[1]);
+
+ operands[3] = GEN_INT (BYTES_BIG_ENDIAN ? 1 : 0);
+ operands[4] = gen_rtx_REG (DFmode, reg1);
+})
+
+;; Reduction for a pair of V2DF vectors
+(define_insn_and_split "reduce_v4df"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=&wa")
+ (unspec:DF [(match_operand:OO 1 "vsx_register_operand" "wa")]
+ UNSPEC_REDUCE_F64))
+ (clobber (match_scratch:DF 2 "=&wa"))
+ (clobber (match_scratch:V2DF 3 "=&wa"))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3)
+ (plus:V2DF (match_dup 4)
+ (match_dup 5)))
+ (set (match_dup 2)
+ (vec_select:DF (match_dup 3)
+ (parallel [(match_dup 6)])))
+ (set (match_dup 0)
+ (plus:DF (match_dup 7)
+ (match_dup 2)))]
+{
+ unsigned reg1 = REGNO (operands[1]);
+ unsigned reg3 = REGNO (operands[3]);
+
+ operands[4] = gen_rtx_REG (V2DFmode, reg1);
+ operands[5] = gen_rtx_REG (V2DFmode, reg1 + 1);
+ operands[6] = GEN_INT (BYTES_BIG_ENDIAN ? 1 : 0);
+ operands[7] = gen_rtx_REG (DFmode, reg3);
+})
diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index 86072ab6405..7ff250fcb0d 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -1273,6 +1273,49 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
return true;
}
+/* Helper function to fold the overloaded fp functions for the scalar and
+ vector types that support the operation directly. */
+
+static void
+fold_builtin_overload_fp (gimple_stmt_iterator *gsi,
+ gimple *stmt,
+ enum tree_code code,
+ int nargs)
+{
+ location_t loc = gimple_location (stmt);
+ tree lhs = gimple_call_lhs (stmt);
+ tree t;
+
+ if (nargs == 1)
+ {
+ tree arg0 = gimple_call_arg (stmt, 0);
+ t = build1 (code, TREE_TYPE (lhs), arg0);
+ }
+
+ else if (nargs == 2)
+ {
+ tree arg0 = gimple_call_arg (stmt, 0);
+ tree arg1 = gimple_call_arg (stmt, 1);
+ t = build2 (code, TREE_TYPE (lhs), arg0, arg1);
+ }
+
+ else if (nargs == 3)
+ {
+ tree arg0 = gimple_call_arg (stmt, 0);
+ tree arg1 = gimple_call_arg (stmt, 1);
+ tree arg2 = gimple_call_arg (stmt, 2);
+ t = build3 (code, TREE_TYPE (lhs), arg0, arg1, arg2);
+ }
+
+ else
+ gcc_unreachable ();
+
+ gimple *g = gimple_build_assign (lhs, t);
+ gimple_set_location (g, loc);
+ gsi_replace (gsi, g, true);
+ return;
+}
+
/* Fold a machine-dependent built-in in GIMPLE. (For folding into
a constant, use rs6000_fold_builtin.) */
bool
@@ -2241,6 +2284,68 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
return true;
}
+ case RS6000_BIF_ABS_F32_SCALAR:
+ case RS6000_BIF_ABS_F32_VECTOR:
+ case RS6000_BIF_ABS_F64_SCALAR:
+ case RS6000_BIF_ABS_F64_VECTOR:
+ fold_builtin_overload_fp (gsi, stmt, ABS_EXPR, 1);
+ return true;
+
+ case RS6000_BIF_ADD_F32_SCALAR:
+ case RS6000_BIF_ADD_F32_VECTOR:
+ case RS6000_BIF_ADD_F64_SCALAR:
+ case RS6000_BIF_ADD_F64_VECTOR:
+ fold_builtin_overload_fp (gsi, stmt, PLUS_EXPR, 2);
+ return true;
+
+ case RS6000_BIF_MULT_F32_SCALAR:
+ case RS6000_BIF_MULT_F32_VECTOR:
+ case RS6000_BIF_MULT_F64_SCALAR:
+ case RS6000_BIF_MULT_F64_VECTOR:
+ fold_builtin_overload_fp (gsi, stmt, MULT_EXPR, 2);
+ return true;
+
+ case RS6000_BIF_NEG_F32_SCALAR:
+ case RS6000_BIF_NEG_F32_VECTOR:
+ case RS6000_BIF_NEG_F64_SCALAR:
+ case RS6000_BIF_NEG_F64_VECTOR:
+ fold_builtin_overload_fp (gsi, stmt, NEGATE_EXPR, 1);
+ return true;
+
+ case RS6000_BIF_REDUCE_F32_SCALAR:
+ case RS6000_BIF_REDUCE_F64_SCALAR:
+ {
+ location_t loc = gimple_location (stmt);
+ lhs = gimple_call_lhs (stmt);
+ arg0 = gimple_call_arg (stmt, 0);
+ g = gimple_build_assign (lhs, arg0);
+ gimple_set_location (g, loc);
+ gsi_replace (gsi, g, true);
+ return true;
+ }
+
+ case RS6000_BIF_SMAX_F32_SCALAR:
+ case RS6000_BIF_SMAX_F32_VECTOR:
+ case RS6000_BIF_SMAX_F64_SCALAR:
+ case RS6000_BIF_SMAX_F64_VECTOR:
+ fold_builtin_overload_fp (gsi, stmt, MAX_EXPR, 2);
+ return true;
+
+ case RS6000_BIF_SMIN_F32_SCALAR:
+ case RS6000_BIF_SMIN_F32_VECTOR:
+ case RS6000_BIF_SMIN_F64_SCALAR:
+ case RS6000_BIF_SMIN_F64_VECTOR:
+ fold_builtin_overload_fp (gsi, stmt, MIN_EXPR, 2);
+ return true;
+
+
+ case RS6000_BIF_SUB_F32_SCALAR:
+ case RS6000_BIF_SUB_F32_VECTOR:
+ case RS6000_BIF_SUB_F64_SCALAR:
+ case RS6000_BIF_SUB_F64_VECTOR:
+ fold_builtin_overload_fp (gsi, stmt, MINUS_EXPR, 2);
+ return true;
+
default:
if (TARGET_DEBUG_BUILTIN)
fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index ecd438642a4..170f83dd907 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -4117,92 +4117,175 @@
void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
STXVP nothing {mma,pair}
- v256 __builtin_vpair_abs_v4df (v256);
- VPAIR_ABS_V4DF vpair_absv4df2 {mma}
-
- v256 __builtin_vpair_neg_v4df (v256);
- VPAIR_NEG_V4DF vpair_negv4df2 {mma}
-
- v256 __builtin_vpair_nabs_v4df (v256);
- VPAIR_NABS_V4DF vpair_nabsv4df2 {mma}
-
- v256 __builtin_vpair_sqrt_v4df (v256);
- VPAIR_SQRT_V4DF vpair_sqrtv4df2 {mma}
-
- v256 __builtin_vpair_add_v4df (v256, v256);
- VPAIR_ADD_V4DF vpair_addv4df3 {mma}
-
- v256 __builtin_vpair_sub_v4df (v256, v256);
- VPAIR_SUB_V4DF vpair_subv4df3 {mma}
-
- v256 __builtin_vpair_mul_v4df (v256, v256);
- VPAIR_MUL_V4DF vpair_mulv4df3 {mma}
-
- v256 __builtin_vpair_div_v4df (v256, v256);
- VPAIR_DIV_V4DF vpair_divv4df3 {mma}
-
- v256 __builtin_vpair_copysign_v4df (v256, v256);
- VPAIR_COPYSIGN_V4DF vpair_copysignv4df3 {mma}
-
- v256 __builtin_vpair_smin_v4df (v256, v256);
- VPAIR_SMIN_V4DF vpair_sminv4df3 {mma}
-
- v256 __builtin_vpair_smax_v4df (v256, v256);
- VPAIR_SMAX_V4DF vpair_smaxv4df3 {mma}
-
- v256 __builtin_vpair_fma_v4df (v256, v256, v256);
- VPAIR_FMA_V4DF vpair_fmav4df4 {mma}
-
- v256 __builtin_vpair_fms_v4df (v256, v256, v256);
- VPAIR_FMS_V4DF vpair_fmsv4df4 {mma}
-
- v256 __builtin_vpair_nfma_v4df (v256, v256, v256);
- VPAIR_NFMA_V4DF vpair_nfmav4df4 {mma}
-
- v256 __builtin_vpair_nfms_v4df (v256, v256, v256);
- VPAIR_NFMS_V4DF vpair_nfmsv4df4 {mma}
-
- v256 __builtin_vpair_abs_v8sf (v256);
- VPAIR_ABS_V8SF vpair_absv8sf2 {mma}
-
- v256 __builtin_vpair_neg_v8sf (v256);
- VPAIR_NEG_V8SF vpair_negv8sf2 {mma}
-
- v256 __builtin_vpair_nabs_v8sf (v256);
- VPAIR_NABS_V8SF vpair_nabsv8sf2 {mma}
-
- v256 __builtin_vpair_sqrt_v8sf (v256);
- VPAIR_SQRT_V8SF vpair_sqrtv8sf2 {mma}
-
- v256 __builtin_vpair_add_v8sf (v256, v256);
- VPAIR_ADD_V8SF vpair_addv8sf3 {mma}
-
- v256 __builtin_vpair_sub_v8sf (v256, v256);
- VPAIR_SUB_V8SF vpair_subv8sf3 {mma}
-
- v256 __builtin_vpair_mul_v8sf (v256, v256);
- VPAIR_MUL_V8SF vpair_mulv8sf3 {mma}
-
- v256 __builtin_vpair_div_v8sf (v256, v256);
- VPAIR_DIV_V8SF vpair_divv8sf3 {mma}
-
- v256 __builtin_vpair_copysign_v8sf (v256, v256);
- VPAIR_COPYSIGN_V8SF vpair_copysignv8sf3 {mma}
-
- v256 __builtin_vpair_fma_v8sf (v256, v256, v256);
- VPAIR_FMA_V8SF vpair_fmav8sf4 {mma}
-
- v256 __builtin_vpair_fms_v8sf (v256, v256, v256);
- VPAIR_FMS_V8SF vpair_fmsv8sf4 {mma}
-
- v256 __builtin_vpair_nfma_v8sf (v256, v256, v256);
- VPAIR_NFMA_V8SF vpair_nfmav8sf4 {mma}
-
- v256 __builtin_vpair_nfms_v8sf (v256, v256, v256);
- VPAIR_NFMS_V8SF vpair_nfmsv8sf4 {mma}
-
- v256 __builtin_vpair_smin_v8sf (v256, v256);
- VPAIR_SMIN_V8SF vpair_sminv8sf3 {mma}
-
- v256 __builtin_vpair_smax_v8sf (v256, v256);
- VPAIR_SMAX_V8SF vpair_smaxv8sf3 {mma}
+; Builtins for overload floating point operations, including scalar and
+; 128-bit vector codes that are converted into direct operations.
+; The 256 codes that are kept in vector pairs insns that are split
+; into separate operations after register allocation.
+
+ float __builtin_abs_f32_scalar (float);
+ ABS_F32_SCALAR nothing {}
+ vf __builtin_abs_f32_vector (vf);
+ ABS_F32_VECTOR nothing {}
+ v256 __builtin_abs_f32_vpair (v256);
+ ABS_F32_VPAIR vpair_absv8sf2 {mma}
+
+ double __builtin_abs_f64_scalar (double);
+ ABS_F64_SCALAR nothing {}
+ vd __builtin_abs_f64_vector (vd);
+ ABS_F64_VECTOR nothing {}
+ v256 __builtin_abs_f64_vpair (v256);
+ ABS_F64_VPAIR vpair_absv4df2 {mma}
+
+ float __builtin_add_f32_scalar (float, float);
+ ADD_F32_SCALAR nothing {}
+ vf __builtin_add_f32_vector (vf, vf);
+ ADD_F32_VECTOR nothing {}
+ v256 __builtin_add_f32_vpair (v256, v256);
+ ADD_F32_VPAIR vpair_addv8sf3 {mma}
+
+ double __builtin_add_f64_scalar (double, double);
+ ADD_F64_SCALAR nothing {}
+ vd __builtin_add_f64_vector (vd, vd);
+ ADD_F64_VECTOR nothing {}
+ v256 __builtin_add_f64_vpair (v256, v256);
+ ADD_F64_VPAIR vpair_addv4df3 {mma}
+
+ float __builtin_copysign_f32_scalar (float, float);
+ COPYSIGN_F32_SCALAR copysignsf3_fcpsgn {}
+ vf __builtin_copysign_f32_vector (vf, vf);
+ COPYSIGN_F32_VECTOR vsx_copysignv4sf3 {}
+ v256 __builtin_copysign_f32_vpair (v256, v256);
+ COPYSIGN_F32_VPAIR vpair_copysignv8sf3 {mma}
+
+ double __builtin_copysign_f64_scalar (double, double);
+ COPYSIGN_F64_SCALAR copysigndf3_fcpsgn {}
+ vd __builtin_copysign_f64_vector (vd, vd);
+ COPYSIGN_F64_VECTOR vsx_copysignv2df3 {}
+ v256 __builtin_copysign_f64_vpair (v256, v256);
+ COPYSIGN_F64_VPAIR vpair_copysignv4df3 {mma}
+
+ float __builtin_div_f32_scalar (float, float);
+ DIV_F32_SCALAR divsf3 {}
+ vf __builtin_div_f32_vector (vf, vf);
+ DIV_F32_VECTOR divv4sf3 {}
+ v256 __builtin_div_f32_vpair (v256, v256);
+ DIV_F32_VPAIR vpair_divv8sf3 {mma}
+
+ double __builtin_div_f64_scalar (double, double);
+ DIV_F64_SCALAR divdf3 {}
+ vd __builtin_div_f64_vector (vd, vd);
+ DIV_F64_VECTOR divv2df3 {}
+ v256 __builtin_div_f64_vpair (v256, v256);
+ DIV_F64_VPAIR vpair_divv4df3 {mma}
+
+ float __builtin_fma_f32_scalar (float, float, float);
+ FMA_F32_SCALAR fmasf4 {}
+ vf __builtin_fma_f32_vector (vf, vf, vf);
+ FMA_F32_VECTOR fmav4sf4 {}
+ v256 __builtin_fma_v8sf (v256, v256, v256);
+ FMA_F32_VPAIR vpair_fmav8sf4 {mma}
+
+ double __builtin_fma_f64_scalar (double, double, double);
+ FMA_F64_SCALAR fmadf4 {}
+ vd __builtin_fma_f64_vector (vd, vd, vd);
+ FMA_F64_VECTOR fmav2df4 {}
+ v256 __builtin_fma_v4df (v256, v256, v256);
+ FMA_F64_VPAIR vpair_fmav4df4 {mma}
+
+ float __builtin_mult_f32_scalar (float, float);
+ MULT_F32_SCALAR nothing {}
+ vf __builtin_mult_f32_vector (vf, vf);
+ MULT_F32_VECTOR nothing {}
+ v256 __builtin_mult_f32_vpair (v256, v256);
+ MULT_F32_VPAIR vpair_mulv8sf3 {mma}
+
+ double __builtin_mult_f64_scalar (double, double);
+ MULT_F64_SCALAR nothing {}
+ vd __builtin_mult_f64_vector (vd, vd);
+ MULT_F64_VECTOR nothing {}
+ v256 __builtin_mult_f64_vpair (v256, v256);
+ MULT_F64_VPAIR vpair_mulv4df3 {mma}
+
+ float __builtin_neg_f32_scalar (float);
+ NEG_F32_SCALAR nothing {}
+ vf __builtin_neg_f32_vector (vf);
+ NEG_F32_VECTOR nothing {}
+ v256 __builtin_neg_f32_vpair (v256);
+ NEG_F32_VPAIR vpair_negv8sf2 {mma}
+
+ double __builtin_neg_f64_scalar (double);
+ NEG_F64_SCALAR nothing {}
+ vd __builtin_neg_f64_vector (vd);
+ NEG_F64_VECTOR nothing {}
+ v256 __builtin_neg_f64_vpair (v256);
+ NEG_F64_VPAIR vpair_negv4df2 {mma}
+
+ float __builtin_reduce_f32_scalar (float);
+ REDUCE_F32_SCALAR nothing {}
+ float __builtin_reduce_f32_vector (vf);
+ REDUCE_F32_VECTOR reduce_v4sf {}
+ float __builtin_reduce_f32_vpair (v256);
+ REDUCE_F32_VPAIR reduce_v8sf {mma,pair}
+
+ double __builtin_reduce_f64_scalar (double);
+ REDUCE_F64_SCALAR nothing {}
+ double __builtin_reduce_f64_vector (vd);
+ REDUCE_F64_VECTOR reduce_v2df {}
+ double __builtin_reduce_f64_vpair (v256);
+ REDUCE_F64_VPAIR reduce_v4df {mma,pair}
+
+ float __builtin_smax_f32_scalar (float, float);
+ SMAX_F32_SCALAR nothing {}
+ vf __builtin_smax_f32_vector (vf, vf);
+ SMAX_F32_VECTOR nothing {}
+ v256 __builtin_smax_f32_vpair (v256, v256);
+ SMAX_F32_VPAIR vpair_smaxv8sf3 {mma}
+
+ double __builtin_smax_f64_scalar (double, double);
+ SMAX_F64_SCALAR nothing {}
+ vd __builtin_smax_f64_vector (vd, vd);
+ SMAX_F64_VECTOR nothing {}
+ v256 __builtin_smax_f64_vpair (v256, v256);
+ SMAX_F64_VPAIR vpair_smaxv4df3 {mma}
+
+ float __builtin_smin_f32_scalar (float, float);
+ SMIN_F32_SCALAR nothing {}
+ vf __builtin_smin_f32_vector (vf, vf);
+ SMIN_F32_VECTOR nothing {}
+ v256 __builtin_smin_f32_vpair (v256, v256);
+ SMIN_F32_VPAIR vpair_sminv8sf3 {mma}
+
+ double __builtin_smin_f64_scalar (double, double);
+ SMIN_F64_SCALAR nothing {}
+ vd __builtin_smin_f64_vector (vd, vd);
+ SMIN_F64_VECTOR nothing {}
+ v256 __builtin_smin_f64_vpair (v256, v256);
+ SMIN_F64_VPAIR vpair_sminv4df3 {mma}
+
+ float __builtin_sqrt_f32_scalar (float);
+ SQRT_F32_SCALAR nothing {}
+ vf __builtin_sqrt_f32_vector (vf);
+ SQRT_F32_VECTOR nothing {}
+ v256 __builtin_sqrt_f32_vpair (v256);
+ SQRT_F32_VPAIR vpair_sqrtv8sf2 {mma}
+
+ double __builtin_sqrt_f64_scalar (double);
+ SQRT_F64_SCALAR nothing {}
+ vd __builtin_sqrt_f64_vector (vd);
+ SQRT_F64_VECTOR nothing {}
+ v256 __builtin_sqrt_f64_vpair (v256);
+ SQRT_F64_VPAIR vpair_sqrtv4df2 {mma}
+
+ float __builtin_sub_f32_scalar (float, float);
+ SUB_F32_SCALAR nothing {}
+ vf __builtin_sub_f32_vector (vf, vf);
+ SUB_F32_VECTOR nothing {}
+ v256 __builtin_sub_f32_vpair (v256, v256);
+ SUB_F32_VPAIR vpair_subv8sf3 {mma}
+
+ double __builtin_sub_f64_scalar (double, double);
+ SUB_F64_SCALAR nothing {}
+ vd __builtin_sub_f64_vector (vd, vd);
+ SUB_F64_VECTOR nothing {}
+ v256 __builtin_sub_f64_vpair (v256, v256);
+ SUB_F64_VPAIR vpair_subv4df3 {mma}
diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def
index 470d718efde..62c2432e8ff 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -6187,3 +6187,181 @@
VUPKLSW VUPKLSW_DEPR1
vbll __builtin_vec_vupklsw (vbi);
VUPKLSW VUPKLSW_DEPR2
+
+;; Overloaded floating point built-in functions
+
+[ABS_F32, SKIP, __builtin_abs_f32]
+ float __builtin_abs_f32 (float);
+ ABS_F32_SCALAR
+ vf __builtin_abs_f32 (vf);
+ ABS_F32_VECTOR
+ v256 __builtin_abs_f32 (v256);
+ ABS_F32_VPAIR
+
+[ABS_F64, SKIP, __builtin_abs_f64]
+ double __builtin_abs_f64 (double);
+ ABS_F64_SCALAR
+ vd __builtin_abs_f64 (vd);
+ ABS_F64_VECTOR
+ v256 __builtin_abs_f64 (v256);
+ ABS_F64_VPAIR
+
+[ADD_F32, SKIP, __builtin_add_f32]
+ float __builtin_add_f32 (float, float);
+ ADD_F32_SCALAR
+ vf __builtin_add_f32 (vf, vf);
+ ADD_F32_VECTOR
+ v256 __builtin_add_f32 (v256, v256);
+ ADD_F32_VPAIR
+
+[ADD_F64, SKIP, __builtin_add_f64]
+ double __builtin_add_f64 (double, double);
+ ADD_F64_SCALAR
+ vd __builtin_add_f64 (vd, vd);
+ ADD_F64_VECTOR
+ v256 __builtin_add_f64 (v256, v256);
+ ADD_F64_VPAIR
+
+[COPYSIGN_F32, SKIP, __builtin_copysign_f32]
+ float __builtin_copysign_f32 (float, float);
+ COPYSIGN_F32_SCALAR
+ vf __builtin_copysign_f32 (vf, vf);
+ COPYSIGN_F32_VECTOR
+ v256 __builtin_copysign_f32 (v256, v256);
+ COPYSIGN_F32_VPAIR
+
+[COPYSIGN_F64, SKIP, __builtin_copysign_f64]
+ double __builtin_copysign_f64 (double, double);
+ COPYSIGN_F64_SCALAR
+ vd __builtin_copysign_f64 (vd, vd);
+ COPYSIGN_F64_VECTOR
+ v256 __builtin_copysign_f64 (v256, v256);
+ COPYSIGN_F64_VPAIR
+
+[DIV_F32, SKIP, __builtin_div_f32]
+ float __builtin_div_f32 (float, float);
+ DIV_F32_SCALAR
+ vf __builtin_div_f32 (vf, vf);
+ DIV_F32_VECTOR
+ v256 __builtin_div_f32 (v256, v256);
+ DIV_F32_VPAIR
+
+[DIV_F64, SKIP, __builtin_div_f64]
+ double __builtin_div_f64 (double, double);
+ DIV_F64_SCALAR
+ vd __builtin_div_f64 (vd, vd);
+ DIV_F64_VECTOR
+ v256 __builtin_div_f64 (v256, v256);
+ DIV_F64_VPAIR
+
+[FMA_F32, SKIP, __builtin_fma_f32]
+ float __builtin_fma_f32 (float, float, float);
+ FMA_F32_SCALAR
+ vf __builtin_fma_f32 (vf, vf, vf);
+ FMA_F32_VECTOR
+ v256 __builtin_fma_f32 (v256, v256, v256);
+ FMA_F32_VPAIR
+
+[FMA_F64, SKIP, __builtin_fma_f64]
+ double __builtin_fma_f64 (double, double, double);
+ FMA_F64_SCALAR
+ vd __builtin_fma_f64 (vd, vd, vd);
+ FMA_F64_VECTOR
+ v256 __builtin_fma_f64 (v256, v256, v256);
+ FMA_F64_VPAIR
+
+[MULT_F32, SKIP, __builtin_mult_f32]
+ float __builtin_mult_f32 (float, float);
+ MULT_F32_SCALAR
+ vf __builtin_mult_f32 (vf, vf);
+ MULT_F32_VECTOR
+ v256 __builtin_mult_f32 (v256, v256);
+ MULT_F32_VPAIR
+
+[MULT_F64, SKIP, __builtin_mult_f64]
+ double __builtin_mult_f64 (double, double);
+ MULT_F64_SCALAR
+ vd __builtin_mult_f64 (vd, vd);
+ MULT_F64_VECTOR
+ v256 __builtin_mult_f64 (v256, v256);
+ MULT_F64_VPAIR
+
+[NEG_F32, SKIP, __builtin_neg_f32]
+ float __builtin_neg_f32 (float);
+ NEG_F32_SCALAR
+ vf __builtin_neg_f32 (vf);
+ NEG_F32_VECTOR
+ v256 __builtin_neg_f32 (v256);
+ NEG_F32_VPAIR
+
+[NEG_F64, SKIP, __builtin_neg_f64]
+ double __builtin_neg_f64 (double);
+ NEG_F64_SCALAR
+ vd __builtin_neg_f64 (vd);
+ NEG_F64_VECTOR
+ v256 __builtin_neg_f64 (v256);
+ NEG_F64_VPAIR
+
+[REDUCE_F32, SKIP, __builtin_reduce_f32]
+ float __builtin_reduce_f32 (float);
+ REDUCE_F32_SCALAR
+ float __builtin_reduce_f32 (vf);
+ REDUCE_F32_VECTOR
+ float __builtin_reduce_f32 (v256);
+ REDUCE_F32_VPAIR
+
+[REDUCE_F64, SKIP, __builtin_reduce_f64]
+ double __builtin_reduce_f64 (double);
+ REDUCE_F64_SCALAR
+ double __builtin_reduce_f64 (vd);
+ REDUCE_F64_VECTOR
+ double __builtin_reduce_f64 (v256);
+ REDUCE_F64_VPAIR
+
+[SMAX_F32, SKIP, __builtin_smax_f32]
+ float __builtin_smax_f32 (float, float);
+ SMAX_F32_SCALAR
+ vf __builtin_smax_f32 (vf, vf);
+ SMAX_F32_VECTOR
+ v256 __builtin_smax_f32 (v256, v256);
+ SMAX_F32_VPAIR
+
+[SMAX_F64, SKIP, __builtin_smax_f64]
+ double __builtin_smax_f64 (double, double);
+ SMAX_F64_SCALAR
+ vd __builtin_smax_f64 (vd, vd);
+ SMAX_F64_VECTOR
+ v256 __builtin_smax_f64 (v256, v256);
+ SMAX_F64_VPAIR
+
+[SMIN_F32, SKIP, __builtin_smin_f32]
+ float __builtin_smin_f32 (float, float);
+ SMIN_F32_SCALAR
+ vf __builtin_smin_f32 (vf, vf);
+ SMIN_F32_VECTOR
+ v256 __builtin_smin_f32 (v256, v256);
+ SMIN_F32_VPAIR
+
+[SMIN_F64, SKIP, __builtin_smin_f64]
+ double __builtin_smin_f64 (double, double);
+ SMIN_F64_SCALAR
+ vd __builtin_smin_f64 (vd, vd);
+ SMIN_F64_VECTOR
+ v256 __builtin_smin_f64 (v256, v256);
+ SMIN_F64_VPAIR
+
+[SUB_F32, SKIP, __builtin_sub_f32]
+ float __builtin_sub_f32 (float, float);
+ SUB_F32_SCALAR
+ vf __builtin_sub_f32 (vf, vf);
+ SUB_F32_VECTOR
+ v256 __builtin_sub_f32 (v256, v256);
+ SUB_F32_VPAIR
+
+[SUB_F64, SKIP, __builtin_sub_f64]
+ double __builtin_sub_f64 (double, double);
+ SUB_F64_SCALAR
+ vd __builtin_sub_f64 (vd, vd);
+ SUB_F64_VECTOR
+ v256 __builtin_sub_f64 (v256, v256);
+ SUB_F64_VPAIR
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-scalar.c b/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-scalar.c
new file mode 100644
index 00000000000..400a54f2fd0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-scalar.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
+
+/* Test code generation for __builtin_<op>_f32 using scalar float. */
+
+#include "fp-overload.h"
+
+TEST (float, float, flt, 32)
+
+/* { dg-final { scan-assembler-times {\mfabs\M|\mxsabsdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfadds\M|\mxsaddsp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfmadds\M|\mxsmadd[am]sp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfmsubs\M|\mxsmsub[am]sp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfmuls\M|\mxsmulsp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfnabs\M|\mxsnabsdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfneg\M|\mxsnegdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfnmadds\M|\mxsmadd[am]sp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfnmsubs\M|\mxsnmsub[am]sp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfsubs\M|\mxssubsp\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mbl\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-vector.c b/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-vector.c
new file mode 100644
index 00000000000..14f76d8a8f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-vector.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
+
+/* Test code generation for __builtin_<op>_f32 using vector float. */
+
+#include "fp-overload.h"
+
+TEST (vector float, float, vect, 32)
+
+/* { dg-final { scan-assembler-times {\mvsldoi\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxscvspdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvabssp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvaddsp\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd[am]sp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub[am]sp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvmulsp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvnabssp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvnegsp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd[am]sp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub[am]sp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvsubsp\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mbl\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-vpair.c b/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-vpair.c
new file mode 100644
index 00000000000..466f056cf9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-vpair.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
+
+/* Test code generation for __builtin_<op>_f32 using __vector_pair. */
+
+#include "fp-overload.h"
+
+TEST (__vector_pair, float, vpair, 32)
+
+/* { dg-final { scan-assembler-times {\mvsldoi\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxscvspdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvabssp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvaddsp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd[am]sp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub[am]sp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvmulsp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnabssp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnegsp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd[am]sp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub[am]sp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvsubsp\M} 2 } } */
+/* { dg-final { scan-assembler-not {\mbl\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-scalar.c b/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-scalar.c
new file mode 100644
index 00000000000..28e7c91c77c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-scalar.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
+
+/* Test code generation for __builtin_<op>_f64 using scalar double. */
+
+#include "fp-overload.h"
+
+TEST (double, double, dbl, 64)
+
+
+/* { dg-final { scan-assembler-times {\mfabs\M|\mxsabsdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfadd\M|\mxsadddp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfmadd\M|\mxsmadd[am]dp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfmsub\M|\mxsmsub[am]dp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfmul\M|\mxsmuldp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfnabs\M|\mxsnabsdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfneg\M|\mxsnegdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfnmadd\M|\mxsmadd[am]dp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfnmsub\M|\mxsmsub[am]dp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfsub\M|\mxssubdp\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mbl\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-vector.c b/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-vector.c
new file mode 100644
index 00000000000..806fc655b44
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-vector.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
+
+/* Test code generation for __builtin_<op>_f64 using vector double. */
+
+#include "fp-overload.h"
+
+TEST (vector double, double, vect, 64)
+
+/* { dg-final { scan-assembler-times {\mvsldoi\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvabsdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvadddp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd[am]dp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub[am]dp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvmuldp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvnabsdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvnegdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd[am]dp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub[am]dp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvsubdp\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mbl\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-vpair.c b/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-vpair.c
new file mode 100644
index 00000000000..7dd0613bf88
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-vpair.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
+
+/* Test code generation for __builtin_<op>_f64 using __vector_pair. */
+
+#include "fp-overload.h"
+
+TEST (__vector_pair, double, vpair, 64)
+
+/* { dg-final { scan-assembler-times {\mxvabsdp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvadddp\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd[am]dp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub[am]dp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvmuldp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnabsdp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnegdp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd[am]dp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub[am]dp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvsubdp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mbl\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload.h b/gcc/testsuite/gcc.target/powerpc/fp-overload.h
new file mode 100644
index 00000000000..a1ce5f83765
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload.h
@@ -0,0 +1,85 @@
+/* Common code to test the floating point overload functions. */
+
+#define TEST(TYPE, SCALAR, TYPE_STR, SIZE) \
+ \
+void \
+do_add_ ## TYPE_STR ## _f ## SIZE (TYPE *p, TYPE *q, TYPE *r) \
+{ \
+ *p = __builtin_add_f ## SIZE (*q, *r); \
+} \
+ \
+void \
+do_sub_ ## TYPE_STR ## _f ## SIZE (TYPE *p, TYPE *q, TYPE *r) \
+{ \
+ *p = __builtin_sub_f ## SIZE (*q, *r); \
+} \
+ \
+void \
+do_mult_ ## TYPE_STR ## _f ## SIZE (TYPE *p, TYPE *q, TYPE *r) \
+{ \
+ *p = __builtin_mult_f ## SIZE (*q, *r); \
+} \
+ \
+void \
+do_neg_ ## TYPE_STR ## _f ## SIZE (TYPE *p, TYPE *q) \
+{ \
+ *p = __builtin_neg_f ## SIZE (*q); \
+} \
+ \
+void \
+do_abs_ ## TYPE_STR ## _f ## SIZE (TYPE *p, TYPE *q) \
+{ \
+ *p = __builtin_abs_f ## SIZE (*q); \
+} \
+ \
+void \
+do_nabs_ ## TYPE_STR ## _f ## SIZE (TYPE *p, TYPE *q) \
+{ \
+ *p = __builtin_neg_f ## SIZE (__builtin_abs_f ## SIZE (*q)); \
+} \
+ \
+void \
+do_fma_ ## TYPE_STR ## _f ## SIZE (TYPE *p, \
+ TYPE *q, \
+ TYPE *r, \
+ TYPE *s) \
+{ \
+ *p = __builtin_fma_f ## SIZE (*q, *r, *s); \
+} \
+ \
+void \
+do_fms_ ## TYPE_STR ## _f ## SIZE (TYPE *p, \
+ TYPE *q, \
+ TYPE *r, \
+ TYPE *s) \
+{ \
+ TYPE neg_s = __builtin_neg_f ## SIZE (*s); \
+ *p = __builtin_fma_f ## SIZE (*q, *r, neg_s); \
+} \
+ \
+void \
+do_nfma_ ## TYPE_STR ## _f ## SIZE (TYPE *p, \
+ TYPE *q, \
+ TYPE *r, \
+ TYPE *s) \
+{ \
+ TYPE f = __builtin_fma_f ## SIZE (*q, *r, *s); \
+ *p = __builtin_neg_f ## SIZE (f); \
+} \
+ \
+void \
+do_nfms_ ## TYPE_STR ## _f ## SIZE (TYPE *p, \
+ TYPE *q, \
+ TYPE *r, \
+ TYPE *s) \
+{ \
+ TYPE neg_s = __builtin_neg_f ## SIZE (*s); \
+ TYPE f = __builtin_fma_f ## SIZE (*q, *r, neg_s); \
+ *p = __builtin_neg_f ## SIZE (f); \
+} \
+ \
+void \
+do_reduce_ ## TYPE_STR ## _f ## SIZE (SCALAR *p, TYPE *q) \
+{ \
+ *p = __builtin_reduce_f ## SIZE (*q); \
+}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-07-28 6:11 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-28 6:11 [gcc(refs/users/meissner/heads/work127-vpair)] Rename insns; Add reduction; Add overloads Michael Meissner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).