public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work127-vpair)] Rename insns; Add reduction; Add overloads.
@ 2023-07-28  6:11 Michael Meissner
  0 siblings, 0 replies; only message in thread
From: Michael Meissner @ 2023-07-28  6:11 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:30d103db121df0f0b6c6bfd955134f721a168765

commit 30d103db121df0f0b6c6bfd955134f721a168765
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Jul 28 02:06:16 2023 -0400

    Rename insns; Add reduction; Add overloads.
    
    2023-07-28  Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/mma.md (UNSPEC_REDUCE_F32): New unspec.
            (UNSPEC_REDUCE_F64): New unspec.
            (reduce_v4sf): New insn.
            (reduce_v8sf): Likewise.
            (reduce_v2df): Likewise.
            (reduce_v4df): Likewise.
            * config/rs6000/rs6000-builtin.cc (fold_builtin_overload_fp): New helper
            function.
            (rs6000_gimple_fold_builtin): Add support for the fp overloaded built-in
            functions.
            * config/rs6000/rs6000-builtins.def (__builtin*_fp_*): Rename fp
            overloaded built-in functions.  Add support for reduction built-ins.
            * config/rs6000/rs6000-overload.def (__builtin_*_f32): Add overloaded
            fp built-in functions.
            (__builtin_*_f64): Likewise.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/fp-overload-f32-scalar.c: New test.
            * gcc.target/powerpc/fp-overload-f32-vector.c: Likewise.
            * gcc.target/powerpc/fp-overload-f32-vpair.c: Likewise.
            * gcc.target/powerpc/fp-overload-f64-scalar.c: Likewise.
            * gcc.target/powerpc/fp-overload-f64-vector.c: Likewise.
            * gcc.target/powerpc/fp-overload-f64-vpair.c: Likewise.
            * gcc.target/powerpc/fp-overload.h: New include file for fp overloaded
            built-in functions.

Diff:
---
 gcc/config/rs6000/mma.md                           | 109 +++++++++
 gcc/config/rs6000/rs6000-builtin.cc                | 105 +++++++++
 gcc/config/rs6000/rs6000-builtins.def              | 261 ++++++++++++++-------
 gcc/config/rs6000/rs6000-overload.def              | 178 ++++++++++++++
 .../gcc.target/powerpc/fp-overload-f32-scalar.c    |  21 ++
 .../gcc.target/powerpc/fp-overload-f32-vector.c    |  23 ++
 .../gcc.target/powerpc/fp-overload-f32-vpair.c     |  23 ++
 .../gcc.target/powerpc/fp-overload-f64-scalar.c    |  22 ++
 .../gcc.target/powerpc/fp-overload-f64-vector.c    |  22 ++
 .../gcc.target/powerpc/fp-overload-f64-vpair.c     |  22 ++
 gcc/testsuite/gcc.target/powerpc/fp-overload.h     |  85 +++++++
 11 files changed, 782 insertions(+), 89 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 752661ca5c9..a9621bad39a 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -93,6 +93,8 @@
    UNSPEC_MMA_XXMTACC
    UNSPEC_VPAIR_V4DF
    UNSPEC_VPAIR_V8SF
+   UNSPEC_REDUCE_F32
+   UNSPEC_REDUCE_F64
   ])
 
 (define_c_enum "unspecv"
@@ -1241,3 +1243,110 @@
   operands[11] = gen_rtx_REG (<VPAIR_SUBTYPE>mode, reg3 + 1);
 }
   [(set_attr "length" "8")])
+
+;; Reduction for a V4SF vector
+(define_insn_and_split "reduce_v4sf"
+  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
+	(unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "v")]
+		   UNSPEC_REDUCE_F32))
+   (clobber (match_scratch:V4SF 2 "=&v"))
+   (clobber (match_scratch:V4SF 3 "=&v"))]
+  "TARGET_MMA"
+  "#"
+  "&& reload_completed"
+  [(pc)]
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx tmp1 = operands[2];
+  rtx tmp2 = operands[3];
+
+  emit_insn (gen_altivec_vsldoi_v4sf (tmp1, op1, op1, GEN_INT (8)));
+  emit_insn (gen_addv4sf3 (tmp1, op1, tmp1));
+  emit_insn (gen_altivec_vsldoi_v4sf (tmp2, tmp1, tmp1, GEN_INT (4)));
+  emit_insn (gen_addv4sf3 (tmp2, tmp1, tmp2));
+  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp2));
+  DONE;
+}
+  [(set_attr "length" "24")])
+
+;; Reduction for a pair of V4SF vectors
+(define_insn_and_split "reduce_v8sf"
+  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
+	(unspec:SF [(match_operand:OO 1 "vsx_register_operand" "v")]
+		   UNSPEC_REDUCE_F32))
+   (clobber (match_scratch:V4SF 2 "=&v"))
+   (clobber (match_scratch:V4SF 3 "=&v"))]
+  "TARGET_MMA"
+  "#"
+  "&& reload_completed"
+  [(pc)]
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx tmp1 = operands[2];
+  rtx tmp2 = operands[3];
+  unsigned r = reg_or_subregno (op1);
+  rtx op1_hi = gen_rtx_REG (V4SFmode, r);
+  rtx op1_lo = gen_rtx_REG (V4SFmode, r + 1);
+
+  emit_insn (gen_addv4sf3 (tmp1, op1_hi, op1_lo));
+  emit_insn (gen_altivec_vsldoi_v4sf (tmp2, tmp1, tmp1, GEN_INT (8)));
+  emit_insn (gen_addv4sf3 (tmp2, tmp1, tmp2));
+  emit_insn (gen_altivec_vsldoi_v4sf (tmp1, tmp2, tmp2, GEN_INT (4)));
+  emit_insn (gen_addv4sf3 (tmp2, tmp1, tmp2));
+  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp2));
+  DONE;
+}
+  [(set_attr "length" "24")])
+
+;; Reduction for a V2DF vector
+(define_insn_and_split "reduce_v2df"
+  [(set (match_operand:DF 0 "vsx_register_operand" "=&wa")
+	(unspec:DF [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
+		   UNSPEC_REDUCE_F64))
+   (clobber (match_scratch:DF 2 "=&wa"))]
+  "TARGET_MMA"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 2)
+	(vec_select:DF (match_dup 1)
+		       (parallel [(match_dup 3)])))
+   (set (match_dup 0)
+	(plus:DF (match_dup 4)
+		 (match_dup 2)))]
+{
+  unsigned reg1 = reg_or_subregno (operands[1]);
+
+  operands[3] = GEN_INT (BYTES_BIG_ENDIAN ? 1 : 0);
+  operands[4] = gen_rtx_REG (DFmode, reg1);
+})
+
+;; Reduction for a pair of V2DF vectors
+(define_insn_and_split "reduce_v4df"
+  [(set (match_operand:DF 0 "vsx_register_operand" "=&wa")
+	(unspec:DF [(match_operand:OO 1 "vsx_register_operand" "wa")]
+		   UNSPEC_REDUCE_F64))
+   (clobber (match_scratch:DF 2 "=&wa"))
+   (clobber (match_scratch:V2DF 3 "=&wa"))]
+  "TARGET_MMA"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 3)
+	(plus:V2DF (match_dup 4)
+		   (match_dup 5)))
+   (set (match_dup 2)
+	(vec_select:DF (match_dup 3)
+		       (parallel [(match_dup 6)])))
+   (set (match_dup 0)
+	(plus:DF (match_dup 7)
+		 (match_dup 2)))]
+{
+  unsigned reg1 = REGNO (operands[1]);
+  unsigned reg3 = REGNO (operands[3]);
+
+  operands[4] = gen_rtx_REG (V2DFmode, reg1);
+  operands[5] = gen_rtx_REG (V2DFmode, reg1 + 1);
+  operands[6] = GEN_INT (BYTES_BIG_ENDIAN ? 1 : 0);
+  operands[7] = gen_rtx_REG (DFmode, reg3);
+})
diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index 86072ab6405..7ff250fcb0d 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -1273,6 +1273,49 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
   return true;
 }
 
+/* Helper function to fold the overloaded fp functions for the scalar and
+   vector types that support the operation directly.  */
+
+static void
+fold_builtin_overload_fp (gimple_stmt_iterator *gsi,
+			  gimple *stmt,
+			  enum tree_code code,
+			  int nargs)
+{
+  location_t loc = gimple_location (stmt);
+  tree lhs = gimple_call_lhs (stmt);
+  tree t;
+
+  if (nargs == 1)
+    {
+      tree arg0 = gimple_call_arg (stmt, 0);
+      t = build1 (code, TREE_TYPE (lhs), arg0);
+    }
+
+  else if (nargs == 2)
+    {
+      tree arg0 = gimple_call_arg (stmt, 0);
+      tree arg1 = gimple_call_arg (stmt, 1);
+      t = build2 (code, TREE_TYPE (lhs), arg0, arg1);
+    }
+
+  else if (nargs == 3)
+    {
+      tree arg0 = gimple_call_arg (stmt, 0);
+      tree arg1 = gimple_call_arg (stmt, 1);
+      tree arg2 = gimple_call_arg (stmt, 2);
+      t = build3 (code, TREE_TYPE (lhs), arg0, arg1, arg2);
+    }
+
+  else
+    gcc_unreachable ();
+
+  gimple *g = gimple_build_assign (lhs, t);
+  gimple_set_location (g, loc);
+  gsi_replace (gsi, g, true);
+  return;
+}
+
 /* Fold a machine-dependent built-in in GIMPLE.  (For folding into
    a constant, use rs6000_fold_builtin.)  */
 bool
@@ -2241,6 +2284,68 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	return true;
       }
 
+    case RS6000_BIF_ABS_F32_SCALAR:
+    case RS6000_BIF_ABS_F32_VECTOR:
+    case RS6000_BIF_ABS_F64_SCALAR:
+    case RS6000_BIF_ABS_F64_VECTOR:
+      fold_builtin_overload_fp (gsi, stmt, ABS_EXPR, 1);
+      return true;
+
+    case RS6000_BIF_ADD_F32_SCALAR:
+    case RS6000_BIF_ADD_F32_VECTOR:
+    case RS6000_BIF_ADD_F64_SCALAR:
+    case RS6000_BIF_ADD_F64_VECTOR:
+      fold_builtin_overload_fp (gsi, stmt, PLUS_EXPR, 2);
+      return true;
+
+    case RS6000_BIF_MULT_F32_SCALAR:
+    case RS6000_BIF_MULT_F32_VECTOR:
+    case RS6000_BIF_MULT_F64_SCALAR:
+    case RS6000_BIF_MULT_F64_VECTOR:
+      fold_builtin_overload_fp (gsi, stmt, MULT_EXPR, 2);
+      return true;
+
+    case RS6000_BIF_NEG_F32_SCALAR:
+    case RS6000_BIF_NEG_F32_VECTOR:
+    case RS6000_BIF_NEG_F64_SCALAR:
+    case RS6000_BIF_NEG_F64_VECTOR:
+      fold_builtin_overload_fp (gsi, stmt, NEGATE_EXPR, 1);
+      return true;
+
+    case RS6000_BIF_REDUCE_F32_SCALAR:
+    case RS6000_BIF_REDUCE_F64_SCALAR:
+      {
+	location_t loc = gimple_location (stmt);
+	lhs = gimple_call_lhs (stmt);
+	arg0 = gimple_call_arg (stmt, 0);
+	g = gimple_build_assign (lhs, arg0);
+	gimple_set_location (g, loc);
+	gsi_replace (gsi, g, true);
+	return true;
+      }
+
+    case RS6000_BIF_SMAX_F32_SCALAR:
+    case RS6000_BIF_SMAX_F32_VECTOR:
+    case RS6000_BIF_SMAX_F64_SCALAR:
+    case RS6000_BIF_SMAX_F64_VECTOR:
+      fold_builtin_overload_fp (gsi, stmt, MAX_EXPR, 2);
+      return true;
+
+    case RS6000_BIF_SMIN_F32_SCALAR:
+    case RS6000_BIF_SMIN_F32_VECTOR:
+    case RS6000_BIF_SMIN_F64_SCALAR:
+    case RS6000_BIF_SMIN_F64_VECTOR:
+      fold_builtin_overload_fp (gsi, stmt, MIN_EXPR, 2);
+      return true;
+
+
+    case RS6000_BIF_SUB_F32_SCALAR:
+    case RS6000_BIF_SUB_F32_VECTOR:
+    case RS6000_BIF_SUB_F64_SCALAR:
+    case RS6000_BIF_SUB_F64_VECTOR:
+      fold_builtin_overload_fp (gsi, stmt, MINUS_EXPR, 2);
+      return true;
+
     default:
       if (TARGET_DEBUG_BUILTIN)
 	fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index ecd438642a4..170f83dd907 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -4117,92 +4117,175 @@
   void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
     STXVP nothing {mma,pair}
 
-  v256 __builtin_vpair_abs_v4df (v256);
-    VPAIR_ABS_V4DF vpair_absv4df2 {mma}
-
-  v256 __builtin_vpair_neg_v4df (v256);
-    VPAIR_NEG_V4DF vpair_negv4df2 {mma}
-
-  v256 __builtin_vpair_nabs_v4df (v256);
-    VPAIR_NABS_V4DF vpair_nabsv4df2 {mma}
-
-  v256 __builtin_vpair_sqrt_v4df (v256);
-    VPAIR_SQRT_V4DF vpair_sqrtv4df2 {mma}
-
-  v256 __builtin_vpair_add_v4df (v256, v256);
-    VPAIR_ADD_V4DF vpair_addv4df3 {mma}
-
-  v256 __builtin_vpair_sub_v4df (v256, v256);
-    VPAIR_SUB_V4DF vpair_subv4df3 {mma}
-
-  v256 __builtin_vpair_mul_v4df (v256, v256);
-    VPAIR_MUL_V4DF vpair_mulv4df3 {mma}
-
-  v256 __builtin_vpair_div_v4df (v256, v256);
-    VPAIR_DIV_V4DF vpair_divv4df3 {mma}
-
-  v256 __builtin_vpair_copysign_v4df (v256, v256);
-    VPAIR_COPYSIGN_V4DF vpair_copysignv4df3 {mma}
-
-  v256 __builtin_vpair_smin_v4df (v256, v256);
-    VPAIR_SMIN_V4DF vpair_sminv4df3 {mma}
-
-  v256 __builtin_vpair_smax_v4df (v256, v256);
-    VPAIR_SMAX_V4DF vpair_smaxv4df3 {mma}
-
-  v256 __builtin_vpair_fma_v4df (v256, v256, v256);
-    VPAIR_FMA_V4DF vpair_fmav4df4 {mma}
-
-  v256 __builtin_vpair_fms_v4df (v256, v256, v256);
-    VPAIR_FMS_V4DF vpair_fmsv4df4 {mma}
-
-  v256 __builtin_vpair_nfma_v4df (v256, v256, v256);
-    VPAIR_NFMA_V4DF vpair_nfmav4df4 {mma}
-
-  v256 __builtin_vpair_nfms_v4df (v256, v256, v256);
-    VPAIR_NFMS_V4DF vpair_nfmsv4df4 {mma}
-
-  v256 __builtin_vpair_abs_v8sf (v256);
-    VPAIR_ABS_V8SF vpair_absv8sf2 {mma}
-
-  v256 __builtin_vpair_neg_v8sf (v256);
-    VPAIR_NEG_V8SF vpair_negv8sf2 {mma}
-
-  v256 __builtin_vpair_nabs_v8sf (v256);
-    VPAIR_NABS_V8SF vpair_nabsv8sf2 {mma}
-
-  v256 __builtin_vpair_sqrt_v8sf (v256);
-    VPAIR_SQRT_V8SF vpair_sqrtv8sf2 {mma}
-
-  v256 __builtin_vpair_add_v8sf (v256, v256);
-    VPAIR_ADD_V8SF vpair_addv8sf3 {mma}
-
-  v256 __builtin_vpair_sub_v8sf (v256, v256);
-    VPAIR_SUB_V8SF vpair_subv8sf3 {mma}
-
-  v256 __builtin_vpair_mul_v8sf (v256, v256);
-    VPAIR_MUL_V8SF vpair_mulv8sf3 {mma}
-
-  v256 __builtin_vpair_div_v8sf (v256, v256);
-    VPAIR_DIV_V8SF vpair_divv8sf3 {mma}
-
-  v256 __builtin_vpair_copysign_v8sf (v256, v256);
-    VPAIR_COPYSIGN_V8SF vpair_copysignv8sf3 {mma}
-
-  v256 __builtin_vpair_fma_v8sf (v256, v256, v256);
-    VPAIR_FMA_V8SF vpair_fmav8sf4 {mma}
-
-  v256 __builtin_vpair_fms_v8sf (v256, v256, v256);
-    VPAIR_FMS_V8SF vpair_fmsv8sf4 {mma}
-
-  v256 __builtin_vpair_nfma_v8sf (v256, v256, v256);
-    VPAIR_NFMA_V8SF vpair_nfmav8sf4 {mma}
-
-  v256 __builtin_vpair_nfms_v8sf (v256, v256, v256);
-    VPAIR_NFMS_V8SF vpair_nfmsv8sf4 {mma}
-
-  v256 __builtin_vpair_smin_v8sf (v256, v256);
-    VPAIR_SMIN_V8SF vpair_sminv8sf3 {mma}
-
-  v256 __builtin_vpair_smax_v8sf (v256, v256);
-    VPAIR_SMAX_V8SF vpair_smaxv8sf3 {mma}
+; Built-ins for overloaded floating point operations, including scalar and
+; 128-bit vector types that are converted into direct operations.
+; The 256-bit types that are kept in vector pairs use insns that are split
+; into separate operations after register allocation.
+
+  float __builtin_abs_f32_scalar (float);
+    ABS_F32_SCALAR nothing {}
+  vf __builtin_abs_f32_vector (vf);
+    ABS_F32_VECTOR nothing {}
+  v256 __builtin_abs_f32_vpair (v256);
+    ABS_F32_VPAIR vpair_absv8sf2 {mma}
+
+  double __builtin_abs_f64_scalar (double);
+    ABS_F64_SCALAR nothing {}
+  vd __builtin_abs_f64_vector (vd);
+    ABS_F64_VECTOR nothing {}
+  v256 __builtin_abs_f64_vpair (v256);
+    ABS_F64_VPAIR vpair_absv4df2 {mma}
+
+  float __builtin_add_f32_scalar (float, float);
+    ADD_F32_SCALAR nothing {}
+  vf __builtin_add_f32_vector (vf, vf);
+    ADD_F32_VECTOR nothing {}
+  v256 __builtin_add_f32_vpair (v256, v256);
+    ADD_F32_VPAIR vpair_addv8sf3 {mma}
+
+  double __builtin_add_f64_scalar (double, double);
+    ADD_F64_SCALAR nothing {}
+  vd __builtin_add_f64_vector (vd, vd);
+    ADD_F64_VECTOR nothing {}
+  v256 __builtin_add_f64_vpair (v256, v256);
+    ADD_F64_VPAIR vpair_addv4df3 {mma}
+
+  float __builtin_copysign_f32_scalar (float, float);
+    COPYSIGN_F32_SCALAR copysignsf3_fcpsgn {}
+  vf __builtin_copysign_f32_vector (vf, vf);
+    COPYSIGN_F32_VECTOR vsx_copysignv4sf3 {}
+  v256 __builtin_copysign_f32_vpair (v256, v256);
+    COPYSIGN_F32_VPAIR vpair_copysignv8sf3 {mma}
+
+  double __builtin_copysign_f64_scalar (double, double);
+    COPYSIGN_F64_SCALAR copysigndf3_fcpsgn {}
+  vd __builtin_copysign_f64_vector (vd, vd);
+    COPYSIGN_F64_VECTOR vsx_copysignv2df3 {}
+  v256 __builtin_copysign_f64_vpair (v256, v256);
+    COPYSIGN_F64_VPAIR vpair_copysignv4df3 {mma}
+
+  float __builtin_div_f32_scalar (float, float);
+    DIV_F32_SCALAR divsf3 {}
+  vf __builtin_div_f32_vector (vf, vf);
+    DIV_F32_VECTOR divv4sf3 {}
+  v256 __builtin_div_f32_vpair (v256, v256);
+    DIV_F32_VPAIR vpair_divv8sf3 {mma}
+
+  double __builtin_div_f64_scalar (double, double);
+    DIV_F64_SCALAR divdf3 {}
+  vd __builtin_div_f64_vector (vd, vd);
+    DIV_F64_VECTOR divv2df3 {}
+  v256 __builtin_div_f64_vpair (v256, v256);
+    DIV_F64_VPAIR vpair_divv4df3 {mma}
+
+  float __builtin_fma_f32_scalar (float, float, float);
+    FMA_F32_SCALAR fmasf4 {}
+  vf __builtin_fma_f32_vector (vf, vf, vf);
+    FMA_F32_VECTOR fmav4sf4 {}
+  v256 __builtin_fma_v8sf (v256, v256, v256);
+    FMA_F32_VPAIR vpair_fmav8sf4 {mma}
+
+  double __builtin_fma_f64_scalar (double, double, double);
+    FMA_F64_SCALAR fmadf4 {}
+  vd __builtin_fma_f64_vector (vd, vd, vd);
+    FMA_F64_VECTOR fmav2df4 {}
+  v256 __builtin_fma_v4df (v256, v256, v256);
+    FMA_F64_VPAIR vpair_fmav4df4 {mma}
+
+  float __builtin_mult_f32_scalar (float, float);
+    MULT_F32_SCALAR nothing {}
+  vf __builtin_mult_f32_vector (vf, vf);
+    MULT_F32_VECTOR nothing {}
+  v256 __builtin_mult_f32_vpair (v256, v256);
+    MULT_F32_VPAIR vpair_mulv8sf3 {mma}
+
+  double __builtin_mult_f64_scalar (double, double);
+    MULT_F64_SCALAR nothing {}
+  vd __builtin_mult_f64_vector (vd, vd);
+    MULT_F64_VECTOR nothing {}
+  v256 __builtin_mult_f64_vpair (v256, v256);
+    MULT_F64_VPAIR vpair_mulv4df3 {mma}
+
+  float __builtin_neg_f32_scalar (float);
+    NEG_F32_SCALAR nothing {}
+  vf __builtin_neg_f32_vector (vf);
+    NEG_F32_VECTOR nothing {}
+  v256 __builtin_neg_f32_vpair (v256);
+    NEG_F32_VPAIR vpair_negv8sf2 {mma}
+
+  double __builtin_neg_f64_scalar (double);
+    NEG_F64_SCALAR nothing {}
+  vd __builtin_neg_f64_vector (vd);
+    NEG_F64_VECTOR nothing {}
+  v256 __builtin_neg_f64_vpair (v256);
+    NEG_F64_VPAIR vpair_negv4df2 {mma}
+
+  float __builtin_reduce_f32_scalar (float);
+    REDUCE_F32_SCALAR nothing {}
+  float __builtin_reduce_f32_vector (vf);
+    REDUCE_F32_VECTOR reduce_v4sf {}
+  float __builtin_reduce_f32_vpair (v256);
+    REDUCE_F32_VPAIR reduce_v8sf {mma,pair}
+
+  double __builtin_reduce_f64_scalar (double);
+    REDUCE_F64_SCALAR nothing {}
+  double __builtin_reduce_f64_vector (vd);
+    REDUCE_F64_VECTOR reduce_v2df {}
+  double __builtin_reduce_f64_vpair (v256);
+    REDUCE_F64_VPAIR reduce_v4df {mma,pair}
+
+  float __builtin_smax_f32_scalar (float, float);
+    SMAX_F32_SCALAR nothing {}
+  vf __builtin_smax_f32_vector (vf, vf);
+    SMAX_F32_VECTOR nothing {}
+  v256 __builtin_smax_f32_vpair (v256, v256);
+    SMAX_F32_VPAIR vpair_smaxv8sf3 {mma}
+
+  double __builtin_smax_f64_scalar (double, double);
+    SMAX_F64_SCALAR nothing {}
+  vd __builtin_smax_f64_vector (vd, vd);
+    SMAX_F64_VECTOR nothing {}
+  v256 __builtin_smax_f64_vpair (v256, v256);
+    SMAX_F64_VPAIR vpair_smaxv4df3 {mma}
+
+  float __builtin_smin_f32_scalar (float, float);
+    SMIN_F32_SCALAR nothing {}
+  vf __builtin_smin_f32_vector (vf, vf);
+    SMIN_F32_VECTOR nothing {}
+  v256 __builtin_smin_f32_vpair (v256, v256);
+    SMIN_F32_VPAIR vpair_sminv8sf3 {mma}
+
+  double __builtin_smin_f64_scalar (double, double);
+    SMIN_F64_SCALAR nothing {}
+  vd __builtin_smin_f64_vector (vd, vd);
+    SMIN_F64_VECTOR nothing {}
+  v256 __builtin_smin_f64_vpair (v256, v256);
+    SMIN_F64_VPAIR vpair_sminv4df3 {mma}
+
+  float __builtin_sqrt_f32_scalar (float);
+    SQRT_F32_SCALAR nothing {}
+  vf __builtin_sqrt_f32_vector (vf);
+    SQRT_F32_VECTOR nothing {}
+  v256 __builtin_sqrt_f32_vpair (v256);
+    SQRT_F32_VPAIR vpair_sqrtv8sf2 {mma}
+
+  double __builtin_sqrt_f64_scalar (double);
+    SQRT_F64_SCALAR nothing {}
+  vd __builtin_sqrt_f64_vector (vd);
+    SQRT_F64_VECTOR nothing {}
+  v256 __builtin_sqrt_f64_vpair (v256);
+    SQRT_F64_VPAIR vpair_sqrtv4df2 {mma}
+
+  float __builtin_sub_f32_scalar (float, float);
+    SUB_F32_SCALAR nothing {}
+  vf __builtin_sub_f32_vector (vf, vf);
+    SUB_F32_VECTOR nothing {}
+  v256 __builtin_sub_f32_vpair (v256, v256);
+    SUB_F32_VPAIR vpair_subv8sf3 {mma}
+
+  double __builtin_sub_f64_scalar (double, double);
+    SUB_F64_SCALAR nothing {}
+  vd __builtin_sub_f64_vector (vd, vd);
+    SUB_F64_VECTOR nothing {}
+  v256 __builtin_sub_f64_vpair (v256, v256);
+    SUB_F64_VPAIR vpair_subv4df3 {mma}
diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def
index 470d718efde..62c2432e8ff 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -6187,3 +6187,181 @@
     VUPKLSW  VUPKLSW_DEPR1
   vbll __builtin_vec_vupklsw (vbi);
     VUPKLSW  VUPKLSW_DEPR2
+
+;; Overloaded floating point built-in functions
+
+[ABS_F32, SKIP, __builtin_abs_f32]
+  float __builtin_abs_f32 (float);
+    ABS_F32_SCALAR
+  vf __builtin_abs_f32 (vf);
+    ABS_F32_VECTOR
+  v256 __builtin_abs_f32 (v256);
+    ABS_F32_VPAIR
+
+[ABS_F64, SKIP, __builtin_abs_f64]
+  double __builtin_abs_f64 (double);
+    ABS_F64_SCALAR
+  vd __builtin_abs_f64 (vd);
+    ABS_F64_VECTOR
+  v256 __builtin_abs_f64 (v256);
+    ABS_F64_VPAIR
+
+[ADD_F32, SKIP, __builtin_add_f32]
+  float __builtin_add_f32 (float, float);
+    ADD_F32_SCALAR
+  vf __builtin_add_f32 (vf, vf);
+    ADD_F32_VECTOR
+  v256 __builtin_add_f32 (v256, v256);
+    ADD_F32_VPAIR
+
+[ADD_F64, SKIP, __builtin_add_f64]
+  double __builtin_add_f64 (double, double);
+    ADD_F64_SCALAR
+  vd __builtin_add_f64 (vd, vd);
+    ADD_F64_VECTOR
+  v256 __builtin_add_f64 (v256, v256);
+    ADD_F64_VPAIR
+
+[COPYSIGN_F32, SKIP, __builtin_copysign_f32]
+  float __builtin_copysign_f32 (float, float);
+    COPYSIGN_F32_SCALAR
+  vf __builtin_copysign_f32 (vf, vf);
+    COPYSIGN_F32_VECTOR
+  v256 __builtin_copysign_f32 (v256, v256);
+    COPYSIGN_F32_VPAIR
+
+[COPYSIGN_F64, SKIP, __builtin_copysign_f64]
+  double __builtin_copysign_f64 (double, double);
+    COPYSIGN_F64_SCALAR
+  vd __builtin_copysign_f64 (vd, vd);
+    COPYSIGN_F64_VECTOR
+  v256 __builtin_copysign_f64 (v256, v256);
+    COPYSIGN_F64_VPAIR
+
+[DIV_F32, SKIP, __builtin_div_f32]
+  float __builtin_div_f32 (float, float);
+    DIV_F32_SCALAR
+  vf __builtin_div_f32 (vf, vf);
+    DIV_F32_VECTOR
+  v256 __builtin_div_f32 (v256, v256);
+    DIV_F32_VPAIR
+
+[DIV_F64, SKIP, __builtin_div_f64]
+  double __builtin_div_f64 (double, double);
+    DIV_F64_SCALAR
+  vd __builtin_div_f64 (vd, vd);
+    DIV_F64_VECTOR
+  v256 __builtin_div_f64 (v256, v256);
+    DIV_F64_VPAIR
+
+[FMA_F32, SKIP, __builtin_fma_f32]
+  float __builtin_fma_f32 (float, float, float);
+    FMA_F32_SCALAR
+  vf __builtin_fma_f32 (vf, vf, vf);
+    FMA_F32_VECTOR
+  v256 __builtin_fma_f32 (v256, v256, v256);
+    FMA_F32_VPAIR
+
+[FMA_F64, SKIP, __builtin_fma_f64]
+  double __builtin_fma_f64 (double, double, double);
+    FMA_F64_SCALAR
+  vd __builtin_fma_f64 (vd, vd, vd);
+    FMA_F64_VECTOR
+  v256 __builtin_fma_f64 (v256, v256, v256);
+    FMA_F64_VPAIR
+
+[MULT_F32, SKIP, __builtin_mult_f32]
+  float __builtin_mult_f32 (float, float);
+    MULT_F32_SCALAR
+  vf __builtin_mult_f32 (vf, vf);
+    MULT_F32_VECTOR
+  v256 __builtin_mult_f32 (v256, v256);
+    MULT_F32_VPAIR
+
+[MULT_F64, SKIP, __builtin_mult_f64]
+  double __builtin_mult_f64 (double, double);
+    MULT_F64_SCALAR
+  vd __builtin_mult_f64 (vd, vd);
+    MULT_F64_VECTOR
+  v256 __builtin_mult_f64 (v256, v256);
+    MULT_F64_VPAIR
+
+[NEG_F32, SKIP, __builtin_neg_f32]
+  float __builtin_neg_f32 (float);
+    NEG_F32_SCALAR
+  vf __builtin_neg_f32 (vf);
+    NEG_F32_VECTOR
+  v256 __builtin_neg_f32 (v256);
+    NEG_F32_VPAIR
+
+[NEG_F64, SKIP, __builtin_neg_f64]
+  double __builtin_neg_f64 (double);
+    NEG_F64_SCALAR
+  vd __builtin_neg_f64 (vd);
+    NEG_F64_VECTOR
+  v256 __builtin_neg_f64 (v256);
+    NEG_F64_VPAIR
+
+[REDUCE_F32, SKIP, __builtin_reduce_f32]
+  float __builtin_reduce_f32 (float);
+    REDUCE_F32_SCALAR
+  float __builtin_reduce_f32 (vf);
+    REDUCE_F32_VECTOR
+  float __builtin_reduce_f32 (v256);
+    REDUCE_F32_VPAIR
+
+[REDUCE_F64, SKIP, __builtin_reduce_f64]
+  double __builtin_reduce_f64 (double);
+    REDUCE_F64_SCALAR
+  double __builtin_reduce_f64 (vd);
+    REDUCE_F64_VECTOR
+  double __builtin_reduce_f64 (v256);
+    REDUCE_F64_VPAIR
+
+[SMAX_F32, SKIP, __builtin_smax_f32]
+  float __builtin_smax_f32 (float, float);
+    SMAX_F32_SCALAR
+  vf __builtin_smax_f32 (vf, vf);
+    SMAX_F32_VECTOR
+  v256 __builtin_smax_f32 (v256, v256);
+    SMAX_F32_VPAIR
+
+[SMAX_F64, SKIP, __builtin_smax_f64]
+  double __builtin_smax_f64 (double, double);
+    SMAX_F64_SCALAR
+  vd __builtin_smax_f64 (vd, vd);
+    SMAX_F64_VECTOR
+  v256 __builtin_smax_f64 (v256, v256);
+    SMAX_F64_VPAIR
+
+[SMIN_F32, SKIP, __builtin_smin_f32]
+  float __builtin_smin_f32 (float, float);
+    SMIN_F32_SCALAR
+  vf __builtin_smin_f32 (vf, vf);
+    SMIN_F32_VECTOR
+  v256 __builtin_smin_f32 (v256, v256);
+    SMIN_F32_VPAIR
+
+[SMIN_F64, SKIP, __builtin_smin_f64]
+  double __builtin_smin_f64 (double, double);
+    SMIN_F64_SCALAR
+  vd __builtin_smin_f64 (vd, vd);
+    SMIN_F64_VECTOR
+  v256 __builtin_smin_f64 (v256, v256);
+    SMIN_F64_VPAIR
+
+[SUB_F32, SKIP, __builtin_sub_f32]
+  float __builtin_sub_f32 (float, float);
+    SUB_F32_SCALAR
+  vf __builtin_sub_f32 (vf, vf);
+    SUB_F32_VECTOR
+  v256 __builtin_sub_f32 (v256, v256);
+    SUB_F32_VPAIR
+
+[SUB_F64, SKIP, __builtin_sub_f64]
+  double __builtin_sub_f64 (double, double);
+    SUB_F64_SCALAR
+  vd __builtin_sub_f64 (vd, vd);
+    SUB_F64_VECTOR
+  v256 __builtin_sub_f64 (v256, v256);
+    SUB_F64_VPAIR
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-scalar.c b/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-scalar.c
new file mode 100644
index 00000000000..400a54f2fd0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-scalar.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
+
+/* Test code generation for __builtin_<op>_f32 using scalar float.  */
+
+#include "fp-overload.h"
+
+TEST (float, float, flt, 32)
+
+/* { dg-final { scan-assembler-times {\mfabs\M|\mxsabsdp\M}          1 } } */
+/* { dg-final { scan-assembler-times {\mfadds\M|\mxsaddsp\M}         1 } } */
+/* { dg-final { scan-assembler-times {\mfmadds\M|\mxsmadd[am]sp\M}   1 } } */
+/* { dg-final { scan-assembler-times {\mfmsubs\M|\mxsmsub[am]sp\M}   1 } } */
+/* { dg-final { scan-assembler-times {\mfmuls\M|\mxsmulsp\M}         1 } } */
+/* { dg-final { scan-assembler-times {\mfnabs\M|\mxsnabsdp\M}        1 } } */
+/* { dg-final { scan-assembler-times {\mfneg\M|\mxsnegdp\M}          1 } } */
+/* { dg-final { scan-assembler-times {\mfnmadds\M|\mxsmadd[am]sp\M}  1 } } */
+/* { dg-final { scan-assembler-times {\mfnmsubs\M|\mxsnmsub[am]sp\M}  1 } } */
+/* { dg-final { scan-assembler-times {\mfsubs\M|\mxssubsp\M}         1 } } */
+/* { dg-final { scan-assembler-not   {\mbl\M}                          } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-vector.c b/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-vector.c
new file mode 100644
index 00000000000..14f76d8a8f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-vector.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
+
+/* Test code generation for __builtin_<op>_f32 using vector float.  */
+
+#include "fp-overload.h"
+
+TEST (vector float, float, vect, 32)
+
+/* { dg-final { scan-assembler-times {\mvsldoi\M}        2 } } */
+/* { dg-final { scan-assembler-times {\mxscvspdp\M}      1 } } */
+/* { dg-final { scan-assembler-times {\mxvabssp\M}       1 } } */
+/* { dg-final { scan-assembler-times {\mxvaddsp\M}       3 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd[am]sp\M}  1 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub[am]sp\M}  1 } } */
+/* { dg-final { scan-assembler-times {\mxvmulsp\M}       1 } } */
+/* { dg-final { scan-assembler-times {\mxvnabssp\M}      1 } } */
+/* { dg-final { scan-assembler-times {\mxvnegsp\M}       1 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd[am]sp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub[am]sp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvsubsp\M}       1 } } */
+/* { dg-final { scan-assembler-not   {\mbl\M}              } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-vpair.c b/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-vpair.c
new file mode 100644
index 00000000000..466f056cf9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload-f32-vpair.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
+
+/* Test code generation for __builtin_<op>_f32 using __vector_pair.  */
+
+#include "fp-overload.h"
+
+TEST (__vector_pair, float, vpair, 32)
+
+/* { dg-final { scan-assembler-times {\mvsldoi\M}        2 } } */
+/* { dg-final { scan-assembler-times {\mxscvspdp\M}      1 } } */
+/* { dg-final { scan-assembler-times {\mxvabssp\M}       2 } } */
+/* { dg-final { scan-assembler-times {\mxvaddsp\M}       5 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd[am]sp\M}  2 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub[am]sp\M}  2 } } */
+/* { dg-final { scan-assembler-times {\mxvmulsp\M}       2 } } */
+/* { dg-final { scan-assembler-times {\mxvnabssp\M}      2 } } */
+/* { dg-final { scan-assembler-times {\mxvnegsp\M}       2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd[am]sp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub[am]sp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxvsubsp\M}       2 } } */
+/* { dg-final { scan-assembler-not   {\mbl\M}              } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-scalar.c b/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-scalar.c
new file mode 100644
index 00000000000..28e7c91c77c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-scalar.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
+
+/* Test code generation for __builtin_<op>_f64 using scalar double.  */
+
+#include "fp-overload.h"
+
+TEST (double, double, dbl, 64)
+
+
+/* { dg-final { scan-assembler-times {\mfabs\M|\mxsabsdp\M}         1 } } */
+/* { dg-final { scan-assembler-times {\mfadd\M|\mxsadddp\M}         1 } } */
+/* { dg-final { scan-assembler-times {\mfmadd\M|\mxsmadd[am]dp\M}   1 } } */
+/* { dg-final { scan-assembler-times {\mfmsub\M|\mxsmsub[am]dp\M}   1 } } */
+/* { dg-final { scan-assembler-times {\mfmul\M|\mxsmuldp\M}         1 } } */
+/* { dg-final { scan-assembler-times {\mfnabs\M|\mxsnabsdp\M}       1 } } */
+/* { dg-final { scan-assembler-times {\mfneg\M|\mxsnegdp\M}         1 } } */
+/* { dg-final { scan-assembler-times {\mfnmadd\M|\mxsmadd[am]dp\M}  1 } } */
+/* { dg-final { scan-assembler-times {\mfnmsub\M|\mxsmsub[am]dp\M}  1 } } */
+/* { dg-final { scan-assembler-times {\mfsub\M|\mxssubdp\M}         1 } } */
+/* { dg-final { scan-assembler-not   {\mbl\M}                         } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-vector.c b/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-vector.c
new file mode 100644
index 00000000000..806fc655b44
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-vector.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
+
+/* Test code generation for __builtin_<op>_f64 using vector double.  */
+
+#include "fp-overload.h"
+
+TEST (vector double, double, vect, 64)
+
+/* { dg-final { scan-assembler-times {\mvsldoi\M}    1 } } */
+/* { dg-final { scan-assembler-times {\mxvabsdp\M}   1 } } */
+/* { dg-final { scan-assembler-times {\mxvadddp\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd[am]dp\M}  1 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub[am]dp\M}  1 } } */
+/* { dg-final { scan-assembler-times {\mxvmuldp\M}   1 } } */
+/* { dg-final { scan-assembler-times {\mxvnabsdp\M}  1 } } */
+/* { dg-final { scan-assembler-times {\mxvnegdp\M}   1 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd[am]dp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub[am]dp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvsubdp\M}   1 } } */
+/* { dg-final { scan-assembler-not   {\mbl\M}          } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-vpair.c b/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-vpair.c
new file mode 100644
index 00000000000..7dd0613bf88
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload-f64-vpair.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
+
+/* Test code generation for __builtin_<op>_f64 using __vector_pair.  */
+
+#include "fp-overload.h"
+
+TEST (__vector_pair, double, vpair, 64)
+
+/* { dg-final { scan-assembler-times {\mxvabsdp\M}        2 } } */
+/* { dg-final { scan-assembler-times {\mxvadddp\M}        3 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd[am]dp\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub[am]dp\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mxvmuldp\M}        2 } } */
+/* { dg-final { scan-assembler-times {\mxvnabsdp\M}       2 } } */
+/* { dg-final { scan-assembler-times {\mxvnegdp\M}        2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd[am]dp\M}  2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub[am]dp\M}  2 } } */
+/* { dg-final { scan-assembler-times {\mxvsubdp\M}        2 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M}       1 } } */
+/* { dg-final { scan-assembler-not   {\mbl\M}               } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fp-overload.h b/gcc/testsuite/gcc.target/powerpc/fp-overload.h
new file mode 100644
index 00000000000..a1ce5f83765
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fp-overload.h
@@ -0,0 +1,85 @@
+/* Common code to test the floating point overload functions.  */
+
+#define TEST(TYPE, SCALAR, TYPE_STR, SIZE)				\
+									\
+void									\
+do_add_ ## TYPE_STR ## _f ## SIZE (TYPE *p, TYPE *q, TYPE *r)		\
+{									\
+  *p = __builtin_add_f ## SIZE (*q, *r);				\
+}									\
+									\
+void									\
+do_sub_ ## TYPE_STR ## _f ## SIZE (TYPE *p, TYPE *q, TYPE *r)		\
+{									\
+  *p = __builtin_sub_f ## SIZE (*q, *r);				\
+}									\
+									\
+void									\
+do_mult_ ## TYPE_STR ## _f ## SIZE (TYPE *p, TYPE *q, TYPE *r)		\
+{									\
+  *p = __builtin_mult_f ## SIZE (*q, *r);				\
+}									\
+									\
+void									\
+do_neg_ ## TYPE_STR ## _f ## SIZE (TYPE *p, TYPE *q)			\
+{									\
+  *p = __builtin_neg_f ## SIZE (*q);					\
+}									\
+									\
+void									\
+do_abs_ ## TYPE_STR ## _f ## SIZE (TYPE *p, TYPE *q)			\
+{									\
+  *p = __builtin_abs_f ## SIZE (*q);					\
+}									\
+									\
+void									\
+do_nabs_ ## TYPE_STR ## _f ## SIZE (TYPE *p, TYPE *q)			\
+{									\
+  *p = __builtin_neg_f ## SIZE (__builtin_abs_f ## SIZE (*q));		\
+}									\
+									\
+void									\
+do_fma_ ## TYPE_STR ## _f ## SIZE (TYPE *p,				\
+				   TYPE *q,				\
+				   TYPE *r,				\
+				   TYPE *s)				\
+{									\
+  *p = __builtin_fma_f ## SIZE (*q, *r, *s);				\
+}									\
+									\
+void									\
+do_fms_ ## TYPE_STR ## _f ## SIZE (TYPE *p,				\
+				   TYPE *q,				\
+				   TYPE *r,				\
+				   TYPE *s)				\
+{									\
+  TYPE neg_s = __builtin_neg_f ## SIZE (*s);				\
+  *p = __builtin_fma_f ## SIZE (*q, *r, neg_s);				\
+}									\
+									\
+void									\
+do_nfma_ ## TYPE_STR ## _f ## SIZE (TYPE *p,				\
+				    TYPE *q,				\
+				    TYPE *r,				\
+				    TYPE *s)				\
+{									\
+  TYPE f = __builtin_fma_f ## SIZE (*q, *r, *s);			\
+  *p = __builtin_neg_f ## SIZE (f);					\
+}									\
+									\
+void									\
+do_nfms_ ## TYPE_STR ## _f ## SIZE (TYPE *p,				\
+				    TYPE *q,				\
+				    TYPE *r,				\
+				    TYPE *s)				\
+{									\
+  TYPE neg_s = __builtin_neg_f ## SIZE (*s);				\
+  TYPE f = __builtin_fma_f ## SIZE (*q, *r, neg_s);			\
+  *p = __builtin_neg_f ## SIZE (f);					\
+}									\
+									\
+void									\
+do_reduce_ ## TYPE_STR ## _f ## SIZE (SCALAR *p, TYPE *q)		\
+{									\
+  *p = __builtin_reduce_f ## SIZE (*q);					\
+}

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-07-28  6:11 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-28  6:11 [gcc(refs/users/meissner/heads/work127-vpair)] Rename insns; Add reduction; Add overloads Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).