* [gcc(refs/users/meissner/heads/work122-vpair)] Initial vector pair arithmetic support.
From: Michael Meissner @ 2023-06-16 16:23 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:d38035c76000da268ad5c9828c2b86651af83e36

commit d38035c76000da268ad5c9828c2b86651af83e36
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Jun 16 12:23:18 2023 -0400

    Initial vector pair arithmetic support.
    
    2023-06-16  Peter Bergner  <bergner@linux.ibm.com>
                Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/mma.md (UNSPEC_PAIR_ABS_F32): New unspec.
            (UNSPEC_PAIR_ABS_F64): Likewise.
            (UNSPEC_PAIR_ADD_F32): Likewise.
            (UNSPEC_PAIR_ADD_F64): Likewise.
            (UNSPEC_PAIR_FMA_F32): Likewise.
            (UNSPEC_PAIR_FMA_F64): Likewise.
            (UNSPEC_PAIR_MULT_F32): Likewise.
            (UNSPEC_PAIR_MULT_F64): Likewise.
            (UNSPEC_PAIR_SCALE_F32): Likewise.
            (UNSPEC_PAIR_SCALE_F64): Likewise.
            (UNSPEC_PAIR_SUB_F32): Likewise.
            (UNSPEC_PAIR_SUB_F64): Likewise.
            (UNSPEC_PAIR_1OPS): New iterator.
            (UNSPEC_PAIR_2OPS): Likewise.
            (UNSPEC_PAIR_3OPS): Likewise.
            (UNSPEC_PAIR_SCALE): Likewise.
            (pairop): Likewise.
            (pairmode): Likewise.
            (vpair_<pairop>_<pairmode>): New insns for unary, binary, and
            ternary operations.
            (vpair_<pairop>_<pairmode> splitter): New splitters.
            (vpair_SCALE_<pairmode>): New insn.
            (vpair_SCALE_<pairmode> splitter): New splitter.
            * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Fold
            the new vector pair arithmetic built-in functions into gimple.
            * config/rs6000/rs6000-builtins.def (__builtin_abs_f32_scalar): New
            vector pair arithmetic built-in functions.
            (__builtin_abs_f64_scalar): Likewise.
            (__builtin_add_f32_scalar): Likewise.
            (__builtin_add_f64_scalar): Likewise.
            (__builtin_mult_f32_scalar): Likewise.
            (__builtin_mult_f64_scalar): Likewise.
            (__builtin_reduce_f32_scalar): Likewise.
            (__builtin_reduce_f64_scalar): Likewise.
            (__builtin_scale_f32_scalar): Likewise.
            (__builtin_scale_f64_scalar): Likewise.
            (__builtin_sub_f32_scalar): Likewise.
            (__builtin_sub_f64_scalar): Likewise.
            (__builtin_fma_f32_scalar): Likewise.
            (__builtin_fma_f64_scalar): Likewise.
            * config/rs6000/rs6000-call.cc (rs6000_return_in_memory): Allow
            vector pairs to be passed to and returned from functions.
            (rs6000_function_arg_advance_1): Likewise.
            (rs6000_function_arg): Likewise.
            * config/rs6000/rs6000-overload.def (__builtin_abs_f32): Add
            overloaded vector pair arithmetic built-in functions.
            (__builtin_abs_f64): Likewise.
            (__builtin_add_f32): Likewise.
            (__builtin_add_f64): Likewise.
            (__builtin_mult_f32): Likewise.
            (__builtin_mult_f64): Likewise.
            (__builtin_reduce_f32): Likewise.
            (__builtin_reduce_f64): Likewise.
            (__builtin_scale_f32): Likewise.
            (__builtin_scale_f64): Likewise.
            (__builtin_sub_f32): Likewise.
            (__builtin_sub_f64): Likewise.
            (__builtin_fma_f32): Likewise.
            (__builtin_fma_f64): Likewise.
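    
    As an illustrative usage sketch (not part of this patch): assuming a
    power10 target compiled with -mmma, and using the pre-existing
    __builtin_vsx_lxvp and __builtin_vsx_stxvp built-ins to move pairs
    between memory and VSX register pairs, the new overloaded built-ins
    can be used like:
    
        /* Scale n floats, 8 at a time; assumes n is a multiple of 8.  */
        void
        scale_array (float *p, float scale, unsigned long n)
        {
          for (unsigned long i = 0; i < n; i += 8)
            {
              __vector_pair v
                = __builtin_vsx_lxvp (i * sizeof (float), (__vector_pair *) p);
              v = __builtin_scale_f32 (scale, v);
              __builtin_vsx_stxvp (v, i * sizeof (float), (__vector_pair *) p);
            }
        }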

Diff:
---
 gcc/config/rs6000/mma.md              | 161 ++++++++++++++++++++++++++++
 gcc/config/rs6000/rs6000-builtin.cc   | 194 ++++++++++++++++++++++++++++++++++
 gcc/config/rs6000/rs6000-builtins.def | 106 +++++++++++++++++++
 gcc/config/rs6000/rs6000-call.cc      |  11 +-
 gcc/config/rs6000/rs6000-overload.def | 116 ++++++++++++++++++++
 5 files changed, 584 insertions(+), 4 deletions(-)

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index d36dc13872b..c133c244611 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -91,6 +91,18 @@
    UNSPEC_MMA_XVI8GER4SPP
    UNSPEC_MMA_XXMFACC
    UNSPEC_MMA_XXMTACC
+   UNSPEC_PAIR_ABS_F32
+   UNSPEC_PAIR_ABS_F64
+   UNSPEC_PAIR_ADD_F32
+   UNSPEC_PAIR_ADD_F64
+   UNSPEC_PAIR_FMA_F32
+   UNSPEC_PAIR_FMA_F64
+   UNSPEC_PAIR_MULT_F32
+   UNSPEC_PAIR_MULT_F64
+   UNSPEC_PAIR_SCALE_F32
+   UNSPEC_PAIR_SCALE_F64
+   UNSPEC_PAIR_SUB_F32
+   UNSPEC_PAIR_SUB_F64
   ])
 
 (define_c_enum "unspecv"
@@ -263,6 +275,46 @@
 (define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"pmxvi8ger4pp")
 				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
 
+(define_int_iterator UNSPEC_PAIR_1OPS	[UNSPEC_PAIR_ABS_F32
+					 UNSPEC_PAIR_ABS_F64])
+
+(define_int_iterator UNSPEC_PAIR_2OPS	[UNSPEC_PAIR_ADD_F32
+					 UNSPEC_PAIR_ADD_F64
+					 UNSPEC_PAIR_MULT_F32
+					 UNSPEC_PAIR_MULT_F64
+					 UNSPEC_PAIR_SUB_F32
+					 UNSPEC_PAIR_SUB_F64])
+
+(define_int_iterator UNSPEC_PAIR_3OPS	[UNSPEC_PAIR_FMA_F32
+					 UNSPEC_PAIR_FMA_F64])
+
+(define_int_iterator UNSPEC_PAIR_SCALE	[UNSPEC_PAIR_SCALE_F32
+					 UNSPEC_PAIR_SCALE_F64])
+
+(define_int_attr pairop		[(UNSPEC_PAIR_ABS_F32		"ABS")
+				 (UNSPEC_PAIR_ABS_F64		"ABS")
+				 (UNSPEC_PAIR_ADD_F32		"PLUS")
+				 (UNSPEC_PAIR_ADD_F64		"PLUS")
+				 (UNSPEC_PAIR_FMA_F32		"FMA")
+				 (UNSPEC_PAIR_FMA_F64		"FMA")
+				 (UNSPEC_PAIR_MULT_F32		"MULT")
+				 (UNSPEC_PAIR_MULT_F64		"MULT")
+				 (UNSPEC_PAIR_SUB_F32		"MINUS")
+				 (UNSPEC_PAIR_SUB_F64		"MINUS")])
+
+(define_int_attr pairmode	[(UNSPEC_PAIR_ABS_F32           "V4SF")
+				 (UNSPEC_PAIR_ABS_F64		"V2DF")
+				 (UNSPEC_PAIR_ADD_F32		"V4SF")
+				 (UNSPEC_PAIR_ADD_F64		"V2DF")
+				 (UNSPEC_PAIR_FMA_F32		"V4SF")
+				 (UNSPEC_PAIR_FMA_F64		"V2DF")
+				 (UNSPEC_PAIR_MULT_F32		"V4SF")
+				 (UNSPEC_PAIR_MULT_F64		"V2DF")
+				 (UNSPEC_PAIR_SCALE_F32		"V4SF")
+				 (UNSPEC_PAIR_SCALE_F64		"V2DF")
+				 (UNSPEC_PAIR_SUB_F32		"V4SF")
+				 (UNSPEC_PAIR_SUB_F64		"V2DF")])
+
 
 ;; Vector pair support.  OOmode can only live in VSRs.
 (define_expand "movoo"
@@ -690,3 +742,112 @@
   "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")])
+
+
+(define_insn "vpair_<pairop>_<pairmode>"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=?wa")
+	(unspec:OO [(match_operand:OO 1 "vsx_register_operand" "wa")]
+		   UNSPEC_PAIR_1OPS))]
+  "TARGET_MMA"
+  "#")
+
+(define_split
+  [(set (match_operand:OO 0 "vsx_register_operand")
+	(unspec:OO [(match_operand:OO 1 "vsx_register_operand")]
+		   UNSPEC_PAIR_1OPS))]
+  "TARGET_MMA && reload_completed"
+  [(const_int 0)]
+{
+  enum machine_mode mode = <pairmode>mode;
+  for (long i = 0; i < 2; i++)
+    {
+      rtx op0 = gen_rtx_REG (mode, reg_or_subregno (operands[0]) + i);
+      rtx op1 = gen_rtx_REG (mode, reg_or_subregno (operands[1]) + i);
+      emit_insn (gen_rtx_SET (op0, gen_rtx_<pairop> (mode, op1)));
+    }
+  DONE;
+})
+
+(define_insn "vpair_<pairop>_<pairmode>"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=?wa")
+	(unspec:OO [(match_operand:OO 1 "vsx_register_operand" "wa")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")]
+		   UNSPEC_PAIR_2OPS))]
+  "TARGET_MMA"
+  "#")
+
+(define_split
+  [(set (match_operand:OO 0 "vsx_register_operand")
+	(unspec:OO [(match_operand:OO 1 "vsx_register_operand")
+		    (match_operand:OO 2 "vsx_register_operand")]
+		   UNSPEC_PAIR_2OPS))]
+  "TARGET_MMA && reload_completed"
+  [(const_int 0)]
+{
+  enum machine_mode mode = <pairmode>mode;
+  for (long i = 0; i < 2; i++)
+    {
+      rtx op0 = gen_rtx_REG (mode, reg_or_subregno (operands[0]) + i);
+      rtx op1 = gen_rtx_REG (mode, reg_or_subregno (operands[1]) + i);
+      rtx op2 = gen_rtx_REG (mode, reg_or_subregno (operands[2]) + i);
+      emit_insn (gen_rtx_SET (op0, gen_rtx_<pairop> (mode, op1, op2)));
+    }
+  DONE;
+})
+
+(define_insn "vpair_<pairop>_<pairmode>"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa,v")
+	(unspec:OO [(match_operand:OO 1 "vsx_register_operand" "%wa,wa,v")
+		    (match_operand:OO 2 "vsx_register_operand" "wa,0,v")
+		    (match_operand:OO 3 "vsx_register_operand" "0,wa,v")]
+		   UNSPEC_PAIR_3OPS))]
+  "TARGET_MMA"
+  "#")
+
+(define_split
+  [(set (match_operand:OO 0 "vsx_register_operand")
+	(unspec:OO [(match_operand:OO 1 "vsx_register_operand")
+		    (match_operand:OO 2 "vsx_register_operand")
+		    (match_operand:OO 3 "vsx_register_operand")]
+		   UNSPEC_PAIR_3OPS))]
+  "TARGET_MMA && reload_completed"
+  [(const_int 0)]
+{
+  enum machine_mode mode = <pairmode>mode;
+  for (long i = 0; i < 2; i++)
+    {
+      rtx op0 = gen_rtx_REG (mode, reg_or_subregno (operands[0]) + i);
+      rtx op1 = gen_rtx_REG (mode, reg_or_subregno (operands[1]) + i);
+      rtx op2 = gen_rtx_REG (mode, reg_or_subregno (operands[2]) + i);
+      rtx op3 = gen_rtx_REG (mode, reg_or_subregno (operands[3]) + i);
+      emit_insn (gen_rtx_SET (op0, gen_rtx_<pairop> (mode, op1, op2, op3)));
+    }
+  DONE;
+})
+
+(define_insn "vpair_SCALE_<pairmode>"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=?wa")
+	(unspec:OO [(match_operand:<pairmode> 1 "vsx_register_operand" "wa")
+		    (match_operand:OO 2 "vsx_register_operand" "wa")]
+		   UNSPEC_PAIR_SCALE))]
+  "TARGET_MMA"
+  "#")
+
+(define_split
+  [(set (match_operand:OO 0 "vsx_register_operand")
+	(unspec:OO [(match_operand:<pairmode> 1 "vsx_register_operand")
+		    (match_operand:OO 2 "vsx_register_operand")]
+		   UNSPEC_PAIR_SCALE))]
+  "TARGET_MMA && reload_completed"
+  [(const_int 0)]
+{
+  enum machine_mode mode = <pairmode>mode;
+  rtx op1 = gen_rtx_REG (<pairmode>mode, reg_or_subregno (operands[1]));
+  for (long i = 0; i < 2; i++)
+    {
+      rtx op0 = gen_rtx_REG (mode, reg_or_subregno (operands[0]) + i);
+      rtx op2 = gen_rtx_REG (mode, reg_or_subregno (operands[2]) + i);
+      emit_insn (gen_rtx_SET (op0, gen_rtx_MULT (mode, op1, op2)));
+    }
+  DONE;
+})
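
Note on the patterns above: each vpair insn deliberately emits "#" and
defers to a post-reload splitter, so the OOmode operation survives
register allocation as one opaque insn and only then decomposes into two
independent 128-bit operations on the adjacent VSX registers holding the
pair.  A rough C model of the PLUS/V4SF case (hypothetical helper names,
not part of the patch), treating the two halves of the pair as separate
vector float values:

    #include <altivec.h>

    typedef struct { vector float v0, v1; } pair_f32;  /* stand-in for OOmode */

    static inline pair_f32
    vpair_add_model (pair_f32 a, pair_f32 b)
    {
      pair_f32 r;
      r.v0 = vec_add (a.v0, b.v0);  /* expected to become one xvaddsp */
      r.v1 = vec_add (a.v1, b.v1);  /* ... and a second xvaddsp */
      return r;
    }
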
diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index 534698e7d3e..4bac4387b68 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -2229,6 +2229,200 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	return true;
       }
 
+    case RS6000_BIF_ABS_F32_SCALAR:
+    case RS6000_BIF_ABS_F32_VECTOR:
+    case RS6000_BIF_ABS_F64_SCALAR:
+    case RS6000_BIF_ABS_F64_VECTOR:
+      {
+	location_t loc = gimple_location (stmt);
+	lhs = gimple_call_lhs (stmt);
+	arg0 = gimple_call_arg (stmt, 0);
+	tree t = build1 (ABS_EXPR, TREE_TYPE (lhs), arg0);
+	g = gimple_build_assign (lhs, t);
+	gimple_set_location (g, loc);
+	gsi_replace (gsi, g, true);
+	return true;
+      }
+
+    case RS6000_BIF_ADD_F32_SCALAR:
+    case RS6000_BIF_ADD_F32_VECTOR:
+    case RS6000_BIF_ADD_F64_SCALAR:
+    case RS6000_BIF_ADD_F64_VECTOR:
+      {
+	location_t loc = gimple_location (stmt);
+	lhs = gimple_call_lhs (stmt);
+	arg0 = gimple_call_arg (stmt, 0);
+	arg1 = gimple_call_arg (stmt, 1);
+	tree t = build2 (PLUS_EXPR, TREE_TYPE (lhs), arg0, arg1);
+	g = gimple_build_assign (lhs, t);
+	gimple_set_location (g, loc);
+	gsi_replace (gsi, g, true);
+	return true;
+      }
+
+    case RS6000_BIF_FMA_F32_SCALAR:
+    case RS6000_BIF_FMA_F32_VECTOR:
+    case RS6000_BIF_FMA_F64_SCALAR:
+    case RS6000_BIF_FMA_F64_VECTOR:
+      {
+	lhs = gimple_call_lhs (stmt);
+	arg0 = gimple_call_arg (stmt, 0);
+	arg1 = gimple_call_arg (stmt, 1);
+	tree arg2 = gimple_call_arg (stmt, 2);
+	gcall *g = gimple_build_call_internal (IFN_FMA, 3, arg0, arg1, arg2);
+	gimple_call_set_lhs (g, lhs);
+	gimple_call_set_nothrow (g, true);
+	gimple_set_location (g, gimple_location (stmt));
+	gsi_replace (gsi, g, true);
+	return true;
+      }
+
+    case RS6000_BIF_MULT_F32_SCALAR:
+    case RS6000_BIF_MULT_F32_VECTOR:
+    case RS6000_BIF_MULT_F64_SCALAR:
+    case RS6000_BIF_MULT_F64_VECTOR:
+    case RS6000_BIF_SCALE_F32_SCALAR:
+    case RS6000_BIF_SCALE_F64_SCALAR:
+      {
+	location_t loc = gimple_location (stmt);
+	lhs = gimple_call_lhs (stmt);
+	arg0 = gimple_call_arg (stmt, 0);
+	arg1 = gimple_call_arg (stmt, 1);
+	tree t = build2 (MULT_EXPR, TREE_TYPE (lhs), arg0, arg1);
+	g = gimple_build_assign (lhs, t);
+	gimple_set_location (g, loc);
+	gsi_replace (gsi, g, true);
+	return true;
+      }
+
+    case RS6000_BIF_REDUCE_F32_SCALAR:
+    case RS6000_BIF_REDUCE_F64_SCALAR:
+      {
+	location_t loc = gimple_location (stmt);
+	lhs = gimple_call_lhs (stmt);
+	arg0 = gimple_call_arg (stmt, 0);
+	g = gimple_build_assign (lhs, arg0);
+	gimple_set_location (g, loc);
+	gsi_replace (gsi, g, true);
+	return true;
+      }
+
+    case RS6000_BIF_REDUCE_F32_VECTOR:
+      {
+	gimple_seq new_seq = NULL;
+	push_gimplify_context (true);
+	lhs = gimple_call_lhs (stmt);
+	arg0 = gimple_call_arg (stmt, 0);
+	tree lhs_type = TREE_TYPE (lhs);
+	tree shift_decl = rs6000_builtin_decls[RS6000_BIF_VSLDOI_4SF];
+	tree shift4 = create_tmp_reg_or_ssa_name (V4SF_type_node);
+	tree shift8 = create_tmp_reg_or_ssa_name (V4SF_type_node);
+	tree sum4 = create_tmp_reg_or_ssa_name (V4SF_type_node);
+	tree sum8 = create_tmp_reg_or_ssa_name (V4SF_type_node);
+	tree s4 = build_int_cstu (uint16_type_node, 4);
+	tree s8 = build_int_cstu (uint16_type_node, 8);
+	gimple *new_call = gimple_build_call (shift_decl, 3, arg0, arg0, s8);
+	gimple_call_set_lhs (new_call, shift8);
+	gimple_seq_add_stmt (&new_seq, new_call);
+	gimplify_assign (sum8, build2 (PLUS_EXPR, lhs_type, arg0, shift8), &new_seq);
+	new_call = gimple_build_call (shift_decl, 3, sum8, sum8, s4);
+	gimple_call_set_lhs (new_call, shift4);
+	gimple_seq_add_stmt (&new_seq, new_call);
+	gimplify_assign (sum4, build2 (PLUS_EXPR, lhs_type, sum8, shift4), &new_seq);
+	tree size = build_int_cst (bitsizetype, 32);
+	tree offset = build_int_cst (bitsizetype, 96);
+	gimplify_assign (lhs, build3 (BIT_FIELD_REF, lhs_type, sum4, size, offset), &new_seq);
+	pop_gimplify_context (NULL);
+	gsi_replace_with_seq (gsi, new_seq, true);
+	return true;
+      }
+
+    case RS6000_BIF_REDUCE_F64_VECTOR:
+      {
+	gimple_seq new_seq = NULL;
+	push_gimplify_context (true);
+	lhs = gimple_call_lhs (stmt);
+	arg0 = gimple_call_arg (stmt, 0);
+	tree lhs_type = TREE_TYPE (lhs);
+	tree shift_decl = rs6000_builtin_decls[RS6000_BIF_VSLDOI_2DF];
+	tree shift8 = create_tmp_reg_or_ssa_name (V2DF_type_node);
+	tree sum8 = create_tmp_reg_or_ssa_name (V2DF_type_node);
+	tree s8 = build_int_cstu (uint16_type_node, 8);
+	gimple *new_call = gimple_build_call (shift_decl, 3, arg0, arg0, s8);
+	gimple_call_set_lhs (new_call, shift8);
+	gimple_seq_add_stmt (&new_seq, new_call);
+	gimplify_assign (sum8, build2 (PLUS_EXPR, lhs_type, arg0, shift8), &new_seq);
+	tree size = build_int_cst (bitsizetype, 64);
+	tree offset = build_int_cst (bitsizetype, 64);
+	gimplify_assign (lhs, build3 (BIT_FIELD_REF, lhs_type, sum8, size, offset), &new_seq);
+	pop_gimplify_context (NULL);
+	gsi_replace_with_seq (gsi, new_seq, true);
+	return true;
+      }
+
+    case RS6000_BIF_SCALE_F32_VECTOR:
+    case RS6000_BIF_SCALE_F64_VECTOR:
+      {
+	gimple_seq new_seq = NULL;
+	push_gimplify_context (true);
+	lhs = gimple_call_lhs (stmt);
+	arg0 = gimple_call_arg (stmt, 0);
+	arg1 = gimple_call_arg (stmt, 1);
+	tree lhs_type = TREE_TYPE (lhs);
+	tree splat = create_tmp_reg_or_ssa_name (lhs_type);
+	gimplify_assign (splat, build_vector_from_val (lhs_type, arg0), &new_seq);
+	gimplify_assign (lhs, build2 (MULT_EXPR, TREE_TYPE (lhs), splat, arg1), &new_seq);
+	pop_gimplify_context (NULL);
+	gsi_replace_with_seq (gsi, new_seq, true);
+	return true;
+      }
+
+    case RS6000_BIF_SCALE_F32_VPAIR:
+    case RS6000_BIF_SCALE_F64_VPAIR:
+      {
+	gimple_seq new_seq = NULL;
+	push_gimplify_context (true);
+	lhs = gimple_call_lhs (stmt);
+	arg0 = gimple_call_arg (stmt, 0);
+	arg1 = gimple_call_arg (stmt, 1);
+	tree scale_type, new_decl;
+
+	if (fn_code == RS6000_BIF_SCALE_F32_VPAIR)
+	  {
+	    scale_type = V4SF_type_node;
+	    new_decl = rs6000_builtin_decls[RS6000_BIF_SCALE_V4SF_VPAIR];
+	  }
+	else
+	  {
+	    scale_type = V2DF_type_node;
+	    new_decl = rs6000_builtin_decls[RS6000_BIF_SCALE_V2DF_VPAIR];
+	  }
+	tree splat = create_tmp_reg_or_ssa_name (scale_type);
+	gimplify_assign (splat, build_vector_from_val (scale_type, arg0), &new_seq);
+	gimple *new_call = gimple_build_call (new_decl, 2, splat, arg1);
+	gimple_call_set_lhs (new_call, lhs);
+	gimple_seq_add_stmt (&new_seq, new_call);
+	pop_gimplify_context (NULL);
+	gsi_replace_with_seq (gsi, new_seq, true);
+	return true;
+      }
+
+    case RS6000_BIF_SUB_F32_SCALAR:
+    case RS6000_BIF_SUB_F32_VECTOR:
+    case RS6000_BIF_SUB_F64_SCALAR:
+    case RS6000_BIF_SUB_F64_VECTOR:
+      {
+	location_t loc = gimple_location (stmt);
+	lhs = gimple_call_lhs (stmt);
+	arg0 = gimple_call_arg (stmt, 0);
+	arg1 = gimple_call_arg (stmt, 1);
+	tree t = build2 (MINUS_EXPR, TREE_TYPE (lhs), arg0, arg1);
+	g = gimple_build_assign (lhs, t);
+	gimple_set_location (g, loc);
+	gsi_replace (gsi, g, true);
+	return true;
+      }
+
     default:
       if (TARGET_DEBUG_BUILTIN)
 	fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
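
Note: the RS6000_BIF_REDUCE_F32_VECTOR folding above builds a
shift-and-add ladder (VSLDOI by 8 bytes, then by 4) and extracts the
result with a BIT_FIELD_REF at bit offset 96.  A hedged source-level
equivalent using the standard vec_sld intrinsic and GCC vector
subscripting (every lane of the final sum holds the total, so the
lane-3 extract matches the bit offset in the gimple above):

    #include <altivec.h>

    static inline float
    reduce_f32_model (vector float v)
    {
      vector float s8 = vec_add (v, vec_sld (v, v, 8));    /* lanes 2 apart */
      vector float s4 = vec_add (s8, vec_sld (s8, s8, 4)); /* adjacent lanes */
      return s4[3];  /* 32-bit lane at bit offset 96 */
    }
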
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index 289a37998b1..ef846cff03d 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -4107,3 +4107,109 @@
 
   void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
     STXVP nothing {mma,pair}
+
+; __vector_pair operations
+
+  float __builtin_abs_f32_scalar (float);
+    ABS_F32_SCALAR nothing {}
+  vf __builtin_abs_f32_vector (vf);
+    ABS_F32_VECTOR nothing {}
+  v256 __builtin_abs_f32_vpair (v256);
+    ABS_F32_VPAIR vpair_ABS_V4SF {mma,pair}
+
+  double __builtin_abs_f64_scalar (double);
+    ABS_F64_SCALAR nothing {}
+  vd __builtin_abs_f64_vector (vd);
+    ABS_F64_VECTOR nothing {}
+  v256 __builtin_abs_f64_vpair (v256);
+    ABS_F64_VPAIR vpair_ABS_V2DF {mma,pair}
+
+  float __builtin_add_f32_scalar (float, float);
+    ADD_F32_SCALAR nothing {}
+  vf __builtin_add_f32_vector (vf, vf);
+    ADD_F32_VECTOR nothing {}
+  v256 __builtin_add_f32_vpair (v256, v256);
+    ADD_F32_VPAIR vpair_PLUS_V4SF {mma,pair}
+
+  double __builtin_add_f64_scalar (double, double);
+    ADD_F64_SCALAR nothing {}
+  vd __builtin_add_f64_vector (vd, vd);
+    ADD_F64_VECTOR nothing {}
+  v256 __builtin_add_f64_vpair (v256, v256);
+    ADD_F64_VPAIR vpair_PLUS_V2DF {mma,pair}
+
+  float __builtin_mult_f32_scalar (float, float);
+    MULT_F32_SCALAR nothing {}
+  vf __builtin_mult_f32_vector (vf, vf);
+    MULT_F32_VECTOR nothing {}
+  v256 __builtin_mult_f32_vpair (v256, v256);
+    MULT_F32_VPAIR vpair_MULT_V4SF {mma,pair}
+
+  double __builtin_mult_f64_scalar (double, double);
+    MULT_F64_SCALAR nothing {}
+  vd __builtin_mult_f64_vector (vd, vd);
+    MULT_F64_VECTOR nothing {}
+  v256 __builtin_mult_f64_vpair (v256, v256);
+    MULT_F64_VPAIR vpair_MULT_V2DF {mma,pair}
+
+  float __builtin_reduce_f32_scalar (float);
+    REDUCE_F32_SCALAR nothing {}
+  float __builtin_reduce_f32_vector (vf);
+    REDUCE_F32_VECTOR nothing {}
+  float __builtin_reduce_f32_vpair (v256);
+    REDUCE_F32_VPAIR nothing {mma,pair}
+;   REDUCE_F32_VPAIR vpair_REDUCE_V4SF {mma,pair}
+
+  double __builtin_reduce_f64_scalar (double);
+    REDUCE_F64_SCALAR nothing {}
+  double __builtin_reduce_f64_vector (vd);
+    REDUCE_F64_VECTOR nothing {}
+  double __builtin_reduce_f64_vpair (v256);
+    REDUCE_F64_VPAIR nothing {mma,pair}
+;   REDUCE_F64_VPAIR vpair_REDUCE_V2DF {mma,pair}
+
+  float __builtin_scale_f32_scalar (float, float);
+    SCALE_F32_SCALAR nothing {}
+  vf __builtin_scale_f32_vector (float, vf);
+    SCALE_F32_VECTOR nothing {}
+  v256 __builtin_scale_f32_vpair (float, v256);
+    SCALE_F32_VPAIR nothing {mma,pair}
+  v256 __builtin_scale_v4sf_vpair (vf, v256);
+    SCALE_V4SF_VPAIR vpair_SCALE_V4SF {mma,pair}
+
+  double __builtin_scale_f64_scalar (double, double);
+    SCALE_F64_SCALAR nothing {}
+  vd __builtin_scale_f64_vector (double, vd);
+    SCALE_F64_VECTOR nothing {}
+  v256 __builtin_scale_f64_vpair (double, v256);
+    SCALE_F64_VPAIR nothing {mma,pair}
+  v256 __builtin_scale_v2df_vpair (vd, v256);
+    SCALE_V2DF_VPAIR vpair_SCALE_V2DF {mma,pair}
+
+  float __builtin_sub_f32_scalar (float, float);
+    SUB_F32_SCALAR nothing {}
+  vf __builtin_sub_f32_vector (vf, vf);
+    SUB_F32_VECTOR nothing {}
+  v256 __builtin_sub_f32_vpair (v256, v256);
+    SUB_F32_VPAIR vpair_MINUS_V4SF {mma,pair}
+
+  double __builtin_sub_f64_scalar (double, double);
+    SUB_F64_SCALAR nothing {}
+  vd __builtin_sub_f64_vector (vd, vd);
+    SUB_F64_VECTOR nothing {}
+  v256 __builtin_sub_f64_vpair (v256, v256);
+    SUB_F64_VPAIR vpair_MINUS_V2DF {mma,pair}
+
+  float __builtin_fma_f32_scalar (float, float, float);
+    FMA_F32_SCALAR nothing {}
+  vf __builtin_fma_f32_vector (vf, vf, vf);
+    FMA_F32_VECTOR nothing {}
+  v256 __builtin_fma_f32_vpair (v256, v256, v256);
+    FMA_F32_VPAIR vpair_FMA_V4SF {mma,pair}
+
+  double __builtin_fma_f64_scalar (double, double, double);
+    FMA_F64_SCALAR nothing {}
+  vd __builtin_fma_f64_vector (vd, vd, vd);
+    FMA_F64_VECTOR nothing {}
+  v256 __builtin_fma_f64_vpair (v256, v256, v256);
+    FMA_F64_VPAIR vpair_FMA_V2DF {mma,pair}
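
Note: the scale built-ins above take the scalar multiplier as the first
operand; the gimple folder splats it across a vector and falls back to
an ordinary multiply (or, for the vpair forms, to the SCALE_V4SF_VPAIR
and SCALE_V2DF_VPAIR variants).  A rough single-vector equivalent,
assuming the usual <altivec.h> intrinsics:

    #include <altivec.h>

    static inline vector float
    scale_f32_model (float s, vector float v)
    {
      return vec_mul (vec_splats (s), v);  /* splat, then elementwise multiply */
    }
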
diff --git a/gcc/config/rs6000/rs6000-call.cc b/gcc/config/rs6000/rs6000-call.cc
index 5384c10b986..4a929235d2d 100644
--- a/gcc/config/rs6000/rs6000-call.cc
+++ b/gcc/config/rs6000/rs6000-call.cc
@@ -432,12 +432,12 @@ rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
 bool
 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
 {
-  /* We do not allow MMA types being used as return values.  Only report
+  /* We do not allow MMA QUAD types being used as return values.  Only report
      the invalid return value usage the first time we encounter it.  */
   if (cfun
       && !cfun->machine->mma_return_type_error
       && TREE_TYPE (cfun->decl) == fntype
-      && (TYPE_MODE (type) == OOmode || TYPE_MODE (type) == XOmode))
+      && TYPE_MODE (type) == XOmode)
     {
       /* Record we have now handled function CFUN, so the next time we
 	 are called, we do not re-report the same error.  */
@@ -1109,6 +1109,9 @@ rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
 	      && TREE_CODE (type) == COMPLEX_TYPE && elt_mode == KCmode)
 	    cum->vregno++;
 
+	  if (type && TREE_CODE (type) == OPAQUE_TYPE && elt_mode == OOmode)
+	    cum->vregno++;
+
 	  if (!TARGET_ALTIVEC)
 	    error ("cannot pass argument in vector register because"
 		   " altivec instructions are disabled, use %qs"
@@ -1631,8 +1634,8 @@ rs6000_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
   machine_mode elt_mode;
   int n_elts;
 
-  /* We do not allow MMA types being used as function arguments.  */
-  if (mode == OOmode || mode == XOmode)
+  /* We do not allow MMA QUAD types being used as function arguments.  */
+  if (mode == XOmode)
     {
       if (TYPE_CANONICAL (type) != NULL_TREE)
 	type = TYPE_CANONICAL (type);
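
Note: with the two changes above, OOmode (__vector_pair) values may now
be passed and returned by value; only the XOmode quad types remain
rejected.  So, assuming -mmma, something like this hypothetical function
becomes valid:

    /* The pair travels to and from the callee in two adjacent VSX regs.  */
    __vector_pair
    pass_through (__vector_pair x)
    {
      return x;
    }
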
diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def
index c582490c084..a2294bce66c 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -6175,3 +6175,119 @@
     VUPKLSW  VUPKLSW_DEPR1
   vbll __builtin_vec_vupklsw (vbi);
     VUPKLSW  VUPKLSW_DEPR2
+
+[ABS_F32, SKIP, __builtin_abs_f32]
+  float __builtin_abs_f32 (float);
+    ABS_F32_SCALAR
+  vf __builtin_abs_f32 (vf);
+    ABS_F32_VECTOR
+  v256 __builtin_abs_f32 (v256);
+    ABS_F32_VPAIR
+
+[ABS_F64, SKIP, __builtin_abs_f64]
+  double __builtin_abs_f64 (double);
+    ABS_F64_SCALAR
+  vd __builtin_abs_f64 (vd);
+    ABS_F64_VECTOR
+  v256 __builtin_abs_f64 (v256);
+    ABS_F64_VPAIR
+
+[ADD_F32, SKIP, __builtin_add_f32]
+  float __builtin_add_f32 (float, float);
+    ADD_F32_SCALAR
+  vf __builtin_add_f32 (vf, vf);
+    ADD_F32_VECTOR
+  v256 __builtin_add_f32 (v256, v256);
+    ADD_F32_VPAIR
+
+[ADD_F64, SKIP, __builtin_add_f64]
+  double __builtin_add_f64 (double, double);
+    ADD_F64_SCALAR
+  vd __builtin_add_f64 (vd, vd);
+    ADD_F64_VECTOR
+  v256 __builtin_add_f64 (v256, v256);
+    ADD_F64_VPAIR
+
+[MULT_F32, SKIP, __builtin_mult_f32]
+  float __builtin_mult_f32 (float, float);
+    MULT_F32_SCALAR
+  vf __builtin_mult_f32 (vf, vf);
+    MULT_F32_VECTOR
+  v256 __builtin_mult_f32 (v256, v256);
+    MULT_F32_VPAIR
+
+[MULT_F64, SKIP, __builtin_mult_f64]
+  double __builtin_mult_f64 (double, double);
+    MULT_F64_SCALAR
+  vd __builtin_mult_f64 (vd, vd);
+    MULT_F64_VECTOR
+  v256 __builtin_mult_f64 (v256, v256);
+    MULT_F64_VPAIR
+
+[REDUCE_F32, SKIP, __builtin_reduce_f32]
+  float __builtin_reduce_f32 (float);
+    REDUCE_F32_SCALAR
+  float __builtin_reduce_f32 (vf);
+    REDUCE_F32_VECTOR
+  float __builtin_reduce_f32 (v256);
+    REDUCE_F32_VPAIR
+
+[REDUCE_F64, SKIP, __builtin_reduce_f64]
+  double __builtin_reduce_f64 (double);
+    REDUCE_F64_SCALAR
+  double __builtin_reduce_f64 (vd);
+    REDUCE_F64_VECTOR
+  double __builtin_reduce_f64 (v256);
+    REDUCE_F64_VPAIR
+
+[SCALE_F32, SKIP, __builtin_scale_f32]
+  float __builtin_scale_f32 (float, float);
+    SCALE_F32_SCALAR
+  vf __builtin_scale_f32 (float, vf);
+    SCALE_F32_VECTOR
+  v256 __builtin_scale_f32 (float, v256);
+    SCALE_F32_VPAIR
+  v256 __builtin_scale_f32 (vf, v256);
+    SCALE_V4SF_VPAIR
+
+[SCALE_F64, SKIP, __builtin_scale_f64]
+  double __builtin_scale_f64 (double, double);
+    SCALE_F64_SCALAR
+  vd __builtin_scale_f64 (double, vd);
+    SCALE_F64_VECTOR
+  v256 __builtin_scale_f64 (double, v256);
+    SCALE_F64_VPAIR
+  v256 __builtin_scale_f64 (vd, v256);
+    SCALE_V2DF_VPAIR
+
+[SUB_F32, SKIP, __builtin_sub_f32]
+  float __builtin_sub_f32 (float, float);
+    SUB_F32_SCALAR
+  vf __builtin_sub_f32 (vf, vf);
+    SUB_F32_VECTOR
+  v256 __builtin_sub_f32 (v256, v256);
+    SUB_F32_VPAIR
+
+[SUB_F64, SKIP, __builtin_sub_f64]
+  double __builtin_sub_f64 (double, double);
+    SUB_F64_SCALAR
+  vd __builtin_sub_f64 (vd, vd);
+    SUB_F64_VECTOR
+  v256 __builtin_sub_f64 (v256, v256);
+    SUB_F64_VPAIR
+
+[FMA_F32, SKIP, __builtin_fma_f32]
+  float __builtin_fma_f32 (float, float, float);
+    FMA_F32_SCALAR
+  vf __builtin_fma_f32 (vf, vf, vf);
+    FMA_F32_VECTOR
+  v256 __builtin_fma_f32 (v256, v256, v256);
+    FMA_F32_VPAIR
+
+[FMA_F64, SKIP, __builtin_fma_f64]
+  double __builtin_fma_f64 (double, double, double);
+    FMA_F64_SCALAR
+  vd __builtin_fma_f64 (vd, vd, vd);
+    FMA_F64_VECTOR
+  v256 __builtin_fma_f64 (v256, v256, v256);
+    FMA_F64_VPAIR
