public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work122-vpair)] Initial vector pair arithmetic support.
@ 2023-06-16 16:23 Michael Meissner
0 siblings, 0 replies; only message in thread
From: Michael Meissner @ 2023-06-16 16:23 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:d38035c76000da268ad5c9828c2b86651af83e36
commit d38035c76000da268ad5c9828c2b86651af83e36
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Fri Jun 16 12:23:18 2023 -0400
Initial vector pair arithmetic support.
2023-06-16 Peter Bergner <bergner@linux.ibm.com>
Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/mma.md (UNSPEC_PAIR_ABS_F32): New unspec.
(UNSPEC_PAIR_ABS_F64): Likewise.
(UNSPEC_PAIR_ADD_F32): Likewise.
(UNSPEC_PAIR_ADD_F64): Likewise.
(UNSPEC_PAIR_FMA_F32): Likewise.
(UNSPEC_PAIR_FMA_F64): Likewise.
(UNSPEC_PAIR_MULT_F32): Likewise.
(UNSPEC_PAIR_MULT_F64): Likewise.
(UNSPEC_PAIR_SCALE_F32): Likewise.
(UNSPEC_PAIR_SCALE_F64): Likewise.
(UNSPEC_PAIR_SUB_F32): Likewise.
(UNSPEC_PAIR_SUB_F64): Likewise.
(UNSPEC_PAIR_1OPS): New iterator.
(UNSPEC_PAIR_2OPS): Likewise.
(UNSPEC_PAIR_3OPS): Likewise.
(UNSPEC_PAIR_SCALE): Likewise.
(pairop): Likewise.
(pairmode): Likewise.
(vpair_<pairop>_<pairmode>): New insns for unary, binary, and ternary
operations.
(vpair_<pairop>_<pairmode> splitter): New splitters.
(vpair_SCALE_<pairmode>): New insn.
(vpair_SCALE_<pairmode> splitter): New splitter.
* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Add
vector pair arithmetic support.
* config/rs6000/rs6000-builtins.def (__builtin_abs_f32_scalar): New
vector pair arithmetic built-in functions.
(__builtin_abs_f64_scalar): Likewise.
(__builtin_add_f32_scalar): Likewise.
(__builtin_add_f64_scalar): Likewise.
(__builtin_mult_f32_scalar): Likewise.
(__builtin_mult_f64_scalar): Likewise.
(__builtin_reduce_f32_scalar): Likewise.
(__builtin_reduce_f64_scalar): Likewise.
(__builtin_scale_f32_scalar): Likewise.
(__builtin_scale_f64_scalar): Likewise.
(__builtin_sub_f32_scalar): Likewise.
(__builtin_sub_f64_scalar): Likewise.
(__builtin_fma_f32_scalar): Likewise.
(__builtin_fma_f64_scalar): Likewise.
* config/rs6000/rs6000-call.cc (rs6000_return_in_memory): Allow
returning and passing vector pair in calls.
(rs6000_function_arg_advance_1): Likewise.
(rs6000_function_arg): Likewise.
* config/rs6000/rs6000-overload.def (__builtin_abs_f32): Add vector pair
arithmetic overload built-in functions.
(__builtin_abs_f64): Likewise.
(__builtin_add_f32): Likewise.
(__builtin_add_f64): Likewise.
(__builtin_mult_f32): Likewise.
(__builtin_mult_f64): Likewise.
(__builtin_reduce_f32): Likewise.
(__builtin_reduce_f64): Likewise.
(__builtin_scale_f32): Likewise.
(__builtin_scale_f64): Likewise.
(__builtin_sub_f32): Likewise.
(__builtin_sub_f64): Likewise.
(__builtin_fma_f32): Likewise.
(__builtin_fma_f64): Likewise.
Diff:
---
gcc/config/rs6000/mma.md | 161 ++++++++++++++++++++++++++++
gcc/config/rs6000/rs6000-builtin.cc | 194 ++++++++++++++++++++++++++++++++++
gcc/config/rs6000/rs6000-builtins.def | 106 +++++++++++++++++++
gcc/config/rs6000/rs6000-call.cc | 11 +-
gcc/config/rs6000/rs6000-overload.def | 116 ++++++++++++++++++++
5 files changed, 584 insertions(+), 4 deletions(-)
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index d36dc13872b..c133c244611 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -91,6 +91,18 @@
UNSPEC_MMA_XVI8GER4SPP
UNSPEC_MMA_XXMFACC
UNSPEC_MMA_XXMTACC
+ UNSPEC_PAIR_ABS_F32
+ UNSPEC_PAIR_ABS_F64
+ UNSPEC_PAIR_ADD_F32
+ UNSPEC_PAIR_ADD_F64
+ UNSPEC_PAIR_FMA_F32
+ UNSPEC_PAIR_FMA_F64
+ UNSPEC_PAIR_MULT_F32
+ UNSPEC_PAIR_MULT_F64
+ UNSPEC_PAIR_SCALE_F32
+ UNSPEC_PAIR_SCALE_F64
+ UNSPEC_PAIR_SUB_F32
+ UNSPEC_PAIR_SUB_F64
])
(define_c_enum "unspecv"
@@ -263,6 +275,46 @@
(define_int_attr avvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4PP "pmxvi8ger4pp")
(UNSPEC_MMA_PMXVI8GER4SPP "pmxvi8ger4spp")])
+(define_int_iterator UNSPEC_PAIR_1OPS [UNSPEC_PAIR_ABS_F32
+ UNSPEC_PAIR_ABS_F64])
+
+(define_int_iterator UNSPEC_PAIR_2OPS [UNSPEC_PAIR_ADD_F32
+ UNSPEC_PAIR_ADD_F64
+ UNSPEC_PAIR_MULT_F32
+ UNSPEC_PAIR_MULT_F64
+ UNSPEC_PAIR_SUB_F32
+ UNSPEC_PAIR_SUB_F64])
+
+(define_int_iterator UNSPEC_PAIR_3OPS [UNSPEC_PAIR_FMA_F32
+ UNSPEC_PAIR_FMA_F64])
+
+(define_int_iterator UNSPEC_PAIR_SCALE [UNSPEC_PAIR_SCALE_F32
+ UNSPEC_PAIR_SCALE_F64])
+
+(define_int_attr pairop [(UNSPEC_PAIR_ABS_F32 "ABS")
+ (UNSPEC_PAIR_ABS_F64 "ABS")
+ (UNSPEC_PAIR_ADD_F32 "PLUS")
+ (UNSPEC_PAIR_ADD_F64 "PLUS")
+ (UNSPEC_PAIR_FMA_F32 "FMA")
+ (UNSPEC_PAIR_FMA_F64 "FMA")
+ (UNSPEC_PAIR_MULT_F32 "MULT")
+ (UNSPEC_PAIR_MULT_F64 "MULT")
+ (UNSPEC_PAIR_SUB_F32 "MINUS")
+ (UNSPEC_PAIR_SUB_F64 "MINUS")])
+
+(define_int_attr pairmode [(UNSPEC_PAIR_ABS_F32 "V4SF")
+ (UNSPEC_PAIR_ABS_F64 "V2DF")
+ (UNSPEC_PAIR_ADD_F32 "V4SF")
+ (UNSPEC_PAIR_ADD_F64 "V2DF")
+ (UNSPEC_PAIR_FMA_F32 "V4SF")
+ (UNSPEC_PAIR_FMA_F64 "V2DF")
+ (UNSPEC_PAIR_MULT_F32 "V4SF")
+ (UNSPEC_PAIR_MULT_F64 "V2DF")
+ (UNSPEC_PAIR_SCALE_F32 "V4SF")
+ (UNSPEC_PAIR_SCALE_F64 "V2DF")
+ (UNSPEC_PAIR_SUB_F32 "V4SF")
+ (UNSPEC_PAIR_SUB_F64 "V2DF")])
+
;; Vector pair support. OOmode can only live in VSRs.
(define_expand "movoo"
@@ -690,3 +742,112 @@
"<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")])
+
+
+(define_insn "vpair_<pairop>_<pairmode>"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=?wa")
+ (unspec:OO [(match_operand:OO 1 "vsx_register_operand" "wa")]
+ UNSPEC_PAIR_1OPS))]
+ "TARGET_MMA"
+ "#")
+
+(define_split
+ [(set (match_operand:OO 0 "vsx_register_operand")
+ (unspec:OO [(match_operand:OO 1 "vsx_register_operand")]
+ UNSPEC_PAIR_1OPS))]
+ "TARGET_MMA && reload_completed"
+ [(const_int 0)]
+{
+ enum machine_mode mode = <pairmode>mode;
+ for (long i = 0; i < 2; i++)
+ {
+ rtx op0 = gen_rtx_REG (mode, reg_or_subregno (operands[0]) + i);
+ rtx op1 = gen_rtx_REG (mode, reg_or_subregno (operands[1]) + i);
+ emit_insn (gen_rtx_SET (op0, gen_rtx_<pairop> (mode, op1)));
+ }
+ DONE;
+})
+
+(define_insn "vpair_<pairop>_<pairmode>"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=?wa")
+ (unspec:OO [(match_operand:OO 1 "vsx_register_operand" "wa")
+ (match_operand:OO 2 "vsx_register_operand" "wa")]
+ UNSPEC_PAIR_2OPS))]
+ "TARGET_MMA"
+ "#")
+
+(define_split
+ [(set (match_operand:OO 0 "vsx_register_operand")
+ (unspec:OO [(match_operand:OO 1 "vsx_register_operand")
+ (match_operand:OO 2 "vsx_register_operand")]
+ UNSPEC_PAIR_2OPS))]
+ "TARGET_MMA && reload_completed"
+ [(const_int 0)]
+{
+ enum machine_mode mode = <pairmode>mode;
+ for (long i = 0; i < 2; i++)
+ {
+ rtx op0 = gen_rtx_REG (mode, reg_or_subregno (operands[0]) + i);
+ rtx op1 = gen_rtx_REG (mode, reg_or_subregno (operands[1]) + i);
+ rtx op2 = gen_rtx_REG (mode, reg_or_subregno (operands[2]) + i);
+ emit_insn (gen_rtx_SET (op0, gen_rtx_<pairop> (mode, op1, op2)));
+ }
+ DONE;
+})
+
+(define_insn "vpair_<pairop>_<pairmode>"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa,v")
+ (unspec:OO [(match_operand:OO 1 "vsx_register_operand" "%wa,wa,v")
+ (match_operand:OO 2 "vsx_register_operand" "wa,0,v")
+ (match_operand:OO 3 "vsx_register_operand" "0,wa,v")]
+ UNSPEC_PAIR_3OPS))]
+ "TARGET_MMA"
+ "#")
+
+(define_split
+ [(set (match_operand:OO 0 "vsx_register_operand")
+ (unspec:OO [(match_operand:OO 1 "vsx_register_operand")
+ (match_operand:OO 2 "vsx_register_operand")
+ (match_operand:OO 3 "vsx_register_operand")]
+ UNSPEC_PAIR_3OPS))]
+ "TARGET_MMA && reload_completed"
+ [(const_int 0)]
+{
+ enum machine_mode mode = <pairmode>mode;
+ for (long i = 0; i < 2; i++)
+ {
+ rtx op0 = gen_rtx_REG (mode, reg_or_subregno (operands[0]) + i);
+ rtx op1 = gen_rtx_REG (mode, reg_or_subregno (operands[1]) + i);
+ rtx op2 = gen_rtx_REG (mode, reg_or_subregno (operands[2]) + i);
+ rtx op3 = gen_rtx_REG (mode, reg_or_subregno (operands[3]) + i);
+ emit_insn (gen_rtx_SET (op0, gen_rtx_<pairop> (mode, op1, op2, op3)));
+ }
+ DONE;
+})
+
+(define_insn "vpair_SCALE_<pairmode>"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=?wa")
+ (unspec:OO [(match_operand:<pairmode> 1 "vsx_register_operand" "wa")
+ (match_operand:OO 2 "vsx_register_operand" "wa")]
+ UNSPEC_PAIR_SCALE))]
+ "TARGET_MMA"
+ "#")
+
+(define_split
+ [(set (match_operand:OO 0 "vsx_register_operand")
+ (unspec:OO [(match_operand:<pairmode> 1 "vsx_register_operand")
+ (match_operand:OO 2 "vsx_register_operand")]
+ UNSPEC_PAIR_SCALE))]
+ "TARGET_MMA && reload_completed"
+ [(const_int 0)]
+{
+ enum machine_mode mode = <pairmode>mode;
+ rtx op1 = gen_rtx_REG (<pairmode>mode, reg_or_subregno (operands[1]));
+ for (long i = 0; i < 2; i++)
+ {
+ rtx op0 = gen_rtx_REG (mode, reg_or_subregno (operands[0]) + i);
+ rtx op2 = gen_rtx_REG (mode, reg_or_subregno (operands[2]) + i);
+ emit_insn (gen_rtx_SET (op0, gen_rtx_MULT (mode, op1, op2)));
+ }
+ DONE;
+})
diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index 534698e7d3e..4bac4387b68 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -2229,6 +2229,200 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
return true;
}
+ case RS6000_BIF_ABS_F32_SCALAR:
+ case RS6000_BIF_ABS_F32_VECTOR:
+ case RS6000_BIF_ABS_F64_SCALAR:
+ case RS6000_BIF_ABS_F64_VECTOR:
+ {
+ location_t loc = gimple_location (stmt);
+ lhs = gimple_call_lhs (stmt);
+ arg0 = gimple_call_arg (stmt, 0);
+ tree t = build1 (ABS_EXPR, TREE_TYPE (lhs), arg0);
+ g = gimple_build_assign (lhs, t);
+ gimple_set_location (g, loc);
+ gsi_replace (gsi, g, true);
+ return true;
+ }
+
+ case RS6000_BIF_ADD_F32_SCALAR:
+ case RS6000_BIF_ADD_F32_VECTOR:
+ case RS6000_BIF_ADD_F64_SCALAR:
+ case RS6000_BIF_ADD_F64_VECTOR:
+ {
+ location_t loc = gimple_location (stmt);
+ lhs = gimple_call_lhs (stmt);
+ arg0 = gimple_call_arg (stmt, 0);
+ arg1 = gimple_call_arg (stmt, 1);
+ tree t = build2 (PLUS_EXPR, TREE_TYPE (lhs), arg0, arg1);
+ g = gimple_build_assign (lhs, t);
+ gimple_set_location (g, loc);
+ gsi_replace (gsi, g, true);
+ return true;
+ }
+
+ case RS6000_BIF_FMA_F32_SCALAR:
+ case RS6000_BIF_FMA_F32_VECTOR:
+ case RS6000_BIF_FMA_F64_SCALAR:
+ case RS6000_BIF_FMA_F64_VECTOR:
+ {
+ lhs = gimple_call_lhs (stmt);
+ arg0 = gimple_call_arg (stmt, 0);
+ arg1 = gimple_call_arg (stmt, 1);
+ tree arg2 = gimple_call_arg (stmt, 2);
+ gcall *g = gimple_build_call_internal (IFN_FMA, 3, arg0, arg1, arg2);
+ gimple_call_set_lhs (g, lhs);
+ gimple_call_set_nothrow (g, true);
+ gimple_set_location (g, gimple_location (stmt));
+ gsi_replace (gsi, g, true);
+ return true;
+ }
+
+ case RS6000_BIF_MULT_F32_SCALAR:
+ case RS6000_BIF_MULT_F32_VECTOR:
+ case RS6000_BIF_MULT_F64_SCALAR:
+ case RS6000_BIF_MULT_F64_VECTOR:
+ case RS6000_BIF_SCALE_F32_SCALAR:
+ case RS6000_BIF_SCALE_F64_SCALAR:
+ {
+ location_t loc = gimple_location (stmt);
+ lhs = gimple_call_lhs (stmt);
+ arg0 = gimple_call_arg (stmt, 0);
+ arg1 = gimple_call_arg (stmt, 1);
+ tree t = build2 (MULT_EXPR, TREE_TYPE (lhs), arg0, arg1);
+ g = gimple_build_assign (lhs, t);
+ gimple_set_location (g, loc);
+ gsi_replace (gsi, g, true);
+ return true;
+ }
+
+ case RS6000_BIF_REDUCE_F32_SCALAR:
+ case RS6000_BIF_REDUCE_F64_SCALAR:
+ {
+ location_t loc = gimple_location (stmt);
+ lhs = gimple_call_lhs (stmt);
+ arg0 = gimple_call_arg (stmt, 0);
+ g = gimple_build_assign (lhs, arg0);
+ gimple_set_location (g, loc);
+ gsi_replace (gsi, g, true);
+ return true;
+ }
+
+ case RS6000_BIF_REDUCE_F32_VECTOR:
+ {
+ gimple_seq new_seq = NULL;
+ push_gimplify_context (true);
+ lhs = gimple_call_lhs (stmt);
+ arg0 = gimple_call_arg (stmt, 0);
+ tree lhs_type = TREE_TYPE (lhs);
+ tree shift_decl = rs6000_builtin_decls[RS6000_BIF_VSLDOI_4SF];
+ tree shift4 = create_tmp_reg_or_ssa_name (V4SF_type_node);
+ tree shift8 = create_tmp_reg_or_ssa_name (V4SF_type_node);
+ tree sum4 = create_tmp_reg_or_ssa_name (V4SF_type_node);
+ tree sum8 = create_tmp_reg_or_ssa_name (V4SF_type_node);
+ tree s4 = build_int_cstu (uint16_type_node, 4);
+ tree s8 = build_int_cstu (uint16_type_node, 8);
+ gimple *new_call = gimple_build_call (shift_decl, 3, arg0, arg0, s8);
+ gimple_call_set_lhs (new_call, shift8);
+ gimple_seq_add_stmt (&new_seq, new_call);
+ gimplify_assign (sum8, build2 (PLUS_EXPR, lhs_type, arg0, shift8), &new_seq);
+ new_call = gimple_build_call (shift_decl, 3, sum8, sum8, s4);
+ gimple_call_set_lhs (new_call, shift4);
+ gimple_seq_add_stmt (&new_seq, new_call);
+ gimplify_assign (sum4, build2 (PLUS_EXPR, lhs_type, sum8, shift4), &new_seq);
+ tree size = build_int_cst (bitsizetype, 32);
+ tree offset = build_int_cst (bitsizetype, 96);
+ gimplify_assign (lhs, build3 (BIT_FIELD_REF, lhs_type, sum4, size, offset), &new_seq);
+ pop_gimplify_context (NULL);
+ gsi_replace_with_seq (gsi, new_seq, true);
+ return true;
+ }
+
+ case RS6000_BIF_REDUCE_F64_VECTOR:
+ {
+ gimple_seq new_seq = NULL;
+ push_gimplify_context (true);
+ lhs = gimple_call_lhs (stmt);
+ arg0 = gimple_call_arg (stmt, 0);
+ tree lhs_type = TREE_TYPE (lhs);
+ tree shift_decl = rs6000_builtin_decls[RS6000_BIF_VSLDOI_2DF];
+ tree shift8 = create_tmp_reg_or_ssa_name (V2DF_type_node);
+ tree sum8 = create_tmp_reg_or_ssa_name (V2DF_type_node);
+ tree s8 = build_int_cstu (uint16_type_node, 8);
+ gimple *new_call = gimple_build_call (shift_decl, 3, arg0, arg0, s8);
+ gimple_call_set_lhs (new_call, shift8);
+ gimple_seq_add_stmt (&new_seq, new_call);
+ gimplify_assign (sum8, build2 (PLUS_EXPR, lhs_type, arg0, shift8), &new_seq);
+ tree size = build_int_cst (bitsizetype, 64);
+ tree offset = build_int_cst (bitsizetype, 64);
+ gimplify_assign (lhs, build3 (BIT_FIELD_REF, lhs_type, sum8, size, offset), &new_seq);
+ pop_gimplify_context (NULL);
+ gsi_replace_with_seq (gsi, new_seq, true);
+ return true;
+ }
+
+ case RS6000_BIF_SCALE_F32_VECTOR:
+ case RS6000_BIF_SCALE_F64_VECTOR:
+ {
+ gimple_seq new_seq = NULL;
+ push_gimplify_context (true);
+ lhs = gimple_call_lhs (stmt);
+ arg0 = gimple_call_arg (stmt, 0);
+ arg1 = gimple_call_arg (stmt, 1);
+ tree lhs_type = TREE_TYPE (lhs);
+ tree splat = create_tmp_reg_or_ssa_name (lhs_type);
+ gimplify_assign (splat, build_vector_from_val (lhs_type, arg0), &new_seq);
+ gimplify_assign (lhs, build2 (MULT_EXPR, TREE_TYPE (lhs), splat, arg1), &new_seq);
+ pop_gimplify_context (NULL);
+ gsi_replace_with_seq (gsi, new_seq, true);
+ return true;
+ }
+
+ case RS6000_BIF_SCALE_F32_VPAIR:
+ case RS6000_BIF_SCALE_F64_VPAIR:
+ {
+ gimple_seq new_seq = NULL;
+ push_gimplify_context (true);
+ lhs = gimple_call_lhs (stmt);
+ arg0 = gimple_call_arg (stmt, 0);
+ arg1 = gimple_call_arg (stmt, 1);
+ tree scale_type, new_decl;
+
+ if (fn_code == RS6000_BIF_SCALE_F32_VPAIR)
+ {
+ scale_type = V4SF_type_node;
+ new_decl = rs6000_builtin_decls[RS6000_BIF_SCALE_V4SF_VPAIR];
+ }
+ else
+ {
+ scale_type = V2DF_type_node;
+ new_decl = rs6000_builtin_decls[RS6000_BIF_SCALE_V2DF_VPAIR];
+ }
+ tree splat = create_tmp_reg_or_ssa_name (scale_type);
+ gimplify_assign (splat, build_vector_from_val (scale_type, arg0), &new_seq);
+ gimple *new_call = gimple_build_call (new_decl, 2, splat, arg1);
+ gimple_call_set_lhs (new_call, lhs);
+ gimple_seq_add_stmt (&new_seq, new_call);
+ pop_gimplify_context (NULL);
+ gsi_replace_with_seq (gsi, new_seq, true);
+ return true;
+ }
+
+ case RS6000_BIF_SUB_F32_SCALAR:
+ case RS6000_BIF_SUB_F32_VECTOR:
+ case RS6000_BIF_SUB_F64_SCALAR:
+ case RS6000_BIF_SUB_F64_VECTOR:
+ {
+ location_t loc = gimple_location (stmt);
+ lhs = gimple_call_lhs (stmt);
+ arg0 = gimple_call_arg (stmt, 0);
+ arg1 = gimple_call_arg (stmt, 1);
+ tree t = build2 (MINUS_EXPR, TREE_TYPE (lhs), arg0, arg1);
+ g = gimple_build_assign (lhs, t);
+ gimple_set_location (g, loc);
+ gsi_replace (gsi, g, true);
+ return true;
+ }
+
default:
if (TARGET_DEBUG_BUILTIN)
fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index 289a37998b1..ef846cff03d 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -4107,3 +4107,109 @@
void __builtin_vsx_stxvp (v256, unsigned long, const v256 *);
STXVP nothing {mma,pair}
+
+; __vector_pair operations
+
+ float __builtin_abs_f32_scalar (float);
+ ABS_F32_SCALAR nothing {}
+ vf __builtin_abs_f32_vector (vf);
+ ABS_F32_VECTOR nothing {}
+ v256 __builtin_abs_f32_vpair (v256);
+ ABS_F32_VPAIR vpair_ABS_V4SF {mma,pair}
+
+ double __builtin_abs_f64_scalar (double);
+ ABS_F64_SCALAR nothing {}
+ vd __builtin_abs_f64_vector (vd);
+ ABS_F64_VECTOR nothing {}
+ v256 __builtin_abs_f64_vpair (v256);
+ ABS_F64_VPAIR vpair_ABS_V2DF {mma,pair}
+
+ float __builtin_add_f32_scalar (float, float);
+ ADD_F32_SCALAR nothing {}
+ vf __builtin_add_f32_vector (vf, vf);
+ ADD_F32_VECTOR nothing {}
+ v256 __builtin_add_f32_vpair (v256, v256);
+ ADD_F32_VPAIR vpair_PLUS_V4SF {mma,pair}
+
+ double __builtin_add_f64_scalar (double, double);
+ ADD_F64_SCALAR nothing {}
+ vd __builtin_add_f64_vector (vd, vd);
+ ADD_F64_VECTOR nothing {}
+ v256 __builtin_add_f64_vpair (v256, v256);
+ ADD_F64_VPAIR vpair_PLUS_V2DF {mma,pair}
+
+ float __builtin_mult_f32_scalar (float, float);
+ MULT_F32_SCALAR nothing {}
+ vf __builtin_mult_f32_vector (vf, vf);
+ MULT_F32_VECTOR nothing {}
+ v256 __builtin_mult_f32_vpair (v256, v256);
+ MULT_F32_VPAIR vpair_MULT_V4SF {mma,pair}
+
+ double __builtin_mult_f64_scalar (double, double);
+ MULT_F64_SCALAR nothing {}
+ vd __builtin_mult_f64_vector (vd, vd);
+ MULT_F64_VECTOR nothing {}
+ v256 __builtin_mult_f64_vpair (v256, v256);
+ MULT_F64_VPAIR vpair_MULT_V2DF {mma,pair}
+
+ float __builtin_reduce_f32_scalar (float);
+ REDUCE_F32_SCALAR nothing {}
+ float __builtin_reduce_f32_vector (vf);
+ REDUCE_F32_VECTOR nothing {}
+ float __builtin_reduce_f32_vpair (v256);
+ REDUCE_F32_VPAIR nothing {mma,pair}
+; REDUCE_F32_VPAIR vpair_REDUCE_V4SF {mma,pair}
+
+ double __builtin_reduce_f64_scalar (double);
+ REDUCE_F64_SCALAR nothing {}
+ double __builtin_reduce_f64_vector (vd);
+ REDUCE_F64_VECTOR nothing {}
+ double __builtin_reduce_f64_vpair (v256);
+ REDUCE_F64_VPAIR nothing {mma,pair}
+; REDUCE_F64_VPAIR vpair_REDUCE_V2DF {mma,pair}
+
+ float __builtin_scale_f32_scalar (float, float);
+ SCALE_F32_SCALAR nothing {}
+ vf __builtin_scale_f32_vector (float, vf);
+ SCALE_F32_VECTOR nothing {}
+ v256 __builtin_scale_f32_vpair (float, v256);
+ SCALE_F32_VPAIR nothing {mma,pair}
+ v256 __builtin_scale_v4sf_vpair (vf, v256);
+ SCALE_V4SF_VPAIR vpair_SCALE_V4SF {mma,pair}
+
+ double __builtin_scale_f64_scalar (double, double);
+ SCALE_F64_SCALAR nothing {}
+ vd __builtin_scale_f64_vector (double, vd);
+ SCALE_F64_VECTOR nothing {}
+ v256 __builtin_scale_f64_vpair (double, v256);
+ SCALE_F64_VPAIR nothing {mma,pair}
+ v256 __builtin_scale_v2df_vpair (vd, v256);
+ SCALE_V2DF_VPAIR vpair_SCALE_V2DF {mma,pair}
+
+ float __builtin_sub_f32_scalar (float, float);
+ SUB_F32_SCALAR nothing {}
+ vf __builtin_sub_f32_vector (vf, vf);
+ SUB_F32_VECTOR nothing {}
+ v256 __builtin_sub_f32_vpair (v256, v256);
+ SUB_F32_VPAIR vpair_MINUS_V4SF {mma,pair}
+
+ double __builtin_sub_f64_scalar (double, double);
+ SUB_F64_SCALAR nothing {}
+ vd __builtin_sub_f64_vector (vd, vd);
+ SUB_F64_VECTOR nothing {}
+ v256 __builtin_sub_f64_vpair (v256, v256);
+ SUB_F64_VPAIR vpair_MINUS_V2DF {mma,pair}
+
+ float __builtin_fma_f32_scalar (float, float, float);
+ FMA_F32_SCALAR nothing {}
+ vf __builtin_fma_f32_vector (vf, vf, vf);
+ FMA_F32_VECTOR nothing {}
+ v256 __builtin_fma_f32_vpair (v256, v256, v256);
+ FMA_F32_VPAIR vpair_FMA_V4SF {mma,pair}
+
+ double __builtin_fma_f64_scalar (double, double, double);
+ FMA_F64_SCALAR nothing {}
+ vd __builtin_fma_f64_vector (vd, vd, vd);
+ FMA_F64_VECTOR nothing {}
+ v256 __builtin_fma_f64_vpair (v256, v256, v256);
+ FMA_F64_VPAIR vpair_FMA_V2DF {mma,pair}
diff --git a/gcc/config/rs6000/rs6000-call.cc b/gcc/config/rs6000/rs6000-call.cc
index 5384c10b986..4a929235d2d 100644
--- a/gcc/config/rs6000/rs6000-call.cc
+++ b/gcc/config/rs6000/rs6000-call.cc
@@ -432,12 +432,12 @@ rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
bool
rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
- /* We do not allow MMA types being used as return values. Only report
+ /* We do not allow MMA QUAD types being used as return values. Only report
the invalid return value usage the first time we encounter it. */
if (cfun
&& !cfun->machine->mma_return_type_error
&& TREE_TYPE (cfun->decl) == fntype
- && (TYPE_MODE (type) == OOmode || TYPE_MODE (type) == XOmode))
+ && TYPE_MODE (type) == XOmode)
{
/* Record we have now handled function CFUN, so the next time we
are called, we do not re-report the same error. */
@@ -1109,6 +1109,9 @@ rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
&& TREE_CODE (type) == COMPLEX_TYPE && elt_mode == KCmode)
cum->vregno++;
+ if (type && TREE_CODE (type) == OPAQUE_TYPE && elt_mode == OOmode)
+ cum->vregno++;
+
if (!TARGET_ALTIVEC)
error ("cannot pass argument in vector register because"
" altivec instructions are disabled, use %qs"
@@ -1631,8 +1634,8 @@ rs6000_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
machine_mode elt_mode;
int n_elts;
- /* We do not allow MMA types being used as function arguments. */
- if (mode == OOmode || mode == XOmode)
+ /* We do not allow MMA QUAD types being used as function arguments. */
+ if (mode == XOmode)
{
if (TYPE_CANONICAL (type) != NULL_TREE)
type = TYPE_CANONICAL (type);
diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def
index c582490c084..a2294bce66c 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -6175,3 +6175,119 @@
VUPKLSW VUPKLSW_DEPR1
vbll __builtin_vec_vupklsw (vbi);
VUPKLSW VUPKLSW_DEPR2
+
+[ABS_F32, SKIP, __builtin_abs_f32]
+ float __builtin_abs_f32 (float);
+ ABS_F32_SCALAR
+ vf __builtin_abs_f32 (vf);
+ ABS_F32_VECTOR
+ v256 __builtin_abs_f32 (v256);
+ ABS_F32_VPAIR
+
+[ABS_F64, SKIP, __builtin_abs_f64]
+ double __builtin_abs_f64 (double);
+ ABS_F64_SCALAR
+ vd __builtin_abs_f64 (vd);
+ ABS_F64_VECTOR
+ v256 __builtin_abs_f64 (v256);
+ ABS_F64_VPAIR
+
+[ADD_F32, SKIP, __builtin_add_f32]
+ float __builtin_add_f32 (float, float);
+ ADD_F32_SCALAR
+ vf __builtin_add_f32 (vf, vf);
+ ADD_F32_VECTOR
+ v256 __builtin_add_f32 (v256, v256);
+ ADD_F32_VPAIR
+
+[ADD_F64, SKIP, __builtin_add_f64]
+ double __builtin_add_f64 (double, double);
+ ADD_F64_SCALAR
+ vd __builtin_add_f64 (vd, vd);
+ ADD_F64_VECTOR
+ v256 __builtin_add_f64 (v256, v256);
+ ADD_F64_VPAIR
+
+[MULT_F32, SKIP, __builtin_mult_f32]
+ float __builtin_mult_f32 (float, float);
+ MULT_F32_SCALAR
+ vf __builtin_mult_f32 (vf, vf);
+ MULT_F32_VECTOR
+ v256 __builtin_mult_f32 (v256, v256);
+ MULT_F32_VPAIR
+
+[MULT_F64, SKIP, __builtin_mult_f64]
+ double __builtin_mult_f64 (double, double);
+ MULT_F64_SCALAR
+ vd __builtin_mult_f64 (vd, vd);
+ MULT_F64_VECTOR
+ v256 __builtin_mult_f64 (v256, v256);
+ MULT_F64_VPAIR
+
+[REDUCE_F32, SKIP, __builtin_reduce_f32]
+ float __builtin_reduce_f32 (float);
+ REDUCE_F32_SCALAR
+ float __builtin_reduce_f32 (vf);
+ REDUCE_F32_VECTOR
+ float __builtin_reduce_f32 (v256);
+ REDUCE_F32_VPAIR
+
+[REDUCE_F64, SKIP, __builtin_reduce_f64]
+ double __builtin_reduce_f64 (double);
+ REDUCE_F64_SCALAR
+ double __builtin_reduce_f64 (vd);
+ REDUCE_F64_VECTOR
+ double __builtin_reduce_f64 (v256);
+ REDUCE_F64_VPAIR
+
+[SCALE_F32, SKIP, __builtin_scale_f32]
+ float __builtin_scale_f32 (float, float);
+ SCALE_F32_SCALAR
+ vf __builtin_scale_f32 (float, vf);
+ SCALE_F32_VECTOR
+ v256 __builtin_scale_f32 (float, v256);
+ SCALE_F32_VPAIR
+ v256 __builtin_scale_f32 (vf, v256);
+ SCALE_V4SF_VPAIR
+
+[SCALE_F64, SKIP, __builtin_scale_f64]
+ double __builtin_scale_f64 (double, double);
+ SCALE_F64_SCALAR
+ vd __builtin_scale_f64 (double, vd);
+ SCALE_F64_VECTOR
+ v256 __builtin_scale_f64 (double, v256);
+ SCALE_F64_VPAIR
+ v256 __builtin_scale_f64 (vd, v256);
+ SCALE_V2DF_VPAIR
+
+[SUB_F32, SKIP, __builtin_sub_f32]
+ float __builtin_sub_f32 (float, float);
+ SUB_F32_SCALAR
+ vf __builtin_sub_f32 (vf, vf);
+ SUB_F32_VECTOR
+ v256 __builtin_sub_f32 (v256, v256);
+ SUB_F32_VPAIR
+
+[SUB_F64, SKIP, __builtin_sub_f64]
+ double __builtin_sub_f64 (double, double);
+ SUB_F64_SCALAR
+ vd __builtin_sub_f64 (vd, vd);
+ SUB_F64_VECTOR
+ v256 __builtin_sub_f64 (v256, v256);
+ SUB_F64_VPAIR
+
+[FMA_F32, SKIP, __builtin_fma_f32]
+ float __builtin_fma_f32 (float, float, float);
+ FMA_F32_SCALAR
+ vf __builtin_fma_f32 (vf, vf, vf);
+ FMA_F32_VECTOR
+ v256 __builtin_fma_f32 (v256, v256, v256);
+ FMA_F32_VPAIR
+
+[FMA_F64, SKIP, __builtin_fma_f64]
+ double __builtin_fma_f64 (double, double, double);
+ FMA_F64_SCALAR
+ vd __builtin_fma_f64 (vd, vd, vd);
+ FMA_F64_VECTOR
+ v256 __builtin_fma_f64 (v256, v256, v256);
+ FMA_F64_VPAIR
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-06-16 16:23 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-16 16:23 [gcc(refs/users/meissner/heads/work122-vpair)] Initial vector pair arithmetic support Michael Meissner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).