public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work129-vpair)] Add initial int built-in overload support.
@ 2023-08-02 3:39 Michael Meissner
0 siblings, 0 replies; only message in thread
From: Michael Meissner @ 2023-08-02 3:39 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:15f1121135323895ab8a4b3a91d90db60625c0fd
commit 15f1121135323895ab8a4b3a91d90db60625c0fd
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Tue Aug 1 23:38:42 2023 -0400
Add initial int built-in overload support.
2023-08-01 Michael Meissner <meissner@linux.ibm.com>
gcc/
* config/rs6000/rs6000-builtin.cc (fold_builtin_overload_arith): Rename
from fold_builtin_overload_fp.
(rs6000_gimple_fold_builtin): Add support for integer overload
built-ins.
* config/rs6000/rs6000-builtins.def (__builtin_*_i32_*): Add built-in
integer functions for overloading.
(__builtin_u32_*): Likewise.
(__builtin_i64_*): Likewise.
(__builtin_u64_*): Likewise.
* config/rs6000/rs6000-gen-builtins.cc (MAXOVLDSTANZAS): Bump up to
1,024.
* config/rs6000/rs6000-overload.def (__builtin_*_i32): Add built-in
overloaded integer functions.
(__builtin_u32): Likewise.
(__builtin_i64): Likewise.
(__builtin_u64): Likewise.
* config/rs6000/vector-pair.md (VPAIR_FP_UNARY): Rename from
VPAIR_UNARY.
(VPAIR_FP_BINARY): Rename from VPAIR_BINARY.
(VPAIR_INT_BINARY): New code iterator.
(vpair_op): Update for integer built-in functions.
(vpair_ireg): New code attribute.
(vpair_ipred): Likewise.
(VPAIR_FP_WRAPPER): Rename from VPAIR_WRAPPER.
(VPAIR_INT_WRAPPER): New int iterator.
(VPAIR_VECTOR): New int attribute.
(vpair_type): New int attribute.
(vpair_neg_reg): New int attribute.
(floating point insns): Update to use VPAIR_FP_WRAPPER, VPAIR_FP_UNARY,
VPAIR_FP_BINARY.
(integer insns): Add new integer insns for built-in functions.
Diff:
---
gcc/config/rs6000/rs6000-builtin.cc | 110 +++++++++--
gcc/config/rs6000/rs6000-builtins.def | 257 ++++++++++++++++++++++++
gcc/config/rs6000/rs6000-gen-builtins.cc | 2 +-
gcc/config/rs6000/rs6000-overload.def | 265 +++++++++++++++++++++++++
gcc/config/rs6000/vector-pair.md | 323 +++++++++++++++++++++++--------
5 files changed, 864 insertions(+), 93 deletions(-)
diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index e32d9175a0c..62d56c28946 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -1261,14 +1261,14 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
return true;
}
-/* Helper function to fold the overloaded fp functions for the scalar and
- vector types that support the operation directly. */
+/* Helper function to fold the overloaded arithmetic functions for the scalar
+ and vector types that support the operation directly. */
static void
-fold_builtin_overload_fp (gimple_stmt_iterator *gsi,
- gimple *stmt,
- enum tree_code code,
- int nargs)
+fold_builtin_overload_arith (gimple_stmt_iterator *gsi,
+ gimple *stmt,
+ enum tree_code code,
+ int nargs)
{
location_t loc = gimple_location (stmt);
tree lhs = gimple_call_lhs (stmt);
@@ -2280,28 +2280,88 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
case RS6000_BIF_ABS_F32_VECTOR:
case RS6000_BIF_ABS_F64_SCALAR:
case RS6000_BIF_ABS_F64_VECTOR:
- fold_builtin_overload_fp (gsi, stmt, ABS_EXPR, 1);
+ fold_builtin_overload_arith (gsi, stmt, ABS_EXPR, 1);
return true;
+ case RS6000_BIF_ADD_I32_SCALAR:
+ case RS6000_BIF_ADD_I32_VECTOR:
+ case RS6000_BIF_ADD_I64_SCALAR:
+ case RS6000_BIF_ADD_I64_VECTOR:
case RS6000_BIF_ADD_F32_SCALAR:
case RS6000_BIF_ADD_F32_VECTOR:
case RS6000_BIF_ADD_F64_SCALAR:
case RS6000_BIF_ADD_F64_VECTOR:
- fold_builtin_overload_fp (gsi, stmt, PLUS_EXPR, 2);
+ case RS6000_BIF_ADD_U32_SCALAR:
+ case RS6000_BIF_ADD_U32_VECTOR:
+ case RS6000_BIF_ADD_U64_SCALAR:
+ case RS6000_BIF_ADD_U64_VECTOR:
+ fold_builtin_overload_arith (gsi, stmt, PLUS_EXPR, 2);
+ return true;
+
+ case RS6000_BIF_AND_I32_SCALAR:
+ case RS6000_BIF_AND_I32_VECTOR:
+ case RS6000_BIF_AND_I64_SCALAR:
+ case RS6000_BIF_AND_I64_VECTOR:
+ case RS6000_BIF_AND_U32_SCALAR:
+ case RS6000_BIF_AND_U32_VECTOR:
+ case RS6000_BIF_AND_U64_SCALAR:
+ case RS6000_BIF_AND_U64_VECTOR:
+ fold_builtin_overload_arith (gsi, stmt, BIT_AND_EXPR, 2);
+ return true;
+
+ case RS6000_BIF_IOR_I32_SCALAR:
+ case RS6000_BIF_IOR_I32_VECTOR:
+ case RS6000_BIF_IOR_I64_SCALAR:
+ case RS6000_BIF_IOR_I64_VECTOR:
+ case RS6000_BIF_IOR_U32_SCALAR:
+ case RS6000_BIF_IOR_U32_VECTOR:
+ case RS6000_BIF_IOR_U64_SCALAR:
+ case RS6000_BIF_IOR_U64_VECTOR:
+ fold_builtin_overload_arith (gsi, stmt, BIT_IOR_EXPR, 2);
return true;
case RS6000_BIF_MULT_F32_SCALAR:
case RS6000_BIF_MULT_F32_VECTOR:
case RS6000_BIF_MULT_F64_SCALAR:
case RS6000_BIF_MULT_F64_VECTOR:
- fold_builtin_overload_fp (gsi, stmt, MULT_EXPR, 2);
+ fold_builtin_overload_arith (gsi, stmt, MULT_EXPR, 2);
return true;
+ case RS6000_BIF_NEG_I32_SCALAR:
+ case RS6000_BIF_NEG_I32_VECTOR:
+ case RS6000_BIF_NEG_I64_SCALAR:
+ case RS6000_BIF_NEG_I64_VECTOR:
case RS6000_BIF_NEG_F32_SCALAR:
case RS6000_BIF_NEG_F32_VECTOR:
case RS6000_BIF_NEG_F64_SCALAR:
case RS6000_BIF_NEG_F64_VECTOR:
- fold_builtin_overload_fp (gsi, stmt, NEGATE_EXPR, 1);
+ case RS6000_BIF_NEG_U32_SCALAR:
+ case RS6000_BIF_NEG_U32_VECTOR:
+ case RS6000_BIF_NEG_U64_SCALAR:
+ case RS6000_BIF_NEG_U64_VECTOR:
+ fold_builtin_overload_arith (gsi, stmt, NEGATE_EXPR, 1);
+ return true;
+
+ case RS6000_BIF_NOT_I32_SCALAR:
+ case RS6000_BIF_NOT_I32_VECTOR:
+ case RS6000_BIF_NOT_I64_SCALAR:
+ case RS6000_BIF_NOT_I64_VECTOR:
+ case RS6000_BIF_NOT_U32_SCALAR:
+ case RS6000_BIF_NOT_U32_VECTOR:
+ case RS6000_BIF_NOT_U64_SCALAR:
+ case RS6000_BIF_NOT_U64_VECTOR:
+ fold_builtin_overload_arith (gsi, stmt, BIT_NOT_EXPR, 1);
+ return true;
+
+ case RS6000_BIF_XOR_I32_SCALAR:
+ case RS6000_BIF_XOR_I32_VECTOR:
+ case RS6000_BIF_XOR_I64_SCALAR:
+ case RS6000_BIF_XOR_I64_VECTOR:
+ case RS6000_BIF_XOR_U32_SCALAR:
+ case RS6000_BIF_XOR_U32_VECTOR:
+ case RS6000_BIF_XOR_U64_SCALAR:
+ case RS6000_BIF_XOR_U64_VECTOR:
+ fold_builtin_overload_arith (gsi, stmt, BIT_XOR_EXPR, 2);
return true;
case RS6000_BIF_REDUCE_F32_SCALAR:
@@ -2316,26 +2376,50 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
return true;
}
+ case RS6000_BIF_SMAX_I32_SCALAR:
+ case RS6000_BIF_SMAX_I32_VECTOR:
+ case RS6000_BIF_SMAX_I64_SCALAR:
+ case RS6000_BIF_SMAX_I64_VECTOR:
case RS6000_BIF_SMAX_F32_SCALAR:
case RS6000_BIF_SMAX_F32_VECTOR:
case RS6000_BIF_SMAX_F64_SCALAR:
case RS6000_BIF_SMAX_F64_VECTOR:
- fold_builtin_overload_fp (gsi, stmt, MAX_EXPR, 2);
+ case RS6000_BIF_UMAX_U32_SCALAR:
+ case RS6000_BIF_UMAX_U32_VECTOR:
+ case RS6000_BIF_UMAX_U64_SCALAR:
+ case RS6000_BIF_UMAX_U64_VECTOR:
+ fold_builtin_overload_arith (gsi, stmt, MAX_EXPR, 2);
return true;
+ case RS6000_BIF_SMIN_I32_SCALAR:
+ case RS6000_BIF_SMIN_I32_VECTOR:
+ case RS6000_BIF_SMIN_I64_SCALAR:
+ case RS6000_BIF_SMIN_I64_VECTOR:
case RS6000_BIF_SMIN_F32_SCALAR:
case RS6000_BIF_SMIN_F32_VECTOR:
case RS6000_BIF_SMIN_F64_SCALAR:
case RS6000_BIF_SMIN_F64_VECTOR:
- fold_builtin_overload_fp (gsi, stmt, MIN_EXPR, 2);
+ case RS6000_BIF_UMIN_U32_SCALAR:
+ case RS6000_BIF_UMIN_U32_VECTOR:
+ case RS6000_BIF_UMIN_U64_SCALAR:
+ case RS6000_BIF_UMIN_U64_VECTOR:
+ fold_builtin_overload_arith (gsi, stmt, MIN_EXPR, 2);
return true;
+ case RS6000_BIF_SUB_I32_SCALAR:
+ case RS6000_BIF_SUB_I32_VECTOR:
+ case RS6000_BIF_SUB_I64_SCALAR:
+ case RS6000_BIF_SUB_I64_VECTOR:
case RS6000_BIF_SUB_F32_SCALAR:
case RS6000_BIF_SUB_F32_VECTOR:
case RS6000_BIF_SUB_F64_SCALAR:
case RS6000_BIF_SUB_F64_VECTOR:
- fold_builtin_overload_fp (gsi, stmt, MINUS_EXPR, 2);
+ case RS6000_BIF_SUB_U32_SCALAR:
+ case RS6000_BIF_SUB_U32_VECTOR:
+ case RS6000_BIF_SUB_U64_SCALAR:
+ case RS6000_BIF_SUB_U64_VECTOR:
+ fold_builtin_overload_arith (gsi, stmt, MINUS_EXPR, 2);
return true;
default:
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index acc76adca12..6a991efa63e 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -4289,3 +4289,260 @@
SUB_F64_VECTOR nothing {}
v256 __builtin_sub_f64_vpair (v256, v256);
SUB_F64_VPAIR vpair_subv4df3 {mma}
+
+; Builtins for overloaded integer operations, including scalar and
+; 128-bit vector codes that are converted into direct operations.
+; The 256-bit operations that are kept in vector pairs use insns that
+; are split into separate operations after register allocation.
+
+ int __builtin_add_i32_scalar (int, int);
+ ADD_I32_SCALAR nothing {}
+ vsi __builtin_add_i32_vector (vsi, vsi);
+ ADD_I32_VECTOR nothing {}
+ v256 __builtin_add_i32_vpair (v256, v256);
+ ADD_I32_VPAIR vpair_addv8si3 {mma}
+
+ unsigned int __builtin_add_u32_scalar (unsigned int, unsigned int);
+ ADD_U32_SCALAR nothing {}
+ vui __builtin_add_u32_vector (vui, vui);
+ ADD_U32_VECTOR nothing {}
+ v256 __builtin_add_u32_vpair (v256, v256);
+ ADD_U32_VPAIR vpair_addv8si3 {mma}
+
+ long long __builtin_add_i64_scalar (long long, long long);
+ ADD_I64_SCALAR nothing {}
+ vsll __builtin_add_i64_vector (vsll, vsll);
+ ADD_I64_VECTOR nothing {}
+ v256 __builtin_add_i64_vpair (v256, v256);
+ ADD_I64_VPAIR vpair_addv4di3 {mma}
+
+ unsigned long long __builtin_add_u64_scalar (unsigned long long, unsigned long long);
+ ADD_U64_SCALAR nothing {}
+ vull __builtin_add_u64_vector (vull, vull);
+ ADD_U64_VECTOR nothing {}
+ v256 __builtin_add_u64_vpair (v256, v256);
+ ADD_U64_VPAIR vpair_addv4di3 {mma}
+
+ int __builtin_and_i32_scalar (int, int);
+ AND_I32_SCALAR nothing {}
+ vsi __builtin_and_i32_vector (vsi, vsi);
+ AND_I32_VECTOR nothing {}
+ v256 __builtin_and_i32_vpair (v256, v256);
+ AND_I32_VPAIR vpair_andv8si3 {mma}
+
+ unsigned int __builtin_and_u32_scalar (unsigned int, unsigned int);
+ AND_U32_SCALAR nothing {}
+ vui __builtin_and_u32_vector (vui, vui);
+ AND_U32_VECTOR nothing {}
+ v256 __builtin_and_u32_vpair (v256, v256);
+ AND_U32_VPAIR vpair_andv8si3 {mma}
+
+ long long __builtin_and_i64_scalar (long long, long long);
+ AND_I64_SCALAR nothing {}
+ vsll __builtin_and_i64_vector (vsll, vsll);
+ AND_I64_VECTOR nothing {}
+ v256 __builtin_and_i64_vpair (v256, v256);
+ AND_I64_VPAIR vpair_andv4di3 {mma}
+
+ unsigned long long __builtin_and_u64_scalar (unsigned long long, unsigned long long);
+ AND_U64_SCALAR nothing {}
+ vull __builtin_and_u64_vector (vull, vull);
+ AND_U64_VECTOR nothing {}
+ v256 __builtin_and_u64_vpair (v256, v256);
+ AND_U64_VPAIR vpair_andv4di3 {mma}
+
+ int __builtin_ior_i32_scalar (int, int);
+ IOR_I32_SCALAR nothing {}
+ vsi __builtin_ior_i32_vector (vsi, vsi);
+ IOR_I32_VECTOR nothing {}
+ v256 __builtin_ior_i32_vpair (v256, v256);
+ IOR_I32_VPAIR vpair_iorv8si3 {mma}
+
+ unsigned int __builtin_ior_u32_scalar (unsigned int, unsigned int);
+ IOR_U32_SCALAR nothing {}
+ vui __builtin_ior_u32_vector (vui, vui);
+ IOR_U32_VECTOR nothing {}
+ v256 __builtin_ior_u32_vpair (v256, v256);
+ IOR_U32_VPAIR vpair_iorv8si3 {mma}
+
+ long long __builtin_ior_i64_scalar (long long, long long);
+ IOR_I64_SCALAR nothing {}
+ vsll __builtin_ior_i64_vector (vsll, vsll);
+ IOR_I64_VECTOR nothing {}
+ v256 __builtin_ior_i64_vpair (v256, v256);
+ IOR_I64_VPAIR vpair_iorv4di3 {mma}
+
+ unsigned long long __builtin_ior_u64_scalar (unsigned long long, unsigned long long);
+ IOR_U64_SCALAR nothing {}
+ vull __builtin_ior_u64_vector (vull, vull);
+ IOR_U64_VECTOR nothing {}
+ v256 __builtin_ior_u64_vpair (v256, v256);
+ IOR_U64_VPAIR vpair_iorv4di3 {mma}
+
+ int __builtin_neg_i32_scalar (int);
+ NEG_I32_SCALAR nothing {}
+ vsi __builtin_neg_i32_vector (vsi);
+ NEG_I32_VECTOR nothing {}
+ v256 __builtin_neg_i32_vpair (v256);
+ NEG_I32_VPAIR vpair_negv8si2 {mma}
+
+ unsigned int __builtin_neg_u32_scalar (unsigned int);
+ NEG_U32_SCALAR nothing {}
+ vui __builtin_neg_u32_vector (vui);
+ NEG_U32_VECTOR nothing {}
+ v256 __builtin_neg_u32_vpair (v256);
+ NEG_U32_VPAIR vpair_negv8si2 {mma}
+
+ long long __builtin_neg_i64_scalar (long long);
+ NEG_I64_SCALAR nothing {}
+ vsll __builtin_neg_i64_vector (vsll);
+ NEG_I64_VECTOR nothing {}
+ v256 __builtin_neg_i64_vpair (v256);
+ NEG_I64_VPAIR vpair_negv4di2 {mma}
+
+ unsigned long long __builtin_neg_u64_scalar (unsigned long long);
+ NEG_U64_SCALAR nothing {}
+ vull __builtin_neg_u64_vector (vull);
+ NEG_U64_VECTOR nothing {}
+ v256 __builtin_neg_u64_vpair (v256);
+ NEG_U64_VPAIR vpair_negv4di2 {mma}
+
+ int __builtin_not_i32_scalar (int);
+ NOT_I32_SCALAR nothing {}
+ vsi __builtin_not_i32_vector (vsi);
+ NOT_I32_VECTOR nothing {}
+ v256 __builtin_not_i32_vpair (v256);
+ NOT_I32_VPAIR vpair_notv8si2 {mma}
+
+ unsigned int __builtin_not_u32_scalar (unsigned int);
+ NOT_U32_SCALAR nothing {}
+ vui __builtin_not_u32_vector (vui);
+ NOT_U32_VECTOR nothing {}
+ v256 __builtin_not_u32_vpair (v256);
+ NOT_U32_VPAIR vpair_notv8si2 {mma}
+
+ long long __builtin_not_i64_scalar (long long);
+ NOT_I64_SCALAR nothing {}
+ vsll __builtin_not_i64_vector (vsll);
+ NOT_I64_VECTOR nothing {}
+ v256 __builtin_not_i64_vpair (v256);
+ NOT_I64_VPAIR vpair_notv4di2 {mma}
+
+ unsigned long long __builtin_not_u64_scalar (unsigned long long);
+ NOT_U64_SCALAR nothing {}
+ vull __builtin_not_u64_vector (vull);
+ NOT_U64_VECTOR nothing {}
+ v256 __builtin_not_u64_vpair (v256);
+ NOT_U64_VPAIR vpair_notv4di2 {mma}
+
+ int __builtin_smax_i32_scalar (int, int);
+ SMAX_I32_SCALAR nothing {}
+ vsi __builtin_smax_i32_vector (vsi, vsi);
+ SMAX_I32_VECTOR nothing {}
+ v256 __builtin_smax_i32_vpair (v256, v256);
+ SMAX_I32_VPAIR vpair_smaxv8si3 {mma}
+
+ long long __builtin_smax_i64_scalar (long long, long long);
+ SMAX_I64_SCALAR nothing {}
+ vsll __builtin_smax_i64_vector (vsll, vsll);
+ SMAX_I64_VECTOR nothing {}
+ v256 __builtin_smax_i64_vpair (v256, v256);
+ SMAX_I64_VPAIR vpair_smaxv4di3 {mma}
+
+ int __builtin_smin_i32_scalar (int, int);
+ SMIN_I32_SCALAR nothing {}
+ vsi __builtin_smin_i32_vector (vsi, vsi);
+ SMIN_I32_VECTOR nothing {}
+ v256 __builtin_smin_i32_vpair (v256, v256);
+ SMIN_I32_VPAIR vpair_sminv8si3 {mma}
+
+ long long __builtin_smin_i64_scalar (long long, long long);
+ SMIN_I64_SCALAR nothing {}
+ vsll __builtin_smin_i64_vector (vsll, vsll);
+ SMIN_I64_VECTOR nothing {}
+ v256 __builtin_smin_i64_vpair (v256, v256);
+ SMIN_I64_VPAIR vpair_sminv4di3 {mma}
+
+ int __builtin_sub_i32_scalar (int, int);
+ SUB_I32_SCALAR nothing {}
+ vsi __builtin_sub_i32_vector (vsi, vsi);
+ SUB_I32_VECTOR nothing {}
+ v256 __builtin_sub_i32_vpair (v256, v256);
+ SUB_I32_VPAIR vpair_subv8si3 {mma}
+
+ unsigned int __builtin_sub_u32_scalar (unsigned int, unsigned int);
+ SUB_U32_SCALAR nothing {}
+ vui __builtin_sub_u32_vector (vui, vui);
+ SUB_U32_VECTOR nothing {}
+ v256 __builtin_sub_u32_vpair (v256, v256);
+ SUB_U32_VPAIR vpair_subv8si3 {mma}
+
+ long long __builtin_sub_i64_scalar (long long, long long);
+ SUB_I64_SCALAR nothing {}
+ vsll __builtin_sub_i64_vector (vsll, vsll);
+ SUB_I64_VECTOR nothing {}
+ v256 __builtin_sub_i64_vpair (v256, v256);
+ SUB_I64_VPAIR vpair_subv4di3 {mma}
+
+ unsigned long long __builtin_sub_u64_scalar (unsigned long long, unsigned long long);
+ SUB_U64_SCALAR nothing {}
+ vull __builtin_sub_u64_vector (vull, vull);
+ SUB_U64_VECTOR nothing {}
+ v256 __builtin_sub_u64_vpair (v256, v256);
+ SUB_U64_VPAIR vpair_subv4di3 {mma}
+
+ unsigned int __builtin_umax_u32_scalar (unsigned int, unsigned int);
+ UMAX_U32_SCALAR nothing {}
+ vui __builtin_umax_u32_vector (vui, vui);
+ UMAX_U32_VECTOR nothing {}
+ v256 __builtin_umax_u32_vpair (v256, v256);
+ UMAX_U32_VPAIR vpair_umaxv8si3 {mma}
+
+ unsigned long long __builtin_umax_u64_scalar (unsigned long long, unsigned long long);
+ UMAX_U64_SCALAR nothing {}
+ vull __builtin_umax_u64_vector (vull, vull);
+ UMAX_U64_VECTOR nothing {}
+ v256 __builtin_umax_u64_vpair (v256, v256);
+ UMAX_U64_VPAIR vpair_umaxv4di3 {mma}
+
+ unsigned int __builtin_umin_u32_scalar (unsigned int, unsigned int);
+ UMIN_U32_SCALAR nothing {}
+ vui __builtin_umin_u32_vector (vui, vui);
+ UMIN_U32_VECTOR nothing {}
+ v256 __builtin_umin_u32_vpair (v256, v256);
+ UMIN_U32_VPAIR vpair_uminv8si3 {mma}
+
+ unsigned long long __builtin_umin_u64_scalar (unsigned long long, unsigned long long);
+ UMIN_U64_SCALAR nothing {}
+ vull __builtin_umin_u64_vector (vull, vull);
+ UMIN_U64_VECTOR nothing {}
+ v256 __builtin_umin_u64_vpair (v256, v256);
+ UMIN_U64_VPAIR vpair_uminv4di3 {mma}
+
+ int __builtin_xor_i32_scalar (int, int);
+ XOR_I32_SCALAR nothing {}
+ vsi __builtin_xor_i32_vector (vsi, vsi);
+ XOR_I32_VECTOR nothing {}
+ v256 __builtin_xor_i32_vpair (v256, v256);
+ XOR_I32_VPAIR vpair_xorv8si3 {mma}
+
+ unsigned int __builtin_xor_u32_scalar (unsigned int, unsigned int);
+ XOR_U32_SCALAR nothing {}
+ vui __builtin_xor_u32_vector (vui, vui);
+ XOR_U32_VECTOR nothing {}
+ v256 __builtin_xor_u32_vpair (v256, v256);
+ XOR_U32_VPAIR vpair_xorv8si3 {mma}
+
+ long long __builtin_xor_i64_scalar (long long, long long);
+ XOR_I64_SCALAR nothing {}
+ vsll __builtin_xor_i64_vector (vsll, vsll);
+ XOR_I64_VECTOR nothing {}
+ v256 __builtin_xor_i64_vpair (v256, v256);
+ XOR_I64_VPAIR vpair_xorv4di3 {mma}
+
+ unsigned long long __builtin_xor_u64_scalar (unsigned long long, unsigned long long);
+ XOR_U64_SCALAR nothing {}
+ vull __builtin_xor_u64_vector (vull, vull);
+ XOR_U64_VECTOR nothing {}
+ v256 __builtin_xor_u64_vpair (v256, v256);
+ XOR_U64_VPAIR vpair_xorv4di3 {mma}
diff --git a/gcc/config/rs6000/rs6000-gen-builtins.cc b/gcc/config/rs6000/rs6000-gen-builtins.cc
index a2f442ed90d..cbd7b916c03 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.cc
+++ b/gcc/config/rs6000/rs6000-gen-builtins.cc
@@ -444,7 +444,7 @@ struct ovld_stanza
char *ifdef;
};
-#define MAXOVLDSTANZAS 512
+#define MAXOVLDSTANZAS 1024
static ovld_stanza ovld_stanzas[MAXOVLDSTANZAS];
static int num_ovld_stanzas;
static int curr_ovld_stanza;
diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def
index bbc26de4568..1bf0d1ceee9 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -6365,3 +6365,268 @@
SUB_F64_VECTOR
v256 __builtin_sub_f64 (v256, v256);
SUB_F64_VPAIR
+
+;; Overloaded integer built-in functions
+[ADD_I32, SKIP, __builtin_add_i32]
+ int __builtin_add_i32 (int, int);
+ ADD_I32_SCALAR
+ vsi __builtin_add_i32 (vsi, vsi);
+ ADD_I32_VECTOR
+ v256 __builtin_add_i32 (v256, v256);
+ ADD_I32_VPAIR
+
+[ADD_U32, SKIP, __builtin_add_u32]
+ unsigned int __builtin_add_u32 (unsigned int, unsigned int);
+ ADD_U32_SCALAR
+ vui __builtin_add_u32 (vui, vui);
+ ADD_U32_VECTOR
+ v256 __builtin_add_u32 (v256, v256);
+ ADD_U32_VPAIR
+
+[ADD_I64, SKIP, __builtin_add_i64]
+ long long __builtin_add_i64 (long long, long long);
+ ADD_I64_SCALAR
+ vsll __builtin_add_i64 (vsll, vsll);
+ ADD_I64_VECTOR
+ v256 __builtin_add_i64 (v256, v256);
+ ADD_I64_VPAIR
+
+[ADD_U64, SKIP, __builtin_add_u64]
+ unsigned long long __builtin_add_u64 (unsigned long long, unsigned long long);
+ ADD_U64_SCALAR
+ vull __builtin_add_u64 (vull, vull);
+ ADD_U64_VECTOR
+ v256 __builtin_add_u64 (v256, v256);
+ ADD_U64_VPAIR
+
+[AND_U32, SKIP, __builtin_and_u32]
+ unsigned int __builtin_and_u32 (unsigned int, unsigned int);
+ AND_U32_SCALAR
+ vui __builtin_and_u32 (vui, vui);
+ AND_U32_VECTOR
+ v256 __builtin_and_u32 (v256, v256);
+ AND_U32_VPAIR
+
+[AND_I64, SKIP, __builtin_and_i64]
+ long long __builtin_and_i64 (long long, long long);
+ AND_I64_SCALAR
+ vsll __builtin_and_i64 (vsll, vsll);
+ AND_I64_VECTOR
+ v256 __builtin_and_i64 (v256, v256);
+ AND_I64_VPAIR
+
+[AND_U64, SKIP, __builtin_and_u64]
+ unsigned long long __builtin_and_u64 (unsigned long long, unsigned long long);
+ AND_U64_SCALAR
+ vull __builtin_and_u64 (vull, vull);
+ AND_U64_VECTOR
+ v256 __builtin_and_u64 (v256, v256);
+ AND_U64_VPAIR
+
+[IOR_U32, SKIP, __builtin_ior_u32]
+ unsigned int __builtin_ior_u32 (unsigned int, unsigned int);
+ IOR_U32_SCALAR
+ vui __builtin_ior_u32 (vui, vui);
+ IOR_U32_VECTOR
+ v256 __builtin_ior_u32 (v256, v256);
+ IOR_U32_VPAIR
+
+[IOR_I64, SKIP, __builtin_ior_i64]
+ long long __builtin_ior_i64 (long long, long long);
+ IOR_I64_SCALAR
+ vsll __builtin_ior_i64 (vsll, vsll);
+ IOR_I64_VECTOR
+ v256 __builtin_ior_i64 (v256, v256);
+ IOR_I64_VPAIR
+
+[IOR_U64, SKIP, __builtin_ior_u64]
+ unsigned long long __builtin_ior_u64 (unsigned long long, unsigned long long);
+ IOR_U64_SCALAR
+ vull __builtin_ior_u64 (vull, vull);
+ IOR_U64_VECTOR
+ v256 __builtin_ior_u64 (v256, v256);
+ IOR_U64_VPAIR
+
+[NEG_I32, SKIP, __builtin_neg_i32]
+ int __builtin_neg_i32 (int);
+ NEG_I32_SCALAR
+ vsi __builtin_neg_i32 (vsi);
+ NEG_I32_VECTOR
+ v256 __builtin_neg_i32 (v256);
+ NEG_I32_VPAIR
+
+[NEG_U32, SKIP, __builtin_neg_u32]
+ unsigned int __builtin_neg_u32 (unsigned int);
+ NEG_U32_SCALAR
+ vui __builtin_neg_u32 (vui);
+ NEG_U32_VECTOR
+ v256 __builtin_neg_u32 (v256);
+ NEG_U32_VPAIR
+
+[NEG_I64, SKIP, __builtin_neg_i64]
+ long long __builtin_neg_i64 (long long);
+ NEG_I64_SCALAR
+ vsll __builtin_neg_i64 (vsll);
+ NEG_I64_VECTOR
+ v256 __builtin_neg_i64 (v256);
+ NEG_I64_VPAIR
+
+[NEG_U64, SKIP, __builtin_neg_u64]
+ unsigned long long __builtin_neg_u64 (unsigned long long);
+ NEG_U64_SCALAR
+ vull __builtin_neg_u64 (vull);
+ NEG_U64_VECTOR
+ v256 __builtin_neg_u64 (v256);
+ NEG_U64_VPAIR
+
+[NOT_I32, SKIP, __builtin_not_i32]
+ int __builtin_not_i32 (int);
+ NOT_I32_SCALAR
+ vsi __builtin_not_i32 (vsi);
+ NOT_I32_VECTOR
+ v256 __builtin_not_i32 (v256);
+ NOT_I32_VPAIR
+
+[NOT_U32, SKIP, __builtin_not_u32]
+ unsigned int __builtin_not_u32 (unsigned int);
+ NOT_U32_SCALAR
+ vui __builtin_not_u32 (vui);
+ NOT_U32_VECTOR
+ v256 __builtin_not_u32 (v256);
+ NOT_U32_VPAIR
+
+[NOT_I64, SKIP, __builtin_not_i64]
+ long long __builtin_not_i64 (long long);
+ NOT_I64_SCALAR
+ vsll __builtin_not_i64 (vsll);
+ NOT_I64_VECTOR
+ v256 __builtin_not_i64 (v256);
+ NOT_I64_VPAIR
+
+[NOT_U64, SKIP, __builtin_not_u64]
+ unsigned long long __builtin_not_u64 (unsigned long long);
+ NOT_U64_SCALAR
+ vull __builtin_not_u64 (vull);
+ NOT_U64_VECTOR
+ v256 __builtin_not_u64 (v256);
+ NOT_U64_VPAIR
+
+[SUB_I32, SKIP, __builtin_sub_i32]
+ int __builtin_sub_i32 (int, int);
+ SUB_I32_SCALAR
+ vsi __builtin_sub_i32 (vsi, vsi);
+ SUB_I32_VECTOR
+ v256 __builtin_sub_i32 (v256, v256);
+ SUB_I32_VPAIR
+
+[SUB_U32, SKIP, __builtin_sub_u32]
+ unsigned int __builtin_sub_u32 (unsigned int, unsigned int);
+ SUB_U32_SCALAR
+ vui __builtin_sub_u32 (vui, vui);
+ SUB_U32_VECTOR
+ v256 __builtin_sub_u32 (v256, v256);
+ SUB_U32_VPAIR
+
+[SUB_I64, SKIP, __builtin_sub_i64]
+ long long __builtin_sub_i64 (long long, long long);
+ SUB_I64_SCALAR
+ vsll __builtin_sub_i64 (vsll, vsll);
+ SUB_I64_VECTOR
+ v256 __builtin_sub_i64 (v256, v256);
+ SUB_I64_VPAIR
+
+[SUB_U64, SKIP, __builtin_sub_u64]
+ unsigned long long __builtin_sub_u64 (unsigned long long, unsigned long long);
+ SUB_U64_SCALAR
+ vull __builtin_sub_u64 (vull, vull);
+ SUB_U64_VECTOR
+ v256 __builtin_sub_u64 (v256, v256);
+ SUB_U64_VPAIR
+
+[SMAX_I32, SKIP, __builtin_smax_i32]
+ int __builtin_smax_i32 (int, int);
+ SMAX_I32_SCALAR
+ vsi __builtin_smax_i32 (vsi, vsi);
+ SMAX_I32_VECTOR
+ v256 __builtin_smax_i32 (v256, v256);
+ SMAX_I32_VPAIR
+
+[SMAX_I64, SKIP, __builtin_smax_i64]
+ long long __builtin_smax_i64 (long long, long long);
+ SMAX_I64_SCALAR
+ vsll __builtin_smax_i64 (vsll, vsll);
+ SMAX_I64_VECTOR
+ v256 __builtin_smax_i64 (v256, v256);
+ SMAX_I64_VPAIR
+
+[SMIN_I32, SKIP, __builtin_smin_i32]
+ int __builtin_smin_i32 (int, int);
+ SMIN_I32_SCALAR
+ vsi __builtin_smin_i32 (vsi, vsi);
+ SMIN_I32_VECTOR
+ v256 __builtin_smin_i32 (v256, v256);
+ SMIN_I32_VPAIR
+
+[SMIN_I64, SKIP, __builtin_smin_i64]
+ long long __builtin_smin_i64 (long long, long long);
+ SMIN_I64_SCALAR
+ vsll __builtin_smin_i64 (vsll, vsll);
+ SMIN_I64_VECTOR
+ v256 __builtin_smin_i64 (v256, v256);
+ SMIN_I64_VPAIR
+
+[UMAX_U32, SKIP, __builtin_umax_u32]
+ unsigned int __builtin_umax_u32 (unsigned int, unsigned int);
+ UMAX_U32_SCALAR
+ vui __builtin_umax_u32 (vui, vui);
+ UMAX_U32_VECTOR
+ v256 __builtin_umax_u32 (v256, v256);
+ UMAX_U32_VPAIR
+
+[UMAX_U64, SKIP, __builtin_umax_u64]
+ unsigned long long __builtin_umax_u64 (unsigned long long, unsigned long long);
+ UMAX_U64_SCALAR
+ vull __builtin_umax_u64 (vull, vull);
+ UMAX_U64_VECTOR
+ v256 __builtin_umax_u64 (v256, v256);
+ UMAX_U64_VPAIR
+
+[UMIN_U32, SKIP, __builtin_umin_u32]
+ unsigned int __builtin_umin_u32 (unsigned int, unsigned int);
+ UMIN_U32_SCALAR
+ vui __builtin_umin_u32 (vui, vui);
+ UMIN_U32_VECTOR
+ v256 __builtin_umin_u32 (v256, v256);
+ UMIN_U32_VPAIR
+
+[UMIN_U64, SKIP, __builtin_umin_u64]
+ unsigned long long __builtin_umin_u64 (unsigned long long, unsigned long long);
+ UMIN_U64_SCALAR
+ vull __builtin_umin_u64 (vull, vull);
+ UMIN_U64_VECTOR
+ v256 __builtin_umin_u64 (v256, v256);
+ UMIN_U64_VPAIR
+
+[XOR_U32, SKIP, __builtin_xor_u32]
+ unsigned int __builtin_xor_u32 (unsigned int, unsigned int);
+ XOR_U32_SCALAR
+ vui __builtin_xor_u32 (vui, vui);
+ XOR_U32_VECTOR
+ v256 __builtin_xor_u32 (v256, v256);
+ XOR_U32_VPAIR
+
+[XOR_I64, SKIP, __builtin_xor_i64]
+ long long __builtin_xor_i64 (long long, long long);
+ XOR_I64_SCALAR
+ vsll __builtin_xor_i64 (vsll, vsll);
+ XOR_I64_VECTOR
+ v256 __builtin_xor_i64 (v256, v256);
+ XOR_I64_VPAIR
+
+[XOR_U64, SKIP, __builtin_xor_u64]
+ unsigned long long __builtin_xor_u64 (unsigned long long, unsigned long long);
+ XOR_U64_SCALAR
+ vull __builtin_xor_u64 (vull, vull);
+ XOR_U64_VECTOR
+ v256 __builtin_xor_u64 (v256, v256);
+ XOR_U64_VPAIR
diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md
index 13f6e0464b5..22b3ce54511 100644
--- a/gcc/config/rs6000/vector-pair.md
+++ b/gcc/config/rs6000/vector-pair.md
@@ -38,51 +38,108 @@
])
;; Iterator doing unary/binary arithmetic on vector pairs
-(define_code_iterator VPAIR_UNARY [neg abs sqrt])
-(define_code_iterator VPAIR_BINARY [plus minus mult div copysign smin smax])
+(define_code_iterator VPAIR_FP_UNARY [abs neg sqrt])
+(define_code_iterator VPAIR_FP_BINARY [copysign div minus mult plus smin smax])
+
+(define_code_iterator VPAIR_INT_BINARY [and ior minus plus smax smin umax umin xor])
;; Give the insn name from the operation
(define_code_attr vpair_op [(abs "abs")
+ (and "and")
(copysign "copysign")
(div "div")
+ (ior "ior")
(minus "sub")
(mult "mul")
+ (not "not")
(neg "neg")
(plus "add")
(smin "smin")
(smax "smax")
- (sqrt "sqrt")])
-
-;; Iterator for creating the wrapper for vector pair built-ins
-(define_int_iterator VPAIR_WRAPPER [UNSPEC_VPAIR_V4DF UNSPEC_VPAIR_V8SF])
-
-;; Map VPAIR_WRAPPER to vector type (i.e. V2DF or V4SF)
-(define_int_attr VPAIR_VECTOR [(UNSPEC_VPAIR_V4DF "V2DF")
- (UNSPEC_VPAIR_V8SF "V4SF")])
-
-(define_int_attr vpair_type [(UNSPEC_VPAIR_V4DF "v4df")
- (UNSPEC_VPAIR_V8SF "v8sf")])
+ (sqrt "sqrt")
+ (umin "umin")
+ (umax "umax")
+ (xor "xor")])
+
+;; Give the register constraint ("v" or "wa") for the integer operation used
+(define_code_attr vpair_ireg [(and "wa")
+ (ior "wa")
+ (minus "v")
+ (not "wa")
+ (neg "v")
+ (plus "v")
+ (smax "v")
+ (smin "v")
+ (umax "v")
+ (umin "v")
+ (xor "wa")])
+
+;; Give the register predicate for the integer operation used
+(define_code_attr vpair_ipred [(and "vsx_register_operand")
+ (ior "vsx_register_operand")
+ (minus "altivec_register_operand")
+ (not "vsx_register_operand")
+ (neg "altivec_register_operand")
+ (plus "altivec_register_operand")
+ (smax "altivec_register_operand")
+ (smin "altivec_register_operand")
+ (umax "altivec_register_operand")
+ (umin "altivec_register_operand")
+ (xor "vsx_register_operand")])
+
+;; Iterator for creating the wrappers for vector pair built-ins
+(define_int_iterator VPAIR_FP_WRAPPER [UNSPEC_VPAIR_V4DF
+ UNSPEC_VPAIR_V8SF])
+
+(define_int_iterator VPAIR_INT_WRAPPER [UNSPEC_VPAIR_V4DI
+ UNSPEC_VPAIR_V8SI
+ UNSPEC_VPAIR_V16HI
+ UNSPEC_VPAIR_V32QI])
+
+;; Map VPAIR_{INT,FP}_WRAPPER to the vector type of the arguments after
+;; they are split
+(define_int_attr VPAIR_VECTOR [(UNSPEC_VPAIR_V4DF "V2DF")
+ (UNSPEC_VPAIR_V8SF "V4SF")
+ (UNSPEC_VPAIR_V32QI "V16QI")
+ (UNSPEC_VPAIR_V16HI "V8HI")
+ (UNSPEC_VPAIR_V8SI "V4SI")
+ (UNSPEC_VPAIR_V4DI "V2DI")])
+
+;; Map VPAIR_{INT,FP}_WRAPPER to a lower case name to identify the vector pair.
+(define_int_attr vpair_type [(UNSPEC_VPAIR_V4DF "v4df")
+ (UNSPEC_VPAIR_V8SF "v8sf")
+ (UNSPEC_VPAIR_V32QI "v32qi")
+ (UNSPEC_VPAIR_V16HI "v16hi")
+ (UNSPEC_VPAIR_V8SI "v8si")
+ (UNSPEC_VPAIR_V4DI "v4di")])
+
+;; Map VPAIR_INT_WRAPPER to constraints used for the negate scratch register.
+(define_int_attr vpair_neg_reg [(UNSPEC_VPAIR_V32QI "&v")
+ (UNSPEC_VPAIR_V16HI "&v")
+ (UNSPEC_VPAIR_V8SI "X")
+ (UNSPEC_VPAIR_V4DI "X")])
\f
;; Vector pair floating point unary operations
(define_insn_and_split "vpair_<vpair_op><vpair_type>2"
[(set (match_operand:OO 0 "vsx_register_operand" "=wa")
- (unspec:OO [(VPAIR_UNARY:OO
+ (unspec:OO [(VPAIR_FP_UNARY:OO
(match_operand:OO 1 "vsx_register_operand" "wa"))]
- VPAIR_WRAPPER))]
+ VPAIR_FP_WRAPPER))]
"TARGET_MMA"
"#"
"&& reload_completed"
- [(set (match_dup 2) (VPAIR_UNARY:<VPAIR_VECTOR> (match_dup 3)))
- (set (match_dup 4) (VPAIR_UNARY:<VPAIR_VECTOR> (match_dup 5)))]
+ [(set (match_dup 2) (VPAIR_FP_UNARY:<VPAIR_VECTOR> (match_dup 3)))
+ (set (match_dup 4) (VPAIR_FP_UNARY:<VPAIR_VECTOR> (match_dup 5)))]
{
unsigned reg0 = reg_or_subregno (operands[0]);
unsigned reg1 = reg_or_subregno (operands[1]);
+ machine_mode vmode = <VPAIR_VECTOR>mode;
- operands[2] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
- operands[3] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
- operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
- operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
+ operands[2] = gen_rtx_REG (vmode, reg0);
+ operands[3] = gen_rtx_REG (vmode, reg1);
+ operands[4] = gen_rtx_REG (vmode, reg0 + 1);
+ operands[5] = gen_rtx_REG (vmode, reg1 + 1);
}
[(set_attr "length" "8")])
@@ -93,8 +150,8 @@
[(neg:OO
(unspec:OO
[(abs:OO (match_operand:OO 1 "vsx_register_operand" "ww"))]
- VPAIR_WRAPPER))]
- VPAIR_WRAPPER))]
+ VPAIR_FP_WRAPPER))]
+ VPAIR_FP_WRAPPER))]
"TARGET_MMA"
"#"
"&& reload_completed"
@@ -107,42 +164,44 @@
{
unsigned reg0 = reg_or_subregno (operands[0]);
unsigned reg1 = reg_or_subregno (operands[1]);
+ machine_mode vmode = <VPAIR_VECTOR>mode;
- operands[2] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
- operands[3] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
- operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
- operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
+ operands[2] = gen_rtx_REG (vmode, reg0);
+ operands[3] = gen_rtx_REG (vmode, reg1);
+ operands[4] = gen_rtx_REG (vmode, reg0 + 1);
+ operands[5] = gen_rtx_REG (vmode, reg1 + 1);
}
[(set_attr "length" "8")])
;; Vector pair floating binary operations
(define_insn_and_split "vpair_<vpair_op><vpair_type>3"
[(set (match_operand:OO 0 "vsx_register_operand" "=wa")
- (unspec:OO [(VPAIR_BINARY:OO
+ (unspec:OO [(VPAIR_FP_BINARY:OO
(match_operand:OO 1 "vsx_register_operand" "wa")
(match_operand:OO 2 "vsx_register_operand" "wa"))]
- VPAIR_WRAPPER))]
+ VPAIR_FP_WRAPPER))]
"TARGET_MMA"
"#"
"&& reload_completed"
[(set (match_dup 3)
- (VPAIR_BINARY:<VPAIR_VECTOR> (match_dup 4)
- (match_dup 5)))
+ (VPAIR_FP_BINARY:<VPAIR_VECTOR> (match_dup 4)
+ (match_dup 5)))
(set (match_dup 6)
- (VPAIR_BINARY:<VPAIR_VECTOR> (match_dup 7)
- (match_dup 8)))]
+ (VPAIR_FP_BINARY:<VPAIR_VECTOR> (match_dup 7)
+ (match_dup 8)))]
{
unsigned reg0 = reg_or_subregno (operands[0]);
unsigned reg1 = reg_or_subregno (operands[1]);
unsigned reg2 = reg_or_subregno (operands[2]);
+ machine_mode vmode = <VPAIR_VECTOR>mode;
- operands[3] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
- operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
- operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2);
+ operands[3] = gen_rtx_REG (vmode, reg0);
+ operands[4] = gen_rtx_REG (vmode, reg1);
+ operands[5] = gen_rtx_REG (vmode, reg2);
- operands[6] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
- operands[7] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
- operands[8] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2 + 1);
+ operands[6] = gen_rtx_REG (vmode, reg0 + 1);
+ operands[7] = gen_rtx_REG (vmode, reg1 + 1);
+ operands[8] = gen_rtx_REG (vmode, reg2 + 1);
}
[(set_attr "length" "8")])
@@ -154,7 +213,7 @@
(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
(match_operand:OO 2 "vsx_register_operand" "wa,0")
(match_operand:OO 3 "vsx_register_operand" "0,wa"))]
- VPAIR_WRAPPER))]
+ VPAIR_FP_WRAPPER))]
"TARGET_MMA"
"#"
"&& reload_completed"
@@ -171,16 +230,17 @@
unsigned reg1 = reg_or_subregno (operands[1]);
unsigned reg2 = reg_or_subregno (operands[2]);
unsigned reg3 = reg_or_subregno (operands[3]);
+ machine_mode vmode = <VPAIR_VECTOR>mode;
- operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
- operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
- operands[6] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2);
- operands[7] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3);
+ operands[4] = gen_rtx_REG (vmode, reg0);
+ operands[5] = gen_rtx_REG (vmode, reg1);
+ operands[6] = gen_rtx_REG (vmode, reg2);
+ operands[7] = gen_rtx_REG (vmode, reg3);
- operands[8] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
- operands[9] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
- operands[10] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2 + 1);
- operands[11] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3 + 1);
+ operands[8] = gen_rtx_REG (vmode, reg0 + 1);
+ operands[9] = gen_rtx_REG (vmode, reg1 + 1);
+ operands[10] = gen_rtx_REG (vmode, reg2 + 1);
+ operands[11] = gen_rtx_REG (vmode, reg3 + 1);
}
[(set_attr "length" "8")])
@@ -192,8 +252,8 @@
(match_operand:OO 2 "vsx_register_operand" "wa,0")
(unspec:OO
[(neg:OO (match_operand:OO 3 "vsx_register_operand" "0,wa"))]
- VPAIR_WRAPPER))]
- VPAIR_WRAPPER))]
+ VPAIR_FP_WRAPPER))]
+ VPAIR_FP_WRAPPER))]
"TARGET_MMA"
"#"
"&& reload_completed"
@@ -210,16 +270,17 @@
unsigned reg1 = reg_or_subregno (operands[1]);
unsigned reg2 = reg_or_subregno (operands[2]);
unsigned reg3 = reg_or_subregno (operands[3]);
+ machine_mode vmode = <VPAIR_VECTOR>mode;
- operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
- operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
- operands[6] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2);
- operands[7] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3);
+ operands[4] = gen_rtx_REG (vmode, reg0);
+ operands[5] = gen_rtx_REG (vmode, reg1);
+ operands[6] = gen_rtx_REG (vmode, reg2);
+ operands[7] = gen_rtx_REG (vmode, reg3);
- operands[8] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
- operands[9] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
- operands[10] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2 + 1);
- operands[11] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3 + 1);
+ operands[8] = gen_rtx_REG (vmode, reg0 + 1);
+ operands[9] = gen_rtx_REG (vmode, reg1 + 1);
+ operands[10] = gen_rtx_REG (vmode, reg2 + 1);
+ operands[11] = gen_rtx_REG (vmode, reg3 + 1);
}
[(set_attr "length" "8")])
@@ -232,8 +293,8 @@
(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
(match_operand:OO 2 "vsx_register_operand" "wa,0")
(match_operand:OO 3 "vsx_register_operand" "0,wa"))]
- VPAIR_WRAPPER))]
- VPAIR_WRAPPER))]
+ VPAIR_FP_WRAPPER))]
+ VPAIR_FP_WRAPPER))]
"TARGET_MMA"
"#"
"&& reload_completed"
@@ -252,16 +313,17 @@
unsigned reg1 = reg_or_subregno (operands[1]);
unsigned reg2 = reg_or_subregno (operands[2]);
unsigned reg3 = reg_or_subregno (operands[3]);
+ machine_mode vmode = <VPAIR_VECTOR>mode;
- operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
- operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
- operands[6] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2);
- operands[7] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3);
+ operands[4] = gen_rtx_REG (vmode, reg0);
+ operands[5] = gen_rtx_REG (vmode, reg1);
+ operands[6] = gen_rtx_REG (vmode, reg2);
+ operands[7] = gen_rtx_REG (vmode, reg3);
- operands[8] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
- operands[9] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
- operands[10] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2 + 1);
- operands[11] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3 + 1);
+ operands[8] = gen_rtx_REG (vmode, reg0 + 1);
+ operands[9] = gen_rtx_REG (vmode, reg1 + 1);
+ operands[10] = gen_rtx_REG (vmode, reg2 + 1);
+ operands[11] = gen_rtx_REG (vmode, reg3 + 1);
}
[(set_attr "length" "8")])
@@ -275,9 +337,9 @@
(match_operand:OO 2 "vsx_register_operand" "wa,0")
(unspec:OO
[(neg:OO (match_operand:OO 3 "vsx_register_operand" "0,wa"))]
- VPAIR_WRAPPER))]
- VPAIR_WRAPPER))]
- VPAIR_WRAPPER))]
+ VPAIR_FP_WRAPPER))]
+ VPAIR_FP_WRAPPER))]
+ VPAIR_FP_WRAPPER))]
"TARGET_MMA"
"#"
"&& reload_completed"
@@ -296,16 +358,17 @@
unsigned reg1 = reg_or_subregno (operands[1]);
unsigned reg2 = reg_or_subregno (operands[2]);
unsigned reg3 = reg_or_subregno (operands[3]);
+ machine_mode vmode = <VPAIR_VECTOR>mode;
- operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
- operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
- operands[6] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2);
- operands[7] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3);
+ operands[4] = gen_rtx_REG (vmode, reg0);
+ operands[5] = gen_rtx_REG (vmode, reg1);
+ operands[6] = gen_rtx_REG (vmode, reg2);
+ operands[7] = gen_rtx_REG (vmode, reg3);
- operands[8] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
- operands[9] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
- operands[10] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2 + 1);
- operands[11] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3 + 1);
+ operands[8] = gen_rtx_REG (vmode, reg0 + 1);
+ operands[9] = gen_rtx_REG (vmode, reg1 + 1);
+ operands[10] = gen_rtx_REG (vmode, reg2 + 1);
+ operands[11] = gen_rtx_REG (vmode, reg3 + 1);
}
[(set_attr "length" "8")])
@@ -415,3 +478,105 @@
operands[6] = GEN_INT (BYTES_BIG_ENDIAN ? 1 : 0);
operands[7] = gen_rtx_REG (DFmode, reg3);
})
+
+\f
+;; Vector pair integer negate support.
+(define_insn_and_split "vpair_neg<vpair_type>2"
+ [(set (match_operand:OO 0 "altivec_register_operand" "=v")
+ (unspec:OO [(neg:OO
+ (match_operand:OO 1 "altivec_register_operand" "v"))]
+ VPAIR_INT_WRAPPER))
+ (clobber (match_scratch:<VPAIR_VECTOR> 2 "=<vpair_neg_reg>"))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (minus:<VPAIR_VECTOR> (match_dup 2)
+ (match_dup 5)))
+ (set (match_dup 6) (minus:<VPAIR_VECTOR> (match_dup 2)
+ (match_dup 7)))]
+{
+ unsigned reg0 = reg_or_subregno (operands[0]);
+ unsigned reg1 = reg_or_subregno (operands[1]);
+ machine_mode vmode = <VPAIR_VECTOR>mode;
+
+ operands[3] = CONST0_RTX (vmode);
+
+ operands[4] = gen_rtx_REG (vmode, reg0);
+ operands[5] = gen_rtx_REG (vmode, reg1);
+
+ operands[6] = gen_rtx_REG (vmode, reg0 + 1);
+ operands[7] = gen_rtx_REG (vmode, reg1 + 1);
+
+ /* If the vector integer size is 32 or 64 bits, we can use the vneg{w,d}
+ instructions. */
+ if (vmode == V4SImode)
+ {
+ emit_insn (gen_negv4si2 (operands[4], operands[5]));
+ emit_insn (gen_negv4si2 (operands[6], operands[7]));
+ DONE;
+ }
+ else if (vmode == V2DImode)
+ {
+ emit_insn (gen_negv2di2 (operands[4], operands[5]));
+ emit_insn (gen_negv2di2 (operands[6], operands[7]));
+ DONE;
+ }
+}
+ [(set_attr "length" "8")])
+
+;; Vector pair integer not support.
+(define_insn_and_split "vpair_not<vpair_type>2"
+ [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+ (unspec:OO [(not:OO (match_operand:OO 1 "vsx_register_operand" "wa"))]
+ VPAIR_INT_WRAPPER))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (not:<VPAIR_VECTOR> (match_dup 3)))
+ (set (match_dup 4) (not:<VPAIR_VECTOR> (match_dup 5)))]
+{
+ unsigned reg0 = reg_or_subregno (operands[0]);
+ unsigned reg1 = reg_or_subregno (operands[1]);
+ machine_mode vmode = <VPAIR_VECTOR>mode;
+
+ operands[2] = gen_rtx_REG (vmode, reg0);
+ operands[3] = gen_rtx_REG (vmode, reg1);
+
+ operands[4] = gen_rtx_REG (vmode, reg0 + 1);
+ operands[5] = gen_rtx_REG (vmode, reg1 + 1);
+}
+ [(set_attr "length" "8")])
+
+;; Vector pair integer binary operations.
+(define_insn_and_split "vpair_<vpair_op><vpair_type>3"
+ [(set (match_operand:OO 0 "<vpair_ipred>" "=<vpair_ireg>")
+ (unspec:OO [(VPAIR_INT_BINARY:OO
+ (match_operand:OO 1 "<vpair_ipred>" "<vpair_ireg>")
+ (match_operand:OO 2 "<vpair_ipred>" "<vpair_ireg>"))]
+ VPAIR_INT_WRAPPER))]
+ "TARGET_MMA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 3)
+ (VPAIR_INT_BINARY:<VPAIR_VECTOR> (match_dup 4)
+ (match_dup 5)))
+ (set (match_dup 6)
+ (VPAIR_INT_BINARY:<VPAIR_VECTOR> (match_dup 7)
+ (match_dup 8)))]
+{
+ unsigned reg0 = reg_or_subregno (operands[0]);
+ unsigned reg1 = reg_or_subregno (operands[1]);
+ unsigned reg2 = reg_or_subregno (operands[2]);
+ machine_mode vmode = <VPAIR_VECTOR>mode;
+
+ operands[3] = gen_rtx_REG (vmode, reg0);
+ operands[4] = gen_rtx_REG (vmode, reg1);
+ operands[5] = gen_rtx_REG (vmode, reg2);
+
+ operands[6] = gen_rtx_REG (vmode, reg0 + 1);
+ operands[7] = gen_rtx_REG (vmode, reg1 + 1);
+ operands[8] = gen_rtx_REG (vmode, reg2 + 1);
+}
+ [(set_attr "length" "8")])
+
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-08-02 3:39 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-02 3:39 [gcc(refs/users/meissner/heads/work129-vpair)] Add initial int built-in overload support Michael Meissner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).