From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id 664133858D39; Wed, 2 Aug 2023 03:39:06 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 664133858D39 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1690947546; bh=G1V1TVVcnFY/yZrHCbXFbYxDwK6yaAabMuHJ/jUlr+U=; h=From:To:Subject:Date:From; b=MlCyZt30Z/hoiLNYuYYEIQkrBXhObszh8Df4GQSWx7DSvBSwUNomMoNX8Iso9F9VY h/g+Q7DzX9mQGrSn33EcT2PvLSeRXaFnTmthxb4jx9BPqNtKzXf1o9oWooIBVR83oy NDeYpeDVvU5NdzPaKZacBDzfv1XnrkWiZ/WHwEs8= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work129-vpair)] Add initial int built-in overload support. X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/work129-vpair X-Git-Oldrev: dbef0570515018657e9c2785346273ec088bcf65 X-Git-Newrev: 15f1121135323895ab8a4b3a91d90db60625c0fd Message-Id: <20230802033906.664133858D39@sourceware.org> Date: Wed, 2 Aug 2023 03:39:06 +0000 (GMT) List-Id: https://gcc.gnu.org/g:15f1121135323895ab8a4b3a91d90db60625c0fd commit 15f1121135323895ab8a4b3a91d90db60625c0fd Author: Michael Meissner Date: Tue Aug 1 23:38:42 2023 -0400 Add initial int built-in overload support. 2023-08-01 Michael Meissner gcc/ * config/rs6000/rs6000-builtin.cc (fold_builtin_overload_arith): Rename from fold_builtin_overload_fp. (rs6000_gimple_fold_builtin): Add support for integer overload built-ins. * config/rs6000/rs6000-builtins.def (__builtin_*_i32_*): Add built-in integer functions for overloading. (__builtin_u32_*): Likewise. (__builtin_i64_*): Likewise. (__builtin_u64_*): Likewise. * config/rs6000/rs6000-gen-builtins.cc (MAXOVLDSTANZAS): Bump up to 1,024. * config/rs6000/rs6000-overload.def (__builtin_*_i32): Add built-in overloaded integer functions. (__builtin_u32): Likewise. (__builtin_i64): Likewise. (__builtin_u64): Likewise. * config/rs6000/vector-pair.md (VPAIR_FP_UNARY): Rename from VPAIR_UNARY. (VPAIR_FP_BINARY): Rename from VPAIR_BINARY. (VPAIR_INT_BINARY): New code iterator. (vpair_op): Update for integer built-in functions. (vpair_ireg): New code attribute. (vpair_ipred): Likewise. (VPAIR_FP_WRAPPER): Rename from VPAIR_WRAPPER. (VPAIR_INT_WRAPPER): New int iterator. (VPAIR_VECTOR): New int attribute. (vpair_type): New int attribute. (vpair_neg_reg): New int attribute. (floating point insns): Update to use VPAIR_FP_WRAPPER, VPAIR_FP_UNARY, VPAIR_FP_BINARY. (integer insns): Add new integer insns for built-in functions. Diff: --- gcc/config/rs6000/rs6000-builtin.cc | 110 +++++++++-- gcc/config/rs6000/rs6000-builtins.def | 257 ++++++++++++++++++++++++ gcc/config/rs6000/rs6000-gen-builtins.cc | 2 +- gcc/config/rs6000/rs6000-overload.def | 265 +++++++++++++++++++++++++ gcc/config/rs6000/vector-pair.md | 323 +++++++++++++++++++++++-------- 5 files changed, 864 insertions(+), 93 deletions(-) diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index e32d9175a0c..62d56c28946 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -1261,14 +1261,14 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi, return true; } -/* Helper function to fold the overloaded fp functions for the scalar and - vector types that support the operation directly. 
*/ +/* Helper function to fold the overloaded arithmetic functions for the scalar + and vector types that support the operation directly. */ static void -fold_builtin_overload_fp (gimple_stmt_iterator *gsi, - gimple *stmt, - enum tree_code code, - int nargs) +fold_builtin_overload_arith (gimple_stmt_iterator *gsi, + gimple *stmt, + enum tree_code code, + int nargs) { location_t loc = gimple_location (stmt); tree lhs = gimple_call_lhs (stmt); @@ -2280,28 +2280,88 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) case RS6000_BIF_ABS_F32_VECTOR: case RS6000_BIF_ABS_F64_SCALAR: case RS6000_BIF_ABS_F64_VECTOR: - fold_builtin_overload_fp (gsi, stmt, ABS_EXPR, 1); + fold_builtin_overload_arith (gsi, stmt, ABS_EXPR, 1); return true; + case RS6000_BIF_ADD_I32_SCALAR: + case RS6000_BIF_ADD_I32_VECTOR: + case RS6000_BIF_ADD_I64_SCALAR: + case RS6000_BIF_ADD_I64_VECTOR: case RS6000_BIF_ADD_F32_SCALAR: case RS6000_BIF_ADD_F32_VECTOR: case RS6000_BIF_ADD_F64_SCALAR: case RS6000_BIF_ADD_F64_VECTOR: - fold_builtin_overload_fp (gsi, stmt, PLUS_EXPR, 2); + case RS6000_BIF_ADD_U32_SCALAR: + case RS6000_BIF_ADD_U32_VECTOR: + case RS6000_BIF_ADD_U64_SCALAR: + case RS6000_BIF_ADD_U64_VECTOR: + fold_builtin_overload_arith (gsi, stmt, PLUS_EXPR, 2); + return true; + + case RS6000_BIF_AND_I32_SCALAR: + case RS6000_BIF_AND_I32_VECTOR: + case RS6000_BIF_AND_I64_SCALAR: + case RS6000_BIF_AND_I64_VECTOR: + case RS6000_BIF_AND_U32_SCALAR: + case RS6000_BIF_AND_U32_VECTOR: + case RS6000_BIF_AND_U64_SCALAR: + case RS6000_BIF_AND_U64_VECTOR: + fold_builtin_overload_arith (gsi, stmt, BIT_AND_EXPR, 2); + return true; + + case RS6000_BIF_IOR_I32_SCALAR: + case RS6000_BIF_IOR_I32_VECTOR: + case RS6000_BIF_IOR_I64_SCALAR: + case RS6000_BIF_IOR_I64_VECTOR: + case RS6000_BIF_IOR_U32_SCALAR: + case RS6000_BIF_IOR_U32_VECTOR: + case RS6000_BIF_IOR_U64_SCALAR: + case RS6000_BIF_IOR_U64_VECTOR: + fold_builtin_overload_arith (gsi, stmt, BIT_IOR_EXPR, 2); return true; case RS6000_BIF_MULT_F32_SCALAR: case RS6000_BIF_MULT_F32_VECTOR: case RS6000_BIF_MULT_F64_SCALAR: case RS6000_BIF_MULT_F64_VECTOR: - fold_builtin_overload_fp (gsi, stmt, MULT_EXPR, 2); + fold_builtin_overload_arith (gsi, stmt, MULT_EXPR, 2); return true; + case RS6000_BIF_NEG_I32_SCALAR: + case RS6000_BIF_NEG_I32_VECTOR: + case RS6000_BIF_NEG_I64_SCALAR: + case RS6000_BIF_NEG_I64_VECTOR: case RS6000_BIF_NEG_F32_SCALAR: case RS6000_BIF_NEG_F32_VECTOR: case RS6000_BIF_NEG_F64_SCALAR: case RS6000_BIF_NEG_F64_VECTOR: - fold_builtin_overload_fp (gsi, stmt, NEGATE_EXPR, 1); + case RS6000_BIF_NEG_U32_SCALAR: + case RS6000_BIF_NEG_U32_VECTOR: + case RS6000_BIF_NEG_U64_SCALAR: + case RS6000_BIF_NEG_U64_VECTOR: + fold_builtin_overload_arith (gsi, stmt, NEGATE_EXPR, 1); + return true; + + case RS6000_BIF_NOT_I32_SCALAR: + case RS6000_BIF_NOT_I32_VECTOR: + case RS6000_BIF_NOT_I64_SCALAR: + case RS6000_BIF_NOT_I64_VECTOR: + case RS6000_BIF_NOT_U32_SCALAR: + case RS6000_BIF_NOT_U32_VECTOR: + case RS6000_BIF_NOT_U64_SCALAR: + case RS6000_BIF_NOT_U64_VECTOR: + fold_builtin_overload_arith (gsi, stmt, BIT_NOT_EXPR, 1); + return true; + + case RS6000_BIF_XOR_I32_SCALAR: + case RS6000_BIF_XOR_I32_VECTOR: + case RS6000_BIF_XOR_I64_SCALAR: + case RS6000_BIF_XOR_I64_VECTOR: + case RS6000_BIF_XOR_U32_SCALAR: + case RS6000_BIF_XOR_U32_VECTOR: + case RS6000_BIF_XOR_U64_SCALAR: + case RS6000_BIF_XOR_U64_VECTOR: + fold_builtin_overload_arith (gsi, stmt, BIT_XOR_EXPR, 2); return true; case RS6000_BIF_REDUCE_F32_SCALAR: @@ -2316,26 +2376,50 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator 
*gsi) return true; } + case RS6000_BIF_SMAX_I32_SCALAR: + case RS6000_BIF_SMAX_I32_VECTOR: + case RS6000_BIF_SMAX_I64_SCALAR: + case RS6000_BIF_SMAX_I64_VECTOR: case RS6000_BIF_SMAX_F32_SCALAR: case RS6000_BIF_SMAX_F32_VECTOR: case RS6000_BIF_SMAX_F64_SCALAR: case RS6000_BIF_SMAX_F64_VECTOR: - fold_builtin_overload_fp (gsi, stmt, MAX_EXPR, 2); + case RS6000_BIF_UMAX_U32_SCALAR: + case RS6000_BIF_UMAX_U32_VECTOR: + case RS6000_BIF_UMAX_U64_SCALAR: + case RS6000_BIF_UMAX_U64_VECTOR: + fold_builtin_overload_arith (gsi, stmt, MAX_EXPR, 2); return true; + case RS6000_BIF_SMIN_I32_SCALAR: + case RS6000_BIF_SMIN_I32_VECTOR: + case RS6000_BIF_SMIN_I64_SCALAR: + case RS6000_BIF_SMIN_I64_VECTOR: case RS6000_BIF_SMIN_F32_SCALAR: case RS6000_BIF_SMIN_F32_VECTOR: case RS6000_BIF_SMIN_F64_SCALAR: case RS6000_BIF_SMIN_F64_VECTOR: - fold_builtin_overload_fp (gsi, stmt, MIN_EXPR, 2); + case RS6000_BIF_UMIN_U32_SCALAR: + case RS6000_BIF_UMIN_U32_VECTOR: + case RS6000_BIF_UMIN_U64_SCALAR: + case RS6000_BIF_UMIN_U64_VECTOR: + fold_builtin_overload_arith (gsi, stmt, MIN_EXPR, 2); return true; + case RS6000_BIF_SUB_I32_SCALAR: + case RS6000_BIF_SUB_I32_VECTOR: + case RS6000_BIF_SUB_I64_SCALAR: + case RS6000_BIF_SUB_I64_VECTOR: case RS6000_BIF_SUB_F32_SCALAR: case RS6000_BIF_SUB_F32_VECTOR: case RS6000_BIF_SUB_F64_SCALAR: case RS6000_BIF_SUB_F64_VECTOR: - fold_builtin_overload_fp (gsi, stmt, MINUS_EXPR, 2); + case RS6000_BIF_SUB_U32_SCALAR: + case RS6000_BIF_SUB_U32_VECTOR: + case RS6000_BIF_SUB_U64_SCALAR: + case RS6000_BIF_SUB_U64_VECTOR: + fold_builtin_overload_arith (gsi, stmt, MINUS_EXPR, 2); return true; default: diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index acc76adca12..6a991efa63e 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -4289,3 +4289,260 @@ SUB_F64_VECTOR nothing {} v256 __builtin_sub_f64_vpair (v256, v256); SUB_F64_VPAIR vpair_subv4df3 {mma} + +; Built-in functions for overloaded integer operations, including the scalar +; and 128-bit vector variants that are converted into direct operations.  The +; 256-bit variants are kept as vector pair insns that are split into separate +; operations after register allocation. 
+ + int __builtin_add_i32_scalar (int, int); + ADD_I32_SCALAR nothing {} + vsi __builtin_add_i32_vector (vsi, vsi); + ADD_I32_VECTOR nothing {} + v256 __builtin_add_i32_vpair (v256, v256); + ADD_I32_VPAIR vpair_addv8si3 {mma} + + unsigned int __builtin_add_u32_scalar (unsigned int, unsigned int); + ADD_U32_SCALAR nothing {} + vui __builtin_add_u32_vector (vui, vui); + ADD_U32_VECTOR nothing {} + v256 __builtin_add_u32_vpair (v256, v256); + ADD_U32_VPAIR vpair_addv8si3 {mma} + + long long __builtin_add_i64_scalar (long long, long long); + ADD_I64_SCALAR nothing {} + vsll __builtin_add_i64_vector (vsll, vsll); + ADD_I64_VECTOR nothing {} + v256 __builtin_add_i64_vpair (v256, v256); + ADD_I64_VPAIR vpair_addv4di3 {mma} + + unsigned long long __builtin_add_u64_scalar (unsigned long long, unsigned long long); + ADD_U64_SCALAR nothing {} + vull __builtin_add_u64_vector (vull, vull); + ADD_U64_VECTOR nothing {} + v256 __builtin_add_u64_vpair (v256, v256); + ADD_U64_VPAIR vpair_addv4di3 {mma} + + int __builtin_and_i32_scalar (int, int); + AND_I32_SCALAR nothing {} + vsi __builtin_and_i32_vector (vsi, vsi); + AND_I32_VECTOR nothing {} + v256 __builtin_and_i32_vpair (v256, v256); + AND_I32_VPAIR vpair_andv8si3 {mma} + + unsigned int __builtin_and_u32_scalar (unsigned int, unsigned int); + AND_U32_SCALAR nothing {} + vui __builtin_and_u32_vector (vui, vui); + AND_U32_VECTOR nothing {} + v256 __builtin_and_u32_vpair (v256, v256); + AND_U32_VPAIR vpair_andv8si3 {mma} + + long long __builtin_and_i64_scalar (long long, long long); + AND_I64_SCALAR nothing {} + vsll __builtin_and_i64_vector (vsll, vsll); + AND_I64_VECTOR nothing {} + v256 __builtin_and_i64_vpair (v256, v256); + AND_I64_VPAIR vpair_andv4di3 {mma} + + unsigned long long __builtin_and_u64_scalar (unsigned long long, unsigned long long); + AND_U64_SCALAR nothing {} + vull __builtin_and_u64_vector (vull, vull); + AND_U64_VECTOR nothing {} + v256 __builtin_and_u64_vpair (v256, v256); + AND_U64_VPAIR vpair_andv4di3 {mma} + + int __builtin_ior_i32_scalar (int, int); + IOR_I32_SCALAR nothing {} + vsi __builtin_ior_i32_vector (vsi, vsi); + IOR_I32_VECTOR nothing {} + v256 __builtin_ior_i32_vpair (v256, v256); + IOR_I32_VPAIR vpair_iorv8si3 {mma} + + unsigned int __builtin_ior_u32_scalar (unsigned int, unsigned int); + IOR_U32_SCALAR nothing {} + vui __builtin_ior_u32_vector (vui, vui); + IOR_U32_VECTOR nothing {} + v256 __builtin_ior_u32_vpair (v256, v256); + IOR_U32_VPAIR vpair_iorv8si3 {mma} + + long long __builtin_ior_i64_scalar (long long, long long); + IOR_I64_SCALAR nothing {} + vsll __builtin_ior_i64_vector (vsll, vsll); + IOR_I64_VECTOR nothing {} + v256 __builtin_ior_i64_vpair (v256, v256); + IOR_I64_VPAIR vpair_iorv4di3 {mma} + + unsigned long long __builtin_ior_u64_scalar (unsigned long long, unsigned long long); + IOR_U64_SCALAR nothing {} + vull __builtin_ior_u64_vector (vull, vull); + IOR_U64_VECTOR nothing {} + v256 __builtin_ior_u64_vpair (v256, v256); + IOR_U64_VPAIR vpair_iorv4di3 {mma} + + int __builtin_neg_i32_scalar (int); + NEG_I32_SCALAR nothing {} + vsi __builtin_neg_i32_vector (vsi); + NEG_I32_VECTOR nothing {} + v256 __builtin_neg_i32_vpair (v256); + NEG_I32_VPAIR vpair_negv8si2 {mma} + + unsigned int __builtin_neg_u32_scalar (unsigned int); + NEG_U32_SCALAR nothing {} + vui __builtin_neg_u32_vector (vui); + NEG_U32_VECTOR nothing {} + v256 __builtin_neg_u32_vpair (v256); + NEG_U32_VPAIR vpair_negv8si2 {mma} + + long long __builtin_neg_i64_scalar (long long); + NEG_I64_SCALAR nothing {} + vsll __builtin_neg_i64_vector 
(vsll); + NEG_I64_VECTOR nothing {} + v256 __builtin_neg_i64_vpair (v256); + NEG_I64_VPAIR vpair_negv4di2 {mma} + + unsigned long long __builtin_neg_u64_scalar (unsigned long long); + NEG_U64_SCALAR nothing {} + vull __builtin_neg_u64_vector (vull); + NEG_U64_VECTOR nothing {} + v256 __builtin_neg_u64_vpair (v256); + NEG_U64_VPAIR vpair_negv4di2 {mma} + + int __builtin_not_i32_scalar (int); + NOT_I32_SCALAR nothing {} + vsi __builtin_not_i32_vector (vsi); + NOT_I32_VECTOR nothing {} + v256 __builtin_not_i32_vpair (v256); + NOT_I32_VPAIR vpair_notv8si2 {mma} + + unsigned int __builtin_not_u32_scalar (unsigned int); + NOT_U32_SCALAR nothing {} + vui __builtin_not_u32_vector (vui); + NOT_U32_VECTOR nothing {} + v256 __builtin_not_u32_vpair (v256); + NOT_U32_VPAIR vpair_notv8si2 {mma} + + long long __builtin_not_i64_scalar (long long); + NOT_I64_SCALAR nothing {} + vsll __builtin_not_i64_vector (vsll); + NOT_I64_VECTOR nothing {} + v256 __builtin_not_i64_vpair (v256); + NOT_I64_VPAIR vpair_notv4di2 {mma} + + unsigned long long __builtin_not_u64_scalar (unsigned long long); + NOT_U64_SCALAR nothing {} + vull __builtin_not_u64_vector (vull); + NOT_U64_VECTOR nothing {} + v256 __builtin_not_u64_vpair (v256); + NOT_U64_VPAIR vpair_notv4di2 {mma} + + int __builtin_smax_i32_scalar (int, int); + SMAX_I32_SCALAR nothing {} + vsi __builtin_smax_i32_vector (vsi, vsi); + SMAX_I32_VECTOR nothing {} + v256 __builtin_smax_i32_vpair (v256, v256); + SMAX_I32_VPAIR vpair_smaxv8si3 {mma} + + long long __builtin_smax_i64_scalar (long long, long long); + SMAX_I64_SCALAR nothing {} + vsll __builtin_smax_i64_vector (vsll, vsll); + SMAX_I64_VECTOR nothing {} + v256 __builtin_smax_i64_vpair (v256, v256); + SMAX_I64_VPAIR vpair_smaxv4di3 {mma} + + int __builtin_smin_i32_scalar (int, int); + SMIN_I32_SCALAR nothing {} + vsi __builtin_smin_i32_vector (vsi, vsi); + SMIN_I32_VECTOR nothing {} + v256 __builtin_smin_i32_vpair (v256, v256); + SMIN_I32_VPAIR vpair_sminv8si3 {mma} + + long long __builtin_smin_i64_scalar (long long, long long); + SMIN_I64_SCALAR nothing {} + vsll __builtin_smin_i64_vector (vsll, vsll); + SMIN_I64_VECTOR nothing {} + v256 __builtin_smin_i64_vpair (v256, v256); + SMIN_I64_VPAIR vpair_sminv4di3 {mma} + + int __builtin_sub_i32_scalar (int, int); + SUB_I32_SCALAR nothing {} + vsi __builtin_sub_i32_vector (vsi, vsi); + SUB_I32_VECTOR nothing {} + v256 __builtin_sub_i32_vpair (v256, v256); + SUB_I32_VPAIR vpair_subv8si3 {mma} + + unsigned int __builtin_sub_u32_scalar (unsigned int, unsigned int); + SUB_U32_SCALAR nothing {} + vui __builtin_sub_u32_vector (vui, vui); + SUB_U32_VECTOR nothing {} + v256 __builtin_sub_u32_vpair (v256, v256); + SUB_U32_VPAIR vpair_subv8si3 {mma} + + long long __builtin_sub_i64_scalar (long long, long long); + SUB_I64_SCALAR nothing {} + vsll __builtin_sub_i64_vector (vsll, vsll); + SUB_I64_VECTOR nothing {} + v256 __builtin_sub_i64_vpair (v256, v256); + SUB_I64_VPAIR vpair_subv4di3 {mma} + + unsigned long long __builtin_sub_u64_scalar (unsigned long long, unsigned long long); + SUB_U64_SCALAR nothing {} + vull __builtin_sub_u64_vector (vull, vull); + SUB_U64_VECTOR nothing {} + v256 __builtin_sub_u64_vpair (v256, v256); + SUB_U64_VPAIR vpair_subv4di3 {mma} + + unsigned int __builtin_umax_u32_scalar (unsigned int, unsigned int); + UMAX_U32_SCALAR nothing {} + vui __builtin_umax_u32_vector (vui, vui); + UMAX_U32_VECTOR nothing {} + v256 __builtin_umax_u32_vpair (v256, v256); + UMAX_U32_VPAIR vpair_umaxv8si3 {mma} + + unsigned long long __builtin_umax_u64_scalar 
(unsigned long long, unsigned long long); + UMAX_U64_SCALAR nothing {} + vull __builtin_umax_u64_vector (vull, vull); + UMAX_U64_VECTOR nothing {} + v256 __builtin_umax_u64_vpair (v256, v256); + UMAX_U64_VPAIR vpair_umaxv4di3 {mma} + + unsigned int __builtin_umin_u32_scalar (unsigned int, unsigned int); + UMIN_U32_SCALAR nothing {} + vui __builtin_umin_u32_vector (vui, vui); + UMIN_U32_VECTOR nothing {} + v256 __builtin_umin_u32_vpair (v256, v256); + UMIN_U32_VPAIR vpair_uminv8si3 {mma} + + unsigned long long __builtin_umin_u64_scalar (unsigned long long, unsigned long long); + UMIN_U64_SCALAR nothing {} + vull __builtin_umin_u64_vector (vull, vull); + UMIN_U64_VECTOR nothing {} + v256 __builtin_umin_u64_vpair (v256, v256); + UMIN_U64_VPAIR vpair_uminv4di3 {mma} + + int __builtin_xor_i32_scalar (int, int); + XOR_I32_SCALAR nothing {} + vsi __builtin_xor_i32_vector (vsi, vsi); + XOR_I32_VECTOR nothing {} + v256 __builtin_xor_i32_vpair (v256, v256); + XOR_I32_VPAIR vpair_xorv8si3 {mma} + + unsigned int __builtin_xor_u32_scalar (unsigned int, unsigned int); + XOR_U32_SCALAR nothing {} + vui __builtin_xor_u32_vector (vui, vui); + XOR_U32_VECTOR nothing {} + v256 __builtin_xor_u32_vpair (v256, v256); + XOR_U32_VPAIR vpair_xorv8si3 {mma} + + long long __builtin_xor_i64_scalar (long long, long long); + XOR_I64_SCALAR nothing {} + vsll __builtin_xor_i64_vector (vsll, vsll); + XOR_I64_VECTOR nothing {} + v256 __builtin_xor_i64_vpair (v256, v256); + XOR_I64_VPAIR vpair_xorv4di3 {mma} + + unsigned long long __builtin_xor_u64_scalar (unsigned long long, unsigned long long); + XOR_U64_SCALAR nothing {} + vull __builtin_xor_u64_vector (vull, vull); + XOR_U64_VECTOR nothing {} + v256 __builtin_xor_u64_vpair (v256, v256); + XOR_U64_VPAIR vpair_xorv4di3 {mma} diff --git a/gcc/config/rs6000/rs6000-gen-builtins.cc b/gcc/config/rs6000/rs6000-gen-builtins.cc index a2f442ed90d..cbd7b916c03 100644 --- a/gcc/config/rs6000/rs6000-gen-builtins.cc +++ b/gcc/config/rs6000/rs6000-gen-builtins.cc @@ -444,7 +444,7 @@ struct ovld_stanza char *ifdef; }; -#define MAXOVLDSTANZAS 512 +#define MAXOVLDSTANZAS 1024 static ovld_stanza ovld_stanzas[MAXOVLDSTANZAS]; static int num_ovld_stanzas; static int curr_ovld_stanza; diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def index bbc26de4568..1bf0d1ceee9 100644 --- a/gcc/config/rs6000/rs6000-overload.def +++ b/gcc/config/rs6000/rs6000-overload.def @@ -6365,3 +6365,268 @@ SUB_F64_VECTOR v256 __builtin_sub_f64 (v256, v256); SUB_F64_VPAIR + +;; Overloaded integer built-in functions +[ADD_I32, SKIP, __builtin_add_i32] + int __builtin_add_i32 (int, int); + ADD_I32_SCALAR + vsi __builtin_add_i32 (vsi, vsi); + ADD_I32_VECTOR + v256 __builtin_add_i32 (v256, v256); + ADD_I32_VPAIR + +[ADD_U32, SKIP, __builtin_add_u32] + unsigned int __builtin_add_u32 (unsigned int, unsigned int); + ADD_U32_SCALAR + vui __builtin_add_u32 (vui, vui); + ADD_U32_VECTOR + v256 __builtin_add_u32 (v256, v256); + ADD_U32_VPAIR + +[ADD_I64, SKIP, __builtin_add_i64] + long long __builtin_add_i64 (long long, long long); + ADD_I64_SCALAR + vsll __builtin_add_i64 (vsll, vsll); + ADD_I64_VECTOR + v256 __builtin_add_i64 (v256, v256); + ADD_I64_VPAIR + +[ADD_U64, SKIP, __builtin_add_u64] + unsigned long long __builtin_add_u64 (unsigned long long, unsigned long long); + ADD_U64_SCALAR + vull __builtin_add_u64 (vull, vull); + ADD_U64_VECTOR + v256 __builtin_add_u64 (v256, v256); + ADD_U64_VPAIR + +[AND_U32, SKIP, __builtin_and_u32] + unsigned int __builtin_and_u32 (unsigned int, 
unsigned int); + AND_U32_SCALAR + vui __builtin_and_u32 (vui, vui); + AND_U32_VECTOR + v256 __builtin_and_u32 (v256, v256); + AND_U32_VPAIR + +[AND_I64, SKIP, __builtin_and_i64] + long long __builtin_and_i64 (long long, long long); + AND_I64_SCALAR + vsll __builtin_and_i64 (vsll, vsll); + AND_I64_VECTOR + v256 __builtin_and_i64 (v256, v256); + AND_I64_VPAIR + +[AND_U64, SKIP, __builtin_and_u64] + unsigned long long __builtin_and_u64 (unsigned long long, unsigned long long); + AND_U64_SCALAR + vull __builtin_and_u64 (vull, vull); + AND_U64_VECTOR + v256 __builtin_and_u64 (v256, v256); + AND_U64_VPAIR + +[IOR_U32, SKIP, __builtin_ior_u32] + unsigned int __builtin_ior_u32 (unsigned int, unsigned int); + IOR_U32_SCALAR + vui __builtin_ior_u32 (vui, vui); + IOR_U32_VECTOR + v256 __builtin_ior_u32 (v256, v256); + IOR_U32_VPAIR + +[IOR_I64, SKIP, __builtin_ior_i64] + long long __builtin_ior_i64 (long long, long long); + IOR_I64_SCALAR + vsll __builtin_ior_i64 (vsll, vsll); + IOR_I64_VECTOR + v256 __builtin_ior_i64 (v256, v256); + IOR_I64_VPAIR + +[IOR_U64, SKIP, __builtin_ior_u64] + unsigned long long __builtin_ior_u64 (unsigned long long, unsigned long long); + IOR_U64_SCALAR + vull __builtin_ior_u64 (vull, vull); + IOR_U64_VECTOR + v256 __builtin_ior_u64 (v256, v256); + IOR_U64_VPAIR + +[NEG_I32, SKIP, __builtin_neg_i32] + int __builtin_neg_i32 (int); + NEG_I32_SCALAR + vsi __builtin_neg_i32 (vsi); + NEG_I32_VECTOR + v256 __builtin_neg_i32 (v256); + NEG_I32_VPAIR + +[NEG_U32, SKIP, __builtin_neg_u32] + unsigned int __builtin_neg_u32 (unsigned int); + NEG_U32_SCALAR + vui __builtin_neg_u32 (vui); + NEG_U32_VECTOR + v256 __builtin_neg_u32 (v256); + NEG_U32_VPAIR + +[NEG_I64, SKIP, __builtin_neg_i64] + long long __builtin_neg_i64 (long long); + NEG_I64_SCALAR + vsll __builtin_neg_i64 (vsll); + NEG_I64_VECTOR + v256 __builtin_neg_i64 (v256); + NEG_I64_VPAIR + +[NEG_U64, SKIP, __builtin_neg_u64] + unsigned long long __builtin_neg_u64 (unsigned long long); + NEG_U64_SCALAR + vull __builtin_neg_u64 (vull); + NEG_U64_VECTOR + v256 __builtin_neg_u64 (v256); + NEG_U64_VPAIR + +[NOT_I32, SKIP, __builtin_not_i32] + int __builtin_not_i32 (int); + NOT_I32_SCALAR + vsi __builtin_not_i32 (vsi); + NOT_I32_VECTOR + v256 __builtin_not_i32 (v256); + NOT_I32_VPAIR + +[NOT_U32, SKIP, __builtin_not_u32] + unsigned int __builtin_not_u32 (unsigned int); + NOT_U32_SCALAR + vui __builtin_not_u32 (vui); + NOT_U32_VECTOR + v256 __builtin_not_u32 (v256); + NOT_U32_VPAIR + +[NOT_I64, SKIP, __builtin_not_i64] + long long __builtin_not_i64 (long long); + NOT_I64_SCALAR + vsll __builtin_not_i64 (vsll); + NOT_I64_VECTOR + v256 __builtin_not_i64 (v256); + NOT_I64_VPAIR + +[NOT_U64, SKIP, __builtin_not_u64] + unsigned long long __builtin_not_u64 (unsigned long long); + NOT_U64_SCALAR + vull __builtin_not_u64 (vull); + NOT_U64_VECTOR + v256 __builtin_not_u64 (v256); + NOT_U64_VPAIR + +[SUB_I32, SKIP, __builtin_sub_i32] + int __builtin_sub_i32 (int, int); + SUB_I32_SCALAR + vsi __builtin_sub_i32 (vsi, vsi); + SUB_I32_VECTOR + v256 __builtin_sub_i32 (v256, v256); + SUB_I32_VPAIR + +[SUB_U32, SKIP, __builtin_sub_u32] + unsigned int __builtin_sub_u32 (unsigned int, unsigned int); + SUB_U32_SCALAR + vui __builtin_sub_u32 (vui, vui); + SUB_U32_VECTOR + v256 __builtin_sub_u32 (v256, v256); + SUB_U32_VPAIR + +[SUB_I64, SKIP, __builtin_sub_i64] + long long __builtin_sub_i64 (long long, long long); + SUB_I64_SCALAR + vsll __builtin_sub_i64 (vsll, vsll); + SUB_I64_VECTOR + v256 __builtin_sub_i64 (v256, v256); + SUB_I64_VPAIR + +[SUB_U64, 
SKIP, __builtin_sub_u64] + unsigned long long __builtin_sub_u64 (unsigned long long, unsigned long long); + SUB_U64_SCALAR + vull __builtin_sub_u64 (vull, vull); + SUB_U64_VECTOR + v256 __builtin_sub_u64 (v256, v256); + SUB_U64_VPAIR + +[SMAX_I32, SKIP, __builtin_smax_i32] + int __builtin_smax_i32 (int, int); + SMAX_I32_SCALAR + vsi __builtin_smax_i32 (vsi, vsi); + SMAX_I32_VECTOR + v256 __builtin_smax_i32 (v256, v256); + SMAX_I32_VPAIR + +[SMAX_I64, SKIP, __builtin_smax_i64] + long long __builtin_smax_i64 (long long, long long); + SMAX_I64_SCALAR + vsll __builtin_smax_i64 (vsll, vsll); + SMAX_I64_VECTOR + v256 __builtin_smax_i64 (v256, v256); + SMAX_I64_VPAIR + +[SMIN_I32, SKIP, __builtin_smin_i32] + int __builtin_smin_i32 (int, int); + SMIN_I32_SCALAR + vsi __builtin_smin_i32 (vsi, vsi); + SMIN_I32_VECTOR + v256 __builtin_smin_i32 (v256, v256); + SMIN_I32_VPAIR + +[SMIN_I64, SKIP, __builtin_smin_i64] + long long __builtin_smin_i64 (long long, long long); + SMIN_I64_SCALAR + vsll __builtin_smin_i64 (vsll, vsll); + SMIN_I64_VECTOR + v256 __builtin_smin_i64 (v256, v256); + SMIN_I64_VPAIR + +[UMAX_U32, SKIP, __builtin_umax_u32] + unsigned int __builtin_umax_u32 (unsigned int, unsigned int); + UMAX_U32_SCALAR + vui __builtin_umax_u32 (vui, vui); + UMAX_U32_VECTOR + v256 __builtin_umax_u32 (v256, v256); + UMAX_U32_VPAIR + +[UMAX_U64, SKIP, __builtin_umax_u64] + unsigned long long __builtin_umax_u64 (unsigned long long, unsigned long long); + UMAX_U64_SCALAR + vull __builtin_umax_u64 (vull, vull); + UMAX_U64_VECTOR + v256 __builtin_umax_u64 (v256, v256); + UMAX_U64_VPAIR + +[UMIN_U32, SKIP, __builtin_umin_u32] + unsigned int __builtin_umin_u32 (unsigned int, unsigned int); + UMIN_U32_SCALAR + vui __builtin_umin_u32 (vui, vui); + UMIN_U32_VECTOR + v256 __builtin_umin_u32 (v256, v256); + UMIN_U32_VPAIR + +[UMIN_U64, SKIP, __builtin_umin_u64] + unsigned long long __builtin_umin_u64 (unsigned long long, unsigned long long); + UMIN_U64_SCALAR + vull __builtin_umin_u64 (vull, vull); + UMIN_U64_VECTOR + v256 __builtin_umin_u64 (v256, v256); + UMIN_U64_VPAIR + +[XOR_U32, SKIP, __builtin_xor_u32] + unsigned int __builtin_xor_u32 (unsigned int, unsigned int); + XOR_U32_SCALAR + vui __builtin_xor_u32 (vui, vui); + XOR_U32_VECTOR + v256 __builtin_xor_u32 (v256, v256); + XOR_U32_VPAIR + +[XOR_I64, SKIP, __builtin_xor_i64] + long long __builtin_xor_i64 (long long, long long); + XOR_I64_SCALAR + vsll __builtin_xor_i64 (vsll, vsll); + XOR_I64_VECTOR + v256 __builtin_xor_i64 (v256, v256); + XOR_I64_VPAIR + +[XOR_U64, SKIP, __builtin_xor_u64] + unsigned long long __builtin_xor_u64 (unsigned long long, unsigned long long); + XOR_U64_SCALAR + vull __builtin_xor_u64 (vull, vull); + XOR_U64_VECTOR + v256 __builtin_xor_u64 (v256, v256); + XOR_U64_VPAIR diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md index 13f6e0464b5..22b3ce54511 100644 --- a/gcc/config/rs6000/vector-pair.md +++ b/gcc/config/rs6000/vector-pair.md @@ -38,51 +38,108 @@ ]) ;; Iterator doing unary/binary arithmetic on vector pairs -(define_code_iterator VPAIR_UNARY [neg abs sqrt]) -(define_code_iterator VPAIR_BINARY [plus minus mult div copysign smin smax]) +(define_code_iterator VPAIR_FP_UNARY [abs neg sqrt]) +(define_code_iterator VPAIR_FP_BINARY [copysign div minus mult plus smin smax]) + +(define_code_iterator VPAIR_INT_BINARY [and ior minus plus smax smin umax umin xor]) ;; Give the insn name from the opertion (define_code_attr vpair_op [(abs "abs") + (and "and") (copysign "copysign") (div "div") + (ior "ior") (minus 
"sub") (mult "mul") + (not "not") (neg "neg") (plus "add") (smin "smin") (smax "smax") - (sqrt "sqrt")]) - -;; Iterator for creating the wrapper for vector pair built-ins -(define_int_iterator VPAIR_WRAPPER [UNSPEC_VPAIR_V4DF UNSPEC_VPAIR_V8SF]) - -;; Map VPAIR_WRAPPER to vector type (i.e. V2DF or V4SF) -(define_int_attr VPAIR_VECTOR [(UNSPEC_VPAIR_V4DF "V2DF") - (UNSPEC_VPAIR_V8SF "V4SF")]) - -(define_int_attr vpair_type [(UNSPEC_VPAIR_V4DF "v4df") - (UNSPEC_VPAIR_V8SF "v8sf")]) + (sqrt "sqrt") + (umin "umin") + (umax "umax") + (xor "xor")]) + +;; Give the register constraint ("v" or "wa") for the integer operation used +(define_code_attr vpair_ireg [(and "wa") + (ior "wa") + (minus "v") + (not "wa") + (neg "v") + (plus "v") + (smax "v") + (smin "v") + (umax "v") + (umin "v") + (xor "wa")]) + +;; Give the register previdcate for the integer operation used +(define_code_attr vpair_ipred [(and "vsx_register_operand") + (ior "vsx_register_operand") + (minus "altivec_register_operand") + (not "vsx_register_operand") + (neg "altivec_register_operand") + (plus "altivec_register_operand") + (smax "altivec_register_operand") + (smin "altivec_register_operand") + (umax "altivec_register_operand") + (umin "altivec_register_operand") + (xor "vsx_register_operand")]) + +;; Iterator for creating the wrappers for vector pair built-ins +(define_int_iterator VPAIR_FP_WRAPPER [UNSPEC_VPAIR_V4DF + UNSPEC_VPAIR_V8SF]) + +(define_int_iterator VPAIR_INT_WRAPPER [UNSPEC_VPAIR_V4DI + UNSPEC_VPAIR_V8SI + UNSPEC_VPAIR_V16HI + UNSPEC_VPAIR_V32QI]) + +;; Map VPAIR_{INT,FP}_WRAPPER to vector the type of the arguments after they +;; are split +(define_int_attr VPAIR_VECTOR [(UNSPEC_VPAIR_V4DF "V2DF") + (UNSPEC_VPAIR_V8SF "V4SF") + (UNSPEC_VPAIR_V32QI "V16QI") + (UNSPEC_VPAIR_V16HI "V8HI") + (UNSPEC_VPAIR_V8SI "V4SI") + (UNSPEC_VPAIR_V4DI "V2DI")]) + +;; Map VPAIR_{INT,FP}_WRAPPER to a lower case name to identify the vector pair. +(define_int_attr vpair_type [(UNSPEC_VPAIR_V4DF "v4df") + (UNSPEC_VPAIR_V8SF "v8sf") + (UNSPEC_VPAIR_V32QI "v32qi") + (UNSPEC_VPAIR_V16HI "v16hi") + (UNSPEC_VPAIR_V8SI "v8si") + (UNSPEC_VPAIR_V4DI "v4di")]) + +;; Map VPAIR_INT_WRAPPER to constraints used for the negate scratch register. 
+(define_int_attr vpair_neg_reg [(UNSPEC_VPAIR_V32QI "&v") + (UNSPEC_VPAIR_V16HI "&v") + (UNSPEC_VPAIR_V8SI "X") + (UNSPEC_VPAIR_V4DI "X")]) ;; Vector pair floating point unary operations (define_insn_and_split "vpair_2" [(set (match_operand:OO 0 "vsx_register_operand" "=wa") - (unspec:OO [(VPAIR_UNARY:OO + (unspec:OO [(VPAIR_FP_UNARY:OO (match_operand:OO 1 "vsx_register_operand" "wa"))] - VPAIR_WRAPPER))] + VPAIR_FP_WRAPPER))] "TARGET_MMA" "#" "&& reload_completed" - [(set (match_dup 2) (VPAIR_UNARY: (match_dup 3))) - (set (match_dup 4) (VPAIR_UNARY: (match_dup 5)))] + [(set (match_dup 2) (VPAIR_FP_UNARY: (match_dup 3))) + (set (match_dup 4) (VPAIR_FP_UNARY: (match_dup 5)))] { unsigned reg0 = reg_or_subregno (operands[0]); unsigned reg1 = reg_or_subregno (operands[1]); + machine_mode vmode = mode; - operands[2] = gen_rtx_REG (mode, reg0); - operands[3] = gen_rtx_REG (mode, reg1); - operands[4] = gen_rtx_REG (mode, reg0 + 1); - operands[5] = gen_rtx_REG (mode, reg1 + 1); + operands[2] = gen_rtx_REG (vmode, reg0); + operands[3] = gen_rtx_REG (vmode, reg1); + operands[4] = gen_rtx_REG (vmode, reg0 + 1); + operands[5] = gen_rtx_REG (vmode, reg1 + 1); } [(set_attr "length" "8")]) @@ -93,8 +150,8 @@ [(neg:OO (unspec:OO [(abs:OO (match_operand:OO 1 "vsx_register_operand" "ww"))] - VPAIR_WRAPPER))] - VPAIR_WRAPPER))] + VPAIR_FP_WRAPPER))] + VPAIR_FP_WRAPPER))] "TARGET_MMA" "#" "&& reload_completed" @@ -107,42 +164,44 @@ { unsigned reg0 = reg_or_subregno (operands[0]); unsigned reg1 = reg_or_subregno (operands[1]); + machine_mode vmode = mode; - operands[2] = gen_rtx_REG (mode, reg0); - operands[3] = gen_rtx_REG (mode, reg1); - operands[4] = gen_rtx_REG (mode, reg0 + 1); - operands[5] = gen_rtx_REG (mode, reg1 + 1); + operands[2] = gen_rtx_REG (vmode, reg0); + operands[3] = gen_rtx_REG (vmode, reg1); + operands[4] = gen_rtx_REG (vmode, reg0 + 1); + operands[5] = gen_rtx_REG (vmode, reg1 + 1); } [(set_attr "length" "8")]) ;; Vector pair floating binary operations (define_insn_and_split "vpair_3" [(set (match_operand:OO 0 "vsx_register_operand" "=wa") - (unspec:OO [(VPAIR_BINARY:OO + (unspec:OO [(VPAIR_FP_BINARY:OO (match_operand:OO 1 "vsx_register_operand" "wa") (match_operand:OO 2 "vsx_register_operand" "wa"))] - VPAIR_WRAPPER))] + VPAIR_FP_WRAPPER))] "TARGET_MMA" "#" "&& reload_completed" [(set (match_dup 3) - (VPAIR_BINARY: (match_dup 4) - (match_dup 5))) + (VPAIR_FP_BINARY: (match_dup 4) + (match_dup 5))) (set (match_dup 6) - (VPAIR_BINARY: (match_dup 7) - (match_dup 8)))] + (VPAIR_FP_BINARY: (match_dup 7) + (match_dup 8)))] { unsigned reg0 = reg_or_subregno (operands[0]); unsigned reg1 = reg_or_subregno (operands[1]); unsigned reg2 = reg_or_subregno (operands[2]); + machine_mode vmode = mode; - operands[3] = gen_rtx_REG (mode, reg0); - operands[4] = gen_rtx_REG (mode, reg1); - operands[5] = gen_rtx_REG (mode, reg2); + operands[3] = gen_rtx_REG (vmode, reg0); + operands[4] = gen_rtx_REG (vmode, reg1); + operands[5] = gen_rtx_REG (vmode, reg2); - operands[6] = gen_rtx_REG (mode, reg0 + 1); - operands[7] = gen_rtx_REG (mode, reg1 + 1); - operands[8] = gen_rtx_REG (mode, reg2 + 1); + operands[6] = gen_rtx_REG (vmode, reg0 + 1); + operands[7] = gen_rtx_REG (vmode, reg1 + 1); + operands[8] = gen_rtx_REG (vmode, reg2 + 1); } [(set_attr "length" "8")]) @@ -154,7 +213,7 @@ (match_operand:OO 1 "vsx_register_operand" "%wa,wa") (match_operand:OO 2 "vsx_register_operand" "wa,0") (match_operand:OO 3 "vsx_register_operand" "0,wa"))] - VPAIR_WRAPPER))] + VPAIR_FP_WRAPPER))] "TARGET_MMA" "#" "&& 
reload_completed" @@ -171,16 +230,17 @@ unsigned reg1 = reg_or_subregno (operands[1]); unsigned reg2 = reg_or_subregno (operands[2]); unsigned reg3 = reg_or_subregno (operands[3]); + machine_mode vmode = mode; - operands[4] = gen_rtx_REG (mode, reg0); - operands[5] = gen_rtx_REG (mode, reg1); - operands[6] = gen_rtx_REG (mode, reg2); - operands[7] = gen_rtx_REG (mode, reg3); + operands[4] = gen_rtx_REG (vmode, reg0); + operands[5] = gen_rtx_REG (vmode, reg1); + operands[6] = gen_rtx_REG (vmode, reg2); + operands[7] = gen_rtx_REG (vmode, reg3); - operands[8] = gen_rtx_REG (mode, reg0 + 1); - operands[9] = gen_rtx_REG (mode, reg1 + 1); - operands[10] = gen_rtx_REG (mode, reg2 + 1); - operands[11] = gen_rtx_REG (mode, reg3 + 1); + operands[8] = gen_rtx_REG (vmode, reg0 + 1); + operands[9] = gen_rtx_REG (vmode, reg1 + 1); + operands[10] = gen_rtx_REG (vmode, reg2 + 1); + operands[11] = gen_rtx_REG (vmode, reg3 + 1); } [(set_attr "length" "8")]) @@ -192,8 +252,8 @@ (match_operand:OO 2 "vsx_register_operand" "wa,0") (unspec:OO [(neg:OO (match_operand:OO 3 "vsx_register_operand" "0,wa"))] - VPAIR_WRAPPER))] - VPAIR_WRAPPER))] + VPAIR_FP_WRAPPER))] + VPAIR_FP_WRAPPER))] "TARGET_MMA" "#" "&& reload_completed" @@ -210,16 +270,17 @@ unsigned reg1 = reg_or_subregno (operands[1]); unsigned reg2 = reg_or_subregno (operands[2]); unsigned reg3 = reg_or_subregno (operands[3]); + machine_mode vmode = mode; - operands[4] = gen_rtx_REG (mode, reg0); - operands[5] = gen_rtx_REG (mode, reg1); - operands[6] = gen_rtx_REG (mode, reg2); - operands[7] = gen_rtx_REG (mode, reg3); + operands[4] = gen_rtx_REG (vmode, reg0); + operands[5] = gen_rtx_REG (vmode, reg1); + operands[6] = gen_rtx_REG (vmode, reg2); + operands[7] = gen_rtx_REG (vmode, reg3); - operands[8] = gen_rtx_REG (mode, reg0 + 1); - operands[9] = gen_rtx_REG (mode, reg1 + 1); - operands[10] = gen_rtx_REG (mode, reg2 + 1); - operands[11] = gen_rtx_REG (mode, reg3 + 1); + operands[8] = gen_rtx_REG (vmode, reg0 + 1); + operands[9] = gen_rtx_REG (vmode, reg1 + 1); + operands[10] = gen_rtx_REG (vmode, reg2 + 1); + operands[11] = gen_rtx_REG (vmode, reg3 + 1); } [(set_attr "length" "8")]) @@ -232,8 +293,8 @@ (match_operand:OO 1 "vsx_register_operand" "%wa,wa") (match_operand:OO 2 "vsx_register_operand" "wa,0") (match_operand:OO 3 "vsx_register_operand" "0,wa"))] - VPAIR_WRAPPER))] - VPAIR_WRAPPER))] + VPAIR_FP_WRAPPER))] + VPAIR_FP_WRAPPER))] "TARGET_MMA" "#" "&& reload_completed" @@ -252,16 +313,17 @@ unsigned reg1 = reg_or_subregno (operands[1]); unsigned reg2 = reg_or_subregno (operands[2]); unsigned reg3 = reg_or_subregno (operands[3]); + machine_mode vmode = mode; - operands[4] = gen_rtx_REG (mode, reg0); - operands[5] = gen_rtx_REG (mode, reg1); - operands[6] = gen_rtx_REG (mode, reg2); - operands[7] = gen_rtx_REG (mode, reg3); + operands[4] = gen_rtx_REG (vmode, reg0); + operands[5] = gen_rtx_REG (vmode, reg1); + operands[6] = gen_rtx_REG (vmode, reg2); + operands[7] = gen_rtx_REG (vmode, reg3); - operands[8] = gen_rtx_REG (mode, reg0 + 1); - operands[9] = gen_rtx_REG (mode, reg1 + 1); - operands[10] = gen_rtx_REG (mode, reg2 + 1); - operands[11] = gen_rtx_REG (mode, reg3 + 1); + operands[8] = gen_rtx_REG (vmode, reg0 + 1); + operands[9] = gen_rtx_REG (vmode, reg1 + 1); + operands[10] = gen_rtx_REG (vmode, reg2 + 1); + operands[11] = gen_rtx_REG (vmode, reg3 + 1); } [(set_attr "length" "8")]) @@ -275,9 +337,9 @@ (match_operand:OO 2 "vsx_register_operand" "wa,0") (unspec:OO [(neg:OO (match_operand:OO 3 "vsx_register_operand" "0,wa"))] - VPAIR_WRAPPER))] 
- VPAIR_WRAPPER))] - VPAIR_WRAPPER))] + VPAIR_FP_WRAPPER))] + VPAIR_FP_WRAPPER))] + VPAIR_FP_WRAPPER))] "TARGET_MMA" "#" "&& reload_completed" @@ -296,16 +358,17 @@ unsigned reg1 = reg_or_subregno (operands[1]); unsigned reg2 = reg_or_subregno (operands[2]); unsigned reg3 = reg_or_subregno (operands[3]); + machine_mode vmode = mode; - operands[4] = gen_rtx_REG (mode, reg0); - operands[5] = gen_rtx_REG (mode, reg1); - operands[6] = gen_rtx_REG (mode, reg2); - operands[7] = gen_rtx_REG (mode, reg3); + operands[4] = gen_rtx_REG (vmode, reg0); + operands[5] = gen_rtx_REG (vmode, reg1); + operands[6] = gen_rtx_REG (vmode, reg2); + operands[7] = gen_rtx_REG (vmode, reg3); - operands[8] = gen_rtx_REG (mode, reg0 + 1); - operands[9] = gen_rtx_REG (mode, reg1 + 1); - operands[10] = gen_rtx_REG (mode, reg2 + 1); - operands[11] = gen_rtx_REG (mode, reg3 + 1); + operands[8] = gen_rtx_REG (vmode, reg0 + 1); + operands[9] = gen_rtx_REG (vmode, reg1 + 1); + operands[10] = gen_rtx_REG (vmode, reg2 + 1); + operands[11] = gen_rtx_REG (vmode, reg3 + 1); } [(set_attr "length" "8")]) @@ -415,3 +478,105 @@ operands[6] = GEN_INT (BYTES_BIG_ENDIAN ? 1 : 0); operands[7] = gen_rtx_REG (DFmode, reg3); }) + + +;; Vector pair integer negate support. +(define_insn_and_split "vpair_neg2" + [(set (match_operand:OO 0 "altivec_register_operand" "=v") + (unspec:OO [(neg:OO + (match_operand:OO 1 "altivec_register_operand" "v"))] + VPAIR_INT_WRAPPER)) + (clobber (match_scratch: 2 "="))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (minus: (match_dup 2) + (match_dup 5))) + (set (match_dup 6) (minus: (match_dup 2) + (match_dup 7)))] +{ + unsigned reg0 = reg_or_subregno (operands[0]); + unsigned reg1 = reg_or_subregno (operands[1]); + machine_mode vmode = mode; + + operands[3] = CONST0_RTX (vmode); + + operands[4] = gen_rtx_REG (vmode, reg0); + operands[5] = gen_rtx_REG (vmode, reg1); + + operands[6] = gen_rtx_REG (vmode, reg0 + 1); + operands[7] = gen_rtx_REG (vmode, reg1 + 1); + + /* If the vector integer size is 32 or 64 bits, we can use the vneg{w,d} + instructions. */ + if (vmode == V4SImode) + { + emit_insn (gen_negv4si2 (operands[4], operands[5])); + emit_insn (gen_negv4si2 (operands[6], operands[7])); + DONE; + } + else if (vmode == V2DImode) + { + emit_insn (gen_negv2di2 (operands[4], operands[5])); + emit_insn (gen_negv2di2 (operands[6], operands[7])); + DONE; + } +} + [(set_attr "length" "8")]) + +;; Vector pair integer not support. +(define_insn_and_split "vpair_not2" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO [(not:OO (match_operand:OO 1 "vsx_register_operand" "wa"))] + VPAIR_INT_WRAPPER))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 2) (not: (match_dup 3))) + (set (match_dup 4) (not: (match_dup 5)))] +{ + unsigned reg0 = reg_or_subregno (operands[0]); + unsigned reg1 = reg_or_subregno (operands[1]); + machine_mode vmode = mode; + + operands[2] = gen_rtx_REG (vmode, reg0); + operands[3] = gen_rtx_REG (vmode, reg1); + + operands[4] = gen_rtx_REG (vmode, reg0 + 1); + operands[5] = gen_rtx_REG (vmode, reg1 + 1); +} + [(set_attr "length" "8")]) + +;; Vector pair integer binary operations. 
+(define_insn_and_split "vpair_<vpair_op><vpair_type>3"
+  [(set (match_operand:OO 0 "<vpair_ipred>" "=<vpair_ireg>")
+	(unspec:OO [(VPAIR_INT_BINARY:OO
+		     (match_operand:OO 1 "<vpair_ipred>" "<vpair_ireg>")
+		     (match_operand:OO 2 "<vpair_ipred>" "<vpair_ireg>"))]
+		   VPAIR_INT_WRAPPER))]
+  "TARGET_MMA"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 3)
+	(VPAIR_INT_BINARY:<VPAIR_VECTOR> (match_dup 4)
+					 (match_dup 5)))
+   (set (match_dup 6)
+	(VPAIR_INT_BINARY:<VPAIR_VECTOR> (match_dup 7)
+					 (match_dup 8)))]
+{
+  unsigned reg0 = reg_or_subregno (operands[0]);
+  unsigned reg1 = reg_or_subregno (operands[1]);
+  unsigned reg2 = reg_or_subregno (operands[2]);
+  machine_mode vmode = <VPAIR_VECTOR>mode;
+
+  operands[3] = gen_rtx_REG (vmode, reg0);
+  operands[4] = gen_rtx_REG (vmode, reg1);
+  operands[5] = gen_rtx_REG (vmode, reg2);
+
+  operands[6] = gen_rtx_REG (vmode, reg0 + 1);
+  operands[7] = gen_rtx_REG (vmode, reg1 + 1);
+  operands[8] = gen_rtx_REG (vmode, reg2 + 1);
+}
+  [(set_attr "length" "8")])
+
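
For illustration only (not part of the patch): a minimal usage sketch of the new overloaded integer built-ins, assuming a compiler built from this branch with power10/MMA support enabled (e.g. -mcpu=power10 -mmma).  The helper names add_scalar, add_vector, add_pairs and the v4si_t typedef are made up for the example; __vector_pair is the existing opaque 256-bit type from the MMA support.

    /* Hypothetical example, not from the patch.  The same overloaded name
       resolves to a scalar add, a single 128-bit V4SI add, or a vector
       pair add that is split into two V4SI adds after reload.  */
    typedef __vector int v4si_t;

    int
    add_scalar (int a, int b)
    {
      return __builtin_add_i32 (a, b);	/* folded to a GIMPLE PLUS_EXPR */
    }

    v4si_t
    add_vector (v4si_t a, v4si_t b)
    {
      return __builtin_add_i32 (a, b);	/* one 128-bit vector add */
    }

    void
    add_pairs (__vector_pair *dst, const __vector_pair *a,
	       const __vector_pair *b, long n)
    {
      /* Each __vector_pair holds eight 32-bit ints; the vector pair add is
	 split into two V4SI adds after register allocation.  */
      for (long i = 0; i < n; i++)
	dst[i] = __builtin_add_i32 (a[i], b[i]);
    }

Keeping separate _scalar/_vector/_vpair built-ins behind one overloaded name mirrors how the existing floating point overloads on this branch work, so user code can stay type-generic across scalar, 128-bit vector, and vector pair data.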