public inbox for gcc-cvs@sourceware.org
* [gcc(refs/users/meissner/heads/work129-vpair)] Add initial int built-in overload support.
@ 2023-08-02  3:39 Michael Meissner
From: Michael Meissner @ 2023-08-02  3:39 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:15f1121135323895ab8a4b3a91d90db60625c0fd

commit 15f1121135323895ab8a4b3a91d90db60625c0fd
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Aug 1 23:38:42 2023 -0400

    Add initial int built-in overload support.
    
    2023-08-01  Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/rs6000-builtin.cc (fold_builtin_overload_arith): Rename
            from fold_builtin_overload_fp.
            (rs6000_gimple_fold_builtin): Add support for integer overload
            built-ins.
            * config/rs6000/rs6000-builtins.def (__builtin_*_i32_*): Add built-in
            integer functions for overloading.
            (__builtin_*_u32_*): Likewise.
            (__builtin_*_i64_*): Likewise.
            (__builtin_*_u64_*): Likewise.
            * config/rs6000/rs6000-gen-builtins.cc (MAXOVLDSTANZAS): Bump up to
            1024.
            * config/rs6000/rs6000-overload.def (__builtin_*_i32): Add built-in
            overloaded integer functions.
            (__builtin_*_u32): Likewise.
            (__builtin_*_i64): Likewise.
            (__builtin_*_u64): Likewise.
            * config/rs6000/vector-pair.md (VPAIR_FP_UNARY): Rename from
            VPAIR_UNARY.
            (VPAIR_FP_BINARY): Rename from VPAIR_BINARY.
            (VPAIR_INT_BINARY): New code iterator.
            (vpair_op): Update for integer built-in functions.
            (vpair_ireg): New code attribute.
            (vpair_ipred): Likewise.
            (VPAIR_FP_WRAPPER): Rename from VPAIR_WRAPPER.
            (VPAIR_INT_WRAPPER): New int iterator.
            (VPAIR_VECTOR): New int attribute.
            (vpair_type): New int attribute.
            (vpair_neg_reg): New int attribute.
            (floating point insns): Update to use VPAIR_FP_WRAPPER, VPAIR_FP_UNARY,
            VPAIR_FP_BINARY.
            (integer insns): Add new integer insns for built-in functions.
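
    As a usage sketch (illustration only, not part of the patch): the
    overloaded names come from rs6000-overload.def below, while spelling
    vsi as "vector int" and v256 as "__vector_pair" is assumed from the
    existing rs6000 built-in conventions.

        /* Sketch of the new overloaded integer built-ins.  Each name
           resolves on the argument type: the scalar and 128-bit vector
           forms are folded to the direct operation in gimple, while the
           __vector_pair form maps to the vpair_*v8si3 insns that are
           split after reload.  */
        int           a, b;
        vector int    v1, v2;      /* 128-bit V4SI vectors (vsi).   */
        __vector_pair p1, p2;      /* 256-bit vector pairs (v256).  */

        int           rs = __builtin_add_i32 (a, b);    /* a + b      */
        vector int    rv = __builtin_add_i32 (v1, v2);  /* v1 + v2    */
        __vector_pair rp = __builtin_add_i32 (p1, p2);  /* vpair add  */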

Diff:
---
 gcc/config/rs6000/rs6000-builtin.cc      | 110 +++++++++--
 gcc/config/rs6000/rs6000-builtins.def    | 257 ++++++++++++++++++++++++
 gcc/config/rs6000/rs6000-gen-builtins.cc |   2 +-
 gcc/config/rs6000/rs6000-overload.def    | 265 +++++++++++++++++++++++++
 gcc/config/rs6000/vector-pair.md         | 323 +++++++++++++++++++++++--------
 5 files changed, 864 insertions(+), 93 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index e32d9175a0c..62d56c28946 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -1261,14 +1261,14 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
   return true;
 }
 
-/* Helper function to fold the overloaded fp functions for the scalar and
-   vector types that support the operation directly.  */
+/* Helper function to fold the overloaded arithmetic functions for the scalar
+   and vector types that support the operation directly.  */
 
 static void
-fold_builtin_overload_fp (gimple_stmt_iterator *gsi,
-			  gimple *stmt,
-			  enum tree_code code,
-			  int nargs)
+fold_builtin_overload_arith (gimple_stmt_iterator *gsi,
+			     gimple *stmt,
+			     enum tree_code code,
+			     int nargs)
 {
   location_t loc = gimple_location (stmt);
   tree lhs = gimple_call_lhs (stmt);
@@ -2280,28 +2280,88 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case RS6000_BIF_ABS_F32_VECTOR:
     case RS6000_BIF_ABS_F64_SCALAR:
     case RS6000_BIF_ABS_F64_VECTOR:
-      fold_builtin_overload_fp (gsi, stmt, ABS_EXPR, 1);
+      fold_builtin_overload_arith (gsi, stmt, ABS_EXPR, 1);
       return true;
 
+    case RS6000_BIF_ADD_I32_SCALAR:
+    case RS6000_BIF_ADD_I32_VECTOR:
+    case RS6000_BIF_ADD_I64_SCALAR:
+    case RS6000_BIF_ADD_I64_VECTOR:
     case RS6000_BIF_ADD_F32_SCALAR:
     case RS6000_BIF_ADD_F32_VECTOR:
     case RS6000_BIF_ADD_F64_SCALAR:
     case RS6000_BIF_ADD_F64_VECTOR:
-      fold_builtin_overload_fp (gsi, stmt, PLUS_EXPR, 2);
+    case RS6000_BIF_ADD_U32_SCALAR:
+    case RS6000_BIF_ADD_U32_VECTOR:
+    case RS6000_BIF_ADD_U64_SCALAR:
+    case RS6000_BIF_ADD_U64_VECTOR:
+      fold_builtin_overload_arith (gsi, stmt, PLUS_EXPR, 2);
+      return true;
+
+    case RS6000_BIF_AND_I32_SCALAR:
+    case RS6000_BIF_AND_I32_VECTOR:
+    case RS6000_BIF_AND_I64_SCALAR:
+    case RS6000_BIF_AND_I64_VECTOR:
+    case RS6000_BIF_AND_U32_SCALAR:
+    case RS6000_BIF_AND_U32_VECTOR:
+    case RS6000_BIF_AND_U64_SCALAR:
+    case RS6000_BIF_AND_U64_VECTOR:
+      fold_builtin_overload_arith (gsi, stmt, BIT_AND_EXPR, 2);
+      return true;
+
+    case RS6000_BIF_IOR_I32_SCALAR:
+    case RS6000_BIF_IOR_I32_VECTOR:
+    case RS6000_BIF_IOR_I64_SCALAR:
+    case RS6000_BIF_IOR_I64_VECTOR:
+    case RS6000_BIF_IOR_U32_SCALAR:
+    case RS6000_BIF_IOR_U32_VECTOR:
+    case RS6000_BIF_IOR_U64_SCALAR:
+    case RS6000_BIF_IOR_U64_VECTOR:
+      fold_builtin_overload_arith (gsi, stmt, BIT_IOR_EXPR, 2);
       return true;
 
     case RS6000_BIF_MULT_F32_SCALAR:
     case RS6000_BIF_MULT_F32_VECTOR:
     case RS6000_BIF_MULT_F64_SCALAR:
     case RS6000_BIF_MULT_F64_VECTOR:
-      fold_builtin_overload_fp (gsi, stmt, MULT_EXPR, 2);
+      fold_builtin_overload_arith (gsi, stmt, MULT_EXPR, 2);
       return true;
 
+    case RS6000_BIF_NEG_I32_SCALAR:
+    case RS6000_BIF_NEG_I32_VECTOR:
+    case RS6000_BIF_NEG_I64_SCALAR:
+    case RS6000_BIF_NEG_I64_VECTOR:
     case RS6000_BIF_NEG_F32_SCALAR:
     case RS6000_BIF_NEG_F32_VECTOR:
     case RS6000_BIF_NEG_F64_SCALAR:
     case RS6000_BIF_NEG_F64_VECTOR:
-      fold_builtin_overload_fp (gsi, stmt, NEGATE_EXPR, 1);
+    case RS6000_BIF_NEG_U32_SCALAR:
+    case RS6000_BIF_NEG_U32_VECTOR:
+    case RS6000_BIF_NEG_U64_SCALAR:
+    case RS6000_BIF_NEG_U64_VECTOR:
+      fold_builtin_overload_arith (gsi, stmt, NEGATE_EXPR, 1);
+      return true;
+
+    case RS6000_BIF_NOT_I32_SCALAR:
+    case RS6000_BIF_NOT_I32_VECTOR:
+    case RS6000_BIF_NOT_I64_SCALAR:
+    case RS6000_BIF_NOT_I64_VECTOR:
+    case RS6000_BIF_NOT_U32_SCALAR:
+    case RS6000_BIF_NOT_U32_VECTOR:
+    case RS6000_BIF_NOT_U64_SCALAR:
+    case RS6000_BIF_NOT_U64_VECTOR:
+      fold_builtin_overload_arith (gsi, stmt, BIT_NOT_EXPR, 1);
+      return true;
+
+    case RS6000_BIF_XOR_I32_SCALAR:
+    case RS6000_BIF_XOR_I32_VECTOR:
+    case RS6000_BIF_XOR_I64_SCALAR:
+    case RS6000_BIF_XOR_I64_VECTOR:
+    case RS6000_BIF_XOR_U32_SCALAR:
+    case RS6000_BIF_XOR_U32_VECTOR:
+    case RS6000_BIF_XOR_U64_SCALAR:
+    case RS6000_BIF_XOR_U64_VECTOR:
+      fold_builtin_overload_arith (gsi, stmt, BIT_XOR_EXPR, 2);
       return true;
 
     case RS6000_BIF_REDUCE_F32_SCALAR:
@@ -2316,26 +2376,50 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	return true;
       }
 
+    case RS6000_BIF_SMAX_I32_SCALAR:
+    case RS6000_BIF_SMAX_I32_VECTOR:
+    case RS6000_BIF_SMAX_I64_SCALAR:
+    case RS6000_BIF_SMAX_I64_VECTOR:
     case RS6000_BIF_SMAX_F32_SCALAR:
     case RS6000_BIF_SMAX_F32_VECTOR:
     case RS6000_BIF_SMAX_F64_SCALAR:
     case RS6000_BIF_SMAX_F64_VECTOR:
-      fold_builtin_overload_fp (gsi, stmt, MAX_EXPR, 2);
+    case RS6000_BIF_UMAX_U32_SCALAR:
+    case RS6000_BIF_UMAX_U32_VECTOR:
+    case RS6000_BIF_UMAX_U64_SCALAR:
+    case RS6000_BIF_UMAX_U64_VECTOR:
+      fold_builtin_overload_arith (gsi, stmt, MAX_EXPR, 2);
       return true;
 
+    case RS6000_BIF_SMIN_I32_SCALAR:
+    case RS6000_BIF_SMIN_I32_VECTOR:
+    case RS6000_BIF_SMIN_I64_SCALAR:
+    case RS6000_BIF_SMIN_I64_VECTOR:
     case RS6000_BIF_SMIN_F32_SCALAR:
     case RS6000_BIF_SMIN_F32_VECTOR:
     case RS6000_BIF_SMIN_F64_SCALAR:
     case RS6000_BIF_SMIN_F64_VECTOR:
-      fold_builtin_overload_fp (gsi, stmt, MIN_EXPR, 2);
+    case RS6000_BIF_UMIN_U32_SCALAR:
+    case RS6000_BIF_UMIN_U32_VECTOR:
+    case RS6000_BIF_UMIN_U64_SCALAR:
+    case RS6000_BIF_UMIN_U64_VECTOR:
+      fold_builtin_overload_arith (gsi, stmt, MIN_EXPR, 2);
       return true;
 
 
+    case RS6000_BIF_SUB_I32_SCALAR:
+    case RS6000_BIF_SUB_I32_VECTOR:
+    case RS6000_BIF_SUB_I64_SCALAR:
+    case RS6000_BIF_SUB_I64_VECTOR:
     case RS6000_BIF_SUB_F32_SCALAR:
     case RS6000_BIF_SUB_F32_VECTOR:
     case RS6000_BIF_SUB_F64_SCALAR:
     case RS6000_BIF_SUB_F64_VECTOR:
-      fold_builtin_overload_fp (gsi, stmt, MINUS_EXPR, 2);
+    case RS6000_BIF_SUB_U32_SCALAR:
+    case RS6000_BIF_SUB_U32_VECTOR:
+    case RS6000_BIF_SUB_U64_SCALAR:
+    case RS6000_BIF_SUB_U64_VECTOR:
+      fold_builtin_overload_arith (gsi, stmt, MINUS_EXPR, 2);
       return true;
 
     default:
diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def
index acc76adca12..6a991efa63e 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -4289,3 +4289,260 @@
     SUB_F64_VECTOR nothing {}
   v256 __builtin_sub_f64_vpair (v256, v256);
     SUB_F64_VPAIR vpair_subv4df3 {mma}
+
+; Built-ins for overloaded integer operations.  The scalar and
+; 128-bit vector variants are folded into direct operations.  The
+; 256-bit variants are kept as vector pair insns that are split
+; into separate operations after register allocation.
+
+  int __builtin_add_i32_scalar (int, int);
+    ADD_I32_SCALAR nothing {}
+  vsi __builtin_add_i32_vector (vsi, vsi);
+    ADD_I32_VECTOR nothing {}
+  v256 __builtin_add_i32_vpair (v256, v256);
+    ADD_I32_VPAIR vpair_addv8si3 {mma}
+
+  unsigned int __builtin_add_u32_scalar (unsigned int, unsigned int);
+    ADD_U32_SCALAR nothing {}
+  vui __builtin_add_u32_vector (vui, vui);
+    ADD_U32_VECTOR nothing {}
+  v256 __builtin_add_u32_vpair (v256, v256);
+    ADD_U32_VPAIR vpair_addv8si3 {mma}
+
+  long long __builtin_add_i64_scalar (long long, long long);
+    ADD_I64_SCALAR nothing {}
+  vsll __builtin_add_i64_vector (vsll, vsll);
+    ADD_I64_VECTOR nothing {}
+  v256 __builtin_add_i64_vpair (v256, v256);
+    ADD_I64_VPAIR vpair_addv4di3 {mma}
+
+  unsigned long long __builtin_add_u64_scalar (unsigned long long, unsigned long long);
+    ADD_U64_SCALAR nothing {}
+  vull __builtin_add_u64_vector (vull, vull);
+    ADD_U64_VECTOR nothing {}
+  v256 __builtin_add_u64_vpair (v256, v256);
+    ADD_U64_VPAIR vpair_addv4di3 {mma}
+
+  int __builtin_and_i32_scalar (int, int);
+    AND_I32_SCALAR nothing {}
+  vsi __builtin_and_i32_vector (vsi, vsi);
+    AND_I32_VECTOR nothing {}
+  v256 __builtin_and_i32_vpair (v256, v256);
+    AND_I32_VPAIR vpair_andv8si3 {mma}
+
+  unsigned int __builtin_and_u32_scalar (unsigned int, unsigned int);
+    AND_U32_SCALAR nothing {}
+  vui __builtin_and_u32_vector (vui, vui);
+    AND_U32_VECTOR nothing {}
+  v256 __builtin_and_u32_vpair (v256, v256);
+    AND_U32_VPAIR vpair_andv8si3 {mma}
+
+  long long __builtin_and_i64_scalar (long long, long long);
+    AND_I64_SCALAR nothing {}
+  vsll __builtin_and_i64_vector (vsll, vsll);
+    AND_I64_VECTOR nothing {}
+  v256 __builtin_and_i64_vpair (v256, v256);
+    AND_I64_VPAIR vpair_andv4di3 {mma}
+
+  unsigned long long __builtin_and_u64_scalar (unsigned long long, unsigned long long);
+    AND_U64_SCALAR nothing {}
+  vull __builtin_and_u64_vector (vull, vull);
+    AND_U64_VECTOR nothing {}
+  v256 __builtin_and_u64_vpair (v256, v256);
+    AND_U64_VPAIR vpair_andv4di3 {mma}
+
+  int __builtin_ior_i32_scalar (int, int);
+    IOR_I32_SCALAR nothing {}
+  vsi __builtin_ior_i32_vector (vsi, vsi);
+    IOR_I32_VECTOR nothing {}
+  v256 __builtin_ior_i32_vpair (v256, v256);
+    IOR_I32_VPAIR vpair_iorv8si3 {mma}
+
+  unsigned int __builtin_ior_u32_scalar (unsigned int, unsigned int);
+    IOR_U32_SCALAR nothing {}
+  vui __builtin_ior_u32_vector (vui, vui);
+    IOR_U32_VECTOR nothing {}
+  v256 __builtin_ior_u32_vpair (v256, v256);
+    IOR_U32_VPAIR vpair_iorv8si3 {mma}
+
+  long long __builtin_ior_i64_scalar (long long, long long);
+    IOR_I64_SCALAR nothing {}
+  vsll __builtin_ior_i64_vector (vsll, vsll);
+    IOR_I64_VECTOR nothing {}
+  v256 __builtin_ior_i64_vpair (v256, v256);
+    IOR_I64_VPAIR vpair_iorv4di3 {mma}
+
+  unsigned long long __builtin_ior_u64_scalar (unsigned long long, unsigned long long);
+    IOR_U64_SCALAR nothing {}
+  vull __builtin_ior_u64_vector (vull, vull);
+    IOR_U64_VECTOR nothing {}
+  v256 __builtin_ior_u64_vpair (v256, v256);
+    IOR_U64_VPAIR vpair_iorv4di3 {mma}
+
+  int __builtin_neg_i32_scalar (int);
+    NEG_I32_SCALAR nothing {}
+  vsi __builtin_neg_i32_vector (vsi);
+    NEG_I32_VECTOR nothing {}
+  v256 __builtin_neg_i32_vpair (v256);
+    NEG_I32_VPAIR vpair_negv8si2 {mma}
+
+  unsigned int __builtin_neg_u32_scalar (unsigned int);
+    NEG_U32_SCALAR nothing {}
+  vui __builtin_neg_u32_vector (vui);
+    NEG_U32_VECTOR nothing {}
+  v256 __builtin_neg_u32_vpair (v256);
+    NEG_U32_VPAIR vpair_negv8si2 {mma}
+
+  long long __builtin_neg_i64_scalar (long long);
+    NEG_I64_SCALAR nothing {}
+  vsll __builtin_neg_i64_vector (vsll);
+    NEG_I64_VECTOR nothing {}
+  v256 __builtin_neg_i64_vpair (v256);
+    NEG_I64_VPAIR vpair_negv4di2 {mma}
+
+  unsigned long long __builtin_neg_u64_scalar (unsigned long long);
+    NEG_U64_SCALAR nothing {}
+  vull __builtin_neg_u64_vector (vull);
+    NEG_U64_VECTOR nothing {}
+  v256 __builtin_neg_u64_vpair (v256);
+    NEG_U64_VPAIR vpair_negv4di2 {mma}
+
+  int __builtin_not_i32_scalar (int);
+    NOT_I32_SCALAR nothing {}
+  vsi __builtin_not_i32_vector (vsi);
+    NOT_I32_VECTOR nothing {}
+  v256 __builtin_not_i32_vpair (v256);
+    NOT_I32_VPAIR vpair_notv8si2 {mma}
+
+  unsigned int __builtin_not_u32_scalar (unsigned int);
+    NOT_U32_SCALAR nothing {}
+  vui __builtin_not_u32_vector (vui);
+    NOT_U32_VECTOR nothing {}
+  v256 __builtin_not_u32_vpair (v256);
+    NOT_U32_VPAIR vpair_notv8si2 {mma}
+
+  long long __builtin_not_i64_scalar (long long);
+    NOT_I64_SCALAR nothing {}
+  vsll __builtin_not_i64_vector (vsll);
+    NOT_I64_VECTOR nothing {}
+  v256 __builtin_not_i64_vpair (v256);
+    NOT_I64_VPAIR vpair_notv4di2 {mma}
+
+  unsigned long long __builtin_not_u64_scalar (unsigned long long);
+    NOT_U64_SCALAR nothing {}
+  vull __builtin_not_u64_vector (vull);
+    NOT_U64_VECTOR nothing {}
+  v256 __builtin_not_u64_vpair (v256);
+    NOT_U64_VPAIR vpair_notv4di2 {mma}
+
+  int __builtin_smax_i32_scalar (int, int);
+    SMAX_I32_SCALAR nothing {}
+  vsi __builtin_smax_i32_vector (vsi, vsi);
+    SMAX_I32_VECTOR nothing {}
+  v256 __builtin_smax_i32_vpair (v256, v256);
+    SMAX_I32_VPAIR vpair_smaxv8si3 {mma}
+
+  long long __builtin_smax_i64_scalar (long long, long long);
+    SMAX_I64_SCALAR nothing {}
+  vsll __builtin_smax_i64_vector (vsll, vsll);
+    SMAX_I64_VECTOR nothing {}
+  v256 __builtin_smax_i64_vpair (v256, v256);
+    SMAX_I64_VPAIR vpair_smaxv4di3 {mma}
+
+  int __builtin_smin_i32_scalar (int, int);
+    SMIN_I32_SCALAR nothing {}
+  vsi __builtin_smin_i32_vector (vsi, vsi);
+    SMIN_I32_VECTOR nothing {}
+  v256 __builtin_smin_i32_vpair (v256, v256);
+    SMIN_I32_VPAIR vpair_sminv8si3 {mma}
+
+  long long __builtin_smin_i64_scalar (long long, long long);
+    SMIN_I64_SCALAR nothing {}
+  vsll __builtin_smin_i64_vector (vsll, vsll);
+    SMIN_I64_VECTOR nothing {}
+  v256 __builtin_smin_i64_vpair (v256, v256);
+    SMIN_I64_VPAIR vpair_sminv4di3 {mma}
+
+  int __builtin_sub_i32_scalar (int, int);
+    SUB_I32_SCALAR nothing {}
+  vsi __builtin_sub_i32_vector (vsi, vsi);
+    SUB_I32_VECTOR nothing {}
+  v256 __builtin_sub_i32_vpair (v256, v256);
+    SUB_I32_VPAIR vpair_subv8si3 {mma}
+
+  unsigned int __builtin_sub_u32_scalar (unsigned int, unsigned int);
+    SUB_U32_SCALAR nothing {}
+  vui __builtin_sub_u32_vector (vui, vui);
+    SUB_U32_VECTOR nothing {}
+  v256 __builtin_sub_u32_vpair (v256, v256);
+    SUB_U32_VPAIR vpair_subv8si3 {mma}
+
+  long long __builtin_sub_i64_scalar (long long, long long);
+    SUB_I64_SCALAR nothing {}
+  vsll __builtin_sub_i64_vector (vsll, vsll);
+    SUB_I64_VECTOR nothing {}
+  v256 __builtin_sub_i64_vpair (v256, v256);
+    SUB_I64_VPAIR vpair_subv4di3 {mma}
+
+  unsigned long long __builtin_sub_u64_scalar (unsigned long long, unsigned long long);
+    SUB_U64_SCALAR nothing {}
+  vull __builtin_sub_u64_vector (vull, vull);
+    SUB_U64_VECTOR nothing {}
+  v256 __builtin_sub_u64_vpair (v256, v256);
+    SUB_U64_VPAIR vpair_subv4di3 {mma}
+
+  unsigned int __builtin_umax_u32_scalar (unsigned int, unsigned int);
+    UMAX_U32_SCALAR nothing {}
+  vui __builtin_umax_u32_vector (vui, vui);
+    UMAX_U32_VECTOR nothing {}
+  v256 __builtin_umax_u32_vpair (v256, v256);
+    UMAX_U32_VPAIR vpair_umaxv8si3 {mma}
+
+  unsigned long long __builtin_umax_u64_scalar (unsigned long long, unsigned long long);
+    UMAX_U64_SCALAR nothing {}
+  vull __builtin_umax_u64_vector (vull, vull);
+    UMAX_U64_VECTOR nothing {}
+  v256 __builtin_umax_u64_vpair (v256, v256);
+    UMAX_U64_VPAIR vpair_umaxv4di3 {mma}
+
+  unsigned int __builtin_umin_u32_scalar (unsigned int, unsigned int);
+    UMIN_U32_SCALAR nothing {}
+  vui __builtin_umin_u32_vector (vui, vui);
+    UMIN_U32_VECTOR nothing {}
+  v256 __builtin_umin_u32_vpair (v256, v256);
+    UMIN_U32_VPAIR vpair_uminv8si3 {mma}
+
+  unsigned long long __builtin_umin_u64_scalar (unsigned long long, unsigned long long);
+    UMIN_U64_SCALAR nothing {}
+  vull __builtin_umin_u64_vector (vull, vull);
+    UMIN_U64_VECTOR nothing {}
+  v256 __builtin_umin_u64_vpair (v256, v256);
+    UMIN_U64_VPAIR vpair_uminv4di3 {mma}
+
+  int __builtin_xor_i32_scalar (int, int);
+    XOR_I32_SCALAR nothing {}
+  vsi __builtin_xor_i32_vector (vsi, vsi);
+    XOR_I32_VECTOR nothing {}
+  v256 __builtin_xor_i32_vpair (v256, v256);
+    XOR_I32_VPAIR vpair_xorv8si3 {mma}
+
+  unsigned int __builtin_xor_u32_scalar (unsigned int, unsigned int);
+    XOR_U32_SCALAR nothing {}
+  vui __builtin_xor_u32_vector (vui, vui);
+    XOR_U32_VECTOR nothing {}
+  v256 __builtin_xor_u32_vpair (v256, v256);
+    XOR_U32_VPAIR vpair_xorv8si3 {mma}
+
+  long long __builtin_xor_i64_scalar (long long, long long);
+    XOR_I64_SCALAR nothing {}
+  vsll __builtin_xor_i64_vector (vsll, vsll);
+    XOR_I64_VECTOR nothing {}
+  v256 __builtin_xor_i64_vpair (v256, v256);
+    XOR_I64_VPAIR vpair_xorv4di3 {mma}
+
+  unsigned long long __builtin_xor_u64_scalar (unsigned long long, unsigned long long);
+    XOR_U64_SCALAR nothing {}
+  vull __builtin_xor_u64_vector (vull, vull);
+    XOR_U64_VECTOR nothing {}
+  v256 __builtin_xor_u64_vpair (v256, v256);
+    XOR_U64_VPAIR vpair_xorv4di3 {mma}
diff --git a/gcc/config/rs6000/rs6000-gen-builtins.cc b/gcc/config/rs6000/rs6000-gen-builtins.cc
index a2f442ed90d..cbd7b916c03 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.cc
+++ b/gcc/config/rs6000/rs6000-gen-builtins.cc
@@ -444,7 +444,7 @@ struct ovld_stanza
   char *ifdef;
 };
 
-#define MAXOVLDSTANZAS 512
+#define MAXOVLDSTANZAS 1024
 static ovld_stanza ovld_stanzas[MAXOVLDSTANZAS];
 static int num_ovld_stanzas;
 static int curr_ovld_stanza;
diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def
index bbc26de4568..1bf0d1ceee9 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -6365,3 +6365,268 @@
     SUB_F64_VECTOR
   v256 __builtin_sub_f64 (v256, v256);
     SUB_F64_VPAIR
+
+;; Overloaded integer built-in functions
+[ADD_I32, SKIP, __builtin_add_i32]
+  int __builtin_add_i32 (int, int);
+    ADD_I32_SCALAR
+  vsi __builtin_add_i32 (vsi, vsi);
+    ADD_I32_VECTOR
+  v256 __builtin_add_i32 (v256, v256);
+    ADD_I32_VPAIR
+
+[ADD_U32, SKIP, __builtin_add_u32]
+  unsigned int __builtin_add_u32 (unsigned int, unsigned int);
+    ADD_U32_SCALAR
+  vui __builtin_add_u32 (vui, vui);
+    ADD_U32_VECTOR
+  v256 __builtin_add_u32 (v256, v256);
+    ADD_U32_VPAIR
+
+[ADD_I64, SKIP, __builtin_add_i64]
+  long long __builtin_add_i64 (long long, long long);
+    ADD_I64_SCALAR
+  vsll __builtin_add_i64 (vsll, vsll);
+    ADD_I64_VECTOR
+  v256 __builtin_add_i64 (v256, v256);
+    ADD_I64_VPAIR
+
+[ADD_U64, SKIP, __builtin_add_u64]
+  unsigned long long __builtin_add_u64 (unsigned long long, unsigned long long);
+    ADD_U64_SCALAR
+  vull __builtin_add_u64 (vull, vull);
+    ADD_U64_VECTOR
+  v256 __builtin_add_u64 (v256, v256);
+    ADD_U64_VPAIR
+
+[AND_U32, SKIP, __builtin_and_u32]
+  unsigned int __builtin_and_u32 (unsigned int, unsigned int);
+    AND_U32_SCALAR
+  vui __builtin_and_u32 (vui, vui);
+    AND_U32_VECTOR
+  v256 __builtin_and_u32 (v256, v256);
+    AND_U32_VPAIR
+
+[AND_I64, SKIP, __builtin_and_i64]
+  long long __builtin_and_i64 (long long, long long);
+    AND_I64_SCALAR
+  vsll __builtin_and_i64 (vsll, vsll);
+    AND_I64_VECTOR
+  v256 __builtin_and_i64 (v256, v256);
+    AND_I64_VPAIR
+
+[AND_U64, SKIP, __builtin_and_u64]
+  unsigned long long __builtin_and_u64 (unsigned long long, unsigned long long);
+    AND_U64_SCALAR
+  vull __builtin_and_u64 (vull, vull);
+    AND_U64_VECTOR
+  v256 __builtin_and_u64 (v256, v256);
+    AND_U64_VPAIR
+
+[IOR_U32, SKIP, __builtin_ior_u32]
+  unsigned int __builtin_ior_u32 (unsigned int, unsigned int);
+    IOR_U32_SCALAR
+  vui __builtin_ior_u32 (vui, vui);
+    IOR_U32_VECTOR
+  v256 __builtin_ior_u32 (v256, v256);
+    IOR_U32_VPAIR
+
+[IOR_I64, SKIP, __builtin_ior_i64]
+  long long __builtin_ior_i64 (long long, long long);
+    IOR_I64_SCALAR
+  vsll __builtin_ior_i64 (vsll, vsll);
+    IOR_I64_VECTOR
+  v256 __builtin_ior_i64 (v256, v256);
+    IOR_I64_VPAIR
+
+[IOR_U64, SKIP, __builtin_ior_u64]
+  unsigned long long __builtin_ior_u64 (unsigned long long, unsigned long long);
+    IOR_U64_SCALAR
+  vull __builtin_ior_u64 (vull, vull);
+    IOR_U64_VECTOR
+  v256 __builtin_ior_u64 (v256, v256);
+    IOR_U64_VPAIR
+
+[NEG_I32, SKIP, __builtin_neg_i32]
+  int __builtin_neg_i32 (int);
+    NEG_I32_SCALAR
+  vsi __builtin_neg_i32 (vsi);
+    NEG_I32_VECTOR
+  v256 __builtin_neg_i32 (v256);
+    NEG_I32_VPAIR
+
+[NEG_U32, SKIP, __builtin_neg_u32]
+  unsigned int __builtin_neg_u32 (unsigned int);
+    NEG_U32_SCALAR
+  vui __builtin_neg_u32 (vui);
+    NEG_U32_VECTOR
+  v256 __builtin_neg_u32 (v256);
+    NEG_U32_VPAIR
+
+[NEG_I64, SKIP, __builtin_neg_i64]
+  long long __builtin_neg_i64 (long long);
+    NEG_I64_SCALAR
+  vsll __builtin_neg_i64 (vsll);
+    NEG_I64_VECTOR
+  v256 __builtin_neg_i64 (v256);
+    NEG_I64_VPAIR
+
+[NEG_U64, SKIP, __builtin_neg_u64]
+  unsigned long long __builtin_neg_u64 (unsigned long long);
+    NEG_U64_SCALAR
+  vull __builtin_neg_u64 (vull);
+    NEG_U64_VECTOR
+  v256 __builtin_neg_u64 (v256);
+    NEG_U64_VPAIR
+
+[NOT_I32, SKIP, __builtin_not_i32]
+  int __builtin_not_i32 (int);
+    NOT_I32_SCALAR
+  vsi __builtin_not_i32 (vsi);
+    NOT_I32_VECTOR
+  v256 __builtin_not_i32 (v256);
+    NOT_I32_VPAIR
+
+[NOT_U32, SKIP, __builtin_not_u32]
+  unsigned int __builtin_not_u32 (unsigned int);
+    NOT_U32_SCALAR
+  vui __builtin_not_u32 (vui);
+    NOT_U32_VECTOR
+  v256 __builtin_not_u32 (v256);
+    NOT_U32_VPAIR
+
+[NOT_I64, SKIP, __builtin_not_i64]
+  long long __builtin_not_i64 (long long);
+    NOT_I64_SCALAR
+  vsll __builtin_not_i64 (vsll);
+    NOT_I64_VECTOR
+  v256 __builtin_not_i64 (v256);
+    NOT_I64_VPAIR
+
+[NOT_U64, SKIP, __builtin_not_u64]
+  unsigned long long __builtin_not_u64 (unsigned long long);
+    NOT_U64_SCALAR
+  vull __builtin_not_u64 (vull);
+    NOT_U64_VECTOR
+  v256 __builtin_not_u64 (v256);
+    NOT_U64_VPAIR
+
+[SUB_I32, SKIP, __builtin_sub_i32]
+  int __builtin_sub_i32 (int, int);
+    SUB_I32_SCALAR
+  vsi __builtin_sub_i32 (vsi, vsi);
+    SUB_I32_VECTOR
+  v256 __builtin_sub_i32 (v256, v256);
+    SUB_I32_VPAIR
+
+[SUB_U32, SKIP, __builtin_sub_u32]
+  unsigned int __builtin_sub_u32 (unsigned int, unsigned int);
+    SUB_U32_SCALAR
+  vui __builtin_sub_u32 (vui, vui);
+    SUB_U32_VECTOR
+  v256 __builtin_sub_u32 (v256, v256);
+    SUB_U32_VPAIR
+
+[SUB_I64, SKIP, __builtin_sub_i64]
+  long long __builtin_sub_i64 (long long, long long);
+    SUB_I64_SCALAR
+  vsll __builtin_sub_i64 (vsll, vsll);
+    SUB_I64_VECTOR
+  v256 __builtin_sub_i64 (v256, v256);
+    SUB_I64_VPAIR
+
+[SUB_U64, SKIP, __builtin_sub_u64]
+  unsigned long long __builtin_sub_u64 (unsigned long long, unsigned long long);
+    SUB_U64_SCALAR
+  vull __builtin_sub_u64 (vull, vull);
+    SUB_U64_VECTOR
+  v256 __builtin_sub_u64 (v256, v256);
+    SUB_U64_VPAIR
+
+[SMAX_I32, SKIP, __builtin_smax_i32]
+  int __builtin_smax_i32 (int, int);
+    SMAX_I32_SCALAR
+  vsi __builtin_smax_i32 (vsi, vsi);
+    SMAX_I32_VECTOR
+  v256 __builtin_smax_i32 (v256, v256);
+    SMAX_I32_VPAIR
+
+[SMAX_I64, SKIP, __builtin_smax_i64]
+  long long __builtin_smax_i64 (long long, long long);
+    SMAX_I64_SCALAR
+  vsll __builtin_smax_i64 (vsll, vsll);
+    SMAX_I64_VECTOR
+  v256 __builtin_smax_i64 (v256, v256);
+    SMAX_I64_VPAIR
+
+[SMIN_I32, SKIP, __builtin_smin_i32]
+  int __builtin_smin_i32 (int, int);
+    SMIN_I32_SCALAR
+  vsi __builtin_smin_i32 (vsi, vsi);
+    SMIN_I32_VECTOR
+  v256 __builtin_smin_i32 (v256, v256);
+    SMIN_I32_VPAIR
+
+[SMIN_I64, SKIP, __builtin_smin_i64]
+  long long __builtin_smin_i64 (long long, long long);
+    SMIN_I64_SCALAR
+  vsll __builtin_smin_i64 (vsll, vsll);
+    SMIN_I64_VECTOR
+  v256 __builtin_smin_i64 (v256, v256);
+    SMIN_I64_VPAIR
+
+[UMAX_U32, SKIP, __builtin_umax_u32]
+  unsigned int __builtin_umax_u32 (unsigned int, unsigned int);
+    UMAX_U32_SCALAR
+  vui __builtin_umax_u32 (vui, vui);
+    UMAX_U32_VECTOR
+  v256 __builtin_umax_u32 (v256, v256);
+    UMAX_U32_VPAIR
+
+[UMAX_U64, SKIP, __builtin_umax_u64]
+  unsigned long long __builtin_umax_u64 (unsigned long long, unsigned long long);
+    UMAX_U64_SCALAR
+  vull __builtin_umax_u64 (vull, vull);
+    UMAX_U64_VECTOR
+  v256 __builtin_umax_u64 (v256, v256);
+    UMAX_U64_VPAIR
+
+[UMIN_U32, SKIP, __builtin_umin_u32]
+  unsigned int __builtin_umin_u32 (unsigned int, unsigned int);
+    UMIN_U32_SCALAR
+  vui __builtin_umin_u32 (vui, vui);
+    UMIN_U32_VECTOR
+  v256 __builtin_umin_u32 (v256, v256);
+    UMIN_U32_VPAIR
+
+[UMIN_U64, SKIP, __builtin_umin_u64]
+  unsigned long long __builtin_umin_u64 (unsigned long long, unsigned long long);
+    UMIN_U64_SCALAR
+  vull __builtin_umin_u64 (vull, vull);
+    UMIN_U64_VECTOR
+  v256 __builtin_umin_u64 (v256, v256);
+    UMIN_U64_VPAIR
+
+[XOR_U32, SKIP, __builtin_xor_u32]
+  unsigned int __builtin_xor_u32 (unsigned int, unsigned int);
+    XOR_U32_SCALAR
+  vui __builtin_xor_u32 (vui, vui);
+    XOR_U32_VECTOR
+  v256 __builtin_xor_u32 (v256, v256);
+    XOR_U32_VPAIR
+
+[XOR_I64, SKIP, __builtin_xor_i64]
+  long long __builtin_xor_i64 (long long, long long);
+    XOR_I64_SCALAR
+  vsll __builtin_xor_i64 (vsll, vsll);
+    XOR_I64_VECTOR
+  v256 __builtin_xor_i64 (v256, v256);
+    XOR_I64_VPAIR
+
+[XOR_U64, SKIP, __builtin_xor_u64]
+  unsigned long long __builtin_xor_u64 (unsigned long long, unsigned long long);
+    XOR_U64_SCALAR
+  vull __builtin_xor_u64 (vull, vull);
+    XOR_U64_VECTOR
+  v256 __builtin_xor_u64 (v256, v256);
+    XOR_U64_VPAIR
diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md
index 13f6e0464b5..22b3ce54511 100644
--- a/gcc/config/rs6000/vector-pair.md
+++ b/gcc/config/rs6000/vector-pair.md
@@ -38,51 +38,108 @@
    ])
 
 ;; Iterator doing unary/binary arithmetic on vector pairs
-(define_code_iterator VPAIR_UNARY  [neg abs sqrt])
-(define_code_iterator VPAIR_BINARY [plus minus mult div copysign smin smax])
+(define_code_iterator VPAIR_FP_UNARY  [abs neg sqrt])
+(define_code_iterator VPAIR_FP_BINARY [copysign div minus mult plus smin smax])
+
+(define_code_iterator VPAIR_INT_BINARY  [and ior minus plus smax smin umax umin xor])
 
 ;; Give the insn name from the opertion
 (define_code_attr vpair_op [(abs      "abs")
+			    (and      "and")
 			    (copysign "copysign")
 			    (div      "div")
+			    (ior      "ior")
 			    (minus    "sub")
 			    (mult     "mul")
+			    (not      "not")
 			    (neg      "neg")
 			    (plus     "add")
 			    (smin     "smin")
 			    (smax     "smax")
-			    (sqrt     "sqrt")])
-
-;; Iterator for creating the wrapper for vector pair built-ins
-(define_int_iterator VPAIR_WRAPPER [UNSPEC_VPAIR_V4DF UNSPEC_VPAIR_V8SF])
-
-;; Map VPAIR_WRAPPER to vector type (i.e. V2DF or V4SF)
-(define_int_attr VPAIR_VECTOR [(UNSPEC_VPAIR_V4DF "V2DF")
-			       (UNSPEC_VPAIR_V8SF "V4SF")])
-
-(define_int_attr vpair_type [(UNSPEC_VPAIR_V4DF "v4df")
-			     (UNSPEC_VPAIR_V8SF "v8sf")])
+			    (sqrt     "sqrt")
+			    (umin     "umin")
+			    (umax     "umax")
+			    (xor      "xor")])
+
+;; Give the register constraint ("v" or "wa") for the integer operation used
+(define_code_attr vpair_ireg [(and   "wa")
+			      (ior   "wa")
+			      (minus "v")
+			      (not   "wa")
+			      (neg   "v")
+			      (plus  "v")
+			      (smax  "v")
+			      (smin  "v")
+			      (umax  "v")
+			      (umin  "v")
+			      (xor   "wa")])
+
+;; Give the register predicate for the integer operation used
+(define_code_attr vpair_ipred [(and   "vsx_register_operand")
+			       (ior   "vsx_register_operand")
+			       (minus "altivec_register_operand")
+			       (not   "vsx_register_operand")
+			       (neg   "altivec_register_operand")
+			       (plus  "altivec_register_operand")
+			       (smax  "altivec_register_operand")
+			       (smin  "altivec_register_operand")
+			       (umax  "altivec_register_operand")
+			       (umin  "altivec_register_operand")
+			       (xor   "vsx_register_operand")])
+
+;; Iterator for creating the wrappers for vector pair built-ins
+(define_int_iterator VPAIR_FP_WRAPPER [UNSPEC_VPAIR_V4DF
+				       UNSPEC_VPAIR_V8SF])
+
+(define_int_iterator VPAIR_INT_WRAPPER [UNSPEC_VPAIR_V4DI
+					UNSPEC_VPAIR_V8SI
+					UNSPEC_VPAIR_V16HI
+					UNSPEC_VPAIR_V32QI])
+
+;; Map VPAIR_{INT,FP}_WRAPPER to the vector type of the arguments after they
+;; are split.
+(define_int_attr VPAIR_VECTOR [(UNSPEC_VPAIR_V4DF  "V2DF")
+			       (UNSPEC_VPAIR_V8SF  "V4SF")
+			       (UNSPEC_VPAIR_V32QI "V16QI")
+			       (UNSPEC_VPAIR_V16HI "V8HI")
+			       (UNSPEC_VPAIR_V8SI  "V4SI")
+			       (UNSPEC_VPAIR_V4DI  "V2DI")])
+
+;; Map VPAIR_{INT,FP}_WRAPPER to a lower case name to identify the vector pair.
+(define_int_attr vpair_type [(UNSPEC_VPAIR_V4DF  "v4df")
+			     (UNSPEC_VPAIR_V8SF  "v8sf")
+			     (UNSPEC_VPAIR_V32QI "v32qi")
+			     (UNSPEC_VPAIR_V16HI "v16hi")
+			     (UNSPEC_VPAIR_V8SI  "v8si")
+			     (UNSPEC_VPAIR_V4DI  "v4di")])
+
+;; Map VPAIR_INT_WRAPPER to constraints used for the negate scratch register.
+(define_int_attr vpair_neg_reg [(UNSPEC_VPAIR_V32QI "&v")
+				(UNSPEC_VPAIR_V16HI "&v")
+				(UNSPEC_VPAIR_V8SI  "X")
+				(UNSPEC_VPAIR_V4DI  "X")])
 
 \f
 ;; Vector pair floating point unary operations
 (define_insn_and_split "vpair_<vpair_op><vpair_type>2"
   [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
-	(unspec:OO [(VPAIR_UNARY:OO
+	(unspec:OO [(VPAIR_FP_UNARY:OO
 		     (match_operand:OO 1 "vsx_register_operand" "wa"))]
-		   VPAIR_WRAPPER))]
+		   VPAIR_FP_WRAPPER))]
   "TARGET_MMA"
   "#"
   "&& reload_completed"
-  [(set (match_dup 2) (VPAIR_UNARY:<VPAIR_VECTOR> (match_dup 3)))
-   (set (match_dup 4) (VPAIR_UNARY:<VPAIR_VECTOR> (match_dup 5)))]
+  [(set (match_dup 2) (VPAIR_FP_UNARY:<VPAIR_VECTOR> (match_dup 3)))
+   (set (match_dup 4) (VPAIR_FP_UNARY:<VPAIR_VECTOR> (match_dup 5)))]
 {
   unsigned reg0 = reg_or_subregno (operands[0]);
   unsigned reg1 = reg_or_subregno (operands[1]);
+  machine_mode vmode = <VPAIR_VECTOR>mode;
 
-  operands[2] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
-  operands[3] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
-  operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
-  operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
+  operands[2] = gen_rtx_REG (vmode, reg0);
+  operands[3] = gen_rtx_REG (vmode, reg1);
+  operands[4] = gen_rtx_REG (vmode, reg0 + 1);
+  operands[5] = gen_rtx_REG (vmode, reg1 + 1);
 }
   [(set_attr "length" "8")])
 
@@ -93,8 +150,8 @@
 	 [(neg:OO
 	   (unspec:OO
 	    [(abs:OO (match_operand:OO 1 "vsx_register_operand" "ww"))]
-	    VPAIR_WRAPPER))]
-	 VPAIR_WRAPPER))]
+	    VPAIR_FP_WRAPPER))]
+	 VPAIR_FP_WRAPPER))]
   "TARGET_MMA"
   "#"
   "&& reload_completed"
@@ -107,42 +164,44 @@
 {
   unsigned reg0 = reg_or_subregno (operands[0]);
   unsigned reg1 = reg_or_subregno (operands[1]);
+  machine_mode vmode = <VPAIR_VECTOR>mode;
 
-  operands[2] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
-  operands[3] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
-  operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
-  operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
+  operands[2] = gen_rtx_REG (vmode, reg0);
+  operands[3] = gen_rtx_REG (vmode, reg1);
+  operands[4] = gen_rtx_REG (vmode, reg0 + 1);
+  operands[5] = gen_rtx_REG (vmode, reg1 + 1);
 }
   [(set_attr "length" "8")])
 
 ;; Vector pair floating binary operations
 (define_insn_and_split "vpair_<vpair_op><vpair_type>3"
   [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
-	(unspec:OO [(VPAIR_BINARY:OO
+	(unspec:OO [(VPAIR_FP_BINARY:OO
 		     (match_operand:OO 1 "vsx_register_operand" "wa")
 		     (match_operand:OO 2 "vsx_register_operand" "wa"))]
-		   VPAIR_WRAPPER))]
+		   VPAIR_FP_WRAPPER))]
   "TARGET_MMA"
   "#"
   "&& reload_completed"
   [(set (match_dup 3)
-	(VPAIR_BINARY:<VPAIR_VECTOR> (match_dup 4)
-				      (match_dup 5)))
+	(VPAIR_FP_BINARY:<VPAIR_VECTOR> (match_dup 4)
+					(match_dup 5)))
    (set (match_dup 6)
-	(VPAIR_BINARY:<VPAIR_VECTOR> (match_dup 7)
-				      (match_dup 8)))]
+	(VPAIR_FP_BINARY:<VPAIR_VECTOR> (match_dup 7)
+					(match_dup 8)))]
 {
   unsigned reg0 = reg_or_subregno (operands[0]);
   unsigned reg1 = reg_or_subregno (operands[1]);
   unsigned reg2 = reg_or_subregno (operands[2]);
+  machine_mode vmode = <VPAIR_VECTOR>mode;
 
-  operands[3] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
-  operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
-  operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2);
+  operands[3] = gen_rtx_REG (vmode, reg0);
+  operands[4] = gen_rtx_REG (vmode, reg1);
+  operands[5] = gen_rtx_REG (vmode, reg2);
 
-  operands[6] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
-  operands[7] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
-  operands[8] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2 + 1);
+  operands[6] = gen_rtx_REG (vmode, reg0 + 1);
+  operands[7] = gen_rtx_REG (vmode, reg1 + 1);
+  operands[8] = gen_rtx_REG (vmode, reg2 + 1);
 }
   [(set_attr "length" "8")])
 
@@ -154,7 +213,7 @@
 	   (match_operand:OO 1 "vsx_register_operand" "%wa,wa")
 	   (match_operand:OO 2 "vsx_register_operand" "wa,0")
 	   (match_operand:OO 3 "vsx_register_operand" "0,wa"))]
-	 VPAIR_WRAPPER))]
+	 VPAIR_FP_WRAPPER))]
   "TARGET_MMA"
   "#"
   "&& reload_completed"
@@ -171,16 +230,17 @@
   unsigned reg1 = reg_or_subregno (operands[1]);
   unsigned reg2 = reg_or_subregno (operands[2]);
   unsigned reg3 = reg_or_subregno (operands[3]);
+  machine_mode vmode = <VPAIR_VECTOR>mode;
 
-  operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
-  operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
-  operands[6] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2);
-  operands[7] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3);
+  operands[4] = gen_rtx_REG (vmode, reg0);
+  operands[5] = gen_rtx_REG (vmode, reg1);
+  operands[6] = gen_rtx_REG (vmode, reg2);
+  operands[7] = gen_rtx_REG (vmode, reg3);
 
-  operands[8] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
-  operands[9] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
-  operands[10] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2 + 1);
-  operands[11] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3 + 1);
+  operands[8] = gen_rtx_REG (vmode, reg0 + 1);
+  operands[9] = gen_rtx_REG (vmode, reg1 + 1);
+  operands[10] = gen_rtx_REG (vmode, reg2 + 1);
+  operands[11] = gen_rtx_REG (vmode, reg3 + 1);
 }
   [(set_attr "length" "8")])
 
@@ -192,8 +252,8 @@
 	   (match_operand:OO 2 "vsx_register_operand" "wa,0")
 	   (unspec:OO
 	    [(neg:OO (match_operand:OO 3 "vsx_register_operand" "0,wa"))]
-	     VPAIR_WRAPPER))]
-	 VPAIR_WRAPPER))]
+	     VPAIR_FP_WRAPPER))]
+	 VPAIR_FP_WRAPPER))]
   "TARGET_MMA"
   "#"
   "&& reload_completed"
@@ -210,16 +270,17 @@
   unsigned reg1 = reg_or_subregno (operands[1]);
   unsigned reg2 = reg_or_subregno (operands[2]);
   unsigned reg3 = reg_or_subregno (operands[3]);
+  machine_mode vmode = <VPAIR_VECTOR>mode;
 
-  operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
-  operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
-  operands[6] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2);
-  operands[7] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3);
+  operands[4] = gen_rtx_REG (vmode, reg0);
+  operands[5] = gen_rtx_REG (vmode, reg1);
+  operands[6] = gen_rtx_REG (vmode, reg2);
+  operands[7] = gen_rtx_REG (vmode, reg3);
 
-  operands[8] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
-  operands[9] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
-  operands[10] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2 + 1);
-  operands[11] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3 + 1);
+  operands[8] = gen_rtx_REG (vmode, reg0 + 1);
+  operands[9] = gen_rtx_REG (vmode, reg1 + 1);
+  operands[10] = gen_rtx_REG (vmode, reg2 + 1);
+  operands[11] = gen_rtx_REG (vmode, reg3 + 1);
 }
   [(set_attr "length" "8")])
 
@@ -232,8 +293,8 @@
 	      (match_operand:OO 1 "vsx_register_operand" "%wa,wa")
 	      (match_operand:OO 2 "vsx_register_operand" "wa,0")
 	      (match_operand:OO 3 "vsx_register_operand" "0,wa"))]
-	    VPAIR_WRAPPER))]
-	 VPAIR_WRAPPER))]
+	    VPAIR_FP_WRAPPER))]
+	 VPAIR_FP_WRAPPER))]
   "TARGET_MMA"
   "#"
   "&& reload_completed"
@@ -252,16 +313,17 @@
   unsigned reg1 = reg_or_subregno (operands[1]);
   unsigned reg2 = reg_or_subregno (operands[2]);
   unsigned reg3 = reg_or_subregno (operands[3]);
+  machine_mode vmode = <VPAIR_VECTOR>mode;
 
-  operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
-  operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
-  operands[6] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2);
-  operands[7] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3);
+  operands[4] = gen_rtx_REG (vmode, reg0);
+  operands[5] = gen_rtx_REG (vmode, reg1);
+  operands[6] = gen_rtx_REG (vmode, reg2);
+  operands[7] = gen_rtx_REG (vmode, reg3);
 
-  operands[8] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
-  operands[9] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
-  operands[10] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2 + 1);
-  operands[11] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3 + 1);
+  operands[8] = gen_rtx_REG (vmode, reg0 + 1);
+  operands[9] = gen_rtx_REG (vmode, reg1 + 1);
+  operands[10] = gen_rtx_REG (vmode, reg2 + 1);
+  operands[11] = gen_rtx_REG (vmode, reg3 + 1);
 }
   [(set_attr "length" "8")])
 
@@ -275,9 +337,9 @@
 	      (match_operand:OO 2 "vsx_register_operand" "wa,0")
 	      (unspec:OO
 	       [(neg:OO (match_operand:OO 3 "vsx_register_operand" "0,wa"))]
-	       VPAIR_WRAPPER))]
-	   VPAIR_WRAPPER))]
-	 VPAIR_WRAPPER))]
+	       VPAIR_FP_WRAPPER))]
+	   VPAIR_FP_WRAPPER))]
+	 VPAIR_FP_WRAPPER))]
   "TARGET_MMA"
   "#"
   "&& reload_completed"
@@ -296,16 +358,17 @@
   unsigned reg1 = reg_or_subregno (operands[1]);
   unsigned reg2 = reg_or_subregno (operands[2]);
   unsigned reg3 = reg_or_subregno (operands[3]);
+  machine_mode vmode = <VPAIR_VECTOR>mode;
 
-  operands[4] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0);
-  operands[5] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1);
-  operands[6] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2);
-  operands[7] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3);
+  operands[4] = gen_rtx_REG (vmode, reg0);
+  operands[5] = gen_rtx_REG (vmode, reg1);
+  operands[6] = gen_rtx_REG (vmode, reg2);
+  operands[7] = gen_rtx_REG (vmode, reg3);
 
-  operands[8] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg0 + 1);
-  operands[9] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg1 + 1);
-  operands[10] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg2 + 1);
-  operands[11] = gen_rtx_REG (<VPAIR_VECTOR>mode, reg3 + 1);
+  operands[8] = gen_rtx_REG (vmode, reg0 + 1);
+  operands[9] = gen_rtx_REG (vmode, reg1 + 1);
+  operands[10] = gen_rtx_REG (vmode, reg2 + 1);
+  operands[11] = gen_rtx_REG (vmode, reg3 + 1);
 }
   [(set_attr "length" "8")])
 
@@ -415,3 +478,105 @@
   operands[6] = GEN_INT (BYTES_BIG_ENDIAN ? 1 : 0);
   operands[7] = gen_rtx_REG (DFmode, reg3);
 })
+
+\f
+;; Vector pair integer negate support.
+(define_insn_and_split "vpair_neg<vpair_type>2"
+  [(set (match_operand:OO 0 "altivec_register_operand" "=v")
+	(unspec:OO [(neg:OO
+		     (match_operand:OO 1 "altivec_register_operand" "v"))]
+		   VPAIR_INT_WRAPPER))
+   (clobber (match_scratch:<VPAIR_VECTOR> 2 "=<vpair_neg_reg>"))]
+  "TARGET_MMA"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 4) (minus:<VPAIR_VECTOR> (match_dup 2)
+					    (match_dup 5)))
+   (set (match_dup 6) (minus:<VPAIR_VECTOR> (match_dup 2)
+					    (match_dup 7)))]
+{
+  unsigned reg0 = reg_or_subregno (operands[0]);
+  unsigned reg1 = reg_or_subregno (operands[1]);
+  machine_mode vmode = <VPAIR_VECTOR>mode;
+
+  operands[3] = CONST0_RTX (vmode);
+
+  operands[4] = gen_rtx_REG (vmode, reg0);
+  operands[5] = gen_rtx_REG (vmode, reg1);
+
+  operands[6] = gen_rtx_REG (vmode, reg0 + 1);
+  operands[7] = gen_rtx_REG (vmode, reg1 + 1);
+
+  /* If the vector integer size is 32 or 64 bits, we can use the vneg{w,d}
+     instructions.  */
+  if (vmode == V4SImode)
+    {
+      emit_insn (gen_negv4si2 (operands[4], operands[5]));
+      emit_insn (gen_negv4si2 (operands[6], operands[7]));
+      DONE;
+    }
+  else if (vmode == V2DImode)
+    {
+      emit_insn (gen_negv2di2 (operands[4], operands[5]));
+      emit_insn (gen_negv2di2 (operands[6], operands[7]));
+      DONE;
+    }
+}
+  [(set_attr "length" "8")])
+
+;; Vector pair integer not support.
+(define_insn_and_split "vpair_not<vpair_type>2"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa")
+	(unspec:OO [(not:OO (match_operand:OO 1 "vsx_register_operand" "wa"))]
+		   VPAIR_INT_WRAPPER))]
+  "TARGET_MMA"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 2) (not:<VPAIR_VECTOR> (match_dup 3)))
+   (set (match_dup 4) (not:<VPAIR_VECTOR> (match_dup 5)))]
+{
+  unsigned reg0 = reg_or_subregno (operands[0]);
+  unsigned reg1 = reg_or_subregno (operands[1]);
+  machine_mode vmode = <VPAIR_VECTOR>mode;
+
+  operands[2] = gen_rtx_REG (vmode, reg0);
+  operands[3] = gen_rtx_REG (vmode, reg1);
+
+  operands[4] = gen_rtx_REG (vmode, reg0 + 1);
+  operands[5] = gen_rtx_REG (vmode, reg1 + 1);
+}
+  [(set_attr "length" "8")])
+
+;; Vector pair integer binary operations.
+(define_insn_and_split "vpair_<vpair_op><vpair_type>3"
+  [(set (match_operand:OO 0 "<vpair_ipred>" "=<vpair_ireg>")
+	(unspec:OO [(VPAIR_INT_BINARY:OO
+		     (match_operand:OO 1 "<vpair_ipred>" "<vpair_ireg>")
+		     (match_operand:OO 2 "<vpair_ipred>" "<vpair_ireg>"))]
+		   VPAIR_INT_WRAPPER))]
+  "TARGET_MMA"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 3)
+	(VPAIR_INT_BINARY:<VPAIR_VECTOR> (match_dup 4)
+					 (match_dup 5)))
+   (set (match_dup 6)
+	(VPAIR_INT_BINARY:<VPAIR_VECTOR> (match_dup 7)
+					 (match_dup 8)))]
+{
+  unsigned reg0 = reg_or_subregno (operands[0]);
+  unsigned reg1 = reg_or_subregno (operands[1]);
+  unsigned reg2 = reg_or_subregno (operands[2]);
+  machine_mode vmode = <VPAIR_VECTOR>mode;
+
+  operands[3] = gen_rtx_REG (vmode, reg0);
+  operands[4] = gen_rtx_REG (vmode, reg1);
+  operands[5] = gen_rtx_REG (vmode, reg2);
+
+  operands[6] = gen_rtx_REG (vmode, reg0 + 1);
+  operands[7] = gen_rtx_REG (vmode, reg1 + 1);
+  operands[8] = gen_rtx_REG (vmode, reg2 + 1);
+}
+  [(set_attr "length" "8")])
+
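
    For reference (not part of the diff), every define_insn_and_split added
    above follows the same shape: before register allocation the 256-bit
    operation is a single OOmode insn wrapped in an unspec, and after reload
    it is split into the same operation on the two adjacent 128-bit registers
    of the pair.  A rough sketch for vpair_addv8si3, with made-up register
    numbers:

        ;; before the split: one OOmode insn on vector pairs
        (set (reg:OO 34)
             (unspec:OO [(plus:OO (reg:OO 36) (reg:OO 38))]
                        UNSPEC_VPAIR_V8SI))

        ;; after reload: two V4SI adds on the adjacent registers
        ;; of each pair (reg, reg + 1)
        (set (reg:V4SI 34) (plus:V4SI (reg:V4SI 36) (reg:V4SI 38)))
        (set (reg:V4SI 35) (plus:V4SI (reg:V4SI 37) (reg:V4SI 39)))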
