From: Michael Meissner
To: gcc-cvs@gcc.gnu.org
Subject: [gcc(refs/users/meissner/heads/work130-vpair)] Add vector pair built-in functions.
X-Act-Checkin: gcc
X-Git-Author: Michael Meissner
X-Git-Refname: refs/users/meissner/heads/work130-vpair
X-Git-Oldrev: be428a84bf4b34c0d7fbabab77d06336ff661dcd
X-Git-Newrev: d4d4c364a9259389193cb155e2fe6bb507076f76
Message-Id: <20230803222754.10BBA3858C50@sourceware.org>
Date: Thu, 3 Aug 2023 22:27:54 +0000 (GMT)

https://gcc.gnu.org/g:d4d4c364a9259389193cb155e2fe6bb507076f76

commit d4d4c364a9259389193cb155e2fe6bb507076f76
Author: Michael Meissner
Date:   Thu Aug 3 18:27:34 2023 -0400

    Add vector pair built-in functions.

    2023-08-03  Michael Meissner

    gcc/

        * config/rs6000/rs6000-builtins.def (__builtin_vpair*): New vector pair built-in functions.
        * config/rs6000/rs6000-call.cc (USE_ALTIVEC_FOR_ARG_P): Allow __vector_pair to be passed and returned.
        (rs6000_return_in_memory): Likewise.
        (rs6000_function_arg): Likewise.
        * config/rs6000/rs6000.md (vector-pair.md): Add new .md file for vector pair built-in support.
        * config/rs6000/t-rs6000 (MD_INCLUDES): Likewise.
        * config/rs6000/vector-pair.md: New file.
        * doc/extend.texi (PowerPC Built-in Functions): Document the vector pair built-in functions.
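As a usage sketch (not part of the patch itself), the new built-ins are
intended to be combined with the existing vector pair load built-in roughly
as follows.  The function name, the loop structure, and the assumption that
the element count is a multiple of 4 are purely illustrative; the code needs
MMA support enabled (-mcpu=power10 or -mmma).

/* Illustrative sketch only: sum the element-wise products of two double
   arrays, processing four doubles (one vector pair) per iteration.  */
double
dot_product (const double *a, const double *b, unsigned long n)
{
  double sum = 0.0;
  for (unsigned long i = 0; i < n; i += 4)
    {
      /* Load 256 bits (4 doubles) from each array with the existing
         vector pair load built-in.  */
      __vector_pair va = __builtin_vsx_lxvp (0, (const __vector_pair *) (a + i));
      __vector_pair vb = __builtin_vsx_lxvp (0, (const __vector_pair *) (b + i));

      /* New built-ins from this patch: multiply the pairs element by
         element, then add the four products into a scalar.  */
      __vector_pair prod = __builtin_vpair_f64_mul (va, vb);
      sum += __builtin_vpair_f64_add_elements (prod);
    }
  return sum;
}
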
Diff: --- gcc/config/rs6000/rs6000-builtins.def | 194 +++++++++ gcc/config/rs6000/rs6000-call.cc | 12 +- gcc/config/rs6000/rs6000.md | 1 + gcc/config/rs6000/t-rs6000 | 1 + gcc/config/rs6000/vector-pair.md | 794 ++++++++++++++++++++++++++++++++++ gcc/doc/extend.texi | 118 +++++ 6 files changed, 1114 insertions(+), 6 deletions(-) diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 35c4cdf74c5..a779e4866c2 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -4116,3 +4116,197 @@ void __builtin_vsx_stxvp (v256, unsigned long, const v256 *); STXVP nothing {mma,pair} + +;; Vector pair built-in functions + + v256 __builtin_vpair_zero (); + VPAIR_ZERO vpair_zero {mma} + + v256 __builtin_vpair_f32_abs (v256); + VPAIR_F32_ABS vpair_abs_v8sf2 {mma} + + v256 __builtin_vpair_f32_add (v256, v256); + VPAIR_F32_ADD vpair_add_v8sf3 {mma} + + v256 __builtin_vpair_f32_fma (v256, v256, v256); + VPAIR_F32_FMA vpair_fma_v8sf4 {mma} + + v256 __builtin_vpair_f32_mul (v256, v256); + VPAIR_F32_MUL vpair_mul_v8sf3 {mma} + + v256 __builtin_vpair_f32_neg (v256); + VPAIR_F32_NEG vpair_neg_v8sf2 {mma} + + v256 __builtin_vpair_f32_smax (v256, v256); + VPAIR_F32_SMAX vpair_smax_v8sf3 {mma} + + v256 __builtin_vpair_f32_smin (v256, v256); + VPAIR_F32_SMIN vpair_smin_v8sf3 {mma} + + v256 __builtin_vpair_f32_sub (v256, v256); + VPAIR_F32_SUB vpair_sub_v8sf3 {mma} + + float __builtin_vpair_f32_add_elements (v256); + VPAIR_F32_ADD_ELEMENTS vpair_reduc_plus_scale_v8sf {mma} + + v256 __builtin_vpair_f64_abs (v256); + VPAIR_F64_ABS vpair_abs_v4df2 {mma} + + v256 __builtin_vpair_f64_add (v256, v256); + VPAIR_F64_ADD vpair_add_v4df3 {mma} + + v256 __builtin_vpair_f64_fma (v256, v256, v256); + VPAIR_F64_FMA vpair_fma_v4df4 {mma} + + v256 __builtin_vpair_f64_mul (v256, v256); + VPAIR_F64_MUL vpair_mul_v4df3 {mma} + + v256 __builtin_vpair_f64_neg (v256); + VPAIR_F64_NEG vpair_neg_v4df2 {mma} + + v256 __builtin_vpair_f64_smax (v256, v256); + VPAIR_F64_SMAX vpair_smax_v4df3 {mma} + + v256 __builtin_vpair_f64_smin (v256, v256); + VPAIR_F64_SMIN vpair_smin_v4df3 {mma} + + v256 __builtin_vpair_f64_sub (v256, v256); + VPAIR_F64_SUB vpair_sub_v4df3 {mma} + + double __builtin_vpair_f64_add_elements (v256); + VPAIR_F64_ADD_ELEMENTS vpair_reduc_plus_scale_v4df {mma} + + v256 __builtin_vpair_i8_add (v256, v256); + VPAIR_I8_ADD vpair_add_v32qi3 {mma} + + v256 __builtin_vpair_i8_and (v256, v256); + VPAIR_I8_AND vpair_and_v32qi3 {mma} + + v256 __builtin_vpair_i8_ior (v256, v256); + VPAIR_I8_IOR vpair_ior_v32qi3 {mma} + + v256 __builtin_vpair_i8_neg (v256); + VPAIR_I8_NEG vpair_neg_v32qi2 {mma} + + v256 __builtin_vpair_i8_not (v256); + VPAIR_I8_NOT vpair_not_v32qi2 {mma} + + v256 __builtin_vpair_i8_smax (v256, v256); + VPAIR_I8_SMAX vpair_smax_v32qi3 {mma} + + v256 __builtin_vpair_i8_smin (v256, v256); + VPAIR_I8_SMIN vpair_smin_v32qi3 {mma} + + v256 __builtin_vpair_i8_sub (v256, v256); + VPAIR_I8_SUB vpair_sub_v32qi3 {mma} + + v256 __builtin_vpair_i8_umax (v256, v256); + VPAIR_I8_UMAX vpair_umax_v32qi3 {mma} + + v256 __builtin_vpair_i8_umin (v256, v256); + VPAIR_I8_UMIN vpair_umin_v32qi3 {mma} + + v256 __builtin_vpair_i8_xor (v256, v256); + VPAIR_I8_XOR vpair_xor_v32qi3 {mma} + + v256 __builtin_vpair_i16_add (v256, v256); + VPAIR_I16_ADD vpair_add_v16hi3 {mma} + + v256 __builtin_vpair_i16_and (v256, v256); + VPAIR_I16_AND vpair_and_v16hi3 {mma} + + v256 __builtin_vpair_i16_ior (v256, v256); + VPAIR_I16_IOR vpair_ior_v16hi3 {mma} + + v256 __builtin_vpair_i16_neg 
(v256); + VPAIR_I16_NEG vpair_neg_v16hi2 {mma} + + v256 __builtin_vpair_i16_not (v256); + VPAIR_I16_NOT vpair_not_v16hi2 {mma} + + v256 __builtin_vpair_i16_smax (v256, v256); + VPAIR_I16_SMAX vpair_smax_v16hi3 {mma} + + v256 __builtin_vpair_i16_smin (v256, v256); + VPAIR_I16_SMIN vpair_smin_v16hi3 {mma} + + v256 __builtin_vpair_i16_sub (v256, v256); + VPAIR_I16_SUB vpair_sub_v16hi3 {mma} + + v256 __builtin_vpair_i16_umax (v256, v256); + VPAIR_I16_UMAX vpair_umax_v16hi3 {mma} + + v256 __builtin_vpair_i16_umin (v256, v256); + VPAIR_I16_UMIN vpair_umin_v16hi3 {mma} + + v256 __builtin_vpair_i16_xor (v256, v256); + VPAIR_I16_XOR vpair_xor_v16hi3 {mma} + + v256 __builtin_vpair_i32_add (v256, v256); + VPAIR_I32_ADD vpair_add_v8si3 {mma} + + v256 __builtin_vpair_i32_and (v256, v256); + VPAIR_I32_AND vpair_and_v8si3 {mma} + + v256 __builtin_vpair_i32_ior (v256, v256); + VPAIR_I32_IOR vpair_ior_v8si3 {mma} + + v256 __builtin_vpair_i32_neg (v256); + VPAIR_I32_NEG vpair_neg_v8si2 {mma} + + v256 __builtin_vpair_i32_not (v256); + VPAIR_I32_NOT vpair_not_v8si2 {mma} + + v256 __builtin_vpair_i32_smax (v256, v256); + VPAIR_I32_SMAX vpair_smax_v8si3 {mma} + + v256 __builtin_vpair_i32_smin (v256, v256); + VPAIR_I32_SMIN vpair_smin_v8si3 {mma} + + v256 __builtin_vpair_i32_sub (v256, v256); + VPAIR_I32_SUB vpair_sub_v8si3 {mma} + + v256 __builtin_vpair_i32_umax (v256, v256); + VPAIR_I32_UMAX vpair_umax_v8si3 {mma} + + v256 __builtin_vpair_i32_umin (v256, v256); + VPAIR_I32_UMIN vpair_umin_v8si3 {mma} + + v256 __builtin_vpair_i32_xor (v256, v256); + VPAIR_I32_XOR vpair_xor_v8si3 {mma} + + v256 __builtin_vpair_i64_add (v256, v256); + VPAIR_I64_ADD vpair_add_v4di3 {mma} + + v256 __builtin_vpair_i64_and (v256, v256); + VPAIR_I64_AND vpair_and_v4di3 {mma} + + v256 __builtin_vpair_i64_ior (v256, v256); + VPAIR_I64_IOR vpair_ior_v4di3 {mma} + + v256 __builtin_vpair_i64_neg (v256); + VPAIR_I64_NEG vpair_neg_v4di2 {mma} + + v256 __builtin_vpair_i64_not (v256); + VPAIR_I64_NOT vpair_not_v4di2 {mma} + + v256 __builtin_vpair_i64_smax (v256, v256); + VPAIR_I64_SMAX vpair_smax_v4di3 {mma} + + v256 __builtin_vpair_i64_smin (v256, v256); + VPAIR_I64_SMIN vpair_smin_v4di3 {mma} + + v256 __builtin_vpair_i64_sub (v256, v256); + VPAIR_I64_SUB vpair_sub_v4di3 {mma} + + v256 __builtin_vpair_i64_umax (v256, v256); + VPAIR_I64_UMAX vpair_umax_v4di3 {mma} + + v256 __builtin_vpair_i64_umin (v256, v256); + VPAIR_I64_UMIN vpair_umin_v4di3 {mma} + + v256 __builtin_vpair_i64_xor (v256, v256); + VPAIR_I64_XOR vpair_xor_v4di3 {mma} + + long long __builtin_vpair_i64_add_elements (v256); + VPAIR_I64_ADD_ELEMENTS vpair_reduc_plus_scale_v4di {mma,no32bit} diff --git a/gcc/config/rs6000/rs6000-call.cc b/gcc/config/rs6000/rs6000-call.cc index 5384c10b986..1cf363d9af2 100644 --- a/gcc/config/rs6000/rs6000-call.cc +++ b/gcc/config/rs6000/rs6000-call.cc @@ -89,7 +89,7 @@ /* Nonzero if we can use an AltiVec register to pass this arg. */ #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \ - (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \ + ((ALTIVEC_OR_VSX_VECTOR_MODE (MODE) || (MODE) == OOmode) \ && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \ && TARGET_ALTIVEC_ABI \ && (NAMED)) @@ -432,12 +432,12 @@ rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type, bool rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) { - /* We do not allow MMA types being used as return values. Only report - the invalid return value usage the first time we encounter it. */ + /* We do not allow vector quad being used as return values. 
Only report the + invalid return value usage the first time we encounter it. */ if (cfun && !cfun->machine->mma_return_type_error && TREE_TYPE (cfun->decl) == fntype - && (TYPE_MODE (type) == OOmode || TYPE_MODE (type) == XOmode)) + && TYPE_MODE (type) == XOmode) { /* Record we have now handled function CFUN, so the next time we are called, we do not re-report the same error. */ @@ -1631,8 +1631,8 @@ rs6000_function_arg (cumulative_args_t cum_v, const function_arg_info &arg) machine_mode elt_mode; int n_elts; - /* We do not allow MMA types being used as function arguments. */ - if (mode == OOmode || mode == XOmode) + /* We do not allow vector quad being used as function arguments. */ + if (mode == XOmode) { if (TYPE_CANONICAL (type) != NULL_TREE) type = TYPE_CANONICAL (type); diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index be615c3584e..1cf0ed31fb8 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -15778,6 +15778,7 @@ (include "vsx.md") (include "altivec.md") (include "mma.md") +(include "vector-pair.md") (include "dfp.md") (include "crypto.md") (include "htm.md") diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000 index f183b42ce1d..5fc89499795 100644 --- a/gcc/config/rs6000/t-rs6000 +++ b/gcc/config/rs6000/t-rs6000 @@ -128,6 +128,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \ $(srcdir)/config/rs6000/vsx.md \ $(srcdir)/config/rs6000/altivec.md \ $(srcdir)/config/rs6000/mma.md \ + $(srcdir)/config/rs6000/vector-pair.md \ $(srcdir)/config/rs6000/crypto.md \ $(srcdir)/config/rs6000/htm.md \ $(srcdir)/config/rs6000/dfp.md \ diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md new file mode 100644 index 00000000000..0c3afd049a4 --- /dev/null +++ b/gcc/config/rs6000/vector-pair.md @@ -0,0 +1,794 @@ +;; Vector pair arithmetic support. +;; Copyright (C) 2020-2023 Free Software Foundation, Inc. +;; Contributed by Peter Bergner and +;; Michael Meissner +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . +;; +;; This file adds support for doing vector operations on pairs of vector +;; registers. Most of the instructions use vector pair instructions to load +;; and possibly store registers, but splitting the operation after register +;; allocation to do 2 separate operations. The second scheduler pass can +;; interleave other instructions between these pairs of instructions if +;; possible. 
+ +(define_c_enum "unspec" + [UNSPEC_VPAIR_ZERO + UNSPEC_VPAIR_V4DF + UNSPEC_VPAIR_V8SF + UNSPEC_VPAIR_V32QI + UNSPEC_VPAIR_V16HI + UNSPEC_VPAIR_V8SI + UNSPEC_VPAIR_V4DI + UNSPEC_VPAIR_REDUCE_PLUS_F32 + UNSPEC_VPAIR_REDUCE_PLUS_F64 + UNSPEC_VPAIR_REDUCE_PLUS_I64 + ]) + +;; Iterators doing unary/binary arithmetic on vector pairs +(define_code_iterator VPAIR_FP_UNARY [abs neg]) +(define_code_iterator VPAIR_FP_BINARY [minus mult plus smin smax]) + +(define_code_iterator VPAIR_INT_BINARY [and ior minus plus smax smin umax umin xor]) + +;; Return the insn name from the VPAIR_* code iterator +(define_code_attr vpair_op [(abs "abs") + (and "and") + (ior "ior") + (minus "sub") + (mult "mul") + (not "not") + (neg "neg") + (plus "add") + (smin "smin") + (smax "smax") + (umin "umin") + (umax "umax") + (xor "xor")]) + +;; Return the register constraint ("v" or "wa") for the integer code iterator +;; used +(define_code_attr vpair_ireg [(and "wa") + (ior "wa") + (minus "v") + (not "wa") + (neg "v") + (plus "v") + (smax "v") + (smin "v") + (umax "v") + (umin "v") + (xor "wa")]) + +;; Return the register predicate for the integer code iterator used +(define_code_attr vpair_ipred [(and "vsx_register_operand") + (ior "vsx_register_operand") + (minus "altivec_register_operand") + (not "vsx_register_operand") + (neg "altivec_register_operand") + (plus "altivec_register_operand") + (smax "altivec_register_operand") + (smin "altivec_register_operand") + (umax "altivec_register_operand") + (umin "altivec_register_operand") + (xor "vsx_register_operand")]) + +;; Iterators for creating the wrappers for vector pair built-ins +(define_int_iterator VPAIR_FP_WRAPPER [UNSPEC_VPAIR_V4DF + UNSPEC_VPAIR_V8SF]) + +(define_int_iterator VPAIR_INT_WRAPPER [UNSPEC_VPAIR_V4DI + UNSPEC_VPAIR_V8SI + UNSPEC_VPAIR_V16HI + UNSPEC_VPAIR_V32QI]) + +;; Map VPAIR_{INT,FP}_WRAPPER to the vector type of the arguments after they +;; are split +(define_int_attr VPAIR_VECTOR [(UNSPEC_VPAIR_V4DF "V2DF") + (UNSPEC_VPAIR_V8SF "V4SF") + (UNSPEC_VPAIR_V32QI "V16QI") + (UNSPEC_VPAIR_V16HI "V8HI") + (UNSPEC_VPAIR_V8SI "V4SI") + (UNSPEC_VPAIR_V4DI "V2DI")]) + +;; Map VPAIR_{INT,FP}_WRAPPER to a lower case name to identify the vector pair. +(define_int_attr vpair_type [(UNSPEC_VPAIR_V4DF "v4df") + (UNSPEC_VPAIR_V8SF "v8sf") + (UNSPEC_VPAIR_V32QI "v32qi") + (UNSPEC_VPAIR_V16HI "v16hi") + (UNSPEC_VPAIR_V8SI "v8si") + (UNSPEC_VPAIR_V4DI "v4di")]) + +;; Map VPAIR_INT_WRAPPER to constraints used for the negate scratch register.
+(define_int_attr vpair_neg_reg [(UNSPEC_VPAIR_V32QI "&v") + (UNSPEC_VPAIR_V16HI "&v") + (UNSPEC_VPAIR_V8SI "X") + (UNSPEC_VPAIR_V4DI "X")]) + +;; Initialize a vector pair to 0 +(define_insn_and_split "vpair_zero" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO [(const_int 0)] UNSPEC_VPAIR_ZERO))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 1) (match_dup 3)) + (set (match_dup 2) (match_dup 3))] +{ + unsigned reg0 = reg_or_subregno (operands[0]); + rtvec vec_zero = gen_rtvec (2, const0_rtx, const0_rtx); + + operands[1] = gen_rtx_REG (V2DImode, reg0); + operands[2] = gen_rtx_REG (V2DImode, reg0 + 1); + operands[3] = gen_rtx_CONST_VECTOR (V2DImode, vec_zero); +} + [(set_attr "length" "8")]) + + +;; Vector pair floating point unary operations +(define_insn_and_split "vpair__2" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO [(VPAIR_FP_UNARY:OO + (match_operand:OO 1 "vsx_register_operand" "wa"))] + VPAIR_FP_WRAPPER))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 2) (VPAIR_FP_UNARY: (match_dup 3))) + (set (match_dup 4) (VPAIR_FP_UNARY: (match_dup 5)))] +{ + unsigned reg0 = reg_or_subregno (operands[0]); + unsigned reg1 = reg_or_subregno (operands[1]); + machine_mode vmode = mode; + + operands[2] = gen_rtx_REG (vmode, reg0); + operands[3] = gen_rtx_REG (vmode, reg1); + operands[4] = gen_rtx_REG (vmode, reg0 + 1); + operands[5] = gen_rtx_REG (vmode, reg1 + 1); +} + [(set_attr "length" "8")]) + +;; Optimize vector pair negate of absolute value +(define_insn_and_split "vpair_nabs_2" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO + [(neg:OO + (unspec:OO + [(abs:OO (match_operand:OO 1 "vsx_register_operand" "ww"))] + VPAIR_FP_WRAPPER))] + VPAIR_FP_WRAPPER))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 2) + (neg: + (abs: (match_dup 3)))) + (set (match_dup 4) + (neg: + (abs: (match_dup 5))))] +{ + unsigned reg0 = reg_or_subregno (operands[0]); + unsigned reg1 = reg_or_subregno (operands[1]); + machine_mode vmode = mode; + + operands[2] = gen_rtx_REG (vmode, reg0); + operands[3] = gen_rtx_REG (vmode, reg1); + operands[4] = gen_rtx_REG (vmode, reg0 + 1); + operands[5] = gen_rtx_REG (vmode, reg1 + 1); +} + [(set_attr "length" "8")]) + +;; Vector pair floating binary operations +(define_insn_and_split "vpair__3" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO [(VPAIR_FP_BINARY:OO + (match_operand:OO 1 "vsx_register_operand" "wa") + (match_operand:OO 2 "vsx_register_operand" "wa"))] + VPAIR_FP_WRAPPER))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 3) + (VPAIR_FP_BINARY: (match_dup 4) + (match_dup 5))) + (set (match_dup 6) + (VPAIR_FP_BINARY: (match_dup 7) + (match_dup 8)))] +{ + unsigned reg0 = reg_or_subregno (operands[0]); + unsigned reg1 = reg_or_subregno (operands[1]); + unsigned reg2 = reg_or_subregno (operands[2]); + machine_mode vmode = mode; + + operands[3] = gen_rtx_REG (vmode, reg0); + operands[4] = gen_rtx_REG (vmode, reg1); + operands[5] = gen_rtx_REG (vmode, reg2); + + operands[6] = gen_rtx_REG (vmode, reg0 + 1); + operands[7] = gen_rtx_REG (vmode, reg1 + 1); + operands[8] = gen_rtx_REG (vmode, reg2 + 1); +} + [(set_attr "length" "8")]) + +;; Vector pair fused multiply-add floating point operations +(define_insn_and_split "vpair_fma_4" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(fma:OO + (match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + 
(match_operand:OO 3 "vsx_register_operand" "0,wa"))] + VPAIR_FP_WRAPPER))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 4) + (fma: (match_dup 5) + (match_dup 6) + (match_dup 7))) + (set (match_dup 8) + (fma: (match_dup 9) + (match_dup 10) + (match_dup 11)))] +{ + unsigned reg0 = reg_or_subregno (operands[0]); + unsigned reg1 = reg_or_subregno (operands[1]); + unsigned reg2 = reg_or_subregno (operands[2]); + unsigned reg3 = reg_or_subregno (operands[3]); + machine_mode vmode = mode; + + operands[4] = gen_rtx_REG (vmode, reg0); + operands[5] = gen_rtx_REG (vmode, reg1); + operands[6] = gen_rtx_REG (vmode, reg2); + operands[7] = gen_rtx_REG (vmode, reg3); + + operands[8] = gen_rtx_REG (vmode, reg0 + 1); + operands[9] = gen_rtx_REG (vmode, reg1 + 1); + operands[10] = gen_rtx_REG (vmode, reg2 + 1); + operands[11] = gen_rtx_REG (vmode, reg3 + 1); +} + [(set_attr "length" "8")]) + +(define_insn_and_split "vpair_fms_4" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(fma:OO + (match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (unspec:OO + [(neg:OO (match_operand:OO 3 "vsx_register_operand" "0,wa"))] + VPAIR_FP_WRAPPER))] + VPAIR_FP_WRAPPER))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 4) + (fma: (match_dup 5) + (match_dup 6) + (neg: (match_dup 7)))) + (set (match_dup 8) + (fma: (match_dup 9) + (match_dup 10) + (neg: (match_dup 11))))] +{ + unsigned reg0 = reg_or_subregno (operands[0]); + unsigned reg1 = reg_or_subregno (operands[1]); + unsigned reg2 = reg_or_subregno (operands[2]); + unsigned reg3 = reg_or_subregno (operands[3]); + machine_mode vmode = mode; + + operands[4] = gen_rtx_REG (vmode, reg0); + operands[5] = gen_rtx_REG (vmode, reg1); + operands[6] = gen_rtx_REG (vmode, reg2); + operands[7] = gen_rtx_REG (vmode, reg3); + + operands[8] = gen_rtx_REG (vmode, reg0 + 1); + operands[9] = gen_rtx_REG (vmode, reg1 + 1); + operands[10] = gen_rtx_REG (vmode, reg2 + 1); + operands[11] = gen_rtx_REG (vmode, reg3 + 1); +} + [(set_attr "length" "8")]) + +(define_insn_and_split "vpair_nfma_4" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(neg:OO + (unspec:OO + [(fma:OO + (match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (match_operand:OO 3 "vsx_register_operand" "0,wa"))] + VPAIR_FP_WRAPPER))] + VPAIR_FP_WRAPPER))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 4) + (neg: + (fma: (match_dup 5) + (match_dup 6) + (match_dup 7)))) + (set (match_dup 8) + (neg: + (fma: (match_dup 9) + (match_dup 10) + (match_dup 11))))] +{ + unsigned reg0 = reg_or_subregno (operands[0]); + unsigned reg1 = reg_or_subregno (operands[1]); + unsigned reg2 = reg_or_subregno (operands[2]); + unsigned reg3 = reg_or_subregno (operands[3]); + machine_mode vmode = mode; + + operands[4] = gen_rtx_REG (vmode, reg0); + operands[5] = gen_rtx_REG (vmode, reg1); + operands[6] = gen_rtx_REG (vmode, reg2); + operands[7] = gen_rtx_REG (vmode, reg3); + + operands[8] = gen_rtx_REG (vmode, reg0 + 1); + operands[9] = gen_rtx_REG (vmode, reg1 + 1); + operands[10] = gen_rtx_REG (vmode, reg2 + 1); + operands[11] = gen_rtx_REG (vmode, reg3 + 1); +} + [(set_attr "length" "8")]) + +(define_insn_and_split "vpair_nfms_4" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(neg:OO + (unspec:OO + [(fma:OO + (match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" 
"wa,0") + (unspec:OO + [(neg:OO (match_operand:OO 3 "vsx_register_operand" "0,wa"))] + VPAIR_FP_WRAPPER))] + VPAIR_FP_WRAPPER))] + VPAIR_FP_WRAPPER))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 4) + (neg: + (fma: (match_dup 5) + (match_dup 6) + (neg: (match_dup 7))))) + (set (match_dup 8) + (neg: + (fma: (match_dup 9) + (match_dup 10) + (neg: (match_dup 11)))))] +{ + unsigned reg0 = reg_or_subregno (operands[0]); + unsigned reg1 = reg_or_subregno (operands[1]); + unsigned reg2 = reg_or_subregno (operands[2]); + unsigned reg3 = reg_or_subregno (operands[3]); + machine_mode vmode = mode; + + operands[4] = gen_rtx_REG (vmode, reg0); + operands[5] = gen_rtx_REG (vmode, reg1); + operands[6] = gen_rtx_REG (vmode, reg2); + operands[7] = gen_rtx_REG (vmode, reg3); + + operands[8] = gen_rtx_REG (vmode, reg0 + 1); + operands[9] = gen_rtx_REG (vmode, reg1 + 1); + operands[10] = gen_rtx_REG (vmode, reg2 + 1); + operands[11] = gen_rtx_REG (vmode, reg3 + 1); +} + [(set_attr "length" "8")]) + +;; Add all elements in a pair of V4SF vectors. +(define_insn_and_split "vpair_reduc_plus_scale_v8sf" + [(set (match_operand:SF 0 "vsx_register_operand" "=wa") + (unspec:SF [(match_operand:OO 1 "vsx_register_operand" "v")] + UNSPEC_VPAIR_REDUCE_PLUS_F32)) + (clobber (match_scratch:V4SF 2 "=&v")) + (clobber (match_scratch:V4SF 3 "=&v"))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(pc)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx tmp1 = operands[2]; + rtx tmp2 = operands[3]; + unsigned r = reg_or_subregno (op1); + rtx op1_hi = gen_rtx_REG (V4SFmode, r); + rtx op1_lo = gen_rtx_REG (V4SFmode, r + 1); + + emit_insn (gen_addv4sf3 (tmp1, op1_hi, op1_lo)); + emit_insn (gen_altivec_vsldoi_v4sf (tmp2, tmp1, tmp1, GEN_INT (8))); + emit_insn (gen_addv4sf3 (tmp2, tmp1, tmp2)); + emit_insn (gen_altivec_vsldoi_v4sf (tmp1, tmp2, tmp2, GEN_INT (4))); + emit_insn (gen_addv4sf3 (tmp2, tmp1, tmp2)); + emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp2)); + DONE; +} + [(set_attr "length" "24")]) + +;; Add all elements in a pair of V2DF vectors +(define_insn_and_split "vpair_reduc_plus_scale_v4df" + [(set (match_operand:DF 0 "vsx_register_operand" "=&wa") + (unspec:DF [(match_operand:OO 1 "vsx_register_operand" "wa")] + UNSPEC_VPAIR_REDUCE_PLUS_F64)) + (clobber (match_scratch:DF 2 "=&wa")) + (clobber (match_scratch:V2DF 3 "=&wa"))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 3) + (plus:V2DF (match_dup 4) + (match_dup 5))) + (set (match_dup 2) + (vec_select:DF (match_dup 3) + (parallel [(match_dup 6)]))) + (set (match_dup 0) + (plus:DF (match_dup 7) + (match_dup 2)))] +{ + unsigned reg1 = reg_or_subregno (operands[1]); + unsigned reg3 = reg_or_subregno (operands[3]); + + operands[4] = gen_rtx_REG (V2DFmode, reg1); + operands[5] = gen_rtx_REG (V2DFmode, reg1 + 1); + operands[6] = GEN_INT (BYTES_BIG_ENDIAN ? 1 : 0); + operands[7] = gen_rtx_REG (DFmode, reg3); +}) + + +;; Vector pair integer negate support. 
+(define_insn_and_split "vpair_neg_2" + [(set (match_operand:OO 0 "altivec_register_operand" "=v") + (unspec:OO [(neg:OO + (match_operand:OO 1 "altivec_register_operand" "v"))] + VPAIR_INT_WRAPPER)) + (clobber (match_scratch: 2 "="))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (minus: (match_dup 2) + (match_dup 5))) + (set (match_dup 6) (minus: (match_dup 2) + (match_dup 7)))] +{ + unsigned reg0 = reg_or_subregno (operands[0]); + unsigned reg1 = reg_or_subregno (operands[1]); + machine_mode vmode = mode; + + operands[3] = CONST0_RTX (vmode); + + operands[4] = gen_rtx_REG (vmode, reg0); + operands[5] = gen_rtx_REG (vmode, reg1); + + operands[6] = gen_rtx_REG (vmode, reg0 + 1); + operands[7] = gen_rtx_REG (vmode, reg1 + 1); + + /* If the vector integer size is 32 or 64 bits, we can use the vneg{w,d} + instructions. */ + if (vmode == V4SImode) + { + emit_insn (gen_negv4si2 (operands[4], operands[5])); + emit_insn (gen_negv4si2 (operands[6], operands[7])); + DONE; + } + else if (vmode == V2DImode) + { + emit_insn (gen_negv2di2 (operands[4], operands[5])); + emit_insn (gen_negv2di2 (operands[6], operands[7])); + DONE; + } +} + [(set_attr "length" "8")]) + +;; Vector pair integer not support. +(define_insn_and_split "vpair_not_2" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO [(not:OO (match_operand:OO 1 "vsx_register_operand" "wa"))] + VPAIR_INT_WRAPPER))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 2) (not: (match_dup 3))) + (set (match_dup 4) (not: (match_dup 5)))] +{ + unsigned reg0 = reg_or_subregno (operands[0]); + unsigned reg1 = reg_or_subregno (operands[1]); + machine_mode vmode = mode; + + operands[2] = gen_rtx_REG (vmode, reg0); + operands[3] = gen_rtx_REG (vmode, reg1); + + operands[4] = gen_rtx_REG (vmode, reg0 + 1); + operands[5] = gen_rtx_REG (vmode, reg1 + 1); +} + [(set_attr "length" "8")]) + +;; Vector pair integer binary operations. 
+(define_insn_and_split "vpair__3" + [(set (match_operand:OO 0 "" "=") + (unspec:OO [(VPAIR_INT_BINARY:OO + (match_operand:OO 1 "" "") + (match_operand:OO 2 "" ""))] + VPAIR_INT_WRAPPER))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 3) + (VPAIR_INT_BINARY: (match_dup 4) + (match_dup 5))) + (set (match_dup 6) + (VPAIR_INT_BINARY: (match_dup 7) + (match_dup 8)))] +{ + unsigned reg0 = reg_or_subregno (operands[0]); + unsigned reg1 = reg_or_subregno (operands[1]); + unsigned reg2 = reg_or_subregno (operands[2]); + machine_mode vmode = mode; + + operands[3] = gen_rtx_REG (vmode, reg0); + operands[4] = gen_rtx_REG (vmode, reg1); + operands[5] = gen_rtx_REG (vmode, reg2); + + operands[6] = gen_rtx_REG (vmode, reg0 + 1); + operands[7] = gen_rtx_REG (vmode, reg1 + 1); + operands[8] = gen_rtx_REG (vmode, reg2 + 1); +} + [(set_attr "length" "8")]) + +;; Optimize vector pair a & ~b +(define_insn_and_split "*vpair_andc_" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO [(and:OO + (unspec:OO + [(not:OO + (match_operand:OO 1 "vsx_register_operand" "wa"))] + VPAIR_INT_WRAPPER) + (match_operand:OO 2 "vsx_register_operand" "wa"))] + VPAIR_INT_WRAPPER))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 3) + (and: (not: (match_dup 4)) + (match_dup 5))) + (set (match_dup 6) + (and: (not: (match_dup 7)) + (match_dup 8)))] +{ + unsigned reg0 = reg_or_subregno (operands[0]); + unsigned reg1 = reg_or_subregno (operands[1]); + unsigned reg2 = reg_or_subregno (operands[2]); + machine_mode vmode = mode; + + operands[3] = gen_rtx_REG (vmode, reg0); + operands[4] = gen_rtx_REG (vmode, reg1); + operands[5] = gen_rtx_REG (vmode, reg2); + + operands[6] = gen_rtx_REG (vmode, reg0 + 1); + operands[7] = gen_rtx_REG (vmode, reg1 + 1); + operands[8] = gen_rtx_REG (vmode, reg2 + 1); +} + [(set_attr "length" "8")]) + +;; Optimize vector pair a | ~b +(define_insn_and_split "*vpair_iorc_" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO [(ior:OO + (unspec:OO + [(not:OO + (match_operand:OO 1 "vsx_register_operand" "wa"))] + VPAIR_INT_WRAPPER) + (match_operand:OO 2 "vsx_register_operand" "wa"))] + VPAIR_INT_WRAPPER))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 3) + (ior: (not: (match_dup 4)) + (match_dup 5))) + (set (match_dup 6) + (ior: (not: (match_dup 7)) + (match_dup 8)))] +{ + unsigned reg0 = reg_or_subregno (operands[0]); + unsigned reg1 = reg_or_subregno (operands[1]); + unsigned reg2 = reg_or_subregno (operands[2]); + machine_mode vmode = mode; + + operands[3] = gen_rtx_REG (vmode, reg0); + operands[4] = gen_rtx_REG (vmode, reg1); + operands[5] = gen_rtx_REG (vmode, reg2); + + operands[6] = gen_rtx_REG (vmode, reg0 + 1); + operands[7] = gen_rtx_REG (vmode, reg1 + 1); + operands[8] = gen_rtx_REG (vmode, reg2 + 1); +} + [(set_attr "length" "8")]) + +;; Optiomize vector pair ~(a & b) or ((~a) | (~b)) +(define_insn_and_split "*vpair_nand__1" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO + [(not:OO + (unspec:OO [(and:OO + (match_operand:OO 1 "vsx_register_operand" "wa") + (match_operand:OO 2 "vsx_register_operand" "wa"))] + VPAIR_INT_WRAPPER))] + VPAIR_INT_WRAPPER))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 3) + (ior: (not: (match_dup 4)) + (not: (match_dup 5)))) + (set (match_dup 6) + (ior: (not: (match_dup 7)) + (not: (match_dup 8))))] +{ + unsigned reg0 = reg_or_subregno (operands[0]); + unsigned reg1 = reg_or_subregno (operands[1]); + unsigned reg2 = reg_or_subregno (operands[2]); 
+ machine_mode vmode = mode; + + operands[3] = gen_rtx_REG (vmode, reg0); + operands[4] = gen_rtx_REG (vmode, reg1); + operands[5] = gen_rtx_REG (vmode, reg2); + + operands[6] = gen_rtx_REG (vmode, reg0 + 1); + operands[7] = gen_rtx_REG (vmode, reg1 + 1); + operands[8] = gen_rtx_REG (vmode, reg2 + 1); +} + [(set_attr "length" "8")]) + +(define_insn_and_split "*vpair_nand__2" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO + [(ior:OO + (unspec:OO [(not:OO + (match_operand:OO 1 "vsx_register_operand" "wa"))] + VPAIR_INT_WRAPPER) + (unspec:OO [(not:OO + (match_operand:OO 2 "vsx_register_operand" "wa"))] + VPAIR_INT_WRAPPER))] + VPAIR_INT_WRAPPER))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 3) + (ior: (not: (match_dup 4)) + (not: (match_dup 5)))) + (set (match_dup 6) + (ior: (not: (match_dup 7)) + (not: (match_dup 8))))] +{ + unsigned reg0 = reg_or_subregno (operands[0]); + unsigned reg1 = reg_or_subregno (operands[1]); + unsigned reg2 = reg_or_subregno (operands[2]); + machine_mode vmode = mode; + + operands[3] = gen_rtx_REG (vmode, reg0); + operands[4] = gen_rtx_REG (vmode, reg1); + operands[5] = gen_rtx_REG (vmode, reg2); + + operands[6] = gen_rtx_REG (vmode, reg0 + 1); + operands[7] = gen_rtx_REG (vmode, reg1 + 1); + operands[8] = gen_rtx_REG (vmode, reg2 + 1); +} + [(set_attr "length" "8")]) + +;; Optiomize vector pair ~(a | b) or ((~a) & (~b)) +(define_insn_and_split "*vpair_nor__1" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO + [(not:OO + (unspec:OO [(ior:OO + (match_operand:OO 1 "vsx_register_operand" "wa") + (match_operand:OO 2 "vsx_register_operand" "wa"))] + VPAIR_INT_WRAPPER))] + VPAIR_INT_WRAPPER))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 3) + (and: (not: (match_dup 4)) + (not: (match_dup 5)))) + (set (match_dup 6) + (and: (not: (match_dup 7)) + (not: (match_dup 8))))] +{ + unsigned reg0 = reg_or_subregno (operands[0]); + unsigned reg1 = reg_or_subregno (operands[1]); + unsigned reg2 = reg_or_subregno (operands[2]); + machine_mode vmode = mode; + + operands[3] = gen_rtx_REG (vmode, reg0); + operands[4] = gen_rtx_REG (vmode, reg1); + operands[5] = gen_rtx_REG (vmode, reg2); + + operands[6] = gen_rtx_REG (vmode, reg0 + 1); + operands[7] = gen_rtx_REG (vmode, reg1 + 1); + operands[8] = gen_rtx_REG (vmode, reg2 + 1); +} + [(set_attr "length" "8")]) + +(define_insn_and_split "*vpair_nor__2" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO + [(ior:OO + (unspec:OO [(not:OO + (match_operand:OO 1 "vsx_register_operand" "wa"))] + VPAIR_INT_WRAPPER) + (unspec:OO [(not:OO + (match_operand:OO 2 "vsx_register_operand" "wa"))] + VPAIR_INT_WRAPPER))] + VPAIR_INT_WRAPPER))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 3) + (and: (not: (match_dup 4)) + (not: (match_dup 5)))) + (set (match_dup 6) + (and: (not: (match_dup 7)) + (not: (match_dup 8))))] +{ + unsigned reg0 = reg_or_subregno (operands[0]); + unsigned reg1 = reg_or_subregno (operands[1]); + unsigned reg2 = reg_or_subregno (operands[2]); + machine_mode vmode = mode; + + operands[3] = gen_rtx_REG (vmode, reg0); + operands[4] = gen_rtx_REG (vmode, reg1); + operands[5] = gen_rtx_REG (vmode, reg2); + + operands[6] = gen_rtx_REG (vmode, reg0 + 1); + operands[7] = gen_rtx_REG (vmode, reg1 + 1); + operands[8] = gen_rtx_REG (vmode, reg2 + 1); +} + [(set_attr "length" "8")]) + +;; Add all elements in a pair of V2DI vectors +(define_insn_and_split "vpair_reduc_plus_scale_v4di" + [(set (match_operand:DI 0 
"gpc_reg_operand" "=&r") + (unspec:DI [(match_operand:OO 1 "altivec_register_operand" "v")] + UNSPEC_VPAIR_REDUCE_PLUS_I64)) + (clobber (match_scratch:DI 2 "=&r")) + (clobber (match_scratch:V2DI 3 "=&v"))] + "TARGET_MMA && TARGET_POWERPC64" + "#" + "&& reload_completed" + [(set (match_dup 3) + (plus:V2DI (match_dup 4) + (match_dup 5))) + (set (match_dup 2) + (vec_select:DI (match_dup 3) + (parallel [(const_int 0)]))) + (set (match_dup 0) + (vec_select:DI (match_dup 3) + (parallel [(const_int 1)]))) + (set (match_dup 0) + (plus:DI (match_dup 0) + (match_dup 2)))] +{ + unsigned reg1 = reg_or_subregno (operands[1]); + + operands[4] = gen_rtx_REG (V2DImode, reg1); + operands[5] = gen_rtx_REG (V2DImode, reg1 + 1); +} + [(set_attr "length" "16")]) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 97eaacf8a7e..c3501f06572 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -19150,6 +19150,7 @@ The PVIPR documents the following overloaded functions: * PowerPC AltiVec Built-in Functions Available on ISA 2.07:: * PowerPC AltiVec Built-in Functions Available on ISA 3.0:: * PowerPC AltiVec Built-in Functions Available on ISA 3.1:: +* PowerPC Vector Pair Built-in Functions Available on ISA 3.1:: @end menu @node PowerPC AltiVec Built-in Functions on ISA 2.05 @@ -21103,6 +21104,123 @@ int vec_any_le (vector unsigned __int128, vector unsigned __int128); @end smallexample +@node PowerPC Vector Pair Built-in Functions Available on ISA 3.1 +@subsection PowerPC Vector Pair Built-in Functions Available on ISA 3.1 + +GCC provides functions to speed up processing by using +@code{__vector_pair} to hold two vectors. The load vector pair and +store vector pair instructions are used to load the values into +registers and store the values. The operation itself is split into +two separate vector instructions. To use the vector pair built-in +functions, you need to have MMA support enabled (@option{-mmma}, which +is enabled by default with @option{-mcpu=power10}). 
+ +The following built-in function is independent of the type of the underlying vectors: + +@smallexample +__vector_pair __builtin_vpair_zero (); +@end smallexample + +The following built-in functions operate on pairs of +@code{vector float} values: + +@smallexample +__vector_pair __builtin_vpair_f32_abs (__vector_pair); +__vector_pair __builtin_vpair_f32_add (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f32_fma (__vector_pair, __vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f32_mul (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f32_neg (__vector_pair); +__vector_pair __builtin_vpair_f32_smax (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f32_smin (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f32_sub (__vector_pair, __vector_pair); +float __builtin_vpair_f32_add_elements (__vector_pair); +@end smallexample + +The following built-in functions operate on pairs of +@code{vector double} values: + +@smallexample +__vector_pair __builtin_vpair_f64_abs (__vector_pair); +__vector_pair __builtin_vpair_f64_add (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f64_fma (__vector_pair, __vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f64_mul (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f64_neg (__vector_pair); +__vector_pair __builtin_vpair_f64_smax (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f64_smin (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f64_sub (__vector_pair, __vector_pair); +double __builtin_vpair_f64_add_elements (__vector_pair); +@end smallexample + +The following built-in functions operate on pairs of +@code{vector long long} or @code{vector unsigned long long} values: + +@smallexample +__vector_pair __builtin_vpair_i64_add (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i64_and (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i64_ior (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i64_neg (__vector_pair); +__vector_pair __builtin_vpair_i64_not (__vector_pair); +__vector_pair __builtin_vpair_i64_smax (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i64_smin (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i64_sub (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i64_umax (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i64_umin (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i64_xor (__vector_pair, __vector_pair); +long long __builtin_vpair_i64_add_elements (__vector_pair); +@end smallexample + +The following built-in functions operate on pairs of +@code{vector int} or @code{vector unsigned int} values: + +@smallexample +__vector_pair __builtin_vpair_i32_add (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i32_and (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i32_ior (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i32_neg (__vector_pair); +__vector_pair __builtin_vpair_i32_not (__vector_pair); +__vector_pair __builtin_vpair_i32_smax (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i32_smin (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i32_sub (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i32_umax (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i32_umin (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i32_xor (__vector_pair, __vector_pair); +@end smallexample + +The following built-in functions
operate on pairs of +@code{vector short} or @code{vector unsigned short} values: + +@smallexample +__vector_pair __builtin_vpair_i16_add (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i16_and (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i16_ior (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i16_neg (__vector_pair); +__vector_pair __builtin_vpair_i16_not (__vector_pair); +__vector_pair __builtin_vpair_i16_smax (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i16_smin (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i16_sub (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i16_umax (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i16_umin (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i16_xor (__vector_pair, __vector_pair); +@end smallexample + +The following built-in functions operate on pairs of +@code{vector signed char} or @code{vector unsigned char} values: + +@smallexample +__vector_pair __builtin_vpair_i8_add (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i8_and (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i8_ior (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i8_neg (__vector_pair); +__vector_pair __builtin_vpair_i8_not (__vector_pair); +__vector_pair __builtin_vpair_i8_smax (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i8_smin (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i8_sub (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i8_umax (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i8_umin (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_i8_xor (__vector_pair, __vector_pair); +@end smallexample + @node PowerPC Hardware Transactional Memory Built-in Functions @subsection PowerPC Hardware Transactional Memory Built-in Functions GCC provides two interfaces for accessing the Hardware Transactional