From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id 3BC433858281; Tue, 23 Jan 2024 07:10:54 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 3BC433858281 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1705993854; bh=iDixY0M8s4uMqCftCUTco4Ado7z2bBVNMbx7je524xY=; h=From:To:Subject:Date:From; b=tXAfWH8fQDxyy+93b/AzZl5VuasMdy7THXJ8o6qyi/+xeW0CiQQZ2PVhccYtThJik tMow1BdYO1KJa4URRdcS9YVc5jjteGDLhaxakj4QLu+JPdTmk8Fx+GXCuaoWFMOjUN 5eM4CBuVLTmfHzHsG+P5Bx5eR68yq+l7apUGc0U4= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work154-vpair)] Add vector pair init and splat. X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/work154-vpair X-Git-Oldrev: d6c48ffd5b8e43023e3efbdd1aa20b7830b82fb8 X-Git-Newrev: 0ef492d559dd4505072f9511510f6bba3faeb995 Message-Id: <20240123071054.3BC433858281@sourceware.org> Date: Tue, 23 Jan 2024 07:10:54 +0000 (GMT) List-Id: https://gcc.gnu.org/g:0ef492d559dd4505072f9511510f6bba3faeb995 commit 0ef492d559dd4505072f9511510f6bba3faeb995 Author: Michael Meissner Date: Tue Jan 23 02:09:44 2024 -0500 Add vector pair init and splat. 2024-01-23 Michael Meissner gcc/ * config/rs6000/rs6000-builtins.def (__builtin_vpair_zero): New built-in function. (__builtin_vpair_f32_splat): Likewise. (__builtin_vpair_f64_splat): Likewise. * config/rs6000/vector-pair.md (UNSPEC_VPAIR_ZERO): New unspec. (UNSPEC_VPAIR_SPLAT): Likewise. (VPAIR_SPLAT_VMODE): New mode iterator. (VPAIR_SPLAT_ELEMENT_TO_VMODE): New mode attribute. (vpair_splat_name): Likewise. (vpair_zero): New insn. (vpair_splat_<vpair_splat_name>): New define_expand. (vpair_splat_<vpair_splat_name>_internal): New insns. gcc/testsuite/ * gcc.target/powerpc/vector-pair-5.c: New test. * gcc.target/powerpc/vector-pair-6.c: Likewise. 
Diff: --- gcc/config/rs6000/rs6000-builtins.def | 10 +++ gcc/config/rs6000/vector-pair.md | 102 ++++++++++++++++++++++- gcc/doc/extend.texi | 9 ++ gcc/testsuite/gcc.target/powerpc/vector-pair-5.c | 56 +++++++++++++ gcc/testsuite/gcc.target/powerpc/vector-pair-6.c | 56 +++++++++++++ 5 files changed, 232 insertions(+), 1 deletion(-) diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 4362cbb8fc7..b757a8630ff 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -4132,6 +4132,10 @@ void __builtin_vsx_stxvp (v256, unsigned long, const v256 *); STXVP nothing {mma,pair} +;; Vector pair built-in functions. + v256 __builtin_vpair_zero (); + VPAIR_ZERO vpair_zero {mma} + ;; Vector pair built-in functions with float elements v256 __builtin_vpair_f32_abs (v256); VPAIR_F32_ABS vpair_abs_v8sf2 {mma} @@ -4169,6 +4173,9 @@ v256 __builtin_vpair_f32_nfms (v256, v256, v256); VPAIR_F32_NFMS vpair_nfms_v8sf4 {mma} + v256 __builtin_vpair_f32_splat (float); + VPAIR_F32_SPLAT vpair_splat_v8sf {mma} + v256 __builtin_vpair_f32_sub (v256, v256); VPAIR_F32_SUB vpair_sub_v8sf3 {mma} @@ -4209,5 +4216,8 @@ v256 __builtin_vpair_f64_nfms (v256, v256, v256); VPAIR_F64_NFMS vpair_nfms_v4df4 {mma} + v256 __builtin_vpair_f64_splat (double); + VPAIR_F64_SPLAT vpair_splat_v4df {mma} + v256 __builtin_vpair_f64_sub (v256, v256); VPAIR_F64_SUB vpair_sub_v4df3 {mma} diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md index 73ae46e6d40..39b419c6814 100644 --- a/gcc/config/rs6000/vector-pair.md +++ b/gcc/config/rs6000/vector-pair.md @@ -38,7 +38,9 @@ UNSPEC_VPAIR_NEG UNSPEC_VPAIR_PLUS UNSPEC_VPAIR_SMAX - UNSPEC_VPAIR_SMIN]) + UNSPEC_VPAIR_SMIN + UNSPEC_VPAIR_ZERO + UNSPEC_VPAIR_SPLAT]) ;; Vector pair element ID that defines the scaler element within the vector pair. (define_c_enum "vpair_element" @@ -98,6 +100,104 @@ ;; Map the scalar element ID into the appropriate insn type for divide. 
(define_int_attr vpair_divtype [(VPAIR_ELEMENT_FLOAT "vecfdiv") (VPAIR_ELEMENT_DOUBLE "vecdiv")]) + +;; Mode iterator for the vector modes that we provide splat operations for. +(define_mode_iterator VPAIR_SPLAT_VMODE [V4SF V2DF]) + +;; Map element mode to 128-bit vector mode for splat operations +(define_mode_attr VPAIR_SPLAT_ELEMENT_TO_VMODE [(SF "V4SF") + (DF "V2DF")]) + +;; Map either element mode or vector mode into the name for the splat insn. +(define_mode_attr vpair_splat_name [(SF "v8sf") + (DF "v4df") + (V4SF "v8sf") + (V2DF "v4df")]) + +;; Initialize a vector pair to 0 +(define_insn_and_split "vpair_zero" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa") + (unspec:OO [(const_int 0)] UNSPEC_VPAIR_ZERO))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(set (match_dup 1) (match_dup 3)) + (set (match_dup 2) (match_dup 3))] +{ + rtx op0 = operands[0]; + + operands[1] = simplify_gen_subreg (V2DFmode, op0, OOmode, 0); + operands[2] = simplify_gen_subreg (V2DFmode, op0, OOmode, 16); + operands[3] = CONST0_RTX (V2DFmode); +} + [(set_attr "length" "8") + (set_attr "type" "vecperm")]) + +;; Create a vector pair with a value splat'ed (duplicated) to all of the +;; elements. +(define_expand "vpair_splat_<vpair_splat_name>" + [(use (match_operand:OO 0 "vsx_register_operand")) + (use (match_operand:SFDF 1 "input_operand"))] + "TARGET_MMA" +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + machine_mode element_mode = <MODE>mode; + + if (op1 == CONST0_RTX (element_mode)) + { + emit_insn (gen_vpair_zero (op0)); + DONE; + } + + machine_mode vector_mode = <VPAIR_SPLAT_ELEMENT_TO_VMODE>mode; + rtx vec = gen_reg_rtx (vector_mode); + unsigned num_elements = GET_MODE_NUNITS (vector_mode); + rtvec elements = rtvec_alloc (num_elements); + for (size_t i = 0; i < num_elements; i++) + RTVEC_ELT (elements, i) = copy_rtx (op1); + + rs6000_expand_vector_init (vec, gen_rtx_PARALLEL (vector_mode, elements)); + emit_insn (gen_vpair_splat_<vpair_splat_name>_internal (op0, vec)); + DONE; +}) + +;; Inner splat support. 
Operand1 is the vector splat created above. Allow +;; operand 1 to overlap with the output registers to eliminate one move +;; instruction. +(define_insn_and_split "vpair_splat_<vpair_splat_name>_internal" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(match_operand:VPAIR_SPLAT_VMODE 1 "vsx_register_operand" "0,wa")] + UNSPEC_VPAIR_SPLAT))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op0_a = simplify_gen_subreg (<MODE>mode, op0, OOmode, 0); + rtx op0_b = simplify_gen_subreg (<MODE>mode, op0, OOmode, 16); + rtx op1 = operands[1]; + unsigned op1_regno = reg_or_subregno (op1); + + /* Check if the input is one of the output registers. */ + if (op1_regno == reg_or_subregno (op0_a)) + emit_move_insn (op0_b, op1); + + else if (op1_regno == reg_or_subregno (op0_b)) + emit_move_insn (op0_a, op1); + + else + { + emit_move_insn (op0_a, op1); + emit_move_insn (op0_b, op1); + } + + DONE; +} + [(set_attr "length" "*,8") + (set_attr "type" "vecmove")]) ;; Vector pair unary operations. The last argument in the UNSPEC is a ;; CONST_INT which identifies what the scalar element is. diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 08d977515dc..d455d0c5624 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -23887,6 +23887,13 @@ The @code{nfma} built-in is a combination of @code{neg} of the The @code{nfms} built-in is a combination of @code{neg} of the @code{fms} built-in. 
+The following built-in function is independent of the type of the +underlying vector: + +@smallexample +__vector_pair __builtin_vpair_zero (); +@end smallexample + The following built-in functions operate on pairs of @code{vector float} values: @@ -23907,6 +23914,7 @@ __vector_pair __builtin_vpair_f32_nfma (__vector_pair, __vector_pair, __vector_pair); __vector_pair __builtin_vpair_f32_nfms (__vector_pair, __vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f32_splat (float); __vector_pair __builtin_vpair_f32_sub (__vector_pair, __vector_pair); @end smallexample @@ -23930,6 +23938,7 @@ __vector_pair __builtin_vpair_f64_nfma (__vector_pair, __vector_pair, __vector_pair); __vector_pair __builtin_vpair_f64_nfms (__vector_pair, __vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f64_splat (double); __vector_pair __builtin_vpair_f64_sub (__vector_pair, __vector_pair); @end smallexample diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-5.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-5.c new file mode 100644 index 00000000000..9b645e626e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-5.c @@ -0,0 +1,56 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test whether the vector builtin code generates the expected instructions for + vector pairs zero and splat functions for vector pairs containing + doubles. */ + +void +test_zero (__vector_pair *p) +{ + /* 2 xxspltib/xxlxor. */ + *p = __builtin_vpair_zero (); +} + +void +test_splat_zero (__vector_pair *p) +{ + /* 2 xxspltib/xxlxor. */ + *p = __builtin_vpair_f64_splat (0.0); +} + +void +test_splat_one (__vector_pair *p) +{ + /* xxspltidp, xxlor. */ + *p = __builtin_vpair_f64_splat (1.0); +} + +void +test_splat_pi (__vector_pair *p) +{ + /* plxv, xxlor (note, we cannot use xxspltidp). 
*/ + *p = __builtin_vpair_f64_splat (3.1415926535); +} + +void +test_splat_arg (__vector_pair *p, double x) +{ + /* xxpermdi, xxlor. */ + *p = __builtin_vpair_f64_splat (x); +} + +void +test_splat_mem (__vector_pair *p, double *q) +{ + /* lxvdsx, xxlor. */ + *p = __builtin_vpair_f64_splat (*q); +} + +/* { dg-final { scan-assembler-times {\mlxvdsx\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mp?lxvx?\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxxspltib\M|\mxxlxor\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxxspltidp\M} 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-6.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-6.c new file mode 100644 index 00000000000..5ec53d4bfc3 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-6.c @@ -0,0 +1,56 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test whether the vector builtin code generates the expected instructions for + vector pairs zero and splat functions for vector pairs containing + floats. */ + +void +test_zero (__vector_pair *p) +{ + /* 2 xxspltib/xxlxor. */ + *p = __builtin_vpair_zero (); +} + +void +test_splat_zero (__vector_pair *p) +{ + /* 2 xxspltib/xxlxor. */ + *p = __builtin_vpair_f32_splat (0.0f); +} + +void +test_splat_one (__vector_pair *p) +{ + /* xxspltiw, xxlor. */ + *p = __builtin_vpair_f32_splat (1.0f); +} + +void +test_splat_pi (__vector_pair *p) +{ + /* xxspltiw, xxlor. */ + *p = __builtin_vpair_f32_splat (3.1415926535f); +} + +void +test_splat_arg (__vector_pair *p, float x) +{ + /* xscvdpspn, xxspltw, xxlor. */ + *p = __builtin_vpair_f32_splat (x); +} + +void +test_splat_mem (__vector_pair *p, float *q) +{ + /* lxvwsx, xxlor. 
*/ + *p = __builtin_vpair_f32_splat (*q); +} + +/* { dg-final { scan-assembler-times {\mlxvwsx\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mxscvdpspn\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mxxspltib\M|\mxxlxor\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxspltw\M} 1 } } */