From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id F22DC3858C60; Mon, 18 Oct 2021 18:51:48 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org F22DC3858C60 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work071)] Generate XXSPLTIW on power10. X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/work071 X-Git-Oldrev: d8efdc5887a864b8842e7d4053d9cff4e5324a64 X-Git-Newrev: 4af9bc8155455aade656a31a820d222346406501 Message-Id: <20211018185148.F22DC3858C60@sourceware.org> Date: Mon, 18 Oct 2021 18:51:48 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 18 Oct 2021 18:51:49 -0000 https://gcc.gnu.org/g:4af9bc8155455aade656a31a820d222346406501 commit 4af9bc8155455aade656a31a820d222346406501 Author: Michael Meissner Date: Mon Oct 18 14:51:29 2021 -0400 Generate XXSPLTIW on power10. This patch adds support to automatically generate the ISA 3.1 XXSPLTIW instruction for V8HImode, V4SImode, and V4SFmode vectors. It does this by adding support for vector constants that can be used, and adding a VEC_DUPLICATE pattern to generate the actual XXSPLTIW instruction. The eP constraint added with the XXSPLTIDP patch will also recognize use of the XXSPLTIW instruction. I rewrote the XXSPLTW built-in functions to use VEC_DUPLICATE instead of UNSPEC. I added 4 new tests to test loading up V16QI, V8HI, V4SI, and V4SF vector constants. 2021-10-18 Michael Meissner gcc/ * config/rs6000/predicates.md (easy_fp_constant): Add support for XXSPLTIW. (vsx_prefixed_constant): Likewise. (easy_vector_constant): Likewise. * config/rs6000/rs6000-protos.h (vec_const_use_xxspltiw): New declaration. 
* config/rs6000/rs6000.c (xxspltib_constant_p): If we can generate XXSPLTIW, don't do XXSPLTIB and sign extend. (output_vec_const_move): Add support for XXSPLTIW. (prefixed_xxsplti_p): Recognize XXSPLTIW instructions as prefixed. (vec_const_use_xxspltiw): New function. * config/rs6000/rs6000.md (UNSPEC_XXSPLTIW_CONST): New unspec. (xxspltiw__internal): New insns. (VSX prefixed constant splitter): Add XXSPLTIW support. * config/rs6000/rs6000.opt (-mxxspltiw): New debug switch. * config/rs6000/vsx.md (vsx_mov_64bit): Update comment. (vsx_mov_32bit): Likewise. gcc/testsuite/ * gcc.target/powerpc/vec-splat-constant-v16qi.c: New test. * gcc.target/powerpc/vec-splat-constant-v4sf.c: New test. * gcc.target/powerpc/vec-splat-constant-v4si.c: New test. * gcc.target/powerpc/vec-splat-constant-v8hi.c: New test. * gcc.target/powerpc/vec-splati-runnable.c: Update insn count. Diff: --- gcc/config/rs6000/predicates.md | 14 +++++ gcc/config/rs6000/rs6000-protos.h | 1 + gcc/config/rs6000/rs6000.c | 43 ++++++++++++++ gcc/config/rs6000/rs6000.md | 17 ++++++ gcc/config/rs6000/rs6000.opt | 4 ++ gcc/config/rs6000/vsx.md | 4 +- .../gcc.target/powerpc/vec-splat-constant-v16qi.c | 27 +++++++++ .../gcc.target/powerpc/vec-splat-constant-v4sf.c | 67 ++++++++++++++++++++++ .../gcc.target/powerpc/vec-splat-constant-v4si.c | 51 ++++++++++++++++ .../gcc.target/powerpc/vec-splat-constant-v8hi.c | 62 ++++++++++++++++++++ .../gcc.target/powerpc/vec-splati-runnable.c | 2 +- 11 files changed, 289 insertions(+), 3 deletions(-) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 4b2bbdf40e8..40c4cba68ff 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -608,6 +608,9 @@ { if (vec_const_use_xxspltidp (&vec_const)) return true; + + if (vec_const_use_xxspltiw (&vec_const)) + return true; } /* Otherwise consider floating point constants hard, so that the @@ -644,6 +647,14 @@ if (!vec_const_to_bytes (op, mode, &vec_const)) return false; + /* 
If we can generate the constant with 1-2 Altivec instructions, don't + generate a prefixed instruction. */ + if (CONST_VECTOR_P (op) && easy_altivec_constant (op, mode)) + return false; + + if (vec_const_use_xxspltiw (&vec_const)) + return true; + if (vec_const_use_xxspltidp (&vec_const)) return true; @@ -706,6 +717,9 @@ { if (vec_const_use_xxspltidp (&vec_const)) return true; + + if (vec_const_use_xxspltiw (&vec_const)) + return true; } return easy_altivec_constant (op, mode); diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 8eef955237a..b12f6b10c13 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -249,6 +249,7 @@ typedef struct { extern bool vec_const_to_bytes (rtx, machine_mode, rs6000_vec_const *); extern bool vec_const_use_xxspltidp (rs6000_vec_const *); +extern bool vec_const_use_xxspltiw (rs6000_vec_const *); #endif /* RTX_CODE */ #ifdef TREE_CODE diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 353ec2b572d..20226169ba2 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -6939,6 +6939,11 @@ xxspltib_constant_p (rtx op, else if (IN_RANGE (value, -1, 0)) *num_insns_ptr = 1; + /* If we can generate XXSPLTIW, don't generate XXSPLTIB and a sign extend + operation. */ + else if (vsx_prefixed_constant (op, mode)) + return false; + else *num_insns_ptr = 2; @@ -6998,6 +7003,12 @@ output_vec_const_move (rtx *operands) operands[2] = GEN_INT (vec_const.xxspltidp_immediate); return "xxspltidp %x0,%2"; } + + if (vec_const_use_xxspltiw (&vec_const)) + { + operands[2] = GEN_INT (vec_const.words[0]); + return "xxspltiw %x0,%2"; + } } if (TARGET_P9_VECTOR @@ -28784,6 +28795,38 @@ vec_const_use_xxspltidp (rs6000_vec_const *vec_const) return true; } +/* Determine if a vector constant can be loaded with XXSPLTIW. 
*/ + +bool +vec_const_use_xxspltiw (rs6000_vec_const *vec_const) +{ + if (!TARGET_XXSPLTIW || !TARGET_PREFIXED || !TARGET_VSX) + return false; + + if (!vec_const->all_words_same) + return false; + + /* If we can use XXSPLTIB, don't generate XXSPLTIW. */ + if (vec_const->all_bytes_same) + return false; + + /* See if we can use VSPLTISH or VSPLTISW. */ + if (vec_const->all_half_words_same) + { + unsigned short h_word = vec_const->half_words[0]; + short sign_h_word = ((h_word & 0xffff) ^ 0x8000) - 0x8000; + if (EASY_VECTOR_15 (sign_h_word)) + return false; + } + + unsigned int word = vec_const->words[0]; + int sign_word = ((word & 0xffffffff) ^ 0x80000000) - 0x80000000; + if (EASY_VECTOR_15 (sign_word)) + return false; + + return true; +} + /* Convert a vector constant to an internal structure, breaking it out to bytes, half words, words, and double words. Return true if we have successfully broken it out. */ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 5d830e0db15..1963eb01ed7 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -157,6 +157,7 @@ UNSPEC_HASHST UNSPEC_HASHCHK UNSPEC_XXSPLTIDP_CONST + UNSPEC_XXSPLTIW_CONST ]) ;; @@ -8232,6 +8233,15 @@ [(set_attr "type" "vecperm") (set_attr "prefixed" "yes")]) +(define_insn "xxspltiw_<mode>_internal" + [(set (match_operand:SFDF 0 "register_operand" "=wa") + (unspec:SFDF [(match_operand:SI 1 "c32bit_cint_operand" "n")] + UNSPEC_XXSPLTIW_CONST))] + "TARGET_POWER10" + "xxspltiw %x0,%1" + [(set_attr "type" "vecperm") + (set_attr "prefixed" "yes")]) + (define_split [(set (match_operand:SFDF 0 "vsx_register_operand") (match_operand:SFDF 1 "vsx_prefixed_constant"))] @@ -8252,6 +8262,13 @@ DONE; } + if (vec_const_use_xxspltiw (&vec_const)) + { + rtx imm = GEN_INT (vec_const.words[0]); + emit_insn (gen_xxspltiw_<mode>_internal (dest, imm)); + DONE; + } + else gcc_unreachable (); }) diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 1d7ce4cc94a..332f61be0ba 
100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -644,6 +644,10 @@ mxxspltidp Target Undocumented Var(TARGET_XXSPLTIDP) Init(1) Save Generate (do not generate) XXSPLTIDP instructions. +mxxspltiw +Target Undocumented Var(TARGET_XXSPLTIW) Init(1) Save +Generate (do not generate) XXSPLTIW instructions. + -param=rs6000-density-pct-threshold= Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param When costing for loop vectorization, we probably need to penalize the loop body diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index c8518496339..0ceecc1975c 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -1192,7 +1192,7 @@ ;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR) ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW -;; XXLSPLTIDP +;; XXSPLTI* ;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX) (define_insn "vsx_mov_64bit" [(set (match_operand:VSX_M 0 "nonimmediate_operand" @@ -1241,7 +1241,7 @@ ;; VSX store VSX load VSX move GPR load GPR store GPR move ;; XXSPLTIB VSPLTISW VSX 0/-1 -;; XXSPLTIDP +;; XXSPLTI* ;; VMX const GPR const ;; LVX (VMX) STVX (VMX) (define_insn "*vsx_mov_32bit" diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c new file mode 100644 index 00000000000..2707d86e6fd --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v16qi.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */ + +#include <altivec.h> + +/* Test whether XXSPLTIW is generated for V16QI vector constants where the + first 4 elements are the same as the next 4 elements, etc. */ + +vector unsigned char +v16qi_const_1 (void) +{ + return (vector unsigned char) { 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, }; /* VSPLTISB. 
*/ +} + +vector unsigned char +v16qi_const_2 (void) +{ + return (vector unsigned char) { 1, 2, 3, 4, 1, 2, 3, 4, + 1, 2, 3, 4, 1, 2, 3, 4, }; /* XXSPLTIW. */ +} + +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mvspltisb\M|\mxxspltib\M} 1 } } */ +/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mplxv\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c new file mode 100644 index 00000000000..05d4ee3f5cb --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4sf.c @@ -0,0 +1,67 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */ + +#include <altivec.h> + +/* Test whether XXSPLTIW is generated for V4SF vector constants. */ + +vector float +v4sf_const_1 (void) +{ + return (vector float) { 1.0f, 1.0f, 1.0f, 1.0f }; /* XXSPLTIW. */ +} + +vector float +v4sf_const_nan (void) +{ + return (vector float) { __builtin_nanf (""), + __builtin_nanf (""), + __builtin_nanf (""), + __builtin_nanf ("") }; /* XXSPLTIW. */ +} + +vector float +v4sf_const_inf (void) +{ + return (vector float) { __builtin_inff (), + __builtin_inff (), + __builtin_inff (), + __builtin_inff () }; /* XXSPLTIW. */ +} + +vector float +v4sf_const_m0 (void) +{ + return (vector float) { -0.0f, -0.0f, -0.0f, -0.0f }; /* XXSPLTIB/VSLW. */ +} + +vector float +v4sf_splats_1 (void) +{ + return vec_splats (1.0f); /* XXSPLTIW. */ +} + +vector float +v4sf_splats_nan (void) +{ + return vec_splats (__builtin_nanf ("")); /* XXSPLTIW. */ +} + +vector float +v4sf_splats_inf (void) +{ + return vec_splats (__builtin_inff ()); /* XXSPLTIW. */ +} + +vector float +v4sf_splats_m0 (void) +{ + return vec_splats (-0.0f); /* XXSPLTIB/VSLW. 
*/ +} + +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mxxspltib\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mvslw\M} 2 } } */ +/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mplxv\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c new file mode 100644 index 00000000000..da909e948b2 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v4si.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */ + +#include <altivec.h> + +/* Test whether XXSPLTIW is generated for V4SI vector constants. We make sure + the power9 support (XXSPLTIB/VEXTSB2W) is not done. */ + +vector int +v4si_const_1 (void) +{ + return (vector int) { 1, 1, 1, 1 }; /* VSPLTISW. */ +} + +vector int +v4si_const_126 (void) +{ + return (vector int) { 126, 126, 126, 126 }; /* XXSPLTIW. */ +} + +vector int +v4si_const_1023 (void) +{ + return (vector int) { 1023, 1023, 1023, 1023 }; /* XXSPLTIW. */ +} + +vector int +v4si_splats_1 (void) +{ + return vec_splats (1); /* VSPLTISW. */ +} + +vector int +v4si_splats_126 (void) +{ + return vec_splats (126); /* XXSPLTIW. */ +} + +vector int +v4si_splats_1023 (void) +{ + return vec_splats (1023); /* XXSPLTIW. 
*/ +} + +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mvspltisw\M} 2 } } */ +/* { dg-final { scan-assembler-not {\mxxspltib\M} } } */ +/* { dg-final { scan-assembler-not {\mvextsb2w\M} } } */ +/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mplxv\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c new file mode 100644 index 00000000000..290e05d4a64 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v8hi.c @@ -0,0 +1,62 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -mxxspltiw" } */ + +#include <altivec.h> + +/* Test whether XXSPLTIW is generated for V8HI vector constants. We make sure + the power9 support (XXSPLTIB/VUPKLSB) is not done. */ + +vector short +v8hi_const_1 (void) +{ + return (vector short) { 1, 1, 1, 1, 1, 1, 1, 1 }; /* VSPLTISH. */ +} + +vector short +v8hi_const_126 (void) +{ + return (vector short) { 126, 126, 126, 126, + 126, 126, 126, 126 }; /* XXSPLTIW. */ +} + +vector short +v8hi_const_1023 (void) +{ + return (vector short) { 1023, 1023, 1023, 1023, + 1023, 1023, 1023, 1023 }; /* XXSPLTIW. */ +} + +vector short +v8hi_splats_1 (void) +{ + return vec_splats ((short)1); /* VSPLTISH. */ +} + +vector short +v8hi_splats_126 (void) +{ + return vec_splats ((short)126); /* XXSPLTIW. */ +} + +vector short +v8hi_splats_1023 (void) +{ + return vec_splats ((short)1023); /* XXSPLTIW. */ +} + +/* Test that we can optimize V8HI where all of the even elements are the same + and all of the odd elements are the same. */ +vector short +v8hi_const_1023_1000 (void) +{ + return (vector short) { 1023, 1000, 1023, 1000, + 1023, 1000, 1023, 1000 }; /* XXSPLTIW. 
*/ +} + +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 5 } } */ +/* { dg-final { scan-assembler-times {\mvspltish\M} 2 } } */ +/* { dg-final { scan-assembler-not {\mxxspltib\M} } } */ +/* { dg-final { scan-assembler-not {\mvupklsb\M} } } */ +/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mplxv\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c b/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c index 5f84930e1a7..6c01666b625 100644 --- a/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c +++ b/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c @@ -149,7 +149,7 @@ main (int argc, char *argv []) return 0; } -/* { dg-final { scan-assembler-times {\mxxspltiw\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxspltiw\M} 3 } } */ /* { dg-final { scan-assembler-times {\mxxspltidp\M} 3 } } */ /* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */