From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1005) id A743D38346B4; Thu, 21 Apr 2022 18:24:48 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org A743D38346B4 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Michael Meissner To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work086)] Add -mstore-vector-pair and -mno-store-vector-pair. X-Act-Checkin: gcc X-Git-Author: Michael Meissner X-Git-Refname: refs/users/meissner/heads/work086 X-Git-Oldrev: a072126e929c37b1976bb091e5236004bbaecb24 X-Git-Newrev: 0a1fcebce6f3c83c8ee6c4ef2f41dc3598812a18 Message-Id: <20220421182448.A743D38346B4@sourceware.org> Date: Thu, 21 Apr 2022 18:24:48 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 21 Apr 2022 18:24:48 -0000 https://gcc.gnu.org/g:0a1fcebce6f3c83c8ee6c4ef2f41dc3598812a18 commit 0a1fcebce6f3c83c8ee6c4ef2f41dc3598812a18 Author: Michael Meissner Date: Thu Apr 21 14:24:26 2022 -0400 Add -mstore-vector-pair and -mno-store-vector-pair. 2022-04-21 Michael Meissner gcc/ * config/rs6000/mma.md (movoo): Delete. (movoo_stxvp): New insn for -mstore-vector-pair. (movoo_no_stxvp): New insn for -mno-store-vector-pair. (movxo): Delete. (movxo_stxvp): New insn for -mstore-vector-pair. (movxo_no_stxvp): New insn for -mno-store-vector-pair. * config/rs6000/rs6000.opt (-mstore-vector-pair): New option. * doc/invoke.texi (RS/6000 & PowerPC Options): Document -mstore-vector-pair and -mno-store-vector-pair. gcc/testsuite/ * gcc.target/powerpc/p10-store-vector-pair-1.c: New test. * gcc.target/powerpc/p10-store-vector-pair-2.c: New test. 
Diff: --- gcc/config/rs6000/mma.md | 53 ++++++++++++-- gcc/config/rs6000/rs6000.opt | 4 ++ gcc/doc/invoke.texi | 8 ++- .../gcc.target/powerpc/p10-store-vector-pair-1.c | 82 ++++++++++++++++++++++ .../gcc.target/powerpc/p10-store-vector-pair-2.c | 81 +++++++++++++++++++++ 5 files changed, 223 insertions(+), 5 deletions(-) diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index 907c9d6d516..6715da6226d 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -274,10 +274,12 @@ DONE; }) -(define_insn_and_split "*movoo" +;; Possibly generate store vector pair instructions or split them to two +;; separate store vector instructions. +(define_insn_and_split "*movoo_stxvp" [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,m,wa") (match_operand:OO 1 "input_operand" "m,wa,wa"))] - "TARGET_MMA + "TARGET_MMA && TARGET_STORE_VECTOR_PAIR && (gpc_reg_operand (operands[0], OOmode) || gpc_reg_operand (operands[1], OOmode))" "@ @@ -295,6 +297,27 @@ (set_attr "size" "256") (set_attr "length" "*,*,8")]) +(define_insn_and_split "*movoo_no_stxvp" + [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,oQ,wa") + (match_operand:OO 1 "input_operand" "m,wa,wa"))] + "TARGET_MMA && !TARGET_STORE_VECTOR_PAIR + && (gpc_reg_operand (operands[0], OOmode) + || gpc_reg_operand (operands[1], OOmode))" + "@ + lxvp%X1 %x0,%1 + # + #" + "&& reload_completed && !MEM_P (operands[1])" + [(const_int 0)] +{ + rs6000_split_multireg_move (operands[0], operands[1]); + DONE; +} + [(set_attr "type" "vecload,vecstore,veclogical") + (set_attr "size" "256") + (set_attr "length" "*,8,8") + (set_attr "max_prefixed_insns" "*,2,*")]) + ;; Vector quad support. XOmode can only live in FPRs. (define_expand "movxo" @@ -306,10 +329,12 @@ DONE; }) -(define_insn_and_split "*movxo" +;; Possibly generate store vector pair instructions or split them to two +;; separate store vector instructions. 
+(define_insn_and_split "*movxo_stxvp" [(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d") (match_operand:XO 1 "input_operand" "m,d,d"))] - "TARGET_MMA + "TARGET_MMA && TARGET_STORE_VECTOR_PAIR && (gpc_reg_operand (operands[0], XOmode) || gpc_reg_operand (operands[1], XOmode))" "@ @@ -326,6 +351,26 @@ (set_attr "length" "*,*,16") (set_attr "max_prefixed_insns" "2,2,*")]) +(define_insn_and_split "*movxo_no_stxvp" + [(set (match_operand:XO 0 "nonimmediate_operand" "=d,m,d") + (match_operand:XO 1 "input_operand" "m,d,d"))] + "TARGET_MMA && !TARGET_STORE_VECTOR_PAIR + && (gpc_reg_operand (operands[0], XOmode) + || gpc_reg_operand (operands[1], XOmode))" + "@ + # + # + #" + "&& reload_completed" + [(const_int 0)] +{ + rs6000_split_multireg_move (operands[0], operands[1]); + DONE; +} + [(set_attr "type" "vecload,vecstore,veclogical") + (set_attr "length" "*,*,16") + (set_attr "max_prefixed_insns" "2,4,*")]) + (define_expand "vsx_assemble_pair" [(match_operand:OO 0 "vsx_register_operand") (match_operand:V16QI 1 "mma_assemble_input_operand") diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 6c4caf4c9ee..0330eb6c60e 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -624,6 +624,10 @@ mieee128-constant Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save Generate (do not generate) code that uses the LXVKQ instruction. +mstore-vector-pair +Target Var(TARGET_STORE_VECTOR_PAIR) Init(1) Save +Generate (do not generate) code that uses the store vector pair instructions. + -param=rs6000-density-pct-threshold= Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param When costing for loop vectorization, we probably need to penalize the loop body diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index a55bd047509..d7f1caf8fb4 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1275,7 +1275,7 @@ See RS/6000 and PowerPC Options. 
-mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{reg} @gol -mstack-protector-guard-offset=@var{offset} -mprefixed -mno-prefixed @gol -mpcrel -mno-pcrel -mmma -mno-mmma -mrop-protect -mno-rop-protect @gol --mprivileged -mno-privileged} +-mprivileged -mno-privileged -mstore-vector-pair -mno-store-vector-pair} @emph{RX Options} @gccoptlist{-m64bit-doubles -m32bit-doubles -fpu -nofpu@gol @@ -29359,6 +29359,12 @@ Generate (do not generate) code that will run in privileged state. @opindex no-block-ops-unaligned-vsx Generate (do not generate) unaligned vsx loads and stores for inline expansion of @code{memcpy} and @code{memmove}. + +@item -mstore-vector-pair +@itemx -mno-store-vector-pair +@opindex mstore-vector-pair +@opindex mno-store-vector-pair +Generate (do not generate) code that uses the store vector pair instructions. @end table @node RX Options diff --git a/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-1.c b/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-1.c new file mode 100644 index 00000000000..197ec2e058e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-1.c @@ -0,0 +1,82 @@ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -mstore-vector-pair -mmma" } */ + +/* Test if we generate store vector pair instructions if the user uses the + -mstore-vector-pair option. */ +static __vector_quad sq; +static __vector_pair sp; + +void +load_store_pair (__vector_pair *p, __vector_pair *q) +{ + *p = *q; /* lxvp, stxvp. */ +} + +void +load_store_pair_1 (__vector_pair *p, __vector_pair *q) +{ + p[1] = q[1]; /* lxvp, stxvp. */ +} + +void +load_store_pair_0x10000 (__vector_pair *p, __vector_pair *q) +{ + p[0x10000] = q[0x10000]; /* plxvp, pstxvp. */ +} + +void +load_store_pair_n (__vector_pair *p, __vector_pair *q, unsigned long n) +{ + p[n] = q[n]; /* lxvpx, stxvpx. */ +} + +void +load_pair_static (__vector_pair *p) +{ + *p = sp; /* plxvp, stxvp.
*/ +} + +void +store_pair_static (__vector_pair *p) +{ + sp = *p; /* lxvp, pstxvp. */ +} + +void +load_store_quad (__vector_quad *p, __vector_quad *q) +{ + *p = *q; /* 2x lxvp, 2x stxvp. */ +} + +void +load_store_quad_1 (__vector_quad *p, __vector_quad *q) +{ + p[1] = q[1]; /* 2x lxvp, 2x stxvp. */ +} + +void +load_store_quad_0x10000 (__vector_quad *p, __vector_quad *q) +{ + p[0x10000] = q[0x10000]; /* 2x plxvp, 2x pstxvp. */ +} + +void +load_store_quad_n (__vector_quad *p, __vector_quad *q, unsigned long n) +{ + p[n] = q[n]; /* 2x lxvp, 2x stxvp. */ +} + +void +load_quad_static (__vector_quad *p) +{ + *p = sq; /* 2x plxvp, 2x stxvp. */ +} + +void +store_quad_static (__vector_quad *p) +{ + sq = *p; /* 2x lxvp, 2x pstxvp. */ +} + +/* { dg-final { scan-assembler {\mp?stxvpx?\M} } } */ + diff --git a/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-2.c b/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-2.c new file mode 100644 index 00000000000..b8c3bdbfd89 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/p10-store-vector-pair-2.c @@ -0,0 +1,81 @@ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -mno-store-vector-pair -mmma" } */ + +/* Test if we do not generate store vector pair instructions if the user uses + the -mno-store-vector-pair option. */ +static __vector_quad sq; +static __vector_pair sp; + +void +load_store_pair (__vector_pair *p, __vector_pair *q) +{ + *p = *q; /* lxvp, 2x stxv. */ +} + +void +load_store_pair_1 (__vector_pair *p, __vector_pair *q) +{ + p[1] = q[1]; /* lxvp, 2x stxv. */ +} + +void +load_store_pair_0x10000 (__vector_pair *p, __vector_pair *q) +{ + p[0x10000] = q[0x10000]; /* plxvp, 2x pstxv. */ +} + +void +load_store_pair_n (__vector_pair *p, __vector_pair *q, unsigned long n) +{ + p[n] = q[n]; /* lxvpx, 2x stxv. */ +} + +void +load_pair_static (__vector_pair *p) +{ + *p = sp; /* plxvp, 2x stxv. */ +} + +void +store_pair_static (__vector_pair *p) +{ + sp = *p; /* lxvp, 2x pstxv.
*/ +} + +void +load_store_quad (__vector_quad *p, __vector_quad *q) +{ + *p = *q; /* 2x lxvp, 4x stxv. */ +} + +void +load_store_quad_1 (__vector_quad *p, __vector_quad *q) +{ + p[1] = q[1]; /* 2x lxvp, 4x stxv. */ +} + +void +load_store_quad_0x10000 (__vector_quad *p, __vector_quad *q) +{ + p[0x10000] = q[0x10000]; /* 2x plxvp, 4x pstxv. */ +} + +void +load_store_quad_n (__vector_quad *p, __vector_quad *q, unsigned long n) +{ + p[n] = q[n]; /* 2x lxvp, 4x stxv. */ +} + +void +load_quad_static (__vector_quad *p) +{ + *p = sq; /* 2x plxvp, 4x stxv. */ +} + +void +store_quad_static (__vector_quad *p) +{ + sq = *p; /* 2x lxvp, 4x pstxv. */ +} + +/* { dg-final { scan-assembler-not {\mp?stxvpx?\M} } } */