public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: Michael Meissner <meissner@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/users/meissner/heads/work084)] Revert patch. Date: Sat, 2 Apr 2022 02:24:11 +0000 (GMT) [thread overview] Message-ID: <20220402022411.E1FA13858D1E@sourceware.org> (raw) https://gcc.gnu.org/g:83e94819074f54e1c7c44d01d7c0d221a2a268fc commit 83e94819074f54e1c7c44d01d7c0d221a2a268fc Author: Michael Meissner <meissner@linux.ibm.com> Date: Fri Apr 1 22:23:40 2022 -0400 Revert patch. 2022-04-01 Michael Meissner <meissner@linux.ibm.com> gcc/ Revert patch. * config/rs6000/vsx.md (vsx_lxvr<wd>x): Add support for loading to GPR registers. (vsx_stxvr<wd>x): Add support for storing from GPR registers. (zero_extendditi2): New insn. gcc/testsuite/ Revert patch. * gcc.target/powerpc/vsx-load-element-extend-int.c: Use -O2 instead of -O0 and update insn counts. * gcc.target/powerpc/vsx-load-element-extend-short.c: Likewise. * gcc.target/powerpc/zero-extend-di-ti.c: New test. Diff: --- gcc/config/rs6000/vsx.md | 82 +++------------------- .../powerpc/vsx-load-element-extend-int.c | 36 ++++++---- .../powerpc/vsx-load-element-extend-short.c | 35 +++++---- .../gcc.target/powerpc/zero-extend-di-ti.c | 62 ---------------- 4 files changed, 51 insertions(+), 164 deletions(-) diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index ad971e3a1de..c091e5e2f47 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -1315,32 +1315,14 @@ } }) -;; Load rightmost element from load_data using lxvrbx, lxvrhx, lxvrwx, lxvrdx. -;; Support TImode being in a GPR register to prevent generating lvxr{d,w,b}x -;; and then two direct moves if we ultimately need the value in a GPR register. 
-(define_insn_and_split "vsx_lxvr<wd>x" - [(set (match_operand:TI 0 "register_operand" "=r,wa") - (zero_extend:TI (match_operand:INT_ISA3 1 "memory_operand" "m,Z")))] - "TARGET_POWERPC64 && TARGET_POWER10" - "@ - # - lxvr<wd>x %x0,%y1" - "&& reload_completed && int_reg_operand (operands[0], TImode)" - [(set (match_dup 2) (match_dup 3)) - (set (match_dup 4) (const_int 0))] -{ - rtx op0 = operands[0]; - rtx op1 = operands[1]; - - operands[2] = gen_lowpart (DImode, op0); - operands[3] = (<MODE>mode == DImode - ? op1 - : gen_rtx_ZERO_EXTEND (DImode, op1)); - - operands[4] = gen_highpart (DImode, op0); -} - [(set_attr "type" "load,vecload") - (set_attr "num_insns" "2,*")]) +;; Load rightmost element from load_data +;; using lxvrbx, lxvrhx, lxvrwx, lxvrdx. +(define_insn "vsx_lxvr<wd>x" + [(set (match_operand:TI 0 "vsx_register_operand" "=wa") + (zero_extend:TI (match_operand:INT_ISA3 1 "memory_operand" "Z")))] + "TARGET_POWER10" + "lxvr<wd>x %x0,%y1" + [(set_attr "type" "vecload")]) ;; Store rightmost element into store_data ;; using stxvrbx, stxvrhx, strvxwx, strvxdx. @@ -5037,54 +5019,6 @@ DONE; }) -;; Zero extend DI to TI. If we don't have the MTVSRDD instruction (and LXVRDX -;; in the case of power10), we use the machine independent code. If we are -;; loading up GPRs, we fall back to the old code. -(define_insn_and_split "zero_extendditi2" - [(set (match_operand:TI 0 "register_operand" "=r,r, wa,&wa") - (zero_extend:TI (match_operand:DI 1 "register_operand" "r,wa,r, wa")))] - "TARGET_POWERPC64 && TARGET_P9_VECTOR" - "@ - # - # - mtvsrdd %x0,0,%1 - #" - "&& reload_completed - && (int_reg_operand (operands[0], TImode) - || vsx_register_operand (operands[1], DImode))" - [(pc)] -{ - rtx dest = operands[0]; - rtx src = operands[1]; - int dest_regno = reg_or_subregno (dest); - - /* Handle conversion to GPR registers. Load up the low part and then do - zero out the upper part. 
*/ - if (INT_REGNO_P (dest_regno)) - { - rtx dest_hi = gen_highpart (DImode, dest); - rtx dest_lo = gen_lowpart (DImode, dest); - - emit_move_insn (dest_lo, src); - emit_move_insn (dest_hi, const0_rtx); - DONE; - } - - /* For settomg a VSX register from another VSX register, clear the result - register, and use XXPERMDI to shift the value into the lower 64-bits. */ - rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno); - rtx dest_di = gen_rtx_REG (DImode, dest_regno); - - emit_move_insn (dest_v2di, CONST0_RTX (V2DImode)); - if (BYTES_BIG_ENDIAN) - emit_insn (gen_vsx_concat_v2di (dest_v2di, dest_di, src)); - else - emit_insn (gen_vsx_concat_v2di (dest_v2di, src, dest_di)); - DONE; -} - [(set_attr "type" "integer,mfvsr,vecmove,vecperm") - (set_attr "length" "8, 8, *, 8")]) - ;; Sign extend DI to TI. We provide both GPR targets and Altivec targets on ;; power10. On earlier systems, the machine independent code will generate a ;; shift left to sign extend the 64-bit value to 128-bit. diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-int.c b/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-int.c index 1f1281d6b75..c40e1a3a0f7 100644 --- a/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-int.c +++ b/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-int.c @@ -6,25 +6,33 @@ /* { dg-do compile { target { ! power10_hw } } } */ /* { dg-require-effective-target power10_ok } */ /* { dg-require-effective-target int128 } */ -/* { dg-options "-mdejagnu-cpu=power10 -O2 -save-temps" } */ + +/* Deliberately set optization to zero for this test to confirm + the lxvr*x instruction is generated. At higher optimization levels + the instruction we are looking for is sometimes replaced by other + load instructions. 
*/ +/* { dg-options "-mdejagnu-cpu=power10 -O0 -save-temps" } */ + /* { dg-final { scan-assembler-times {\mlxvrwx\M} 2 } } */ #define NUM_VEC_ELEMS 4 #define ITERS 16 -/* Codegen at time of writing is a single lxvrwx for the zero extended test, - and a lxvrwx + vexts* sign extension instructions for the sign extended - test. - - 0000000000000000 <test_sign_extended_load>: - 0: 9b 18 44 7c lxvrwx vs34,r4,r3 - 4: 02 16 5a 10 vextsw2d v2,v2 - 8: 02 16 5b 10 vextsd2q v2,v2 - c: 20 00 80 4e blr - - 0000000000000020 <test_zero_extended_unsigned_load>: - 20: 9b 18 44 7c lxvrwx vs34,r4,r3 - 24: 20 00 80 4e blr */ +/* +Codegen at time of writing is a single lxvrwx for the zero +extended test, and a lwax,mtvsrdd,vextsd2q for the sign +extended test. + +0000000010000c90 <test_sign_extended_load>: + 10000c90: aa 1a 24 7d lwax r9,r4,r3 + 10000c94: 67 4b 40 7c mtvsrdd vs34,0,r9 + 10000c98: 02 16 5b 10 vextsd2q v2,v2 + 10000c9c: 20 00 80 4e blr + +0000000010000cb0 <test_zero_extended_unsigned_load>: + 10000cb0: 9b 18 44 7c lxvrwx vs34,r4,r3 + 10000cb4: 20 00 80 4e blr +*/ #include <altivec.h> #include <stdio.h> diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-short.c b/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-short.c index a7721318812..837ba79c9ab 100644 --- a/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-short.c +++ b/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-short.c @@ -6,26 +6,33 @@ /* { dg-do compile { target { ! power10_hw } } } */ /* { dg-require-effective-target power10_ok } */ /* { dg-require-effective-target int128 } */ -/* { dg-options "-mdejagnu-cpu=power10 -O2 -save-temps" } */ + +/* Deliberately set optization to zero for this test to confirm + the lxvr*x instruction is generated. At higher optimization levels + the instruction we are looking for is sometimes replaced by other + load instructions. 
*/ +/* { dg-options "-mdejagnu-cpu=power10 -O0 -save-temps" } */ /* { dg-final { scan-assembler-times {\mlxvrhx\M} 2 } } */ #define NUM_VEC_ELEMS 8 #define ITERS 16 -/* Codegen at time of writing is a single lxvrwx for the zero extended test, - and a lxvrwx + vexts* sign extension instructions for the sign extended - test. - - 0000000000000000 <test_sign_extended_load>: - 0: 5b 18 44 7c lxvrhx vs34,r4,r3 - 4: 02 16 59 10 vextsh2d v2,v2 - 8: 02 16 5b 10 vextsd2q v2,v2 - c: 20 00 80 4e blr - - 0000000000000020 <test_zero_extended_unsigned_load>: - 20: 5b 18 44 7c lxvrhx vs34,r4,r3 - 24: 20 00 80 4e blr */ +/* +Codegen at time of writing uses lxvrhx for the zero +extension test and lhax,mtvsrdd,vextsd2q for the +sign extended test. + +0000000010001810 <test_sign_extended_load>: + 10001810: ae 1a 24 7d lhax r9,r4,r3 + 10001814: 67 4b 40 7c mtvsrdd vs34,0,r9 + 10001818: 02 16 5b 10 vextsd2q v2,v2 + 1000181c: 20 00 80 4e blr + +0000000010001830 <test_zero_extended_unsigned_load>: + 10001830: 5b 18 44 7c lxvrhx vs34,r4,r3 + 10001834: 20 00 80 4e blr +*/ #include <altivec.h> #include <stdio.h> diff --git a/gcc/testsuite/gcc.target/powerpc/zero-extend-di-ti.c b/gcc/testsuite/gcc.target/powerpc/zero-extend-di-ti.c deleted file mode 100644 index ab5f5d89d4d..00000000000 --- a/gcc/testsuite/gcc.target/powerpc/zero-extend-di-ti.c +++ /dev/null @@ -1,62 +0,0 @@ -/* { dg-require-effective-target int128 } */ -/* { dg-require-effective-target power10_ok } */ -/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ - -/* This patch makes sure the various optimization and code paths are done for - zero extending DImode to TImode on power10. */ - -__uint128_t -gpr_to_gpr (unsigned long long a) -{ - /* li 4,0. */ - return a; -} - -__uint128_t -mem_to_gpr (unsigned long long *p) -{ - /* ld 3,0(3); li 4,0. */ - return *p; -} - -__uint128_t -vsx_to_gpr (__uint128_t *p, double d) -{ - /* fctiduz 1,1; li 4,0;mfvsrd 3,1. 
*/ - return (unsigned long long)d; -} - -void -gpr_to_vsx (__uint128_t *p, unsigned long long a) -{ - /* mtvsrdd 0,0,4; stxv 0,0(3). */ - __uint128_t b = a; - __asm__ (" # %x0" : "+wa" (b)); - *p = b; -} - -void -mem_to_vsx (__uint128_t *p, unsigned long long *q) -{ - /* lxvrdx 0,0,4; stxv 0,0(3). */ - __uint128_t a = *q; - __asm__ (" # %x0" : "+wa" (a)); - *p = a; -} - -void -vsx_to_vsx (__uint128_t *p, double d) -{ - /* fctiduz 1,1; xxspltib 0,0; xxpermdi 0,0,1,0; stxv 0,0(3). */ - __uint128_t a = (unsigned long long)d; - __asm__ (" # %x0" : "+wa" (a)); - *p = a; -} - -/* { dg-final { scan-assembler-times {\mli\M} 3 } } */ -/* { dg-final { scan-assembler-times {\mld\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mlxvrdx\M} 3 } } */ -/* { dg-final { scan-assembler-times {\mmfvsrd\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mstxv\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */
next reply other threads:[~2022-04-02 2:24 UTC|newest] Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top 2022-04-02 2:24 Michael Meissner [this message] -- strict thread matches above, loose matches on Subject: below -- 2022-04-02 0:03 Michael Meissner 2022-04-01 20:15 Michael Meissner 2022-03-31 14:00 Michael Meissner 2022-03-31 14:00 Michael Meissner 2022-03-31 14:00 Michael Meissner 2022-03-31 14:00 Michael Meissner 2022-03-30 17:57 Michael Meissner 2022-03-29 23:35 Michael Meissner 2022-03-29 23:29 Michael Meissner 2022-03-29 2:58 Michael Meissner
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20220402022411.E1FA13858D1E@sourceware.org \ --to=meissner@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).