public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-03-31 14:00 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-03-31 14:00 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:4270ed1887b785280ff58268bc6bab4a5356d519
commit 4270ed1887b785280ff58268bc6bab4a5356d519
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Wed Mar 30 13:56:20 2022 -0400
Revert patch.
2022-03-29 Michael Meissner <meissner@linux.ibm.com>
gcc/
PR target/99293
Revert patch.
* config/rs6000/rs6000-p8swap.cc (rtx_is_swappable_p): Remove
UNSPEC_VSX_XXSPLTD case.
* config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): Delete.
(vsx_xxspltd_<mode>): Rewrite to use VEC_DUPLICATE.
gcc/testsuite:
PR target/99293
Revert patch.
* gcc.target/powerpc/builtins-1.c: Update insn count.
* gcc.target/powerpc/pr99293.c: New test.
Diff:
---
gcc/config/rs6000/rs6000-p8swap.cc | 1 +
gcc/config/rs6000/vsx.md | 19 +++++++-------
gcc/testsuite/gcc.target/powerpc/builtins-1.c | 2 +-
gcc/testsuite/gcc.target/powerpc/pr99293.c | 36 ---------------------------
4 files changed, 11 insertions(+), 47 deletions(-)
diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 1973d9c8245..d301bc3fe59 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -805,6 +805,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
case UNSPEC_VUPKLU_V4SF:
return 0;
case UNSPEC_VSPLT_DIRECT:
+ case UNSPEC_VSX_XXSPLTD:
*special = SH_SPLAT;
return 1;
case UNSPEC_REDUC_PLUS:
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index a1a1ce95195..1b75538f42f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -296,6 +296,7 @@
UNSPEC_VSX_XXPERM
UNSPEC_VSX_XXSPLTW
+ UNSPEC_VSX_XXSPLTD
UNSPEC_VSX_DIVSD
UNSPEC_VSX_DIVUD
UNSPEC_VSX_DIVSQ
@@ -4672,18 +4673,16 @@
;; V2DF/V2DI splat for use by vec_splat builtin
(define_insn "vsx_xxspltd_<mode>"
[(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
- (vec_duplicate:VSX_D
- (vec_select:<VS_scalar>
- (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
- (parallel [(match_operand:QI 2 "const_0_to_1_operand" "i")]))))]
+ (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (match_operand:QI 2 "u5bit_cint_operand" "i")]
+ UNSPEC_VSX_XXSPLTD))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
- HOST_WIDE_INT dword = INTVAL (operands[2]);
- if (!BYTES_BIG_ENDIAN)
- dword = !dword;
-
- operands[3] = GEN_INT (3*dword);
- return "xxpermdi %x0,%x1,%x1,%3";
+ if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
+ || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
+ return "xxpermdi %x0,%x1,%x1,0";
+ else
+ return "xxpermdi %x0,%x1,%x1,3";
}
[(set_attr "type" "vecperm")])
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1.c b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
index 98783668bce..28cd1aa6b1a 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
@@ -1035,4 +1035,4 @@ foo156 (vector unsigned short usa)
/* { dg-final { scan-assembler-times {\mvmrglb\M} 3 } } */
/* { dg-final { scan-assembler-times {\mvmrgew\M} 4 } } */
/* { dg-final { scan-assembler-times {\mvsplth|xxsplth\M} 4 } } */
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 42 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 44 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c b/gcc/testsuite/gcc.target/powerpc/pr99293.c
deleted file mode 100644
index 03c22f8f4de..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/pr99293.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* { dg-do compile } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* Test for PR 99293, which wants to do:
- __builtin_vec_splats (__builtin_vec_extract (v, n))
-
- where v is a V2DF or V2DI vector and n is either 0 or 1. Previously the
- compiler would do a direct move to the GPR registers to select the item and
- a direct move from the GPR registers to do the splat. */
-
-vector long long
-splat_dup_ll_0 (vector long long v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-
-vector long long
-splat_dup_ll_1 (vector long long v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-vector double
-splat_dup_d_0 (vector double v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-
-vector double
-splat_dup_d_1 (vector double v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 4 } } */
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-04-02 2:24 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-04-02 2:24 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:83e94819074f54e1c7c44d01d7c0d221a2a268fc
commit 83e94819074f54e1c7c44d01d7c0d221a2a268fc
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Fri Apr 1 22:23:40 2022 -0400
Revert patch.
2022-04-01 Michael Meissner <meissner@linux.ibm.com>
gcc/
Revert patch.
* config/rs6000/vsx.md (vsx_lxvr<wd>x): Add support for loading to
GPR registers.
(vsx_stxvr<wd>x): Add support for storing from GPR registers.
(zero_extendditi2): New insn.
gcc/testsuite/
Revert patch.
* gcc.target/powerpc/vsx-load-element-extend-int.c: Use -O2
instead of -O0 and update insn counts.
* gcc.target/powerpc/vsx-load-element-extend-short.c: Likewise.
* gcc.target/powerpc/zero-extend-di-ti.c: New test.
Diff:
---
gcc/config/rs6000/vsx.md | 82 +++-------------------
.../powerpc/vsx-load-element-extend-int.c | 36 ++++++----
.../powerpc/vsx-load-element-extend-short.c | 35 +++++----
.../gcc.target/powerpc/zero-extend-di-ti.c | 62 ----------------
4 files changed, 51 insertions(+), 164 deletions(-)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index ad971e3a1de..c091e5e2f47 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1315,32 +1315,14 @@
}
})
-;; Load rightmost element from load_data using lxvrbx, lxvrhx, lxvrwx, lxvrdx.
-;; Support TImode being in a GPR register to prevent generating lvxr{d,w,b}x
-;; and then two direct moves if we ultimately need the value in a GPR register.
-(define_insn_and_split "vsx_lxvr<wd>x"
- [(set (match_operand:TI 0 "register_operand" "=r,wa")
- (zero_extend:TI (match_operand:INT_ISA3 1 "memory_operand" "m,Z")))]
- "TARGET_POWERPC64 && TARGET_POWER10"
- "@
- #
- lxvr<wd>x %x0,%y1"
- "&& reload_completed && int_reg_operand (operands[0], TImode)"
- [(set (match_dup 2) (match_dup 3))
- (set (match_dup 4) (const_int 0))]
-{
- rtx op0 = operands[0];
- rtx op1 = operands[1];
-
- operands[2] = gen_lowpart (DImode, op0);
- operands[3] = (<MODE>mode == DImode
- ? op1
- : gen_rtx_ZERO_EXTEND (DImode, op1));
-
- operands[4] = gen_highpart (DImode, op0);
-}
- [(set_attr "type" "load,vecload")
- (set_attr "num_insns" "2,*")])
+;; Load rightmost element from load_data
+;; using lxvrbx, lxvrhx, lxvrwx, lxvrdx.
+(define_insn "vsx_lxvr<wd>x"
+ [(set (match_operand:TI 0 "vsx_register_operand" "=wa")
+ (zero_extend:TI (match_operand:INT_ISA3 1 "memory_operand" "Z")))]
+ "TARGET_POWER10"
+ "lxvr<wd>x %x0,%y1"
+ [(set_attr "type" "vecload")])
;; Store rightmost element into store_data
;; using stxvrbx, stxvrhx, strvxwx, strvxdx.
@@ -5037,54 +5019,6 @@
DONE;
})
-;; Zero extend DI to TI. If we don't have the MTVSRDD instruction (and LXVRDX
-;; in the case of power10), we use the machine independent code. If we are
-;; loading up GPRs, we fall back to the old code.
-(define_insn_and_split "zero_extendditi2"
- [(set (match_operand:TI 0 "register_operand" "=r,r, wa,&wa")
- (zero_extend:TI (match_operand:DI 1 "register_operand" "r,wa,r, wa")))]
- "TARGET_POWERPC64 && TARGET_P9_VECTOR"
- "@
- #
- #
- mtvsrdd %x0,0,%1
- #"
- "&& reload_completed
- && (int_reg_operand (operands[0], TImode)
- || vsx_register_operand (operands[1], DImode))"
- [(pc)]
-{
- rtx dest = operands[0];
- rtx src = operands[1];
- int dest_regno = reg_or_subregno (dest);
-
- /* Handle conversion to GPR registers. Load up the low part and then
- zero out the upper part. */
- if (INT_REGNO_P (dest_regno))
- {
- rtx dest_hi = gen_highpart (DImode, dest);
- rtx dest_lo = gen_lowpart (DImode, dest);
-
- emit_move_insn (dest_lo, src);
- emit_move_insn (dest_hi, const0_rtx);
- DONE;
- }
-
- /* For setting a VSX register from another VSX register, clear the result
- register, and use XXPERMDI to shift the value into the lower 64-bits. */
- rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
- rtx dest_di = gen_rtx_REG (DImode, dest_regno);
-
- emit_move_insn (dest_v2di, CONST0_RTX (V2DImode));
- if (BYTES_BIG_ENDIAN)
- emit_insn (gen_vsx_concat_v2di (dest_v2di, dest_di, src));
- else
- emit_insn (gen_vsx_concat_v2di (dest_v2di, src, dest_di));
- DONE;
-}
- [(set_attr "type" "integer,mfvsr,vecmove,vecperm")
- (set_attr "length" "8, 8, *, 8")])
-
;; Sign extend DI to TI. We provide both GPR targets and Altivec targets on
;; power10. On earlier systems, the machine independent code will generate a
;; shift left to sign extend the 64-bit value to 128-bit.
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-int.c b/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-int.c
index 1f1281d6b75..c40e1a3a0f7 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-int.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-int.c
@@ -6,25 +6,33 @@
/* { dg-do compile { target { ! power10_hw } } } */
/* { dg-require-effective-target power10_ok } */
/* { dg-require-effective-target int128 } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2 -save-temps" } */
+
+/* Deliberately set optimization to zero for this test to confirm
+ the lxvr*x instruction is generated. At higher optimization levels
+ the instruction we are looking for is sometimes replaced by other
+ load instructions. */
+/* { dg-options "-mdejagnu-cpu=power10 -O0 -save-temps" } */
+
/* { dg-final { scan-assembler-times {\mlxvrwx\M} 2 } } */
#define NUM_VEC_ELEMS 4
#define ITERS 16
-/* Codegen at time of writing is a single lxvrwx for the zero extended test,
- and a lxvrwx + vexts* sign extension instructions for the sign extended
- test.
-
- 0000000000000000 <test_sign_extended_load>:
- 0: 9b 18 44 7c lxvrwx vs34,r4,r3
- 4: 02 16 5a 10 vextsw2d v2,v2
- 8: 02 16 5b 10 vextsd2q v2,v2
- c: 20 00 80 4e blr
-
- 0000000000000020 <test_zero_extended_unsigned_load>:
- 20: 9b 18 44 7c lxvrwx vs34,r4,r3
- 24: 20 00 80 4e blr */
+/*
+Codegen at time of writing is a single lxvrwx for the zero
+extended test, and a lwax,mtvsrdd,vextsd2q for the sign
+extended test.
+
+0000000010000c90 <test_sign_extended_load>:
+ 10000c90: aa 1a 24 7d lwax r9,r4,r3
+ 10000c94: 67 4b 40 7c mtvsrdd vs34,0,r9
+ 10000c98: 02 16 5b 10 vextsd2q v2,v2
+ 10000c9c: 20 00 80 4e blr
+
+0000000010000cb0 <test_zero_extended_unsigned_load>:
+ 10000cb0: 9b 18 44 7c lxvrwx vs34,r4,r3
+ 10000cb4: 20 00 80 4e blr
+*/
#include <altivec.h>
#include <stdio.h>
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-short.c b/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-short.c
index a7721318812..837ba79c9ab 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-short.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-short.c
@@ -6,26 +6,33 @@
/* { dg-do compile { target { ! power10_hw } } } */
/* { dg-require-effective-target power10_ok } */
/* { dg-require-effective-target int128 } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2 -save-temps" } */
+
+/* Deliberately set optimization to zero for this test to confirm
+ the lxvr*x instruction is generated. At higher optimization levels
+ the instruction we are looking for is sometimes replaced by other
+ load instructions. */
+/* { dg-options "-mdejagnu-cpu=power10 -O0 -save-temps" } */
/* { dg-final { scan-assembler-times {\mlxvrhx\M} 2 } } */
#define NUM_VEC_ELEMS 8
#define ITERS 16
-/* Codegen at time of writing is a single lxvrhx for the zero extended test,
- and a lxvrhx + vexts* sign extension instructions for the sign extended
- test.
-
- 0000000000000000 <test_sign_extended_load>:
- 0: 5b 18 44 7c lxvrhx vs34,r4,r3
- 4: 02 16 59 10 vextsh2d v2,v2
- 8: 02 16 5b 10 vextsd2q v2,v2
- c: 20 00 80 4e blr
-
- 0000000000000020 <test_zero_extended_unsigned_load>:
- 20: 5b 18 44 7c lxvrhx vs34,r4,r3
- 24: 20 00 80 4e blr */
+/*
+Codegen at time of writing uses lxvrhx for the zero
+extension test and lhax,mtvsrdd,vextsd2q for the
+sign extended test.
+
+0000000010001810 <test_sign_extended_load>:
+ 10001810: ae 1a 24 7d lhax r9,r4,r3
+ 10001814: 67 4b 40 7c mtvsrdd vs34,0,r9
+ 10001818: 02 16 5b 10 vextsd2q v2,v2
+ 1000181c: 20 00 80 4e blr
+
+0000000010001830 <test_zero_extended_unsigned_load>:
+ 10001830: 5b 18 44 7c lxvrhx vs34,r4,r3
+ 10001834: 20 00 80 4e blr
+*/
#include <altivec.h>
#include <stdio.h>
diff --git a/gcc/testsuite/gcc.target/powerpc/zero-extend-di-ti.c b/gcc/testsuite/gcc.target/powerpc/zero-extend-di-ti.c
deleted file mode 100644
index ab5f5d89d4d..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/zero-extend-di-ti.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/* { dg-require-effective-target int128 } */
-/* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
-
-/* This patch makes sure the various optimization and code paths are done for
- zero extending DImode to TImode on power10. */
-
-__uint128_t
-gpr_to_gpr (unsigned long long a)
-{
- /* li 4,0. */
- return a;
-}
-
-__uint128_t
-mem_to_gpr (unsigned long long *p)
-{
- /* ld 3,0(3); li 4,0. */
- return *p;
-}
-
-__uint128_t
-vsx_to_gpr (__uint128_t *p, double d)
-{
- /* fctiduz 1,1; li 4,0;mfvsrd 3,1. */
- return (unsigned long long)d;
-}
-
-void
-gpr_to_vsx (__uint128_t *p, unsigned long long a)
-{
- /* mtvsrdd 0,0,4; stxv 0,0(3). */
- __uint128_t b = a;
- __asm__ (" # %x0" : "+wa" (b));
- *p = b;
-}
-
-void
-mem_to_vsx (__uint128_t *p, unsigned long long *q)
-{
- /* lxvrdx 0,0,4; stxv 0,0(3). */
- __uint128_t a = *q;
- __asm__ (" # %x0" : "+wa" (a));
- *p = a;
-}
-
-void
-vsx_to_vsx (__uint128_t *p, double d)
-{
- /* fctiduz 1,1; xxspltib 0,0; xxpermdi 0,0,1,0; stxv 0,0(3). */
- __uint128_t a = (unsigned long long)d;
- __asm__ (" # %x0" : "+wa" (a));
- *p = a;
-}
-
-/* { dg-final { scan-assembler-times {\mli\M} 3 } } */
-/* { dg-final { scan-assembler-times {\mld\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mlxvrdx\M} 3 } } */
-/* { dg-final { scan-assembler-times {\mmfvsrd\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mstxv\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-04-02 0:03 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-04-02 0:03 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:e13ad3cdd6655382e05105b8e3b47ce4aa0be70d
commit e13ad3cdd6655382e05105b8e3b47ce4aa0be70d
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Fri Apr 1 20:03:00 2022 -0400
Revert patch.
2022-04-01 Michael Meissner <meissner@linux.ibm.com>
gcc/
Revert patch.
* config/rs6000/vsx.md (vsx_lxvr<wd>x): Add support for loading to
GPR registers.
(vsx_stxvr<wd>x): Add support for storing from GPR registers.
(zero_extendditi2): New insn.
gcc/testsuite/
Revert patch.
* gcc.target/powerpc/zero-extend-di-ti.c: New test.
Diff:
---
gcc/config/rs6000/vsx.md | 103 +++------------------
.../gcc.target/powerpc/zero-extend-di-ti.c | 62 -------------
2 files changed, 15 insertions(+), 150 deletions(-)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 27810287ad7..c091e5e2f47 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1315,48 +1315,23 @@
}
})
-;; Load rightmost element from load_data using lxvrbx, lxvrhx, lxvrwx, lxvrdx.
-;; Support TImode being in a GPR register to prevent generating lvxr{d,w,b}x
-;; and then two direct moves if we ultimately need the value in a GPR register.
-(define_insn_and_split "vsx_lxvr<wd>x"
- [(set (match_operand:TI 0 "register_operand" "=r,wa")
- (zero_extend:TI (match_operand:INT_ISA3 1 "memory_operand" "m,Z")))]
- "TARGET_POWERPC64 && TARGET_POWER10"
- "@
- #
- lxvr<wd>x %x0,%y1"
- "&& reload_completed && int_reg_operand (operands[0], TImode)"
- [(set (match_dup 2) (match_dup 3))
- (set (match_dup 4) (const_int 0))]
-{
- rtx op0 = operands[0];
- rtx op1 = operands[1];
-
- operands[2] = gen_lowpart (DImode, op0);
- operands[3] = (<MODE>mode == DImode
- ? op1
- : gen_rtx_ZERO_EXTEND (DImode, op1));
-
- operands[4] = gen_highpart (DImode, op0);
-}
- [(set_attr "type" "load,vecload")
- (set_attr "num_insns" "2,*")])
+;; Load rightmost element from load_data
+;; using lxvrbx, lxvrhx, lxvrwx, lxvrdx.
+(define_insn "vsx_lxvr<wd>x"
+ [(set (match_operand:TI 0 "vsx_register_operand" "=wa")
+ (zero_extend:TI (match_operand:INT_ISA3 1 "memory_operand" "Z")))]
+ "TARGET_POWER10"
+ "lxvr<wd>x %x0,%y1"
+ [(set_attr "type" "vecload")])
-;; Store rightmost element into store_data using stxvrbx, stxvrhx, strvxwx,
-;; strvxdx. Support TImode being in a GPR register to prevent generating
-;; mtvsrdd and then stvxr{d,w,b}x instead of a GPR store.
+;; Store rightmost element into store_data
+;; using stxvrbx, stxvrhx, strvxwx, strvxdx.
(define_insn "vsx_stxvr<wd>x"
- [(set (match_operand:INT_ISA3 0 "memory_operand" "=m,Z")
- (truncate:INT_ISA3 (match_operand:TI 1 "register_operand" "r,wa")))]
- "TARGET_POWERPC64 && TARGET_POWER10"
-{
- if (vsx_register_operand (operands[1], TImode))
- return "stxvr<wc>x %x1,%y0";
-
- operands[2] = gen_lowpart (<MODE>mode, operands[1]);
- return "st<wd>%U0%X0 %2,%0";
-}
- [(set_attr "type" "store,vecstore")])
+ [(set (match_operand:INT_ISA3 0 "memory_operand" "=Z")
+ (truncate:INT_ISA3 (match_operand:TI 1 "vsx_register_operand" "wa")))]
+ "TARGET_POWER10"
+ "stxvr<wd>x %x1,%y0"
+ [(set_attr "type" "vecstore")])
;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
@@ -5044,54 +5019,6 @@
DONE;
})
-;; Zero extend DI to TI. If we don't have the MTVSRDD instruction (and LXVRDX
-;; in the case of power10), we use the machine independent code. If we are
-;; loading up GPRs, we fall back to the old code.
-(define_insn_and_split "zero_extendditi2"
- [(set (match_operand:TI 0 "register_operand" "=r,r, wa,&wa")
- (zero_extend:TI (match_operand:DI 1 "register_operand" "r,wa,r, wa")))]
- "TARGET_POWERPC64 && TARGET_P9_VECTOR"
- "@
- #
- #
- mtvsrdd %x0,0,%1
- #"
- "&& reload_completed
- && (int_reg_operand (operands[0], TImode)
- || vsx_register_operand (operands[1], DImode))"
- [(pc)]
-{
- rtx dest = operands[0];
- rtx src = operands[1];
- int dest_regno = reg_or_subregno (dest);
-
- /* Handle conversion to GPR registers. Load up the low part and then
- zero out the upper part. */
- if (INT_REGNO_P (dest_regno))
- {
- rtx dest_hi = gen_highpart (DImode, dest);
- rtx dest_lo = gen_lowpart (DImode, dest);
-
- emit_move_insn (dest_lo, src);
- emit_move_insn (dest_hi, const0_rtx);
- DONE;
- }
-
- /* For setting a VSX register from another VSX register, clear the result
- register, and use XXPERMDI to shift the value into the lower 64-bits. */
- rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
- rtx dest_di = gen_rtx_REG (DImode, dest_regno);
-
- emit_move_insn (dest_v2di, CONST0_RTX (V2DImode));
- if (BYTES_BIG_ENDIAN)
- emit_insn (gen_vsx_concat_v2di (dest_v2di, dest_di, src));
- else
- emit_insn (gen_vsx_concat_v2di (dest_v2di, src, dest_di));
- DONE;
-}
- [(set_attr "type" "integer,mfvsr,vecmove,vecperm")
- (set_attr "length" "8, 8, *, 8")])
-
;; Sign extend DI to TI. We provide both GPR targets and Altivec targets on
;; power10. On earlier systems, the machine independent code will generate a
;; shift left to sign extend the 64-bit value to 128-bit.
diff --git a/gcc/testsuite/gcc.target/powerpc/zero-extend-di-ti.c b/gcc/testsuite/gcc.target/powerpc/zero-extend-di-ti.c
deleted file mode 100644
index ab5f5d89d4d..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/zero-extend-di-ti.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/* { dg-require-effective-target int128 } */
-/* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
-
-/* This patch makes sure the various optimization and code paths are done for
- zero extending DImode to TImode on power10. */
-
-__uint128_t
-gpr_to_gpr (unsigned long long a)
-{
- /* li 4,0. */
- return a;
-}
-
-__uint128_t
-mem_to_gpr (unsigned long long *p)
-{
- /* ld 3,0(3); li 4,0. */
- return *p;
-}
-
-__uint128_t
-vsx_to_gpr (__uint128_t *p, double d)
-{
- /* fctiduz 1,1; li 4,0;mfvsrd 3,1. */
- return (unsigned long long)d;
-}
-
-void
-gpr_to_vsx (__uint128_t *p, unsigned long long a)
-{
- /* mtvsrdd 0,0,4; stxv 0,0(3). */
- __uint128_t b = a;
- __asm__ (" # %x0" : "+wa" (b));
- *p = b;
-}
-
-void
-mem_to_vsx (__uint128_t *p, unsigned long long *q)
-{
- /* lxvrdx 0,0,4; stxv 0,0(3). */
- __uint128_t a = *q;
- __asm__ (" # %x0" : "+wa" (a));
- *p = a;
-}
-
-void
-vsx_to_vsx (__uint128_t *p, double d)
-{
- /* fctiduz 1,1; xxspltib 0,0; xxpermdi 0,0,1,0; stxv 0,0(3). */
- __uint128_t a = (unsigned long long)d;
- __asm__ (" # %x0" : "+wa" (a));
- *p = a;
-}
-
-/* { dg-final { scan-assembler-times {\mli\M} 3 } } */
-/* { dg-final { scan-assembler-times {\mld\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mlxvrdx\M} 3 } } */
-/* { dg-final { scan-assembler-times {\mmfvsrd\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mstxv\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-04-01 20:15 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-04-01 20:15 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:08fe7c705fa13e4e4f52b6dce37463ed80aa61a2
commit 08fe7c705fa13e4e4f52b6dce37463ed80aa61a2
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Fri Apr 1 16:15:05 2022 -0400
Revert patch.
2022-03-31 Michael Meissner <meissner@linux.ibm.com>
gcc/
Revert patch.
* config/rs6000/vsx.md (zero_extendditi2): New insn.
Diff:
---
gcc/config/rs6000/vsx.md | 52 ------------------------------------------------
1 file changed, 52 deletions(-)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index cc8c80863f2..c091e5e2f47 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5019,58 +5019,6 @@
DONE;
})
-;; Zero extend DI to TI. If we don't have the MTVSRDD instruction (and LXVRDX
-;; in the case of power10), we use the machine independent code. If we are
-;; loading up GPRs, we fall back to the old code.
-(define_insn_and_split "zero_extendditi2"
- [(set (match_operand:TI 0 "register_operand" "=r,r,r, wa,wa,wa")
- (zero_extend:TI (match_operand:DI 1 "input_operand" "r,m,wa,r, Z, wa")))
- (clobber (match_scratch:DI 2 "=X,X,X, X, X, &wa"))]
- "TARGET_POWERPC64 && TARGET_POWER10"
- "@
- #
- #
- #
- mtvsrdd %x0,0,%1
- lxvrdx %x0,%y1
- #"
- "&& reload_completed
- && (int_reg_operand (operands[0], TImode)
- || vsx_register_operand (operands[1], DImode))"
- [(pc)]
-{
- rtx dest = operands[0];
- rtx src = operands[1];
- int dest_regno = reg_or_subregno (dest);
-
- /* Handle conversion to GPR registers. Load up the low part and then
- zero out the upper part. */
- if (INT_REGNO_P (dest_regno))
- {
- rtx dest_hi = gen_highpart (DImode, dest);
- rtx dest_lo = gen_lowpart (DImode, dest);
-
- emit_move_insn (dest_lo, src);
- emit_move_insn (dest_hi, const0_rtx);
- DONE;
- }
-
- /* For setting a VSX register from another VSX register, clear a scratch
- register, and use XXPERMDI to shift the value into the lower 64-bits. */
- rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
- rtx zero = operands[2];
-
- emit_move_insn (zero, const0_rtx);
- if (BYTES_BIG_ENDIAN)
- emit_insn (gen_vsx_concat_v2di (dest_v2di, zero, src));
- else
- emit_insn (gen_vsx_concat_v2di (dest_v2di, src, zero));
- DONE;
-}
- [(set_attr "type" "integer,load,mfvsr,vecmove,vecload,vecperm")
- (set_attr "isa" "*, *, *, *, p10, *")
- (set_attr "length" "8, 8, 8, *, *, 8")])
-
;; Sign extend DI to TI. We provide both GPR targets and Altivec targets on
;; power10. On earlier systems, the machine independent code will generate a
;; shift left to sign extend the 64-bit value to 128-bit.
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-03-31 14:00 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-03-31 14:00 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:5a252165aabe7df56202c33a90c22a30d4ea508e
commit 5a252165aabe7df56202c33a90c22a30d4ea508e
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Tue Mar 29 19:35:18 2022 -0400
Revert patch.
2022-03-28 Michael Meissner <meissner@linux.ibm.com>
gcc/
PR target/99293
Revert patch.
* config/rs6000/rs6000-p8swap.cc (rtx_is_swappable_p): Remove
UNSPEC_VSX_XXSPLTD case.
* config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): Delete.
(vsx_xxspltd_<mode>): Rewrite to use VEC_DUPLICATE.
gcc/testsuite:
PR target/99293
Revert patch.
* gcc.target/powerpc/builtins-1.c: Update insn count.
* gcc.target/powerpc/pr99293.c: New test.
Diff:
---
gcc/config/rs6000/rs6000-p8swap.cc | 1 +
gcc/config/rs6000/vsx.md | 38 +++++++-----------------------
gcc/testsuite/gcc.target/powerpc/pr99293.c | 36 ----------------------------
3 files changed, 10 insertions(+), 65 deletions(-)
diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 1973d9c8245..d301bc3fe59 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -805,6 +805,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
case UNSPEC_VUPKLU_V4SF:
return 0;
case UNSPEC_VSPLT_DIRECT:
+ case UNSPEC_VSX_XXSPLTD:
*special = SH_SPLAT;
return 1;
case UNSPEC_REDUC_PLUS:
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 26226520335..1b75538f42f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -296,6 +296,7 @@
UNSPEC_VSX_XXPERM
UNSPEC_VSX_XXSPLTW
+ UNSPEC_VSX_XXSPLTD
UNSPEC_VSX_DIVSD
UNSPEC_VSX_DIVUD
UNSPEC_VSX_DIVSQ
@@ -3088,25 +3089,6 @@
}
[(set_attr "type" "vecperm")])
-;; Combiner patterns to allow creating XXPERMDI's to access either double
-;; word element in a vector register when used with VEC_DUPLICATE..
-(define_insn "*vsx_dup_<mode>_1"
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
- (vec_duplicate:VSX_D
- (vec_select:<VS_scalar>
- (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
- (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))))]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
-{
- HOST_WIDE_INT dword = INTVAL (operands[2]);
- if (!BYTES_BIG_ENDIAN)
- dword = !dword;
-
- operands[3] = GEN_INT (3*dword);
- return "xxpermdi %x0,%x1,%x1,%3";
-}
- [(set_attr "type" "vecperm")])
-
;; Special purpose concat using xxpermdi to glue two single precision values
;; together, relying on the fact that internally scalar floats are represented
;; as doubles. This is used to initialize a V4SF vector with 4 floats
@@ -4691,18 +4673,16 @@
;; V2DF/V2DI splat for use by vec_splat builtin
(define_insn "vsx_xxspltd_<mode>"
[(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
- (vec_duplicate:VSX_D
- (vec_select:<VS_scalar>
- (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
- (parallel [(match_operand:QI 2 "const_0_to_1_operand" "i")]))))]
+ (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (match_operand:QI 2 "u5bit_cint_operand" "i")]
+ UNSPEC_VSX_XXSPLTD))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
- HOST_WIDE_INT dword = INTVAL (operands[2]);
- if (!BYTES_BIG_ENDIAN)
- dword = !dword;
-
- operands[3] = GEN_INT (3*dword);
- return "xxpermdi %x0,%x1,%x1,%3";
+ if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
+ || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
+ return "xxpermdi %x0,%x1,%x1,0";
+ else
+ return "xxpermdi %x0,%x1,%x1,3";
}
[(set_attr "type" "vecperm")])
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c b/gcc/testsuite/gcc.target/powerpc/pr99293.c
deleted file mode 100644
index 03c22f8f4de..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/pr99293.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* { dg-do compile } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* Test for PR 99293, which wants to do:
- __builtin_vec_splats (__builtin_vec_extract (v, n))
-
- where v is a V2DF or V2DI vector and n is either 0 or 1. Previously the
- compiler would do a direct move to the GPR registers to select the item and
- a direct move from the GPR registers to do the splat. */
-
-vector long long
-splat_dup_ll_0 (vector long long v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-
-vector long long
-splat_dup_ll_1 (vector long long v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-vector double
-splat_dup_d_0 (vector double v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-
-vector double
-splat_dup_d_1 (vector double v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 4 } } */
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-03-31 14:00 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-03-31 14:00 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:6d781fee3576aa121054b4b156bb92020e8d8f59
commit 6d781fee3576aa121054b4b156bb92020e8d8f59
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Tue Mar 29 19:29:04 2022 -0400
Revert patch.
2022-03-28 Michael Meissner <meissner@linux.ibm.com>
gcc/
PR target/99293
Revert patch.
* config/rs6000/rs6000-p8swap.cc (rtx_is_swappable_p): Remove
UNSPEC_VSX_XXSPLTD case.
* config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): Delete.
(vsx_xxspltd_<mode>): Rewrite to use VEC_DUPLICATE.
gcc/testsuite:
PR target/99293
Revert patch.
* gcc.target/powerpc/pr99293.c: New test.
Diff:
---
gcc/config/rs6000/rs6000-p8swap.cc | 1 +
gcc/config/rs6000/vsx.md | 38 +++++++-----------------------
gcc/testsuite/gcc.target/powerpc/pr99293.c | 36 ----------------------------
3 files changed, 10 insertions(+), 65 deletions(-)
diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 1973d9c8245..d301bc3fe59 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -805,6 +805,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
case UNSPEC_VUPKLU_V4SF:
return 0;
case UNSPEC_VSPLT_DIRECT:
+ case UNSPEC_VSX_XXSPLTD:
*special = SH_SPLAT;
return 1;
case UNSPEC_REDUC_PLUS:
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 26226520335..1b75538f42f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -296,6 +296,7 @@
UNSPEC_VSX_XXPERM
UNSPEC_VSX_XXSPLTW
+ UNSPEC_VSX_XXSPLTD
UNSPEC_VSX_DIVSD
UNSPEC_VSX_DIVUD
UNSPEC_VSX_DIVSQ
@@ -3088,25 +3089,6 @@
}
[(set_attr "type" "vecperm")])
-;; Combiner patterns to allow creating XXPERMDI's to access either double
-;; word element in a vector register when used with VEC_DUPLICATE..
-(define_insn "*vsx_dup_<mode>_1"
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
- (vec_duplicate:VSX_D
- (vec_select:<VS_scalar>
- (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
- (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))))]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
-{
- HOST_WIDE_INT dword = INTVAL (operands[2]);
- if (!BYTES_BIG_ENDIAN)
- dword = !dword;
-
- operands[3] = GEN_INT (3*dword);
- return "xxpermdi %x0,%x1,%x1,%3";
-}
- [(set_attr "type" "vecperm")])
-
;; Special purpose concat using xxpermdi to glue two single precision values
;; together, relying on the fact that internally scalar floats are represented
;; as doubles. This is used to initialize a V4SF vector with 4 floats
@@ -4691,18 +4673,16 @@
;; V2DF/V2DI splat for use by vec_splat builtin
(define_insn "vsx_xxspltd_<mode>"
[(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
- (vec_duplicate:VSX_D
- (vec_select:<VS_scalar>
- (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
- (parallel [(match_operand:QI 2 "const_0_to_1_operand" "i")]))))]
+ (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (match_operand:QI 2 "u5bit_cint_operand" "i")]
+ UNSPEC_VSX_XXSPLTD))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
- HOST_WIDE_INT dword = INTVAL (operands[2]);
- if (!BYTES_BIG_ENDIAN)
- dword = !dword;
-
- operands[3] = GEN_INT (3*dword);
- return "xxpermdi %x0,%x1,%x1,%3";
+ if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
+ || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
+ return "xxpermdi %x0,%x1,%x1,0";
+ else
+ return "xxpermdi %x0,%x1,%x1,3";
}
[(set_attr "type" "vecperm")])
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c b/gcc/testsuite/gcc.target/powerpc/pr99293.c
deleted file mode 100644
index 03c22f8f4de..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/pr99293.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* { dg-do compile } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* Test for PR 99263, which wants to do:
- __builtin_vec_splats (__builtin_vec_extract (v, n))
-
- where v is a V2DF or V2DI vector and n is either 0 or 1. Previously the
- compiler would do a direct move to the GPR registers to select the item and
- a direct move from the GPR registers to do the splat. */
-
-vector long long
-splat_dup_ll_0 (vector long long v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-
-vector long long
-splat_dup_ll_1 (vector long long v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-vector double
-splat_dup_d_0 (vector double v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-
-vector double
-splat_dup_d_1 (vector double v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 4 } } */
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-03-31 14:00 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-03-31 14:00 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:4b06b3b008d424f48bddb787c20897745383277e
commit 4b06b3b008d424f48bddb787c20897745383277e
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Mon Mar 28 22:57:56 2022 -0400
Revert patch.
gcc/
PR target/99293
Revert patch.
* config/rs6000/rs6000-p8swap.cc (rtx_is_swappable_p): Remove
UNSPEC_VSX_XXSPLTD case.
* config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): Delete.
(vsx_xxspltd_<mode>): Rewrite to use VEC_DUPLICATE.
gcc/testsuite:
PR target/99293
Revert patch.
* gcc.target/powerpc/pr99293.c: New test.
Diff:
---
gcc/config/rs6000/rs6000-p8swap.cc | 1 +
gcc/config/rs6000/vsx.md | 38 +++++++-----------------------
gcc/testsuite/gcc.target/powerpc/pr99293.c | 36 ----------------------------
3 files changed, 10 insertions(+), 65 deletions(-)
diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 1973d9c8245..d301bc3fe59 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -805,6 +805,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
case UNSPEC_VUPKLU_V4SF:
return 0;
case UNSPEC_VSPLT_DIRECT:
+ case UNSPEC_VSX_XXSPLTD:
*special = SH_SPLAT;
return 1;
case UNSPEC_REDUC_PLUS:
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 26226520335..1b75538f42f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -296,6 +296,7 @@
UNSPEC_VSX_XXPERM
UNSPEC_VSX_XXSPLTW
+ UNSPEC_VSX_XXSPLTD
UNSPEC_VSX_DIVSD
UNSPEC_VSX_DIVUD
UNSPEC_VSX_DIVSQ
@@ -3088,25 +3089,6 @@
}
[(set_attr "type" "vecperm")])
-;; Combiner patterns to allow creating XXPERMDI's to access either double
-;; word element in a vector register when used with VEC_DUPLICATE..
-(define_insn "*vsx_dup_<mode>_1"
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
- (vec_duplicate:VSX_D
- (vec_select:<VS_scalar>
- (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
- (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))))]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
-{
- HOST_WIDE_INT dword = INTVAL (operands[2]);
- if (!BYTES_BIG_ENDIAN)
- dword = !dword;
-
- operands[3] = GEN_INT (3*dword);
- return "xxpermdi %x0,%x1,%x1,%3";
-}
- [(set_attr "type" "vecperm")])
-
;; Special purpose concat using xxpermdi to glue two single precision values
;; together, relying on the fact that internally scalar floats are represented
;; as doubles. This is used to initialize a V4SF vector with 4 floats
@@ -4691,18 +4673,16 @@
;; V2DF/V2DI splat for use by vec_splat builtin
(define_insn "vsx_xxspltd_<mode>"
[(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
- (vec_duplicate:VSX_D
- (vec_select:<VS_scalar>
- (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
- (parallel [(match_operand:QI 2 "const_0_to_1_operand" "i")]))))]
+ (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (match_operand:QI 2 "u5bit_cint_operand" "i")]
+ UNSPEC_VSX_XXSPLTD))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
- HOST_WIDE_INT dword = INTVAL (operands[2]);
- if (!BYTES_BIG_ENDIAN)
- dword = !dword;
-
- operands[3] = GEN_INT (3*dword);
- return "xxpermdi %x0,%x1,%x1,%3";
+ if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
+ || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
+ return "xxpermdi %x0,%x1,%x1,0";
+ else
+ return "xxpermdi %x0,%x1,%x1,3";
}
[(set_attr "type" "vecperm")])
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c b/gcc/testsuite/gcc.target/powerpc/pr99293.c
deleted file mode 100644
index 03c22f8f4de..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/pr99293.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* { dg-do compile } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* Test for PR 99263, which wants to do:
- __builtin_vec_splats (__builtin_vec_extract (v, n))
-
- where v is a V2DF or V2DI vector and n is either 0 or 1. Previously the
- compiler would do a direct move to the GPR registers to select the item and
- a direct move from the GPR registers to do the splat. */
-
-vector long long
-splat_dup_ll_0 (vector long long v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-
-vector long long
-splat_dup_ll_1 (vector long long v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-vector double
-splat_dup_d_0 (vector double v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-
-vector double
-splat_dup_d_1 (vector double v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 4 } } */
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-03-30 17:57 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-03-30 17:57 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:4cb58fd9c112236b76047e2575b2503d3d18f15e
commit 4cb58fd9c112236b76047e2575b2503d3d18f15e
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Wed Mar 30 13:56:20 2022 -0400
Revert patch.
2022-03-29 Michael Meissner <meissner@linux.ibm.com>
gcc/
PR target/99293
Revert patch.
* config/rs6000/rs6000-p8swap.cc (rtx_is_swappable_p): Remove
UNSPEC_VSX_XXSPLTD case.
* config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): Delete.
(vsx_xxspltd_<mode>): Rewrite to use VEC_DUPLICATE.
gcc/testsuite:
PR target/99293
Revert patch.
* gcc.target/powerpc/builtins-1.c: Update insn count.
* gcc.target/powerpc/pr99293.c: New test.
Diff:
---
gcc/config/rs6000/rs6000-p8swap.cc | 1 +
gcc/config/rs6000/vsx.md | 19 +++++++-------
gcc/testsuite/gcc.target/powerpc/builtins-1.c | 2 +-
gcc/testsuite/gcc.target/powerpc/pr99293.c | 36 ---------------------------
4 files changed, 11 insertions(+), 47 deletions(-)
diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 1973d9c8245..d301bc3fe59 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -805,6 +805,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
case UNSPEC_VUPKLU_V4SF:
return 0;
case UNSPEC_VSPLT_DIRECT:
+ case UNSPEC_VSX_XXSPLTD:
*special = SH_SPLAT;
return 1;
case UNSPEC_REDUC_PLUS:
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 82fa4bbbfc4..15bd86dfdfb 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -296,6 +296,7 @@
UNSPEC_VSX_XXPERM
UNSPEC_VSX_XXSPLTW
+ UNSPEC_VSX_XXSPLTD
UNSPEC_VSX_DIVSD
UNSPEC_VSX_DIVUD
UNSPEC_VSX_DIVSQ
@@ -4675,18 +4676,16 @@
;; V2DF/V2DI splat for use by vec_splat builtin
(define_insn "vsx_xxspltd_<mode>"
[(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
- (vec_duplicate:VSX_D
- (vec_select:<VS_scalar>
- (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
- (parallel [(match_operand:QI 2 "const_0_to_1_operand" "i")]))))]
+ (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (match_operand:QI 2 "u5bit_cint_operand" "i")]
+ UNSPEC_VSX_XXSPLTD))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
- HOST_WIDE_INT dword = INTVAL (operands[2]);
- if (!BYTES_BIG_ENDIAN)
- dword = !dword;
-
- operands[3] = GEN_INT (3*dword);
- return "xxpermdi %x0,%x1,%x1,%3";
+ if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
+ || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
+ return "xxpermdi %x0,%x1,%x1,0";
+ else
+ return "xxpermdi %x0,%x1,%x1,3";
}
[(set_attr "type" "vecperm")])
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1.c b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
index 98783668bce..28cd1aa6b1a 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
@@ -1035,4 +1035,4 @@ foo156 (vector unsigned short usa)
/* { dg-final { scan-assembler-times {\mvmrglb\M} 3 } } */
/* { dg-final { scan-assembler-times {\mvmrgew\M} 4 } } */
/* { dg-final { scan-assembler-times {\mvsplth|xxsplth\M} 4 } } */
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 42 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 44 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c b/gcc/testsuite/gcc.target/powerpc/pr99293.c
deleted file mode 100644
index 03c22f8f4de..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/pr99293.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* { dg-do compile } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* Test for PR 99263, which wants to do:
- __builtin_vec_splats (__builtin_vec_extract (v, n))
-
- where v is a V2DF or V2DI vector and n is either 0 or 1. Previously the
- compiler would do a direct move to the GPR registers to select the item and
- a direct move from the GPR registers to do the splat. */
-
-vector long long
-splat_dup_ll_0 (vector long long v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-
-vector long long
-splat_dup_ll_1 (vector long long v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-vector double
-splat_dup_d_0 (vector double v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-
-vector double
-splat_dup_d_1 (vector double v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 4 } } */
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-03-29 23:35 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-03-29 23:35 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:f89498c4aa4dbf9fcea45f7a754d4a438ac01a4e
commit f89498c4aa4dbf9fcea45f7a754d4a438ac01a4e
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Tue Mar 29 19:35:18 2022 -0400
Revert patch.
2022-03-28 Michael Meissner <meissner@linux.ibm.com>
gcc/
PR target/99293
Revert patch.
* config/rs6000/rs6000-p8swap.cc (rtx_is_swappable_p): Remove
UNSPEC_VSX_XXSPLTD case.
* config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): Delete.
(vsx_xxspltd_<mode>): Rewrite to use VEC_DUPLICATE.
gcc/testsuite:
PR target/99293
Revert patch.
* gcc.target/powerpc/builtins-1.c: Update insn count.
* gcc.target/powerpc/pr99293.c: New test.
Diff:
---
gcc/config/rs6000/rs6000-p8swap.cc | 1 +
gcc/config/rs6000/vsx.md | 38 +++++++-----------------------
gcc/testsuite/gcc.target/powerpc/pr99293.c | 36 ----------------------------
3 files changed, 10 insertions(+), 65 deletions(-)
diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 1973d9c8245..d301bc3fe59 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -805,6 +805,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
case UNSPEC_VUPKLU_V4SF:
return 0;
case UNSPEC_VSPLT_DIRECT:
+ case UNSPEC_VSX_XXSPLTD:
*special = SH_SPLAT;
return 1;
case UNSPEC_REDUC_PLUS:
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0ab24186812..15bd86dfdfb 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -296,6 +296,7 @@
UNSPEC_VSX_XXPERM
UNSPEC_VSX_XXSPLTW
+ UNSPEC_VSX_XXSPLTD
UNSPEC_VSX_DIVSD
UNSPEC_VSX_DIVUD
UNSPEC_VSX_DIVSQ
@@ -3088,25 +3089,6 @@
}
[(set_attr "type" "vecperm")])
-;; Combiner patterns to allow creating XXPERMDI's to access either double
-;; word element in a vector register when used with VEC_DUPLICATE..
-(define_insn "*vsx_dup_<mode>_1"
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
- (vec_duplicate:VSX_D
- (vec_select:<VS_scalar>
- (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
- (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))))]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
-{
- HOST_WIDE_INT dword = INTVAL (operands[2]);
- if (!BYTES_BIG_ENDIAN)
- dword = !dword;
-
- operands[3] = GEN_INT (3*dword);
- return "xxpermdi %x0,%x1,%x1,%3";
-}
- [(set_attr "type" "vecperm")])
-
;; Special purpose concat using xxpermdi to glue two single precision values
;; together, relying on the fact that internally scalar floats are represented
;; as doubles. This is used to initialize a V4SF vector with 4 floats
@@ -4694,18 +4676,16 @@
;; V2DF/V2DI splat for use by vec_splat builtin
(define_insn "vsx_xxspltd_<mode>"
[(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
- (vec_duplicate:VSX_D
- (vec_select:<VS_scalar>
- (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
- (parallel [(match_operand:QI 2 "const_0_to_1_operand" "i")]))))]
+ (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (match_operand:QI 2 "u5bit_cint_operand" "i")]
+ UNSPEC_VSX_XXSPLTD))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
- HOST_WIDE_INT dword = INTVAL (operands[2]);
- if (!BYTES_BIG_ENDIAN)
- dword = !dword;
-
- operands[3] = GEN_INT (3*dword);
- return "xxpermdi %x0,%x1,%x1,%3";
+ if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
+ || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
+ return "xxpermdi %x0,%x1,%x1,0";
+ else
+ return "xxpermdi %x0,%x1,%x1,3";
}
[(set_attr "type" "vecperm")])
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c b/gcc/testsuite/gcc.target/powerpc/pr99293.c
deleted file mode 100644
index 03c22f8f4de..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/pr99293.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* { dg-do compile } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* Test for PR 99263, which wants to do:
- __builtin_vec_splats (__builtin_vec_extract (v, n))
-
- where v is a V2DF or V2DI vector and n is either 0 or 1. Previously the
- compiler would do a direct move to the GPR registers to select the item and
- a direct move from the GPR registers to do the splat. */
-
-vector long long
-splat_dup_ll_0 (vector long long v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-
-vector long long
-splat_dup_ll_1 (vector long long v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-vector double
-splat_dup_d_0 (vector double v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-
-vector double
-splat_dup_d_1 (vector double v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 4 } } */
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-03-29 23:29 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-03-29 23:29 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:b3b51cdf4e2c61d90703f6d173e945ca1bf1f375
commit b3b51cdf4e2c61d90703f6d173e945ca1bf1f375
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Tue Mar 29 19:29:04 2022 -0400
Revert patch.
2022-03-28 Michael Meissner <meissner@linux.ibm.com>
gcc/
PR target/99293
Revert patch.
* config/rs6000/rs6000-p8swap.cc (rtx_is_swappable_p): Remove
UNSPEC_VSX_XXSPLTD case.
* config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): Delete.
(vsx_xxspltd_<mode>): Rewrite to use VEC_DUPLICATE.
gcc/testsuite:
PR target/99293
Revert patch.
* gcc.target/powerpc/pr99293.c: New test.
Diff:
---
gcc/config/rs6000/rs6000-p8swap.cc | 1 +
gcc/config/rs6000/vsx.md | 38 +++++++-----------------------
gcc/testsuite/gcc.target/powerpc/pr99293.c | 36 ----------------------------
3 files changed, 10 insertions(+), 65 deletions(-)
diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 1973d9c8245..d301bc3fe59 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -805,6 +805,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
case UNSPEC_VUPKLU_V4SF:
return 0;
case UNSPEC_VSPLT_DIRECT:
+ case UNSPEC_VSX_XXSPLTD:
*special = SH_SPLAT;
return 1;
case UNSPEC_REDUC_PLUS:
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0ab24186812..15bd86dfdfb 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -296,6 +296,7 @@
UNSPEC_VSX_XXPERM
UNSPEC_VSX_XXSPLTW
+ UNSPEC_VSX_XXSPLTD
UNSPEC_VSX_DIVSD
UNSPEC_VSX_DIVUD
UNSPEC_VSX_DIVSQ
@@ -3088,25 +3089,6 @@
}
[(set_attr "type" "vecperm")])
-;; Combiner patterns to allow creating XXPERMDI's to access either double
-;; word element in a vector register when used with VEC_DUPLICATE..
-(define_insn "*vsx_dup_<mode>_1"
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
- (vec_duplicate:VSX_D
- (vec_select:<VS_scalar>
- (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
- (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))))]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
-{
- HOST_WIDE_INT dword = INTVAL (operands[2]);
- if (!BYTES_BIG_ENDIAN)
- dword = !dword;
-
- operands[3] = GEN_INT (3*dword);
- return "xxpermdi %x0,%x1,%x1,%3";
-}
- [(set_attr "type" "vecperm")])
-
;; Special purpose concat using xxpermdi to glue two single precision values
;; together, relying on the fact that internally scalar floats are represented
;; as doubles. This is used to initialize a V4SF vector with 4 floats
@@ -4694,18 +4676,16 @@
;; V2DF/V2DI splat for use by vec_splat builtin
(define_insn "vsx_xxspltd_<mode>"
[(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
- (vec_duplicate:VSX_D
- (vec_select:<VS_scalar>
- (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
- (parallel [(match_operand:QI 2 "const_0_to_1_operand" "i")]))))]
+ (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (match_operand:QI 2 "u5bit_cint_operand" "i")]
+ UNSPEC_VSX_XXSPLTD))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
- HOST_WIDE_INT dword = INTVAL (operands[2]);
- if (!BYTES_BIG_ENDIAN)
- dword = !dword;
-
- operands[3] = GEN_INT (3*dword);
- return "xxpermdi %x0,%x1,%x1,%3";
+ if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
+ || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
+ return "xxpermdi %x0,%x1,%x1,0";
+ else
+ return "xxpermdi %x0,%x1,%x1,3";
}
[(set_attr "type" "vecperm")])
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c b/gcc/testsuite/gcc.target/powerpc/pr99293.c
deleted file mode 100644
index 03c22f8f4de..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/pr99293.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* { dg-do compile } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* Test for PR 99263, which wants to do:
- __builtin_vec_splats (__builtin_vec_extract (v, n))
-
- where v is a V2DF or V2DI vector and n is either 0 or 1. Previously the
- compiler would do a direct move to the GPR registers to select the item and
- a direct move from the GPR registers to do the splat. */
-
-vector long long
-splat_dup_ll_0 (vector long long v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-
-vector long long
-splat_dup_ll_1 (vector long long v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-vector double
-splat_dup_d_0 (vector double v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-
-vector double
-splat_dup_d_1 (vector double v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 4 } } */
^ permalink raw reply [flat|nested] 11+ messages in thread
* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-03-29 2:58 Michael Meissner
0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-03-29 2:58 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:3a929c2d268d2b9803eca3b396f638182d178076
commit 3a929c2d268d2b9803eca3b396f638182d178076
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Mon Mar 28 22:57:56 2022 -0400
Revert patch.
gcc/
PR target/99293
Revert patch.
* config/rs6000/rs6000-p8swap.cc (rtx_is_swappable_p): Remove
UNSPEC_VSX_XXSPLTD case.
* config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): Delete.
(vsx_xxspltd_<mode>): Rewrite to use VEC_DUPLICATE.
gcc/testsuite:
PR target/99293
Revert patch.
* gcc.target/powerpc/pr99293.c: New test.
Diff:
---
gcc/config/rs6000/rs6000-p8swap.cc | 1 +
gcc/config/rs6000/vsx.md | 38 +++++++-----------------------
gcc/testsuite/gcc.target/powerpc/pr99293.c | 36 ----------------------------
3 files changed, 10 insertions(+), 65 deletions(-)
diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 1973d9c8245..d301bc3fe59 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -805,6 +805,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
case UNSPEC_VUPKLU_V4SF:
return 0;
case UNSPEC_VSPLT_DIRECT:
+ case UNSPEC_VSX_XXSPLTD:
*special = SH_SPLAT;
return 1;
case UNSPEC_REDUC_PLUS:
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0ab24186812..15bd86dfdfb 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -296,6 +296,7 @@
UNSPEC_VSX_XXPERM
UNSPEC_VSX_XXSPLTW
+ UNSPEC_VSX_XXSPLTD
UNSPEC_VSX_DIVSD
UNSPEC_VSX_DIVUD
UNSPEC_VSX_DIVSQ
@@ -3088,25 +3089,6 @@
}
[(set_attr "type" "vecperm")])
-;; Combiner patterns to allow creating XXPERMDI's to access either double
-;; word element in a vector register when used with VEC_DUPLICATE..
-(define_insn "*vsx_dup_<mode>_1"
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
- (vec_duplicate:VSX_D
- (vec_select:<VS_scalar>
- (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
- (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))))]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
-{
- HOST_WIDE_INT dword = INTVAL (operands[2]);
- if (!BYTES_BIG_ENDIAN)
- dword = !dword;
-
- operands[3] = GEN_INT (3*dword);
- return "xxpermdi %x0,%x1,%x1,%3";
-}
- [(set_attr "type" "vecperm")])
-
;; Special purpose concat using xxpermdi to glue two single precision values
;; together, relying on the fact that internally scalar floats are represented
;; as doubles. This is used to initialize a V4SF vector with 4 floats
@@ -4694,18 +4676,16 @@
;; V2DF/V2DI splat for use by vec_splat builtin
(define_insn "vsx_xxspltd_<mode>"
[(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
- (vec_duplicate:VSX_D
- (vec_select:<VS_scalar>
- (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
- (parallel [(match_operand:QI 2 "const_0_to_1_operand" "i")]))))]
+ (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
+ (match_operand:QI 2 "u5bit_cint_operand" "i")]
+ UNSPEC_VSX_XXSPLTD))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
- HOST_WIDE_INT dword = INTVAL (operands[2]);
- if (!BYTES_BIG_ENDIAN)
- dword = !dword;
-
- operands[3] = GEN_INT (3*dword);
- return "xxpermdi %x0,%x1,%x1,%3";
+ if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
+ || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
+ return "xxpermdi %x0,%x1,%x1,0";
+ else
+ return "xxpermdi %x0,%x1,%x1,3";
}
[(set_attr "type" "vecperm")])
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c b/gcc/testsuite/gcc.target/powerpc/pr99293.c
deleted file mode 100644
index 03c22f8f4de..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/pr99293.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* { dg-do compile } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* Test for PR 99263, which wants to do:
- __builtin_vec_splats (__builtin_vec_extract (v, n))
-
- where v is a V2DF or V2DI vector and n is either 0 or 1. Previously the
- compiler would do a direct move to the GPR registers to select the item and
- a direct move from the GPR registers to do the splat. */
-
-vector long long
-splat_dup_ll_0 (vector long long v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-
-vector long long
-splat_dup_ll_1 (vector long long v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-vector double
-splat_dup_d_0 (vector double v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-
-vector double
-splat_dup_d_1 (vector double v)
-{
- return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 4 } } */
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2022-04-02 2:24 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-31 14:00 [gcc(refs/users/meissner/heads/work084)] Revert patch Michael Meissner
-- strict thread matches above, loose matches on Subject: below --
2022-04-02 2:24 Michael Meissner
2022-04-02 0:03 Michael Meissner
2022-04-01 20:15 Michael Meissner
2022-03-31 14:00 Michael Meissner
2022-03-31 14:00 Michael Meissner
2022-03-31 14:00 Michael Meissner
2022-03-30 17:57 Michael Meissner
2022-03-29 23:35 Michael Meissner
2022-03-29 23:29 Michael Meissner
2022-03-29 2:58 Michael Meissner
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).