public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-03-31 14:00 Michael Meissner
  0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-03-31 14:00 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:4270ed1887b785280ff58268bc6bab4a5356d519

commit 4270ed1887b785280ff58268bc6bab4a5356d519
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Wed Mar 30 13:56:20 2022 -0400

    Revert patch.
    
    2022-03-29   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            PR target/99293
            Revert patch.
            * config/rs6000/rs6000-p8swap.cc (rtx_is_swappable_p): Remove
            UNSPEC_VSX_XXSPLTD case.
            * config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): Delete.
            (vsx_xxspltd_<mode>): Rewrite to use VEC_DUPLICATE.
    
    gcc/testsuite:
            PR target/99293
            Revert patch.
            * gcc.target/powerpc/builtins-1.c: Update insn count.
            * gcc.target/powerpc/pr99293.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000-p8swap.cc            |  1 +
 gcc/config/rs6000/vsx.md                      | 19 +++++++-------
 gcc/testsuite/gcc.target/powerpc/builtins-1.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr99293.c    | 36 ---------------------------
 4 files changed, 11 insertions(+), 47 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 1973d9c8245..d301bc3fe59 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -805,6 +805,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
 	  case UNSPEC_VUPKLU_V4SF:
 	    return 0;
 	  case UNSPEC_VSPLT_DIRECT:
+	  case UNSPEC_VSX_XXSPLTD:
 	    *special = SH_SPLAT;
 	    return 1;
 	  case UNSPEC_REDUC_PLUS:
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index a1a1ce95195..1b75538f42f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -296,6 +296,7 @@
    UNSPEC_VSX_XXPERM
 
    UNSPEC_VSX_XXSPLTW
+   UNSPEC_VSX_XXSPLTD
    UNSPEC_VSX_DIVSD
    UNSPEC_VSX_DIVUD
    UNSPEC_VSX_DIVSQ
@@ -4672,18 +4673,16 @@
 ;; V2DF/V2DI splat for use by vec_splat builtin
 (define_insn "vsx_xxspltd_<mode>"
   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-	(vec_duplicate:VSX_D
-	 (vec_select:<VS_scalar>
-	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
-	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "i")]))))]
+        (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
+	               (match_operand:QI 2 "u5bit_cint_operand" "i")]
+                      UNSPEC_VSX_XXSPLTD))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
 {
-  HOST_WIDE_INT dword = INTVAL (operands[2]);
-  if (!BYTES_BIG_ENDIAN)
-    dword = !dword;
-
-  operands[3] = GEN_INT (3*dword);
-  return "xxpermdi %x0,%x1,%x1,%3";
+  if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
+      || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
+    return "xxpermdi %x0,%x1,%x1,0";
+  else
+    return "xxpermdi %x0,%x1,%x1,3";
 }
   [(set_attr "type" "vecperm")])
 
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1.c b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
index 98783668bce..28cd1aa6b1a 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
@@ -1035,4 +1035,4 @@ foo156 (vector unsigned short usa)
 /* { dg-final { scan-assembler-times {\mvmrglb\M} 3 } } */
 /* { dg-final { scan-assembler-times {\mvmrgew\M} 4 } } */
 /* { dg-final { scan-assembler-times {\mvsplth|xxsplth\M} 4 } } */
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 42 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 44 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c b/gcc/testsuite/gcc.target/powerpc/pr99293.c
deleted file mode 100644
index 03c22f8f4de..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/pr99293.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* { dg-do compile } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* Test for PR 99263, which wants to do:
-   __builtin_vec_splats (__builtin_vec_extract (v, n))
-
-   where v is a V2DF or V2DI vector and n is either 0 or 1.  Previously the
-   compiler would do a direct move to the GPR registers to select the item and
-   a direct move from the GPR registers to do the splat.  */
-
-vector long long
-splat_dup_ll_0 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-	
-vector long long
-splat_dup_ll_1 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-vector double
-splat_dup_d_0 (vector double v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-	
-vector double
-splat_dup_d_1 (vector double v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-	
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 4 } } */


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-04-02  2:24 Michael Meissner
  0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-04-02  2:24 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:83e94819074f54e1c7c44d01d7c0d221a2a268fc

commit 83e94819074f54e1c7c44d01d7c0d221a2a268fc
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Apr 1 22:23:40 2022 -0400

    Revert patch.
    
    2022-04-01   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            Revert patch.
            * config/rs6000/vsx.md (vsx_lxvr<wd>x): Add support for loading to
            GPR registers.
            (vsx_stxvr<wd>x): Add support for storing from GPR registers.
            (zero_extendditi2): New insn.
    
    gcc/testsuite/
            Revert patch.
            * gcc.target/powerpc/vsx-load-element-extend-int.c: Use -O2
            instead of -O0 and update insn counts.
            * gcc.target/powerpc/vsx-load-element-extend-short.c: Likewise.
            * gcc.target/powerpc/zero-extend-di-ti.c: New test.

Diff:
---
 gcc/config/rs6000/vsx.md                           | 82 +++-------------------
 .../powerpc/vsx-load-element-extend-int.c          | 36 ++++++----
 .../powerpc/vsx-load-element-extend-short.c        | 35 +++++----
 .../gcc.target/powerpc/zero-extend-di-ti.c         | 62 ----------------
 4 files changed, 51 insertions(+), 164 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index ad971e3a1de..c091e5e2f47 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1315,32 +1315,14 @@
     }
 })
 
-;; Load rightmost element from load_data using lxvrbx, lxvrhx, lxvrwx, lxvrdx.
-;; Support TImode being in a GPR register to prevent generating lvxr{d,w,b}x
-;; and then two direct moves if we ultimately need the value in a GPR register.
-(define_insn_and_split "vsx_lxvr<wd>x"
-  [(set (match_operand:TI 0 "register_operand" "=r,wa")
-	(zero_extend:TI (match_operand:INT_ISA3  1 "memory_operand" "m,Z")))]
-  "TARGET_POWERPC64 && TARGET_POWER10"
-  "@
-   #
-   lxvr<wd>x %x0,%y1"
-  "&& reload_completed && int_reg_operand (operands[0], TImode)"
-  [(set (match_dup 2) (match_dup 3))
-   (set (match_dup 4) (const_int 0))]
-{
-  rtx op0 = operands[0];
-  rtx op1 = operands[1];
-
-  operands[2] = gen_lowpart (DImode, op0);
-  operands[3] = (<MODE>mode == DImode
-		 ? op1
-		 : gen_rtx_ZERO_EXTEND (DImode, op1));
-
-  operands[4] = gen_highpart (DImode, op0);
-}
-  [(set_attr "type" "load,vecload")
-   (set_attr "num_insns" "2,*")])
+;; Load rightmost element from load_data
+;; using lxvrbx, lxvrhx, lxvrwx, lxvrdx.
+(define_insn "vsx_lxvr<wd>x"
+  [(set (match_operand:TI 0 "vsx_register_operand" "=wa")
+	(zero_extend:TI (match_operand:INT_ISA3  1 "memory_operand" "Z")))]
+  "TARGET_POWER10"
+  "lxvr<wd>x %x0,%y1"
+  [(set_attr "type" "vecload")])
 
 ;; Store rightmost element into store_data
 ;; using stxvrbx, stxvrhx, strvxwx, strvxdx.
@@ -5037,54 +5019,6 @@
   DONE;
 })
 
-;; Zero extend DI to TI.  If we don't have the MTVSRDD instruction (and LXVRDX
-;; in the case of power10), we use the machine independent code.  If we are
-;; loading up GPRs, we fall back to the old code.
-(define_insn_and_split "zero_extendditi2"
-  [(set (match_operand:TI 0 "register_operand"                 "=r,r, wa,&wa")
-	(zero_extend:TI (match_operand:DI 1 "register_operand"  "r,wa,r,  wa")))]
-  "TARGET_POWERPC64 && TARGET_P9_VECTOR"
-  "@
-   #
-   #
-   mtvsrdd %x0,0,%1
-   #"
-  "&& reload_completed
-   && (int_reg_operand (operands[0], TImode)
-       || vsx_register_operand (operands[1], DImode))"
-  [(pc)]
-{
-  rtx dest = operands[0];
-  rtx src = operands[1];
-  int dest_regno = reg_or_subregno (dest);
-
-  /* Handle conversion to GPR registers.  Load up the low part and then do
-     zero out the upper part.  */
-  if (INT_REGNO_P (dest_regno))
-    {
-      rtx dest_hi = gen_highpart (DImode, dest);
-      rtx dest_lo = gen_lowpart (DImode, dest);
-
-      emit_move_insn (dest_lo, src);
-      emit_move_insn (dest_hi, const0_rtx);
-      DONE;
-    }
-
-  /* For settomg a VSX register from another VSX register, clear the result
-     register, and use XXPERMDI to shift the value into the lower 64-bits.  */
-  rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
-  rtx dest_di = gen_rtx_REG (DImode, dest_regno);
-
-  emit_move_insn (dest_v2di, CONST0_RTX (V2DImode));
-  if (BYTES_BIG_ENDIAN)
-    emit_insn (gen_vsx_concat_v2di (dest_v2di, dest_di, src));
-  else
-    emit_insn (gen_vsx_concat_v2di (dest_v2di, src, dest_di));
-  DONE;
-}
-  [(set_attr "type"   "integer,mfvsr,vecmove,vecperm")
-   (set_attr "length" "8,      8,    *,      8")])
-
 ;; Sign extend DI to TI.  We provide both GPR targets and Altivec targets on
 ;; power10.  On earlier systems, the machine independent code will generate a
 ;; shift left to sign extend the 64-bit value to 128-bit.
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-int.c b/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-int.c
index 1f1281d6b75..c40e1a3a0f7 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-int.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-int.c
@@ -6,25 +6,33 @@
 /* { dg-do compile { target { ! power10_hw } } } */
 /* { dg-require-effective-target power10_ok } */
 /* { dg-require-effective-target int128 } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2 -save-temps" } */
+
+/* Deliberately set optimization to zero for this test to confirm
+   the lxvr*x instruction is generated. At higher optimization levels
+   the instruction we are looking for is sometimes replaced by other
+   load instructions. */
+/* { dg-options "-mdejagnu-cpu=power10 -O0 -save-temps" } */
+
 /* { dg-final { scan-assembler-times {\mlxvrwx\M} 2 } } */
 
 #define NUM_VEC_ELEMS 4
 #define ITERS 16
 
-/* Codegen at time of writing is a single lxvrwx for the zero extended test,
-   and a lxvrwx + vexts* sign extension instructions for the sign extended
-   test.
-
-   0000000000000000 <test_sign_extended_load>:
-	0:   9b 18 44 7c     lxvrwx  vs34,r4,r3
-	4:   02 16 5a 10     vextsw2d v2,v2
-	8:   02 16 5b 10     vextsd2q v2,v2
-	c:   20 00 80 4e     blr
-
-   0000000000000020 <test_zero_extended_unsigned_load>:
-	20:   9b 18 44 7c     lxvrwx  vs34,r4,r3
-	24:   20 00 80 4e     blr  */
+/*
+Codegen at time of writing is a single lxvrwx for the zero
+extended test, and a lwax,mtvsrdd,vextsd2q for the sign
+extended test.
+
+0000000010000c90 <test_sign_extended_load>:
+    10000c90:	aa 1a 24 7d 	lwax    r9,r4,r3
+    10000c94:	67 4b 40 7c 	mtvsrdd vs34,0,r9
+    10000c98:	02 16 5b 10 	vextsd2q v2,v2
+    10000c9c:	20 00 80 4e 	blr
+
+0000000010000cb0 <test_zero_extended_unsigned_load>:
+    10000cb0:	9b 18 44 7c 	lxvrwx  vs34,r4,r3
+    10000cb4:	20 00 80 4e 	blr
+*/
 
 #include <altivec.h>
 #include <stdio.h>
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-short.c b/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-short.c
index a7721318812..837ba79c9ab 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-short.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-short.c
@@ -6,26 +6,33 @@
 /* { dg-do compile { target { ! power10_hw } } } */
 /* { dg-require-effective-target power10_ok } */
 /* { dg-require-effective-target int128 } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2 -save-temps" } */
+
+/* Deliberately set optimization to zero for this test to confirm
+   the lxvr*x instruction is generated. At higher optimization levels
+   the instruction we are looking for is sometimes replaced by other
+   load instructions.  */
+/* { dg-options "-mdejagnu-cpu=power10 -O0 -save-temps" } */
 
 /* { dg-final { scan-assembler-times {\mlxvrhx\M} 2 } } */
 
 #define NUM_VEC_ELEMS 8
 #define ITERS 16
 
-/* Codegen at time of writing is a single lxvrwx for the zero extended test,
-   and a lxvrwx + vexts* sign extension instructions for the sign extended
-   test.
-
-	0000000000000000 <test_sign_extended_load>:
-	   0:   5b 18 44 7c     lxvrhx  vs34,r4,r3
-	   4:   02 16 59 10     vextsh2d v2,v2
-	   8:   02 16 5b 10     vextsd2q v2,v2
-	   c:   20 00 80 4e     blr
-
-	0000000000000020 <test_zero_extended_unsigned_load>:
-	  20:   5b 18 44 7c     lxvrhx  vs34,r4,r3
-	  24:   20 00 80 4e     blr  */
+/*
+Codegen at time of writing uses lxvrhx for the zero
+extension test and lhax,mtvsrdd,vextsd2q for the
+sign extended test.
+
+0000000010001810 <test_sign_extended_load>:
+    10001810:	ae 1a 24 7d 	lhax    r9,r4,r3
+    10001814:	67 4b 40 7c 	mtvsrdd vs34,0,r9
+    10001818:	02 16 5b 10 	vextsd2q v2,v2
+    1000181c:	20 00 80 4e 	blr
+
+0000000010001830 <test_zero_extended_unsigned_load>:
+    10001830:	5b 18 44 7c 	lxvrhx  vs34,r4,r3
+    10001834:	20 00 80 4e 	blr
+*/
 
 #include <altivec.h>
 #include <stdio.h>
diff --git a/gcc/testsuite/gcc.target/powerpc/zero-extend-di-ti.c b/gcc/testsuite/gcc.target/powerpc/zero-extend-di-ti.c
deleted file mode 100644
index ab5f5d89d4d..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/zero-extend-di-ti.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/* { dg-require-effective-target int128     } */
-/* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
-
-/* This patch makes sure the various optimization and code paths are done for
-   zero extending DImode to TImode on power10.  */
-
-__uint128_t
-gpr_to_gpr (unsigned long long a)
-{
-  /* li 4,0.  */
-  return a;
-}
-
-__uint128_t
-mem_to_gpr (unsigned long long *p)
-{
-  /* ld 3,0(3); li 4,0.  */
-  return *p;
-}
-
-__uint128_t
-vsx_to_gpr (__uint128_t *p, double d)
-{
-  /* fctiduz 1,1; li 4,0;mfvsrd 3,1.  */
-  return (unsigned long long)d;
-}
-
-void
-gpr_to_vsx (__uint128_t *p, unsigned long long a)
-{
-  /* mtvsrdd 0,0,4; stxv 0,0(3).  */
-  __uint128_t b = a;
-  __asm__ (" # %x0" : "+wa" (b));
-  *p = b;
-}
-
-void
-mem_to_vsx (__uint128_t *p, unsigned long long *q)
-{
-  /* lxvrdx 0,0,4; stxv 0,0(3).  */
-  __uint128_t a = *q;
-  __asm__ (" # %x0" : "+wa" (a));
-  *p = a;
-}
-
-void
-vsx_to_vsx (__uint128_t *p, double d)
-{
-  /* fctiduz 1,1; xxspltib 0,0; xxpermdi 0,0,1,0; stxv 0,0(3).  */
-  __uint128_t a = (unsigned long long)d;
-  __asm__ (" # %x0" : "+wa" (a));
-  *p = a;
-}
-
-/* { dg-final { scan-assembler-times {\mli\M}       3 } } */
-/* { dg-final { scan-assembler-times {\mld\M}       1 } } */
-/* { dg-final { scan-assembler-times {\mlxvrdx\M}   3 } } */
-/* { dg-final { scan-assembler-times {\mmfvsrd\M}   1 } } */
-/* { dg-final { scan-assembler-times {\mmtvsrdd\M}  1 } } */
-/* { dg-final { scan-assembler-times {\mstxv\M}     1 } } */
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-04-02  0:03 Michael Meissner
  0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-04-02  0:03 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:e13ad3cdd6655382e05105b8e3b47ce4aa0be70d

commit e13ad3cdd6655382e05105b8e3b47ce4aa0be70d
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Apr 1 20:03:00 2022 -0400

    Revert patch.
    
    2022-04-01   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            Revert patch.
            * config/rs6000/vsx.md (vsx_lxvr<wd>x): Add support for loading to
            GPR registers.
            (vsx_stxvr<wd>x): Add support for storing from GPR registers.
            (zero_extendditi2): New insn.
    
    gcc/testsuite/
            Revert patch.
            * gcc.target/powerpc/zero-extend-di-ti.c: New test.

Diff:
---
 gcc/config/rs6000/vsx.md                           | 103 +++------------------
 .../gcc.target/powerpc/zero-extend-di-ti.c         |  62 -------------
 2 files changed, 15 insertions(+), 150 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 27810287ad7..c091e5e2f47 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1315,48 +1315,23 @@
     }
 })
 
-;; Load rightmost element from load_data using lxvrbx, lxvrhx, lxvrwx, lxvrdx.
-;; Support TImode being in a GPR register to prevent generating lvxr{d,w,b}x
-;; and then two direct moves if we ultimately need the value in a GPR register.
-(define_insn_and_split "vsx_lxvr<wd>x"
-  [(set (match_operand:TI 0 "register_operand" "=r,wa")
-	(zero_extend:TI (match_operand:INT_ISA3  1 "memory_operand" "m,Z")))]
-  "TARGET_POWERPC64 && TARGET_POWER10"
-  "@
-   #
-   lxvr<wd>x %x0,%y1"
-  "&& reload_completed && int_reg_operand (operands[0], TImode)"
-  [(set (match_dup 2) (match_dup 3))
-   (set (match_dup 4) (const_int 0))]
-{
-  rtx op0 = operands[0];
-  rtx op1 = operands[1];
-
-  operands[2] = gen_lowpart (DImode, op0);
-  operands[3] = (<MODE>mode == DImode
-		 ? op1
-		 : gen_rtx_ZERO_EXTEND (DImode, op1));
-
-  operands[4] = gen_highpart (DImode, op0);
-}
-  [(set_attr "type" "load,vecload")
-   (set_attr "num_insns" "2,*")])
+;; Load rightmost element from load_data
+;; using lxvrbx, lxvrhx, lxvrwx, lxvrdx.
+(define_insn "vsx_lxvr<wd>x"
+  [(set (match_operand:TI 0 "vsx_register_operand" "=wa")
+	(zero_extend:TI (match_operand:INT_ISA3  1 "memory_operand" "Z")))]
+  "TARGET_POWER10"
+  "lxvr<wd>x %x0,%y1"
+  [(set_attr "type" "vecload")])
 
-;; Store rightmost element into store_data using stxvrbx, stxvrhx, strvxwx,
-;; strvxdx.  Support TImode being in a GPR register to prevent generating
-;; mtvsrdd and then stvxr{d,w,b}x instead of a GPR store.
+;; Store rightmost element into store_data
+;; using stxvrbx, stxvrhx, strvxwx, strvxdx.
 (define_insn "vsx_stxvr<wd>x"
-  [(set (match_operand:INT_ISA3 0 "memory_operand" "=m,Z")
-	(truncate:INT_ISA3 (match_operand:TI 1 "register_operand" "r,wa")))]
-  "TARGET_POWERPC64 && TARGET_POWER10"
-{
-  if (vsx_register_operand (operands[1], TImode))
-    return "stxvr<wc>x %x1,%y0";
-
-  operands[2] = gen_lowpart (<MODE>mode, operands[1]);
-  return "st<wd>%U0%X0 %2,%0";
-}
-  [(set_attr "type" "store,vecstore")])
+  [(set (match_operand:INT_ISA3 0 "memory_operand" "=Z")
+	(truncate:INT_ISA3 (match_operand:TI 1 "vsx_register_operand" "wa")))]
+  "TARGET_POWER10"
+  "stxvr<wd>x %x1,%y0"
+  [(set_attr "type" "vecstore")])
 
 ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
 ;; when you really want their element-reversing behavior.
@@ -5044,54 +5019,6 @@
   DONE;
 })
 
-;; Zero extend DI to TI.  If we don't have the MTVSRDD instruction (and LXVRDX
-;; in the case of power10), we use the machine independent code.  If we are
-;; loading up GPRs, we fall back to the old code.
-(define_insn_and_split "zero_extendditi2"
-  [(set (match_operand:TI 0 "register_operand"                 "=r,r, wa,&wa")
-	(zero_extend:TI (match_operand:DI 1 "register_operand"  "r,wa,r,  wa")))]
-  "TARGET_POWERPC64 && TARGET_P9_VECTOR"
-  "@
-   #
-   #
-   mtvsrdd %x0,0,%1
-   #"
-  "&& reload_completed
-   && (int_reg_operand (operands[0], TImode)
-       || vsx_register_operand (operands[1], DImode))"
-  [(pc)]
-{
-  rtx dest = operands[0];
-  rtx src = operands[1];
-  int dest_regno = reg_or_subregno (dest);
-
-  /* Handle conversion to GPR registers.  Load up the low part and then do
-     zero out the upper part.  */
-  if (INT_REGNO_P (dest_regno))
-    {
-      rtx dest_hi = gen_highpart (DImode, dest);
-      rtx dest_lo = gen_lowpart (DImode, dest);
-
-      emit_move_insn (dest_lo, src);
-      emit_move_insn (dest_hi, const0_rtx);
-      DONE;
-    }
-
-  /* For settomg a VSX register from another VSX register, clear the result
-     register, and use XXPERMDI to shift the value into the lower 64-bits.  */
-  rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
-  rtx dest_di = gen_rtx_REG (DImode, dest_regno);
-
-  emit_move_insn (dest_v2di, CONST0_RTX (V2DImode));
-  if (BYTES_BIG_ENDIAN)
-    emit_insn (gen_vsx_concat_v2di (dest_v2di, dest_di, src));
-  else
-    emit_insn (gen_vsx_concat_v2di (dest_v2di, src, dest_di));
-  DONE;
-}
-  [(set_attr "type"   "integer,mfvsr,vecmove,vecperm")
-   (set_attr "length" "8,      8,    *,      8")])
-
 ;; Sign extend DI to TI.  We provide both GPR targets and Altivec targets on
 ;; power10.  On earlier systems, the machine independent code will generate a
 ;; shift left to sign extend the 64-bit value to 128-bit.
diff --git a/gcc/testsuite/gcc.target/powerpc/zero-extend-di-ti.c b/gcc/testsuite/gcc.target/powerpc/zero-extend-di-ti.c
deleted file mode 100644
index ab5f5d89d4d..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/zero-extend-di-ti.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/* { dg-require-effective-target int128     } */
-/* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
-
-/* This patch makes sure the various optimization and code paths are done for
-   zero extending DImode to TImode on power10.  */
-
-__uint128_t
-gpr_to_gpr (unsigned long long a)
-{
-  /* li 4,0.  */
-  return a;
-}
-
-__uint128_t
-mem_to_gpr (unsigned long long *p)
-{
-  /* ld 3,0(3); li 4,0.  */
-  return *p;
-}
-
-__uint128_t
-vsx_to_gpr (__uint128_t *p, double d)
-{
-  /* fctiduz 1,1; li 4,0;mfvsrd 3,1.  */
-  return (unsigned long long)d;
-}
-
-void
-gpr_to_vsx (__uint128_t *p, unsigned long long a)
-{
-  /* mtvsrdd 0,0,4; stxv 0,0(3).  */
-  __uint128_t b = a;
-  __asm__ (" # %x0" : "+wa" (b));
-  *p = b;
-}
-
-void
-mem_to_vsx (__uint128_t *p, unsigned long long *q)
-{
-  /* lxvrdx 0,0,4; stxv 0,0(3).  */
-  __uint128_t a = *q;
-  __asm__ (" # %x0" : "+wa" (a));
-  *p = a;
-}
-
-void
-vsx_to_vsx (__uint128_t *p, double d)
-{
-  /* fctiduz 1,1; xxspltib 0,0; xxpermdi 0,0,1,0; stxv 0,0(3).  */
-  __uint128_t a = (unsigned long long)d;
-  __asm__ (" # %x0" : "+wa" (a));
-  *p = a;
-}
-
-/* { dg-final { scan-assembler-times {\mli\M}       3 } } */
-/* { dg-final { scan-assembler-times {\mld\M}       1 } } */
-/* { dg-final { scan-assembler-times {\mlxvrdx\M}   3 } } */
-/* { dg-final { scan-assembler-times {\mmfvsrd\M}   1 } } */
-/* { dg-final { scan-assembler-times {\mmtvsrdd\M}  1 } } */
-/* { dg-final { scan-assembler-times {\mstxv\M}     1 } } */
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-04-01 20:15 Michael Meissner
  0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-04-01 20:15 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:08fe7c705fa13e4e4f52b6dce37463ed80aa61a2

commit 08fe7c705fa13e4e4f52b6dce37463ed80aa61a2
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Fri Apr 1 16:15:05 2022 -0400

    Revert patch.
    
    2022-03-31   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            Revert patch.
            * config/rs6000/vsx.md (zero_extendditi2): New insn.

Diff:
---
 gcc/config/rs6000/vsx.md | 52 ------------------------------------------------
 1 file changed, 52 deletions(-)

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index cc8c80863f2..c091e5e2f47 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5019,58 +5019,6 @@
   DONE;
 })
 
-;; Zero extend DI to TI.  If we don't have the MTVSRDD instruction (and LXVRDX
-;; in the case of power10), we use the machine independent code.  If we are
-;; loading up GPRs, we fall back to the old code.
-(define_insn_and_split "zero_extendditi2"
-  [(set (match_operand:TI 0 "register_operand"              "=r,r,r, wa,wa,wa")
-	(zero_extend:TI (match_operand:DI 1 "input_operand"  "r,m,wa,r, Z, wa")))
-   (clobber (match_scratch:DI 2                             "=X,X,X, X, X, &wa"))]
-  "TARGET_POWERPC64 && TARGET_POWER10"
-  "@
-   #
-   #
-   #
-   mtvsrdd %x0,0,%1
-   lxvrdx %x0,%y1
-   #"
-  "&& reload_completed
-   && (int_reg_operand (operands[0], TImode)
-       || vsx_register_operand (operands[1], DImode))"
-  [(pc)]
-{
-  rtx dest = operands[0];
-  rtx src = operands[1];
-  int dest_regno = reg_or_subregno (dest);
-
-  /* Handle conversion to GPR registers.  Load up the low part and then do
-     a sign extension to the upper part.  */
-  if (INT_REGNO_P (dest_regno))
-    {
-      rtx dest_hi = gen_highpart (DImode, dest);
-      rtx dest_lo = gen_lowpart (DImode, dest);
-
-      emit_move_insn (dest_lo, src);
-      emit_move_insn (dest_hi, const0_rtx);
-      DONE;
-    }
-
-  /* For settomg a VSX register from another VSX register, clear a scratch
-     register, and use XXPERMDI to shift the value into the lower 64-bits.  */
-  rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
-  rtx zero = operands[2];
-
-  emit_move_insn (zero, const0_rtx);
-  if (BYTES_BIG_ENDIAN)
-    emit_insn (gen_vsx_concat_v2di (dest_v2di, zero, src));
-  else
-    emit_insn (gen_vsx_concat_v2di (dest_v2di, src, zero));
-  DONE;
-}
-  [(set_attr "type"   "integer,load,mfvsr,vecmove,vecload,vecperm")
-   (set_attr "isa"    "*,      *,   *,    *,      p10,    *")
-   (set_attr "length" "8,      8,   8,    *,      *,      8")])
-
 ;; Sign extend DI to TI.  We provide both GPR targets and Altivec targets on
 ;; power10.  On earlier systems, the machine independent code will generate a
 ;; shift left to sign extend the 64-bit value to 128-bit.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-03-31 14:00 Michael Meissner
  0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-03-31 14:00 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:5a252165aabe7df56202c33a90c22a30d4ea508e

commit 5a252165aabe7df56202c33a90c22a30d4ea508e
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Mar 29 19:35:18 2022 -0400

    Revert patch.
    
    2022-03-28   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            PR target/99293
            Revert patch.
            * config/rs6000/rs6000-p8swap.cc (rtx_is_swappable_p): Remove
            UNSPEC_VSX_XXSPLTD case.
            * config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): Delete.
            (vsx_xxspltd_<mode>): Rewrite to use VEC_DUPLICATE.
    
    gcc/testsuite:
            PR target/99293
            Revert patch.
            * gcc.target/powerpc/builtins-1.c: Update insn count.
            * gcc.target/powerpc/pr99293.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000-p8swap.cc         |  1 +
 gcc/config/rs6000/vsx.md                   | 38 +++++++-----------------------
 gcc/testsuite/gcc.target/powerpc/pr99293.c | 36 ----------------------------
 3 files changed, 10 insertions(+), 65 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 1973d9c8245..d301bc3fe59 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -805,6 +805,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
 	  case UNSPEC_VUPKLU_V4SF:
 	    return 0;
 	  case UNSPEC_VSPLT_DIRECT:
+	  case UNSPEC_VSX_XXSPLTD:
 	    *special = SH_SPLAT;
 	    return 1;
 	  case UNSPEC_REDUC_PLUS:
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 26226520335..1b75538f42f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -296,6 +296,7 @@
    UNSPEC_VSX_XXPERM
 
    UNSPEC_VSX_XXSPLTW
+   UNSPEC_VSX_XXSPLTD
    UNSPEC_VSX_DIVSD
    UNSPEC_VSX_DIVUD
    UNSPEC_VSX_DIVSQ
@@ -3088,25 +3089,6 @@
 }
   [(set_attr "type" "vecperm")])
 
-;; Combiner patterns to allow creating XXPERMDI's to access either double
-;; word element in a vector register when used with VEC_DUPLICATE..
-(define_insn "*vsx_dup_<mode>_1"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-	(vec_duplicate:VSX_D
-	 (vec_select:<VS_scalar>
-	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
-	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))))]
-  "VECTOR_MEM_VSX_P (<MODE>mode)"
-{
-  HOST_WIDE_INT dword = INTVAL (operands[2]);
-  if (!BYTES_BIG_ENDIAN)
-    dword = !dword;
-
-  operands[3] = GEN_INT (3*dword);
-  return "xxpermdi %x0,%x1,%x1,%3";
-}
-  [(set_attr "type" "vecperm")])
-
 ;; Special purpose concat using xxpermdi to glue two single precision values
 ;; together, relying on the fact that internally scalar floats are represented
 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats
@@ -4691,18 +4673,16 @@
 ;; V2DF/V2DI splat for use by vec_splat builtin
 (define_insn "vsx_xxspltd_<mode>"
   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-	(vec_duplicate:VSX_D
-	 (vec_select:<VS_scalar>
-	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
-	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "i")]))))]
+        (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
+	               (match_operand:QI 2 "u5bit_cint_operand" "i")]
+                      UNSPEC_VSX_XXSPLTD))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
 {
-  HOST_WIDE_INT dword = INTVAL (operands[2]);
-  if (!BYTES_BIG_ENDIAN)
-    dword = !dword;
-
-  operands[3] = GEN_INT (3*dword);
-  return "xxpermdi %x0,%x1,%x1,%3";
+  if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
+      || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
+    return "xxpermdi %x0,%x1,%x1,0";
+  else
+    return "xxpermdi %x0,%x1,%x1,3";
 }
   [(set_attr "type" "vecperm")])
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c b/gcc/testsuite/gcc.target/powerpc/pr99293.c
deleted file mode 100644
index 03c22f8f4de..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/pr99293.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* { dg-do compile } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* Test for PR 99263, which wants to do:
-   __builtin_vec_splats (__builtin_vec_extract (v, n))
-
-   where v is a V2DF or V2DI vector and n is either 0 or 1.  Previously the
-   compiler would do a direct move to the GPR registers to select the item and
-   a direct move from the GPR registers to do the splat.  */
-
-vector long long
-splat_dup_ll_0 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-	
-vector long long
-splat_dup_ll_1 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-vector double
-splat_dup_d_0 (vector double v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-	
-vector double
-splat_dup_d_1 (vector double v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-	
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 4 } } */


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-03-31 14:00 Michael Meissner
  0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-03-31 14:00 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:6d781fee3576aa121054b4b156bb92020e8d8f59

commit 6d781fee3576aa121054b4b156bb92020e8d8f59
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Mar 29 19:29:04 2022 -0400

    Revert patch.
    
    2022-03-28   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            PR target/99293
            Revert patch.
            * config/rs6000/rs6000-p8swap.cc (rtx_is_swappable_p): Remove
            UNSPEC_VSX_XXSPLTD case.
            * config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): Delete.
            (vsx_xxspltd_<mode>): Rewrite to use VEC_DUPLICATE.
    
    gcc/testsuite:
            PR target/99293
            Revert patch.
            * gcc.target/powerpc/pr99293.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000-p8swap.cc         |  1 +
 gcc/config/rs6000/vsx.md                   | 38 +++++++-----------------------
 gcc/testsuite/gcc.target/powerpc/pr99293.c | 36 ----------------------------
 3 files changed, 10 insertions(+), 65 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 1973d9c8245..d301bc3fe59 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -805,6 +805,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
 	  case UNSPEC_VUPKLU_V4SF:
 	    return 0;
 	  case UNSPEC_VSPLT_DIRECT:
+	  case UNSPEC_VSX_XXSPLTD:
 	    *special = SH_SPLAT;
 	    return 1;
 	  case UNSPEC_REDUC_PLUS:
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 26226520335..1b75538f42f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -296,6 +296,7 @@
    UNSPEC_VSX_XXPERM
 
    UNSPEC_VSX_XXSPLTW
+   UNSPEC_VSX_XXSPLTD
    UNSPEC_VSX_DIVSD
    UNSPEC_VSX_DIVUD
    UNSPEC_VSX_DIVSQ
@@ -3088,25 +3089,6 @@
 }
   [(set_attr "type" "vecperm")])
 
-;; Combiner patterns to allow creating XXPERMDI's to access either double
-;; word element in a vector register when used with VEC_DUPLICATE..
-(define_insn "*vsx_dup_<mode>_1"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-	(vec_duplicate:VSX_D
-	 (vec_select:<VS_scalar>
-	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
-	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))))]
-  "VECTOR_MEM_VSX_P (<MODE>mode)"
-{
-  HOST_WIDE_INT dword = INTVAL (operands[2]);
-  if (!BYTES_BIG_ENDIAN)
-    dword = !dword;
-
-  operands[3] = GEN_INT (3*dword);
-  return "xxpermdi %x0,%x1,%x1,%3";
-}
-  [(set_attr "type" "vecperm")])
-
 ;; Special purpose concat using xxpermdi to glue two single precision values
 ;; together, relying on the fact that internally scalar floats are represented
 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats
@@ -4691,18 +4673,16 @@
 ;; V2DF/V2DI splat for use by vec_splat builtin
 (define_insn "vsx_xxspltd_<mode>"
   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-	(vec_duplicate:VSX_D
-	 (vec_select:<VS_scalar>
-	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
-	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "i")]))))]
+        (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
+	               (match_operand:QI 2 "u5bit_cint_operand" "i")]
+                      UNSPEC_VSX_XXSPLTD))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
 {
-  HOST_WIDE_INT dword = INTVAL (operands[2]);
-  if (!BYTES_BIG_ENDIAN)
-    dword = !dword;
-
-  operands[3] = GEN_INT (3*dword);
-  return "xxpermdi %x0,%x1,%x1,%3";
+  if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
+      || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
+    return "xxpermdi %x0,%x1,%x1,0";
+  else
+    return "xxpermdi %x0,%x1,%x1,3";
 }
   [(set_attr "type" "vecperm")])
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c b/gcc/testsuite/gcc.target/powerpc/pr99293.c
deleted file mode 100644
index 03c22f8f4de..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/pr99293.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* { dg-do compile } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* Test for PR 99263, which wants to do:
-   __builtin_vec_splats (__builtin_vec_extract (v, n))
-
-   where v is a V2DF or V2DI vector and n is either 0 or 1.  Previously the
-   compiler would do a direct move to the GPR registers to select the item and
-   a direct move from the GPR registers to do the splat.  */
-
-vector long long
-splat_dup_ll_0 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-	
-vector long long
-splat_dup_ll_1 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-vector double
-splat_dup_d_0 (vector double v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-	
-vector double
-splat_dup_d_1 (vector double v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-	
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 4 } } */


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-03-31 14:00 Michael Meissner
  0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-03-31 14:00 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:4b06b3b008d424f48bddb787c20897745383277e

commit 4b06b3b008d424f48bddb787c20897745383277e
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Mon Mar 28 22:57:56 2022 -0400

    Revert patch.
    
    gcc/
            PR target/99293
            Revert patch.
            * config/rs6000/rs6000-p8swap.cc (rtx_is_swappable_p): Remove
            UNSPEC_VSX_XXSPLTD case.
            * config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): Delete.
            (vsx_xxspltd_<mode>): Rewrite to use VEC_DUPLICATE.
    
    gcc/testsuite:
            PR target/99293
            Revert patch.
            * gcc.target/powerpc/pr99293.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000-p8swap.cc         |  1 +
 gcc/config/rs6000/vsx.md                   | 38 +++++++-----------------------
 gcc/testsuite/gcc.target/powerpc/pr99293.c | 36 ----------------------------
 3 files changed, 10 insertions(+), 65 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 1973d9c8245..d301bc3fe59 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -805,6 +805,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
 	  case UNSPEC_VUPKLU_V4SF:
 	    return 0;
 	  case UNSPEC_VSPLT_DIRECT:
+	  case UNSPEC_VSX_XXSPLTD:
 	    *special = SH_SPLAT;
 	    return 1;
 	  case UNSPEC_REDUC_PLUS:
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 26226520335..1b75538f42f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -296,6 +296,7 @@
    UNSPEC_VSX_XXPERM
 
    UNSPEC_VSX_XXSPLTW
+   UNSPEC_VSX_XXSPLTD
    UNSPEC_VSX_DIVSD
    UNSPEC_VSX_DIVUD
    UNSPEC_VSX_DIVSQ
@@ -3088,25 +3089,6 @@
 }
   [(set_attr "type" "vecperm")])
 
-;; Combiner patterns to allow creating XXPERMDI's to access either double
-;; word element in a vector register when used with VEC_DUPLICATE..
-(define_insn "*vsx_dup_<mode>_1"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-	(vec_duplicate:VSX_D
-	 (vec_select:<VS_scalar>
-	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
-	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))))]
-  "VECTOR_MEM_VSX_P (<MODE>mode)"
-{
-  HOST_WIDE_INT dword = INTVAL (operands[2]);
-  if (!BYTES_BIG_ENDIAN)
-    dword = !dword;
-
-  operands[3] = GEN_INT (3*dword);
-  return "xxpermdi %x0,%x1,%x1,%3";
-}
-  [(set_attr "type" "vecperm")])
-
 ;; Special purpose concat using xxpermdi to glue two single precision values
 ;; together, relying on the fact that internally scalar floats are represented
 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats
@@ -4691,18 +4673,16 @@
 ;; V2DF/V2DI splat for use by vec_splat builtin
 (define_insn "vsx_xxspltd_<mode>"
   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-	(vec_duplicate:VSX_D
-	 (vec_select:<VS_scalar>
-	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
-	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "i")]))))]
+        (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
+	               (match_operand:QI 2 "u5bit_cint_operand" "i")]
+                      UNSPEC_VSX_XXSPLTD))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
 {
-  HOST_WIDE_INT dword = INTVAL (operands[2]);
-  if (!BYTES_BIG_ENDIAN)
-    dword = !dword;
-
-  operands[3] = GEN_INT (3*dword);
-  return "xxpermdi %x0,%x1,%x1,%3";
+  if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
+      || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
+    return "xxpermdi %x0,%x1,%x1,0";
+  else
+    return "xxpermdi %x0,%x1,%x1,3";
 }
   [(set_attr "type" "vecperm")])
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c b/gcc/testsuite/gcc.target/powerpc/pr99293.c
deleted file mode 100644
index 03c22f8f4de..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/pr99293.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* { dg-do compile } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* Test for PR 99263, which wants to do:
-   __builtin_vec_splats (__builtin_vec_extract (v, n))
-
-   where v is a V2DF or V2DI vector and n is either 0 or 1.  Previously the
-   compiler would do a direct move to the GPR registers to select the item and
-   a direct move from the GPR registers to do the splat.  */
-
-vector long long
-splat_dup_ll_0 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-	
-vector long long
-splat_dup_ll_1 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-vector double
-splat_dup_d_0 (vector double v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-	
-vector double
-splat_dup_d_1 (vector double v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-	
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 4 } } */


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-03-30 17:57 Michael Meissner
  0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-03-30 17:57 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:4cb58fd9c112236b76047e2575b2503d3d18f15e

commit 4cb58fd9c112236b76047e2575b2503d3d18f15e
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Wed Mar 30 13:56:20 2022 -0400

    Revert patch.
    
    2022-03-29   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            PR target/99293
            Revert patch.
            * config/rs6000/rs6000-p8swap.cc (rtx_is_swappable_p): Remove
            UNSPEC_VSX_XXSPLTD case.
            * config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): Delete.
            (vsx_xxspltd_<mode>): Rewrite to use VEC_DUPLICATE.
    
    gcc/testsuite:
            PR target/99293
            Revert patch.
            * gcc.target/powerpc/builtins-1.c: Update insn count.
            * gcc.target/powerpc/pr99293.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000-p8swap.cc            |  1 +
 gcc/config/rs6000/vsx.md                      | 19 +++++++-------
 gcc/testsuite/gcc.target/powerpc/builtins-1.c |  2 +-
 gcc/testsuite/gcc.target/powerpc/pr99293.c    | 36 ---------------------------
 4 files changed, 11 insertions(+), 47 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 1973d9c8245..d301bc3fe59 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -805,6 +805,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
 	  case UNSPEC_VUPKLU_V4SF:
 	    return 0;
 	  case UNSPEC_VSPLT_DIRECT:
+	  case UNSPEC_VSX_XXSPLTD:
 	    *special = SH_SPLAT;
 	    return 1;
 	  case UNSPEC_REDUC_PLUS:
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 82fa4bbbfc4..15bd86dfdfb 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -296,6 +296,7 @@
    UNSPEC_VSX_XXPERM
 
    UNSPEC_VSX_XXSPLTW
+   UNSPEC_VSX_XXSPLTD
    UNSPEC_VSX_DIVSD
    UNSPEC_VSX_DIVUD
    UNSPEC_VSX_DIVSQ
@@ -4675,18 +4676,16 @@
 ;; V2DF/V2DI splat for use by vec_splat builtin
 (define_insn "vsx_xxspltd_<mode>"
   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-	(vec_duplicate:VSX_D
-	 (vec_select:<VS_scalar>
-	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
-	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "i")]))))]
+        (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
+	               (match_operand:QI 2 "u5bit_cint_operand" "i")]
+                      UNSPEC_VSX_XXSPLTD))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
 {
-  HOST_WIDE_INT dword = INTVAL (operands[2]);
-  if (!BYTES_BIG_ENDIAN)
-    dword = !dword;
-
-  operands[3] = GEN_INT (3*dword);
-  return "xxpermdi %x0,%x1,%x1,%3";
+  if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
+      || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
+    return "xxpermdi %x0,%x1,%x1,0";
+  else
+    return "xxpermdi %x0,%x1,%x1,3";
 }
   [(set_attr "type" "vecperm")])
 
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1.c b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
index 98783668bce..28cd1aa6b1a 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.c
@@ -1035,4 +1035,4 @@ foo156 (vector unsigned short usa)
 /* { dg-final { scan-assembler-times {\mvmrglb\M} 3 } } */
 /* { dg-final { scan-assembler-times {\mvmrgew\M} 4 } } */
 /* { dg-final { scan-assembler-times {\mvsplth|xxsplth\M} 4 } } */
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 42 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 44 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c b/gcc/testsuite/gcc.target/powerpc/pr99293.c
deleted file mode 100644
index 03c22f8f4de..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/pr99293.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* { dg-do compile } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* Test for PR 99263, which wants to do:
-   __builtin_vec_splats (__builtin_vec_extract (v, n))
-
-   where v is a V2DF or V2DI vector and n is either 0 or 1.  Previously the
-   compiler would do a direct move to the GPR registers to select the item and
-   a direct move from the GPR registers to do the splat.  */
-
-vector long long
-splat_dup_ll_0 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-	
-vector long long
-splat_dup_ll_1 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-vector double
-splat_dup_d_0 (vector double v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-	
-vector double
-splat_dup_d_1 (vector double v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-	
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 4 } } */


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-03-29 23:35 Michael Meissner
  0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-03-29 23:35 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:f89498c4aa4dbf9fcea45f7a754d4a438ac01a4e

commit f89498c4aa4dbf9fcea45f7a754d4a438ac01a4e
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Mar 29 19:35:18 2022 -0400

    Revert patch.
    
    2022-03-28   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            PR target/99293
            Revert patch.
            * config/rs6000/rs6000-p8swap.cc (rtx_is_swappable_p): Remove
            UNSPEC_VSX_XXSPLTD case.
            * config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): Delete.
            (vsx_xxspltd_<mode>): Rewrite to use VEC_DUPLICATE.
    
    gcc/testsuite:
            PR target/99293
            Revert patch.
            * gcc.target/powerpc/builtins-1.c: Update insn count.
            * gcc.target/powerpc/pr99293.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000-p8swap.cc         |  1 +
 gcc/config/rs6000/vsx.md                   | 38 +++++++-----------------------
 gcc/testsuite/gcc.target/powerpc/pr99293.c | 36 ----------------------------
 3 files changed, 10 insertions(+), 65 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 1973d9c8245..d301bc3fe59 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -805,6 +805,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
 	  case UNSPEC_VUPKLU_V4SF:
 	    return 0;
 	  case UNSPEC_VSPLT_DIRECT:
+	  case UNSPEC_VSX_XXSPLTD:
 	    *special = SH_SPLAT;
 	    return 1;
 	  case UNSPEC_REDUC_PLUS:
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0ab24186812..15bd86dfdfb 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -296,6 +296,7 @@
    UNSPEC_VSX_XXPERM
 
    UNSPEC_VSX_XXSPLTW
+   UNSPEC_VSX_XXSPLTD
    UNSPEC_VSX_DIVSD
    UNSPEC_VSX_DIVUD
    UNSPEC_VSX_DIVSQ
@@ -3088,25 +3089,6 @@
 }
   [(set_attr "type" "vecperm")])
 
-;; Combiner patterns to allow creating XXPERMDI's to access either double
-;; word element in a vector register when used with VEC_DUPLICATE..
-(define_insn "*vsx_dup_<mode>_1"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-	(vec_duplicate:VSX_D
-	 (vec_select:<VS_scalar>
-	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
-	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))))]
-  "VECTOR_MEM_VSX_P (<MODE>mode)"
-{
-  HOST_WIDE_INT dword = INTVAL (operands[2]);
-  if (!BYTES_BIG_ENDIAN)
-    dword = !dword;
-
-  operands[3] = GEN_INT (3*dword);
-  return "xxpermdi %x0,%x1,%x1,%3";
-}
-  [(set_attr "type" "vecperm")])
-
 ;; Special purpose concat using xxpermdi to glue two single precision values
 ;; together, relying on the fact that internally scalar floats are represented
 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats
@@ -4694,18 +4676,16 @@
 ;; V2DF/V2DI splat for use by vec_splat builtin
 (define_insn "vsx_xxspltd_<mode>"
   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-	(vec_duplicate:VSX_D
-	 (vec_select:<VS_scalar>
-	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
-	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "i")]))))]
+        (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
+	               (match_operand:QI 2 "u5bit_cint_operand" "i")]
+                      UNSPEC_VSX_XXSPLTD))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
 {
-  HOST_WIDE_INT dword = INTVAL (operands[2]);
-  if (!BYTES_BIG_ENDIAN)
-    dword = !dword;
-
-  operands[3] = GEN_INT (3*dword);
-  return "xxpermdi %x0,%x1,%x1,%3";
+  if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
+      || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
+    return "xxpermdi %x0,%x1,%x1,0";
+  else
+    return "xxpermdi %x0,%x1,%x1,3";
 }
   [(set_attr "type" "vecperm")])
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c b/gcc/testsuite/gcc.target/powerpc/pr99293.c
deleted file mode 100644
index 03c22f8f4de..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/pr99293.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* { dg-do compile } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* Test for PR 99263, which wants to do:
-   __builtin_vec_splats (__builtin_vec_extract (v, n))
-
-   where v is a V2DF or V2DI vector and n is either 0 or 1.  Previously the
-   compiler would do a direct move to the GPR registers to select the item and
-   a direct move from the GPR registers to do the splat.  */
-
-vector long long
-splat_dup_ll_0 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-	
-vector long long
-splat_dup_ll_1 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-vector double
-splat_dup_d_0 (vector double v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-	
-vector double
-splat_dup_d_1 (vector double v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-	
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 4 } } */


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-03-29 23:29 Michael Meissner
  0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-03-29 23:29 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:b3b51cdf4e2c61d90703f6d173e945ca1bf1f375

commit b3b51cdf4e2c61d90703f6d173e945ca1bf1f375
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Mar 29 19:29:04 2022 -0400

    Revert patch.
    
    2022-03-28   Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            PR target/99293
            Revert patch.
            * config/rs6000/rs6000-p8swap.cc (rtx_is_swappable_p): Remove
            UNSPEC_VSX_XXSPLTD case.
            * config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): Delete.
            (vsx_xxspltd_<mode>): Rewrite to use VEC_DUPLICATE.
    
    gcc/testsuite:
            PR target/99293
            Revert patch.
            * gcc.target/powerpc/pr99293.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000-p8swap.cc         |  1 +
 gcc/config/rs6000/vsx.md                   | 38 +++++++-----------------------
 gcc/testsuite/gcc.target/powerpc/pr99293.c | 36 ----------------------------
 3 files changed, 10 insertions(+), 65 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 1973d9c8245..d301bc3fe59 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -805,6 +805,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
 	  case UNSPEC_VUPKLU_V4SF:
 	    return 0;
 	  case UNSPEC_VSPLT_DIRECT:
+	  case UNSPEC_VSX_XXSPLTD:
 	    *special = SH_SPLAT;
 	    return 1;
 	  case UNSPEC_REDUC_PLUS:
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0ab24186812..15bd86dfdfb 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -296,6 +296,7 @@
    UNSPEC_VSX_XXPERM
 
    UNSPEC_VSX_XXSPLTW
+   UNSPEC_VSX_XXSPLTD
    UNSPEC_VSX_DIVSD
    UNSPEC_VSX_DIVUD
    UNSPEC_VSX_DIVSQ
@@ -3088,25 +3089,6 @@
 }
   [(set_attr "type" "vecperm")])
 
-;; Combiner patterns to allow creating XXPERMDI's to access either double
-;; word element in a vector register when used with VEC_DUPLICATE..
-(define_insn "*vsx_dup_<mode>_1"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-	(vec_duplicate:VSX_D
-	 (vec_select:<VS_scalar>
-	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
-	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))))]
-  "VECTOR_MEM_VSX_P (<MODE>mode)"
-{
-  HOST_WIDE_INT dword = INTVAL (operands[2]);
-  if (!BYTES_BIG_ENDIAN)
-    dword = !dword;
-
-  operands[3] = GEN_INT (3*dword);
-  return "xxpermdi %x0,%x1,%x1,%3";
-}
-  [(set_attr "type" "vecperm")])
-
 ;; Special purpose concat using xxpermdi to glue two single precision values
 ;; together, relying on the fact that internally scalar floats are represented
 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats
@@ -4694,18 +4676,16 @@
 ;; V2DF/V2DI splat for use by vec_splat builtin
 (define_insn "vsx_xxspltd_<mode>"
   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-	(vec_duplicate:VSX_D
-	 (vec_select:<VS_scalar>
-	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
-	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "i")]))))]
+        (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
+	               (match_operand:QI 2 "u5bit_cint_operand" "i")]
+                      UNSPEC_VSX_XXSPLTD))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
 {
-  HOST_WIDE_INT dword = INTVAL (operands[2]);
-  if (!BYTES_BIG_ENDIAN)
-    dword = !dword;
-
-  operands[3] = GEN_INT (3*dword);
-  return "xxpermdi %x0,%x1,%x1,%3";
+  if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
+      || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
+    return "xxpermdi %x0,%x1,%x1,0";
+  else
+    return "xxpermdi %x0,%x1,%x1,3";
 }
   [(set_attr "type" "vecperm")])
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c b/gcc/testsuite/gcc.target/powerpc/pr99293.c
deleted file mode 100644
index 03c22f8f4de..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/pr99293.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* { dg-do compile } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* Test for PR 99263, which wants to do:
-   __builtin_vec_splats (__builtin_vec_extract (v, n))
-
-   where v is a V2DF or V2DI vector and n is either 0 or 1.  Previously the
-   compiler would do a direct move to the GPR registers to select the item and
-   a direct move from the GPR registers to do the splat.  */
-
-vector long long
-splat_dup_ll_0 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-	
-vector long long
-splat_dup_ll_1 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-vector double
-splat_dup_d_0 (vector double v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-	
-vector double
-splat_dup_d_1 (vector double v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-	
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 4 } } */


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [gcc(refs/users/meissner/heads/work084)] Revert patch.
@ 2022-03-29  2:58 Michael Meissner
  0 siblings, 0 replies; 11+ messages in thread
From: Michael Meissner @ 2022-03-29  2:58 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:3a929c2d268d2b9803eca3b396f638182d178076

commit 3a929c2d268d2b9803eca3b396f638182d178076
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Mon Mar 28 22:57:56 2022 -0400

    Revert patch.
    
    gcc/
            PR target/99293
            Revert patch.
            * config/rs6000/rs6000-p8swap.cc (rtx_is_swappable_p): Remove
            UNSPEC_VSX_XXSPLTD case.
            * config/rs6000/vsx.md (UNSPEC_VSX_XXSPLTD): Delete.
            (vsx_xxspltd_<mode>): Rewrite to use VEC_DUPLICATE.
    
    gcc/testsuite:
            PR target/99293
            Revert patch.
            * gcc.target/powerpc/pr99293.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000-p8swap.cc         |  1 +
 gcc/config/rs6000/vsx.md                   | 38 +++++++-----------------------
 gcc/testsuite/gcc.target/powerpc/pr99293.c | 36 ----------------------------
 3 files changed, 10 insertions(+), 65 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 1973d9c8245..d301bc3fe59 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -805,6 +805,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
 	  case UNSPEC_VUPKLU_V4SF:
 	    return 0;
 	  case UNSPEC_VSPLT_DIRECT:
+	  case UNSPEC_VSX_XXSPLTD:
 	    *special = SH_SPLAT;
 	    return 1;
 	  case UNSPEC_REDUC_PLUS:
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0ab24186812..15bd86dfdfb 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -296,6 +296,7 @@
    UNSPEC_VSX_XXPERM
 
    UNSPEC_VSX_XXSPLTW
+   UNSPEC_VSX_XXSPLTD
    UNSPEC_VSX_DIVSD
    UNSPEC_VSX_DIVUD
    UNSPEC_VSX_DIVSQ
@@ -3088,25 +3089,6 @@
 }
   [(set_attr "type" "vecperm")])
 
-;; Combiner patterns to allow creating XXPERMDI's to access either double
-;; word element in a vector register when used with VEC_DUPLICATE..
-(define_insn "*vsx_dup_<mode>_1"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-	(vec_duplicate:VSX_D
-	 (vec_select:<VS_scalar>
-	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
-	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))))]
-  "VECTOR_MEM_VSX_P (<MODE>mode)"
-{
-  HOST_WIDE_INT dword = INTVAL (operands[2]);
-  if (!BYTES_BIG_ENDIAN)
-    dword = !dword;
-
-  operands[3] = GEN_INT (3*dword);
-  return "xxpermdi %x0,%x1,%x1,%3";
-}
-  [(set_attr "type" "vecperm")])
-
 ;; Special purpose concat using xxpermdi to glue two single precision values
 ;; together, relying on the fact that internally scalar floats are represented
 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats
@@ -4694,18 +4676,16 @@
 ;; V2DF/V2DI splat for use by vec_splat builtin
 (define_insn "vsx_xxspltd_<mode>"
   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-	(vec_duplicate:VSX_D
-	 (vec_select:<VS_scalar>
-	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
-	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "i")]))))]
+        (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa")
+	               (match_operand:QI 2 "u5bit_cint_operand" "i")]
+                      UNSPEC_VSX_XXSPLTD))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
 {
-  HOST_WIDE_INT dword = INTVAL (operands[2]);
-  if (!BYTES_BIG_ENDIAN)
-    dword = !dword;
-
-  operands[3] = GEN_INT (3*dword);
-  return "xxpermdi %x0,%x1,%x1,%3";
+  if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0)
+      || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1))
+    return "xxpermdi %x0,%x1,%x1,0";
+  else
+    return "xxpermdi %x0,%x1,%x1,3";
 }
   [(set_attr "type" "vecperm")])
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c b/gcc/testsuite/gcc.target/powerpc/pr99293.c
deleted file mode 100644
index 03c22f8f4de..00000000000
--- a/gcc/testsuite/gcc.target/powerpc/pr99293.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* { dg-do compile } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mvsx" } */
-
-/* Test for PR 99263, which wants to do:
-   __builtin_vec_splats (__builtin_vec_extract (v, n))
-
-   where v is a V2DF or V2DI vector and n is either 0 or 1.  Previously the
-   compiler would do a direct move to the GPR registers to select the item and
-   a direct move from the GPR registers to do the splat.  */
-
-vector long long
-splat_dup_ll_0 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-	
-vector long long
-splat_dup_ll_1 (vector long long v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-
-vector double
-splat_dup_d_0 (vector double v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
-}
-	
-vector double
-splat_dup_d_1 (vector double v)
-{
-  return __builtin_vec_splats (__builtin_vec_extract (v, 1));
-}
-	
-/* { dg-final { scan-assembler-times {\mxxpermdi\M} 4 } } */


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2022-04-02  2:24 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-31 14:00 [gcc(refs/users/meissner/heads/work084)] Revert patch Michael Meissner
  -- strict thread matches above, loose matches on Subject: below --
2022-04-02  2:24 Michael Meissner
2022-04-02  0:03 Michael Meissner
2022-04-01 20:15 Michael Meissner
2022-03-31 14:00 Michael Meissner
2022-03-31 14:00 Michael Meissner
2022-03-31 14:00 Michael Meissner
2022-03-30 17:57 Michael Meissner
2022-03-29 23:35 Michael Meissner
2022-03-29 23:29 Michael Meissner
2022-03-29  2:58 Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).