public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-239] rs6000: Fix predicate for const vector in sldoi_to_mov [PR109069]
@ 2023-04-26  5:22 Kewen Lin
  0 siblings, 0 replies; only message in thread
From: Kewen Lin @ 2023-04-26  5:22 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:fd75f6ae5625f087980ff4a7e76cc6284cfe5a3e

commit r14-239-gfd75f6ae5625f087980ff4a7e76cc6284cfe5a3e
Author: Kewen Lin <linkw@linux.ibm.com>
Date:   Wed Apr 26 00:21:05 2023 -0500

    rs6000: Fix predicate for const vector in sldoi_to_mov [PR109069]
    
    As PR109069 shows, commit r12-6537-g080a06fcb076b3 which
    introduces define_insn_and_split sldoi_to_mov adopts
    easy_vector_constant for const vector of interest, but it's
    wrong since predicate easy_vector_constant doesn't guarantee
    each byte in the const vector is the same.  One counter
    example is the const vector in pr109069-1.c.  This patch is
    to introduce new predicate const_vector_each_byte_same to
    ensure all bytes in the given const vector are the same by
    considering both int and float, meanwhile for the constants
    which don't meet easy_vector_constant we need to gen a move
    instead of just a set, and uses VECTOR_MEM_ALTIVEC_OR_VSX_P
    rather than VECTOR_UNIT_ALTIVEC_OR_VSX_P for V2DImode support
    under VSX since vector long long type of vec_sld is guarded
    under stanza vsx.
    
            PR target/109069
    
    gcc/ChangeLog:
    
            * config/rs6000/altivec.md (sldoi_to_mov<mode>): Replace predicate
            easy_vector_constant with const_vector_each_byte_same, add
            handlings in preparation for !easy_vector_constant, and update
            VECTOR_UNIT_ALTIVEC_OR_VSX_P with VECTOR_MEM_ALTIVEC_OR_VSX_P.
            * config/rs6000/predicates.md (const_vector_each_byte_same): New
            predicate.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/powerpc/pr109069-1.c: New test.
            * gcc.target/powerpc/pr109069-2-run.c: New test.
            * gcc.target/powerpc/pr109069-2.c: New test.
            * gcc.target/powerpc/pr109069-2.h: New test.

Diff:
---
 gcc/config/rs6000/altivec.md                      | 14 +++-
 gcc/config/rs6000/predicates.md                   | 37 ++++++++++
 gcc/testsuite/gcc.target/powerpc/pr109069-1.c     | 25 +++++++
 gcc/testsuite/gcc.target/powerpc/pr109069-2-run.c | 50 ++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr109069-2.c     | 12 ++++
 gcc/testsuite/gcc.target/powerpc/pr109069-2.h     | 83 +++++++++++++++++++++++
 6 files changed, 218 insertions(+), 3 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 49b0c964f4d..ad1224e0b57 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -385,14 +385,22 @@
 
 (define_insn_and_split "sldoi_to_mov<mode>"
   [(set (match_operand:VM 0 "altivec_register_operand")
-	(unspec:VM [(match_operand:VM 1 "easy_vector_constant")
+	(unspec:VM [(match_operand:VM 1 "const_vector_each_byte_same")
 	            (match_dup 1)
 		    (match_operand:QI 2 "u5bit_cint_operand")]
 		    UNSPEC_VSLDOI))]
-  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && can_create_pseudo_p ()"
+  "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode) && can_create_pseudo_p ()"
   "#"
   "&& 1"
-  [(set (match_dup 0) (match_dup 1))])
+  [(set (match_dup 0) (match_dup 1))]
+  "{
+     if (!easy_vector_constant (operands[1], <MODE>mode))
+       {
+	 rtx dest = gen_reg_rtx (<MODE>mode);
+	 emit_move_insn (dest, operands[1]);
+	 operands[1] = dest;
+       }
+  }")
 
 (define_insn "get_vrsave_internal"
   [(set (match_operand:SI 0 "register_operand" "=r")
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 52c65534e51..a16ee30f0c0 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -798,6 +798,43 @@
 	    (and (match_test "easy_altivec_constant (op, mode)")
 		 (match_test "vspltis_shifted (op) != 0")))))
 
+;; Return true if this is a vector constant and each byte in
+;; it is the same.
+(define_predicate "const_vector_each_byte_same"
+  (match_code "const_vector")
+{
+  rtx elt;
+  if (!const_vec_duplicate_p (op, &elt))
+    return false;
+
+  machine_mode emode = GET_MODE_INNER (mode);
+  unsigned HOST_WIDE_INT eval;
+  if (CONST_INT_P (elt))
+    eval = INTVAL (elt);
+  else if (CONST_DOUBLE_AS_FLOAT_P (elt))
+    {
+      gcc_assert (emode == SFmode || emode == DFmode);
+      long l[2];
+      real_to_target (l, CONST_DOUBLE_REAL_VALUE (elt), emode);
+      /* real_to_target puts 32-bit pieces in each long.  */
+      eval = zext_hwi (l[0], 32);
+      eval |= zext_hwi (l[1], 32) << 32;
+    }
+  else
+    return false;
+
+  unsigned int esize = GET_MODE_SIZE (emode);
+  unsigned char byte0 = eval & 0xff;
+  for (unsigned int i = 1; i < esize; i++)
+    {
+      eval >>= BITS_PER_UNIT;
+      if (byte0 != (eval & 0xff))
+	return false;
+    }
+
+  return true;
+})
+
 ;; Return 1 if operand is a vector int register or is either a vector constant
 ;; of all 0 bits of a vector constant of all 1 bits.
 (define_predicate "vector_int_reg_or_same_bit"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr109069-1.c b/gcc/testsuite/gcc.target/powerpc/pr109069-1.c
new file mode 100644
index 00000000000..eb4d73a1e66
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr109069-1.c
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vmx_hw } */
+/* { dg-options "-O2 -maltivec" } */
+
+/* Verify it run successfully.  */
+
+#include <altivec.h>
+
+__attribute__ ((noipa))
+vector signed int
+test ()
+{
+  vector signed int v = {-16, -16, -16, -16};
+  vector signed int res = vec_sld (v, v, 3);
+  return res;
+}
+
+int
+main ()
+{
+  vector signed int res = test ();
+  if (res[0] != 0xf0ffffff)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr109069-2-run.c b/gcc/testsuite/gcc.target/powerpc/pr109069-2-run.c
new file mode 100644
index 00000000000..fad9fa5df73
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr109069-2-run.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vsx_hw } */
+/* { dg-options "-O2 -mvsx" } */
+
+/* Verify it doesn't generate wrong code.  */
+
+#include "pr109069-2.h"
+
+int
+main ()
+{
+  vector unsigned char res1 = test1 ();
+  for (int i = 0; i < 16; i++)
+    if (res1[i] != 0xd)
+      __builtin_abort ();
+
+  vector signed short res2 = test2 ();
+  for (int i = 0; i < 8; i++)
+    if (res2[i] != 0x7777)
+      __builtin_abort ();
+
+  vector signed int res3 = test3 ();
+  vector unsigned int res4 = test4 ();
+  vector float res6 = test6 ();
+  for (int i = 0; i < 4; i++)
+    {
+      if (res3[i] != 0xbbbbbbbb)
+	__builtin_abort ();
+      if (res4[i] != 0x7070707)
+	__builtin_abort ();
+      U32b u;
+      u.f = res6[i];
+      if (u.i != 0x17171717)
+	__builtin_abort ();
+    }
+
+  vector unsigned long long res5 = test5 ();
+  vector double res7 = test7 ();
+  for (int i = 0; i < 2; i++)
+    {
+      if (res5[i] != 0x4545454545454545ll)
+	__builtin_abort ();
+      U64b u;
+      u.f = res7[i];
+      if (u.i != 0x5454545454545454ll)
+	__builtin_abort ();
+    }
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/pr109069-2.c b/gcc/testsuite/gcc.target/powerpc/pr109069-2.c
new file mode 100644
index 00000000000..e71bbeb5df5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr109069-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* Disable rs6000 optimize_swaps as it drops some REG_EQUAL
+   notes on const vector and affects test point here.  */
+/* { dg-options "-O2 -mvsx -mno-optimize-swaps" } */
+
+/* Verify we can optimize away vector shifting if every byte
+   of vector is the same.  */
+
+#include "pr109069-2.h"
+
+/* { dg-final { scan-assembler-not {\mvsldoi\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr109069-2.h b/gcc/testsuite/gcc.target/powerpc/pr109069-2.h
new file mode 100644
index 00000000000..8b03bfb65fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr109069-2.h
@@ -0,0 +1,83 @@
+#include <altivec.h>
+
+typedef union
+{
+  unsigned int i;
+  float f;
+} U32b;
+
+typedef union
+{
+  unsigned long long i;
+  double f;
+} U64b;
+
+__attribute__ ((noipa))
+vector unsigned char
+test1 ()
+{
+  vector unsigned char v = {0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd,
+			    0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd};
+  vector unsigned char res = vec_sld (v, v, 3);
+  return res;
+}
+
+__attribute__ ((noipa))
+vector signed short
+test2 ()
+{
+  vector signed short v
+    = {0x7777, 0x7777, 0x7777, 0x7777, 0x7777, 0x7777, 0x7777, 0x7777};
+  vector signed short res = vec_sld (v, v, 5);
+  return res;
+}
+
+__attribute__ ((noipa))
+vector signed int
+test3 ()
+{
+  vector signed int v = {0xbbbbbbbb, 0xbbbbbbbb, 0xbbbbbbbb, 0xbbbbbbbb};
+  vector signed int res = vec_sld (v, v, 7);
+  return res;
+}
+
+__attribute__ ((noipa))
+vector unsigned int
+test4 ()
+{
+  vector unsigned int v = {0x07070707, 0x07070707, 0x07070707, 0x07070707};
+  vector unsigned int res = vec_sld (v, v, 9);
+  return res;
+}
+
+__attribute__ ((noipa))
+vector unsigned long long
+test5 ()
+{
+  vector unsigned long long v = {0x4545454545454545ll, 0x4545454545454545ll};
+  vector unsigned long long res = vec_sld (v, v, 10);
+  return res;
+}
+
+__attribute__ ((noipa))
+vector float
+test6 ()
+{
+  U32b u;
+  u.i = 0x17171717;
+  vector float vf = {u.f, u.f, u.f, u.f};
+  vector float res = vec_sld (vf, vf, 11);
+  return res;
+}
+
+__attribute__ ((noipa))
+vector double
+test7 ()
+{
+  U64b u;
+  u.i = 0x5454545454545454ll;
+  vector double vf = {u.f, u.f};
+  vector double res = vec_sld (vf, vf, 13);
+  return res;
+}
+

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-04-26  5:22 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-26  5:22 [gcc r14-239] rs6000: Fix predicate for const vector in sldoi_to_mov [PR109069] Kewen Lin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).