* [gcc r11-3217] rs6000: Remove useless insns fed into lvx/stvx [PR97019]
From: Kewen Lin @ 2020-09-16  5:23 UTC
  To: gcc-cvs

https://gcc.gnu.org/g:e0d98ffd14487b7105de3d151122144086faa1fa

commit r11-3217-ge0d98ffd14487b7105de3d151122144086faa1fa
Author: Kewen Lin <linkw@linux.ibm.com>
Date:   Tue Sep 15 22:32:55 2020 -0500

    rs6000: Remove useless insns fed into lvx/stvx [PR97019]
    
    This patch extends the existing function find_alignment_op to check
    that all definitions of base_reg are AND operations with the mask
    -16B that forces the alignment.  If they all are, it passes all of
    the AND operations and their insns on to recombine_lvx_pattern and
    recombine_stvx_pattern, which can then remove the useless ANDs.
    
    Bootstrapped/regtested on powerpc64le-linux-gnu P8.
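    
    As an illustrative aside (a sketch, not part of the original commit
    message): the useless insns in question are the standalone masking
    operations GCC emits when expanding vec_ld/vec_st, since those
    built-ins have lvx/stvx semantics and so access the address rounded
    down to a 16-byte boundary.  A minimal example, with a made-up
    function name:
    
      #include <altivec.h>
    
      vector unsigned int
      load_aligned (const unsigned int *p)
      {
        /* By definition this loads from (p & -16); the hardware lvx
           instruction ignores the low four address bits anyway, so a
           separate masking insn (rldicr x,y,0,59) adds nothing once
           the access is recombined into an lvx.  */
        return vec_ld (0, p);
      }
    
    The new test below constructs cases where the masked base register
    has several reaching definitions, which the old find_alignment_op
    gave up on.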
    
    gcc/ChangeLog:
    
            PR target/97019
            * config/rs6000/rs6000-p8swap.c (find_alignment_op): Adjust to
            support multiple definitions which are all AND operations with
            the mask -16B.
            (recombine_lvx_pattern): Adjust to handle multiple AND operations
            from find_alignment_op.
            (recombine_stvx_pattern): Likewise.
    
    gcc/testsuite/ChangeLog:
    
            PR target/97019
            * gcc.target/powerpc/pr97019.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000-p8swap.c          | 147 ++++++++++++++++++-----------
 gcc/testsuite/gcc.target/powerpc/pr97019.c |  83 ++++++++++++++++
 2 files changed, 176 insertions(+), 54 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c
index 3d5dc7d8aae..fff1b08afa6 100644
--- a/gcc/config/rs6000/rs6000-p8swap.c
+++ b/gcc/config/rs6000/rs6000-p8swap.c
@@ -2095,11 +2095,15 @@ alignment_mask (rtx_insn *insn)
   return alignment_with_canonical_addr (SET_SRC (body));
 }
 
-/* Given INSN that's a load or store based at BASE_REG, look for a
-   feeding computation that aligns its address on a 16-byte boundary.
-   Return the rtx and its containing AND_INSN.  */
-static rtx
-find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn)
+/* Given INSN that's a load or store based at BASE_REG, check if
+   all of its feeding computations align its address on a 16-byte
+   boundary.  If so, return true and add all definition insns into
+   AND_INSNS and their corresponding fully-expanded rtxes for the
+   masking operations into AND_OPS.  */
+
+static bool
+find_alignment_op (rtx_insn *insn, rtx base_reg, vec<rtx_insn *> *and_insns,
+		   vec<rtx> *and_ops)
 {
   df_ref base_use;
   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
@@ -2111,19 +2115,28 @@ find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn)
 	continue;
 
       struct df_link *base_def_link = DF_REF_CHAIN (base_use);
-      if (!base_def_link || base_def_link->next)
-	break;
+      if (!base_def_link)
+	return false;
 
-      /* With stack-protector code enabled, and possibly in other
-	 circumstances, there may not be an associated insn for 
-	 the def.  */
-      if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
-	break;
+      while (base_def_link)
+	{
+	  /* With stack-protector code enabled, and possibly in other
+	     circumstances, there may not be an associated insn for
+	     the def.  */
+	  if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
+	    return false;
 
-      *and_insn = DF_REF_INSN (base_def_link->ref);
-      and_operation = alignment_mask (*and_insn);
-      if (and_operation != 0)
-	break;
+	  rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
+	  and_operation = alignment_mask (and_insn);
+
+	  /* Stop if we find any one which doesn't align.  */
+	  if (!and_operation)
+	    return false;
+
+	  and_insns->safe_push (and_insn);
+	  and_ops->safe_push (and_operation);
+	  base_def_link = base_def_link->next;
+	}
     }
 
   return and_operation;
@@ -2143,11 +2156,14 @@ recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
   rtx mem = XEXP (SET_SRC (body), 0);
   rtx base_reg = XEXP (mem, 0);
 
-  rtx_insn *and_insn;
-  rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
+  auto_vec<rtx_insn *> and_insns;
+  auto_vec<rtx> and_ops;
+  bool is_any_def_and
+    = find_alignment_op (insn, base_reg, &and_insns, &and_ops);
 
-  if (and_operation != 0)
+  if (is_any_def_and)
     {
+      gcc_assert (and_insns.length () == and_ops.length ());
       df_ref def;
       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
       FOR_EACH_INSN_INFO_DEF (def, insn_info)
@@ -2168,25 +2184,35 @@ recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
 	  to_delete[INSN_UID (swap_insn)].replace = true;
 	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
 
-	  /* However, first we must be sure that we make the
-	     base register from the AND operation available
-	     in case the register has been overwritten.  Copy
-	     the base register to a new pseudo and use that
-	     as the base register of the AND operation in
-	     the new LVX instruction.  */
-	  rtx and_base = XEXP (and_operation, 0);
-	  rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
-	  rtx copy = gen_rtx_SET (new_reg, and_base);
-	  rtx_insn *new_insn = emit_insn_after (copy, and_insn);
-	  set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
-	  df_insn_rescan (new_insn);
-
-	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
-				       XEXP (and_operation, 1));
+	  rtx new_reg = 0;
+	  rtx and_mask = 0;
+	  for (unsigned i = 0; i < and_insns.length (); i++)
+	    {
+	      /* However, first we must be sure that we make the
+		 base register from the AND operation available
+		 in case the register has been overwritten.  Copy
+		 the base register to a new pseudo and use that
+		 as the base register of the AND operation in
+		 the new LVX instruction.  */
+	      rtx_insn *and_insn = and_insns[i];
+	      rtx and_op = and_ops[i];
+	      rtx and_base = XEXP (and_op, 0);
+	      if (!new_reg)
+		{
+		  new_reg = gen_reg_rtx (GET_MODE (and_base));
+		  and_mask = XEXP (and_op, 1);
+		}
+	      rtx copy = gen_rtx_SET (new_reg, and_base);
+	      rtx_insn *new_insn = emit_insn_after (copy, and_insn);
+	      set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
+	      df_insn_rescan (new_insn);
+	    }
+
+	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
 	  SET_SRC (body) = mem;
 	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
 	  df_insn_rescan (insn);
-		  
+
 	  if (dump_file)
 	    fprintf (dump_file, "lvx opportunity found at %d\n",
 		     INSN_UID (insn));
@@ -2205,11 +2231,14 @@ recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
   rtx mem = SET_DEST (body);
   rtx base_reg = XEXP (mem, 0);
 
-  rtx_insn *and_insn;
-  rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
+  auto_vec<rtx_insn *> and_insns;
+  auto_vec<rtx> and_ops;
+  bool is_any_def_and
+    = find_alignment_op (insn, base_reg, &and_insns, &and_ops);
 
-  if (and_operation != 0)
+  if (is_any_def_and)
     {
+      gcc_assert (and_insns.length () == and_ops.length ());
       rtx src_reg = XEXP (SET_SRC (body), 0);
       df_ref src_use;
       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
@@ -2234,25 +2263,35 @@ recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
 	  to_delete[INSN_UID (swap_insn)].replace = true;
 	  to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
 
-	  /* However, first we must be sure that we make the
-	     base register from the AND operation available
-	     in case the register has been overwritten.  Copy
-	     the base register to a new pseudo and use that
-	     as the base register of the AND operation in
-	     the new STVX instruction.  */
-	  rtx and_base = XEXP (and_operation, 0);
-	  rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
-	  rtx copy = gen_rtx_SET (new_reg, and_base);
-	  rtx_insn *new_insn = emit_insn_after (copy, and_insn);
-	  set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
-	  df_insn_rescan (new_insn);
-
-	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
-				       XEXP (and_operation, 1));
+	  rtx new_reg = 0;
+	  rtx and_mask = 0;
+	  for (unsigned i = 0; i < and_insns.length (); i++)
+	    {
+	      /* However, first we must be sure that we make the
+		 base register from the AND operation available
+		 in case the register has been overwritten.  Copy
+		 the base register to a new pseudo and use that
+		 as the base register of the AND operation in
+		 the new STVX instruction.  */
+	      rtx_insn *and_insn = and_insns[i];
+	      rtx and_op = and_ops[i];
+	      rtx and_base = XEXP (and_op, 0);
+	      if (!new_reg)
+		{
+		  new_reg = gen_reg_rtx (GET_MODE (and_base));
+		  and_mask = XEXP (and_op, 1);
+		}
+	      rtx copy = gen_rtx_SET (new_reg, and_base);
+	      rtx_insn *new_insn = emit_insn_after (copy, and_insn);
+	      set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
+	      df_insn_rescan (new_insn);
+	    }
+
+	  XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, and_mask);
 	  SET_SRC (body) = src_reg;
 	  INSN_CODE (insn) = -1; /* Force re-recognition.  */
 	  df_insn_rescan (insn);
-		  
+
 	  if (dump_file)
 	    fprintf (dump_file, "stvx opportunity found at %d\n",
 		     INSN_UID (insn));
diff --git a/gcc/testsuite/gcc.target/powerpc/pr97019.c b/gcc/testsuite/gcc.target/powerpc/pr97019.c
new file mode 100644
index 00000000000..81b1bda320f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr97019.c
@@ -0,0 +1,83 @@
+/* This issue can only exist on little-endian P8 targets: elsewhere
+   the built-in functions vec_ld/vec_st can use lxvd2x/stxvd2x
+   (P8 big-endian) or lxv/stxv (P9 and later) in some cases, and
+   the rldicr instructions fed to them are then necessary.  */
+/* { dg-do compile { target { powerpc_p8vector_ok && le } } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+
+/* Test there are no useless instructions "rldicr x,y,0,59"
+   to align the addresses for lvx/stvx.  */
+
+extern int a, b, c;
+extern vector unsigned long long ev5, ev6, ev7, ev8;
+extern int dummy (vector unsigned long long);
+
+int test_vec_ld(unsigned char *pe) {
+
+  vector unsigned long long v1, v2, v3, v4, v9;
+  vector unsigned long long v5 = ev5;
+  vector unsigned long long v6 = ev6;
+  vector unsigned long long v7 = ev7;
+  vector unsigned long long v8 = ev8;
+
+  unsigned char *e = pe;
+
+  do {
+    if (a) {
+      v1 = __builtin_vec_ld(16, (unsigned long long *)e);
+      v2 = __builtin_vec_ld(32, (unsigned long long *)e);
+      v3 = __builtin_vec_ld(48, (unsigned long long *)e);
+      e = e + 8;
+      for (int i = 0; i < a; i++) {
+        v4 = v5;
+        v5 = __builtin_crypto_vpmsumd(v1, v6);
+        v6 = __builtin_crypto_vpmsumd(v2, v7);
+        v7 = __builtin_crypto_vpmsumd(v3, v8);
+        e = e + 8;
+      }
+    }
+    v5 = __builtin_vec_ld(16, (unsigned long long *)e);
+    v6 = __builtin_vec_ld(32, (unsigned long long *)e);
+    v7 = __builtin_vec_ld(48, (unsigned long long *)e);
+    if (c)
+      b = 1;
+  } while (b);
+
+  return dummy(v4);
+}
+
+int test_vec_st(unsigned char *pe) {
+
+  vector unsigned long long v1, v2, v3, v4;
+  vector unsigned long long v5 = ev5;
+  vector unsigned long long v6 = ev6;
+  vector unsigned long long v7 = ev7;
+  vector unsigned long long v8 = ev8;
+
+  unsigned char *e = pe;
+
+  do {
+    if (a) {
+      __builtin_vec_st(v1, 16, (unsigned long long *)e);
+      __builtin_vec_st(v2, 32, (unsigned long long *)e);
+      __builtin_vec_st(v3, 48, (unsigned long long *)e);
+      e = e + 8;
+      for (int i = 0; i < a; i++) {
+        v4 = v5;
+        v5 = __builtin_crypto_vpmsumd(v1, v6);
+        v6 = __builtin_crypto_vpmsumd(v2, v7);
+        v7 = __builtin_crypto_vpmsumd(v3, v8);
+        e = e + 8;
+      }
+    }
+    __builtin_vec_st(v5, 16, (unsigned long long *)e);
+    __builtin_vec_st(v6, 32, (unsigned long long *)e);
+    __builtin_vec_st(v7, 48, (unsigned long long *)e);
+    if (c)
+      b = 1;
+  } while (b);
+
+  return dummy(v4);
+}
+
+/* { dg-final { scan-assembler-not {(?n)rldicr.*,0,59} } } */

