public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/wschmidt/heads/sibfix)] lra: Canonicalize mult to shift in address reloads
@ 2020-08-28 13:43 William Schmidt
  0 siblings, 0 replies; only message in thread
From: William Schmidt @ 2020-08-28 13:43 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:6b3034eaba83935d9f6dfb20d2efbdb34b5b00bf

commit 6b3034eaba83935d9f6dfb20d2efbdb34b5b00bf
Author: Alex Coplan <alex.coplan@arm.com>
Date:   Thu Aug 27 09:49:57 2020 +0100

    lra: Canonicalize mult to shift in address reloads
    
    Inside a (mem) RTX, it is canonical to write multiplications by powers
    of two using a (mult) [0]. Outside of a (mem), the canonical way to
    write multiplications by powers of two is using (ashift).
    
    Now I observed that LRA does not quite respect this RTL canonicalization
    rule.  When compiling gcc/testsuite/gcc.dg/torture/pr34330.c with -Os
    -ftree-vectorize, the RTL in the dump "281r.ira" has the insn:
    
    (set (reg:SI 111)
         (mem:SI (plus:DI (mult:DI (reg:DI 101 [ ivtmp.9 ])
                     (const_int 4 [0x4]))
                 (reg/v/f:DI 105 [ b ]))))
    
    but LRA then proceeds to generate a reload, and we get the following
    non-canonical insn in "282r.reload":
    
    (set (reg:DI 7 x7 [121])
         (plus:DI (mult:DI (reg:DI 5 x5 [orig:101 ivtmp.9 ] [101])
                 (const_int 4 [0x4]))
             (reg/v/f:DI 1 x1 [orig:105 b ] [105])))
    
    This patch fixes LRA to ensure that we generate canonical RTL in this
    case. After the patch, we get the following insn in "282r.reload":
    
    (set (reg:DI 7 x7 [121])
            (plus:DI (ashift:DI (reg:DI 5 x5 [orig:101 ivtmp.9 ] [101])
                    (const_int 2 [0x2]))
                (reg/v/f:DI 1 x1 [orig:105 b ] [105])))
    
    [0] : https://gcc.gnu.org/onlinedocs/gccint/Insn-Canonicalizations.html
    
    gcc/ChangeLog:
    
            * lra-constraints.c (canonicalize_reload_addr): New.
            (curr_insn_transform): Use canonicalize_reload_addr to ensure we
            generate canonical RTL for an address reload.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/mem-shift-canonical.c: New test.

Diff:
---
 gcc/lra-constraints.c                              | 43 ++++++++++++++++++++--
 .../gcc.target/aarch64/mem-shift-canonical.c       | 27 ++++++++++++++
 2 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index 421c453997b..580da9c3ed6 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -131,6 +131,7 @@
 #include "lra-int.h"
 #include "print-rtl.h"
 #include "function-abi.h"
+#include "rtl-iter.h"
 
 /* Value of LRA_CURR_RELOAD_NUM at the beginning of BB of the current
    insn.  Remember that LRA_CURR_RELOAD_NUM is the number of emitted
@@ -570,6 +571,33 @@ init_curr_insn_input_reloads (void)
   curr_insn_input_reloads_num = 0;
 }
 
+/* Rewrite in place every (mult X (const_int 2^N)), N > 0, found in ADDR as
+   (ashift X (const_int N)) and return ADDR.  MULT is canonical only inside a
+   MEM; a reloaded address is pulled outside of the MEM, where ASHIFT is.  */
+static rtx
+canonicalize_reload_addr (rtx addr)
+{
+  subrtx_var_iterator::array_type array;
+  FOR_EACH_SUBRTX_VAR (iter, array, addr, NONCONST)
+    {
+      rtx x = *iter;
+      if (GET_CODE (x) == MULT && CONST_INT_P (XEXP (x, 1)))
+	{
+	  const HOST_WIDE_INT ci = INTVAL (XEXP (x, 1));
+	  const int pwr2 = exact_log2 (ci);
+	  if (pwr2 > 0)
+	    {
+	      /* CI is a power of two other than 1: use a shift instead,
+		 which is the canonical form outside of a MEM.  */
+	      PUT_CODE (x, ASHIFT);
+	      XEXP (x, 1) = GEN_INT (pwr2);
+	    }
+	}
+    }
+
+  return addr;
+}
+
 /* Create a new pseudo using MODE, RCLASS, ORIGINAL or reuse already
    created input reload pseudo (only if TYPE is not OP_OUT).  Don't
    reuse pseudo if IN_SUBREG_P is true and the reused pseudo should be
@@ -4362,12 +4390,19 @@ curr_insn_transform (bool check_only_p)
 	    {
 	      rtx addr = *loc;
 	      enum rtx_code code = GET_CODE (addr);
-	      
+	      bool align_p = false;
+
 	      if (code == AND && CONST_INT_P (XEXP (addr, 1)))
-		/* (and ... (const_int -X)) is used to align to X bytes.  */
-		addr = XEXP (*loc, 0);
+		{
+		  /* (and ... (const_int -X)) is used to align to X bytes.  */
+		  align_p = true;
+		  addr = XEXP (*loc, 0);
+		}
+	      else
+		addr = canonicalize_reload_addr (addr);
+
 	      lra_emit_move (new_reg, addr);
-	      if (addr != *loc)
+	      if (align_p)
 		emit_move_insn (new_reg, gen_rtx_AND (GET_MODE (new_reg), new_reg, XEXP (*loc, 1)));
 	    }
 	  before = get_insns ();
diff --git a/gcc/testsuite/gcc.target/aarch64/mem-shift-canonical.c b/gcc/testsuite/gcc.target/aarch64/mem-shift-canonical.c
new file mode 100644
index 00000000000..36beed497a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mem-shift-canonical.c
@@ -0,0 +1,27 @@
+/* Copy of gcc.dg/torture/pr34330.c.  Checks that the AArch64 "add_lsl_di"
+   pattern shows up 3 times in the assembly (shift-form address adds).  */
+
+/* { dg-do compile } */
+/* { dg-options "-Os -ftree-vectorize -dp" } */
+
+
+struct T
+{
+  int t;
+  struct { short s1, s2, s3, s4; } *s;
+};
+
+void
+foo (int *a, int *b, int *c, int *d, struct T *e)
+{
+  int i;
+  for (i = 0; i < e->t; i++)
+    {
+      e->s[i].s1 = a[i];
+      e->s[i].s2 = b[i];
+      e->s[i].s3 = c[i];
+      e->s[i].s4 = d[i];
+    }
+}
+
+/* { dg-final { scan-assembler-times "add_lsl_di" 3 } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-08-28 13:43 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-28 13:43 [gcc(refs/users/wschmidt/heads/sibfix)] lra: Canonicalize mult to shift in address reloads William Schmidt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).