public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix combiner (PRs rtl-optimization/46034, rtl-optimization/46212, rtl-optimization/46248)
@ 2010-11-01 21:28 Jakub Jelinek
  2010-11-02 20:36 ` Eric Botcazou
  0 siblings, 1 reply; 6+ messages in thread
From: Jakub Jelinek @ 2010-11-01 21:28 UTC (permalink / raw)
  To: gcc-patches

Hi!

The testcases below ICE because i0dest_in_i0src replacement of i0dest with
i0src happens multiple times within the i2pat, which sometimes leads to
self-referential rtl.
There are two issues, one is that the earlier
newpat = subst (newpat, i0dest, i0src, ...);
might have (but not necessarily) have changed i1src and so when i1dest
is first replaced with i1src that way modified and then i0dest is replaced
with i0src, the replacements are already wrong and as testcases show
self-referential.  The other issue is that if we are to apply more than
one substitution on i2pat and i0dest_in_i0src, then we need to pass
1 as last argument to the first subst in order to avoid unwanted
rtl sharing (which again can lead to self-referential rtl).
Another issue is that if all of i0_feeds_i2_n, i0_feeds_i1_n and
i1_feeds_i2_n is true, then we'd be substituting i0dest with i0src
in i2pat twice.

All should be fixed in the following patch, bootstrapped/regtested on
x86_64-linux and i686-linux.  Ok for trunk?

2010-11-01  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/46034
	PR rtl-optimization/46212
	PR rtl-optimization/46248
	* combine.c (try_combine): If added_sets_2 where i0dest_in_i0src
	and i0 feeds i1 and i1 feeds i2 or i0 feeds i2, make a copy of i1src
	before i0dest -> i0src substitution and pass 1 instead of 0 as last
	argument to subst on i2pat.

	* gcc.c-torture/compile/pr46034.c: New test.
	* gcc.c-torture/compile/pr46248.c: New test.
	* gcc.dg/pr46212.c: New test.

--- gcc/combine.c.jj	2010-11-01 09:07:24.000000000 +0100
+++ gcc/combine.c	2010-11-01 12:01:15.000000000 +0100
@@ -2502,6 +2502,7 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx
   rtx i3dest_killed = 0;
   /* SET_DEST and SET_SRC of I2, I1 and I0.  */
   rtx i2dest = 0, i2src = 0, i1dest = 0, i1src = 0, i0dest = 0, i0src = 0;
+  rtx i1src_copy = 0;
   /* Set if I2DEST was reused as a scratch register.  */
   bool i2scratch = false;
   /* The PATTERNs of I0, I1, and I2, or a copy of them in certain cases.  */
@@ -3128,6 +3129,14 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx
 	  return 0;
 	}
 
+      /* Following subst may modify i1src, make a copy of it
+	 before it is for added_sets_2 handling if needed.  */
+      if (added_sets_2
+	  && i0dest_in_i0src
+	  && i0_feeds_i1_n
+	  && (i1_feeds_i2_n || i0_feeds_i2_n))
+	i1src_copy = copy_rtx (i1src);
+
       n_occurrences = 0;
       subst_low_luid = DF_INSN_LUID (i0);
       newpat = subst (newpat, i0dest, i0src, 0,
@@ -3200,11 +3209,10 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx
       if (added_sets_2)
 	{
 	  rtx t = i2pat;
-	  if (i0_feeds_i2_n)
-	    t = subst (t, i0dest, i0src, 0, 0);
 	  if (i1_feeds_i2_n)
-	    t = subst (t, i1dest, i1src, 0, 0);
-	  if (i0_feeds_i1_n && i1_feeds_i2_n)
+	    t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0,
+		       i0_feeds_i1_n && i0dest_in_i0src);
+	  if ((i0_feeds_i1_n && i1_feeds_i2_n) || i0_feeds_i2_n)
 	    t = subst (t, i0dest, i0src, 0, 0);
 
 	  XVECEXP (newpat, 0, --total_sets) = t;
--- gcc/testsuite/gcc.c-torture/compile/pr46034.c.jj	2010-11-01 12:14:56.000000000 +0100
+++ gcc/testsuite/gcc.c-torture/compile/pr46034.c	2010-11-01 12:06:58.000000000 +0100
@@ -0,0 +1,14 @@
+/* PR rtl-optimization/46034 */
+
+void bar (int);
+
+void
+foo (int x, int y)
+{
+  int i;
+  for (i = 0; i < x; i++)
+    {
+      y = __builtin_abs (y);
+      bar (y / 2);
+    }
+}
--- gcc/testsuite/gcc.c-torture/compile/pr46248.c.jj	2010-11-01 12:14:59.000000000 +0100
+++ gcc/testsuite/gcc.c-torture/compile/pr46248.c	2010-11-01 12:14:11.000000000 +0100
@@ -0,0 +1,32 @@
+/* PR rtl-optimization/46248 */
+
+struct S
+{
+  int s;
+};
+
+void
+foo (unsigned char *x, int y, struct S *z)
+{
+  const int l1 = y;
+  const int l2 = y + l1;
+  const int l3 = y + l2;
+  const int l4 = y + l3;
+  const int l5 = y + l4;
+  const int l6 = y + l5;
+  const int l7 = y + l6;
+  int i;
+  for (i = 0; i < 8; i++)
+    {
+      int a = x[l3] - x[l4];
+      int b = x[l4] - x[l5];
+      int c = x[l5] - x[l6];
+      int d = (b >= 0 ? b : -b) - (((a >= 0 ? a : -a) + (c >= 0 ? c : -c)) >> 1);
+      if (d < z->s * 2)
+	{
+	  int v = d * (-b > 0 ? 1 : -1);
+	  x[l2] += v >> 3;
+	  x[l7] -= v >> 3;
+	}
+    }
+}
--- gcc/testsuite/gcc.dg/pr46212.c.jj	2010-11-01 12:15:14.000000000 +0100
+++ gcc/testsuite/gcc.dg/pr46212.c	2010-11-01 12:17:55.000000000 +0100
@@ -0,0 +1,23 @@
+/* PR rtl-optimization/46212 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops" } */
+/* { dg-options "-O3 -funroll-loops -march=i386" { target { { i686-*-* x86_64-*-* } && ilp32 } } } */
+
+static inline unsigned
+foo (void *x)
+{
+  unsigned y = *(volatile unsigned *) (x);
+  return (y >> 24) | ((y >> 8) & 0xff00) | ((y & 0xff00) << 8) | (y << 24);
+}
+
+void
+bar (void *x, void *y, int z)
+{
+  unsigned c;
+  while (z--)
+    {
+      c = foo (y);
+      *(unsigned *) x = (c & 0xf80000) >> 9 | (c & 0xf800) >> 6
+			| (c & 0xf8) >> 3 | (c & 0x80000000) >> 16;
+    }
+}

	Jakub

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Fix combiner (PRs rtl-optimization/46034, rtl-optimization/46212, rtl-optimization/46248)
  2010-11-01 21:28 [PATCH] Fix combiner (PRs rtl-optimization/46034, rtl-optimization/46212, rtl-optimization/46248) Jakub Jelinek
@ 2010-11-02 20:36 ` Eric Botcazou
  2010-11-02 20:51   ` Jakub Jelinek
  0 siblings, 1 reply; 6+ messages in thread
From: Eric Botcazou @ 2010-11-02 20:36 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1480 bytes --]

> There are two issues, one is that the earlier
> newpat = subst (newpat, i0dest, i0src, ...);
> might have (but not necessarily) have changed i1src and so when i1dest
> is first replaced with i1src that way modified and then i0dest is replaced
> with i0src, the replacements are already wrong and as testcases show
> self-referential.

FWIW I also debugged this (and spotted a pasto in the 4-insn combiner patch 
that I'll fix after your fixes, patch attached).  I also roughly came up with:

+      /* Following subst may modify i1src, make a copy of it
+        before it is for added_sets_2 handling if needed.  */
+      if (added_sets_2
+         && i0dest_in_i0src
+         && i0_feeds_i1_n
+         && (i1_feeds_i2_n || i0_feeds_i2_n))
+       i1src_copy = copy_rtx (i1src);

but why not just

 if (i0_feeds_i1_n && added_sets_2 && i1_feeds_i2_n)

i.e. make a copy if substituting I0 will clobber I1SRC and I1SRC will be re- 
substituted in I2PAT?

> The other issue is that if we are to apply more than 
> one substitution on i2pat and i0dest_in_i0src, then we need to pass
> 1 as last argument to the first subst in order to avoid unwanted
> rtl sharing (which again can lead to self-referential rtl).
> Another issue is that if all of i0_feeds_i2_n, i0_feeds_i1_n and
> i1_feeds_i2_n is true, then we'd be substituting i0dest with i0src
> in i2pat twice.

This part looks OK to me.


	* combine.c (try_combine): Fix formatting issues and a pasto.


-- 
Eric Botcazou

[-- Attachment #2: pr46034_eb.diff --]
[-- Type: text/x-diff, Size: 3709 bytes --]

Index: combine.c
===================================================================
--- combine.c	(revision 166059)
+++ combine.c	(working copy)
@@ -3071,23 +3071,23 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx
 	}
 
       n_occurrences = 0;		/* `subst' counts here */
-
-      /* If I1 feeds into I2 and I1DEST is in I1SRC, we need to make a
-	 unique copy of I2SRC each time we substitute it to avoid
-	 self-referential rtl.  */
-
       subst_low_luid = DF_INSN_LUID (i2);
+
+      /* If I1 feeds into I2 and I1DEST is in I1SRC, we need to make a unique
+	 copy of I2SRC each time we substitute it, in order to avoid creating
+	 self-referential RTL when we will be substituting I1SRC for I1DEST
+	 later.  Likewise if I0 feeds into I2 and I0DEST is in I0SRC.  */
       newpat = subst (PATTERN (i3), i2dest, i2src, 0,
-		      ((i1_feeds_i2_n && i1dest_in_i1src)
-		       || (i0_feeds_i2_n && i0dest_in_i0src)));
+		      (i1_feeds_i2_n && i1dest_in_i1src)
+		      || (i0_feeds_i2_n && i0dest_in_i0src));
       substed_i2 = 1;
 
-      /* Record whether i2's body now appears within i3's body.  */
+      /* Record whether I2's body now appears within I3's body.  */
       i2_is_used = n_occurrences;
     }
 
-  /* If we already got a failure, don't try to do more.  Otherwise,
-     try to substitute in I1 if we have it.  */
+  /* If we already got a failure, don't try to do more.  Otherwise, try to
+     substitute I1 if we have it.  */
 
   if (i1 && GET_CODE (newpat) != CLOBBER)
     {
@@ -3098,10 +3098,10 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx
 	   && i1_feeds_i2_n
 	   && dead_or_set_p (i2, i1dest)
 	   && !reg_overlap_mentioned_p (i1dest, newpat))
-	  /* Before we can do this substitution, we must redo the test done
-	     above (see detailed comments there) that ensures  that I1DEST
-	     isn't mentioned in any SETs in NEWPAT that are field assignments.  */
-          || !combinable_i3pat (NULL_RTX, &newpat, i1dest, NULL_RTX, NULL_RTX,
+	   /* Before we can do this substitution, we must redo the test done
+	      above (see detailed comments there) that ensures I1DEST isn't
+	      mentioned in any SETs in NEWPAT that are field assignments.  */
+	  || !combinable_i3pat (NULL_RTX, &newpat, i1dest, NULL_RTX, NULL_RTX,
 				0, 0, 0))
 	{
 	  undo_all ();
@@ -3110,18 +3110,28 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx
 
       n_occurrences = 0;
       subst_low_luid = DF_INSN_LUID (i1);
+
+      /* If I0 feeds into I1 and I0DEST is in I0SRC, we need to make a unique
+	 copy of I1SRC each time we substitute it, in order to avoid creating
+	 self-referential RTL when we will be substituting I0SRC for I0DEST
+	 later.  */
       newpat = subst (newpat, i1dest, i1src, 0,
 		      i0_feeds_i1_n && i0dest_in_i0src);
       substed_i1 = 1;
+
+      /* Record whether I1's body now appears within I3's body.  */
       i1_is_used = n_occurrences;
     }
+
+  /* Likewise for I0 if we have it.  */
+
   if (i0 && GET_CODE (newpat) != CLOBBER)
     {
       if ((FIND_REG_INC_NOTE (i0, NULL_RTX) != 0
 	   && ((i0_feeds_i2_n && dead_or_set_p (i2, i0dest))
 	       || (i0_feeds_i1_n && dead_or_set_p (i1, i0dest)))
 	   && !reg_overlap_mentioned_p (i0dest, newpat))
-          || !combinable_i3pat (NULL_RTX, &newpat, i0dest, NULL_RTX, NULL_RTX,
+	  || !combinable_i3pat (NULL_RTX, &newpat, i0dest, NULL_RTX, NULL_RTX,
 				0, 0, 0))
 	{
 	  undo_all ();
@@ -3130,8 +3140,7 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx
 
       n_occurrences = 0;
       subst_low_luid = DF_INSN_LUID (i0);
-      newpat = subst (newpat, i0dest, i0src, 0,
-		      i0_feeds_i1_n && i0dest_in_i0src);
+      newpat = subst (newpat, i0dest, i0src, 0, 0);
       substed_i0 = 1;
     }
 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Fix combiner (PRs rtl-optimization/46034, rtl-optimization/46212, rtl-optimization/46248)
  2010-11-02 20:36 ` Eric Botcazou
@ 2010-11-02 20:51   ` Jakub Jelinek
  2010-11-02 21:04     ` Eric Botcazou
  0 siblings, 1 reply; 6+ messages in thread
From: Jakub Jelinek @ 2010-11-02 20:51 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches

On Tue, Nov 02, 2010 at 09:27:29PM +0100, Eric Botcazou wrote:
> > There are two issues, one is that the earlier
> > newpat = subst (newpat, i0dest, i0src, ...);
> > might have (but not necessarily) have changed i1src and so when i1dest
> > is first replaced with i1src that way modified and then i0dest is replaced
> > with i0src, the replacements are already wrong and as testcases show
> > self-referential.
> 
> FWIW I also debugged this (and spotted a pasto in the 4-insn combiner patch 
> that I'll fix after your fixes, patch attached).  I also roughly came up with:
> 
> +      /* Following subst may modify i1src, make a copy of it
> +        before it is for added_sets_2 handling if needed.  */
> +      if (added_sets_2
> +         && i0dest_in_i0src
> +         && i0_feeds_i1_n
> +         && (i1_feeds_i2_n || i0_feeds_i2_n))
> +       i1src_copy = copy_rtx (i1src);
> 
> but why not just
> 
>  if (i0_feeds_i1_n && added_sets_2 && i1_feeds_i2_n)
> 
> i.e. make a copy if substituting I0 will clobber I1SRC and I1SRC will be re- 
> substituted in I2PAT?

I think you are right, if !i1_feeds_i2_n then the copy is not needed,
because it will not be used.  If !i0dest_in_i0src, I think
the copy is not strictily needed, because it shouldn't matter whether
i0dest is replaced with i0src just once or more than once, but if you
prefer the if (i0_feeds_i1_n && added_sets_2 && i1_feeds_i2_n)
condition, I can bootstrap/regtest it with that.

> 
> > The other issue is that if we are to apply more than 
> > one substitution on i2pat and i0dest_in_i0src, then we need to pass
> > 1 as last argument to the first subst in order to avoid unwanted
> > rtl sharing (which again can lead to self-referential rtl).
> > Another issue is that if all of i0_feeds_i2_n, i0_feeds_i1_n and
> > i1_feeds_i2_n is true, then we'd be substituting i0dest with i0src
> > in i2pat twice.
> 
> This part looks OK to me.
> 
> 
> 	* combine.c (try_combine): Fix formatting issues and a pasto.

Yeah, the

> -      newpat = subst (newpat, i0dest, i0src, 0,
> -                   i0_feeds_i1_n && i0dest_in_i0src);
> +      newpat = subst (newpat, i0dest, i0src, 0, 0);

is what I came across too and it surprised me, but I decided it
doesn't break anything, just is inefficient.  But you're right it is better
to fix it to make the code more readable.

	Jakub

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Fix combiner (PRs rtl-optimization/46034, rtl-optimization/46212, rtl-optimization/46248)
  2010-11-02 20:51   ` Jakub Jelinek
@ 2010-11-02 21:04     ` Eric Botcazou
  2010-11-03  9:17       ` Jakub Jelinek
  0 siblings, 1 reply; 6+ messages in thread
From: Eric Botcazou @ 2010-11-02 21:04 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

> I think you are right, if !i1_feeds_i2_n then the copy is not needed,
> because it will not be used.  If !i0dest_in_i0src, I think
> the copy is not strictily needed, because it shouldn't matter whether
> i0dest is replaced with i0src just once or more than once, but if you
> prefer the if (i0_feeds_i1_n && added_sets_2 && i1_feeds_i2_n)
> condition, I can bootstrap/regtest it with that.

Yes, I think this is easier to understand that way.  OK with this change if it 
successfully completes the testing cycle.

> > 	* combine.c (try_combine): Fix formatting issues and a pasto.
>
> Yeah, the
>
> > -      newpat = subst (newpat, i0dest, i0src, 0,
> > -                   i0_feeds_i1_n && i0dest_in_i0src);
> > +      newpat = subst (newpat, i0dest, i0src, 0, 0);
>
> is what I came across too and it surprised me, but I decided it
> doesn't break anything, just is inefficient.  But you're right it is better
> to fix it to make the code more readable.

My opinion as well, the pasto can confuse the reader (it did with me :-)

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Fix combiner (PRs rtl-optimization/46034, rtl-optimization/46212, rtl-optimization/46248)
  2010-11-02 21:04     ` Eric Botcazou
@ 2010-11-03  9:17       ` Jakub Jelinek
  2010-11-03 18:03         ` Eric Botcazou
  0 siblings, 1 reply; 6+ messages in thread
From: Jakub Jelinek @ 2010-11-03  9:17 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches

On Tue, Nov 02, 2010 at 10:01:32PM +0100, Eric Botcazou wrote:
> > I think you are right, if !i1_feeds_i2_n then the copy is not needed,
> > because it will not be used.  If !i0dest_in_i0src, I think
> > the copy is not strictily needed, because it shouldn't matter whether
> > i0dest is replaced with i0src just once or more than once, but if you
> > prefer the if (i0_feeds_i1_n && added_sets_2 && i1_feeds_i2_n)
> > condition, I can bootstrap/regtest it with that.
> 
> Yes, I think this is easier to understand that way.  OK with this change if it 
> successfully completes the testing cycle.

Here is what I have committed after bootstrap/regtest.  Thanks.

2010-11-03  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/46034
	PR rtl-optimization/46212
	PR rtl-optimization/46248
	* combine.c (try_combine): If added_sets_2 where i0dest_in_i0src
	and i0 feeds i1 and i1 feeds i2 or i0 feeds i2, make a copy of i1src
	before i0dest -> i0src substitution and pass 1 instead of 0 as last
	argument to subst on i2pat.

	* gcc.c-torture/compile/pr46034.c: New test.
	* gcc.c-torture/compile/pr46248.c: New test.
	* gcc.dg/pr46212.c: New test.

--- gcc/combine.c.jj	2010-11-01 09:07:24.000000000 +0100
+++ gcc/combine.c	2010-11-01 12:01:15.000000000 +0100
@@ -2502,6 +2502,7 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx
   rtx i3dest_killed = 0;
   /* SET_DEST and SET_SRC of I2, I1 and I0.  */
   rtx i2dest = 0, i2src = 0, i1dest = 0, i1src = 0, i0dest = 0, i0src = 0;
+  rtx i1src_copy = 0;
   /* Set if I2DEST was reused as a scratch register.  */
   bool i2scratch = false;
   /* The PATTERNs of I0, I1, and I2, or a copy of them in certain cases.  */
@@ -3128,6 +3129,11 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx
 	  return 0;
 	}
 
+      /* Following subst may modify i1src, make a copy of it
+	 before it is for added_sets_2 handling if needed.  */
+      if (i0_feeds_i1_n && added_sets_2 && i1_feeds_i2_n)
+	i1src_copy = copy_rtx (i1src);
+
       n_occurrences = 0;
       subst_low_luid = DF_INSN_LUID (i0);
       newpat = subst (newpat, i0dest, i0src, 0,
@@ -3200,11 +3206,10 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx
       if (added_sets_2)
 	{
 	  rtx t = i2pat;
-	  if (i0_feeds_i2_n)
-	    t = subst (t, i0dest, i0src, 0, 0);
 	  if (i1_feeds_i2_n)
-	    t = subst (t, i1dest, i1src, 0, 0);
-	  if (i0_feeds_i1_n && i1_feeds_i2_n)
+	    t = subst (t, i1dest, i1src_copy ? i1src_copy : i1src, 0,
+		       i0_feeds_i1_n && i0dest_in_i0src);
+	  if ((i0_feeds_i1_n && i1_feeds_i2_n) || i0_feeds_i2_n)
 	    t = subst (t, i0dest, i0src, 0, 0);
 
 	  XVECEXP (newpat, 0, --total_sets) = t;
--- gcc/testsuite/gcc.c-torture/compile/pr46034.c.jj	2010-11-01 12:14:56.000000000 +0100
+++ gcc/testsuite/gcc.c-torture/compile/pr46034.c	2010-11-01 12:06:58.000000000 +0100
@@ -0,0 +1,14 @@
+/* PR rtl-optimization/46034 */
+
+void bar (int);
+
+void
+foo (int x, int y)
+{
+  int i;
+  for (i = 0; i < x; i++)
+    {
+      y = __builtin_abs (y);
+      bar (y / 2);
+    }
+}
--- gcc/testsuite/gcc.c-torture/compile/pr46248.c.jj	2010-11-01 12:14:59.000000000 +0100
+++ gcc/testsuite/gcc.c-torture/compile/pr46248.c	2010-11-01 12:14:11.000000000 +0100
@@ -0,0 +1,32 @@
+/* PR rtl-optimization/46248 */
+
+struct S
+{
+  int s;
+};
+
+void
+foo (unsigned char *x, int y, struct S *z)
+{
+  const int l1 = y;
+  const int l2 = y + l1;
+  const int l3 = y + l2;
+  const int l4 = y + l3;
+  const int l5 = y + l4;
+  const int l6 = y + l5;
+  const int l7 = y + l6;
+  int i;
+  for (i = 0; i < 8; i++)
+    {
+      int a = x[l3] - x[l4];
+      int b = x[l4] - x[l5];
+      int c = x[l5] - x[l6];
+      int d = (b >= 0 ? b : -b) - (((a >= 0 ? a : -a) + (c >= 0 ? c : -c)) >> 1);
+      if (d < z->s * 2)
+	{
+	  int v = d * (-b > 0 ? 1 : -1);
+	  x[l2] += v >> 3;
+	  x[l7] -= v >> 3;
+	}
+    }
+}
--- gcc/testsuite/gcc.dg/pr46212.c.jj	2010-11-01 12:15:14.000000000 +0100
+++ gcc/testsuite/gcc.dg/pr46212.c	2010-11-01 12:17:55.000000000 +0100
@@ -0,0 +1,23 @@
+/* PR rtl-optimization/46212 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops" } */
+/* { dg-options "-O3 -funroll-loops -march=i386" { target { { i686-*-* x86_64-*-* } && ilp32 } } } */
+
+static inline unsigned
+foo (void *x)
+{
+  unsigned y = *(volatile unsigned *) (x);
+  return (y >> 24) | ((y >> 8) & 0xff00) | ((y & 0xff00) << 8) | (y << 24);
+}
+
+void
+bar (void *x, void *y, int z)
+{
+  unsigned c;
+  while (z--)
+    {
+      c = foo (y);
+      *(unsigned *) x = (c & 0xf80000) >> 9 | (c & 0xf800) >> 6
+			| (c & 0xf8) >> 3 | (c & 0x80000000) >> 16;
+    }
+}

	Jakub

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Fix combiner (PRs rtl-optimization/46034, rtl-optimization/46212, rtl-optimization/46248)
  2010-11-03  9:17       ` Jakub Jelinek
@ 2010-11-03 18:03         ` Eric Botcazou
  0 siblings, 0 replies; 6+ messages in thread
From: Eric Botcazou @ 2010-11-03 18:03 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 302 bytes --]

> Here is what I have committed after bootstrap/regtest.

Thanks.  Here's what I have installed on top of your patch, after retesting.


2010-11-03  Eric Botcazou  <ebotcazou@adacore.com>

        * combine.c (try_combine): Fix formatting issues, improve comments and
	fix a pasto.


-- 
Eric Botcazou

[-- Attachment #2: p.diff --]
[-- Type: text/x-diff, Size: 4391 bytes --]

Index: combine.c
===================================================================
--- combine.c	(revision 166232)
+++ combine.c	(working copy)
@@ -2502,6 +2502,7 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx
   rtx i3dest_killed = 0;
   /* SET_DEST and SET_SRC of I2, I1 and I0.  */
   rtx i2dest = 0, i2src = 0, i1dest = 0, i1src = 0, i0dest = 0, i0src = 0;
+  /* Copy of SET_SRC of I1, if needed.  */
   rtx i1src_copy = 0;
   /* Set if I2DEST was reused as a scratch register.  */
   bool i2scratch = false;
@@ -3072,23 +3073,23 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx
 	}
 
       n_occurrences = 0;		/* `subst' counts here */
-
-      /* If I1 feeds into I2 and I1DEST is in I1SRC, we need to make a
-	 unique copy of I2SRC each time we substitute it to avoid
-	 self-referential rtl.  */
-
       subst_low_luid = DF_INSN_LUID (i2);
+
+      /* If I1 feeds into I2 and I1DEST is in I1SRC, we need to make a unique
+	 copy of I2SRC each time we substitute it, in order to avoid creating
+	 self-referential RTL when we will be substituting I1SRC for I1DEST
+	 later.  Likewise if I0 feeds into I2 and I0DEST is in I0SRC.  */
       newpat = subst (PATTERN (i3), i2dest, i2src, 0,
-		      ((i1_feeds_i2_n && i1dest_in_i1src)
-		       || (i0_feeds_i2_n && i0dest_in_i0src)));
+		      (i1_feeds_i2_n && i1dest_in_i1src)
+		      || (i0_feeds_i2_n && i0dest_in_i0src));
       substed_i2 = 1;
 
-      /* Record whether i2's body now appears within i3's body.  */
+      /* Record whether I2's body now appears within I3's body.  */
       i2_is_used = n_occurrences;
     }
 
-  /* If we already got a failure, don't try to do more.  Otherwise,
-     try to substitute in I1 if we have it.  */
+  /* If we already got a failure, don't try to do more.  Otherwise, try to
+     substitute I1 if we have it.  */
 
   if (i1 && GET_CODE (newpat) != CLOBBER)
     {
@@ -3099,10 +3100,10 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx
 	   && i1_feeds_i2_n
 	   && dead_or_set_p (i2, i1dest)
 	   && !reg_overlap_mentioned_p (i1dest, newpat))
-	  /* Before we can do this substitution, we must redo the test done
-	     above (see detailed comments there) that ensures  that I1DEST
-	     isn't mentioned in any SETs in NEWPAT that are field assignments.  */
-          || !combinable_i3pat (NULL_RTX, &newpat, i1dest, NULL_RTX, NULL_RTX,
+	   /* Before we can do this substitution, we must redo the test done
+	      above (see detailed comments there) that ensures I1DEST isn't
+	      mentioned in any SETs in NEWPAT that are field assignments.  */
+	  || !combinable_i3pat (NULL_RTX, &newpat, i1dest, NULL_RTX, NULL_RTX,
 				0, 0, 0))
 	{
 	  undo_all ();
@@ -3111,33 +3112,42 @@ try_combine (rtx i3, rtx i2, rtx i1, rtx
 
       n_occurrences = 0;
       subst_low_luid = DF_INSN_LUID (i1);
+
+      /* If I0 feeds into I1 and I0DEST is in I0SRC, we need to make a unique
+	 copy of I1SRC each time we substitute it, in order to avoid creating
+	 self-referential RTL when we will be substituting I0SRC for I0DEST
+	 later.  */
       newpat = subst (newpat, i1dest, i1src, 0,
 		      i0_feeds_i1_n && i0dest_in_i0src);
       substed_i1 = 1;
+
+      /* Record whether I1's body now appears within I3's body.  */
       i1_is_used = n_occurrences;
     }
+
+  /* Likewise for I0 if we have it.  */
+
   if (i0 && GET_CODE (newpat) != CLOBBER)
     {
       if ((FIND_REG_INC_NOTE (i0, NULL_RTX) != 0
 	   && ((i0_feeds_i2_n && dead_or_set_p (i2, i0dest))
 	       || (i0_feeds_i1_n && dead_or_set_p (i1, i0dest)))
 	   && !reg_overlap_mentioned_p (i0dest, newpat))
-          || !combinable_i3pat (NULL_RTX, &newpat, i0dest, NULL_RTX, NULL_RTX,
+	  || !combinable_i3pat (NULL_RTX, &newpat, i0dest, NULL_RTX, NULL_RTX,
 				0, 0, 0))
 	{
 	  undo_all ();
 	  return 0;
 	}
 
-      /* Following subst may modify i1src, make a copy of it
-	 before it is for added_sets_2 handling if needed.  */
+      /* If the following substitution will modify I1SRC, make a copy of it
+	 for the case where it is substituted for I1DEST in I2PAT later.  */
       if (i0_feeds_i1_n && added_sets_2 && i1_feeds_i2_n)
 	i1src_copy = copy_rtx (i1src);
 
       n_occurrences = 0;
       subst_low_luid = DF_INSN_LUID (i0);
-      newpat = subst (newpat, i0dest, i0src, 0,
-		      i0_feeds_i1_n && i0dest_in_i0src);
+      newpat = subst (newpat, i0dest, i0src, 0, 0);
       substed_i0 = 1;
     }
 

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2010-11-03 17:54 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-11-01 21:28 [PATCH] Fix combiner (PRs rtl-optimization/46034, rtl-optimization/46212, rtl-optimization/46248) Jakub Jelinek
2010-11-02 20:36 ` Eric Botcazou
2010-11-02 20:51   ` Jakub Jelinek
2010-11-02 21:04     ` Eric Botcazou
2010-11-03  9:17       ` Jakub Jelinek
2010-11-03 18:03         ` Eric Botcazou

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).