public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Add a new pattern in 4-insn combine
@ 2020-11-30  3:08 HAO CHEN GUI
  2020-12-11  2:14 ` HAO CHEN GUI
  2021-01-15 19:01 ` Segher Boessenkool
  0 siblings, 2 replies; 10+ messages in thread
From: HAO CHEN GUI @ 2020-11-30  3:08 UTC (permalink / raw)
  To: Segher Boessenkool, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 588 bytes --]

Hi,

   This patch adds a new pattern(combine 4 insns to 3 insns) in 4-insn 
combine. In the patch, newpat is split twice. The newpat, newi2pat and 
newi1pat replace i3, i2 and i1 respectively. The 4 to 3 combine is done 
at the end where all former attempts fail. In 4 insn combine pre-check, 
the zero and sign extend are added as the patch is for the issue 1 
listed in pr65010.

   The attachments are the patch diff file and change log file.

   Bootstrapped and tested on powerpc64le, ARM and x86 with no 
regressions. Is this okay for trunk? Any recommendations? Thanks a lot.


[-- Attachment #2: ChangeLog --]
[-- Type: text/plain, Size: 176 bytes --]

	* combine.c (combine_validate_cost): Add an argument for newi1pat.
	(try_combine): Add a 4-insn combine pattern for optimizing rtx
	sign_extend (op:zero_extend, zero_extend).

[-- Attachment #3: patch-1.diff --]
[-- Type: text/plain, Size: 10702 bytes --]

diff --git a/gcc/combine.c b/gcc/combine.c
index c88382efbd3..1a7c508217e 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -851,10 +851,11 @@ do_SUBST_LINK (struct insn_link **into, struct insn_link *newval)
 
 static bool
 combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
-		       rtx newpat, rtx newi2pat, rtx newotherpat)
+		       rtx newpat, rtx newi2pat, rtx newotherpat,
+		       rtx newi1pat)
 {
   int i0_cost, i1_cost, i2_cost, i3_cost;
-  int new_i2_cost, new_i3_cost;
+  int new_i1_cost, new_i2_cost, new_i3_cost;
   int old_cost, new_cost;
 
   /* Lookup the original insn_costs.  */
@@ -915,6 +916,20 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
       new_i2_cost = 0;
     }
 
+  if (newi1pat)
+    {
+	tmp = PATTERN (i1);
+	PATTERN (i1) = newi1pat;
+	tmpi = INSN_CODE (i1);
+	INSN_CODE (i1) = -1;
+	new_i1_cost = insn_cost (i1, optimize_this_for_speed_p);
+	PATTERN (i1) = tmp;
+	INSN_CODE (i1) = tmpi;
+	new_cost = new_i1_cost > 0 ? new_i1_cost + new_cost : 0;
+    }
+  else
+    new_i1_cost = 0;
+
   if (undobuf.other_insn)
     {
       int old_other_cost, new_other_cost;
@@ -958,7 +973,10 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
 	fprintf (dump_file, "%d + ", i1_cost);
       fprintf (dump_file, "%d + %d = %d\n", i2_cost, i3_cost, old_cost);
 
-      if (newi2pat)
+      if (newi1pat)
+	fprintf (dump_file, "replacement costs %d + %d + %d = %d\n",
+		 new_i1_cost, new_i2_cost, new_i3_cost, new_cost);
+      else if (newi2pat)
 	fprintf (dump_file, "replacement costs %d + %d = %d\n",
 		 new_i2_cost, new_i3_cost, new_cost);
       else
@@ -973,7 +991,10 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
   INSN_COST (i3) = new_i3_cost;
   if (i1)
     {
-      INSN_COST (i1) = 0;
+      if (newi1pat)
+	INSN_COST (i1) = new_i1_cost;
+      else
+	INSN_COST (i1) = 0;
       if (i0)
 	INSN_COST (i0) = 0;
     }
@@ -2671,8 +2692,8 @@ static rtx_insn *
 try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	     int *new_direct_jump_p, rtx_insn *last_combined_insn)
 {
-  /* New patterns for I3 and I2, respectively.  */
-  rtx newpat, newi2pat = 0;
+  /* New patterns for I3, I2 and I1, respectively.  */
+  rtx newpat, newi2pat = 0, newi1pat = 0;
   rtvec newpat_vec_with_clobbers = 0;
   int substed_i2 = 0, substed_i1 = 0, substed_i0 = 0;
   /* Indicates need to preserve SET in I0, I1 or I2 in I3 if it is not
@@ -2682,8 +2703,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
   int total_sets;
   /* Nonzero if I2's or I1's body now appears in I3.  */
   int i2_is_used = 0, i1_is_used = 0;
-  /* INSN_CODEs for new I3, new I2, and user of condition code.  */
-  int insn_code_number, i2_code_number = 0, other_code_number = 0;
+  /* INSN_CODEs for new I3, new I2, new I1, and user of condition code.  */
+  int insn_code_number, i2_code_number = 0, i1_code_number = 0;
+  int other_code_number = 0;
   /* Contains I3 if the destination of I3 is used in its source, which means
      that the old life of I3 is being killed.  If that usage is placed into
      I2 and not in I3, a REG_DEAD note must be made.  */
@@ -2702,7 +2724,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
   int i2dest_killed = 0, i1dest_killed = 0, i0dest_killed = 0;
   int i1_feeds_i2_n = 0, i0_feeds_i2_n = 0, i0_feeds_i1_n = 0;
   /* Notes that must be added to REG_NOTES in I3 and I2.  */
-  rtx new_i3_notes, new_i2_notes;
+  rtx new_i3_notes, new_i2_notes, new_i1_notes;
   /* Notes that we substituted I3 into I2 instead of the normal case.  */
   int i3_subst_into_i2 = 0;
   /* Notes that I1, I2 or I3 is a MULT operation.  */
@@ -2735,6 +2757,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
       int i;
       int ngood = 0;
       int nshift = 0;
+      int nextend = 0;
       rtx set0, set3;
 
       if (!flag_expensive_optimizations)
@@ -2758,6 +2781,8 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	  else if (GET_CODE (src) == ASHIFT || GET_CODE (src) == ASHIFTRT
 		   || GET_CODE (src) == LSHIFTRT)
 	    nshift++;
+	  else if (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND)
+	    nextend++;
 	}
 
       /* If I0 loads a memory and I3 sets the same memory, then I1 and I2
@@ -2787,7 +2812,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	  && rtx_referenced_p (XEXP (SET_DEST (set3), 0), SET_SRC (set0)))
 	ngood += 2;
 
-      if (ngood < 2 && nshift < 2)
+      if (ngood < 2 && nshift < 2 && nextend < 2)
 	return 0;
     }
 
@@ -3399,6 +3424,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	      i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
 	    }
 
+	  if (i0)
+	    {
+	      subst_low_luid = DF_INSN_LUID (i0);
+	      i0src = subst (i0src, pc_rtx, pc_rtx, 0, 0, 0);
+	    }
+
 	  subst_low_luid = DF_INSN_LUID (i2);
 	  i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
 	}
@@ -3991,6 +4022,61 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	     don't use one now.  */
 	  if (i2_code_number >= 0 && ! (split_code == MULT && ! have_mult))
 	    insn_code_number = recog_for_combine (&newpat, i3, &new_i3_notes);
+
+	  if (i0 && i1
+	      && GET_CODE (newpat) == SET
+	      && BINARY_P (SET_SRC (newpat))
+	      && insn_code_number < 0
+	      && i2_code_number >= 0)
+	    {
+	      newi1pat = NULL_RTX;
+	      rtx newi1dest;
+	      machine_mode new_mode;
+
+	      if ((split = find_split_point (&newpat, i3, false)) != 0
+		  && (!HAVE_cc0 || REG_P (i1dest))
+		  && (GET_MODE (*split) == GET_MODE (i1dest)
+		      || GET_MODE (*split) == VOIDmode
+		      || can_change_dest_mode (i1dest, added_sets_1,
+					       GET_MODE (*split)))
+		  && !reg_referenced_p (i1dest, newpat)
+		  && !modified_between_p (*split, i1, i3))
+		{
+		  new_mode = GET_MODE (*split);
+		  if (REGNO (i1dest) < FIRST_PSEUDO_REGISTER)
+		    newi1dest = gen_rtx_REG (new_mode, REGNO (i1dest));
+		  else
+		    {
+		      SUBST_MODE (regno_reg_rtx[REGNO (i1dest)], new_mode);
+		      newi1dest = regno_reg_rtx[REGNO (i1dest)];
+		    }
+		  newi1pat =  gen_rtx_SET (newi1dest, *split);
+		  SUBST (*split, newi1dest);
+
+		  i1_code_number = recog_for_combine (&newi1pat, i2,
+						      &new_i1_notes);
+
+		  if (i1_code_number < 0)
+		    {
+		      undo_all ();
+		      return 0;
+		    }
+		  else
+		    {
+		      /* swap newi1pat and newi2pat if dest of newi2pat is set
+			 in newi1pat.  */
+		      if (reg_referenced_p (newdest, newi1pat))
+			{
+			    std::swap (i1dest, i2dest);
+			    std::swap (newi1pat, newi2pat);
+			    std::swap (i1_code_number, i2_code_number);
+			}
+
+		      insn_code_number = recog_for_combine (&newpat, i3,
+							    &new_i3_notes);
+		    }
+		}
+	    }
 	}
     }
 
@@ -4209,11 +4295,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 
   /* Only allow this combination if insn_cost reports that the
      replacement instructions are cheaper than the originals.  */
-  if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat))
-    {
-      undo_all ();
-      return 0;
-    }
+    if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat,
+				newi1pat))
+      {
+	undo_all ();
+	return 0;
+      }
 
   if (MAY_HAVE_DEBUG_BIND_INSNS)
     {
@@ -4242,6 +4329,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 		/* Put back the new mode.  */
 		adjust_reg_mode (reg, new_mode);
 	      }
+	    else if (reg == i1dest)
+	      {
+		propagate_for_debug (i1, last_combined_insn, reg, i1src,
+				     this_basic_block);
+		adjust_reg_mode (reg, new_mode);
+	      }
 	    else
 	      {
 		rtx tempreg = gen_raw_REG (old_mode, REGNO (reg));
@@ -4446,6 +4539,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
     reset_used_flags (i0notes);
     reset_used_flags (newpat);
     reset_used_flags (newi2pat);
+    reset_used_flags (newi1pat);
     if (undobuf.other_insn)
       reset_used_flags (PATTERN (undobuf.other_insn));
 
@@ -4455,6 +4549,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
     i0notes = copy_rtx_if_shared (i0notes);
     newpat = copy_rtx_if_shared (newpat);
     newi2pat = copy_rtx_if_shared (newi2pat);
+    newi1pat = copy_rtx_if_shared (newi1pat);
     if (undobuf.other_insn)
       reset_used_flags (PATTERN (undobuf.other_insn));
 
@@ -4552,10 +4647,21 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
       {
 	LOG_LINKS (i1) = NULL;
 	REG_NOTES (i1) = 0;
-	if (MAY_HAVE_DEBUG_BIND_INSNS)
-	  propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
-			       this_basic_block);
-	SET_INSN_DELETED (i1);
+	if (newi1pat)
+	  {
+	    if (MAY_HAVE_DEBUG_BIND_INSNS)
+	      propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
+				   this_basic_block);
+	    INSN_CODE (i1) = i1_code_number;
+	    PATTERN (i1) = newi1pat;
+	  }
+	else
+	  {
+	    if (MAY_HAVE_DEBUG_BIND_INSNS)
+	    propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
+				 this_basic_block);
+	    SET_INSN_DELETED (i1);
+	  }
       }
 
     if (i0)
@@ -4579,6 +4685,8 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
       from_luid = DF_INSN_LUID (i1);
     else
       from_luid = DF_INSN_LUID (i2);
+    if (newi1pat)
+      move_deaths (newi1pat, NULL_RTX, from_luid, i1, &midnotes);
     if (newi2pat)
       move_deaths (newi2pat, NULL_RTX, from_luid, i2, &midnotes);
     move_deaths (newpat, newi2pat, from_luid, i3, &midnotes);
@@ -4604,6 +4712,10 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
        know these are REG_UNUSED and want them to go to the desired insn,
        so we always pass it as i3.  */
 
+    if (newi1pat && new_i1_notes)
+      distribute_notes (new_i1_notes, i1, i1, NULL, NULL_RTX, NULL_RTX,
+			NULL_RTX);
+
     if (newi2pat && new_i2_notes)
       distribute_notes (new_i2_notes, i2, i2, NULL, NULL_RTX, NULL_RTX,
 			NULL_RTX);
@@ -4738,6 +4850,8 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
     /* Update reg_stat[].nonzero_bits et al for any changes that may have
        been made to this insn.  The order is important, because newi2pat
        can affect nonzero_bits of newpat.  */
+    if (newi1pat)
+      note_pattern_stores (newi1pat, set_nonzero_bits_and_sign_copies, NULL);
     if (newi2pat)
       note_pattern_stores (newi2pat, set_nonzero_bits_and_sign_copies, NULL);
     note_pattern_stores (newpat, set_nonzero_bits_and_sign_copies, NULL);

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Add a new pattern in 4-insn combine
  2020-11-30  3:08 [PATCH] Add a new pattern in 4-insn combine HAO CHEN GUI
@ 2020-12-11  2:14 ` HAO CHEN GUI
  2021-01-04  2:03   ` HAO CHEN GUI
  2021-01-15 19:01 ` Segher Boessenkool
  1 sibling, 1 reply; 10+ messages in thread
From: HAO CHEN GUI @ 2020-12-11  2:14 UTC (permalink / raw)
  To: Segher Boessenkool, gcc-patches

Segher,

     Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560573.html

On 30/11/2020 上午 11:08, HAO CHEN GUI wrote:
> Hi,
>
>   This patch adds a new pattern(combine 4 insns to 3 insns) in 4-insn 
> combine. In the patch, newpat is split twice. The newpat, newi2pat and 
> newi1pat replace i3, i2 and i1 respectively. The 4 to 3 combine is 
> done at the end where all former attempts fail. In 4 insn combine 
> pre-check, the zero and sign extend are added as the patch is for the 
> issue 1 listed in pr65010.
>
>   The attachments are the patch diff file and change log file.
>
>   Bootstrapped and tested on powerpc64le, ARM and x86 with no 
> regressions. Is this okay for trunk? Any recommendations? Thanks a lot.
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Add a new pattern in 4-insn combine
  2020-12-11  2:14 ` HAO CHEN GUI
@ 2021-01-04  2:03   ` HAO CHEN GUI
  2021-01-14  8:44     ` HAO CHEN GUI
  0 siblings, 1 reply; 10+ messages in thread
From: HAO CHEN GUI @ 2021-01-04  2:03 UTC (permalink / raw)
  To: Segher Boessenkool, gcc-patches

Segher,

     Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560573.html

Thanks a lot.


On 11/12/2020 上午 10:14, HAO CHEN GUI wrote:
> Segher,
>
>     Gentle ping this:
>
> https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560573.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Add a new pattern in 4-insn combine
  2021-01-04  2:03   ` HAO CHEN GUI
@ 2021-01-14  8:44     ` HAO CHEN GUI
  0 siblings, 0 replies; 10+ messages in thread
From: HAO CHEN GUI @ 2021-01-14  8:44 UTC (permalink / raw)
  To: Segher Boessenkool, gcc-patches

Segher,

     Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560573.html

Thanks a lot.

On 4/1/2021 上午 10:03, HAO CHEN GUI wrote:
> Segher,
>
>     Gentle ping this:
>
> https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560573.html
>
> Thanks a lot.
>
>
> On 11/12/2020 上午 10:14, HAO CHEN GUI wrote:
>> Segher,
>>
>>     Gentle ping this:
>>
>> https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560573.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Add a new pattern in 4-insn combine
  2020-11-30  3:08 [PATCH] Add a new pattern in 4-insn combine HAO CHEN GUI
  2020-12-11  2:14 ` HAO CHEN GUI
@ 2021-01-15 19:01 ` Segher Boessenkool
  1 sibling, 0 replies; 10+ messages in thread
From: Segher Boessenkool @ 2021-01-15 19:01 UTC (permalink / raw)
  To: HAO CHEN GUI; +Cc: gcc-patches

On Mon, Nov 30, 2020 at 11:08:22AM +0800, HAO CHEN GUI wrote:
>   This patch adds a new pattern(combine 4 insns to 3 insns) in 4-insn 
> combine. In the patch, newpat is split twice. The newpat, newi2pat and 

As I said before, that has a lot of problems, and is only suitable for
stage 1 (after many fixes!)

I don't think this is a good idea at all, fwiw.  Before we even should
think about doing 4->3 combinations, we should stop severely limiting
what combinations of 4 insns we allow; and we do that because it will
slow down the compiler a lot, for no big gain.


Segher

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Add a new pattern in 4-insn combine
  2020-11-11  0:18 ` Jeff Law
@ 2020-11-14  3:30   ` Jim Wilson
  0 siblings, 0 replies; 10+ messages in thread
From: Jim Wilson @ 2020-11-14  3:30 UTC (permalink / raw)
  To: Jeff Law; +Cc: HAO CHEN GUI, gcc-patches, Segher Boessenkool

On Tue, Nov 10, 2020 at 4:18 PM Jeff Law via Gcc-patches <
gcc-patches@gcc.gnu.org> wrote:

>
> On 11/8/20 7:48 PM, HAO CHEN GUI via Gcc-patches wrote:
> > ChangeLog
> >
> >       * combine.c (combine_validate_cost): Add an argument for newi1pat.
> >       (try_combine): Add a 4-insn combine pattern for optimizing rtx
> >       sign_extend (op:zero_extend, zero_extend).
>
> It'd be nice to see motivating examples.  Depending on their structure,
> we may get better results cleaning things up with match.pd patterns.  We
> already have some which work in this space.
>

I don't have a use case for this specifically, but for the general case of
allowing a 4-insn combine to split into 3 I do have a RISC-V use case for
that in Philipp Tomsich's recent match.pd thread.
https://gcc.gnu.org/pipermail/gcc-patches/2020-November/558765.html

Maybe instead of modifying combine to know about the
sign_extend(zero_extend zero_extend) case you could add a 4->3 splitter to
the rs6000.md file, and then where we call combine_split_insns modify the
code to accept 3 output insns when there were 4 input insns.  That would
fix this rs6000 case, and also allow me to fix my RISC-V case with a
riscv.md splitter.

Jim

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Add a new pattern in 4-insn combine
  2020-11-12  0:22 ` Segher Boessenkool
@ 2020-11-12 14:54   ` Segher Boessenkool
  0 siblings, 0 replies; 10+ messages in thread
From: Segher Boessenkool @ 2020-11-12 14:54 UTC (permalink / raw)
  To: HAO CHEN GUI; +Cc: gcc-patches

On Wed, Nov 11, 2020 at 06:22:53PM -0600, Segher Boessenkool wrote:
> I'm running an all-arch comparison with this patch, just to see what it
> does, but [...]

Results: C0 is trunk, C1 with patch:

                    C0        C1
       alpha   6422312   99.971%
         arc   3783838  100.000%
         arm  10168277  100.000%
       arm64  20077721         0
       armhf  14886534  100.000%
         c6x   2509915  100.000%
        csky         0         0
       h8300   1229802  100.000%
        i386  12040952         0
        ia64  18555229  100.000%
        m68k   3868729  100.000%
  microblaze   5885763  100.000%
        mips   9158101  100.000%
      mips64   7402870  100.001%
       nds32   4833031  100.000%
       nios2   3917080  100.000%
    openrisc   4571561  100.000%
      parisc   7725308  100.000%
    parisc64         0         0
     powerpc  11004119  100.000%
   powerpc64  22618492  100.000%
 powerpc64le  19609678  100.000%
     riscv32   1639840  100.000%
     riscv64   7658668         0
        s390  15345481         0
          sh         0         0
     shnommu   1694176  100.000%
       sparc   4744809  100.000%
     sparc64   7205254  100.000%
      x86_64  19870124         0
      xtensa   2658455  100.002%

0 means it did not build...  So some targets newly ICE (x86, riscv, z).

It surprisingly only helps alpha a bit, and all other changes are in the
wrong direction (but very slightly).


Segher

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Add a new pattern in 4-insn combine
  2020-11-09  2:48 HAO CHEN GUI
  2020-11-11  0:18 ` Jeff Law
@ 2020-11-12  0:22 ` Segher Boessenkool
  2020-11-12 14:54   ` Segher Boessenkool
  1 sibling, 1 reply; 10+ messages in thread
From: Segher Boessenkool @ 2020-11-12  0:22 UTC (permalink / raw)
  To: HAO CHEN GUI; +Cc: gcc-patches

Hi Hao Chen,

[ You first need to add yourself to MAINTAINERS?  And get an account to
  do that, if you do not yet have one yet :-) ]

On Mon, Nov 09, 2020 at 10:48:19AM +0800, HAO CHEN GUI wrote:
> This patch adds a new pattern in 4-insn combine. It supports the 
> following sign_extend(op: zero_extend, zero_extend) optimization. In the 
> patch, newpat is split twice. The first split becomes newi1pat and the 
> second becomes newi2pat. They replace i1, i2 and i3 if all of them can 
> be recognized.
> 
> 7: r126:SI=zero_extend([r123:DI+0x1])
> 6: r125:SI=zero_extend([r123:DI])
> 8: r127:SI=r125:SI+r126:SI
> 9: r124:DI=sign_extend(r127:SI)
> 
> are replaced by:
> 
> 7: r125:DI=zero_extend([r123:DI])
> 8: r127:DI=zero_extend([r123:DI+0x1])
> 9: r124:DI=r127:DI+r125:DI

So in the original insn 8 registers 125 and 126 died, so this isn't
changing insn 7 at all, just moving it back from 6!  That is not what
combine should do, but it also is problematic: is it checked anywhere
that we *can* move the insn like this?

> --- a/gcc/combine.c
> +++ b/gcc/combine.c
> @@ -851,10 +851,11 @@ do_SUBST_LINK (struct insn_link **into, struct insn_link *newval)
>  
>  static bool
>  combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
> -		       rtx newpat, rtx newi2pat, rtx newotherpat)
> +		       rtx newpat, rtx newi2pat, rtx newotherpat,
> +		       rtx newi1pat)

(Keep the args ordered: 3, 2, 1, other.  "newpat" means "newi3pat").

> @@ -2672,7 +2693,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
>  	     int *new_direct_jump_p, rtx_insn *last_combined_insn)
>  {
>    /* New patterns for I3 and I2, respectively.  */
> -  rtx newpat, newi2pat = 0;
> +  rtx newpat, newi2pat = 0, newi1pat = 0;
>    rtvec newpat_vec_with_clobbers = 0;
>    int substed_i2 = 0, substed_i1 = 0, substed_i0 = 0;
>    /* Indicates need to preserve SET in I0, I1 or I2 in I3 if it is not
> @@ -2682,8 +2703,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
>    int total_sets;
>    /* Nonzero if I2's or I1's body now appears in I3.  */
>    int i2_is_used = 0, i1_is_used = 0;
> -  /* INSN_CODEs for new I3, new I2, and user of condition code.  */
> +  /* INSN_CODEs for new I3, new I2, new I1 and user of condition code.  */

(Comma after I1: "new I1, and ...".)

>    int insn_code_number, i2_code_number = 0, other_code_number = 0;
> +  int i1_code_number = 0;

Well, put it together with i3, i2 then, put "other" on its own line?

> @@ -2756,7 +2778,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
>  	  else if (BINARY_P (src) && CONSTANT_P (XEXP (src, 1)))
>  	    ngood++;
>  	  else if (GET_CODE (src) == ASHIFT || GET_CODE (src) == ASHIFTRT
> -		   || GET_CODE (src) == LSHIFTRT)
> +		   || GET_CODE (src) == LSHIFTRT
> +		   || GET_CODE (src) == SIGN_EXTEND
> +		   || GET_CODE (src) == ZERO_EXTEND)
>  	    nshift++;

Extends are not shifts.  Please count "nextend" separately?

> @@ -3399,6 +3423,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
>  	      i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
>  	    }
>  
> +	  if (i0)
> +	    {
> +	      subst_low_luid = DF_INSN_LUID (i0);
> +	      i0src = subst (i0src, pc_rtx, pc_rtx, 0, 0, 0);
> +	    }

This won't work?  "subst_low_uid" is overwritten right in the next
statement:

>  	  subst_low_luid = DF_INSN_LUID (i2);
>  	  i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);

> @@ -3920,6 +3950,50 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
>  	      rtx src_op0 = XEXP (setsrc, 0);
>  	      rtx src_op1 = XEXP (setsrc, 1);
>  
> +	      /* Double split when src of i0 and i1 are both ZERO_EXTEND.  */
> +	      if (i0 && i1
> +		  && GET_CODE (PATTERN (i0)) == SET
> +		  && GET_CODE (PATTERN (i1)) == SET
> +		  && GET_CODE (SET_SRC (PATTERN (i0))) == ZERO_EXTEND
> +		  && GET_CODE (SET_SRC (PATTERN (i1))) == ZERO_EXTEND
> +		  && (rtx_equal_p (XEXP (*split, 0),
> +				   XEXP (SET_SRC (PATTERN (i1)), 0))
> +		      || rtx_equal_p (XEXP (*split, 0),
> +				      XEXP (SET_SRC (PATTERN (i0)), 0))))
> +		{
> +		  newi1pat = NULL_RTX;
> +		  rtx newdest, *i0_i1dest;
> +		  machine_mode new_mode;
> +
> +		  new_mode = GET_MODE (*split);
> +		  if (rtx_equal_p (XEXP (*split, 0),
> +				   XEXP (SET_SRC (PATTERN (i1)), 0)))
> +		    i0_i1dest = &i1dest;
> +		  else
> +		    i0_i1dest = &i0dest;
> +
> +		  if (REGNO (i1dest) < FIRST_PSEUDO_REGISTER)
> +		    newdest = gen_rtx_REG (new_mode, REGNO (*i0_i1dest));

You can only have one mode that you use pseudos in in the whole routine.
This doesn't work, i0_i1dest can be used elsewhere still.

> +		  else
> +		    {
> +		      SUBST_MODE (regno_reg_rtx[REGNO (*i0_i1dest)], new_mode);
> +		      newdest = regno_reg_rtx[REGNO (*i0_i1dest)];
> +		    }

Messing with modes in general is a big problem for combine (it still
has some open problems for this, too).

> +		  newi1pat =  gen_rtx_SET (newdest, *split);
> +		  SUBST (*split, newdest);
> +
> +		  i1_code_number = recog_for_combine (&newi1pat, i2,
> +							&new_i2_notes);
> +		  if (i1_code_number < 0)
> +		    {
> +		      undo_all ();
> +		      return 0;
> +		    }
> +
> +		  split = find_split_point (&newpat, i3, false);
> +		}

I doubt that you can call find_split_point twice like this...  It likely
still needs to check that the insns it ends up with for newi1 and newi2
can go, in that order, where i1 and i2 used to be.  For i2 and i3 this
is much simpler, but now we will need a lot of extra work to do these
checks.

> +	    else if (reg == i0dest)
> +	      {
> +		propagate_for_debug (i0, last_combined_insn, reg, i0src,
> +				     this_basic_block);
> +		adjust_reg_mode (reg, new_mode);
> +	      }

How can i0dest ever end up in the final insns?  Ouch.

I'm running an all-arch comparison with this patch, just to see what it
does, but please make it so that for

7: r126:SI=zero_extend([r123:DI+0x1])
6: r125:SI=zero_extend([r123:DI])
8: r127:SI=r125:SI+r126:SI
9: r124:DI=sign_extend(r127:SI)

8+9 is combined to just

9: r124:DI=r125:DI+r126:DI

and that's all?  (The "compact" format leaves out crucial information,
but the loads here are QImode loads.)

I don't think 4->3 combines will do very much that cannot be done by
some combination of 2->1, 3->1, 4->1, 2->2, 3->2, 4->2, but we'll see.
It will be quite expensive to do in any case (combine isn't one of the
expensive passes anymore, but let's try to keep it that way ;-) )


Segher

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Add a new pattern in 4-insn combine
  2020-11-09  2:48 HAO CHEN GUI
@ 2020-11-11  0:18 ` Jeff Law
  2020-11-14  3:30   ` Jim Wilson
  2020-11-12  0:22 ` Segher Boessenkool
  1 sibling, 1 reply; 10+ messages in thread
From: Jeff Law @ 2020-11-11  0:18 UTC (permalink / raw)
  To: HAO CHEN GUI, gcc-patches; +Cc: Segher Boessenkool


On 11/8/20 7:48 PM, HAO CHEN GUI via Gcc-patches wrote:
> Hi,
>
> This patch adds a new pattern in 4-insn combine. It supports the
> following sign_extend(op: zero_extend, zero_extend) optimization. In
> the patch, newpat is split twice. The first split becomes newi1pat and
> the second becomes newi2pat. They replace i1, i2 and i3 if all of them
> can be recognized.
>
> 7: r126:SI=zero_extend([r123:DI+0x1])
> 6: r125:SI=zero_extend([r123:DI])
> 8: r127:SI=r125:SI+r126:SI
> 9: r124:DI=sign_extend(r127:SI)
>
> are replaced by:
>
> 7: r125:DI=zero_extend([r123:DI])
> 8: r127:DI=zero_extend([r123:DI+0x1])
> 9: r124:DI=r127:DI+r125:DI
>
> The attachments are the patch diff file and change log file.
>
> Bootstrapped and tested on powerpc64le-linux-gnu with no regressions. 
> Is this okay for trunk? Any recommendations? Thanks a lot.
>
>
>
> ChangeLog
>
> 	* combine.c (combine_validate_cost): Add an argument for newi1pat.
> 	(try_combine): Add a 4-insn combine pattern for optimizing rtx
> 	sign_extend (op:zero_extend, zero_extend).

It'd be nice to see motivating examples.  Depending on their structure,
we may get better results cleaning things up with match.pd patterns.  We
already have some which work in this space.


Whether or not the combiner change itself moves forward is up to Segher
though.

jeff

>
> patch.diff
>
> diff --git a/gcc/combine.c b/gcc/combine.c
> index c88382efbd3..73259e6a9ed 100644
> --- a/gcc/combine.c
> +++ b/gcc/combine.c
> @@ -851,10 +851,11 @@ do_SUBST_LINK (struct insn_link **into, struct insn_link *newval)
>  
>  static bool
>  combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
> -		       rtx newpat, rtx newi2pat, rtx newotherpat)
> +		       rtx newpat, rtx newi2pat, rtx newotherpat,
> +		       rtx newi1pat)
>  {
>    int i0_cost, i1_cost, i2_cost, i3_cost;
> -  int new_i2_cost, new_i3_cost;
> +  int new_i1_cost, new_i2_cost, new_i3_cost;
>    int old_cost, new_cost;
>  
>    /* Lookup the original insn_costs.  */
> @@ -915,6 +916,20 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
>        new_i2_cost = 0;
>      }
>  
> +  if (newi1pat)
> +    {
> +	tmp = PATTERN (i1);
> +	PATTERN (i1) = newi1pat;
> +	tmpi = INSN_CODE (i1);
> +	INSN_CODE (i1) = -1;
> +	new_i1_cost = insn_cost (i1, optimize_this_for_speed_p);
> +	PATTERN (i1) = tmp;
> +	INSN_CODE (i1) = tmpi;
> +	new_cost = new_i1_cost > 0 ? new_i1_cost + new_cost : 0;
> +    }
> +  else
> +    new_i1_cost = 0;
> +
>    if (undobuf.other_insn)
>      {
>        int old_other_cost, new_other_cost;
> @@ -958,7 +973,10 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
>  	fprintf (dump_file, "%d + ", i1_cost);
>        fprintf (dump_file, "%d + %d = %d\n", i2_cost, i3_cost, old_cost);
>  
> -      if (newi2pat)
> +      if (newi1pat)
> +	fprintf (dump_file, "replacement costs %d + %d + %d = %d\n",
> +		 new_i1_cost, new_i2_cost, new_i3_cost, new_cost);
> +      else if (newi2pat)
>  	fprintf (dump_file, "replacement costs %d + %d = %d\n",
>  		 new_i2_cost, new_i3_cost, new_cost);
>        else
> @@ -973,7 +991,10 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
>    INSN_COST (i3) = new_i3_cost;
>    if (i1)
>      {
> -      INSN_COST (i1) = 0;
> +      if (newi1pat)
> +	INSN_COST (i1) = new_i1_cost;
> +      else
> +	INSN_COST (i1) = 0;
>        if (i0)
>  	INSN_COST (i0) = 0;
>      }
> @@ -2672,7 +2693,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
>  	     int *new_direct_jump_p, rtx_insn *last_combined_insn)
>  {
>    /* New patterns for I3 and I2, respectively.  */
> -  rtx newpat, newi2pat = 0;
> +  rtx newpat, newi2pat = 0, newi1pat = 0;
>    rtvec newpat_vec_with_clobbers = 0;
>    int substed_i2 = 0, substed_i1 = 0, substed_i0 = 0;
>    /* Indicates need to preserve SET in I0, I1 or I2 in I3 if it is not
> @@ -2682,8 +2703,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
>    int total_sets;
>    /* Nonzero if I2's or I1's body now appears in I3.  */
>    int i2_is_used = 0, i1_is_used = 0;
> -  /* INSN_CODEs for new I3, new I2, and user of condition code.  */
> +  /* INSN_CODEs for new I3, new I2, new I1 and user of condition code.  */
>    int insn_code_number, i2_code_number = 0, other_code_number = 0;
> +  int i1_code_number = 0;
>    /* Contains I3 if the destination of I3 is used in its source, which means
>       that the old life of I3 is being killed.  If that usage is placed into
>       I2 and not in I3, a REG_DEAD note must be made.  */
> @@ -2756,7 +2778,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
>  	  else if (BINARY_P (src) && CONSTANT_P (XEXP (src, 1)))
>  	    ngood++;
>  	  else if (GET_CODE (src) == ASHIFT || GET_CODE (src) == ASHIFTRT
> -		   || GET_CODE (src) == LSHIFTRT)
> +		   || GET_CODE (src) == LSHIFTRT
> +		   || GET_CODE (src) == SIGN_EXTEND
> +		   || GET_CODE (src) == ZERO_EXTEND)
>  	    nshift++;
>  	}
>  
> @@ -3399,6 +3423,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
>  	      i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
>  	    }
>  
> +	  if (i0)
> +	    {
> +	      subst_low_luid = DF_INSN_LUID (i0);
> +	      i0src = subst (i0src, pc_rtx, pc_rtx, 0, 0, 0);
> +	    }
> +
>  	  subst_low_luid = DF_INSN_LUID (i2);
>  	  i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
>  	}
> @@ -3920,6 +3950,50 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
>  	      rtx src_op0 = XEXP (setsrc, 0);
>  	      rtx src_op1 = XEXP (setsrc, 1);
>  
> +	      /* Double split when src of i0 and i1 are both ZERO_EXTEND.  */
> +	      if (i0 && i1
> +		  && GET_CODE (PATTERN (i0)) == SET
> +		  && GET_CODE (PATTERN (i1)) == SET
> +		  && GET_CODE (SET_SRC (PATTERN (i0))) == ZERO_EXTEND
> +		  && GET_CODE (SET_SRC (PATTERN (i1))) == ZERO_EXTEND
> +		  && (rtx_equal_p (XEXP (*split, 0),
> +				   XEXP (SET_SRC (PATTERN (i1)), 0))
> +		      || rtx_equal_p (XEXP (*split, 0),
> +				      XEXP (SET_SRC (PATTERN (i0)), 0))))
> +		{
> +		  newi1pat = NULL_RTX;
> +		  rtx newdest, *i0_i1dest;
> +		  machine_mode new_mode;
> +
> +		  new_mode = GET_MODE (*split);
> +		  if (rtx_equal_p (XEXP (*split, 0),
> +				   XEXP (SET_SRC (PATTERN (i1)), 0)))
> +		    i0_i1dest = &i1dest;
> +		  else
> +		    i0_i1dest = &i0dest;
> +
> +		  if (REGNO (i1dest) < FIRST_PSEUDO_REGISTER)
> +		    newdest = gen_rtx_REG (new_mode, REGNO (*i0_i1dest));
> +		  else
> +		    {
> +		      SUBST_MODE (regno_reg_rtx[REGNO (*i0_i1dest)], new_mode);
> +		      newdest = regno_reg_rtx[REGNO (*i0_i1dest)];
> +		    }
> +
> +		  newi1pat =  gen_rtx_SET (newdest, *split);
> +		  SUBST (*split, newdest);
> +
> +		  i1_code_number = recog_for_combine (&newi1pat, i2,
> +							&new_i2_notes);
> +		  if (i1_code_number < 0)
> +		    {
> +		      undo_all ();
> +		      return 0;
> +		    }
> +
> +		  split = find_split_point (&newpat, i3, false);
> +		}
> +
>  	      /* Split "X = Y op Y" as "Z = Y; X = Z op Z".  */
>  	      if (rtx_equal_p (src_op0, src_op1))
>  		{
> @@ -4209,11 +4283,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
>  
>    /* Only allow this combination if insn_cost reports that the
>       replacement instructions are cheaper than the originals.  */
> -  if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat))
> -    {
> -      undo_all ();
> -      return 0;
> -    }
> +    if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat,
> +				newi1pat))
> +      {
> +	undo_all ();
> +	return 0;
> +      }
>  
>    if (MAY_HAVE_DEBUG_BIND_INSNS)
>      {
> @@ -4242,6 +4317,18 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
>  		/* Put back the new mode.  */
>  		adjust_reg_mode (reg, new_mode);
>  	      }
> +	    else if (reg == i0dest)
> +	      {
> +		propagate_for_debug (i0, last_combined_insn, reg, i0src,
> +				     this_basic_block);
> +		adjust_reg_mode (reg, new_mode);
> +	      }
> +	    else if (reg == i1dest)
> +	      {
> +		propagate_for_debug (i1, last_combined_insn, reg, i1src,
> +				     this_basic_block);
> +		adjust_reg_mode (reg, new_mode);
> +	      }
>  	    else
>  	      {
>  		rtx tempreg = gen_raw_REG (old_mode, REGNO (reg));
> @@ -4552,10 +4639,21 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
>        {
>  	LOG_LINKS (i1) = NULL;
>  	REG_NOTES (i1) = 0;
> -	if (MAY_HAVE_DEBUG_BIND_INSNS)
> -	  propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
> -			       this_basic_block);
> -	SET_INSN_DELETED (i1);
> +	if (newi1pat)
> +	  {
> +	    if (MAY_HAVE_DEBUG_BIND_INSNS)
> +	      propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
> +				   this_basic_block);
> +	    INSN_CODE (i1) = i1_code_number;
> +	    PATTERN (i1) = newi1pat;
> +	  }
> +	else
> +	  {
> +	    if (MAY_HAVE_DEBUG_BIND_INSNS)
> +	    propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
> +				 this_basic_block);
> +	    SET_INSN_DELETED (i1);
> +	  }
>        }
>  
>      if (i0)

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH] Add a new pattern in 4-insn combine
@ 2020-11-09  2:48 HAO CHEN GUI
  2020-11-11  0:18 ` Jeff Law
  2020-11-12  0:22 ` Segher Boessenkool
  0 siblings, 2 replies; 10+ messages in thread
From: HAO CHEN GUI @ 2020-11-09  2:48 UTC (permalink / raw)
  To: gcc-patches; +Cc: Segher Boessenkool

[-- Attachment #1: Type: text/plain, Size: 751 bytes --]

Hi,

This patch adds a new pattern in 4-insn combine. It supports the 
following sign_extend(op: zero_extend, zero_extend) optimization. In the 
patch, newpat is split twice. The first split becomes newi1pat and the 
second becomes newi2pat. They replace i1, i2 and i3 if all of them can 
be recognized.

7: r126:SI=zero_extend([r123:DI+0x1])
6: r125:SI=zero_extend([r123:DI])
8: r127:SI=r125:SI+r126:SI
9: r124:DI=sign_extend(r127:SI)

are replaced by:

7: r125:DI=zero_extend([r123:DI])
8: r127:DI=zero_extend([r123:DI+0x1])
9: r124:DI=r127:DI+r125:DI

The attachments are the patch diff file and change log file.

Bootstrapped and tested on powerpc64le-linux-gnu with no regressions.  
Is this okay for trunk? Any recommendations? Thanks a lot.



[-- Attachment #2: ChangeLog --]
[-- Type: text/plain, Size: 176 bytes --]

	* combine.c (combine_validate_cost): Add an argument for newi1pat.
	(try_combine): Add a 4-insn combine pattern for optimizing rtx
	sign_extend (op:zero_extend, zero_extend).

[-- Attachment #3: patch.diff --]
[-- Type: text/plain, Size: 7331 bytes --]

diff --git a/gcc/combine.c b/gcc/combine.c
index c88382efbd3..73259e6a9ed 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -851,10 +851,11 @@ do_SUBST_LINK (struct insn_link **into, struct insn_link *newval)
 
 static bool
 combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
-		       rtx newpat, rtx newi2pat, rtx newotherpat)
+		       rtx newpat, rtx newi2pat, rtx newotherpat,
+		       rtx newi1pat)
 {
   int i0_cost, i1_cost, i2_cost, i3_cost;
-  int new_i2_cost, new_i3_cost;
+  int new_i1_cost, new_i2_cost, new_i3_cost;
   int old_cost, new_cost;
 
   /* Lookup the original insn_costs.  */
@@ -915,6 +916,20 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
       new_i2_cost = 0;
     }
 
+  if (newi1pat)
+    {
+	tmp = PATTERN (i1);
+	PATTERN (i1) = newi1pat;
+	tmpi = INSN_CODE (i1);
+	INSN_CODE (i1) = -1;
+	new_i1_cost = insn_cost (i1, optimize_this_for_speed_p);
+	PATTERN (i1) = tmp;
+	INSN_CODE (i1) = tmpi;
+	new_cost = new_i1_cost > 0 ? new_i1_cost + new_cost : 0;
+    }
+  else
+    new_i1_cost = 0;
+
   if (undobuf.other_insn)
     {
       int old_other_cost, new_other_cost;
@@ -958,7 +973,10 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
 	fprintf (dump_file, "%d + ", i1_cost);
       fprintf (dump_file, "%d + %d = %d\n", i2_cost, i3_cost, old_cost);
 
-      if (newi2pat)
+      if (newi1pat)
+	fprintf (dump_file, "replacement costs %d + %d + %d = %d\n",
+		 new_i1_cost, new_i2_cost, new_i3_cost, new_cost);
+      else if (newi2pat)
 	fprintf (dump_file, "replacement costs %d + %d = %d\n",
 		 new_i2_cost, new_i3_cost, new_cost);
       else
@@ -973,7 +991,10 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
   INSN_COST (i3) = new_i3_cost;
   if (i1)
     {
-      INSN_COST (i1) = 0;
+      if (newi1pat)
+	INSN_COST (i1) = new_i1_cost;
+      else
+	INSN_COST (i1) = 0;
       if (i0)
 	INSN_COST (i0) = 0;
     }
@@ -2672,7 +2693,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	     int *new_direct_jump_p, rtx_insn *last_combined_insn)
 {
   /* New patterns for I3 and I2, respectively.  */
-  rtx newpat, newi2pat = 0;
+  rtx newpat, newi2pat = 0, newi1pat = 0;
   rtvec newpat_vec_with_clobbers = 0;
   int substed_i2 = 0, substed_i1 = 0, substed_i0 = 0;
   /* Indicates need to preserve SET in I0, I1 or I2 in I3 if it is not
@@ -2682,8 +2703,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
   int total_sets;
   /* Nonzero if I2's or I1's body now appears in I3.  */
   int i2_is_used = 0, i1_is_used = 0;
-  /* INSN_CODEs for new I3, new I2, and user of condition code.  */
+  /* INSN_CODEs for new I3, new I2, new I1 and user of condition code.  */
   int insn_code_number, i2_code_number = 0, other_code_number = 0;
+  int i1_code_number = 0;
   /* Contains I3 if the destination of I3 is used in its source, which means
      that the old life of I3 is being killed.  If that usage is placed into
      I2 and not in I3, a REG_DEAD note must be made.  */
@@ -2756,7 +2778,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	  else if (BINARY_P (src) && CONSTANT_P (XEXP (src, 1)))
 	    ngood++;
 	  else if (GET_CODE (src) == ASHIFT || GET_CODE (src) == ASHIFTRT
-		   || GET_CODE (src) == LSHIFTRT)
+		   || GET_CODE (src) == LSHIFTRT
+		   || GET_CODE (src) == SIGN_EXTEND
+		   || GET_CODE (src) == ZERO_EXTEND)
 	    nshift++;
 	}
 
@@ -3399,6 +3423,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	      i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
 	    }
 
+	  if (i0)
+	    {
+	      subst_low_luid = DF_INSN_LUID (i0);
+	      i0src = subst (i0src, pc_rtx, pc_rtx, 0, 0, 0);
+	    }
+
 	  subst_low_luid = DF_INSN_LUID (i2);
 	  i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
 	}
@@ -3920,6 +3950,50 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	      rtx src_op0 = XEXP (setsrc, 0);
 	      rtx src_op1 = XEXP (setsrc, 1);
 
+	      /* Double split when src of i0 and i1 are both ZERO_EXTEND.  */
+	      if (i0 && i1
+		  && GET_CODE (PATTERN (i0)) == SET
+		  && GET_CODE (PATTERN (i1)) == SET
+		  && GET_CODE (SET_SRC (PATTERN (i0))) == ZERO_EXTEND
+		  && GET_CODE (SET_SRC (PATTERN (i1))) == ZERO_EXTEND
+		  && (rtx_equal_p (XEXP (*split, 0),
+				   XEXP (SET_SRC (PATTERN (i1)), 0))
+		      || rtx_equal_p (XEXP (*split, 0),
+				      XEXP (SET_SRC (PATTERN (i0)), 0))))
+		{
+		  newi1pat = NULL_RTX;
+		  rtx newdest, *i0_i1dest;
+		  machine_mode new_mode;
+
+		  new_mode = GET_MODE (*split);
+		  if (rtx_equal_p (XEXP (*split, 0),
+				   XEXP (SET_SRC (PATTERN (i1)), 0)))
+		    i0_i1dest = &i1dest;
+		  else
+		    i0_i1dest = &i0dest;
+
+		  if (REGNO (i1dest) < FIRST_PSEUDO_REGISTER)
+		    newdest = gen_rtx_REG (new_mode, REGNO (*i0_i1dest));
+		  else
+		    {
+		      SUBST_MODE (regno_reg_rtx[REGNO (*i0_i1dest)], new_mode);
+		      newdest = regno_reg_rtx[REGNO (*i0_i1dest)];
+		    }
+
+		  newi1pat =  gen_rtx_SET (newdest, *split);
+		  SUBST (*split, newdest);
+
+		  i1_code_number = recog_for_combine (&newi1pat, i2,
+							&new_i2_notes);
+		  if (i1_code_number < 0)
+		    {
+		      undo_all ();
+		      return 0;
+		    }
+
+		  split = find_split_point (&newpat, i3, false);
+		}
+
 	      /* Split "X = Y op Y" as "Z = Y; X = Z op Z".  */
 	      if (rtx_equal_p (src_op0, src_op1))
 		{
@@ -4209,11 +4283,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 
   /* Only allow this combination if insn_cost reports that the
      replacement instructions are cheaper than the originals.  */
-  if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat))
-    {
-      undo_all ();
-      return 0;
-    }
+    if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat,
+				newi1pat))
+      {
+	undo_all ();
+	return 0;
+      }
 
   if (MAY_HAVE_DEBUG_BIND_INSNS)
     {
@@ -4242,6 +4317,18 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 		/* Put back the new mode.  */
 		adjust_reg_mode (reg, new_mode);
 	      }
+	    else if (reg == i0dest)
+	      {
+		propagate_for_debug (i0, last_combined_insn, reg, i0src,
+				     this_basic_block);
+		adjust_reg_mode (reg, new_mode);
+	      }
+	    else if (reg == i1dest)
+	      {
+		propagate_for_debug (i1, last_combined_insn, reg, i1src,
+				     this_basic_block);
+		adjust_reg_mode (reg, new_mode);
+	      }
 	    else
 	      {
 		rtx tempreg = gen_raw_REG (old_mode, REGNO (reg));
@@ -4552,10 +4639,21 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
       {
 	LOG_LINKS (i1) = NULL;
 	REG_NOTES (i1) = 0;
-	if (MAY_HAVE_DEBUG_BIND_INSNS)
-	  propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
-			       this_basic_block);
-	SET_INSN_DELETED (i1);
+	if (newi1pat)
+	  {
+	    if (MAY_HAVE_DEBUG_BIND_INSNS)
+	      propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
+				   this_basic_block);
+	    INSN_CODE (i1) = i1_code_number;
+	    PATTERN (i1) = newi1pat;
+	  }
+	else
+	  {
+	    if (MAY_HAVE_DEBUG_BIND_INSNS)
+	    propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
+				 this_basic_block);
+	    SET_INSN_DELETED (i1);
+	  }
       }
 
     if (i0)

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-01-15 19:02 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-30  3:08 [PATCH] Add a new pattern in 4-insn combine HAO CHEN GUI
2020-12-11  2:14 ` HAO CHEN GUI
2021-01-04  2:03   ` HAO CHEN GUI
2021-01-14  8:44     ` HAO CHEN GUI
2021-01-15 19:01 ` Segher Boessenkool
  -- strict thread matches above, loose matches on Subject: below --
2020-11-09  2:48 HAO CHEN GUI
2020-11-11  0:18 ` Jeff Law
2020-11-14  3:30   ` Jim Wilson
2020-11-12  0:22 ` Segher Boessenkool
2020-11-12 14:54   ` Segher Boessenkool

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).