public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Add a new pattern in 4-insn combine
@ 2020-11-09  2:48 HAO CHEN GUI
  2020-11-11  0:18 ` Jeff Law
  2020-11-12  0:22 ` Segher Boessenkool
  0 siblings, 2 replies; 10+ messages in thread
From: HAO CHEN GUI @ 2020-11-09  2:48 UTC (permalink / raw)
  To: gcc-patches; +Cc: Segher Boessenkool

[-- Attachment #1: Type: text/plain, Size: 751 bytes --]

Hi,

This patch adds a new pattern in 4-insn combine. It supports the 
following sign_extend(op: zero_extend, zero_extend) optimization. In the 
patch, newpat is split twice. The first split becomes newi1pat and the 
second becomes newi2pat. They replace i1, i2 and i3 if all of them can 
be recognized.

7: r126:SI=zero_extend([r123:DI+0x1])
6: r125:SI=zero_extend([r123:DI])
8: r127:SI=r125:SI+r126:SI
9: r124:DI=sign_extend(r127:SI)

are replaced by:

7: r125:DI=zero_extend([r123:DI])
8: r127:DI=zero_extend([r123:DI+0x1])
9: r124:DI=r127:DI+r125:DI

The attachments are the patch diff file and change log file.

Bootstrapped and tested on powerpc64le-linux-gnu with no regressions.  
Is this okay for trunk? Any recommendations? Thanks a lot.



[-- Attachment #2: ChangeLog --]
[-- Type: text/plain, Size: 176 bytes --]

	* combine.c (combine_validate_cost): Add an argument for newi1pat.
	(try_combine): Add a 4-insn combine pattern for optimizing rtx
	sign_extend (op:zero_extend, zero_extend).

[-- Attachment #3: patch.diff --]
[-- Type: text/plain, Size: 7331 bytes --]

diff --git a/gcc/combine.c b/gcc/combine.c
index c88382efbd3..73259e6a9ed 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -851,10 +851,11 @@ do_SUBST_LINK (struct insn_link **into, struct insn_link *newval)
 
 static bool
 combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
-		       rtx newpat, rtx newi2pat, rtx newotherpat)
+		       rtx newpat, rtx newi2pat, rtx newotherpat,
+		       rtx newi1pat)
 {
   int i0_cost, i1_cost, i2_cost, i3_cost;
-  int new_i2_cost, new_i3_cost;
+  int new_i1_cost, new_i2_cost, new_i3_cost;
   int old_cost, new_cost;
 
   /* Lookup the original insn_costs.  */
@@ -915,6 +916,20 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
       new_i2_cost = 0;
     }
 
+  if (newi1pat)
+    {
+	tmp = PATTERN (i1);
+	PATTERN (i1) = newi1pat;
+	tmpi = INSN_CODE (i1);
+	INSN_CODE (i1) = -1;
+	new_i1_cost = insn_cost (i1, optimize_this_for_speed_p);
+	PATTERN (i1) = tmp;
+	INSN_CODE (i1) = tmpi;
+	new_cost = new_i1_cost > 0 ? new_i1_cost + new_cost : 0;
+    }
+  else
+    new_i1_cost = 0;
+
   if (undobuf.other_insn)
     {
       int old_other_cost, new_other_cost;
@@ -958,7 +973,10 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
 	fprintf (dump_file, "%d + ", i1_cost);
       fprintf (dump_file, "%d + %d = %d\n", i2_cost, i3_cost, old_cost);
 
-      if (newi2pat)
+      if (newi1pat)
+	fprintf (dump_file, "replacement costs %d + %d + %d = %d\n",
+		 new_i1_cost, new_i2_cost, new_i3_cost, new_cost);
+      else if (newi2pat)
 	fprintf (dump_file, "replacement costs %d + %d = %d\n",
 		 new_i2_cost, new_i3_cost, new_cost);
       else
@@ -973,7 +991,10 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
   INSN_COST (i3) = new_i3_cost;
   if (i1)
     {
-      INSN_COST (i1) = 0;
+      if (newi1pat)
+	INSN_COST (i1) = new_i1_cost;
+      else
+	INSN_COST (i1) = 0;
       if (i0)
 	INSN_COST (i0) = 0;
     }
@@ -2672,7 +2693,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	     int *new_direct_jump_p, rtx_insn *last_combined_insn)
 {
   /* New patterns for I3 and I2, respectively.  */
-  rtx newpat, newi2pat = 0;
+  rtx newpat, newi2pat = 0, newi1pat = 0;
   rtvec newpat_vec_with_clobbers = 0;
   int substed_i2 = 0, substed_i1 = 0, substed_i0 = 0;
   /* Indicates need to preserve SET in I0, I1 or I2 in I3 if it is not
@@ -2682,8 +2703,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
   int total_sets;
   /* Nonzero if I2's or I1's body now appears in I3.  */
   int i2_is_used = 0, i1_is_used = 0;
-  /* INSN_CODEs for new I3, new I2, and user of condition code.  */
+  /* INSN_CODEs for new I3, new I2, new I1 and user of condition code.  */
   int insn_code_number, i2_code_number = 0, other_code_number = 0;
+  int i1_code_number = 0;
   /* Contains I3 if the destination of I3 is used in its source, which means
      that the old life of I3 is being killed.  If that usage is placed into
      I2 and not in I3, a REG_DEAD note must be made.  */
@@ -2756,7 +2778,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	  else if (BINARY_P (src) && CONSTANT_P (XEXP (src, 1)))
 	    ngood++;
 	  else if (GET_CODE (src) == ASHIFT || GET_CODE (src) == ASHIFTRT
-		   || GET_CODE (src) == LSHIFTRT)
+		   || GET_CODE (src) == LSHIFTRT
+		   || GET_CODE (src) == SIGN_EXTEND
+		   || GET_CODE (src) == ZERO_EXTEND)
 	    nshift++;
 	}
 
@@ -3399,6 +3423,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	      i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
 	    }
 
+	  if (i0)
+	    {
+	      subst_low_luid = DF_INSN_LUID (i0);
+	      i0src = subst (i0src, pc_rtx, pc_rtx, 0, 0, 0);
+	    }
+
 	  subst_low_luid = DF_INSN_LUID (i2);
 	  i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
 	}
@@ -3920,6 +3950,50 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	      rtx src_op0 = XEXP (setsrc, 0);
 	      rtx src_op1 = XEXP (setsrc, 1);
 
+	      /* Double split when src of i0 and i1 are both ZERO_EXTEND.  */
+	      if (i0 && i1
+		  && GET_CODE (PATTERN (i0)) == SET
+		  && GET_CODE (PATTERN (i1)) == SET
+		  && GET_CODE (SET_SRC (PATTERN (i0))) == ZERO_EXTEND
+		  && GET_CODE (SET_SRC (PATTERN (i1))) == ZERO_EXTEND
+		  && (rtx_equal_p (XEXP (*split, 0),
+				   XEXP (SET_SRC (PATTERN (i1)), 0))
+		      || rtx_equal_p (XEXP (*split, 0),
+				      XEXP (SET_SRC (PATTERN (i0)), 0))))
+		{
+		  newi1pat = NULL_RTX;
+		  rtx newdest, *i0_i1dest;
+		  machine_mode new_mode;
+
+		  new_mode = GET_MODE (*split);
+		  if (rtx_equal_p (XEXP (*split, 0),
+				   XEXP (SET_SRC (PATTERN (i1)), 0)))
+		    i0_i1dest = &i1dest;
+		  else
+		    i0_i1dest = &i0dest;
+
+		  if (REGNO (i1dest) < FIRST_PSEUDO_REGISTER)
+		    newdest = gen_rtx_REG (new_mode, REGNO (*i0_i1dest));
+		  else
+		    {
+		      SUBST_MODE (regno_reg_rtx[REGNO (*i0_i1dest)], new_mode);
+		      newdest = regno_reg_rtx[REGNO (*i0_i1dest)];
+		    }
+
+		  newi1pat =  gen_rtx_SET (newdest, *split);
+		  SUBST (*split, newdest);
+
+		  i1_code_number = recog_for_combine (&newi1pat, i2,
+							&new_i2_notes);
+		  if (i1_code_number < 0)
+		    {
+		      undo_all ();
+		      return 0;
+		    }
+
+		  split = find_split_point (&newpat, i3, false);
+		}
+
 	      /* Split "X = Y op Y" as "Z = Y; X = Z op Z".  */
 	      if (rtx_equal_p (src_op0, src_op1))
 		{
@@ -4209,11 +4283,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 
   /* Only allow this combination if insn_cost reports that the
      replacement instructions are cheaper than the originals.  */
-  if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat))
-    {
-      undo_all ();
-      return 0;
-    }
+    if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat,
+				newi1pat))
+      {
+	undo_all ();
+	return 0;
+      }
 
   if (MAY_HAVE_DEBUG_BIND_INSNS)
     {
@@ -4242,6 +4317,18 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 		/* Put back the new mode.  */
 		adjust_reg_mode (reg, new_mode);
 	      }
+	    else if (reg == i0dest)
+	      {
+		propagate_for_debug (i0, last_combined_insn, reg, i0src,
+				     this_basic_block);
+		adjust_reg_mode (reg, new_mode);
+	      }
+	    else if (reg == i1dest)
+	      {
+		propagate_for_debug (i1, last_combined_insn, reg, i1src,
+				     this_basic_block);
+		adjust_reg_mode (reg, new_mode);
+	      }
 	    else
 	      {
 		rtx tempreg = gen_raw_REG (old_mode, REGNO (reg));
@@ -4552,10 +4639,21 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
       {
 	LOG_LINKS (i1) = NULL;
 	REG_NOTES (i1) = 0;
-	if (MAY_HAVE_DEBUG_BIND_INSNS)
-	  propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
-			       this_basic_block);
-	SET_INSN_DELETED (i1);
+	if (newi1pat)
+	  {
+	    if (MAY_HAVE_DEBUG_BIND_INSNS)
+	      propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
+				   this_basic_block);
+	    INSN_CODE (i1) = i1_code_number;
+	    PATTERN (i1) = newi1pat;
+	  }
+	else
+	  {
+	    if (MAY_HAVE_DEBUG_BIND_INSNS)
+	    propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
+				 this_basic_block);
+	    SET_INSN_DELETED (i1);
+	  }
       }
 
     if (i0)

^ permalink raw reply	[flat|nested] 10+ messages in thread
* [PATCH] Add a new pattern in 4-insn combine
@ 2020-11-30  3:08 HAO CHEN GUI
  2020-12-11  2:14 ` HAO CHEN GUI
  2021-01-15 19:01 ` Segher Boessenkool
  0 siblings, 2 replies; 10+ messages in thread
From: HAO CHEN GUI @ 2020-11-30  3:08 UTC (permalink / raw)
  To: Segher Boessenkool, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 588 bytes --]

Hi,

   This patch adds a new pattern(combine 4 insns to 3 insns) in 4-insn 
combine. In the patch, newpat is split twice. The newpat, newi2pat and 
newi1pat replace i3, i2 and i1 respectively. The 4 to 3 combine is done 
at the end where all former attempts fail. In 4 insn combine pre-check, 
the zero and sign extend are added as the patch is for the issue 1 
listed in pr65010.

   The attachments are the patch diff file and change log file.

   Bootstrapped and tested on powerpc64le, ARM and x86 with no 
regressions. Is this okay for trunk? Any recommendations? Thanks a lot.


[-- Attachment #2: ChangeLog --]
[-- Type: text/plain, Size: 176 bytes --]

	* combine.c (combine_validate_cost): Add an argument for newi1pat.
	(try_combine): Add a 4-insn combine pattern for optimizing rtx
	sign_extend (op:zero_extend, zero_extend).

[-- Attachment #3: patch-1.diff --]
[-- Type: text/plain, Size: 10702 bytes --]

diff --git a/gcc/combine.c b/gcc/combine.c
index c88382efbd3..1a7c508217e 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -851,10 +851,11 @@ do_SUBST_LINK (struct insn_link **into, struct insn_link *newval)
 
 static bool
 combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
-		       rtx newpat, rtx newi2pat, rtx newotherpat)
+		       rtx newpat, rtx newi2pat, rtx newotherpat,
+		       rtx newi1pat)
 {
   int i0_cost, i1_cost, i2_cost, i3_cost;
-  int new_i2_cost, new_i3_cost;
+  int new_i1_cost, new_i2_cost, new_i3_cost;
   int old_cost, new_cost;
 
   /* Lookup the original insn_costs.  */
@@ -915,6 +916,20 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
       new_i2_cost = 0;
     }
 
+  if (newi1pat)
+    {
+	tmp = PATTERN (i1);
+	PATTERN (i1) = newi1pat;
+	tmpi = INSN_CODE (i1);
+	INSN_CODE (i1) = -1;
+	new_i1_cost = insn_cost (i1, optimize_this_for_speed_p);
+	PATTERN (i1) = tmp;
+	INSN_CODE (i1) = tmpi;
+	new_cost = new_i1_cost > 0 ? new_i1_cost + new_cost : 0;
+    }
+  else
+    new_i1_cost = 0;
+
   if (undobuf.other_insn)
     {
       int old_other_cost, new_other_cost;
@@ -958,7 +973,10 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
 	fprintf (dump_file, "%d + ", i1_cost);
       fprintf (dump_file, "%d + %d = %d\n", i2_cost, i3_cost, old_cost);
 
-      if (newi2pat)
+      if (newi1pat)
+	fprintf (dump_file, "replacement costs %d + %d + %d = %d\n",
+		 new_i1_cost, new_i2_cost, new_i3_cost, new_cost);
+      else if (newi2pat)
 	fprintf (dump_file, "replacement costs %d + %d = %d\n",
 		 new_i2_cost, new_i3_cost, new_cost);
       else
@@ -973,7 +991,10 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
   INSN_COST (i3) = new_i3_cost;
   if (i1)
     {
-      INSN_COST (i1) = 0;
+      if (newi1pat)
+	INSN_COST (i1) = new_i1_cost;
+      else
+	INSN_COST (i1) = 0;
       if (i0)
 	INSN_COST (i0) = 0;
     }
@@ -2671,8 +2692,8 @@ static rtx_insn *
 try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	     int *new_direct_jump_p, rtx_insn *last_combined_insn)
 {
-  /* New patterns for I3 and I2, respectively.  */
-  rtx newpat, newi2pat = 0;
+  /* New patterns for I3, I2 and I1, respectively.  */
+  rtx newpat, newi2pat = 0, newi1pat = 0;
   rtvec newpat_vec_with_clobbers = 0;
   int substed_i2 = 0, substed_i1 = 0, substed_i0 = 0;
   /* Indicates need to preserve SET in I0, I1 or I2 in I3 if it is not
@@ -2682,8 +2703,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
   int total_sets;
   /* Nonzero if I2's or I1's body now appears in I3.  */
   int i2_is_used = 0, i1_is_used = 0;
-  /* INSN_CODEs for new I3, new I2, and user of condition code.  */
-  int insn_code_number, i2_code_number = 0, other_code_number = 0;
+  /* INSN_CODEs for new I3, new I2, new I1, and user of condition code.  */
+  int insn_code_number, i2_code_number = 0, i1_code_number = 0;
+  int other_code_number = 0;
   /* Contains I3 if the destination of I3 is used in its source, which means
      that the old life of I3 is being killed.  If that usage is placed into
      I2 and not in I3, a REG_DEAD note must be made.  */
@@ -2702,7 +2724,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
   int i2dest_killed = 0, i1dest_killed = 0, i0dest_killed = 0;
   int i1_feeds_i2_n = 0, i0_feeds_i2_n = 0, i0_feeds_i1_n = 0;
   /* Notes that must be added to REG_NOTES in I3 and I2.  */
-  rtx new_i3_notes, new_i2_notes;
+  rtx new_i3_notes, new_i2_notes, new_i1_notes;
   /* Notes that we substituted I3 into I2 instead of the normal case.  */
   int i3_subst_into_i2 = 0;
   /* Notes that I1, I2 or I3 is a MULT operation.  */
@@ -2735,6 +2757,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
       int i;
       int ngood = 0;
       int nshift = 0;
+      int nextend = 0;
       rtx set0, set3;
 
       if (!flag_expensive_optimizations)
@@ -2758,6 +2781,8 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	  else if (GET_CODE (src) == ASHIFT || GET_CODE (src) == ASHIFTRT
 		   || GET_CODE (src) == LSHIFTRT)
 	    nshift++;
+	  else if (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND)
+	    nextend++;
 	}
 
       /* If I0 loads a memory and I3 sets the same memory, then I1 and I2
@@ -2787,7 +2812,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	  && rtx_referenced_p (XEXP (SET_DEST (set3), 0), SET_SRC (set0)))
 	ngood += 2;
 
-      if (ngood < 2 && nshift < 2)
+      if (ngood < 2 && nshift < 2 && nextend < 2)
 	return 0;
     }
 
@@ -3399,6 +3424,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	      i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
 	    }
 
+	  if (i0)
+	    {
+	      subst_low_luid = DF_INSN_LUID (i0);
+	      i0src = subst (i0src, pc_rtx, pc_rtx, 0, 0, 0);
+	    }
+
 	  subst_low_luid = DF_INSN_LUID (i2);
 	  i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
 	}
@@ -3991,6 +4022,61 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 	     don't use one now.  */
 	  if (i2_code_number >= 0 && ! (split_code == MULT && ! have_mult))
 	    insn_code_number = recog_for_combine (&newpat, i3, &new_i3_notes);
+
+	  if (i0 && i1
+	      && GET_CODE (newpat) == SET
+	      && BINARY_P (SET_SRC (newpat))
+	      && insn_code_number < 0
+	      && i2_code_number >= 0)
+	    {
+	      newi1pat = NULL_RTX;
+	      rtx newi1dest;
+	      machine_mode new_mode;
+
+	      if ((split = find_split_point (&newpat, i3, false)) != 0
+		  && (!HAVE_cc0 || REG_P (i1dest))
+		  && (GET_MODE (*split) == GET_MODE (i1dest)
+		      || GET_MODE (*split) == VOIDmode
+		      || can_change_dest_mode (i1dest, added_sets_1,
+					       GET_MODE (*split)))
+		  && !reg_referenced_p (i1dest, newpat)
+		  && !modified_between_p (*split, i1, i3))
+		{
+		  new_mode = GET_MODE (*split);
+		  if (REGNO (i1dest) < FIRST_PSEUDO_REGISTER)
+		    newi1dest = gen_rtx_REG (new_mode, REGNO (i1dest));
+		  else
+		    {
+		      SUBST_MODE (regno_reg_rtx[REGNO (i1dest)], new_mode);
+		      newi1dest = regno_reg_rtx[REGNO (i1dest)];
+		    }
+		  newi1pat =  gen_rtx_SET (newi1dest, *split);
+		  SUBST (*split, newi1dest);
+
+		  i1_code_number = recog_for_combine (&newi1pat, i2,
+						      &new_i1_notes);
+
+		  if (i1_code_number < 0)
+		    {
+		      undo_all ();
+		      return 0;
+		    }
+		  else
+		    {
+		      /* swap newi1pat and newi2pat if dest of newi2pat is set
+			 in newi1pat.  */
+		      if (reg_referenced_p (newdest, newi1pat))
+			{
+			    std::swap (i1dest, i2dest);
+			    std::swap (newi1pat, newi2pat);
+			    std::swap (i1_code_number, i2_code_number);
+			}
+
+		      insn_code_number = recog_for_combine (&newpat, i3,
+							    &new_i3_notes);
+		    }
+		}
+	    }
 	}
     }
 
@@ -4209,11 +4295,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 
   /* Only allow this combination if insn_cost reports that the
      replacement instructions are cheaper than the originals.  */
-  if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat))
-    {
-      undo_all ();
-      return 0;
-    }
+    if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat,
+				newi1pat))
+      {
+	undo_all ();
+	return 0;
+      }
 
   if (MAY_HAVE_DEBUG_BIND_INSNS)
     {
@@ -4242,6 +4329,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 		/* Put back the new mode.  */
 		adjust_reg_mode (reg, new_mode);
 	      }
+	    else if (reg == i1dest)
+	      {
+		propagate_for_debug (i1, last_combined_insn, reg, i1src,
+				     this_basic_block);
+		adjust_reg_mode (reg, new_mode);
+	      }
 	    else
 	      {
 		rtx tempreg = gen_raw_REG (old_mode, REGNO (reg));
@@ -4446,6 +4539,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
     reset_used_flags (i0notes);
     reset_used_flags (newpat);
     reset_used_flags (newi2pat);
+    reset_used_flags (newi1pat);
     if (undobuf.other_insn)
       reset_used_flags (PATTERN (undobuf.other_insn));
 
@@ -4455,6 +4549,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
     i0notes = copy_rtx_if_shared (i0notes);
     newpat = copy_rtx_if_shared (newpat);
     newi2pat = copy_rtx_if_shared (newi2pat);
+    newi1pat = copy_rtx_if_shared (newi1pat);
     if (undobuf.other_insn)
       reset_used_flags (PATTERN (undobuf.other_insn));
 
@@ -4552,10 +4647,21 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
       {
 	LOG_LINKS (i1) = NULL;
 	REG_NOTES (i1) = 0;
-	if (MAY_HAVE_DEBUG_BIND_INSNS)
-	  propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
-			       this_basic_block);
-	SET_INSN_DELETED (i1);
+	if (newi1pat)
+	  {
+	    if (MAY_HAVE_DEBUG_BIND_INSNS)
+	      propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
+				   this_basic_block);
+	    INSN_CODE (i1) = i1_code_number;
+	    PATTERN (i1) = newi1pat;
+	  }
+	else
+	  {
+	    if (MAY_HAVE_DEBUG_BIND_INSNS)
+	    propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
+				 this_basic_block);
+	    SET_INSN_DELETED (i1);
+	  }
       }
 
     if (i0)
@@ -4579,6 +4685,8 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
       from_luid = DF_INSN_LUID (i1);
     else
       from_luid = DF_INSN_LUID (i2);
+    if (newi1pat)
+      move_deaths (newi1pat, NULL_RTX, from_luid, i1, &midnotes);
     if (newi2pat)
       move_deaths (newi2pat, NULL_RTX, from_luid, i2, &midnotes);
     move_deaths (newpat, newi2pat, from_luid, i3, &midnotes);
@@ -4604,6 +4712,10 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
        know these are REG_UNUSED and want them to go to the desired insn,
        so we always pass it as i3.  */
 
+    if (newi1pat && new_i1_notes)
+      distribute_notes (new_i1_notes, i1, i1, NULL, NULL_RTX, NULL_RTX,
+			NULL_RTX);
+
     if (newi2pat && new_i2_notes)
       distribute_notes (new_i2_notes, i2, i2, NULL, NULL_RTX, NULL_RTX,
 			NULL_RTX);
@@ -4738,6 +4850,8 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
     /* Update reg_stat[].nonzero_bits et al for any changes that may have
        been made to this insn.  The order is important, because newi2pat
        can affect nonzero_bits of newpat.  */
+    if (newi1pat)
+      note_pattern_stores (newi1pat, set_nonzero_bits_and_sign_copies, NULL);
     if (newi2pat)
       note_pattern_stores (newi2pat, set_nonzero_bits_and_sign_copies, NULL);
     note_pattern_stores (newpat, set_nonzero_bits_and_sign_copies, NULL);

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-01-15 19:02 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-09  2:48 [PATCH] Add a new pattern in 4-insn combine HAO CHEN GUI
2020-11-11  0:18 ` Jeff Law
2020-11-14  3:30   ` Jim Wilson
2020-11-12  0:22 ` Segher Boessenkool
2020-11-12 14:54   ` Segher Boessenkool
2020-11-30  3:08 HAO CHEN GUI
2020-12-11  2:14 ` HAO CHEN GUI
2021-01-04  2:03   ` HAO CHEN GUI
2021-01-14  8:44     ` HAO CHEN GUI
2021-01-15 19:01 ` Segher Boessenkool

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).