* [PATCH] Add a new pattern in 4-insn combine
@ 2020-11-30 3:08 HAO CHEN GUI
2020-12-11 2:14 ` HAO CHEN GUI
2021-01-15 19:01 ` Segher Boessenkool
0 siblings, 2 replies; 10+ messages in thread
From: HAO CHEN GUI @ 2020-11-30 3:08 UTC (permalink / raw)
To: Segher Boessenkool, gcc-patches
[-- Attachment #1: Type: text/plain, Size: 588 bytes --]
Hi,
This patch adds a new pattern(combine 4 insns to 3 insns) in 4-insn
combine. In the patch, newpat is split twice. The newpat, newi2pat and
newi1pat replace i3, i2 and i1 respectively. The 4 to 3 combine is done
at the end where all former attempts fail. In 4 insn combine pre-check,
the zero and sign extend are added as the patch is for the issue 1
listed in pr65010.
The attachments are the patch diff file and change log file.
Bootstrapped and tested on powerpc64le, ARM and x86 with no
regressions. Is this okay for trunk? Any recommendations? Thanks a lot.
[-- Attachment #2: ChangeLog --]
[-- Type: text/plain, Size: 176 bytes --]
* combine.c (combine_validate_cost): Add an argument for newi1pat.
(try_combine): Add a 4-insn combine pattern for optimizing rtx
sign_extend (op:zero_extend, zero_extend).
[-- Attachment #3: patch-1.diff --]
[-- Type: text/plain, Size: 10702 bytes --]
diff --git a/gcc/combine.c b/gcc/combine.c
index c88382efbd3..1a7c508217e 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -851,10 +851,11 @@ do_SUBST_LINK (struct insn_link **into, struct insn_link *newval)
static bool
combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
- rtx newpat, rtx newi2pat, rtx newotherpat)
+ rtx newpat, rtx newi2pat, rtx newotherpat,
+ rtx newi1pat)
{
int i0_cost, i1_cost, i2_cost, i3_cost;
- int new_i2_cost, new_i3_cost;
+ int new_i1_cost, new_i2_cost, new_i3_cost;
int old_cost, new_cost;
/* Lookup the original insn_costs. */
@@ -915,6 +916,20 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
new_i2_cost = 0;
}
+ if (newi1pat)
+ {
+ tmp = PATTERN (i1);
+ PATTERN (i1) = newi1pat;
+ tmpi = INSN_CODE (i1);
+ INSN_CODE (i1) = -1;
+ new_i1_cost = insn_cost (i1, optimize_this_for_speed_p);
+ PATTERN (i1) = tmp;
+ INSN_CODE (i1) = tmpi;
+ new_cost = new_i1_cost > 0 ? new_i1_cost + new_cost : 0;
+ }
+ else
+ new_i1_cost = 0;
+
if (undobuf.other_insn)
{
int old_other_cost, new_other_cost;
@@ -958,7 +973,10 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
fprintf (dump_file, "%d + ", i1_cost);
fprintf (dump_file, "%d + %d = %d\n", i2_cost, i3_cost, old_cost);
- if (newi2pat)
+ if (newi1pat)
+ fprintf (dump_file, "replacement costs %d + %d + %d = %d\n",
+ new_i1_cost, new_i2_cost, new_i3_cost, new_cost);
+ else if (newi2pat)
fprintf (dump_file, "replacement costs %d + %d = %d\n",
new_i2_cost, new_i3_cost, new_cost);
else
@@ -973,7 +991,10 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
INSN_COST (i3) = new_i3_cost;
if (i1)
{
- INSN_COST (i1) = 0;
+ if (newi1pat)
+ INSN_COST (i1) = new_i1_cost;
+ else
+ INSN_COST (i1) = 0;
if (i0)
INSN_COST (i0) = 0;
}
@@ -2671,8 +2692,8 @@ static rtx_insn *
try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
int *new_direct_jump_p, rtx_insn *last_combined_insn)
{
- /* New patterns for I3 and I2, respectively. */
- rtx newpat, newi2pat = 0;
+ /* New patterns for I3, I2 and I1, respectively. */
+ rtx newpat, newi2pat = 0, newi1pat = 0;
rtvec newpat_vec_with_clobbers = 0;
int substed_i2 = 0, substed_i1 = 0, substed_i0 = 0;
/* Indicates need to preserve SET in I0, I1 or I2 in I3 if it is not
@@ -2682,8 +2703,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
int total_sets;
/* Nonzero if I2's or I1's body now appears in I3. */
int i2_is_used = 0, i1_is_used = 0;
- /* INSN_CODEs for new I3, new I2, and user of condition code. */
- int insn_code_number, i2_code_number = 0, other_code_number = 0;
+ /* INSN_CODEs for new I3, new I2, new I1, and user of condition code. */
+ int insn_code_number, i2_code_number = 0, i1_code_number = 0;
+ int other_code_number = 0;
/* Contains I3 if the destination of I3 is used in its source, which means
that the old life of I3 is being killed. If that usage is placed into
I2 and not in I3, a REG_DEAD note must be made. */
@@ -2702,7 +2724,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
int i2dest_killed = 0, i1dest_killed = 0, i0dest_killed = 0;
int i1_feeds_i2_n = 0, i0_feeds_i2_n = 0, i0_feeds_i1_n = 0;
/* Notes that must be added to REG_NOTES in I3 and I2. */
- rtx new_i3_notes, new_i2_notes;
+ rtx new_i3_notes, new_i2_notes, new_i1_notes;
/* Notes that we substituted I3 into I2 instead of the normal case. */
int i3_subst_into_i2 = 0;
/* Notes that I1, I2 or I3 is a MULT operation. */
@@ -2735,6 +2757,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
int i;
int ngood = 0;
int nshift = 0;
+ int nextend = 0;
rtx set0, set3;
if (!flag_expensive_optimizations)
@@ -2758,6 +2781,8 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
else if (GET_CODE (src) == ASHIFT || GET_CODE (src) == ASHIFTRT
|| GET_CODE (src) == LSHIFTRT)
nshift++;
+ else if (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND)
+ nextend++;
}
/* If I0 loads a memory and I3 sets the same memory, then I1 and I2
@@ -2787,7 +2812,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
&& rtx_referenced_p (XEXP (SET_DEST (set3), 0), SET_SRC (set0)))
ngood += 2;
- if (ngood < 2 && nshift < 2)
+ if (ngood < 2 && nshift < 2 && nextend < 2)
return 0;
}
@@ -3399,6 +3424,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
}
+ if (i0)
+ {
+ subst_low_luid = DF_INSN_LUID (i0);
+ i0src = subst (i0src, pc_rtx, pc_rtx, 0, 0, 0);
+ }
+
subst_low_luid = DF_INSN_LUID (i2);
i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
}
@@ -3991,6 +4022,61 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
don't use one now. */
if (i2_code_number >= 0 && ! (split_code == MULT && ! have_mult))
insn_code_number = recog_for_combine (&newpat, i3, &new_i3_notes);
+
+ if (i0 && i1
+ && GET_CODE (newpat) == SET
+ && BINARY_P (SET_SRC (newpat))
+ && insn_code_number < 0
+ && i2_code_number >= 0)
+ {
+ newi1pat = NULL_RTX;
+ rtx newi1dest;
+ machine_mode new_mode;
+
+ if ((split = find_split_point (&newpat, i3, false)) != 0
+ && (!HAVE_cc0 || REG_P (i1dest))
+ && (GET_MODE (*split) == GET_MODE (i1dest)
+ || GET_MODE (*split) == VOIDmode
+ || can_change_dest_mode (i1dest, added_sets_1,
+ GET_MODE (*split)))
+ && !reg_referenced_p (i1dest, newpat)
+ && !modified_between_p (*split, i1, i3))
+ {
+ new_mode = GET_MODE (*split);
+ if (REGNO (i1dest) < FIRST_PSEUDO_REGISTER)
+ newi1dest = gen_rtx_REG (new_mode, REGNO (i1dest));
+ else
+ {
+ SUBST_MODE (regno_reg_rtx[REGNO (i1dest)], new_mode);
+ newi1dest = regno_reg_rtx[REGNO (i1dest)];
+ }
+ newi1pat = gen_rtx_SET (newi1dest, *split);
+ SUBST (*split, newi1dest);
+
+ i1_code_number = recog_for_combine (&newi1pat, i2,
+ &new_i1_notes);
+
+ if (i1_code_number < 0)
+ {
+ undo_all ();
+ return 0;
+ }
+ else
+ {
+ /* swap newi1pat and newi2pat if dest of newi2pat is set
+ in newi1pat. */
+ if (reg_referenced_p (newdest, newi1pat))
+ {
+ std::swap (i1dest, i2dest);
+ std::swap (newi1pat, newi2pat);
+ std::swap (i1_code_number, i2_code_number);
+ }
+
+ insn_code_number = recog_for_combine (&newpat, i3,
+ &new_i3_notes);
+ }
+ }
+ }
}
}
@@ -4209,11 +4295,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
/* Only allow this combination if insn_cost reports that the
replacement instructions are cheaper than the originals. */
- if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat))
- {
- undo_all ();
- return 0;
- }
+ if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat,
+ newi1pat))
+ {
+ undo_all ();
+ return 0;
+ }
if (MAY_HAVE_DEBUG_BIND_INSNS)
{
@@ -4242,6 +4329,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
/* Put back the new mode. */
adjust_reg_mode (reg, new_mode);
}
+ else if (reg == i1dest)
+ {
+ propagate_for_debug (i1, last_combined_insn, reg, i1src,
+ this_basic_block);
+ adjust_reg_mode (reg, new_mode);
+ }
else
{
rtx tempreg = gen_raw_REG (old_mode, REGNO (reg));
@@ -4446,6 +4539,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
reset_used_flags (i0notes);
reset_used_flags (newpat);
reset_used_flags (newi2pat);
+ reset_used_flags (newi1pat);
if (undobuf.other_insn)
reset_used_flags (PATTERN (undobuf.other_insn));
@@ -4455,6 +4549,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
i0notes = copy_rtx_if_shared (i0notes);
newpat = copy_rtx_if_shared (newpat);
newi2pat = copy_rtx_if_shared (newi2pat);
+ newi1pat = copy_rtx_if_shared (newi1pat);
if (undobuf.other_insn)
reset_used_flags (PATTERN (undobuf.other_insn));
@@ -4552,10 +4647,21 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
{
LOG_LINKS (i1) = NULL;
REG_NOTES (i1) = 0;
- if (MAY_HAVE_DEBUG_BIND_INSNS)
- propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
- this_basic_block);
- SET_INSN_DELETED (i1);
+ if (newi1pat)
+ {
+ if (MAY_HAVE_DEBUG_BIND_INSNS)
+ propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
+ this_basic_block);
+ INSN_CODE (i1) = i1_code_number;
+ PATTERN (i1) = newi1pat;
+ }
+ else
+ {
+ if (MAY_HAVE_DEBUG_BIND_INSNS)
+ propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
+ this_basic_block);
+ SET_INSN_DELETED (i1);
+ }
}
if (i0)
@@ -4579,6 +4685,8 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
from_luid = DF_INSN_LUID (i1);
else
from_luid = DF_INSN_LUID (i2);
+ if (newi1pat)
+ move_deaths (newi1pat, NULL_RTX, from_luid, i1, &midnotes);
if (newi2pat)
move_deaths (newi2pat, NULL_RTX, from_luid, i2, &midnotes);
move_deaths (newpat, newi2pat, from_luid, i3, &midnotes);
@@ -4604,6 +4712,10 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
know these are REG_UNUSED and want them to go to the desired insn,
so we always pass it as i3. */
+ if (newi1pat && new_i1_notes)
+ distribute_notes (new_i1_notes, i1, i1, NULL, NULL_RTX, NULL_RTX,
+ NULL_RTX);
+
if (newi2pat && new_i2_notes)
distribute_notes (new_i2_notes, i2, i2, NULL, NULL_RTX, NULL_RTX,
NULL_RTX);
@@ -4738,6 +4850,8 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
/* Update reg_stat[].nonzero_bits et al for any changes that may have
been made to this insn. The order is important, because newi2pat
can affect nonzero_bits of newpat. */
+ if (newi1pat)
+ note_pattern_stores (newi1pat, set_nonzero_bits_and_sign_copies, NULL);
if (newi2pat)
note_pattern_stores (newi2pat, set_nonzero_bits_and_sign_copies, NULL);
note_pattern_stores (newpat, set_nonzero_bits_and_sign_copies, NULL);
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] Add a new pattern in 4-insn combine
2020-11-30 3:08 [PATCH] Add a new pattern in 4-insn combine HAO CHEN GUI
@ 2020-12-11 2:14 ` HAO CHEN GUI
2021-01-04 2:03 ` HAO CHEN GUI
2021-01-15 19:01 ` Segher Boessenkool
1 sibling, 1 reply; 10+ messages in thread
From: HAO CHEN GUI @ 2020-12-11 2:14 UTC (permalink / raw)
To: Segher Boessenkool, gcc-patches
Segher,
Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560573.html
On 30/11/2020 上午 11:08, HAO CHEN GUI wrote:
> Hi,
>
> This patch adds a new pattern(combine 4 insns to 3 insns) in 4-insn
> combine. In the patch, newpat is split twice. The newpat, newi2pat and
> newi1pat replace i3, i2 and i1 respectively. The 4 to 3 combine is
> done at the end where all former attempts fail. In 4 insn combine
> pre-check, the zero and sign extend are added as the patch is for the
> issue 1 listed in pr65010.
>
> The attachments are the patch diff file and change log file.
>
> Bootstrapped and tested on powerpc64le, ARM and x86 with no
> regressions. Is this okay for trunk? Any recommendations? Thanks a lot.
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] Add a new pattern in 4-insn combine
2020-12-11 2:14 ` HAO CHEN GUI
@ 2021-01-04 2:03 ` HAO CHEN GUI
2021-01-14 8:44 ` HAO CHEN GUI
0 siblings, 1 reply; 10+ messages in thread
From: HAO CHEN GUI @ 2021-01-04 2:03 UTC (permalink / raw)
To: Segher Boessenkool, gcc-patches
Segher,
Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560573.html
Thanks a lot.
On 11/12/2020 上午 10:14, HAO CHEN GUI wrote:
> Segher,
>
> Gentle ping this:
>
> https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560573.html
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] Add a new pattern in 4-insn combine
2021-01-04 2:03 ` HAO CHEN GUI
@ 2021-01-14 8:44 ` HAO CHEN GUI
0 siblings, 0 replies; 10+ messages in thread
From: HAO CHEN GUI @ 2021-01-14 8:44 UTC (permalink / raw)
To: Segher Boessenkool, gcc-patches
Segher,
Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560573.html
Thanks a lot.
On 4/1/2021 上午 10:03, HAO CHEN GUI wrote:
> Segher,
>
> Gentle ping this:
>
> https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560573.html
>
> Thanks a lot.
>
>
> On 11/12/2020 上午 10:14, HAO CHEN GUI wrote:
>> Segher,
>>
>> Gentle ping this:
>>
>> https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560573.html
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] Add a new pattern in 4-insn combine
2020-11-30 3:08 [PATCH] Add a new pattern in 4-insn combine HAO CHEN GUI
2020-12-11 2:14 ` HAO CHEN GUI
@ 2021-01-15 19:01 ` Segher Boessenkool
1 sibling, 0 replies; 10+ messages in thread
From: Segher Boessenkool @ 2021-01-15 19:01 UTC (permalink / raw)
To: HAO CHEN GUI; +Cc: gcc-patches
On Mon, Nov 30, 2020 at 11:08:22AM +0800, HAO CHEN GUI wrote:
> This patch adds a new pattern(combine 4 insns to 3 insns) in 4-insn
> combine. In the patch, newpat is split twice. The newpat, newi2pat and
As I said before, that has a lot of problems, and is only suitable for
stage 1 (after many fixes!)
I don't think this is a good idea at all, fwiw. Before we even should
think about doing 4->3 combinations, we should stop severely limiting
what combinations of 4 insns we allow; and we do that because it will
slow down the compiler a lot, for no big gain.
Segher
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] Add a new pattern in 4-insn combine
2020-11-11 0:18 ` Jeff Law
@ 2020-11-14 3:30 ` Jim Wilson
0 siblings, 0 replies; 10+ messages in thread
From: Jim Wilson @ 2020-11-14 3:30 UTC (permalink / raw)
To: Jeff Law; +Cc: HAO CHEN GUI, gcc-patches, Segher Boessenkool
On Tue, Nov 10, 2020 at 4:18 PM Jeff Law via Gcc-patches <
gcc-patches@gcc.gnu.org> wrote:
>
> On 11/8/20 7:48 PM, HAO CHEN GUI via Gcc-patches wrote:
> > ChangeLog
> >
> > * combine.c (combine_validate_cost): Add an argument for newi1pat.
> > (try_combine): Add a 4-insn combine pattern for optimizing rtx
> > sign_extend (op:zero_extend, zero_extend).
>
> It'd be nice to see motivating examples. Depending on their structure,
> we may get better results cleaning things up with match.pd patterns. We
> already have some which work in this space.
>
I don't have a use case for this specifically, but for the general case of
allowing a 4-insn combine to split into 3 I do have a RISC-V use case for
that in Philipp Tomsich's recent match.pd thread.
https://gcc.gnu.org/pipermail/gcc-patches/2020-November/558765.html
Maybe instead of modifying combine to know about the
sign_extend(zero_extend zero_extend) case you could add a 4->3 splitter to
the rs6000.md file, and then where we call combine_split_insns modify the
code to accept 3 output insns when there were 4 input insns. That would
fix this rs6000 case, and also allow me to fix my RISC-V case with a
riscv.md splitter.
Jim
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] Add a new pattern in 4-insn combine
2020-11-12 0:22 ` Segher Boessenkool
@ 2020-11-12 14:54 ` Segher Boessenkool
0 siblings, 0 replies; 10+ messages in thread
From: Segher Boessenkool @ 2020-11-12 14:54 UTC (permalink / raw)
To: HAO CHEN GUI; +Cc: gcc-patches
On Wed, Nov 11, 2020 at 06:22:53PM -0600, Segher Boessenkool wrote:
> I'm running an all-arch comparison with this patch, just to see what it
> does, but [...]
Results: C0 is trunk, C1 with patch:
C0 C1
alpha 6422312 99.971%
arc 3783838 100.000%
arm 10168277 100.000%
arm64 20077721 0
armhf 14886534 100.000%
c6x 2509915 100.000%
csky 0 0
h8300 1229802 100.000%
i386 12040952 0
ia64 18555229 100.000%
m68k 3868729 100.000%
microblaze 5885763 100.000%
mips 9158101 100.000%
mips64 7402870 100.001%
nds32 4833031 100.000%
nios2 3917080 100.000%
openrisc 4571561 100.000%
parisc 7725308 100.000%
parisc64 0 0
powerpc 11004119 100.000%
powerpc64 22618492 100.000%
powerpc64le 19609678 100.000%
riscv32 1639840 100.000%
riscv64 7658668 0
s390 15345481 0
sh 0 0
shnommu 1694176 100.000%
sparc 4744809 100.000%
sparc64 7205254 100.000%
x86_64 19870124 0
xtensa 2658455 100.002%
0 means it did not build... So some targets newly ICE (x86, riscv, z).
It surprisingly only helps alpha a bit, and all other changes are in the
wrong direction (but very slightly).
Segher
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] Add a new pattern in 4-insn combine
2020-11-09 2:48 HAO CHEN GUI
2020-11-11 0:18 ` Jeff Law
@ 2020-11-12 0:22 ` Segher Boessenkool
2020-11-12 14:54 ` Segher Boessenkool
1 sibling, 1 reply; 10+ messages in thread
From: Segher Boessenkool @ 2020-11-12 0:22 UTC (permalink / raw)
To: HAO CHEN GUI; +Cc: gcc-patches
Hi Hao Chen,
[ You first need to add yourself to MAINTAINERS? And get an account to
do that, if you do not yet have one yet :-) ]
On Mon, Nov 09, 2020 at 10:48:19AM +0800, HAO CHEN GUI wrote:
> This patch adds a new pattern in 4-insn combine. It supports the
> following sign_extend(op: zero_extend, zero_extend) optimization. In the
> patch, newpat is split twice. The first split becomes newi1pat and the
> second becomes newi2pat. They replace i1, i2 and i3 if all of them can
> be recognized.
>
> 7: r126:SI=zero_extend([r123:DI+0x1])
> 6: r125:SI=zero_extend([r123:DI])
> 8: r127:SI=r125:SI+r126:SI
> 9: r124:DI=sign_extend(r127:SI)
>
> are replaced by:
>
> 7: r125:DI=zero_extend([r123:DI])
> 8: r127:DI=zero_extend([r123:DI+0x1])
> 9: r124:DI=r127:DI+r125:DI
So in the original insn 8 registers 125 and 126 died, so this isn't
changing insn 7 at all, just moving it back from 6! That is not what
combine should do, but it also is problematic: is it checked anywhere
that we *can* move the insn like this?
> --- a/gcc/combine.c
> +++ b/gcc/combine.c
> @@ -851,10 +851,11 @@ do_SUBST_LINK (struct insn_link **into, struct insn_link *newval)
>
> static bool
> combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
> - rtx newpat, rtx newi2pat, rtx newotherpat)
> + rtx newpat, rtx newi2pat, rtx newotherpat,
> + rtx newi1pat)
(Keep the args ordered: 3, 2, 1, other. "newpat" means "newi3pat").
> @@ -2672,7 +2693,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
> int *new_direct_jump_p, rtx_insn *last_combined_insn)
> {
> /* New patterns for I3 and I2, respectively. */
> - rtx newpat, newi2pat = 0;
> + rtx newpat, newi2pat = 0, newi1pat = 0;
> rtvec newpat_vec_with_clobbers = 0;
> int substed_i2 = 0, substed_i1 = 0, substed_i0 = 0;
> /* Indicates need to preserve SET in I0, I1 or I2 in I3 if it is not
> @@ -2682,8 +2703,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
> int total_sets;
> /* Nonzero if I2's or I1's body now appears in I3. */
> int i2_is_used = 0, i1_is_used = 0;
> - /* INSN_CODEs for new I3, new I2, and user of condition code. */
> + /* INSN_CODEs for new I3, new I2, new I1 and user of condition code. */
(Comma after I1: "new I1, and ...".)
> int insn_code_number, i2_code_number = 0, other_code_number = 0;
> + int i1_code_number = 0;
Well, put it together with i3, i2 then, put "other" on its own line?
> @@ -2756,7 +2778,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
> else if (BINARY_P (src) && CONSTANT_P (XEXP (src, 1)))
> ngood++;
> else if (GET_CODE (src) == ASHIFT || GET_CODE (src) == ASHIFTRT
> - || GET_CODE (src) == LSHIFTRT)
> + || GET_CODE (src) == LSHIFTRT
> + || GET_CODE (src) == SIGN_EXTEND
> + || GET_CODE (src) == ZERO_EXTEND)
> nshift++;
Extends are not shifts. Please count "nextend" separately?
> @@ -3399,6 +3423,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
> i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
> }
>
> + if (i0)
> + {
> + subst_low_luid = DF_INSN_LUID (i0);
> + i0src = subst (i0src, pc_rtx, pc_rtx, 0, 0, 0);
> + }
This won't work? "subst_low_uid" is overwritten right in the next
statement:
> subst_low_luid = DF_INSN_LUID (i2);
> i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
> @@ -3920,6 +3950,50 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
> rtx src_op0 = XEXP (setsrc, 0);
> rtx src_op1 = XEXP (setsrc, 1);
>
> + /* Double split when src of i0 and i1 are both ZERO_EXTEND. */
> + if (i0 && i1
> + && GET_CODE (PATTERN (i0)) == SET
> + && GET_CODE (PATTERN (i1)) == SET
> + && GET_CODE (SET_SRC (PATTERN (i0))) == ZERO_EXTEND
> + && GET_CODE (SET_SRC (PATTERN (i1))) == ZERO_EXTEND
> + && (rtx_equal_p (XEXP (*split, 0),
> + XEXP (SET_SRC (PATTERN (i1)), 0))
> + || rtx_equal_p (XEXP (*split, 0),
> + XEXP (SET_SRC (PATTERN (i0)), 0))))
> + {
> + newi1pat = NULL_RTX;
> + rtx newdest, *i0_i1dest;
> + machine_mode new_mode;
> +
> + new_mode = GET_MODE (*split);
> + if (rtx_equal_p (XEXP (*split, 0),
> + XEXP (SET_SRC (PATTERN (i1)), 0)))
> + i0_i1dest = &i1dest;
> + else
> + i0_i1dest = &i0dest;
> +
> + if (REGNO (i1dest) < FIRST_PSEUDO_REGISTER)
> + newdest = gen_rtx_REG (new_mode, REGNO (*i0_i1dest));
You can only have one mode that you use pseudos in in the whole routine.
This doesn't work, i0_i1dest can be used elsewhere still.
> + else
> + {
> + SUBST_MODE (regno_reg_rtx[REGNO (*i0_i1dest)], new_mode);
> + newdest = regno_reg_rtx[REGNO (*i0_i1dest)];
> + }
Messing with modes in general is a big problem for combine (it still
has some open problems for this, too).
> + newi1pat = gen_rtx_SET (newdest, *split);
> + SUBST (*split, newdest);
> +
> + i1_code_number = recog_for_combine (&newi1pat, i2,
> + &new_i2_notes);
> + if (i1_code_number < 0)
> + {
> + undo_all ();
> + return 0;
> + }
> +
> + split = find_split_point (&newpat, i3, false);
> + }
I doubt that you can call find_split_point twice like this... It likely
still needs to check that the insns it ends up with for newi1 and newi2
can go, in that order, where i1 and i2 used to be. For i2 and i3 this
is much simpler, but now we will need a lot of extra work to do these
checks.
> + else if (reg == i0dest)
> + {
> + propagate_for_debug (i0, last_combined_insn, reg, i0src,
> + this_basic_block);
> + adjust_reg_mode (reg, new_mode);
> + }
How can i0dest ever end up in the final insns? Ouch.
I'm running an all-arch comparison with this patch, just to see what it
does, but please make it so that for
7: r126:SI=zero_extend([r123:DI+0x1])
6: r125:SI=zero_extend([r123:DI])
8: r127:SI=r125:SI+r126:SI
9: r124:DI=sign_extend(r127:SI)
8+9 is combined to just
9: r124:DI=r125:DI+r126:DI
and that's all? (The "compact" format leaves out crucial information,
but the loads here are QImode loads.)
I don't think 4->3 combines will do very much that cannot be done by
some combination of 2->1, 3->1, 4->1, 2->2, 3->2, 4->2, but we'll see.
It will be quite expensive to do in any case (combine isn't one of the
expensive passes anymore, but let's try to keep it that way ;-) )
Segher
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] Add a new pattern in 4-insn combine
2020-11-09 2:48 HAO CHEN GUI
@ 2020-11-11 0:18 ` Jeff Law
2020-11-14 3:30 ` Jim Wilson
2020-11-12 0:22 ` Segher Boessenkool
1 sibling, 1 reply; 10+ messages in thread
From: Jeff Law @ 2020-11-11 0:18 UTC (permalink / raw)
To: HAO CHEN GUI, gcc-patches; +Cc: Segher Boessenkool
On 11/8/20 7:48 PM, HAO CHEN GUI via Gcc-patches wrote:
> Hi,
>
> This patch adds a new pattern in 4-insn combine. It supports the
> following sign_extend(op: zero_extend, zero_extend) optimization. In
> the patch, newpat is split twice. The first split becomes newi1pat and
> the second becomes newi2pat. They replace i1, i2 and i3 if all of them
> can be recognized.
>
> 7: r126:SI=zero_extend([r123:DI+0x1])
> 6: r125:SI=zero_extend([r123:DI])
> 8: r127:SI=r125:SI+r126:SI
> 9: r124:DI=sign_extend(r127:SI)
>
> are replaced by:
>
> 7: r125:DI=zero_extend([r123:DI])
> 8: r127:DI=zero_extend([r123:DI+0x1])
> 9: r124:DI=r127:DI+r125:DI
>
> The attachments are the patch diff file and change log file.
>
> Bootstrapped and tested on powerpc64le-linux-gnu with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
>
>
>
> ChangeLog
>
> * combine.c (combine_validate_cost): Add an argument for newi1pat.
> (try_combine): Add a 4-insn combine pattern for optimizing rtx
> sign_extend (op:zero_extend, zero_extend).
It'd be nice to see motivating examples. Depending on their structure,
we may get better results cleaning things up with match.pd patterns. We
already have some which work in this space.
Whether or not the combiner change itself moves forward is up to Segher
though.
jeff
>
> patch.diff
>
> diff --git a/gcc/combine.c b/gcc/combine.c
> index c88382efbd3..73259e6a9ed 100644
> --- a/gcc/combine.c
> +++ b/gcc/combine.c
> @@ -851,10 +851,11 @@ do_SUBST_LINK (struct insn_link **into, struct insn_link *newval)
>
> static bool
> combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
> - rtx newpat, rtx newi2pat, rtx newotherpat)
> + rtx newpat, rtx newi2pat, rtx newotherpat,
> + rtx newi1pat)
> {
> int i0_cost, i1_cost, i2_cost, i3_cost;
> - int new_i2_cost, new_i3_cost;
> + int new_i1_cost, new_i2_cost, new_i3_cost;
> int old_cost, new_cost;
>
> /* Lookup the original insn_costs. */
> @@ -915,6 +916,20 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
> new_i2_cost = 0;
> }
>
> + if (newi1pat)
> + {
> + tmp = PATTERN (i1);
> + PATTERN (i1) = newi1pat;
> + tmpi = INSN_CODE (i1);
> + INSN_CODE (i1) = -1;
> + new_i1_cost = insn_cost (i1, optimize_this_for_speed_p);
> + PATTERN (i1) = tmp;
> + INSN_CODE (i1) = tmpi;
> + new_cost = new_i1_cost > 0 ? new_i1_cost + new_cost : 0;
> + }
> + else
> + new_i1_cost = 0;
> +
> if (undobuf.other_insn)
> {
> int old_other_cost, new_other_cost;
> @@ -958,7 +973,10 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
> fprintf (dump_file, "%d + ", i1_cost);
> fprintf (dump_file, "%d + %d = %d\n", i2_cost, i3_cost, old_cost);
>
> - if (newi2pat)
> + if (newi1pat)
> + fprintf (dump_file, "replacement costs %d + %d + %d = %d\n",
> + new_i1_cost, new_i2_cost, new_i3_cost, new_cost);
> + else if (newi2pat)
> fprintf (dump_file, "replacement costs %d + %d = %d\n",
> new_i2_cost, new_i3_cost, new_cost);
> else
> @@ -973,7 +991,10 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
> INSN_COST (i3) = new_i3_cost;
> if (i1)
> {
> - INSN_COST (i1) = 0;
> + if (newi1pat)
> + INSN_COST (i1) = new_i1_cost;
> + else
> + INSN_COST (i1) = 0;
> if (i0)
> INSN_COST (i0) = 0;
> }
> @@ -2672,7 +2693,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
> int *new_direct_jump_p, rtx_insn *last_combined_insn)
> {
> /* New patterns for I3 and I2, respectively. */
> - rtx newpat, newi2pat = 0;
> + rtx newpat, newi2pat = 0, newi1pat = 0;
> rtvec newpat_vec_with_clobbers = 0;
> int substed_i2 = 0, substed_i1 = 0, substed_i0 = 0;
> /* Indicates need to preserve SET in I0, I1 or I2 in I3 if it is not
> @@ -2682,8 +2703,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
> int total_sets;
> /* Nonzero if I2's or I1's body now appears in I3. */
> int i2_is_used = 0, i1_is_used = 0;
> - /* INSN_CODEs for new I3, new I2, and user of condition code. */
> + /* INSN_CODEs for new I3, new I2, new I1 and user of condition code. */
> int insn_code_number, i2_code_number = 0, other_code_number = 0;
> + int i1_code_number = 0;
> /* Contains I3 if the destination of I3 is used in its source, which means
> that the old life of I3 is being killed. If that usage is placed into
> I2 and not in I3, a REG_DEAD note must be made. */
> @@ -2756,7 +2778,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
> else if (BINARY_P (src) && CONSTANT_P (XEXP (src, 1)))
> ngood++;
> else if (GET_CODE (src) == ASHIFT || GET_CODE (src) == ASHIFTRT
> - || GET_CODE (src) == LSHIFTRT)
> + || GET_CODE (src) == LSHIFTRT
> + || GET_CODE (src) == SIGN_EXTEND
> + || GET_CODE (src) == ZERO_EXTEND)
> nshift++;
> }
>
> @@ -3399,6 +3423,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
> i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
> }
>
> + if (i0)
> + {
> + subst_low_luid = DF_INSN_LUID (i0);
> + i0src = subst (i0src, pc_rtx, pc_rtx, 0, 0, 0);
> + }
> +
> subst_low_luid = DF_INSN_LUID (i2);
> i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
> }
> @@ -3920,6 +3950,50 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
> rtx src_op0 = XEXP (setsrc, 0);
> rtx src_op1 = XEXP (setsrc, 1);
>
> + /* Double split when src of i0 and i1 are both ZERO_EXTEND. */
> + if (i0 && i1
> + && GET_CODE (PATTERN (i0)) == SET
> + && GET_CODE (PATTERN (i1)) == SET
> + && GET_CODE (SET_SRC (PATTERN (i0))) == ZERO_EXTEND
> + && GET_CODE (SET_SRC (PATTERN (i1))) == ZERO_EXTEND
> + && (rtx_equal_p (XEXP (*split, 0),
> + XEXP (SET_SRC (PATTERN (i1)), 0))
> + || rtx_equal_p (XEXP (*split, 0),
> + XEXP (SET_SRC (PATTERN (i0)), 0))))
> + {
> + newi1pat = NULL_RTX;
> + rtx newdest, *i0_i1dest;
> + machine_mode new_mode;
> +
> + new_mode = GET_MODE (*split);
> + if (rtx_equal_p (XEXP (*split, 0),
> + XEXP (SET_SRC (PATTERN (i1)), 0)))
> + i0_i1dest = &i1dest;
> + else
> + i0_i1dest = &i0dest;
> +
> + if (REGNO (i1dest) < FIRST_PSEUDO_REGISTER)
> + newdest = gen_rtx_REG (new_mode, REGNO (*i0_i1dest));
> + else
> + {
> + SUBST_MODE (regno_reg_rtx[REGNO (*i0_i1dest)], new_mode);
> + newdest = regno_reg_rtx[REGNO (*i0_i1dest)];
> + }
> +
> + newi1pat = gen_rtx_SET (newdest, *split);
> + SUBST (*split, newdest);
> +
> + i1_code_number = recog_for_combine (&newi1pat, i2,
> + &new_i2_notes);
> + if (i1_code_number < 0)
> + {
> + undo_all ();
> + return 0;
> + }
> +
> + split = find_split_point (&newpat, i3, false);
> + }
> +
> /* Split "X = Y op Y" as "Z = Y; X = Z op Z". */
> if (rtx_equal_p (src_op0, src_op1))
> {
> @@ -4209,11 +4283,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
>
> /* Only allow this combination if insn_cost reports that the
> replacement instructions are cheaper than the originals. */
> - if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat))
> - {
> - undo_all ();
> - return 0;
> - }
> + if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat,
> + newi1pat))
> + {
> + undo_all ();
> + return 0;
> + }
>
> if (MAY_HAVE_DEBUG_BIND_INSNS)
> {
> @@ -4242,6 +4317,18 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
> /* Put back the new mode. */
> adjust_reg_mode (reg, new_mode);
> }
> + else if (reg == i0dest)
> + {
> + propagate_for_debug (i0, last_combined_insn, reg, i0src,
> + this_basic_block);
> + adjust_reg_mode (reg, new_mode);
> + }
> + else if (reg == i1dest)
> + {
> + propagate_for_debug (i1, last_combined_insn, reg, i1src,
> + this_basic_block);
> + adjust_reg_mode (reg, new_mode);
> + }
> else
> {
> rtx tempreg = gen_raw_REG (old_mode, REGNO (reg));
> @@ -4552,10 +4639,21 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
> {
> LOG_LINKS (i1) = NULL;
> REG_NOTES (i1) = 0;
> - if (MAY_HAVE_DEBUG_BIND_INSNS)
> - propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
> - this_basic_block);
> - SET_INSN_DELETED (i1);
> + if (newi1pat)
> + {
> + if (MAY_HAVE_DEBUG_BIND_INSNS)
> + propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
> + this_basic_block);
> + INSN_CODE (i1) = i1_code_number;
> + PATTERN (i1) = newi1pat;
> + }
> + else
> + {
> + if (MAY_HAVE_DEBUG_BIND_INSNS)
> + propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
> + this_basic_block);
> + SET_INSN_DELETED (i1);
> + }
> }
>
> if (i0)
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH] Add a new pattern in 4-insn combine
@ 2020-11-09 2:48 HAO CHEN GUI
2020-11-11 0:18 ` Jeff Law
2020-11-12 0:22 ` Segher Boessenkool
0 siblings, 2 replies; 10+ messages in thread
From: HAO CHEN GUI @ 2020-11-09 2:48 UTC (permalink / raw)
To: gcc-patches; +Cc: Segher Boessenkool
[-- Attachment #1: Type: text/plain, Size: 751 bytes --]
Hi,
This patch adds a new pattern in 4-insn combine. It supports the
following sign_extend(op: zero_extend, zero_extend) optimization. In the
patch, newpat is split twice. The first split becomes newi1pat and the
second becomes newi2pat. They replace i1, i2 and i3 if all of them can
be recognized.
7: r126:SI=zero_extend([r123:DI+0x1])
6: r125:SI=zero_extend([r123:DI])
8: r127:SI=r125:SI+r126:SI
9: r124:DI=sign_extend(r127:SI)
are replaced by:
7: r125:DI=zero_extend([r123:DI])
8: r127:DI=zero_extend([r123:DI+0x1])
9: r124:DI=r127:DI+r125:DI
The attachments are the patch diff file and change log file.
Bootstrapped and tested on powerpc64le-linux-gnu with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.
[-- Attachment #2: ChangeLog --]
[-- Type: text/plain, Size: 176 bytes --]
* combine.c (combine_validate_cost): Add an argument for newi1pat.
(try_combine): Add a 4-insn combine pattern for optimizing rtx
sign_extend (op:zero_extend, zero_extend).
[-- Attachment #3: patch.diff --]
[-- Type: text/plain, Size: 7331 bytes --]
diff --git a/gcc/combine.c b/gcc/combine.c
index c88382efbd3..73259e6a9ed 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -851,10 +851,11 @@ do_SUBST_LINK (struct insn_link **into, struct insn_link *newval)
static bool
combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
- rtx newpat, rtx newi2pat, rtx newotherpat)
+ rtx newpat, rtx newi2pat, rtx newotherpat,
+ rtx newi1pat)
{
int i0_cost, i1_cost, i2_cost, i3_cost;
- int new_i2_cost, new_i3_cost;
+ int new_i1_cost, new_i2_cost, new_i3_cost;
int old_cost, new_cost;
/* Lookup the original insn_costs. */
@@ -915,6 +916,20 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
new_i2_cost = 0;
}
+ if (newi1pat)
+ {
+ tmp = PATTERN (i1);
+ PATTERN (i1) = newi1pat;
+ tmpi = INSN_CODE (i1);
+ INSN_CODE (i1) = -1;
+ new_i1_cost = insn_cost (i1, optimize_this_for_speed_p);
+ PATTERN (i1) = tmp;
+ INSN_CODE (i1) = tmpi;
+ new_cost = new_i1_cost > 0 ? new_i1_cost + new_cost : 0;
+ }
+ else
+ new_i1_cost = 0;
+
if (undobuf.other_insn)
{
int old_other_cost, new_other_cost;
@@ -958,7 +973,10 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
fprintf (dump_file, "%d + ", i1_cost);
fprintf (dump_file, "%d + %d = %d\n", i2_cost, i3_cost, old_cost);
- if (newi2pat)
+ if (newi1pat)
+ fprintf (dump_file, "replacement costs %d + %d + %d = %d\n",
+ new_i1_cost, new_i2_cost, new_i3_cost, new_cost);
+ else if (newi2pat)
fprintf (dump_file, "replacement costs %d + %d = %d\n",
new_i2_cost, new_i3_cost, new_cost);
else
@@ -973,7 +991,10 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
INSN_COST (i3) = new_i3_cost;
if (i1)
{
- INSN_COST (i1) = 0;
+ if (newi1pat)
+ INSN_COST (i1) = new_i1_cost;
+ else
+ INSN_COST (i1) = 0;
if (i0)
INSN_COST (i0) = 0;
}
@@ -2672,7 +2693,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
int *new_direct_jump_p, rtx_insn *last_combined_insn)
{
/* New patterns for I3 and I2, respectively. */
- rtx newpat, newi2pat = 0;
+ rtx newpat, newi2pat = 0, newi1pat = 0;
rtvec newpat_vec_with_clobbers = 0;
int substed_i2 = 0, substed_i1 = 0, substed_i0 = 0;
/* Indicates need to preserve SET in I0, I1 or I2 in I3 if it is not
@@ -2682,8 +2703,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
int total_sets;
/* Nonzero if I2's or I1's body now appears in I3. */
int i2_is_used = 0, i1_is_used = 0;
- /* INSN_CODEs for new I3, new I2, and user of condition code. */
+ /* INSN_CODEs for new I3, new I2, new I1 and user of condition code. */
int insn_code_number, i2_code_number = 0, other_code_number = 0;
+ int i1_code_number = 0;
/* Contains I3 if the destination of I3 is used in its source, which means
that the old life of I3 is being killed. If that usage is placed into
I2 and not in I3, a REG_DEAD note must be made. */
@@ -2756,7 +2778,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
else if (BINARY_P (src) && CONSTANT_P (XEXP (src, 1)))
ngood++;
else if (GET_CODE (src) == ASHIFT || GET_CODE (src) == ASHIFTRT
- || GET_CODE (src) == LSHIFTRT)
+ || GET_CODE (src) == LSHIFTRT
+ || GET_CODE (src) == SIGN_EXTEND
+ || GET_CODE (src) == ZERO_EXTEND)
nshift++;
}
@@ -3399,6 +3423,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
}
+ if (i0)
+ {
+ subst_low_luid = DF_INSN_LUID (i0);
+ i0src = subst (i0src, pc_rtx, pc_rtx, 0, 0, 0);
+ }
+
subst_low_luid = DF_INSN_LUID (i2);
i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
}
@@ -3920,6 +3950,50 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
rtx src_op0 = XEXP (setsrc, 0);
rtx src_op1 = XEXP (setsrc, 1);
+ /* Double split when src of i0 and i1 are both ZERO_EXTEND. */
+ if (i0 && i1
+ && GET_CODE (PATTERN (i0)) == SET
+ && GET_CODE (PATTERN (i1)) == SET
+ && GET_CODE (SET_SRC (PATTERN (i0))) == ZERO_EXTEND
+ && GET_CODE (SET_SRC (PATTERN (i1))) == ZERO_EXTEND
+ && (rtx_equal_p (XEXP (*split, 0),
+ XEXP (SET_SRC (PATTERN (i1)), 0))
+ || rtx_equal_p (XEXP (*split, 0),
+ XEXP (SET_SRC (PATTERN (i0)), 0))))
+ {
+ newi1pat = NULL_RTX;
+ rtx newdest, *i0_i1dest;
+ machine_mode new_mode;
+
+ new_mode = GET_MODE (*split);
+ if (rtx_equal_p (XEXP (*split, 0),
+ XEXP (SET_SRC (PATTERN (i1)), 0)))
+ i0_i1dest = &i1dest;
+ else
+ i0_i1dest = &i0dest;
+
+ if (REGNO (i1dest) < FIRST_PSEUDO_REGISTER)
+ newdest = gen_rtx_REG (new_mode, REGNO (*i0_i1dest));
+ else
+ {
+ SUBST_MODE (regno_reg_rtx[REGNO (*i0_i1dest)], new_mode);
+ newdest = regno_reg_rtx[REGNO (*i0_i1dest)];
+ }
+
+ newi1pat = gen_rtx_SET (newdest, *split);
+ SUBST (*split, newdest);
+
+ i1_code_number = recog_for_combine (&newi1pat, i2,
+ &new_i2_notes);
+ if (i1_code_number < 0)
+ {
+ undo_all ();
+ return 0;
+ }
+
+ split = find_split_point (&newpat, i3, false);
+ }
+
/* Split "X = Y op Y" as "Z = Y; X = Z op Z". */
if (rtx_equal_p (src_op0, src_op1))
{
@@ -4209,11 +4283,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
/* Only allow this combination if insn_cost reports that the
replacement instructions are cheaper than the originals. */
- if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat))
- {
- undo_all ();
- return 0;
- }
+ if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat,
+ newi1pat))
+ {
+ undo_all ();
+ return 0;
+ }
if (MAY_HAVE_DEBUG_BIND_INSNS)
{
@@ -4242,6 +4317,18 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
/* Put back the new mode. */
adjust_reg_mode (reg, new_mode);
}
+ else if (reg == i0dest)
+ {
+ propagate_for_debug (i0, last_combined_insn, reg, i0src,
+ this_basic_block);
+ adjust_reg_mode (reg, new_mode);
+ }
+ else if (reg == i1dest)
+ {
+ propagate_for_debug (i1, last_combined_insn, reg, i1src,
+ this_basic_block);
+ adjust_reg_mode (reg, new_mode);
+ }
else
{
rtx tempreg = gen_raw_REG (old_mode, REGNO (reg));
@@ -4552,10 +4639,21 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
{
LOG_LINKS (i1) = NULL;
REG_NOTES (i1) = 0;
- if (MAY_HAVE_DEBUG_BIND_INSNS)
- propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
- this_basic_block);
- SET_INSN_DELETED (i1);
+ if (newi1pat)
+ {
+ if (MAY_HAVE_DEBUG_BIND_INSNS)
+ propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
+ this_basic_block);
+ INSN_CODE (i1) = i1_code_number;
+ PATTERN (i1) = newi1pat;
+ }
+ else
+ {
+ if (MAY_HAVE_DEBUG_BIND_INSNS)
+ propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
+ this_basic_block);
+ SET_INSN_DELETED (i1);
+ }
}
if (i0)
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2021-01-15 19:02 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-30 3:08 [PATCH] Add a new pattern in 4-insn combine HAO CHEN GUI
2020-12-11 2:14 ` HAO CHEN GUI
2021-01-04 2:03 ` HAO CHEN GUI
2021-01-14 8:44 ` HAO CHEN GUI
2021-01-15 19:01 ` Segher Boessenkool
-- strict thread matches above, loose matches on Subject: below --
2020-11-09 2:48 HAO CHEN GUI
2020-11-11 0:18 ` Jeff Law
2020-11-14 3:30 ` Jim Wilson
2020-11-12 0:22 ` Segher Boessenkool
2020-11-12 14:54 ` Segher Boessenkool
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).