* [PATCH] [APX PPX] Avoid generating unmatched pushp/popp in pro/epilogue
@ 2024-07-02 3:23 Hongyu Wang
2024-07-02 8:17 ` Richard Biener
2024-07-04 3:00 ` Hongtao Liu
0 siblings, 2 replies; 4+ messages in thread
From: Hongyu Wang @ 2024-07-02 3:23 UTC (permalink / raw)
To: gcc-patches; +Cc: hongtao.liu, ubizjak
Hi,
According to APX spec, the pushp/popp pairs should be matched,
otherwise the PPX hint cannot take effect and cause performance loss.
In the ix86_expand_epilogue, there are several optimizations that may
cause the epilogue using mov to restore the regs. Check if PPX applied
and prevent usage of mov/leave in the epilogue.
Bootstrapped/regtested on x86_64-pc-linux-gnu.
Ok for trunk?
gcc/ChangeLog:
* config/i386/i386.cc (ix86_expand_prologue): Set apx_ppx_used
flag in m.fs with TARGET_APX_PPX && !crtl->calls_eh_return.
(ix86_emit_save_regs): Emit ppx is available only when
TARGET_APX_PPX && !crtl->calls_eh_return.
(ix86_expand_epilogue): Don't restore reg using mov when
apx_ppx_used flag is true.
* config/i386/i386.h (struct machine_frame_state):
Add apx_ppx_used flag.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ppx-2.c: New test.
* gcc.target/i386/apx-ppx-3.c: Likewise.
---
gcc/config/i386/i386.cc | 13 +++++++++----
gcc/config/i386/i386.h | 4 ++++
gcc/testsuite/gcc.target/i386/apx-ppx-2.c | 14 ++++++++++++++
gcc/testsuite/gcc.target/i386/apx-ppx-3.c | 7 +++++++
4 files changed, 34 insertions(+), 4 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-3.c
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index bd7411190af..99def8d4a77 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -7429,6 +7429,7 @@ ix86_emit_save_regs (void)
{
int regno;
rtx_insn *insn;
+ bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
if (!TARGET_APX_PUSH2POP2
|| !ix86_can_use_push2pop2 ()
@@ -7438,7 +7439,7 @@ ix86_emit_save_regs (void)
if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
{
insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
- TARGET_APX_PPX));
+ use_ppx));
RTX_FRAME_RELATED_P (insn) = 1;
}
}
@@ -7469,7 +7470,7 @@ ix86_emit_save_regs (void)
regno_list[0]),
gen_rtx_REG (word_mode,
regno_list[1]),
- TARGET_APX_PPX));
+ use_ppx));
RTX_FRAME_RELATED_P (insn) = 1;
rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
@@ -7502,7 +7503,7 @@ ix86_emit_save_regs (void)
else
{
insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
- TARGET_APX_PPX));
+ use_ppx));
RTX_FRAME_RELATED_P (insn) = 1;
aligned = true;
}
@@ -7511,7 +7512,7 @@ ix86_emit_save_regs (void)
{
insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
regno_list[0]),
- TARGET_APX_PPX));
+ use_ppx));
RTX_FRAME_RELATED_P (insn) = 1;
}
}
@@ -8985,6 +8986,7 @@ ix86_expand_prologue (void)
if (!frame.save_regs_using_mov)
{
ix86_emit_save_regs ();
+ m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
int_registers_saved = true;
gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
}
@@ -9870,6 +9872,9 @@ ix86_expand_epilogue (int style)
/* SEH requires the use of pops to identify the epilogue. */
else if (TARGET_SEH)
restore_regs_via_mov = false;
+ /* If we already save reg with pushp, don't use move at epilogue. */
+ else if (m->fs.apx_ppx_used)
+ restore_regs_via_mov = false;
/* If we're only restoring one register and sp cannot be used then
using a move instruction to restore the register since it's
less work than reloading sp and popping the register. */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 147b12cd014..0c5292e1d64 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2693,6 +2693,10 @@ struct GTY(()) machine_frame_state
The flags realigned and sp_realigned are mutually exclusive. */
BOOL_BITFIELD sp_realigned : 1;
+ /* When APX_PPX used in prologue, force epilogue to emit
+ popp instead of move and leave. */
+ BOOL_BITFIELD apx_ppx_used : 1;
+
/* If sp_realigned is set, this is the last valid offset from the CFA
that can be used for access with the frame pointer. */
HOST_WIDE_INT sp_realigned_fp_last;
diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-2.c b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
new file mode 100644
index 00000000000..42a95340b55
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O1 -mapx-features=ppx -fno-omit-frame-pointer" } */
+
+/* { dg-final { scan-assembler "pushp" } } */
+/* { dg-final { scan-assembler "popp" } } */
+/* { dg-final { scan-assembler-not "leave" } } */
+
+extern int bar (int a);
+extern int *q;
+
+void foo (int *a)
+{
+ q[2] = bar (q[1]);
+}
diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-3.c b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
new file mode 100644
index 00000000000..76931fbe294
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mapx-features=ppx" } */
+
+/* { dg-final { scan-assembler-not "pushp" } } */
+/* { dg-final { scan-assembler-not "popp" } } */
+
+#include "eh_return-2.c"
--
2.31.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] [APX PPX] Avoid generating unmatched pushp/popp in pro/epilogue
2024-07-02 3:23 [PATCH] [APX PPX] Avoid generating unmatched pushp/popp in pro/epilogue Hongyu Wang
@ 2024-07-02 8:17 ` Richard Biener
2024-07-02 8:38 ` Hongyu Wang
2024-07-04 3:00 ` Hongtao Liu
1 sibling, 1 reply; 4+ messages in thread
From: Richard Biener @ 2024-07-02 8:17 UTC (permalink / raw)
To: Hongyu Wang; +Cc: gcc-patches, hongtao.liu, ubizjak
On Tue, Jul 2, 2024 at 5:24 AM Hongyu Wang <hongyu.wang@intel.com> wrote:
>
> Hi,
>
> According to APX spec, the pushp/popp pairs should be matched,
> otherwise the PPX hint cannot take effect and cause performance loss.
>
> In the ix86_expand_epilogue, there are several optimizations that may
> cause the epilogue using mov to restore the regs. Check if PPX applied
> and prevent usage of mov/leave in the epilogue.
Isn't it true in general that mismatched push/pop confuses a CPUs stack
engine? Does the distance of the push/pop make any difference here
or in general?
> Bootstrapped/regtested on x86_64-pc-linux-gnu.
>
> Ok for trunk?
>
> gcc/ChangeLog:
>
> * config/i386/i386.cc (ix86_expand_prologue): Set apx_ppx_used
> flag in m.fs with TARGET_APX_PPX && !crtl->calls_eh_return.
> (ix86_emit_save_regs): Emit ppx is available only when
> TARGET_APX_PPX && !crtl->calls_eh_return.
> (ix86_expand_epilogue): Don't restore reg using mov when
> apx_ppx_used flag is true.
> * config/i386/i386.h (struct machine_frame_state):
> Add apx_ppx_used flag.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/apx-ppx-2.c: New test.
> * gcc.target/i386/apx-ppx-3.c: Likewise.
> ---
> gcc/config/i386/i386.cc | 13 +++++++++----
> gcc/config/i386/i386.h | 4 ++++
> gcc/testsuite/gcc.target/i386/apx-ppx-2.c | 14 ++++++++++++++
> gcc/testsuite/gcc.target/i386/apx-ppx-3.c | 7 +++++++
> 4 files changed, 34 insertions(+), 4 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-3.c
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index bd7411190af..99def8d4a77 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -7429,6 +7429,7 @@ ix86_emit_save_regs (void)
> {
> int regno;
> rtx_insn *insn;
> + bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
>
> if (!TARGET_APX_PUSH2POP2
> || !ix86_can_use_push2pop2 ()
> @@ -7438,7 +7439,7 @@ ix86_emit_save_regs (void)
> if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
> {
> insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
> - TARGET_APX_PPX));
> + use_ppx));
> RTX_FRAME_RELATED_P (insn) = 1;
> }
> }
> @@ -7469,7 +7470,7 @@ ix86_emit_save_regs (void)
> regno_list[0]),
> gen_rtx_REG (word_mode,
> regno_list[1]),
> - TARGET_APX_PPX));
> + use_ppx));
> RTX_FRAME_RELATED_P (insn) = 1;
> rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
>
> @@ -7502,7 +7503,7 @@ ix86_emit_save_regs (void)
> else
> {
> insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
> - TARGET_APX_PPX));
> + use_ppx));
> RTX_FRAME_RELATED_P (insn) = 1;
> aligned = true;
> }
> @@ -7511,7 +7512,7 @@ ix86_emit_save_regs (void)
> {
> insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
> regno_list[0]),
> - TARGET_APX_PPX));
> + use_ppx));
> RTX_FRAME_RELATED_P (insn) = 1;
> }
> }
> @@ -8985,6 +8986,7 @@ ix86_expand_prologue (void)
> if (!frame.save_regs_using_mov)
> {
> ix86_emit_save_regs ();
> + m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
> int_registers_saved = true;
> gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
> }
> @@ -9870,6 +9872,9 @@ ix86_expand_epilogue (int style)
> /* SEH requires the use of pops to identify the epilogue. */
> else if (TARGET_SEH)
> restore_regs_via_mov = false;
> + /* If we already save reg with pushp, don't use move at epilogue. */
> + else if (m->fs.apx_ppx_used)
> + restore_regs_via_mov = false;
> /* If we're only restoring one register and sp cannot be used then
> using a move instruction to restore the register since it's
> less work than reloading sp and popping the register. */
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 147b12cd014..0c5292e1d64 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -2693,6 +2693,10 @@ struct GTY(()) machine_frame_state
> The flags realigned and sp_realigned are mutually exclusive. */
> BOOL_BITFIELD sp_realigned : 1;
>
> + /* When APX_PPX used in prologue, force epilogue to emit
> + popp instead of move and leave. */
> + BOOL_BITFIELD apx_ppx_used : 1;
> +
> /* If sp_realigned is set, this is the last valid offset from the CFA
> that can be used for access with the frame pointer. */
> HOST_WIDE_INT sp_realigned_fp_last;
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-2.c b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> new file mode 100644
> index 00000000000..42a95340b55
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O1 -mapx-features=ppx -fno-omit-frame-pointer" } */
> +
> +/* { dg-final { scan-assembler "pushp" } } */
> +/* { dg-final { scan-assembler "popp" } } */
> +/* { dg-final { scan-assembler-not "leave" } } */
> +
> +extern int bar (int a);
> +extern int *q;
> +
> +void foo (int *a)
> +{
> + q[2] = bar (q[1]);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-3.c b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> new file mode 100644
> index 00000000000..76931fbe294
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> @@ -0,0 +1,7 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mapx-features=ppx" } */
> +
> +/* { dg-final { scan-assembler-not "pushp" } } */
> +/* { dg-final { scan-assembler-not "popp" } } */
> +
> +#include "eh_return-2.c"
> --
> 2.31.1
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] [APX PPX] Avoid generating unmatched pushp/popp in pro/epilogue
2024-07-02 8:17 ` Richard Biener
@ 2024-07-02 8:38 ` Hongyu Wang
0 siblings, 0 replies; 4+ messages in thread
From: Hongyu Wang @ 2024-07-02 8:38 UTC (permalink / raw)
To: Richard Biener; +Cc: Hongyu Wang, gcc-patches, hongtao.liu, ubizjak
> Isn't it true in general that mismatched push/pop confuses a CPUs stack
> engine? Does the distance of the push/pop make any difference here
> or in general?
We are not sure about the impact for distance, but the PPX hint was
just a performance hint. As mentioned in the apx spec, the mismatched
pushp/popp pair does confused the fast-forwarding logic and turns off
the PPX optimization. We just need to make sure every pushp for a
certain reg has corresponding popp for that reg.
Richard Biener <richard.guenther@gmail.com> 于2024年7月2日周二 16:18写道:
>
> On Tue, Jul 2, 2024 at 5:24 AM Hongyu Wang <hongyu.wang@intel.com> wrote:
> >
> > Hi,
> >
> > According to APX spec, the pushp/popp pairs should be matched,
> > otherwise the PPX hint cannot take effect and cause performance loss.
> >
> > In the ix86_expand_epilogue, there are several optimizations that may
> > cause the epilogue using mov to restore the regs. Check if PPX applied
> > and prevent usage of mov/leave in the epilogue.
>
> Isn't it true in general that mismatched push/pop confuses a CPUs stack
> engine? Does the distance of the push/pop make any difference here
> or in general?
>
> > Bootstrapped/regtested on x86_64-pc-linux-gnu.
> >
> > Ok for trunk?
> >
> > gcc/ChangeLog:
> >
> > * config/i386/i386.cc (ix86_expand_prologue): Set apx_ppx_used
> > flag in m.fs with TARGET_APX_PPX && !crtl->calls_eh_return.
> > (ix86_emit_save_regs): Emit ppx is available only when
> > TARGET_APX_PPX && !crtl->calls_eh_return.
> > (ix86_expand_epilogue): Don't restore reg using mov when
> > apx_ppx_used flag is true.
> > * config/i386/i386.h (struct machine_frame_state):
> > Add apx_ppx_used flag.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/apx-ppx-2.c: New test.
> > * gcc.target/i386/apx-ppx-3.c: Likewise.
> > ---
> > gcc/config/i386/i386.cc | 13 +++++++++----
> > gcc/config/i386/i386.h | 4 ++++
> > gcc/testsuite/gcc.target/i386/apx-ppx-2.c | 14 ++++++++++++++
> > gcc/testsuite/gcc.target/i386/apx-ppx-3.c | 7 +++++++
> > 4 files changed, 34 insertions(+), 4 deletions(-)
> > create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> > create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> >
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index bd7411190af..99def8d4a77 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -7429,6 +7429,7 @@ ix86_emit_save_regs (void)
> > {
> > int regno;
> > rtx_insn *insn;
> > + bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
> >
> > if (!TARGET_APX_PUSH2POP2
> > || !ix86_can_use_push2pop2 ()
> > @@ -7438,7 +7439,7 @@ ix86_emit_save_regs (void)
> > if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
> > {
> > insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
> > - TARGET_APX_PPX));
> > + use_ppx));
> > RTX_FRAME_RELATED_P (insn) = 1;
> > }
> > }
> > @@ -7469,7 +7470,7 @@ ix86_emit_save_regs (void)
> > regno_list[0]),
> > gen_rtx_REG (word_mode,
> > regno_list[1]),
> > - TARGET_APX_PPX));
> > + use_ppx));
> > RTX_FRAME_RELATED_P (insn) = 1;
> > rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
> >
> > @@ -7502,7 +7503,7 @@ ix86_emit_save_regs (void)
> > else
> > {
> > insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
> > - TARGET_APX_PPX));
> > + use_ppx));
> > RTX_FRAME_RELATED_P (insn) = 1;
> > aligned = true;
> > }
> > @@ -7511,7 +7512,7 @@ ix86_emit_save_regs (void)
> > {
> > insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
> > regno_list[0]),
> > - TARGET_APX_PPX));
> > + use_ppx));
> > RTX_FRAME_RELATED_P (insn) = 1;
> > }
> > }
> > @@ -8985,6 +8986,7 @@ ix86_expand_prologue (void)
> > if (!frame.save_regs_using_mov)
> > {
> > ix86_emit_save_regs ();
> > + m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
> > int_registers_saved = true;
> > gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
> > }
> > @@ -9870,6 +9872,9 @@ ix86_expand_epilogue (int style)
> > /* SEH requires the use of pops to identify the epilogue. */
> > else if (TARGET_SEH)
> > restore_regs_via_mov = false;
> > + /* If we already save reg with pushp, don't use move at epilogue. */
> > + else if (m->fs.apx_ppx_used)
> > + restore_regs_via_mov = false;
> > /* If we're only restoring one register and sp cannot be used then
> > using a move instruction to restore the register since it's
> > less work than reloading sp and popping the register. */
> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> > index 147b12cd014..0c5292e1d64 100644
> > --- a/gcc/config/i386/i386.h
> > +++ b/gcc/config/i386/i386.h
> > @@ -2693,6 +2693,10 @@ struct GTY(()) machine_frame_state
> > The flags realigned and sp_realigned are mutually exclusive. */
> > BOOL_BITFIELD sp_realigned : 1;
> >
> > + /* When APX_PPX used in prologue, force epilogue to emit
> > + popp instead of move and leave. */
> > + BOOL_BITFIELD apx_ppx_used : 1;
> > +
> > /* If sp_realigned is set, this is the last valid offset from the CFA
> > that can be used for access with the frame pointer. */
> > HOST_WIDE_INT sp_realigned_fp_last;
> > diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-2.c b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> > new file mode 100644
> > index 00000000000..42a95340b55
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> > @@ -0,0 +1,14 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O1 -mapx-features=ppx -fno-omit-frame-pointer" } */
> > +
> > +/* { dg-final { scan-assembler "pushp" } } */
> > +/* { dg-final { scan-assembler "popp" } } */
> > +/* { dg-final { scan-assembler-not "leave" } } */
> > +
> > +extern int bar (int a);
> > +extern int *q;
> > +
> > +void foo (int *a)
> > +{
> > + q[2] = bar (q[1]);
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-3.c b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> > new file mode 100644
> > index 00000000000..76931fbe294
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> > @@ -0,0 +1,7 @@
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O2 -mapx-features=ppx" } */
> > +
> > +/* { dg-final { scan-assembler-not "pushp" } } */
> > +/* { dg-final { scan-assembler-not "popp" } } */
> > +
> > +#include "eh_return-2.c"
> > --
> > 2.31.1
> >
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] [APX PPX] Avoid generating unmatched pushp/popp in pro/epilogue
2024-07-02 3:23 [PATCH] [APX PPX] Avoid generating unmatched pushp/popp in pro/epilogue Hongyu Wang
2024-07-02 8:17 ` Richard Biener
@ 2024-07-04 3:00 ` Hongtao Liu
1 sibling, 0 replies; 4+ messages in thread
From: Hongtao Liu @ 2024-07-04 3:00 UTC (permalink / raw)
To: Hongyu Wang; +Cc: gcc-patches, hongtao.liu, ubizjak
On Tue, Jul 2, 2024 at 11:24 AM Hongyu Wang <hongyu.wang@intel.com> wrote:
>
> Hi,
>
> According to APX spec, the pushp/popp pairs should be matched,
> otherwise the PPX hint cannot take effect and cause performance loss.
>
> In the ix86_expand_epilogue, there are several optimizations that may
> cause the epilogue using mov to restore the regs. Check if PPX applied
> and prevent usage of mov/leave in the epilogue.
>
> Bootstrapped/regtested on x86_64-pc-linux-gnu.
>
> Ok for trunk?
Ok.
>
> gcc/ChangeLog:
>
> * config/i386/i386.cc (ix86_expand_prologue): Set apx_ppx_used
> flag in m.fs with TARGET_APX_PPX && !crtl->calls_eh_return.
> (ix86_emit_save_regs): Emit ppx is available only when
> TARGET_APX_PPX && !crtl->calls_eh_return.
> (ix86_expand_epilogue): Don't restore reg using mov when
> apx_ppx_used flag is true.
> * config/i386/i386.h (struct machine_frame_state):
> Add apx_ppx_used flag.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/apx-ppx-2.c: New test.
> * gcc.target/i386/apx-ppx-3.c: Likewise.
> ---
> gcc/config/i386/i386.cc | 13 +++++++++----
> gcc/config/i386/i386.h | 4 ++++
> gcc/testsuite/gcc.target/i386/apx-ppx-2.c | 14 ++++++++++++++
> gcc/testsuite/gcc.target/i386/apx-ppx-3.c | 7 +++++++
> 4 files changed, 34 insertions(+), 4 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-3.c
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index bd7411190af..99def8d4a77 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -7429,6 +7429,7 @@ ix86_emit_save_regs (void)
> {
> int regno;
> rtx_insn *insn;
> + bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
>
> if (!TARGET_APX_PUSH2POP2
> || !ix86_can_use_push2pop2 ()
> @@ -7438,7 +7439,7 @@ ix86_emit_save_regs (void)
> if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
> {
> insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
> - TARGET_APX_PPX));
> + use_ppx));
> RTX_FRAME_RELATED_P (insn) = 1;
> }
> }
> @@ -7469,7 +7470,7 @@ ix86_emit_save_regs (void)
> regno_list[0]),
> gen_rtx_REG (word_mode,
> regno_list[1]),
> - TARGET_APX_PPX));
> + use_ppx));
> RTX_FRAME_RELATED_P (insn) = 1;
> rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
>
> @@ -7502,7 +7503,7 @@ ix86_emit_save_regs (void)
> else
> {
> insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
> - TARGET_APX_PPX));
> + use_ppx));
> RTX_FRAME_RELATED_P (insn) = 1;
> aligned = true;
> }
> @@ -7511,7 +7512,7 @@ ix86_emit_save_regs (void)
> {
> insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
> regno_list[0]),
> - TARGET_APX_PPX));
> + use_ppx));
> RTX_FRAME_RELATED_P (insn) = 1;
> }
> }
> @@ -8985,6 +8986,7 @@ ix86_expand_prologue (void)
> if (!frame.save_regs_using_mov)
> {
> ix86_emit_save_regs ();
> + m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
> int_registers_saved = true;
> gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
> }
> @@ -9870,6 +9872,9 @@ ix86_expand_epilogue (int style)
> /* SEH requires the use of pops to identify the epilogue. */
> else if (TARGET_SEH)
> restore_regs_via_mov = false;
> + /* If we already save reg with pushp, don't use move at epilogue. */
> + else if (m->fs.apx_ppx_used)
> + restore_regs_via_mov = false;
> /* If we're only restoring one register and sp cannot be used then
> using a move instruction to restore the register since it's
> less work than reloading sp and popping the register. */
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 147b12cd014..0c5292e1d64 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -2693,6 +2693,10 @@ struct GTY(()) machine_frame_state
> The flags realigned and sp_realigned are mutually exclusive. */
> BOOL_BITFIELD sp_realigned : 1;
>
> + /* When APX_PPX used in prologue, force epilogue to emit
> + popp instead of move and leave. */
> + BOOL_BITFIELD apx_ppx_used : 1;
> +
> /* If sp_realigned is set, this is the last valid offset from the CFA
> that can be used for access with the frame pointer. */
> HOST_WIDE_INT sp_realigned_fp_last;
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-2.c b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> new file mode 100644
> index 00000000000..42a95340b55
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O1 -mapx-features=ppx -fno-omit-frame-pointer" } */
> +
> +/* { dg-final { scan-assembler "pushp" } } */
> +/* { dg-final { scan-assembler "popp" } } */
> +/* { dg-final { scan-assembler-not "leave" } } */
> +
> +extern int bar (int a);
> +extern int *q;
> +
> +void foo (int *a)
> +{
> + q[2] = bar (q[1]);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-3.c b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> new file mode 100644
> index 00000000000..76931fbe294
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c
> @@ -0,0 +1,7 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mapx-features=ppx" } */
> +
> +/* { dg-final { scan-assembler-not "pushp" } } */
> +/* { dg-final { scan-assembler-not "popp" } } */
> +
> +#include "eh_return-2.c"
> --
> 2.31.1
>
--
BR,
Hongtao
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-07-04 3:00 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-07-02 3:23 [PATCH] [APX PPX] Avoid generating unmatched pushp/popp in pro/epilogue Hongyu Wang
2024-07-02 8:17 ` Richard Biener
2024-07-02 8:38 ` Hongyu Wang
2024-07-04 3:00 ` Hongtao Liu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).