* [PATCH] Improve i?86/x86_64 prologue_and_epilogue for leaf functions (PR target/59501)
@ 2013-12-20 16:07 Jakub Jelinek
2013-12-20 16:10 ` H.J. Lu
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: Jakub Jelinek @ 2013-12-20 16:07 UTC (permalink / raw)
To: Jan Hubicka, Richard Henderson, H.J. Lu; +Cc: gcc-patches
Hi!
Honza recently changed the i?86 backend, so that it often doesn't
do -maccumulate-outgoing-args by default on x86_64.
Unfortunately, on some of the here included testcases this regressed
quite a bit the generated code. As AVX vectors are used, the dynamic
realignment code needs to assume e.g. that some of them will need to be
spilled, and for -mno-accumulate-outgoing-args the code needs to set
need_drap early as well. But in when emitting the prologue/epilogue,
if need_drap is set, we don't perform the optimization for leaf functions
which have zero size stack frame, thus we end up with uselessly doing
dynamic stack realignment, setting up DRAP that nothing uses and later on
restore everything back.
This patch improves it, if the DRAP register isn't live at the start of
entry bb successor and we aren't going to realign the stack, we don't
need DRAP at all, and even if we need DRAP register, that can't be the sole
reason for doing stack realignment, the prologue code is able to set up DRAP
even without dynamic stack realignment.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2013-12-20 Jakub Jelinek <jakub@redhat.com>
PR target/59501
* config/i386/i386.c (ix86_save_reg): Don't return true for drap_reg
if !crtl->stack_realign_needed.
(ix86_finalize_stack_realign_flags): If drap_reg isn't live on entry
and stack_realign_needed will be false, clear drap_reg and need_drap.
Optimize leaf functions that don't need stack frame even if
crtl->need_drap.
* gcc.target/i386/pr59501-1.c: New test.
* gcc.target/i386/pr59501-1a.c: New test.
* gcc.target/i386/pr59501-2.c: New test.
* gcc.target/i386/pr59501-2a.c: New test.
* gcc.target/i386/pr59501-3.c: New test.
* gcc.target/i386/pr59501-3a.c: New test.
* gcc.target/i386/pr59501-4.c: New test.
* gcc.target/i386/pr59501-4a.c: New test.
* gcc.target/i386/pr59501-5.c: New test.
* gcc.target/i386/pr59501-6.c: New test.
--- gcc/config/i386/i386.c.jj 2013-12-19 13:35:23.000000000 +0100
+++ gcc/config/i386/i386.c 2013-12-20 11:44:14.389310804 +0100
@@ -9235,7 +9235,9 @@ ix86_save_reg (unsigned int regno, bool
}
}
- if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
+ if (crtl->drap_reg
+ && regno == REGNO (crtl->drap_reg)
+ && crtl->stack_realign_needed)
return true;
return (df_regs_ever_live_p (regno)
@@ -10473,12 +10475,23 @@ ix86_finalize_stack_realign_flags (void)
return;
}
+ /* If drap has been set, but it actually isn't live at the start
+ of the function and !stack_realign, there is no reason to set it up. */
+ if (crtl->drap_reg && !stack_realign)
+ {
+ basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
+ if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
+ {
+ crtl->drap_reg = NULL_RTX;
+ crtl->need_drap = false;
+ }
+ }
+
/* If the only reason for frame_pointer_needed is that we conservatively
assumed stack realignment might be needed, but in the end nothing that
needed the stack alignment had been spilled, clear frame_pointer_needed
and say we don't need stack realignment. */
if (stack_realign
- && !crtl->need_drap
&& frame_pointer_needed
&& crtl->is_leaf
&& flag_omit_frame_pointer
@@ -10516,6 +10529,18 @@ ix86_finalize_stack_realign_flags (void)
}
}
+ /* If drap has been set, but it actually isn't live at the start
+ of the function, there is no reason to set it up. */
+ if (crtl->drap_reg)
+ {
+ basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
+ if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
+ {
+ crtl->drap_reg = NULL_RTX;
+ crtl->need_drap = false;
+ }
+ }
+
frame_pointer_needed = false;
stack_realign = false;
crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
--- gcc/testsuite/gcc.target/i386/pr59501-2.c.jj 2013-12-20 12:02:08.754662741 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-2.c 2013-12-20 12:02:04.665668734 +0100
@@ -0,0 +1,5 @@
+/* PR target/59501 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx -maccumulate-outgoing-args" } */
+
+#include "pr59501-1.c"
--- gcc/testsuite/gcc.target/i386/pr59501-1.c.jj 2013-12-20 12:01:44.253781613 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-1.c 2013-12-20 12:12:26.715391613 +0100
@@ -0,0 +1,30 @@
+/* PR target/59501 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx -mno-accumulate-outgoing-args" } */
+
+#define CHECK_H "avx-check.h"
+#define TEST avx_test
+
+#include CHECK_H
+
+typedef double V __attribute__ ((vector_size (32)));
+
+__attribute__((noinline, noclone)) V
+foo (double *x, unsigned *y)
+{
+ V r = { x[y[0]], x[y[1]], x[y[2]], x[y[3]] };
+ return r;
+}
+
+static void
+TEST (void)
+{
+ double a[16];
+ unsigned b[4] = { 5, 0, 15, 7 };
+ int i;
+ for (i = 0; i < 16; i++)
+ a[i] = 0.5 + i;
+ V v = foo (a, b);
+ if (v[0] != 5.5 || v[1] != 0.5 || v[2] != 15.5 || v[3] != 7.5)
+ __builtin_abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr59501-4a.c.jj 2013-12-20 12:19:20.603212859 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-4a.c 2013-12-20 12:23:33.647881672 +0100
@@ -0,0 +1,8 @@
+/* PR target/59501 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx -maccumulate-outgoing-args" } */
+
+#include "pr59501-3a.c"
+
+/* Verify no dynamic realignment is performed. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" { xfail *-*-* } } } */
--- gcc/testsuite/gcc.target/i386/pr59501-3.c.jj 2013-12-20 12:02:44.644462041 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-3.c 2013-12-20 12:13:06.834181801 +0100
@@ -0,0 +1,30 @@
+/* PR target/59501 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx -mno-accumulate-outgoing-args" } */
+
+#define CHECK_H "avx-check.h"
+#define TEST avx_test
+
+#include CHECK_H
+
+typedef double V __attribute__ ((vector_size (32)));
+
+__attribute__((noinline, noclone)) V
+foo (double *x, int a, int b, int c, int d, int e, int f, unsigned *y)
+{
+ V r = { x[y[0]], x[y[1]], x[y[2]], x[y[3]] };
+ return r;
+}
+
+static void
+TEST (void)
+{
+ double a[16];
+ unsigned b[4] = { 5, 0, 15, 7 };
+ int i;
+ for (i = 0; i < 16; i++)
+ a[i] = 0.5 + i;
+ V v = foo (a, 0, 0, 0, 0, 0, 0, b);
+ if (v[0] != 5.5 || v[1] != 0.5 || v[2] != 15.5 || v[3] != 7.5)
+ __builtin_abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr59501-3a.c.jj 2013-12-20 12:18:41.313420496 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-3a.c 2013-12-20 12:22:15.257292900 +0100
@@ -0,0 +1,15 @@
+/* PR target/59501 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx -mno-accumulate-outgoing-args" } */
+
+typedef double V __attribute__ ((vector_size (32)));
+
+V
+foo (double *x, int a, int b, int c, int d, int e, int f, unsigned *y)
+{
+ V r = { x[y[0]], x[y[1]], x[y[2]], x[y[3]] };
+ return r;
+}
+
+/* Verify no dynamic realignment is performed. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" } } */
--- gcc/testsuite/gcc.target/i386/pr59501-1a.c.jj 2013-12-20 12:15:16.890495826 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-1a.c 2013-12-20 12:22:02.820358481 +0100
@@ -0,0 +1,17 @@
+/* PR target/59501 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx -mno-accumulate-outgoing-args" } */
+
+typedef double V __attribute__ ((vector_size (32)));
+
+V
+foo (double *x, unsigned *y)
+{
+ V r = { x[y[0]], x[y[1]], x[y[2]], x[y[3]] };
+ return r;
+}
+
+/* Verify no dynamic realignment is performed. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" } } */
+/* And DRAP isn't needed either. */
+/* { dg-final { scan-assembler-not "r10" } } */
--- gcc/testsuite/gcc.target/i386/pr59501-6.c.jj 2013-12-20 12:08:21.574682265 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-6.c 2013-12-20 12:08:32.966622139 +0100
@@ -0,0 +1,5 @@
+/* PR target/59501 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx -maccumulate-outgoing-args" } */
+
+#include "pr59501-5.c"
--- gcc/testsuite/gcc.target/i386/pr59501-5.c.jj 2013-12-20 12:06:53.276148649 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-5.c 2013-12-20 12:13:38.012014216 +0100
@@ -0,0 +1,39 @@
+/* PR target/59501 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx -mno-accumulate-outgoing-args" } */
+
+#define CHECK_H "avx-check.h"
+#define TEST avx_test
+
+#include CHECK_H
+
+typedef double V __attribute__ ((vector_size (32)));
+
+__attribute__((noinline, noclone)) void
+bar (char *p)
+{
+ p[0] = 1;
+ p[37] = 2;
+ asm volatile ("" : : "r" (p) : "memory");
+}
+
+__attribute__((noinline, noclone)) V
+foo (double *x, int a, int b, int c, int d, int e, int f, unsigned *y)
+{
+ bar (__builtin_alloca (a + b + c + d + e + f));
+ V r = { x[y[0]], x[y[1]], x[y[2]], x[y[3]] };
+ return r;
+}
+
+static void
+TEST (void)
+{
+ double a[16];
+ unsigned b[4] = { 5, 0, 15, 7 };
+ int i;
+ for (i = 0; i < 16; i++)
+ a[i] = 0.5 + i;
+ V v = foo (a, 0, 30, 0, 0, 8, 0, b);
+ if (v[0] != 5.5 || v[1] != 0.5 || v[2] != 15.5 || v[3] != 7.5)
+ __builtin_abort ();
+}
--- gcc/testsuite/gcc.target/i386/pr59501-4.c.jj 2013-12-20 12:03:16.159292616 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-4.c 2013-12-20 12:06:24.651298808 +0100
@@ -0,0 +1,5 @@
+/* PR target/59501 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx -maccumulate-outgoing-args" } */
+
+#include "pr59501-3.c"
--- gcc/testsuite/gcc.target/i386/pr59501-2a.c.jj 2013-12-20 12:18:11.371578107 +0100
+++ gcc/testsuite/gcc.target/i386/pr59501-2a.c 2013-12-20 12:22:07.988329149 +0100
@@ -0,0 +1,10 @@
+/* PR target/59501 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx -maccumulate-outgoing-args" } */
+
+#include "pr59501-1a.c"
+
+/* Verify no dynamic realignment is performed. */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" } } */
+/* And DRAP isn't needed either. */
+/* { dg-final { scan-assembler-not "r10" } } */
Jakub
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Improve i?86/x86_64 prologue_and_epilogue for leaf functions (PR target/59501)
2013-12-20 16:07 [PATCH] Improve i?86/x86_64 prologue_and_epilogue for leaf functions (PR target/59501) Jakub Jelinek
@ 2013-12-20 16:10 ` H.J. Lu
2013-12-22 21:14 ` Jan Hubicka
2017-07-07 16:58 ` H.J. Lu
2 siblings, 0 replies; 6+ messages in thread
From: H.J. Lu @ 2013-12-20 16:10 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: Jan Hubicka, Richard Henderson, GCC Patches
On Fri, Dec 20, 2013 at 8:06 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> Honza recently changed the i?86 backend, so that it often doesn't
> do -maccumulate-outgoing-args by default on x86_64.
> Unfortunately, on some of the here included testcases this regressed
> quite a bit the generated code. As AVX vectors are used, the dynamic
> realignment code needs to assume e.g. that some of them will need to be
> spilled, and for -mno-accumulate-outgoing-args the code needs to set
> need_drap early as well. But in when emitting the prologue/epilogue,
> if need_drap is set, we don't perform the optimization for leaf functions
> which have zero size stack frame, thus we end up with uselessly doing
> dynamic stack realignment, setting up DRAP that nothing uses and later on
> restore everything back.
>
> This patch improves it, if the DRAP register isn't live at the start of
> entry bb successor and we aren't going to realign the stack, we don't
> need DRAP at all, and even if we need DRAP register, that can't be the sole
> reason for doing stack realignment, the prologue code is able to set up DRAP
> even without dynamic stack realignment.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2013-12-20 Jakub Jelinek <jakub@redhat.com>
>
> PR target/59501
> * config/i386/i386.c (ix86_save_reg): Don't return true for drap_reg
> if !crtl->stack_realign_needed.
> (ix86_finalize_stack_realign_flags): If drap_reg isn't live on entry
> and stack_realign_needed will be false, clear drap_reg and need_drap.
> Optimize leaf functions that don't need stack frame even if
> crtl->need_drap.
>
> * gcc.target/i386/pr59501-1.c: New test.
> * gcc.target/i386/pr59501-1a.c: New test.
> * gcc.target/i386/pr59501-2.c: New test.
> * gcc.target/i386/pr59501-2a.c: New test.
> * gcc.target/i386/pr59501-3.c: New test.
> * gcc.target/i386/pr59501-3a.c: New test.
> * gcc.target/i386/pr59501-4.c: New test.
> * gcc.target/i386/pr59501-4a.c: New test.
> * gcc.target/i386/pr59501-5.c: New test.
> * gcc.target/i386/pr59501-6.c: New test.
>
> --- gcc/config/i386/i386.c.jj 2013-12-19 13:35:23.000000000 +0100
> +++ gcc/config/i386/i386.c 2013-12-20 11:44:14.389310804 +0100
> @@ -9235,7 +9235,9 @@ ix86_save_reg (unsigned int regno, bool
> }
> }
>
> - if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
> + if (crtl->drap_reg
> + && regno == REGNO (crtl->drap_reg)
> + && crtl->stack_realign_needed)
> return true;
>
> return (df_regs_ever_live_p (regno)
> @@ -10473,12 +10475,23 @@ ix86_finalize_stack_realign_flags (void)
> return;
> }
>
> + /* If drap has been set, but it actually isn't live at the start
> + of the function and !stack_realign, there is no reason to set it up. */
> + if (crtl->drap_reg && !stack_realign)
> + {
> + basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
> + if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
> + {
> + crtl->drap_reg = NULL_RTX;
> + crtl->need_drap = false;
> + }
> + }
> +
> /* If the only reason for frame_pointer_needed is that we conservatively
> assumed stack realignment might be needed, but in the end nothing that
> needed the stack alignment had been spilled, clear frame_pointer_needed
> and say we don't need stack realignment. */
> if (stack_realign
> - && !crtl->need_drap
> && frame_pointer_needed
> && crtl->is_leaf
> && flag_omit_frame_pointer
> @@ -10516,6 +10529,18 @@ ix86_finalize_stack_realign_flags (void)
> }
> }
>
> + /* If drap has been set, but it actually isn't live at the start
> + of the function, there is no reason to set it up. */
> + if (crtl->drap_reg)
> + {
> + basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
> + if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
> + {
> + crtl->drap_reg = NULL_RTX;
> + crtl->need_drap = false;
> + }
> + }
> +
> frame_pointer_needed = false;
> stack_realign = false;
> crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
Looks good to me. But I can't approve it.
Thanks.
--
H.J.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Improve i?86/x86_64 prologue_and_epilogue for leaf functions (PR target/59501)
2013-12-20 16:07 [PATCH] Improve i?86/x86_64 prologue_and_epilogue for leaf functions (PR target/59501) Jakub Jelinek
2013-12-20 16:10 ` H.J. Lu
@ 2013-12-22 21:14 ` Jan Hubicka
2017-07-07 16:58 ` H.J. Lu
2 siblings, 0 replies; 6+ messages in thread
From: Jan Hubicka @ 2013-12-22 21:14 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: Jan Hubicka, Richard Henderson, H.J. Lu, gcc-patches
> Hi!
>
> Honza recently changed the i?86 backend, so that it often doesn't
> do -maccumulate-outgoing-args by default on x86_64.
> Unfortunately, on some of the here included testcases this regressed
> quite a bit the generated code. As AVX vectors are used, the dynamic
Yep, the accidental change to disable register accumulation was apparently
on too long.
I still plan to switch accmulate-outoging-args on for generic - I benchmarked
it on AMD targets and it seems to be good, I sitll need to analyze the code
size implication though. At some testers we seems to have regressed in 32bit
that may be related to fact that frame pointer enabled codegen is more compact
and Vladimir patch, while restoring performance, led to code size issues.
> realignment code needs to assume e.g. that some of them will need to be
> spilled, and for -mno-accumulate-outgoing-args the code needs to set
> need_drap early as well. But in when emitting the prologue/epilogue,
> if need_drap is set, we don't perform the optimization for leaf functions
> which have zero size stack frame, thus we end up with uselessly doing
> dynamic stack realignment, setting up DRAP that nothing uses and later on
> restore everything back.
>
> This patch improves it, if the DRAP register isn't live at the start of
> entry bb successor and we aren't going to realign the stack, we don't
> need DRAP at all, and even if we need DRAP register, that can't be the sole
> reason for doing stack realignment, the prologue code is able to set up DRAP
> even without dynamic stack realignment.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2013-12-20 Jakub Jelinek <jakub@redhat.com>
>
> PR target/59501
> * config/i386/i386.c (ix86_save_reg): Don't return true for drap_reg
> if !crtl->stack_realign_needed.
> (ix86_finalize_stack_realign_flags): If drap_reg isn't live on entry
> and stack_realign_needed will be false, clear drap_reg and need_drap.
> Optimize leaf functions that don't need stack frame even if
> crtl->need_drap.
>
> * gcc.target/i386/pr59501-1.c: New test.
> * gcc.target/i386/pr59501-1a.c: New test.
> * gcc.target/i386/pr59501-2.c: New test.
> * gcc.target/i386/pr59501-2a.c: New test.
> * gcc.target/i386/pr59501-3.c: New test.
> * gcc.target/i386/pr59501-3a.c: New test.
> * gcc.target/i386/pr59501-4.c: New test.
> * gcc.target/i386/pr59501-4a.c: New test.
> * gcc.target/i386/pr59501-5.c: New test.
> * gcc.target/i386/pr59501-6.c: New test.
This seems OK, thanks!
Honza
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] Improve i?86/x86_64 prologue_and_epilogue for leaf functions (PR target/59501)
2013-12-20 16:07 [PATCH] Improve i?86/x86_64 prologue_and_epilogue for leaf functions (PR target/59501) Jakub Jelinek
2013-12-20 16:10 ` H.J. Lu
2013-12-22 21:14 ` Jan Hubicka
@ 2017-07-07 16:58 ` H.J. Lu
2017-07-08 0:57 ` [PATCH] i386: Avoid stack realignment if possible H.J. Lu
2 siblings, 1 reply; 6+ messages in thread
From: H.J. Lu @ 2017-07-07 16:58 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: Jan Hubicka, Richard Henderson, GCC Patches, Uros Bizjak
[-- Attachment #1: Type: text/plain, Size: 3112 bytes --]
On Fri, Dec 20, 2013 at 8:06 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> Honza recently changed the i?86 backend, so that it often doesn't
> do -maccumulate-outgoing-args by default on x86_64.
> Unfortunately, on some of the here included testcases this regressed
> quite a bit the generated code. As AVX vectors are used, the dynamic
> realignment code needs to assume e.g. that some of them will need to be
> spilled, and for -mno-accumulate-outgoing-args the code needs to set
> need_drap early as well. But in when emitting the prologue/epilogue,
> if need_drap is set, we don't perform the optimization for leaf functions
> which have zero size stack frame, thus we end up with uselessly doing
> dynamic stack realignment, setting up DRAP that nothing uses and later on
> restore everything back.
>
> This patch improves it, if the DRAP register isn't live at the start of
> entry bb successor and we aren't going to realign the stack, we don't
> need DRAP at all, and even if we need DRAP register, that can't be the sole
> reason for doing stack realignment, the prologue code is able to set up DRAP
> even without dynamic stack realignment.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2013-12-20 Jakub Jelinek <jakub@redhat.com>
>
> PR target/59501
> * config/i386/i386.c (ix86_save_reg): Don't return true for drap_reg
> if !crtl->stack_realign_needed.
> (ix86_finalize_stack_realign_flags): If drap_reg isn't live on entry
> and stack_realign_needed will be false, clear drap_reg and need_drap.
> Optimize leaf functions that don't need stack frame even if
> crtl->need_drap.
>
> * gcc.target/i386/pr59501-1.c: New test.
> * gcc.target/i386/pr59501-1a.c: New test.
> * gcc.target/i386/pr59501-2.c: New test.
> * gcc.target/i386/pr59501-2a.c: New test.
> * gcc.target/i386/pr59501-3.c: New test.
> * gcc.target/i386/pr59501-3a.c: New test.
> * gcc.target/i386/pr59501-4.c: New test.
> * gcc.target/i386/pr59501-4a.c: New test.
> * gcc.target/i386/pr59501-5.c: New test.
> * gcc.target/i386/pr59501-6.c: New test.
>
>
> --- gcc/testsuite/gcc.target/i386/pr59501-4a.c.jj 2013-12-20 12:19:20.603212859 +0100
> +++ gcc/testsuite/gcc.target/i386/pr59501-4a.c 2013-12-20 12:23:33.647881672 +0100
> @@ -0,0 +1,8 @@
> +/* PR target/59501 */
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mavx -maccumulate-outgoing-args" } */
> +
> +#include "pr59501-3a.c"
> +
> +/* Verify no dynamic realignment is performed. */
> +/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" { xfail *-*-* } } } */
>
Since DRAP isn't used with -maccumulate-outgoing-args, pr59501-4a.c was
xfailed due to stack frame access via frame pointer instead of DARP.
This patch finds the maximum stack alignment from the stack frame access
instructions and avoids stack realignment if stack alignment needed is
less than incoming stack boundary.
I am testing this patch. OK for trunk if there is no regression?
--
H.J.
[-- Attachment #2: 0001-i386-Avoid-stack-realignment-if-possible.patch --]
[-- Type: text/x-patch, Size: 5225 bytes --]
From d9844cba6ce20498aab42a32927230cb7b56475d Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Fri, 7 Jul 2017 06:01:16 -0700
Subject: [PATCH] i386: Avoid stack realignment if possible
Since DRAP isn't used with -maccumulate-outgoing-args, pr59501-4a.c was
xfailed due to stack frame access via frame pointer instead of DARP.
This patch finds the maximum stack alignment from the stack frame access
instructions and avoids stack realignment if stack alignment needed is
less than incoming stack boundary.
gcc/
PR target/59501
* config/i386/i386.c (ix86_finalize_stack_realign_flags): Don't
realign stack if stack alignment needed is less than incoming
stack boundary.
gcc/testsuite/
PR target/59501
* gcc.target/i386/pr59501-4a.c: Remove xfail.
---
gcc/config/i386/i386.c | 83 +++++++++++++++++++-----------
gcc/testsuite/gcc.target/i386/pr59501-4a.c | 2 +-
2 files changed, 55 insertions(+), 30 deletions(-)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index b041524..0c61998 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14161,6 +14161,10 @@ ix86_finalize_stack_realign_flags (void)
add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
add_to_hard_reg_set (&set_up_by_prologue, Pmode,
HARD_FRAME_POINTER_REGNUM);
+
+ unsigned int stack_alignment = 0;
+ bool require_stack_frame = false;
+
FOR_EACH_BB_FN (bb, cfun)
{
rtx_insn *insn;
@@ -14169,43 +14173,64 @@ ix86_finalize_stack_realign_flags (void)
&& requires_stack_frame_p (insn, prologue_used,
set_up_by_prologue))
{
- if (crtl->stack_realign_needed != stack_realign)
- recompute_frame_layout_p = true;
- crtl->stack_realign_needed = stack_realign;
- crtl->stack_realign_finalized = true;
- if (recompute_frame_layout_p)
- ix86_compute_frame_layout ();
- return;
+ require_stack_frame = true;
+
+ /* Find the maximum stack alignment. */
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
+ if ((MEM_P (*iter)))
+ {
+ unsigned int alignment = MEM_ALIGN (*iter);
+ if (alignment > stack_alignment)
+ stack_alignment = alignment;
+ }
}
}
- /* If drap has been set, but it actually isn't live at the start
- of the function, there is no reason to set it up. */
- if (crtl->drap_reg)
+ if (require_stack_frame)
{
- basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
- if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
+ /* Stack frame is required. If stack alignment needed is less
+ than incoming stack boundary, don't realign stack. */
+ stack_realign = incoming_stack_boundary < stack_alignment;
+ if (!stack_realign)
{
- crtl->drap_reg = NULL_RTX;
- crtl->need_drap = false;
+ crtl->max_used_stack_slot_alignment
+ = incoming_stack_boundary;
+ crtl->stack_alignment_needed
+ = incoming_stack_boundary;
}
}
else
- cfun->machine->no_drap_save_restore = true;
-
- frame_pointer_needed = false;
- stack_realign = false;
- crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
- crtl->stack_alignment_needed = incoming_stack_boundary;
- crtl->stack_alignment_estimated = incoming_stack_boundary;
- if (crtl->preferred_stack_boundary > incoming_stack_boundary)
- crtl->preferred_stack_boundary = incoming_stack_boundary;
- df_finish_pass (true);
- df_scan_alloc (NULL);
- df_scan_blocks ();
- df_compute_regs_ever_live (true);
- df_analyze ();
- recompute_frame_layout_p = true;
+ {
+ /* If drap has been set, but it actually isn't live at the
+ start of the function, there is no reason to set it up. */
+ if (crtl->drap_reg)
+ {
+ basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
+ if (! REGNO_REG_SET_P (DF_LR_IN (bb),
+ REGNO (crtl->drap_reg)))
+ {
+ crtl->drap_reg = NULL_RTX;
+ crtl->need_drap = false;
+ }
+ }
+ else
+ cfun->machine->no_drap_save_restore = true;
+
+ frame_pointer_needed = false;
+ stack_realign = false;
+ crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
+ crtl->stack_alignment_needed = incoming_stack_boundary;
+ crtl->stack_alignment_estimated = incoming_stack_boundary;
+ if (crtl->preferred_stack_boundary > incoming_stack_boundary)
+ crtl->preferred_stack_boundary = incoming_stack_boundary;
+ df_finish_pass (true);
+ df_scan_alloc (NULL);
+ df_scan_blocks ();
+ df_compute_regs_ever_live (true);
+ df_analyze ();
+ recompute_frame_layout_p = true;
+ }
}
if (crtl->stack_realign_needed != stack_realign)
diff --git a/gcc/testsuite/gcc.target/i386/pr59501-4a.c b/gcc/testsuite/gcc.target/i386/pr59501-4a.c
index 5c3cb68..908c7f4 100644
--- a/gcc/testsuite/gcc.target/i386/pr59501-4a.c
+++ b/gcc/testsuite/gcc.target/i386/pr59501-4a.c
@@ -5,4 +5,4 @@
#include "pr59501-3a.c"
/* Verify no dynamic realignment is performed. */
-/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" } } */
--
2.9.4
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH] i386: Avoid stack realignment if possible
2017-07-07 16:58 ` H.J. Lu
@ 2017-07-08 0:57 ` H.J. Lu
2017-07-14 11:46 ` H.J. Lu
0 siblings, 1 reply; 6+ messages in thread
From: H.J. Lu @ 2017-07-08 0:57 UTC (permalink / raw)
To: Jakub Jelinek, Jan Hubicka, Richard Henderson, GCC Patches, Uros Bizjak
On Fri, Jul 07, 2017 at 09:58:42AM -0700, H.J. Lu wrote:
> On Fri, Dec 20, 2013 at 8:06 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> > Hi!
> >
> > Honza recently changed the i?86 backend, so that it often doesn't
> > do -maccumulate-outgoing-args by default on x86_64.
> > Unfortunately, on some of the here included testcases this regressed
> > quite a bit the generated code. As AVX vectors are used, the dynamic
> > realignment code needs to assume e.g. that some of them will need to be
> > spilled, and for -mno-accumulate-outgoing-args the code needs to set
> > need_drap early as well. But in when emitting the prologue/epilogue,
> > if need_drap is set, we don't perform the optimization for leaf functions
> > which have zero size stack frame, thus we end up with uselessly doing
> > dynamic stack realignment, setting up DRAP that nothing uses and later on
> > restore everything back.
> >
> > This patch improves it, if the DRAP register isn't live at the start of
> > entry bb successor and we aren't going to realign the stack, we don't
> > need DRAP at all, and even if we need DRAP register, that can't be the sole
> > reason for doing stack realignment, the prologue code is able to set up DRAP
> > even without dynamic stack realignment.
> >
> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> >
> > 2013-12-20 Jakub Jelinek <jakub@redhat.com>
> >
> > PR target/59501
> > * config/i386/i386.c (ix86_save_reg): Don't return true for drap_reg
> > if !crtl->stack_realign_needed.
> > (ix86_finalize_stack_realign_flags): If drap_reg isn't live on entry
> > and stack_realign_needed will be false, clear drap_reg and need_drap.
> > Optimize leaf functions that don't need stack frame even if
> > crtl->need_drap.
> >
> > * gcc.target/i386/pr59501-1.c: New test.
> > * gcc.target/i386/pr59501-1a.c: New test.
> > * gcc.target/i386/pr59501-2.c: New test.
> > * gcc.target/i386/pr59501-2a.c: New test.
> > * gcc.target/i386/pr59501-3.c: New test.
> > * gcc.target/i386/pr59501-3a.c: New test.
> > * gcc.target/i386/pr59501-4.c: New test.
> > * gcc.target/i386/pr59501-4a.c: New test.
> > * gcc.target/i386/pr59501-5.c: New test.
> > * gcc.target/i386/pr59501-6.c: New test.
> >
> >
> > --- gcc/testsuite/gcc.target/i386/pr59501-4a.c.jj 2013-12-20 12:19:20.603212859 +0100
> > +++ gcc/testsuite/gcc.target/i386/pr59501-4a.c 2013-12-20 12:23:33.647881672 +0100
> > @@ -0,0 +1,8 @@
> > +/* PR target/59501 */
> > +/* { dg-do compile { target { ! ia32 } } } */
> > +/* { dg-options "-O2 -mavx -maccumulate-outgoing-args" } */
> > +
> > +#include "pr59501-3a.c"
> > +
> > +/* Verify no dynamic realignment is performed. */
> > +/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" { xfail *-*-* } } } */
> >
>
> Since DRAP isn't used with -maccumulate-outgoing-args, pr59501-4a.c was
> xfailed due to stack frame access via frame pointer instead of DARP.
> This patch finds the maximum stack alignment from the stack frame access
> instructions and avoids stack realignment if stack alignment needed is
> less than incoming stack boundary.
>
> I am testing this patch. OK for trunk if there is no regression?
>
>
We need to keep the preferred stack alignment as the minimum stack
alignment. Here is the updated patch. Tested on x86-64. OK for
trunk?
Thanks.
H.J.
---
Since DRAP isn't used with -maccumulate-outgoing-args, pr59501-4a.c was
xfailed due to stack frame access via frame pointer instead of DARP.
This patch finds the maximum stack alignment from the stack frame access
instructions and avoids stack realignment if stack alignment needed is
less than incoming stack boundary.
gcc/
PR target/59501
* config/i386/i386.c (ix86_finalize_stack_realign_flags): Don't
realign stack if stack alignment needed is less than incoming
stack boundary.
gcc/testsuite/
PR target/59501
* gcc.target/i386/pr59501-4a.c: Remove xfail.
---
gcc/config/i386/i386.c | 84 +++++++++++++++++++-----------
gcc/testsuite/gcc.target/i386/pr59501-4a.c | 2 +-
2 files changed, 56 insertions(+), 30 deletions(-)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index b041524..28febd0 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14161,6 +14161,11 @@ ix86_finalize_stack_realign_flags (void)
add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
add_to_hard_reg_set (&set_up_by_prologue, Pmode,
HARD_FRAME_POINTER_REGNUM);
+
+ /* The preferred stack alignment is the minimum stack alignment. */
+ unsigned int stack_alignment = crtl->preferred_stack_boundary;
+ bool require_stack_frame = false;
+
FOR_EACH_BB_FN (bb, cfun)
{
rtx_insn *insn;
@@ -14169,43 +14174,64 @@ ix86_finalize_stack_realign_flags (void)
&& requires_stack_frame_p (insn, prologue_used,
set_up_by_prologue))
{
- if (crtl->stack_realign_needed != stack_realign)
- recompute_frame_layout_p = true;
- crtl->stack_realign_needed = stack_realign;
- crtl->stack_realign_finalized = true;
- if (recompute_frame_layout_p)
- ix86_compute_frame_layout ();
- return;
+ require_stack_frame = true;
+
+ /* Find the maximum stack alignment. */
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
+ if ((MEM_P (*iter)))
+ {
+ unsigned int alignment = MEM_ALIGN (*iter);
+ if (alignment > stack_alignment)
+ stack_alignment = alignment;
+ }
}
}
- /* If drap has been set, but it actually isn't live at the start
- of the function, there is no reason to set it up. */
- if (crtl->drap_reg)
+ if (require_stack_frame)
{
- basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
- if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
+ /* Stack frame is required. If stack alignment needed is less
+ than incoming stack boundary, don't realign stack. */
+ stack_realign = incoming_stack_boundary < stack_alignment;
+ if (!stack_realign)
{
- crtl->drap_reg = NULL_RTX;
- crtl->need_drap = false;
+ crtl->max_used_stack_slot_alignment
+ = incoming_stack_boundary;
+ crtl->stack_alignment_needed
+ = incoming_stack_boundary;
}
}
else
- cfun->machine->no_drap_save_restore = true;
-
- frame_pointer_needed = false;
- stack_realign = false;
- crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
- crtl->stack_alignment_needed = incoming_stack_boundary;
- crtl->stack_alignment_estimated = incoming_stack_boundary;
- if (crtl->preferred_stack_boundary > incoming_stack_boundary)
- crtl->preferred_stack_boundary = incoming_stack_boundary;
- df_finish_pass (true);
- df_scan_alloc (NULL);
- df_scan_blocks ();
- df_compute_regs_ever_live (true);
- df_analyze ();
- recompute_frame_layout_p = true;
+ {
+ /* If drap has been set, but it actually isn't live at the
+ start of the function, there is no reason to set it up. */
+ if (crtl->drap_reg)
+ {
+ basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
+ if (! REGNO_REG_SET_P (DF_LR_IN (bb),
+ REGNO (crtl->drap_reg)))
+ {
+ crtl->drap_reg = NULL_RTX;
+ crtl->need_drap = false;
+ }
+ }
+ else
+ cfun->machine->no_drap_save_restore = true;
+
+ frame_pointer_needed = false;
+ stack_realign = false;
+ crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
+ crtl->stack_alignment_needed = incoming_stack_boundary;
+ crtl->stack_alignment_estimated = incoming_stack_boundary;
+ if (crtl->preferred_stack_boundary > incoming_stack_boundary)
+ crtl->preferred_stack_boundary = incoming_stack_boundary;
+ df_finish_pass (true);
+ df_scan_alloc (NULL);
+ df_scan_blocks ();
+ df_compute_regs_ever_live (true);
+ df_analyze ();
+ recompute_frame_layout_p = true;
+ }
}
if (crtl->stack_realign_needed != stack_realign)
diff --git a/gcc/testsuite/gcc.target/i386/pr59501-4a.c b/gcc/testsuite/gcc.target/i386/pr59501-4a.c
index 5c3cb68..908c7f4 100644
--- a/gcc/testsuite/gcc.target/i386/pr59501-4a.c
+++ b/gcc/testsuite/gcc.target/i386/pr59501-4a.c
@@ -5,4 +5,4 @@
#include "pr59501-3a.c"
/* Verify no dynamic realignment is performed. */
-/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" } } */
--
2.9.4
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] i386: Avoid stack realignment if possible
2017-07-08 0:57 ` [PATCH] i386: Avoid stack realignment if possible H.J. Lu
@ 2017-07-14 11:46 ` H.J. Lu
0 siblings, 0 replies; 6+ messages in thread
From: H.J. Lu @ 2017-07-14 11:46 UTC (permalink / raw)
To: Jakub Jelinek, Jan Hubicka, Richard Henderson, GCC Patches, Uros Bizjak
On Fri, Jul 7, 2017 at 5:56 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Fri, Jul 07, 2017 at 09:58:42AM -0700, H.J. Lu wrote:
>> On Fri, Dec 20, 2013 at 8:06 AM, Jakub Jelinek <jakub@redhat.com> wrote:
>> > Hi!
>> >
>> > Honza recently changed the i?86 backend, so that it often doesn't
>> > do -maccumulate-outgoing-args by default on x86_64.
>> > Unfortunately, on some of the here included testcases this regressed
>> > quite a bit the generated code. As AVX vectors are used, the dynamic
>> > realignment code needs to assume e.g. that some of them will need to be
>> > spilled, and for -mno-accumulate-outgoing-args the code needs to set
>> > need_drap early as well. But in when emitting the prologue/epilogue,
>> > if need_drap is set, we don't perform the optimization for leaf functions
>> > which have zero size stack frame, thus we end up with uselessly doing
>> > dynamic stack realignment, setting up DRAP that nothing uses and later on
>> > restore everything back.
>> >
>> > This patch improves it, if the DRAP register isn't live at the start of
>> > entry bb successor and we aren't going to realign the stack, we don't
>> > need DRAP at all, and even if we need DRAP register, that can't be the sole
>> > reason for doing stack realignment, the prologue code is able to set up DRAP
>> > even without dynamic stack realignment.
>> >
>> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>> >
>> > 2013-12-20 Jakub Jelinek <jakub@redhat.com>
>> >
>> > PR target/59501
>> > * config/i386/i386.c (ix86_save_reg): Don't return true for drap_reg
>> > if !crtl->stack_realign_needed.
>> > (ix86_finalize_stack_realign_flags): If drap_reg isn't live on entry
>> > and stack_realign_needed will be false, clear drap_reg and need_drap.
>> > Optimize leaf functions that don't need stack frame even if
>> > crtl->need_drap.
>> >
>> > * gcc.target/i386/pr59501-1.c: New test.
>> > * gcc.target/i386/pr59501-1a.c: New test.
>> > * gcc.target/i386/pr59501-2.c: New test.
>> > * gcc.target/i386/pr59501-2a.c: New test.
>> > * gcc.target/i386/pr59501-3.c: New test.
>> > * gcc.target/i386/pr59501-3a.c: New test.
>> > * gcc.target/i386/pr59501-4.c: New test.
>> > * gcc.target/i386/pr59501-4a.c: New test.
>> > * gcc.target/i386/pr59501-5.c: New test.
>> > * gcc.target/i386/pr59501-6.c: New test.
>> >
>> >
>> > --- gcc/testsuite/gcc.target/i386/pr59501-4a.c.jj 2013-12-20 12:19:20.603212859 +0100
>> > +++ gcc/testsuite/gcc.target/i386/pr59501-4a.c 2013-12-20 12:23:33.647881672 +0100
>> > @@ -0,0 +1,8 @@
>> > +/* PR target/59501 */
>> > +/* { dg-do compile { target { ! ia32 } } } */
>> > +/* { dg-options "-O2 -mavx -maccumulate-outgoing-args" } */
>> > +
>> > +#include "pr59501-3a.c"
>> > +
>> > +/* Verify no dynamic realignment is performed. */
>> > +/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" { xfail *-*-* } } } */
>> >
>>
>> Since DRAP isn't used with -maccumulate-outgoing-args, pr59501-4a.c was
>> xfailed due to stack frame access via frame pointer instead of DARP.
>> This patch finds the maximum stack alignment from the stack frame access
>> instructions and avoids stack realignment if stack alignment needed is
>> less than incoming stack boundary.
>>
>> I am testing this patch. OK for trunk if there is no regression?
>>
>>
>
> We need to keep the preferred stack alignment as the minimum stack
> alignment. Here is the updated patch. Tested on x86-64. OK for
> trunk?
>
> Thanks.
Hi Jakub,
This patch fixes the xfailed testcase in your patch:
https://gcc.gnu.org/ml/gcc-patches/2013-12/msg01767.html
Your original patch:
https://gcc.gnu.org/ml/gcc-patches/2011-11/msg01058.html
assumes that any instructions accessing stack require stack
realignment:
+ FOR_EACH_BB (bb)
+ {
+ rtx insn;
+ FOR_BB_INSNS (bb, insn)
+ if (NONDEBUG_INSN_P (insn)
+ && requires_stack_frame_p (insn, prologue_used,
+ set_up_by_prologue))
+ {
+ crtl->stack_realign_needed = stack_realign;
+ crtl->stack_realign_finalized = true;
+ return;
+ }
+ }
This patch checks the actual alignment needed for any instructions
accessing stack. It skips stack realignment if the actual stack alignment
needed is less than or equal to incoming stack alignment.
Can you take look at it?
Thanks.
> H.J.
> ---
> Since DRAP isn't used with -maccumulate-outgoing-args, pr59501-4a.c was
> xfailed due to stack frame access via frame pointer instead of DARP.
> This patch finds the maximum stack alignment from the stack frame access
> instructions and avoids stack realignment if stack alignment needed is
> less than incoming stack boundary.
>
> gcc/
>
> PR target/59501
> * config/i386/i386.c (ix86_finalize_stack_realign_flags): Don't
> realign stack if stack alignment needed is less than incoming
> stack boundary.
>
> gcc/testsuite/
>
> PR target/59501
> * gcc.target/i386/pr59501-4a.c: Remove xfail.
> ---
> gcc/config/i386/i386.c | 84 +++++++++++++++++++-----------
> gcc/testsuite/gcc.target/i386/pr59501-4a.c | 2 +-
> 2 files changed, 56 insertions(+), 30 deletions(-)
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index b041524..28febd0 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -14161,6 +14161,11 @@ ix86_finalize_stack_realign_flags (void)
> add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
> add_to_hard_reg_set (&set_up_by_prologue, Pmode,
> HARD_FRAME_POINTER_REGNUM);
> +
> + /* The preferred stack alignment is the minimum stack alignment. */
> + unsigned int stack_alignment = crtl->preferred_stack_boundary;
> + bool require_stack_frame = false;
> +
> FOR_EACH_BB_FN (bb, cfun)
> {
> rtx_insn *insn;
> @@ -14169,43 +14174,64 @@ ix86_finalize_stack_realign_flags (void)
> && requires_stack_frame_p (insn, prologue_used,
> set_up_by_prologue))
> {
> - if (crtl->stack_realign_needed != stack_realign)
> - recompute_frame_layout_p = true;
> - crtl->stack_realign_needed = stack_realign;
> - crtl->stack_realign_finalized = true;
> - if (recompute_frame_layout_p)
> - ix86_compute_frame_layout ();
> - return;
> + require_stack_frame = true;
> +
> + /* Find the maximum stack alignment. */
> + subrtx_iterator::array_type array;
> + FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
> + if ((MEM_P (*iter)))
> + {
> + unsigned int alignment = MEM_ALIGN (*iter);
> + if (alignment > stack_alignment)
> + stack_alignment = alignment;
> + }
> }
> }
>
> - /* If drap has been set, but it actually isn't live at the start
> - of the function, there is no reason to set it up. */
> - if (crtl->drap_reg)
> + if (require_stack_frame)
> {
> - basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
> - if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
> + /* Stack frame is required. If stack alignment needed is less
> + than incoming stack boundary, don't realign stack. */
> + stack_realign = incoming_stack_boundary < stack_alignment;
> + if (!stack_realign)
> {
> - crtl->drap_reg = NULL_RTX;
> - crtl->need_drap = false;
> + crtl->max_used_stack_slot_alignment
> + = incoming_stack_boundary;
> + crtl->stack_alignment_needed
> + = incoming_stack_boundary;
> }
> }
> else
> - cfun->machine->no_drap_save_restore = true;
> -
> - frame_pointer_needed = false;
> - stack_realign = false;
> - crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
> - crtl->stack_alignment_needed = incoming_stack_boundary;
> - crtl->stack_alignment_estimated = incoming_stack_boundary;
> - if (crtl->preferred_stack_boundary > incoming_stack_boundary)
> - crtl->preferred_stack_boundary = incoming_stack_boundary;
> - df_finish_pass (true);
> - df_scan_alloc (NULL);
> - df_scan_blocks ();
> - df_compute_regs_ever_live (true);
> - df_analyze ();
> - recompute_frame_layout_p = true;
> + {
> + /* If drap has been set, but it actually isn't live at the
> + start of the function, there is no reason to set it up. */
> + if (crtl->drap_reg)
> + {
> + basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
> + if (! REGNO_REG_SET_P (DF_LR_IN (bb),
> + REGNO (crtl->drap_reg)))
> + {
> + crtl->drap_reg = NULL_RTX;
> + crtl->need_drap = false;
> + }
> + }
> + else
> + cfun->machine->no_drap_save_restore = true;
> +
> + frame_pointer_needed = false;
> + stack_realign = false;
> + crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
> + crtl->stack_alignment_needed = incoming_stack_boundary;
> + crtl->stack_alignment_estimated = incoming_stack_boundary;
> + if (crtl->preferred_stack_boundary > incoming_stack_boundary)
> + crtl->preferred_stack_boundary = incoming_stack_boundary;
> + df_finish_pass (true);
> + df_scan_alloc (NULL);
> + df_scan_blocks ();
> + df_compute_regs_ever_live (true);
> + df_analyze ();
> + recompute_frame_layout_p = true;
> + }
> }
>
> if (crtl->stack_realign_needed != stack_realign)
> diff --git a/gcc/testsuite/gcc.target/i386/pr59501-4a.c b/gcc/testsuite/gcc.target/i386/pr59501-4a.c
> index 5c3cb68..908c7f4 100644
> --- a/gcc/testsuite/gcc.target/i386/pr59501-4a.c
> +++ b/gcc/testsuite/gcc.target/i386/pr59501-4a.c
> @@ -5,4 +5,4 @@
> #include "pr59501-3a.c"
>
> /* Verify no dynamic realignment is performed. */
> -/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not "and\[^\n\r]*sp" } } */
> --
> 2.9.4
>
--
H.J.
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2017-07-14 11:46 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-12-20 16:07 [PATCH] Improve i?86/x86_64 prologue_and_epilogue for leaf functions (PR target/59501) Jakub Jelinek
2013-12-20 16:10 ` H.J. Lu
2013-12-22 21:14 ` Jan Hubicka
2017-07-07 16:58 ` H.J. Lu
2017-07-08 0:57 ` [PATCH] i386: Avoid stack realignment if possible H.J. Lu
2017-07-14 11:46 ` H.J. Lu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).